Merge pull request 'api2021 part I' (#4) from api2021 into master
Reviewed-on: #4
This commit is contained in:
commit
848cdf9c03
|
@ -0,0 +1,21 @@
|
|||
# EditorConfig
|
||||
# https://editorconfig.org/
|
||||
# https://editorconfig-specification.readthedocs.io/
|
||||
|
||||
# top-most EditorConfig file
|
||||
root = true
|
||||
|
||||
# Unix-style newlines with a newline ending every file
|
||||
[*]
|
||||
end_of_line = lf
|
||||
insert_final_newline = true
|
||||
|
||||
# Tab indentation, displayed 4 columns wide
|
||||
[*.{h,cpp,html,css,js,conf,txt}]
|
||||
indent_style = tab
|
||||
indent_size = 4
|
||||
|
||||
# Tab indentation, displayed 4 columns wide
|
||||
[Makefile]
|
||||
indent_style = tab
|
||||
indent_size = 4
|
|
@ -2,4 +2,6 @@
|
|||
.project
|
||||
.settings/
|
||||
*.o
|
||||
*.a
|
||||
src/pikotools.a
|
||||
tests/tests
|
||||
m
|
||||
|
|
88
Makefile
88
Makefile
|
@ -1,76 +1,58 @@
|
|||
# Makefile for GNU make
|
||||
|
||||
ifndef CXX
|
||||
CXX = clang++
|
||||
endif
|
||||
|
||||
ifndef CXXFLAGS
|
||||
CXXFLAGS = -Wall -O2 -I/usr/local/include
|
||||
endif
|
||||
|
||||
ifndef LDFLAGS
|
||||
LDFLAGS = -L/usr/local/lib
|
||||
endif
|
||||
|
||||
ifndef AR
|
||||
AR = ar
|
||||
endif
|
||||
|
||||
#CXX=g++5
|
||||
#CXXFLAGS=-Wall -O0 -g3 -gdwarf-2 -std=c++14
|
||||
|
||||
export CXX
|
||||
export CXXFLAGS
|
||||
export LDFLAGS
|
||||
export AR
|
||||
|
||||
|
||||
all: space mainparser mainspaceparser utf8 date convert log
|
||||
all: src
|
||||
|
||||
|
||||
src: FORCE
|
||||
$(MAKE) -C src
|
||||
|
||||
|
||||
space: FORCE
|
||||
@cd space ; $(MAKE) -e
|
||||
tests: FORCE
|
||||
$(MAKE) -C src
|
||||
$(MAKE) -C tests
|
||||
|
||||
mainparser: FORCE
|
||||
@cd mainparser ; $(MAKE) -e
|
||||
|
||||
mainspaceparser: FORCE
|
||||
@cd mainspaceparser ; $(MAKE) -e
|
||||
tests-gcc10: FORCE
|
||||
env CXX=g++10 CXXFLAGS="-Wl,-rpath=/usr/local/lib/gcc10/ -Wall -pedantic -O0 -g3 -std=c++20 -fmax-errors=1 -I../src -I/usr/local/include" $(MAKE) -C src
|
||||
env CXX=g++10 CXXFLAGS="-Wl,-rpath=/usr/local/lib/gcc10/ -Wall -pedantic -O0 -g3 -std=c++20 -fmax-errors=1 -I../src -I/usr/local/include" $(MAKE) -C tests
|
||||
|
||||
utf8: FORCE
|
||||
@cd utf8 ; $(MAKE) -e
|
||||
|
||||
date: FORCE
|
||||
@cd date ; $(MAKE) -e
|
||||
tests-clang: FORCE
|
||||
env CXX=clang++ CXXFLAGS="-Wall -pedantic -O0 -g3 -std=c++20 -I../src -I/usr/local/include" $(MAKE) -C src
|
||||
env CXX=clang++ CXXFLAGS="-Wall -pedantic -O0 -g3 -std=c++20 -I../src -I/usr/local/include" $(MAKE) -C tests
|
||||
|
||||
convert: FORCE
|
||||
@cd convert ; $(MAKE) -e
|
||||
|
||||
log: FORCE
|
||||
@cd log ; $(MAKE) -e
|
||||
tests-clang-sa: FORCE
|
||||
env CXX=clang++ CXXFLAGS="-fsanitize=address -Wall -pedantic -O0 -g3 -std=c++20 -I../src -I/usr/local/include" $(MAKE) -C src
|
||||
env CXX=clang++ CXXFLAGS="-fsanitize=address -Wall -pedantic -O0 -g3 -std=c++20 -I../src -I/usr/local/include" $(MAKE) -C tests
|
||||
|
||||
|
||||
tests-clang-sm: FORCE
|
||||
env CXX=clang++ CXXFLAGS="-fsanitize=memory -Wall -pedantic -O3 -g3 -std=c++20 -I../src -I/usr/local/include" $(MAKE) -C src
|
||||
env CXX=clang++ CXXFLAGS="-fsanitize=memory -Wall -pedantic -O3 -g3 -std=c++20 -I../src -I/usr/local/include" $(MAKE) -C tests
|
||||
|
||||
|
||||
tests-clang-su: FORCE
|
||||
env CXX=clang++ CXXFLAGS="-fsanitize=undefined -Wall -pedantic -O3 -g3 -std=c++20 -I../src -I/usr/local/include" $(MAKE) -C src
|
||||
env CXX=clang++ CXXFLAGS="-fsanitize=undefined -Wall -pedantic -O3 -g3 -std=c++20 -I../src -I/usr/local/include" $(MAKE) -C tests
|
||||
|
||||
|
||||
clean: FORCE
|
||||
$(MAKE) -C src clean
|
||||
$(MAKE) -C tests clean
|
||||
|
||||
|
||||
depend: FORCE
|
||||
$(MAKE) -C src depend
|
||||
$(MAKE) -C tests depend
|
||||
|
||||
|
||||
|
||||
|
||||
FORCE:
|
||||
|
||||
|
||||
clean:
|
||||
@cd space ; $(MAKE) -e clean
|
||||
@cd mainparser ; $(MAKE) -e clean
|
||||
@cd mainspaceparser ; $(MAKE) -e clean
|
||||
@cd utf8 ; $(MAKE) -e clean
|
||||
@cd date ; $(MAKE) -e clean
|
||||
@cd convert ; $(MAKE) -e clean
|
||||
@cd log ; $(MAKE) -e clean
|
||||
|
||||
depend:
|
||||
@cd space ; $(MAKE) -e depend
|
||||
@cd mainparser ; $(MAKE) -e depend
|
||||
@cd mainspaceparser ; $(MAKE) -e depend
|
||||
@cd utf8 ; $(MAKE) -e depend
|
||||
@cd date ; $(MAKE) -e depend
|
||||
@cd convert ; $(MAKE) -e depend
|
||||
@cd log ; $(MAKE) -e depend
|
||||
|
|
|
@ -1,27 +0,0 @@
|
|||
# Builds the static library convert.a from the object files listed in $(o).
# CXX, CXXFLAGS and AR are taken from the environment / the invoking make.

# Makefile.o.dep defines $(o); it is regenerated by the 'depend' target.
include Makefile.o.dep

libname=convert.a

# These targets do not produce files of the same name.
.PHONY: all depend clean

all: $(libname)

$(libname): $(o)
	$(AR) rcs $(libname) $(o)


%.o: %.cpp
	$(CXX) -c $(CXXFLAGS) -I.. $<


# Regenerates Makefile.dep (header dependencies) and Makefile.o.dep (object list).
# printf/sed/tr are used instead of 'echo -n', which is not portable across shells.
depend:
	makedepend -Y. -I.. -f- *.cpp > Makefile.dep
	printf 'o = ' > Makefile.o.dep
	ls -1 *.cpp | sed -E 's/\.cpp$$/.o/' | tr '\n' ' ' >> Makefile.o.dep


clean:
	rm -f *.o
	rm -f $(libname)


# Header dependencies produced by makedepend.
include Makefile.dep
|
|
@ -1,5 +0,0 @@
|
|||
# DO NOT DELETE
|
||||
|
||||
inttostr.o: inttostr.h
|
||||
misc.o: misc.h text.h
|
||||
text.o: text.h
|
|
@ -1 +0,0 @@
|
|||
o = inttostr.o misc.o text.o
|
196
convert/text.cpp
196
convert/text.cpp
|
@ -1,196 +0,0 @@
|
|||
/*
|
||||
* This file is a part of PikoTools
|
||||
* and is distributed under the (new) BSD licence.
|
||||
* Author: Tomasz Sowa <t.sowa@ttmath.org>
|
||||
*/
|
||||
|
||||
/*
|
||||
* Copyright (c) 2017-2018, Tomasz Sowa
|
||||
* All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are met:
|
||||
*
|
||||
* * Redistributions of source code must retain the above copyright notice,
|
||||
* this list of conditions and the following disclaimer.
|
||||
*
|
||||
* * Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
*
|
||||
* * Neither the name Tomasz Sowa nor the names of contributors to this
|
||||
* project may be used to endorse or promote products derived
|
||||
* from this software without specific prior written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
|
||||
* THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#include <cstddef>
|
||||
#include "text.h"
|
||||
|
||||
|
||||
namespace PT
|
||||
{
|
||||
|
||||
// Additional white characters, kept in ascending order because IsWhite()
// looks them up with a binary search.
// Space (32), tab (9) and new line (10) are deliberately absent:
// IsWhite() handles them separately.
static const wchar_t white_chars_table[] = {
	0x000B,	// LINE TABULATION (vertical tabulation)
	0x000C,	// FORM FEED (FF)
	0x000D,	// CARRIAGE RETURN (CR) - a character at the end in a dos text file
	0x0085,	// NEXT LINE (NEL)
	0x00A0,	// NO-BREAK SPACE (old name: NON-BREAKING SPACE)
	0x1680,	// OGHAM SPACE MARK
	0x180E,	// MONGOLIAN VOWEL SEPARATOR
	0x2000,	// EN QUAD
	0x2001,	// EM QUAD
	0x2002,	// EN SPACE
	0x2003,	// EM SPACE
	0x2004,	// THREE-PER-EM SPACE
	0x2005,	// FOUR-PER-EM SPACE
	0x2006,	// SIX-PER-EM SPACE
	0x2007,	// FIGURE SPACE
	0x2008,	// PUNCTUATION SPACE
	0x2009,	// THIN SPACE
	0x200A,	// HAIR SPACE
	0x2028,	// LINE SEPARATOR
	0x2029,	// PARAGRAPH SEPARATOR
	0x202F,	// NARROW NO-BREAK SPACE
	0x205F,	// MEDIUM MATHEMATICAL SPACE
	0x3000,	// IDEOGRAPHIC SPACE
	0xFEFF,	// ZERO WIDTH NO-BREAK SPACE
};


/*
	Returns true when c is a white character.

	Space (32) and tab (9) are always white.
	New line (10) is white only when treat_new_line_as_white is true.
	The characters from white_chars_table are consulted only when
	check_additional_chars is true.
*/
bool IsWhite(wchar_t c, bool check_additional_chars, bool treat_new_line_as_white)
{
	// the two most common white characters are tested first
	if( c == 32 || c == 9 )
		return true;

	if( c == 10 )
		return treat_new_line_as_white;

	if( !check_additional_chars )
		return false;

	// binary search over the half-open index range [low, high)
	std::size_t low  = 0;
	std::size_t high = sizeof(white_chars_table) / sizeof(white_chars_table[0]);

	while( low < high )
	{
		const std::size_t middle = low + (high - low) / 2;
		const wchar_t candidate = white_chars_table[middle];

		if( candidate == c )
			return true;

		if( candidate < c )
			low = middle + 1;
		else
			high = middle;
	}

	return false;
}
|
||||
|
||||
|
||||
|
||||
/*
	Tests whether c is a digit in the given base.

	Recognised characters are '0'-'9', 'a'-'f' and 'A'-'F'.
	When digit is not null it receives the numeric value of c
	(0 for an unrecognised character), also when the value is not
	smaller than base and the function returns false.
*/
bool IsDigit(wchar_t c, int base, int * digit)
{
	int value = 0;
	bool recognised = true;

	if( c >= '0' && c <= '9' )
		value = c - '0';
	else
	if( c >= 'a' && c <= 'f' )
		value = c - 'a' + 10;
	else
	if( c >= 'A' && c <= 'F' )
		value = c - 'A' + 10;
	else
		recognised = false;

	if( digit )
		*digit = value;

	return recognised && value < base;
}
|
||||
|
||||
|
||||
|
||||
// Converts an ASCII capital letter ('A'-'Z') to its small counterpart,
// every other character is returned unchanged.
wchar_t ToLower(wchar_t c)
{
	const bool is_capital = ( c >= 'A' && c <= 'Z' );

	return is_capital ? static_cast<wchar_t>(c - 'A' + 'a') : c;
}
|
||||
|
||||
|
||||
// Converts an ASCII small letter ('a'-'z') to its capital counterpart,
// every other character is returned unchanged.
wchar_t ToUpper(wchar_t c)
{
	const bool is_small = ( c >= 'a' && c <= 'z' );

	return is_small ? static_cast<wchar_t>(c - 'a' + 'A') : c;
}
|
||||
|
||||
|
||||
void ToLower(std::wstring & s)
|
||||
{
|
||||
std::wstring::size_type i;
|
||||
|
||||
for(i=0 ; i<s.size() ; ++i)
|
||||
s[i] = ToLower(s[i]);
|
||||
}
|
||||
|
||||
|
||||
void ToUpper(std::wstring & s)
|
||||
{
|
||||
std::wstring::size_type i;
|
||||
|
||||
for(i=0 ; i<s.size() ; ++i)
|
||||
s[i] = ToUpper(s[i]);
|
||||
}
|
||||
|
||||
|
||||
|
||||
|
||||
}
|
||||
|
||||
|
269
convert/text.h
269
convert/text.h
|
@ -1,269 +0,0 @@
|
|||
/*
|
||||
* This file is a part of PikoTools
|
||||
* and is distributed under the (new) BSD licence.
|
||||
* Author: Tomasz Sowa <t.sowa@ttmath.org>
|
||||
*/
|
||||
|
||||
/*
|
||||
* Copyright (c) 2017-2018, Tomasz Sowa
|
||||
* All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are met:
|
||||
*
|
||||
* * Redistributions of source code must retain the above copyright notice,
|
||||
* this list of conditions and the following disclaimer.
|
||||
*
|
||||
* * Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
*
|
||||
* * Neither the name Tomasz Sowa nor the names of contributors to this
|
||||
* project may be used to endorse or promote products derived
|
||||
* from this software without specific prior written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
|
||||
* THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#ifndef headerfile_picotools_convert_text
|
||||
#define headerfile_picotools_convert_text
|
||||
|
||||
#include <string>
|
||||
|
||||
|
||||
|
||||
namespace PT
|
||||
{
|
||||
|
||||
bool IsWhite(wchar_t c, bool check_additional_chars = true, bool treat_new_line_as_white = true);
|
||||
|
||||
|
||||
bool IsDigit(wchar_t c, int base = 10, int * digit = 0);
|
||||
|
||||
|
||||
template<class CharType>
|
||||
CharType * SkipWhite(CharType * str, bool check_additional_chars = true, bool treat_new_line_as_white = true)
|
||||
{
|
||||
while( IsWhite(static_cast<wchar_t>(*str), check_additional_chars, treat_new_line_as_white) )
|
||||
{
|
||||
str += 1;
|
||||
}
|
||||
|
||||
return str;
|
||||
}
|
||||
|
||||
|
||||
/*
|
||||
*
|
||||
* str_end is pointing at the end of the string (the last item + one)
|
||||
*
|
||||
* return value is a pointer to the first white character after a non-white character at the end
|
||||
* or to the last+one if there is no any white characters
|
||||
*
|
||||
*/
|
||||
template<class CharType>
|
||||
CharType * SkipWhiteFromBack(CharType * str_begin, CharType * str_end, bool check_additional_chars = true, bool treat_new_line_as_white = true)
|
||||
{
|
||||
while( str_end > str_begin && IsWhite(static_cast<wchar_t>(*(str_end-1)), check_additional_chars, treat_new_line_as_white) )
|
||||
{
|
||||
str_end -= 1;
|
||||
}
|
||||
|
||||
return str_end;
|
||||
}
|
||||
|
||||
|
||||
/*
 * A variant for a null-terminated string: the terminating zero is located
 * first and then the range version of SkipWhiteFromBack() is called.
 */
template<class CharType>
CharType * SkipWhiteFromBack(CharType * str, bool check_additional_chars = true, bool treat_new_line_as_white = true)
{
	CharType * str_end = str;

	while( *str_end != 0 )
		++str_end;

	return SkipWhiteFromBack(str, str_end, check_additional_chars, treat_new_line_as_white);
}
|
||||
|
||||
|
||||
wchar_t ToLower(wchar_t c);
|
||||
wchar_t ToUpper(wchar_t c);
|
||||
|
||||
|
||||
// change to a template
|
||||
void ToLower(std::wstring & s);
|
||||
void ToUpper(std::wstring & s);
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
/*
 * Compares two null-terminated strings, each character is passed through
 * ToLower() before the comparison.
 *
 * Returns zero when both strings are equal, otherwise the difference between
 * the first pair of characters (after ToLower()) which are not the same.
 */
template<class StringType1, class StringType2>
int CompareNoCase(const StringType1 * str1, const StringType2 * str2)
{
	for( ; *str1 && *str2 ; ++str1, ++str2 )
	{
		if( ToLower(*str1) != ToLower(*str2) )
			break;
	}

	if( *str1 == 0 && *str2 == 0 )
		return 0;

	const int left  = (int)ToLower(*str1);
	const int right = (int)ToLower(*str2);

	return left - right;
}
|
||||
|
||||
|
||||
/*
 * A variant for string objects: both arguments have to provide c_str(),
 * the pointers are forwarded to the pointer version of CompareNoCase().
 */
template<class StringType1, class StringType2>
int CompareNoCase(const StringType1 & str1, const StringType2 & str2)
{
	const int result = CompareNoCase(str1.c_str(), str2.c_str());

	return result;
}
|
||||
|
||||
|
||||
/*
 * A distinctly named entry point taking pointers only,
 * it forwards to CompareNoCase(str1, str2).
 */
template<class StringType1, class StringType2>
int CompareNoCasep(const StringType1 * str1, const StringType2 * str2)
{
	const int result = CompareNoCase(str1, str2);

	return result;
}
|
||||
|
||||
|
||||
|
||||
|
||||
template<class StringType1, class StringType2>
|
||||
int CompareNoCase(const StringType1 * str1_begin, const StringType1 * str1_end, const StringType2 * str2)
|
||||
{
|
||||
while( str1_begin < str1_end && *str2 && ToLower(*str1_begin) == ToLower(*str2) )
|
||||
{
|
||||
++str1_begin;
|
||||
++str2;
|
||||
}
|
||||
|
||||
if( str1_begin == str1_end && *str2 == 0 )
|
||||
return 0;
|
||||
|
||||
wchar_t str1_char = 0;
|
||||
|
||||
if( str1_begin < str1_end )
|
||||
str1_char = *str1_begin;
|
||||
|
||||
return (int)ToLower(str1_char) - (int)ToLower(*str2);
|
||||
}
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
/*
 * Returns true when CompareNoCase() reports that the two
 * null-terminated strings are equal.
 */
template<class StringType1, class StringType2>
bool EqualNoCase(const StringType1 * str1, const StringType2 * str2)
{
	const int result = CompareNoCase(str1, str2);

	return result == 0;
}
|
||||
|
||||
|
||||
/*
 * A variant for string objects: both arguments have to provide c_str(),
 * the pointers are forwarded to the pointer version of EqualNoCase().
 */
template<class StringType1, class StringType2>
bool EqualNoCase(const StringType1 & str1, const StringType2 & str2)
{
	const bool are_equal = EqualNoCase(str1.c_str(), str2.c_str());

	return are_equal;
}
|
||||
|
||||
|
||||
/*
 * A distinctly named entry point taking pointers only,
 * it forwards to EqualNoCase(str1, str2).
 */
template<class StringType1, class StringType2>
bool EqualNoCasep(const StringType1 * str1, const StringType2 * str2)
{
	const bool are_equal = EqualNoCase(str1, str2);

	return are_equal;
}
|
||||
|
||||
|
||||
|
||||
/*
 * Returns true when the range version of CompareNoCase() reports that
 * [str1_begin, str1_end) and the null-terminated str2 are equal.
 */
template<class StringType1, class StringType2>
bool EqualNoCase(const StringType1 * str1_begin, const StringType1 * str1_end, const StringType2 * str2)
{
	const int result = CompareNoCase(str1_begin, str1_end, str2);

	return result == 0;
}
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
/*
 * Returns true when long_str begins with short_str (a prefix test).
 * Characters are compared exactly, after a conversion to wchar_t.
 * An empty short_str is a prefix of every string.
 */
template<class StringType1, class StringType2>
bool IsSubStringp(const StringType1 * short_str, const StringType2 * long_str)
{
	for( ; *short_str && *long_str ; ++short_str, ++long_str )
	{
		if( wchar_t(*short_str) != wchar_t(*long_str) )
			break;
	}

	// the whole short_str has been matched only if we are at its end
	return *short_str == 0;
}
|
||||
|
||||
|
||||
/*
 * The pointer overload, it forwards to IsSubStringp().
 */
template<class StringType1, class StringType2>
bool IsSubString(const StringType1 * short_str, const StringType2 * long_str)
{
	const bool found = IsSubStringp(short_str, long_str);

	return found;
}
|
||||
|
||||
|
||||
/*
 * A variant for string objects: both arguments have to provide c_str(),
 * the pointers are forwarded to IsSubStringp().
 */
template<class StringType1, class StringType2>
bool IsSubString(const StringType1 & short_str, const StringType2 & long_str)
{
	const bool found = IsSubStringp(short_str.c_str(), long_str.c_str());

	return found;
}
|
||||
|
||||
|
||||
/*
 * Returns true when long_str begins with short_str (a prefix test),
 * each character is passed through ToLower() before the comparison.
 * An empty short_str is a prefix of every string.
 */
template<class StringType1, class StringType2>
bool IsSubStringNoCasep(const StringType1 * short_str, const StringType2 * long_str)
{
	for( ; *short_str && *long_str ; ++short_str, ++long_str )
	{
		if( ToLower(*short_str) != ToLower(*long_str) )
			break;
	}

	// the whole short_str has been matched only if we are at its end
	return *short_str == 0;
}
|
||||
|
||||
|
||||
/*
 * The pointer overload, it forwards to IsSubStringNoCasep().
 */
template<class StringType1, class StringType2>
bool IsSubStringNoCase(const StringType1 * short_str, const StringType2 * long_str)
{
	const bool found = IsSubStringNoCasep(short_str, long_str);

	return found;
}
|
||||
|
||||
|
||||
/*
 * A variant for string objects: both arguments have to provide c_str(),
 * the pointers are forwarded to IsSubStringNoCasep().
 */
template<class StringType1, class StringType2>
bool IsSubStringNoCase(const StringType1 & short_str, const StringType2 & long_str)
{
	const bool found = IsSubStringNoCasep(short_str.c_str(), long_str.c_str());

	return found;
}
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
}
|
||||
|
||||
|
||||
#endif
|
|
@ -1,27 +0,0 @@
|
|||
# Builds the static library date.a from the object files listed in $(o).
# CXX, CXXFLAGS and AR are taken from the environment / the invoking make.

# Makefile.o.dep defines $(o); it is regenerated by the 'depend' target.
include Makefile.o.dep

libname=date.a

# These targets do not produce files of the same name.
.PHONY: all depend clean

all: $(libname)

$(libname): $(o)
	$(AR) rcs $(libname) $(o)


%.o: %.cpp
	$(CXX) -c $(CXXFLAGS) -I.. $<


# Regenerates Makefile.dep (header dependencies) and Makefile.o.dep (object list).
# printf/sed/tr are used instead of 'echo -n', which is not portable across shells.
depend:
	makedepend -Y. -I.. -f- *.cpp > Makefile.dep
	printf 'o = ' > Makefile.o.dep
	ls -1 *.cpp | sed -E 's/\.cpp$$/.o/' | tr '\n' ' ' >> Makefile.o.dep


clean:
	rm -f *.o
	rm -f $(libname)


# Header dependencies produced by makedepend.
include Makefile.dep
|
|
@ -1,3 +0,0 @@
|
|||
# DO NOT DELETE
|
||||
|
||||
date.o: date.h ../convert/inttostr.h
|
|
@ -1 +0,0 @@
|
|||
o = date.o
|
27
log/Makefile
27
log/Makefile
|
@ -1,27 +0,0 @@
|
|||
# Builds the static library log.a from the object files listed in $(o).
# CXX, CXXFLAGS and AR are taken from the environment / the invoking make.

# Makefile.o.dep defines $(o); it is regenerated by the 'depend' target.
include Makefile.o.dep

libname=log.a

# These targets do not produce files of the same name.
.PHONY: all depend clean

all: $(libname)

$(libname): $(o)
	$(AR) rcs $(libname) $(o)


%.o: %.cpp
	$(CXX) -c $(CXXFLAGS) -I.. $<


# Regenerates Makefile.dep (header dependencies) and Makefile.o.dep (object list).
# printf/sed/tr are used instead of 'echo -n', which is not portable across shells.
depend:
	makedepend -Y. -I.. -f- *.cpp > Makefile.dep
	printf 'o = ' > Makefile.o.dep
	ls -1 *.cpp | sed -E 's/\.cpp$$/.o/' | tr '\n' ' ' >> Makefile.o.dep


clean:
	rm -f *.o
	rm -f $(libname)


# Header dependencies produced by makedepend.
include Makefile.dep
|
|
@ -1,9 +0,0 @@
|
|||
# DO NOT DELETE
|
||||
|
||||
filelog.o: filelog.h ../textstream/textstream.h ../space/space.h
|
||||
filelog.o: ../textstream/types.h ../date/date.h ../convert/inttostr.h
|
||||
filelog.o: ../membuffer/membuffer.h ../textstream/types.h ../utf8/utf8.h
|
||||
log.o: log.h ../textstream/textstream.h ../space/space.h
|
||||
log.o: ../textstream/types.h ../date/date.h ../convert/inttostr.h
|
||||
log.o: ../membuffer/membuffer.h ../textstream/types.h filelog.h
|
||||
log.o: ../utf8/utf8.h
|
|
@ -1 +0,0 @@
|
|||
o = filelog.o log.o
|
|
@ -1,27 +0,0 @@
|
|||
# Builds the static library mainparser.a from the object files listed in $(o).
# CXX, CXXFLAGS and AR are taken from the environment / the invoking make.

# Makefile.o.dep defines $(o); it is regenerated by the 'depend' target.
include Makefile.o.dep

libname=mainparser.a

# These targets do not produce files of the same name.
.PHONY: all depend clean

all: $(libname)

$(libname): $(o)
	$(AR) rcs $(libname) $(o)


%.o: %.cpp
	$(CXX) -c $(CXXFLAGS) -I.. $<


# Regenerates Makefile.dep (header dependencies) and Makefile.o.dep (object list).
# printf/sed/tr are used instead of 'echo -n', which is not portable across shells.
depend:
	makedepend -Y. -I.. -f- *.cpp > Makefile.dep
	printf 'o = ' > Makefile.o.dep
	ls -1 *.cpp | sed -E 's/\.cpp$$/.o/' | tr '\n' ' ' >> Makefile.o.dep


clean:
	rm -f *.o
	rm -f $(libname)


# Header dependencies produced by makedepend.
include Makefile.dep
|
|
@ -1,3 +0,0 @@
|
|||
# DO NOT DELETE
|
||||
|
||||
mainparser.o: mainparser.h
|
|
@ -1 +0,0 @@
|
|||
o = mainparser.o
|
|
@ -1,237 +0,0 @@
|
|||
/*
|
||||
* This file is a part of PikoTools
|
||||
* and is distributed under the (new) BSD licence.
|
||||
* Author: Tomasz Sowa <t.sowa@ttmath.org>
|
||||
*/
|
||||
|
||||
/*
|
||||
* Copyright (c) 2011-2012, Tomasz Sowa
|
||||
* All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are met:
|
||||
*
|
||||
* * Redistributions of source code must retain the above copyright notice,
|
||||
* this list of conditions and the following disclaimer.
|
||||
*
|
||||
* * Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
*
|
||||
* * Neither the name Tomasz Sowa nor the names of contributors to this
|
||||
* project may be used to endorse or promote products derived
|
||||
* from this software without specific prior written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
|
||||
* THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
|
||||
#include "mainparser.h"
|
||||
#include <string.h>
|
||||
|
||||
|
||||
namespace PT
|
||||
{
|
||||
|
||||
|
||||
// Creates a parser without any arguments attached,
// Set() has to be called before parsing.
MainParser::MainParser() : argsize(0), arg(0)
{
	Reset();
}
|
||||
|
||||
|
||||
|
||||
// Creates a parser for the given arguments,
// the final state is the same as after calling Set(argc, argv).
MainParser::MainParser(int argc, const char ** argv) : argsize(argc), arg(argv)
{
	Reset();
}
|
||||
|
||||
|
||||
|
||||
void MainParser::Set(int argc, const char ** argv)
|
||||
{
|
||||
argsize = argc;
|
||||
arg = argv;
|
||||
Reset();
|
||||
}
|
||||
|
||||
|
||||
|
||||
void MainParser::Reset()
|
||||
{
|
||||
argindex = 1;
|
||||
offset = 0;
|
||||
has_single_param = false;
|
||||
has_double_param = false;
|
||||
}
|
||||
|
||||
|
||||
|
||||
char MainParser::GetSingleParam()
|
||||
{
|
||||
if( !has_single_param )
|
||||
return 0;
|
||||
|
||||
if( last_single_param != 0 )
|
||||
return last_single_param;
|
||||
|
||||
Advance();
|
||||
|
||||
if( argindex >= argsize )
|
||||
return 0;
|
||||
|
||||
last_single_param = arg[argindex][offset];
|
||||
offset += 1;
|
||||
|
||||
return last_single_param;
|
||||
}
|
||||
|
||||
|
||||
|
||||
bool MainParser::IsSingleParam(char c)
|
||||
{
|
||||
return GetSingleParam() == c;
|
||||
}
|
||||
|
||||
|
||||
const char * MainParser::GetDoubleParam()
|
||||
{
|
||||
empty = 0;
|
||||
|
||||
if( !has_double_param )
|
||||
return ∅
|
||||
|
||||
if( last_double_param != &empty )
|
||||
return last_double_param;
|
||||
|
||||
Advance();
|
||||
|
||||
if( argindex >= argsize )
|
||||
return ∅
|
||||
|
||||
last_double_param = &arg[argindex][offset];
|
||||
offset = 0;
|
||||
argindex += 1;
|
||||
|
||||
return last_double_param;
|
||||
}
|
||||
|
||||
|
||||
bool MainParser::IsDoubleParam(const char * param)
|
||||
{
|
||||
return strcmp(GetDoubleParam(), param) == 0;
|
||||
}
|
||||
|
||||
|
||||
const char * MainParser::GetValue()
|
||||
{
|
||||
empty = 0;
|
||||
Advance();
|
||||
|
||||
if( argindex >= argsize )
|
||||
return ∅
|
||||
|
||||
const char * value = &arg[argindex][offset];
|
||||
offset = 0;
|
||||
argindex += 1;
|
||||
has_single_param = false;
|
||||
has_double_param = false;
|
||||
|
||||
return value;
|
||||
}
|
||||
|
||||
|
||||
|
||||
bool MainParser::NextParam()
|
||||
{
|
||||
bool was_single_param = has_single_param;
|
||||
has_single_param = false;
|
||||
has_double_param = false;
|
||||
last_single_param = 0;
|
||||
last_double_param = ∅
|
||||
empty = 0;
|
||||
|
||||
if( Advance() )
|
||||
was_single_param = false;
|
||||
|
||||
if( argindex >= argsize )
|
||||
return false;
|
||||
|
||||
if( arg[argindex][offset]=='-' )
|
||||
{
|
||||
if( arg[argindex][offset+1]=='-' )
|
||||
{
|
||||
has_double_param = true;
|
||||
offset += 2;
|
||||
}
|
||||
else
|
||||
{
|
||||
has_single_param = true;
|
||||
offset += 1;
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
if( was_single_param )
|
||||
has_single_param = true;
|
||||
}
|
||||
|
||||
return has_single_param || has_double_param;
|
||||
}
|
||||
|
||||
|
||||
|
||||
bool MainParser::IsEnd()
|
||||
{
|
||||
Advance();
|
||||
|
||||
return argindex >= argsize;
|
||||
}
|
||||
|
||||
|
||||
|
||||
bool MainParser::HasSingleParam()
|
||||
{
|
||||
return has_single_param;
|
||||
}
|
||||
|
||||
|
||||
|
||||
bool MainParser::HasDoubleParam()
|
||||
{
|
||||
return has_double_param;
|
||||
}
|
||||
|
||||
|
||||
|
||||
bool MainParser::Advance()
|
||||
{
|
||||
bool was_incremented = false;
|
||||
|
||||
while( argindex < argsize && arg[argindex][offset] == 0 )
|
||||
{
|
||||
offset = 0;
|
||||
argindex += 1;
|
||||
was_incremented = true;
|
||||
}
|
||||
|
||||
return was_incremented;
|
||||
}
|
||||
|
||||
|
||||
} // namespace
|
||||
|
||||
|
|
@ -1,132 +0,0 @@
|
|||
/*
|
||||
* This file is a part of PikoTools
|
||||
* and is distributed under the (new) BSD licence.
|
||||
* Author: Tomasz Sowa <t.sowa@ttmath.org>
|
||||
*/
|
||||
|
||||
/*
|
||||
* Copyright (c) 2011-2012, Tomasz Sowa
|
||||
* All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are met:
|
||||
*
|
||||
* * Redistributions of source code must retain the above copyright notice,
|
||||
* this list of conditions and the following disclaimer.
|
||||
*
|
||||
* * Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
*
|
||||
* * Neither the name Tomasz Sowa nor the names of contributors to this
|
||||
* project may be used to endorse or promote products derived
|
||||
* from this software without specific prior written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
|
||||
* THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#ifndef headerfile_picotools_mainparser_mainparser
|
||||
#define headerfile_picotools_mainparser_mainparser
|
||||
|
||||
|
||||
namespace PT
|
||||
{
|
||||
|
||||
|
||||
/*
|
||||
a very little parser for main(int argc, char ** argv) parameters
|
||||
look in sample/sample.cpp how to use the parser
|
||||
*/
|
||||
class MainParser
|
||||
{
|
||||
public:
|
||||
|
||||
MainParser();
|
||||
MainParser(int argc, const char ** argv);
|
||||
|
||||
// setting arguments passed to main(int argc, char ** argv) function
|
||||
void Set(int argc, const char ** argv);
|
||||
|
||||
// reseting the current state of parsing
|
||||
// now you can start parsing from the beginning
|
||||
// you don't have to call it for the first time
|
||||
// (is automatically called by the Set method)
|
||||
void Reset();
|
||||
|
||||
// checking if there is a next single or double parameter
|
||||
// this method represents the main loop of checking parameters
|
||||
bool NextParam();
|
||||
|
||||
// returning a single parameter (if exists) or '\0' otherwise
|
||||
// single parameter means a parameter with '-' at the beginning e.g. "-a"
|
||||
// next call to this method (without calling NextParam) returns the same value
|
||||
// this method should be called after NextParam()
|
||||
char GetSingleParam();
|
||||
|
||||
// calling GetSingleParam() and comparign with 'c'
|
||||
bool IsSingleParam(char c);
|
||||
|
||||
// returning a string for a double parameter or an empty string if there is no such a parameter
|
||||
// double parameter means a parameter with '--' at the beginning e.g. "--output"
|
||||
// next call to this method (without calling NextParam) returns the same value
|
||||
// GetDoubleParam() should be called after NextParam()
|
||||
// this method never returns a null pointer -- if there is no a param name (end of the string)
|
||||
// a pointer to en empty string will be returned
|
||||
const char * GetDoubleParam();
|
||||
|
||||
// calling GetDoubleParam() and comparing with 'param'
|
||||
// so you don't have to call strcmp directly
|
||||
bool IsDoubleParam(const char * param);
|
||||
|
||||
// returning a string representing a value
|
||||
// you have to know which parameter requires a value
|
||||
// and if such a parameter is found then use this method to obtain the value
|
||||
// the method advances the current pointer so next call to this method return a next value
|
||||
// you can call GetValue() even when NextParam() has returned false
|
||||
// in such a case this gets you the last values (those at the end of the parameter list)
|
||||
// this method never returns a null pointer -- if there is no a value (end of the string)
|
||||
// a pointer to an empty string will be returned
|
||||
const char * GetValue();
|
||||
|
||||
// returning true if the input string is finished
|
||||
// there are no more parameters or values
|
||||
bool IsEnd();
|
||||
|
||||
// returning true if there is a single parameter
|
||||
// should be called after NextParam()
|
||||
bool HasSingleParam();
|
||||
|
||||
// returning true if there is a double parameter
|
||||
// should be called after NextParam()
|
||||
bool HasDoubleParam();
|
||||
|
||||
private:
|
||||
|
||||
bool Advance();
|
||||
|
||||
int argindex;
|
||||
int offset;
|
||||
int argsize;
|
||||
const char ** arg;
|
||||
char empty;
|
||||
bool has_single_param;
|
||||
bool has_double_param;
|
||||
char last_single_param;
|
||||
const char * last_double_param;
|
||||
};
|
||||
|
||||
|
||||
} // namespace
|
||||
|
||||
|
||||
#endif
|
|
@ -1,10 +0,0 @@
|
|||
output=sample
|
||||
|
||||
all: $(output)
|
||||
|
||||
$(output): sample.cpp ../mainparser.h ../mainparser.cpp
|
||||
g++ -o $(output) sample.cpp ../mainparser.cpp
|
||||
|
||||
clean:
|
||||
rm -f $(output)
|
||||
rm -f $(output).exe
|
|
@ -1,27 +0,0 @@
|
|||
include Makefile.o.dep
|
||||
|
||||
libname=mainspaceparser.a
|
||||
|
||||
all: $(libname)
|
||||
|
||||
$(libname): $(o)
|
||||
$(AR) rcs $(libname) $(o)
|
||||
|
||||
|
||||
%.o: %.cpp
|
||||
$(CXX) -c $(CXXFLAGS) -I.. $<
|
||||
|
||||
|
||||
|
||||
depend:
|
||||
makedepend -Y. -I.. -f- *.cpp > Makefile.dep
|
||||
echo -n "o = " > Makefile.o.dep
|
||||
ls -1 *.cpp | xargs -I foo echo -n foo " " | sed -E "s/([^\.]*)\.cpp[ ]/\1\.o/g" >> Makefile.o.dep
|
||||
|
||||
|
||||
clean:
|
||||
rm -f *.o
|
||||
rm -f $(libname)
|
||||
|
||||
|
||||
include Makefile.dep
|
|
@ -1,6 +0,0 @@
|
|||
# DO NOT DELETE
|
||||
|
||||
mainspaceparser.o: mainspaceparser.h ../space/space.h ../textstream/types.h
|
||||
mainspaceparser.o: ../utf8/utf8.h ../textstream/textstream.h ../date/date.h
|
||||
mainspaceparser.o: ../convert/inttostr.h ../membuffer/membuffer.h
|
||||
mainspaceparser.o: ../textstream/types.h
|
|
@ -1 +0,0 @@
|
|||
o = mainspaceparser.o
|
|
@ -1,297 +0,0 @@
|
|||
/*
|
||||
* This file is a part of PikoTools
|
||||
* and is distributed under the (new) BSD licence.
|
||||
* Author: Tomasz Sowa <t.sowa@ttmath.org>
|
||||
*/
|
||||
|
||||
/*
|
||||
* Copyright (c) 2016-2017, Tomasz Sowa
|
||||
* All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are met:
|
||||
*
|
||||
* * Redistributions of source code must retain the above copyright notice,
|
||||
* this list of conditions and the following disclaimer.
|
||||
*
|
||||
* * Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
*
|
||||
* * Neither the name Tomasz Sowa nor the names of contributors to this
|
||||
* project may be used to endorse or promote products derived
|
||||
* from this software without specific prior written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
|
||||
* THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
|
||||
#include "mainspaceparser.h"
|
||||
#include "utf8/utf8.h"
|
||||
#include <string.h>
|
||||
|
||||
// REMOVE ME
|
||||
#include <iostream>
|
||||
|
||||
|
||||
namespace PT
|
||||
{
|
||||
|
||||
|
||||
// Builds a parser with no Space attached yet, UTF-8 conversion
// switched on and the last status set to status_ok.
MainSpaceParser::MainSpaceParser()
{
	last_status   = status_ok;
	use_utf8      = true;
	options_space = nullptr;
	space         = nullptr;
}
|
||||
|
||||
|
||||
|
||||
// The destructor body is intentionally empty.
MainSpaceParser::~MainSpaceParser()
{
}
|
||||
|
||||
|
||||
void MainSpaceParser::UTF8(bool utf8)
|
||||
{
|
||||
use_utf8 = utf8;
|
||||
}
|
||||
|
||||
|
||||
// Remembers the address of the space which will receive parsed arguments.
// The cached "options" child is dropped; Parse() looks it up again.
void MainSpaceParser::SetSpace(Space & space_ref)
{
	options_space = nullptr;
	space = &space_ref;
}
|
||||
|
||||
|
||||
std::wstring & MainSpaceParser::GetErrorToken()
|
||||
{
|
||||
return last_error_token;
|
||||
}
|
||||
|
||||
|
||||
// Parses the whole argument vector (argv[0] is skipped).
// Returns status_space_not_assigned when SetSpace() was not called,
// otherwise the status left by the last processed argument.
MainSpaceParser::Status MainSpaceParser::Parse(int argc, const char ** argv)
{
	if( space == nullptr )
		return status_space_not_assigned;

	options_space = space->FindSpace(L"options");
	last_status = status_ok;
	last_error_token.clear();

	const size_t arg_count = (size_t)argc;
	size_t index = 1;

	// the per-argument overload advances 'index' on its own
	while( index < arg_count && last_status == status_ok )
		Parse(arg_count, argv, index);

	return last_status;
}
|
||||
|
||||
|
||||
void MainSpaceParser::Parse(size_t argc, const char ** argv, size_t & argv_index)
|
||||
{
|
||||
const char * pchar = argv[argv_index];
|
||||
|
||||
if( *pchar == '-' )
|
||||
{
|
||||
if( *(pchar+1) == '-' )
|
||||
{
|
||||
ParseMultiArgument(argc, argv, argv_index);
|
||||
}
|
||||
else
|
||||
{
|
||||
ParseSingleArgument(argc, argv, argv_index);
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
last_status = status_syntax_error;
|
||||
ConvertStr(pchar, last_error_token);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
void MainSpaceParser::ConvertStr(const char * src, std::wstring & dst)
|
||||
{
|
||||
if( use_utf8 )
|
||||
{
|
||||
PT::UTF8ToWide(src,dst);
|
||||
}
|
||||
else
|
||||
{
|
||||
dst.clear();
|
||||
|
||||
for( ; *src ; ++src )
|
||||
dst += (wchar_t)(unsigned char)*src;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
void MainSpaceParser::ParseSingleArgument(size_t argc, const char ** argv, size_t & argv_index)
|
||||
{
|
||||
ConvertStr(argv[argv_index] + 1, wide_arg);
|
||||
const wchar_t * wide_pchar = wide_arg.c_str();
|
||||
|
||||
temp_list_val.clear();
|
||||
bool was_option = false;
|
||||
argv_index += 1;
|
||||
|
||||
for( ; *wide_pchar && !was_option ; ++wide_pchar )
|
||||
{
|
||||
temp_arg = *wide_pchar;
|
||||
size_t opt_size = RequireOption(temp_arg);
|
||||
|
||||
if( opt_size > 0 )
|
||||
{
|
||||
was_option = true;
|
||||
|
||||
if( *(wide_pchar+1) )
|
||||
{
|
||||
temp_val = wide_pchar + 1;
|
||||
temp_list_val.push_back(temp_val);
|
||||
opt_size -= 1;
|
||||
}
|
||||
|
||||
for( ; opt_size > 0 && argv_index < argc ; --opt_size, ++argv_index)
|
||||
{
|
||||
ConvertStr(argv[argv_index], temp_val);
|
||||
temp_list_val.push_back(temp_val);
|
||||
}
|
||||
|
||||
if( opt_size > 0 )
|
||||
{
|
||||
last_status = status_reading_eof;
|
||||
last_error_token.clear();
|
||||
}
|
||||
}
|
||||
|
||||
temp_val.clear();
|
||||
AddValueToItem(temp_arg, temp_val, temp_list_val);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
void MainSpaceParser::ParseMultiArgument(size_t argc, const char ** argv, size_t & argv_index)
|
||||
{
|
||||
ConvertStr(argv[argv_index] + 2, temp_arg);
|
||||
argv_index += 1;
|
||||
|
||||
size_t opt_size = RequireOption(temp_arg);
|
||||
temp_list_val.clear();
|
||||
|
||||
if( opt_size > 0 )
|
||||
{
|
||||
for( ; opt_size > 0 && argv_index < argc ; --opt_size, ++argv_index)
|
||||
{
|
||||
ConvertStr(argv[argv_index], temp_val);
|
||||
temp_list_val.push_back(temp_val);
|
||||
}
|
||||
|
||||
if( opt_size > 0 )
|
||||
{
|
||||
last_status = status_reading_eof;
|
||||
last_error_token.clear();
|
||||
}
|
||||
}
|
||||
|
||||
temp_val.clear();
|
||||
AddValueToItem(temp_arg, temp_val, temp_list_val);
|
||||
}
|
||||
|
||||
|
||||
|
||||
void MainSpaceParser::AddValueToItem(const std::wstring & name, const std::wstring & empty_value, const std::vector<std::wstring> & list)
|
||||
{
|
||||
std::wstring * val = space->GetFirstValue(name);
|
||||
|
||||
if( !val )
|
||||
{
|
||||
if( list.empty() )
|
||||
space->Add(name, empty_value);
|
||||
else
|
||||
if( list.size() == 1 )
|
||||
space->Add(name, list[0]);
|
||||
else
|
||||
space->table[name] = list; // !! IMPROVE ME there'll be a new api in space
|
||||
}
|
||||
else
|
||||
{
|
||||
PT::Space::Table::iterator i = space->table.find(name);
|
||||
PT::Space::Value * table_value;
|
||||
|
||||
if( i == space->table.end() )
|
||||
{
|
||||
table_value = &space->table[name];
|
||||
table_value->push_back(*val);
|
||||
//space->table_single.erase(name);
|
||||
}
|
||||
else
|
||||
{
|
||||
table_value = &i->second;
|
||||
}
|
||||
|
||||
if( list.empty() )
|
||||
{
|
||||
table_value->push_back(empty_value);
|
||||
}
|
||||
else
|
||||
{
|
||||
for(const auto & list_item : list)
|
||||
table_value->push_back(list_item);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
|
||||
size_t MainSpaceParser::RequireOption(const std::wstring & arg)
|
||||
{
|
||||
size_t res = 0;
|
||||
|
||||
if( options_space )
|
||||
{
|
||||
std::wstring * val = options_space->GetFirstValue(arg);
|
||||
|
||||
if( val )
|
||||
{
|
||||
/*
|
||||
* IMPLEMENT ME
|
||||
* add a converter to convert/inttostr.h
|
||||
*
|
||||
*/
|
||||
|
||||
long res_long = wcstol(val->c_str(), 0, 10);
|
||||
|
||||
if( res_long < 0 )
|
||||
res_long = 0;
|
||||
|
||||
res = (size_t)res_long;
|
||||
|
||||
//std::wcout << L"argument " << arg << L" needs " << res << L" options" << std::endl;
|
||||
}
|
||||
}
|
||||
|
||||
return res;
|
||||
}
|
||||
|
||||
|
||||
|
||||
} // namespace
|
||||
|
||||
|
|
@ -1,27 +0,0 @@
|
|||
include Makefile.o.dep
|
||||
|
||||
libname=space.a
|
||||
|
||||
all: $(libname)
|
||||
|
||||
$(libname): $(o)
|
||||
$(AR) rcs $(libname) $(o)
|
||||
|
||||
|
||||
%.o: %.cpp
|
||||
$(CXX) -c $(CXXFLAGS) -I.. $<
|
||||
|
||||
|
||||
|
||||
depend:
|
||||
makedepend -Y. -I.. -f- *.cpp > Makefile.dep
|
||||
echo -n "o = " > Makefile.o.dep
|
||||
ls -1 *.cpp | xargs -I foo echo -n foo " " | sed -E "s/([^\.]*)\.cpp[ ]/\1\.o/g" >> Makefile.o.dep
|
||||
|
||||
|
||||
clean:
|
||||
rm -f *.o
|
||||
rm -f $(libname)
|
||||
|
||||
|
||||
include Makefile.dep
|
|
@ -1,17 +0,0 @@
|
|||
# DO NOT DELETE
|
||||
|
||||
jsontospaceparser.o: jsontospaceparser.h space.h ../textstream/types.h
|
||||
jsontospaceparser.o: ../utf8/utf8.h ../textstream/textstream.h
|
||||
jsontospaceparser.o: ../space/space.h ../date/date.h ../convert/inttostr.h
|
||||
jsontospaceparser.o: ../membuffer/membuffer.h ../textstream/types.h
|
||||
space.o: space.h ../textstream/types.h ../utf8/utf8.h
|
||||
space.o: ../textstream/textstream.h ../space/space.h ../date/date.h
|
||||
space.o: ../convert/inttostr.h ../membuffer/membuffer.h ../textstream/types.h
|
||||
space.o: ../convert/convert.h ../convert/inttostr.h
|
||||
space.o: ../convert/patternreplacer.h ../convert/strtoint.h ../convert/text.h
|
||||
space.o: ../convert/misc.h
|
||||
spaceparser.o: spaceparser.h space.h ../textstream/types.h ../utf8/utf8.h
|
||||
spaceparser.o: ../textstream/textstream.h ../space/space.h ../date/date.h
|
||||
spaceparser.o: ../convert/inttostr.h ../membuffer/membuffer.h
|
||||
spaceparser.o: ../textstream/types.h
|
||||
spacetojson.o: spacetojson.h space.h ../textstream/types.h
|
|
@ -1 +0,0 @@
|
|||
o = jsontospaceparser.o space.o spaceparser.o spacetojson.o
|
|
@ -1,870 +0,0 @@
|
|||
/*
|
||||
* This file is a part of PikoTools
|
||||
* and is distributed under the (new) BSD licence.
|
||||
* Author: Tomasz Sowa <t.sowa@ttmath.org>
|
||||
*/
|
||||
|
||||
/*
|
||||
* Copyright (c) 2012-2017, Tomasz Sowa
|
||||
* All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are met:
|
||||
*
|
||||
* * Redistributions of source code must retain the above copyright notice,
|
||||
* this list of conditions and the following disclaimer.
|
||||
*
|
||||
* * Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
*
|
||||
* * Neither the name Tomasz Sowa nor the names of contributors to this
|
||||
* project may be used to endorse or promote products derived
|
||||
* from this software without specific prior written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
|
||||
* THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#include <cstdlib>
|
||||
#include <wchar.h>
|
||||
#include "jsontospaceparser.h"
|
||||
#include "utf8/utf8.h"
|
||||
|
||||
|
||||
namespace PT
|
||||
{
|
||||
|
||||
|
||||
|
||||
// Starts without a destination space and with the settings
// established by SetDefault().
JSONToSpaceParser::JSONToSpaceParser()
{
	root_space = nullptr;
	SetDefault();
}
|
||||
|
||||
|
||||
// Sets the space which will be filled by the parser (pointer version).
void JSONToSpaceParser::SetSpace(Space * pspace)
{
	root_space = pspace;
}
|
||||
|
||||
|
||||
// Sets the space which will be filled by the parser (reference version);
// only the address of 'pspace' is kept.
void JSONToSpaceParser::SetSpace(Space & pspace)
{
	root_space = &pspace;
}
|
||||
|
||||
|
||||
void JSONToSpaceParser::SetDefault()
|
||||
{
|
||||
// you can change this separators to what you want
|
||||
// you shoud not use only white characters here (as expected by IsWhite() method)
|
||||
// and new line characters ('\n')
|
||||
separator = ':';
|
||||
space_start = '{';
|
||||
space_end = '}';
|
||||
table_start = '[';
|
||||
table_end = ']';
|
||||
option_delimiter = ',';
|
||||
skip_empty = false;
|
||||
use_escape_char = true;
|
||||
input_as_utf8 = true;
|
||||
max_nested_level = 1000;
|
||||
create_table_as_space = true;
|
||||
}
|
||||
|
||||
|
||||
|
||||
void JSONToSpaceParser::SkipEmpty(bool skip)
|
||||
{
|
||||
skip_empty = skip;
|
||||
}
|
||||
|
||||
|
||||
void JSONToSpaceParser::UseEscapeChar(bool escape)
|
||||
{
|
||||
use_escape_char = escape;
|
||||
}
|
||||
|
||||
|
||||
void JSONToSpaceParser::UTF8(bool utf)
|
||||
{
|
||||
input_as_utf8 = utf;
|
||||
}
|
||||
|
||||
|
||||
void JSONToSpaceParser::CreateTableAsSpace(bool create_table_as_space_)
|
||||
{
|
||||
create_table_as_space = create_table_as_space_;
|
||||
}
|
||||
|
||||
|
||||
// Parses the file 'file_name' (opened in binary mode).
// Returns cant_open_file when the stream cannot be opened,
// otherwise the status left by Parse().
JSONToSpaceParser::Status JSONToSpaceParser::Parse(const char * file_name)
{
	reading_from_file = true;

	file.clear();
	file.open(file_name, std::ios_base::binary | std::ios_base::in);

	if( !file )
	{
		status = cant_open_file;
		return status;
	}

	Parse();
	file.close();

	return status;
}
|
||||
|
||||
|
||||
|
||||
// std::string convenience overload: forwards to Parse(const char *).
JSONToSpaceParser::Status JSONToSpaceParser::Parse(const std::string & file_name)
{
	const char * path = file_name.c_str();
	return Parse(path);
}
|
||||
|
||||
|
||||
|
||||
|
||||
// Wide-character file name: the name is converted with PT::WideToUTF8
// into the afile_name member and then the narrow overload is used.
JSONToSpaceParser::Status JSONToSpaceParser::Parse(const wchar_t * file_name)
{
	PT::WideToUTF8(file_name, afile_name);
	const char * path = afile_name.c_str();

	return Parse(path);
}
|
||||
|
||||
|
||||
|
||||
// std::wstring convenience overload: forwards to Parse(const wchar_t *).
JSONToSpaceParser::Status JSONToSpaceParser::Parse(const std::wstring & file_name)
{
	const wchar_t * path = file_name.c_str();
	return Parse(path);
}
|
||||
|
||||
|
||||
|
||||
// Parses JSON text held in a zero-terminated narrow string.
// Returns the status left by Parse().
JSONToSpaceParser::Status JSONToSpaceParser::ParseString(const char * str)
{
	// select the narrow-string input source
	pchar_ascii = str;
	pchar_unicode = 0;
	reading_from_wchar_string = false;
	reading_from_file = false;

	Parse();

	return status;
}
|
||||
|
||||
|
||||
// std::string convenience overload: forwards to ParseString(const char *).
JSONToSpaceParser::Status JSONToSpaceParser::ParseString(const std::string & str)
{
	const char * text = str.c_str();
	return ParseString(text);
}
|
||||
|
||||
|
||||
// Parses JSON text held in a zero-terminated wide string.
// Returns the status left by Parse().
JSONToSpaceParser::Status JSONToSpaceParser::ParseString(const wchar_t * str)
{
	// select the wide-string input source
	pchar_unicode = str;
	pchar_ascii = 0;
	reading_from_wchar_string = true;
	reading_from_file = false;

	Parse();

	return status;
}
|
||||
|
||||
|
||||
// std::wstring convenience overload: forwards to ParseString(const wchar_t *).
JSONToSpaceParser::Status JSONToSpaceParser::ParseString(const std::wstring & str)
{
	const wchar_t * text = str.c_str();
	return ParseString(text);
}
|
||||
|
||||
|
||||
void JSONToSpaceParser::Parse()
|
||||
{
|
||||
if( !root_space )
|
||||
{
|
||||
status = no_space;
|
||||
return;
|
||||
}
|
||||
|
||||
line = 1;
|
||||
status = ok;
|
||||
space = root_space;
|
||||
skipped = 0;
|
||||
current_nested_level = 0;
|
||||
ReadChar();
|
||||
SkipWhite();
|
||||
|
||||
if( lastc == space_start )
|
||||
{
|
||||
ParseSpace(false, false);
|
||||
}
|
||||
else
|
||||
if( lastc == table_start )
|
||||
{
|
||||
ParseTable(false);
|
||||
}
|
||||
else
|
||||
{
|
||||
// '{' or '[' expected
|
||||
status = syntax_error;
|
||||
}
|
||||
|
||||
if( status == ok && space != root_space )
|
||||
{
|
||||
// last closing '}' characters are missing (closing a space)
|
||||
status = syntax_error;
|
||||
}
|
||||
|
||||
token.clear();
|
||||
key.clear();
|
||||
value.clear();
|
||||
}
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
void JSONToSpaceParser::ParseSpace(bool has_space_name, bool insert_new_space)
|
||||
{
|
||||
//current_nested_level += 1;
|
||||
|
||||
if( current_nested_level > max_nested_level )
|
||||
{
|
||||
status = max_nested_spaces_exceeded;
|
||||
return;
|
||||
}
|
||||
|
||||
if( insert_new_space )
|
||||
{
|
||||
SpaceStarts(has_space_name);
|
||||
}
|
||||
else
|
||||
{
|
||||
// insert_new_space as a false is used only when parsing
|
||||
// the first space (root_space)
|
||||
ReadChar(); // skipping the first space character '{'
|
||||
}
|
||||
|
||||
ParseKeyValuePairs();
|
||||
|
||||
if( insert_new_space )
|
||||
{
|
||||
SpaceEnds();
|
||||
}
|
||||
else
|
||||
{
|
||||
ReadChar(); // skipping the last space character '}'
|
||||
}
|
||||
|
||||
//current_nested_level -= 1;
|
||||
}
|
||||
|
||||
|
||||
void JSONToSpaceParser::ParseKeyValuePairs()
|
||||
{
|
||||
SkipWhite();
|
||||
|
||||
while( status == ok && lastc != space_end && lastc != -1 )
|
||||
{
|
||||
ReadKey();
|
||||
SkipWhite();
|
||||
|
||||
if( lastc == separator )
|
||||
{
|
||||
value.clear();
|
||||
ReadChar(); // skipping separator ':'
|
||||
ReadValue(false, false, true, true);
|
||||
SkipWhite();
|
||||
|
||||
if( lastc == option_delimiter )
|
||||
{
|
||||
ReadChar(); // skipping delimiter ','
|
||||
}
|
||||
else
|
||||
if( lastc != space_end && status == ok )
|
||||
{
|
||||
status = syntax_error;
|
||||
}
|
||||
}
|
||||
else
|
||||
if( status == ok )
|
||||
{
|
||||
status = syntax_error;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
void JSONToSpaceParser::ParseTextTable()
|
||||
{
|
||||
ReadChar(); // skipping table start character '['
|
||||
SkipWhite();
|
||||
value.clear();
|
||||
|
||||
while( status == ok && lastc != table_end && lastc != -1 )
|
||||
{
|
||||
// all space objects inside or tables will be skipped
|
||||
ReadValue(true);
|
||||
SkipWhite();
|
||||
|
||||
if( lastc == option_delimiter )
|
||||
{
|
||||
ReadChar(); // skipping delimiter ','
|
||||
}
|
||||
else
|
||||
if( lastc != table_end && status == ok )
|
||||
{
|
||||
status = syntax_error;
|
||||
}
|
||||
}
|
||||
|
||||
if( lastc == table_end )
|
||||
ReadChar(); // skipping end table character ']'
|
||||
|
||||
AddKeyValuePair();
|
||||
}
|
||||
|
||||
|
||||
void JSONToSpaceParser::ParseObjectsTable(bool has_key)
|
||||
{
|
||||
ReadChar(); // skipping table start character '['
|
||||
SpaceStarts(has_key, false);
|
||||
SkipWhite();
|
||||
|
||||
while( status == ok && lastc != table_end && lastc != -1 )
|
||||
{
|
||||
// 'value' table will not be used here
|
||||
// (we are creating spaces)
|
||||
ReadValue(false, true);
|
||||
SkipWhite();
|
||||
|
||||
if( lastc == option_delimiter )
|
||||
{
|
||||
ReadChar(); // skipping delimiter ','
|
||||
}
|
||||
else
|
||||
if( lastc != table_end && status == ok )
|
||||
{
|
||||
status = syntax_error;
|
||||
}
|
||||
}
|
||||
|
||||
if( lastc == table_end )
|
||||
ReadChar(); // skipping end table character ']'
|
||||
|
||||
SpaceEnds(false);
|
||||
}
|
||||
|
||||
|
||||
void JSONToSpaceParser::ParseTable(bool has_key)
|
||||
{
|
||||
if( create_table_as_space )
|
||||
{
|
||||
//current_nested_level += 1;
|
||||
|
||||
if( current_nested_level > max_nested_level )
|
||||
{
|
||||
status = max_nested_spaces_exceeded;
|
||||
}
|
||||
else
|
||||
{
|
||||
ParseObjectsTable(has_key);
|
||||
}
|
||||
|
||||
//current_nested_level -= 1;
|
||||
}
|
||||
else
|
||||
{
|
||||
// ParseTextTable will not create a next level
|
||||
|
||||
if( !has_key )
|
||||
key.clear();
|
||||
|
||||
ParseTextTable(); // ParseTextTable will use key
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
|
||||
void JSONToSpaceParser::SpaceStarts(bool has_space_name, bool skip_space_char)
|
||||
{
|
||||
Space * new_space = new Space();
|
||||
space->spaces.push_back(new_space);
|
||||
new_space->parent = space;
|
||||
|
||||
if( has_space_name )
|
||||
new_space->name = key;
|
||||
|
||||
space = new_space;
|
||||
|
||||
if( skip_space_char )
|
||||
ReadChar(); // skipping space starts character '{'
|
||||
}
|
||||
|
||||
|
||||
void JSONToSpaceParser::SpaceEnds(bool skip_space_char)
|
||||
{
|
||||
if( space == root_space )
|
||||
{
|
||||
// there cannot be a loose list end character in the global space
|
||||
status = syntax_error;
|
||||
}
|
||||
else
|
||||
{
|
||||
space = space->parent;
|
||||
|
||||
if( skip_space_char )
|
||||
ReadChar(); // skipping closing space character '}'
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
|
||||
// Returns true for the characters treated as white space:
// space, tab, carriage return (13 -- it precedes \n in dos files),
// line feed (10) and 160 (an unbreakable space).
bool JSONToSpaceParser::IsWhite(int c)
{
	return c == ' '  ||
	       c == '\t' ||
	       c == 13   ||
	       c == 160  ||
	       c == 10;
}
|
||||
|
||||
|
||||
|
||||
void JSONToSpaceParser::SkipWhite()
|
||||
{
|
||||
while( IsWhite(lastc) )
|
||||
{
|
||||
ReadChar();
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
|
||||
void JSONToSpaceParser::Trim(std::wstring & s)
|
||||
{
|
||||
std::wstring::size_type i;
|
||||
|
||||
if( s.empty() )
|
||||
return;
|
||||
|
||||
// looking for white characters at the end
|
||||
for(i=s.size()-1 ; i>0 && IsWhite(s[i]) ; --i);
|
||||
|
||||
if( i==0 && IsWhite(s[i]) )
|
||||
{
|
||||
// the whole string has white characters
|
||||
s.clear();
|
||||
return;
|
||||
}
|
||||
|
||||
// deleting white characters at the end
|
||||
if( i != s.size() - 1 )
|
||||
s.erase(i+1, std::wstring::npos);
|
||||
|
||||
// looking for white characters at the beginning
|
||||
for(i=0 ; i<s.size() && IsWhite(s[i]) ; ++i);
|
||||
|
||||
// deleting white characters at the beginning
|
||||
if( i != 0 )
|
||||
s.erase(0, i);
|
||||
}
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
void JSONToSpaceParser::DeleteFromTable(const std::wstring & var)
|
||||
{
|
||||
Space::Table::iterator i = space->table.find(var);
|
||||
|
||||
if( i != space->table.end() )
|
||||
space->table.erase(i);
|
||||
}
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
void JSONToSpaceParser::ReadTokenQuoted()
|
||||
{
|
||||
ReadChar(); // skipping the first quotation mark
|
||||
|
||||
while( lastc != -1 && (char_was_escaped || lastc != '"') )
|
||||
{
|
||||
token += static_cast<wchar_t>(lastc);
|
||||
ReadChar();
|
||||
}
|
||||
|
||||
if( !char_was_escaped && lastc == '"' )
|
||||
ReadChar(); // skipping the last quotation mark
|
||||
else
|
||||
status = syntax_error;
|
||||
}
|
||||
|
||||
|
||||
void JSONToSpaceParser::ReadTokenSingle(bool white_delimit, bool new_line_delimit, int delimit1, int delimit2)
|
||||
{
|
||||
while( true )
|
||||
{
|
||||
if( lastc == -1 ||
|
||||
(!char_was_escaped &&
|
||||
(
|
||||
lastc == space_end ||
|
||||
lastc == table_end ||
|
||||
(white_delimit && IsWhite(lastc)) ||
|
||||
(new_line_delimit && lastc == '\n') ||
|
||||
(delimit1 != -1 && lastc == delimit1) ||
|
||||
(delimit2 != -1 && lastc == delimit2)
|
||||
) ) )
|
||||
|
||||
{
|
||||
break;
|
||||
}
|
||||
|
||||
token += static_cast<wchar_t>(lastc);
|
||||
ReadChar();
|
||||
}
|
||||
|
||||
Trim(token);
|
||||
}
|
||||
|
||||
|
||||
void JSONToSpaceParser::ReadToken(bool white_delimit, bool new_line_delimit, int delimit1, int delimit2)
|
||||
{
|
||||
token.clear();
|
||||
SkipWhite();
|
||||
|
||||
if( !char_was_escaped && lastc == '"' )
|
||||
ReadTokenQuoted();
|
||||
else
|
||||
ReadTokenSingle(white_delimit, new_line_delimit, delimit1, delimit2);
|
||||
}
|
||||
|
||||
|
||||
void JSONToSpaceParser::ReadKey()
|
||||
{
|
||||
SkipWhite();
|
||||
ReadToken(false, true, separator, table_start);
|
||||
key = token;
|
||||
}
|
||||
|
||||
|
||||
void JSONToSpaceParser::SkipText()
|
||||
{
|
||||
ReadChar(); // skipping the first quote character '"'
|
||||
|
||||
while( lastc != '"' && lastc != -1 )
|
||||
ReadChar();
|
||||
}
|
||||
|
||||
|
||||
void JSONToSpaceParser::SkipObjectOrTable(int start_char, int end_char)
|
||||
{
|
||||
int mark = 1;
|
||||
|
||||
skipped += 1;
|
||||
ReadChar(); // skipping the first object character '{' or '['
|
||||
|
||||
do
|
||||
{
|
||||
if( lastc == '"' )
|
||||
SkipText();
|
||||
else
|
||||
if( lastc == end_char )
|
||||
mark -= 1;
|
||||
else
|
||||
if( lastc == start_char )
|
||||
mark += 1;
|
||||
|
||||
ReadChar();
|
||||
}
|
||||
while( mark > 0 && lastc != -1 );
|
||||
}
|
||||
|
||||
|
||||
void JSONToSpaceParser::SkipObject()
|
||||
{
|
||||
SkipObjectOrTable(space_start, space_end);
|
||||
}
|
||||
|
||||
|
||||
void JSONToSpaceParser::SkipTable()
|
||||
{
|
||||
SkipObjectOrTable(table_start, table_end);
|
||||
}
|
||||
|
||||
|
||||
|
||||
//void JSONToSpaceParser::ReadValue(bool add_space_for_single_value, bool auto_add_single_value, bool has_space_name)
|
||||
void JSONToSpaceParser::ReadValue(bool skip_object_or_table,
|
||||
bool add_space_for_text_value,
|
||||
bool has_key,
|
||||
bool auto_add_text_value)
|
||||
{
|
||||
SkipWhite();
|
||||
|
||||
if( lastc == space_start )
|
||||
{
|
||||
if( skip_object_or_table )
|
||||
SkipObject();
|
||||
else
|
||||
ParseSpace(has_key);
|
||||
}
|
||||
else
|
||||
if( lastc == table_start )
|
||||
{
|
||||
if( skip_object_or_table )
|
||||
SkipTable();
|
||||
else
|
||||
ParseTable(has_key);
|
||||
}
|
||||
else
|
||||
{
|
||||
if( add_space_for_text_value )
|
||||
{
|
||||
SpaceStarts(false, false);
|
||||
ReadToken(false, true, option_delimiter, -1);
|
||||
space->name = token;
|
||||
SpaceEnds(false);
|
||||
}
|
||||
else
|
||||
{
|
||||
ReadToken(false, true, option_delimiter, -1);
|
||||
value.push_back(token);
|
||||
|
||||
if( auto_add_text_value )
|
||||
AddKeyValuePair();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
void JSONToSpaceParser::AddKeyValuePair()
|
||||
{
|
||||
if( value.empty() && skip_empty )
|
||||
{
|
||||
DeleteFromTable(key);
|
||||
return;
|
||||
}
|
||||
|
||||
space->table[key] = value;
|
||||
}
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
int JSONToSpaceParser::ReadUTF8Char()
|
||||
{
|
||||
int c;
|
||||
bool correct;
|
||||
|
||||
lastc = -1;
|
||||
|
||||
do
|
||||
{
|
||||
PT::UTF8ToInt(file, c, correct);
|
||||
|
||||
if( !file )
|
||||
return lastc;
|
||||
}
|
||||
while( !correct );
|
||||
|
||||
lastc = c;
|
||||
|
||||
if( lastc == '\n' )
|
||||
++line;
|
||||
|
||||
return lastc;
|
||||
}
|
||||
|
||||
|
||||
|
||||
int JSONToSpaceParser::ReadASCIIChar()
|
||||
{
|
||||
lastc = file.get();
|
||||
|
||||
if( lastc == '\n' )
|
||||
++line;
|
||||
|
||||
return lastc;
|
||||
}
|
||||
|
||||
|
||||
|
||||
|
||||
int JSONToSpaceParser::ReadCharFromWcharString()
|
||||
{
|
||||
if( *pchar_unicode == 0 )
|
||||
lastc = -1;
|
||||
else
|
||||
lastc = *(pchar_unicode++);
|
||||
|
||||
if( lastc == '\n' )
|
||||
++line;
|
||||
|
||||
return lastc;
|
||||
}
|
||||
|
||||
|
||||
int JSONToSpaceParser::ReadCharFromUTF8String()
|
||||
{
|
||||
int c;
|
||||
bool correct;
|
||||
|
||||
lastc = -1;
|
||||
|
||||
do
|
||||
{
|
||||
size_t len = PT::UTF8ToInt(pchar_ascii, c, correct);
|
||||
pchar_ascii += len;
|
||||
}
|
||||
while( *pchar_ascii && !correct );
|
||||
|
||||
if( correct )
|
||||
lastc = c;
|
||||
|
||||
if( lastc == '\n' )
|
||||
++line;
|
||||
|
||||
return lastc;
|
||||
|
||||
}
|
||||
|
||||
|
||||
int JSONToSpaceParser::ReadCharFromAsciiString()
|
||||
{
|
||||
if( *pchar_ascii == 0 )
|
||||
lastc = -1;
|
||||
else
|
||||
lastc = *(pchar_ascii++);
|
||||
|
||||
if( lastc == '\n' )
|
||||
++line;
|
||||
|
||||
return lastc;
|
||||
}
|
||||
|
||||
|
||||
int JSONToSpaceParser::ReadCharNoEscape()
|
||||
{
|
||||
if( reading_from_file )
|
||||
{
|
||||
if( input_as_utf8 )
|
||||
return ReadUTF8Char();
|
||||
else
|
||||
return ReadASCIIChar();
|
||||
}
|
||||
else
|
||||
{
|
||||
if( reading_from_wchar_string )
|
||||
{
|
||||
return ReadCharFromWcharString();
|
||||
}
|
||||
else
|
||||
{
|
||||
if( input_as_utf8 )
|
||||
return ReadCharFromUTF8String();
|
||||
else
|
||||
return ReadCharFromAsciiString();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Returns true when 'c' is one of 0-9, a-f or A-F.
bool JSONToSpaceParser::IsHexDigit(wchar_t c)
{
	if( c >= '0' && c <= '9' )
		return true;

	if( c >= 'a' && c <= 'f' )
		return true;

	return c >= 'A' && c <= 'F';
}
|
||||
|
||||
|
||||
// Returns the numeric value (0..15) of a hexadecimal digit,
// or zero when 'c' is not such a digit.
int JSONToSpaceParser::HexToInt(wchar_t c)
{
	int digit = 0;

	if( c >= '0' && c <= '9' )
		digit = c - '0';
	else
	if( c >= 'a' && c <= 'f' )
		digit = c - 'a' + 10;
	else
	if( c >= 'A' && c <= 'F' )
		digit = c - 'A' + 10;

	return digit;
}
|
||||
|
||||
|
||||
void JSONToSpaceParser::ReadUnicodeCodePoint()
|
||||
{
|
||||
wchar_t c;
|
||||
int value = 0;
|
||||
|
||||
for(int i=0 ; i<4 ; ++i)
|
||||
{
|
||||
c = ReadCharNoEscape();
|
||||
|
||||
if( !IsHexDigit(c) )
|
||||
{
|
||||
status = syntax_error;
|
||||
return;
|
||||
}
|
||||
|
||||
value = (value << 4) | HexToInt(c);
|
||||
}
|
||||
|
||||
lastc = (wchar_t)value;
|
||||
}
|
||||
|
||||
|
||||
int JSONToSpaceParser::ReadChar()
|
||||
{
|
||||
char_was_escaped = false;
|
||||
ReadCharNoEscape();
|
||||
|
||||
if( use_escape_char && lastc == '\\' )
|
||||
{
|
||||
char_was_escaped = true;
|
||||
ReadCharNoEscape();
|
||||
|
||||
switch(lastc)
|
||||
{
|
||||
case '0': lastc = 0; break;
|
||||
case 't': lastc = '\t'; break;
|
||||
case 'r': lastc = '\r'; break;
|
||||
case 'n': lastc = '\n'; break;
|
||||
case 'b': lastc = 0x08; break;
|
||||
case 'f': lastc = 0x0c; break;
|
||||
case 'u': ReadUnicodeCodePoint(); break;
|
||||
// in other cases we return the last character
|
||||
}
|
||||
}
|
||||
|
||||
return lastc;
|
||||
}
|
||||
|
||||
|
||||
|
||||
} // namespace
|
||||
|
||||
|
||||
|
||||
|
|
@ -1,360 +0,0 @@
|
|||
/*
|
||||
* This file is a part of PikoTools
|
||||
* and is distributed under the (new) BSD licence.
|
||||
* Author: Tomasz Sowa <t.sowa@ttmath.org>
|
||||
*/
|
||||
|
||||
/*
|
||||
* Copyright (c) 2012-2017, Tomasz Sowa
|
||||
* All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are met:
|
||||
*
|
||||
* * Redistributions of source code must retain the above copyright notice,
|
||||
* this list of conditions and the following disclaimer.
|
||||
*
|
||||
* * Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
*
|
||||
* * Neither the name Tomasz Sowa nor the names of contributors to this
|
||||
* project may be used to endorse or promote products derived
|
||||
* from this software without specific prior written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
|
||||
* THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#ifndef headerfile_picotools_space_jsonspaceparser
|
||||
#define headerfile_picotools_space_jsonspaceparser
|
||||
|
||||
#include <fstream>
|
||||
#include "space.h"
|
||||
|
||||
|
||||
|
||||
namespace PT
|
||||
{
|
||||
|
||||
|
||||
|
||||
class JSONToSpaceParser
|
||||
{
|
||||
public:
|
||||
|
||||
|
||||
/*
|
||||
ctor -- setting default values (SetDefault() method)
|
||||
*/
|
||||
JSONToSpaceParser();
|
||||
|
||||
|
||||
/*
|
||||
setting the root space
|
||||
*/
|
||||
void SetSpace(Space * pspace);
|
||||
void SetSpace(Space & pspace);
|
||||
|
||||
|
||||
/*
|
||||
setting options of the parser to the default values
|
||||
utf8 etc.
|
||||
*/
|
||||
void SetDefault();
|
||||
|
||||
|
||||
/*
|
||||
status of parsing
|
||||
*/
|
||||
enum Status { ok, cant_open_file, syntax_error, max_nested_spaces_exceeded, no_space };
|
||||
|
||||
|
||||
/*
|
||||
the last status of parsing, set by Parse() methods
|
||||
*/
|
||||
Status status;
|
||||
|
||||
|
||||
/*
|
||||
a number of a line in which there is a syntax_error
|
||||
*/
|
||||
int line;
|
||||
|
||||
|
||||
/*
|
||||
how many objects were skipped
|
||||
used in parsing tables when create_table_as_space is false
|
||||
*/
|
||||
size_t skipped;
|
||||
|
||||
|
||||
/*
|
||||
main methods used to parse
|
||||
file_name is the path to a file
|
||||
*/
|
||||
Status Parse(const char * file_name);
|
||||
Status Parse(const std::string & file_name);
|
||||
Status Parse(const wchar_t * file_name);
|
||||
Status Parse(const std::wstring & file_name);
|
||||
|
||||
|
||||
/*
|
||||
main methods used to parse
|
||||
str - input string (either 8bit ascii or UTF-8 -- see UTF8() method)
|
||||
*/
|
||||
Status ParseString(const char * str);
|
||||
Status ParseString(const std::string & str);
|
||||
|
||||
|
||||
/*
|
||||
main methods used to parse
|
||||
here input string is always in unicode (wide characters)
|
||||
*/
|
||||
Status ParseString(const wchar_t * str);
|
||||
Status ParseString(const std::wstring & str);
|
||||
|
||||
|
||||
/*
|
||||
if true then empty values and lists, e.g:
|
||||
option =
|
||||
option2 = ()
|
||||
will be omitted (not inserted to 'table')
|
||||
default: false
|
||||
*/
|
||||
void SkipEmpty(bool skip);
|
||||
|
||||
|
||||
/*
|
||||
'\' character is used to escape other characters in a quoted string
|
||||
so "some \t t\"ext" will produce "some t t"ext"
|
||||
default: true
|
||||
*/
|
||||
void UseEscapeChar(bool escape);
|
||||
|
||||
|
||||
/*
|
||||
if true then the input file or string (char* or std::string) is treated as UTF-8
|
||||
*/
|
||||
void UTF8(bool utf);
|
||||
|
||||
|
||||
/*
|
||||
|
||||
default: true
|
||||
*/
|
||||
void CreateTableAsSpace(bool create_table_as_space_);
|
||||
|
||||
private:
|
||||
|
||||
|
||||
/*
|
||||
current space set by SetSpace();
|
||||
*/
|
||||
Space * root_space;
|
||||
|
||||
|
||||
/*
|
||||
a space in which we are now
|
||||
*/
|
||||
Space * space;
|
||||
|
||||
|
||||
/*
|
||||
true if Parse() method was called
|
||||
false if ParseString() was called
|
||||
*/
|
||||
bool reading_from_file;
|
||||
|
||||
|
||||
/*
|
||||
pointers to the current character
|
||||
if ParseString() is in used
|
||||
*/
|
||||
const char * pchar_ascii;
|
||||
const wchar_t * pchar_unicode;
|
||||
|
||||
|
||||
/*
|
||||
true if ParseString(wchar_t *) or ParseString(std::wstring&) was called
|
||||
*/
|
||||
bool reading_from_wchar_string;
|
||||
|
||||
|
||||
/*
|
||||
last read token
|
||||
*/
|
||||
std::wstring token;
|
||||
|
||||
|
||||
/*
|
||||
last read key
|
||||
*/
|
||||
std::wstring key;
|
||||
|
||||
|
||||
/*
|
||||
last read list
|
||||
*/
|
||||
Space::Value value;
|
||||
|
||||
|
||||
/*
|
||||
separator between a variable and a value, default: '='
|
||||
*/
|
||||
int separator;
|
||||
|
||||
|
||||
/*
|
||||
space starting character, default: '{'
|
||||
*/
|
||||
int space_start;
|
||||
|
||||
|
||||
/*
|
||||
space ending character, default: '}'
|
||||
*/
|
||||
int space_end;
|
||||
|
||||
|
||||
/*
|
||||
table starting character, default: '['
|
||||
*/
|
||||
int table_start;
|
||||
|
||||
|
||||
/*
|
||||
table ending character, default: ']'
|
||||
*/
|
||||
int table_end;
|
||||
|
||||
|
||||
/*
|
||||
option delimiter, default: ','
|
||||
*/
|
||||
int option_delimiter;
|
||||
|
||||
|
||||
/*
|
||||
last read char
|
||||
or -1 if the end
|
||||
*/
|
||||
int lastc;
|
||||
|
||||
|
||||
/*
|
||||
true if the lastc was escaped (with a backslash)
|
||||
we have to know if the last sequence was \" or just "
|
||||
*/
|
||||
bool char_was_escaped;
|
||||
|
||||
|
||||
/*
|
||||
current file
|
||||
*/
|
||||
std::ifstream file;
|
||||
|
||||
|
||||
/*
|
||||
if true then empty lists, e.g:
|
||||
option =
|
||||
option2 = ()
|
||||
will be omitted (not inserted to 'table')
|
||||
default: false
|
||||
*/
|
||||
bool skip_empty;
|
||||
|
||||
|
||||
/*
|
||||
input file is in UTF-8
|
||||
default: true
|
||||
*/
|
||||
bool input_as_utf8;
|
||||
|
||||
|
||||
/*
|
||||
if true you can use an escape character '\' in quoted values
|
||||
*/
|
||||
bool use_escape_char;
|
||||
|
||||
|
||||
/*
|
||||
if false we only allow the tables to consists of text items (numeric, boolean too)
|
||||
objects are not allowed then
|
||||
default: true
|
||||
*/
|
||||
bool create_table_as_space;
|
||||
|
||||
|
||||
/*
|
||||
|
||||
*/
|
||||
size_t current_nested_level;
|
||||
|
||||
|
||||
/*
|
||||
|
||||
default: 1000;
|
||||
*/
|
||||
size_t max_nested_level;
|
||||
|
||||
|
||||
std::string afile_name;
|
||||
|
||||
void Parse();
|
||||
void ParseSpace(bool has_space_name, bool insert_new_space = true);
|
||||
void ParseTextTable();
|
||||
void ParseObjectsTable(bool has_key);
|
||||
void ParseTable(bool has_key);
|
||||
void ParseKeyValuePairs();
|
||||
|
||||
void SkipText();
|
||||
void SkipObjectOrTable(int start_char, int end_char);
|
||||
void SkipTable();
|
||||
void SkipObject();
|
||||
|
||||
void SpaceEnds(bool skip_space_char = true);
|
||||
void SpaceStarts(bool has_space_name, bool skip_space_char = true);
|
||||
|
||||
void DeleteFromTable(const std::wstring & var);
|
||||
|
||||
void ReadTokenQuoted();
|
||||
void ReadTokenSingle(bool white_delimit, bool new_line_delimit, int delimit1, int delimit2);
|
||||
void ReadToken(bool white_delimit, bool new_line_delimit, int delimit1, int delimit2);
|
||||
void ReadKey();
|
||||
void ReadValue(bool skip_object_or_table = false,
|
||||
bool add_space_for_text_value = false,
|
||||
bool has_key = false,
|
||||
bool auto_add_text_value = false);
|
||||
|
||||
void AddKeyValuePair();
|
||||
int ReadUTF8Char();
|
||||
int ReadASCIIChar();
|
||||
int ReadCharFromWcharString();
|
||||
int ReadCharFromUTF8String();
|
||||
int ReadCharFromAsciiString();
|
||||
int ReadCharNoEscape();
|
||||
int ReadChar();
|
||||
bool IsWhite(int c);
|
||||
void SkipWhite();
|
||||
void Trim(std::wstring & s);
|
||||
bool IsHexDigit(wchar_t c);
|
||||
int HexToInt(wchar_t c);
|
||||
void ReadUnicodeCodePoint();
|
||||
|
||||
};
|
||||
|
||||
|
||||
} // namespace
|
||||
|
||||
|
||||
#endif
|
1085
space/space.cpp
1085
space/space.cpp
File diff suppressed because it is too large
Load Diff
567
space/space.h
567
space/space.h
|
@ -1,567 +0,0 @@
|
|||
/*
|
||||
* This file is a part of PikoTools
|
||||
* and is distributed under the (new) BSD licence.
|
||||
* Author: Tomasz Sowa <t.sowa@ttmath.org>
|
||||
*/
|
||||
|
||||
/*
|
||||
* Copyright (c) 2010-2018, Tomasz Sowa
|
||||
* All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are met:
|
||||
*
|
||||
* * Redistributions of source code must retain the above copyright notice,
|
||||
* this list of conditions and the following disclaimer.
|
||||
*
|
||||
* * Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
*
|
||||
* * Neither the name Tomasz Sowa nor the names of contributors to this
|
||||
* project may be used to endorse or promote products derived
|
||||
* from this software without specific prior written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
|
||||
* THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#ifndef headerfile_picotools_space_space
|
||||
#define headerfile_picotools_space_space
|
||||
|
||||
#include <string>
|
||||
#include <vector>
|
||||
#include <map>
|
||||
#include "textstream/types.h"
|
||||
|
||||
|
||||
|
||||
namespace PT
|
||||
{
|
||||
|
||||
|
||||
|
||||
/*
|
||||
A parser for parsing config files.
|
||||
|
||||
A config file can look like this:
|
||||
variable1 = value 1
|
||||
variable2 = " value 2 "
|
||||
variable3 = (value 1, value 2)
|
||||
variable4 = (" value 1 " , "value2", value 3)
|
||||
|
||||
sample of use:
|
||||
SpaceParser parser;
|
||||
parser.Parse("/path/to/config");
|
||||
|
||||
if( parser.status == SpaceParser::ok )
|
||||
{
|
||||
// the whole config we have in parser.table
|
||||
}
|
||||
|
||||
config syntax:
|
||||
option = list
|
||||
|
||||
a list can consist of any number of items, if you're using more than one item you should
|
||||
use brackets ()
|
||||
|
||||
for one item the brackets can be omitted:
|
||||
option = value
|
||||
white characters at the beginning of the value (and at the end) will be trimmed,
|
||||
or you can use quotes:
|
||||
option = "value"
|
||||
option2 = "value with spaces at the end "
|
||||
|
||||
the form without quotes:
|
||||
option = value
|
||||
should be written in one line, so this is not allowed:
|
||||
option =
|
||||
value
|
||||
you can use a new line characters only between brackets and quotes:
|
||||
option = "this is
|
||||
a multiline string"
|
||||
option = ( value1,
|
||||
value2 )
|
||||
|
||||
but there is one requirement: the first character " or ( should be in the same line,
|
||||
so this is not allowed
|
||||
option =
|
||||
"this is wrong"
|
||||
but this is ok:
|
||||
option = "
|
||||
that is ok"
|
||||
|
||||
empty lists:
|
||||
option = ()
|
||||
this creates an empty list: parser.table['option'].empty() == true
|
||||
|
||||
option =
|
||||
this creates an empty list too (the same as previously)
|
||||
|
||||
option = ""
|
||||
but this doesn't create an empty list, it creates a list with one (empty) item
|
||||
|
||||
commentaries:
|
||||
# this is a commentary (until the end of the line)
|
||||
option = value # this is a commentary too
|
||||
|
||||
commentaries are treated as white characters, other example:
|
||||
option = ( # this is my list
|
||||
"value 1" # this is a value one
|
||||
value 2 # and this is a value two
|
||||
) # end of my list
|
||||
|
||||
overwriting:
|
||||
option1 = some value
|
||||
option1 = other value
|
||||
# always the last option is used so option1 is "other value"
|
||||
|
||||
list delimiter:
|
||||
option1 = (value1, value2, value3)
|
||||
option2 = ("value1", "value2", "value3")
|
||||
above we're using a comma ',' as a list delimiter but when using quotes (second line)
|
||||
the commas can be omitted:
|
||||
option2 = ("value1" "value2" "value3")
|
||||
|
||||
white characters:
|
||||
the name of an option cannot consist of white characters
|
||||
some option = value # this is wrong
|
||||
some_option = value # this is ok
|
||||
|
||||
which characters are allowed in an option name is defined by IsVariableChar() method
|
||||
|
||||
you can use white characters in values
|
||||
option = value with spaces or tabs
|
||||
white characters at the beginning and at the end will be trimmed,
|
||||
so if you want them use quotes:
|
||||
option = " other value with spaces "
|
||||
|
||||
special characters in quoted strings:
|
||||
option = "this is a string with \" a quote inside"
|
||||
the option will be: this is a string with " a quote inside
|
||||
\\ - means one \
|
||||
basically: \char produces char
|
||||
so:
|
||||
"\a" gives "a"
|
||||
"\\" gives "\"
|
||||
"\Z" gives "Z" and so on
|
||||
you can call UseEscapeChar(false) to turn this off
|
||||
|
||||
|
||||
*/
|
||||
|
||||
|
||||
|
||||
class Space
|
||||
{
|
||||
public:
|
||||
|
||||
|
||||
/*
|
||||
this is the table which represents your config file
|
||||
in the Table map: the first (key) is your 'option' and the second is 'list'
|
||||
*/
|
||||
typedef std::vector<std::wstring> Value;
|
||||
typedef std::map<std::wstring, Value> Table;
|
||||
|
||||
|
||||
Space();
|
||||
~Space();
|
||||
|
||||
Space(const Space & s);
|
||||
Space & operator=(const Space & s);
|
||||
|
||||
// IMPROVE ME
|
||||
// add move cctor
|
||||
|
||||
|
||||
void Clear();
|
||||
|
||||
|
||||
|
||||
/*
|
||||
returns true if such an option has 'value'
|
||||
useful when testing lists (they don't have to be copied out)
|
||||
*/
|
||||
bool HasValue(const wchar_t * name, const wchar_t * value);
|
||||
bool HasValue(const wchar_t * name, const std::wstring & value);
|
||||
bool HasValue(const std::wstring & name, const wchar_t * value);
|
||||
bool HasValue(const std::wstring & name, const std::wstring & value);
|
||||
|
||||
|
||||
|
||||
/*
|
||||
*
|
||||
* methods for getting/finding a value
|
||||
*
|
||||
*
|
||||
*/
|
||||
|
||||
/*
|
||||
*
|
||||
* their working in O(log)
|
||||
* can return a null pointer
|
||||
*
|
||||
*/
|
||||
Value * GetValue(const wchar_t * name);
|
||||
Value * GetValue(const std::wstring & name);
|
||||
const Value * GetValue(const wchar_t * name) const;
|
||||
const Value * GetValue(const std::wstring & name) const;
|
||||
|
||||
|
||||
|
||||
// O(n) complexity
|
||||
Value * GetValueNoCase(const wchar_t * name);
|
||||
Value * GetValueNoCase(const std::wstring & name);
|
||||
const Value * GetValueNoCase(const wchar_t * name) const;
|
||||
const Value * GetValueNoCase(const std::wstring & name) const;
|
||||
|
||||
|
||||
// they can return a null pointer if there is not such a 'name'
|
||||
std::wstring * GetFirstValue(const wchar_t * name);
|
||||
std::wstring * GetFirstValue(const std::wstring & name);
|
||||
|
||||
const std::wstring * GetFirstValue(const wchar_t * name) const;
|
||||
const std::wstring * GetFirstValue(const std::wstring & name) const;
|
||||
|
||||
|
||||
/*
|
||||
those methods are used to extract information from space.table
|
||||
as a parameter they take the name of an option
|
||||
and a default value (if there is no such a parameter),
|
||||
they return appropriate value (either text, int or boolean)
|
||||
(in lists they return the first item if exists)
|
||||
|
||||
when calling Text(...) and AText(...) you should copy the object to whom a reference is returned
|
||||
it will be cleared in a next call to one of these methods (as well to Int() Size() and Bool())
|
||||
|
||||
AText(...) always returns a reference to UTF-8 string
|
||||
*/
|
||||
//std::wstring Text(const wchar_t * name) const;
|
||||
std::wstring Text(const wchar_t * name, const wchar_t * def = 0) const;
|
||||
std::wstring Text(const std::wstring & name, const wchar_t * def = 0) const;
|
||||
std::wstring Text(const std::wstring & name, const std::wstring & def) const;
|
||||
|
||||
// returns a reference
|
||||
// if there is no such an option then a new one (def value) is inserted
|
||||
//std::wstring & TextRef(const wchar_t * name);
|
||||
std::wstring & TextRef(const wchar_t * name, const wchar_t * def = 0);
|
||||
std::wstring & TextRef(const std::wstring & name, const wchar_t * def = 0);
|
||||
std::wstring & TextRef(const std::wstring & name, const std::wstring & def);
|
||||
|
||||
|
||||
// returns UTF-8 string
|
||||
//std::string TextA(const wchar_t * name) const;
|
||||
std::string TextA(const wchar_t * name, const char * def) const;
|
||||
std::string TextA(const std::wstring & name, const char * def) const;
|
||||
std::string TextA(const std::wstring & name, const std::string & def) const;
|
||||
|
||||
std::string TextA(const wchar_t * name, const wchar_t * def) const;
|
||||
std::string TextA(const std::wstring & name, const wchar_t * def) const;
|
||||
std::string TextA(const std::wstring & name, const std::wstring & def) const;
|
||||
|
||||
|
||||
|
||||
int Int(const wchar_t * name, int def = 0) const;
|
||||
int Int(const std::wstring & name, int def = 0) const;
|
||||
unsigned int UInt(const wchar_t * name, unsigned int def = 0) const;
|
||||
unsigned int UInt(const std::wstring & name, unsigned int def = 0) const;
|
||||
|
||||
long Long(const wchar_t * name, long def = 0) const;
|
||||
long Long(const std::wstring & name, long def = 0) const;
|
||||
unsigned long ULong(const wchar_t * name, unsigned long def = 0) const;
|
||||
unsigned long ULong(const std::wstring & name, unsigned long def = 0) const;
|
||||
|
||||
long long LongLong(const wchar_t * name, long long def = 0) const;
|
||||
long long LongLong(const std::wstring & name, long long def = 0) const;
|
||||
unsigned long long ULongLong(const wchar_t * name, unsigned long long def = 0) const;
|
||||
unsigned long long ULongLong(const std::wstring & name, unsigned long long def = 0) const;
|
||||
|
||||
size_t Size(const wchar_t * name, size_t def = 0) const;
|
||||
size_t Size(const std::wstring & name, size_t def = 0) const;
|
||||
|
||||
bool Bool(const wchar_t * name, bool def = false) const;
|
||||
bool Bool(const std::wstring & name, bool def = false) const;
|
||||
|
||||
|
||||
|
||||
/*
|
||||
*
|
||||
* methods for adding a new value
|
||||
*
|
||||
*
|
||||
*/
|
||||
|
||||
|
||||
std::wstring & FindAdd(const wchar_t * name);
|
||||
std::wstring & FindAdd(const std::wstring & name);
|
||||
std::wstring & FindAdd(const WTextStream & name);
|
||||
|
||||
std::wstring & Add(const wchar_t * name, bool value, bool replace_existing = true);
|
||||
std::wstring & Add(const std::wstring & name, bool value, bool replace_existing = true);
|
||||
std::wstring & Add(const wchar_t * name, int value, bool replace_existing = true);
|
||||
std::wstring & Add(const std::wstring & name, int value, bool replace_existing = true);
|
||||
std::wstring & Add(const wchar_t * name, long value, bool replace_existing = true);
|
||||
std::wstring & Add(const std::wstring & name, long value, bool replace_existing = true);
|
||||
std::wstring & Add(const wchar_t * name, size_t value, bool replace_existing = true);
|
||||
std::wstring & Add(const std::wstring & name, size_t value, bool replace_existing = true);
|
||||
|
||||
std::wstring & Add(const std::wstring & name, const std::wstring & value, bool replace_existing = true);
|
||||
std::wstring & Add(const std::wstring & name, const wchar_t * value, bool replace_existing = true);
|
||||
std::wstring & Add(const wchar_t * name, const wchar_t * value, bool replace_existing = true);
|
||||
std::wstring & Add(const wchar_t * name, const std::wstring & value, bool replace_existing = true);
|
||||
|
||||
std::wstring & Add(const wchar_t * name, const WTextStream & value, bool replace_existing = true);
|
||||
std::wstring & Add(const std::wstring & name, const WTextStream & value, bool replace_existing = true);
|
||||
std::wstring & Add(const WTextStream & name, const WTextStream & value, bool replace_existing = true);
|
||||
|
||||
void Remove(const wchar_t * name);
|
||||
void Remove(const std::wstring & name);
|
||||
|
||||
|
||||
|
||||
Space & AddSpace(const wchar_t * name);
|
||||
Space & AddSpace(const std::wstring & name);
|
||||
|
||||
// looking for the first space with the specified name
|
||||
// if there is not such a space those methods return a null pointer
|
||||
Space * FindSpace(const wchar_t * name);
|
||||
Space * FindSpace(const std::wstring & name);
|
||||
|
||||
// looking for the first space with the specified name
|
||||
// if there is not such a space then this methods adds such a space
|
||||
Space & FindAddSpace(const wchar_t * name);
|
||||
Space & FindAddSpace(const std::wstring & name);
|
||||
|
||||
void RemoveSpace(const wchar_t * name);
|
||||
void RemoveSpace(const std::wstring & name);
|
||||
void RemoveSpace(size_t child_index);
|
||||
|
||||
|
||||
/*
|
||||
*
|
||||
*
|
||||
raw access to the parsed values
|
||||
*
|
||||
*
|
||||
*/
|
||||
|
||||
std::wstring name; // space name
|
||||
Table table; // std::map<std::wstring, std::vector<std::wstring> >
|
||||
|
||||
// childs
|
||||
typedef std::vector<Space*> Spaces;
|
||||
Spaces spaces;
|
||||
|
||||
// a parent space
|
||||
// null means a root space
|
||||
Space * parent;
|
||||
|
||||
|
||||
|
||||
/*
|
||||
those methods are used to extract lists
|
||||
return true if such an option exists (but value can be an empty list)
|
||||
*/
|
||||
bool ListText(const wchar_t * name, std::vector<std::wstring> & list);
|
||||
bool ListText(const std::wstring & name, std::vector<std::wstring> & list);
|
||||
|
||||
|
||||
/*
|
||||
serialize the content
|
||||
*/
|
||||
template<class Stream>
|
||||
void Serialize(Stream & out, bool use_indents = false, bool use_comments = false, int level = 0) const;
|
||||
|
||||
template<class Stream>
|
||||
void SerializeTableMulti(Stream & out, bool use_indents, int level) const;
|
||||
|
||||
template<class Stream, class StringType>
|
||||
static void PrintValue(Stream & out, const StringType & str, bool use_quote = true);
|
||||
|
||||
template<class Stream>
|
||||
static void PrintKey(Stream & out, const std::wstring & str);
|
||||
|
||||
template<class Stream>
|
||||
static void PrintLevel(Stream & out, bool use_indents, int level);
|
||||
|
||||
|
||||
|
||||
private:
|
||||
|
||||
mutable std::wstring tmp_name;
|
||||
|
||||
std::wstring tmp_value;
|
||||
std::wstring tmp_value_text;
|
||||
std::string tmp_value_text_ascii;
|
||||
|
||||
static unsigned int ToUInt(const std::wstring & value);
|
||||
static int ToInt(const std::wstring & value);
|
||||
static unsigned long ToULong(const std::wstring & value);
|
||||
static long ToLong(const std::wstring & value);
|
||||
static unsigned long long ToULongLong(const std::wstring & value);
|
||||
static long long ToLongLong(const std::wstring & value);
|
||||
static size_t ToSize(const std::wstring & value);
|
||||
static bool ToBool(const std::wstring & value);
|
||||
|
||||
static bool IsWhite(int c);
|
||||
static bool HasWhite(const std::wstring & str);
|
||||
|
||||
};
|
||||
|
||||
|
||||
|
||||
template<class Stream>
|
||||
void Space::PrintLevel(Stream & out, bool use_indents, int level)
|
||||
{
|
||||
if( use_indents )
|
||||
{
|
||||
for(int i=0 ; i<level ; ++i)
|
||||
out << ' ';
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
|
||||
|
||||
template<class Stream, class StringType>
|
||||
void Space::PrintValue(Stream & out, const StringType & str, bool use_quote)
|
||||
{
|
||||
if( use_quote )
|
||||
out << '\"';
|
||||
|
||||
for(size_t i=0 ; i<str.size() ; ++i)
|
||||
{
|
||||
switch(str[i])
|
||||
{
|
||||
case 0: out << '\\'; out << '0'; break;
|
||||
case '\r': out << '\\'; out << 'r'; break;
|
||||
case '\n': out << '\\'; out << 'n'; break;
|
||||
case '\\': out << '\\'; out << '\\'; break;
|
||||
case '"': out << '\\'; out << '\"'; break;
|
||||
case '(': out << '\\'; out << '('; break;
|
||||
case ')': out << '\\'; out << ')'; break;
|
||||
case '=': out << '\\'; out << '='; break;
|
||||
default:
|
||||
out << str[i];
|
||||
}
|
||||
}
|
||||
|
||||
if( use_quote )
|
||||
out << '\"';
|
||||
}
|
||||
|
||||
|
||||
template<class Stream>
|
||||
void Space::PrintKey(Stream & out, const std::wstring & str)
|
||||
{
|
||||
bool use_quote = false;
|
||||
|
||||
// CHECK ME
|
||||
// HasWhite doesn't take a new line into account, is it correct to use it here?
|
||||
if( str.empty() || HasWhite(str) )
|
||||
use_quote = true;
|
||||
|
||||
PrintValue(out, str, use_quote);
|
||||
}
|
||||
|
||||
|
||||
|
||||
|
||||
template<class Stream>
|
||||
void Space::SerializeTableMulti(Stream & out, bool use_indents, int level) const
|
||||
{
|
||||
Table::const_iterator i2;
|
||||
size_t v;
|
||||
|
||||
for(i2 = table.begin() ; i2 != table.end() ; ++i2)
|
||||
{
|
||||
PrintLevel(out, use_indents, level);
|
||||
PrintKey(out, i2->first);
|
||||
out << L" = ";
|
||||
|
||||
if( i2->second.size() != 1 )
|
||||
out << '(';
|
||||
|
||||
for(v = 0 ; v < i2->second.size() ; ++v)
|
||||
{
|
||||
if( v > 0 )
|
||||
PrintLevel(out, use_indents, level + i2->first.size() + 3);
|
||||
|
||||
PrintValue(out, i2->second[v]);
|
||||
|
||||
if( v + 1 < i2->second.size() )
|
||||
out << '\n';
|
||||
}
|
||||
|
||||
if( i2->second.size() != 1 )
|
||||
out << ')';
|
||||
|
||||
out << '\n';
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
template<class Stream>
|
||||
void Space::Serialize(Stream & out, bool use_indents, bool use_comments, int level) const
|
||||
{
|
||||
if( level > 0 )
|
||||
{
|
||||
out << '\n';
|
||||
PrintLevel(out, use_indents, level);
|
||||
|
||||
if( !name.empty() )
|
||||
{
|
||||
PrintKey(out, name);
|
||||
out << ' ';
|
||||
}
|
||||
|
||||
out << L"(\n";
|
||||
|
||||
if( use_comments )
|
||||
{
|
||||
PrintLevel(out, use_indents, level);
|
||||
out << L"# space level " << level << '\n';
|
||||
}
|
||||
}
|
||||
|
||||
SerializeTableMulti(out, use_indents, level);
|
||||
|
||||
for(size_t i=0 ; i<spaces.size() ; ++i)
|
||||
spaces[i]->Serialize(out, use_indents, use_comments, level+1);
|
||||
|
||||
if( level > 0 )
|
||||
{
|
||||
PrintLevel(out, use_indents, level);
|
||||
out << ')';
|
||||
|
||||
if( use_comments )
|
||||
{
|
||||
if( name.empty() )
|
||||
out << L" # end of unnamed space";
|
||||
else
|
||||
out << L" # end of space: " << name;
|
||||
|
||||
out << L" (level " << level << L")";
|
||||
}
|
||||
|
||||
out << '\n';
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
|
||||
} // namespace
|
||||
|
||||
|
||||
|
||||
#endif
|
|
@ -1,675 +0,0 @@
|
|||
/*
|
||||
* This file is a part of PikoTools
|
||||
* and is distributed under the (new) BSD licence.
|
||||
* Author: Tomasz Sowa <t.sowa@ttmath.org>
|
||||
*/
|
||||
|
||||
/*
|
||||
* Copyright (c) 2008-2017, Tomasz Sowa
|
||||
* All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are met:
|
||||
*
|
||||
* * Redistributions of source code must retain the above copyright notice,
|
||||
* this list of conditions and the following disclaimer.
|
||||
*
|
||||
* * Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
*
|
||||
* * Neither the name Tomasz Sowa nor the names of contributors to this
|
||||
* project may be used to endorse or promote products derived
|
||||
* from this software without specific prior written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
|
||||
* THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#include <cstdlib>
|
||||
#include <wchar.h>
|
||||
#include "spaceparser.h"
|
||||
#include "utf8/utf8.h"
|
||||
|
||||
|
||||
namespace PT
|
||||
{
|
||||
|
||||
|
||||
|
||||
/*
	constructor -- there is no root space at the beginning,
	the options are set to their default values
*/
SpaceParser::SpaceParser() : root_space(0)
{
	SetDefault();
}
|
||||
|
||||
|
||||
/*
	sets the root space (by a pointer)
*/
void SpaceParser::SetSpace(Space * pspace)
{
	this->root_space = pspace;
}
|
||||
|
||||
|
||||
/*
	sets the root space (by a reference)
*/
void SpaceParser::SetSpace(Space & pspace)
{
	SetSpace(&pspace);
}
|
||||
|
||||
|
||||
void SpaceParser::SetDefault()
|
||||
{
|
||||
// you can change this separators to what you want
|
||||
// you shoud not use only white characters here (as expected by IsWhite() method)
|
||||
// and new line characters ('\n')
|
||||
separator = '=';
|
||||
commentary = '#';
|
||||
list_start = '(';
|
||||
list_end = ')';
|
||||
list_delimiter = ',';
|
||||
skip_empty = false;
|
||||
use_escape_char = true;
|
||||
input_as_utf8 = true;
|
||||
}
|
||||
|
||||
|
||||
|
||||
void SpaceParser::SkipEmpty(bool skip)
|
||||
{
|
||||
skip_empty = skip;
|
||||
}
|
||||
|
||||
|
||||
void SpaceParser::UseEscapeChar(bool escape)
|
||||
{
|
||||
use_escape_char = escape;
|
||||
}
|
||||
|
||||
|
||||
void SpaceParser::UTF8(bool utf)
|
||||
{
|
||||
input_as_utf8 = utf;
|
||||
}
|
||||
|
||||
|
||||
|
||||
|
||||
/*
	parse a file
	file_name is a path to the file (opened in the binary mode)
	returns the parsing status, cant_open_file if the file cannot be opened
*/
SpaceParser::Status SpaceParser::Parse(const char * file_name)
{
	reading_from_file = true;

	// clear() drops error flags left by a previous parsing
	file.clear();
	file.open(file_name, std::ios_base::binary | std::ios_base::in);

	if( !file )
	{
		status = cant_open_file;
		return status;
	}

	Parse();
	file.close();

	return status;
}
|
||||
|
||||
|
||||
|
||||
/*
	parse a file, the path is given as a std::string
*/
SpaceParser::Status SpaceParser::Parse(const std::string & file_name)
{
	const char * path = file_name.c_str();

	return Parse(path);
}
|
||||
|
||||
|
||||
|
||||
|
||||
/*
	parse a file, the path is given as a wide string
	the path is converted to UTF-8 (stored in afile_name) before opening the file
*/
SpaceParser::Status SpaceParser::Parse(const wchar_t * file_name)
{
	PT::WideToUTF8(file_name, afile_name);
	const char * utf8_path = afile_name.c_str();

	return Parse(utf8_path);
}
|
||||
|
||||
|
||||
|
||||
/*
	parse a file, the path is given as a std::wstring
*/
SpaceParser::Status SpaceParser::Parse(const std::wstring & file_name)
{
	const wchar_t * path = file_name.c_str();

	return Parse(path);
}
|
||||
|
||||
|
||||
|
||||
/*
	parse a zero terminated char string
	the string is read as UTF-8 or byte by byte (see UTF8() method)
	returns the parsing status
*/
SpaceParser::Status SpaceParser::ParseString(const char * str)
{
	// input source: a narrow string
	pchar_unicode = 0;
	pchar_ascii   = str;
	reading_from_wchar_string = false;
	reading_from_file         = false;

	Parse();

	return status;
}
|
||||
|
||||
|
||||
/*
	parse a std::string (see ParseString(const char *))
*/
SpaceParser::Status SpaceParser::ParseString(const std::string & str)
{
	const char * text = str.c_str();

	return ParseString(text);
}
|
||||
|
||||
|
||||
/*
	parse a zero terminated wide string
	each wide character is taken as one input character
	returns the parsing status
*/
SpaceParser::Status SpaceParser::ParseString(const wchar_t * str)
{
	// input source: a wide string
	pchar_ascii   = 0;
	pchar_unicode = str;
	reading_from_wchar_string = true;
	reading_from_file         = false;

	Parse();

	return status;
}
|
||||
|
||||
|
||||
/*
	parse a std::wstring (see ParseString(const wchar_t *))
*/
SpaceParser::Status SpaceParser::ParseString(const std::wstring & str)
{
	const wchar_t * text = str.c_str();

	return ParseString(text);
}
|
||||
|
||||
|
||||
void SpaceParser::Parse()
|
||||
{
|
||||
if( !root_space )
|
||||
{
|
||||
status = no_space;
|
||||
return;
|
||||
}
|
||||
|
||||
line = 1;
|
||||
status = ok;
|
||||
space = root_space;
|
||||
reading_commentary = false;
|
||||
ReadChar();
|
||||
SkipWhiteLines();
|
||||
|
||||
ParseLoop();
|
||||
|
||||
if( status == ok && space != root_space )
|
||||
{
|
||||
// last closing ')' characters are missing (closing a space)
|
||||
status = syntax_error;
|
||||
}
|
||||
|
||||
token.clear();
|
||||
key.clear();
|
||||
value.clear();
|
||||
}
|
||||
|
||||
|
||||
void SpaceParser::ParseLoop()
|
||||
{
|
||||
while( status == ok && lastc != -1 )
|
||||
{
|
||||
if( lastc == list_end )
|
||||
{
|
||||
SpaceEnds();
|
||||
}
|
||||
else
|
||||
{
|
||||
ReadKey();
|
||||
SkipWhite();
|
||||
|
||||
if( lastc == list_start )
|
||||
{
|
||||
SpaceStarts();
|
||||
}
|
||||
else
|
||||
if( lastc == separator )
|
||||
{
|
||||
ReadValue();
|
||||
AddKeyValuePair();
|
||||
}
|
||||
else
|
||||
{
|
||||
status = syntax_error;
|
||||
}
|
||||
}
|
||||
|
||||
if( status == ok )
|
||||
SkipWhiteLines();
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
void SpaceParser::SpaceEnds()
|
||||
{
|
||||
if( space == root_space )
|
||||
{
|
||||
// there cannot be a loose list end character in the global space
|
||||
status = syntax_error;
|
||||
}
|
||||
else
|
||||
{
|
||||
space = space->parent;
|
||||
ReadChar(); // skipping closing space character ')'
|
||||
SkipWhite();
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
void SpaceParser::SpaceStarts()
|
||||
{
|
||||
Space * new_space = new Space();
|
||||
space->spaces.push_back(new_space);
|
||||
new_space->parent = space;
|
||||
new_space->name = key;
|
||||
space = new_space;
|
||||
|
||||
ReadChar(); // skipping space starts character ')'
|
||||
}
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
/*
|
||||
those white characters here should be the same as in space.h
|
||||
*/
|
||||
bool SpaceParser::IsWhite(int c)
|
||||
{
|
||||
// dont use '\n' here
|
||||
// 13 (\r) is at the end of a line in a dos file \r\n
|
||||
// 160 is an unbreakable space
|
||||
if( c==' ' || c=='\t' || c==13 || c==160 )
|
||||
return true;
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
|
||||
/*
|
||||
skip_lines is default false
|
||||
*/
|
||||
void SpaceParser::SkipWhite(bool skip_lines)
|
||||
{
|
||||
while( IsWhite(lastc) || lastc == commentary || (skip_lines && lastc=='\n'))
|
||||
{
|
||||
if( lastc == commentary )
|
||||
SkipComment();
|
||||
else
|
||||
ReadChar();
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
void SpaceParser::SkipWhiteLines()
|
||||
{
|
||||
SkipWhite(true);
|
||||
}
|
||||
|
||||
|
||||
/*
|
||||
do not skip the last \n character
|
||||
*/
|
||||
void SpaceParser::SkipLine()
|
||||
{
|
||||
while( lastc != -1 && lastc != '\n' )
|
||||
ReadChar();
|
||||
}
|
||||
|
||||
|
||||
/*
|
||||
do not skip the last \n character
|
||||
*/
|
||||
void SpaceParser::SkipComment()
|
||||
{
|
||||
reading_commentary = true;
|
||||
SkipLine();
|
||||
reading_commentary = false;
|
||||
}
|
||||
|
||||
|
||||
|
||||
void SpaceParser::Trim(std::wstring & s)
|
||||
{
|
||||
std::wstring::size_type i;
|
||||
|
||||
if( s.empty() )
|
||||
return;
|
||||
|
||||
// looking for white characters at the end
|
||||
for(i=s.size()-1 ; i>0 && IsWhite(s[i]) ; --i);
|
||||
|
||||
if( i==0 && IsWhite(s[i]) )
|
||||
{
|
||||
// the whole string has white characters
|
||||
s.clear();
|
||||
return;
|
||||
}
|
||||
|
||||
// deleting white characters at the end
|
||||
if( i != s.size() - 1 )
|
||||
s.erase(i+1, std::wstring::npos);
|
||||
|
||||
// looking for white characters at the beginning
|
||||
for(i=0 ; i<s.size() && IsWhite(s[i]) ; ++i);
|
||||
|
||||
// deleting white characters at the beginning
|
||||
if( i != 0 )
|
||||
s.erase(0, i);
|
||||
}
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
void SpaceParser::DeleteFromTable(const std::wstring & var)
|
||||
{
|
||||
Space::Table::iterator i = space->table.find(var);
|
||||
|
||||
if( i != space->table.end() )
|
||||
space->table.erase(i);
|
||||
}
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
void SpaceParser::ReadTokenQuoted()
|
||||
{
|
||||
ReadChar(); // skipping the first quotation mark
|
||||
|
||||
while( lastc != -1 && (char_was_escaped || lastc != '"') )
|
||||
{
|
||||
token += static_cast<wchar_t>(lastc);
|
||||
ReadChar();
|
||||
}
|
||||
|
||||
if( !char_was_escaped && lastc == '"' )
|
||||
ReadChar(); // skipping the last quotation mark
|
||||
else
|
||||
status = syntax_error;
|
||||
}
|
||||
|
||||
|
||||
void SpaceParser::ReadTokenSingle(bool white_delimit, bool new_line_delimit, int delimit1, int delimit2)
|
||||
{
|
||||
while( true )
|
||||
{
|
||||
if( lastc == commentary )
|
||||
SkipComment();
|
||||
|
||||
if( lastc == -1 ||
|
||||
(!char_was_escaped &&
|
||||
(
|
||||
lastc == list_end ||
|
||||
(white_delimit && IsWhite(lastc)) ||
|
||||
(new_line_delimit && lastc == '\n') ||
|
||||
(delimit1 != -1 && lastc == delimit1) ||
|
||||
(delimit2 != -1 && lastc == delimit2)
|
||||
) ) )
|
||||
|
||||
{
|
||||
break;
|
||||
}
|
||||
|
||||
token += static_cast<wchar_t>(lastc);
|
||||
ReadChar();
|
||||
}
|
||||
|
||||
Trim(token);
|
||||
}
|
||||
|
||||
|
||||
void SpaceParser::ReadToken(bool white_delimit, bool new_line_delimit, int delimit1, int delimit2)
|
||||
{
|
||||
token.clear();
|
||||
SkipWhite();
|
||||
|
||||
if( !char_was_escaped && lastc == '"' )
|
||||
ReadTokenQuoted();
|
||||
else
|
||||
ReadTokenSingle(white_delimit, new_line_delimit, delimit1, delimit2);
|
||||
}
|
||||
|
||||
|
||||
void SpaceParser::ReadKey()
|
||||
{
|
||||
ReadToken(false, true, separator, list_start);
|
||||
key = token;
|
||||
SkipWhite();
|
||||
}
|
||||
|
||||
|
||||
|
||||
void SpaceParser::ReadValueList()
|
||||
{
|
||||
ReadChar(); // skipping the first list character ')'
|
||||
SkipWhiteLines();
|
||||
|
||||
while( lastc != -1 && lastc != list_end )
|
||||
{
|
||||
ReadToken(true, true, list_delimiter, list_end);
|
||||
value.push_back(token);
|
||||
|
||||
SkipWhiteLines();
|
||||
|
||||
if( lastc == list_delimiter )
|
||||
{
|
||||
ReadChar();
|
||||
SkipWhiteLines();
|
||||
}
|
||||
}
|
||||
|
||||
if( lastc == list_end )
|
||||
{
|
||||
ReadChar(); // skipping the last list character ')'
|
||||
SkipWhite();
|
||||
}
|
||||
else
|
||||
{
|
||||
status = syntax_error; // missing one ')'
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
void SpaceParser::ReadValueSingle()
|
||||
{
|
||||
SkipWhite();
|
||||
ReadToken(false, true, -1, -1);
|
||||
value.push_back(token);
|
||||
SkipWhite();
|
||||
}
|
||||
|
||||
|
||||
void SpaceParser::ReadValue()
|
||||
{
|
||||
ReadChar(); // skipping separator '='
|
||||
value.clear();
|
||||
SkipWhite();
|
||||
|
||||
if( lastc == list_start )
|
||||
ReadValueList();
|
||||
else
|
||||
ReadValueSingle();
|
||||
|
||||
SkipWhiteLines();
|
||||
}
|
||||
|
||||
|
||||
void SpaceParser::AddKeyValuePair()
|
||||
{
|
||||
if( value.empty() && skip_empty )
|
||||
{
|
||||
DeleteFromTable(key);
|
||||
return;
|
||||
}
|
||||
|
||||
space->table[key] = value;
|
||||
}
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
int SpaceParser::ReadUTF8Char()
|
||||
{
|
||||
int c;
|
||||
bool correct;
|
||||
|
||||
lastc = -1;
|
||||
|
||||
do
|
||||
{
|
||||
PT::UTF8ToInt(file, c, correct);
|
||||
|
||||
if( !file )
|
||||
return lastc;
|
||||
}
|
||||
while( !correct );
|
||||
|
||||
lastc = c;
|
||||
|
||||
if( lastc == '\n' )
|
||||
++line;
|
||||
|
||||
return lastc;
|
||||
}
|
||||
|
||||
|
||||
|
||||
int SpaceParser::ReadASCIIChar()
|
||||
{
|
||||
lastc = file.get();
|
||||
|
||||
if( lastc == '\n' )
|
||||
++line;
|
||||
|
||||
return lastc;
|
||||
}
|
||||
|
||||
|
||||
|
||||
|
||||
int SpaceParser::ReadCharFromWcharString()
|
||||
{
|
||||
if( *pchar_unicode == 0 )
|
||||
lastc = -1;
|
||||
else
|
||||
lastc = *(pchar_unicode++);
|
||||
|
||||
if( lastc == '\n' )
|
||||
++line;
|
||||
|
||||
return lastc;
|
||||
}
|
||||
|
||||
|
||||
int SpaceParser::ReadCharFromUTF8String()
|
||||
{
|
||||
int c;
|
||||
bool correct;
|
||||
|
||||
lastc = -1;
|
||||
|
||||
do
|
||||
{
|
||||
size_t len = PT::UTF8ToInt(pchar_ascii, c, correct);
|
||||
pchar_ascii += len;
|
||||
|
||||
if( *pchar_ascii == 0 )
|
||||
return lastc;
|
||||
}
|
||||
while( !correct );
|
||||
|
||||
lastc = c;
|
||||
|
||||
if( lastc == '\n' )
|
||||
++line;
|
||||
|
||||
return lastc;
|
||||
|
||||
}
|
||||
|
||||
|
||||
int SpaceParser::ReadCharFromAsciiString()
|
||||
{
|
||||
if( *pchar_ascii == 0 )
|
||||
lastc = -1;
|
||||
else
|
||||
lastc = *(pchar_ascii++);
|
||||
|
||||
if( lastc == '\n' )
|
||||
++line;
|
||||
|
||||
return lastc;
|
||||
}
|
||||
|
||||
|
||||
int SpaceParser::ReadCharNoEscape()
|
||||
{
|
||||
if( reading_from_file )
|
||||
{
|
||||
if( input_as_utf8 )
|
||||
return ReadUTF8Char();
|
||||
else
|
||||
return ReadASCIIChar();
|
||||
}
|
||||
else
|
||||
{
|
||||
if( reading_from_wchar_string )
|
||||
{
|
||||
return ReadCharFromWcharString();
|
||||
}
|
||||
else
|
||||
{
|
||||
if( input_as_utf8 )
|
||||
return ReadCharFromUTF8String();
|
||||
else
|
||||
return ReadCharFromAsciiString();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
int SpaceParser::ReadChar()
|
||||
{
|
||||
char_was_escaped = false;
|
||||
ReadCharNoEscape();
|
||||
|
||||
if( !reading_commentary && use_escape_char && lastc == '\\' )
|
||||
{
|
||||
char_was_escaped = true;
|
||||
ReadCharNoEscape();
|
||||
|
||||
switch(lastc)
|
||||
{
|
||||
case '0': lastc = 0; break;
|
||||
case 't': lastc = '\t'; break;
|
||||
case 'r': lastc = '\r'; break;
|
||||
case 'n': lastc = '\n'; break;
|
||||
// in other cases we return the last character
|
||||
}
|
||||
}
|
||||
|
||||
return lastc;
|
||||
}
|
||||
|
||||
|
||||
|
||||
} // namespace
|
||||
|
||||
|
||||
|
||||
|
|
@ -1,325 +0,0 @@
|
|||
/*
|
||||
* This file is a part of PikoTools
|
||||
* and is distributed under the (new) BSD licence.
|
||||
* Author: Tomasz Sowa <t.sowa@ttmath.org>
|
||||
*/
|
||||
|
||||
/*
|
||||
* Copyright (c) 2010-2017, Tomasz Sowa
|
||||
* All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are met:
|
||||
*
|
||||
* * Redistributions of source code must retain the above copyright notice,
|
||||
* this list of conditions and the following disclaimer.
|
||||
*
|
||||
* * Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
*
|
||||
* * Neither the name Tomasz Sowa nor the names of contributors to this
|
||||
* project may be used to endorse or promote products derived
|
||||
* from this software without specific prior written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
|
||||
* THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#ifndef headerfile_picotools_confparser_spaceparser
|
||||
#define headerfile_picotools_confparser_spaceparser
|
||||
|
||||
#include <fstream>
|
||||
#include "space.h"
|
||||
|
||||
|
||||
|
||||
namespace PT
|
||||
{
|
||||
|
||||
|
||||
|
||||
class SpaceParser
|
||||
{
|
||||
public:
|
||||
|
||||
|
||||
/*
|
||||
ctor -- setting default values (SetDefault() method)
|
||||
*/
|
||||
SpaceParser();
|
||||
|
||||
|
||||
/*
|
||||
setting the root space
|
||||
*/
|
||||
void SetSpace(Space * pspace);
|
||||
void SetSpace(Space & pspace);
|
||||
|
||||
|
||||
/*
|
||||
setting options of the parser to the default values
|
||||
utf8 etc.
|
||||
*/
|
||||
void SetDefault();
|
||||
|
||||
|
||||
/*
|
||||
status of parsing
|
||||
*/
|
||||
enum Status { ok, cant_open_file, syntax_error, no_space };
|
||||
|
||||
|
||||
/*
|
||||
the last status of parsing, set by Parse() methods
|
||||
*/
|
||||
Status status;
|
||||
|
||||
|
||||
/*
|
||||
a number of a line in which there is a syntax_error
|
||||
*/
|
||||
int line;
|
||||
|
||||
|
||||
/*
|
||||
main methods used to parse
|
||||
file_name is the path to a file
|
||||
*/
|
||||
Status Parse(const char * file_name);
|
||||
Status Parse(const std::string & file_name);
|
||||
Status Parse(const wchar_t * file_name);
|
||||
Status Parse(const std::wstring & file_name);
|
||||
|
||||
|
||||
/*
|
||||
main methods used to parse
|
||||
str - input string (either 8bit ascii or UTF-8 -- see UTF8() method)
|
||||
*/
|
||||
Status ParseString(const char * str);
|
||||
Status ParseString(const std::string & str);
|
||||
|
||||
|
||||
/*
|
||||
main methods used to parse
|
||||
here input string is always in unicode (wide characters)
|
||||
*/
|
||||
Status ParseString(const wchar_t * str);
|
||||
Status ParseString(const std::wstring & str);
|
||||
|
||||
|
||||
/*
|
||||
if true then empty values and lists, e.g:
|
||||
option =
|
||||
option2 = ()
|
||||
will be omitted (not inserted to 'table')
|
||||
default: false
|
||||
*/
|
||||
void SkipEmpty(bool skip);
|
||||
|
||||
|
||||
/*
|
||||
'\' character is used to escape other characters
|
||||
so "some \t t\"ext" will produce "some t t"ext"
|
||||
default: true
|
||||
special characters:
|
||||
\0 - 0 (zero code point)
|
||||
\t - tabulator (9 code point)
|
||||
\r - carriage return (13 code point)
|
||||
\n - a new line character (10 code point)
|
||||
in other cases we return the last character so \Z gives Z and \\ gives one \
|
||||
escape character are not used in commentaries
|
||||
so you can write:
|
||||
# this is my comment \n but this was not a new line
|
||||
*/
|
||||
void UseEscapeChar(bool escape);
|
||||
|
||||
|
||||
/*
|
||||
if true then the input file or string (char* or std::string) is treated as UTF-8
|
||||
default: true
|
||||
*/
|
||||
void UTF8(bool utf);
|
||||
|
||||
|
||||
private:
|
||||
|
||||
|
||||
/*
|
||||
current space set by SetSpace();
|
||||
*/
|
||||
Space * root_space;
|
||||
|
||||
|
||||
/*
|
||||
a space in which we are now
|
||||
*/
|
||||
Space * space;
|
||||
|
||||
|
||||
/*
|
||||
true if Parse() method was called
|
||||
false if ParseString() was called
|
||||
*/
|
||||
bool reading_from_file;
|
||||
|
||||
|
||||
/*
|
||||
pointers to the current character
|
||||
if ParseString() is in used
|
||||
*/
|
||||
const char * pchar_ascii;
|
||||
const wchar_t * pchar_unicode;
|
||||
|
||||
|
||||
/*
|
||||
true if ParseString(wchar_t *) or ParseString(std::wstring&) was called
|
||||
*/
|
||||
bool reading_from_wchar_string;
|
||||
|
||||
|
||||
/*
|
||||
last read token
|
||||
*/
|
||||
std::wstring token;
|
||||
|
||||
|
||||
/*
|
||||
last read key
|
||||
*/
|
||||
std::wstring key;
|
||||
|
||||
|
||||
/*
|
||||
last read list
|
||||
*/
|
||||
Space::Value value;
|
||||
|
||||
|
||||
/*
|
||||
separator between a variable and a value, default: '='
|
||||
*/
|
||||
int separator;
|
||||
|
||||
|
||||
/*
|
||||
commentary char, default: '#'
|
||||
*/
|
||||
int commentary;
|
||||
|
||||
|
||||
/*
|
||||
list starting character, default: '('
|
||||
*/
|
||||
int list_start;
|
||||
|
||||
|
||||
/*
|
||||
list ending character, default: ')'
|
||||
*/
|
||||
int list_end;
|
||||
|
||||
|
||||
/*
|
||||
list delimiter, default: ','
|
||||
*/
|
||||
int list_delimiter;
|
||||
|
||||
|
||||
/*
|
||||
last read char
|
||||
or -1 if the end
|
||||
*/
|
||||
int lastc;
|
||||
|
||||
|
||||
/*
|
||||
true if the lastc was escaped (with a backslash)
|
||||
we have to know if the last sequence was \" or just "
|
||||
*/
|
||||
bool char_was_escaped;
|
||||
|
||||
|
||||
/*
|
||||
current file
|
||||
*/
|
||||
std::ifstream file;
|
||||
|
||||
|
||||
/*
|
||||
if true then empty lists, e.g:
|
||||
option =
|
||||
option2 = ()
|
||||
will be omitted (not inserted to 'table')
|
||||
default: false
|
||||
*/
|
||||
bool skip_empty;
|
||||
|
||||
|
||||
/*
|
||||
input file is in UTF-8
|
||||
default: true
|
||||
*/
|
||||
bool input_as_utf8;
|
||||
|
||||
|
||||
/*
|
||||
if true you can use an escape character '\' in quoted values
|
||||
*/
|
||||
bool use_escape_char;
|
||||
|
||||
|
||||
/*
|
||||
true if we are reading the commentary (#)
|
||||
this is to avoid parsing escape characters in the commentary
|
||||
*/
|
||||
bool reading_commentary;
|
||||
|
||||
std::string afile_name;
|
||||
|
||||
void Parse();
|
||||
void ParseLoop();
|
||||
void SpaceEnds();
|
||||
void SpaceStarts();
|
||||
|
||||
void DeleteFromTable(const std::wstring & var);
|
||||
|
||||
void ReadTokenQuoted();
|
||||
void ReadTokenSingle(bool white_delimit, bool new_line_delimit, int delimit1, int delimit2);
|
||||
void ReadToken(bool white_delimit, bool new_line_delimit, int delimit1, int delimit2);
|
||||
void ReadKey();
|
||||
void ReadValueList();
|
||||
void ReadValueSingle();
|
||||
void ReadValue();
|
||||
void AddKeyValuePair();
|
||||
|
||||
int ReadUTF8Char();
|
||||
int ReadASCIIChar();
|
||||
int ReadCharFromWcharString();
|
||||
int ReadCharFromUTF8String();
|
||||
int ReadCharFromAsciiString();
|
||||
int ReadCharNoEscape();
|
||||
int ReadChar();
|
||||
bool IsWhite(int c);
|
||||
void SkipWhite(bool skip_lines = false);
|
||||
void SkipWhiteLines();
|
||||
void SkipLine();
|
||||
void SkipComment();
|
||||
void Trim(std::wstring & s);
|
||||
|
||||
};
|
||||
|
||||
|
||||
} // namespace
|
||||
|
||||
|
||||
#endif
|
|
@ -1,271 +0,0 @@
|
|||
/*
|
||||
* This file is a part of PikoTools
|
||||
* and is distributed under the (new) BSD licence.
|
||||
* Author: Tomasz Sowa <t.sowa@ttmath.org>
|
||||
*/
|
||||
|
||||
/*
|
||||
* Copyright (c) 2012-2017, Tomasz Sowa
|
||||
* All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are met:
|
||||
*
|
||||
* * Redistributions of source code must retain the above copyright notice,
|
||||
* this list of conditions and the following disclaimer.
|
||||
*
|
||||
* * Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
*
|
||||
* * Neither the name Tomasz Sowa nor the names of contributors to this
|
||||
* project may be used to endorse or promote products derived
|
||||
* from this software without specific prior written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
|
||||
* THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#ifndef headerfile_picotools_space_spacetojson
|
||||
#define headerfile_picotools_space_spacetojson
|
||||
|
||||
#include <string>
|
||||
#include <vector>
|
||||
#include <set>
|
||||
#include "space.h"
|
||||
|
||||
|
||||
|
||||
namespace PT
|
||||
{
|
||||
|
||||
|
||||
class SpaceToJSON
|
||||
{
|
||||
public:
|
||||
|
||||
void Clear();
|
||||
|
||||
void TreatAsTable(const wchar_t * space_name);
|
||||
void TreatAsTable(const std::wstring & space_name);
|
||||
|
||||
void TreatAsNumeric(const wchar_t * name);
|
||||
void TreatAsNumeric(const std::wstring & name);
|
||||
|
||||
void TreatAsBool(const wchar_t * name);
|
||||
void TreatAsBool(const std::wstring & name);
|
||||
|
||||
template<class Stream>
|
||||
void Serialize(Space & space, Stream & out, bool use_indents = false);
|
||||
|
||||
|
||||
private:
|
||||
|
||||
std::set<std::wstring> numeric, boolean, table;
|
||||
|
||||
|
||||
template<class Stream>
|
||||
void Serialize(Space & space, Stream & out, bool use_indents, int level,
|
||||
bool use_comma, bool treat_as_table, bool skip_name);
|
||||
|
||||
template<class Stream>
|
||||
void SerializeTableMulti(Space & space, Stream & out, bool use_indents, int level, bool use_comma);
|
||||
|
||||
template<class Stream, class StringType>
|
||||
void PrintToken(Stream & out, const StringType & str, bool check_specials = false);
|
||||
|
||||
template<class Stream>
|
||||
void PrintLevel(Stream & out, bool use_indents, int level);
|
||||
|
||||
|
||||
bool IsNumeric(const std::wstring & name);
|
||||
bool IsBool(const std::wstring & name);
|
||||
bool IsTable(const std::wstring & name);
|
||||
|
||||
};
|
||||
|
||||
|
||||
|
||||
|
||||
template<class Stream>
|
||||
void SpaceToJSON::PrintLevel(Stream & out, bool use_indents, int level)
|
||||
{
|
||||
if( use_indents )
|
||||
{
|
||||
for(int i=0 ; i<level ; ++i)
|
||||
out << ' ';
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
|
||||
|
||||
template<class Stream, class StringType>
|
||||
void SpaceToJSON::PrintToken(Stream & out, const StringType & str, bool is_special)
|
||||
{
|
||||
if( !is_special )
|
||||
out << '\"';
|
||||
|
||||
for(size_t i=0 ; i<str.size() ; ++i)
|
||||
{
|
||||
switch(str[i])
|
||||
{
|
||||
case 0: out << '\\'; out << '0'; break; // may to skip this character is better?
|
||||
case '\r': out << '\\'; out << 'r'; break;
|
||||
case '\n': out << '\\'; out << 'n'; break;
|
||||
case '\t': out << '\\'; out << 't'; break;
|
||||
case 0x08: out << '\\'; out << 'b'; break;
|
||||
case 0x0c: out << '\\'; out << 'f'; break;
|
||||
case '\\': out << '\\'; out << '\\'; break;
|
||||
//case '/': out << '\\'; out << '/'; break; // slash doesn't have to be escaped
|
||||
case '"': out << '\\'; out << '\"'; break;
|
||||
default:
|
||||
out << str[i];
|
||||
}
|
||||
}
|
||||
|
||||
if( !is_special )
|
||||
out << '\"';
|
||||
}
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
template<class Stream>
|
||||
void SpaceToJSON::SerializeTableMulti(Space & space, Stream & out, bool use_indents, int level, bool use_comma)
|
||||
{
|
||||
Space::Table::const_iterator i2;
|
||||
size_t v;
|
||||
size_t index = 0;
|
||||
bool is_special;
|
||||
|
||||
if( use_comma && !space.table.empty() )
|
||||
{
|
||||
PrintLevel(out, use_indents, level);
|
||||
out << L",\n";
|
||||
}
|
||||
|
||||
for(i2 = space.table.begin() ; i2 != space.table.end() ; ++i2, ++index)
|
||||
{
|
||||
is_special = IsNumeric(i2->first) || IsBool(i2->first);
|
||||
|
||||
PrintLevel(out, use_indents, level);
|
||||
PrintToken(out, i2->first);
|
||||
out << L": ";
|
||||
|
||||
if( i2->second.size() != 1 )
|
||||
out << '[';
|
||||
|
||||
for(v = 0 ; v < i2->second.size() ; ++v)
|
||||
{
|
||||
if( v > 0 )
|
||||
PrintLevel(out, use_indents, level + i2->first.size() + 3);
|
||||
|
||||
PrintToken(out, i2->second[v], is_special);
|
||||
|
||||
if( v + 1 < i2->second.size() )
|
||||
out << L",\n";
|
||||
}
|
||||
|
||||
if( i2->second.size() != 1 )
|
||||
out << ']';
|
||||
|
||||
if( index + 1 < space.table.size() )
|
||||
out << ',';
|
||||
|
||||
out << '\n';
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
|
||||
|
||||
template<class Stream>
|
||||
void SpaceToJSON::Serialize(Space & space, Stream & out, bool use_indents, int level,
|
||||
bool use_comma, bool treat_as_table, bool skip_name)
|
||||
{
|
||||
if( use_comma )
|
||||
{
|
||||
PrintLevel(out, use_indents, level);
|
||||
out << L",\n";
|
||||
}
|
||||
|
||||
PrintLevel(out, use_indents, level);
|
||||
|
||||
if( !skip_name )
|
||||
{
|
||||
if( space.name.empty() )
|
||||
{
|
||||
out << L"\"empty\": ";
|
||||
}
|
||||
else
|
||||
{
|
||||
PrintToken(out, space.name);
|
||||
out << L": ";
|
||||
}
|
||||
}
|
||||
|
||||
if( treat_as_table )
|
||||
out << L"[\n";
|
||||
else
|
||||
out << L"{\n";
|
||||
|
||||
bool printed_something = false;
|
||||
|
||||
if( !treat_as_table )
|
||||
{
|
||||
SerializeTableMulti(space, out, use_indents, level, false);
|
||||
|
||||
if( !space.table.empty() )
|
||||
printed_something = true;
|
||||
}
|
||||
|
||||
/*
|
||||
* !! IMPROVE ME when serializing a table
|
||||
* we can make a test whether a space is empty and has a name
|
||||
* in such a case put it as a string
|
||||
* this is the same way as the json parser works
|
||||
*
|
||||
*/
|
||||
for(size_t i=0 ; i<space.spaces.size() ; ++i)
|
||||
{
|
||||
bool next_skip_name = treat_as_table;
|
||||
bool next_is_table = IsTable(space.spaces[i]->name);
|
||||
Serialize(*space.spaces[i], out, use_indents, level+1, printed_something, next_is_table, next_skip_name);
|
||||
printed_something = true;
|
||||
}
|
||||
|
||||
PrintLevel(out, use_indents, level);
|
||||
|
||||
if( treat_as_table )
|
||||
out << L"]\n";
|
||||
else
|
||||
out << L"}\n";
|
||||
}
|
||||
|
||||
|
||||
|
||||
template<class Stream>
|
||||
void SpaceToJSON::Serialize(Space & space, Stream & out, bool use_indents)
|
||||
{
|
||||
bool treat_as_table = IsTable(space.name);
|
||||
Serialize(space, out, use_indents, 0, false, treat_as_table, true);
|
||||
}
|
||||
|
||||
|
||||
|
||||
|
||||
} // namespace
|
||||
|
||||
#endif
|
||||
|
|
@ -0,0 +1,45 @@
|
|||
# Makefile for GNU make
# builds the static library from all *.cpp files found below this directory

sourcefiles:=$(shell find . -name "*.cpp")
objfiles:=$(patsubst %.cpp,%.o,$(sourcefiles))


# fall-back values used when the variables are not provided by the caller
# note: GNU make normally predefines CXX and AR itself,
# so these two fall-backs rarely take effect
ifndef CXX
CXX = g++
endif

ifndef CXXFLAGS
CXXFLAGS = -Wall -pedantic -O2 -std=c++20 -I../src -I/usr/local/include
endif

ifndef AR
AR = ar
endif


libname = pikotools.a


# these targets do not create files with such names,
# without .PHONY a file called e.g. 'clean' would disable the target
.PHONY: all clean depend


all: $(libname)


$(libname): $(objfiles)
	$(AR) rcs $(libname) $(objfiles)


%.o: %.cpp
	$(CXX) -c $(CXXFLAGS) -o $@ $<


clean:
	rm -f $(objfiles)
	rm -f $(libname)


# regenerate the header dependencies (requires the makedepend tool)
depend:
	makedepend -Y. -f- $(sourcefiles) > Makefile.dep


# the leading '-' means that a missing Makefile.dep is not an error
-include Makefile.dep
|
||||
|
|
@ -0,0 +1,43 @@
|
|||
# DO NOT DELETE
|
||||
|
||||
./convert/inttostr.o: ./convert/inttostr.h
|
||||
./convert/misc.o: ./convert/misc.h ./convert/text.h
|
||||
./convert/text.o: ./convert/text.h ./convert/text_private.h
|
||||
./date/date.o: ./date/date.h convert/inttostr.h
|
||||
./log/filelog.o: ./log/filelog.h textstream/textstream.h space/space.h
|
||||
./log/filelog.o: textstream/types.h convert/inttostr.h date/date.h
|
||||
./log/filelog.o: membuffer/membuffer.h textstream/types.h utf8/utf8.h
|
||||
./log/filelog.o: utf8/utf8_templates.h utf8/utf8_private.h
|
||||
./log/log.o: ./log/log.h textstream/textstream.h space/space.h
|
||||
./log/log.o: textstream/types.h convert/inttostr.h date/date.h
|
||||
./log/log.o: membuffer/membuffer.h textstream/types.h ./log/filelog.h
|
||||
./log/log.o: utf8/utf8.h utf8/utf8_templates.h utf8/utf8_private.h
|
||||
./space/space.o: ./space/space.h textstream/types.h convert/inttostr.h
|
||||
./space/space.o: utf8/utf8.h textstream/textstream.h space/space.h
|
||||
./space/space.o: date/date.h membuffer/membuffer.h textstream/types.h
|
||||
./space/space.o: utf8/utf8_templates.h utf8/utf8_private.h convert/convert.h
|
||||
./space/space.o: ./convert/inttostr.h convert/patternreplacer.h
|
||||
./space/space.o: convert/strtoint.h ./convert/text.h ./convert/misc.h
|
||||
./space/spaceparser.o: ./space/spaceparser.h ./space/space.h
|
||||
./space/spaceparser.o: textstream/types.h convert/inttostr.h utf8/utf8.h
|
||||
./space/spaceparser.o: textstream/textstream.h space/space.h date/date.h
|
||||
./space/spaceparser.o: membuffer/membuffer.h textstream/types.h
|
||||
./space/spaceparser.o: utf8/utf8_templates.h utf8/utf8_private.h
|
||||
./space/spaceparser.o: convert/strtoint.h ./convert/text.h ./convert/misc.h
|
||||
./utf8/utf8.o: ./utf8/utf8.h textstream/textstream.h space/space.h
|
||||
./utf8/utf8.o: textstream/types.h convert/inttostr.h date/date.h
|
||||
./utf8/utf8.o: membuffer/membuffer.h textstream/types.h utf8/utf8_templates.h
|
||||
./utf8/utf8.o: utf8/utf8_private.h
|
||||
./utf8/utf8_private.o: utf8/utf8_private.h textstream/textstream.h
|
||||
./utf8/utf8_private.o: space/space.h textstream/types.h convert/inttostr.h
|
||||
./utf8/utf8_private.o: date/date.h membuffer/membuffer.h textstream/types.h
|
||||
./csv/csvparser.o: ./csv/csvparser.h space/space.h textstream/types.h
|
||||
./csv/csvparser.o: convert/inttostr.h utf8/utf8.h textstream/textstream.h
|
||||
./csv/csvparser.o: date/date.h membuffer/membuffer.h textstream/types.h
|
||||
./csv/csvparser.o: utf8/utf8_templates.h utf8/utf8_private.h
|
||||
./mainoptions/mainoptionsparser.o: ./mainoptions/mainoptionsparser.h
|
||||
./mainoptions/mainoptionsparser.o: space/space.h textstream/types.h
|
||||
./mainoptions/mainoptionsparser.o: convert/inttostr.h utf8/utf8.h
|
||||
./mainoptions/mainoptionsparser.o: textstream/textstream.h date/date.h
|
||||
./mainoptions/mainoptionsparser.o: membuffer/membuffer.h textstream/types.h
|
||||
./mainoptions/mainoptionsparser.o: utf8/utf8_templates.h utf8/utf8_private.h
|
|
@ -0,0 +1,156 @@
|
|||
/*
|
||||
* This file is a part of PikoTools
|
||||
* and is distributed under the (new) BSD licence.
|
||||
* Author: Tomasz Sowa <t.sowa@ttmath.org>
|
||||
*/
|
||||
|
||||
/*
|
||||
* Copyright (c) 2021, Tomasz Sowa
|
||||
* All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are met:
|
||||
*
|
||||
* * Redistributions of source code must retain the above copyright notice,
|
||||
* this list of conditions and the following disclaimer.
|
||||
*
|
||||
* * Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
*
|
||||
* * Neither the name Tomasz Sowa nor the names of contributors to this
|
||||
* project may be used to endorse or promote products derived
|
||||
* from this software without specific prior written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
|
||||
* THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#include "inttostr.h"
|
||||
|
||||
|
||||
namespace pt
|
||||
{
|
||||
|
||||
std::string to_str(unsigned long long value, int base)
|
||||
{
|
||||
std::string res;
|
||||
Toa(value, res, false, base);
|
||||
|
||||
return res;
|
||||
}
|
||||
|
||||
|
||||
std::string to_str(long long value, int base)
|
||||
{
|
||||
std::string res;
|
||||
Toa(value, res, false, base);
|
||||
|
||||
return res;
|
||||
}
|
||||
|
||||
|
||||
std::string to_str(unsigned long value, int base)
|
||||
{
|
||||
return to_str(static_cast<unsigned long long>(value), base);
|
||||
}
|
||||
|
||||
|
||||
std::string to_str(long value, int base)
|
||||
{
|
||||
return to_str(static_cast<long long>(value), base);
|
||||
}
|
||||
|
||||
|
||||
std::string to_str(unsigned int value, int base)
|
||||
{
|
||||
return to_str(static_cast<unsigned long long>(value), base);
|
||||
}
|
||||
|
||||
|
||||
std::string to_str(int value, int base)
|
||||
{
|
||||
return to_str(static_cast<long long>(value), base);
|
||||
}
|
||||
|
||||
|
||||
std::string to_str(unsigned short value, int base)
|
||||
{
|
||||
return to_str(static_cast<unsigned long long>(value), base);
|
||||
}
|
||||
|
||||
|
||||
std::string to_str(short value, int base)
|
||||
{
|
||||
return to_str(static_cast<long long>(value), base);
|
||||
}
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
std::wstring to_wstr(unsigned long long value, int base)
|
||||
{
|
||||
std::wstring res;
|
||||
Toa(value, res, false, base);
|
||||
|
||||
return res;
|
||||
}
|
||||
|
||||
|
||||
std::wstring to_wstr(long long value, int base)
|
||||
{
|
||||
std::wstring res;
|
||||
Toa(value, res, false, base);
|
||||
|
||||
return res;
|
||||
}
|
||||
|
||||
|
||||
std::wstring to_wstr(unsigned long value, int base)
|
||||
{
|
||||
return to_wstr(static_cast<unsigned long long>(value), base);
|
||||
}
|
||||
|
||||
|
||||
std::wstring to_wstr(long value, int base)
|
||||
{
|
||||
return to_wstr(static_cast<long long>(value), base);
|
||||
}
|
||||
|
||||
|
||||
std::wstring to_wstr(unsigned int value, int base)
|
||||
{
|
||||
return to_wstr(static_cast<unsigned long long>(value), base);
|
||||
}
|
||||
|
||||
|
||||
std::wstring to_wstr(int value, int base)
|
||||
{
|
||||
return to_wstr(static_cast<long long>(value), base);
|
||||
}
|
||||
|
||||
|
||||
std::wstring to_wstr(unsigned short value, int base)
|
||||
{
|
||||
return to_wstr(static_cast<unsigned long long>(value), base);
|
||||
}
|
||||
|
||||
|
||||
std::wstring to_wstr(short value, int base)
|
||||
{
|
||||
return to_wstr(static_cast<long long>(value), base);
|
||||
}
|
||||
|
||||
|
||||
}
|
||||
|
|
@ -42,7 +42,7 @@
|
|||
|
||||
|
||||
|
||||
namespace PT
|
||||
namespace pt
|
||||
{
|
||||
|
||||
|
||||
|
@ -270,14 +270,23 @@ void Toa(short value, StringType & res, bool clear_string = true, int base = 10)
|
|||
|
||||
|
||||
|
||||
std::wstring Toa(unsigned long long value, int base = 10);
|
||||
std::wstring Toa(long long value, int base = 10);
|
||||
std::wstring Toa(unsigned long value, int base = 10);
|
||||
std::wstring Toa(long value, int base = 10);
|
||||
std::wstring Toa(unsigned int value, int base = 10);
|
||||
std::wstring Toa(int value, int base = 10);
|
||||
std::wstring Toa(unsigned short value, int base = 10);
|
||||
std::wstring Toa(short value, int base = 10);
|
||||
std::string to_str(unsigned long long value, int base = 10);
|
||||
std::string to_str(long long value, int base = 10);
|
||||
std::string to_str(unsigned long value, int base = 10);
|
||||
std::string to_str(long value, int base = 10);
|
||||
std::string to_str(unsigned int value, int base = 10);
|
||||
std::string to_str(int value, int base = 10);
|
||||
std::string to_str(unsigned short value, int base = 10);
|
||||
std::string to_str(short value, int base = 10);
|
||||
|
||||
std::wstring to_wstr(unsigned long long value, int base = 10);
|
||||
std::wstring to_wstr(long long value, int base = 10);
|
||||
std::wstring to_wstr(unsigned long value, int base = 10);
|
||||
std::wstring to_wstr(long value, int base = 10);
|
||||
std::wstring to_wstr(unsigned int value, int base = 10);
|
||||
std::wstring to_wstr(int value, int base = 10);
|
||||
std::wstring to_wstr(unsigned short value, int base = 10);
|
||||
std::wstring to_wstr(short value, int base = 10);
|
||||
|
||||
|
||||
|
|
@ -38,7 +38,7 @@
|
|||
#include "misc.h"
|
||||
|
||||
|
||||
namespace PT
|
||||
namespace pt
|
||||
{
|
||||
|
||||
|
|
@ -42,7 +42,7 @@
|
|||
#include "text.h"
|
||||
|
||||
|
||||
namespace PT
|
||||
namespace pt
|
||||
{
|
||||
|
||||
void SetOverflow(bool * was_overflow, bool val);
|
|
@ -44,7 +44,7 @@
|
|||
|
||||
|
||||
|
||||
namespace PT
|
||||
namespace pt
|
||||
{
|
||||
|
||||
template<typename CharType, typename StrType>
|
|
@ -5,7 +5,7 @@
|
|||
*/
|
||||
|
||||
/*
|
||||
* Copyright (c) 2017, Tomasz Sowa
|
||||
* Copyright (c) 2017-2021, Tomasz Sowa
|
||||
* All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
|
@ -43,7 +43,7 @@
|
|||
#include "misc.h"
|
||||
|
||||
|
||||
namespace PT
|
||||
namespace pt
|
||||
{
|
||||
|
||||
|
||||
|
@ -59,9 +59,9 @@ unsigned long long Toull(const CharType * str, int base = 10, const CharType **
|
|||
SetOverflow(was_overflow, false);
|
||||
|
||||
if( allow_skip_whitechars )
|
||||
str = SkipWhite(str);
|
||||
str = skip_white(str);
|
||||
|
||||
while( !carry && IsDigit(*str, base, &digit) )
|
||||
while( !carry && is_digit(*str, base, &digit) )
|
||||
{
|
||||
#ifdef __GNUC__
|
||||
carry = __builtin_mul_overflow(res, static_cast<unsigned long long>(base), &res);
|
||||
|
@ -82,7 +82,7 @@ unsigned long long Toull(const CharType * str, int base = 10, const CharType **
|
|||
{
|
||||
if( after_str )
|
||||
{
|
||||
while( IsDigit(*str, base, &digit) )
|
||||
while( is_digit(*str, base, &digit) )
|
||||
{
|
||||
str += 1;
|
||||
}
|
||||
|
@ -108,7 +108,7 @@ long long Toll(const CharType * str, int base = 10, const CharType ** after_str
|
|||
SetOverflow(was_overflow, false);
|
||||
|
||||
if( allow_skip_whitechars )
|
||||
str = SkipWhite(str);
|
||||
str = skip_white(str);
|
||||
|
||||
if( *str == '-' )
|
||||
{
|
||||
|
@ -230,7 +230,7 @@ template<typename CharType>
|
|||
unsigned long long Toull_b(const CharType * str, const CharType ** after_str = 0, bool * was_overflow = 0, bool allow_skip_whitechars = true)
|
||||
{
|
||||
if( allow_skip_whitechars )
|
||||
str = SkipWhite(str);
|
||||
str = skip_white(str);
|
||||
|
||||
int base = 10;
|
||||
|
||||
|
@ -266,7 +266,7 @@ long long Toll_b(const CharType * str, const CharType ** after_str = 0, bool * w
|
|||
SetOverflow(was_overflow, false);
|
||||
|
||||
if( allow_skip_whitechars )
|
||||
str = SkipWhite(str);
|
||||
str = skip_white(str);
|
||||
|
||||
if( *str == '-' )
|
||||
{
|
|
@ -0,0 +1,473 @@
|
|||
/*
|
||||
* This file is a part of PikoTools
|
||||
* and is distributed under the (new) BSD licence.
|
||||
* Author: Tomasz Sowa <t.sowa@ttmath.org>
|
||||
*/
|
||||
|
||||
/*
|
||||
* Copyright (c) 2017-2021, Tomasz Sowa
|
||||
* All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are met:
|
||||
*
|
||||
* * Redistributions of source code must retain the above copyright notice,
|
||||
* this list of conditions and the following disclaimer.
|
||||
*
|
||||
* * Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
*
|
||||
* * Neither the name Tomasz Sowa nor the names of contributors to this
|
||||
* project may be used to endorse or promote products derived
|
||||
* from this software without specific prior written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
|
||||
* THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#include <cstddef>
|
||||
#include "text.h"
|
||||
#include "text_private.h"
|
||||
|
||||
|
||||
namespace pt
|
||||
{
|
||||
|
||||
/*
 * additional white characters recognized by is_white()
 *
 * the table must stay sorted in ascending order (is_white() uses a binary search),
 * space (32), tab (9) and a new line (10) are handled separately in is_white()
 * so they are not listed here
 */
static const wchar_t white_chars_table[] = {
	0x000B,	// LINE TABULATION (vertical tabulation)
	0x000C,	// FORM FEED (FF)
	0x000D,	// CARRIAGE RETURN (CR) - a character at the end in a dos text file
	0x0085,	// NEXT LINE (NEL)
	0x00A0,	// NO-BREAK SPACE (old name: NON-BREAKING SPACE)
	0x1680,	// OGHAM SPACE MARK
	0x180E,	// MONGOLIAN VOWEL SEPARATOR
	0x2000,	// EN QUAD
	0x2001,	// EM QUAD
	0x2002,	// EN SPACE
	0x2003,	// EM SPACE
	0x2004,	// THREE-PER-EM SPACE
	0x2005,	// FOUR-PER-EM SPACE
	0x2006,	// SIX-PER-EM SPACE
	0x2007,	// FIGURE SPACE
	0x2008,	// PUNCTUATION SPACE
	0x2009,	// THIN SPACE
	0x200A,	// HAIR SPACE
	0x2028,	// LINE SEPARATOR
	0x2029,	// PARAGRAPH SEPARATOR
	0x202F,	// NARROW NO-BREAK SPACE
	0x205F,	// MEDIUM MATHEMATICAL SPACE
	0x3000,	// IDEOGRAPHIC SPACE
	0xFEFF,	// ZERO WIDTH NO-BREAK SPACE
};


/*
 * returns true if c is a white character
 *
 * a space (32) and a tab (9) are always white,
 * a new line (10) is white only if treat_new_line_as_white is true,
 * characters from white_chars_table are tested only if check_additional_chars is true
 */
bool is_white(wchar_t c, bool check_additional_chars, bool treat_new_line_as_white)
{
	// the most common white characters are tested first
	if( c == 32 || c == 9 )
		return true;

	if( c == 10 )
		return treat_new_line_as_white;

	if( !check_additional_chars )
		return false;

	// binary search on the half-open range [first, last)
	std::size_t first = 0;
	std::size_t last = sizeof(white_chars_table) / sizeof(white_chars_table[0]);

	while( first < last )
	{
		const std::size_t mid = first + (last - first) / 2;

		if( white_chars_table[mid] == c )
			return true;

		if( white_chars_table[mid] < c )
			first = mid + 1;
		else
			last = mid;
	}

	return false;
}
|
||||
|
||||
|
||||
|
||||
/*
 * tests whether c is a digit in the given base
 *
 * recognized characters are '0'-'9', 'a'-'f' and 'A'-'F',
 * if digit is not null then the numeric value of c is stored there
 * (zero if c is not one of the recognized characters)
 *
 * returns true only if c was recognized and its value is lower than base
 */
bool is_digit(wchar_t c, int base, int * digit)
{
	int value = 0;
	bool recognized = true;

	if( c >= '0' && c <= '9' )
		value = c - '0';
	else
	if( c >= 'a' && c <= 'f' )
		value = c - 'a' + 10;
	else
	if( c >= 'A' && c <= 'F' )
		value = c - 'A' + 10;
	else
		recognized = false;

	// the value is reported even if it does not fit in the base
	if( digit )
		*digit = value;

	return recognized && value < base;
}
|
||||
|
||||
|
||||
const char * skip_white(const char * str, bool check_additional_chars, bool treat_new_line_as_white)
|
||||
{
|
||||
return pt_private::skip_white_generic(str, check_additional_chars, treat_new_line_as_white);
|
||||
}
|
||||
|
||||
const wchar_t * skip_white(const wchar_t * str, bool check_additional_chars, bool treat_new_line_as_white)
|
||||
{
|
||||
return pt_private::skip_white_generic(str, check_additional_chars, treat_new_line_as_white);
|
||||
}
|
||||
|
||||
const char * skip_white_from_back(const char * str_begin, const char * str_end, bool check_additional_chars, bool treat_new_line_as_white)
|
||||
{
|
||||
return pt_private::skip_white_from_back_generic(str_begin, str_end, check_additional_chars, treat_new_line_as_white);
|
||||
}
|
||||
|
||||
const wchar_t * skip_white_from_back(const wchar_t * str_begin, const wchar_t * str_end, bool check_additional_chars, bool treat_new_line_as_white)
|
||||
{
|
||||
return pt_private::skip_white_from_back_generic(str_begin, str_end, check_additional_chars, treat_new_line_as_white);
|
||||
}
|
||||
|
||||
const char * skip_white_from_back(const char * str, bool check_additional_chars, bool treat_new_line_as_white)
|
||||
{
|
||||
return pt_private::skip_white_from_back_generic(str, check_additional_chars, treat_new_line_as_white);
|
||||
}
|
||||
|
||||
const wchar_t * skip_white_from_back(const wchar_t * str, bool check_additional_chars, bool treat_new_line_as_white)
|
||||
{
|
||||
return pt_private::skip_white_from_back_generic(str, check_additional_chars, treat_new_line_as_white);
|
||||
}
|
||||
|
||||
|
||||
|
||||
|
||||
char to_lower(char c)
|
||||
{
|
||||
return pt_private::to_lower_generic(c);
|
||||
}
|
||||
|
||||
wchar_t to_lower(wchar_t c)
|
||||
{
|
||||
return pt_private::to_lower_generic(c);
|
||||
}
|
||||
|
||||
|
||||
char to_upper(char c)
|
||||
{
|
||||
return pt_private::to_upper_generic(c);
|
||||
}
|
||||
|
||||
wchar_t to_upper(wchar_t c)
|
||||
{
|
||||
return pt_private::to_upper_generic(c);
|
||||
|
||||
}
|
||||
|
||||
|
||||
|
||||
|
||||
void to_lower_emplace(std::string & str)
|
||||
{
|
||||
pt_private::to_lower_str_generic(str);
|
||||
}
|
||||
|
||||
|
||||
void to_lower_emplace(std::wstring & str)
|
||||
{
|
||||
pt_private::to_lower_str_generic(str);
|
||||
}
|
||||
|
||||
|
||||
void to_upper_emplace(std::string & str)
|
||||
{
|
||||
pt_private::to_upper_str_generic(str);
|
||||
}
|
||||
|
||||
|
||||
void to_upper_emplace(std::wstring & str)
|
||||
{
|
||||
pt_private::to_upper_str_generic(str);
|
||||
}
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
std::string to_lower(const std::string & str)
|
||||
{
|
||||
std::string res(str);
|
||||
to_lower_emplace(res);
|
||||
|
||||
return res;
|
||||
}
|
||||
|
||||
|
||||
std::wstring to_lower(const std::wstring & str)
|
||||
{
|
||||
std::wstring res(str);
|
||||
to_lower_emplace(res);
|
||||
|
||||
return res;
|
||||
}
|
||||
|
||||
|
||||
std::string to_upper(const std::string & str)
|
||||
{
|
||||
std::string res(str);
|
||||
to_upper_emplace(res);
|
||||
|
||||
return res;
|
||||
}
|
||||
|
||||
|
||||
std::wstring to_upper(const std::wstring & str)
|
||||
{
|
||||
std::wstring res(str);
|
||||
to_upper_emplace(res);
|
||||
|
||||
return res;
|
||||
}
|
||||
|
||||
|
||||
|
||||
int compare(const char * str1, const char * str2)
|
||||
{
|
||||
return pt_private::compare_generic(str1, str2);
|
||||
}
|
||||
|
||||
int compare(const wchar_t * str1, const wchar_t * str2)
|
||||
{
|
||||
return pt_private::compare_generic(str1, str2);
|
||||
}
|
||||
|
||||
int compare(const std::string & str1, const std::string & str2)
|
||||
{
|
||||
return pt_private::compare_str_generic(str1, str2);
|
||||
}
|
||||
|
||||
int compare(const std::wstring & str1, const std::wstring & str2)
|
||||
{
|
||||
return pt_private::compare_str_generic(str1, str2);
|
||||
}
|
||||
|
||||
int compare(const char * str1_begin, const char * str1_end, const char * str2)
|
||||
{
|
||||
return pt_private::compare_generic(str1_begin, str1_end, str2);
|
||||
}
|
||||
|
||||
int compare(const wchar_t * str1_begin, const wchar_t * str1_end, const wchar_t * str2)
|
||||
{
|
||||
return pt_private::compare_generic(str1_begin, str1_end, str2);
|
||||
}
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
int compare_nc(const char * str1, const char * str2)
|
||||
{
|
||||
return pt_private::compare_nc_generic(str1, str2);
|
||||
}
|
||||
|
||||
int compare_nc(const wchar_t * str1, const wchar_t * str2)
|
||||
{
|
||||
return pt_private::compare_nc_generic(str1, str2);
|
||||
}
|
||||
|
||||
int compare_nc(const std::string & str1, const std::string & str2)
|
||||
{
|
||||
return pt_private::compare_nc_str_generic(str1, str2);
|
||||
}
|
||||
|
||||
int compare_nc(const std::wstring & str1, const std::wstring & str2)
|
||||
{
|
||||
return pt_private::compare_nc_str_generic(str1, str2);
|
||||
}
|
||||
|
||||
int compare_nc(const char * str1_begin, const char * str1_end, const char * str2)
|
||||
{
|
||||
return pt_private::compare_nc_generic(str1_begin, str1_end, str2);
|
||||
}
|
||||
|
||||
int compare_nc(const wchar_t * str1_begin, const wchar_t * str1_end, const wchar_t * str2)
|
||||
{
|
||||
return pt_private::compare_nc_generic(str1_begin, str1_end, str2);
|
||||
}
|
||||
|
||||
|
||||
bool is_equal(const char * str1, const char * str2)
|
||||
{
|
||||
return pt_private::compare_generic(str1, str2) == 0;
|
||||
}
|
||||
|
||||
bool is_equal(const wchar_t * str1, const wchar_t * str2)
|
||||
{
|
||||
return pt_private::compare_generic(str1, str2) == 0;
|
||||
}
|
||||
|
||||
|
||||
/*
 * returns true if both strings have the same length and the same characters
 *
 * the whole content is compared, including any embedded null characters
 * (comparing c_str() pointers would stop at the first null character
 * and report e.g. "a\0b" and "a\0c" as equal)
 */
bool is_equal(const std::string & str1, const std::string & str2)
{
	return str1 == str2;
}


/*
 * wide character version, the same rules as for std::string
 */
bool is_equal(const std::wstring & str1, const std::wstring & str2)
{
	return str1 == str2;
}
|
||||
|
||||
|
||||
|
||||
bool is_equal(const char * str1_begin, const char * str1_end, const char * str2)
|
||||
{
|
||||
return pt_private::compare_generic(str1_begin, str1_end, str2) == 0;
|
||||
}
|
||||
|
||||
|
||||
bool is_equal(const wchar_t * str1_begin, const wchar_t * str1_end, const wchar_t * str2)
|
||||
{
|
||||
return pt_private::compare_generic(str1_begin, str1_end, str2) == 0;
|
||||
}
|
||||
|
||||
|
||||
bool is_equal_nc(const char * str1, const char * str2)
|
||||
{
|
||||
return pt_private::compare_nc_generic(str1, str2) == 0;
|
||||
}
|
||||
|
||||
|
||||
bool is_equal_nc(const wchar_t * str1, const wchar_t * str2)
|
||||
{
|
||||
return pt_private::compare_nc_generic(str1, str2) == 0;
|
||||
}
|
||||
|
||||
|
||||
bool is_equal_nc(const std::string & str1, const std::string & str2)
|
||||
{
|
||||
return is_equal_nc(str1.c_str(), str2.c_str());
|
||||
}
|
||||
|
||||
|
||||
bool is_equal_nc(const std::wstring & str1, const std::wstring & str2)
|
||||
{
|
||||
return is_equal_nc(str1.c_str(), str2.c_str());
|
||||
}
|
||||
|
||||
|
||||
bool is_equal_nc(const char * str1_begin, const char * str1_end, const char * str2)
|
||||
{
|
||||
return pt_private::compare_nc_generic(str1_begin, str1_end, str2) == 0;
|
||||
}
|
||||
|
||||
|
||||
bool is_equal_nc(const wchar_t * str1_begin, const wchar_t * str1_end, const wchar_t * str2)
|
||||
{
|
||||
return pt_private::compare_nc_generic(str1_begin, str1_end, str2) == 0;
|
||||
}
|
||||
|
||||
|
||||
|
||||
|
||||
bool is_substr(const char * short_str, const char * long_str)
|
||||
{
|
||||
return pt_private::is_substr_generic(short_str, long_str);
|
||||
}
|
||||
|
||||
|
||||
bool is_substr(const wchar_t * short_str, const wchar_t * long_str)
|
||||
{
|
||||
return pt_private::is_substr_generic(short_str, long_str);
|
||||
}
|
||||
|
||||
|
||||
bool is_substr(const std::string & short_str, const std::string & long_str)
|
||||
{
|
||||
return is_substr(short_str.c_str(), long_str.c_str());
|
||||
}
|
||||
|
||||
|
||||
bool is_substr(const std::wstring & short_str, const std::wstring & long_str)
|
||||
{
|
||||
return is_substr(short_str.c_str(), long_str.c_str());
|
||||
}
|
||||
|
||||
|
||||
bool is_substr_nc(const char * short_str, const char * long_str)
|
||||
{
|
||||
return pt_private::is_substr_nc_generic(short_str, long_str);
|
||||
}
|
||||
|
||||
bool is_substr_nc(const wchar_t * short_str, const wchar_t * long_str)
|
||||
{
|
||||
return pt_private::is_substr_nc_generic(short_str, long_str);
|
||||
}
|
||||
|
||||
|
||||
bool is_substr_nc(const std::string & short_str, const std::string & long_str)
|
||||
{
|
||||
return pt_private::is_substr_nc_generic(short_str.c_str(), long_str.c_str());
|
||||
}
|
||||
|
||||
|
||||
bool is_substr_nc(const std::wstring & short_str, const std::wstring & long_str)
|
||||
{
|
||||
return pt_private::is_substr_nc_generic(short_str.c_str(), long_str.c_str());
|
||||
}
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
}
|
||||
|
||||
|
|
@ -0,0 +1,150 @@
|
|||
/*
|
||||
* This file is a part of PikoTools
|
||||
* and is distributed under the (new) BSD licence.
|
||||
* Author: Tomasz Sowa <t.sowa@ttmath.org>
|
||||
*/
|
||||
|
||||
/*
|
||||
* Copyright (c) 2017-2021, Tomasz Sowa
|
||||
* All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are met:
|
||||
*
|
||||
* * Redistributions of source code must retain the above copyright notice,
|
||||
* this list of conditions and the following disclaimer.
|
||||
*
|
||||
* * Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
*
|
||||
* * Neither the name Tomasz Sowa nor the names of contributors to this
|
||||
* project may be used to endorse or promote products derived
|
||||
* from this software without specific prior written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
|
||||
* THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#ifndef headerfile_picotools_convert_text
|
||||
#define headerfile_picotools_convert_text
|
||||
|
||||
#include <string>
|
||||
|
||||
|
||||
namespace pt
{

/*
 * is_white() returns true if c is a white character:
 * a space (32) and a tab (9) are always white,
 * a new line (10) is white only if treat_new_line_as_white is true,
 * other (Unicode) white characters are tested only if check_additional_chars is true
 *
 * is_digit() returns true if c is a digit in the given base,
 * the characters '0'-'9', 'a'-'f' and 'A'-'F' are recognized,
 * if digit is not null the numeric value of c is stored there (zero for an unrecognized character)
 */
bool is_white(wchar_t c, bool check_additional_chars = true, bool treat_new_line_as_white = true);
bool is_digit(wchar_t c, int base = 10, int * digit = 0);

/*
 * returns a pointer to the first character of a null terminated string
 * which is not a white character (the flags have the same meaning as in is_white())
 */
const char * skip_white(const char * str, bool check_additional_chars = true, bool treat_new_line_as_white = true);
const wchar_t * skip_white(const wchar_t * str, bool check_additional_chars = true, bool treat_new_line_as_white = true);


/*
 *
 * str_end points one past the last character of the string
 *
 * the return value is a pointer to the first character of the run of white characters
 * at the end of the string, or str_end if the string does not end with a white character
 *
 */
const char * skip_white_from_back(const char * str_begin, const char * str_end, bool check_additional_chars = true, bool treat_new_line_as_white = true);
const wchar_t * skip_white_from_back(const wchar_t * str_begin, const wchar_t * str_end, bool check_additional_chars = true, bool treat_new_line_as_white = true);

/*
 * the same as above but for a null terminated string
 * (the end of the string is found by looking for the terminating zero)
 */
const char * skip_white_from_back(const char * str, bool check_additional_chars = true, bool treat_new_line_as_white = true);
const wchar_t * skip_white_from_back(const wchar_t * str, bool check_additional_chars = true, bool treat_new_line_as_white = true);


/*
 * case conversion of a single character,
 * only the letters 'A'-'Z' / 'a'-'z' are changed, other characters are returned as they are
 */
char to_lower(char c);
wchar_t to_lower(wchar_t c);

char to_upper(char c);
wchar_t to_upper(wchar_t c);

/*
 * case conversion in place: each character of str is replaced with to_lower() / to_upper()
 */
void to_lower_emplace(std::string & str);
void to_lower_emplace(std::wstring & str);

void to_upper_emplace(std::string & str);
void to_upper_emplace(std::wstring & str);

/*
 * case conversion returning a new string, str is not modified
 */
std::string to_lower(const std::string & str);
std::wstring to_lower(const std::wstring & str);

std::string to_upper(const std::string & str);
std::wstring to_upper(const std::wstring & str);


/*
 * case sensitive comparison
 *
 * zero means the strings are equal (this is what is_equal() tests),
 * presumably a negative/positive value tells the order like strcmp - TODO confirm in text_private.h
 *
 * in the three-argument versions the first string is the range [str1_begin, str1_end)
 */
int compare(const char * str1, const char * str2);
int compare(const wchar_t * str1, const wchar_t * str2);

int compare(const std::string & str1, const std::string & str2);
int compare(const std::wstring & str1, const std::wstring & str2);

int compare(const char * str1_begin, const char * str1_end, const char * str2);
int compare(const wchar_t * str1_begin, const wchar_t * str1_end, const wchar_t * str2);


/*
 * compare no case
 */
int compare_nc(const char * str1, const char * str2);
int compare_nc(const wchar_t * str1, const wchar_t * str2);

int compare_nc(const std::string & str1, const std::string & str2);
int compare_nc(const std::wstring & str1, const std::wstring & str2);

int compare_nc(const char * str1_begin, const char * str1_end, const char * str2);
int compare_nc(const wchar_t * str1_begin, const wchar_t * str1_end, const wchar_t * str2);


/*
 * returns true if the two strings are equal (case sensitive)
 */
bool is_equal(const char * str1, const char * str2);
bool is_equal(const wchar_t * str1, const wchar_t * str2);

bool is_equal(const std::string & str1, const std::string & str2);
bool is_equal(const std::wstring & str1, const std::wstring & str2);

bool is_equal(const char * str1_begin, const char * str1_end, const char * str2);
bool is_equal(const wchar_t * str1_begin, const wchar_t * str1_end, const wchar_t * str2);

/*
 * returns true if the two strings are equal, no case version
 */
bool is_equal_nc(const char * str1, const char * str2);
bool is_equal_nc(const wchar_t * str1, const wchar_t * str2);

bool is_equal_nc(const std::string & str1, const std::string & str2);
bool is_equal_nc(const std::wstring & str1, const std::wstring & str2);

bool is_equal_nc(const char * str1_begin, const char * str1_end, const char * str2);
bool is_equal_nc(const wchar_t * str1_begin, const wchar_t * str1_end, const wchar_t * str2);


/*
 * substring test, case sensitive
 * NOTE(review): presumably tests whether short_str is the beginning of long_str - confirm against is_substr_generic()
 */
bool is_substr(const char * short_str, const char * long_str);
bool is_substr(const wchar_t * short_str, const wchar_t * long_str);

bool is_substr(const std::string & short_str, const std::string & long_str);
bool is_substr(const std::wstring & short_str, const std::wstring & long_str);

/*
 * substring test, no case version
 */
bool is_substr_nc(const char * short_str, const char * long_str);
bool is_substr_nc(const wchar_t * short_str, const wchar_t * long_str);

bool is_substr_nc(const std::string & short_str, const std::string & long_str);
bool is_substr_nc(const std::wstring & short_str, const std::wstring & long_str);


}
|
||||
|
||||
|
||||
#endif
|
|
@ -0,0 +1,309 @@
|
|||
/*
|
||||
* This file is a part of PikoTools
|
||||
* and is distributed under the (new) BSD licence.
|
||||
* Author: Tomasz Sowa <t.sowa@ttmath.org>
|
||||
*/
|
||||
|
||||
/*
|
||||
* Copyright (c) 2021, Tomasz Sowa
|
||||
* All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are met:
|
||||
*
|
||||
* * Redistributions of source code must retain the above copyright notice,
|
||||
* this list of conditions and the following disclaimer.
|
||||
*
|
||||
* * Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
*
|
||||
* * Neither the name Tomasz Sowa nor the names of contributors to this
|
||||
* project may be used to endorse or promote products derived
|
||||
* from this software without specific prior written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
|
||||
* THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#ifndef headerfile_picotools_convert_text_private
|
||||
#define headerfile_picotools_convert_text_private
|
||||
|
||||
#include <string>
|
||||
#include "text.h"
|
||||
|
||||
|
||||
namespace pt
|
||||
{
|
||||
|
||||
namespace pt_private
|
||||
{
|
||||
|
||||
/*
 * changes a letter from the range 'A'-'Z' to the corresponding lower case letter,
 * any other character is returned unchanged
 */
template<class CharType>
CharType to_lower_generic(CharType c)
{
	const bool is_upper_letter = (c >= 'A' && c <= 'Z');

	// 'a' - 'A' is the distance between a lower and an upper case letter
	return is_upper_letter ? static_cast<CharType>(c + ('a' - 'A')) : c;
}
|
||||
|
||||
|
||||
/*
 * converts an ASCII lower case letter ('a' - 'z') to the corresponding
 * upper case letter, every other character is returned unchanged
 */
template<class CharType>
CharType to_upper_generic(CharType c)
{
	const bool is_ascii_lower = !(c < 'a' || c > 'z');

	return is_ascii_lower ? static_cast<CharType>(c - 'a' + 'A') : c;
}
|
||||
|
||||
|
||||
/*
 * replaces in place every character of s with the result of to_lower()
 */
template<class StringType>
void to_lower_str_generic(StringType & s)
{
	const typename StringType::size_type length = s.size();

	for(typename StringType::size_type pos = 0 ; pos < length ; ++pos)
	{
		s[pos] = to_lower(s[pos]);
	}
}
|
||||
|
||||
|
||||
/*
 * replaces in place every character of s with the result of to_upper()
 */
template<class StringType>
void to_upper_str_generic(StringType & s)
{
	const typename StringType::size_type length = s.size();

	for(typename StringType::size_type pos = 0 ; pos < length ; ++pos)
	{
		s[pos] = to_upper(s[pos]);
	}
}
|
||||
|
||||
|
||||
template<class CharType>
|
||||
CharType * skip_white_generic(CharType * str, bool check_additional_chars, bool treat_new_line_as_white)
|
||||
{
|
||||
while( is_white(static_cast<wchar_t>(*str), check_additional_chars, treat_new_line_as_white) )
|
||||
{
|
||||
str += 1;
|
||||
}
|
||||
|
||||
return str;
|
||||
}
|
||||
|
||||
|
||||
template<class CharType>
|
||||
CharType * skip_white_from_back_generic(CharType * str_begin, CharType * str_end, bool check_additional_chars, bool treat_new_line_as_white)
|
||||
{
|
||||
while( str_end > str_begin && is_white(static_cast<wchar_t>(*(str_end-1)), check_additional_chars, treat_new_line_as_white) )
|
||||
{
|
||||
str_end -= 1;
|
||||
}
|
||||
|
||||
return str_end;
|
||||
}
|
||||
|
||||
|
||||
/*
 * a version for a null-terminated string:
 * finds the terminating zero and calls the (begin, end) overload
 */
template<class CharType>
CharType * skip_white_from_back_generic(CharType * str, bool check_additional_chars, bool treat_new_line_as_white)
{
	CharType * terminator = str;

	for( ; *terminator != 0 ; ++terminator)
	{
	}

	return skip_white_from_back_generic(str, terminator, check_additional_chars, treat_new_line_as_white);
}
|
||||
|
||||
|
||||
|
||||
|
||||
/*
 * compares two null-terminated strings character by character
 *
 * returns zero if both strings are equal, a negative value if the first
 * differing character of str1 is lower than the one from str2 (or str1 is
 * shorter) and a positive value otherwise
 *
 * one-byte characters are always treated as unsigned values (0-255), no matter
 * whether plain char is signed and no matter what the width of the other
 * string's characters is; thanks to that the same code units compare as equal
 * for every combination of character types (e.g. char with unsigned char
 * or char with wchar_t)
 */
template<class StringType1, class StringType2>
int compare_generic(const StringType1 * str1, const StringType2 * str2)
{
	// a one-byte character goes through unsigned char to avoid sign extension
	auto char_code = [](auto ch) -> int
	{
		if constexpr (sizeof(decltype(ch)) == 1)
			return static_cast<int>(static_cast<unsigned char>(ch));
		else
			return static_cast<int>(ch);
	};

	while( *str1 && *str2 && char_code(*str1) == char_code(*str2) )
	{
		++str1;
		++str2;
	}

	// the difference is zero only if both strings ended at the same time
	return char_code(*str1) - char_code(*str2);
}
|
||||
|
||||
|
||||
/*
 * compares two string objects with compare_generic(),
 * the c_str() method of both objects is used to get the characters
 */
template<class StringType1, class StringType2>
int compare_str_generic(const StringType1 & str1, const StringType2 & str2)
{
	const auto * first = str1.c_str();
	const auto * second = str2.c_str();

	return compare_generic(first, second);
}
|
||||
|
||||
|
||||
/*
 * compares the range [str1_begin, str1_end) with the null-terminated string str2
 *
 * returns zero only if the whole range is equal to the whole str2,
 * a negative value if the range is lower (or shorter) and a positive value otherwise
 *
 * one-byte characters are always treated as unsigned values (0-255), no matter
 * whether plain char is signed and no matter what the width of the other
 * string's characters is
 *
 * a range which has a zero character at the position where str2 ends is longer
 * than str2, so a positive value is returned in such a case (not zero)
 */
template<class StringType1, class StringType2>
int compare_generic(const StringType1 * str1_begin, const StringType1 * str1_end, const StringType2 * str2)
{
	// a one-byte character goes through unsigned char to avoid sign extension
	auto char_code = [](auto ch) -> int
	{
		if constexpr (sizeof(decltype(ch)) == 1)
			return static_cast<int>(static_cast<unsigned char>(ch));
		else
			return static_cast<int>(ch);
	};

	while( str1_begin < str1_end && *str2 && char_code(*str1_begin) == char_code(*str2) )
	{
		++str1_begin;
		++str2;
	}

	if( !(str1_begin < str1_end) )
	{
		// the range is exhausted: it is treated as if a zero character was there
		return 0 - char_code(*str2);
	}

	int diff = char_code(*str1_begin) - char_code(*str2);

	if( diff == 0 )
	{
		// str2 has ended but the range still has characters (an embedded zero),
		// the range is longer so it is greater
		diff = 1;
	}

	return diff;
}
|
||||
|
||||
|
||||
|
||||
template<class StringType1, class StringType2>
|
||||
int compare_nc_generic(const StringType1 * str1, const StringType2 * str2)
|
||||
{
|
||||
while( *str1 && *str2 && to_lower(*str1) == to_lower(*str2) )
|
||||
{
|
||||
++str1;
|
||||
++str2;
|
||||
}
|
||||
|
||||
if( *str1 == 0 && *str2 == 0 )
|
||||
return 0;
|
||||
|
||||
int c1;
|
||||
int c2;
|
||||
|
||||
if constexpr (sizeof(StringType1) == 1 && sizeof(StringType2) == 1)
|
||||
{
|
||||
c1 = to_lower((wchar_t)(unsigned char)(*str1));
|
||||
c2 = to_lower((wchar_t)(unsigned char)(*str2));
|
||||
}
|
||||
else
|
||||
{
|
||||
c1 = to_lower(*str1);
|
||||
c2 = to_lower(*str2);
|
||||
}
|
||||
|
||||
return c1 - c2;
|
||||
}
|
||||
|
||||
|
||||
/*
 * compares two string objects without taking the letter case into account,
 * the c_str() method of both objects is used to get the characters
 *
 * the generic implementation (compare_nc_generic) is called directly,
 * in the same way as compare_str_generic() calls compare_generic(),
 * so this template works for every combination of character types
 */
template<class StringType1, class StringType2>
int compare_nc_str_generic(const StringType1 & str1, const StringType2 & str2)
{
	return compare_nc_generic(str1.c_str(), str2.c_str());
}
|
||||
|
||||
|
||||
template<class StringType1, class StringType2>
|
||||
int compare_nc_generic(const StringType1 * str1_begin, const StringType1 * str1_end, const StringType2 * str2)
|
||||
{
|
||||
while( str1_begin < str1_end && *str2 && to_lower(*str1_begin) == to_lower(*str2) )
|
||||
{
|
||||
++str1_begin;
|
||||
++str2;
|
||||
}
|
||||
|
||||
if( str1_begin == str1_end && *str2 == 0 )
|
||||
return 0;
|
||||
|
||||
int c1;
|
||||
int c2;
|
||||
|
||||
if constexpr (sizeof(StringType1) == 1 && sizeof(StringType2) == 1)
|
||||
{
|
||||
c1 = str1_begin < str1_end ? to_lower((wchar_t)(unsigned char)(*str1_begin)) : 0;
|
||||
c2 = to_lower((wchar_t)(unsigned char)(*str2));
|
||||
}
|
||||
else
|
||||
{
|
||||
c1 = str1_begin < str1_end ? to_lower(*str1_begin) : 0;
|
||||
c2 = to_lower(*str2);
|
||||
}
|
||||
|
||||
return c1 - c2;
|
||||
}
|
||||
|
||||
|
||||
|
||||
|
||||
/*
 * returns true if long_str begins with short_str
 * (an empty short_str matches every string)
 */
template<class StringType1, class StringType2>
bool is_substr_generic(const StringType1 * short_str, const StringType2 * long_str)
{
	for( ; *short_str ; ++short_str, ++long_str)
	{
		if( !*long_str || !(*short_str == *long_str) )
			return false;
	}

	return true;
}
|
||||
|
||||
|
||||
|
||||
/*
 * returns true if long_str begins with short_str,
 * each character is passed through to_lower() before it is compared
 * (an empty short_str matches every string)
 */
template<class StringType1, class StringType2>
bool is_substr_nc_generic(const StringType1 * short_str, const StringType2 * long_str)
{
	for( ; *short_str ; ++short_str, ++long_str)
	{
		if( !*long_str || !(to_lower(*short_str) == to_lower(*long_str)) )
			return false;
	}

	return true;
}
|
||||
|
||||
|
||||
|
||||
|
||||
} // namespace pt_private
|
||||
|
||||
} // namespace pt
|
||||
|
||||
|
||||
#endif
|
||||
|
|
@ -0,0 +1,416 @@
|
|||
/*
|
||||
* This file is a part of PikoTools
|
||||
* and is distributed under the (new) BSD licence.
|
||||
* Author: Tomasz Sowa <t.sowa@ttmath.org>
|
||||
*/
|
||||
|
||||
/*
|
||||
* Copyright (c) 2021, Tomasz Sowa
|
||||
* All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are met:
|
||||
*
|
||||
* * Redistributions of source code must retain the above copyright notice,
|
||||
* this list of conditions and the following disclaimer.
|
||||
*
|
||||
* * Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
*
|
||||
* * Neither the name Tomasz Sowa nor the names of contributors to this
|
||||
* project may be used to endorse or promote products derived
|
||||
* from this software without specific prior written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
|
||||
* THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#include "csvparser.h"
|
||||
#include "utf8/utf8.h"
|
||||
|
||||
|
||||
|
||||
namespace pt
|
||||
{
|
||||
|
||||
|
||||
|
||||
|
||||
/*
 * opens the file in binary mode and parses it to out_space
 *
 * if the file cannot be opened then cant_open_file is returned
 * and out_space is not touched
 */
CSVParser::Status CSVParser::parse_file(const char * file_name, Space & out_space)
{
	space = &out_space;
	reading_from_file = true;

	file.clear();
	file.open(file_name, std::ios_base::in | std::ios_base::binary);

	if( !file )
	{
		status = cant_open_file;
		return status;
	}

	parse();
	file.close();

	return status;
}
|
||||
|
||||
|
||||
|
||||
/*
 * a version taking the file name as std::string
 */
CSVParser::Status CSVParser::parse_file(const std::string & file_name, Space & out_space)
{
	const char * c_file_name = file_name.c_str();

	return parse_file(c_file_name, out_space);
}
|
||||
|
||||
|
||||
|
||||
|
||||
/*
 * a version taking the file name as a wide string,
 * the name is converted with wide_to_utf8() before the file is opened
 */
CSVParser::Status CSVParser::parse_file(const wchar_t * file_name, Space & out_space)
{
	std::string utf8_name;
	wide_to_utf8(file_name, utf8_name);

	const char * c_file_name = utf8_name.c_str();

	return parse_file(c_file_name, out_space);
}
|
||||
|
||||
|
||||
|
||||
/*
 * a version taking the file name as std::wstring
 */
CSVParser::Status CSVParser::parse_file(const std::wstring & file_name, Space & out_space)
{
	const wchar_t * wide_file_name = file_name.c_str();

	return parse_file(wide_file_name, out_space);
}
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
/*
 * parses a null-terminated one-byte string to out_space
 * (the string is read as UTF-8 or byte by byte, depending on input_as_utf8)
 */
CSVParser::Status CSVParser::parse(const char * str, Space & out_space)
{
	space = &out_space;

	pchar_ascii = str;
	pchar_unicode = nullptr;

	reading_from_wchar_string = false;
	reading_from_file = false;

	parse();

	return status;
}
|
||||
|
||||
|
||||
|
||||
/*
 * a version taking the input as std::string
 */
CSVParser::Status CSVParser::parse(const std::string & str, Space & out_space)
{
	const char * input = str.c_str();

	return parse(input, out_space);
}
|
||||
|
||||
|
||||
/*
 * parses a null-terminated wide string to out_space
 */
CSVParser::Status CSVParser::parse(const wchar_t * str, Space & out_space)
{
	space = &out_space;

	pchar_unicode = str;
	pchar_ascii = nullptr;

	reading_from_wchar_string = true;
	reading_from_file = false;

	parse();

	return status;
}
|
||||
|
||||
|
||||
/*
 * a version taking the input as std::wstring
 */
CSVParser::Status CSVParser::parse(const std::wstring & str, Space & out_space)
{
	const wchar_t * input = str.c_str();

	return parse(input, out_space);
}
|
||||
|
||||
|
||||
|
||||
void CSVParser::parse()
|
||||
{
|
||||
line = 1;
|
||||
status = ok;
|
||||
|
||||
space->set_empty_table();
|
||||
read_char(); // put first character to lastc
|
||||
|
||||
if( lastc == -1 )
|
||||
{
|
||||
// an empty file/string, in such a case we return such a space struct (if would be serialized to json): [[]]
|
||||
Space row_space;
|
||||
row_space.set_empty_table();
|
||||
space->add(std::move(row_space));
|
||||
}
|
||||
|
||||
while( lastc != -1 )
|
||||
{
|
||||
/*
|
||||
* even if there is an error when parsing we continue to read the file/string
|
||||
*
|
||||
*/
|
||||
|
||||
Space row_space;
|
||||
row_space.set_empty_table();
|
||||
|
||||
parse_row(row_space);
|
||||
space->add(std::move(row_space));
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
/*
 * reads values to row_space until read_value_to() returns false
 * (at least one value is always read)
 */
void CSVParser::parse_row(Space & row_space)
{
	while( read_value_to(row_space) )
	{
	}
}
|
||||
|
||||
|
||||
/*
 * adds a new wide string value to row_space and reads one field to it
 *
 * a field starting with a double quote is read as a quoted value
 * the return value comes from the called reader (true if a comma followed the value)
 */
bool CSVParser::read_value_to(Space & row_space)
{
	Space & space_value = row_space.add_empty_space();
	space_value.set_empty_wstring();

	std::wstring & field = space_value.value.value_wstring;
	const bool is_quoted = (lastc == '"');

	return is_quoted ? read_quoted_value_to(field) : read_non_quoted_value_to(field);
}
|
||||
|
||||
|
||||
bool CSVParser::read_quoted_value_to(std::wstring & value)
|
||||
{
|
||||
bool is_comma = false;
|
||||
bool is_value_character = true;
|
||||
|
||||
while( lastc != -1 && is_value_character )
|
||||
{
|
||||
read_char();
|
||||
|
||||
if( lastc == '"' )
|
||||
{
|
||||
read_char();
|
||||
|
||||
if( lastc == '"' )
|
||||
{
|
||||
value += lastc;
|
||||
}
|
||||
else
|
||||
{
|
||||
is_value_character = false;
|
||||
}
|
||||
}
|
||||
else
|
||||
if( lastc != -1 )
|
||||
{
|
||||
value += lastc;
|
||||
}
|
||||
}
|
||||
|
||||
if( lastc == ',' )
|
||||
{
|
||||
is_comma = true;
|
||||
read_char(); // skip the comma character
|
||||
}
|
||||
else
|
||||
if( lastc == 13 )
|
||||
{
|
||||
read_char(); // skip CR character
|
||||
|
||||
if( lastc == 10 )
|
||||
read_char();
|
||||
}
|
||||
else
|
||||
if( lastc == 10 )
|
||||
{
|
||||
read_char(); // skip new line character
|
||||
}
|
||||
|
||||
return is_comma;
|
||||
}
|
||||
|
||||
|
||||
bool CSVParser::read_non_quoted_value_to(std::wstring & value)
|
||||
{
|
||||
while( lastc != -1 && lastc != ',' && lastc != 10 )
|
||||
{
|
||||
value += lastc;
|
||||
read_char();
|
||||
}
|
||||
|
||||
bool is_comma = (lastc == ',');
|
||||
|
||||
if( is_comma )
|
||||
{
|
||||
read_char(); // skip the comma character
|
||||
}
|
||||
else
|
||||
{
|
||||
bool is_new_line = (lastc == 10);
|
||||
|
||||
// check CRLF sequence
|
||||
if( is_new_line && !value.empty() && value.back() == 13 )
|
||||
{
|
||||
value.erase(value.size() - 1, 1);
|
||||
}
|
||||
|
||||
if( is_new_line )
|
||||
{
|
||||
read_char(); // skip the new line character
|
||||
}
|
||||
}
|
||||
|
||||
return is_comma;
|
||||
}
|
||||
|
||||
|
||||
|
||||
|
||||
int CSVParser::read_utf8_char()
|
||||
{
|
||||
int c;
|
||||
bool correct;
|
||||
|
||||
lastc = -1;
|
||||
|
||||
do
|
||||
{
|
||||
utf8_to_int(file, c, correct);
|
||||
|
||||
if( !file )
|
||||
return lastc;
|
||||
}
|
||||
while( !correct );
|
||||
|
||||
lastc = c;
|
||||
|
||||
if( lastc == '\n' )
|
||||
++line;
|
||||
|
||||
return lastc;
|
||||
}
|
||||
|
||||
|
||||
|
||||
int CSVParser::read_ascii_char()
|
||||
{
|
||||
lastc = file.get();
|
||||
|
||||
if( lastc == '\n' )
|
||||
++line;
|
||||
|
||||
return lastc;
|
||||
}
|
||||
|
||||
|
||||
|
||||
|
||||
int CSVParser::read_char_from_wchar_string()
|
||||
{
|
||||
if( *pchar_unicode == 0 )
|
||||
lastc = -1;
|
||||
else
|
||||
lastc = *(pchar_unicode++);
|
||||
|
||||
if( lastc == '\n' )
|
||||
++line;
|
||||
|
||||
return lastc;
|
||||
}
|
||||
|
||||
|
||||
int CSVParser::read_char_from_utf8_string()
|
||||
{
|
||||
int c;
|
||||
bool correct;
|
||||
|
||||
lastc = -1;
|
||||
|
||||
do
|
||||
{
|
||||
size_t len = utf8_to_int(pchar_ascii, c, correct);
|
||||
pchar_ascii += len;
|
||||
}
|
||||
while( *pchar_ascii && !correct );
|
||||
|
||||
if( correct )
|
||||
lastc = c;
|
||||
|
||||
if( lastc == '\n' )
|
||||
++line;
|
||||
|
||||
return lastc;
|
||||
}
|
||||
|
||||
|
||||
int CSVParser::read_char_from_ascii_string()
|
||||
{
|
||||
if( *pchar_ascii == 0 )
|
||||
lastc = -1;
|
||||
else
|
||||
lastc = *(pchar_ascii++);
|
||||
|
||||
if( lastc == '\n' )
|
||||
++line;
|
||||
|
||||
return lastc;
|
||||
}
|
||||
|
||||
|
||||
int CSVParser::read_char_no_escape()
|
||||
{
|
||||
if( reading_from_file )
|
||||
{
|
||||
if( input_as_utf8 )
|
||||
return read_utf8_char();
|
||||
else
|
||||
return read_ascii_char();
|
||||
}
|
||||
else
|
||||
{
|
||||
if( reading_from_wchar_string )
|
||||
{
|
||||
return read_char_from_wchar_string();
|
||||
}
|
||||
else
|
||||
{
|
||||
if( input_as_utf8 )
|
||||
return read_char_from_utf8_string();
|
||||
else
|
||||
return read_char_from_ascii_string();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
|
||||
|
||||
int CSVParser::read_char()
|
||||
{
|
||||
return read_char_no_escape();
|
||||
}
|
||||
|
||||
|
||||
}
|
||||
|
||||
|
|
@ -0,0 +1,162 @@
|
|||
/*
|
||||
* This file is a part of PikoTools
|
||||
* and is distributed under the (new) BSD licence.
|
||||
* Author: Tomasz Sowa <t.sowa@ttmath.org>
|
||||
*/
|
||||
|
||||
/*
|
||||
* Copyright (c) 2021, Tomasz Sowa
|
||||
* All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are met:
|
||||
*
|
||||
* * Redistributions of source code must retain the above copyright notice,
|
||||
* this list of conditions and the following disclaimer.
|
||||
*
|
||||
* * Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
*
|
||||
* * Neither the name Tomasz Sowa nor the names of contributors to this
|
||||
* project may be used to endorse or promote products derived
|
||||
* from this software without specific prior written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
|
||||
* THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#ifndef headerfile_picotools_csv_csvparser
|
||||
#define headerfile_picotools_csv_csvparser
|
||||
|
||||
#include "space/space.h"
|
||||
#include <string>
|
||||
#include <fstream>
|
||||
|
||||
|
||||
namespace pt
|
||||
{
|
||||
|
||||
/*
|
||||
*
|
||||
* https://datatracker.ietf.org/doc/html/rfc4180
|
||||
*
|
||||
*/
|
||||
class CSVParser
|
||||
{
|
||||
public:
|
||||
|
||||
enum Status
|
||||
{
|
||||
ok,
|
||||
cant_open_file,
|
||||
};
|
||||
|
||||
|
||||
Status parse_file(const char * file_name, Space & out_space);
|
||||
Status parse_file(const std::string & file_name, Space & out_space);
|
||||
Status parse_file(const wchar_t * file_name, Space & out_space);
|
||||
Status parse_file(const std::wstring & file_name, Space & out_space);
|
||||
|
||||
Status parse(const char * str, Space & out_space);
|
||||
Status parse(const std::string & str, Space & out_space);
|
||||
Status parse(const wchar_t * str, Space & out_space);
|
||||
Status parse(const std::wstring & str, Space & out_space);
|
||||
|
||||
|
||||
|
||||
protected:
|
||||
|
||||
|
||||
/*
|
||||
the last status of parsing, set by Parse() methods
|
||||
*/
|
||||
Status status;
|
||||
|
||||
|
||||
Space * space;
|
||||
|
||||
/*
|
||||
true if parse_file() method was called
|
||||
false if parse() was called
|
||||
*/
|
||||
bool reading_from_file;
|
||||
|
||||
/*
|
||||
true if parse(wchar_t *) or parse(std::wstring&) was called
|
||||
*/
|
||||
bool reading_from_wchar_string;
|
||||
|
||||
/*
|
||||
pointers to the current character
|
||||
if parse() is being used
|
||||
*/
|
||||
const char * pchar_ascii;
|
||||
const wchar_t * pchar_unicode;
|
||||
|
||||
|
||||
/*
|
||||
last read char
|
||||
or -1 if the end
|
||||
*/
|
||||
int lastc;
|
||||
|
||||
|
||||
|
||||
/*
|
||||
a number of a line in which there is a syntax_error
|
||||
*/
|
||||
int line;
|
||||
|
||||
/*
|
||||
current file
|
||||
|
||||
may it would be better to make a pointer?
|
||||
if we parse only a string then there is no sense to have such an object
|
||||
*/
|
||||
std::ifstream file;
|
||||
|
||||
/*
|
||||
input file is in UTF-8
|
||||
default: true
|
||||
*/
|
||||
bool input_as_utf8;
|
||||
|
||||
|
||||
|
||||
|
||||
void parse();
|
||||
void parse_row(Space & row_space);
|
||||
|
||||
bool read_value_to(Space & row_space);
|
||||
bool read_quoted_value_to(std::wstring & value);
|
||||
bool read_non_quoted_value_to(std::wstring & value);
|
||||
|
||||
|
||||
|
||||
/*
|
||||
* copied from SpaceParser
|
||||
* may it would be better to have a class with those methods and inherit from it?
|
||||
*/
|
||||
int read_utf8_char();
|
||||
int read_ascii_char();
|
||||
int read_char_from_wchar_string();
|
||||
int read_char_from_utf8_string();
|
||||
int read_char_from_ascii_string();
|
||||
int read_char_no_escape();
|
||||
|
||||
int read_char();
|
||||
};
|
||||
|
||||
}
|
||||
|
||||
#endif
|
|
@ -41,7 +41,7 @@
|
|||
#include <string.h>
|
||||
|
||||
|
||||
namespace PT
|
||||
namespace pt
|
||||
{
|
||||
|
||||
|
|
@ -44,7 +44,7 @@
|
|||
|
||||
|
||||
|
||||
namespace PT
|
||||
namespace pt
|
||||
{
|
||||
|
||||
|
|
@ -5,7 +5,7 @@
|
|||
*/
|
||||
|
||||
/*
|
||||
* Copyright (c) 2018, Tomasz Sowa
|
||||
* Copyright (c) 2018-2021, Tomasz Sowa
|
||||
* All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
|
@ -39,7 +39,7 @@
|
|||
|
||||
|
||||
|
||||
namespace PT
|
||||
namespace pt
|
||||
{
|
||||
|
||||
|
||||
|
@ -74,7 +74,7 @@ void FileLog::init(const std::wstring & log_file, bool log_stdout, int log_level
|
|||
this->log_stdout = log_stdout;
|
||||
this->log_level = log_level;
|
||||
this->save_each_line = save_each_line;
|
||||
PT::WideToUTF8(log_file, this->log_file);
|
||||
wide_to_utf8(log_file, this->log_file);
|
||||
}
|
||||
|
||||
|
||||
|
@ -100,7 +100,7 @@ void FileLog::open_file()
|
|||
}
|
||||
|
||||
|
||||
void FileLog::save_log(PT::WTextStream * buffer)
|
||||
void FileLog::save_log(WTextStream * buffer)
|
||||
{
|
||||
if( buffer->empty() )
|
||||
return;
|
||||
|
@ -111,7 +111,7 @@ void FileLog::save_log(PT::WTextStream * buffer)
|
|||
{
|
||||
if( log_stdout )
|
||||
{
|
||||
PT::WideToUTF8(*buffer, std::cout);
|
||||
wide_stream_to_utf8(*buffer, std::cout);
|
||||
}
|
||||
|
||||
if( !log_file.empty() )
|
||||
|
@ -126,7 +126,7 @@ void FileLog::save_log(PT::WTextStream * buffer)
|
|||
|
||||
if( file )
|
||||
{
|
||||
PT::WideToUTF8(*buffer, file);
|
||||
wide_stream_to_utf8(*buffer, file);
|
||||
file.flush();
|
||||
}
|
||||
}
|
|
@ -42,7 +42,7 @@
|
|||
#include "textstream/textstream.h"
|
||||
|
||||
|
||||
namespace PT
|
||||
namespace pt
|
||||
{
|
||||
|
||||
|
||||
|
@ -55,7 +55,7 @@ public:
|
|||
virtual ~FileLog();
|
||||
|
||||
virtual void init(const std::wstring & log_file, bool log_stdout, int log_level, bool save_each_line);
|
||||
virtual void save_log(PT::WTextStream * buffer);
|
||||
virtual void save_log(WTextStream * buffer);
|
||||
|
||||
virtual int get_log_level();
|
||||
virtual bool should_save_each_line();
|
|
@ -5,7 +5,7 @@
|
|||
*/
|
||||
|
||||
/*
|
||||
* Copyright (c) 2018, Tomasz Sowa
|
||||
* Copyright (c) 2018-2021, Tomasz Sowa
|
||||
* All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
|
@ -41,8 +41,11 @@
|
|||
#include "date/date.h"
|
||||
#include "utf8/utf8.h"
|
||||
|
||||
#ifdef PT_HAS_MORM
|
||||
#include "morm.h"
|
||||
#endif
|
||||
|
||||
namespace PT
|
||||
namespace pt
|
||||
{
|
||||
|
||||
|
||||
|
@ -70,13 +73,13 @@ Log::~Log()
|
|||
}
|
||||
|
||||
|
||||
void Log::SetLogBuffer(PT::WTextStream * buffer)
|
||||
void Log::SetLogBuffer(WTextStream * buffer)
|
||||
{
|
||||
this->buffer = buffer;
|
||||
}
|
||||
|
||||
|
||||
PT::WTextStream * Log::GetLogBuffer()
|
||||
WTextStream * Log::GetLogBuffer()
|
||||
{
|
||||
return buffer;
|
||||
}
|
||||
|
@ -133,7 +136,7 @@ Log & Log::operator<<(const char * s)
|
|||
{
|
||||
if( buffer && file_log && s && current_level <= file_log->get_log_level() )
|
||||
{
|
||||
PT::UTF8ToWide(s, *buffer, false);
|
||||
utf8_to_wide(s, *buffer, false);
|
||||
}
|
||||
|
||||
return *this;
|
||||
|
@ -145,7 +148,7 @@ Log & Log::operator<<(const std::string & s)
|
|||
{
|
||||
if( buffer && file_log && current_level <= file_log->get_log_level() )
|
||||
{
|
||||
PT::UTF8ToWide(s, *buffer, false);
|
||||
utf8_to_wide(s, *buffer, false);
|
||||
}
|
||||
|
||||
return *this;
|
||||
|
@ -157,7 +160,7 @@ Log & Log::operator<<(const std::string * s)
|
|||
{
|
||||
if( buffer && file_log && current_level <= file_log->get_log_level() )
|
||||
{
|
||||
PT::UTF8ToWide(*s, *buffer, false);
|
||||
utf8_to_wide(*s, *buffer, false);
|
||||
}
|
||||
|
||||
return *this;
|
||||
|
@ -287,7 +290,7 @@ Log & Log::operator<<(double s)
|
|||
|
||||
|
||||
|
||||
Log & Log::operator<<(const PT::Space & s)
|
||||
Log & Log::operator<<(const Space & s)
|
||||
{
|
||||
if( buffer && file_log && current_level <= file_log->get_log_level() )
|
||||
{
|
||||
|
@ -299,7 +302,7 @@ Log & Log::operator<<(const PT::Space & s)
|
|||
|
||||
|
||||
|
||||
Log & Log::operator<<(const PT::Date & date)
|
||||
Log & Log::operator<<(const Date & date)
|
||||
{
|
||||
if( buffer && file_log && current_level <= file_log->get_log_level() )
|
||||
{
|
||||
|
@ -310,6 +313,14 @@ Log & Log::operator<<(const PT::Date & date)
|
|||
}
|
||||
|
||||
|
||||
#ifdef PT_HAS_MORM
|
||||
Log & Log::operator<<(morm::Model & model)
|
||||
{
|
||||
operator<<(model.to_string());
|
||||
return *this;
|
||||
}
|
||||
#endif
|
||||
|
||||
|
||||
Log & Log::operator<<(Manipulators m)
|
||||
{
|
|
@ -5,7 +5,7 @@
|
|||
*/
|
||||
|
||||
/*
|
||||
* Copyright (c) 2018, Tomasz Sowa
|
||||
* Copyright (c) 2018-2021, Tomasz Sowa
|
||||
* All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
|
@ -45,7 +45,13 @@
|
|||
|
||||
|
||||
|
||||
namespace PT
|
||||
namespace morm
|
||||
{
|
||||
class Model;
|
||||
}
|
||||
|
||||
|
||||
namespace pt
|
||||
{
|
||||
|
||||
|
||||
|
@ -87,8 +93,8 @@ public:
|
|||
Log();
|
||||
virtual ~Log();
|
||||
|
||||
virtual void SetLogBuffer(PT::WTextStream * buffer);
|
||||
virtual PT::WTextStream * GetLogBuffer();
|
||||
virtual void SetLogBuffer(WTextStream * buffer);
|
||||
virtual WTextStream * GetLogBuffer();
|
||||
|
||||
void SetFileLog(FileLog * file_log);
|
||||
FileLog * GetFileLog();
|
||||
|
@ -122,17 +128,24 @@ public:
|
|||
//virtual Log & operator<<(float s); // added
|
||||
virtual Log & operator<<(double s);
|
||||
|
||||
virtual Log & operator<<(const PT::Space & space);
|
||||
virtual Log & operator<<(const PT::Date & date);
|
||||
virtual Log & operator<<(const Space & space);
|
||||
virtual Log & operator<<(const Date & date);
|
||||
|
||||
#ifdef PT_HAS_MORM
|
||||
virtual Log & operator<<(morm::Model & model);
|
||||
#endif
|
||||
|
||||
virtual Log & operator<<(Manipulators m);
|
||||
|
||||
|
||||
|
||||
|
||||
virtual Log & LogString(const std::string & value, size_t max_size);
|
||||
virtual Log & LogString(const std::wstring & value, size_t max_size);
|
||||
|
||||
|
||||
template<typename char_type, size_t stack_size, size_t heap_block_size>
|
||||
Log & operator<<(const PT::TextStreamBase<char_type, stack_size, heap_block_size> & buf);
|
||||
Log & operator<<(const TextStreamBase<char_type, stack_size, heap_block_size> & buf);
|
||||
|
||||
|
||||
|
||||
|
@ -144,7 +157,7 @@ public:
|
|||
protected:
|
||||
|
||||
// buffer for the log
|
||||
PT::WTextStream * buffer;
|
||||
WTextStream * buffer;
|
||||
|
||||
// file logger
|
||||
FileLog * file_log;
|
||||
|
@ -192,7 +205,7 @@ Log & Log::log_string_generic(const StringType & value, size_t max_size)
|
|||
|
||||
|
||||
template<typename char_type, size_t stack_size, size_t heap_block_size>
|
||||
Log & Log::operator<<(const PT::TextStreamBase<char_type, stack_size, heap_block_size> & buf)
|
||||
Log & Log::operator<<(const TextStreamBase<char_type, stack_size, heap_block_size> & buf)
|
||||
{
|
||||
if( buffer && file_log && current_level <= file_log->get_log_level() )
|
||||
{
|
|
@ -0,0 +1,397 @@
|
|||
/*
|
||||
* This file is a part of PikoTools
|
||||
* and is distributed under the (new) BSD licence.
|
||||
* Author: Tomasz Sowa <t.sowa@ttmath.org>
|
||||
*/
|
||||
|
||||
/*
|
||||
* Copyright (c) 2016-2021, Tomasz Sowa
|
||||
* All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are met:
|
||||
*
|
||||
* * Redistributions of source code must retain the above copyright notice,
|
||||
* this list of conditions and the following disclaimer.
|
||||
*
|
||||
* * Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
*
|
||||
* * Neither the name Tomasz Sowa nor the names of contributors to this
|
||||
* project may be used to endorse or promote products derived
|
||||
* from this software without specific prior written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
|
||||
* THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
|
||||
#include "mainoptionsparser.h"
|
||||
#include "utf8/utf8.h"
|
||||
#include <string.h>
|
||||
|
||||
|
||||
|
||||
namespace pt
|
||||
{
|
||||
|
||||
|
||||
|
||||
/*
 * by default: no output space is set, UTF-8 is used
 * and the non-option arguments are stored under the name "args"
 */
MainOptionsParser::MainOptionsParser()
{
	space = nullptr;
	arguments_required_space = nullptr;

	last_status = status_ok;
	should_use_utf8 = true;
	non_option_arguments_name = L"args";
}
|
||||
|
||||
|
||||
|
||||
/*
 * there is nothing to release here
 */
MainOptionsParser::~MainOptionsParser() = default;
|
||||
|
||||
|
||||
void MainOptionsParser::use_utf8(bool utf8)
|
||||
{
|
||||
should_use_utf8 = utf8;
|
||||
}
|
||||
|
||||
|
||||
void MainOptionsParser::set_non_options_arguments_name(const wchar_t * name)
|
||||
{
|
||||
non_option_arguments_name = name;
|
||||
}
|
||||
|
||||
|
||||
void MainOptionsParser::set_non_options_arguments_name(const std::wstring & name)
|
||||
{
|
||||
non_option_arguments_name = name;
|
||||
}
|
||||
|
||||
|
||||
|
||||
std::wstring & MainOptionsParser::get_wrong_option()
|
||||
{
|
||||
return last_error_option;
|
||||
}
|
||||
|
||||
/*
 * parses the program arguments to out_space,
 * no space with required arguments is used
 */
MainOptionsParser::Status MainOptionsParser::parse(int argc, const char ** argv, Space & out_space)
{
	arguments_required_space = nullptr;
	space = &out_space;

	const Status result = parse(argc, argv);

	return result;
}
|
||||
|
||||
|
||||
/*
 * parses the program arguments to out_space,
 * a pointer to the arguments space is stored in arguments_required_space
 */
MainOptionsParser::Status MainOptionsParser::parse(int argc, const char ** argv, Space & out_space, const Space & arguments)
{
	arguments_required_space = &arguments;
	space = &out_space;

	const Status result = parse(argc, argv);

	return result;
}
|
||||
|
||||
|
||||
/*
 * the main loop: the output space is set to an empty object and the arguments
 * (starting from argv[1]) are parsed as long as the status is status_ok
 *
 * the index is moved forward by the called parse() method
 *
 * the temporary buffers are cleared at the end
 */
MainOptionsParser::Status MainOptionsParser::parse(int argc, const char ** argv)
{
	last_status = status_ok;
	last_error_option.clear();
	space->set_empty_object();

	const size_t args_count = (size_t)argc;
	size_t index = 1;

	while( index < args_count && last_status == status_ok )
	{
		parse(args_count, argv, index);
	}

	options.clear();
	option.clear();
	argument.clear();
	arguments.clear();

	return last_status;
}
|
||||
|
||||
|
||||
void MainOptionsParser::parse(size_t argc, const char ** argv, size_t & argv_index)
|
||||
{
|
||||
const char * pchar = argv[argv_index];
|
||||
|
||||
if( *pchar == '-' )
|
||||
{
|
||||
if( *(pchar+1) == '-' && *(pchar+2) == 0 )
|
||||
{
|
||||
// two hyphens only "--"
|
||||
argv_index += 1;
|
||||
parse_non_option_arguments(argc, argv, argv_index);
|
||||
}
|
||||
else
|
||||
if( *(pchar+1) == '-' )
|
||||
{
|
||||
// two hyphens and a string, such as "--abc"
|
||||
parse_long_option(argc, argv, argv_index);
|
||||
}
|
||||
else
|
||||
if( *(pchar+1) != 0 )
|
||||
{
|
||||
// one hyphen and a string, such as "-abc"
|
||||
parse_short_option(argc, argv, argv_index);
|
||||
}
|
||||
else
|
||||
{
|
||||
parse_non_option_arguments(argc, argv, argv_index);
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
parse_non_option_arguments(argc, argv, argv_index);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
void MainOptionsParser::convert_str(const char * src, std::wstring & dst)
|
||||
{
|
||||
if( should_use_utf8 )
|
||||
{
|
||||
utf8_to_wide(src, dst);
|
||||
}
|
||||
else
|
||||
{
|
||||
dst.clear();
|
||||
|
||||
for( ; *src ; ++src )
|
||||
dst += (wchar_t)(unsigned char)*src;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
void MainOptionsParser::convert_str(const char * src, size_t len, std::wstring & dst)
|
||||
{
|
||||
if( should_use_utf8 )
|
||||
{
|
||||
utf8_to_wide(src, len, dst);
|
||||
}
|
||||
else
|
||||
{
|
||||
dst.clear();
|
||||
|
||||
for(size_t i=0 ; i < len ; ++i)
|
||||
dst += (wchar_t)(unsigned char)src[i];
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
void MainOptionsParser::convert_str(const std::wstring & src, Space & space)
|
||||
{
|
||||
if( should_use_utf8 )
|
||||
{
|
||||
space.set_empty_wstring();
|
||||
space.value.value_wstring = src;
|
||||
}
|
||||
else
|
||||
{
|
||||
space.set_empty_string();
|
||||
std::string & dst = space.value.value_string;
|
||||
|
||||
dst.clear();
|
||||
|
||||
for(size_t i=0 ; i < src.size() ; ++i)
|
||||
dst += (char)src[i];
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
void MainOptionsParser::parse_short_option(size_t argc, const char ** argv, size_t & argv_index)
|
||||
{
|
||||
convert_str(argv[argv_index] + 1, options);
|
||||
const wchar_t * options_pchar = options.c_str();
|
||||
|
||||
arguments.clear();
|
||||
bool was_argument = false;
|
||||
argv_index += 1;
|
||||
|
||||
for( ; *options_pchar && !was_argument && last_status == status_ok ; ++options_pchar )
|
||||
{
|
||||
option = *options_pchar;
|
||||
size_t args_len = how_many_arguments_required(option);
|
||||
|
||||
if( args_len > 0 )
|
||||
{
|
||||
was_argument = true;
|
||||
|
||||
if( *(options_pchar+1) )
|
||||
{
|
||||
// first argument is directly behind the option
|
||||
argument = options_pchar + 1;
|
||||
arguments.push_back(argument);
|
||||
args_len -= 1;
|
||||
}
|
||||
|
||||
parse_arguments(argc, argv, argv_index, args_len);
|
||||
}
|
||||
|
||||
add_option_to_space(option, arguments);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
void MainOptionsParser::parse_long_option(size_t argc, const char ** argv, size_t & argv_index)
|
||||
{
|
||||
const char * option_begin = argv[argv_index] + 2; // skip first two hyphens --
|
||||
const char * option_end = option_begin;
|
||||
bool is_equal_form = false; // is the option in the form with equal sign, such as: option=argument
|
||||
|
||||
while( *option_end != 0 && *option_end != '=' )
|
||||
{
|
||||
option_end += 1;
|
||||
}
|
||||
|
||||
if( *option_end == '=' )
|
||||
{
|
||||
is_equal_form = true;
|
||||
convert_str(option_begin, option_end - option_begin, option);
|
||||
convert_str(option_end + 1, argument);
|
||||
}
|
||||
else
|
||||
{
|
||||
convert_str(option_begin, option);
|
||||
}
|
||||
|
||||
argv_index += 1;
|
||||
size_t args_len = how_many_arguments_required(option);
|
||||
arguments.clear();
|
||||
|
||||
if( is_equal_form )
|
||||
{
|
||||
if( args_len == 0 )
|
||||
{
|
||||
if( !argument.empty() )
|
||||
{
|
||||
// report an error
|
||||
last_status = status_argument_provided;
|
||||
last_error_option = option;
|
||||
}
|
||||
}
|
||||
else
|
||||
if( args_len == 1 )
|
||||
{
|
||||
// argument can be empty in such a case: option=
|
||||
// we treat it as if the argument would not be provided
|
||||
if( !argument.empty() )
|
||||
{
|
||||
arguments.push_back(argument);
|
||||
args_len -= 1;
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
// args_len is > 1 but when using option=argument form
|
||||
// we can provide only one argument
|
||||
last_status = status_argument_not_provided;
|
||||
last_error_option = option;
|
||||
}
|
||||
}
|
||||
|
||||
if( last_status == status_ok )
|
||||
{
|
||||
parse_arguments(argc, argv, argv_index, args_len);
|
||||
add_option_to_space(option, arguments);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
void MainOptionsParser::parse_arguments(size_t argc, const char ** argv, size_t & argv_index, size_t args_len)
|
||||
{
|
||||
for( ; args_len > 0 && argv_index < argc ; --args_len, ++argv_index)
|
||||
{
|
||||
convert_str(argv[argv_index], argument);
|
||||
arguments.push_back(argument);
|
||||
}
|
||||
|
||||
if( args_len > 0 )
|
||||
{
|
||||
last_status = status_argument_not_provided;
|
||||
last_error_option = option;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
|
||||
void MainOptionsParser::parse_non_option_arguments(size_t argc, const char ** argv, size_t & argv_index)
|
||||
{
|
||||
Space * table_with_args = new Space();
|
||||
table_with_args->set_empty_table();
|
||||
|
||||
for( ; argv_index < argc ; ++argv_index)
|
||||
{
|
||||
convert_str(argv[argv_index], argument);
|
||||
table_with_args->add(argument);
|
||||
}
|
||||
|
||||
space->add(non_option_arguments_name, table_with_args);
|
||||
}
|
||||
|
||||
|
||||
void MainOptionsParser::add_option_to_space(const std::wstring & option, const std::vector<std::wstring> & arguments)
|
||||
{
|
||||
Space * option_table = space->get_object_field(option);
|
||||
|
||||
if( !option_table )
|
||||
{
|
||||
option_table = &space->add_empty_space(option);
|
||||
}
|
||||
|
||||
if( !option_table->is_table())
|
||||
{
|
||||
option_table->set_empty_table();
|
||||
}
|
||||
|
||||
Space * arguments_table = new Space();
|
||||
arguments_table->set_empty_table();
|
||||
|
||||
for(const std::wstring & arg : arguments)
|
||||
{
|
||||
Space & space_arg = arguments_table->add_empty_space();
|
||||
convert_str(arg, space_arg);
|
||||
}
|
||||
|
||||
option_table->add(arguments_table);
|
||||
}
|
||||
|
||||
|
||||
|
||||
size_t MainOptionsParser::how_many_arguments_required(const std::wstring & arg)
|
||||
{
|
||||
size_t res = 0;
|
||||
|
||||
if( arguments_required_space && arguments_required_space->is_object() )
|
||||
{
|
||||
long res_long = arguments_required_space->to_llong(arg, 0);
|
||||
|
||||
if( res_long < 0 )
|
||||
res_long = 0;
|
||||
|
||||
res = (size_t)res_long;
|
||||
// argument 'arg' needs 'res' options
|
||||
}
|
||||
|
||||
return res;
|
||||
}
|
||||
|
||||
|
||||
|
||||
} // namespace
|
||||
|
||||
|
|
@ -0,0 +1,162 @@
|
|||
/*
|
||||
* This file is a part of PikoTools
|
||||
* and is distributed under the (new) BSD licence.
|
||||
* Author: Tomasz Sowa <t.sowa@ttmath.org>
|
||||
*/
|
||||
|
||||
/*
|
||||
* Copyright (c) 2016-2021, Tomasz Sowa
|
||||
* All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are met:
|
||||
*
|
||||
* * Redistributions of source code must retain the above copyright notice,
|
||||
* this list of conditions and the following disclaimer.
|
||||
*
|
||||
* * Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
*
|
||||
* * Neither the name Tomasz Sowa nor the names of contributors to this
|
||||
* project may be used to endorse or promote products derived
|
||||
* from this software without specific prior written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
|
||||
* THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#ifndef headerfile_picotools_mainoptions_mainoptionsparser
|
||||
#define headerfile_picotools_mainoptions_mainoptionsparser
|
||||
|
||||
#include "space/space.h"
|
||||
#include <string>
|
||||
#include <vector>
|
||||
|
||||
|
||||
namespace pt
|
||||
{
|
||||
|
||||
|
||||
/*
|
||||
* a very little parser for parsing main(int argc, char ** argv) parameters
|
||||
*
|
||||
*
|
||||
*/
|
||||
class MainOptionsParser
|
||||
{
|
||||
public:
|
||||
|
||||
MainOptionsParser();
|
||||
~MainOptionsParser();
|
||||
|
||||
|
||||
/*
|
||||
* status_ok - all argument have been parsed correctly
|
||||
*
|
||||
* status_argument_provided - an argument have been provided but was not requested
|
||||
* this can be in situation when using long form with equal sign, such as: --option=argument
|
||||
* and in 'options' space the option either was not set or have zero requested arguments
|
||||
*
|
||||
* status_argument_not_provided - an argument or arguments are required but were not provided
|
||||
* this can be returned in two situations:
|
||||
* 1. when using long form with equal sign, such as: --option=argument and in 'options' space
|
||||
* you have requested more than one argument
|
||||
* 2. when reading arguments and the input strings ended
|
||||
*
|
||||
*/
|
||||
enum Status
|
||||
{
|
||||
status_ok = 0,
|
||||
status_argument_provided = 1,
|
||||
status_argument_not_provided = 2,
|
||||
};
|
||||
|
||||
|
||||
/*
|
||||
* the name of a field in the output Struct space for non-option arguments (those after two hypens --)
|
||||
* default: L"args"
|
||||
* they will be set as a table of strings/wstrings
|
||||
*
|
||||
*/
|
||||
void set_non_options_arguments_name(const wchar_t * name);
|
||||
void set_non_options_arguments_name(const std::wstring & name);
|
||||
|
||||
|
||||
/*
|
||||
* parse parameters
|
||||
* argc argv have the same meaning as in the main(int argc, const char ** argv) method
|
||||
* the first argument from argv is usualy the name of the program and is skip by this parser
|
||||
*
|
||||
* return value: look at the description of the Status enum
|
||||
*
|
||||
*/
|
||||
Status parse(int argc, const char ** argv, Space & out_space);
|
||||
Status parse(int argc, const char ** argv, Space & out_space, const Space & arguments);
|
||||
|
||||
|
||||
/*
|
||||
* whether or not options arguments should be converted from utf8 char* strings to wide strings (std::wstring)
|
||||
* default true
|
||||
*
|
||||
* if true all arguments in Space struct will be saved as std::wstring
|
||||
* if false all arguments will be std::string (they are read as they are without checking
|
||||
* whether correct utf8 characters are encountered)
|
||||
*
|
||||
* arguments are always held as std::wstring (in such a case is defined ObjectType in Space struct)
|
||||
* when using use_utf8(false) characters will not be treated as an utf8 string but just all 8bit char bytes
|
||||
* will be copied to std::wstring
|
||||
*
|
||||
*/
|
||||
void use_utf8(bool utf8);
|
||||
|
||||
|
||||
/*
|
||||
* return the last option name which was incorrectly parsed
|
||||
* or an empty string if status was equal to status_ok
|
||||
*/
|
||||
std::wstring & get_wrong_option();
|
||||
|
||||
|
||||
private:
|
||||
|
||||
Space * space;
|
||||
const Space * arguments_required_space;
|
||||
std::wstring non_option_arguments_name;
|
||||
std::wstring options, option, argument;
|
||||
std::vector<std::wstring> arguments;
|
||||
bool should_use_utf8;
|
||||
Status last_status;
|
||||
std::wstring last_error_option;
|
||||
|
||||
|
||||
void convert_str(const char * src, std::wstring & dst);
|
||||
void convert_str(const char * src, size_t len, std::wstring & dst);
|
||||
void convert_str(const std::wstring & src, Space & space);
|
||||
|
||||
Status parse(int argc, const char ** argv);
|
||||
void parse(size_t argc, const char ** argv, size_t & argv_index);
|
||||
void parse_short_option(size_t argc, const char ** argv, size_t & argv_index);
|
||||
void parse_long_option(size_t argc, const char ** argv, size_t & argv_index);
|
||||
void parse_arguments(size_t argc, const char ** argv, size_t & argv_index, size_t args_len);
|
||||
void parse_non_option_arguments(size_t argc, const char ** argv, size_t & argv_index);
|
||||
size_t how_many_arguments_required(const std::wstring & arg);
|
||||
void add_option_to_space(const std::wstring & option, const std::vector<std::wstring> & arguments);
|
||||
|
||||
};
|
||||
|
||||
|
||||
|
||||
} // namespace
|
||||
|
||||
|
||||
#endif
|
|
@ -41,7 +41,7 @@
|
|||
|
||||
#include <iostream>
|
||||
|
||||
namespace PT
|
||||
namespace pt
|
||||
{
|
||||
|
||||
/*
|
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
|
@ -0,0 +1,313 @@
|
|||
/*
|
||||
* This file is a part of PikoTools
|
||||
* and is distributed under the (new) BSD licence.
|
||||
* Author: Tomasz Sowa <t.sowa@ttmath.org>
|
||||
*/
|
||||
|
||||
/*
|
||||
* Copyright (c) 2012-2021, Tomasz Sowa
|
||||
* All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are met:
|
||||
*
|
||||
* * Redistributions of source code must retain the above copyright notice,
|
||||
* this list of conditions and the following disclaimer.
|
||||
*
|
||||
* * Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
*
|
||||
* * Neither the name Tomasz Sowa nor the names of contributors to this
|
||||
* project may be used to endorse or promote products derived
|
||||
* from this software without specific prior written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
|
||||
* THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#ifndef headerfile_picotools_space_jsonspaceparser
|
||||
#define headerfile_picotools_space_jsonspaceparser
|
||||
|
||||
#include <fstream>
|
||||
#include "space.h"
|
||||
|
||||
|
||||
|
||||
namespace pt
|
||||
{
|
||||
|
||||
|
||||
|
||||
|
||||
class SpaceParser
|
||||
{
|
||||
public:
|
||||
|
||||
|
||||
/*
|
||||
ctor -- setting default values (SetDefault() method)
|
||||
*/
|
||||
SpaceParser();
|
||||
|
||||
|
||||
/*
|
||||
status of parsing
|
||||
*/
|
||||
enum Status { ok, cant_open_file, syntax_error };
|
||||
|
||||
|
||||
/*
|
||||
the last status of parsing, set by parse() methods
|
||||
*/
|
||||
Status status;
|
||||
|
||||
|
||||
|
||||
|
||||
/*
|
||||
main methods used to parse a JSON file
|
||||
file_name is the path to a file
|
||||
*/
|
||||
Status parse_json_file(const char * file_name, Space & out_space, bool clear_space = true);
|
||||
Status parse_json_file(const std::string & file_name, Space & out_space, bool clear_space = true);
|
||||
Status parse_json_file(const wchar_t * file_name, Space & out_space, bool clear_space = true);
|
||||
Status parse_json_file(const std::wstring & file_name, Space & out_space, bool clear_space = true);
|
||||
|
||||
|
||||
/*
|
||||
main methods used to parse a Space file
|
||||
file_name is the path to a file
|
||||
*/
|
||||
Status parse_space_file(const char * file_name, Space & out_space, bool clear_space = true);
|
||||
Status parse_space_file(const std::string & file_name, Space & out_space, bool clear_space = true);
|
||||
Status parse_space_file(const wchar_t * file_name, Space & out_space, bool clear_space = true);
|
||||
Status parse_space_file(const std::wstring & file_name, Space & out_space, bool clear_space = true);
|
||||
|
||||
|
||||
/*
|
||||
main methods used to parse
|
||||
str - input string (either 8bit ascii or UTF-8 -- see UTF8() method)
|
||||
*/
|
||||
Status parse_json(const char * str, Space & out_space, bool clear_space = true);
|
||||
Status parse_json(const std::string & str, Space & out_space, bool clear_space = true);
|
||||
|
||||
/*
|
||||
main methods used to parse
|
||||
here input string is always in unicode (wide characters)
|
||||
*/
|
||||
Status parse_json(const wchar_t * str, Space & out_space, bool clear_space = true);
|
||||
Status parse_json(const std::wstring & str, Space & out_space, bool clear_space = true);
|
||||
|
||||
|
||||
|
||||
Status parse_space(const char * str, Space & out_space, bool clear_space = true);
|
||||
Status parse_space(const std::string & str, Space & out_space, bool clear_space = true);
|
||||
Status parse_space(const wchar_t * str, Space & out_space, bool clear_space = true);
|
||||
Status parse_space(const std::wstring & str, Space & out_space, bool clear_space = true);
|
||||
|
||||
|
||||
|
||||
/*
|
||||
* add two args parse method
|
||||
* Status parse(const char * str, Space & output_space);
|
||||
*
|
||||
*/
|
||||
|
||||
|
||||
|
||||
/*
|
||||
* if true then the input file or string (char* or std::string) is treated as UTF-8
|
||||
* default true
|
||||
*
|
||||
* the internal storage for strings is std::wstring so if you call set_utf8(false) then
|
||||
* the characters of input string will be simple static_cast<> from char to wchar_t
|
||||
*
|
||||
*/
|
||||
void use_utf8(bool utf);
|
||||
|
||||
|
||||
/*
|
||||
*
|
||||
* returns a number of a last parsed line
|
||||
* can be used to obtain the line in which there was a syntax error
|
||||
*
|
||||
*/
|
||||
int get_last_parsed_line();
|
||||
|
||||
|
||||
private:
|
||||
|
||||
|
||||
/*
|
||||
current space set by SetSpace();
|
||||
*/
|
||||
Space * root_space;
|
||||
|
||||
|
||||
/*
|
||||
a number of a line in which there is a syntax_error
|
||||
*/
|
||||
int line;
|
||||
|
||||
/*
|
||||
true if parse() method was called
|
||||
false if ParseString() was called
|
||||
*/
|
||||
bool reading_from_file;
|
||||
|
||||
|
||||
/*
|
||||
pointers to the current character
|
||||
if ParseString() is in used
|
||||
*/
|
||||
const char * pchar_ascii;
|
||||
const wchar_t * pchar_unicode;
|
||||
|
||||
|
||||
/*
|
||||
true if ParseString(wchar_t *) or ParseString(std::wstring&) was called
|
||||
*/
|
||||
bool reading_from_wchar_string;
|
||||
|
||||
|
||||
/*
|
||||
last read token
|
||||
*/
|
||||
std::wstring token;
|
||||
|
||||
|
||||
/*
|
||||
separator between a variable and a value, default: '='
|
||||
*/
|
||||
int separator;
|
||||
|
||||
|
||||
/*
|
||||
space starting character, default: '{'
|
||||
*/
|
||||
int space_start;
|
||||
|
||||
|
||||
/*
|
||||
space ending character, default: '}'
|
||||
*/
|
||||
int space_end;
|
||||
|
||||
|
||||
/*
|
||||
table starting character, default: '['
|
||||
*/
|
||||
int table_start;
|
||||
|
||||
|
||||
/*
|
||||
table ending character, default: ']'
|
||||
*/
|
||||
int table_end;
|
||||
|
||||
|
||||
/*
|
||||
option delimiter, default: ','
|
||||
*/
|
||||
int option_delimiter;
|
||||
|
||||
|
||||
/*
|
||||
last read char
|
||||
or -1 if the end
|
||||
*/
|
||||
int lastc;
|
||||
|
||||
|
||||
/*
|
||||
true if the lastc was escaped (with a backslash)
|
||||
we have to know if the last sequence was \" or just "
|
||||
*/
|
||||
bool char_was_escaped;
|
||||
|
||||
|
||||
/*
|
||||
current file
|
||||
|
||||
may it would be better to make a pointer?
|
||||
if we parse only a string then there is no sense to have such an object
|
||||
*/
|
||||
std::ifstream file;
|
||||
|
||||
|
||||
/*
|
||||
input file is in UTF-8
|
||||
default: true
|
||||
*/
|
||||
bool input_as_utf8;
|
||||
|
||||
|
||||
/*
|
||||
* if parsing_space is false then it means we are parsing JSON format
|
||||
*
|
||||
*/
|
||||
bool parsing_space;
|
||||
|
||||
|
||||
|
||||
void parse_root_space(bool clear_root_space);
|
||||
void parse(Space * space, bool is_object_value, bool is_table_value);
|
||||
void parse_space(Space * space);
|
||||
void parse_table(Space * space);
|
||||
|
||||
void parse_key_value_pairs(Space * space);
|
||||
void parse_values_list(Space * space);
|
||||
|
||||
void read_key();
|
||||
|
||||
void parse_text_value(Space * space);
|
||||
void parse_integer_value(Space * space);
|
||||
void parse_floating_point_value(Space * space);
|
||||
|
||||
bool is_alfa_numeric_char(int c);
|
||||
|
||||
void read_token_until_delimiter(std::wstring & token, int delimiter1, int delimiter2);
|
||||
void read_alfa_numeric_token(std::wstring & token);
|
||||
void read_string_value(std::wstring & token, bool is_object_value, bool is_table_value);
|
||||
|
||||
bool is_integer_token();
|
||||
bool is_floating_point_token();
|
||||
|
||||
void read_space_field_token(std::wstring & token);
|
||||
void read_token_quoted(std::wstring & token);
|
||||
void read_multiline_token_quoted(std::wstring & token);
|
||||
|
||||
int read_utf8_char();
|
||||
int read_ascii_char();
|
||||
int read_char_from_wchar_string();
|
||||
int read_char_from_utf8_string();
|
||||
int read_char_from_ascii_string();
|
||||
int read_char_no_escape();
|
||||
int read_char();
|
||||
bool is_white(int c);
|
||||
void skip_line();
|
||||
void skip_white();
|
||||
void trim_last_white(std::wstring & s);
|
||||
bool is_hex_digit(wchar_t c);
|
||||
int hex_to_int(wchar_t c);
|
||||
void read_unicode_code_point();
|
||||
|
||||
};
|
||||
|
||||
|
||||
|
||||
|
||||
} // namespace
|
||||
|
||||
|
||||
#endif
|
|
@ -5,7 +5,7 @@
|
|||
*/
|
||||
|
||||
/*
|
||||
* Copyright (c) 2012-2013, Tomasz Sowa
|
||||
* Copyright (c) 2012-2021, Tomasz Sowa
|
||||
* All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
|
@ -49,7 +49,7 @@
|
|||
// for snprintf
|
||||
#include <cstdio>
|
||||
|
||||
namespace PT
|
||||
namespace pt
|
||||
{
|
||||
|
||||
|
||||
|
@ -60,17 +60,20 @@ namespace PT
|
|||
StringType can be either std::string or std::wstring
|
||||
this class doesn't use UTF-8 in any kind
|
||||
*/
|
||||
template<typename char_type, size_t stack_size, size_t heap_block_size>
|
||||
template<typename CharT, size_t stack_size, size_t heap_block_size>
|
||||
class TextStreamBase
|
||||
{
|
||||
public:
|
||||
|
||||
TextStreamBase();
|
||||
|
||||
typedef CharT char_type;
|
||||
|
||||
typedef MemBuffer<char_type, stack_size, heap_block_size> buffer_type;
|
||||
typedef typename buffer_type::iterator iterator;
|
||||
typedef typename buffer_type::const_iterator const_iterator;
|
||||
|
||||
|
||||
void clear();
|
||||
bool empty() const;
|
||||
size_t size() const;
|
||||
|
@ -106,8 +109,8 @@ public:
|
|||
TextStreamBase & operator<<(unsigned long long);
|
||||
TextStreamBase & operator<<(double);
|
||||
TextStreamBase & operator<<(const void *); // printing a pointer
|
||||
TextStreamBase & operator<<(const PT::Space & space);
|
||||
TextStreamBase & operator<<(const PT::Date & date);
|
||||
TextStreamBase & operator<<(const Space & space);
|
||||
TextStreamBase & operator<<(const Date & date);
|
||||
|
||||
// min width for integer output
|
||||
// if the output value has fewer digits then leading zeroes are added
|
||||
|
@ -466,9 +469,9 @@ wchar_t buf[100];
|
|||
|
||||
template<typename char_type, size_t stack_size, size_t heap_block_size>
|
||||
TextStreamBase<char_type, stack_size, heap_block_size> &
|
||||
TextStreamBase<char_type, stack_size, heap_block_size>::operator<<(const PT::Space & space)
|
||||
TextStreamBase<char_type, stack_size, heap_block_size>::operator<<(const Space & space)
|
||||
{
|
||||
space.Serialize(*this, true, false);
|
||||
space.serialize_to_space_stream(*this, true);
|
||||
|
||||
return *this;
|
||||
}
|
||||
|
@ -477,7 +480,7 @@ return *this;
|
|||
|
||||
template<typename char_type, size_t stack_size, size_t heap_block_size>
|
||||
TextStreamBase<char_type, stack_size, heap_block_size> &
|
||||
TextStreamBase<char_type, stack_size, heap_block_size>::operator<<(const PT::Date & date)
|
||||
TextStreamBase<char_type, stack_size, heap_block_size>::operator<<(const Date & date)
|
||||
{
|
||||
date.Serialize(*this);
|
||||
|
|
@ -40,7 +40,7 @@
|
|||
|
||||
|
||||
|
||||
namespace PT
|
||||
namespace pt
|
||||
{
|
||||
|
||||
|
|
@ -5,7 +5,7 @@
|
|||
*/
|
||||
|
||||
/*
|
||||
* Copyright (c) 2010-2018, Tomasz Sowa
|
||||
* Copyright (c) 2010-2021, Tomasz Sowa
|
||||
* All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
|
@ -36,67 +36,62 @@
|
|||
*/
|
||||
|
||||
#include "utf8.h"
|
||||
#include "utf8_private.h"
|
||||
|
||||
|
||||
|
||||
namespace PT
|
||||
namespace pt
|
||||
{
|
||||
|
||||
|
||||
|
||||
/*!
	an auxiliary function for converting from UTF-8 string

	it decodes the first octet of a sequence:
	  uz  - the first octet
	  len - (output) the length of the whole sequence in octets, taken from the number
	        of leading one-bits (1 for an octet without the highest bit set)
	  res - (output) the payload bits of the first octet

	returns false if:
	  - uz is a continuation octet (10xxxxxx), in such a case res is not changed
	  - uz is zero
	  - uz is 0xC0 (a two octets lead without payload bits can only start an overlong form)
	  - uz has more than four leading one-bits and no payload bits

	the leads 0xE0 and 0xF0 have all payload bits equal to zero but they begin
	correct sequences (U+0800..U+0FFF and U+10000..U+3FFFF) so they are accepted,
	overlong forms which start with them have to be rejected by the range check
	made by the caller when the whole sequence is read
*/
static bool UTF8ToInt_FirstOctet(unsigned char uz, size_t & len, int & res)
{
	// count the leading one-bits, each step shifts one of them out of the octet
	for(len=0 ; (uz & 0x80) != 0 ; ++len)
		uz <<= 1;

	if( len == 1 )
		return false;

	res = uz;

	// move the remaining (payload) bits back to their place
	if( len > 0 )
		res >>= len;

	// an empty payload is an error except for the three and four octets leads
	if( res == 0 && len != 3 && len != 4 )
		return false;

	if( len == 0 )
		len = 1;

	return true;
}
|
||||
|
||||
|
||||
|
||||
/*!
	an auxiliary function for converting from UTF-8 string

	it adds a next (continuation) octet to the value being decoded:
	if uz has the form 10xxxxxx then res is shifted left by six bits,
	the six payload bits of uz are put at the lowest position
	and true is returned, otherwise res is not changed and false is returned
*/
static bool UTF8ToInt_AddNextOctet(unsigned char uz, int & res)
{
	const bool is_continuation = ((uz >> 6) == 0x02);

	if( is_continuation )
	{
		res <<= 6;
		res |= (uz & 0x3F);
	}

	return is_continuation;
}
|
||||
|
||||
|
||||
|
||||
|
||||
/*!
|
||||
returns true if 'c' is a correct unicode character
|
||||
*/
|
||||
bool UTF8_CheckRange(int c)
|
||||
bool utf8_check_range(int c)
{
	// outside of the unicode range
	if( c < 0 || c > 0x10FFFF )
		return false;

	// surrogates are not correct characters
	const bool is_surrogate = (c >= 0xD800 && c <= 0xDFFF);

	return !is_surrogate;
}
|
||||
|
||||
|
||||
/*!
	returns true if 'c' is a correct unicode character
	and it was encoded with the proper number of bytes

	this method is used when reading from an utf8 string
	how_many_bytes - means how many bytes from the utf8 string were read

	each length has its own range of characters:
	  1 byte  - 0x0000  .. 0x007f
	  2 bytes - 0x0080  .. 0x07ff
	  3 bytes - 0x0800  .. 0xffff (without surrogates 0xD800 .. 0xDFFF)
	  4 bytes - 0x10000 .. 0x10FFFF
	for any other length the method returns false
*/
bool utf8_check_range(int c, int how_many_bytes)
{
	switch( how_many_bytes )
	{
	case 1:
		return c >= 0x0000 && c <= 0x007f;

	case 2:
		return c >= 0x0080 && c <= 0x07ff;

	case 3:
		return c >= 0x0800 && c <= 0xffff && !(c >= 0xD800 && c <= 0xDFFF);

	case 4:
		return c >= 0x10000 && c <= 0x10FFFF;

	default:
		return false;
	}
}
|
||||
|
||||
|
||||
|
||||
/*!
|
||||
|
@ -116,7 +111,7 @@ bool UTF8_CheckRange(int c)
|
|||
(returns zero only if utf8_len is zero)
|
||||
even if there are errors the function returns a non-zero value
|
||||
*/
|
||||
size_t UTF8ToInt(const char * utf8, size_t utf8_len, int & res, bool & correct)
|
||||
size_t utf8_to_int(const char * utf8, size_t utf8_len, int & res, bool & correct)
|
||||
{
|
||||
size_t i, len;
|
||||
|
||||
|
@ -126,17 +121,19 @@ size_t i, len;
|
|||
if( utf8_len == 0 )
|
||||
return 0;
|
||||
|
||||
if( !UTF8ToInt_FirstOctet(utf8[0], len, res) )
|
||||
if( !private_namespace::utf8_to_int_first_octet(utf8[0], len, res) )
|
||||
return 1;
|
||||
|
||||
if( utf8_len < len )
|
||||
return utf8_len;
|
||||
|
||||
for(i=1 ; i<len ; ++i)
|
||||
if( !UTF8ToInt_AddNextOctet(utf8[i], res) )
|
||||
{
|
||||
if( !private_namespace::utf8_to_int_add_next_octet(utf8[i], res) )
|
||||
return i;
|
||||
}
|
||||
|
||||
if( UTF8_CheckRange(res) )
|
||||
if( utf8_check_range(res, len) )
|
||||
correct = true;
|
||||
|
||||
return len;
|
||||
|
@ -158,7 +155,7 @@ return len;
|
|||
(returns zero only if the string has '\0' at the first character)
|
||||
even if there are errors the function returns a non-zero value
|
||||
*/
|
||||
size_t UTF8ToInt(const char * utf8, int & res, bool & correct)
|
||||
size_t utf8_to_int(const char * utf8, int & res, bool & correct)
|
||||
{
|
||||
size_t i, len;
|
||||
|
||||
|
@ -168,7 +165,7 @@ size_t i, len;
|
|||
if( *utf8 == 0 )
|
||||
return 0;
|
||||
|
||||
if( !UTF8ToInt_FirstOctet(utf8[0], len, res) )
|
||||
if( !private_namespace::utf8_to_int_first_octet(utf8[0], len, res) )
|
||||
return 1;
|
||||
|
||||
for(i=1 ; i<len ; ++i)
|
||||
|
@ -176,11 +173,11 @@ size_t i, len;
|
|||
if( utf8[i] == 0 )
|
||||
return i;
|
||||
|
||||
if( !UTF8ToInt_AddNextOctet(utf8[i], res) )
|
||||
if( !private_namespace::utf8_to_int_add_next_octet(utf8[i], res) )
|
||||
return i;
|
||||
}
|
||||
|
||||
if( UTF8_CheckRange(res) )
|
||||
if( utf8_check_range(res, len) )
|
||||
correct = true;
|
||||
|
||||
return len;
|
||||
|
@ -203,9 +200,9 @@ return len;
|
|||
(returns zero only if utf8 is empty)
|
||||
even if there are errors the function returns a non-zero value
|
||||
*/
|
||||
size_t UTF8ToInt(const std::string & utf8, int & res, bool & correct)
|
||||
size_t utf8_to_int(const std::string & utf8, int & res, bool & correct)
|
||||
{
|
||||
return UTF8ToInt(utf8.c_str(), utf8.size(), res, correct);
|
||||
return utf8_to_int(utf8.c_str(), utf8.size(), res, correct);
|
||||
}
|
||||
|
||||
|
||||
|
@ -222,7 +219,7 @@ size_t UTF8ToInt(const std::string & utf8, int & res, bool & correct)
|
|||
|
||||
the function returns how many characters have been used from the input stream
|
||||
*/
|
||||
size_t UTF8ToInt(std::istream & utf8, int & res, bool & correct)
|
||||
size_t utf8_to_int(std::istream & utf8, int & res, bool & correct)
|
||||
{
|
||||
size_t i, len;
|
||||
unsigned char uz;
|
||||
|
@ -235,7 +232,7 @@ unsigned char uz;
|
|||
if( !utf8 )
|
||||
return 0;
|
||||
|
||||
if( !UTF8ToInt_FirstOctet(uz, len, res) )
|
||||
if( !private_namespace::utf8_to_int_first_octet(uz, len, res) )
|
||||
return 1;
|
||||
|
||||
for(i=1 ; i<len ; ++i)
|
||||
|
@ -245,11 +242,11 @@ unsigned char uz;
|
|||
if( !utf8 )
|
||||
return i;
|
||||
|
||||
if( !UTF8ToInt_AddNextOctet(uz, res) )
|
||||
if( !private_namespace::utf8_to_int_add_next_octet(uz, res) )
|
||||
return i;
|
||||
}
|
||||
|
||||
if( UTF8_CheckRange(res) )
|
||||
if( utf8_check_range(res, len) )
|
||||
correct = true;
|
||||
|
||||
return len;
|
||||
|
@ -261,7 +258,7 @@ return len;
|
|||
/*
|
||||
|
||||
*/
|
||||
static void IntToWide(int c, std::wstring & res)
|
||||
static void int_to_wide(int c, std::wstring & res)
|
||||
{
|
||||
if( sizeof(wchar_t)==2 && c>0xffff )
|
||||
{
|
||||
|
@ -294,13 +291,13 @@ static void IntToWide(int c, std::wstring & res)
|
|||
|
||||
the function returns false if there were some errors when converting
|
||||
*/
|
||||
bool UTF8ToWide(const char * utf8, size_t utf8_len, std::wstring & res, bool clear, int mode)
|
||||
bool utf8_to_wide(const char * utf8, size_t utf8_len, std::wstring & res, bool clear, int mode)
|
||||
{
|
||||
if( clear )
|
||||
res.clear();
|
||||
|
||||
bool status = private_namespace::UTF8ToWideGeneric(utf8, utf8_len, mode, [&res](int c) {
|
||||
IntToWide(c, res);
|
||||
bool status = private_namespace::utf8_to_wide_generic(utf8, utf8_len, mode, [&res](int c) {
|
||||
int_to_wide(c, res);
|
||||
});
|
||||
|
||||
return status;
|
||||
|
@ -324,14 +321,14 @@ bool UTF8ToWide(const char * utf8, size_t utf8_len, std::wstring & res, bool cle
|
|||
|
||||
the function returns false if there were some errors when converting
|
||||
*/
|
||||
bool UTF8ToWide(const char * utf8, std::wstring & res, bool clear, int mode)
|
||||
bool utf8_to_wide(const char * utf8, std::wstring & res, bool clear, int mode)
|
||||
{
|
||||
size_t utf8_len = 0;
|
||||
|
||||
while( utf8[utf8_len] != 0 )
|
||||
utf8_len += 1;
|
||||
|
||||
return UTF8ToWide(utf8, utf8_len, res, clear, mode);
|
||||
return utf8_to_wide(utf8, utf8_len, res, clear, mode);
|
||||
}
|
||||
|
||||
|
||||
|
@ -350,9 +347,9 @@ return UTF8ToWide(utf8, utf8_len, res, clear, mode);
|
|||
|
||||
the function returns false if there were some errors when converting
|
||||
*/
|
||||
bool UTF8ToWide(const std::string & utf8, std::wstring & res, bool clear, int mode)
|
||||
bool utf8_to_wide(const std::string & utf8, std::wstring & res, bool clear, int mode)
|
||||
{
|
||||
return UTF8ToWide(utf8.c_str(), utf8.size(), res, clear, mode);
|
||||
return utf8_to_wide(utf8.c_str(), utf8.size(), res, clear, mode);
|
||||
}
|
||||
|
||||
|
||||
|
@ -371,7 +368,7 @@ bool UTF8ToWide(const std::string & utf8, std::wstring & res, bool clear, int mo
|
|||
|
||||
the function returns false if there were some errors when converting
|
||||
*/
|
||||
bool UTF8ToWide(std::istream & utf8, std::wstring & res, bool clear, int mode)
|
||||
bool utf8_to_wide(std::istream & utf8, std::wstring & res, bool clear, int mode)
|
||||
{
|
||||
int z;
|
||||
bool correct, was_error = false;
|
||||
|
@ -379,7 +376,7 @@ bool correct, was_error = false;
|
|||
if( clear )
|
||||
res.clear();
|
||||
|
||||
while( UTF8ToInt(utf8, z, correct) > 0 )
|
||||
while( utf8_to_int(utf8, z, correct) > 0 )
|
||||
{
|
||||
if( !correct )
|
||||
{
|
||||
|
@ -390,7 +387,7 @@ bool correct, was_error = false;
|
|||
}
|
||||
else
|
||||
{
|
||||
IntToWide(z, res);
|
||||
int_to_wide(z, res);
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -413,13 +410,13 @@ return !was_error;
|
|||
the function returns how many characters have been written to the utf8,
|
||||
zero means the utf8 buffer is too small or 'z' is an incorrect unicode character
|
||||
*/
|
||||
size_t IntToUTF8(int z, char * utf8, size_t utf8_max_len)
|
||||
size_t int_to_utf8(int z, char * utf8, size_t utf8_max_len)
|
||||
{
|
||||
char buf[10];
|
||||
int i = 0;
|
||||
int mask = 0x3f; // 6 first bits set
|
||||
|
||||
if( utf8_max_len==0 || !UTF8_CheckRange(z) )
|
||||
if( utf8_max_len==0 || !utf8_check_range(z) )
|
||||
return 0;
|
||||
|
||||
if( z <= 0x7f )
|
||||
|
@ -467,14 +464,14 @@ return a;
|
|||
the function returns how many characters have been written to the utf8 string,
|
||||
zero means that 'z' is an incorrect unicode character
|
||||
*/
|
||||
size_t IntToUTF8(int z, std::string & utf8, bool clear)
|
||||
size_t int_to_utf8(int z, std::string & utf8, bool clear)
|
||||
{
|
||||
char buf[10];
|
||||
|
||||
if( clear )
|
||||
utf8.clear();
|
||||
|
||||
size_t len = IntToUTF8(z, buf, sizeof(buf)/sizeof(char));
|
||||
size_t len = int_to_utf8(z, buf, sizeof(buf)/sizeof(char));
|
||||
size_t i;
|
||||
|
||||
for(i=0 ; i<len ; ++i)
|
||||
|
@ -485,268 +482,6 @@ return len;
|
|||
|
||||
|
||||
|
||||
/*!
|
||||
this function converts one wide character into UTF-8 stream
|
||||
|
||||
input:
|
||||
z - wide character
|
||||
|
||||
output:
|
||||
utf8 - a UTF-8 stream for the output sequence
|
||||
|
||||
the function returns how many characters have been written to the utf8 stream,
|
||||
zero means that 'z' is an incorrect unicode character
|
||||
*/
|
||||
size_t IntToUTF8(int z, std::ostream & utf8)
|
||||
{
|
||||
char buf[10];
|
||||
|
||||
size_t len = IntToUTF8(z, buf, sizeof(buf)/sizeof(char));
|
||||
size_t i;
|
||||
|
||||
for(i=0 ; i<len ; ++i)
|
||||
utf8 << buf[i];
|
||||
|
||||
return len;
|
||||
}
|
||||
|
||||
|
||||
|
||||
/*
|
||||
an auxiliary function for converting from wide characters to UTF-8
|
||||
converting a wide character into one int
|
||||
|
||||
returns how many wide characters were used
|
||||
if string_len is greater than 0 then the return value is always greater than zero too
|
||||
*/
|
||||
static size_t WideToInt(const wchar_t * wide_string, size_t string_len, int & z, bool & correct)
|
||||
{
|
||||
if( string_len == 0 )
|
||||
{
|
||||
z = 0;
|
||||
correct = false;
|
||||
return 0;
|
||||
}
|
||||
|
||||
z = static_cast<int>(*wide_string);
|
||||
correct = true;
|
||||
|
||||
if( sizeof(wchar_t) == 2 && (z>=0xD800 && z<=0xDFFF) )
|
||||
{
|
||||
if( z>=0xD800 && z<=0xDBFF && string_len>1 )
|
||||
{
|
||||
int z2 = *(wide_string+1);
|
||||
|
||||
if( z2>=0xDC00 && z2<=0xDFFF )
|
||||
{
|
||||
z = 0x10000 + (((z & 0x3FF) << 10) | (z2 & 0x3FF));
|
||||
return 2;
|
||||
}
|
||||
else
|
||||
{
|
||||
correct = false;
|
||||
return 2;
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
correct = false;
|
||||
return 1;
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
correct = UTF8_CheckRange(z);
|
||||
return 1;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
|
||||
/*
|
||||
an auxiliary function for converting from wide characters to UTF-8
|
||||
converting a wide character into one int
|
||||
|
||||
returns how many wide characters were used
|
||||
if wide_string has at least one character then the return value is always greater than zero too
|
||||
*/
|
||||
static size_t WideToInt(const wchar_t * wide_string, int & z, bool & correct)
|
||||
{
|
||||
size_t min_str_len = 1;
|
||||
|
||||
if( *wide_string == 0 )
|
||||
{
|
||||
z = 0;
|
||||
correct = false;
|
||||
return 0;
|
||||
}
|
||||
|
||||
if( *(wide_string+1) != 0 )
|
||||
min_str_len = 2;
|
||||
|
||||
return WideToInt(wide_string, min_str_len, z, correct);
|
||||
}
|
||||
|
||||
|
||||
|
||||
/*!
|
||||
an auxiliary function for converting from wide characters to UTF-8
|
||||
|
||||
returns how many wide characters were used
|
||||
if string_len is greater than 0 then the return value is always greater than zero too
|
||||
|
||||
utf8_written - how many characters were saved in the utf8 string (the string doesn't have
|
||||
a null terminating character)
|
||||
it can be equal to zero if the utf8 buffer is too small or there was an incorrect wide character read
|
||||
was_utf8_buf_too_small - will be true if the utf8 buffer is too small
|
||||
if this flag is true then utf8_written is equal to zero
|
||||
was_error - will be true if there is an error when converting (there was an incorrect wide character)
|
||||
(was_error will not be true if the utf8 buffer is too small)
|
||||
*/
|
||||
static size_t WideOneToUTF8(const wchar_t * wide_string, size_t string_len, char * utf8, size_t utf8_len,
|
||||
size_t & utf8_written, bool & was_utf8_buf_too_small, bool & was_error, int mode)
|
||||
{
|
||||
int z;
|
||||
bool correct;
|
||||
size_t chars;
|
||||
|
||||
utf8_written = 0;
|
||||
was_utf8_buf_too_small = false;
|
||||
chars = WideToInt(wide_string, string_len, z, correct);
|
||||
|
||||
if( correct )
|
||||
{
|
||||
utf8_written = IntToUTF8(z, utf8, utf8_len);
|
||||
|
||||
if( utf8_written == 0 )
|
||||
was_utf8_buf_too_small = true;
|
||||
}
|
||||
else
|
||||
{
|
||||
if( mode == 1 )
|
||||
{
|
||||
utf8_written = IntToUTF8(0xFFFD, utf8, utf8_len); // U+FFFD "replacement character"
|
||||
|
||||
if( utf8_written == 0 )
|
||||
was_utf8_buf_too_small = true;
|
||||
}
|
||||
|
||||
was_error = true;
|
||||
}
|
||||
|
||||
return chars;
|
||||
}
|
||||
|
||||
|
||||
|
||||
/*!
|
||||
an auxiliary function for converting from wide characters to UTF-8
|
||||
|
||||
returns how many wide characters were used
|
||||
if string_len is greater than 0 then the return value is always greater than zero too
|
||||
*/
|
||||
static size_t WideOneToUTF8(const wchar_t * wide_string, size_t string_len, std::string & utf8, bool & was_error, int mode)
|
||||
{
|
||||
int z;
|
||||
bool correct;
|
||||
size_t chars;
|
||||
|
||||
chars = WideToInt(wide_string, string_len, z, correct);
|
||||
|
||||
if( correct )
|
||||
correct = IntToUTF8(z, utf8, false) != 0;
|
||||
|
||||
if( !correct )
|
||||
{
|
||||
if( mode == 1 )
|
||||
IntToUTF8(0xFFFD, utf8, false); // U+FFFD "replacement character"
|
||||
|
||||
was_error = true;
|
||||
}
|
||||
|
||||
return chars;
|
||||
}
|
||||
|
||||
|
||||
|
||||
/*!
|
||||
an auxiliary function for converting from wide characters to UTF-8
|
||||
|
||||
returns how many wide characters were used
|
||||
if wide_string has at least one character then the return value is always greater than zero too
|
||||
*/
|
||||
static size_t WideOneToUTF8(const wchar_t * wide_string, std::string & utf8, bool & was_error, int mode)
|
||||
{
|
||||
int z;
|
||||
bool correct;
|
||||
size_t chars;
|
||||
|
||||
chars = WideToInt(wide_string, z, correct);
|
||||
|
||||
if( correct )
|
||||
correct = IntToUTF8(z, utf8, false) != 0;
|
||||
|
||||
if( !correct )
|
||||
{
|
||||
if( mode == 1 )
|
||||
IntToUTF8(0xFFFD, utf8, false); // U+FFFD "replacement character"
|
||||
|
||||
was_error = true;
|
||||
}
|
||||
|
||||
return chars;
|
||||
}
|
||||
|
||||
|
||||
|
||||
/*!
|
||||
an auxiliary function for converting from wide characters to UTF-8
|
||||
|
||||
returns how many wide characters were used
|
||||
if string_len is greater than 0 then the return value is always greater than zero too
|
||||
*/
|
||||
static size_t WideOneToUTF8(const wchar_t * wide_string, size_t string_len, std::ostream & utf8, bool & was_error, int mode)
|
||||
{
|
||||
int z;
|
||||
bool correct;
|
||||
size_t chars;
|
||||
|
||||
chars = WideToInt(wide_string, string_len, z, correct);
|
||||
|
||||
if( correct )
|
||||
correct = IntToUTF8(z, utf8) != 0;
|
||||
|
||||
if( !correct )
|
||||
{
|
||||
if( mode == 1 )
|
||||
IntToUTF8(0xFFFD, utf8); // U+FFFD "replacement character"
|
||||
|
||||
was_error = true;
|
||||
}
|
||||
|
||||
return chars;
|
||||
}
|
||||
|
||||
|
||||
|
||||
/*!
|
||||
an auxiliary function for converting from wide characters to UTF-8
|
||||
*/
|
||||
static size_t WideOneToUTF8(const wchar_t * wide_string, std::ostream & utf8, bool & was_error, int mode)
|
||||
{
|
||||
size_t min_str_len = 1;
|
||||
|
||||
if( *wide_string == 0 )
|
||||
return 0;
|
||||
|
||||
if( *(wide_string+1) != 0 )
|
||||
min_str_len = 2;
|
||||
|
||||
return WideOneToUTF8(wide_string, min_str_len, utf8, was_error, mode);
|
||||
}
|
||||
|
||||
|
||||
|
||||
/*!
|
||||
this function converts a wide string into UTF-8 string
|
||||
|
||||
|
@ -762,7 +497,7 @@ return WideOneToUTF8(wide_string, min_str_len, utf8, was_error, mode);
|
|||
|
||||
this function returns false if there were some errors when converting
|
||||
*/
|
||||
bool WideToUTF8(const wchar_t * wide_string, size_t string_len, std::string & utf8, bool clear, int mode)
|
||||
bool wide_to_utf8(const wchar_t * wide_string, size_t string_len, std::string & utf8, bool clear, int mode)
|
||||
{
|
||||
bool was_error = false;
|
||||
size_t chars;
|
||||
|
@ -772,7 +507,7 @@ size_t chars;
|
|||
|
||||
while( string_len > 0 )
|
||||
{
|
||||
chars = WideOneToUTF8(wide_string, string_len, utf8, was_error, mode);
|
||||
chars = private_namespace::wide_one_to_utf8(wide_string, string_len, utf8, was_error, mode);
|
||||
wide_string += chars;
|
||||
string_len -= chars;
|
||||
}
|
||||
|
@ -796,7 +531,7 @@ return !was_error;
|
|||
|
||||
this function returns false if there were some errors when converting
|
||||
*/
|
||||
bool WideToUTF8(const wchar_t * wide_string, std::string & utf8, bool clear, int mode)
|
||||
bool wide_to_utf8(const wchar_t * wide_string, std::string & utf8, bool clear, int mode)
|
||||
{
|
||||
bool was_error = false;
|
||||
|
||||
|
@ -804,7 +539,7 @@ bool was_error = false;
|
|||
utf8.clear();
|
||||
|
||||
while( *wide_string )
|
||||
wide_string += WideOneToUTF8(wide_string, utf8, was_error, mode);
|
||||
wide_string += private_namespace::wide_one_to_utf8(wide_string, utf8, was_error, mode);
|
||||
|
||||
return !was_error;
|
||||
}
|
||||
|
@ -825,90 +560,13 @@ return !was_error;
|
|||
|
||||
this function returns false if there were some errors when converting
|
||||
*/
|
||||
bool WideToUTF8(const std::wstring & wide_string, std::string & utf8, bool clear, int mode)
|
||||
bool wide_to_utf8(const std::wstring & wide_string, std::string & utf8, bool clear, int mode)
|
||||
{
|
||||
return WideToUTF8(wide_string.c_str(), wide_string.size(), utf8, clear, mode);
|
||||
return wide_to_utf8(wide_string.c_str(), wide_string.size(), utf8, clear, mode);
|
||||
}
|
||||
|
||||
|
||||
|
||||
/*!
|
||||
this function converts a wide string into UTF-8 stream
|
||||
|
||||
input:
|
||||
wide_string - a wide string for converting
|
||||
string_len - size of the string
|
||||
mode - what to do with errors when converting
|
||||
0: skip an invalid character
|
||||
1: put U+FFFD "replacement character" istead of the invalid character (default)
|
||||
|
||||
output:
|
||||
utf8 - a UTF-8 stream for the output sequence
|
||||
|
||||
this function returns false if there were some errors when converting
|
||||
*/
|
||||
bool WideToUTF8(const wchar_t * wide_string, size_t string_len, std::ostream & utf8, int mode)
|
||||
{
|
||||
bool was_error = false;
|
||||
size_t chars;
|
||||
|
||||
while( string_len > 0 )
|
||||
{
|
||||
chars = WideOneToUTF8(wide_string, string_len, utf8, was_error, mode);
|
||||
wide_string += chars;
|
||||
string_len -= chars;
|
||||
}
|
||||
|
||||
return !was_error;
|
||||
}
|
||||
|
||||
|
||||
|
||||
/*!
|
||||
this function converts a wide string into UTF-8 stream
|
||||
|
||||
input:
|
||||
wide_string - a null terminated wide string for converting
|
||||
mode - what to do with errors when converting
|
||||
0: skip an invalid character
|
||||
1: put U+FFFD "replacement character" istead of the invalid character (default)
|
||||
|
||||
output:
|
||||
utf8 - a UTF-8 stream for the output sequence
|
||||
|
||||
this function returns false if there were some errors when converting
|
||||
*/
|
||||
bool WideToUTF8(const wchar_t * wide_string, std::ostream & utf8, int mode)
|
||||
{
|
||||
bool was_error = false;
|
||||
|
||||
while( *wide_string )
|
||||
wide_string += WideOneToUTF8(wide_string, utf8, was_error, mode);
|
||||
|
||||
return !was_error;
|
||||
}
|
||||
|
||||
|
||||
|
||||
/*!
|
||||
this function converts a wide string (std::wstring) into UTF-8 stream
|
||||
|
||||
input:
|
||||
wide_string - a wide string for converting
|
||||
mode - what to do with errors when converting
|
||||
0: skip an invalid character
|
||||
1: put U+FFFD "replacement character" istead of the invalid character (default)
|
||||
|
||||
output:
|
||||
utf8 - a UTF-8 stream for the output sequence
|
||||
|
||||
this function returns false if there were some errors when converting
|
||||
*/
|
||||
bool WideToUTF8(const std::wstring & wide_string, std::ostream & utf8, int mode)
|
||||
{
|
||||
return WideToUTF8(wide_string.c_str(), wide_string.size(), utf8, mode);
|
||||
}
|
||||
|
||||
|
||||
|
||||
/*!
|
||||
|
@ -932,7 +590,7 @@ bool WideToUTF8(const std::wstring & wide_string, std::ostream & utf8, int mode)
|
|||
if there is an error when converting (there is an incorrect character in the wide string) the function
|
||||
will continue converting but if the buffer is too small the function breaks immediately
|
||||
*/
|
||||
bool WideToUTF8(const wchar_t * wide_string, size_t string_len, char * utf8, size_t utf8_len, size_t & utf8_written, int mode)
|
||||
bool wide_to_utf8(const wchar_t * wide_string, size_t string_len, char * utf8, size_t utf8_len, size_t & utf8_written, int mode)
|
||||
{
|
||||
bool was_error = false;
|
||||
bool was_buffer_to_small;
|
||||
|
@ -942,7 +600,7 @@ size_t chars, utf8_saved;
|
|||
|
||||
while( string_len > 0 )
|
||||
{
|
||||
chars = WideOneToUTF8(wide_string, string_len, utf8, utf8_len, utf8_saved, was_buffer_to_small, was_error, mode);
|
||||
chars = private_namespace::wide_one_to_utf8(wide_string, string_len, utf8, utf8_len, utf8_saved, was_buffer_to_small, was_error, mode);
|
||||
|
||||
if( was_buffer_to_small )
|
||||
{
|
||||
|
@ -986,9 +644,9 @@ return !was_error;
|
|||
if there is an error when converting (there is an incorrect character in the wide string) the function
|
||||
will continue converting but if the buffer is too small the function breaks immediately
|
||||
*/
|
||||
bool WideToUTF8(const std::wstring & wide_string, char * utf8, size_t utf8_len, size_t & utf8_written, int mode)
|
||||
bool wide_to_utf8(const std::wstring & wide_string, char * utf8, size_t utf8_len, size_t & utf8_written, int mode)
|
||||
{
|
||||
return WideToUTF8(wide_string.c_str(), wide_string.size(), utf8, utf8_len, utf8_written, mode);
|
||||
return wide_to_utf8(wide_string.c_str(), wide_string.size(), utf8, utf8_len, utf8_written, mode);
|
||||
}
|
||||
|
||||
|
||||
|
@ -1014,7 +672,7 @@ bool WideToUTF8(const std::wstring & wide_string, char * utf8, size_t utf8_len,
|
|||
will continue converting but if the buffer is too small the function breaks immediately
|
||||
(in both cases the utf8 buffer is null terminated)
|
||||
*/
|
||||
bool WideToUTF8(const wchar_t * wide_string, size_t string_len, char * utf8, size_t utf8_len, int mode)
|
||||
bool wide_to_utf8(const wchar_t * wide_string, size_t string_len, char * utf8, size_t utf8_len, int mode)
|
||||
{
|
||||
size_t utf8_saved;
|
||||
bool res;
|
||||
|
@ -1022,7 +680,7 @@ bool res;
|
|||
if( utf8_len == 0 )
|
||||
return false;
|
||||
|
||||
res = WideToUTF8(wide_string, string_len, utf8, utf8_len - 1, utf8_saved, mode);
|
||||
res = wide_to_utf8(wide_string, string_len, utf8, utf8_len - 1, utf8_saved, mode);
|
||||
utf8[utf8_saved] = 0;
|
||||
|
||||
return res;
|
||||
|
@ -1050,9 +708,9 @@ return res;
|
|||
will continue converting but if the buffer is too small the function breaks immediately
|
||||
(in both cases the utf8 buffer is null terminated)
|
||||
*/
|
||||
bool WideToUTF8(const std::wstring & wide_string, char * utf8, size_t utf8_len, int mode)
|
||||
bool wide_to_utf8(const std::wstring & wide_string, char * utf8, size_t utf8_len, int mode)
|
||||
{
|
||||
return WideToUTF8(wide_string.c_str(), wide_string.size(), utf8, utf8_len, mode);
|
||||
return wide_to_utf8(wide_string.c_str(), wide_string.size(), utf8, utf8_len, mode);
|
||||
}
|
||||
|
||||
|
||||
|
@ -1077,7 +735,7 @@ bool WideToUTF8(const std::wstring & wide_string, char * utf8, size_t utf8_len,
|
|||
if there is an error when converting (there is an incorrect character in the wide string) the function
|
||||
will continue converting but if the buffer is too small the function breaks immediately
|
||||
*/
|
||||
bool WideToUTF8(const wchar_t * wide_string, char * utf8, size_t utf8_len, size_t & utf8_written, int mode)
|
||||
bool wide_to_utf8(const wchar_t * wide_string, char * utf8, size_t utf8_len, size_t & utf8_written, int mode)
|
||||
{
|
||||
bool was_error = false;
|
||||
bool was_buffer_to_small;
|
||||
|
@ -1089,7 +747,7 @@ size_t len;
|
|||
while( *wide_string )
|
||||
{
|
||||
len = (*(wide_string+1) == 0) ? 1 : 2;
|
||||
chars = WideOneToUTF8(wide_string, len, utf8, utf8_len, utf8_saved, was_buffer_to_small, was_error, mode);
|
||||
chars = private_namespace::wide_one_to_utf8(wide_string, len, utf8, utf8_len, utf8_saved, was_buffer_to_small, was_error, mode);
|
||||
|
||||
if( was_buffer_to_small )
|
||||
{
|
||||
|
@ -1132,7 +790,7 @@ return !was_error;
|
|||
will continue converting but if the buffer is too small the function breaks immediately
|
||||
(in both cases the utf8 buffer is null terminated)
|
||||
*/
|
||||
bool WideToUTF8(const wchar_t * wide_string, char * utf8, size_t utf8_len, int mode)
|
||||
bool wide_to_utf8(const wchar_t * wide_string, char * utf8, size_t utf8_len, int mode)
|
||||
{
|
||||
size_t utf8_saved;
|
||||
bool res;
|
||||
|
@ -1140,7 +798,7 @@ bool res;
|
|||
if( utf8_len == 0 )
|
||||
return false;
|
||||
|
||||
res = WideToUTF8(wide_string, utf8, utf8_len - 1, utf8_saved, mode);
|
||||
res = wide_to_utf8(wide_string, utf8, utf8_len - 1, utf8_saved, mode);
|
||||
utf8[utf8_saved] = 0;
|
||||
|
||||
return res;
|
|
@ -0,0 +1,180 @@
|
|||
/*
|
||||
* This file is a part of PikoTools
|
||||
* and is distributed under the (new) BSD licence.
|
||||
* Author: Tomasz Sowa <t.sowa@ttmath.org>
|
||||
*/
|
||||
|
||||
/*
|
||||
* Copyright (c) 2010-2021, Tomasz Sowa
|
||||
* All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are met:
|
||||
*
|
||||
* * Redistributions of source code must retain the above copyright notice,
|
||||
* this list of conditions and the following disclaimer.
|
||||
*
|
||||
* * Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
*
|
||||
* * Neither the name Tomasz Sowa nor the names of contributors to this
|
||||
* project may be used to endorse or promote products derived
|
||||
* from this software without specific prior written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
|
||||
* THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#ifndef headerfile_picotools_utf8_utf8
|
||||
#define headerfile_picotools_utf8_utf8
|
||||
|
||||
#include <string>
|
||||
#include "textstream/textstream.h"
|
||||
|
||||
|
||||
namespace pt
|
||||
{
|
||||
|
||||
/*!
|
||||
UTF-8, a transformation format of ISO 10646
|
||||
http://tools.ietf.org/html/rfc3629
|
||||
|
||||
when wchar_t is 4 bytes long we use UTF-32
|
||||
when wchar_t is 2 bytes long we use UTF-16 (with surrogate pairs)
|
||||
|
||||
UTF-16
|
||||
http://www.ietf.org/rfc/rfc2781.txt
|
||||
*/
|
||||
|
||||
|
||||
|
||||
/*!
|
||||
returns true if 'c' is a correct unicode character
|
||||
*/
|
||||
bool utf8_check_range(int c);
|
||||
|
||||
|
||||
/*!
|
||||
returns true if 'c' is a correct unicode character
|
||||
|
||||
this method is used when reading from an utf8 string
|
||||
how_many_bytes - means how many bytes from the utf8 string were read
|
||||
*/
|
||||
bool utf8_check_range(int c, int how_many_bytes);
|
||||
|
||||
|
||||
/*
|
||||
*
|
||||
*
|
||||
*
|
||||
* conversions from UTF-8
|
||||
*
|
||||
*
|
||||
*
|
||||
*/
|
||||
|
||||
/*!
|
||||
converting one character from UTF-8 to an int
|
||||
*/
|
||||
size_t utf8_to_int(const char * utf8, size_t utf8_len, int & res, bool & correct);
|
||||
size_t utf8_to_int(const char * utf8, int & res, bool & correct);
|
||||
size_t utf8_to_int(const std::string & utf8, int & res, bool & correct);
|
||||
size_t utf8_to_int(std::istream & utf8, int & res, bool & correct);
|
||||
|
||||
|
||||
/*!
|
||||
converting UTF-8 string to a wide string
|
||||
*/
|
||||
bool utf8_to_wide(const char * utf8, size_t utf8_len, std::wstring & res, bool clear = true, int mode = 1);
|
||||
bool utf8_to_wide(const char * utf8, std::wstring & res, bool clear = true, int mode = 1);
|
||||
bool utf8_to_wide(const std::string & utf8, std::wstring & res, bool clear = true, int mode = 1);
|
||||
bool utf8_to_wide(std::istream & utf8, std::wstring & res, bool clear = true, int mode = 1);
|
||||
|
||||
template<typename StreamType>
|
||||
bool utf8_to_wide(const char * utf8, size_t utf8_len, StreamType & res, bool clear = true, int mode = 1); // need to be tested
|
||||
|
||||
template<typename StreamType>
|
||||
bool utf8_to_wide(const char * utf8, StreamType & res, bool clear = true, int mode = 1); // need to be tested
|
||||
|
||||
template<typename StreamType>
|
||||
bool utf8_to_wide(const std::string & utf8, StreamType & res, bool clear = true, int mode = 1); // need to be tested
|
||||
|
||||
template<typename StreamType>
|
||||
bool utf8_to_wide(std::istream & utf8, StreamType & res, bool clear = true, int mode = 1); // need to be tested
|
||||
|
||||
|
||||
|
||||
/*
|
||||
*
|
||||
*
|
||||
*
|
||||
* conversions to UTF-8
|
||||
*
|
||||
*
|
||||
*
|
||||
*/
|
||||
|
||||
|
||||
/*!
|
||||
converting one int character to UTF-8
|
||||
*/
|
||||
size_t int_to_utf8(int z, char * utf8, size_t utf8_max_len);
|
||||
size_t int_to_utf8(int z, std::string & utf8, bool clear = true);
|
||||
|
||||
template<typename StreamType>
|
||||
size_t int_to_utf8(int z, StreamType & utf8);
|
||||
|
||||
|
||||
|
||||
/*!
|
||||
converting a wide string to UTF-8 string
|
||||
*/
|
||||
bool wide_to_utf8(const wchar_t * wide_string, size_t string_len, std::string & utf8, bool clear = true, int mode = 1);
|
||||
bool wide_to_utf8(const wchar_t * wide_string, std::string & utf8, bool clear = true, int mode = 1);
|
||||
bool wide_to_utf8(const std::wstring & wide_string, std::string & utf8, bool clear = true, int mode = 1);
|
||||
|
||||
template<typename StreamType>
|
||||
bool wide_to_utf8(const wchar_t * wide_string, size_t string_len, StreamType & utf8, int mode = 1);
|
||||
|
||||
template<typename StreamType>
|
||||
bool wide_to_utf8(const wchar_t * wide_string, StreamType & utf8, int mode = 1);
|
||||
|
||||
template<typename StreamType>
|
||||
bool wide_to_utf8(const std::wstring & wide_string, StreamType & utf8, int mode = 1);
|
||||
|
||||
|
||||
|
||||
bool wide_to_utf8(const wchar_t * wide_string, size_t string_len, char * utf8, size_t utf8_len, size_t & utf8_written, int mode = 1);
|
||||
bool wide_to_utf8(const wchar_t * wide_string, char * utf8, size_t utf8_len, size_t & utf8_written, int mode = 1);
|
||||
bool wide_to_utf8(const std::wstring & wide_string, char * utf8, size_t utf8_len, size_t & utf8_written, int mode = 1);
|
||||
|
||||
bool wide_to_utf8(const wchar_t * wide_string, size_t string_len, char * utf8, size_t utf8_len, int mode = 1);
|
||||
bool wide_to_utf8(const wchar_t * wide_string, char * utf8, size_t utf8_len, int mode = 1);
|
||||
bool wide_to_utf8(const std::wstring & wide_string, char * utf8, size_t utf8_len, int mode = 1);
|
||||
|
||||
template<typename StreamType>
|
||||
void wide_stream_to_utf8(StreamType & buffer, std::string & utf8, bool clear = true, int mode = 1); // not tested
|
||||
|
||||
template<typename StreamTypeIn, typename StreamTypeOut>
|
||||
void wide_stream_to_utf8(StreamTypeIn & buffer, StreamTypeOut & utf8, int mode = 1); // not tested
|
||||
|
||||
|
||||
|
||||
|
||||
} // namespace
|
||||
|
||||
|
||||
#include "utf8/utf8_templates.h"
|
||||
|
||||
#endif
|
||||
|
|
@ -0,0 +1,283 @@
|
|||
/*
|
||||
* This file is a part of PikoTools
|
||||
* and is distributed under the (new) BSD licence.
|
||||
* Author: Tomasz Sowa <t.sowa@ttmath.org>
|
||||
*/
|
||||
|
||||
/*
|
||||
* Copyright (c) 2021, Tomasz Sowa
|
||||
* All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are met:
|
||||
*
|
||||
* * Redistributions of source code must retain the above copyright notice,
|
||||
* this list of conditions and the following disclaimer.
|
||||
*
|
||||
* * Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
*
|
||||
* * Neither the name Tomasz Sowa nor the names of contributors to this
|
||||
* project may be used to endorse or promote products derived
|
||||
* from this software without specific prior written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
|
||||
* THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#include "utf8_private.h"
|
||||
|
||||
|
||||
namespace pt
|
||||
{
|
||||
|
||||
namespace private_namespace
|
||||
{
|
||||
|
||||
/*!
	an auxiliary function for converting from UTF-8 string

	reading the first octet of a UTF-8 sequence

	input:
		uz - the first octet

	output:
		len - length of the whole sequence in octets (1 to 4),
		      if the function returns false then len has no useful value
		res - the payload bits taken from the first octet,
		      not changed if the function returns false

	returns false if 'uz' cannot be the first octet: it has only one leading
	one-bit (10xxxxxx) or more than four leading one-bits
*/
bool utf8_to_int_first_octet(unsigned char uz, size_t & len, int & res)
{
	// counting the leading one-bits, they are shifted out of the octet
	len = 0;

	while( (uz & 0x80) != 0 )
	{
		uz = static_cast<unsigned char>(uz << 1);
		len += 1;
	}

	if( len == 1 || len > 4 )
		return false;

	// the payload bits were shifted to the left together with the leading bits,
	// now they are moved back (for len equal to zero nothing is moved)
	res = uz >> len;

	// no leading one-bits: a one octet sequence
	if( len == 0 )
		len = 1;

	return true;
}
|
||||
|
||||
|
||||
|
||||
/*!
	an auxiliary function for converting from UTF-8 string

	adding a continuation octet (10xxxxxx) to the value being read

	input:
		uz - the next octet of a UTF-8 sequence

	output:
		res - shifted by six bits to the left with the six payload bits of 'uz' added,
		      not changed if the function returns false

	returns false if 'uz' is not a continuation octet
*/
bool utf8_to_int_add_next_octet(unsigned char uz, int & res)
{
	const bool is_continuation = ((uz & 0xc0) == 0x80);

	if( is_continuation )
		res = (res << 6) | (uz & 0x3F);

	return is_continuation;
}
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
/*
|
||||
an auxiliary function for converting from wide characters to UTF-8
|
||||
converting a wide character into one int
|
||||
|
||||
returns how many wide characters were used
|
||||
if string_len is greater than 0 then the return value is always greater than zero too
|
||||
*/
|
||||
size_t wide_to_int(const wchar_t * wide_string, size_t string_len, int & z, bool & correct)
|
||||
{
|
||||
if( string_len == 0 )
|
||||
{
|
||||
z = 0;
|
||||
correct = false;
|
||||
return 0;
|
||||
}
|
||||
|
||||
z = static_cast<int>(*wide_string);
|
||||
correct = true;
|
||||
|
||||
if( sizeof(wchar_t) == 2 && (z>=0xD800 && z<=0xDFFF) )
|
||||
{
|
||||
if( z>=0xD800 && z<=0xDBFF && string_len>1 )
|
||||
{
|
||||
int z2 = *(wide_string+1);
|
||||
|
||||
if( z2>=0xDC00 && z2<=0xDFFF )
|
||||
{
|
||||
z = 0x10000 + (((z & 0x3FF) << 10) | (z2 & 0x3FF));
|
||||
return 2;
|
||||
}
|
||||
else
|
||||
{
|
||||
correct = false;
|
||||
return 2;
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
correct = false;
|
||||
return 1;
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
correct = utf8_check_range(z);
|
||||
return 1;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
|
||||
/*
|
||||
an auxiliary function for converting from wide characters to UTF-8
|
||||
converting a wide character into one int
|
||||
|
||||
returns how many wide characters were used
|
||||
if wide_string has at least one character then the return value is always greater than zero too
|
||||
*/
|
||||
size_t wide_to_int(const wchar_t * wide_string, int & z, bool & correct)
|
||||
{
|
||||
size_t min_str_len = 1;
|
||||
|
||||
if( *wide_string == 0 )
|
||||
{
|
||||
z = 0;
|
||||
correct = false;
|
||||
return 0;
|
||||
}
|
||||
|
||||
if( *(wide_string+1) != 0 )
|
||||
min_str_len = 2;
|
||||
|
||||
return wide_to_int(wide_string, min_str_len, z, correct);
|
||||
}
|
||||
|
||||
|
||||
|
||||
/*!
|
||||
an auxiliary function for converting from wide characters to UTF-8
|
||||
|
||||
returns how many wide characters were used
|
||||
if string_len is greater than 0 then the return value is always greater than zero too
|
||||
|
||||
utf8_written - how many characters were saved in the utf8 string (the string doesn't have
|
||||
a null terminating character)
|
||||
it can be equal to zero if the utf8 buffer is too small or there was an incorrect wide character read
|
||||
was_utf8_buf_too_small - will be true if the utf8 buffer is too small
|
||||
if this flag is true then utf8_written is equal to zero
|
||||
was_error - will be true if there is an error when converting (there was an incorrect wide character)
|
||||
(was_error will not be true if the utf8 buffer is too small)
|
||||
*/
|
||||
size_t wide_one_to_utf8(const wchar_t * wide_string, size_t string_len, char * utf8, size_t utf8_len,
|
||||
size_t & utf8_written, bool & was_utf8_buf_too_small, bool & was_error, int mode)
|
||||
{
|
||||
int z;
|
||||
bool correct;
|
||||
size_t chars;
|
||||
|
||||
utf8_written = 0;
|
||||
was_utf8_buf_too_small = false;
|
||||
chars = wide_to_int(wide_string, string_len, z, correct);
|
||||
|
||||
if( correct )
|
||||
{
|
||||
utf8_written = int_to_utf8(z, utf8, utf8_len);
|
||||
|
||||
if( utf8_written == 0 )
|
||||
was_utf8_buf_too_small = true;
|
||||
}
|
||||
else
|
||||
{
|
||||
if( mode == 1 )
|
||||
{
|
||||
utf8_written = int_to_utf8(0xFFFD, utf8, utf8_len); // U+FFFD "replacement character"
|
||||
|
||||
if( utf8_written == 0 )
|
||||
was_utf8_buf_too_small = true;
|
||||
}
|
||||
|
||||
was_error = true;
|
||||
}
|
||||
|
||||
return chars;
|
||||
}
|
||||
|
||||
|
||||
|
||||
/*!
|
||||
an auxiliary function for converting from wide characters to UTF-8
|
||||
|
||||
returns how many wide characters were used
|
||||
if string_len is greater than 0 then the return value is always greater than zero too
|
||||
*/
|
||||
size_t wide_one_to_utf8(const wchar_t * wide_string, size_t string_len, std::string & utf8, bool & was_error, int mode)
|
||||
{
|
||||
int z;
|
||||
bool correct;
|
||||
size_t chars;
|
||||
|
||||
chars = wide_to_int(wide_string, string_len, z, correct);
|
||||
|
||||
if( correct )
|
||||
correct = int_to_utf8(z, utf8, false) != 0;
|
||||
|
||||
if( !correct )
|
||||
{
|
||||
if( mode == 1 )
|
||||
int_to_utf8(0xFFFD, utf8, false); // U+FFFD "replacement character"
|
||||
|
||||
was_error = true;
|
||||
}
|
||||
|
||||
return chars;
|
||||
}
|
||||
|
||||
|
||||
|
||||
/*!
|
||||
an auxiliary function for converting from wide characters to UTF-8
|
||||
|
||||
returns how many wide characters were used
|
||||
if wide_string has at least one character then the return value is always greater than zero too
|
||||
*/
|
||||
size_t wide_one_to_utf8(const wchar_t * wide_string, std::string & utf8, bool & was_error, int mode)
|
||||
{
|
||||
int z;
|
||||
bool correct;
|
||||
size_t chars;
|
||||
|
||||
chars = wide_to_int(wide_string, z, correct);
|
||||
|
||||
if( correct )
|
||||
correct = int_to_utf8(z, utf8, false) != 0;
|
||||
|
||||
if( !correct )
|
||||
{
|
||||
if( mode == 1 )
|
||||
int_to_utf8(0xFFFD, utf8, false); // U+FFFD "replacement character"
|
||||
|
||||
was_error = true;
|
||||
}
|
||||
|
||||
return chars;
|
||||
}
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
} // namespace private_namespace
|
||||
|
||||
} // namespace pt
|
||||
|
||||
|
||||
|
|
@ -0,0 +1,220 @@
|
|||
/*
|
||||
* This file is a part of PikoTools
|
||||
* and is distributed under the (new) BSD licence.
|
||||
* Author: Tomasz Sowa <t.sowa@ttmath.org>
|
||||
*/
|
||||
|
||||
/*
|
||||
* Copyright (c) 2021, Tomasz Sowa
|
||||
* All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are met:
|
||||
*
|
||||
* * Redistributions of source code must retain the above copyright notice,
|
||||
* this list of conditions and the following disclaimer.
|
||||
*
|
||||
* * Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
*
|
||||
* * Neither the name Tomasz Sowa nor the names of contributors to this
|
||||
* project may be used to endorse or promote products derived
|
||||
* from this software without specific prior written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
|
||||
* THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#ifndef headerfile_picotools_utf8_utf8_private
|
||||
#define headerfile_picotools_utf8_utf8_private
|
||||
|
||||
#include "textstream/textstream.h"
|
||||
|
||||
|
||||
namespace pt
|
||||
{
|
||||
|
||||
bool utf8_check_range(int c);
|
||||
size_t int_to_utf8(int z, char * utf8, size_t utf8_max_len);
|
||||
size_t int_to_utf8(int z, std::string & utf8, bool clear);
|
||||
size_t utf8_to_int(const char * utf8, size_t utf8_len, int & res, bool & correct);
|
||||
|
||||
|
||||
namespace private_namespace
|
||||
{
|
||||
bool utf8_to_int_first_octet(unsigned char uz, size_t & len, int & res);
|
||||
bool utf8_to_int_add_next_octet(unsigned char uz, int & res);
|
||||
|
||||
size_t wide_to_int(const wchar_t * wide_string, size_t string_len, int & z, bool & correct);
|
||||
size_t wide_to_int(const wchar_t * wide_string, int & z, bool & correct);
|
||||
|
||||
size_t wide_one_to_utf8(const wchar_t * wide_string, size_t string_len, char * utf8, size_t utf8_len,
|
||||
size_t & utf8_written, bool & was_utf8_buf_too_small, bool & was_error, int mode);
|
||||
|
||||
size_t wide_one_to_utf8(const wchar_t * wide_string, size_t string_len, std::string & utf8, bool & was_error, int mode);
|
||||
|
||||
size_t wide_one_to_utf8(const wchar_t * wide_string, std::string & utf8, bool & was_error, int mode);
|
||||
|
||||
|
||||
/*!
|
||||
an auxiliary function for converting from wide characters to UTF-8
|
||||
|
||||
returns how many wide characters were used
|
||||
if string_len is greater than 0 then the return value is always greater than zero too
|
||||
*/
|
||||
template<typename StreamType>
|
||||
static size_t wide_one_to_utf8(const wchar_t * wide_string, size_t string_len, StreamType & utf8, bool & was_error, int mode)
|
||||
{
|
||||
int z;
|
||||
bool correct;
|
||||
size_t chars;
|
||||
|
||||
chars = wide_to_int(wide_string, string_len, z, correct);
|
||||
|
||||
if( correct )
|
||||
correct = int_to_utf8(z, utf8) != 0;
|
||||
|
||||
if( !correct )
|
||||
{
|
||||
if( mode == 1 )
|
||||
int_to_utf8(0xFFFD, utf8); // U+FFFD "replacement character"
|
||||
|
||||
was_error = true;
|
||||
}
|
||||
|
||||
return chars;
|
||||
}
|
||||
|
||||
|
||||
/*!
	an auxiliary function for converting from wide characters to UTF-8
	(the input is a null terminated string, the output is written to a stream)

	at most two wide characters are passed to the length based wide_one_to_utf8()
	(two only when the second character is not the terminating zero)

	returns how many wide characters were used, zero for an empty string
*/
template<typename StreamType>
static size_t wide_one_to_utf8(const wchar_t * wide_string, StreamType & utf8, bool & was_error, int mode)
{
	if( wide_string[0] == 0 )
		return 0;

	const size_t available = (wide_string[1] != 0) ? 2 : 1;

	return wide_one_to_utf8(wide_string, available, utf8, was_error, mode);
}
|
||||
|
||||
|
||||
|
||||
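// pt::utf8_to_int is declared in utf8.h and defined in utf8.cpp
// NOTE(review): the declaration below is placed inside private_namespace, so it names
// pt::private_namespace::utf8_to_int and not the pt::utf8_to_int which
// utf8_to_wide_generic() calls with an explicit pt:: prefix - confirm that it is still needed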
|
||||
size_t utf8_to_int(const char * utf8, size_t utf8_len, int & res, bool & correct);
|
||||
|
||||
|
||||
|
||||
template<typename function_type>
|
||||
bool utf8_to_wide_generic(const char * utf8, size_t utf8_len, int mode, function_type convert_function)
|
||||
{
|
||||
int z;
|
||||
size_t len;
|
||||
bool correct, was_error = false;
|
||||
|
||||
while( utf8_len > 0 )
|
||||
{
|
||||
if( (unsigned char)*utf8 <= 0x7f )
|
||||
{
|
||||
// small optimization
|
||||
len = 1;
|
||||
correct = true;
|
||||
z = static_cast<unsigned char>(*utf8);
|
||||
}
|
||||
else
|
||||
{
|
||||
len = pt::utf8_to_int(utf8, utf8_len, z, correct); // the len will be different from zero
|
||||
}
|
||||
|
||||
if( !correct )
|
||||
{
|
||||
if( mode == 1 )
|
||||
convert_function(0xFFFD); // U+FFFD "replacement character"
|
||||
|
||||
was_error = true;
|
||||
}
|
||||
else
|
||||
{
|
||||
convert_function(z);
|
||||
}
|
||||
|
||||
utf8 += len;
|
||||
utf8_len -= len;
|
||||
}
|
||||
|
||||
return !was_error;
|
||||
}
|
||||
|
||||
|
||||
|
||||
/*
	an auxiliary function: puts the code point c to the res stream as wchar_t

	when wchar_t has two bytes and c is greater than 0xffff
	the code point is written as a UTF-16 surrogate pair (two wchar_t values),
	in other cases c is simply cast to wchar_t
*/
template<typename StreamType>
void int_to_wide(int c, StreamType & res)
{
	const bool needs_surrogate_pair = (sizeof(wchar_t) == 2) && (c > 0xffff);

	if( !needs_surrogate_pair )
	{
		res << static_cast<wchar_t>(c);
		return;
	}

	// UTF16 surrogate pairs
	c -= 0x10000;

	const wchar_t high = static_cast<wchar_t>(((c >> 10) & 0x3FF) + 0xD800);
	const wchar_t low  = static_cast<wchar_t>((c & 0x3FF) + 0xDC00);

	res << high;
	res << low;
}
|
||||
|
||||
|
||||
// not tested
|
||||
// FIX ME it is not using surrogate pairs from input stream
|
||||
// and mode parameter
|
||||
template<typename char_type, size_t stack_size, size_t heap_block_size, typename function_type>
|
||||
void wide_to_utf8_generic(TextStreamBase<char_type, stack_size, heap_block_size> & buffer, int mode, function_type write_function)
|
||||
{
|
||||
char utf8_buffer[256];
|
||||
std::size_t buffer_len = sizeof(utf8_buffer) / sizeof(char);
|
||||
std::size_t utf8_sequence_max_length = 10;
|
||||
std::size_t index = 0;
|
||||
|
||||
typename TextStreamBase<char_type, stack_size, heap_block_size>::const_iterator i = buffer.begin();
|
||||
|
||||
while( i != buffer.end() )
|
||||
{
|
||||
if( index + utf8_sequence_max_length > buffer_len )
|
||||
{
|
||||
write_function(utf8_buffer, index);
|
||||
index = 0;
|
||||
}
|
||||
|
||||
index += int_to_utf8(*i, utf8_buffer + index, buffer_len - index);
|
||||
++i;
|
||||
}
|
||||
|
||||
if( index > 0 )
|
||||
{
|
||||
write_function(utf8_buffer, index);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
|
||||
|
||||
} // namespace private_namespace
|
||||
|
||||
} // namespace pt
|
||||
|
||||
#endif
|
|
@ -0,0 +1,271 @@
|
|||
/*
|
||||
* This file is a part of PikoTools
|
||||
* and is distributed under the (new) BSD licence.
|
||||
* Author: Tomasz Sowa <t.sowa@ttmath.org>
|
||||
*/
|
||||
|
||||
/*
|
||||
* Copyright (c) 2021, Tomasz Sowa
|
||||
* All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are met:
|
||||
*
|
||||
* * Redistributions of source code must retain the above copyright notice,
|
||||
* this list of conditions and the following disclaimer.
|
||||
*
|
||||
* * Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
*
|
||||
* * Neither the name Tomasz Sowa nor the names of contributors to this
|
||||
* project may be used to endorse or promote products derived
|
||||
* from this software without specific prior written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
|
||||
* THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#ifndef headerfile_picotools_utf8_utf8_templates
|
||||
#define headerfile_picotools_utf8_utf8_templates
|
||||
|
||||
// this file is included at the end of utf8.h
|
||||
|
||||
#include "utf8_private.h"
|
||||
|
||||
|
||||
namespace pt
|
||||
{
|
||||
|
||||
|
||||
/*!
|
||||
converting UTF-8 string to a TextStreamBase<wchar_t,...> stream
|
||||
(need to be tested)
|
||||
*/
|
||||
// need to be tested
|
||||
template<typename StreamType>
|
||||
bool utf8_to_wide(const char * utf8, size_t utf8_len, StreamType & res, bool clear, int mode)
|
||||
{
|
||||
if( clear )
|
||||
res.clear();
|
||||
|
||||
bool status = private_namespace::utf8_to_wide_generic(utf8, utf8_len, mode, [&res](int c) {
|
||||
private_namespace::int_to_wide(c, res);
|
||||
});
|
||||
|
||||
return status;
|
||||
}
|
||||
|
||||
|
||||
|
||||
|
||||
/*
	converting a null terminated UTF-8 string to a wide stream

	the length of the string is counted up to the terminating zero
	and the work is done by the length based utf8_to_wide()
*/
template<typename StreamType>
bool utf8_to_wide(const char * utf8, StreamType & res, bool clear, int mode)
{
	const char * terminator = utf8;

	while( *terminator != 0 )
		++terminator;

	size_t utf8_len = static_cast<size_t>(terminator - utf8);

	return utf8_to_wide(utf8, utf8_len, res, clear, mode);
}
|
||||
|
||||
|
||||
|
||||
/*
	converting a UTF-8 std::string to a wide stream

	the whole string (utf8.size() characters) is passed
	to the length based utf8_to_wide()
*/
template<typename StreamType>
bool utf8_to_wide(const std::string & utf8, StreamType & res, bool clear, int mode)
{
	const char * input = utf8.c_str();
	size_t input_len = utf8.size();

	return utf8_to_wide(input, input_len, res, clear, mode);
}
|
||||
|
||||
|
||||
|
||||
// need to be tested
|
||||
template<typename StreamType>
|
||||
bool utf8_to_wide(std::istream & utf8, StreamType & res, bool clear, int mode)
|
||||
{
|
||||
int z;
|
||||
bool correct, was_error = false;
|
||||
|
||||
if( clear )
|
||||
res.clear();
|
||||
|
||||
while( utf8_to_int(utf8, z, correct) > 0 )
|
||||
{
|
||||
if( !correct )
|
||||
{
|
||||
if( mode == 1 )
|
||||
res << 0xFFFD; // U+FFFD "replacement character"
|
||||
|
||||
was_error = true;
|
||||
}
|
||||
else
|
||||
{
|
||||
private_namespace::int_to_wide(z, res);
|
||||
}
|
||||
}
|
||||
|
||||
return !was_error;
|
||||
}
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
/*!
|
||||
this function converts one wide character into UTF-8 stream
|
||||
|
||||
input:
|
||||
z - wide character
|
||||
|
||||
output:
|
||||
utf8 - a UTF-8 stream for the output sequence
|
||||
|
||||
the function returns how many characters have been written to the utf8 stream,
|
||||
zero means that 'z' is an incorrect unicode character
|
||||
*/
|
||||
template<typename StreamType>
|
||||
size_t int_to_utf8(int z, StreamType & utf8)
|
||||
{
|
||||
char buf[10];
|
||||
|
||||
size_t len = int_to_utf8(z, buf, sizeof(buf)/sizeof(char));
|
||||
|
||||
if( len > 0 )
|
||||
utf8.write(buf, len);
|
||||
|
||||
return len;
|
||||
}
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
/*!
|
||||
this function converts a wide string into UTF-8 stream
|
||||
|
||||
input:
|
||||
wide_string - a wide string for converting
|
||||
string_len - size of the string
|
||||
mode - what to do with errors when converting
|
||||
0: skip an invalid character
|
||||
1: put U+FFFD "replacement character" istead of the invalid character (default)
|
||||
|
||||
output:
|
||||
utf8 - a UTF-8 stream for the output sequence
|
||||
|
||||
this function returns false if there were some errors when converting
|
||||
*/
|
||||
template<typename StreamType>
|
||||
bool wide_to_utf8(const wchar_t * wide_string, size_t string_len, StreamType & utf8, int mode)
|
||||
{
|
||||
bool was_error = false;
|
||||
size_t chars;
|
||||
|
||||
while( string_len > 0 )
|
||||
{
|
||||
chars = private_namespace::wide_one_to_utf8(wide_string, string_len, utf8, was_error, mode);
|
||||
wide_string += chars;
|
||||
string_len -= chars;
|
||||
}
|
||||
|
||||
return !was_error;
|
||||
}
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
/*!
|
||||
this function converts a wide string into UTF-8 stream
|
||||
|
||||
input:
|
||||
wide_string - a null terminated wide string for converting
|
||||
mode - what to do with errors when converting
|
||||
0: skip an invalid character
|
||||
1: put U+FFFD "replacement character" istead of the invalid character (default)
|
||||
|
||||
output:
|
||||
utf8 - a UTF-8 stream for the output sequence
|
||||
|
||||
this function returns false if there were some errors when converting
|
||||
*/
|
||||
template<typename StreamType>
|
||||
bool wide_to_utf8(const wchar_t * wide_string, StreamType & utf8, int mode)
|
||||
{
|
||||
bool was_error = false;
|
||||
|
||||
while( *wide_string )
|
||||
wide_string += private_namespace::wide_one_to_utf8(wide_string, utf8, was_error, mode);
|
||||
|
||||
return !was_error;
|
||||
}
|
||||
|
||||
|
||||
|
||||
/*!
	this function converts a wide string (std::wstring) into UTF-8 stream

	input:
		wide_string - a wide string for converting
		mode        - what to do with errors when converting
		              0: skip an invalid character
		              1: put U+FFFD "replacement character" instead of the invalid character

	output:
		utf8 - a UTF-8 stream for the output sequence

	this function returns false if there were some errors when converting
*/
template<typename StreamType>
bool wide_to_utf8(const std::wstring & wide_string, StreamType & utf8, int mode)
{
	const wchar_t * input = wide_string.c_str();
	size_t input_len = wide_string.size();

	return wide_to_utf8(input, input_len, utf8, mode);
}
|
||||
|
||||
|
||||
|
||||
|
||||
template<typename StreamType>
|
||||
void wide_stream_to_utf8(StreamType & buffer, std::string & utf8, bool clear, int mode)
|
||||
{
|
||||
if( clear )
|
||||
utf8.clear();
|
||||
|
||||
private_namespace::wide_to_utf8_generic(buffer, mode, [&utf8](const char * utf8_buffer, std::size_t buffer_len){
|
||||
utf8.append(utf8_buffer, buffer_len);
|
||||
});
|
||||
}
|
||||
|
||||
|
||||
// not tested
|
||||
template<typename StreamTypeIn, typename StreamTypeOut>
|
||||
void wide_stream_to_utf8(StreamTypeIn & buffer, StreamTypeOut & utf8, int mode)
|
||||
{
|
||||
private_namespace::wide_to_utf8_generic(buffer, mode, [&utf8](const char * utf8_buffer, std::size_t buffer_len){
|
||||
utf8.write(utf8_buffer, buffer_len);
|
||||
});
|
||||
}
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
} // namespace pt
|
||||
|
||||
#endif
|
||||
|
||||
|
||||
|
|
@ -0,0 +1,45 @@
|
|||
# Makefile for the test program (GNU make)
# every *.cpp file found below this directory is compiled and linked
# with the static library ../src/pikotools.a

sourcefiles:=$(shell find . -name "*.cpp")
objfiles:=$(patsubst %.cpp,%.o,$(sourcefiles))


ifndef CXX
CXX = g++
endif

ifndef CXXFLAGS
CXXFLAGS = -Wall -pedantic -O2 -std=c++20 -I../src -I/usr/local/include
endif


progname = tests
pikotoolslibfile = ../src/pikotools.a


# these targets do not create files with such names,
# without .PHONY a file called e.g. "clean" would stop "make clean" from running
.PHONY: all clean depend


all: $(progname)


# FORCE has no prerequisites and no recipe so the program is linked on every run
# (presumably to always pick up a rebuilt $(pikotoolslibfile))
$(progname): $(objfiles) FORCE
	$(CXX) $(CXXFLAGS) -o $(progname) $(objfiles) $(pikotoolslibfile)


%.o: %.cpp
	$(CXX) -c $(CXXFLAGS) -o $@ $<


clean:
	rm -f $(objfiles)
	rm -f $(progname)


# regenerates Makefile.dep (header dependencies of the test sources)
depend:
	makedepend -Y. -I../src -f- $(sourcefiles) > Makefile.dep


FORCE:


-include Makefile.dep
|
||||
|
|
@ -0,0 +1,25 @@
|
|||
# DO NOT DELETE
|
||||
|
||||
./main.o: convert.h mainoptionsparser.h csvparser.h
|
||||
./convert.o: convert.h test.h ../src/convert/convert.h
|
||||
./convert.o: ../src/convert/inttostr.h ../src/convert/patternreplacer.h
|
||||
./convert.o: ../src/textstream/textstream.h ../src/space/space.h
|
||||
./convert.o: ../src/textstream/types.h ../src/convert/inttostr.h
|
||||
./convert.o: ../src/date/date.h ../src/membuffer/membuffer.h
|
||||
./convert.o: ../src/textstream/types.h ../src/convert/strtoint.h
|
||||
./convert.o: ../src/convert/text.h ../src/convert/misc.h
|
||||
./test.o: test.h
|
||||
./mainoptionsparser.o: mainoptionsparser.h test.h
|
||||
./mainoptionsparser.o: ../src/mainoptions/mainoptionsparser.h
|
||||
./mainoptionsparser.o: ../src/space/space.h ../src/textstream/types.h
|
||||
./mainoptionsparser.o: ../src/convert/inttostr.h ../src/utf8/utf8.h
|
||||
./mainoptionsparser.o: ../src/textstream/textstream.h ../src/date/date.h
|
||||
./mainoptionsparser.o: ../src/membuffer/membuffer.h ../src/textstream/types.h
|
||||
./mainoptionsparser.o: ../src/utf8/utf8_templates.h
|
||||
./mainoptionsparser.o: ../src/utf8/utf8_private.h ../src/convert/convert.h
|
||||
./mainoptionsparser.o: ../src/convert/inttostr.h
|
||||
./mainoptionsparser.o: ../src/convert/patternreplacer.h
|
||||
./mainoptionsparser.o: ../src/convert/strtoint.h ../src/convert/text.h
|
||||
./mainoptionsparser.o: ../src/convert/misc.h
|
||||
./csvparser.o: csvparser.h ../src/csv/csvparser.h ../src/space/space.h
|
||||
./csvparser.o: ../src/textstream/types.h ../src/convert/inttostr.h test.h
|
File diff suppressed because it is too large
Load Diff
|
@ -0,0 +1,57 @@
|
|||
/*
|
||||
* This file is a part of PikoTools
|
||||
* and is distributed under the (new) BSD licence.
|
||||
* Author: Tomasz Sowa <t.sowa@ttmath.org>
|
||||
*/
|
||||
|
||||
/*
|
||||
* Copyright (c) 2021, Tomasz Sowa
|
||||
* All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are met:
|
||||
*
|
||||
* * Redistributions of source code must retain the above copyright notice,
|
||||
* this list of conditions and the following disclaimer.
|
||||
*
|
||||
* * Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
*
|
||||
* * Neither the name Tomasz Sowa nor the names of contributors to this
|
||||
* project may be used to endorse or promote products derived
|
||||
* from this software without specific prior written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
|
||||
* THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#ifndef headerfile_picotools_tests_convert
|
||||
#define headerfile_picotools_tests_convert
|
||||
|
||||
namespace pt
{

namespace pt_convert_tests
{

// entry point of the convert tests, only declared in this header
void make_tests();

} // namespace pt_convert_tests

} // namespace pt
|
||||
|
||||
#endif
|
|
@ -0,0 +1,326 @@
|
|||
/*
|
||||
* This file is a part of PikoTools
|
||||
* and is distributed under the (new) BSD licence.
|
||||
* Author: Tomasz Sowa <t.sowa@ttmath.org>
|
||||
*/
|
||||
|
||||
/*
|
||||
* Copyright (c) 2021, Tomasz Sowa
|
||||
* All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are met:
|
||||
*
|
||||
* * Redistributions of source code must retain the above copyright notice,
|
||||
* this list of conditions and the following disclaimer.
|
||||
*
|
||||
* * Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
*
|
||||
* * Neither the name Tomasz Sowa nor the names of contributors to this
|
||||
* project may be used to endorse or promote products derived
|
||||
* from this software without specific prior written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
|
||||
* THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#include "csvparser.h"
|
||||
#include "csv/csvparser.h"
|
||||
#include "test.h"
|
||||
|
||||
|
||||
|
||||
namespace pt
|
||||
{
|
||||
|
||||
namespace pt_csvparser_tests
|
||||
{
|
||||
|
||||
|
||||
|
||||
void test_csvparser(const char * input_str, const char * expected_json)
|
||||
{
|
||||
CSVParser csv_parser;
|
||||
Space space;
|
||||
std::string json;
|
||||
|
||||
CSVParser::Status status = csv_parser.parse(input_str, space);
|
||||
|
||||
space.serialize_to_json_to(json);
|
||||
|
||||
std::cout << "csv parsed as: " << json << std::endl;
|
||||
test(json.c_str(), expected_json);
|
||||
}
|
||||
|
||||
|
||||
|
||||
void test_csvparser1()
|
||||
{
|
||||
const char * input_str = "";
|
||||
const char * expected_json = R"json([[]])json";
|
||||
|
||||
test_csvparser(input_str, expected_json);
|
||||
}
|
||||
|
||||
|
||||
void test_csvparser2()
|
||||
{
|
||||
const char * input_str = ",";
|
||||
const char * expected_json = R"json([["",""]])json";
|
||||
|
||||
test_csvparser(input_str, expected_json);
|
||||
}
|
||||
|
||||
void test_csvparser3()
|
||||
{
|
||||
const char * input_str = "field1";
|
||||
const char * expected_json = R"json([["field1"]])json";
|
||||
|
||||
test_csvparser(input_str, expected_json);
|
||||
}
|
||||
|
||||
void test_csvparser4()
|
||||
{
|
||||
const char * input_str = R"csvstring(field1,field2,field3)csvstring";
|
||||
const char * expected_json = R"json([["field1","field2","field3"]])json";
|
||||
|
||||
test_csvparser(input_str, expected_json);
|
||||
}
|
||||
|
||||
|
||||
void test_csvparser5()
|
||||
{
|
||||
const char * input_str = "\n";
|
||||
const char * expected_json = R"json([[""]])json";
|
||||
|
||||
test_csvparser(input_str, expected_json);
|
||||
}
|
||||
|
||||
void test_csvparser6()
|
||||
{
|
||||
const char * input_str = "\r\n";
|
||||
const char * expected_json = R"json([[""]])json";
|
||||
|
||||
test_csvparser(input_str, expected_json);
|
||||
}
|
||||
|
||||
void test_csvparser7()
|
||||
{
|
||||
const char * input_str = "field1\r\n";
|
||||
const char * expected_json = R"json([["field1"]])json";
|
||||
|
||||
test_csvparser(input_str, expected_json);
|
||||
}
|
||||
|
||||
void test_csvparser8()
|
||||
{
|
||||
const char * input_str = ",\r\n";
|
||||
const char * expected_json = R"json([["",""]])json";
|
||||
|
||||
test_csvparser(input_str, expected_json);
|
||||
}
|
||||
|
||||
void test_csvparser9()
|
||||
{
|
||||
const char * input_str = "field1\r\nfield2";
|
||||
const char * expected_json = R"json([["field1"],["field2"]])json";
|
||||
|
||||
test_csvparser(input_str, expected_json);
|
||||
}
|
||||
|
||||
void test_csvparser10()
|
||||
{
|
||||
const char * input_str = "field1,field2\r\nfield3,field4";
|
||||
const char * expected_json = R"json([["field1","field2"],["field3","field4"]])json";
|
||||
|
||||
test_csvparser(input_str, expected_json);
|
||||
}
|
||||
|
||||
void test_csvparser11()
|
||||
{
|
||||
const char * input_str = "field1,field2\r\nfield3,field4\r\n";
|
||||
const char * expected_json = R"json([["field1","field2"],["field3","field4"]])json";
|
||||
|
||||
test_csvparser(input_str, expected_json);
|
||||
}
|
||||
|
||||
void test_csvparser12()
|
||||
{
|
||||
const char * input_str = "field1,field2\nfield3,field4\n";
|
||||
const char * expected_json = R"json([["field1","field2"],["field3","field4"]])json";
|
||||
|
||||
test_csvparser(input_str, expected_json);
|
||||
}
|
||||
|
||||
void test_csvparser13()
|
||||
{
|
||||
const char * input_str = R"csv("")csv";
|
||||
const char * expected_json = R"json([[""]])json";
|
||||
|
||||
test_csvparser(input_str, expected_json);
|
||||
}
|
||||
|
||||
void test_csvparser14()
|
||||
{
|
||||
const char * input_str = "\"\"\n";
|
||||
const char * expected_json = R"json([[""]])json";
|
||||
|
||||
test_csvparser(input_str, expected_json);
|
||||
}
|
||||
|
||||
void test_csvparser15()
|
||||
{
|
||||
const char * input_str = "\"\"\r\n";
|
||||
const char * expected_json = R"json([[""]])json";
|
||||
|
||||
test_csvparser(input_str, expected_json);
|
||||
}
|
||||
|
||||
void test_csvparser16()
|
||||
{
|
||||
const char * input_str = "\"\",\r\n";
|
||||
const char * expected_json = R"json([["",""]])json";
|
||||
|
||||
test_csvparser(input_str, expected_json);
|
||||
}
|
||||
|
||||
void test_csvparser17()
|
||||
{
|
||||
const char * input_str = "\"\",\n";
|
||||
const char * expected_json = R"json([["",""]])json";
|
||||
|
||||
test_csvparser(input_str, expected_json);
|
||||
}
|
||||
|
||||
void test_csvparser18()
|
||||
{
|
||||
const char * input_str = "\"field1\"";
|
||||
const char * expected_json = R"json([["field1"]])json";
|
||||
|
||||
test_csvparser(input_str, expected_json);
|
||||
}
|
||||
|
||||
void test_csvparser19()
|
||||
{
|
||||
const char * input_str = "\"field1, with comma\"";
|
||||
const char * expected_json = R"json([["field1, with comma"]])json";
|
||||
|
||||
test_csvparser(input_str, expected_json);
|
||||
}
|
||||
|
||||
void test_csvparser20()
|
||||
{
|
||||
const char * input_str = "\"field1, with comma\"\r\n";
|
||||
const char * expected_json = R"json([["field1, with comma"]])json";
|
||||
|
||||
test_csvparser(input_str, expected_json);
|
||||
}
|
||||
|
||||
void test_csvparser21()
|
||||
{
|
||||
const char * input_str = "\"field1, with comma\"\n";
|
||||
const char * expected_json = R"json([["field1, with comma"]])json";
|
||||
|
||||
test_csvparser(input_str, expected_json);
|
||||
}
|
||||
|
||||
|
||||
void test_csvparser22()
|
||||
{
|
||||
const char * input_str = "\"field1, with comma\",\"field2\"";
|
||||
const char * expected_json = R"json([["field1, with comma","field2"]])json";
|
||||
|
||||
test_csvparser(input_str, expected_json);
|
||||
}
|
||||
|
||||
void test_csvparser23()
|
||||
{
|
||||
const char * input_str = "\"field1, with comma\",\"field2\"\r\n\"field3\",\"field4, with comma\"";
|
||||
const char * expected_json = R"json([["field1, with comma","field2"],["field3","field4, with comma"]])json";
|
||||
|
||||
test_csvparser(input_str, expected_json);
|
||||
}
|
||||
|
||||
void test_csvparser24()
|
||||
{
|
||||
const char * input_str = "\"field1, with comma\",\"field2\"\r\n\"field3\",\"field4, with comma\"\r\n";
|
||||
const char * expected_json = R"json([["field1, with comma","field2"],["field3","field4, with comma"]])json";
|
||||
|
||||
test_csvparser(input_str, expected_json);
|
||||
}
|
||||
|
||||
void test_csvparser25()
|
||||
{
|
||||
const char * input_str = "\"field1, with comma\",\"field2 with \"\" double quote\"\r\n\"field3\",\"field4, with comma\"";
|
||||
const char * expected_json = R"json([["field1, with comma","field2 with \" double quote"],["field3","field4, with comma"]])json";
|
||||
|
||||
test_csvparser(input_str, expected_json);
|
||||
}
|
||||
|
||||
void test_csvparser26()
|
||||
{
|
||||
const char * input_str = "\"field1, with comma\",\"field2 with \"\" double quote\"\n\"field3\",\"field4, with comma\"\n";
|
||||
const char * expected_json = R"json([["field1, with comma","field2 with \" double quote"],["field3","field4, with comma"]])json";
|
||||
|
||||
test_csvparser(input_str, expected_json);
|
||||
}
|
||||
|
||||
void test_csvparser27()
|
||||
{
|
||||
const char * input_str = "\"field1, with comma\",\"field2 with \"\" double quote\"syntax error\n\"field3\",\"field4, with comma\"\n";
|
||||
const char * expected_json = R"json([["field1, with comma","field2 with \" double quote"],["syntax error"],["field3","field4, with comma"]])json";
|
||||
|
||||
test_csvparser(input_str, expected_json);
|
||||
}
|
||||
|
||||
void make_tests()
|
||||
{
|
||||
reset_test_counter("CSVParser");
|
||||
test_csvparser1();
|
||||
test_csvparser2();
|
||||
test_csvparser3();
|
||||
test_csvparser4();
|
||||
test_csvparser5();
|
||||
test_csvparser6();
|
||||
test_csvparser7();
|
||||
test_csvparser8();
|
||||
test_csvparser9();
|
||||
test_csvparser10();
|
||||
test_csvparser11();
|
||||
test_csvparser12();
|
||||
test_csvparser13();
|
||||
test_csvparser14();
|
||||
test_csvparser15();
|
||||
test_csvparser16();
|
||||
test_csvparser17();
|
||||
test_csvparser18();
|
||||
test_csvparser19();
|
||||
test_csvparser20();
|
||||
test_csvparser21();
|
||||
test_csvparser22();
|
||||
test_csvparser23();
|
||||
test_csvparser24();
|
||||
test_csvparser25();
|
||||
test_csvparser26();
|
||||
test_csvparser27();
|
||||
}
|
||||
|
||||
|
||||
|
||||
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
|
|
@ -0,0 +1,56 @@
|
|||
/*
|
||||
* This file is a part of PikoTools
|
||||
* and is distributed under the (new) BSD licence.
|
||||
* Author: Tomasz Sowa <t.sowa@ttmath.org>
|
||||
*/
|
||||
|
||||
/*
|
||||
* Copyright (c) 2021, Tomasz Sowa
|
||||
* All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are met:
|
||||
*
|
||||
* * Redistributions of source code must retain the above copyright notice,
|
||||
* this list of conditions and the following disclaimer.
|
||||
*
|
||||
* * Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
*
|
||||
* * Neither the name Tomasz Sowa nor the names of contributors to this
|
||||
* project may be used to endorse or promote products derived
|
||||
* from this software without specific prior written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
|
||||
* THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#ifndef headerfile_picotools_tests_csvparser
#define headerfile_picotools_tests_csvparser

namespace pt
{
namespace pt_csvparser_tests
{

/*
 * runs all CSVParser tests (the definition lives in the matching cpp file)
 */
void make_tests();

} // namespace pt_csvparser_tests
} // namespace pt

#endif
|
|
@ -1,11 +1,11 @@
|
|||
/*
|
||||
* This file is a part of MainParser -- simple parser for main() parameters
|
||||
* This file is a part of PikoTools
|
||||
* and is distributed under the (new) BSD licence.
|
||||
* Author: Tomasz Sowa <t.sowa@ttmath.org>
|
||||
*/
|
||||
|
||||
/*
|
||||
* Copyright (c) 2011, Tomasz Sowa
|
||||
* Copyright (c) 2021, Tomasz Sowa
|
||||
* All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
|
@ -35,83 +35,37 @@
|
|||
* THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
|
||||
|
||||
#include "convert.h"
|
||||
#include "mainoptionsparser.h"
|
||||
#include "csvparser.h"
|
||||
#include <iostream>
|
||||
#include <string.h>
|
||||
#include "../mainparser.h"
|
||||
|
||||
|
||||
int main()
|
||||
|
||||
namespace pt
|
||||
{
|
||||
MainParser mp;
|
||||
|
||||
// suppose you call a 'programname' in such a way:
|
||||
// $ programname -a -b - c --longparam -- otherlongparam -xyz paramwithvalue -x --longparam2 longwithvalue lastvalue1 lastvalue2 lastvalue3
|
||||
// so the main() function get this table as input:
|
||||
|
||||
const char * tab[] = {
|
||||
"programname",
|
||||
"-a",
|
||||
"-b",
|
||||
"-",
|
||||
"c",
|
||||
"--longparam",
|
||||
"--",
|
||||
"otherlongparam",
|
||||
"-xyz",
|
||||
"paramwithvalue",
|
||||
"-x",
|
||||
"--longparam2",
|
||||
"longwithvalue",
|
||||
"lastvalue1", // some values left at the end
|
||||
"lastvalue2", // you can get them by using GetValue() method
|
||||
"lastvalue3",
|
||||
};
|
||||
|
||||
mp.Set(sizeof(tab)/sizeof(const char*), tab);
|
||||
|
||||
while( mp.NextParam() )
|
||||
{
|
||||
if( mp.GetSingleParam() != 0 )
|
||||
{
|
||||
std::cout << "-" << mp.GetSingleParam() << std::endl;
|
||||
|
||||
// we know that 'z' requires a value
|
||||
if( mp.GetSingleParam() == 'z' )
|
||||
std::cout << "value for z: " << mp.GetValue() << std::endl;
|
||||
}
|
||||
|
||||
if( *mp.GetDoubleParam() )
|
||||
{
|
||||
std::cout << "--" << mp.GetDoubleParam() << std::endl;
|
||||
|
||||
// we know that "longparam2" requires a value
|
||||
if( strcmp(mp.GetDoubleParam(), "longparam2") == 0 )
|
||||
std::cout << "value for longparam2: " << mp.GetValue() << std::endl;
|
||||
}
|
||||
}
|
||||
|
||||
while( !mp.IsEnd() )
|
||||
std::cout << mp.GetValue() << std::endl;
|
||||
bool was_error = false;
|
||||
int test_counter = 0;
|
||||
const char * test_msg = nullptr;
|
||||
}
|
||||
|
||||
/*
|
||||
program output:
|
||||
|
||||
-a
|
||||
-b
|
||||
-c
|
||||
--longparam
|
||||
--otherlongparam
|
||||
-x
|
||||
-y
|
||||
-z
|
||||
value for z: paramwithvalue
|
||||
-x
|
||||
--longparam2
|
||||
value for longparam2: longwithvalue
|
||||
lastvalue1
|
||||
lastvalue2
|
||||
lastvalue3
|
||||
*/
|
||||
int main(int argc, const char ** argv)
|
||||
{
|
||||
pt::pt_convert_tests::make_tests();
|
||||
pt::pt_mainoptions_tests::make_tests();
|
||||
pt::pt_csvparser_tests::make_tests();
|
||||
|
||||
if( pt::was_error )
|
||||
{
|
||||
std::cout << "some of the tests failed" << std::endl;
|
||||
}
|
||||
else
|
||||
{
|
||||
std::cout << "*********************************" << std::endl;
|
||||
std::cout << "* all tests passed successfully *" << std::endl;
|
||||
std::cout << "*********************************" << std::endl;
|
||||
}
|
||||
|
||||
return !pt::was_error ? 0 : 1;
|
||||
}
|
|
@ -0,0 +1,332 @@
|
|||
/*
|
||||
* This file is a part of PikoTools
|
||||
* and is distributed under the (new) BSD licence.
|
||||
* Author: Tomasz Sowa <t.sowa@ttmath.org>
|
||||
*/
|
||||
|
||||
/*
|
||||
* Copyright (c) 2021, Tomasz Sowa
|
||||
* All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are met:
|
||||
*
|
||||
* * Redistributions of source code must retain the above copyright notice,
|
||||
* this list of conditions and the following disclaimer.
|
||||
*
|
||||
* * Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
*
|
||||
* * Neither the name Tomasz Sowa nor the names of contributors to this
|
||||
* project may be used to endorse or promote products derived
|
||||
* from this software without specific prior written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
|
||||
* THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#include <iostream>
|
||||
#include "mainoptionsparser.h"
|
||||
#include "test.h"
|
||||
#include "mainoptions/mainoptionsparser.h"
|
||||
#include "utf8/utf8.h"
|
||||
#include "convert/convert.h"
|
||||
|
||||
namespace pt
|
||||
{
|
||||
|
||||
namespace pt_mainoptions_tests
|
||||
{
|
||||
|
||||
|
||||
struct MainOptionsParserOutputTest
|
||||
{
|
||||
MainOptionsParser::Status status;
|
||||
const char * option_err;
|
||||
const char * json;
|
||||
};
|
||||
|
||||
|
||||
/*
 * prints the C++ name of the given status to std::cout (without a new line)
 *
 * only status_ok, status_argument_not_provided and status_argument_provided
 * are recognized, for any other value nothing is printed
 */
void print_status(MainOptionsParser::Status status)
{
	const char * status_name = nullptr;

	if( status == MainOptionsParser::status_ok )
		status_name = "MainOptionsParser::status_ok";
	else
	if( status == MainOptionsParser::status_argument_not_provided )
		status_name = "MainOptionsParser::status_argument_not_provided";
	else
	if( status == MainOptionsParser::status_argument_provided )
		status_name = "MainOptionsParser::status_argument_provided";

	if( status_name )
		std::cout << status_name;
}
|
||||
|
||||
|
||||
bool has_space_in_str(const char * arg)
|
||||
{
|
||||
while( *arg )
|
||||
{
|
||||
if( is_white((wchar_t)*arg) )
|
||||
return true;
|
||||
|
||||
arg += 1;
|
||||
}
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
|
||||
void print_args(int to_index, const char ** argv)
|
||||
{
|
||||
for(int i=0 ; i <= to_index ; ++i)
|
||||
{
|
||||
bool has_space = has_space_in_str(argv[i]);
|
||||
|
||||
if( has_space )
|
||||
std::cout << "\"";
|
||||
|
||||
std::cout << argv[i];
|
||||
|
||||
if( has_space )
|
||||
std::cout << "\"";
|
||||
|
||||
std::cout << " ";
|
||||
}
|
||||
|
||||
std::cout << std::endl;
|
||||
}
|
||||
|
||||
|
||||
|
||||
void test_mainoptionsparser(size_t len, const char ** argv, const Space & arguments_required, MainOptionsParserOutputTest * output)
|
||||
{
|
||||
reset_test_counter("mainoptionsparser");
|
||||
std::cout << "Testing MainArgsParser" << std::endl;
|
||||
|
||||
MainOptionsParser parser;
|
||||
Space space;
|
||||
|
||||
/*
|
||||
* set to true when creating new tests (you can copy console output to the cpp file)
|
||||
*/
|
||||
bool prepare_tests = false;
|
||||
|
||||
for(size_t i = 0 ; i < len ; ++i)
|
||||
{
|
||||
if( !prepare_tests )
|
||||
print_args(i, argv);
|
||||
|
||||
MainOptionsParser::Status status = parser.parse(i + 1, argv, space, arguments_required);
|
||||
|
||||
std::wstring & err_wstr = parser.get_wrong_option();
|
||||
std::string err_str;
|
||||
wide_to_utf8(err_wstr, err_str);
|
||||
|
||||
std::string json;
|
||||
space.serialize_to_json_to(json);
|
||||
|
||||
std::cout << "{";
|
||||
print_status(status);
|
||||
std::cout << ", " << "\"" << err_str << "\", " << "R\"json(" << json << ")json\"" << "}," << std::endl;
|
||||
|
||||
if( !prepare_tests )
|
||||
{
|
||||
test("status", status, output[i].status);
|
||||
test("err_arg", err_str.c_str(), output[i].option_err);
|
||||
test("json", json.c_str(), output[i].json);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
|
||||
|
||||
void test_mainoptionsparser1()
|
||||
{
|
||||
const char * argv[] = {
|
||||
"program_name",
|
||||
"-a",
|
||||
"-b",
|
||||
"-c",
|
||||
"-d",
|
||||
"argument for d",
|
||||
"-b",
|
||||
"--long",
|
||||
"--foo",
|
||||
"foo-one",
|
||||
"foo-two",
|
||||
"--long-option",
|
||||
"--bar",
|
||||
"bar1",
|
||||
"bar2",
|
||||
"bar3",
|
||||
"-x",
|
||||
"--piggy2=option_for_piggy2",
|
||||
"--piggy3",
|
||||
"--bar",
|
||||
"xbar1",
|
||||
"xbar2",
|
||||
"xbar3",
|
||||
"--piggy2 another_option_for_piggy2",
|
||||
"--",
|
||||
"non-option-argument1",
|
||||
"non-option-argument2",
|
||||
"non-option-argument3",
|
||||
};
|
||||
|
||||
MainOptionsParserOutputTest output[] = {
|
||||
{MainOptionsParser::status_ok, "", R"json({})json"},
|
||||
{MainOptionsParser::status_ok, "", R"json({"a":[[]]})json"},
|
||||
{MainOptionsParser::status_ok, "", R"json({"a":[[]],"b":[[]]})json"},
|
||||
{MainOptionsParser::status_ok, "", R"json({"a":[[]],"b":[[]],"c":[[]]})json"},
|
||||
{MainOptionsParser::status_argument_not_provided, "d", R"json({"a":[[]],"b":[[]],"c":[[]],"d":[[]]})json"},
|
||||
{MainOptionsParser::status_ok, "", R"json({"a":[[]],"b":[[]],"c":[[]],"d":[["argument for d"]]})json"},
|
||||
{MainOptionsParser::status_ok, "", R"json({"a":[[]],"b":[[],[]],"c":[[]],"d":[["argument for d"]]})json"},
|
||||
{MainOptionsParser::status_ok, "", R"json({"a":[[]],"b":[[],[]],"c":[[]],"d":[["argument for d"]],"long":[[]]})json"},
|
||||
{MainOptionsParser::status_argument_not_provided, "foo", R"json({"a":[[]],"b":[[],[]],"c":[[]],"d":[["argument for d"]],"foo":[[]],"long":[[]]})json"},
|
||||
{MainOptionsParser::status_argument_not_provided, "foo", R"json({"a":[[]],"b":[[],[]],"c":[[]],"d":[["argument for d"]],"foo":[["foo-one"]],"long":[[]]})json"},
|
||||
{MainOptionsParser::status_ok, "", R"json({"a":[[]],"b":[[],[]],"c":[[]],"d":[["argument for d"]],"foo":[["foo-one","foo-two"]],"long":[[]]})json"},
|
||||
{MainOptionsParser::status_ok, "", R"json({"a":[[]],"b":[[],[]],"c":[[]],"d":[["argument for d"]],"foo":[["foo-one","foo-two"]],"long":[[]],"long-option":[[]]})json"},
|
||||
{MainOptionsParser::status_argument_not_provided, "bar", R"json({"a":[[]],"b":[[],[]],"bar":[[]],"c":[[]],"d":[["argument for d"]],"foo":[["foo-one","foo-two"]],"long":[[]],"long-option":[[]]})json"},
|
||||
{MainOptionsParser::status_argument_not_provided, "bar", R"json({"a":[[]],"b":[[],[]],"bar":[["bar1"]],"c":[[]],"d":[["argument for d"]],"foo":[["foo-one","foo-two"]],"long":[[]],"long-option":[[]]})json"},
|
||||
{MainOptionsParser::status_argument_not_provided, "bar", R"json({"a":[[]],"b":[[],[]],"bar":[["bar1","bar2"]],"c":[[]],"d":[["argument for d"]],"foo":[["foo-one","foo-two"]],"long":[[]],"long-option":[[]]})json"},
|
||||
{MainOptionsParser::status_ok, "", R"json({"a":[[]],"b":[[],[]],"bar":[["bar1","bar2","bar3"]],"c":[[]],"d":[["argument for d"]],"foo":[["foo-one","foo-two"]],"long":[[]],"long-option":[[]]})json"},
|
||||
{MainOptionsParser::status_ok, "", R"json({"a":[[]],"b":[[],[]],"bar":[["bar1","bar2","bar3"]],"c":[[]],"d":[["argument for d"]],"foo":[["foo-one","foo-two"]],"long":[[]],"long-option":[[]],"x":[[]]})json"},
|
||||
{MainOptionsParser::status_ok, "", R"json({"a":[[]],"b":[[],[]],"bar":[["bar1","bar2","bar3"]],"c":[[]],"d":[["argument for d"]],"foo":[["foo-one","foo-two"]],"long":[[]],"long-option":[[]],"piggy2":[["option_for_piggy2"]],"x":[[]]})json"},
|
||||
{MainOptionsParser::status_ok, "", R"json({"a":[[]],"b":[[],[]],"bar":[["bar1","bar2","bar3"]],"c":[[]],"d":[["argument for d"]],"foo":[["foo-one","foo-two"]],"long":[[]],"long-option":[[]],"piggy2":[["option_for_piggy2"]],"piggy3":[[]],"x":[[]]})json"},
|
||||
{MainOptionsParser::status_argument_not_provided, "bar", R"json({"a":[[]],"b":[[],[]],"bar":[["bar1","bar2","bar3"],[]],"c":[[]],"d":[["argument for d"]],"foo":[["foo-one","foo-two"]],"long":[[]],"long-option":[[]],"piggy2":[["option_for_piggy2"]],"piggy3":[[]],"x":[[]]})json"},
|
||||
{MainOptionsParser::status_argument_not_provided, "bar", R"json({"a":[[]],"b":[[],[]],"bar":[["bar1","bar2","bar3"],["xbar1"]],"c":[[]],"d":[["argument for d"]],"foo":[["foo-one","foo-two"]],"long":[[]],"long-option":[[]],"piggy2":[["option_for_piggy2"]],"piggy3":[[]],"x":[[]]})json"},
|
||||
{MainOptionsParser::status_argument_not_provided, "bar", R"json({"a":[[]],"b":[[],[]],"bar":[["bar1","bar2","bar3"],["xbar1","xbar2"]],"c":[[]],"d":[["argument for d"]],"foo":[["foo-one","foo-two"]],"long":[[]],"long-option":[[]],"piggy2":[["option_for_piggy2"]],"piggy3":[[]],"x":[[]]})json"},
|
||||
{MainOptionsParser::status_ok, "", R"json({"a":[[]],"b":[[],[]],"bar":[["bar1","bar2","bar3"],["xbar1","xbar2","xbar3"]],"c":[[]],"d":[["argument for d"]],"foo":[["foo-one","foo-two"]],"long":[[]],"long-option":[[]],"piggy2":[["option_for_piggy2"]],"piggy3":[[]],"x":[[]]})json"},
|
||||
{MainOptionsParser::status_ok, "", R"json({"a":[[]],"b":[[],[]],"bar":[["bar1","bar2","bar3"],["xbar1","xbar2","xbar3"]],"c":[[]],"d":[["argument for d"]],"foo":[["foo-one","foo-two"]],"long":[[]],"long-option":[[]],"piggy2":[["option_for_piggy2"]],"piggy2 another_option_for_piggy2":[[]],"piggy3":[[]],"x":[[]]})json"},
|
||||
{MainOptionsParser::status_ok, "", R"json({"a":[[]],"args":[],"b":[[],[]],"bar":[["bar1","bar2","bar3"],["xbar1","xbar2","xbar3"]],"c":[[]],"d":[["argument for d"]],"foo":[["foo-one","foo-two"]],"long":[[]],"long-option":[[]],"piggy2":[["option_for_piggy2"]],"piggy2 another_option_for_piggy2":[[]],"piggy3":[[]],"x":[[]]})json"},
|
||||
{MainOptionsParser::status_ok, "", R"json({"a":[[]],"args":["non-option-argument1"],"b":[[],[]],"bar":[["bar1","bar2","bar3"],["xbar1","xbar2","xbar3"]],"c":[[]],"d":[["argument for d"]],"foo":[["foo-one","foo-two"]],"long":[[]],"long-option":[[]],"piggy2":[["option_for_piggy2"]],"piggy2 another_option_for_piggy2":[[]],"piggy3":[[]],"x":[[]]})json"},
|
||||
{MainOptionsParser::status_ok, "", R"json({"a":[[]],"args":["non-option-argument1","non-option-argument2"],"b":[[],[]],"bar":[["bar1","bar2","bar3"],["xbar1","xbar2","xbar3"]],"c":[[]],"d":[["argument for d"]],"foo":[["foo-one","foo-two"]],"long":[[]],"long-option":[[]],"piggy2":[["option_for_piggy2"]],"piggy2 another_option_for_piggy2":[[]],"piggy3":[[]],"x":[[]]})json"},
|
||||
{MainOptionsParser::status_ok, "", R"json({"a":[[]],"args":["non-option-argument1","non-option-argument2","non-option-argument3"],"b":[[],[]],"bar":[["bar1","bar2","bar3"],["xbar1","xbar2","xbar3"]],"c":[[]],"d":[["argument for d"]],"foo":[["foo-one","foo-two"]],"long":[[]],"long-option":[[]],"piggy2":[["option_for_piggy2"]],"piggy2 another_option_for_piggy2":[[]],"piggy3":[[]],"x":[[]]})json"},
|
||||
};
|
||||
|
||||
Space arguments_required;
|
||||
arguments_required.add(L"d", 1);
|
||||
arguments_required.add(L"foo", 2);
|
||||
arguments_required.add(L"bar", 3);
|
||||
arguments_required.add(L"piggy", 1);
|
||||
arguments_required.add(L"piggy2", 1);
|
||||
|
||||
size_t len = sizeof(argv) / sizeof(const char *);
|
||||
test_mainoptionsparser(len, argv, arguments_required, output);
|
||||
}
|
||||
|
||||
|
||||
|
||||
void test_mainoptionsparser2()
|
||||
{
|
||||
const char * argv[] = {
|
||||
"program_name",
|
||||
"--long1",
|
||||
"--long2=with-argument",
|
||||
"--long3",
|
||||
"-a",
|
||||
"--=option-for-empty-argument",
|
||||
"-b",
|
||||
"arg b 1",
|
||||
"arg b 2",
|
||||
"-c",
|
||||
"-f file-name with spaces",
|
||||
"--xxx",
|
||||
"arg 1",
|
||||
"arg 2",
|
||||
"arg 3",
|
||||
"-", /* first non-option argument */
|
||||
"non-option-argument2",
|
||||
"non-option-argument3",
|
||||
"non-option-argument4",
|
||||
};
|
||||
|
||||
MainOptionsParserOutputTest output[] = {
|
||||
{MainOptionsParser::status_ok, "", R"json({})json"},
|
||||
{MainOptionsParser::status_ok, "", R"json({"long1":[[]]})json"},
|
||||
{MainOptionsParser::status_ok, "", R"json({"long1":[[]],"long2":[["with-argument"]]})json"},
|
||||
{MainOptionsParser::status_ok, "", R"json({"long1":[[]],"long2":[["with-argument"]],"long3":[[]]})json"},
|
||||
{MainOptionsParser::status_ok, "", R"json({"a":[[]],"long1":[[]],"long2":[["with-argument"]],"long3":[[]]})json"},
|
||||
{MainOptionsParser::status_ok, "", R"json({"":[["option-for-empty-argument"]],"a":[[]],"long1":[[]],"long2":[["with-argument"]],"long3":[[]]})json"},
|
||||
{MainOptionsParser::status_argument_not_provided, "b", R"json({"":[["option-for-empty-argument"]],"a":[[]],"b":[[]],"long1":[[]],"long2":[["with-argument"]],"long3":[[]]})json"},
|
||||
{MainOptionsParser::status_argument_not_provided, "b", R"json({"":[["option-for-empty-argument"]],"a":[[]],"b":[["arg b 1"]],"long1":[[]],"long2":[["with-argument"]],"long3":[[]]})json"},
|
||||
{MainOptionsParser::status_ok, "", R"json({"":[["option-for-empty-argument"]],"a":[[]],"b":[["arg b 1","arg b 2"]],"long1":[[]],"long2":[["with-argument"]],"long3":[[]]})json"},
|
||||
{MainOptionsParser::status_ok, "", R"json({"":[["option-for-empty-argument"]],"a":[[]],"b":[["arg b 1","arg b 2"]],"c":[[]],"long1":[[]],"long2":[["with-argument"]],"long3":[[]]})json"},
|
||||
{MainOptionsParser::status_ok, "", R"json({"":[["option-for-empty-argument"]],"a":[[]],"b":[["arg b 1","arg b 2"]],"c":[[]],"f":[[" file-name with spaces"]],"long1":[[]],"long2":[["with-argument"]],"long3":[[]]})json"},
|
||||
{MainOptionsParser::status_argument_not_provided, "xxx", R"json({"":[["option-for-empty-argument"]],"a":[[]],"b":[["arg b 1","arg b 2"]],"c":[[]],"f":[[" file-name with spaces"]],"long1":[[]],"long2":[["with-argument"]],"long3":[[]],"xxx":[[]]})json"},
|
||||
{MainOptionsParser::status_argument_not_provided, "xxx", R"json({"":[["option-for-empty-argument"]],"a":[[]],"b":[["arg b 1","arg b 2"]],"c":[[]],"f":[[" file-name with spaces"]],"long1":[[]],"long2":[["with-argument"]],"long3":[[]],"xxx":[["arg 1"]]})json"},
|
||||
{MainOptionsParser::status_argument_not_provided, "xxx", R"json({"":[["option-for-empty-argument"]],"a":[[]],"b":[["arg b 1","arg b 2"]],"c":[[]],"f":[[" file-name with spaces"]],"long1":[[]],"long2":[["with-argument"]],"long3":[[]],"xxx":[["arg 1","arg 2"]]})json"},
|
||||
{MainOptionsParser::status_ok, "", R"json({"":[["option-for-empty-argument"]],"a":[[]],"b":[["arg b 1","arg b 2"]],"c":[[]],"f":[[" file-name with spaces"]],"long1":[[]],"long2":[["with-argument"]],"long3":[[]],"xxx":[["arg 1","arg 2","arg 3"]]})json"},
|
||||
{MainOptionsParser::status_ok, "", R"json({"":[["option-for-empty-argument"]],"a":[[]],"args":["-"],"b":[["arg b 1","arg b 2"]],"c":[[]],"f":[[" file-name with spaces"]],"long1":[[]],"long2":[["with-argument"]],"long3":[[]],"xxx":[["arg 1","arg 2","arg 3"]]})json"},
|
||||
{MainOptionsParser::status_ok, "", R"json({"":[["option-for-empty-argument"]],"a":[[]],"args":["-","non-option-argument2"],"b":[["arg b 1","arg b 2"]],"c":[[]],"f":[[" file-name with spaces"]],"long1":[[]],"long2":[["with-argument"]],"long3":[[]],"xxx":[["arg 1","arg 2","arg 3"]]})json"},
|
||||
{MainOptionsParser::status_ok, "", R"json({"":[["option-for-empty-argument"]],"a":[[]],"args":["-","non-option-argument2","non-option-argument3"],"b":[["arg b 1","arg b 2"]],"c":[[]],"f":[[" file-name with spaces"]],"long1":[[]],"long2":[["with-argument"]],"long3":[[]],"xxx":[["arg 1","arg 2","arg 3"]]})json"},
|
||||
{MainOptionsParser::status_ok, "", R"json({"":[["option-for-empty-argument"]],"a":[[]],"args":["-","non-option-argument2","non-option-argument3","non-option-argument4"],"b":[["arg b 1","arg b 2"]],"c":[[]],"f":[[" file-name with spaces"]],"long1":[[]],"long2":[["with-argument"]],"long3":[[]],"xxx":[["arg 1","arg 2","arg 3"]]})json"},
|
||||
};
|
||||
|
||||
Space arguments_required;
|
||||
arguments_required.add(L"long2", 1);
|
||||
arguments_required.add(L"b", 2);
|
||||
arguments_required.add(L"f", 1);
|
||||
arguments_required.add(L"xxx", 3);
|
||||
arguments_required.add(L"", 1);
|
||||
|
||||
size_t len = sizeof(argv) / sizeof(const char *);
|
||||
test_mainoptionsparser(len, argv, arguments_required, output);
|
||||
}
|
||||
|
||||
|
||||
|
||||
void test_mainoptionsparser3()
|
||||
{
|
||||
const char * argv[] = {
|
||||
"program_name",
|
||||
"--long1",
|
||||
"--long2=with-argument",
|
||||
"--long3",
|
||||
};
|
||||
|
||||
MainOptionsParserOutputTest output[] = {
|
||||
{MainOptionsParser::status_ok, "", R"json({})json"},
|
||||
{MainOptionsParser::status_ok, "", R"json({"long1":[[]]})json"},
|
||||
{MainOptionsParser::status_argument_provided, "long2", R"json({"long1":[[]]})json"},
|
||||
{MainOptionsParser::status_argument_provided, "long2", R"json({"long1":[[]]})json"},
|
||||
};
|
||||
|
||||
Space arguments_required;
|
||||
arguments_required.add(L"non-existing", 1);
|
||||
|
||||
size_t len = sizeof(argv) / sizeof(const char *);
|
||||
test_mainoptionsparser(len, argv, arguments_required, output);
|
||||
}
|
||||
|
||||
|
||||
|
||||
|
||||
void make_tests()
|
||||
{
|
||||
test_mainoptionsparser1();
|
||||
test_mainoptionsparser2();
|
||||
test_mainoptionsparser3();
|
||||
}
|
||||
|
||||
|
||||
}
|
||||
|
||||
|
||||
}
|
||||
|
|
@ -35,66 +35,28 @@
|
|||
* THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#include "inttostr.h"
|
||||
#ifndef headerfile_picotools_tests_mainoptionsparser
|
||||
#define headerfile_picotools_tests_mainoptionsparser
|
||||
|
||||
|
||||
namespace PT
|
||||
namespace pt
|
||||
{
|
||||
|
||||
namespace pt_mainoptions_tests
|
||||
{
|
||||
|
||||
|
||||
std::wstring Toa(unsigned long long value, int base)
|
||||
{
|
||||
std::wstring res;
|
||||
Toa(value, res, false, base);
|
||||
|
||||
return res;
|
||||
}
|
||||
|
||||
|
||||
std::wstring Toa(long long value, int base)
|
||||
{
|
||||
std::wstring res;
|
||||
Toa(value, res, false, base);
|
||||
|
||||
return res;
|
||||
}
|
||||
void make_tests();
|
||||
|
||||
|
||||
std::wstring Toa(unsigned long value, int base)
|
||||
{
|
||||
return Toa(static_cast<unsigned long long>(value), base);
|
||||
}
|
||||
|
||||
|
||||
std::wstring Toa(long value, int base)
|
||||
{
|
||||
return Toa(static_cast<long long>(value), base);
|
||||
}
|
||||
|
||||
|
||||
std::wstring Toa(unsigned int value, int base)
|
||||
{
|
||||
return Toa(static_cast<unsigned long long>(value), base);
|
||||
}
|
||||
|
||||
|
||||
std::wstring Toa(int value, int base)
|
||||
{
|
||||
return Toa(static_cast<long long>(value), base);
|
||||
}
|
||||
|
||||
|
||||
std::wstring Toa(unsigned short value, int base)
|
||||
{
|
||||
return Toa(static_cast<unsigned long long>(value), base);
|
||||
}
|
||||
|
||||
|
||||
std::wstring Toa(short value, int base)
|
||||
{
|
||||
return Toa(static_cast<long long>(value), base);
|
||||
}
|
||||
|
||||
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
#endif
|
|
@ -5,7 +5,7 @@
|
|||
*/
|
||||
|
||||
/*
|
||||
* Copyright (c) 2012, Tomasz Sowa
|
||||
* Copyright (c) 2021, Tomasz Sowa
|
||||
* All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
|
@ -35,81 +35,76 @@
|
|||
* THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#include "spacetojson.h"
|
||||
#include "test.h"
|
||||
|
||||
|
||||
namespace PT
|
||||
|
||||
namespace pt
|
||||
{
|
||||
|
||||
|
||||
void SpaceToJSON::Clear()
|
||||
void reset_test_counter()
|
||||
{
|
||||
numeric.clear();
|
||||
boolean.clear();
|
||||
table.clear();
|
||||
test_counter = 1;
|
||||
test_msg = nullptr;
|
||||
}
|
||||
|
||||
|
||||
void SpaceToJSON::TreatAsTable(const wchar_t * space_name)
|
||||
void reset_test_counter(const char * msg)
|
||||
{
|
||||
table.insert(space_name);
|
||||
test_counter = 1;
|
||||
test_msg = msg;
|
||||
}
|
||||
|
||||
|
||||
void SpaceToJSON::TreatAsTable(const std::wstring & space_name)
|
||||
void test_status(bool status)
|
||||
{
|
||||
table.insert(space_name);
|
||||
if( status )
|
||||
{
|
||||
std::cout << " OK";
|
||||
}
|
||||
else
|
||||
{
|
||||
std::cout << " Fail";
|
||||
was_error = true;
|
||||
}
|
||||
|
||||
std::cout << std::endl;
|
||||
}
|
||||
|
||||
|
||||
|
||||
void SpaceToJSON::TreatAsNumeric(const wchar_t * name)
|
||||
template<>
|
||||
bool test<const char*>(const char * test_msg, const char * provided, const char * expected)
|
||||
{
|
||||
numeric.insert(name);
|
||||
std::cout << "test " << test_counter << ": ";
|
||||
|
||||
if( test_msg )
|
||||
std::cout << test_msg;
|
||||
|
||||
bool status = (std::strcmp(provided, expected) == 0);
|
||||
test_status(status);
|
||||
test_counter += 1;
|
||||
|
||||
return status;
|
||||
}
|
||||
|
||||
|
||||
void SpaceToJSON::TreatAsNumeric(const std::wstring & name)
|
||||
template<>
|
||||
bool test<const wchar_t*>(const char * test_msg, const wchar_t * provided, const wchar_t * expected)
|
||||
{
|
||||
numeric.insert(name);
|
||||
std::cout << "test " << test_counter << ": ";
|
||||
|
||||
if( test_msg )
|
||||
std::cout << test_msg;
|
||||
|
||||
bool status = (std::wcscmp(provided, expected) == 0);
|
||||
test_status(status);
|
||||
test_counter += 1;
|
||||
|
||||
return status;
|
||||
}
|
||||
|
||||
|
||||
}
|
||||
|
||||
|
||||
void SpaceToJSON::TreatAsBool(const wchar_t * name)
|
||||
{
|
||||
boolean.insert(name);
|
||||
}
|
||||
|
||||
void SpaceToJSON::TreatAsBool(const std::wstring & name)
|
||||
{
|
||||
boolean.insert(name);
|
||||
}
|
||||
|
||||
|
||||
|
||||
bool SpaceToJSON::IsNumeric(const std::wstring & name)
|
||||
{
|
||||
std::set<std::wstring>::iterator i = numeric.find(name);
|
||||
return i != numeric.end();
|
||||
}
|
||||
|
||||
|
||||
bool SpaceToJSON::IsBool(const std::wstring & name)
|
||||
{
|
||||
std::set<std::wstring>::iterator i = boolean.find(name);
|
||||
return i != boolean.end();
|
||||
}
|
||||
|
||||
bool SpaceToJSON::IsTable(const std::wstring & name)
|
||||
{
|
||||
std::set<std::wstring>::iterator i = table.find(name);
|
||||
return i != table.end();
|
||||
}
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
} // namespace
|
||||
|
|
@ -5,7 +5,7 @@
|
|||
*/
|
||||
|
||||
/*
|
||||
* Copyright (c) 2016, Tomasz Sowa
|
||||
* Copyright (c) 2021, Tomasz Sowa
|
||||
* All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
|
@ -35,66 +35,58 @@
|
|||
* THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#ifndef headerfile_picotools_mainspaceparser_mainparser
|
||||
#define headerfile_picotools_mainspaceparser_mainparser
|
||||
#ifndef headerfile_picotools_tests_test
|
||||
#define headerfile_picotools_tests_test
|
||||
|
||||
#include "space/space.h"
|
||||
#include <string>
|
||||
#include <vector>
|
||||
#include <iostream>
|
||||
#include <cstring>
|
||||
|
||||
|
||||
namespace PT
|
||||
|
||||
namespace pt
|
||||
{
|
||||
extern int test_counter;
|
||||
extern const char * test_msg;
|
||||
extern bool was_error;
|
||||
|
||||
|
||||
/*
|
||||
a very little parser for main(int argc, char ** argv) parameters
|
||||
look in sample/sample.cpp how to use the parser
|
||||
*/
|
||||
class MainSpaceParser
|
||||
void test_status(bool status);
|
||||
void reset_test_counter();
|
||||
void reset_test_counter(const char * msg);
|
||||
|
||||
|
||||
|
||||
template<typename type_t>
|
||||
bool test(const char * test_msg, type_t provided, type_t expected)
|
||||
{
|
||||
public:
|
||||
std::cout << "test " << test_counter << ": ";
|
||||
|
||||
MainSpaceParser();
|
||||
~MainSpaceParser();
|
||||
if( test_msg )
|
||||
std::cout << test_msg;
|
||||
|
||||
enum Status
|
||||
{
|
||||
status_ok = 0,
|
||||
status_space_not_assigned = 1,
|
||||
status_syntax_error = 2,
|
||||
status_reading_eof = 3 /* CHANGE ME give a better name */
|
||||
};
|
||||
bool status = provided == expected;
|
||||
test_status(status);
|
||||
test_counter += 1;
|
||||
|
||||
void SetSpace(Space & space);
|
||||
Status Parse(int argc, const char ** argv);
|
||||
|
||||
void UTF8(bool utf8);
|
||||
|
||||
std::wstring & GetErrorToken();
|
||||
|
||||
private:
|
||||
|
||||
Space * space;
|
||||
Space * options_space;
|
||||
std::wstring wide_arg, temp_arg, temp_val;
|
||||
std::vector<std::wstring> temp_list_val;
|
||||
bool use_utf8;
|
||||
Status last_status;
|
||||
std::wstring last_error_token;
|
||||
return status;
|
||||
}
|
||||
|
||||
|
||||
void ConvertStr(const char * src, std::wstring & dst);
|
||||
void Parse(size_t argc, const char ** argv, size_t & argv_index);
|
||||
void ParseSingleArgument(size_t argc, const char ** argv, size_t & argv_index);
|
||||
void ParseMultiArgument(size_t argc, const char ** argv, size_t & argv_index);
|
||||
size_t RequireOption(const std::wstring & arg);
|
||||
void AddValueToItem(const std::wstring & name, const std::wstring & empty_value, const std::vector<std::wstring> & list);
|
||||
|
||||
};
|
||||
template<typename type_t>
|
||||
bool test(type_t provided, type_t expected)
|
||||
{
|
||||
return test(test_msg, provided, expected);
|
||||
}
|
||||
|
||||
|
||||
} // namespace
|
||||
template<>
|
||||
bool test<const char*>(const char * test_msg, const char * provided, const char * expected);
|
||||
|
||||
template<>
|
||||
bool test<const wchar_t*>(const char * test_msg, const wchar_t * provided, const wchar_t * expected);
|
||||
|
||||
|
||||
|
||||
}
|
||||
|
||||
#endif
|
|
@ -1,27 +0,0 @@
|
|||
include Makefile.o.dep
|
||||
|
||||
libname=utf8.a
|
||||
|
||||
all: $(libname)
|
||||
|
||||
$(libname): $(o)
|
||||
$(AR) rcs $(libname) $(o)
|
||||
|
||||
|
||||
%.o: %.cpp
|
||||
$(CXX) -c $(CXXFLAGS) -I.. $<
|
||||
|
||||
|
||||
|
||||
depend:
|
||||
makedepend -Y. -I.. -f- *.cpp > Makefile.dep
|
||||
echo -n "o = " > Makefile.o.dep
|
||||
ls -1 *.cpp | xargs -I foo echo -n foo " " | sed -E "s/([^\.]*)\.cpp[ ]/\1\.o/g" >> Makefile.o.dep
|
||||
|
||||
|
||||
clean:
|
||||
rm -f *.o
|
||||
rm -f $(libname)
|
||||
|
||||
|
||||
include Makefile.dep
|
|
@ -1,5 +0,0 @@
|
|||
# DO NOT DELETE
|
||||
|
||||
utf8.o: utf8.h ../textstream/textstream.h ../space/space.h
|
||||
utf8.o: ../textstream/types.h ../date/date.h ../convert/inttostr.h
|
||||
utf8.o: ../membuffer/membuffer.h ../textstream/types.h
|
|
@ -1 +0,0 @@
|
|||
o = utf8.o
|
334
utf8/utf8.h
334
utf8/utf8.h
|
@ -1,334 +0,0 @@
|
|||
/*
|
||||
* This file is a part of PikoTools
|
||||
* and is distributed under the (new) BSD licence.
|
||||
* Author: Tomasz Sowa <t.sowa@ttmath.org>
|
||||
*/
|
||||
|
||||
/*
|
||||
* Copyright (c) 2010-2018, Tomasz Sowa
|
||||
* All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are met:
|
||||
*
|
||||
* * Redistributions of source code must retain the above copyright notice,
|
||||
* this list of conditions and the following disclaimer.
|
||||
*
|
||||
* * Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
*
|
||||
* * Neither the name Tomasz Sowa nor the names of contributors to this
|
||||
* project may be used to endorse or promote products derived
|
||||
* from this software without specific prior written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
|
||||
* THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#ifndef headerfile_picotools_utf8_utf8
|
||||
#define headerfile_picotools_utf8_utf8
|
||||
|
||||
#include <fstream>
|
||||
#include <string>
|
||||
#include "textstream/textstream.h"
|
||||
|
||||
|
||||
namespace PT
|
||||
{
|
||||
|
||||
|
||||
|
||||
/*!
|
||||
UTF-8, a transformation format of ISO 10646
|
||||
http://tools.ietf.org/html/rfc3629
|
||||
|
||||
when wchar_t is 4 bytes long we use UTF-32
|
||||
when wchar_t is 2 bytes long we use UTF-16 (with surrogate pairs)
|
||||
|
||||
UTF-16
|
||||
http://www.ietf.org/rfc/rfc2781.txt
|
||||
*/
|
||||
|
||||
|
||||
|
||||
/*!
|
||||
returns true if 'c' is a correct unicode character
|
||||
*/
|
||||
bool UTF8_CheckRange(int c);
|
||||
|
||||
|
||||
|
||||
/*!
|
||||
converting one character from UTF-8 to an int
|
||||
*/
|
||||
size_t UTF8ToInt(const char * utf8, size_t utf8_len, int & res, bool & correct);
|
||||
size_t UTF8ToInt(const char * utf8, int & res, bool & correct);
|
||||
size_t UTF8ToInt(const std::string & utf8, int & res, bool & correct);
|
||||
size_t UTF8ToInt(std::istream & utf8, int & res, bool & correct);
|
||||
|
||||
|
||||
/*!
|
||||
converting UTF-8 string to a wide string
|
||||
*/
|
||||
bool UTF8ToWide(const char * utf8, size_t utf8_len, std::wstring & res, bool clear = true, int mode = 1);
|
||||
bool UTF8ToWide(const char * utf8, std::wstring & res, bool clear = true, int mode = 1);
|
||||
bool UTF8ToWide(const std::string & utf8, std::wstring & res, bool clear = true, int mode = 1);
|
||||
bool UTF8ToWide(std::istream & utf8, std::wstring & res, bool clear = true, int mode = 1);
|
||||
|
||||
|
||||
/*!
|
||||
converting UTF-8 string to a WTextStream stream
|
||||
(need to be tested)
|
||||
*/
|
||||
/*
|
||||
implemented as templates below
|
||||
bool UTF8ToWide(const char * utf8, size_t utf8_len, WTextStream & res, bool clear = true, int mode = 1);
|
||||
bool UTF8ToWide(const char * utf8, WTextStream & res, bool clear = true, int mode = 1);
|
||||
bool UTF8ToWide(const std::string & utf8, WTextStream & res, bool clear = true, int mode = 1);
|
||||
bool UTF8ToWide(std::istream & utf8, WTextStream & res, bool clear = true, int mode = 1);
|
||||
*/
|
||||
|
||||
/*!
|
||||
converting one int character to UTF-8
|
||||
*/
|
||||
size_t IntToUTF8(int z, char * utf8, size_t utf8_max_len);
|
||||
size_t IntToUTF8(int z, std::string & utf8, bool clear = true );
|
||||
size_t IntToUTF8(int z, std::ostream & utf8);
|
||||
|
||||
|
||||
/*!
|
||||
converting a wide string to UTF-8 string
|
||||
*/
|
||||
bool WideToUTF8(const wchar_t * wide_string, size_t string_len, std::string & utf8, bool clear = true, int mode = 1);
|
||||
bool WideToUTF8(const wchar_t * wide_string, std::string & utf8, bool clear = true, int mode = 1);
|
||||
bool WideToUTF8(const std::wstring & wide_string, std::string & utf8, bool clear = true, int mode = 1);
|
||||
|
||||
// implemented as a template below
|
||||
//void WideToUTF8(PT::WTextStream & buffer, std::string & utf8, bool clear = true, int mode = 1);// not tested
|
||||
|
||||
bool WideToUTF8(const wchar_t * wide_string, size_t string_len, std::ostream & utf8, int mode = 1);
|
||||
bool WideToUTF8(const wchar_t * wide_string, std::ostream & utf8, int mode = 1);
|
||||
bool WideToUTF8(const std::wstring & wide_string, std::ostream & utf8, int mode = 1);
|
||||
|
||||
// implemented as a template below
|
||||
//void WideToUTF8(PT::WTextStream & buffer, std::ostream & utf8, int mode = 1);// not tested
|
||||
|
||||
bool WideToUTF8(const wchar_t * wide_string, size_t string_len, char * utf8, size_t utf8_len, size_t & utf8_written, int mode = 1);
|
||||
bool WideToUTF8(const wchar_t * wide_string, char * utf8, size_t utf8_len, size_t & utf8_written, int mode = 1);
|
||||
bool WideToUTF8(const std::wstring & wide_string, char * utf8, size_t utf8_len, size_t & utf8_written, int mode = 1);
|
||||
// implement void WideToUTF8(PT::WTextStream & buffer, char * utf8, size_t utf8_len, size_t & utf8_written, int mode = 1);
|
||||
|
||||
bool WideToUTF8(const wchar_t * wide_string, size_t string_len, char * utf8, size_t utf8_len, int mode = 1);
|
||||
bool WideToUTF8(const wchar_t * wide_string, char * utf8, size_t utf8_len, int mode = 1);
|
||||
bool WideToUTF8(const std::wstring & wide_string, char * utf8, size_t utf8_len, int mode = 1);
|
||||
// implement void WideToUTF8(PT::WTextStream & buffer, char * utf8, size_t utf8_len, int mode = 1);
|
||||
|
||||
|
||||
|
||||
namespace private_namespace
|
||||
{
|
||||
template<typename function_type>
|
||||
bool UTF8ToWideGeneric(const char * utf8, size_t utf8_len, int mode, function_type convert_function)
|
||||
{
|
||||
int z;
|
||||
size_t len;
|
||||
bool correct, was_error = false;
|
||||
|
||||
while( utf8_len > 0 )
|
||||
{
|
||||
if( (unsigned char)*utf8 <= 0x7f )
|
||||
{
|
||||
// small optimization
|
||||
len = 1;
|
||||
correct = true;
|
||||
z = static_cast<unsigned char>(*utf8);
|
||||
}
|
||||
else
|
||||
{
|
||||
len = UTF8ToInt(utf8, utf8_len, z, correct); // the len will be different from zero
|
||||
}
|
||||
|
||||
if( !correct )
|
||||
{
|
||||
if( mode == 1 )
|
||||
convert_function(0xFFFD); // U+FFFD "replacement character"
|
||||
|
||||
was_error = true;
|
||||
}
|
||||
else
|
||||
{
|
||||
convert_function(z);
|
||||
}
|
||||
|
||||
utf8 += len;
|
||||
utf8_len -= len;
|
||||
}
|
||||
|
||||
return !was_error;
|
||||
}
|
||||
|
||||
|
||||
|
||||
template<typename char_type, size_t stack_size, size_t heap_block_size>
|
||||
void IntToWide(int c, TextStreamBase<char_type, stack_size, heap_block_size> & res)
|
||||
{
|
||||
if( sizeof(wchar_t)==2 && c>0xffff )
|
||||
{
|
||||
// UTF16 surrogate pairs
|
||||
c -= 0x10000;
|
||||
res << static_cast<wchar_t>(((c >> 10) & 0x3FF) + 0xD800);
|
||||
res << static_cast<wchar_t>((c & 0x3FF) + 0xDC00);
|
||||
}
|
||||
else
|
||||
{
|
||||
res << static_cast<wchar_t>(c);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
// not tested
|
||||
// FIX ME it is not using surrogate pairs from input stream
|
||||
// and mode parameter
|
||||
template<typename char_type, size_t stack_size, size_t heap_block_size, typename function_type>
|
||||
void WideToUTF8Generic(TextStreamBase<char_type, stack_size, heap_block_size> & buffer, int mode, function_type write_function)
|
||||
{
|
||||
char utf8_buffer[256];
|
||||
std::size_t buffer_len = sizeof(utf8_buffer) / sizeof(char);
|
||||
std::size_t utf8_sequence_max_length = 10;
|
||||
std::size_t index = 0;
|
||||
|
||||
typename TextStreamBase<char_type, stack_size, heap_block_size>::const_iterator i = buffer.begin();
|
||||
|
||||
while( i != buffer.end() )
|
||||
{
|
||||
if( index + utf8_sequence_max_length > buffer_len )
|
||||
{
|
||||
write_function(utf8_buffer, index);
|
||||
index = 0;
|
||||
}
|
||||
|
||||
index += PT::IntToUTF8(*i, utf8_buffer + index, buffer_len - index);
|
||||
++i;
|
||||
}
|
||||
|
||||
if( index > 0 )
|
||||
{
|
||||
write_function(utf8_buffer, index);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
} // namespace
|
||||
|
||||
|
||||
|
||||
|
||||
// need to be tested
|
||||
template<typename char_type, size_t stack_size, size_t heap_block_size>
|
||||
bool UTF8ToWide(const char * utf8, size_t utf8_len, TextStreamBase<char_type, stack_size, heap_block_size> & res, bool clear = true, int mode = 1)
|
||||
{
|
||||
if( clear )
|
||||
res.clear();
|
||||
|
||||
bool status = private_namespace::UTF8ToWideGeneric(utf8, utf8_len, mode, [&res](int c) {
|
||||
private_namespace::IntToWide(c, res);
|
||||
});
|
||||
|
||||
return status;
|
||||
}
|
||||
|
||||
|
||||
// need to be tested
|
||||
template<typename char_type, size_t stack_size, size_t heap_block_size>
|
||||
bool UTF8ToWide(const char * utf8, TextStreamBase<char_type, stack_size, heap_block_size> & res, bool clear = true, int mode = 1)
|
||||
{
|
||||
size_t utf8_len = 0;
|
||||
|
||||
while( utf8[utf8_len] != 0 )
|
||||
utf8_len += 1;
|
||||
|
||||
return UTF8ToWide(utf8, utf8_len, res, clear, mode);
|
||||
}
|
||||
|
||||
|
||||
// need to be tested
|
||||
template<typename char_type, size_t stack_size, size_t heap_block_size>
|
||||
bool UTF8ToWide(const std::string & utf8, TextStreamBase<char_type, stack_size, heap_block_size> & res, bool clear = true, int mode = 1)
|
||||
{
|
||||
return UTF8ToWide(utf8.c_str(), utf8.size(), res, clear, mode);
|
||||
}
|
||||
|
||||
|
||||
// need to be tested
|
||||
template<typename char_type, size_t stack_size, size_t heap_block_size>
|
||||
bool UTF8ToWide(std::istream & utf8, TextStreamBase<char_type, stack_size, heap_block_size> & res, bool clear = true, int mode = 1)
|
||||
{
|
||||
int z;
|
||||
bool correct, was_error = false;
|
||||
|
||||
if( clear )
|
||||
res.clear();
|
||||
|
||||
while( UTF8ToInt(utf8, z, correct) > 0 )
|
||||
{
|
||||
if( !correct )
|
||||
{
|
||||
if( mode == 1 )
|
||||
res << 0xFFFD; // U+FFFD "replacement character"
|
||||
|
||||
was_error = true;
|
||||
}
|
||||
else
|
||||
{
|
||||
private_namespace::IntToWide(z, res);
|
||||
}
|
||||
}
|
||||
|
||||
return !was_error;
|
||||
}
|
||||
|
||||
|
||||
|
||||
|
||||
// not tested
|
||||
template<typename char_type, size_t stack_size, size_t heap_block_size>
|
||||
void WideToUTF8(TextStreamBase<char_type, stack_size, heap_block_size> & buffer, std::string & utf8, bool clear = true, int mode = 1)
|
||||
{
|
||||
if( clear )
|
||||
utf8.clear();
|
||||
|
||||
private_namespace::WideToUTF8Generic(buffer, mode, [&utf8](const char * utf8_buffer, std::size_t buffer_len){
|
||||
utf8.append(utf8_buffer, buffer_len);
|
||||
});
|
||||
}
|
||||
|
||||
|
||||
// not tested
|
||||
template<typename char_type, size_t stack_size, size_t heap_block_size>
|
||||
void WideToUTF8(TextStreamBase<char_type, stack_size, heap_block_size> & buffer, std::ostream & utf8, int mode = 1)
|
||||
{
|
||||
private_namespace::WideToUTF8Generic(buffer, mode, [&utf8](const char * utf8_buffer, std::size_t buffer_len){
|
||||
utf8.write(utf8_buffer, buffer_len);
|
||||
});
|
||||
}
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
} // namespace
|
||||
|
||||
|
||||
#endif
|
||||
|
Loading…
Reference in New Issue