moved all directories to src subdirectory

This commit is contained in:
2021-05-09 20:11:37 +02:00
parent 127f26884e
commit 3984c29fbf
32 changed files with 0 additions and 0 deletions

60
src/Makefile Normal file
View File

@@ -0,0 +1,60 @@
# Builds the static library pikotools.a from every *.cpp file found below
# this directory (GNU make syntax).

# All C++ sources (searched recursively) and the object files made from them.
sourcefiles:=$(shell find . -name "*.cpp")
objfiles:=$(patsubst %.cpp,%.o,$(sourcefiles))

# Developer toolchain: gcc 10 installed in /usr/local, debug build, C++20.
CXX = g++10
#CXX = clang++
#CXXFLAGS = -fsanitize=address -O0 -g3 -gdwarf-2 -O0 -std=c++14 -I../ttmath -I../pikotools -Wl,-rpath=/usr/local/lib/gcc5/ -Wall -pedantic -Wextra
#CXXFLAGS = -O0 -g3 -gdwarf-2 -O0 -std=c++17 -I../ttmath -I../pikotools -Wl,-rpath=/usr/local/lib/gcc7/ -Wall -pedantic -Wextra
CXXFLAGS = -Wl,-rpath=/usr/local/lib/gcc10/ -O0 -g3 -std=c++20 -I../pikotools -Wall -pedantic

# Fallback values for variables that are still undefined at this point.
# NOTE: CXX and CXXFLAGS are assigned unconditionally above, so their
# fallbacks below never take effect; only LDFLAGS and AR can be defaulted here.
ifndef CXX
CXX = clang++
endif

ifndef CXXFLAGS
CXXFLAGS = -Wall -O2 -I/usr/local/include
endif

ifndef LDFLAGS
LDFLAGS = -L/usr/local/lib
endif

ifndef AR
AR = ar
endif

#export CXX
#export CXXFLAGS
#export LDFLAGS
#export AR

libname = pikotools.a

# These targets are commands, not files: without this declaration a file
# called "all", "clean" or "depend" would stop the target from running.
.PHONY: all clean depend

all: $(libname)

# Pack all object files into the static library.
$(libname): $(objfiles)
	$(AR) rcs $(libname) $(objfiles)

# Compile a single translation unit.
%.o: %.cpp
	$(CXX) -c $(CXXFLAGS) -o $@ $<

# Remove everything produced by the build.
clean:
	rm -f $(objfiles)
	rm -f $(libname)

# Regenerate the header dependency list with makedepend.
depend:
	makedepend -Y. -f- $(sourcefiles) > Makefile.dep

# Header dependencies; the leading '-' makes a missing file a non-error.
-include Makefile.dep

42
src/Makefile.dep Normal file
View File

@@ -0,0 +1,42 @@
# DO NOT DELETE
./convert/inttostr.o: ./convert/inttostr.h
./convert/misc.o: ./convert/misc.h ./convert/text.h ./convert/text_private.h
./convert/text.o: ./convert/text.h ./convert/text_private.h
./date/date.o: ./date/date.h convert/inttostr.h
./log/filelog.o: ./log/filelog.h textstream/textstream.h space/space.h
./log/filelog.o: textstream/types.h convert/inttostr.h date/date.h
./log/filelog.o: membuffer/membuffer.h textstream/types.h utf8/utf8.h
./log/filelog.o: utf8/utf8_templates.h utf8/utf8_private.h
./log/log.o: ./log/log.h textstream/textstream.h space/space.h
./log/log.o: textstream/types.h convert/inttostr.h date/date.h
./log/log.o: membuffer/membuffer.h textstream/types.h ./log/filelog.h
./log/log.o: utf8/utf8.h utf8/utf8_templates.h utf8/utf8_private.h
./mainspaceparser/mainspaceparser.o: ./mainspaceparser/mainspaceparser.h
./mainspaceparser/mainspaceparser.o: space/space.h textstream/types.h
./mainspaceparser/mainspaceparser.o: convert/inttostr.h utf8/utf8.h
./mainspaceparser/mainspaceparser.o: textstream/textstream.h date/date.h
./mainspaceparser/mainspaceparser.o: membuffer/membuffer.h textstream/types.h
./mainspaceparser/mainspaceparser.o: utf8/utf8_templates.h
./mainspaceparser/mainspaceparser.o: utf8/utf8_private.h
./space/space.o: ./space/space.h textstream/types.h convert/inttostr.h
./space/space.o: utf8/utf8.h textstream/textstream.h space/space.h
./space/space.o: date/date.h membuffer/membuffer.h textstream/types.h
./space/space.o: utf8/utf8_templates.h utf8/utf8_private.h convert/convert.h
./space/space.o: ./convert/inttostr.h convert/patternreplacer.h
./space/space.o: convert/strtoint.h ./convert/text.h ./convert/text_private.h
./space/space.o: ./convert/misc.h
./space/spaceparser.o: ./space/spaceparser.h ./space/space.h
./space/spaceparser.o: textstream/types.h convert/inttostr.h utf8/utf8.h
./space/spaceparser.o: textstream/textstream.h space/space.h date/date.h
./space/spaceparser.o: membuffer/membuffer.h textstream/types.h
./space/spaceparser.o: utf8/utf8_templates.h utf8/utf8_private.h
./space/spaceparser.o: convert/strtoint.h ./convert/text.h
./space/spaceparser.o: ./convert/text_private.h ./convert/misc.h
./utf8/utf8.o: ./utf8/utf8.h textstream/textstream.h space/space.h
./utf8/utf8.o: textstream/types.h convert/inttostr.h date/date.h
./utf8/utf8.o: membuffer/membuffer.h textstream/types.h utf8/utf8_templates.h
./utf8/utf8.o: utf8/utf8_private.h
./utf8/utf8_private.o: utf8/utf8_private.h textstream/textstream.h
./utf8/utf8_private.o: space/space.h textstream/types.h convert/inttostr.h
./utf8/utf8_private.o: date/date.h membuffer/membuffer.h textstream/types.h

47
src/convert/convert.h Normal file
View File

@@ -0,0 +1,47 @@
/*
* This file is a part of PikoTools
* and is distributed under the (new) BSD licence.
* Author: Tomasz Sowa <t.sowa@ttmath.org>
*/
/*
* Copyright (c) 2012-2018, Tomasz Sowa
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* * Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
*
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* * Neither the name Tomasz Sowa nor the names of contributors to this
* project may be used to endorse or promote products derived
* from this software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
* THE POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef headerfile_picotools_membuffer_convert_convert
#define headerfile_picotools_membuffer_convert_convert
#include "inttostr.h"
#include "patternreplacer.h"
#include "strtoint.h"
#include "text.h"
#endif

156
src/convert/inttostr.cpp Normal file
View File

@@ -0,0 +1,156 @@
/*
* This file is a part of PikoTools
* and is distributed under the (new) BSD licence.
* Author: Tomasz Sowa <t.sowa@ttmath.org>
*/
/*
* Copyright (c) 2021, Tomasz Sowa
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* * Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
*
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* * Neither the name Tomasz Sowa nor the names of contributors to this
* project may be used to endorse or promote products derived
* from this software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
* THE POSSIBILITY OF SUCH DAMAGE.
*/
#include "inttostr.h"
namespace PT
{

/*
 * to_str(): integer -> std::string in the given base.
 *
 * Only the (unsigned) long long overloads do the real work, by calling Toa();
 * the result string is freshly constructed so Toa() is told not to clear it.
 * Every narrower overload widens its argument and delegates.
 */

std::string to_str(unsigned long long value, int base)
{
    std::string text;
    Toa(value, text, false, base);

    return text;
}


std::string to_str(long long value, int base)
{
    std::string text;
    Toa(value, text, false, base);

    return text;
}


std::string to_str(unsigned long value, int base)
{
    const unsigned long long widened = value;
    return to_str(widened, base);
}


std::string to_str(long value, int base)
{
    const long long widened = value;
    return to_str(widened, base);
}


std::string to_str(unsigned int value, int base)
{
    const unsigned long long widened = value;
    return to_str(widened, base);
}


std::string to_str(int value, int base)
{
    const long long widened = value;
    return to_str(widened, base);
}


std::string to_str(unsigned short value, int base)
{
    const unsigned long long widened = value;
    return to_str(widened, base);
}


std::string to_str(short value, int base)
{
    const long long widened = value;
    return to_str(widened, base);
}


/*
 * to_wstr(): integer -> std::wstring in the given base.
 *
 * Same layout as to_str(): the (unsigned) long long overloads call Toa(),
 * all the others widen the argument and delegate.
 */

std::wstring to_wstr(unsigned long long value, int base)
{
    std::wstring text;
    Toa(value, text, false, base);

    return text;
}


std::wstring to_wstr(long long value, int base)
{
    std::wstring text;
    Toa(value, text, false, base);

    return text;
}


std::wstring to_wstr(unsigned long value, int base)
{
    const unsigned long long widened = value;
    return to_wstr(widened, base);
}


std::wstring to_wstr(long value, int base)
{
    const long long widened = value;
    return to_wstr(widened, base);
}


std::wstring to_wstr(unsigned int value, int base)
{
    const unsigned long long widened = value;
    return to_wstr(widened, base);
}


std::wstring to_wstr(int value, int base)
{
    const long long widened = value;
    return to_wstr(widened, base);
}


std::wstring to_wstr(unsigned short value, int base)
{
    const unsigned long long widened = value;
    return to_wstr(widened, base);
}


std::wstring to_wstr(short value, int base)
{
    const long long widened = value;
    return to_wstr(widened, base);
}


}

298
src/convert/inttostr.h Normal file
View File

@@ -0,0 +1,298 @@
/*
* This file is a part of PikoTools
* and is distributed under the (new) BSD licence.
* Author: Tomasz Sowa <t.sowa@ttmath.org>
*/
/*
* Copyright (c) 2012-2021, Tomasz Sowa
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* * Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
*
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* * Neither the name Tomasz Sowa nor the names of contributors to this
* project may be used to endorse or promote products derived
* from this software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
* THE POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef headerfile_picotools_convert_inttostr
#define headerfile_picotools_convert_inttostr
#include <string>
namespace PT
{


/*
 * Converts an unsigned value to text and stores it, zero terminated,
 * in a caller supplied buffer.
 *
 * value   - the number to convert
 * buffer  - destination buffer
 * buf_len - capacity of the buffer in characters, including the terminating zero
 * base    - numeric base; values outside 2..16 are clamped to that range,
 *           digits above 9 are written as upper case letters
 * len_out - if not null: receives the number of characters written
 *           (without the terminating zero), or zero on failure
 *
 * if the buffer is too small it will be terminated at the beginning (empty string)
 * and the function returns false
 */
template<class CharType>
bool Toa(unsigned long long value, CharType * buffer, size_t buf_len, int base = 10, size_t * len_out = 0)
{
    size_t i1, i2;
    long rest;

    if( len_out )
        *len_out = 0;

    if( buf_len == 0 )
        return false;

    i1 = i2 = 0;

    if( base < 2 ) base = 2;
    if( base > 16 ) base = 16;

    // digits come out least significant first, so the text is built reversed
    do
    {
        rest = value % base;
        value = value / base;
        buffer[i2++] = (rest < 10) ? char(rest) + '0' : char(rest) - 10 + 'A';
    }
    while(value != 0 && i2 < buf_len);

    // i2 == buf_len means there is no room left for the terminating zero
    if( i2 >= buf_len )
    {
        buffer[0] = 0; // ops, the buffer was too small
        return false;
    }

    if( len_out )
        *len_out = i2 - i1;

    buffer[i2--] = 0;

    // reverse the digits in place
    for( ; i1 < i2 ; ++i1, --i2)
    {
        CharType temp = buffer[i1];
        buffer[i1] = buffer[i2];
        buffer[i2] = temp;
    }

    return true;
}


/*
 * Signed variant of the function above: a negative value is written
 * with a leading '-' followed by the digits of its magnitude.
 *
 * if the buffer is too small it will be terminated at the beginning (empty string)
 * and the function returns false
 */
template<class CharType>
bool Toa(long long value, CharType * buffer, size_t buf_len, int base = 10, size_t * len_out = 0)
{
    if( len_out )
        *len_out = 0;

    if( buf_len == 0 )
        return false;

    CharType * buf = buffer;
    bool is_sign = false;
    unsigned long long magnitude = static_cast<unsigned long long>(value);

    if( value < 0 )
    {
        buffer[0] = '-';
        buf += 1;
        buf_len -= 1;

        // negate in unsigned arithmetic: this is well defined even for the
        // smallest long long, for which "-value" would overflow
        magnitude = 0ULL - magnitude;
        is_sign = true;
    }

    bool res = Toa(magnitude, buf, buf_len, base, len_out);

    if( res )
    {
        if( len_out && is_sign )
            *len_out += 1;
    }
    else
    {
        buffer[0] = 0;
        // len_out is set to zero by Toa()
    }

    return res;
}


/*
 * Overloads for the narrower integer types: the value is widened
 * to (unsigned) long long and passed to one of the two functions above.
 */
template<class CharType>
bool Toa(unsigned long value, CharType * buffer, size_t buf_len, int base = 10, size_t * len_out = 0)
{
    return Toa(static_cast<unsigned long long>(value), buffer, buf_len, base, len_out);
}


template<class CharType>
bool Toa(long value, CharType * buffer, size_t buf_len, int base = 10, size_t * len_out = 0)
{
    return Toa(static_cast<long long>(value), buffer, buf_len, base, len_out);
}


template<class CharType>
bool Toa(unsigned int value, CharType * buffer, size_t buf_len, int base = 10, size_t * len_out = 0)
{
    return Toa(static_cast<unsigned long long>(value), buffer, buf_len, base, len_out);
}


template<class CharType>
bool Toa(int value, CharType * buffer, size_t buf_len, int base = 10, size_t * len_out = 0)
{
    return Toa(static_cast<long long>(value), buffer, buf_len, base, len_out);
}


template<class CharType>
bool Toa(unsigned short value, CharType * buffer, size_t buf_len, int base = 10, size_t * len_out = 0)
{
    return Toa(static_cast<unsigned long long>(value), buffer, buf_len, base, len_out);
}


template<class CharType>
bool Toa(short value, CharType * buffer, size_t buf_len, int base = 10, size_t * len_out = 0)
{
    return Toa(static_cast<long long>(value), buffer, buf_len, base, len_out);
}


/*
 * Converts an unsigned value to text and appends it to a string object.
 *
 * value        - the number to convert
 * res          - the string which receives the text
 * clear_string - if true res is cleared first, otherwise the text is appended
 * base         - numeric base (clamped to 2..16 by the buffer version of Toa())
 */
template<class StringType>
void Toa(unsigned long long value, StringType & res, bool clear_string = true, int base = 10)
{
    // worst case is base 2: one character per bit plus a sign and the
    // terminating zero (8 bits per byte are assumed)
    typename StringType::value_type buffer[sizeof(unsigned long long) * 8 + 2];

    // the capacity is counted in elements of the actual character type
    size_t buffer_len = sizeof(buffer) / sizeof(buffer[0]);

    if( clear_string )
        res.clear();

    /*
     * the size of the buffer is sufficient so the status should always be true
     */
    size_t len_out = 0;
    Toa(value, buffer, buffer_len, base, &len_out);

    res.append(buffer, len_out);
}


/*
 * Signed variant of the function above (a negative value gets a leading '-').
 */
template<class StringType>
void Toa(long long value, StringType & res, bool clear_string = true, int base = 10)
{
    // worst case is base 2: one character per bit plus a sign and the
    // terminating zero (8 bits per byte are assumed)
    typename StringType::value_type buffer[sizeof(long long) * 8 + 2];

    // the capacity is counted in elements of the actual character type
    size_t buffer_len = sizeof(buffer) / sizeof(buffer[0]);

    if( clear_string )
        res.clear();

    /*
     * the size of the buffer is sufficient so the status should always be true
     */
    size_t len_out = 0;
    Toa(value, buffer, buffer_len, base, &len_out);

    res.append(buffer, len_out);
}


/*
 * String overloads for the narrower integer types: the value is widened
 * to (unsigned) long long and passed to one of the two functions above.
 */
template<class StringType>
void Toa(unsigned long value, StringType & res, bool clear_string = true, int base = 10)
{
    Toa(static_cast<unsigned long long>(value), res, clear_string, base);
}


template<class StringType>
void Toa(long value, StringType & res, bool clear_string = true, int base = 10)
{
    Toa(static_cast<long long>(value), res, clear_string, base);
}


template<class StringType>
void Toa(unsigned int value, StringType & res, bool clear_string = true, int base = 10)
{
    Toa(static_cast<unsigned long long>(value), res, clear_string, base);
}


template<class StringType>
void Toa(int value, StringType & res, bool clear_string = true, int base = 10)
{
    Toa(static_cast<long long>(value), res, clear_string, base);
}


template<class StringType>
void Toa(unsigned short value, StringType & res, bool clear_string = true, int base = 10)
{
    Toa(static_cast<unsigned long long>(value), res, clear_string, base);
}


template<class StringType>
void Toa(short value, StringType & res, bool clear_string = true, int base = 10)
{
    Toa(static_cast<long long>(value), res, clear_string, base);
}


/*
 * Convenience functions returning the text by value as std::string
 */
std::string to_str(unsigned long long value, int base = 10);
std::string to_str(long long value, int base = 10);
std::string to_str(unsigned long value, int base = 10);
std::string to_str(long value, int base = 10);
std::string to_str(unsigned int value, int base = 10);
std::string to_str(int value, int base = 10);
std::string to_str(unsigned short value, int base = 10);
std::string to_str(short value, int base = 10);

/*
 * Convenience functions returning the text by value as std::wstring
 */
std::wstring to_wstr(unsigned long long value, int base = 10);
std::wstring to_wstr(long long value, int base = 10);
std::wstring to_wstr(unsigned long value, int base = 10);
std::wstring to_wstr(long value, int base = 10);
std::wstring to_wstr(unsigned int value, int base = 10);
std::wstring to_wstr(int value, int base = 10);
std::wstring to_wstr(unsigned short value, int base = 10);
std::wstring to_wstr(short value, int base = 10);


}
#endif

55
src/convert/misc.cpp Normal file
View File

@@ -0,0 +1,55 @@
/*
* This file is a part of PikoTools
* and is distributed under the (new) BSD licence.
* Author: Tomasz Sowa <t.sowa@ttmath.org>
*/
/*
* Copyright (c) 2017, Tomasz Sowa
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* * Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
*
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* * Neither the name Tomasz Sowa nor the names of contributors to this
* project may be used to endorse or promote products derived
* from this software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
* THE POSSIBILITY OF SUCH DAMAGE.
*/
#include "misc.h"
namespace PT
{


/*
 * Writes val into the flag pointed to by was_overflow.
 * A null pointer means the caller is not interested in the flag,
 * in which case nothing happens.
 */
void SetOverflow(bool * was_overflow, bool val)
{
    if( was_overflow == nullptr )
    {
        return;
    }

    *was_overflow = val;
}


}

55
src/convert/misc.h Normal file
View File

@@ -0,0 +1,55 @@
/*
* This file is a part of PikoTools
* and is distributed under the (new) BSD licence.
* Author: Tomasz Sowa <t.sowa@ttmath.org>
*/
/*
* Copyright (c) 2017, Tomasz Sowa
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* * Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
*
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* * Neither the name Tomasz Sowa nor the names of contributors to this
* project may be used to endorse or promote products derived
* from this software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
* THE POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef headerfile_picotools_convert_misc
#define headerfile_picotools_convert_misc
#include <limits>
#include "text.h"
namespace PT
{
/*
 * Stores val in *was_overflow.
 * Does nothing when was_overflow is a null pointer, so callers can pass
 * an optional "overflow" output argument straight through.
 */
void SetOverflow(bool * was_overflow, bool val);
}
#endif

View File

@@ -0,0 +1,169 @@
/*
* This file is a part of PikoTools
* and is distributed under the (new) BSD licence.
* Author: Tomasz Sowa <t.sowa@ttmath.org>
*/
/*
* Copyright (c) 2018, Tomasz Sowa
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* * Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
*
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* * Neither the name Tomasz Sowa nor the names of contributors to this
* project may be used to endorse or promote products derived
* from this software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
* THE POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef headerfile_picotools_convert_patternreplacer
#define headerfile_picotools_convert_patternreplacer
#include <string>
#include "textstream/textstream.h"
#include "strtoint.h"
namespace PT
{


/*
 * Replaces numbered placeholders in a pattern with stringified parameters.
 *
 * A placeholder is a '%' followed by a decimal index (%0, %1, ...) which
 * selects one of the parameters passed after the pattern, "%%" produces
 * a single '%'. A '%' which is not followed by an index of an existing
 * parameter is copied to the output unchanged.
 */
template<typename CharType, typename StrType>
class PatternReplacerBase
{
public:

    /*
     * output_string can be the same string as input pattern
     */
    template<typename ... Types>
    void ReplaceToString(const StrType & pattern, StrType & output_string, Types ... types)
    {
        ReplaceGeneric(pattern, output_string, types...);
    }


    /*
     * the same as ReplaceToString() but the result is returned by value
     */
    template<typename ... Types>
    StrType Replace(const StrType & pattern, Types ... types)
    {
        StrType result;
        ReplaceGeneric(pattern, result, types...);

        return result;
    }


private:

    // the parameters converted to strings, in the order they were passed
    std::vector<StrType> params;

    // scratch stream: used for stringifying parameters and for building the output
    TextStreamBase<CharType, 256, 4096> buffer;

    // scratch string used when moving a parameter from buffer to params
    StrType temp_str;


    /*
     * common implementation: stringify the parameters, expand the pattern
     * into buffer, copy the result out and finally drop all temporary data
     */
    template<typename ... Types>
    void ReplaceGeneric(const StrType & pattern, StrType & output_string, Types ... types)
    {
        params.clear();
        AddParams(types...);

        ReplacePattern(pattern);
        buffer.to_string(output_string);

        params.clear();
        buffer.clear();
        temp_str.clear();
    }


    // end of the recursion: no parameters left
    void AddParams()
    {
    }


    /*
     * converts the first parameter to a string (by streaming it through buffer),
     * stores it in params and recurses on the remaining ones
     */
    template<typename Type, typename ... Types>
    void AddParams(const Type & first, Types ... rest)
    {
        buffer.clear();
        buffer << first;
        buffer.to_string(temp_str);

        params.push_back(temp_str);
        temp_str.clear();

        AddParams(rest...);
    }


    /*
     * expands the pattern into buffer
     */
    void ReplacePattern(const StrType & pattern)
    {
        buffer.clear();

        const size_t pattern_len = pattern.size();
        size_t pos = 0;

        while( pos < pattern_len )
        {
            const bool percent_with_next = (pattern[pos] == '%' && pos + 1 < pattern_len);

            if( percent_with_next && pattern[pos + 1] == '%' )
            {
                // "%%" is an escaped percent sign
                buffer << '%';
                pos += 2;
                continue;
            }

            if( percent_with_next )
            {
                const CharType * number_begin = &pattern[pos + 1];
                const CharType * number_end;
                bool overflowed;
                unsigned long param_index = Toul(number_begin, 10, &number_end, &overflowed, false);

                // accept only if at least one digit was read and the parameter exists
                if( !overflowed && number_end > number_begin && (size_t)param_index < params.size() )
                {
                    pos = pos + 1 + (number_end - number_begin);
                    buffer << params[param_index];
                    continue;
                }
            }

            // an ordinary character (or a '%' which is not a placeholder)
            buffer << pattern[pos];
            pos += 1;
        }
    }


};


typedef PatternReplacerBase<char, std::string> PatternReplacer;
typedef PatternReplacerBase<wchar_t, std::wstring> WPatternReplacer;


}
#endif

378
src/convert/strtoint.h Normal file
View File

@@ -0,0 +1,378 @@
/*
* This file is a part of PikoTools
* and is distributed under the (new) BSD licence.
* Author: Tomasz Sowa <t.sowa@ttmath.org>
*/
/*
* Copyright (c) 2017, Tomasz Sowa
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* * Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
*
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* * Neither the name Tomasz Sowa nor the names of contributors to this
* project may be used to endorse or promote products derived
* from this software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
* THE POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef headerfile_picotools_convert_strtoint
#define headerfile_picotools_convert_strtoint
#include <limits>
#include "text.h"
#include "misc.h"
namespace PT
{
/*
 * Parses an unsigned integer from a zero terminated string.
 *
 * str                   - the text to parse
 * base                  - numeric base handed to IsDigit() for recognizing digits
 * after_str             - if not null: receives a pointer to the first character
 *                         which was not consumed
 * was_overflow          - if not null: set to true when the value does not fit
 *                         in unsigned long long, to false otherwise
 * allow_skip_whitechars - if true str is first passed through SkipWhite()
 *
 * Returns the parsed value or zero on overflow. On overflow the remaining
 * digits are skipped as well when after_str is requested.
 */
template<typename CharType>
unsigned long long Toull(const CharType * str, int base = 10, const CharType ** after_str = 0, bool * was_overflow = 0, bool allow_skip_whitechars = true)
{
    unsigned long long res = 0;
    bool carry = false;
    int digit;

    SetOverflow(was_overflow, false);

    if( allow_skip_whitechars )
        str = SkipWhite(str);

    while( !carry && IsDigit(*str, base, &digit) )
    {
        const unsigned long long ubase  = static_cast<unsigned long long>(base);
        const unsigned long long udigit = static_cast<unsigned long long>(digit);

#ifdef __GNUC__
        carry = __builtin_mul_overflow(res, ubase, &res);

        if( !carry )
        {
            carry = __builtin_add_overflow(res, udigit, &res);
        }
#else
        // portable test for other compilers:
        // res * ubase + udigit fits  <=>  res <= (max - udigit) / ubase
        const unsigned long long max_value = std::numeric_limits<unsigned long long>::max();

        if( ubase != 0 && res > (max_value - udigit) / ubase )
        {
            carry = true;
        }
        else
        {
            res = res * ubase + udigit;
        }
#endif

        str += 1;
    }

    if( carry )
    {
        if( after_str )
        {
            // consume the rest of the number so after_str points behind it
            while( IsDigit(*str, base, &digit) )
            {
                str += 1;
            }
        }

        SetOverflow(was_overflow, true);
        res = 0;
    }

    if( after_str )
        *after_str = str;

    return res;
}
/*
 * Parses a signed integer from a zero terminated string.
 *
 * An optional '-' directly before the digits makes the value negative
 * (a leading '+' is not recognized). The digits are read by Toull().
 *
 * str                   - the text to parse
 * base                  - numeric base passed to Toull()
 * after_str             - if not null: receives a pointer to the first character
 *                         which was not consumed
 * was_overflow          - if not null: set to true when the value does not fit
 *                         in long long, to false otherwise
 * allow_skip_whitechars - if true str is first passed through SkipWhite()
 *
 * Returns the parsed value or zero on overflow.
 */
template<typename CharType>
long long Toll(const CharType * str, int base = 10, const CharType ** after_str = 0, bool * was_overflow = 0, bool allow_skip_whitechars = true)
{
    bool was_sign = false;
    bool was_overflow_u = false;

    SetOverflow(was_overflow, false);

    if( allow_skip_whitechars )
        str = SkipWhite(str);

    if( *str == '-' )
    {
        was_sign = true;
        str += 1;
    }

    unsigned long long uval = Toull(str, base, after_str, &was_overflow_u, false);

    // a negative number can have a magnitude greater by one than a positive number
    unsigned long long sign_add = ( was_sign ) ? 1 : 0;

    if( was_overflow_u )
    {
        SetOverflow(was_overflow, true);
        return 0;
    }

    if( uval > static_cast<unsigned long long>(std::numeric_limits<long long>::max()) + sign_add )
    {
        SetOverflow(was_overflow, true);
        return 0;
    }

    if( was_sign )
    {
        if( uval == 0 )
            return 0;

        // uval can be one greater than the largest long long (when parsing
        // the smallest long long) so it must not be converted directly:
        // -(uval - 1) - 1 gives the same result without a signed overflow
        return -static_cast<long long>(uval - 1) - 1;
    }

    return static_cast<long long>(uval);
}
/*
 * Parses an unsigned integer with Toull() and narrows it to IntegerType.
 *
 * If Toull() reported an overflow or the value is greater than the maximum
 * of IntegerType then *was_overflow (if given) is set to true and zero
 * is returned, otherwise *was_overflow is set to false.
 */
template<typename CharType, typename IntegerType>
IntegerType ToUnsignedIntegerType(const CharType * str, int base = 10, const CharType ** after_str = 0, bool * was_overflow = 0, bool allow_skip_whitechars = true)
{
    const unsigned long long type_max = static_cast<unsigned long long>(std::numeric_limits<IntegerType>::max());
    bool wide_overflow = false;

    SetOverflow(was_overflow, false);
    const unsigned long long wide = Toull(str, base, after_str, &wide_overflow, allow_skip_whitechars);

    const bool fits = !wide_overflow && wide <= type_max;

    if( fits )
    {
        return static_cast<IntegerType>(wide);
    }

    SetOverflow(was_overflow, true);
    return 0;
}
/*
 * Parses an unsigned long: all arguments are forwarded
 * to ToUnsignedIntegerType<CharType, unsigned long>().
 */
template<class CharType>
unsigned long Toul(const CharType * str, int base = 10, const CharType ** after_str = 0, bool * was_overflow = 0, bool allow_skip_whitechars = true)
{
    typedef unsigned long ResultType;

    const ResultType parsed = ToUnsignedIntegerType<CharType, ResultType>(str, base, after_str, was_overflow, allow_skip_whitechars);
    return parsed;
}
/*
 * Parses an unsigned int: all arguments are forwarded
 * to ToUnsignedIntegerType<CharType, unsigned int>().
 */
template<class CharType>
unsigned int Toui(const CharType * str, int base = 10, const CharType ** after_str = 0, bool * was_overflow = 0, bool allow_skip_whitechars = true)
{
    typedef unsigned int ResultType;

    const ResultType parsed = ToUnsignedIntegerType<CharType, ResultType>(str, base, after_str, was_overflow, allow_skip_whitechars);
    return parsed;
}
/*
 * Parses a signed integer with Toll() and narrows it to IntegerType.
 *
 * If Toll() reported an overflow or the value lies outside of the range
 * of IntegerType then *was_overflow (if given) is set to true and zero
 * is returned, otherwise *was_overflow is set to false.
 */
template<typename CharType, typename IntegerType>
IntegerType ToIntegerType(const CharType * str, int base = 10, const CharType ** after_str = 0, bool * was_overflow = 0, bool allow_skip_whitechars = true)
{
    const long long type_min = static_cast<long long>(std::numeric_limits<IntegerType>::min());
    const long long type_max = static_cast<long long>(std::numeric_limits<IntegerType>::max());
    bool wide_overflow = false;

    SetOverflow(was_overflow, false);
    const long long wide = Toll(str, base, after_str, &wide_overflow, allow_skip_whitechars);

    const bool in_range = !wide_overflow && wide >= type_min && wide <= type_max;

    if( in_range )
    {
        return static_cast<IntegerType>(wide);
    }

    SetOverflow(was_overflow, true);
    return 0;
}
/*
 * Parses a long: all arguments are forwarded to ToIntegerType<CharType, long>().
 */
template<class CharType>
long Tol(const CharType * str, int base = 10, const CharType ** after_str = 0, bool * was_overflow = 0, bool allow_skip_whitechars = true)
{
    typedef long ResultType;

    const ResultType parsed = ToIntegerType<CharType, ResultType>(str, base, after_str, was_overflow, allow_skip_whitechars);
    return parsed;
}
/*
 * Parses an int: all arguments are forwarded to ToIntegerType<CharType, int>().
 */
template<class CharType>
int Toi(const CharType * str, int base = 10, const CharType ** after_str = 0, bool * was_overflow = 0, bool allow_skip_whitechars = true)
{
    typedef int ResultType;

    const ResultType parsed = ToIntegerType<CharType, ResultType>(str, base, after_str, was_overflow, allow_skip_whitechars);
    return parsed;
}
/*
 * Parses an unsigned integer, the base is detected from the first character:
 *
 *   '0'            - base 8
 *   '#'            - base 16
 *   '&'            - base 2
 *   anything else  - base 10 (e.g. the first digit is [1-9])
 *
 * A recognized prefix character is consumed, the rest is parsed by Toull().
 * The meaning of after_str, was_overflow and allow_skip_whitechars is the same
 * as in Toull().
 */
template<typename CharType>
unsigned long long Toull_b(const CharType * str, const CharType ** after_str = 0, bool * was_overflow = 0, bool allow_skip_whitechars = true)
{
    if( allow_skip_whitechars )
        str = SkipWhite(str);

    int detected_base = 10;
    int prefix_len = 1;

    if( *str == '0' )
    {
        detected_base = 8;
    }
    else
    if( *str == '#' )
    {
        detected_base = 16;
    }
    else
    if( *str == '&' )
    {
        detected_base = 2;
    }
    else
    {
        // no prefix: nothing to consume, stay with base 10
        prefix_len = 0;
    }

    str += prefix_len;

    return Toull(str, detected_base, after_str, was_overflow, false);
}
/*
 * Parses a signed integer with automatic base detection.
 *
 * An optional '-' directly before the number makes the value negative
 * (a leading '+' is not recognized). The rest, including the base prefix,
 * is read by Toull_b().
 *
 * str                   - the text to parse
 * after_str             - if not null: receives a pointer to the first character
 *                         which was not consumed
 * was_overflow          - if not null: set to true when the value does not fit
 *                         in long long, to false otherwise
 * allow_skip_whitechars - if true str is first passed through SkipWhite()
 *
 * Returns the parsed value or zero on overflow.
 */
template<typename CharType>
long long Toll_b(const CharType * str, const CharType ** after_str = 0, bool * was_overflow = 0, bool allow_skip_whitechars = true)
{
    bool was_sign = false;
    bool was_overflow_u = false;

    SetOverflow(was_overflow, false);

    if( allow_skip_whitechars )
        str = SkipWhite(str);

    if( *str == '-' )
    {
        was_sign = true;
        str += 1;
    }

    unsigned long long uval = Toull_b(str, after_str, &was_overflow_u, false);

    // a negative number can have a magnitude greater by one than a positive number
    unsigned long long sign_add = ( was_sign ) ? 1 : 0;

    if( was_overflow_u )
    {
        SetOverflow(was_overflow, true);
        return 0;
    }

    if( uval > static_cast<unsigned long long>(std::numeric_limits<long long>::max()) + sign_add )
    {
        SetOverflow(was_overflow, true);
        return 0;
    }

    if( was_sign )
    {
        if( uval == 0 )
            return 0;

        // uval can be one greater than the largest long long (when parsing
        // the smallest long long) so it must not be converted directly:
        // -(uval - 1) - 1 gives the same result without a signed overflow
        return -static_cast<long long>(uval - 1) - 1;
    }

    return static_cast<long long>(uval);
}
/*
 * Parses an unsigned integer with Toull_b() (automatic base detection)
 * and narrows it to IntegerType.
 *
 * If Toull_b() reported an overflow or the value is greater than the maximum
 * of IntegerType then *was_overflow (if given) is set to true and zero
 * is returned, otherwise *was_overflow is set to false.
 */
template<typename CharType, typename IntegerType>
IntegerType ToUnsignedIntegerType_b(const CharType * str, const CharType ** after_str = 0, bool * was_overflow = 0, bool allow_skip_whitechars = true)
{
    const unsigned long long type_max = static_cast<unsigned long long>(std::numeric_limits<IntegerType>::max());
    bool wide_overflow = false;

    SetOverflow(was_overflow, false);
    const unsigned long long wide = Toull_b(str, after_str, &wide_overflow, allow_skip_whitechars);

    const bool fits = !wide_overflow && wide <= type_max;

    if( fits )
    {
        return static_cast<IntegerType>(wide);
    }

    SetOverflow(was_overflow, true);
    return 0;
}
/*
 * unsigned long version of ToUnsignedIntegerType_b (arguments are passed unchanged)
 */
template<class CharType>
unsigned long Toul_b(const CharType * str, const CharType ** after_str = 0, bool * was_overflow = 0, bool allow_skip_whitechars = true)
{
    const unsigned long value = ToUnsignedIntegerType_b<CharType, unsigned long>(str, after_str, was_overflow, allow_skip_whitechars);
    return value;
}
/*
 * unsigned int version of ToUnsignedIntegerType_b (arguments are passed unchanged)
 */
template<class CharType>
unsigned int Toui_b(const CharType * str, const CharType ** after_str = 0, bool * was_overflow = 0, bool allow_skip_whitechars = true)
{
    const unsigned int value = ToUnsignedIntegerType_b<CharType, unsigned int>(str, after_str, was_overflow, allow_skip_whitechars);
    return value;
}
/*
 * converts a text to a signed IntegerType (the base is detected by Toll_b)
 *
 * the text is parsed as long long first and then the range of IntegerType is tested,
 * in the case of an overflow zero is returned and *was_overflow (if given) is set to true
 */
template<typename CharType, typename IntegerType>
IntegerType ToIntegerType_b(const CharType * str, const CharType ** after_str = 0, bool * was_overflow = 0, bool allow_skip_whitechars = true)
{
    bool overflow_wide = false;
    SetOverflow(was_overflow, false);

    const long long wide = Toll_b(str, after_str, &overflow_wide, allow_skip_whitechars);
    const long long lower = static_cast<long long>(std::numeric_limits<IntegerType>::min());
    const long long upper = static_cast<long long>(std::numeric_limits<IntegerType>::max());
    const bool fits = !overflow_wide && wide >= lower && wide <= upper;

    if( !fits )
    {
        SetOverflow(was_overflow, true);
        return 0;
    }

    return static_cast<IntegerType>(wide);
}
/*
 * long version of ToIntegerType_b (arguments are passed unchanged)
 */
template<class CharType>
long Tol_b(const CharType * str, const CharType ** after_str = 0, bool * was_overflow = 0, bool allow_skip_whitechars = true)
{
    const long value = ToIntegerType_b<CharType, long>(str, after_str, was_overflow, allow_skip_whitechars);
    return value;
}
/*
 * int version of ToIntegerType_b (arguments are passed unchanged)
 */
template<class CharType>
int Toi_b(const CharType * str, const CharType ** after_str = 0, bool * was_overflow = 0, bool allow_skip_whitechars = true)
{
    const int value = ToIntegerType_b<CharType, int>(str, after_str, was_overflow, allow_skip_whitechars);
    return value;
}
}
#endif

209
src/convert/text.cpp Normal file
View File

@@ -0,0 +1,209 @@
/*
* This file is a part of PikoTools
* and is distributed under the (new) BSD licence.
* Author: Tomasz Sowa <t.sowa@ttmath.org>
*/
/*
* Copyright (c) 2017-2021, Tomasz Sowa
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* * Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
*
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* * Neither the name Tomasz Sowa nor the names of contributors to this
* project may be used to endorse or promote products derived
* from this software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
* THE POSSIBILITY OF SUCH DAMAGE.
*/
#include <cstddef>
#include "text.h"
namespace PT
{
// additional white characters, the table has to stay sorted in ascending order
// because IsWhite() looks a character up with a binary search
//
// a new line character (10), a space (32) and a tab (9) are deliberately
// not listed here - IsWhite() tests them separately
static const wchar_t white_chars_table[] = {
    0x000B, // LINE TABULATION (vertical tabulation)
    0x000C, // FORM FEED (FF)
    0x000D, // CARRIAGE RETURN (CR) - a character at the end in a dos text file
    0x0085, // NEXT LINE (NEL)
    0x00A0, // NO-BREAK SPACE (old name: NON-BREAKING SPACE)
    0x1680, // OGHAM SPACE MARK
    0x180E, // MONGOLIAN VOWEL SEPARATOR
    0x2000, // EN QUAD
    0x2001, // EM QUAD
    0x2002, // EN SPACE
    0x2003, // EM SPACE
    0x2004, // THREE-PER-EM SPACE
    0x2005, // FOUR-PER-EM SPACE
    0x2006, // SIX-PER-EM SPACE
    0x2007, // FIGURE SPACE
    0x2008, // PUNCTUATION SPACE
    0x2009, // THIN SPACE
    0x200A, // HAIR SPACE
    0x2028, // LINE SEPARATOR
    0x2029, // PARAGRAPH SEPARATOR
    0x202F, // NARROW NO-BREAK SPACE
    0x205F, // MEDIUM MATHEMATICAL SPACE
    0x3000, // IDEOGRAPHIC SPACE
    0xFEFF, // ZERO WIDTH NO-BREAK SPACE
};


/*
    returns true if c is a white character

    a space (32) and a tab (9) are always white
    a new line (10) is white only if treat_new_line_as_white is true
    characters from white_chars_table are tested only if check_additional_chars is true
*/
bool IsWhite(wchar_t c, bool check_additional_chars, bool treat_new_line_as_white)
{
    // the most common white characters are tested first
    if( c == 32 || c == 9 )
        return true;

    if( c == 10 )
        return treat_new_line_as_white;

    if( !check_additional_chars )
        return false;

    // binary search in the half-open range [low, high)
    std::size_t low = 0;
    std::size_t high = sizeof(white_chars_table) / sizeof(white_chars_table[0]);

    while( low < high )
    {
        const std::size_t mid = low + (high - low) / 2;

        if( white_chars_table[mid] == c )
            return true;

        if( white_chars_table[mid] < c )
            low = mid + 1;
        else
            high = mid;
    }

    return false;
}
/*
    tests whether c is a digit in the given base

    recognized characters: '0'-'9', 'a'-'f' and 'A'-'F' (letters have values 10-15)
    if digit is not null the value of the character is stored there
    (zero if the character was not recognized)

    returns true if the character was recognized and its value is less than base
*/
bool IsDigit(wchar_t c, int base, int * digit)
{
    int value = 0;
    bool recognized = true;

    if( c >= '0' && c <= '9' )
        value = c - '0';
    else
    if( c >= 'a' && c <= 'f' )
        value = c - 'a' + 10;
    else
    if( c >= 'A' && c <= 'F' )
        value = c - 'A' + 10;
    else
        recognized = false;

    if( digit )
        *digit = value;

    return recognized && value < base;
}
// lower-case version of a single char (implemented by pt_private::ToLowerGeneric)
char ToLower(char c)
{
    const char lowered = pt_private::ToLowerGeneric(c);
    return lowered;
}
// lower-case version of a single wide character (implemented by pt_private::ToLowerGeneric)
wchar_t ToLower(wchar_t c)
{
    const wchar_t lowered = pt_private::ToLowerGeneric(c);
    return lowered;
}
// upper-case version of a single char (implemented by pt_private::ToUpperGeneric)
char ToUpper(char c)
{
    const char raised = pt_private::ToUpperGeneric(c);
    return raised;
}
// upper-case version of a single wide character (implemented by pt_private::ToUpperGeneric)
wchar_t ToUpper(wchar_t c)
{
    const wchar_t raised = pt_private::ToUpperGeneric(c);
    return raised;
}
// changes the string in place by calling pt_private::ToLowerStrGeneric
void ToLower(std::string & str)
{
    std::string & target = str;
    pt_private::ToLowerStrGeneric(target);
}
// changes the wide string in place by calling pt_private::ToLowerStrGeneric
void ToLower(std::wstring & str)
{
    std::wstring & target = str;
    pt_private::ToLowerStrGeneric(target);
}
/*
    converts the string to upper case (in place)
    the conversion of each character is made by pt_private::ToUpperGeneric
*/
void ToUpper(std::string & str)
{
    // the upper-case helper has to be used here,
    // ToLowerStrGeneric() would convert the string to lower case
    pt_private::ToUpperStrGeneric(str);
}
/*
    converts the wide string to upper case (in place)
    the conversion of each character is made by pt_private::ToUpperGeneric
*/
void ToUpper(std::wstring & str)
{
    // the upper-case helper has to be used here,
    // ToLowerStrGeneric() would convert the string to lower case
    pt_private::ToUpperStrGeneric(str);
}
}

296
src/convert/text.h Normal file
View File

@@ -0,0 +1,296 @@
/*
* This file is a part of PikoTools
* and is distributed under the (new) BSD licence.
* Author: Tomasz Sowa <t.sowa@ttmath.org>
*/
/*
* Copyright (c) 2017-2021, Tomasz Sowa
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* * Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
*
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* * Neither the name Tomasz Sowa nor the names of contributors to this
* project may be used to endorse or promote products derived
* from this software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
* THE POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef headerfile_picotools_convert_text
#define headerfile_picotools_convert_text
#include "text_private.h"
namespace PT
{
/*
 * returns true if c is a white character:
 * a space (32) and a tab (9) are always white,
 * a new line (10) only if treat_new_line_as_white is true,
 * other white characters are tested only if check_additional_chars is true
 */
bool IsWhite(wchar_t c, bool check_additional_chars = true, bool treat_new_line_as_white = true);
/*
 * returns true if c is a digit in the given base ('0'-'9', 'a'-'f', 'A'-'F' are recognized),
 * if digit is not null then the value of the character is stored there
 */
bool IsDigit(wchar_t c, int base = 10, int * digit = 0);
// single character case conversions
char ToLower(char c);
wchar_t ToLower(wchar_t c);
char ToUpper(char c);
wchar_t ToUpper(wchar_t c);
// versions taking a whole string by reference (the string is changed in place)
// TODO: rename them to something like to_lower_emplace
// and add to_lower which returns a string
void ToLower(std::string & str);
void ToLower(std::wstring & str);
void ToUpper(std::string & str);
void ToUpper(std::wstring & str);
////////////////////////////
/*
 * returns a pointer to the first character for which IsWhite() returns false
 * (the flags are passed to IsWhite() unchanged)
 */
template<class CharType>
CharType * SkipWhite(CharType * str, bool check_additional_chars = true, bool treat_new_line_as_white = true)
{
    for( ; IsWhite(static_cast<wchar_t>(*str), check_additional_chars, treat_new_line_as_white) ; ++str )
    {
    }

    return str;
}
/*
 *
 * skips white characters going backwards from the end of a string
 *
 * str_end should point at the end of the string (the last item + one)
 *
 * the return value is a pointer to the first white character from the trailing
 * run of white characters, or str_end itself if the string doesn't end with a white character
 *
 */
template<class CharType>
CharType * SkipWhiteFromBack(CharType * str_begin, CharType * str_end, bool check_additional_chars = true, bool treat_new_line_as_white = true)
{
    while( str_end > str_begin )
    {
        const wchar_t last = static_cast<wchar_t>(*(str_end - 1));

        if( !IsWhite(last, check_additional_chars, treat_new_line_as_white) )
            break;

        --str_end;
    }

    return str_end;
}
/*
 * a version for a zero terminated string:
 * the end of the string is found first and then the two pointers version is called
 */
template<class CharType>
CharType * SkipWhiteFromBack(CharType * str, bool check_additional_chars = true, bool treat_new_line_as_white = true)
{
    CharType * terminator = str;

    for( ; *terminator != 0 ; ++terminator )
    {
    }

    return SkipWhiteFromBack(str, terminator, check_additional_chars, treat_new_line_as_white);
}
/*
 * compares two zero terminated strings without regard to the case of characters (ToLower is used)
 *
 * returns zero if the strings are equal, otherwise the difference
 * between the first two (lower-cased) characters which are different
 */
template<class StringType1, class StringType2>
int CompareNoCase(const StringType1 * str1, const StringType2 * str2)
{
    for( ; *str1 && *str2 ; ++str1, ++str2 )
    {
        if( ToLower(*str1) != ToLower(*str2) )
            break;
    }

    if( *str1 == 0 && *str2 == 0 )
        return 0;

    int left;
    int right;

    if constexpr (sizeof(StringType1) == 1 && sizeof(StringType2) == 1)
    {
        // one byte characters are compared as unsigned values
        left  = ToLower(static_cast<wchar_t>(static_cast<unsigned char>(*str1)));
        right = ToLower(static_cast<wchar_t>(static_cast<unsigned char>(*str2)));
    }
    else
    {
        left  = ToLower(*str1);
        right = ToLower(*str2);
    }

    return left - right;
}
/*
 * a version for string objects: c_str() of both arguments is compared
 */
template<class StringType1, class StringType2>
int CompareNoCase(const StringType1 & str1, const StringType2 & str2)
{
    const int result = CompareNoCase(str1.c_str(), str2.c_str());
    return result;
}
/*
 * the same as CompareNoCase() but with a name which takes only pointers
 */
template<class StringType1, class StringType2>
int CompareNoCasep(const StringType1 * str1, const StringType2 * str2)
{
    const int result = CompareNoCase(str1, str2);
    return result;
}
/*
 * compares a string given by two pointers [str1_begin, str1_end) with a zero terminated
 * string str2 without regard to the case of characters (ToLower is used)
 *
 * returns zero if the strings are equal, otherwise the difference between the first two
 * (lower-cased) characters which are different; zero is used as the character from
 * the first string if it has ended
 */
template<class StringType1, class StringType2>
int CompareNoCase(const StringType1 * str1_begin, const StringType1 * str1_end, const StringType2 * str2)
{
    for( ; str1_begin < str1_end && *str2 ; ++str1_begin, ++str2 )
    {
        if( ToLower(*str1_begin) != ToLower(*str2) )
            break;
    }

    if( str1_begin == str1_end && *str2 == 0 )
        return 0;

    int left = 0;
    int right;

    if constexpr (sizeof(StringType1) == 1 && sizeof(StringType2) == 1)
    {
        // one byte characters are compared as unsigned values
        if( str1_begin < str1_end )
            left = ToLower(static_cast<wchar_t>(static_cast<unsigned char>(*str1_begin)));

        right = ToLower(static_cast<wchar_t>(static_cast<unsigned char>(*str2)));
    }
    else
    {
        if( str1_begin < str1_end )
            left = ToLower(*str1_begin);

        right = ToLower(*str2);
    }

    return left - right;
}
/*
 * returns true if CompareNoCase() reports no difference between the two strings
 */
template<class StringType1, class StringType2>
bool EqualNoCase(const StringType1 * str1, const StringType2 * str2)
{
    const int difference = CompareNoCase(str1, str2);
    return difference == 0;
}
/*
 * a version for string objects: c_str() of both arguments is compared
 */
template<class StringType1, class StringType2>
bool EqualNoCase(const StringType1 & str1, const StringType2 & str2)
{
    const bool equal = EqualNoCase(str1.c_str(), str2.c_str());
    return equal;
}
/*
 * the same as EqualNoCase() but with a name which takes only pointers
 */
template<class StringType1, class StringType2>
bool EqualNoCasep(const StringType1 * str1, const StringType2 * str2)
{
    const bool equal = EqualNoCase(str1, str2);
    return equal;
}
/*
 * returns true if the string [str1_begin, str1_end) and the zero terminated
 * string str2 are equal according to CompareNoCase()
 */
template<class StringType1, class StringType2>
bool EqualNoCase(const StringType1 * str1_begin, const StringType1 * str1_end, const StringType2 * str2)
{
    const int difference = CompareNoCase(str1_begin, str1_end, str2);
    return difference == 0;
}
/*
 * returns true if long_str begins with short_str
 * (characters are compared after a conversion to wchar_t, the case is significant)
 */
template<class StringType1, class StringType2>
bool IsSubStringp(const StringType1 * short_str, const StringType2 * long_str)
{
    for( ; *short_str != 0 ; ++short_str, ++long_str )
    {
        // long_str has ended too early or the characters are different
        if( *long_str == 0 || wchar_t(*short_str) != wchar_t(*long_str) )
            return false;
    }

    return true;
}
/*
 * a pointers version, the work is done by IsSubStringp()
 */
template<class StringType1, class StringType2>
bool IsSubString(const StringType1 * short_str, const StringType2 * long_str)
{
    const bool found = IsSubStringp(short_str, long_str);
    return found;
}
/*
 * a version for string objects: c_str() of both arguments is passed to IsSubStringp()
 */
template<class StringType1, class StringType2>
bool IsSubString(const StringType1 & short_str, const StringType2 & long_str)
{
    const bool found = IsSubStringp(short_str.c_str(), long_str.c_str());
    return found;
}
/*
 * returns true if long_str begins with short_str,
 * characters are compared after a conversion with ToLower()
 */
template<class StringType1, class StringType2>
bool IsSubStringNoCasep(const StringType1 * short_str, const StringType2 * long_str)
{
    for( ; *short_str != 0 ; ++short_str, ++long_str )
    {
        // long_str has ended too early
        if( !*long_str )
            return false;

        if( ToLower(*short_str) != ToLower(*long_str) )
            return false;
    }

    return true;
}
/*
 * a pointers version, the work is done by IsSubStringNoCasep()
 */
template<class StringType1, class StringType2>
bool IsSubStringNoCase(const StringType1 * short_str, const StringType2 * long_str)
{
    const bool found = IsSubStringNoCasep(short_str, long_str);
    return found;
}
/*
 * a version for string objects: c_str() of both arguments is passed to IsSubStringNoCasep()
 */
template<class StringType1, class StringType2>
bool IsSubStringNoCase(const StringType1 & short_str, const StringType2 & long_str)
{
    const bool found = IsSubStringNoCasep(short_str.c_str(), long_str.c_str());
    return found;
}
}
#endif

View File

@@ -0,0 +1,96 @@
/*
* This file is a part of PikoTools
* and is distributed under the (new) BSD licence.
* Author: Tomasz Sowa <t.sowa@ttmath.org>
*/
/*
* Copyright (c) 2021, Tomasz Sowa
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* * Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
*
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* * Neither the name Tomasz Sowa nor the names of contributors to this
* project may be used to endorse or promote products derived
* from this software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
* THE POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef headerfile_picotools_convert_text_private
#define headerfile_picotools_convert_text_private
#include <string>
namespace PT
{

namespace pt_private
{

/*
 * returns a small letter for a capital letter from the range 'A'-'Z',
 * any other character is returned without changes
 */
template<class CharType>
CharType ToLowerGeneric(CharType c)
{
    const bool is_capital = (c >= 'A' && c <= 'Z');

    return is_capital ? static_cast<CharType>(c - 'A' + 'a') : c;
}


/*
 * returns a capital letter for a small letter from the range 'a'-'z',
 * any other character is returned without changes
 */
template<class CharType>
CharType ToUpperGeneric(CharType c)
{
    const bool is_small = (c >= 'a' && c <= 'z');

    return is_small ? static_cast<CharType>(c - 'a' + 'A') : c;
}


/*
 * converts each character of the string with ToLowerGeneric (in place)
 */
template<class StringType>
void ToLowerStrGeneric(StringType & s)
{
    typename StringType::size_type pos = 0;

    while( pos < s.size() )
    {
        s[pos] = ToLowerGeneric(s[pos]);
        ++pos;
    }
}


/*
 * converts each character of the string with ToUpperGeneric (in place)
 */
template<class StringType>
void ToUpperStrGeneric(StringType & s)
{
    typename StringType::size_type pos = 0;

    while( pos < s.size() )
    {
        s[pos] = ToUpperGeneric(s[pos]);
        ++pos;
    }
}


} // namespace pt_private

} // namespace PT
#endif

496
src/date/date.cpp Normal file
View File

@@ -0,0 +1,496 @@
/*
* This file is a part of PikoTools
* and is distributed under the (new) BSD licence.
* Author: Tomasz Sowa <t.sowa@ttmath.org>
*/
/*
* Copyright (c) 2012-2018, Tomasz Sowa
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* * Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
*
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* * Neither the name Tomasz Sowa nor the names of contributors to this
* project may be used to endorse or promote products derived
* from this software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
* THE POSSIBILITY OF SUCH DAMAGE.
*/
#include "date.h"
// for memset
#include <string.h>
namespace PT
{
// default constructor: the object is initialized by Clear()
Date::Date()
{
    this->Clear();
}
// copy constructor: all fields are copied by the assignment operator
Date::Date(const Date & d)
{
    *this = d;
}
// creates a date from time_t (see FromTime)
Date::Date(time_t t)
{
    this->FromTime(t);
}
// creates a date from a tm structure (see FromTm)
Date::Date(const tm & t)
{
    this->FromTm(t);
}
// creates a date from a string
Date::Date(const char * str)
{
    // Clear() is called first because the parsing can be stopped
    // in the middle of the string (if there are errors)
    // and some fields would be left uninitialized
    this->Clear();
    this->Parse(str);
}
// creates a date from a wide string, the fields are cleared before parsing
Date::Date(const wchar_t * str)
{
    this->Clear();
    this->Parse(str);
}
// creates a date from a std::string, the fields are cleared before parsing
Date::Date(const std::string & str)
{
    this->Clear();
    this->Parse(str);
}
// creates a date from a std::wstring, the fields are cleared before parsing
Date::Date(const std::wstring & str)
{
    this->Clear();
    this->Parse(str);
}
// copies all six fields from d
Date & Date::operator=(const Date & d)
{
    // time part
    sec   = d.sec;
    min   = d.min;
    hour  = d.hour;

    // date part
    day   = d.day;
    month = d.month;
    year  = d.year;

    return *this;
}
// sets the date from time_t (see FromTime)
Date & Date::operator=(time_t t)
{
    this->FromTime(t);

    return *this;
}
// sets the date from a tm structure (see FromTm)
Date & Date::operator=(const tm & t)
{
    this->FromTm(t);

    return *this;
}
// parses the string with Parse(), the fields are not cleared before parsing
Date & Date::operator=(const char * str)
{
    this->Parse(str);

    return *this;
}
// parses the wide string with Parse(), the fields are not cleared before parsing
Date & Date::operator=(const wchar_t * str)
{
    this->Parse(str);

    return *this;
}
// parses the std::string with Parse(), the fields are not cleared before parsing
Date & Date::operator=(const std::string & str)
{
    this->Parse(str);

    return *this;
}
// parses the std::wstring with Parse(), the fields are not cleared before parsing
Date & Date::operator=(const std::wstring & str)
{
    this->Parse(str);

    return *this;
}
// returns a new date: this date converted to time_t with t added
Date Date::operator+(time_t t) const
{
    const time_t shifted = ToTime() + t;

    return Date(shifted);
}
// returns a new date: this date converted to time_t with t subtracted
Date Date::operator-(time_t t) const
{
    const time_t shifted = ToTime() - t;

    return Date(shifted);
}
// adds t to this date (the calculation is made on time_t values)
Date & Date::operator+=(time_t t)
{
    const time_t current = ToTime();
    FromTime(current + t);

    return *this;
}
// subtracts t from this date (the calculation is made on time_t values)
Date & Date::operator-=(time_t t)
{
    const time_t current = ToTime();
    FromTime(current - t);

    return *this;
}
// exchanges the contents of *this and date
void Date::Swap(Date & date)
{
    const Date other(date);

    date = *this;
    *this = other;
}
// returns the absolute difference between the two dates converted to time_t
time_t Date::operator-(const Date & d) const
{
    const time_t mine = ToTime();
    const time_t other = d.ToTime();

    return (other >= mine) ? (other - mine) : (mine - other);
}
// true if year, month and day are equal in both dates
bool Date::IsTheSameDay(const Date & d) const
{
    if( year != d.year || month != d.month )
        return false;

    return day == d.day;
}
// true if hour, min and sec are equal in both dates
bool Date::IsTheSameHour(const Date & d) const
{
    if( hour != d.hour || min != d.min )
        return false;

    return sec == d.sec;
}
// two dates are equal if both the day part and the time part are the same
bool Date::operator==(const Date & d) const
{
    if( !IsTheSameDay(d) )
        return false;

    return IsTheSameHour(d);
}
// negation of operator==
bool Date::operator!=(const Date & d) const
{
    const bool equal = (*this == d);

    return !equal;
}
int Date::Compare(const Date & d) const
{
if( year != d.year )
return year - d.year;
if( month != d.month )
return month - d.month;
if( day != d.day )
return day - d.day;
if( hour != d.hour )
return hour - d.hour;
if( min != d.min )
return min - d.min;
if( sec != d.sec )
return sec - d.sec;
// dates are equal
return 0;
}
// true if Compare() returns a value greater than zero
bool Date::operator>(const Date & d) const
{
    const int order = Compare(d);

    return order > 0;
}
// true if Compare() returns zero or a value greater than zero
bool Date::operator>=(const Date & d) const
{
    const int order = Compare(d);

    return order >= 0;
}
// true if Compare() returns a value less than zero
bool Date::operator<(const Date & d) const
{
    const int order = Compare(d);

    return order < 0;
}
// true if Compare() returns zero or a value less than zero
bool Date::operator<=(const Date & d) const
{
    const int order = Compare(d);

    return order <= 0;
}
// sets 1970-01-01 00:00:00
void Date::Clear()
{
    // midnight
    sec  = 0;
    min  = 0;
    hour = 0;

    // the first of January 1970
    day   = 1;
    month = 1;
    year  = 1970;
}
// corrects val so it is not less than val_min and not greater than val_max
// (the lower limit is applied first)
void Date::AssertRange(int & val, int val_min, int val_max)
{
    val = (val < val_min) ? val_min : val;
    val = (val > val_max) ? val_max : val;
}
// puts each field into its correct range
void Date::AssertCorrectDate()
{
    // the upper limit of the year (10000) is only a 'cosmetic' one,
    // calculations can be made with greater values
    AssertRange(year, 1970, 10000);
    AssertRange(month, 1, 12);

    // year and month are already corrected so the length of the month can be taken
    const int last_day = MonthLen(year, month);
    AssertRange(day, 1, last_day);

    AssertRange(hour, 0, 23);
    AssertRange(min, 0, 59);
    AssertRange(sec, 0, 59);
}
bool Date::IsCorrectDate()
{
// 10000 is only a 'cosmetic' limit
// we can make calculations with greater values
if( year < 1970 || year > 10000 )
return false;
if( month < 1 || month > 12 )
return false;
if( day < 1 || day > MonthLen(year, month) )
return false;
if( hour < 0 || hour > 23 )
return false;
if( min < 0 || min > 59 )
return false;
if( sec < 0 || sec > 59 )
return false;
return true;
}
// returns the number of days in the month m (1-12) of the year y
// or zero if m is out of range
int Date::MonthLen(int y, int m)
{
    if( m < 1 || m > 12 )
        return 0;

    // February has one more day in a leap year
    if( m == 2 && IsYearLeap(y) )
        return 29;

    const int days[12] = {31, 28, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31};

    return days[m-1];
}
// a year is a leap year if it is divisible by 400
// or if it is divisible by 4 and not divisible by 100
bool Date::IsYearLeap(int y)
{
    if( y % 400 == 0 )
        return true;

    if( y % 100 == 0 )
        return false;

    return y % 4 == 0;
}
// tests the year stored in this object
bool Date::IsYearLeap() const
{
    const bool leap = IsYearLeap(year);

    return leap;
}
/*
    returns the number of days counted from 0000:03:01 (year 0, month 3 - March, day 1)
*/
long long Date::ToDays() const
{
    // months are shifted so that March has index 0 and February index 11,
    // January and February are counted as months of the previous year
    const long long shifted_month = (month + 9) % 12;
    const long long shifted_year = year - shifted_month/10;

    const long long leap_days = shifted_year/4 - shifted_year/100 + shifted_year/400;
    const long long days_before_month = (shifted_month*306 + 5)/10;

    return 365*shifted_year + leap_days + days_before_month + (day - 1);
}
// converts the date to time_t (seconds)
time_t Date::ToTime() const
{
    // 719468 is subtracted from ToDays() (FromTime() adds the same value)
    const long long days_since_epoch = ToDays() - 719468;

    time_t seconds = time_t(days_since_epoch * 60*60*24);
    seconds += hour * 60 * 60;
    seconds += min * 60;
    seconds += sec;

    return seconds;
}
// returns a tm structure filled from this date
// tm_yday is not calculated, fields which are not set here are equal to zero
tm Date::ToTm() const
{
    tm result;
    memset(&result, 0, sizeof(tm));

    // time part
    result.tm_sec  = sec;
    result.tm_min  = min;
    result.tm_hour = hour;

    // date part: in tm years are counted from 1900 and months from zero
    result.tm_mday = day;
    result.tm_mon  = month - 1;
    result.tm_year = year - 1900;

    result.tm_wday = WeekDay();

    return result;
}
/*
    this method calculates year, month and a day from given 'days'
    'days' starts from 0000:03:01 (year 0, month 3 - March, day 1)
    (hour, min and sec are not changed)
*/
void Date::FromDays(long long days)
{
    //static_assert( sizeof(long long) >= 8 , "operation here should be used at least with 64 bits precision");

    // the number of days counted for a given (shifted) year, the same formula is used in ToDays()
    auto days_to_year = [](int y) -> int {
        return 365*y + y/4 - y/100 + y/400;
    };

    // the first approximation of the year, it can be too big by one
    year = int(((long long)(10000)*days + 14780)/3652425);
    int day_of_year = int(days - days_to_year(year));

    if( day_of_year < 0 )
    {
        year -= 1;
        day_of_year = int(days - days_to_year(year));
    }

    // an index of the month counted from March (March is zero)
    const int mi = (100*day_of_year + 52)/3060;

    month = (mi + 2)%12 + 1;
    year += (mi + 2)/12;
    day = day_of_year - (mi*306 + 5)/10 + 1;
}
// sets all fields from time_t (seconds)
void Date::FromTime(time_t t)
{
    const time_t whole_days = t / 60 / 60 / 24;
    const time_t rest = t - whole_days * 60 * 60 * 24;

    // year, month and day (719468 is the value subtracted in ToTime())
    FromDays((long long)(whole_days) + 719468);

    // the time part is calculated from the seconds left in the last day
    hour = int(rest / 60 / 60);
    min  = int((rest - hour * 60 * 60) / 60);
    sec  = int((rest - hour * 60 * 60 - min * 60));
}
// sets all fields from a tm structure
void Date::FromTm(const tm & t)
{
    // time part
    sec  = t.tm_sec;
    min  = t.tm_min;
    hour = t.tm_hour;

    // date part: in tm years are counted from 1900 and months from zero
    day   = t.tm_mday;
    month = t.tm_mon + 1;
    year  = t.tm_year + 1900;
}
// returns an index of the day of the week calculated from ToDays()
int Date::WeekDay() const
{
    return static_cast<int>((ToDays() + 3) % 7);
}
} // namespace

996
src/date/date.h Normal file
View File

@@ -0,0 +1,996 @@
/*
* This file is a part of PikoTools
* and is distributed under the (new) BSD licence.
* Author: Tomasz Sowa <t.sowa@ttmath.org>
*/
/*
* Copyright (c) 2012-2018, Tomasz Sowa
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* * Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
*
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* * Neither the name Tomasz Sowa nor the names of contributors to this
* project may be used to endorse or promote products derived
* from this software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
* THE POSSIBILITY OF SUCH DAMAGE.
*/
// include guard of date/date.h
// (the macro is named after this header so it cannot clash with the guard of the mainparser header)
#ifndef headerfile_picotools_date_date
#define headerfile_picotools_date_date
#include <ctime>
#include <string>
#include "convert/inttostr.h"
namespace PT
{
/*
this class represents a Date (year, month, day, hour, min, sec)
it has O(1) algorithm when converting from/to time_t (seconds from the Unix Epoch)
algorithm description:
http://alcor.concordia.ca/~gpkatch/gdate-algorithm.html
http://alcor.concordia.ca/~gpkatch/gdate-method.html
current limitation:
we do not support leap seconds
*/
class Date
{
public:
/*
the date
*/
int year; // 1970 - ...
int month; // 1 - 12
int day; // 1 - 31
int hour; // 0 - 23
int min; // 0 - 59
int sec; // 0 - 59
/*
default c-ctor sets the Unix Epoch (Clear method): 1970.01.01 00:00:00
*/
Date();
/*
converting from Date, time_t (seconds from the Unix Epoch), tm structure, and strings
*/
Date(const Date & d);
Date(time_t t);
Date(const tm & t);
Date(const char * str);
Date(const wchar_t * str);
Date(const std::string & str);
Date(const std::wstring & str);
Date & operator=(const Date & d);
Date & operator=(time_t t);
Date & operator=(const tm & t);
Date & operator=(const char * str);
Date & operator=(const wchar_t * str);
Date & operator=(const std::string & str);
Date & operator=(const std::wstring & str);
/*
adding/subtracting time_t (seconds from the Unix Epoch)
*/
Date operator+(time_t t) const;
Date operator-(time_t t) const;
Date & operator+=(time_t t);
Date & operator-=(time_t t);
/*
swapping the contents of *this with date
*/
void Swap(Date & date);
/*
converts time_t in seconds (from the Unix Epoch) to this object
*/
void FromTime(time_t t);
/*
converts tm structure to this object
*/
void FromTm(const tm & t);
/*
returns time_t (in seconds from the Unix Epoch)
*/
time_t ToTime() const;
/*
returns a tm structure
tm_year, tm_mon, tm_mday, tm_hour, tm_min, tm_sec and tm_wday fields are set
(tm_yday is not calculated), the rest is equal to zero
*/
tm ToTm() const;
/*
getting/setting the number of days from 0000:03:01 (year 0, month 3 - March, day 1)
(ToDays() and FromDays() can work even with a year less than 1970)
*/
long long ToDays() const;
void FromDays(long long g);
/*
returns the difference in seconds between two dates
(always a non-negative value, it is zero when both dates are equal)
*/
time_t operator-(const Date & d) const;
/*
'Compare' returns zero if this and d are equal
return value less than zero if this is lower than d
and a value greater than zero if this is greater than d
*/
int Compare(const Date & d) const;
/*
returns true if year, month and day are the same
*/
bool IsTheSameDay(const Date & d) const;
/*
returns true if hour, min and sec are the same
*/
bool IsTheSameHour(const Date & d) const;
/*
operators for comparing
*/
bool operator==(const Date & d) const;
bool operator!=(const Date & d) const;
bool operator>(const Date & d) const;
bool operator>=(const Date & d) const;
bool operator<(const Date & d) const;
bool operator<=(const Date & d) const;
/*
set the Unix Epoch: 1970.01.01 00:00:00
*/
void Clear();
/*
assert a correct date (values will be from the correct range)
year: 1970 - 10000
month: 1 - 12
day: 1 - MonthLen
hour: 0 - 23
min: 0 - 59
sec: 0 - 59
*/
void AssertCorrectDate();
/*
return true if values are from the correct range
year: 1970 - 10000
month: 1 - 12
day: 1 - MonthLen
hour: 0 - 23
min: 0 - 59
sec: 0 - 59
*/
bool IsCorrectDate();
/*
returns how many days there is in a month
y - year 1970 - ...
m - month 1-12
*/
static int MonthLen(int y, int m);
/*
returns true if 'y' is a leap year
leap year has one additional day (in february) - so the year lasts 366 days
*/
static bool IsYearLeap(int y);
/*
returns true if the current year is a leap year
*/
bool IsYearLeap() const;
/*
returns a day index from a week
sunday - 0
monday - 1
...
saturday - 6
*/
int WeekDay() const;
/*
this method outputs to the given stream: YYYY-MM-DD, eg. 1990-02-12
ISO 8601 format
*/
template<class Stream>
void SerializeYearMonthDay(Stream & out) const;
/*
this method outputs to the given stream: HH:MM:SS, eg: 13:05:39
ISO 8601 format
*/
template<class Stream>
void SerializeHourMinSec(Stream & out) const;
/*
this method outputs to the given stream: MM-DD, eg. 02-12 (02 month, 12 day)
*/
template<class Stream>
void SerializeMonthDay(Stream & out) const;
/*
this method outputs to the given stream: HH:MM, eg: 13:05
*/
template<class Stream>
void SerializeHourMin(Stream & out) const;
/*
this method outputs to the given stream: YYYY-MM-DD HH:MM:SS, eg: 1990-02-12 13:05:39
ISO 8601 format
*/
template<class Stream>
void Serialize(Stream & out) const;
/*
this method outputs to the given stream: YYYY-MM-DDTHH:MM:SSZ, eg: 1990-02-12T13:05:39Z
ISO 8601 format
*/
template<class Stream>
void SerializeISO(Stream & out) const;
/*
parsing day month and year
the input string can be as follows:
"12-10-2008"
white characters are omitted and the method stops after reading the year
so the input string can be:
" 12 - 10 - 2008some text "
a white character means a space or a tab
as a separator can be '-' '/' or '.'
so below strings have the same meaning:
" 12.10.2008 "
" 12/10 / 2008 "
this method doesn't test if the values are correct
use IsCorrectDate() to check
*/
template<class CStringType>
bool ParseDayMonthYear(const CStringType * str, const CStringType ** str_after = 0);
template<class StringType>
bool ParseDayMonthYear(const StringType & str);
/*
parsing year month and day
the input string can be as follows:
"2008-10-12"
white characters are omitted and the method stops after reading the day
so the input string can be:
" 2008 - 10 - 12some text "
a white character means a space or a tab
as a separator can be '-' '/' or '.'
so below strings have the same meaning:
" 2008.10.12 "
" 2008/10 / 12 "
this method doesn't test if the values are correct
use IsCorrectDate() to check
*/
template<class CStringType>
bool ParseYearMonthDay(const CStringType * str, const CStringType ** str_after = 0);
template<class StringType>
bool ParseYearMonthDay(const StringType & str);
/*
parsing month and day
the input string can be as follows:
"10-12" (month: 10, day: 12)
white characters are omitted and the method stops after reading the day
so the input string can be:
" 10 - 12some text "
a white character means a space or a tab
as a separator can be '-' '/' or '.'
so below strings have the same meaning:
" 10.12 "
" 10 / 12 "
this method doesn't test if the values are correct
use IsCorrectDate() to check
*/
template<class CStringType>
bool ParseMonthDay(const CStringType * str, const CStringType ** str_after = 0);
template<class StringType>
bool ParseMonthDay(const StringType & str);
/*
parsing hour minutes and seconds
the input string can be as follows:
"14:10:35"
white characters are omitted and the method stops after reading seconds
so the input string can be:
" 14 : 10 : 35some text "
a white character means a space or a tab
a separator is only the ':' character
this method doesn't test if the values are correct
use IsCorrectDate() to check
*/
template<class CStringType>
bool ParseHourMinSec(const CStringType * str, const CStringType ** str_after = 0);
template<class StringType>
bool ParseHourMinSec(const StringType & str);
/*
parsing hour and minutes
the input string can be as follows:
"14:10"
white characters are omitted and the method stops after reading minutes
so the input string can be:
" 14 : 10some text "
a white character means a space or a tab
a separator is only the ':' character
this method doesn't test if the values are correct
use IsCorrectDate() to check
*/
template<class CStringType>
bool ParseHourMin(const CStringType * str, const CStringType ** str_after = 0);
template<class StringType>
bool ParseHourMin(const StringType & str);
template<class CStringType>
bool ParseZoneOffset(const CStringType * str, const CStringType ** str_after = 0);
template<class StringType>
bool ParseZoneOffset(const StringType & str);
/*
parsing hour and minutes (if exists) and seconds (if exists)
the input string can be as follows:
"14" -- only an hour given (min and sec will be zero)
"14:10" -- hour with minutes (sec will be zero)
"14:10:35" -- hour, minutes and seconds
white characters are omitted so these are valid strings too:
" 14 : 10 : 35 "
" 14 : 10 : 35some text "
a white character means a space or a tab
this method doesn't test if the values are correct
use IsCorrectDate() to check
*/
template<class CStringType>
bool ParseTime(const CStringType * str, const CStringType ** str_after = 0);
template<class StringType>
bool ParseTime(const StringType & str);
/*
parsing month, day, hour and minutes (if exists) and seconds (if exists)
the input string can be as follows:
"10-23 14" -- only month, day and hour given (min and sec will be zero)
"10-23 14:10" -- month, day and hour with minutes (sec will be zero)
"10-23 14:10:35" -- month, day, hour, minutes and seconds
white characters are omitted so these are valid strings too:
" 10 - 23 14 : 10 : 35 "
" 10 - 23 14 : 10 : 35some text "
a white character means a space or a tab
this method doesn't test if the values are correct
use IsCorrectDate() to check
*/
template<class CStringType>
bool ParseMonthDayTime(const CStringType * str, const CStringType ** str_after = 0);
template<class StringType>
bool ParseMonthDayTime(const StringType & str);
/*
parsing year/month/day hour:min:sec
the input strings can be as follows:
"2008-10-12 14:10:35"
"2008/10/12 14:10:35"
"2008.10.12 14:10:35"
"2008-10/12 14:10:35"
white characters are omitted
so the input string can be:
" 2008 - 10 / 12 14 : 10 : 35 "
a white character means a space or a tab
as a separator for year/month/day can be '-' '/' or '.'
see ParseYearMonthDay() for details
as a separator for hour:min:sec is the ':' character
see ParseHourMinSec() for details
at the end the method checks if the values are correct
(by using IsCorrectDate())
*/
template<class CStringType>
bool Parse(const CStringType * str, const CStringType ** str_after = 0);
template<class StringType>
bool Parse(const StringType & str);
private:
void AssertRange(int & val, int val_min, int val_max);
template<class Stream>
void SerializeInt(Stream & out, int val, size_t min_width) const;
template<class Stream>
void SerializeInt(Stream & out, int val) const;
template<class CStringType>
void SetAfter(const CStringType * str, const CStringType ** str_after);
template<class CStringType>
void SkipWhite(const CStringType * & str);
template<class CStringType>
bool ReadInt(const CStringType * & str, int & result, size_t max_digits = 0);
template<class CStringType>
bool SkipSeparator(const CStringType * & str, int separator, int separator2 = -1, int separator3 = -1);
};
/*
    writes val to the stream as a base-10 number converted by Toa(),
    preceded by as many '0' characters as are needed to reach min_width
    (counted against the length reported by Toa())

    nothing is written when Toa() reports a failure
*/
template<class Stream>
void Date::SerializeInt(Stream & out, int val, size_t min_width) const
{
    char digits[64];
    size_t digits_len;

    if( !Toa(val, digits, sizeof(digits) / sizeof(char), 10, &digits_len) )
        return;

    // number of padding characters still to be written
    size_t missing = (digits_len < min_width) ? (min_width - digits_len) : 0;

    for( ; missing > 0 ; --missing)
        out << '0';

    out << digits;
}

/*
    two-argument variant: uses a minimal width of two characters
*/
template<class Stream>
void Date::SerializeInt(Stream & out, int val) const
{
    const size_t default_width = 2;

    SerializeInt(out, val, default_width);
}
/*
    writes the date part as: year-month-day
    the year is padded to at least four characters, month and day to two
*/
template<class Stream>
void Date::SerializeYearMonthDay(Stream & out) const
{
    SerializeInt(out, year, 4);
    out << '-';

    // "month-day" is exactly what SerializeMonthDay() produces
    SerializeMonthDay(out);
}
/*
    writes the time part as: hour:min:sec
    each field is padded to at least two characters
*/
template<class Stream>
void Date::SerializeHourMinSec(Stream & out) const
{
    // "hour:min" is exactly what SerializeHourMin() produces
    SerializeHourMin(out);

    out << ':';
    SerializeInt(out, sec);
}
/*
    writes: month-day
    both fields are padded to at least two characters
*/
template<class Stream>
void Date::SerializeMonthDay(Stream & out) const
{
    const size_t width = 2;

    SerializeInt(out, month, width);
    out << '-';
    SerializeInt(out, day, width);
}
/*
    writes: hour:min
    both fields are padded to at least two characters
*/
template<class Stream>
void Date::SerializeHourMin(Stream & out) const
{
    const size_t width = 2;

    SerializeInt(out, hour, width);
    out << ':';
    SerializeInt(out, min, width);
}
template<class Stream>
void Date::Serialize(Stream & out) const
{
SerializeYearMonthDay(out);
out << ' ';
SerializeHourMinSec(out);
}
template<class Stream>
void Date::SerializeISO(Stream & out) const
{
SerializeYearMonthDay(out);
out << 'T';
SerializeHourMinSec(out);
out << 'Z';
}
/*
    reads: day <sep> month <sep> year   where <sep> is one of '.' '-' '/'

    parsing stops at the first element which cannot be read;
    the position reached is stored in *str_after (if str_after is not null)
    returns true only if all three numbers were read
*/
template<class CStringType>
bool Date::ParseDayMonthYear(const CStringType * str, const CStringType ** str_after)
{
    const bool all_read =
        ReadInt(str, day)   && SkipSeparator(str, '.', '-', '/') &&
        ReadInt(str, month) && SkipSeparator(str, '.', '-', '/') &&
        ReadInt(str, year);

    SetAfter(str, str_after);

    return all_read;
}

/*
    the same for a string object (anything providing c_str())
*/
template<class StringType>
bool Date::ParseDayMonthYear(const StringType & str)
{
    const auto * c_string = str.c_str();

    return ParseDayMonthYear(c_string);
}
/*
    reads: year <sep> month <sep> day   where <sep> is one of '.' '-' '/'

    parsing stops at the first element which cannot be read;
    the position reached is stored in *str_after (if str_after is not null)
    returns true only if all three numbers were read
*/
template<class CStringType>
bool Date::ParseYearMonthDay(const CStringType * str, const CStringType ** str_after)
{
    const bool all_read =
        ReadInt(str, year)  && SkipSeparator(str, '.', '-', '/') &&
        ReadInt(str, month) && SkipSeparator(str, '.', '-', '/') &&
        ReadInt(str, day);

    SetAfter(str, str_after);

    return all_read;
}

/*
    the same for a string object (anything providing c_str())
*/
template<class StringType>
bool Date::ParseYearMonthDay(const StringType & str)
{
    const auto * c_string = str.c_str();

    return ParseYearMonthDay(c_string);
}
/*
    reads: month <sep> day   where <sep> is one of '.' '-' '/'

    the position reached is stored in *str_after (if str_after is not null)
    returns true only if both numbers were read
*/
template<class CStringType>
bool Date::ParseMonthDay(const CStringType * str, const CStringType ** str_after)
{
    const bool all_read =
        ReadInt(str, month) && SkipSeparator(str, '.', '-', '/') &&
        ReadInt(str, day);

    SetAfter(str, str_after);

    return all_read;
}

/*
    the same for a string object (anything providing c_str())
*/
template<class StringType>
bool Date::ParseMonthDay(const StringType & str)
{
    const auto * c_string = str.c_str();

    return ParseMonthDay(c_string);
}
/*
    reads: hour : min : sec   (the only accepted separator is ':')

    the position reached is stored in *str_after (if str_after is not null)
    returns true only if all three numbers were read
*/
template<class CStringType>
bool Date::ParseHourMinSec(const CStringType * str, const CStringType ** str_after)
{
    const bool all_read =
        ReadInt(str, hour) && SkipSeparator(str, ':') &&
        ReadInt(str, min)  && SkipSeparator(str, ':') &&
        ReadInt(str, sec);

    SetAfter(str, str_after);

    return all_read;
}

/*
    the same for a string object (anything providing c_str())
*/
template<class StringType>
bool Date::ParseHourMinSec(const StringType & str)
{
    const auto * c_string = str.c_str();

    return ParseHourMinSec(c_string);
}
/*
    reads: hour : min   (the only accepted separator is ':')

    the position reached is stored in *str_after (if str_after is not null)
    returns true only if both numbers were read
*/
template<class CStringType>
bool Date::ParseHourMin(const CStringType * str, const CStringType ** str_after)
{
    const bool all_read =
        ReadInt(str, hour) && SkipSeparator(str, ':') &&
        ReadInt(str, min);

    SetAfter(str, str_after);

    return all_read;
}

/*
    the same for a string object (anything providing c_str())
*/
template<class StringType>
bool Date::ParseHourMin(const StringType & str)
{
    const auto * c_string = str.c_str();

    return ParseHourMin(c_string);
}
/*
    parsing a time zone offset and applying it to the current date

    the input must start (after optional white characters) with '+' or '-'
    followed by an hour (at most two digits, not greater than 14),
    then optionally ':' and minutes (at most two digits, less than 60), e.g.:
      "+02"  "+02:30"  "+0230"  "-05:00"

    if the minutes are missing or out of range they are treated as zero

    when an offset was read the date is changed with FromTime(ToTime() - offset)
    and true is returned, otherwise the date is left untouched and false is returned

    if str_after is not null then *str_after is always set:
    - on failure it points at the first non-white character of the input
    - on success it points after the last part of the offset which was consumed
*/
template<class CStringType>
bool Date::ParseZoneOffset(const CStringType * str, const CStringType ** str_after)
{
    bool result = false;
    bool is_negative = false;
    int offset_hour = 0;
    int offset_min = 0;

    SkipWhite(str);

    // publish a well defined position even if nothing more can be parsed,
    // in the same way as the other Parse...() methods do
    SetAfter(str, str_after);

    if( *str == '-' || *str == '+' )
    {
        is_negative = (*str == '-');
        str += 1;

        if( ReadInt(str, offset_hour, 2) && offset_hour >= -12 && offset_hour <= 14 )
        {
            SkipWhite(str);
            SetAfter(str, str_after);

            if( *str == ':' )
            {
                str += 1;
                SkipWhite(str);
                SetAfter(str, str_after);
            }

            if( ReadInt(str, offset_min, 2) && offset_min > -60 && offset_min < 60 )
            {
                SetAfter(str, str_after);
            }
            else
            {
                // no (or incorrect) minutes: only the hour part is used
                offset_min = 0;
            }

            time_t offset = static_cast<time_t>(offset_hour) * 60 * 60 + static_cast<time_t>(offset_min) * 60;
            result = true;

            if( is_negative )
                offset = -offset;

            // the offset is subtracted from the current time
            FromTime(ToTime() - offset);
        }
    }

    return result;
}

/*
    the same for a string object (anything providing c_str())
*/
template<class StringType>
bool Date::ParseZoneOffset(const StringType & str)
{
    return ParseZoneOffset(str.c_str());
}
/*
    reads an hour, optionally followed by ":min" and optionally by ":sec"

    min and sec are zeroed as soon as the hour has been read,
    so "14" gives 14:00:00 and "14:10" gives 14:10:00

    returns false if the hour cannot be read or if a ':' is not followed by a number
    the position reached is stored in *str_after (if str_after is not null)
*/
template<class CStringType>
bool Date::ParseTime(const CStringType * str, const CStringType ** str_after)
{
    bool ok = ReadInt(str, hour);

    if( ok )
    {
        min = 0;
        sec = 0;

        if( SkipSeparator(str, ':') )
        {
            ok = ReadInt(str, min);

            if( ok && SkipSeparator(str, ':') )
                ok = ReadInt(str, sec);
        }
    }

    SetAfter(str, str_after);

    return ok;
}

/*
    the same for a string object (anything providing c_str())
*/
template<class StringType>
bool Date::ParseTime(const StringType & str)
{
    const auto * c_string = str.c_str();

    return ParseTime(c_string);
}
/*
    reads "month-day" with ParseMonthDay() and then the time with ParseTime()

    the position reached by the last parser which was run
    is stored in *str_after (if str_after is not null)
*/
template<class CStringType>
bool Date::ParseMonthDayTime(const CStringType * str, const CStringType ** str_after)
{
    const CStringType * rest = str;

    const bool ok = ParseMonthDay(str, &rest) && ParseTime(rest, &rest);

    SetAfter(rest, str_after);

    return ok;
}

/*
    the same for a string object (anything providing c_str())
*/
template<class StringType>
bool Date::ParseMonthDayTime(const StringType & str)
{
    const auto * c_string = str.c_str();

    return ParseMonthDayTime(c_string);
}
/*
    reads a full date: year/month/day, an optional 'T', hour:min:sec
    and then either a 'Z' or an optional time zone offset

    the position reached is stored in *str_after (if str_after is not null)
    returns true if the date and time were read and IsCorrectDate() accepts them
*/
template<class CStringType>
bool Date::Parse(const CStringType * str, const CStringType ** str_after)
{
    const CStringType * pos = str;
    bool parsed = false;

    if( ParseYearMonthDay(str, &pos) )
    {
        SkipWhite(pos);

        // ISO 8601 format
        // https://en.wikipedia.org/wiki/ISO_8601
        // at the moment the 'T' character is only skipped
        if( *pos == 'T' )
            pos += 1;

        parsed = ParseHourMinSec(pos, &pos);

        if( parsed )
        {
            SkipWhite(pos);

            if( *pos == 'Z' )
                pos += 1;
            else
                ParseZoneOffset(pos, &pos); // the offset is optional, errors are ignored here
        }
    }

    SetAfter(pos, str_after);

    // the values are validated only if the whole date was read
    return parsed && IsCorrectDate();
}

/*
    the same for a string object (anything providing c_str())
*/
template<class StringType>
bool Date::Parse(const StringType & str)
{
    const auto * c_string = str.c_str();

    return Parse(c_string);
}
/*
    stores the current parsing position in *str_after
    does nothing if the caller passed a null str_after
*/
template<class CStringType>
void Date::SetAfter(const CStringType * str, const CStringType ** str_after)
{
    if( !str_after )
        return;

    *str_after = str;
}
/*
    moves str forward over spaces and tabs
*/
template<class CStringType>
void Date::SkipWhite(const CStringType * & str)
{
    for( ; *str == ' ' || *str == '\t' ; str += 1)
    {
    }
}
/*
    skips white characters and reads a non-negative decimal number into result

    max_digits limits how many digits are consumed (zero means no limit)
    str is left pointing at the first character which was not consumed

    returns true if at least one digit was read
    returns false if there was no digit or if the value exceeded 10000
*/
template<class CStringType>
bool Date::ReadInt(const CStringType * & str, int & result, size_t max_digits)
{
    SkipWhite(str);

    result = 0;
    size_t digits_read = 0;

    for(;;)
    {
        const bool is_digit = (*str >= '0' && *str <= '9');
        const bool has_room = (max_digits == 0 || digits_read < max_digits);

        if( !is_digit || !has_room )
            break;

        result = result * 10 + (*str - '0');
        str += 1;
        digits_read += 1;

        if( result > 10000 )
        {
            // we assumed the max year to be 10000
            return false;
        }
    }

    return digits_read > 0;
}
/*
    skips white characters and then consumes exactly one separator character

    separator2 and separator3 are optional alternatives (-1 means: not used)
    returns true if a separator was consumed, otherwise str stays
    at the first non-white character and false is returned
*/
template<class CStringType>
bool Date::SkipSeparator(const CStringType * & str, int separator, int separator2, int separator3)
{
    SkipWhite(str);

    const bool matched =
        (*str == separator) ||
        (separator2 != -1 && *str == separator2) ||
        (separator3 != -1 && *str == separator3);

    if( matched )
        str += 1;

    return matched;
}
} // namespace
#endif

147
src/log/filelog.cpp Normal file
View File

@@ -0,0 +1,147 @@
/*
* This file is a part of Winix
* and is distributed under the 2-Clause BSD licence.
* Author: Tomasz Sowa <t.sowa@ttmath.org>
*/
/*
* Copyright (c) 2018-2021, Tomasz Sowa
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* 1. Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
*
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*
*/
#include "filelog.h"
#include <ctime>
#include <string.h>
#include "utf8/utf8.h"
namespace PT
{
/*
 * default settings: log level 4, no logging to stdout,
 * the log file is not open yet and lines are not saved one by one
 */
FileLog::FileLog() :
    log_stdout(false),
    log_file_open(false),
    log_level(4),
    save_each_line(false)
{
}
// nothing is done explicitly here, the members release their own resources
FileLog::~FileLog()
{
}
// virtual hook called by save_log() before the log is written,
// this base implementation does no locking and always returns true
bool FileLog::synchro_lock()
{
return true;
}
// virtual hook called by save_log() after the log has been written,
// this base implementation does nothing
void FileLog::synchro_unlock()
{
}
// stores the logger settings
// log_file is given as a wide string and kept converted to UTF-8 (by PT::WideToUTF8),
// the file itself is not opened here
void FileLog::init(const std::wstring & log_file, bool log_stdout, int log_level, bool save_each_line)
{
this->log_stdout = log_stdout;
this->log_level = log_level;
this->save_each_line = save_each_line;
PT::WideToUTF8(log_file, this->log_file);
}
// returns the log level set in the constructor or by init()
int FileLog::get_log_level()
{
return log_level;
}
// returns the save_each_line flag set in the constructor or by init()
bool FileLog::should_save_each_line()
{
return save_each_line;
}
/*
 * opens the log file for appending (only if a file name has been set)
 *
 * log_file_open is set regardless of whether the open succeeded,
 * the state of the stream itself is tested later in save_log()
 */
void FileLog::open_file()
{
    if( log_file.empty() )
        return;

    const std::ios_base::openmode mode = std::ios_base::out | std::ios_base::app;

    file.open(log_file.c_str(), mode);
    log_file_open = true;
}
/*
 * writes the content of the buffer (converted to UTF-8) to stdout
 * (if log_stdout is set) and to the log file (if a file name is set)
 *
 * the log file is (re)opened when it has not been opened yet
 * or when the stream is in a failed state
 *
 * nothing is done for a null or empty buffer
 * synchro_unlock() is called only if synchro_lock() has returned true
 */
void FileLog::save_log(PT::WTextStream * buffer)
{
    if( !buffer || buffer->empty() )
        return;

    if( !synchro_lock() )
        return; // the lock has not been acquired so there is nothing to release

    try
    {
        if( log_stdout )
        {
            PT::WideStreamToUTF8(*buffer, std::cout);
        }

        if( !log_file.empty() )
        {
            if( !log_file_open || !file )
            {
                // reset the stream before the next attempt to open the file
                file.close();
                file.clear();

                open_file();
            }

            if( file )
            {
                PT::WideStreamToUTF8(*buffer, file);
                file.flush();
            }
        }
    }
    catch(...)
    {
        // deliberately ignored: a failure while logging must not propagate to the caller
    }

    synchro_unlock();
}
} // namespace

94
src/log/filelog.h Normal file
View File

@@ -0,0 +1,94 @@
/*
* This file is a part of Winix
* and is distributed under the 2-Clause BSD licence.
* Author: Tomasz Sowa <t.sowa@ttmath.org>
*/
/*
* Copyright (c) 2018, Tomasz Sowa
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* 1. Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
*
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*
*/
#ifndef headerfile_picotools_log_filelog
#define headerfile_picotools_log_filelog
#include <sstream>
#include <fstream>
#include <iostream>
#include <string>
#include "textstream/textstream.h"
namespace PT
{
/*
 * a simple log sink: writes a wide text buffer (converted to UTF-8)
 * to a file and/or to the standard output
 *
 * all methods are virtual so the behaviour (e.g. locking) can be changed in a derived class
 */
class FileLog
{
public:
FileLog();
virtual ~FileLog();
// sets the log file name and the logging options, the file is not opened here
virtual void init(const std::wstring & log_file, bool log_stdout, int log_level, bool save_each_line);
// writes the buffer to stdout and/or to the log file
virtual void save_log(PT::WTextStream * buffer);
virtual int get_log_level();
virtual bool should_save_each_line();
protected:
// log file name (UTF-8) and the stream used to write to it
std::string log_file;
std::ofstream file;
// logging to stdout
bool log_stdout;
// whether open_file() has already been called for the log file
bool log_file_open;
// log level
int log_level;
// whether to save each line (for debug)
bool save_each_line;
// hooks called by save_log() around writing, the default implementation does no locking
virtual bool synchro_lock();
virtual void synchro_unlock();
// opens the log file in append mode
virtual void open_file();
};
} // namespace
#endif

488
src/log/log.cpp Normal file
View File

@@ -0,0 +1,488 @@
/*
* This file is a part of PikoTools
* and is distributed under the (new) BSD licence.
* Author: Tomasz Sowa <t.sowa@ttmath.org>
*/
/*
* Copyright (c) 2018, Tomasz Sowa
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* * Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
*
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* * Neither the name Tomasz Sowa nor the names of contributors to this
* project may be used to endorse or promote products derived
* from this software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
* THE POSSIBILITY OF SUCH DAMAGE.
*/
#include <ctime>
#include <string.h>
#include "log.h"
#include "date/date.h"
#include "utf8/utf8.h"
#include "morm.h"
namespace PT
{
/*
 * a new logger has neither a buffer nor a file logger assigned,
 * the current level is 4 and the buffer limit is 2MB
 */
Log::Log() :
    buffer(nullptr),
    file_log(nullptr),
    current_level(4),
    max_buffer_length(2 * 1024 * 1024) // 2MB
{
}
// flushes the remaining content of the buffer to the file logger
Log::~Log()
{
// IMPROVE ME
// I am not sure if this is the correct behaviour:
// the log buffer and the file logger may not exist any more at this point
// (life time of objects)
// maybe it would be better to have a flag 'clear_at_the_end'
// and call this method only if the flag is true?
save_log_and_clear();
}
// sets the buffer in which log messages are collected
// only the pointer is stored (can be null, then nothing is logged)
void Log::SetLogBuffer(PT::WTextStream * buffer)
{
this->buffer = buffer;
}
// returns the buffer set by SetLogBuffer() (null if none was set)
PT::WTextStream * Log::GetLogBuffer()
{
return buffer;
}
// sets the buffer size above which the buffer is saved when a line ends (logend)
void Log::SetMaxBufferLength(size_t max_buffer_length)
{
this->max_buffer_length = max_buffer_length;
}
// returns the limit set by SetMaxBufferLength() (2MB by default)
size_t Log::GetMaxBufferLength()
{
return max_buffer_length;
}
// sets the file logger to which the buffer is passed when it is saved
// only the pointer is stored (can be null, then nothing is logged)
void Log::SetFileLog(FileLog * file_log)
{
this->file_log = file_log;
}
// returns the file logger set by SetFileLog() (null if none was set)
FileLog * Log::GetFileLog()
{
return file_log;
}
/*
 * forwards min_width to int_min_width() of the log buffer (if a buffer is set)
 */
Log & Log::IntMinWidth(size_t min_width)
{
    if( !buffer )
        return *this;

    buffer->int_min_width(min_width);

    return *this;
}
/*
 * puts a pointer to the log buffer
 * (only if a buffer and a file logger are set and the current level is enabled)
 */
Log & Log::operator<<(const void * s)
{
    if( !buffer || !file_log || current_level > file_log->get_log_level() )
        return *this;

    (*buffer) << s;

    return *this;
}
/*
 * puts a narrow string to the log buffer, the string is converted with PT::UTF8ToWide()
 * a null pointer is ignored
 */
Log & Log::operator<<(const char * s)
{
    if( !buffer || !file_log || !s || current_level > file_log->get_log_level() )
        return *this;

    PT::UTF8ToWide(s, *buffer, false);

    return *this;
}

/*
 * puts a narrow string to the log buffer, the string is converted with PT::UTF8ToWide()
 */
Log & Log::operator<<(const std::string & s)
{
    if( !buffer || !file_log || current_level > file_log->get_log_level() )
        return *this;

    PT::UTF8ToWide(s, *buffer, false);

    return *this;
}
/*
 * puts a narrow string given by a pointer to the log buffer,
 * the string is converted with PT::UTF8ToWide()
 *
 * a null pointer is ignored (in the same way as in the other pointer overloads)
 */
Log & Log::operator<<(const std::string * s)
{
    if( buffer && file_log && s && current_level <= file_log->get_log_level() )
    {
        PT::UTF8ToWide(*s, *buffer, false);
    }

    return *this;
}
/*
 * puts a wide string to the log buffer, a null pointer is ignored
 */
Log & Log::operator<<(const wchar_t * s)
{
    if( !buffer || !file_log || !s || current_level > file_log->get_log_level() )
        return *this;

    (*buffer) << s;

    return *this;
}

/*
 * puts a wide string to the log buffer
 */
Log & Log::operator<<(const std::wstring & s)
{
    if( !buffer || !file_log || current_level > file_log->get_log_level() )
        return *this;

    (*buffer) << s;

    return *this;
}

/*
 * puts a wide string given by a pointer to the log buffer, a null pointer is ignored
 */
Log & Log::operator<<(const std::wstring * s)
{
    if( !buffer || !file_log || !s || current_level > file_log->get_log_level() )
        return *this;

    (*buffer) << *s;

    return *this;
}
/*
 * puts an int value to the log buffer
 * (only if a buffer and a file logger are set and the current level is enabled)
 */
Log & Log::operator<<(int s)
{
    if( !buffer || !file_log || current_level > file_log->get_log_level() )
        return *this;

    (*buffer) << s;

    return *this;
}

/*
 * puts a long value to the log buffer
 */
Log & Log::operator<<(long s)
{
    if( !buffer || !file_log || current_level > file_log->get_log_level() )
        return *this;

    (*buffer) << s;

    return *this;
}

/*
 * puts a long long value to the log buffer
 */
Log & Log::operator<<(long long s)
{
    if( !buffer || !file_log || current_level > file_log->get_log_level() )
        return *this;

    (*buffer) << s;

    return *this;
}
/*
 * puts a single narrow character to the log buffer
 */
Log & Log::operator<<(char s)
{
    if( !buffer || !file_log || current_level > file_log->get_log_level() )
        return *this;

    (*buffer) << s;

    return *this;
}

/*
 * puts a single wide character to the log buffer
 */
Log & Log::operator<<(wchar_t s)
{
    if( !buffer || !file_log || current_level > file_log->get_log_level() )
        return *this;

    (*buffer) << s;

    return *this;
}
/*
 * puts a size_t value to the log buffer
 */
Log & Log::operator<<(size_t s)
{
    if( !buffer || !file_log || current_level > file_log->get_log_level() )
        return *this;

    (*buffer) << s;

    return *this;
}

/*
 * puts a double value to the log buffer
 */
Log & Log::operator<<(double s)
{
    if( !buffer || !file_log || current_level > file_log->get_log_level() )
        return *this;

    (*buffer) << s;

    return *this;
}
/*
 * puts a Space object to the log buffer (using the operator<< of the buffer)
 */
Log & Log::operator<<(const PT::Space & s)
{
    if( !buffer || !file_log || current_level > file_log->get_log_level() )
        return *this;

    (*buffer) << s;

    return *this;
}

/*
 * puts a Date object to the log buffer (using the operator<< of the buffer)
 */
Log & Log::operator<<(const PT::Date & date)
{
    if( !buffer || !file_log || current_level > file_log->get_log_level() )
        return *this;

    (*buffer) << date;

    return *this;
}
/*
 * logs a morm model: the value returned by its to_string() method
 * is passed to the matching operator<< of this class
 */
Log & Log::operator<<(morm::Model & model)
{
    this->operator<<(model.to_string());

    return *this;
}
/*
 * handles a manipulator:
 *  log1..log4 (l1..l4) - change the current log level
 *  logend (lend)       - end the line; the buffer is saved if the file logger wants each line
 *                        to be saved or if the buffer is larger than max_buffer_length
 *  logsave (lsave)     - save the buffer and clear it
 */
Log & Log::operator<<(Manipulators m)
{
    if( m == log1 || m == l1 )
    {
        current_level = 1;
    }
    else
    if( m == log2 || m == l2 )
    {
        current_level = 2;
    }
    else
    if( m == log3 || m == l3 )
    {
        current_level = 3;
    }
    else
    if( m == log4 || m == l4 )
    {
        current_level = 4;
    }
    else
    if( m == logend || m == lend )
    {
        const bool enabled = buffer && file_log && current_level <= file_log->get_log_level();

        if( enabled )
        {
            (*buffer) << '\n';

            if( file_log->should_save_each_line() || buffer->size() > max_buffer_length )
                save_log_and_clear();
        }
    }
    else
    if( m == logsave || m == lsave )
    {
        save_log_and_clear();
    }

    return *this;
}
/*
 * logs at most max_size characters of a narrow string
 * (see log_string_generic() for how the characters are filtered)
 */
Log & Log::LogString(const std::string & value, size_t max_size)
{
    return log_string_generic<std::string>(value, max_size);
}

/*
 * logs at most max_size characters of a wide string
 * (see log_string_generic() for how the characters are filtered)
 */
Log & Log::LogString(const std::wstring & value, size_t max_size)
{
    return log_string_generic<std::wstring>(value, max_size);
}
/*
 * converts a value to a hexadecimal digit:
 * '0'-'9' for values below 10, letters starting from 'A' for values from 10 up
 * (the callers in this file pass values in the range 0-15)
 */
char Log::get_hex_digit(unsigned char c)
{
    return (c < 10) ? (c + '0') : (c - 10 + 'A');
}
/*
 * writes a byte as two hexadecimal digits followed by a terminating zero
 * buf must have room for at least three characters
 */
void Log::to_hex(char * buf, unsigned char c)
{
    const unsigned char high_nibble = c >> 4;
    const unsigned char low_nibble = c & 0xf;

    buf[0] = get_hex_digit(high_nibble);
    buf[1] = get_hex_digit(low_nibble);
    buf[2] = 0;
}
// logs a binary blob as a hex dump, 16 bytes per line:
// first the bytes as two-digit hexadecimal numbers (with a separator after the eighth byte),
// then the same bytes as characters (everything outside 32..126 is shown as a dot)
// each line is finished with the logend manipulator
// nothing is logged if blob is null, if there is no buffer or file logger
// or if the current level is not enabled
Log & Log::LogBinary(const char * blob, size_t blob_len)
{
size_t i=0;
char buf[3];
if( buffer && file_log && blob && current_level <= file_log->get_log_level() )
{
while( i < blob_len )
{
// remember where this line starts, the same bytes are printed twice
size_t oldi = i;
// the hexadecimal part, always 16 columns
for(size_t a=0 ; a<16 ; ++a)
{
if( i < blob_len )
{
to_hex(buf, blob[i]);
(*buffer) << buf << ' ';
++i;
}
else
{
// no more data: padding instead of a hex number
(*buffer) << " ";
}
// a separator in the middle of the line
if( a == 7 )
{
if( i < blob_len )
(*buffer) << "- ";
else
(*buffer) << " ";
}
}
// go back to the beginning of the line for the character part
i = oldi;
(*buffer) << ' ';
for(size_t a=0 ; a<16 && i<blob_len ; ++a, ++i)
{
if( blob[i] > 31 && blob[i] < 127 )
(*buffer) << blob[i];
else
(*buffer) << '.';
}
(*this) << logend;
}
}
return *this;
}
/*
 * logs the whole content of a string as a hex dump
 */
Log & Log::LogBinary(const std::string & blob)
{
    const char * bytes = blob.c_str();
    const size_t bytes_len = blob.size();

    return LogBinary(bytes, bytes_len);
}
/*
 * passes the buffer to the file logger and then clears the buffer
 */
void Log::save_log_and_clear()
{
    save_log();

    if( !buffer )
        return;

    buffer->clear();
}
/*
 * passes the buffer to the file logger
 * (nothing is done if either the buffer or the file logger is not set)
 */
void Log::save_log()
{
    if( !file_log || !buffer )
        return;

    file_log->save_log(buffer);
}
} // namespace

226
src/log/log.h Normal file
View File

@@ -0,0 +1,226 @@
/*
* This file is a part of PikoTools
* and is distributed under the (new) BSD licence.
* Author: Tomasz Sowa <t.sowa@ttmath.org>
*/
/*
* Copyright (c) 2018, Tomasz Sowa
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* * Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
*
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* * Neither the name Tomasz Sowa nor the names of contributors to this
* project may be used to endorse or promote products derived
* from this software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
* THE POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef headerfile_picotools_log_log
#define headerfile_picotools_log_log
#include <string>
#include <fstream>
#include "textstream/textstream.h"
#include "filelog.h"
// forward declaration only: Log has an operator<< taking a morm::Model reference,
// so the full definition is not needed in this header
namespace morm
{
class Model;
}
namespace PT
{
/*
 * a stream-like logger
 *
 * messages are collected in a wide text buffer (set by SetLogBuffer())
 * and passed to a FileLog object (set by SetFileLog()) when the buffer is saved
 *
 * a value is put to the buffer only if both objects are set and
 * the current level (changed with log1..log4) is not greater than
 * the level returned by the file logger
 *
 * only pointers to the buffer and to the file logger are stored here
 */
class Log
{
public:
/*
log1 - the first level
log2
log3
log4 - the last level (debug level)
logend - the end of a line
logsave - current log buffer is saved and cleared
l1, l2, l3, l4, lend and lsave are shorter names for the same manipulators
*/
enum Manipulators
{
log1,
log2,
log3,
log4,
l1,
l2,
l3,
l4,
logend,
lend,
logsave,
lsave,
};
Log();
// the destructor saves and clears the buffer
virtual ~Log();
virtual void SetLogBuffer(PT::WTextStream * buffer);
virtual PT::WTextStream * GetLogBuffer();
void SetFileLog(FileLog * file_log);
FileLog * GetFileLog();
void SetMaxBufferLength(size_t max_buffer_length);
size_t GetMaxBufferLength();
// sets the minimal width of integers in the buffer
virtual Log & IntMinWidth(size_t min_width);
virtual Log & operator<<(const void * s);
// narrow strings are converted from UTF-8 to wide characters
virtual Log & operator<<(const char * s);
virtual Log & operator<<(const std::string * s);
virtual Log & operator<<(const std::string & s);
virtual Log & operator<<(const wchar_t * s);
virtual Log & operator<<(const std::wstring * s);
virtual Log & operator<<(const std::wstring & s);
virtual Log & operator<<(char s);
virtual Log & operator<<(wchar_t s);
virtual Log & operator<<(int s);
virtual Log & operator<<(long s);
virtual Log & operator<<(long long s);
// TODO: add unsigned long, unsigned int
virtual Log & operator<<(size_t s);
//virtual Log & operator<<(float s); // added
virtual Log & operator<<(double s);
virtual Log & operator<<(const PT::Space & space);
virtual Log & operator<<(const PT::Date & date);
// logs the value returned by model.to_string()
virtual Log & operator<<(morm::Model & model);
virtual Log & operator<<(Manipulators m);
// logs at most max_size characters, characters with codes below 32 are replaced with a dot
virtual Log & LogString(const std::string & value, size_t max_size);
virtual Log & LogString(const std::wstring & value, size_t max_size);
template<typename char_type, size_t stack_size, size_t heap_block_size>
Log & operator<<(const PT::TextStreamBase<char_type, stack_size, heap_block_size> & buf);
// logs a blob as a hex dump (16 bytes per line)
virtual Log & LogBinary(const char * blob, size_t blob_len);
virtual Log & LogBinary(const std::string & blob);
protected:
// buffer for the log
PT::WTextStream * buffer;
// file logger
FileLog * file_log;
// current level set by a modifier (e.g. log << log3)
int current_level;
// if there is logend modifier used and the buffer exceeds max_buffer_length then
// the buffer is passed to file_log
size_t max_buffer_length;
// helpers for LogBinary()
char get_hex_digit(unsigned char c);
void to_hex(char * buf, unsigned char c);
template<class StringType>
Log & log_string_generic(const StringType & value, size_t max_size);
// save_log() passes the buffer to file_log, save_log_and_clear() additionally clears the buffer
virtual void save_log();
virtual void save_log_and_clear();
};
/*
 * puts at most max_size characters of value to the log buffer
 * every character with a code lower than 32 is replaced with a dot
 *
 * nothing is logged if there is no buffer or file logger
 * or if the current level is not enabled
 */
template<class StringType>
Log & Log::log_string_generic(const StringType & value, size_t max_size)
{
    if( !buffer || !file_log || current_level > file_log->get_log_level() )
        return *this;

    const std::size_t limit = (max_size < value.size()) ? max_size : value.size();

    for(size_t index = 0 ; index < limit ; ++index)
    {
        if( value[index] < 32 )
        {
            (*buffer) << '.'; // unprintable characters
            continue;
        }

        (*buffer) << value[index];
    }

    return *this;
}
/*
 * puts the content of another text stream to the log buffer
 * (only if a buffer and a file logger are set and the current level is enabled)
 */
template<typename char_type, size_t stack_size, size_t heap_block_size>
Log & Log::operator<<(const PT::TextStreamBase<char_type, stack_size, heap_block_size> & buf)
{
    if( !buffer || !file_log || current_level > file_log->get_log_level() )
        return *this;

    (*buffer) << buf;

    return *this;
}
} // namespace
#endif

View File

@@ -0,0 +1,300 @@
/*
* This file is a part of PikoTools
* and is distributed under the (new) BSD licence.
* Author: Tomasz Sowa <t.sowa@ttmath.org>
*/
/*
* Copyright (c) 2016-2017, Tomasz Sowa
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* * Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
*
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* * Neither the name Tomasz Sowa nor the names of contributors to this
* project may be used to endorse or promote products derived
* from this software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
* THE POSSIBILITY OF SUCH DAMAGE.
*/
#include "mainspaceparser.h"
#include "utf8/utf8.h"
#include <string.h>
// REMOVE ME
#include <iostream>
namespace PT
{
#ifdef nonexisting_value
/*
 * a new parser has no space assigned, uses UTF-8 and reports status_ok
 */
MainSpaceParser::MainSpaceParser()
{
    space         = nullptr;
    options_space = nullptr;
    use_utf8      = true;
    last_status   = status_ok;
}
// nothing to release explicitly, the space object is only referenced by a pointer
MainSpaceParser::~MainSpaceParser()
{
}
// selects how the arguments are converted to wide strings in ConvertStr():
// true - as UTF-8, false - each byte is copied as one wide character
void MainSpaceParser::UTF8(bool utf8)
{
use_utf8 = utf8;
}
/*
 * sets the space to which the parsed arguments will be added
 * only the address is stored, the pointer to the options space is reset
 */
void MainSpaceParser::SetSpace(Space & space_ref)
{
    options_space = nullptr;
    space = &space_ref;
}
// returns the token stored when the last syntax error was reported
// (the token is cleared at the beginning of Parse() and when status_reading_eof is set)
std::wstring & MainSpaceParser::GetErrorToken()
{
return last_error_token;
}
/*
 * parses the program arguments (argv[0] is skipped)
 *
 * returns status_space_not_assigned if SetSpace() has not been called,
 * otherwise the arguments are parsed until all of them are consumed
 * or an error is reported, and the last status is returned
 */
MainSpaceParser::Status MainSpaceParser::Parse(int argc, const char ** argv)
{
    if( !space )
        return status_space_not_assigned;

    options_space = space->FindSpace(L"options");
    last_status = status_ok;
    last_error_token.clear();

    const size_t args_count = (size_t)argc;
    size_t index = 1;

    // the index is moved forward by the called method
    while( index < args_count && last_status == status_ok )
        Parse(args_count, argv, index);

    return last_status;
}
/*
 * parses the argument at argv_index:
 *  "--name" is passed to ParseMultiArgument()
 *  "-abc"   is passed to ParseSingleArgument()
 *  anything else is a syntax error (the argument is stored as the error token)
 */
void MainSpaceParser::Parse(size_t argc, const char ** argv, size_t & argv_index)
{
    const char * arg = argv[argv_index];

    if( *arg != '-' )
    {
        last_status = status_syntax_error;
        ConvertStr(arg, last_error_token);
        return;
    }

    if( arg[1] == '-' )
        ParseMultiArgument(argc, argv, argv_index);
    else
        ParseSingleArgument(argc, argv, argv_index);
}
/*
 * converts a narrow string to a wide string:
 * with PT::UTF8ToWide() if use_utf8 is set,
 * otherwise each byte is copied as one wide character
 */
void MainSpaceParser::ConvertStr(const char * src, std::wstring & dst)
{
    if( use_utf8 )
    {
        PT::UTF8ToWide(src,dst);
        return;
    }

    dst.clear();

    while( *src )
    {
        dst += static_cast<wchar_t>(static_cast<unsigned char>(*src));
        ++src;
    }
}
// parses an argument which starts with a single '-' (e.g. "-abc")
// each character is treated as a separate option name and added with AddValueToItem()
// when an option requires values (RequireOption() returns non-zero) then
// the rest of the argument (if any) is taken as its first value, the remaining values
// are read from the following argv items and the loop is finished
// argv_index is moved past all consumed arguments
void MainSpaceParser::ParseSingleArgument(size_t argc, const char ** argv, size_t & argv_index)
{
// skip the leading '-'
ConvertStr(argv[argv_index] + 1, wide_arg);
const wchar_t * wide_pchar = wide_arg.c_str();
temp_list_val.clear();
bool was_option = false;
argv_index += 1;
for( ; *wide_pchar && !was_option ; ++wide_pchar )
{
temp_arg = *wide_pchar;
size_t opt_size = RequireOption(temp_arg);
if( opt_size > 0 )
{
was_option = true;
if( *(wide_pchar+1) )
{
// the value is glued to the option name, e.g. "-ovalue"
temp_val = wide_pchar + 1;
temp_list_val.push_back(temp_val);
opt_size -= 1;
}
for( ; opt_size > 0 && argv_index < argc ; --opt_size, ++argv_index)
{
ConvertStr(argv[argv_index], temp_val);
temp_list_val.push_back(temp_val);
}
if( opt_size > 0 )
{
// there are not enough arguments for this option
last_status = status_reading_eof;
last_error_token.clear();
}
}
temp_val.clear();
AddValueToItem(temp_arg, temp_val, temp_list_val);
}
}
/*
    parses a long argument: "--name"

    the text after "--" is the option name, RequireOption() tells how many
    values it needs and they are taken from the following argv[] items
    if argv[] ends too early last_status is set to status_reading_eof
    the option (with the values read so far) is stored with AddValueToItem()

    argv_index is moved past the argument and past all consumed values
*/
void MainSpaceParser::ParseMultiArgument(size_t argc, const char ** argv, size_t & argv_index)
{
    ConvertStr(argv[argv_index] + 2, temp_arg); // +2: skip the leading "--"
    argv_index += 1;

    size_t values_needed = RequireOption(temp_arg);
    temp_list_val.clear();

    while( values_needed > 0 && argv_index < argc )
    {
        ConvertStr(argv[argv_index], temp_val);
        temp_list_val.push_back(temp_val);
        values_needed -= 1;
        argv_index += 1;
    }

    if( values_needed > 0 )
    {
        // argv[] has ended before all required values were read
        last_status = status_reading_eof;
        last_error_token.clear();
    }

    temp_val.clear();
    AddValueToItem(temp_arg, temp_val, temp_list_val);
}
void MainSpaceParser::AddValueToItem(const std::wstring & name, const std::wstring & empty_value, const std::vector<std::wstring> & list)
{
std::wstring * val = space->GetFirstValue(name);
if( !val )
{
if( list.empty() )
space->Add(name, empty_value);
else
if( list.size() == 1 )
space->Add(name, list[0]);
else
space->table[name] = list; // !! IMPROVE ME there'll be a new api in space
}
else
{
PT::Space::Table::iterator i = space->table.find(name);
PT::Space::Value * table_value;
if( i == space->table.end() )
{
table_value = &space->table[name];
table_value->push_back(*val);
//space->table_single.erase(name);
}
else
{
table_value = &i->second;
}
if( list.empty() )
{
table_value->push_back(empty_value);
}
else
{
for(const auto & list_item : list)
table_value->push_back(list_item);
}
}
}
/*
    returns how many values the option 'arg' requires

    the number is read (base 10, with wcstol) from the value stored
    under the name 'arg' in options_space
    zero is returned when there is no options_space, when the option
    is not listed there or when the number is negative
*/
size_t MainSpaceParser::RequireOption(const std::wstring & arg)
{
    if( !options_space )
        return 0;

    std::wstring * count_str = options_space->GetFirstValue(arg);

    if( !count_str )
        return 0;

    /*
     * IMPLEMENT ME
     * add a converter to convert/inttostr.h
     *
     */
    const long count = wcstol(count_str->c_str(), nullptr, 10);

    //std::wcout << L"argument " << arg << L" needs " << res << L" options" << std::endl;
    return (count < 0) ? 0 : (size_t)count;
}
#endif
} // namespace

View File

@@ -0,0 +1,103 @@
/*
* This file is a part of PikoTools
* and is distributed under the (new) BSD licence.
* Author: Tomasz Sowa <t.sowa@ttmath.org>
*/
/*
* Copyright (c) 2016, Tomasz Sowa
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* * Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
*
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* * Neither the name Tomasz Sowa nor the names of contributors to this
* project may be used to endorse or promote products derived
* from this software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
* THE POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef headerfile_picotools_mainspaceparser_mainparser
#define headerfile_picotools_mainspaceparser_mainparser
#include "space/space.h"
#include <string>
#include <vector>
namespace PT
{
#ifdef nonexisting_value
/*
a very small parser for main(int argc, char ** argv) parameters
see sample/sample.cpp for an example of how to use the parser
*/
class MainSpaceParser
{
public:
MainSpaceParser();
~MainSpaceParser();
/*
result of Parse()
*/
enum Status
{
status_ok = 0,
// Parse() was called before SetSpace()
status_space_not_assigned = 1,
// an argument which does not start with '-' was found,
// the argument is available through GetErrorToken()
status_syntax_error = 2,
// an option requires more values than there are arguments left
status_reading_eof = 3 /* CHANGE ME give a better name */
};
/*
sets the space to which parsed options are added,
only a pointer to the space is stored so the space has to outlive the parser calls
*/
void SetSpace(Space & space);
/*
parses arguments argv[1] ... argv[argc-1]
how many values an option takes is read from the child space "options"
of the space given to SetSpace()
*/
Status Parse(int argc, const char ** argv);
/*
true  - arguments are converted with PT::UTF8ToWide()
false - each byte of an argument is widened to one wchar_t
*/
void UTF8(bool utf8);
/*
the token remembered together with status_syntax_error
*/
std::wstring & GetErrorToken();
private:
// destination space, set by SetSpace() (null until then -- see Parse())
Space * space;
// the "options" child of 'space', looked up at the beginning of Parse()
Space * options_space;
// temporary buffers reused between arguments
std::wstring wide_arg, temp_arg, temp_val;
std::vector<std::wstring> temp_list_val;
bool use_utf8;
Status last_status;
std::wstring last_error_token;
void ConvertStr(const char * src, std::wstring & dst);
void Parse(size_t argc, const char ** argv, size_t & argv_index);
// "-abc" form: every letter is a separate option
void ParseSingleArgument(size_t argc, const char ** argv, size_t & argv_index);
// "--name" form
void ParseMultiArgument(size_t argc, const char ** argv, size_t & argv_index);
// how many values the option needs (0 if not listed in options_space)
size_t RequireOption(const std::wstring & arg);
void AddValueToItem(const std::wstring & name, const std::wstring & empty_value, const std::vector<std::wstring> & list);
};
#endif
} // namespace
#endif

825
src/membuffer/membuffer.h Normal file
View File

@@ -0,0 +1,825 @@
/*
* This file is a part of PikoTools
* and is distributed under the (new) BSD licence.
* Author: Tomasz Sowa <t.sowa@ttmath.org>
*/
/*
* Copyright (c) 2012-2021, Tomasz Sowa
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* * Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
*
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* * Neither the name Tomasz Sowa nor the names of contributors to this
* project may be used to endorse or promote products derived
* from this software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
* THE POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef headerfile_picotools_membuffer_membuffer
#define headerfile_picotools_membuffer_membuffer
#include <iostream>
namespace PT
{
/*
stack_size and heap_block_size have to be *greater* than zero
*/
/*
a buffer of items which keeps the first stack_size items in an array
embedded in the object (stack_array) and the following items in
heap blocks of heap_block_size items each
*/
template<typename item_type, size_t stack_size, size_t heap_block_size>
class MemBuffer
{
public:
/*
a position is the pair (dynamic_array_index, index):
dynamic_array_index == size_t(-1) - 'index' points into stack_array
otherwise                         - 'index' points into dynamic_array[dynamic_array_index].buf

NOTE(review): no constructor is declared, the members are indeterminate
until the iterator is filled in by MemBuffer::begin() / end() or copied from another one
*/
class iterator
{
public:
bool operator==(const iterator & i) const;
bool operator!=(const iterator & i) const;
iterator & operator++(); // prefix ++
iterator operator++(int); // postfix ++
iterator & operator--(); // prefix --
iterator operator--(int); // postfix --
item_type & operator*();
private:
MemBuffer * mem_buffer;
size_t dynamic_array_index;
size_t index;
friend class MemBuffer;
};
/*
the same position representation as in 'iterator'
but it points to a const MemBuffer and can be created from an 'iterator'
*/
class const_iterator
{
public:
const_iterator();
const_iterator(const const_iterator & i);
const_iterator(const iterator & i);
const_iterator & operator=(const const_iterator & i);
const_iterator & operator=(const iterator & i);
bool operator==(const const_iterator & i) const;
bool operator!=(const const_iterator & i) const;
const_iterator & operator++(); // prefix ++
const_iterator operator++(int); // postfix ++
const_iterator & operator--(); // prefix --
const_iterator operator--(int); // postfix --
// returns a copy of the item (not a reference)
item_type operator*();
private:
const MemBuffer * mem_buffer;
size_t dynamic_array_index;
size_t index;
friend class MemBuffer;
};
MemBuffer();
~MemBuffer();
MemBuffer(const MemBuffer<item_type, stack_size, heap_block_size> & arg);
MemBuffer & operator=(const MemBuffer<item_type, stack_size, heap_block_size> & arg);
// appending one item at the end
void append(item_type item);
// appending len items from an array
void append(const item_type * item_array, size_t len);
// appending len items of a different type, each item is static_cast<> to item_type
template<typename in_item_type>
void append(const in_item_type * item_array, size_t len);
// appending all items from another buffer, each item is static_cast<> to item_type
template<typename arg_item_type, size_t arg_stack_size, size_t arg_heap_block_size>
void append(const MemBuffer<arg_item_type, arg_stack_size, arg_heap_block_size> & arg);
size_t size() const;
bool empty() const;
// allocates heap blocks until capacity() is at least len
void reserve(size_t len);
size_t capacity() const;
void clear(); // frees memory but only to capacity()
iterator begin();
iterator end();
const_iterator begin() const;
const_iterator end() const;
// no range checking is made
item_type & operator[](size_t i);
// returns a copy of the item, no range checking is made
const item_type operator[](size_t i) const;
private:
// descriptor of one heap block
struct MemArray
{
size_t size_used; // how many items of buf are in use
item_type * buf; // heap_block_size items allocated with new[]
};
item_type stack_array[stack_size];
MemArray * dynamic_array; // dynamic array of MemArray descriptors
size_t dynamic_array_index; // index of a MemArray to which the last insertion was made
// size_t(-1) means the stack_array
size_t dynamic_array_used; // how many MemArray-s have been inited in dynamic_array
size_t dynamic_array_size; // the size of the dynamic_array
size_t size_used; // the size of all valid items
size_t size_allocated; // how many items fit in the memory allocated so far (stack_array plus all heap blocks)
size_t size_reserved; // memory reserved by reserve(), it is used by clear()
// used by ctors
void Initialize();
// allocates one more heap block (growing dynamic_array if needed)
void add_dynamic_node();
};
/*
iterator
*/
/*
    prefix ++
    moves to the next item, after the last item of the current block
    (stack_array or a heap block) the iterator goes to index 0 of the next block
*/
template<typename item_type, size_t stack_size, size_t heap_block_size>
typename MemBuffer<item_type, stack_size, heap_block_size>::iterator &
MemBuffer<item_type, stack_size, heap_block_size>::iterator::operator++()
{
    const bool in_stack_array = (dynamic_array_index == size_t(-1));
    const size_t block_len = in_stack_array ? stack_size : heap_block_size;

    index += 1;

    if( index >= block_len )
    {
        // for the stack array size_t(-1) + 1 wraps around to 0 (the first heap block)
        dynamic_array_index += 1;
        index = 0;
    }

    return *this;
}
/*
    postfix ++
    advances the iterator and returns its previous position
*/
template<typename item_type, size_t stack_size, size_t heap_block_size>
typename MemBuffer<item_type, stack_size, heap_block_size>::iterator
MemBuffer<item_type, stack_size, heap_block_size>::iterator::operator++(int)
{
    iterator previous = *this;
    ++(*this);

    return previous;
}
/*
    prefix --
    moves to the previous item, from index 0 the iterator goes
    to the last index of the previous block
    (the block before heap block 0 is the stack array: size_t(-1))
*/
template<typename item_type, size_t stack_size, size_t heap_block_size>
typename MemBuffer<item_type, stack_size, heap_block_size>::iterator &
MemBuffer<item_type, stack_size, heap_block_size>::iterator::operator--()
{
    if( index != 0 )
    {
        index -= 1;
        return *this;
    }

    dynamic_array_index -= 1;
    index = (dynamic_array_index == size_t(-1)) ? stack_size - 1 : heap_block_size - 1;

    return *this;
}
/*
    postfix --
    moves the iterator one item back and returns its previous position

    (the iterator has to be moved with operator--(),
    calling operator++() here would move it forward)
*/
template<typename item_type, size_t stack_size, size_t heap_block_size>
typename MemBuffer<item_type, stack_size, heap_block_size>::iterator
MemBuffer<item_type, stack_size, heap_block_size>::iterator::operator--(int)
{
    iterator old(*this);
    operator--();

    return old;
}
/*
    returns a reference to the item at the current position
    no checking is made whether the position is valid
*/
template<typename item_type, size_t stack_size, size_t heap_block_size>
item_type & MemBuffer<item_type, stack_size, heap_block_size>::iterator::operator*()
{
    if( dynamic_array_index != size_t(-1) )
        return mem_buffer->dynamic_array[dynamic_array_index].buf[index];

    return mem_buffer->stack_array[index];
}
/*
    two iterators are equal when they point to the same buffer,
    the same block and the same index in the block
*/
template<typename item_type, size_t stack_size, size_t heap_block_size>
bool MemBuffer<item_type, stack_size, heap_block_size>::iterator::operator==(const iterator & i) const
{
    if( mem_buffer != i.mem_buffer )
        return false;

    if( dynamic_array_index != i.dynamic_array_index )
        return false;

    return index == i.index;
}
/*
    two iterators differ when the buffer, the block
    or the index in the block is different
*/
template<typename item_type, size_t stack_size, size_t heap_block_size>
bool MemBuffer<item_type, stack_size, heap_block_size>::iterator::operator!=(const iterator & i) const
{
    const bool the_same = (mem_buffer == i.mem_buffer) &&
                          (dynamic_array_index == i.dynamic_array_index) &&
                          (index == i.index);

    return !the_same;
}
/*
const_iterator
*/
/*
    default ctor
    the iterator is not attached to any buffer (mem_buffer is null),
    the members are set to well defined values so that such an iterator
    can be safely copied and compared
*/
template<typename item_type, size_t stack_size, size_t heap_block_size>
MemBuffer<item_type, stack_size, heap_block_size>::const_iterator::const_iterator()
    : mem_buffer(nullptr),
      dynamic_array_index(size_t(-1)),
      index(0)
{
}
/*
    copy ctor
    copies the buffer pointer and the position from 'i'
*/
template<typename item_type, size_t stack_size, size_t heap_block_size>
MemBuffer<item_type, stack_size, heap_block_size>::const_iterator::const_iterator(const const_iterator & i)
    : mem_buffer(i.mem_buffer),
      dynamic_array_index(i.dynamic_array_index),
      index(i.index)
{
}
/*
    converting ctor from a mutable iterator
    copies the buffer pointer and the position from 'i'
*/
template<typename item_type, size_t stack_size, size_t heap_block_size>
MemBuffer<item_type, stack_size, heap_block_size>::const_iterator::const_iterator(const iterator & i)
    : mem_buffer(i.mem_buffer),
      dynamic_array_index(i.dynamic_array_index),
      index(i.index)
{
}
/*
    assignment from another const_iterator
    copies the buffer pointer and the position
*/
template<typename item_type, size_t stack_size, size_t heap_block_size>
typename MemBuffer<item_type, stack_size, heap_block_size>::const_iterator &
MemBuffer<item_type, stack_size, heap_block_size>::const_iterator::operator=(const const_iterator & i)
{
    this->index               = i.index;
    this->dynamic_array_index = i.dynamic_array_index;
    this->mem_buffer          = i.mem_buffer;

    return *this;
}
/*
    assignment from a mutable iterator
    copies the buffer pointer and the position
*/
template<typename item_type, size_t stack_size, size_t heap_block_size>
typename MemBuffer<item_type, stack_size, heap_block_size>::const_iterator &
MemBuffer<item_type, stack_size, heap_block_size>::const_iterator::operator=(const iterator & i)
{
    this->index               = i.index;
    this->dynamic_array_index = i.dynamic_array_index;
    this->mem_buffer          = i.mem_buffer;

    return *this;
}
/*
    prefix ++
    moves to the next item, after the last item of the current block
    (stack_array or a heap block) the iterator goes to index 0 of the next block
*/
template<typename item_type, size_t stack_size, size_t heap_block_size>
typename MemBuffer<item_type, stack_size, heap_block_size>::const_iterator &
MemBuffer<item_type, stack_size, heap_block_size>::const_iterator::operator++()
{
    const bool in_stack_array = (dynamic_array_index == size_t(-1));
    const size_t block_len = in_stack_array ? stack_size : heap_block_size;

    index += 1;

    if( index >= block_len )
    {
        // for the stack array size_t(-1) + 1 wraps around to 0 (the first heap block)
        dynamic_array_index += 1;
        index = 0;
    }

    return *this;
}
/*
    postfix ++
    advances the iterator and returns its previous position
*/
template<typename item_type, size_t stack_size, size_t heap_block_size>
typename MemBuffer<item_type, stack_size, heap_block_size>::const_iterator
MemBuffer<item_type, stack_size, heap_block_size>::const_iterator::operator++(int)
{
    const_iterator previous = *this;
    ++(*this);

    return previous;
}
/*
    prefix --
    moves to the previous item, from index 0 the iterator goes
    to the last index of the previous block
    (the block before heap block 0 is the stack array: size_t(-1))
*/
template<typename item_type, size_t stack_size, size_t heap_block_size>
typename MemBuffer<item_type, stack_size, heap_block_size>::const_iterator &
MemBuffer<item_type, stack_size, heap_block_size>::const_iterator::operator--()
{
    if( index != 0 )
    {
        index -= 1;
        return *this;
    }

    dynamic_array_index -= 1;
    index = (dynamic_array_index == size_t(-1)) ? stack_size - 1 : heap_block_size - 1;

    return *this;
}
/*
    postfix --
    moves the iterator one item back and returns its previous position

    (the iterator has to be moved with operator--(),
    calling operator++() here would move it forward)
*/
template<typename item_type, size_t stack_size, size_t heap_block_size>
typename MemBuffer<item_type, stack_size, heap_block_size>::const_iterator
MemBuffer<item_type, stack_size, heap_block_size>::const_iterator::operator--(int)
{
    const_iterator old(*this);
    operator--();

    return old;
}
/*
    returns a copy of the item at the current position
    no checking is made whether the position is valid
*/
template<typename item_type, size_t stack_size, size_t heap_block_size>
item_type MemBuffer<item_type, stack_size, heap_block_size>::const_iterator::operator*()
{
    if( dynamic_array_index != size_t(-1) )
        return mem_buffer->dynamic_array[dynamic_array_index].buf[index];

    return mem_buffer->stack_array[index];
}
/*
    two iterators are equal when they point to the same buffer,
    the same block and the same index in the block
*/
template<typename item_type, size_t stack_size, size_t heap_block_size>
bool MemBuffer<item_type, stack_size, heap_block_size>::const_iterator::operator==(const const_iterator & i) const
{
    if( mem_buffer != i.mem_buffer )
        return false;

    if( dynamic_array_index != i.dynamic_array_index )
        return false;

    return index == i.index;
}
/*
    two iterators differ when the buffer, the block
    or the index in the block is different
*/
template<typename item_type, size_t stack_size, size_t heap_block_size>
bool MemBuffer<item_type, stack_size, heap_block_size>::const_iterator::operator!=(const const_iterator & i) const
{
    const bool the_same = (mem_buffer == i.mem_buffer) &&
                          (dynamic_array_index == i.dynamic_array_index) &&
                          (index == i.index);

    return !the_same;
}
/*
MemBuffer
*/
/*
    sets the buffer to the empty state: no items, no heap blocks,
    only the embedded stack_array counts as allocated
    (used by the ctors, it does not free anything)
*/
template<typename item_type, size_t stack_size, size_t heap_block_size>
void MemBuffer<item_type, stack_size, heap_block_size>::Initialize()
{
    // heap blocks
    dynamic_array       = 0;
    dynamic_array_size  = 0;
    dynamic_array_used  = 0;
    dynamic_array_index = size_t(-1); // insertions start in stack_array

    // counters
    size_used      = 0;
    size_reserved  = 0;
    size_allocated = stack_size;
}
/*
    default ctor -- creates an empty buffer (see Initialize())
*/
template<typename item_type, size_t stack_size, size_t heap_block_size>
MemBuffer<item_type, stack_size, heap_block_size>::MemBuffer()
{
    this->Initialize();
}
/*
    dtor -- frees all inited heap blocks and the array of their descriptors
*/
template<typename item_type, size_t stack_size, size_t heap_block_size>
MemBuffer<item_type, stack_size, heap_block_size>::~MemBuffer()
{
    if( !dynamic_array )
        return;

    size_t block = 0;

    while( block < dynamic_array_used )
    {
        delete [] dynamic_array[block].buf;
        block += 1;
    }

    delete [] dynamic_array;
}
/*
    copy ctor -- starts from an empty buffer
    and copies all items from 'arg' with the assignment operator
*/
template<typename item_type, size_t stack_size, size_t heap_block_size>
MemBuffer<item_type, stack_size, heap_block_size>::MemBuffer(const MemBuffer<item_type, stack_size, heap_block_size> & arg)
{
    this->Initialize();
    *this = arg;
}
/*
    assignment operator
    the current content is dropped and all items from 'arg' are appended

    self assignment is a no-op: without the check the buffer would be
    cleared first and then there would be nothing left to copy
*/
template<typename item_type, size_t stack_size, size_t heap_block_size>
MemBuffer<item_type, stack_size, heap_block_size> &
MemBuffer<item_type, stack_size, heap_block_size>::operator=(const MemBuffer<item_type, stack_size, heap_block_size> & arg)
{
    if( this != &arg )
    {
        if( size_used > 0 )
            clear();

        const const_iterator arg_end = arg.end();

        for(const_iterator i = arg.begin() ; i != arg_end ; ++i)
            append(*i);
    }

    return *this;
}
/*
    allocates one more heap block (heap_block_size items)
    and registers it at dynamic_array[dynamic_array_used]

    if there is no free descriptor the dynamic_array is reallocated first
    (it grows by two descriptors)

    the counters (dynamic_array_size, dynamic_array_used, size_allocated)
    are changed only after the corresponding allocation has succeeded
    so the buffer stays consistent when operator new throws
*/
template<typename item_type, size_t stack_size, size_t heap_block_size>
void MemBuffer<item_type, stack_size, heap_block_size>::add_dynamic_node()
{
    if( dynamic_array_used >= dynamic_array_size )
    {
        // reallocating
        const size_t new_size = dynamic_array_size + 2; // 64;
        MemArray * new_array = new MemArray[new_size];

        for(size_t i=0 ; i<dynamic_array_used ; ++i)
            new_array[i] = dynamic_array[i];

        delete [] dynamic_array;
        dynamic_array = new_array;
        dynamic_array_size = new_size;
    }

    item_type * new_buf = new item_type[heap_block_size];

    dynamic_array[dynamic_array_used].size_used = 0;
    dynamic_array[dynamic_array_used].buf = new_buf;
    dynamic_array_used += 1;
    size_allocated += heap_block_size;
}
/*
    appends one item at the end of the buffer

    the first stack_size items go to stack_array, the following ones
    to heap blocks; a new block is taken (and allocated if it does not
    exist yet) when the current one is full
*/
template<typename item_type, size_t stack_size, size_t heap_block_size>
void MemBuffer<item_type, stack_size, heap_block_size>::append(item_type item)
{
    if( size_used < stack_size )
    {
        stack_array[size_used] = item;
        size_used += 1;
        return;
    }

    // the || short-circuit protects from indexing dynamic_array with size_t(-1)
    const bool need_next_block =
        dynamic_array_index == size_t(-1) ||
        dynamic_array[dynamic_array_index].size_used >= heap_block_size;

    if( need_next_block )
    {
        // size_t(-1) + 1 wraps around to 0 (the first heap block)
        dynamic_array_index += 1;

        if( dynamic_array_index >= dynamic_array_used )
            add_dynamic_node();

        dynamic_array[dynamic_array_index].size_used = 0;
    }

    MemArray & block = dynamic_array[dynamic_array_index];

    block.buf[block.size_used] = item;
    block.size_used += 1;
    size_used += 1;
}
/*
    appends len items from item_array

    two fast paths copy the items directly:
      - all of them fit in stack_array
      - all of them fit in the heap block currently in use
    otherwise the items are appended one by one
*/
template<typename item_type, size_t stack_size, size_t heap_block_size>
void MemBuffer<item_type, stack_size, heap_block_size>::append(const item_type * item_array, size_t len)
{
    if( size_used + len <= stack_size )
    {
        for(size_t n = 0 ; n < len ; ++n)
        {
            stack_array[size_used] = item_array[n];
            size_used += 1;
        }

        return;
    }

    const bool fits_in_current_block =
        dynamic_array_index != size_t(-1) &&
        dynamic_array[dynamic_array_index].size_used + len <= heap_block_size;

    if( fits_in_current_block )
    {
        MemArray & block = dynamic_array[dynamic_array_index];
        item_type * out = block.buf + block.size_used;

        for(size_t n = 0 ; n < len ; ++n)
            out[n] = item_array[n];

        block.size_used += len;
        size_used += len;

        return;
    }

    for(size_t n = 0 ; n < len ; ++n)
        append(item_array[n]);
}
/*
    appends len items of a different type
    each item is converted with static_cast<item_type>() and appended one by one
*/
template<typename item_type, size_t stack_size, size_t heap_block_size>
template<typename in_item_type>
void MemBuffer<item_type, stack_size, heap_block_size>::append(const in_item_type * item_array, size_t len)
{
    size_t n = 0;

    while( n < len )
    {
        append(static_cast<item_type>(item_array[n]));
        n += 1;
    }
}
/*
    appends all items from another buffer (the buffer can have a different
    item type and different sizes), each item is converted
    with static_cast<item_type>() and appended one by one
*/
template<typename item_type, size_t stack_size, size_t heap_block_size>
template<typename arg_item_type, size_t arg_stack_size, size_t arg_heap_block_size>
void MemBuffer<item_type, stack_size, heap_block_size>::append(
    const MemBuffer<arg_item_type, arg_stack_size, arg_heap_block_size> & arg)
{
    typename MemBuffer<arg_item_type, arg_stack_size, arg_heap_block_size>::const_iterator pos = arg.begin();

    while( pos != arg.end() )
    {
        append(static_cast<item_type>(*pos));
        ++pos;
    }
}
/*
    returns a reference to the i-th item
    indexes below stack_size are in stack_array, the rest is spread over
    the heap blocks (heap_block_size items per block)
    no range checking is made
*/
template<typename item_type, size_t stack_size, size_t heap_block_size>
item_type & MemBuffer<item_type, stack_size, heap_block_size>::operator[](size_t i)
{
    if( i >= stack_size )
    {
        const size_t heap_pos = i - stack_size;

        return dynamic_array[heap_pos / heap_block_size].buf[heap_pos % heap_block_size];
    }

    return stack_array[i];
}
/*
    returns a copy of the i-th item
    indexes below stack_size are in stack_array, the rest is spread over
    the heap blocks (heap_block_size items per block)
    no range checking is made
*/
template<typename item_type, size_t stack_size, size_t heap_block_size>
const item_type MemBuffer<item_type, stack_size, heap_block_size>::operator[](size_t i) const
{
    if( i >= stack_size )
    {
        const size_t heap_pos = i - stack_size;

        return dynamic_array[heap_pos / heap_block_size].buf[heap_pos % heap_block_size];
    }

    return stack_array[i];
}
/*
    returns the number of items stored in the buffer
*/
template<typename item_type, size_t stack_size, size_t heap_block_size>
size_t MemBuffer<item_type, stack_size, heap_block_size>::size() const
{
    return this->size_used;
}
/*
    returns true if there are no items in the buffer
*/
template<typename item_type, size_t stack_size, size_t heap_block_size>
bool MemBuffer<item_type, stack_size, heap_block_size>::empty() const
{
    return !(size_used > 0);
}
/*
    returns an iterator to the first item: index 0 of stack_array
*/
template<typename item_type, size_t stack_size, size_t heap_block_size>
typename MemBuffer<item_type, stack_size, heap_block_size>::iterator
MemBuffer<item_type, stack_size, heap_block_size>::begin()
{
    iterator first;

    first.index               = 0;
    first.dynamic_array_index = size_t(-1); // the stack array
    first.mem_buffer          = this;

    return first;
}
/*
    returns an iterator to the position just after the last item

    the position is normalized in the same way as operator++ does it:
    if it falls just after the last index of a block
    it is changed to index 0 of the next block
*/
template<typename item_type, size_t stack_size, size_t heap_block_size>
typename MemBuffer<item_type, stack_size, heap_block_size>::iterator
MemBuffer<item_type, stack_size, heap_block_size>::end()
{
    const bool ends_in_stack_array = (size_used <= stack_size);
    const size_t block_len = ends_in_stack_array ? stack_size : heap_block_size;

    iterator last;
    last.mem_buffer = this;

    // dynamic_array is read only when some items are stored in heap blocks
    last.dynamic_array_index = ends_in_stack_array ? size_t(-1) : dynamic_array_index;
    last.index = ends_in_stack_array ? size_used : dynamic_array[dynamic_array_index].size_used;

    if( last.index >= block_len )
    {
        // for the stack array size_t(-1) + 1 wraps around to 0 (the first heap block)
        last.dynamic_array_index += 1;
        last.index = 0;
    }

    return last;
}
/*
    returns a const_iterator to the first item: index 0 of stack_array
*/
template<typename item_type, size_t stack_size, size_t heap_block_size>
typename MemBuffer<item_type, stack_size, heap_block_size>::const_iterator
MemBuffer<item_type, stack_size, heap_block_size>::begin() const
{
    const_iterator first;

    first.index               = 0;
    first.dynamic_array_index = size_t(-1); // the stack array
    first.mem_buffer          = this;

    return first;
}
/*
    returns a const_iterator to the position just after the last item

    the position is normalized in the same way as operator++ does it:
    if it falls just after the last index of a block
    it is changed to index 0 of the next block
*/
template<typename item_type, size_t stack_size, size_t heap_block_size>
typename MemBuffer<item_type, stack_size, heap_block_size>::const_iterator
MemBuffer<item_type, stack_size, heap_block_size>::end() const
{
    const bool ends_in_stack_array = (size_used <= stack_size);
    const size_t block_len = ends_in_stack_array ? stack_size : heap_block_size;

    const_iterator last;
    last.mem_buffer = this;

    // dynamic_array is read only when some items are stored in heap blocks
    last.dynamic_array_index = ends_in_stack_array ? size_t(-1) : dynamic_array_index;
    last.index = ends_in_stack_array ? size_used : dynamic_array[dynamic_array_index].size_used;

    if( last.index >= block_len )
    {
        // for the stack array size_t(-1) + 1 wraps around to 0 (the first heap block)
        last.dynamic_array_index += 1;
        last.index = 0;
    }

    return last;
}
/*
    makes sure the buffer has memory for at least len items
    by allocating new heap blocks
    len is remembered (size_reserved) and used later by clear()
*/
template<typename item_type, size_t stack_size, size_t heap_block_size>
void MemBuffer<item_type, stack_size, heap_block_size>::reserve(size_t len)
{
    size_reserved = len;

    while( size_reserved > size_allocated )
    {
        add_dynamic_node();
    }
}
/*
    returns for how many items the memory is currently allocated
    (stack_array plus all allocated heap blocks)
*/
template<typename item_type, size_t stack_size, size_t heap_block_size>
size_t MemBuffer<item_type, stack_size, heap_block_size>::capacity() const
{
    return this->size_allocated;
}
/*
    removes all items from the buffer

    if more than stack_size items were reserved with reserve()
    then (size_reserved - stack_size) / heap_block_size + 1 heap blocks
    are kept and the blocks above this number are freed
    if size_reserved is not greater than stack_size no heap block is freed

    dynamic_array_used is lowered only when there really are more blocks
    than the number to keep -- it must never be set above the number of
    allocated blocks (it would make the dtor and append() use descriptors
    which have never been initialized), such a situation was possible
    e.g. after reserve(stack_size + heap_block_size)
*/
template<typename item_type, size_t stack_size, size_t heap_block_size>
void MemBuffer<item_type, stack_size, heap_block_size>::clear()
{
    if( size_reserved > stack_size )
    {
        const size_t blocks_to_keep = (size_reserved - stack_size) / heap_block_size + 1;

        if( blocks_to_keep < dynamic_array_used )
        {
            for(size_t i = blocks_to_keep ; i < dynamic_array_used ; ++i)
            {
                size_allocated -= heap_block_size;
                delete [] dynamic_array[i].buf;
            }

            dynamic_array_used = blocks_to_keep;
        }
    }

    size_used = 0;
    dynamic_array_index = size_t(-1); // the next insertion goes to stack_array
}
} // namespace
#endif

2237
src/space/space.cpp Normal file

File diff suppressed because it is too large Load Diff

1313
src/space/space.h Normal file

File diff suppressed because it is too large Load Diff

1161
src/space/spaceparser.cpp Normal file

File diff suppressed because it is too large Load Diff

376
src/space/spaceparser.h Normal file
View File

@@ -0,0 +1,376 @@
/*
* This file is a part of PikoTools
* and is distributed under the (new) BSD licence.
* Author: Tomasz Sowa <t.sowa@ttmath.org>
*/
/*
* Copyright (c) 2012-2021, Tomasz Sowa
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* * Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
*
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* * Neither the name Tomasz Sowa nor the names of contributors to this
* project may be used to endorse or promote products derived
* from this software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
* THE POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef headerfile_picotools_space_jsonspaceparser
#define headerfile_picotools_space_jsonspaceparser
#include <fstream>
#include "space.h"
namespace PT
{
/*
a parser which reads text in the JSON format or in the Space format
(from a file or from a string) into a Space object set by SetSpace()
*/
class SpaceParser
{
public:
/*
ctor -- setting default values (SetDefault() method)
*/
SpaceParser();
/*
setting the root space
*/
void SetSpace(Space * pspace);
void SetSpace(Space & pspace);
/*
setting options of the parser to the default values
utf8 etc.
*/
void SetDefault();
/*
status of parsing
*/
enum Status { ok, cant_open_file, syntax_error, no_space };
/*
the last status of parsing, set by Parse() methods
*/
Status status;
/*
how many objects were skipped
used in parsing tables when create_table_as_space is false
NOTE(review): create_table_as_space is not declared in this class -- confirm whether this comment is still valid
*/
size_t skipped;
/*
main methods used to parse a JSON file
file_name is the path to a file
*/
Status ParseJSONFile(const char * file_name);
Status ParseJSONFile(const std::string & file_name);
Status ParseJSONFile(const wchar_t * file_name);
Status ParseJSONFile(const std::wstring & file_name);
/*
main methods used to parse a Space file
file_name is the path to a file
*/
Status ParseSpaceFile(const char * file_name);
Status ParseSpaceFile(const std::string & file_name);
Status ParseSpaceFile(const wchar_t * file_name);
Status ParseSpaceFile(const std::wstring & file_name);
/*
main methods used to parse a JSON string
str - input string (either 8bit ascii or UTF-8 -- see UTF8() method)
*/
Status ParseJSON(const char * str);
Status ParseJSON(const std::string & str);
/*
main methods used to parse a JSON string
here input string is always in unicode (wide characters)
*/
Status ParseJSON(const wchar_t * str);
Status ParseJSON(const std::wstring & str);
/*
main methods used to parse a string in the Space format
char based strings are either 8bit ascii or UTF-8 (see UTF8() method),
wchar_t based strings are in unicode (wide characters)
*/
Status ParseSpace(const char * str);
Status ParseSpace(const std::string & str);
Status ParseSpace(const wchar_t * str);
Status ParseSpace(const std::wstring & str);
/*
* TODO: add a two-argument Parse method
* Status Parse(const char * str, Space & output_space);
*
*/
/*
if true then empty values and lists, e.g:
option =
option2 = ()
will be omitted (not inserted to 'table')
default: false
*/
void SkipEmpty(bool skip);
/*
'\' character is used to escape other characters in a quoted string
so "some \t t\"ext" will produce "some t t"ext"
default: true
*/
void UseEscapeChar(bool escape);
/*
if true then the input file or string (char* or std::string) is treated as UTF-8
default true
the internal storage for strings is std::wstring so if you call UTF8(false) then
the characters of input string will be simple static_cast<> from char to wchar_t
*/
// rename to use_utf8(bool)
void UTF8(bool utf);
/*
*
* returns the number of the last parsed line
* can be used to obtain the line in which there was a syntax error
*
*/
int get_last_parsed_line();
private:
/*
current space set by SetSpace();
*/
Space * root_space;
/*
a number of a line in which there is a syntax_error
*/
int line;
/*
true if Parse() method was called
false if ParseString() was called
NOTE(review): Parse()/ParseString() are not public methods of this class any more,
presumably the Parse...File() methods and the ParseJSON()/ParseSpace() string methods
are meant -- confirm in spaceparser.cpp
*/
bool reading_from_file;
/*
pointers to the current character
if ParseString() is in use
(NOTE(review): presumably when parsing from a string -- see the note at reading_from_file)
*/
const char * pchar_ascii;
const wchar_t * pchar_unicode;
/*
true if ParseString(wchar_t *) or ParseString(std::wstring&) was called
(NOTE(review): presumably the wchar_t based ParseJSON()/ParseSpace() -- confirm)
*/
bool reading_from_wchar_string;
/*
last read token
*/
std::wstring token;
/*
separator between a variable and a value, default: '='
*/
int separator;
/*
space starting character, default: '{'
*/
int space_start;
/*
space ending character, default: '}'
*/
int space_end;
/*
table starting character, default: '['
*/
int table_start;
/*
table ending character, default: ']'
*/
int table_end;
/*
option delimiter, default: ','
*/
int option_delimiter;
/*
last read char
or -1 if the end
*/
int lastc;
/*
true if the lastc was escaped (with a backslash)
we have to know if the last sequence was \" or just "
*/
bool char_was_escaped;
/*
current file
maybe it would be better to make it a pointer?
if we parse only a string then there is no sense to have such an object
*/
std::ifstream file;
/*
if true then empty lists, e.g:
option =
option2 = ()
will be omitted (not inserted to 'table')
default: false
*/
bool skip_empty;
/*
input file is in UTF-8
default: true
*/
bool input_as_utf8;
/*
if true you can use an escape character '\' in quoted values
*/
bool use_escape_char;
/*
*
* if parsing_space is false then it means we are parsing JSON format
*
*
*/
bool parsing_space;
// new
// internal parsing methods, their definitions are not part of this header
void ParseRootSpace();
void Parse(Space * space, bool is_object_value, bool is_table_value);
void ParseSpace(Space * space);
void ParseTable(Space * space);
void ParseKeyValuePairs(Space * space);
void ParseValuesList(Space * space);
void ReadKey();
void ParseTextValue(Space * space);
void ParseIntegerValue(Space * space);
void ParseFloatingPointValue(Space * space);
bool is_alfa_numeric_char(int c);
void ReadTokenUntilDelimiter(std::wstring & token, int delimiter1, int delimiter2);
void ReadAlfaNumericToken(std::wstring & token);
void ReadStringValue(std::wstring & token, bool is_object_value, bool is_table_value);
bool is_integer_token();
bool is_floating_point_token();
void ReadSpaceFieldToken(std::wstring & token);
void ReadTokenQuoted(std::wstring & token);
void ReadMultilineTokenQuoted(std::wstring & token);
// reading one character from the current input (a file or a string)
int ReadUTF8Char();
int ReadASCIIChar();
int ReadCharFromWcharString();
int ReadCharFromUTF8String();
int ReadCharFromAsciiString();
int ReadCharNoEscape();
int ReadChar();
bool IsWhite(int c);
void SkipLine();
void SkipWhite();
void TrimLastWhite(std::wstring & s);
//void Trim(std::wstring & s);
bool IsHexDigit(wchar_t c);
int HexToInt(wchar_t c);
void ReadUnicodeCodePoint();
};
} // namespace
#endif

535
src/textstream/textstream.h Normal file
View File

@@ -0,0 +1,535 @@
/*
* This file is a part of PikoTools
* and is distributed under the (new) BSD licence.
* Author: Tomasz Sowa <t.sowa@ttmath.org>
*/
/*
* Copyright (c) 2012-2021, Tomasz Sowa
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* * Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
*
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* * Neither the name Tomasz Sowa nor the names of contributors to this
* project may be used to endorse or promote products derived
* from this software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
* THE POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef headerfile_picotools_textstream_textstream
#define headerfile_picotools_textstream_textstream
#include <cstddef>
// for snprintf
#include <cstdio>
// for swprintf
#include <cwchar>
#include <string>
#include "space/space.h"
#include "date/date.h"
#include "convert/inttostr.h"
#include "membuffer/membuffer.h"
#include "types.h"
namespace PT
{
/*
a special class representing a stream buffer
similar to std::ostringstream
CharT is the character type kept in the buffer
(types.h instantiates the template with char and with wchar_t)
stack_size and heap_block_size are passed on to the MemBuffer member
this class doesn't use UTF-8 in any kind
(characters are converted between char and wchar_t with a plain static_cast)
*/
template<typename CharT, size_t stack_size, size_t heap_block_size>
class TextStreamBase
{
public:
// creates an empty stream, the constructor simply calls clear()
TextStreamBase();
typedef CharT char_type;
typedef MemBuffer<char_type, stack_size, heap_block_size> buffer_type;
typedef typename buffer_type::iterator iterator;
typedef typename buffer_type::const_iterator const_iterator;
// clears the buffer and resets radix to 10 and min_width_for_integers to 0
void clear();
// the methods below forward to the method of the same name in the buffer
bool empty() const;
size_t size() const;
void reserve(size_t len);
size_t capacity() const;
iterator begin();
iterator end();
const_iterator begin() const;
const_iterator end() const;
// appends the content of the stream to str, each character is converted with static_cast
// if clear_string is true then str is cleared first
void to_string(std::string & str, bool clear_string = true) const;
void to_string(std::wstring & str, bool clear_string = true) const;
// direct access to a character of the buffer (forwarded to buffer[index])
char_type & operator[](size_t index);
char_type operator[](size_t index) const;
// text output: characters are appended one by one after a static_cast to char_type
TextStreamBase & operator<<(const char * str);
TextStreamBase & operator<<(const std::string & str);
TextStreamBase & operator<<(const wchar_t * str);
// NOTE(review): this overload is only declared, this header contains no definition for it,
// so using it will not link unless it is defined somewhere else -- confirm
TextStreamBase & operator<<(const std::wstring * str);
TextStreamBase & operator<<(const std::wstring & str);
TextStreamBase & operator<<(char);
TextStreamBase & operator<<(wchar_t);
// integer output: the value is converted with Toa() using the current radix,
// int and long are forwarded to the (unsigned) long long overloads
TextStreamBase & operator<<(int);
TextStreamBase & operator<<(long);
TextStreamBase & operator<<(long long);
TextStreamBase & operator<<(unsigned int);
TextStreamBase & operator<<(unsigned long);
TextStreamBase & operator<<(unsigned long long);
// printed with snprintf and the "%f" format
TextStreamBase & operator<<(double);
TextStreamBase & operator<<(const void *); // printing a pointer
// serialized by Space::serialize_to_space_stream() and Date::Serialize() respectively
TextStreamBase & operator<<(const PT::Space & space);
TextStreamBase & operator<<(const PT::Date & date);
// min width for integer output
// if the output value has less digits then first zeroes are added
// (0 turn off)
TextStreamBase & int_min_width(size_t min_width);
// appends the buffer of another stream (which can have different template arguments)
template<typename arg_char_type, size_t arg_stack_size, size_t arg_heap_block_size>
TextStreamBase & operator<<(const TextStreamBase<arg_char_type, arg_stack_size, arg_heap_block_size> & arg);
// appends len items from buf (forwarded to buffer.append(buf, len))
template<typename in_buffer_type>
TextStreamBase & write(const in_buffer_type * buf, size_t len);
// write double value in a specified format
// format is the same as in the snprintf function, e.g. write("%f", 10.0)
// the formatted text is produced in a local buffer of 100 characters
TextStreamBase & write(const char * format, double val);
TextStreamBase & write(const wchar_t * format, double val);
// appends fill_up_char until existing_length reaches min_width_for_integers
// (does nothing if existing_length is already large enough)
TextStreamBase & fill_up_if_needed(wchar_t fill_up_char, size_t existing_length);
/*
raw access
*/
// numeric base passed to Toa() when integers are printed, clear() sets it to 10
int radix;
// minimal width used by the integer operators, set by int_min_width(), 0 after clear()
size_t min_width_for_integers;
// storage for the characters of the stream
buffer_type buffer;
};
/*
	creates an empty stream;
	the whole initialisation is delegated to clear()
*/
template<typename char_type, size_t stack_size, size_t heap_block_size>
TextStreamBase<char_type, stack_size, heap_block_size>::TextStreamBase()
{
	this->clear();
}
/*
	removes all characters from the buffer and restores the formatting
	defaults: decimal radix and no minimal width for integers
*/
template<typename char_type, size_t stack_size, size_t heap_block_size>
void TextStreamBase<char_type, stack_size, heap_block_size>::clear()
{
	buffer.clear();

	min_width_for_integers = 0;
	radix = 10;
}
/*
	returns the result of empty() called on the underlying buffer
*/
template<typename char_type, size_t stack_size, size_t heap_block_size>
bool TextStreamBase<char_type, stack_size, heap_block_size>::empty() const
{
	const bool nothing_stored = buffer.empty();

	return nothing_stored;
}
/*
	returns the result of size() called on the underlying buffer
*/
template<typename char_type, size_t stack_size, size_t heap_block_size>
size_t TextStreamBase<char_type, stack_size, heap_block_size>::size() const
{
	const size_t stored = buffer.size();

	return stored;
}
/*
	forwards the request to reserve() of the underlying buffer
*/
template<typename char_type, size_t stack_size, size_t heap_block_size>
void TextStreamBase<char_type, stack_size, heap_block_size>::reserve(size_t len)
{
	this->buffer.reserve(len);
}
/*
	returns the result of capacity() called on the underlying buffer
*/
template<typename char_type, size_t stack_size, size_t heap_block_size>
size_t TextStreamBase<char_type, stack_size, heap_block_size>::capacity() const
{
	const size_t available = buffer.capacity();

	return available;
}
/*
	iterator to the first character (taken from the underlying buffer)
*/
template<typename char_type, size_t stack_size, size_t heap_block_size>
typename TextStreamBase<char_type, stack_size, heap_block_size>::iterator
TextStreamBase<char_type, stack_size, heap_block_size>::begin()
{
	return this->buffer.begin();
}
/*
	past-the-end iterator (taken from the underlying buffer)
*/
template<typename char_type, size_t stack_size, size_t heap_block_size>
typename TextStreamBase<char_type, stack_size, heap_block_size>::iterator
TextStreamBase<char_type, stack_size, heap_block_size>::end()
{
	return this->buffer.end();
}
/*
	read-only iterator to the first character (taken from the underlying buffer)
*/
template<typename char_type, size_t stack_size, size_t heap_block_size>
typename TextStreamBase<char_type, stack_size, heap_block_size>::const_iterator
TextStreamBase<char_type, stack_size, heap_block_size>::begin() const
{
	return this->buffer.begin();
}
/*
	read-only past-the-end iterator (taken from the underlying buffer)
*/
template<typename char_type, size_t stack_size, size_t heap_block_size>
typename TextStreamBase<char_type, stack_size, heap_block_size>::const_iterator
TextStreamBase<char_type, stack_size, heap_block_size>::end() const
{
	return this->buffer.end();
}
/*
	copies the content of the stream to a std::string

	str          - the destination, new characters are put at its end
	clear_string - if true the destination is cleared first

	each character is converted with static_cast<char>
	(there is no UTF-8 conversion here)
*/
template<typename char_type, size_t stack_size, size_t heap_block_size>
void TextStreamBase<char_type, stack_size, heap_block_size>::to_string(std::string & str, bool clear_string) const
{
	if( clear_string )
		str.clear();

	// make room for everything at once
	const size_t needed = str.size() + size();

	if( needed > str.capacity() )
		str.reserve(needed);

	for(const_iterator it = begin(), last = end() ; it != last ; ++it)
		str.push_back(static_cast<char>(*it));
}
/*
	copies the content of the stream to a std::wstring

	str          - the destination, new characters are put at its end
	clear_string - if true the destination is cleared first

	each character is converted with static_cast<wchar_t>
	(there is no UTF-8 conversion here)
*/
template<typename char_type, size_t stack_size, size_t heap_block_size>
void TextStreamBase<char_type, stack_size, heap_block_size>::to_string(std::wstring & str, bool clear_string) const
{
	if( clear_string )
		str.clear();

	// make room for everything at once
	const size_t needed = str.size() + size();

	if( needed > str.capacity() )
		str.reserve(needed);

	for(const_iterator it = begin(), last = end() ; it != last ; ++it)
		str.push_back(static_cast<wchar_t>(*it));
}
/*
	returns a reference to the character at 'index'
	(the call is forwarded to operator[] of the underlying buffer)
*/
template<typename char_type, size_t stack_size, size_t heap_block_size>
char_type & TextStreamBase<char_type, stack_size, heap_block_size>::operator[](size_t index)
{
	return this->buffer[index];
}
/*
	returns a copy of the character at 'index'
	(the call is forwarded to operator[] of the underlying buffer)
*/
template<typename char_type, size_t stack_size, size_t heap_block_size>
char_type TextStreamBase<char_type, stack_size, heap_block_size>::operator[](size_t index) const
{
	return this->buffer[index];
}
/*
	appends a null terminated narrow string

	characters are converted one by one with static_cast<char_type>,
	str has to point to a valid string (it is not tested against a null pointer)
*/
template<typename char_type, size_t stack_size, size_t heap_block_size>
TextStreamBase<char_type, stack_size, heap_block_size> &
TextStreamBase<char_type, stack_size, heap_block_size>::operator<<(const char * str)
{
	const char * cursor = str;

	while( *cursor )
	{
		buffer.append(static_cast<char_type>(*cursor));
		++cursor;
	}

	return *this;
}
/*
	appends all characters of str (embedded null characters included)

	when the stream stores byte-sized characters the whole block is copied at once,
	otherwise every character is converted with static_cast<char_type>;
	the length is always taken from str.size() -- passing str.c_str() to the
	const char * overload would silently cut the text at the first '\0'
*/
template<typename char_type, size_t stack_size, size_t heap_block_size>
TextStreamBase<char_type, stack_size, heap_block_size> &
TextStreamBase<char_type, stack_size, heap_block_size>::operator<<(const std::string & str)
{
	if( sizeof(char_type) == sizeof(char) )
	{
		buffer.append(str.c_str(), str.size());
	}
	else
	{
		for(size_t i = 0 ; i < str.size() ; ++i)
			buffer.append(static_cast<char_type>(str[i]));
	}

	return *this;
}
/*
	appends a null terminated wide string

	characters are converted one by one with static_cast<char_type>,
	str has to point to a valid string (it is not tested against a null pointer)
*/
template<typename char_type, size_t stack_size, size_t heap_block_size>
TextStreamBase<char_type, stack_size, heap_block_size> &
TextStreamBase<char_type, stack_size, heap_block_size>::operator<<(const wchar_t * str)
{
	const wchar_t * cursor = str;

	while( *cursor )
	{
		buffer.append(static_cast<char_type>(*cursor));
		++cursor;
	}

	return *this;
}
/*
	appends all characters of str (embedded null characters included)

	when the stream stores characters of the same size as wchar_t the whole block
	is copied at once, otherwise every character is converted with
	static_cast<char_type>;
	the length is always taken from str.size() -- passing str.c_str() to the
	const wchar_t * overload would silently cut the text at the first L'\0'
*/
template<typename char_type, size_t stack_size, size_t heap_block_size>
TextStreamBase<char_type, stack_size, heap_block_size> &
TextStreamBase<char_type, stack_size, heap_block_size>::operator<<(const std::wstring & str)
{
	if( sizeof(char_type) == sizeof(wchar_t) )
	{
		buffer.append(str.c_str(), str.size());
	}
	else
	{
		for(size_t i = 0 ; i < str.size() ; ++i)
			buffer.append(static_cast<char_type>(str[i]));
	}

	return *this;
}
/*
	appends a single narrow character (converted with static_cast<char_type>)
*/
template<typename char_type, size_t stack_size, size_t heap_block_size>
TextStreamBase<char_type, stack_size, heap_block_size> &
TextStreamBase<char_type, stack_size, heap_block_size>::operator<<(char v)
{
	const char_type converted = static_cast<char_type>(v);
	buffer.append(converted);

	return *this;
}
/*
	appends a single wide character (converted with static_cast<char_type>)
*/
template<typename char_type, size_t stack_size, size_t heap_block_size>
TextStreamBase<char_type, stack_size, heap_block_size> &
TextStreamBase<char_type, stack_size, heap_block_size>::operator<<(wchar_t v)
{
	const char_type converted = static_cast<char_type>(v);
	buffer.append(converted);

	return *this;
}
/*
	prints an int: the value is widened and handled by the long long overload
*/
template<typename char_type, size_t stack_size, size_t heap_block_size>
TextStreamBase<char_type, stack_size, heap_block_size> &
TextStreamBase<char_type, stack_size, heap_block_size>::operator<<(int v)
{
	long long widened = v;

	return operator<<(widened);
}
/*
	prints a long: the value is widened and handled by the long long overload
*/
template<typename char_type, size_t stack_size, size_t heap_block_size>
TextStreamBase<char_type, stack_size, heap_block_size> &
TextStreamBase<char_type, stack_size, heap_block_size>::operator<<(long v)
{
	long long widened = v;

	return operator<<(widened);
}
/*
	prints a signed integer

	the value is converted by Toa() with the current radix into a local buffer,
	if Toa() reports a failure nothing is appended;
	before the digits the output is padded with '0' up to min_width_for_integers

	NOTE(review): the padding is put in front of the whole converted text,
	so if Toa() writes a leading '-' for negative values the zeroes land
	before the sign -- confirm whether this is intended
*/
template<typename char_type, size_t stack_size, size_t heap_block_size>
TextStreamBase<char_type, stack_size, heap_block_size> &
TextStreamBase<char_type, stack_size, heap_block_size>::operator<<(long long v)
{
	char_type digits[50];
	size_t room = sizeof(digits) / sizeof(char_type);
	size_t used;

	if( !Toa(v, digits, room, radix, &used) )
		return *this;

	fill_up_if_needed('0', used);
	buffer.append(digits, used);

	return *this;
}
/*
	prints an unsigned int: the value is widened and handled
	by the unsigned long long overload
*/
template<typename char_type, size_t stack_size, size_t heap_block_size>
TextStreamBase<char_type, stack_size, heap_block_size> &
TextStreamBase<char_type, stack_size, heap_block_size>::operator<<(unsigned int v)
{
	unsigned long long widened = v;

	return operator<<(widened);
}
/*
	prints an unsigned long: the value is widened and handled
	by the unsigned long long overload
*/
template<typename char_type, size_t stack_size, size_t heap_block_size>
TextStreamBase<char_type, stack_size, heap_block_size> &
TextStreamBase<char_type, stack_size, heap_block_size>::operator<<(unsigned long v)
{
	unsigned long long widened = v;

	return operator<<(widened);
}
/*
	prints an unsigned integer

	the value is converted by Toa() with the current radix into a local buffer,
	if Toa() reports a failure nothing is appended;
	before the digits the output is padded with '0' up to min_width_for_integers
*/
template<typename char_type, size_t stack_size, size_t heap_block_size>
TextStreamBase<char_type, stack_size, heap_block_size> &
TextStreamBase<char_type, stack_size, heap_block_size>::operator<<(unsigned long long v)
{
	char_type digits[50];
	size_t room = sizeof(digits) / sizeof(char_type);
	size_t used;

	if( !Toa(v, digits, room, radix, &used) )
		return *this;

	fill_up_if_needed('0', used);
	buffer.append(digits, used);

	return *this;
}
/*
	prints a double with the "%f" format of snprintf

	"%f" never uses an exponent, so the largest finite double needs
	309 digits before the decimal point, the point itself, 6 digits after it
	and optionally a sign (317 characters plus the terminating zero);
	the local buffer is large enough for that so the text is never cut
	(with the former 100 characters big values were silently truncated)
*/
template<typename char_type, size_t stack_size, size_t heap_block_size>
TextStreamBase<char_type, stack_size, heap_block_size> &
TextStreamBase<char_type, stack_size, heap_block_size>::operator<<(double v)
{
	char buf[352];

	snprintf(buf, sizeof(buf)/sizeof(char), "%f", v);

	return operator<<(buf);
}
/*
	prints a pointer as "0x" followed by its value in base 16

	the address is converted by Toa() directly behind the prefix,
	if Toa() reports a failure nothing is appended (not even the prefix);
	neither radix nor min_width_for_integers are taken into account here
*/
template<typename char_type, size_t stack_size, size_t heap_block_size>
TextStreamBase<char_type, stack_size, heap_block_size> &
TextStreamBase<char_type, stack_size, heap_block_size>::operator<<(const void * v)
{
	const size_t prefix_len = 2;
	char_type text[50];
	size_t room = sizeof(text) / sizeof(char_type);
	size_t digits_len;

	text[0] = '0';
	text[1] = 'x';

	// IMPROVE ME add some minimal width?
	const bool converted = Toa(reinterpret_cast<unsigned long long>(v), text + prefix_len, room - prefix_len, 16, &digits_len);

	if( converted )
		buffer.append(text, digits_len + prefix_len);

	return *this;
}
/*
	appends 'len' items from 'buf'
	(the call is forwarded to append(buf, len) of the underlying buffer)
*/
template<typename char_type, size_t stack_size, size_t heap_block_size>
template<typename in_buffer_type>
TextStreamBase<char_type, stack_size, heap_block_size> &
TextStreamBase<char_type, stack_size, heap_block_size>::write(const in_buffer_type * buf, size_t len)
{
	this->buffer.append(buf, len);

	return *this;
}
/*
	prints a double with a narrow format given by the caller

	format - a format string for snprintf, e.g. "%f"
	val    - the value to print

	the text is produced in a local buffer of 100 characters
	and then appended as a null terminated string
*/
template<typename char_type, size_t stack_size, size_t heap_block_size>
TextStreamBase<char_type, stack_size, heap_block_size> &
TextStreamBase<char_type, stack_size, heap_block_size>::write(const char * format, double val)
{
	char text[100];

	snprintf(text, sizeof(text), format, val);

	return operator<<(text);
}
/*
	prints a double with a wide format given by the caller

	format - a format string for swprintf, e.g. L"%f"
	val    - the value to print

	the text is produced in a local buffer of 100 wide characters
	and then appended as a null terminated string

	unlike snprintf, swprintf returns a negative value when the text does not
	fit and then the buffer is not guaranteed to be null terminated;
	the buffer is zeroed beforehand and its last element is forced to zero,
	so whatever has been written is always a valid (possibly cut) string
*/
template<typename char_type, size_t stack_size, size_t heap_block_size>
TextStreamBase<char_type, stack_size, heap_block_size> &
TextStreamBase<char_type, stack_size, heap_block_size>::write(const wchar_t * format, double val)
{
	wchar_t buf[100] = {};
	const size_t buf_len = sizeof(buf)/sizeof(wchar_t);

	swprintf(buf, buf_len, format, val);
	buf[buf_len - 1] = 0;

	return operator<<(buf);
}
/*
	prints a Space object: the work is done by
	Space::serialize_to_space_stream() called with this stream and 'true'
*/
template<typename char_type, size_t stack_size, size_t heap_block_size>
TextStreamBase<char_type, stack_size, heap_block_size> &
TextStreamBase<char_type, stack_size, heap_block_size>::operator<<(const PT::Space & space)
{
	TextStreamBase & self = *this;
	space.serialize_to_space_stream(self, true);

	return self;
}
/*
	prints a Date object: the work is done by Date::Serialize()
	called with this stream
*/
template<typename char_type, size_t stack_size, size_t heap_block_size>
TextStreamBase<char_type, stack_size, heap_block_size> &
TextStreamBase<char_type, stack_size, heap_block_size>::operator<<(const PT::Date & date)
{
	TextStreamBase & self = *this;
	date.Serialize(self);

	return self;
}
/*
	appends the content of another stream
	(the other stream can have a different character type and buffer sizes,
	its buffer is passed to append() of this buffer)
*/
template<typename char_type, size_t stack_size, size_t heap_block_size>
template<typename arg_char_type, size_t arg_stack_size, size_t arg_heap_block_size>
TextStreamBase<char_type, stack_size, heap_block_size> &
TextStreamBase<char_type, stack_size, heap_block_size>::operator<<(
const TextStreamBase<arg_char_type, arg_stack_size, arg_heap_block_size> & arg)
{
	this->buffer.append(arg.buffer);

	return *this;
}
/*
	sets the minimal width used when integers are printed
	(zero turns the padding off), the stream itself is returned
	so the call can be put in a chain of << operators
*/
template<typename char_type, size_t stack_size, size_t heap_block_size>
TextStreamBase<char_type, stack_size, heap_block_size> &
TextStreamBase<char_type, stack_size, heap_block_size>::int_min_width(size_t min_width)
{
	this->min_width_for_integers = min_width;

	return *this;
}
/*
	pads the output up to min_width_for_integers

	fill_up_char    - the character used for padding
	existing_length - how many characters the value itself is going to take

	one fill_up_char is appended for every position missing between
	existing_length and min_width_for_integers, nothing is appended
	if the value is already wide enough (or the minimal width is zero)
*/
template<typename char_type, size_t stack_size, size_t heap_block_size>
TextStreamBase<char_type, stack_size, heap_block_size> &
TextStreamBase<char_type, stack_size, heap_block_size>::fill_up_if_needed(wchar_t fill_up_char, size_t existing_length)
{
	size_t width = existing_length;

	while( width < min_width_for_integers )
	{
		buffer.append(fill_up_char);
		++width;
	}

	return *this;
}
} // namespace
#endif

61
src/textstream/types.h Normal file
View File

@@ -0,0 +1,61 @@
/*
* This file is a part of PikoTools
* and is distributed under the (new) BSD licence.
* Author: Tomasz Sowa <t.sowa@ttmath.org>
*/
/*
* Copyright (c) 2012, Tomasz Sowa
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* * Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
*
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* * Neither the name Tomasz Sowa nor the names of contributors to this
* project may be used to endorse or promote products derived
* from this software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
* THE POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef headerfile_picotools_textstream_types
#define headerfile_picotools_textstream_types
// for size_t (the header should not depend on what was included before it)
#include <cstddef>
namespace PT
{
/*
	a forward declaration of the stream template
	(the definition is in textstream.h),
	it lets other headers name the stream types without including the whole class
*/
template<typename char_type, size_t stack_size, size_t heap_block_size>
class TextStreamBase;
/*
	the stream types: one for char and one for wchar_t,
	both with stack_size equal to 256 and heap_block_size equal to 4096
*/
typedef TextStreamBase<char, 256, 4096> TextStream;
typedef TextStreamBase<wchar_t, 256, 4096> WTextStream;
} // namespace
#endif

813
src/utf8/utf8.cpp Normal file
View File

@@ -0,0 +1,813 @@
/*
* This file is a part of PikoTools
* and is distributed under the (new) BSD licence.
* Author: Tomasz Sowa <t.sowa@ttmath.org>
*/
/*
* Copyright (c) 2010-2021, Tomasz Sowa
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* * Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
*
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* * Neither the name Tomasz Sowa nor the names of contributors to this
* project may be used to endorse or promote products derived
* from this software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
* THE POSSIBILITY OF SUCH DAMAGE.
*/
#include "utf8.h"
#include "utf8_private.h"
namespace PT
{
/*!
	tells whether 'c' is a valid unicode code point:
	it has to be in the range 0 - 0x10FFFF
	and it cannot be a surrogate (0xD800 - 0xDFFF)
*/
bool UTF8_CheckRange(int c)
{
	if( c < 0 || c > 0x10FFFF )
		return false;

	const bool is_surrogate = (c >= 0xD800 && c <= 0xDFFF);

	return !is_surrogate;
}
/*!
	tells whether 'c' is a valid unicode code point for a sequence of the given length,
	this method is used when reading from an utf8 string

	how_many_bytes - how many bytes from the utf8 string were read

	every length has its own range of allowed values:
	  1 byte  - 0x0000  - 0x007f
	  2 bytes - 0x0080  - 0x07ff
	  3 bytes - 0x0800  - 0xffff without surrogates (0xD800 - 0xDFFF)
	  4 bytes - 0x10000 - 0x10FFFF
	so a value stored in more bytes than needed is reported as incorrect,
	any other length gives false
*/
bool UTF8_CheckRange(int c, int how_many_bytes)
{
	switch( how_many_bytes )
	{
	case 1:
		return c >= 0x0000 && c <= 0x007f;

	case 2:
		return c >= 0x0080 && c <= 0x07ff;

	case 3:
		return (c >= 0x0800 && c < 0xD800) || (c > 0xDFFF && c <= 0xffff);

	case 4:
		return c >= 0x10000 && c <= 0x10FFFF;

	default:
		return false;
	}
}
/*!
	decodes one UTF-8 sequence from a buffer of a known size

	input:
	  utf8     - the input UTF-8 buffer
	  utf8_len - how many bytes are available in the buffer

	output:
	  res     - the decoded code point (zero or a partial value on errors)
	  correct - true only if the whole sequence was read and the value is valid for its length

	the function returns how many bytes of the input have been used:
	  0        - only if utf8_len is zero
	  1        - the first byte was rejected
	  utf8_len - the buffer ends before the end of the sequence
	  index of the rejected byte - a following byte was rejected
	  length of the sequence     - in other cases ('correct' says if the value is valid)
*/
size_t UTF8ToInt(const char * utf8, size_t utf8_len, int & res, bool & correct)
{
	size_t seq_len;

	res = 0;
	correct = false;

	if( utf8_len == 0 )
		return 0;

	if( !private_namespace::UTF8ToInt_FirstOctet(utf8[0], seq_len, res) )
		return 1;

	if( seq_len > utf8_len )
		return utf8_len;

	size_t pos = 1;

	while( pos < seq_len )
	{
		if( !private_namespace::UTF8ToInt_AddNextOctet(utf8[pos], res) )
			return pos;

		++pos;
	}

	correct = UTF8_CheckRange(res, seq_len);

	return seq_len;
}
/*!
	decodes one UTF-8 sequence from a null terminated string

	input:
	  utf8 - the input UTF-8 string (null terminated)

	output:
	  res     - the decoded code point (zero or a partial value on errors)
	  correct - true only if the whole sequence was read and the value is valid for its length

	the function returns how many bytes of the input have been used:
	  0 - only if the string starts with '\0'
	  1 - the first byte was rejected
	  index of the offending byte - the string ends inside the sequence
	                                or a following byte was rejected
	  length of the sequence      - in other cases ('correct' says if the value is valid)
*/
size_t UTF8ToInt(const char * utf8, int & res, bool & correct)
{
	size_t seq_len;

	res = 0;
	correct = false;

	if( *utf8 == 0 )
		return 0;

	if( !private_namespace::UTF8ToInt_FirstOctet(utf8[0], seq_len, res) )
		return 1;

	for(size_t pos = 1 ; pos < seq_len ; ++pos)
	{
		// the terminating zero is tested first so the helper never sees it
		if( utf8[pos] == 0 || !private_namespace::UTF8ToInt_AddNextOctet(utf8[pos], res) )
			return pos;
	}

	correct = UTF8_CheckRange(res, seq_len);

	return seq_len;
}
/*!
this function converts one UTF-8 character into one wide-character
input:
utf8 - an input UTF-8 string
output:
res - an output character
correct - true if it is a correct character
the function returns how many characters have been used from the input string
(returns zero only if utf8 is empty)
even if there are errors the functions returns a different from zero value
*/
size_t UTF8ToInt(const std::string & utf8, int & res, bool & correct)
{
return UTF8ToInt(utf8.c_str(), utf8.size(), res, correct);
}
/*!
	decodes one UTF-8 sequence read from a stream

	input:
	  utf8 - the input UTF-8 stream

	output:
	  res     - the decoded code point (zero or a partial value on errors)
	  correct - true only if the whole sequence was read and the value is valid for its length

	the function returns:
	  0 - the first byte could not be read from the stream
	  1 - the first byte was rejected
	  index of the offending byte - the stream failed inside the sequence
	                                or a following byte was rejected
	                                (the rejected byte has already been taken from the stream)
	  length of the sequence      - in other cases ('correct' says if the value is valid)
*/
size_t UTF8ToInt(std::istream & utf8, int & res, bool & correct)
{
	size_t seq_len;
	unsigned char octet;

	res = 0;
	correct = false;

	octet = utf8.get();

	if( !utf8 )
		return 0;

	if( !private_namespace::UTF8ToInt_FirstOctet(octet, seq_len, res) )
		return 1;

	for(size_t pos = 1 ; pos < seq_len ; ++pos)
	{
		octet = utf8.get();

		// the state of the stream is tested before the byte is used
		if( !utf8 || !private_namespace::UTF8ToInt_AddNextOctet(octet, res) )
			return pos;
	}

	correct = UTF8_CheckRange(res, seq_len);

	return seq_len;
}
/*
	appends the code point 'c' to the wide string 'res'

	if wchar_t has two bytes and 'c' is above 0xffff
	then two characters are appended (an UTF-16 surrogate pair: high, low),
	in other cases 'c' is appended as a single wchar_t
*/
static void IntToWide(int c, std::wstring & res)
{
	const bool needs_surrogates = (sizeof(wchar_t) == 2 && c > 0xffff);

	if( !needs_surrogates )
	{
		res += static_cast<wchar_t>(c);
		return;
	}

	// UTF16 surrogate pairs
	const int offset = c - 0x10000;
	const wchar_t high = static_cast<wchar_t>(((offset >> 10) & 0x3FF) + 0xD800);
	const wchar_t low  = static_cast<wchar_t>((offset & 0x3FF) + 0xDC00);

	res += high;
	res += low;
}
/*!
	converts an utf8 buffer into a wide string (std::wstring)

	input:
	  utf8     - the input utf8 buffer
	  utf8_len - size of the input buffer
	  clear    - if true the output string is cleared first
	  mode     - what to do with errors when converting
	             0: skip an invalid character
	             1: put U+FFFD "replacement character" instead of the invalid character (default)

	output:
	  res - the output wide string

	the input is walked by UTF8ToWideGeneric(), every code point it reports
	is appended to 'res' (as a surrogate pair if it is needed),
	the result of UTF8ToWideGeneric() is returned:
	false if there were some errors when converting
*/
bool UTF8ToWide(const char * utf8, size_t utf8_len, std::wstring & res, bool clear, int mode)
{
	if( clear )
		res.clear();

	return private_namespace::UTF8ToWideGeneric(utf8, utf8_len, mode, [&res](int code_point) {
		IntToWide(code_point, res);
	});
}
/*!
this function converts an utf8 string into wide string (std::wstring)
input:
utf8 - an input utf8 null terminated string
mode - what to do with errors when converting
0: skip an invalid character
1: put U+FFFD "replacement character" istead of the invalid character (default)
output:
res - an output wide string
the function returns false if there were some errors when converting
*/
bool UTF8ToWide(const char * utf8, std::wstring & res, bool clear, int mode)
{
size_t utf8_len = 0;
while( utf8[utf8_len] != 0 )
utf8_len += 1;
return UTF8ToWide(utf8, utf8_len, res, clear, mode);
}
/*!
this function converts an utf8 string into wide string (std::wstring)
input:
utf8 - an input utf8 string
mode - what to do with errors when converting
0: skip an invalid character
1: put U+FFFD "replacement character" istead of the invalid character (default)
output:
res - an output wide string
the function returns false if there were some errors when converting
*/
bool UTF8ToWide(const std::string & utf8, std::wstring & res, bool clear, int mode)
{
return UTF8ToWide(utf8.c_str(), utf8.size(), res, clear, mode);
}
/*!
	converts an utf8 stream into a wide string (std::wstring)

	input:
	  utf8  - the input utf8 stream
	  clear - if true the output string is cleared first
	  mode  - what to do with errors when converting
	          1: put U+FFFD "replacement character" instead of the invalid character (default)
	          other values: skip an invalid character

	output:
	  res - the output wide string

	sequences are read until UTF8ToInt() returns zero
	(the next byte cannot be read from the stream),
	the function returns false if there were some errors when converting
*/
bool UTF8ToWide(std::istream & utf8, std::wstring & res, bool clear, int mode)
{
	bool all_correct = true;
	bool is_correct;
	int code_point;

	if( clear )
		res.clear();

	while( UTF8ToInt(utf8, code_point, is_correct) != 0 )
	{
		if( is_correct )
		{
			IntToWide(code_point, res);
			continue;
		}

		all_correct = false;

		if( mode == 1 )
			res += 0xFFFD; // U+FFFD "replacement character"
	}

	return all_correct;
}
/*!
	converts one code point into an UTF-8 sequence

	input:
	  z - the code point

	output:
	  utf8         - a buffer for the output sequence (it is not null terminated)
	  utf8_max_len - the size of the buffer

	the function returns how many bytes have been written to the buffer (1 - 4),
	zero means the buffer is too small or 'z' is an incorrect unicode character
	(in such a case nothing is written)
*/
size_t IntToUTF8(int z, char * utf8, size_t utf8_max_len)
{
	if( utf8_max_len == 0 || !UTF8_CheckRange(z) )
		return 0;

	if( z <= 0x7f )
	{
		utf8[0] = static_cast<char>(z);
		return 1;
	}

	// the length of the sequence and the marker bits of its first byte
	size_t total;
	int lead_marker;

	if( z <= 0x7ff )
	{
		total = 2;
		lead_marker = 0xC0;
	}
	else
	if( z <= 0xffff )
	{
		total = 3;
		lead_marker = 0xE0;
	}
	else
	{
		total = 4;
		lead_marker = 0xF0;
	}

	if( total > utf8_max_len )
		return 0;

	// following bytes are written from the last one, each of them takes six bits
	int rest = z;

	for(size_t pos = total - 1 ; pos > 0 ; --pos)
	{
		utf8[pos] = static_cast<char>(0x80 | (rest & 0x3f));
		rest >>= 6;
	}

	// what is left fits in the free bits of the first byte
	utf8[0] = static_cast<char>(lead_marker | rest);

	return total;
}
/*!
	converts one code point into UTF-8 and appends it to a string

	input:
	  z     - the code point
	  clear - if true the output string is cleared first

	output:
	  utf8 - a UTF-8 string for the output sequence

	the function returns how many bytes have been added to the utf8 string,
	zero means that 'z' is an incorrect unicode character
*/
size_t IntToUTF8(int z, std::string & utf8, bool clear)
{
	char encoded[10];

	if( clear )
		utf8.clear();

	const size_t encoded_len = IntToUTF8(z, encoded, sizeof(encoded));
	utf8.append(encoded, encoded_len);

	return encoded_len;
}
/*!
	converts a wide string of a known length into an UTF-8 string

	input:
	  wide_string - the wide string for converting
	  string_len  - the size of the wide string
	  clear       - if true the output string is cleared first
	  mode        - what to do with errors when converting
	                0: skip an invalid character
	                1: put U+FFFD "replacement character" instead of the invalid character (default)

	output:
	  utf8 - a UTF-8 string for the output sequence

	the input is converted piece by piece with WideOneToUTF8()
	which reports how many wide characters it has used,
	this function returns false if there were some errors when converting
*/
bool WideToUTF8(const wchar_t * wide_string, size_t string_len, std::string & utf8, bool clear, int mode)
{
	bool was_error = false;

	if( clear )
		utf8.clear();

	const wchar_t * cursor = wide_string;
	size_t remaining = string_len;

	while( remaining != 0 )
	{
		const size_t consumed = private_namespace::WideOneToUTF8(cursor, remaining, utf8, was_error, mode);

		cursor += consumed;
		remaining -= consumed;
	}

	return !was_error;
}
/*!
	converts a null terminated wide string into an UTF-8 string

	input:
	  wide_string - the null terminated wide string for converting
	  clear       - if true the output string is cleared first
	  mode        - what to do with errors when converting
	                0: skip an invalid character
	                1: put U+FFFD "replacement character" instead of the invalid character (default)

	output:
	  utf8 - a UTF-8 string for the output sequence

	the input is converted piece by piece with WideOneToUTF8()
	until the terminating zero is reached,
	this function returns false if there were some errors when converting
*/
bool WideToUTF8(const wchar_t * wide_string, std::string & utf8, bool clear, int mode)
{
	bool was_error = false;

	if( clear )
		utf8.clear();

	while( *wide_string != 0 )
	{
		const auto consumed = private_namespace::WideOneToUTF8(wide_string, utf8, was_error, mode);
		wide_string += consumed;
	}

	return !was_error;
}
/*!
this function converts a wide string (std::wstring) into UTF-8 string
input:
wide_string - a wide string for converting
mode - what to do with errors when converting
0: skip an invalid character
1: put U+FFFD "replacement character" istead of the invalid character (default)
output:
utf8 - a UTF-8 string for the output sequence (the string is not cleared)
this function returns false if there were some errors when converting
*/
bool WideToUTF8(const std::wstring & wide_string, std::string & utf8, bool clear, int mode)
{
return WideToUTF8(wide_string.c_str(), wide_string.size(), utf8, clear, mode);
}
/*!
	converts a wide string of a known length into UTF-8 stored in a raw buffer

	input:
	  wide_string - the wide string for converting
	  string_len  - length of the wide string
	  mode        - what to do with errors when converting
	                0: skip an invalid character
	                1: put U+FFFD "replacement character" instead of the invalid character (default)

	output:
	  utf8         - a buffer for the UTF-8 output
	  utf8_len     - the size of the buffer
	  utf8_written - how many bytes have been written to the buffer

	this function returns false if there were some errors when converting
	or the output buffer was too small,
	the output is not null terminated

	an incorrect character in the wide string does not stop the conversion
	but if the buffer is too small the function breaks immediately
*/
bool WideToUTF8(const wchar_t * wide_string, size_t string_len, char * utf8, size_t utf8_len, size_t & utf8_written, int mode)
{
	bool was_error = false;
	bool buffer_too_small = false;
	size_t saved = 0;

	utf8_written = 0;

	while( string_len != 0 )
	{
		const size_t consumed = private_namespace::WideOneToUTF8(wide_string, string_len, utf8, utf8_len, saved, buffer_too_small, was_error, mode);

		// no room for the next sequence: this is always reported as an error
		if( buffer_too_small )
			return false;

		wide_string += consumed;
		string_len -= consumed;

		utf8 += saved;
		utf8_len -= saved;
		utf8_written += saved;
	}

	return !was_error;
}
/*!
this function converts a wide string (std::wstring) into UTF-8 stream
input:
wide_string - a wide string for converting
mode - what to do with errors when converting
0: skip an invalid character
1: put U+FFFD "replacement character" istead of the invalid character (default)
output:
utf8 - a buffer for the UTF-8 stream
utf8_len - the size of the buffer
utf8_written - how many bytes have been written to the buffer
this function returns false if there were some errors when converting or the output buffer was too small,
the output string is not null terminated
if there is an error when converting (there is an incorrect character in the wide string) the function
will continue converting but if the buffer is too small the function breaks immediately
*/
bool WideToUTF8(const std::wstring & wide_string, char * utf8, size_t utf8_len, size_t & utf8_written, int mode)
{
return WideToUTF8(wide_string.c_str(), wide_string.size(), utf8, utf8_len, utf8_written, mode);
}
/*!
	converts a wide string of a known length into a null terminated UTF-8 string
	stored in a raw buffer

	input:
	  wide_string - the wide string for converting
	  string_len  - length of the wide string
	  mode        - what to do with errors when converting
	                0: skip an invalid character
	                1: put U+FFFD "replacement character" instead of the invalid character (default)

	output:
	  utf8     - a buffer for the UTF-8 output
	  utf8_len - the size of the buffer (one byte of it is kept for the terminating zero)

	this function returns false if there were some errors when converting
	or the output buffer was too small,
	the output is null terminated in both cases;
	only if utf8_len is zero nothing is written at all (and false is returned)
*/
bool WideToUTF8(const wchar_t * wide_string, size_t string_len, char * utf8, size_t utf8_len, int mode)
{
	if( utf8_len == 0 )
		return false;

	size_t written = 0;
	const bool status = WideToUTF8(wide_string, string_len, utf8, utf8_len - 1, written, mode);

	utf8[written] = 0;

	return status;
}
/*!
this function converts a wide string (std::wstring) into UTF-8 stream
input:
wide_string - a wide string for converting
mode - what to do with errors when converting
0: skip an invalid character
1: put U+FFFD "replacement character" istead of the invalid character (default)
output:
utf8 - a buffer for the UTF-8 stream
utf8_len - the size of the buffer
this function returns false if there were some errors when converting or the output buffer was too small,
the output string is null terminated (even if there were errors during converting)
if there is an error when converting (there is an incorrect character in the wide string) the function
will continue converting but if the buffer is too small the function breaks immediately
(in both cases the utf8 buffer is null terminated)
*/
bool WideToUTF8(const std::wstring & wide_string, char * utf8, size_t utf8_len, int mode)
{
return WideToUTF8(wide_string.c_str(), wide_string.size(), utf8, utf8_len, mode);
}
/*!
	converts a null terminated wide string into UTF-8 stored in a raw buffer

	input:
	  wide_string - the null terminated wide string for converting
	  mode        - what to do with errors when converting
	                0: skip an invalid character
	                1: put U+FFFD "replacement character" instead of the invalid character (default)

	output:
	  utf8         - a buffer for the UTF-8 output
	  utf8_len     - the size of the buffer
	  utf8_written - how many bytes have been written to the buffer

	this function returns false if there were some errors when converting
	or the output buffer was too small,
	the output is not null terminated

	an incorrect character in the wide string does not stop the conversion
	but if the buffer is too small the function breaks immediately
*/
bool WideToUTF8(const wchar_t * wide_string, char * utf8, size_t utf8_len, size_t & utf8_written, int mode)
{
	bool was_error = false;
	bool buffer_too_small = false;
	size_t saved = 0;
	size_t available;

	utf8_written = 0;

	while( *wide_string != 0 )
	{
		// WideOneToUTF8() is given at most two wide characters,
		// only one if the terminating zero is right behind the current one
		available = (wide_string[1] == 0) ? 1 : 2;

		const size_t consumed = private_namespace::WideOneToUTF8(wide_string, available, utf8, utf8_len, saved, buffer_too_small, was_error, mode);

		// no room for the next sequence: this is always reported as an error
		if( buffer_too_small )
			return false;

		wide_string += consumed;

		utf8 += saved;
		utf8_len -= saved;
		utf8_written += saved;
	}

	return !was_error;
}
/*!
    converts a null terminated wide string into a null terminated UTF-8 string
    stored in a caller supplied buffer

    input:
        wide_string - a null terminated wide string to convert
        utf8_len    - the size of the utf8 buffer
        mode        - what to do with errors when converting
                      0: skip an invalid character
                      1: put U+FFFD "replacement character" instead of the invalid character (default)

    output:
        utf8 - the buffer receiving the UTF-8 string

    returns false if there were some errors when converting or the output buffer was too small
    (a buffer of size zero is too small and is left untouched)

    for a non-empty buffer the output is null terminated, also when there were errors

    an incorrect character in the wide string does not stop the conversion,
    a too small buffer stops it immediately
*/
bool WideToUTF8(const wchar_t * wide_string, char * utf8, size_t utf8_len, int mode)
{
    if( utf8_len == 0 )
        return false;

    // the last byte of the buffer is kept back for the terminating null character
    size_t bytes_written;
    const bool converted = WideToUTF8(wide_string, utf8, utf8_len - 1, bytes_written, mode);

    utf8[bytes_written] = 0;

    return converted;
}
} // namespace

182
src/utf8/utf8.h Normal file
View File

@@ -0,0 +1,182 @@
/*
* This file is a part of PikoTools
* and is distributed under the (new) BSD licence.
* Author: Tomasz Sowa <t.sowa@ttmath.org>
*/
/*
* Copyright (c) 2010-2021, Tomasz Sowa
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* * Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
*
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* * Neither the name Tomasz Sowa nor the names of contributors to this
* project may be used to endorse or promote products derived
* from this software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
* THE POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef headerfile_picotools_utf8_utf8
#define headerfile_picotools_utf8_utf8
#include <string>
#include "textstream/textstream.h"
namespace PT
{
/*!
UTF-8, a transformation format of ISO 10646
http://tools.ietf.org/html/rfc3629
when wchar_t is 4 bytes length we use UTF-32
when wchar_t is 2 bytes length we use UTF-16 (with surrogate pairs)
UTF-16
http://www.ietf.org/rfc/rfc2781.txt
*/
/*!
returns true if 'c' is a correct unicode character
*/
bool UTF8_CheckRange(int c);
/*!
returns true if 'c' is a correct unicode character
this method is used when reading from an utf8 string
how_many_bytes - means how many bytes from the utf8 string were read
*/
bool UTF8_CheckRange(int c, int how_many_bytes);
/*
*
*
*
* conversions from UTF-8
*
*
*
*/
/*!
converting one character from UTF-8 to an int
*/
size_t UTF8ToInt(const char * utf8, size_t utf8_len, int & res, bool & correct);
size_t UTF8ToInt(const char * utf8, int & res, bool & correct);
size_t UTF8ToInt(const std::string & utf8, int & res, bool & correct);
size_t UTF8ToInt(std::istream & utf8, int & res, bool & correct);
/*!
converting UTF-8 string to a wide string
*/
bool UTF8ToWide(const char * utf8, size_t utf8_len, std::wstring & res, bool clear = true, int mode = 1);
bool UTF8ToWide(const char * utf8, std::wstring & res, bool clear = true, int mode = 1);
bool UTF8ToWide(const std::string & utf8, std::wstring & res, bool clear = true, int mode = 1);
bool UTF8ToWide(std::istream & utf8, std::wstring & res, bool clear = true, int mode = 1);
template<typename StreamType>
bool UTF8ToWide(const char * utf8, size_t utf8_len, StreamType & res, bool clear = true, int mode = 1); // need to be tested
template<typename StreamType>
bool UTF8ToWide(const char * utf8, StreamType & res, bool clear = true, int mode = 1); // need to be tested
template<typename StreamType>
bool UTF8ToWide(const std::string & utf8, StreamType & res, bool clear = true, int mode = 1); // need to be tested
template<typename StreamType>
bool UTF8ToWide(std::istream & utf8, StreamType & res, bool clear = true, int mode = 1); // need to be tested
/*
*
*
*
* conversions to UTF-8
*
*
*
*/
/*!
converting one int character to UTF-8
*/
size_t IntToUTF8(int z, char * utf8, size_t utf8_max_len);
size_t IntToUTF8(int z, std::string & utf8, bool clear = true);
template<typename StreamType>
size_t IntToUTF8(int z, StreamType & utf8);
/*!
converting a wide string to UTF-8 string
*/
bool WideToUTF8(const wchar_t * wide_string, size_t string_len, std::string & utf8, bool clear = true, int mode = 1);
bool WideToUTF8(const wchar_t * wide_string, std::string & utf8, bool clear = true, int mode = 1);
bool WideToUTF8(const std::wstring & wide_string, std::string & utf8, bool clear = true, int mode = 1);
template<typename StreamType>
bool WideToUTF8(const wchar_t * wide_string, size_t string_len, StreamType & utf8, int mode = 1);
template<typename StreamType>
bool WideToUTF8(const wchar_t * wide_string, StreamType & utf8, int mode = 1);
template<typename StreamType>
bool WideToUTF8(const std::wstring & wide_string, StreamType & utf8, int mode = 1);
bool WideToUTF8(const wchar_t * wide_string, size_t string_len, char * utf8, size_t utf8_len, size_t & utf8_written, int mode = 1);
bool WideToUTF8(const wchar_t * wide_string, char * utf8, size_t utf8_len, size_t & utf8_written, int mode = 1);
bool WideToUTF8(const std::wstring & wide_string, char * utf8, size_t utf8_len, size_t & utf8_written, int mode = 1);
// implement template<typename StreamType>
bool WideToUTF8(const wchar_t * wide_string, size_t string_len, char * utf8, size_t utf8_len, int mode = 1);
bool WideToUTF8(const wchar_t * wide_string, char * utf8, size_t utf8_len, int mode = 1);
bool WideToUTF8(const std::wstring & wide_string, char * utf8, size_t utf8_len, int mode = 1);
// implement template<typename StreamType>
template<typename StreamType>
void WideStreamToUTF8(StreamType & buffer, std::string & utf8, bool clear = true, int mode = 1); // not tested
template<typename StreamTypeIn, typename StreamTypeOut>
void WideStreamToUTF8(StreamTypeIn & buffer, StreamTypeOut & utf8, int mode = 1); // not tested
} // namespace
#include "utf8/utf8_templates.h"
#endif

283
src/utf8/utf8_private.cpp Normal file
View File

@@ -0,0 +1,283 @@
/*
* This file is a part of PikoTools
* and is distributed under the (new) BSD licence.
* Author: Tomasz Sowa <t.sowa@ttmath.org>
*/
/*
* Copyright (c) 2021, Tomasz Sowa
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* * Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
*
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* * Neither the name Tomasz Sowa nor the names of contributors to this
* project may be used to endorse or promote products derived
* from this software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
* THE POSSIBILITY OF SUCH DAMAGE.
*/
#include "utf8_private.h"
namespace PT
{
namespace private_namespace
{
/*!
    an auxiliary function for converting from UTF-8 string
    analyses the first octet of a UTF-8 sequence

    input:
        uz - the first octet
    output:
        len - the number of leading one bits in uz (the length of the whole sequence),
              set to one when uz has no leading one bit
        res - the remaining (payload) bits of uz, not changed when the function returns false

    returns false if uz has exactly one or more than four leading one bits
*/
bool UTF8ToInt_FirstOctet(unsigned char uz, size_t & len, int & res)
{
    // count the leading one bits, shifting them out of the octet
    len = 0;

    while( (uz & 0x80) != 0 )
    {
        uz <<= 1;
        ++len;
    }

    if( len == 1 || len > 4 )
        return false;

    if( len == 0 )
    {
        // no leading one bit: a one octet sequence, the octet itself is the value
        res = uz;
        len = 1;
    }
    else
    {
        // move the payload bits back to their original position
        res = uz >> len;
    }

    return true;
}
/*!
    an auxiliary function for converting from UTF-8 string
    appends the six payload bits of one following octet to res

    returns false (leaving res unchanged) if the two highest bits of uz are not 10
*/
bool UTF8ToInt_AddNextOctet(unsigned char uz, int & res)
{
    const bool is_following_octet = ((uz & 0xc0) == 0x80);

    if( is_following_octet )
        res = (res << 6) | (uz & 0x3F);

    return is_following_octet;
}
/*
    an auxiliary function for converting from wide characters to UTF-8
    converting a wide character (or a UTF-16 surrogate pair) into one int

    input:
        wide_string - wide characters to read from
        string_len  - how many wide characters are available in wide_string
    output:
        z       - the value read (zero if string_len is zero)
        correct - false if string_len is zero, if a surrogate is not a part of a proper pair
                  or if UTF8_CheckRange() rejects the value

    returns how many wide characters were used
    if string_len is greater than 0 then the return value is always greater than zero too

    an unpaired surrogate uses exactly one wide character, so the character
    following it is not dropped and will be converted by the next call
*/
size_t WideToInt(const wchar_t * wide_string, size_t string_len, int & z, bool & correct)
{
    if( string_len == 0 )
    {
        z = 0;
        correct = false;
        return 0;
    }

    z = static_cast<int>(*wide_string);
    correct = true;

    if( sizeof(wchar_t) == 2 && (z>=0xD800 && z<=0xDFFF) )
    {
        // UTF-16: a high surrogate must be directly followed by a low surrogate
        if( z<=0xDBFF && string_len>1 )
        {
            int z2 = static_cast<int>(*(wide_string+1));

            if( z2>=0xDC00 && z2<=0xDFFF )
            {
                z = 0x10000 + (((z & 0x3FF) << 10) | (z2 & 0x3FF));
                return 2;
            }
        }

        // a lone low surrogate, a high surrogate at the end of the input
        // or a high surrogate followed by something else:
        // only the surrogate itself is consumed
        correct = false;
        return 1;
    }

    correct = UTF8_CheckRange(z);
    return 1;
}
/*
    an auxiliary function for converting from wide characters to UTF-8
    converting a wide character from a null terminated string into one int

    output:
        z       - the value read (zero if the string is empty)
        correct - false if the string is empty, otherwise as set by the (pointer, length) overload

    returns how many wide characters were used
    if wide_string has at least one character then the return value is always greater than zero too
*/
size_t WideToInt(const wchar_t * wide_string, int & z, bool & correct)
{
    if( wide_string[0] == 0 )
    {
        z = 0;
        correct = false;
        return 0;
    }

    // at most two wide characters are needed for one value,
    // the second one is offered only if it is not the terminator
    const size_t available = (wide_string[1] == 0) ? 1 : 2;

    return WideToInt(wide_string, available, z, correct);
}
/*!
    an auxiliary function for converting from wide characters to UTF-8
    converts one character from wide_string and stores it in the utf8 buffer

    returns how many wide characters were used
    if string_len is greater than 0 then the return value is always greater than zero too

    utf8_written - how many bytes were saved in the utf8 buffer (no null terminating character is added)
                   it is zero if the buffer is too small or if an incorrect wide character
                   was skipped (mode different from 1)

    was_utf8_buf_too_small - set to true if the utf8 buffer is too small, otherwise set to false
                             if this flag is true then utf8_written is equal to zero

    was_error - set to true if an incorrect wide character was read,
                it is never cleared here and it is not set only because the buffer is too small

    mode - 1: U+FFFD "replacement character" is stored instead of an incorrect wide character
           other values: the incorrect wide character is skipped
*/
size_t WideOneToUTF8(const wchar_t * wide_string, size_t string_len, char * utf8, size_t utf8_len,
                     size_t & utf8_written, bool & was_utf8_buf_too_small, bool & was_error, int mode)
{
    int code_point;
    bool is_valid;

    utf8_written = 0;
    was_utf8_buf_too_small = false;

    const size_t consumed = WideToInt(wide_string, string_len, code_point, is_valid);

    if( !is_valid )
    {
        was_error = true;

        if( mode != 1 )
            return consumed;

        code_point = 0xFFFD; // U+FFFD "replacement character"
    }

    utf8_written = IntToUTF8(code_point, utf8, utf8_len);
    was_utf8_buf_too_small = (utf8_written == 0);

    return consumed;
}
/*!
    an auxiliary function for converting from wide characters to UTF-8
    converts one character from wide_string and appends it to the utf8 string

    returns how many wide characters were used
    if string_len is greater than 0 then the return value is always greater than zero too

    was_error is set to true (it is never cleared here) if the character could not be converted,
    in such a case U+FFFD "replacement character" is appended when mode is equal to one
*/
size_t WideOneToUTF8(const wchar_t * wide_string, size_t string_len, std::string & utf8, bool & was_error, int mode)
{
    int code_point;
    bool converted;

    const size_t consumed = WideToInt(wide_string, string_len, code_point, converted);

    // the value is appended only if it was read correctly
    converted = converted && (IntToUTF8(code_point, utf8, false) != 0);

    if( converted )
        return consumed;

    if( mode == 1 )
        IntToUTF8(0xFFFD, utf8, false); // U+FFFD "replacement character"

    was_error = true;

    return consumed;
}
/*!
    an auxiliary function for converting from wide characters to UTF-8
    converts one character from a null terminated wide_string and appends it to the utf8 string

    returns how many wide characters were used
    if wide_string has at least one character then the return value is always greater than zero too

    was_error is set to true (it is never cleared here) if the character could not be converted,
    in such a case U+FFFD "replacement character" is appended when mode is equal to one
*/
size_t WideOneToUTF8(const wchar_t * wide_string, std::string & utf8, bool & was_error, int mode)
{
    int code_point;
    bool converted;

    const size_t consumed = WideToInt(wide_string, code_point, converted);

    // the value is appended only if it was read correctly
    converted = converted && (IntToUTF8(code_point, utf8, false) != 0);

    if( converted )
        return consumed;

    if( mode == 1 )
        IntToUTF8(0xFFFD, utf8, false); // U+FFFD "replacement character"

    was_error = true;

    return consumed;
}
} // namespace private_namespace
} // namespace PT

220
src/utf8/utf8_private.h Normal file
View File

@@ -0,0 +1,220 @@
/*
* This file is a part of PikoTools
* and is distributed under the (new) BSD licence.
* Author: Tomasz Sowa <t.sowa@ttmath.org>
*/
/*
* Copyright (c) 2021, Tomasz Sowa
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* * Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
*
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* * Neither the name Tomasz Sowa nor the names of contributors to this
* project may be used to endorse or promote products derived
* from this software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
* THE POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef headerfile_picotools_utf8_utf8_private
#define headerfile_picotools_utf8_utf8_private
#include "textstream/textstream.h"
namespace PT
{
// re-declarations of functions from the PT namespace which are used by the private helpers
// (utf8_private.cpp includes only this header and calls UTF8_CheckRange() and IntToUTF8())
// utf8.h declares the same functions, there the 'clear' parameter of IntToUTF8() has a default value
bool UTF8_CheckRange(int c);
size_t IntToUTF8(int z, char * utf8, size_t utf8_max_len);
size_t IntToUTF8(int z, std::string & utf8, bool clear);
size_t UTF8ToInt(const char * utf8, size_t utf8_len, int & res, bool & correct);
namespace private_namespace
{
// helpers defined in utf8_private.cpp

// decoding of a UTF-8 sequence: the first octet and each following octet
bool UTF8ToInt_FirstOctet(unsigned char uz, size_t & len, int & res);
bool UTF8ToInt_AddNextOctet(unsigned char uz, int & res);
// reading one value from a wide string (with a given length / null terminated),
// the return value is the number of wide characters used
size_t WideToInt(const wchar_t * wide_string, size_t string_len, int & z, bool & correct);
size_t WideToInt(const wchar_t * wide_string, int & z, bool & correct);
// converting one value from a wide string to UTF-8: into a char buffer or into a std::string,
// the return value is the number of wide characters used
size_t WideOneToUTF8(const wchar_t * wide_string, size_t string_len, char * utf8, size_t utf8_len,
size_t & utf8_written, bool & was_utf8_buf_too_small, bool & was_error, int mode);
size_t WideOneToUTF8(const wchar_t * wide_string, size_t string_len, std::string & utf8, bool & was_error, int mode);
size_t WideOneToUTF8(const wchar_t * wide_string, std::string & utf8, bool & was_error, int mode);
/*!
    an auxiliary function for converting from wide characters to UTF-8
    converts one character from wide_string and writes it to the utf8 stream

    returns how many wide characters were used
    if string_len is greater than 0 then the return value is always greater than zero too

    was_error is set to true (it is never cleared here) if the character could not be converted,
    in such a case U+FFFD "replacement character" is written when mode is equal to one
*/
template<typename StreamType>
static size_t WideOneToUTF8(const wchar_t * wide_string, size_t string_len, StreamType & utf8, bool & was_error, int mode)
{
    int code_point;
    bool converted;

    const size_t consumed = WideToInt(wide_string, string_len, code_point, converted);

    // the value is written only if it was read correctly
    converted = converted && (IntToUTF8(code_point, utf8) != 0);

    if( !converted )
    {
        if( mode == 1 )
            IntToUTF8(0xFFFD, utf8); // U+FFFD "replacement character"

        was_error = true;
    }

    return consumed;
}
/*!
    an auxiliary function for converting from wide characters to UTF-8
    converts one character from a null terminated wide_string and writes it to the utf8 stream

    returns how many wide characters were used (zero if the string is empty)
*/
template<typename StreamType>
static size_t WideOneToUTF8(const wchar_t * wide_string, StreamType & utf8, bool & was_error, int mode)
{
    if( wide_string[0] == 0 )
        return 0;

    // the second wide character is offered only if it is not the terminator
    const size_t available = (wide_string[1] == 0) ? 1 : 2;

    return WideOneToUTF8(wide_string, available, utf8, was_error, mode);
}
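// PT::UTF8ToInt is declared in utf8.h and defined in utf8.cpp
// NOTE(review): the declaration below is placed inside private_namespace, so it names
// PT::private_namespace::UTF8ToInt and not PT::UTF8ToInt; UTF8ToWideGeneric() calls
// PT::UTF8ToInt with an explicit qualification - confirm whether this declaration is needed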
size_t UTF8ToInt(const char * utf8, size_t utf8_len, int & res, bool & correct);
/*!
    an auxiliary function for converting from UTF-8
    decodes utf8_len bytes from utf8 and calls convert_function(int) for each decoded value

    for an incorrect sequence convert_function is called with U+FFFD "replacement character"
    when mode is equal to one, otherwise the sequence is skipped

    returns false if there was at least one incorrect sequence
*/
template<typename function_type>
bool UTF8ToWideGeneric(const char * utf8, size_t utf8_len, int mode, function_type convert_function)
{
    bool had_error = false;

    while( utf8_len != 0 )
    {
        int code_point;
        bool valid;
        size_t consumed;
        const unsigned char lead = static_cast<unsigned char>(*utf8);

        if( lead > 0x7f )
        {
            // the returned length is expected to be different from zero
            consumed = PT::UTF8ToInt(utf8, utf8_len, code_point, valid);
        }
        else
        {
            // small optimization: a one octet sequence is its own value
            consumed = 1;
            valid = true;
            code_point = lead;
        }

        if( valid )
        {
            convert_function(code_point);
        }
        else
        {
            had_error = true;

            if( mode == 1 )
                convert_function(0xFFFD); // U+FFFD "replacement character"
        }

        utf8 += consumed;
        utf8_len -= consumed;
    }

    return !had_error;
}
/*!
    an auxiliary function for converting to wide characters
    inserts the value c into the res stream (with operator<<) as wide characters

    when wchar_t is 2 bytes long and c is greater than 0xffff
    two wide characters are inserted (a UTF-16 surrogate pair),
    in all other cases c is cast to wchar_t and inserted as one wide character
*/
template<typename StreamType>
void IntToWide(int c, StreamType & res)
{
    const bool needs_surrogate_pair = (sizeof(wchar_t)==2 && c>0xffff);

    if( !needs_surrogate_pair )
    {
        res << static_cast<wchar_t>(c);
        return;
    }

    // UTF16 surrogate pairs
    const int offset = c - 0x10000;

    res << static_cast<wchar_t>(((offset >> 10) & 0x3FF) + 0xD800);
    res << static_cast<wchar_t>((offset & 0x3FF) + 0xDC00);
}
/*!
    an auxiliary function for converting a wide text stream to UTF-8
    each item of the buffer is converted with IntToUTF8() into a local buffer
    which is handed over to write_function(const char *, std::size_t) every time it fills up
    and once more at the end if something is left

    not tested

    FIX ME it is not using surrogate pairs from input stream
    and mode parameter
*/
template<typename char_type, size_t stack_size, size_t heap_block_size, typename function_type>
void WideToUTF8Generic(TextStreamBase<char_type, stack_size, heap_block_size> & buffer, int mode, function_type write_function)
{
    typedef typename TextStreamBase<char_type, stack_size, heap_block_size>::const_iterator stream_iterator;

    char chunk[256];
    const std::size_t chunk_size = sizeof(chunk) / sizeof(char);
    const std::size_t reserved_per_char = 10; // free space required before converting the next character
    std::size_t used = 0;

    for(stream_iterator it = buffer.begin() ; it != buffer.end() ; ++it)
    {
        // flush the chunk if the next character might not fit
        if( used + reserved_per_char > chunk_size )
        {
            write_function(chunk, used);
            used = 0;
        }

        used += IntToUTF8(*it, chunk + used, chunk_size - used);
    }

    if( used > 0 )
        write_function(chunk, used);
}
} // namespace private_namespace
} // namespace PT
#endif

271
src/utf8/utf8_templates.h Normal file
View File

@@ -0,0 +1,271 @@
/*
* This file is a part of PikoTools
* and is distributed under the (new) BSD licence.
* Author: Tomasz Sowa <t.sowa@ttmath.org>
*/
/*
* Copyright (c) 2021, Tomasz Sowa
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* * Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
*
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* * Neither the name Tomasz Sowa nor the names of contributors to this
* project may be used to endorse or promote products derived
* from this software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
* THE POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef headerfile_picotools_utf8_utf8_templates
#define headerfile_picotools_utf8_utf8_templates
// this file is included at the end of utf8.h
#include "utf8_private.h"
namespace PT
{
/*!
    converting UTF-8 string (utf8_len bytes) to a wide stream, e.g. TextStreamBase<wchar_t,...>

    clear - if true then res.clear() is called before converting
    mode  - passed to private_namespace::UTF8ToWideGeneric()
            (one means: U+FFFD "replacement character" is put instead of an incorrect sequence)

    returns false if there were some errors when converting

    (need to be tested)
*/
template<typename StreamType>
bool UTF8ToWide(const char * utf8, size_t utf8_len, StreamType & res, bool clear, int mode)
{
    if( clear )
        res.clear();

    // each decoded value is inserted into res as wide character(s)
    auto put_value = [&res](int c) {
        private_namespace::IntToWide(c, res);
    };

    return private_namespace::UTF8ToWideGeneric(utf8, utf8_len, mode, put_value);
}
/*!
    converting a null terminated UTF-8 string to a wide stream
    the length of the string is calculated and the (pointer, length) overload is called
*/
template<typename StreamType>
bool UTF8ToWide(const char * utf8, StreamType & res, bool clear, int mode)
{
    const char * terminator = utf8;

    while( *terminator != 0 )
        ++terminator;

    return UTF8ToWide(utf8, static_cast<size_t>(terminator - utf8), res, clear, mode);
}
/*!
    converting UTF-8 string (std::string) to a wide stream
    all bytes of the string are passed to the (pointer, length) overload
*/
template<typename StreamType>
bool UTF8ToWide(const std::string & utf8, StreamType & res, bool clear, int mode)
{
    const char * const bytes = utf8.c_str();
    const size_t byte_count = utf8.size();

    return UTF8ToWide(bytes, byte_count, res, clear, mode);
}
/*!
    converting UTF-8 input stream to a wide stream
    values are read with UTF8ToInt() as long as it returns a value greater than zero

    clear - if true then res.clear() is called before converting
    mode  - what to do with errors when converting
            0: skip an invalid sequence
            1: put U+FFFD "replacement character" instead of the invalid sequence

    returns false if there were some errors when converting

    need to be tested
*/
template<typename StreamType>
bool UTF8ToWide(std::istream & utf8, StreamType & res, bool clear, int mode)
{
    int z;
    bool correct, was_error = false;

    if( clear )
        res.clear();

    while( UTF8ToInt(utf8, z, correct) > 0 )
    {
        if( !correct )
        {
            // the replacement character goes through IntToWide() like every other value,
            // inserting the int literal directly (res << 0xFFFD) would pass an int to the stream
            // instead of a wchar_t
            if( mode == 1 )
                private_namespace::IntToWide(0xFFFD, res); // U+FFFD "replacement character"

            was_error = true;
        }
        else
        {
            private_namespace::IntToWide(z, res);
        }
    }

    return !was_error;
}
/*!
    this function converts one wide character into UTF-8 stream

    input:
        z - wide character

    output:
        utf8 - a UTF-8 stream for the output sequence (utf8.write() is called)

    the function returns how many characters have been written to the utf8 stream,
    zero means that 'z' is an incorrect unicode character (nothing is written then)
*/
template<typename StreamType>
size_t IntToUTF8(int z, StreamType & utf8)
{
    char encoded[10];
    size_t written = IntToUTF8(z, encoded, sizeof(encoded)/sizeof(char));

    if( written == 0 )
        return 0;

    utf8.write(encoded, written);

    return written;
}
/*!
    this function converts a wide string into UTF-8 stream

    input:
        wide_string - a wide string for converting
        string_len  - size of the string
        mode        - what to do with errors when converting
                      0: skip an invalid character
                      1: put U+FFFD "replacement character" instead of the invalid character (default)

    output:
        utf8 - a UTF-8 stream for the output sequence

    this function returns false if there were some errors when converting
*/
template<typename StreamType>
bool WideToUTF8(const wchar_t * wide_string, size_t string_len, StreamType & utf8, int mode)
{
    bool had_error = false;

    while( string_len != 0 )
    {
        // one or two wide characters are used by a single step
        const size_t consumed = private_namespace::WideOneToUTF8(wide_string, string_len, utf8, had_error, mode);

        wide_string += consumed;
        string_len -= consumed;
    }

    return !had_error;
}
/*!
    this function converts a null terminated wide string into UTF-8 stream

    input:
        wide_string - a null terminated wide string for converting
        mode        - what to do with errors when converting
                      0: skip an invalid character
                      1: put U+FFFD "replacement character" instead of the invalid character (default)

    output:
        utf8 - a UTF-8 stream for the output sequence

    this function returns false if there were some errors when converting
*/
template<typename StreamType>
bool WideToUTF8(const wchar_t * wide_string, StreamType & utf8, int mode)
{
    bool had_error = false;

    while( *wide_string != 0 )
    {
        const size_t consumed = private_namespace::WideOneToUTF8(wide_string, utf8, had_error, mode);
        wide_string += consumed;
    }

    return !had_error;
}
/*!
    this function converts a wide string (std::wstring) into UTF-8 stream
    all characters of the string are passed to the (pointer, length) overload

    input:
        wide_string - a wide string for converting
        mode        - what to do with errors when converting
                      0: skip an invalid character
                      1: put U+FFFD "replacement character" instead of the invalid character (default)

    output:
        utf8 - a UTF-8 stream for the output sequence

    this function returns false if there were some errors when converting
*/
template<typename StreamType>
bool WideToUTF8(const std::wstring & wide_string, StreamType & utf8, int mode)
{
    const wchar_t * const first_char = wide_string.c_str();
    const size_t char_count = wide_string.size();

    return WideToUTF8(first_char, char_count, utf8, mode);
}
/*!
    converting a wide text stream to UTF-8 string (std::string)

    clear - if true then utf8.clear() is called before converting
    mode  - passed to private_namespace::WideToUTF8Generic()

    the converted bytes are appended to utf8
*/
template<typename StreamType>
void WideStreamToUTF8(StreamType & buffer, std::string & utf8, bool clear, int mode)
{
    if( clear )
        utf8.clear();

    // called for each chunk of converted bytes
    auto append_chunk = [&utf8](const char * chunk, std::size_t chunk_len) {
        utf8.append(chunk, chunk_len);
    };

    private_namespace::WideToUTF8Generic(buffer, mode, append_chunk);
}
/*!
    converting a wide text stream to UTF-8 stream

    mode - passed to private_namespace::WideToUTF8Generic()

    the converted bytes are written with utf8.write()

    not tested
*/
template<typename StreamTypeIn, typename StreamTypeOut>
void WideStreamToUTF8(StreamTypeIn & buffer, StreamTypeOut & utf8, int mode)
{
    // called for each chunk of converted bytes
    auto write_chunk = [&utf8](const char * chunk, std::size_t chunk_len) {
        utf8.write(chunk, chunk_len);
    };

    private_namespace::WideToUTF8Generic(buffer, mode, write_chunk);
}
} // namespace PT
#endif