Browse Source

Merge branch 'api2021'

master
Tomasz Sowa 5 months ago
parent
commit
4f07c00217
  1. 46
      src/Makefile.dep
  2. 273
      src/convert/baseparser.cpp
  3. 141
      src/convert/baseparser.h
  4. 66
      src/convert/inttostr.cpp
  5. 107
      src/convert/inttostr.h
  6. 361
      src/convert/misc.cpp
  7. 77
      src/convert/misc.h
  8. 151
      src/csv/csvparser.cpp
  9. 68
      src/csv/csvparser.h
  10. 645
      src/html/bbcodeparser.cpp
  11. 128
      src/html/bbcodeparser.h
  12. 2434
      src/html/htmlparser.cpp
  13. 490
      src/html/htmlparser.h
  14. 1
      src/log/filelog.cpp
  15. 48
      src/log/log.cpp
  16. 67
      src/log/log.h
  17. 88
      src/space/space.cpp
  18. 17
      src/space/space.h
  19. 420
      src/space/spaceparser.cpp
  20. 125
      src/space/spaceparser.h
  21. 359
      src/textstream/textstream.h
  22. 10
      src/utf8/utf8.h
  23. 104
      src/utf8/utf8_stream.h
  24. 9
      src/utf8/utf8_templates.h
  25. 24
      tests/Makefile.dep

46
src/Makefile.dep

@ -1,44 +1,76 @@
# DO NOT DELETE
./convert/inttostr.o: ./convert/inttostr.h
./convert/misc.o: ./convert/misc.h ./convert/text.h
./convert/misc.o: ./convert/misc.h ./convert/text.h textstream/stream.h
./convert/misc.o: textstream/types.h utf8/utf8_stream.h
./convert/misc.o: textstream/textstream.h textstream/stream.h space/space.h
./convert/misc.o: convert/inttostr.h utf8/utf8.h utf8/utf8_templates.h
./convert/misc.o: utf8/utf8_private.h date/date.h membuffer/membuffer.h
./convert/misc.o: textstream/types.h ./convert/inttostr.h
./convert/text.o: ./convert/text.h ./convert/text_private.h
./convert/double.o: ./convert/double.h textstream/textstream.h
./convert/double.o: textstream/stream.h space/space.h textstream/types.h
./convert/double.o: convert/inttostr.h utf8/utf8.h textstream/stream.h
./convert/double.o: utf8/utf8_templates.h utf8/utf8_private.h date/date.h
./convert/double.o: membuffer/membuffer.h textstream/types.h
./convert/double.o: utf8/utf8_stream.h
./convert/baseparser.o: ./convert/baseparser.h textstream/textstream.h
./convert/baseparser.o: textstream/stream.h space/space.h textstream/types.h
./convert/baseparser.o: convert/inttostr.h utf8/utf8.h textstream/stream.h
./convert/baseparser.o: utf8/utf8_templates.h utf8/utf8_private.h date/date.h
./convert/baseparser.o: membuffer/membuffer.h textstream/types.h
./convert/baseparser.o: utf8/utf8_stream.h
./date/date.o: ./date/date.h convert/inttostr.h
./log/filelog.o: ./log/filelog.h textstream/textstream.h textstream/stream.h
./log/filelog.o: space/space.h textstream/types.h convert/inttostr.h
./log/filelog.o: utf8/utf8.h textstream/stream.h utf8/utf8_templates.h
./log/filelog.o: utf8/utf8_private.h date/date.h membuffer/membuffer.h
./log/filelog.o: textstream/types.h
./log/filelog.o: textstream/types.h utf8/utf8_stream.h
./log/log.o: ./log/log.h textstream/textstream.h textstream/stream.h
./log/log.o: space/space.h textstream/types.h convert/inttostr.h utf8/utf8.h
./log/log.o: textstream/stream.h utf8/utf8_templates.h utf8/utf8_private.h
./log/log.o: date/date.h membuffer/membuffer.h textstream/types.h
./log/log.o: ./log/filelog.h
./log/log.o: utf8/utf8_stream.h ./log/filelog.h
./space/space.o: ./space/space.h textstream/types.h convert/inttostr.h
./space/space.o: utf8/utf8.h textstream/stream.h utf8/utf8_templates.h
./space/space.o: utf8/utf8_private.h convert/convert.h ./convert/inttostr.h
./space/space.o: convert/patternreplacer.h textstream/textstream.h
./space/space.o: textstream/stream.h space/space.h date/date.h
./space/space.o: membuffer/membuffer.h textstream/types.h convert/strtoint.h
./space/space.o: ./convert/text.h ./convert/misc.h ./convert/double.h
./space/space.o: membuffer/membuffer.h textstream/types.h utf8/utf8_stream.h
./space/space.o: convert/strtoint.h ./convert/text.h ./convert/misc.h
./space/space.o: ./convert/double.h
./space/spaceparser.o: ./space/spaceparser.h ./space/space.h
./space/spaceparser.o: textstream/types.h convert/inttostr.h utf8/utf8.h
./space/spaceparser.o: textstream/stream.h utf8/utf8_templates.h
./space/spaceparser.o: utf8/utf8_private.h convert/strtoint.h
./space/spaceparser.o: ./convert/text.h ./convert/misc.h
./space/spaceparser.o: utf8/utf8_private.h convert/baseparser.h
./space/spaceparser.o: textstream/textstream.h textstream/stream.h
./space/spaceparser.o: space/space.h date/date.h membuffer/membuffer.h
./space/spaceparser.o: textstream/types.h utf8/utf8_stream.h
./space/spaceparser.o: convert/strtoint.h ./convert/text.h ./convert/misc.h
./utf8/utf8.o: ./utf8/utf8.h textstream/stream.h utf8/utf8_templates.h
./utf8/utf8.o: utf8/utf8_private.h
./utf8/utf8_private.o: utf8/utf8_private.h
./csv/csvparser.o: ./csv/csvparser.h space/space.h textstream/types.h
./csv/csvparser.o: convert/inttostr.h utf8/utf8.h textstream/stream.h
./csv/csvparser.o: utf8/utf8_templates.h utf8/utf8_private.h
./csv/csvparser.o: convert/baseparser.h textstream/textstream.h
./csv/csvparser.o: textstream/stream.h date/date.h membuffer/membuffer.h
./csv/csvparser.o: textstream/types.h utf8/utf8_stream.h
./mainoptions/mainoptionsparser.o: ./mainoptions/mainoptionsparser.h
./mainoptions/mainoptionsparser.o: space/space.h textstream/types.h
./mainoptions/mainoptionsparser.o: convert/inttostr.h utf8/utf8.h
./mainoptions/mainoptionsparser.o: textstream/stream.h utf8/utf8_templates.h
./mainoptions/mainoptionsparser.o: utf8/utf8_private.h
./html/htmlparser.o: ./html/htmlparser.h convert/baseparser.h
./html/htmlparser.o: textstream/textstream.h textstream/stream.h
./html/htmlparser.o: space/space.h textstream/types.h convert/inttostr.h
./html/htmlparser.o: utf8/utf8.h textstream/stream.h utf8/utf8_templates.h
./html/htmlparser.o: utf8/utf8_private.h date/date.h membuffer/membuffer.h
./html/htmlparser.o: textstream/types.h utf8/utf8_stream.h convert/text.h
./html/bbcodeparser.o: ./html/bbcodeparser.h ./html/htmlparser.h
./html/bbcodeparser.o: convert/baseparser.h textstream/textstream.h
./html/bbcodeparser.o: textstream/stream.h space/space.h textstream/types.h
./html/bbcodeparser.o: convert/inttostr.h utf8/utf8.h textstream/stream.h
./html/bbcodeparser.o: utf8/utf8_templates.h utf8/utf8_private.h date/date.h
./html/bbcodeparser.o: membuffer/membuffer.h textstream/types.h
./html/bbcodeparser.o: utf8/utf8_stream.h

273
src/convert/baseparser.cpp

@ -0,0 +1,273 @@
/*
* This file is a part of PikoTools
* and is distributed under the (new) BSD licence.
* Author: Tomasz Sowa <t.sowa@ttmath.org>
*/
/*
* Copyright (c) 2021-2022, Tomasz Sowa
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* * Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
*
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* * Neither the name Tomasz Sowa nor the names of contributors to this
* project may be used to endorse or promote products derived
* from this software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
* THE POSSIBILITY OF SUCH DAMAGE.
*/
#include "baseparser.h"
#include "utf8/utf8.h"
#include "utf8/utf8_stream.h"
namespace pt
{
/*
 * set every input related member to its initial state
 *
 * the call is qualified on purpose: inside a constructor a virtual call
 * is resolved to the version from this class anyway
 */
BaseParser::BaseParser()
{
	BaseParser::clear_input_flags();
}
/*
 * reset the parser input state:
 * no input source attached, position counters zeroed,
 * UTF-8 input assumed and the file object closed with its error flags cleared
 */
void BaseParser::clear_input_flags()
{
	// position tracking and the last read character
	lastc = -1;
	line = 0;
	column = 0;

	// input mode
	reading_from_file = false;
	input_as_utf8 = true;

	// detach all in-memory input sources
	pchar_ascii = nullptr;
	pchar_unicode = nullptr;
	wtext_stream_iterator = nullptr;
	wtext_stream_iterator_end = nullptr;
	text_stream_iterator = nullptr;
	text_stream_iterator_end = nullptr;

	// close the file only when it is open, the state flags are cleared in both cases
	if( file.is_open() )
	{
		file.close();
	}

	file.clear();
}
/*
 * update the position counters after a character has been read:
 * when the last read character is a new line then go to the next line
 * and start counting columns from zero
 */
void BaseParser::check_new_line()
{
	if( lastc != '\n' )
		return;

	line += 1;
	column = 0;
}
/*
 * read one UTF-8 encoded character from the file
 *
 * incorrect sequences are skipped, reading is repeated until a correct
 * character is decoded or the file stream reports a failure
 * return the character or -1 if the stream failed
 */
int BaseParser::read_utf8_char()
{
	int code;
	bool is_correct;

	lastc = -1;

	for(;;)
	{
		utf8_to_int(file, code, is_correct);

		if( !file )
			return lastc; // lastc is still -1 here

		if( is_correct )
			break;
	}

	lastc = code;
	check_new_line();

	return lastc;
}
/*
 * read one byte from the file without any decoding
 * return the value given by the stream get() method
 */
int BaseParser::read_ascii_char()
{
	const int ch = file.get();

	lastc = ch;
	check_new_line();

	return lastc;
}
/*
 * read one character from the null terminated wide string (pchar_unicode)
 * the pointer is advanced only if the terminating zero has not been reached
 * return the character or -1 at the end of the string
 */
int BaseParser::read_char_from_wchar_string()
{
	const bool at_end = (*pchar_unicode == 0);

	lastc = at_end ? -1 : static_cast<int>(*(pchar_unicode++));
	check_new_line();

	return lastc;
}
/*
 * read one UTF-8 encoded character from the null terminated string (pchar_ascii)
 *
 * the pointer is moved by the number of bytes reported by utf8_to_int(),
 * incorrect sequences are skipped until a correct character is found
 * or the terminating zero is reached
 * return the character or -1 if no correct character has been decoded
 */
int BaseParser::read_char_from_utf8_string()
{
	int code;
	bool is_correct;

	lastc = -1;

	for(;;)
	{
		const size_t consumed = utf8_to_int(pchar_ascii, code, is_correct);
		pchar_ascii += consumed;

		if( *pchar_ascii == 0 || is_correct )
			break;
	}

	if( is_correct )
	{
		lastc = code;
	}

	check_new_line();

	return lastc;
}
/*
 * read one byte from the null terminated string (pchar_ascii) without any decoding
 * the pointer is advanced only if the terminating zero has not been reached
 *
 * return the byte as a value in the range 1..255 or -1 at the end of the string
 */
int BaseParser::read_char_from_ascii_string()
{
	if( *pchar_ascii == 0 )
	{
		lastc = -1;
	}
	else
	{
		// go through unsigned char: where plain char is signed a byte >= 0x80
		// would be sign-extended to a negative int and the byte 0xFF
		// would be equal to -1 which is the end of input marker
		lastc = static_cast<unsigned char>(*(pchar_ascii++));
	}

	check_new_line();

	return lastc;
}
/*
 * read one character from the WTextStream iterators pair
 * the iterator is advanced only if it differs from the end iterator
 * return the character or -1 if the end has been reached
 */
int BaseParser::read_char_from_wtext_stream()
{
	const bool has_more = (*wtext_stream_iterator) != (*wtext_stream_iterator_end);

	if( !has_more )
	{
		lastc = -1;
	}
	else
	{
		lastc = *(*wtext_stream_iterator);
		++(*wtext_stream_iterator);
	}

	check_new_line();

	return lastc;
}
/*
 * read one UTF-8 encoded character from the TextStream iterators pair
 *
 * decoding is repeated until utf8_to_int() reports a correct character
 * or the iterator is equal to the end iterator
 * return the character or -1 if no correct character has been decoded
 */
int BaseParser::read_char_from_utf8_text_stream()
{
	int code;
	bool is_correct;

	lastc = -1;

	for(;;)
	{
		utf8_to_int(*text_stream_iterator, *text_stream_iterator_end, code, is_correct);

		if( is_correct )
			break;

		const bool has_more = (*text_stream_iterator) != (*text_stream_iterator_end);

		if( !has_more )
			break;
	}

	if( is_correct )
	{
		lastc = code;
	}

	check_new_line();

	return lastc;
}
/*
 * read one byte from the TextStream iterators pair without any decoding
 * the iterator is advanced only if it differs from the end iterator
 *
 * return the byte as a non-negative value or -1 if the end has been reached
 */
int BaseParser::read_char_from_ascii_text_stream()
{
	if( (*text_stream_iterator) != (*text_stream_iterator_end) )
	{
		// go through unsigned char so that a byte >= 0x80 is not sign-extended
		// to a negative int (the byte 0xFF would be equal to the -1 end marker)
		// NOTE(review): assumes TextStream stores plain char items - confirm
		lastc = static_cast<unsigned char>(*(*text_stream_iterator));
		++(*text_stream_iterator);
	}
	else
	{
		lastc = -1;
	}

	check_new_line();

	return lastc;
}
/*
 * read one character from the currently selected input source
 *
 * the source is chosen in the following order:
 * file, char string, wide string, WTextStream iterators, TextStream iterators
 * for byte oriented sources input_as_utf8 selects UTF-8 decoding or raw bytes
 * if there is no source set then lastc is set to -1 and -1 is returned
 */
int BaseParser::read_char_no_escape()
{
	if( reading_from_file )
	{
		return input_as_utf8 ? read_utf8_char() : read_ascii_char();
	}

	if( pchar_ascii )
	{
		return input_as_utf8 ? read_char_from_utf8_string() : read_char_from_ascii_string();
	}

	if( pchar_unicode )
	{
		return read_char_from_wchar_string();
	}

	if( wtext_stream_iterator && wtext_stream_iterator_end )
	{
		return read_char_from_wtext_stream();
	}

	if( text_stream_iterator && text_stream_iterator_end )
	{
		return input_as_utf8 ? read_char_from_utf8_text_stream() : read_char_from_ascii_text_stream();
	}

	// no input source available
	lastc = -1;

	return lastc;
}
/*
 * read one character from the input
 * in this class it only forwards to read_char_no_escape()
 */
int BaseParser::read_char()
{
	const int ch = read_char_no_escape();

	return ch;
}
}

141
src/convert/baseparser.h

@ -0,0 +1,141 @@
/*
* This file is a part of PikoTools
* and is distributed under the (new) BSD licence.
* Author: Tomasz Sowa <t.sowa@ttmath.org>
*/
/*
* Copyright (c) 2021-2022, Tomasz Sowa
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* * Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
*
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* * Neither the name Tomasz Sowa nor the names of contributors to this
* project may be used to endorse or promote products derived
* from this software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
* THE POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef headerfile_picotools_convert_baseparser
#define headerfile_picotools_convert_baseparser
#include <string>
#include <fstream>
#include "textstream/textstream.h"
namespace pt
{
/*
	a common base for parsers: it keeps the input state (file, strings or text stream iterators)
	and provides methods for reading the input character by character
*/
class BaseParser
{
public:

	/*
		the class has virtual methods so objects of derived classes
		can be safely destroyed through a pointer to BaseParser
	*/
	virtual ~BaseParser() = default;


protected:

	BaseParser();

	/*
		reset all input related members to their initial state
	*/
	virtual void clear_input_flags();

	/*
		update line and column after a character has been read
	*/
	virtual void check_new_line();

	/*
		methods for reading one character from a specific input source
		each of them returns the character or -1 if there is no more input
	*/
	virtual int read_utf8_char();
	virtual int read_ascii_char();
	virtual int read_char_from_wchar_string();
	virtual int read_char_from_utf8_string();
	virtual int read_char_from_ascii_string();
	virtual int read_char_from_wtext_stream();
	virtual int read_char_from_utf8_text_stream();
	virtual int read_char_from_ascii_text_stream();

	/*
		read one character from the currently selected input source
	*/
	virtual int read_char_no_escape();
	virtual int read_char();


	/*
		a number of a line in which there is a syntax_error
	*/
	int line;

	/*
		a number of a column in which there is a syntax_error
	*/
	int column;

	/*
		true if parse() method was called
		false if ParseString() was called
	*/
	bool reading_from_file;

	/*
		pointers to the current character
		if ParseString() is in use
	*/
	const char * pchar_ascii;
	const wchar_t * pchar_unicode;

	/*
		pointers to WTextStream iterators
		if set then both of them should be set
	*/
	WTextStream::const_iterator * wtext_stream_iterator;
	WTextStream::const_iterator * wtext_stream_iterator_end;

	/*
		pointers to TextStream iterators
		if set then both of them should be set
	*/
	TextStream::const_iterator * text_stream_iterator;
	TextStream::const_iterator * text_stream_iterator_end;

	/*
		last read char
		or -1 if the end
	*/
	int lastc;

	/*
		current file
		maybe it would be better to make it a pointer?
		if we parse only a string then there is no sense to have such an object
	*/
	std::ifstream file;

	/*
		input file is in UTF-8
		default: true
	*/
	bool input_as_utf8;

};
}
#endif

66
src/convert/inttostr.cpp

@ -5,7 +5,7 @@
*/
/*
* Copyright (c) 2021, Tomasz Sowa
* Copyright (c) 2021-2022, Tomasz Sowa
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
@ -41,114 +41,114 @@
namespace pt
{
std::string to_str(unsigned long long value, int base)
std::string to_str(unsigned long long value, int base, size_t min_width)
{
std::string res;
Toa(value, res, false, base);
Toa(value, res, false, base, min_width);
return res;
}
std::string to_str(long long value, int base)
std::string to_str(long long value, int base, size_t min_width)
{
std::string res;
Toa(value, res, false, base);
Toa(value, res, false, base, min_width);
return res;
}
std::string to_str(unsigned long value, int base)
std::string to_str(unsigned long value, int base, size_t min_width)
{
return to_str(static_cast<unsigned long long>(value), base);
return to_str(static_cast<unsigned long long>(value), base, min_width);
}
std::string to_str(long value, int base)
std::string to_str(long value, int base, size_t min_width)
{
return to_str(static_cast<long long>(value), base);
return to_str(static_cast<long long>(value), base, min_width);
}
std::string to_str(unsigned int value, int base)
std::string to_str(unsigned int value, int base, size_t min_width)
{
return to_str(static_cast<unsigned long long>(value), base);
return to_str(static_cast<unsigned long long>(value), base, min_width);
}
std::string to_str(int value, int base)
std::string to_str(int value, int base, size_t min_width)
{
return to_str(static_cast<long long>(value), base);
return to_str(static_cast<long long>(value), base, min_width);
}
std::string to_str(unsigned short value, int base)
std::string to_str(unsigned short value, int base, size_t min_width)
{
return to_str(static_cast<unsigned long long>(value), base);
return to_str(static_cast<unsigned long long>(value), base, min_width);
}
std::string to_str(short value, int base)
std::string to_str(short value, int base, size_t min_width)
{
return to_str(static_cast<long long>(value), base);
return to_str(static_cast<long long>(value), base, min_width);
}
std::wstring to_wstr(unsigned long long value, int base)
std::wstring to_wstr(unsigned long long value, int base, size_t min_width)
{
std::wstring res;
Toa(value, res, false, base);
Toa(value, res, false, base, min_width);
return res;
}
std::wstring to_wstr(long long value, int base)
std::wstring to_wstr(long long value, int base, size_t min_width)
{
std::wstring res;
Toa(value, res, false, base);
Toa(value, res, false, base, min_width);
return res;
}
std::wstring to_wstr(unsigned long value, int base)
std::wstring to_wstr(unsigned long value, int base, size_t min_width)
{
return to_wstr(static_cast<unsigned long long>(value), base);
return to_wstr(static_cast<unsigned long long>(value), base, min_width);
}
std::wstring to_wstr(long value, int base)
std::wstring to_wstr(long value, int base, size_t min_width)
{
return to_wstr(static_cast<long long>(value), base);
return to_wstr(static_cast<long long>(value), base, min_width);
}
std::wstring to_wstr(unsigned int value, int base)
std::wstring to_wstr(unsigned int value, int base, size_t min_width)
{
return to_wstr(static_cast<unsigned long long>(value), base);
return to_wstr(static_cast<unsigned long long>(value), base, min_width);
}
std::wstring to_wstr(int value, int base)
std::wstring to_wstr(int value, int base, size_t min_width)
{
return to_wstr(static_cast<long long>(value), base);
return to_wstr(static_cast<long long>(value), base, min_width);
}
std::wstring to_wstr(unsigned short value, int base)
std::wstring to_wstr(unsigned short value, int base, size_t min_width)
{
return to_wstr(static_cast<unsigned long long>(value), base);
return to_wstr(static_cast<unsigned long long>(value), base, min_width);
}
std::wstring to_wstr(short value, int base)
std::wstring to_wstr(short value, int base, size_t min_width)
{
return to_wstr(static_cast<long long>(value), base);
return to_wstr(static_cast<long long>(value), base, min_width);
}

107
src/convert/inttostr.h

@ -5,7 +5,7 @@
*/
/*
* Copyright (c) 2012-2021, Tomasz Sowa
* Copyright (c) 2012-2022, Tomasz Sowa
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
@ -52,8 +52,9 @@ namespace pt
// if the buffer is too small it will be terminated at the beginning (empty string)
// and the function returns false
// min_width - if greater than zero then it is used for zero padding
template<class CharType>
bool Toa(unsigned long long value, CharType * buffer, size_t buf_len, int base = 10, size_t * len_out = 0)
bool Toa(unsigned long long value, CharType * buffer, size_t buf_len, int base = 10, size_t * len_out = nullptr, size_t min_width = 0)
{
size_t i1, i2;
long rest;
@ -77,6 +78,14 @@ long rest;
}
while(value != 0 && i2 < buf_len);
if( min_width > 0 )
{
for( ; i2 < min_width && i2 < buf_len ; ++i2)
{
buffer[i2] = '0';
}
}
if( i2 >= buf_len )
{
buffer[0] = 0; // ops, the buffer was too small
@ -106,7 +115,7 @@ return true;
// if the buffer is too small it will be terminated at the beginning (empty string)
// and the function returns false
template<class CharType>
bool Toa(long long value, CharType * buffer, size_t buf_len, int base = 10, size_t * len_out = 0)
bool Toa(long long value, CharType * buffer, size_t buf_len, int base = 10, size_t * len_out = nullptr, size_t min_width = 0)
{
if( len_out )
*len_out = 0;
@ -126,7 +135,7 @@ bool Toa(long long value, CharType * buffer, size_t buf_len, int base = 10, size
is_sign = true;
}
bool res = Toa(static_cast<unsigned long long>(value), buf, buf_len, base, len_out);
bool res = Toa(static_cast<unsigned long long>(value), buf, buf_len, base, len_out, min_width);
if( res )
{
@ -146,44 +155,44 @@ return res;
template<class CharType>
bool Toa(unsigned long value, CharType * buffer, size_t buf_len, int base = 10, size_t * len_out = 0)
bool Toa(unsigned long value, CharType * buffer, size_t buf_len, int base = 10, size_t * len_out = 0, size_t min_width = 0)
{
return Toa(static_cast<unsigned long long>(value), buffer, buf_len, base, len_out);
return Toa(static_cast<unsigned long long>(value), buffer, buf_len, base, len_out, min_width);
}
template<class CharType>
bool Toa(long value, CharType * buffer, size_t buf_len, int base = 10, size_t * len_out = 0)
bool Toa(long value, CharType * buffer, size_t buf_len, int base = 10, size_t * len_out = 0, size_t min_width = 0)
{
return Toa(static_cast<long long>(value), buffer, buf_len, base, len_out);
return Toa(static_cast<long long>(value), buffer, buf_len, base, len_out, min_width);
}
template<class CharType>
bool Toa(unsigned int value, CharType * buffer, size_t buf_len, int base = 10, size_t * len_out = 0)
bool Toa(unsigned int value, CharType * buffer, size_t buf_len, int base = 10, size_t * len_out = 0, size_t min_width = 0)
{
return Toa(static_cast<unsigned long long>(value), buffer, buf_len, base, len_out);
return Toa(static_cast<unsigned long long>(value), buffer, buf_len, base, len_out, min_width);
}
template<class CharType>
bool Toa(int value, CharType * buffer, size_t buf_len, int base = 10, size_t * len_out = 0)
bool Toa(int value, CharType * buffer, size_t buf_len, int base = 10, size_t * len_out = 0, size_t min_width = 0)
{
return Toa(static_cast<long long>(value), buffer, buf_len, base, len_out);
return Toa(static_cast<long long>(value), buffer, buf_len, base, len_out, min_width);
}
template<class CharType>
bool Toa(unsigned short value, CharType * buffer, size_t buf_len, int base = 10, size_t * len_out = 0)
bool Toa(unsigned short value, CharType * buffer, size_t buf_len, int base = 10, size_t * len_out = 0, size_t min_width = 0)
{
return Toa(static_cast<unsigned long long>(value), buffer, buf_len, base, len_out);
return Toa(static_cast<unsigned long long>(value), buffer, buf_len, base, len_out, min_width);
}
template<class CharType>
bool Toa(short value, CharType * buffer, size_t buf_len, int base = 10, size_t * len_out = 0)
bool Toa(short value, CharType * buffer, size_t buf_len, int base = 10, size_t * len_out = 0, size_t min_width = 0)
{
return Toa(static_cast<long long>(value), buffer, buf_len, base, len_out);
return Toa(static_cast<long long>(value), buffer, buf_len, base, len_out, min_width);
}
@ -192,7 +201,7 @@ bool Toa(short value, CharType * buffer, size_t buf_len, int base = 10, size_t *
template<class StringType>
void Toa(unsigned long long value, StringType & res, bool clear_string = true, int base = 10)
void Toa(unsigned long long value, StringType & res, bool clear_string = true, int base = 10, size_t min_width = 0)
{
typename StringType::value_type buffer[50];
size_t buffer_len = sizeof(buffer) / sizeof(wchar_t);
@ -204,13 +213,13 @@ void Toa(unsigned long long value, StringType & res, bool clear_string = true, i
* the size of the buffer is sufficient so the status should always be true
*/
size_t len_out;
Toa(value, buffer, buffer_len, base, &len_out);
Toa(value, buffer, buffer_len, base, &len_out, min_width);
res.append(buffer, len_out);
}
template<class StringType>
void Toa(long long value, StringType & res, bool clear_string = true, int base = 10)
void Toa(long long value, StringType & res, bool clear_string = true, int base = 10, size_t min_width = 0)
{
typename StringType::value_type buffer[50];
size_t buffer_len = sizeof(buffer) / sizeof(wchar_t);
@ -222,71 +231,71 @@ void Toa(long long value, StringType & res, bool clear_string = true, int base =
* the size of the buffer is sufficient so the status should always be true
*/
size_t len_out;
Toa(value, buffer, buffer_len, base, &len_out);
Toa(value, buffer, buffer_len, base, &len_out, min_width);
res.append(buffer, len_out);
}
template<class StringType>
void Toa(unsigned long value, StringType & res, bool clear_string = true, int base = 10)
void Toa(unsigned long value, StringType & res, bool clear_string = true, int base = 10, size_t min_width = 0)
{
Toa(static_cast<unsigned long long>(value), res, clear_string, base);
Toa(static_cast<unsigned long long>(value), res, clear_string, base, min_width);
}
template<class StringType>
void Toa(long value, StringType & res, bool clear_string = true, int base = 10)
void Toa(long value, StringType & res, bool clear_string = true, int base = 10, size_t min_width = 0)
{
Toa(static_cast<long long>(value), res, clear_string, base);
Toa(static_cast<long long>(value), res, clear_string, base, min_width);
}
template<class StringType>
void Toa(unsigned int value, StringType & res, bool clear_string = true, int base = 10)
void Toa(unsigned int value, StringType & res, bool clear_string = true, int base = 10, size_t min_width = 0)
{
Toa(static_cast<unsigned long long>(value), res, clear_string, base);
Toa(static_cast<unsigned long long>(value), res, clear_string, base, min_width);
}
template<class StringType>
void Toa(int value, StringType & res, bool clear_string = true, int base = 10)
void Toa(int value, StringType & res, bool clear_string = true, int base = 10, size_t min_width = 0)
{
Toa(static_cast<long long>(value), res, clear_string, base);
Toa(static_cast<long long>(value), res, clear_string, base, min_width);
}
template<class StringType>
void Toa(unsigned short value, StringType & res, bool clear_string = true, int base = 10)
void Toa(unsigned short value, StringType & res, bool clear_string = true, int base = 10, size_t min_width = 0)
{
Toa(static_cast<unsigned long long>(value), res, clear_string, base);
Toa(static_cast<unsigned long long>(value), res, clear_string, base, min_width);
}
template<class StringType>
void Toa(short value, StringType & res, bool clear_string = true, int base = 10)
void Toa(short value, StringType & res, bool clear_string = true, int base = 10, size_t min_width = 0)
{
Toa(static_cast<long long>(value), res, clear_string, base);
Toa(static_cast<long long>(value), res, clear_string, base, min_width);
}
std::string to_str(unsigned long long value, int base = 10);
std::string to_str(long long value, int base = 10);
std::string to_str(unsigned long value, int base = 10);
std::string to_str(long value, int base = 10);
std::string to_str(unsigned int value, int base = 10);
std::string to_str(int value, int base = 10);
std::string to_str(unsigned short value, int base = 10);
std::string to_str(short value, int base = 10);
std::wstring to_wstr(unsigned long long value, int base = 10);
std::wstring to_wstr(long long value, int base = 10);
std::wstring to_wstr(unsigned long value, int base = 10);
std::wstring to_wstr(long value, int base = 10);
std::wstring to_wstr(unsigned int value, int base = 10);
std::wstring to_wstr(int value, int base = 10);
std::wstring to_wstr(unsigned short value, int base = 10);
std::wstring to_wstr(short value, int base = 10);
std::string to_str(unsigned long long value, int base = 10, size_t min_width = 0);
std::string to_str(long long value, int base = 10, size_t min_width = 0);
std::string to_str(unsigned long value, int base = 10, size_t min_width = 0);
std::string to_str(long value, int base = 10, size_t min_width = 0);
std::string to_str(unsigned int value, int base = 10, size_t min_width = 0);
std::string to_str(int value, int base = 10, size_t min_width = 0);
std::string to_str(unsigned short value, int base = 10, size_t min_width = 0);
std::string to_str(short value, int base = 10, size_t min_width = 0);
std::wstring to_wstr(unsigned long long value, int base = 10, size_t min_width = 0);
std::wstring to_wstr(long long value, int base = 10, size_t min_width = 0);
std::wstring to_wstr(unsigned long value, int base = 10, size_t min_width = 0);
std::wstring to_wstr(long value, int base = 10, size_t min_width = 0);
std::wstring to_wstr(unsigned int value, int base = 10, size_t min_width = 0);
std::wstring to_wstr(int value, int base = 10, size_t min_width = 0);
std::wstring to_wstr(unsigned short value, int base = 10, size_t min_width = 0);
std::wstring to_wstr(short value, int base = 10, size_t min_width = 0);

361
src/convert/misc.cpp

@ -5,7 +5,7 @@
*/
/*
* Copyright (c) 2017, Tomasz Sowa
* Copyright (c) 2017-2022, Tomasz Sowa
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
@ -36,6 +36,8 @@
*/
#include "misc.h"
#include "inttostr.h"
#include "utf8/utf8.h"
namespace pt
@ -50,6 +52,363 @@ void SetOverflow(bool * was_overflow, bool val)
/*
 * put a single 16-bit code unit to the out stream as a JSON \uXXXX escape
 * the hexadecimal value is padded with zeroes on the left to four digits
 */
static void esc_to_json_u16(unsigned long unit, Stream & out)
{
	char buf[10];
	size_t len = 0;

	Toa(unit, buf, sizeof(buf)/sizeof(char), 16, &len);
	out << "\\u";

	for(size_t i = len ; i < 4 ; ++i)
	{
		out << '0';
	}

	out << buf;
}


/*
 * put the val character to the out stream in the JSON \uXXXX format
 *
 * JSON allows exactly four hexadecimal digits after \u so a code point
 * from the range 0x10000 - 0x10FFFF is written as a UTF-16 surrogate pair
 * (two \uXXXX escapes), other values are written as a single escape
 */
void esc_to_json_uformat(wchar_t val, Stream & out)
{
	unsigned long code = static_cast<unsigned long>(val);

	if( code > 0xFFFFul && code <= 0x10FFFFul )
	{
		code -= 0x10000ul;
		esc_to_json_u16(0xD800ul + (code >> 10), out);    // high surrogate: upper ten bits
		esc_to_json_u16(0xDC00ul + (code & 0x3FFul), out); // low surrogate: lower ten bits
	}
	else
	{
		esc_to_json_u16(code, out);
	}
}
/*
 * try to escape the val character for JSON
 *
 * return true if the character required escaping and its escaped form
 * was put to the out stream
 * return false if the character does not need escaping,
 * in such a case nothing is put to the out stream
 */
bool try_esc_to_json(wchar_t val, Stream & out)
{
	switch( val )
	{
	case '\r':
		out << '\\' << 'r';
		return true;

	case '\n':
		out << '\\' << 'n';
		return true;

	case '\t':
		out << '\\' << 't';
		return true;

	case 0x08:
		out << '\\' << 'b';
		return true;

	case 0x0c:
		out << '\\' << 'f';
		return true;

	case '\\':
		out << '\\' << '\\';
		return true;

	case '"':
		out << '\\' << '\"';
		return true;

	default:
		break;
	}

	// the rest of characters below the space are written in the \uXXXX format
	if( val < 32 )
	{
		esc_to_json_uformat(val, out);
		return true;
	}

	return false;
}
/*
 * put the val character to the out stream, escaped for JSON if needed
 */
void esc_to_json(wchar_t val, Stream & out)
{
	const bool was_escaped = try_esc_to_json(val, out);

	if( was_escaped )
		return;

	out << val;
}


/*
 * the same as above but for a char,
 * the character is treated as an unsigned byte when testing for escaping
 */
void esc_to_json(char val, Stream & out)
{
	const bool was_escaped = try_esc_to_json((wchar_t)(unsigned char)val, out);

	if( was_escaped )
		return;

	out << val;
}


/*
 * escape a null terminated string
 */
void esc_to_json(const char * c, pt::Stream & out)
{
	for( ; *c != 0 ; ++c)
	{
		esc_to_json(*c, out);
	}
}


/*
 * escape exactly len characters from c
 */
void esc_to_json(const char * c, std::size_t len, pt::Stream & out)
{
	const char * const end = c + len;

	for( ; c != end ; ++c)
	{
		esc_to_json(*c, out);
	}
}


/*
 * escape a null terminated wide string
 */
void esc_to_json(const wchar_t * c, pt::Stream & out)
{
	for( ; *c != 0 ; ++c)
	{
		esc_to_json(*c, out);
	}
}


/*
 * escape exactly len wide characters from c
 */
void esc_to_json(const wchar_t * c, size_t len, pt::Stream & out)
{
	const wchar_t * const end = c + len;

	for( ; c != end ; ++c)
	{
		esc_to_json(*c, out);
	}
}


/*
 * escape the whole string (embedded null characters are processed too)
 */
void esc_to_json(const std::string & in, Stream & out)
{
	for(char ch : in)
	{
		esc_to_json(ch, out);
	}
}


/*
 * escape the whole wide string (embedded null characters are processed too)
 */
void esc_to_json(const std::wstring & in, Stream & out)
{
	for(wchar_t ch : in)
	{
		esc_to_json(ch, out);
	}
}
/*
 * try to escape the val character for XML
 *
 * return true if the character was handled here: either its escaped form
 * was put to the out stream or the character is invalid (a null character)
 * and it was silently dropped
 * return false if the character does not need escaping,
 * in such a case nothing is put to the out stream
 */
bool try_esc_to_xml(wchar_t val, Stream & out)
{
	switch( val )
	{
	case 0:
		// null character is invalid in XML 1.0 and 1.1
		// https://en.wikipedia.org/wiki/Valid_characters_in_XML
		// report it as handled but do not put anything to the out stream
		return true;

	case '<':
		out << "&lt;";
		return true;

	case '>':
		out << "&gt;";
		return true;

	case '&':
		out << "&amp;";
		return true;

	case '"':
		out << "&quot;";
		return true;

	default:
		break;
	}

	return false;
}
/*
 * put the val character to the out stream, escaped for XML if needed
 */
void esc_to_xml(wchar_t val, Stream & out)
{
	const bool was_handled = try_esc_to_xml(val, out);

	if( was_handled )
		return;

	out << val;
}


/*
 * the same as above but for a char,
 * the character is treated as an unsigned byte when testing for escaping
 */
void esc_to_xml(char val, Stream & out)
{
	const bool was_handled = try_esc_to_xml((wchar_t)(unsigned char)val, out);

	if( was_handled )
		return;

	out << val;
}


/*
 * escape a null terminated string
 */
void esc_to_xml(const char * c, pt::Stream & out)
{
	for( ; *c != 0 ; ++c)
	{
		esc_to_xml(*c, out);
	}
}


/*
 * escape exactly len characters from c
 */
void esc_to_xml(const char * c, std::size_t len, pt::Stream & out)
{
	const char * const end = c + len;

	for( ; c != end ; ++c)
	{
		esc_to_xml(*c, out);
	}
}


/*
 * escape a null terminated wide string
 */
void esc_to_xml(const wchar_t * c, pt::Stream & out)
{
	for( ; *c != 0 ; ++c)
	{
		esc_to_xml(*c, out);
	}
}


/*
 * escape exactly len wide characters from c
 */
void esc_to_xml(const wchar_t * c, size_t len, pt::Stream & out)
{
	const wchar_t * const end = c + len;

	for( ; c != end ; ++c)
	{
		esc_to_xml(*c, out);
	}
}


/*
 * escape the whole string
 */
void esc_to_xml(const std::string & in, Stream & out)
{
	for(char ch : in)
	{
		esc_to_xml(ch, out);
	}
}


/*
 * escape the whole wide string
 */
void esc_to_xml(const std::wstring & in, Stream & out)
{
	for(wchar_t ch : in)
	{
		esc_to_xml(ch, out);
	}
}
/*
 * try to escape the val character for CSV
 *
 * return true if the character was handled here: either its escaped form
 * was put to the out stream or the character is invalid (a null character)
 * and it was silently dropped
 * return false if the character does not need escaping,
 * in such a case nothing is put to the out stream
 */
bool try_esc_to_csv(wchar_t val, pt::Stream & out)
{
	if( val == 0 )
	{
		// null characters are invalid in text files
		// report it as handled but do not put anything to the out stream
		return true;
	}

	if( val == '"' )
	{
		// a quote is escaped by doubling it
		out << "\"\"";
		return true;
	}

	return false;
}
/*
 * put the val character to the out stream, escaped for CSV if needed
 */
void esc_to_csv(wchar_t val, pt::Stream & out)
{
	const bool was_handled = try_esc_to_csv(val, out);

	if( was_handled )
		return;

	out << val;
}


/*
 * the same as above but for a char,
 * the character is treated as an unsigned byte when testing for escaping
 */
void esc_to_csv(char val, Stream & out)
{
	const bool was_handled = try_esc_to_csv((wchar_t)(unsigned char)val, out);

	if( was_handled )
		return;

	out << val;
}


/*
 * escape a null terminated string
 */
void esc_to_csv(const char * c, pt::Stream & out)
{
	for( ; *c != 0 ; ++c)
	{
		esc_to_csv(*c, out);
	}
}


/*
 * escape exactly len characters from c
 */
void esc_to_csv(const char * c, std::size_t len, pt::Stream & out)
{
	const char * const end = c + len;

	for( ; c != end ; ++c)
	{
		esc_to_csv(*c, out);
	}
}


/*
 * escape a null terminated wide string
 */
void esc_to_csv(const wchar_t * c, pt::Stream & out)
{
	for( ; *c != 0 ; ++c)
	{
		esc_to_csv(*c, out);
	}
}


/*
 * escape exactly len wide characters from c
 */
void esc_to_csv(const wchar_t * c, size_t len, pt::Stream & out)
{
	const wchar_t * const end = c + len;

	for( ; c != end ; ++c)
	{
		esc_to_csv(*c, out);
	}
}


/*
 * escape the whole string
 */
void esc_to_csv(const std::string & in, Stream & out)
{
	for(char ch : in)
	{
		esc_to_csv(ch, out);
	}
}


/*
 * escape the whole wide string
 */
void esc_to_csv(const std::wstring & in, Stream & out)
{
	for(wchar_t ch : in)
	{
		esc_to_csv(ch, out);
	}
}
}

77
src/convert/misc.h

@ -5,7 +5,7 @@
*/
/*
* Copyright (c) 2017, Tomasz Sowa
* Copyright (c) 2017-2022, Tomasz Sowa
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
@ -40,6 +40,9 @@
#include <limits>
#include "text.h"
#include "textstream/stream.h"
#include "textstream/types.h"
#include "utf8/utf8_stream.h"
namespace pt
@ -47,6 +50,78 @@ namespace pt
void SetOverflow(bool * was_overflow, bool val);
bool try_esc_to_json(wchar_t val, Stream & out);
void esc_to_json(wchar_t val, Stream & out);
void esc_to_json(char val, Stream & out);
void esc_to_json(const char * c, pt::Stream & out);
void esc_to_json(const char * c, std::size_t len, Stream & out);
void esc_to_json(const wchar_t * c, Stream & out);
void esc_to_json(const wchar_t * c, size_t len, pt::Stream & out);
void esc_to_json(const std::string & in, Stream & out);
void esc_to_json(const std::wstring & in, Stream & out);
bool try_esc_to_xml(wchar_t val, Stream & out);
void esc_to_xml(wchar_t c, pt::Stream & out);
void esc_to_xml(char c, pt::Stream & out);
void esc_to_xml(const char * c, pt::Stream & out);
void esc_to_xml(const char * c, std::size_t len, pt::Stream & out);
void esc_to_xml(const wchar_t * c, pt::Stream & out);
void esc_to_xml(const wchar_t * c, size_t len, pt::Stream & out);
void esc_to_xml(const std::string & in, Stream & out);
void esc_to_xml(const std::wstring & in, Stream & out);
/*
 * CSV escaping
 * try_esc_to_csv() returns true if the character was handled (escaped or dropped),
 * the esc_to_csv() overloads put the (escaped) input to the out stream
 */
bool try_esc_to_csv(wchar_t val, pt::Stream & out);
void esc_to_csv(wchar_t val, Stream & out);
void esc_to_csv(char c, pt::Stream & out);
void esc_to_csv(const char * c, pt::Stream & out);
void esc_to_csv(const char * c, std::size_t len, pt::Stream & out);
void esc_to_csv(const wchar_t * c, pt::Stream & out);
void esc_to_csv(const wchar_t * c, size_t len, pt::Stream & out);
void esc_to_csv(const std::string & in, Stream & out);
void esc_to_csv(const std::wstring & in, Stream & out);
template<typename StreamType>
void esc_to_json(const StreamType & in, Stream & out)
{
typename StreamType::const_iterator i = in.begin();
typename StreamType::const_iterator end = in