pikotools/src/convert/misc.cpp

412 lines
6.8 KiB
C++
Raw Normal View History

added: functions for dealing with white characters: bool IsWhite(wchar_t c, bool check_additional_chars, bool treat_new_line_as_white) (checking unicode white characters too) CharType * SkipWhite(CharType * str, bool check_additional_chars = true, bool treat_new_line_as_white = true) IsDigit(wchar_t c, int base, int * digit) added: functions to converting from a string to an integer: unsigned long long Toull(const CharType * str, int base = 10, const CharType ** after_str = 0, bool * was_overflow = 0) long long Toll(const CharType * str, int base = 10, const CharType ** after_str = 0, bool * was_overflow = 0) unsigned long Toul(const CharType * str, int base = 10, const CharType ** after_str = 0, bool * was_overflow = 0) unsigned int Toui(const CharType * str, int base = 10, const CharType ** after_str = 0, bool * was_overflow = 0) long Tol(const CharType * str, int base = 10, const CharType ** after_str = 0, bool * was_overflow = 0) int Toi(const CharType * str, int base = 10, const CharType ** after_str = 0, bool * was_overflow = 0) changed: some work in Space (new Api) now Text() methods returns std::wstring by value (before they were returned by reference) added std::wstring & TextRef() methods added unsigned int UInt(), unsigned long ULong() and LongLong() and ULongLong() GetValue() renamed to GetFirstValue() AText() renamed to TextA() and they return std::string by value now git-svn-id: svn://ttmath.org/publicrep/pikotools/trunk@1066 e52654a7-88a9-db11-a3e9-0013d4bc506e
2017-12-05 17:32:21 +01:00
/*
* This file is a part of PikoTools
* and is distributed under the 2-Clause BSD licence.
added: functions for dealing with white characters: bool IsWhite(wchar_t c, bool check_additional_chars, bool treat_new_line_as_white) (checking unicode white characters too) CharType * SkipWhite(CharType * str, bool check_additional_chars = true, bool treat_new_line_as_white = true) IsDigit(wchar_t c, int base, int * digit) added: functions to converting from a string to an integer: unsigned long long Toull(const CharType * str, int base = 10, const CharType ** after_str = 0, bool * was_overflow = 0) long long Toll(const CharType * str, int base = 10, const CharType ** after_str = 0, bool * was_overflow = 0) unsigned long Toul(const CharType * str, int base = 10, const CharType ** after_str = 0, bool * was_overflow = 0) unsigned int Toui(const CharType * str, int base = 10, const CharType ** after_str = 0, bool * was_overflow = 0) long Tol(const CharType * str, int base = 10, const CharType ** after_str = 0, bool * was_overflow = 0) int Toi(const CharType * str, int base = 10, const CharType ** after_str = 0, bool * was_overflow = 0) changed: some work in Space (new Api) now Text() methods returns std::wstring by value (before they were returned by reference) added std::wstring & TextRef() methods added unsigned int UInt(), unsigned long ULong() and LongLong() and ULongLong() GetValue() renamed to GetFirstValue() AText() renamed to TextA() and they return std::string by value now git-svn-id: svn://ttmath.org/publicrep/pikotools/trunk@1066 e52654a7-88a9-db11-a3e9-0013d4bc506e
2017-12-05 17:32:21 +01:00
* Author: Tomasz Sowa <t.sowa@ttmath.org>
*/
/*
* Copyright (c) 2017-2022, Tomasz Sowa
added: functions for dealing with white characters: bool IsWhite(wchar_t c, bool check_additional_chars, bool treat_new_line_as_white) (checking unicode white characters too) CharType * SkipWhite(CharType * str, bool check_additional_chars = true, bool treat_new_line_as_white = true) IsDigit(wchar_t c, int base, int * digit) added: functions to converting from a string to an integer: unsigned long long Toull(const CharType * str, int base = 10, const CharType ** after_str = 0, bool * was_overflow = 0) long long Toll(const CharType * str, int base = 10, const CharType ** after_str = 0, bool * was_overflow = 0) unsigned long Toul(const CharType * str, int base = 10, const CharType ** after_str = 0, bool * was_overflow = 0) unsigned int Toui(const CharType * str, int base = 10, const CharType ** after_str = 0, bool * was_overflow = 0) long Tol(const CharType * str, int base = 10, const CharType ** after_str = 0, bool * was_overflow = 0) int Toi(const CharType * str, int base = 10, const CharType ** after_str = 0, bool * was_overflow = 0) changed: some work in Space (new Api) now Text() methods returns std::wstring by value (before they were returned by reference) added std::wstring & TextRef() methods added unsigned int UInt(), unsigned long ULong() and LongLong() and ULongLong() GetValue() renamed to GetFirstValue() AText() renamed to TextA() and they return std::string by value now git-svn-id: svn://ttmath.org/publicrep/pikotools/trunk@1066 e52654a7-88a9-db11-a3e9-0013d4bc506e
2017-12-05 17:32:21 +01:00
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* 1. Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
added: functions for dealing with white characters: bool IsWhite(wchar_t c, bool check_additional_chars, bool treat_new_line_as_white) (checking unicode white characters too) CharType * SkipWhite(CharType * str, bool check_additional_chars = true, bool treat_new_line_as_white = true) IsDigit(wchar_t c, int base, int * digit) added: functions to converting from a string to an integer: unsigned long long Toull(const CharType * str, int base = 10, const CharType ** after_str = 0, bool * was_overflow = 0) long long Toll(const CharType * str, int base = 10, const CharType ** after_str = 0, bool * was_overflow = 0) unsigned long Toul(const CharType * str, int base = 10, const CharType ** after_str = 0, bool * was_overflow = 0) unsigned int Toui(const CharType * str, int base = 10, const CharType ** after_str = 0, bool * was_overflow = 0) long Tol(const CharType * str, int base = 10, const CharType ** after_str = 0, bool * was_overflow = 0) int Toi(const CharType * str, int base = 10, const CharType ** after_str = 0, bool * was_overflow = 0) changed: some work in Space (new Api) now Text() methods returns std::wstring by value (before they were returned by reference) added std::wstring & TextRef() methods added unsigned int UInt(), unsigned long ULong() and LongLong() and ULongLong() GetValue() renamed to GetFirstValue() AText() renamed to TextA() and they return std::string by value now git-svn-id: svn://ttmath.org/publicrep/pikotools/trunk@1066 e52654a7-88a9-db11-a3e9-0013d4bc506e
2017-12-05 17:32:21 +01:00
*
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
added: functions for dealing with white characters: bool IsWhite(wchar_t c, bool check_additional_chars, bool treat_new_line_as_white) (checking unicode white characters too) CharType * SkipWhite(CharType * str, bool check_additional_chars = true, bool treat_new_line_as_white = true) IsDigit(wchar_t c, int base, int * digit) added: functions to converting from a string to an integer: unsigned long long Toull(const CharType * str, int base = 10, const CharType ** after_str = 0, bool * was_overflow = 0) long long Toll(const CharType * str, int base = 10, const CharType ** after_str = 0, bool * was_overflow = 0) unsigned long Toul(const CharType * str, int base = 10, const CharType ** after_str = 0, bool * was_overflow = 0) unsigned int Toui(const CharType * str, int base = 10, const CharType ** after_str = 0, bool * was_overflow = 0) long Tol(const CharType * str, int base = 10, const CharType ** after_str = 0, bool * was_overflow = 0) int Toi(const CharType * str, int base = 10, const CharType ** after_str = 0, bool * was_overflow = 0) changed: some work in Space (new Api) now Text() methods returns std::wstring by value (before they were returned by reference) added std::wstring & TextRef() methods added unsigned int UInt(), unsigned long ULong() and LongLong() and ULongLong() GetValue() renamed to GetFirstValue() AText() renamed to TextA() and they return std::string by value now git-svn-id: svn://ttmath.org/publicrep/pikotools/trunk@1066 e52654a7-88a9-db11-a3e9-0013d4bc506e
2017-12-05 17:32:21 +01:00
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
added: functions for dealing with white characters: bool IsWhite(wchar_t c, bool check_additional_chars, bool treat_new_line_as_white) (checking unicode white characters too) CharType * SkipWhite(CharType * str, bool check_additional_chars = true, bool treat_new_line_as_white = true) IsDigit(wchar_t c, int base, int * digit) added: functions to converting from a string to an integer: unsigned long long Toull(const CharType * str, int base = 10, const CharType ** after_str = 0, bool * was_overflow = 0) long long Toll(const CharType * str, int base = 10, const CharType ** after_str = 0, bool * was_overflow = 0) unsigned long Toul(const CharType * str, int base = 10, const CharType ** after_str = 0, bool * was_overflow = 0) unsigned int Toui(const CharType * str, int base = 10, const CharType ** after_str = 0, bool * was_overflow = 0) long Tol(const CharType * str, int base = 10, const CharType ** after_str = 0, bool * was_overflow = 0) int Toi(const CharType * str, int base = 10, const CharType ** after_str = 0, bool * was_overflow = 0) changed: some work in Space (new Api) now Text() methods returns std::wstring by value (before they were returned by reference) added std::wstring & TextRef() methods added unsigned int UInt(), unsigned long ULong() and LongLong() and ULongLong() GetValue() renamed to GetFirstValue() AText() renamed to TextA() and they return std::string by value now git-svn-id: svn://ttmath.org/publicrep/pikotools/trunk@1066 e52654a7-88a9-db11-a3e9-0013d4bc506e
2017-12-05 17:32:21 +01:00
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*
added: functions for dealing with white characters: bool IsWhite(wchar_t c, bool check_additional_chars, bool treat_new_line_as_white) (checking unicode white characters too) CharType * SkipWhite(CharType * str, bool check_additional_chars = true, bool treat_new_line_as_white = true) IsDigit(wchar_t c, int base, int * digit) added: functions to converting from a string to an integer: unsigned long long Toull(const CharType * str, int base = 10, const CharType ** after_str = 0, bool * was_overflow = 0) long long Toll(const CharType * str, int base = 10, const CharType ** after_str = 0, bool * was_overflow = 0) unsigned long Toul(const CharType * str, int base = 10, const CharType ** after_str = 0, bool * was_overflow = 0) unsigned int Toui(const CharType * str, int base = 10, const CharType ** after_str = 0, bool * was_overflow = 0) long Tol(const CharType * str, int base = 10, const CharType ** after_str = 0, bool * was_overflow = 0) int Toi(const CharType * str, int base = 10, const CharType ** after_str = 0, bool * was_overflow = 0) changed: some work in Space (new Api) now Text() methods returns std::wstring by value (before they were returned by reference) added std::wstring & TextRef() methods added unsigned int UInt(), unsigned long ULong() and LongLong() and ULongLong() GetValue() renamed to GetFirstValue() AText() renamed to TextA() and they return std::string by value now git-svn-id: svn://ttmath.org/publicrep/pikotools/trunk@1066 e52654a7-88a9-db11-a3e9-0013d4bc506e
2017-12-05 17:32:21 +01:00
*/
#include "misc.h"
#include "inttostr.h"
#include "utf8/utf8.h"
added: functions for dealing with white characters: bool IsWhite(wchar_t c, bool check_additional_chars, bool treat_new_line_as_white) (checking unicode white characters too) CharType * SkipWhite(CharType * str, bool check_additional_chars = true, bool treat_new_line_as_white = true) IsDigit(wchar_t c, int base, int * digit) added: functions to converting from a string to an integer: unsigned long long Toull(const CharType * str, int base = 10, const CharType ** after_str = 0, bool * was_overflow = 0) long long Toll(const CharType * str, int base = 10, const CharType ** after_str = 0, bool * was_overflow = 0) unsigned long Toul(const CharType * str, int base = 10, const CharType ** after_str = 0, bool * was_overflow = 0) unsigned int Toui(const CharType * str, int base = 10, const CharType ** after_str = 0, bool * was_overflow = 0) long Tol(const CharType * str, int base = 10, const CharType ** after_str = 0, bool * was_overflow = 0) int Toi(const CharType * str, int base = 10, const CharType ** after_str = 0, bool * was_overflow = 0) changed: some work in Space (new Api) now Text() methods returns std::wstring by value (before they were returned by reference) added std::wstring & TextRef() methods added unsigned int UInt(), unsigned long ULong() and LongLong() and ULongLong() GetValue() renamed to GetFirstValue() AText() renamed to TextA() and they return std::string by value now git-svn-id: svn://ttmath.org/publicrep/pikotools/trunk@1066 e52654a7-88a9-db11-a3e9-0013d4bc506e
2017-12-05 17:32:21 +01:00
2021-05-20 16:11:12 +02:00
namespace pt
added: functions for dealing with white characters: bool IsWhite(wchar_t c, bool check_additional_chars, bool treat_new_line_as_white) (checking unicode white characters too) CharType * SkipWhite(CharType * str, bool check_additional_chars = true, bool treat_new_line_as_white = true) IsDigit(wchar_t c, int base, int * digit) added: functions to converting from a string to an integer: unsigned long long Toull(const CharType * str, int base = 10, const CharType ** after_str = 0, bool * was_overflow = 0) long long Toll(const CharType * str, int base = 10, const CharType ** after_str = 0, bool * was_overflow = 0) unsigned long Toul(const CharType * str, int base = 10, const CharType ** after_str = 0, bool * was_overflow = 0) unsigned int Toui(const CharType * str, int base = 10, const CharType ** after_str = 0, bool * was_overflow = 0) long Tol(const CharType * str, int base = 10, const CharType ** after_str = 0, bool * was_overflow = 0) int Toi(const CharType * str, int base = 10, const CharType ** after_str = 0, bool * was_overflow = 0) changed: some work in Space (new Api) now Text() methods returns std::wstring by value (before they were returned by reference) added std::wstring & TextRef() methods added unsigned int UInt(), unsigned long ULong() and LongLong() and ULongLong() GetValue() renamed to GetFirstValue() AText() renamed to TextA() and they return std::string by value now git-svn-id: svn://ttmath.org/publicrep/pikotools/trunk@1066 e52654a7-88a9-db11-a3e9-0013d4bc506e
2017-12-05 17:32:21 +01:00
{
/*
 * store val in the flag pointed to by was_overflow
 * a null was_overflow means the caller is not interested in the flag,
 * in such a case nothing is written
 */
void SetOverflow(bool * was_overflow, bool val)
{
    if( !was_overflow )
        return;

    *was_overflow = val;
}
/*
 * put val to the out stream as a JSON \u escape: a backslash, the letter 'u'
 * and the value printed in base 16, left-padded with '0' to at least four digits
 *
 * NOTE(review): values which need more than four hex digits are printed with
 * all their digits (no surrogate pairs are generated) -- within this file the
 * function is only called for val < 32; confirm there are no other callers
 * NOTE(review): hex_len is expected to be set by Toa() -- confirm that Toa()
 * always sets it (also when the conversion fails)
 */
void esc_to_json_uformat(wchar_t val, Stream & out)
{
    char hex[10];
    size_t hex_len;

    Toa((unsigned long)val, hex, sizeof(hex)/sizeof(char), 16, &hex_len);
    out << "\\u";

    // pad with leading zeros when there are fewer than four digits
    for(size_t printed = hex_len ; printed < 4 ; ++printed)
    {
        out << '0';
    }

    out << hex;
}
/*
 * try to escape the val character for a JSON string
 *
 * return true if an escape sequence was put to the out stream,
 * return false if the character does not need escaping
 * (in such a case nothing is put to the stream and the caller should print val itself)
 *
 * escaped are: \r \n \t backspace (0x08) form feed (0x0c) backslash, double quote
 * and all other values lower than 32 (those are printed in the \u format)
 *
 * NOTE(review): where wchar_t is a signed type a negative val is lower than 32 too
 * and goes to esc_to_json_uformat() -- confirm whether this is intended
 */
bool try_esc_to_json(wchar_t val, Stream & out)
{
    switch( val )
    {
    case '\r':
        out << '\\' << 'r';
        return true;

    case '\n':
        out << '\\' << 'n';
        return true;

    case '\t':
        out << '\\' << 't';
        return true;

    case 0x08:
        out << '\\' << 'b';
        return true;

    case 0x0c:
        out << '\\' << 'f';
        return true;

    case '\\':
        out << '\\' << '\\';
        return true;

    case '"':
        out << '\\' << '\"';
        return true;

    default:
        break;
    }

    if( val >= 32 )
        return false;

    // remaining control characters have no short form
    esc_to_json_uformat(val, out);
    return true;
}
/*
 * put a wide character to the out stream in a form suitable for a JSON string:
 * either its escape sequence or the character itself
 */
void esc_to_json(wchar_t val, Stream & out)
{
    const bool was_escaped = try_esc_to_json(val, out);

    if( was_escaped )
        return;

    out << val;
}
/*
 * put a narrow character to the out stream in a form suitable for a JSON string
 * the character is converted through unsigned char first so that values
 * above 127 do not become negative when widened
 */
void esc_to_json(char val, Stream & out)
{
    const wchar_t wide = (wchar_t)(unsigned char)val;

    if( try_esc_to_json(wide, out) )
        return;

    out << val;
}
/*
 * JSON-escape a null terminated narrow string and put it to the out stream
 * c must point to a valid null terminated string
 */
void esc_to_json(const char * c, pt::Stream & out)
{
    for(const char * cur = c ; *cur != 0 ; ++cur)
    {
        esc_to_json(*cur, out);
    }
}
/*
 * JSON-escape exactly len characters starting at c and put them to the out stream
 * null characters inside the range do not stop the processing
 */
void esc_to_json(const char * c, std::size_t len, pt::Stream & out)
{
    size_t pos = 0;

    while( pos < len )
    {
        esc_to_json(c[pos], out);
        ++pos;
    }
}
/*
 * JSON-escape a null terminated wide string and put it to the out stream
 * c must point to a valid null terminated string
 */
void esc_to_json(const wchar_t * c, pt::Stream & out)
{
    for(const wchar_t * cur = c ; *cur != 0 ; ++cur)
    {
        esc_to_json(*cur, out);
    }
}
/*
 * JSON-escape exactly len wide characters starting at c and put them to the out stream
 * null characters inside the range do not stop the processing
 */
void esc_to_json(const wchar_t * c, size_t len, pt::Stream & out)
{
    size_t pos = 0;

    while( pos < len )
    {
        esc_to_json(c[pos], out);
        ++pos;
    }
}
void esc_to_json(const std::string & in, Stream & out)
{
esc_to_json(in.c_str(), in.size(), out);
}
void esc_to_json(const std::wstring & in, Stream & out)
{
esc_to_json(in.c_str(), in.size(), out);
}
/*
 * try to escape the val character for XML
 *
 * return true if the character was handled here: either an entity was put
 * to the out stream (< > & ") or the character was dropped (null character)
 * return false if the character needs no escaping -- nothing is put
 * to the stream then and the caller should print val itself
 */
bool try_esc_to_xml(wchar_t val, Stream & out)
{
    switch( val )
    {
    case 0:
        // null character is invalid in XML 1.0 and 1.1
        // https://en.wikipedia.org/wiki/Valid_characters_in_XML
        // report it as handled but do not put anything to the out stream
        return true;

    case '<':
        out << "&lt;";
        return true;

    case '>':
        out << "&gt;";
        return true;

    case '&':
        out << "&amp;";
        return true;

    case '"':
        out << "&quot;";
        return true;

    default:
        return false;
    }
}
/*
 * put a wide character to the out stream in a form suitable for XML:
 * an entity, nothing (for a null character) or the character itself
 */
void esc_to_xml(wchar_t val, Stream & out)
{
    const bool was_handled = try_esc_to_xml(val, out);

    if( was_handled )
        return;

    out << val;
}
/*
 * put a narrow character to the out stream in a form suitable for XML
 * the character is converted through unsigned char first so that values
 * above 127 do not become negative when widened
 */
void esc_to_xml(char val, Stream & out)
{
    const wchar_t wide = (wchar_t)(unsigned char)val;

    if( try_esc_to_xml(wide, out) )
        return;

    out << val;
}
/*
 * XML-escape a null terminated narrow string and put it to the out stream
 * c must point to a valid null terminated string
 */
void esc_to_xml(const char * c, pt::Stream & out)
{
    const char * cur = c;

    while( *cur != 0 )
    {
        esc_to_xml(*cur, out);
        ++cur;
    }
}
/*
 * XML-escape exactly len characters starting at c and put them to the out stream
 * null characters inside the range are passed to the single character
 * version (they do not stop the processing)
 */
void esc_to_xml(const char * c, std::size_t len, pt::Stream & out)
{
    size_t pos = 0;

    while( pos < len )
    {
        esc_to_xml(c[pos], out);
        ++pos;
    }
}
/*
 * XML-escape a null terminated wide string and put it to the out stream
 * c must point to a valid null terminated string
 */
void esc_to_xml(const wchar_t * c, pt::Stream & out)
{
    const wchar_t * cur = c;

    while( *cur != 0 )
    {
        esc_to_xml(*cur, out);
        ++cur;
    }
}
/*
 * XML-escape exactly len wide characters starting at c and put them to the out stream
 * null characters inside the range are passed to the single character
 * version (they do not stop the processing)
 */
void esc_to_xml(const wchar_t * c, size_t len, pt::Stream & out)
{
    size_t pos = 0;

    while( pos < len )
    {
        esc_to_xml(c[pos], out);
        ++pos;
    }
}
void esc_to_xml(const std::string & in, Stream & out)
{
esc_to_xml(in.c_str(), in.size(), out);
}
void esc_to_xml(const std::wstring & in, Stream & out)
{
esc_to_xml(in.c_str(), in.size(), out);
}
/*
 * try to escape the val character for a CSV field
 *
 * return true if the character was handled here: a double quote is put
 * to the out stream as two double quotes, a null character is dropped
 * return false for any other character -- nothing is put to the stream
 * then and the caller should print val itself
 */
bool try_esc_to_csv(wchar_t val, pt::Stream & out)
{
    if( val == 0 )
    {
        // null characters are invalid in text files
        // report it as handled but do not put anything to the out stream
        return true;
    }

    if( val != '"' )
        return false;

    out << "\"\"";
    return true;
}
/*
 * put a wide character to the out stream in a form suitable for a CSV field:
 * a doubled quote, nothing (for a null character) or the character itself
 */
void esc_to_csv(wchar_t val, pt::Stream & out)
{
    const bool was_handled = try_esc_to_csv(val, out);

    if( was_handled )
        return;

    out << val;
}
/*
 * put a narrow character to the out stream in a form suitable for a CSV field
 * the character is converted through unsigned char first so that values
 * above 127 do not become negative when widened
 */
void esc_to_csv(char val, Stream & out)
{
    const wchar_t wide = (wchar_t)(unsigned char)val;

    if( try_esc_to_csv(wide, out) )
        return;

    out << val;
}
/*
 * CSV-escape a null terminated narrow string and put it to the out stream
 * c must point to a valid null terminated string
 */
void esc_to_csv(const char * c, pt::Stream & out)
{
    size_t pos = 0;

    while( c[pos] != 0 )
    {
        esc_to_csv(c[pos], out);
        ++pos;
    }
}
/*
 * CSV-escape exactly len characters starting at c and put them to the out stream
 * null characters inside the range are passed to the single character
 * version (they do not stop the processing)
 */
void esc_to_csv(const char * c, std::size_t len, pt::Stream & out)
{
    size_t pos = 0;

    while( pos < len )
    {
        esc_to_csv(c[pos], out);
        ++pos;
    }
}
/*
 * CSV-escape a null terminated wide string and put it to the out stream
 * c must point to a valid null terminated string
 */
void esc_to_csv(const wchar_t * c, pt::Stream & out)
{
    size_t pos = 0;

    while( c[pos] != 0 )
    {
        esc_to_csv(c[pos], out);
        ++pos;
    }
}
/*
 * CSV-escape exactly len wide characters starting at c and put them to the out stream
 * null characters inside the range are passed to the single character
 * version (they do not stop the processing)
 */
void esc_to_csv(const wchar_t * c, size_t len, pt::Stream & out)
{
    size_t pos = 0;

    while( pos < len )
    {
        esc_to_csv(c[pos], out);
        ++pos;
    }
}
void esc_to_csv(const std::string & in, Stream & out)
{
esc_to_csv(in.c_str(), in.size(), out);
}
void esc_to_csv(const std::wstring & in, Stream & out)
{
esc_to_csv(in.c_str(), in.size(), out);
}
added: functions for dealing with white characters: bool IsWhite(wchar_t c, bool check_additional_chars, bool treat_new_line_as_white) (checking unicode white characters too) CharType * SkipWhite(CharType * str, bool check_additional_chars = true, bool treat_new_line_as_white = true) IsDigit(wchar_t c, int base, int * digit) added: functions to converting from a string to an integer: unsigned long long Toull(const CharType * str, int base = 10, const CharType ** after_str = 0, bool * was_overflow = 0) long long Toll(const CharType * str, int base = 10, const CharType ** after_str = 0, bool * was_overflow = 0) unsigned long Toul(const CharType * str, int base = 10, const CharType ** after_str = 0, bool * was_overflow = 0) unsigned int Toui(const CharType * str, int base = 10, const CharType ** after_str = 0, bool * was_overflow = 0) long Tol(const CharType * str, int base = 10, const CharType ** after_str = 0, bool * was_overflow = 0) int Toi(const CharType * str, int base = 10, const CharType ** after_str = 0, bool * was_overflow = 0) changed: some work in Space (new Api) now Text() methods returns std::wstring by value (before they were returned by reference) added std::wstring & TextRef() methods added unsigned int UInt(), unsigned long ULong() and LongLong() and ULongLong() GetValue() renamed to GetFirstValue() AText() renamed to TextA() and they return std::string by value now git-svn-id: svn://ttmath.org/publicrep/pikotools/trunk@1066 e52654a7-88a9-db11-a3e9-0013d4bc506e
2017-12-05 17:32:21 +01:00
}