added: functions for dealing with whitespace characters:
  bool IsWhite(wchar_t c, bool check_additional_chars, bool treat_new_line_as_white) (also checks Unicode whitespace characters)
  CharType * SkipWhite(CharType * str, bool check_additional_chars = true, bool treat_new_line_as_white = true)
  IsDigit(wchar_t c, int base, int * digit)
added: functions for converting a string to an integer:
  unsigned long long Toull(const CharType * str, int base = 10, const CharType ** after_str = 0, bool * was_overflow = 0)
  long long Toll(const CharType * str, int base = 10, const CharType ** after_str = 0, bool * was_overflow = 0)
  unsigned long Toul(const CharType * str, int base = 10, const CharType ** after_str = 0, bool * was_overflow = 0)
  unsigned int Toui(const CharType * str, int base = 10, const CharType ** after_str = 0, bool * was_overflow = 0)
  long Tol(const CharType * str, int base = 10, const CharType ** after_str = 0, bool * was_overflow = 0)
  int Toi(const CharType * str, int base = 10, const CharType ** after_str = 0, bool * was_overflow = 0)
changed: some work in Space (new API):
  the Text() methods now return std::wstring by value (previously they returned it by reference)
  added std::wstring & TextRef() methods
  added unsigned int UInt(), unsigned long ULong(), LongLong() and ULongLong()
  GetValue() renamed to GetFirstValue()
  AText() renamed to TextA(), and they now return std::string by value
git-svn-id: svn://ttmath.org/publicrep/pikotools/trunk@1066 e52654a7-88a9-db11-a3e9-0013d4bc506e
This commit is contained in:
159
convert/text.cpp
Normal file
159
convert/text.cpp
Normal file
@@ -0,0 +1,159 @@
|
||||
/*
|
||||
* This file is a part of PikoTools
|
||||
* and is distributed under the (new) BSD licence.
|
||||
* Author: Tomasz Sowa <t.sowa@ttmath.org>
|
||||
*/
|
||||
|
||||
/*
|
||||
* Copyright (c) 2017, Tomasz Sowa
|
||||
* All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are met:
|
||||
*
|
||||
* * Redistributions of source code must retain the above copyright notice,
|
||||
* this list of conditions and the following disclaimer.
|
||||
*
|
||||
* * Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
*
|
||||
* * Neither the name Tomasz Sowa nor the names of contributors to this
|
||||
* project may be used to endorse or promote products derived
|
||||
* from this software without specific prior written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
|
||||
* THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#include <algorithm>
#include <cstddef>
#include "text.h"
|
||||
|
||||
|
||||
namespace PT
|
||||
{
|
||||
|
||||
// white_chars table should be sorted (a binary search algorithm is used to find a character)
|
||||
// we do not treat a new line character (10) as a white character here
|
||||
// also space (32) and tab (9) are not inserted here
|
||||
static const wchar_t white_chars_table[] = {
|
||||
0x000B, // LINE TABULATION (vertical tabulation)
|
||||
0x000C, // FORM FEED (FF)
|
||||
0x000D, // CARRIAGE RETURN (CR) - a character at the end in a dos text file
|
||||
0x0085, // NEXT LINE (NEL)
|
||||
0x00A0, // NO-BREAK SPACE (old name: NON-BREAKING SPACE)
|
||||
0x1680, // OGHAM SPACE MARK
|
||||
0x180E, // MONGOLIAN VOWEL SEPARATOR
|
||||
0x2000, // EN QUAD
|
||||
0x2001, // EM QUAD
|
||||
0x2002, // EN SPACE
|
||||
0x2003, // EM SPACE
|
||||
0x2004, // THREE-PER-EM SPACE
|
||||
0x2005, // FOUR-PER-EM SPACE
|
||||
0x2006, // SIX-PER-EM SPACE
|
||||
0x2007, // FIGURE SPACE
|
||||
0x2008, // PUNCTUATION SPACE
|
||||
0x2009, // THIN SPACE
|
||||
0x200A, // HAIR SPACE
|
||||
0x2028, // LINE SEPARATOR
|
||||
0x2029, // PARAGRAPH SEPARATOR
|
||||
0x202F, // NARROW NO-BREAK SPACE
|
||||
0x205F, // MEDIUM MATHEMATICAL SPACE
|
||||
0x3000, // IDEOGRAPHIC SPACE
|
||||
0xFEFF, // ZERO WIDTH NO-BREAK SPACE
|
||||
};
|
||||
|
||||
|
||||
|
||||
|
||||
/*
|
||||
if check_additional_chars is false then we are testing only a space (32), tab (9) and a new line (10) (if treat_new_line_as_white is true)
|
||||
*/
|
||||
bool IsWhite(wchar_t c, bool check_additional_chars, bool treat_new_line_as_white)
|
||||
{
|
||||
// space (32) and tab (9) are the most common white chars
|
||||
// so we check them at the beginning (optimisation)
|
||||
if( c == 32 || c == 9 )
|
||||
return true;
|
||||
|
||||
std::size_t len = sizeof(white_chars_table) / sizeof(wchar_t);
|
||||
std::size_t o1 = 0;
|
||||
std::size_t o2 = len - 1;
|
||||
|
||||
if( c == 10 )
|
||||
return treat_new_line_as_white ? true : false;
|
||||
|
||||
if( !check_additional_chars )
|
||||
return false;
|
||||
|
||||
if( c < white_chars_table[o1] || c > white_chars_table[o2] )
|
||||
return false;
|
||||
|
||||
if( c == white_chars_table[o1] || c == white_chars_table[o2] )
|
||||
return true;
|
||||
|
||||
while( o1 + 1 < o2 )
|
||||
{
|
||||
std::size_t o = (o2 - o1)/2 + o1;
|
||||
|
||||
if( c == white_chars_table[o] )
|
||||
return true;
|
||||
|
||||
if( c > white_chars_table[o] )
|
||||
o1 = o;
|
||||
else
|
||||
o2 = o;
|
||||
}
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
|
||||
|
||||
/*
	Tells whether c is a digit in the given base.

	c      - the character to test; '0'-'9', 'a'-'f' and 'A'-'F' are recognised
	         (no other letters, so bases above 16 gain no additional digits)
	base   - the numeric base the digit value is compared against
	digit  - optional output (can be a null pointer); receives the numeric value
	         of c, or 0 if c is not one of the recognised characters.
	         It is written even when the value does not fit the base.

	Returns true if c is a recognised character and its value is less than base.
*/
bool IsDigit(wchar_t c, int base, int * digit)
{
	bool recognised = true;
	int value = 0;

	if( c >= '0' && c <= '9' )
		value = c - '0';
	else if( c >= 'a' && c <= 'f' )
		value = c - 'a' + 10;
	else if( c >= 'A' && c <= 'F' )
		value = c - 'A' + 10;
	else
		recognised = false;

	// the output is always stored (0 for an unrecognised character)
	if( digit )
		*digit = value;

	return recognised && value < base;
}
|
||||
|
||||
|
||||
|
||||
|
||||
}
|
||||
|
||||
|
Reference in New Issue
Block a user