/* * This file is a part of PikoTools * and is distributed under the (new) BSD licence. * Author: Tomasz Sowa */ /* * Copyright (c) 2017-2021, Tomasz Sowa * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: * * * Redistributions of source code must retain the above copyright notice, * this list of conditions and the following disclaimer. * * * Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * * Neither the name Tomasz Sowa nor the names of contributors to this * project may be used to endorse or promote products derived * from this software without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF * THE POSSIBILITY OF SUCH DAMAGE. */ #include #include "text.h" #include "text_private.h" namespace pt { // white_chars table should be sorted (a binary search algorithm is used to find a character) // we do not treat a new line character (10) as a white character here // also space (32) and tab (9) are not inserted here static const wchar_t white_chars_table[] = { 0x000B, // LINE TABULATION (vertical tabulation) 0x000C, // FORM FEED (FF) 0x000D, // CARRIAGE RETURN (CR) - a character at the end in a dos text file 0x0085, // NEXT LINE (NEL) 0x00A0, // NO-BREAK SPACE (old name: NON-BREAKING SPACE) 0x1680, // OGHAM SPACE MARK 0x180E, // MONGOLIAN VOWEL SEPARATOR 0x2000, // EN QUAD 0x2001, // EM QUAD 0x2002, // EN SPACE 0x2003, // EM SPACE 0x2004, // THREE-PER-EM SPACE 0x2005, // FOUR-PER-EM SPACE 0x2006, // SIX-PER-EM SPACE 0x2007, // FIGURE SPACE 0x2008, // PUNCTUATION SPACE 0x2009, // THIN SPACE 0x200A, // HAIR SPACE 0x2028, // LINE SEPARATOR 0x2029, // PARAGRAPH SEPARATOR 0x202F, // NARROW NO-BREAK SPACE 0x205F, // MEDIUM MATHEMATICAL SPACE 0x3000, // IDEOGRAPHIC SPACE 0xFEFF, // ZERO WIDTH NO-BREAK SPACE }; /* if check_additional_chars is false then we are testing only a space (32), tab (9) and a new line (10) (if treat_new_line_as_white is true) */ bool is_white(wchar_t c, bool check_additional_chars, bool treat_new_line_as_white) { // space (32) and tab (9) are the most common white chars // so we check them at the beginning (optimisation) if( c == 32 || c == 9 ) return true; std::size_t len = sizeof(white_chars_table) / sizeof(wchar_t); std::size_t o1 = 0; std::size_t o2 = len - 1; if( c == 10 ) return treat_new_line_as_white ? true : false; if( !check_additional_chars ) return false; if( c < white_chars_table[o1] || c > white_chars_table[o2] ) return false; if( c == white_chars_table[o1] || c == white_chars_table[o2] ) return true; while( o1 + 1 < o2 ) { std::size_t o = (o2 - o1)/2 + o1; if( c == white_chars_table[o] ) return true; if( c > white_chars_table[o] ) o1 = o; else o2 = o; } return false; } bool is_digit(wchar_t c, int base, int * digit) { int d = 0; if( c >= '0' && c <= '9' ) { d = c - '0'; } else if( c >= 'a' && c <= 'f' ) { d = c - 'a' + 10; } else if( c >= 'A' && c <= 'F' ) { d = c - 'A' + 10; } else { if( digit ) *digit = d; return false; } if( digit ) *digit = d; return d < base; } const char * skip_white(const char * str, bool check_additional_chars, bool treat_new_line_as_white) { return pt_private::skip_white_generic(str, check_additional_chars, treat_new_line_as_white); } const wchar_t * skip_white(const wchar_t * str, bool check_additional_chars, bool treat_new_line_as_white) { return pt_private::skip_white_generic(str, check_additional_chars, treat_new_line_as_white); } const char * skip_white_from_back(const char * str_begin, const char * str_end, bool check_additional_chars, bool treat_new_line_as_white) { return pt_private::skip_white_from_back_generic(str_begin, str_end, check_additional_chars, treat_new_line_as_white); } const wchar_t * skip_white_from_back(const wchar_t * str_begin, const wchar_t * str_end, bool check_additional_chars, bool treat_new_line_as_white) { return pt_private::skip_white_from_back_generic(str_begin, str_end, check_additional_chars, treat_new_line_as_white); } const char * skip_white_from_back(const char * str, bool check_additional_chars, bool treat_new_line_as_white) { return pt_private::skip_white_from_back_generic(str, check_additional_chars, treat_new_line_as_white); } const wchar_t * skip_white_from_back(const wchar_t * str, bool check_additional_chars, bool treat_new_line_as_white) { return pt_private::skip_white_from_back_generic(str, check_additional_chars, treat_new_line_as_white); } char to_lower(char c) { return pt_private::to_lower_generic(c); } wchar_t to_lower(wchar_t c) { return pt_private::to_lower_generic(c); } char to_upper(char c) { return pt_private::to_upper_generic(c); } wchar_t to_upper(wchar_t c) { return pt_private::to_upper_generic(c); } void to_lower_emplace(std::string & str) { pt_private::to_lower_str_generic(str); } void to_lower_emplace(std::wstring & str) { pt_private::to_lower_str_generic(str); } void to_upper_emplace(std::string & str) { pt_private::to_upper_str_generic(str); } void to_upper_emplace(std::wstring & str) { pt_private::to_upper_str_generic(str); } std::string to_lower(const std::string & str) { std::string res(str); to_lower_emplace(res); return res; } std::wstring to_lower(const std::wstring & str) { std::wstring res(str); to_lower_emplace(res); return res; } std::string to_upper(const std::string & str) { std::string res(str); to_upper_emplace(res); return res; } std::wstring to_upper(const std::wstring & str) { std::wstring res(str); to_upper_emplace(res); return res; } int compare(const char * str1, const char * str2) { return pt_private::compare_generic(str1, str2); } int compare(const wchar_t * str1, const wchar_t * str2) { return pt_private::compare_generic(str1, str2); } int compare(const std::string & str1, const std::string & str2) { return pt_private::compare_str_generic(str1, str2); } int compare(const std::wstring & str1, const std::wstring & str2) { return pt_private::compare_str_generic(str1, str2); } int compare(const char * str1_begin, const char * str1_end, const char * str2) { return pt_private::compare_generic(str1_begin, str1_end, str2); } int compare(const wchar_t * str1_begin, const wchar_t * str1_end, const wchar_t * str2) { return pt_private::compare_generic(str1_begin, str1_end, str2); } int compare_nc(const char * str1, const char * str2) { return pt_private::compare_nc_generic(str1, str2); } int compare_nc(const wchar_t * str1, const wchar_t * str2) { return pt_private::compare_nc_generic(str1, str2); } int compare_nc(const std::string & str1, const std::string & str2) { return pt_private::compare_nc_str_generic(str1, str2); } int compare_nc(const std::wstring & str1, const std::wstring & str2) { return pt_private::compare_nc_str_generic(str1, str2); } int compare_nc(const char * str1_begin, const char * str1_end, const char * str2) { return pt_private::compare_nc_generic(str1_begin, str1_end, str2); } int compare_nc(const wchar_t * str1_begin, const wchar_t * str1_end, const wchar_t * str2) { return pt_private::compare_nc_generic(str1_begin, str1_end, str2); } bool is_equal(const char * str1, const char * str2) { return pt_private::compare_generic(str1, str2) == 0; } bool is_equal(const wchar_t * str1, const wchar_t * str2) { return pt_private::compare_generic(str1, str2) == 0; } bool is_equal(const std::string & str1, const std::string & str2) { return is_equal(str1.c_str(), str2.c_str()); } bool is_equal(const std::wstring & str1, const std::wstring & str2) { return is_equal(str1.c_str(), str2.c_str()); } bool is_equal(const char * str1_begin, const char * str1_end, const char * str2) { return pt_private::compare_generic(str1_begin, str1_end, str2) == 0; } bool is_equal(const wchar_t * str1_begin, const wchar_t * str1_end, const wchar_t * str2) { return pt_private::compare_generic(str1_begin, str1_end, str2) == 0; } bool is_equal_nc(const char * str1, const char * str2) { return pt_private::compare_nc_generic(str1, str2) == 0; } bool is_equal_nc(const wchar_t * str1, const wchar_t * str2) { return pt_private::compare_nc_generic(str1, str2) == 0; } bool is_equal_nc(const std::string & str1, const std::string & str2) { return is_equal_nc(str1.c_str(), str2.c_str()); } bool is_equal_nc(const std::wstring & str1, const std::wstring & str2) { return is_equal_nc(str1.c_str(), str2.c_str()); } bool is_equal_nc(const char * str1_begin, const char * str1_end, const char * str2) { return pt_private::compare_nc_generic(str1_begin, str1_end, str2) == 0; } bool is_equal_nc(const wchar_t * str1_begin, const wchar_t * str1_end, const wchar_t * str2) { return pt_private::compare_nc_generic(str1_begin, str1_end, str2) == 0; } bool is_substr(const char * short_str, const char * long_str) { return pt_private::is_substr_generic(short_str, long_str); } bool is_substr(const wchar_t * short_str, const wchar_t * long_str) { return pt_private::is_substr_generic(short_str, long_str); } bool is_substr(const std::string & short_str, const std::string & long_str) { return is_substr(short_str.c_str(), long_str.c_str()); } bool is_substr(const std::wstring & short_str, const std::wstring & long_str) { return is_substr(short_str.c_str(), long_str.c_str()); } bool is_substr_nc(const char * short_str, const char * long_str) { return pt_private::is_substr_nc_generic(short_str, long_str); } bool is_substr_nc(const wchar_t * short_str, const wchar_t * long_str) { return pt_private::is_substr_nc_generic(short_str, long_str); } bool is_substr_nc(const std::string & short_str, const std::string & long_str) { return pt_private::is_substr_nc_generic(short_str.c_str(), long_str.c_str()); } bool is_substr_nc(const std::wstring & short_str, const std::wstring & long_str) { return pt_private::is_substr_nc_generic(short_str.c_str(), long_str.c_str()); } }