/* * This file is a part of PikoTools * and is distributed under the (new) BSD licence. * Author: Tomasz Sowa */ /* * Copyright (c) 2017, Tomasz Sowa * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: * * * Redistributions of source code must retain the above copyright notice, * this list of conditions and the following disclaimer. * * * Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * * Neither the name Tomasz Sowa nor the names of contributors to this * project may be used to endorse or promote products derived * from this software without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF * THE POSSIBILITY OF SUCH DAMAGE. */ #include #include "text.h" namespace PT { // white_chars table should be sorted (a binary search algorithm is used to find a character) // we do not treat a new line character (10) as a white character here // also space (32) and tab (9) are not inserted here static const wchar_t white_chars_table[] = { 0x000B, // LINE TABULATION (vertical tabulation) 0x000C, // FORM FEED (FF) 0x000D, // CARRIAGE RETURN (CR) - a character at the end in a dos text file 0x0085, // NEXT LINE (NEL) 0x00A0, // NO-BREAK SPACE (old name: NON-BREAKING SPACE) 0x1680, // OGHAM SPACE MARK 0x180E, // MONGOLIAN VOWEL SEPARATOR 0x2000, // EN QUAD 0x2001, // EM QUAD 0x2002, // EN SPACE 0x2003, // EM SPACE 0x2004, // THREE-PER-EM SPACE 0x2005, // FOUR-PER-EM SPACE 0x2006, // SIX-PER-EM SPACE 0x2007, // FIGURE SPACE 0x2008, // PUNCTUATION SPACE 0x2009, // THIN SPACE 0x200A, // HAIR SPACE 0x2028, // LINE SEPARATOR 0x2029, // PARAGRAPH SEPARATOR 0x202F, // NARROW NO-BREAK SPACE 0x205F, // MEDIUM MATHEMATICAL SPACE 0x3000, // IDEOGRAPHIC SPACE 0xFEFF, // ZERO WIDTH NO-BREAK SPACE }; /* if check_additional_chars is false then we are testing only a space (32), tab (9) and a new line (10) (if treat_new_line_as_white is true) */ bool IsWhite(wchar_t c, bool check_additional_chars, bool treat_new_line_as_white) { // space (32) and tab (9) are the most common white chars // so we check them at the beginning (optimisation) if( c == 32 || c == 9 ) return true; std::size_t len = sizeof(white_chars_table) / sizeof(wchar_t); std::size_t o1 = 0; std::size_t o2 = len - 1; if( c == 10 ) return treat_new_line_as_white ? true : false; if( !check_additional_chars ) return false; if( c < white_chars_table[o1] || c > white_chars_table[o2] ) return false; if( c == white_chars_table[o1] || c == white_chars_table[o2] ) return true; while( o1 + 1 < o2 ) { std::size_t o = (o2 - o1)/2 + o1; if( c == white_chars_table[o] ) return true; if( c > white_chars_table[o] ) o1 = o; else o2 = o; } return false; } bool IsDigit(wchar_t c, int base, int * digit) { int d = 0; if( c >= '0' && c <= '9' ) { d = c - '0'; } else if( c >= 'a' && c <= 'f' ) { d = c - 'a' + 10; } else if( c >= 'A' && c <= 'F' ) { d = c - 'A' + 10; } else { if( digit ) *digit = d; return false; } if( digit ) *digit = d; return d < base; } }