/* * This file is a part of PikoTools * and is distributed under the 2-Clause BSD licence. * Author: Tomasz Sowa */ /* * Copyright (c) 2021-2022, Tomasz Sowa * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: * * 1. Redistributions of source code must retain the above copyright notice, * this list of conditions and the following disclaimer. * * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE * POSSIBILITY OF SUCH DAMAGE. * */ #include "baseparser.h" #include "utf8/utf8.h" namespace pt { BaseParser::BaseParser() { clear_input_flags(); } void BaseParser::clear_input_flags() { line = 0; column = 0; reading_from_file = false; pchar_ascii = nullptr; pchar_unicode = nullptr; wtext_stream_iterator = nullptr; wtext_stream_iterator_end = nullptr; text_stream_iterator = nullptr; text_stream_iterator_end = nullptr; lastc = -1; input_as_utf8 = true; if( file.is_open() ) file.close(); file.clear(); } void BaseParser::check_new_line() { if( lastc == '\n' ) { ++line; column = 0; } } int BaseParser::read_utf8_char() { int c; bool correct; lastc = -1; do { utf8_to_int(file, c, correct); if( !file ) return lastc; } while( !correct ); lastc = c; check_new_line(); return lastc; } int BaseParser::read_ascii_char() { lastc = file.get(); check_new_line(); return lastc; } int BaseParser::read_char_from_wchar_string() { if( *pchar_unicode == 0 ) lastc = -1; else lastc = *(pchar_unicode++); check_new_line(); return lastc; } int BaseParser::read_char_from_utf8_string() { int c; bool correct; lastc = -1; do { size_t len = utf8_to_int(pchar_ascii, c, correct); pchar_ascii += len; } while( *pchar_ascii && !correct ); if( correct ) lastc = c; check_new_line(); return lastc; } int BaseParser::read_char_from_ascii_string() { if( *pchar_ascii == 0 ) lastc = -1; else lastc = *(pchar_ascii++); check_new_line(); return lastc; } int BaseParser::read_char_from_wtext_stream() { if( (*wtext_stream_iterator) != (*wtext_stream_iterator_end) ) { lastc = *(*wtext_stream_iterator); ++(*wtext_stream_iterator); } else { lastc = -1; } check_new_line(); return lastc; } int BaseParser::read_char_from_utf8_text_stream() { int c; bool correct; lastc = -1; do { utf8_to_int(*text_stream_iterator, *text_stream_iterator_end, c, correct); } while( !correct && (*text_stream_iterator) != (*text_stream_iterator_end) ); if( correct ) lastc = c; check_new_line(); return lastc; } int BaseParser::read_char_from_ascii_text_stream() { if( (*text_stream_iterator) != (*text_stream_iterator_end) ) { lastc = *(*text_stream_iterator); ++(*text_stream_iterator); } else { lastc = -1; } check_new_line(); return lastc; } int BaseParser::read_char_no_escape() { if( reading_from_file ) { if( input_as_utf8 ) return read_utf8_char(); else return read_ascii_char(); } else { if( pchar_ascii ) { if( input_as_utf8 ) return read_char_from_utf8_string(); else return read_char_from_ascii_string(); } else if( pchar_unicode ) { return read_char_from_wchar_string(); } else if( wtext_stream_iterator && wtext_stream_iterator_end ) { return read_char_from_wtext_stream(); } else if( text_stream_iterator && text_stream_iterator_end ) { if( input_as_utf8 ) return read_char_from_utf8_text_stream(); else return read_char_from_ascii_text_stream(); } else { lastc = -1; return lastc; } } } int BaseParser::read_char() { return read_char_no_escape(); } }