pikotools/src/convert/baseparser.cpp

/*
 * This file is a part of PikoTools
 * and is distributed under the (new) BSD licence.
 * Author: Tomasz Sowa <t.sowa@ttmath.org>
 */

/*
 * Copyright (c) 2021, Tomasz Sowa
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are met:
 *
 *  * Redistributions of source code must retain the above copyright notice,
 *    this list of conditions and the following disclaimer.
 *
 *  * Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 *  * Neither the name Tomasz Sowa nor the names of contributors to this
 *    project may be used to endorse or promote products derived
 *    from this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
 * THE POSSIBILITY OF SUCH DAMAGE.
 */

#include "baseparser.h"
#include "utf8/utf8.h"
#include "utf8/utf8_stream.h"


namespace pt
{

/*
 * Constructs a parser with no input attached: every piece of input-source
 * state is put into its initial value by clear_input_flags().
 */
BaseParser::BaseParser()
{
	this->clear_input_flags();
}


/*
 * Resets all state describing the current input source.
 *
 * After the call: the line counter is zero, lastc holds -1, input is treated
 * as UTF-8, no file/string/stream source is selected (all source pointers are
 * null) and the file object is closed with clear() called on it.
 */
void BaseParser::clear_input_flags()
{
	// general reading state
	input_as_utf8 = true;
	lastc = -1;
	line = 0;
	reading_from_file = false;

	// null-terminated string sources
	pchar_unicode = nullptr;
	pchar_ascii = nullptr;

	// text stream sources (begin/end iterator pairs)
	text_stream_iterator = nullptr;
	text_stream_iterator_end = nullptr;
	wtext_stream_iterator = nullptr;
	wtext_stream_iterator_end = nullptr;

	// close the file only if it is open, then call clear() unconditionally
	if( file.is_open() )
	{
		file.close();
	}

	file.clear();
}


/*
 * Reads the next character from the file, decoding it with utf8_to_int().
 *
 * Sequences which utf8_to_int() reports as incorrect are skipped and the
 * read is retried. Returns the decoded value (also stored in lastc), or -1
 * when the file object tests false after a read attempt.
 * The line counter is incremented when the returned character is '\n'.
 */
int BaseParser::read_utf8_char()
{
	int decoded;
	bool is_valid;

	lastc = -1;

	for(;;)
	{
		utf8_to_int(file, decoded, is_valid);

		// lastc is still -1 here, so a failed stream yields -1
		if( !file )
			return lastc;

		if( is_valid )
			break;
	}

	lastc = decoded;

	if( lastc == '\n' )
	{
		line += 1;
	}

	return lastc;
}


/*
 * Reads the next character from the file with file.get(), without any
 * UTF-8 decoding.
 *
 * The value returned by get() is stored in lastc and returned unchanged;
 * the line counter is incremented when that value is '\n'.
 */
int BaseParser::read_ascii_char()
{
	lastc = file.get();

	const bool is_newline = (lastc == '\n');

	if( is_newline )
	{
		line += 1;
	}

	return lastc;
}


/*
 * Reads the next character from the zero-terminated string pointed to by
 * pchar_unicode.
 *
 * Returns the character and advances pchar_unicode, or returns -1 (leaving
 * the pointer in place) when the terminating zero is reached. The result is
 * stored in lastc; the line counter is incremented when it is '\n'.
 */
int BaseParser::read_char_from_wchar_string()
{
	if( *pchar_unicode != 0 )
	{
		lastc = *pchar_unicode;
		++pchar_unicode;
	}
	else
	{
		lastc = -1;
	}

	if( lastc == '\n' )
	{
		line += 1;
	}

	return lastc;
}


/*
 * Reads the next character from the zero-terminated string pointed to by
 * pchar_ascii, decoding it with utf8_to_int().
 *
 * pchar_ascii is advanced by the length utf8_to_int() returns. Sequences
 * reported as incorrect are skipped until either a correct one is decoded
 * or the pointer reaches the terminating zero. Returns the decoded value,
 * or -1 if no correct character was obtained. The result is stored in lastc;
 * the line counter is incremented when it is '\n'.
 */
int BaseParser::read_char_from_utf8_string()
{
	int decoded;
	bool is_valid;

	lastc = -1;

	for(;;)
	{
		const size_t consumed = utf8_to_int(pchar_ascii, decoded, is_valid);
		pchar_ascii += consumed;

		// stop on the first correct character or at the end of the string
		if( is_valid || *pchar_ascii == 0 )
			break;
	}

	if( is_valid )
	{
		lastc = decoded;
	}

	if( lastc == '\n' )
	{
		line += 1;
	}

	return lastc;
}


/*
 * Reads the next byte from the zero-terminated string pointed to by
 * pchar_ascii (no UTF-8 decoding).
 *
 * Returns the byte as a non-negative value and advances pchar_ascii, or
 * returns -1 (leaving the pointer in place) when the terminating zero is
 * reached. The result is stored in lastc; the line counter is incremented
 * when it is '\n'.
 */
int BaseParser::read_char_from_ascii_string()
{
	if( *pchar_ascii == 0 )
	{
		lastc = -1;
	}
	else
	{
		// Convert through unsigned char: where plain char is signed, a byte
		// >= 0x80 would otherwise turn into a negative int and 0xFF would be
		// indistinguishable from the -1 end-of-input marker.
		lastc = static_cast<unsigned char>(*pchar_ascii);
		++pchar_ascii;
	}

	if( lastc == '\n' )
		++line;

	return lastc;
}


/*
 * Reads the next character from the wide text stream described by the
 * iterator pair wtext_stream_iterator / wtext_stream_iterator_end.
 *
 * Returns the character under the iterator and advances the iterator, or
 * returns -1 when the iterator compares equal to the end iterator. The
 * result is stored in lastc; the line counter is incremented when it is '\n'.
 */
int BaseParser::read_char_from_wtext_stream()
{
	auto & current = *wtext_stream_iterator;
	auto & finish = *wtext_stream_iterator_end;

	// assume the end of input, overwrite if there is still something to read
	lastc = -1;

	if( current != finish )
	{
		lastc = *current;
		++current;
	}

	if( lastc == '\n' )
	{
		line += 1;
	}

	return lastc;
}


/*
 * Reads the next character from the text stream described by the iterator
 * pair text_stream_iterator / text_stream_iterator_end, decoding it with
 * utf8_to_int().
 *
 * The decoding is repeated while utf8_to_int() reports an incorrect sequence
 * and the iterator has not reached the end iterator. Returns the decoded
 * value, or -1 if no correct character was obtained. The result is stored
 * in lastc; the line counter is incremented when it is '\n'.
 */
int BaseParser::read_char_from_utf8_text_stream()
{
	int decoded;
	bool is_valid;

	lastc = -1;

	for(;;)
	{
		utf8_to_int(*text_stream_iterator, *text_stream_iterator_end, decoded, is_valid);

		// stop on the first correct character or when the input is exhausted
		if( is_valid )
			break;

		if( !( (*text_stream_iterator) != (*text_stream_iterator_end) ) )
			break;
	}

	if( is_valid )
	{
		lastc = decoded;
	}

	if( lastc == '\n' )
	{
		line += 1;
	}

	return lastc;
}


/*
 * Reads the next byte from the text stream described by the iterator pair
 * text_stream_iterator / text_stream_iterator_end (no UTF-8 decoding).
 *
 * Returns the byte as a non-negative value and advances the iterator, or
 * returns -1 when the iterator compares equal to the end iterator. The
 * result is stored in lastc; the line counter is incremented when it is '\n'.
 */
int BaseParser::read_char_from_ascii_text_stream()
{
	if( (*text_stream_iterator) != (*text_stream_iterator_end) )
	{
		// Convert through unsigned char so that a byte >= 0x80 does not
		// become a negative int (0xFF would look like the -1 end marker)
		// where plain char is signed.
		// NOTE(review): assumes the stream's element type is a one-byte
		// char - confirm against the iterator's declaration.
		lastc = static_cast<unsigned char>(*(*text_stream_iterator));
		++(*text_stream_iterator);
	}
	else
	{
		lastc = -1;
	}

	if( lastc == '\n' )
		++line;

	return lastc;
}


/*
 * Reads the next character from whichever input source is currently
 * selected and returns it.
 *
 * Sources are checked in this order: file, pchar_ascii string,
 * pchar_unicode string, wide text stream, text stream. For the file,
 * the pchar_ascii string and the text stream the input_as_utf8 flag selects
 * between the UTF-8 reader and the plain byte reader. When no source is
 * selected lastc is set to -1 and -1 is returned.
 */
int BaseParser::read_char_no_escape()
{
	if( reading_from_file )
		return input_as_utf8 ? read_utf8_char() : read_ascii_char();

	if( pchar_ascii )
		return input_as_utf8 ? read_char_from_utf8_string() : read_char_from_ascii_string();

	if( pchar_unicode )
		return read_char_from_wchar_string();

	if( wtext_stream_iterator && wtext_stream_iterator_end )
		return read_char_from_wtext_stream();

	if( text_stream_iterator && text_stream_iterator_end )
		return input_as_utf8 ? read_char_from_utf8_text_stream() : read_char_from_ascii_text_stream();

	// no input source has been set
	lastc = -1;
	return lastc;
}


/*
 * Reads the next character from the current input source.
 *
 * This implementation does nothing beyond forwarding to
 * read_char_no_escape() and returning its result.
 */
int BaseParser::read_char()
{
	const int next_char = read_char_no_escape();

	return next_char;
}


}