There are methods for reading from strings/files; those methods were moved from SpaceParser and CSVParser.
Fixed: CSVParser didn't set the input_as_utf8 flag.
htmlparserlistener
parent
2a3f43c5c3
commit
7ce07c57f5
@ -0,0 +1,188 @@
|
||||
/*
|
||||
* This file is a part of PikoTools
|
||||
* and is distributed under the (new) BSD licence.
|
||||
* Author: Tomasz Sowa <t.sowa@ttmath.org>
|
||||
*/
|
||||
|
||||
/*
|
||||
* Copyright (c) 2021, Tomasz Sowa
|
||||
* All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are met:
|
||||
*
|
||||
* * Redistributions of source code must retain the above copyright notice,
|
||||
* this list of conditions and the following disclaimer.
|
||||
*
|
||||
* * Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
*
|
||||
* * Neither the name Tomasz Sowa nor the names of contributors to this
|
||||
* project may be used to endorse or promote products derived
|
||||
* from this software without specific prior written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
|
||||
* THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#include "baseparser.h"
|
||||
#include "utf8/utf8.h"
|
||||
|
||||
|
||||
|
||||
namespace pt
|
||||
{
|
||||
|
||||
/*
 * Constructs the parser base in its default state.
 * All fields are initialised by clear().
 */
BaseParser::BaseParser()
{
	this->clear();
}
|
||||
|
||||
|
||||
void BaseParser::clear()
|
||||
{
|
||||
line = 0;
|
||||
reading_from_file = false;
|
||||
pchar_ascii = nullptr;
|
||||
pchar_unicode = nullptr;
|
||||
reading_from_wchar_string = false;
|
||||
lastc = -1;
|
||||
input_as_utf8 = true;
|
||||
}
|
||||
|
||||
|
||||
int BaseParser::read_utf8_char()
|
||||
{
|
||||
int c;
|
||||
bool correct;
|
||||
|
||||
lastc = -1;
|
||||
|
||||
do
|
||||
{
|
||||
utf8_to_int(file, c, correct);
|
||||
|
||||
if( !file )
|
||||
return lastc;
|
||||
}
|
||||
while( !correct );
|
||||
|
||||
lastc = c;
|
||||
|
||||
if( lastc == '\n' )
|
||||
++line;
|
||||
|
||||
return lastc;
|
||||
}
|
||||
|
||||
|
||||
int BaseParser::read_ascii_char()
|
||||
{
|
||||
lastc = file.get();
|
||||
|
||||
if( lastc == '\n' )
|
||||
++line;
|
||||
|
||||
return lastc;
|
||||
}
|
||||
|
||||
|
||||
int BaseParser::read_char_from_wchar_string()
|
||||
{
|
||||
if( *pchar_unicode == 0 )
|
||||
lastc = -1;
|
||||
else
|
||||
lastc = *(pchar_unicode++);
|
||||
|
||||
if( lastc == '\n' )
|
||||
++line;
|
||||
|
||||
return lastc;
|
||||
}
|
||||
|
||||
|
||||
int BaseParser::read_char_from_utf8_string()
|
||||
{
|
||||
int c;
|
||||
bool correct;
|
||||
|
||||
lastc = -1;
|
||||
|
||||
do
|
||||
{
|
||||
size_t len = utf8_to_int(pchar_ascii, c, correct);
|
||||
pchar_ascii += len;
|
||||
}
|
||||
while( *pchar_ascii && !correct );
|
||||
|
||||
if( correct )
|
||||
lastc = c;
|
||||
|
||||
if( lastc == '\n' )
|
||||
++line;
|
||||
|
||||
return lastc;
|
||||
|
||||
}
|
||||
|
||||
|
||||
int BaseParser::read_char_from_ascii_string()
|
||||
{
|
||||
if( *pchar_ascii == 0 )
|
||||
lastc = -1;
|
||||
else
|
||||
lastc = *(pchar_ascii++);
|
||||
|
||||
if( lastc == '\n' )
|
||||
++line;
|
||||
|
||||
return lastc;
|
||||
}
|
||||
|
||||
|
||||
int BaseParser::read_char_no_escape()
|
||||
{
|
||||
if( reading_from_file )
|
||||
{
|
||||
if( input_as_utf8 )
|
||||
return read_utf8_char();
|
||||
else
|
||||
return read_ascii_char();
|
||||
}
|
||||
else
|
||||
{
|
||||
if( reading_from_wchar_string )
|
||||
{
|
||||
return read_char_from_wchar_string();
|
||||
}
|
||||
else
|
||||
{
|
||||
if( input_as_utf8 )
|
||||
return read_char_from_utf8_string();
|
||||
else
|
||||
return read_char_from_ascii_string();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
int BaseParser::read_char()
|
||||
{
|
||||
return read_char_no_escape();
|
||||
}
|
||||
|
||||
|
||||
|
||||
|
||||
}
|
||||
|
@ -0,0 +1,120 @@
|
||||
/*
|
||||
* This file is a part of PikoTools
|
||||
* and is distributed under the (new) BSD licence.
|
||||
* Author: Tomasz Sowa <t.sowa@ttmath.org>
|
||||
*/
|
||||
|
||||
/*
|
||||
* Copyright (c) 2021, Tomasz Sowa
|
||||
* All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are met:
|
||||
*
|
||||
* * Redistributions of source code must retain the above copyright notice,
|
||||
* this list of conditions and the following disclaimer.
|
||||
*
|
||||
* * Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
*
|
||||
* * Neither the name Tomasz Sowa nor the names of contributors to this
|
||||
* project may be used to endorse or promote products derived
|
||||
* from this software without specific prior written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
|
||||
* THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#ifndef headerfile_picotools_convert_baseparser
|
||||
#define headerfile_picotools_convert_baseparser
|
||||
|
||||
#include <string>
|
||||
#include <fstream>
|
||||
|
||||
|
||||
namespace pt
|
||||
{
|
||||
|
||||
/*
 * Shared state and low-level character readers for the parsers.
 * Everything is protected: the class is meant to be used through
 * derived classes only.
 */
class BaseParser
{
protected:

	BaseParser();

	// resets all state fields to their defaults (the file stream is left as it is)
	void clear();

	// character readers for a file input
	int read_utf8_char();
	int read_ascii_char();

	// character readers for a string input
	int read_char_from_wchar_string();
	int read_char_from_utf8_string();
	int read_char_from_ascii_string();

	// selects one of the readers above depending on the current input source
	int read_char_no_escape();
	int read_char();


	/*
		line counter, advanced by the readers each time '\n' is read
		(it is the number of the line in which a syntax error was found)
	*/
	int line;


	/*
		true if the parse() method was called
		false if ParseString() was called
	*/
	bool reading_from_file;


	/*
		pointers to the current character,
		used when ParseString() is in use
	*/
	const char * pchar_ascii;
	const wchar_t * pchar_unicode;


	/*
		true if ParseString(wchar_t *) or ParseString(std::wstring&) was called
	*/
	bool reading_from_wchar_string;


	/*
		the last character read
		or -1 at the end of the input
	*/
	int lastc;


	/*
		the current file

		maybe it would be better to make it a pointer?
		if only a string is parsed then there is no point in having such an object
	*/
	std::ifstream file;


	/*
		the input is in UTF-8
		default: true
	*/
	bool input_as_utf8;

};
|
||||
|
||||
}
|
||||
|
||||
#endif
|
Loading…
Reference in new issue