Merge branch 'api2021'
This commit is contained in:
commit
4f07c00217
|
@ -1,44 +1,76 @@
|
||||||
# DO NOT DELETE
|
# DO NOT DELETE
|
||||||
|
|
||||||
./convert/inttostr.o: ./convert/inttostr.h
|
./convert/inttostr.o: ./convert/inttostr.h
|
||||||
./convert/misc.o: ./convert/misc.h ./convert/text.h
|
./convert/misc.o: ./convert/misc.h ./convert/text.h textstream/stream.h
|
||||||
|
./convert/misc.o: textstream/types.h utf8/utf8_stream.h
|
||||||
|
./convert/misc.o: textstream/textstream.h textstream/stream.h space/space.h
|
||||||
|
./convert/misc.o: convert/inttostr.h utf8/utf8.h utf8/utf8_templates.h
|
||||||
|
./convert/misc.o: utf8/utf8_private.h date/date.h membuffer/membuffer.h
|
||||||
|
./convert/misc.o: textstream/types.h ./convert/inttostr.h
|
||||||
./convert/text.o: ./convert/text.h ./convert/text_private.h
|
./convert/text.o: ./convert/text.h ./convert/text_private.h
|
||||||
./convert/double.o: ./convert/double.h textstream/textstream.h
|
./convert/double.o: ./convert/double.h textstream/textstream.h
|
||||||
./convert/double.o: textstream/stream.h space/space.h textstream/types.h
|
./convert/double.o: textstream/stream.h space/space.h textstream/types.h
|
||||||
./convert/double.o: convert/inttostr.h utf8/utf8.h textstream/stream.h
|
./convert/double.o: convert/inttostr.h utf8/utf8.h textstream/stream.h
|
||||||
./convert/double.o: utf8/utf8_templates.h utf8/utf8_private.h date/date.h
|
./convert/double.o: utf8/utf8_templates.h utf8/utf8_private.h date/date.h
|
||||||
./convert/double.o: membuffer/membuffer.h textstream/types.h
|
./convert/double.o: membuffer/membuffer.h textstream/types.h
|
||||||
|
./convert/double.o: utf8/utf8_stream.h
|
||||||
|
./convert/baseparser.o: ./convert/baseparser.h textstream/textstream.h
|
||||||
|
./convert/baseparser.o: textstream/stream.h space/space.h textstream/types.h
|
||||||
|
./convert/baseparser.o: convert/inttostr.h utf8/utf8.h textstream/stream.h
|
||||||
|
./convert/baseparser.o: utf8/utf8_templates.h utf8/utf8_private.h date/date.h
|
||||||
|
./convert/baseparser.o: membuffer/membuffer.h textstream/types.h
|
||||||
|
./convert/baseparser.o: utf8/utf8_stream.h
|
||||||
./date/date.o: ./date/date.h convert/inttostr.h
|
./date/date.o: ./date/date.h convert/inttostr.h
|
||||||
./log/filelog.o: ./log/filelog.h textstream/textstream.h textstream/stream.h
|
./log/filelog.o: ./log/filelog.h textstream/textstream.h textstream/stream.h
|
||||||
./log/filelog.o: space/space.h textstream/types.h convert/inttostr.h
|
./log/filelog.o: space/space.h textstream/types.h convert/inttostr.h
|
||||||
./log/filelog.o: utf8/utf8.h textstream/stream.h utf8/utf8_templates.h
|
./log/filelog.o: utf8/utf8.h textstream/stream.h utf8/utf8_templates.h
|
||||||
./log/filelog.o: utf8/utf8_private.h date/date.h membuffer/membuffer.h
|
./log/filelog.o: utf8/utf8_private.h date/date.h membuffer/membuffer.h
|
||||||
./log/filelog.o: textstream/types.h
|
./log/filelog.o: textstream/types.h utf8/utf8_stream.h
|
||||||
./log/log.o: ./log/log.h textstream/textstream.h textstream/stream.h
|
./log/log.o: ./log/log.h textstream/textstream.h textstream/stream.h
|
||||||
./log/log.o: space/space.h textstream/types.h convert/inttostr.h utf8/utf8.h
|
./log/log.o: space/space.h textstream/types.h convert/inttostr.h utf8/utf8.h
|
||||||
./log/log.o: textstream/stream.h utf8/utf8_templates.h utf8/utf8_private.h
|
./log/log.o: textstream/stream.h utf8/utf8_templates.h utf8/utf8_private.h
|
||||||
./log/log.o: date/date.h membuffer/membuffer.h textstream/types.h
|
./log/log.o: date/date.h membuffer/membuffer.h textstream/types.h
|
||||||
./log/log.o: ./log/filelog.h
|
./log/log.o: utf8/utf8_stream.h ./log/filelog.h
|
||||||
./space/space.o: ./space/space.h textstream/types.h convert/inttostr.h
|
./space/space.o: ./space/space.h textstream/types.h convert/inttostr.h
|
||||||
./space/space.o: utf8/utf8.h textstream/stream.h utf8/utf8_templates.h
|
./space/space.o: utf8/utf8.h textstream/stream.h utf8/utf8_templates.h
|
||||||
./space/space.o: utf8/utf8_private.h convert/convert.h ./convert/inttostr.h
|
./space/space.o: utf8/utf8_private.h convert/convert.h ./convert/inttostr.h
|
||||||
./space/space.o: convert/patternreplacer.h textstream/textstream.h
|
./space/space.o: convert/patternreplacer.h textstream/textstream.h
|
||||||
./space/space.o: textstream/stream.h space/space.h date/date.h
|
./space/space.o: textstream/stream.h space/space.h date/date.h
|
||||||
./space/space.o: membuffer/membuffer.h textstream/types.h convert/strtoint.h
|
./space/space.o: membuffer/membuffer.h textstream/types.h utf8/utf8_stream.h
|
||||||
./space/space.o: ./convert/text.h ./convert/misc.h ./convert/double.h
|
./space/space.o: convert/strtoint.h ./convert/text.h ./convert/misc.h
|
||||||
|
./space/space.o: ./convert/double.h
|
||||||
./space/spaceparser.o: ./space/spaceparser.h ./space/space.h
|
./space/spaceparser.o: ./space/spaceparser.h ./space/space.h
|
||||||
./space/spaceparser.o: textstream/types.h convert/inttostr.h utf8/utf8.h
|
./space/spaceparser.o: textstream/types.h convert/inttostr.h utf8/utf8.h
|
||||||
./space/spaceparser.o: textstream/stream.h utf8/utf8_templates.h
|
./space/spaceparser.o: textstream/stream.h utf8/utf8_templates.h
|
||||||
./space/spaceparser.o: utf8/utf8_private.h convert/strtoint.h
|
./space/spaceparser.o: utf8/utf8_private.h convert/baseparser.h
|
||||||
./space/spaceparser.o: ./convert/text.h ./convert/misc.h
|
./space/spaceparser.o: textstream/textstream.h textstream/stream.h
|
||||||
|
./space/spaceparser.o: space/space.h date/date.h membuffer/membuffer.h
|
||||||
|
./space/spaceparser.o: textstream/types.h utf8/utf8_stream.h
|
||||||
|
./space/spaceparser.o: convert/strtoint.h ./convert/text.h ./convert/misc.h
|
||||||
./utf8/utf8.o: ./utf8/utf8.h textstream/stream.h utf8/utf8_templates.h
|
./utf8/utf8.o: ./utf8/utf8.h textstream/stream.h utf8/utf8_templates.h
|
||||||
./utf8/utf8.o: utf8/utf8_private.h
|
./utf8/utf8.o: utf8/utf8_private.h
|
||||||
./utf8/utf8_private.o: utf8/utf8_private.h
|
./utf8/utf8_private.o: utf8/utf8_private.h
|
||||||
./csv/csvparser.o: ./csv/csvparser.h space/space.h textstream/types.h
|
./csv/csvparser.o: ./csv/csvparser.h space/space.h textstream/types.h
|
||||||
./csv/csvparser.o: convert/inttostr.h utf8/utf8.h textstream/stream.h
|
./csv/csvparser.o: convert/inttostr.h utf8/utf8.h textstream/stream.h
|
||||||
./csv/csvparser.o: utf8/utf8_templates.h utf8/utf8_private.h
|
./csv/csvparser.o: utf8/utf8_templates.h utf8/utf8_private.h
|
||||||
|
./csv/csvparser.o: convert/baseparser.h textstream/textstream.h
|
||||||
|
./csv/csvparser.o: textstream/stream.h date/date.h membuffer/membuffer.h
|
||||||
|
./csv/csvparser.o: textstream/types.h utf8/utf8_stream.h
|
||||||
./mainoptions/mainoptionsparser.o: ./mainoptions/mainoptionsparser.h
|
./mainoptions/mainoptionsparser.o: ./mainoptions/mainoptionsparser.h
|
||||||
./mainoptions/mainoptionsparser.o: space/space.h textstream/types.h
|
./mainoptions/mainoptionsparser.o: space/space.h textstream/types.h
|
||||||
./mainoptions/mainoptionsparser.o: convert/inttostr.h utf8/utf8.h
|
./mainoptions/mainoptionsparser.o: convert/inttostr.h utf8/utf8.h
|
||||||
./mainoptions/mainoptionsparser.o: textstream/stream.h utf8/utf8_templates.h
|
./mainoptions/mainoptionsparser.o: textstream/stream.h utf8/utf8_templates.h
|
||||||
./mainoptions/mainoptionsparser.o: utf8/utf8_private.h
|
./mainoptions/mainoptionsparser.o: utf8/utf8_private.h
|
||||||
|
./html/htmlparser.o: ./html/htmlparser.h convert/baseparser.h
|
||||||
|
./html/htmlparser.o: textstream/textstream.h textstream/stream.h
|
||||||
|
./html/htmlparser.o: space/space.h textstream/types.h convert/inttostr.h
|
||||||
|
./html/htmlparser.o: utf8/utf8.h textstream/stream.h utf8/utf8_templates.h
|
||||||
|
./html/htmlparser.o: utf8/utf8_private.h date/date.h membuffer/membuffer.h
|
||||||
|
./html/htmlparser.o: textstream/types.h utf8/utf8_stream.h convert/text.h
|
||||||
|
./html/bbcodeparser.o: ./html/bbcodeparser.h ./html/htmlparser.h
|
||||||
|
./html/bbcodeparser.o: convert/baseparser.h textstream/textstream.h
|
||||||
|
./html/bbcodeparser.o: textstream/stream.h space/space.h textstream/types.h
|
||||||
|
./html/bbcodeparser.o: convert/inttostr.h utf8/utf8.h textstream/stream.h
|
||||||
|
./html/bbcodeparser.o: utf8/utf8_templates.h utf8/utf8_private.h date/date.h
|
||||||
|
./html/bbcodeparser.o: membuffer/membuffer.h textstream/types.h
|
||||||
|
./html/bbcodeparser.o: utf8/utf8_stream.h
|
||||||
|
|
|
@ -0,0 +1,273 @@
|
||||||
|
/*
|
||||||
|
* This file is a part of PikoTools
|
||||||
|
* and is distributed under the (new) BSD licence.
|
||||||
|
* Author: Tomasz Sowa <t.sowa@ttmath.org>
|
||||||
|
*/
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Copyright (c) 2021-2022, Tomasz Sowa
|
||||||
|
* All rights reserved.
|
||||||
|
*
|
||||||
|
* Redistribution and use in source and binary forms, with or without
|
||||||
|
* modification, are permitted provided that the following conditions are met:
|
||||||
|
*
|
||||||
|
* * Redistributions of source code must retain the above copyright notice,
|
||||||
|
* this list of conditions and the following disclaimer.
|
||||||
|
*
|
||||||
|
* * Redistributions in binary form must reproduce the above copyright
|
||||||
|
* notice, this list of conditions and the following disclaimer in the
|
||||||
|
* documentation and/or other materials provided with the distribution.
|
||||||
|
*
|
||||||
|
* * Neither the name Tomasz Sowa nor the names of contributors to this
|
||||||
|
* project may be used to endorse or promote products derived
|
||||||
|
* from this software without specific prior written permission.
|
||||||
|
*
|
||||||
|
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||||
|
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||||
|
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||||
|
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||||
|
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||||
|
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||||
|
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||||
|
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||||
|
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||||
|
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
|
||||||
|
* THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
|
*/
|
||||||
|
|
||||||
|
#include "baseparser.h"
|
||||||
|
#include "utf8/utf8.h"
|
||||||
|
#include "utf8/utf8_stream.h"
|
||||||
|
|
||||||
|
|
||||||
|
namespace pt
|
||||||
|
{
|
||||||
|
|
||||||
|
/*
 * creates a parser with no input attached,
 * the whole input state is initialised by clear_input_flags()
 */
BaseParser::BaseParser()
{
	// inside a constructor a virtual call always resolves to this class's own version,
	// the qualified name only makes that explicit
	BaseParser::clear_input_flags();
}
|
||||||
|
|
||||||
|
|
||||||
|
void BaseParser::clear_input_flags()
|
||||||
|
{
|
||||||
|
line = 0;
|
||||||
|
column = 0;
|
||||||
|
reading_from_file = false;
|
||||||
|
pchar_ascii = nullptr;
|
||||||
|
pchar_unicode = nullptr;
|
||||||
|
wtext_stream_iterator = nullptr;
|
||||||
|
wtext_stream_iterator_end = nullptr;
|
||||||
|
text_stream_iterator = nullptr;
|
||||||
|
text_stream_iterator_end = nullptr;
|
||||||
|
lastc = -1;
|
||||||
|
input_as_utf8 = true;
|
||||||
|
|
||||||
|
if( file.is_open() )
|
||||||
|
file.close();
|
||||||
|
|
||||||
|
file.clear();
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
void BaseParser::check_new_line()
|
||||||
|
{
|
||||||
|
if( lastc == '\n' )
|
||||||
|
{
|
||||||
|
++line;
|
||||||
|
column = 0;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
int BaseParser::read_utf8_char()
|
||||||
|
{
|
||||||
|
int c;
|
||||||
|
bool correct;
|
||||||
|
|
||||||
|
lastc = -1;
|
||||||
|
|
||||||
|
do
|
||||||
|
{
|
||||||
|
utf8_to_int(file, c, correct);
|
||||||
|
|
||||||
|
if( !file )
|
||||||
|
return lastc;
|
||||||
|
}
|
||||||
|
while( !correct );
|
||||||
|
|
||||||
|
lastc = c;
|
||||||
|
check_new_line();
|
||||||
|
|
||||||
|
return lastc;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
int BaseParser::read_ascii_char()
|
||||||
|
{
|
||||||
|
lastc = file.get();
|
||||||
|
check_new_line();
|
||||||
|
|
||||||
|
return lastc;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
int BaseParser::read_char_from_wchar_string()
|
||||||
|
{
|
||||||
|
if( *pchar_unicode == 0 )
|
||||||
|
lastc = -1;
|
||||||
|
else
|
||||||
|
lastc = *(pchar_unicode++);
|
||||||
|
|
||||||
|
check_new_line();
|
||||||
|
|
||||||
|
return lastc;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
int BaseParser::read_char_from_utf8_string()
|
||||||
|
{
|
||||||
|
int c;
|
||||||
|
bool correct;
|
||||||
|
|
||||||
|
lastc = -1;
|
||||||
|
|
||||||
|
do
|
||||||
|
{
|
||||||
|
size_t len = utf8_to_int(pchar_ascii, c, correct);
|
||||||
|
pchar_ascii += len;
|
||||||
|
}
|
||||||
|
while( *pchar_ascii && !correct );
|
||||||
|
|
||||||
|
if( correct )
|
||||||
|
lastc = c;
|
||||||
|
|
||||||
|
check_new_line();
|
||||||
|
|
||||||
|
return lastc;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
int BaseParser::read_char_from_ascii_string()
|
||||||
|
{
|
||||||
|
if( *pchar_ascii == 0 )
|
||||||
|
lastc = -1;
|
||||||
|
else
|
||||||
|
lastc = *(pchar_ascii++);
|
||||||
|
|
||||||
|
check_new_line();
|
||||||
|
|
||||||
|
return lastc;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
int BaseParser::read_char_from_wtext_stream()
|
||||||
|
{
|
||||||
|
if( (*wtext_stream_iterator) != (*wtext_stream_iterator_end) )
|
||||||
|
{
|
||||||
|
lastc = *(*wtext_stream_iterator);
|
||||||
|
++(*wtext_stream_iterator);
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
lastc = -1;
|
||||||
|
}
|
||||||
|
|
||||||
|
check_new_line();
|
||||||
|
|
||||||
|
return lastc;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
int BaseParser::read_char_from_utf8_text_stream()
|
||||||
|
{
|
||||||
|
int c;
|
||||||
|
bool correct;
|
||||||
|
|
||||||
|
lastc = -1;
|
||||||
|
|
||||||
|
do
|
||||||
|
{
|
||||||
|
utf8_to_int(*text_stream_iterator, *text_stream_iterator_end, c, correct);
|
||||||
|
}
|
||||||
|
while( !correct && (*text_stream_iterator) != (*text_stream_iterator_end) );
|
||||||
|
|
||||||
|
if( correct )
|
||||||
|
lastc = c;
|
||||||
|
|
||||||
|
check_new_line();
|
||||||
|
|
||||||
|
return lastc;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
int BaseParser::read_char_from_ascii_text_stream()
|
||||||
|
{
|
||||||
|
if( (*text_stream_iterator) != (*text_stream_iterator_end) )
|
||||||
|
{
|
||||||
|
lastc = *(*text_stream_iterator);
|
||||||
|
++(*text_stream_iterator);
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
lastc = -1;
|
||||||
|
}
|
||||||
|
|
||||||
|
check_new_line();
|
||||||
|
|
||||||
|
return lastc;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
int BaseParser::read_char_no_escape()
|
||||||
|
{
|
||||||
|
if( reading_from_file )
|
||||||
|
{
|
||||||
|
if( input_as_utf8 )
|
||||||
|
return read_utf8_char();
|
||||||
|
else
|
||||||
|
return read_ascii_char();
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
if( pchar_ascii )
|
||||||
|
{
|
||||||
|
if( input_as_utf8 )
|
||||||
|
return read_char_from_utf8_string();
|
||||||
|
else
|
||||||
|
return read_char_from_ascii_string();
|
||||||
|
}
|
||||||
|
else if( pchar_unicode )
|
||||||
|
{
|
||||||
|
return read_char_from_wchar_string();
|
||||||
|
}
|
||||||
|
else if( wtext_stream_iterator && wtext_stream_iterator_end )
|
||||||
|
{
|
||||||
|
return read_char_from_wtext_stream();
|
||||||
|
}
|
||||||
|
else if( text_stream_iterator && text_stream_iterator_end )
|
||||||
|
{
|
||||||
|
if( input_as_utf8 )
|
||||||
|
return read_char_from_utf8_text_stream();
|
||||||
|
else
|
||||||
|
return read_char_from_ascii_text_stream();
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
lastc = -1;
|
||||||
|
return lastc;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
int BaseParser::read_char()
|
||||||
|
{
|
||||||
|
return read_char_no_escape();
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
}
|
||||||
|
|
|
@ -0,0 +1,141 @@
|
||||||
|
/*
|
||||||
|
* This file is a part of PikoTools
|
||||||
|
* and is distributed under the (new) BSD licence.
|
||||||
|
* Author: Tomasz Sowa <t.sowa@ttmath.org>
|
||||||
|
*/
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Copyright (c) 2021-2022, Tomasz Sowa
|
||||||
|
* All rights reserved.
|
||||||
|
*
|
||||||
|
* Redistribution and use in source and binary forms, with or without
|
||||||
|
* modification, are permitted provided that the following conditions are met:
|
||||||
|
*
|
||||||
|
* * Redistributions of source code must retain the above copyright notice,
|
||||||
|
* this list of conditions and the following disclaimer.
|
||||||
|
*
|
||||||
|
* * Redistributions in binary form must reproduce the above copyright
|
||||||
|
* notice, this list of conditions and the following disclaimer in the
|
||||||
|
* documentation and/or other materials provided with the distribution.
|
||||||
|
*
|
||||||
|
* * Neither the name Tomasz Sowa nor the names of contributors to this
|
||||||
|
* project may be used to endorse or promote products derived
|
||||||
|
* from this software without specific prior written permission.
|
||||||
|
*
|
||||||
|
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||||
|
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||||
|
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||||
|
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||||
|
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||||
|
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||||
|
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||||
|
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||||
|
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||||
|
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
|
||||||
|
* THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
|
*/
|
||||||
|
|
||||||
|
#ifndef headerfile_picotools_convert_baseparser
|
||||||
|
#define headerfile_picotools_convert_baseparser
|
||||||
|
|
||||||
|
#include <string>
|
||||||
|
#include <fstream>
|
||||||
|
#include "textstream/textstream.h"
|
||||||
|
|
||||||
|
|
||||||
|
namespace pt
|
||||||
|
{
|
||||||
|
|
||||||
|
class BaseParser
|
||||||
|
{
|
||||||
|
protected:
|
||||||
|
|
||||||
|
BaseParser();
|
||||||
|
|
||||||
|
virtual void clear_input_flags();
|
||||||
|
|
||||||
|
virtual void check_new_line();
|
||||||
|
virtual int read_utf8_char();
|
||||||
|
virtual int read_ascii_char();
|
||||||
|
virtual int read_char_from_wchar_string();
|
||||||
|
virtual int read_char_from_utf8_string();
|
||||||
|
virtual int read_char_from_ascii_string();
|
||||||
|
virtual int read_char_from_wtext_stream();
|
||||||
|
virtual int read_char_from_utf8_text_stream();
|
||||||
|
virtual int read_char_from_ascii_text_stream();
|
||||||
|
virtual int read_char_no_escape();
|
||||||
|
virtual int read_char();
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
/*
|
||||||
|
a number of a line in which there is a syntax_error
|
||||||
|
*/
|
||||||
|
int line;
|
||||||
|
|
||||||
|
/*
|
||||||
|
a number of a column in which there is a syntax_error
|
||||||
|
*/
|
||||||
|
int column;
|
||||||
|
|
||||||
|
|
||||||
|
/*
|
||||||
|
true if parse() method was called
|
||||||
|
false if ParseString() was called
|
||||||
|
*/
|
||||||
|
bool reading_from_file;
|
||||||
|
|
||||||
|
|
||||||
|
/*
|
||||||
|
pointers to the current character
|
||||||
|
if ParseString() is in used
|
||||||
|
*/
|
||||||
|
const char * pchar_ascii;
|
||||||
|
const wchar_t * pchar_unicode;
|
||||||
|
|
||||||
|
|
||||||
|
/*
|
||||||
|
pointers to WTextStream iterators
|
||||||
|
if set then both of them should be set
|
||||||
|
*/
|
||||||
|
WTextStream::const_iterator * wtext_stream_iterator;
|
||||||
|
WTextStream::const_iterator * wtext_stream_iterator_end;
|
||||||
|
|
||||||
|
|
||||||
|
/*
|
||||||
|
pointers to TextStream iterators
|
||||||
|
if set then both of them should be set
|
||||||
|
*/
|
||||||
|
TextStream::const_iterator * text_stream_iterator;
|
||||||
|
TextStream::const_iterator * text_stream_iterator_end;
|
||||||
|
|
||||||
|
|
||||||
|
/*
|
||||||
|
last read char
|
||||||
|
or -1 if the end
|
||||||
|
*/
|
||||||
|
int lastc;
|
||||||
|
|
||||||
|
|
||||||
|
/*
|
||||||
|
current file
|
||||||
|
|
||||||
|
may it would be better to make a pointer?
|
||||||
|
if we parse only a string then there is no sense to have such an object
|
||||||
|
*/
|
||||||
|
std::ifstream file;
|
||||||
|
|
||||||
|
|
||||||
|
/*
|
||||||
|
input file is in UTF-8
|
||||||
|
default: true
|
||||||
|
*/
|
||||||
|
bool input_as_utf8;
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
};
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
#endif
|
|
@ -5,7 +5,7 @@
|
||||||
*/
|
*/
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Copyright (c) 2021, Tomasz Sowa
|
* Copyright (c) 2021-2022, Tomasz Sowa
|
||||||
* All rights reserved.
|
* All rights reserved.
|
||||||
*
|
*
|
||||||
* Redistribution and use in source and binary forms, with or without
|
* Redistribution and use in source and binary forms, with or without
|
||||||
|
@ -41,114 +41,114 @@
|
||||||
namespace pt
|
namespace pt
|
||||||
{
|
{
|
||||||
|
|
||||||
std::string to_str(unsigned long long value, int base)
|
std::string to_str(unsigned long long value, int base, size_t min_width)
|
||||||
{
|
{
|
||||||
std::string res;
|
std::string res;
|
||||||
Toa(value, res, false, base);
|
Toa(value, res, false, base, min_width);
|
||||||
|
|
||||||
return res;
|
return res;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
std::string to_str(long long value, int base)
|
std::string to_str(long long value, int base, size_t min_width)
|
||||||
{
|
{
|
||||||
std::string res;
|
std::string res;
|
||||||
Toa(value, res, false, base);
|
Toa(value, res, false, base, min_width);
|
||||||
|
|
||||||
return res;
|
return res;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
std::string to_str(unsigned long value, int base)
|
std::string to_str(unsigned long value, int base, size_t min_width)
|
||||||
{
|
{
|
||||||
return to_str(static_cast<unsigned long long>(value), base);
|
return to_str(static_cast<unsigned long long>(value), base, min_width);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
std::string to_str(long value, int base)
|
std::string to_str(long value, int base, size_t min_width)
|
||||||
{
|
{
|
||||||
return to_str(static_cast<long long>(value), base);
|
return to_str(static_cast<long long>(value), base, min_width);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
std::string to_str(unsigned int value, int base)
|
std::string to_str(unsigned int value, int base, size_t min_width)
|
||||||
{
|
{
|
||||||
return to_str(static_cast<unsigned long long>(value), base);
|
return to_str(static_cast<unsigned long long>(value), base, min_width);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
std::string to_str(int value, int base)
|
std::string to_str(int value, int base, size_t min_width)
|
||||||
{
|
{
|
||||||
return to_str(static_cast<long long>(value), base);
|
return to_str(static_cast<long long>(value), base, min_width);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
std::string to_str(unsigned short value, int base)
|
std::string to_str(unsigned short value, int base, size_t min_width)
|
||||||
{
|
{
|
||||||
return to_str(static_cast<unsigned long long>(value), base);
|
return to_str(static_cast<unsigned long long>(value), base, min_width);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
std::string to_str(short value, int base)
|
std::string to_str(short value, int base, size_t min_width)
|
||||||
{
|
{
|
||||||
return to_str(static_cast<long long>(value), base);
|
return to_str(static_cast<long long>(value), base, min_width);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
std::wstring to_wstr(unsigned long long value, int base)
|
std::wstring to_wstr(unsigned long long value, int base, size_t min_width)
|
||||||
{
|
{
|
||||||
std::wstring res;
|
std::wstring res;
|
||||||
Toa(value, res, false, base);
|
Toa(value, res, false, base, min_width);
|
||||||
|
|
||||||
return res;
|
return res;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
std::wstring to_wstr(long long value, int base)
|
std::wstring to_wstr(long long value, int base, size_t min_width)
|
||||||
{
|
{
|
||||||
std::wstring res;
|
std::wstring res;
|
||||||
Toa(value, res, false, base);
|
Toa(value, res, false, base, min_width);
|
||||||
|
|
||||||
return res;
|
return res;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
std::wstring to_wstr(unsigned long value, int base)
|
std::wstring to_wstr(unsigned long value, int base, size_t min_width)
|
||||||
{
|
{
|
||||||
return to_wstr(static_cast<unsigned long long>(value), base);
|
return to_wstr(static_cast<unsigned long long>(value), base, min_width);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
std::wstring to_wstr(long value, int base)
|
std::wstring to_wstr(long value, int base, size_t min_width)
|
||||||
{
|
{
|
||||||
return to_wstr(static_cast<long long>(value), base);
|
return to_wstr(static_cast<long long>(value), base, min_width);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
std::wstring to_wstr(unsigned int value, int base)
|
std::wstring to_wstr(unsigned int value, int base, size_t min_width)
|
||||||
{
|
{
|
||||||
return to_wstr(static_cast<unsigned long long>(value), base);
|
return to_wstr(static_cast<unsigned long long>(value), base, min_width);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
std::wstring to_wstr(int value, int base)
|
std::wstring to_wstr(int value, int base, size_t min_width)
|
||||||
{
|
{
|
||||||
return to_wstr(static_cast<long long>(value), base);
|
return to_wstr(static_cast<long long>(value), base, min_width);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
std::wstring to_wstr(unsigned short value, int base)
|
std::wstring to_wstr(unsigned short value, int base, size_t min_width)
|
||||||
{
|
{
|
||||||
return to_wstr(static_cast<unsigned long long>(value), base);
|
return to_wstr(static_cast<unsigned long long>(value), base, min_width);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
std::wstring to_wstr(short value, int base)
|
std::wstring to_wstr(short value, int base, size_t min_width)
|
||||||
{
|
{
|
||||||
return to_wstr(static_cast<long long>(value), base);
|
return to_wstr(static_cast<long long>(value), base, min_width);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
|
@ -5,7 +5,7 @@
|
||||||
*/
|
*/
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Copyright (c) 2012-2021, Tomasz Sowa
|
* Copyright (c) 2012-2022, Tomasz Sowa
|
||||||
* All rights reserved.
|
* All rights reserved.
|
||||||
*
|
*
|
||||||
* Redistribution and use in source and binary forms, with or without
|
* Redistribution and use in source and binary forms, with or without
|
||||||
|
@ -52,8 +52,9 @@ namespace pt
|
||||||
|
|
||||||
// if the buffer is too small it will be terminated at the beginning (empty string)
|
// if the buffer is too small it will be terminated at the beginning (empty string)
|
||||||
// and the function returns false
|
// and the function returns false
|
||||||
|
// min_width - if greater than zero then it is used for zero padding
|
||||||
template<class CharType>
|
template<class CharType>
|
||||||
bool Toa(unsigned long long value, CharType * buffer, size_t buf_len, int base = 10, size_t * len_out = 0)
|
bool Toa(unsigned long long value, CharType * buffer, size_t buf_len, int base = 10, size_t * len_out = nullptr, size_t min_width = 0)
|
||||||
{
|
{
|
||||||
size_t i1, i2;
|
size_t i1, i2;
|
||||||
long rest;
|
long rest;
|
||||||
|
@ -77,6 +78,14 @@ long rest;
|
||||||
}
|
}
|
||||||
while(value != 0 && i2 < buf_len);
|
while(value != 0 && i2 < buf_len);
|
||||||
|
|
||||||
|
if( min_width > 0 )
|
||||||
|
{
|
||||||
|
for( ; i2 < min_width && i2 < buf_len ; ++i2)
|
||||||
|
{
|
||||||
|
buffer[i2] = '0';
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
if( i2 >= buf_len )
|
if( i2 >= buf_len )
|
||||||
{
|
{
|
||||||
buffer[0] = 0; // ops, the buffer was too small
|
buffer[0] = 0; // ops, the buffer was too small
|
||||||
|
@ -106,7 +115,7 @@ return true;
|
||||||
// if the buffer is too small it will be terminated at the beginning (empty string)
|
// if the buffer is too small it will be terminated at the beginning (empty string)
|
||||||
// and the function returns false
|
// and the function returns false
|
||||||
template<class CharType>
|
template<class CharType>
|
||||||
bool Toa(long long value, CharType * buffer, size_t buf_len, int base = 10, size_t * len_out = 0)
|
bool Toa(long long value, CharType * buffer, size_t buf_len, int base = 10, size_t * len_out = nullptr, size_t min_width = 0)
|
||||||
{
|
{
|
||||||
if( len_out )
|
if( len_out )
|
||||||
*len_out = 0;
|
*len_out = 0;
|
||||||
|
@ -126,7 +135,7 @@ bool Toa(long long value, CharType * buffer, size_t buf_len, int base = 10, size
|
||||||
is_sign = true;
|
is_sign = true;
|
||||||
}
|
}
|
||||||
|
|
||||||
bool res = Toa(static_cast<unsigned long long>(value), buf, buf_len, base, len_out);
|
bool res = Toa(static_cast<unsigned long long>(value), buf, buf_len, base, len_out, min_width);
|
||||||
|
|
||||||
if( res )
|
if( res )
|
||||||
{
|
{
|
||||||
|
@ -146,44 +155,44 @@ return res;
|
||||||
|
|
||||||
|
|
||||||
template<class CharType>
|
template<class CharType>
|
||||||
bool Toa(unsigned long value, CharType * buffer, size_t buf_len, int base = 10, size_t * len_out = 0)
|
bool Toa(unsigned long value, CharType * buffer, size_t buf_len, int base = 10, size_t * len_out = 0, size_t min_width = 0)
|
||||||
{
|
{
|
||||||
return Toa(static_cast<unsigned long long>(value), buffer, buf_len, base, len_out);
|
return Toa(static_cast<unsigned long long>(value), buffer, buf_len, base, len_out, min_width);
|
||||||
}
|
}
|
||||||
|
|
||||||
template<class CharType>
|
template<class CharType>
|
||||||
bool Toa(long value, CharType * buffer, size_t buf_len, int base = 10, size_t * len_out = 0)
|
bool Toa(long value, CharType * buffer, size_t buf_len, int base = 10, size_t * len_out = 0, size_t min_width = 0)
|
||||||
{
|
{
|
||||||
return Toa(static_cast<long long>(value), buffer, buf_len, base, len_out);
|
return Toa(static_cast<long long>(value), buffer, buf_len, base, len_out, min_width);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
template<class CharType>
|
template<class CharType>
|
||||||
bool Toa(unsigned int value, CharType * buffer, size_t buf_len, int base = 10, size_t * len_out = 0)
|
bool Toa(unsigned int value, CharType * buffer, size_t buf_len, int base = 10, size_t * len_out = 0, size_t min_width = 0)
|
||||||
{
|
{
|
||||||
return Toa(static_cast<unsigned long long>(value), buffer, buf_len, base, len_out);
|
return Toa(static_cast<unsigned long long>(value), buffer, buf_len, base, len_out, min_width);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
template<class CharType>
|
template<class CharType>
|
||||||
bool Toa(int value, CharType * buffer, size_t buf_len, int base = 10, size_t * len_out = 0)
|
bool Toa(int value, CharType * buffer, size_t buf_len, int base = 10, size_t * len_out = 0, size_t min_width = 0)
|
||||||
{
|
{
|
||||||
return Toa(static_cast<long long>(value), buffer, buf_len, base, len_out);
|
return Toa(static_cast<long long>(value), buffer, buf_len, base, len_out, min_width);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
/*
 * 'unsigned short' variant: widen the value to 'unsigned long long' and let that overload do the conversion
 * buffer, buf_len, base, len_out and min_width are passed through untouched
 * and the status of the called overload is returned as is
 */
template<class CharType>
bool Toa(unsigned short value, CharType * buffer, size_t buf_len, int base = 10, size_t * len_out = 0, size_t min_width = 0)
{
	const unsigned long long wide_value = static_cast<unsigned long long>(value);

	return Toa(wide_value, buffer, buf_len, base, len_out, min_width);
}
|
||||||
|
|
||||||
|
|
||||||
/*
 * 'short' variant: widen the value to 'long long' and let that overload do the conversion
 * buffer, buf_len, base, len_out and min_width are passed through untouched
 * and the status of the called overload is returned as is
 */
template<class CharType>
bool Toa(short value, CharType * buffer, size_t buf_len, int base = 10, size_t * len_out = 0, size_t min_width = 0)
{
	const long long wide_value = static_cast<long long>(value);

	return Toa(wide_value, buffer, buf_len, base, len_out, min_width);
}
|
||||||
|
|
||||||
|
|
||||||
|
@ -192,7 +201,7 @@ bool Toa(short value, CharType * buffer, size_t buf_len, int base = 10, size_t *
|
||||||
|
|
||||||
|
|
||||||
template<class StringType>
|
template<class StringType>
|
||||||
void Toa(unsigned long long value, StringType & res, bool clear_string = true, int base = 10)
|
void Toa(unsigned long long value, StringType & res, bool clear_string = true, int base = 10, size_t min_width = 0)
|
||||||
{
|
{
|
||||||
typename StringType::value_type buffer[50];
|
typename StringType::value_type buffer[50];
|
||||||
size_t buffer_len = sizeof(buffer) / sizeof(wchar_t);
|
size_t buffer_len = sizeof(buffer) / sizeof(wchar_t);
|
||||||
|
@ -204,13 +213,13 @@ void Toa(unsigned long long value, StringType & res, bool clear_string = true, i
|
||||||
* the size of the buffer is sufficient so the status should always be true
|
* the size of the buffer is sufficient so the status should always be true
|
||||||
*/
|
*/
|
||||||
size_t len_out;
|
size_t len_out;
|
||||||
Toa(value, buffer, buffer_len, base, &len_out);
|
Toa(value, buffer, buffer_len, base, &len_out, min_width);
|
||||||
res.append(buffer, len_out);
|
res.append(buffer, len_out);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
template<class StringType>
|
template<class StringType>
|
||||||
void Toa(long long value, StringType & res, bool clear_string = true, int base = 10)
|
void Toa(long long value, StringType & res, bool clear_string = true, int base = 10, size_t min_width = 0)
|
||||||
{
|
{
|
||||||
typename StringType::value_type buffer[50];
|
typename StringType::value_type buffer[50];
|
||||||
size_t buffer_len = sizeof(buffer) / sizeof(wchar_t);
|
size_t buffer_len = sizeof(buffer) / sizeof(wchar_t);
|
||||||
|
@ -222,71 +231,71 @@ void Toa(long long value, StringType & res, bool clear_string = true, int base =
|
||||||
* the size of the buffer is sufficient so the status should always be true
|
* the size of the buffer is sufficient so the status should always be true
|
||||||
*/
|
*/
|
||||||
size_t len_out;
|
size_t len_out;
|
||||||
Toa(value, buffer, buffer_len, base, &len_out);
|
Toa(value, buffer, buffer_len, base, &len_out, min_width);
|
||||||
res.append(buffer, len_out);
|
res.append(buffer, len_out);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
/*
 * 'unsigned long' variant writing to a string object
 * the value is widened to 'unsigned long long' and the call is forwarded
 * together with res, clear_string, base and min_width
 */
template<class StringType>
void Toa(unsigned long value, StringType & res, bool clear_string = true, int base = 10, size_t min_width = 0)
{
	const unsigned long long wide_value = static_cast<unsigned long long>(value);

	Toa(wide_value, res, clear_string, base, min_width);
}
|
||||||
|
|
||||||
|
|
||||||
/*
 * 'long' variant writing to a string object
 * the value is widened to 'long long' and the call is forwarded
 * together with res, clear_string, base and min_width
 */
template<class StringType>
void Toa(long value, StringType & res, bool clear_string = true, int base = 10, size_t min_width = 0)
{
	const long long wide_value = static_cast<long long>(value);

	Toa(wide_value, res, clear_string, base, min_width);
}
|
||||||
|
|
||||||
|
|
||||||
/*
 * 'unsigned int' variant writing to a string object
 * the value is widened to 'unsigned long long' and the call is forwarded
 * together with res, clear_string, base and min_width
 */
template<class StringType>
void Toa(unsigned int value, StringType & res, bool clear_string = true, int base = 10, size_t min_width = 0)
{
	const unsigned long long wide_value = static_cast<unsigned long long>(value);

	Toa(wide_value, res, clear_string, base, min_width);
}
|
||||||
|
|
||||||
|
|
||||||
/*
 * 'int' variant writing to a string object
 * the value is widened to 'long long' and the call is forwarded
 * together with res, clear_string, base and min_width
 */
template<class StringType>
void Toa(int value, StringType & res, bool clear_string = true, int base = 10, size_t min_width = 0)
{
	const long long wide_value = static_cast<long long>(value);

	Toa(wide_value, res, clear_string, base, min_width);
}
|
||||||
|
|
||||||
|
|
||||||
/*
 * 'unsigned short' variant writing to a string object
 * the value is widened to 'unsigned long long' and the call is forwarded
 * together with res, clear_string, base and min_width
 */
template<class StringType>
void Toa(unsigned short value, StringType & res, bool clear_string = true, int base = 10, size_t min_width = 0)
{
	const unsigned long long wide_value = static_cast<unsigned long long>(value);

	Toa(wide_value, res, clear_string, base, min_width);
}
|
||||||
|
|
||||||
|
|
||||||
/*
 * 'short' variant writing to a string object
 * the value is widened to 'long long' and the call is forwarded
 * together with res, clear_string, base and min_width
 */
template<class StringType>
void Toa(short value, StringType & res, bool clear_string = true, int base = 10, size_t min_width = 0)
{
	const long long wide_value = static_cast<long long>(value);

	Toa(wide_value, res, clear_string, base, min_width);
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
/*
 * to_str(): integer -> std::string, one overload per built-in integer type
 *
 * base defaults to 10 and min_width defaults to 0
 * NOTE(review): min_width is presumably the minimal number of characters
 * of the result - confirm against the definitions
 */
std::string to_str(unsigned long long value, int base = 10, size_t min_width = 0);
std::string to_str(long long value, int base = 10, size_t min_width = 0);
std::string to_str(unsigned long value, int base = 10, size_t min_width = 0);
std::string to_str(long value, int base = 10, size_t min_width = 0);
std::string to_str(unsigned int value, int base = 10, size_t min_width = 0);
std::string to_str(int value, int base = 10, size_t min_width = 0);
std::string to_str(unsigned short value, int base = 10, size_t min_width = 0);
std::string to_str(short value, int base = 10, size_t min_width = 0);
|
||||||
|
|
||||||
/*
 * to_wstr(): integer -> std::wstring, one overload per built-in integer type
 *
 * base defaults to 10 and min_width defaults to 0
 * NOTE(review): min_width is presumably the minimal number of characters
 * of the result - confirm against the definitions
 */
std::wstring to_wstr(unsigned long long value, int base = 10, size_t min_width = 0);
std::wstring to_wstr(long long value, int base = 10, size_t min_width = 0);
std::wstring to_wstr(unsigned long value, int base = 10, size_t min_width = 0);
std::wstring to_wstr(long value, int base = 10, size_t min_width = 0);
std::wstring to_wstr(unsigned int value, int base = 10, size_t min_width = 0);
std::wstring to_wstr(int value, int base = 10, size_t min_width = 0);
std::wstring to_wstr(unsigned short value, int base = 10, size_t min_width = 0);
std::wstring to_wstr(short value, int base = 10, size_t min_width = 0);
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
|
@ -5,7 +5,7 @@
|
||||||
*/
|
*/
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Copyright (c) 2017, Tomasz Sowa
|
* Copyright (c) 2017-2022, Tomasz Sowa
|
||||||
* All rights reserved.
|
* All rights reserved.
|
||||||
*
|
*
|
||||||
* Redistribution and use in source and binary forms, with or without
|
* Redistribution and use in source and binary forms, with or without
|
||||||
|
@ -36,6 +36,8 @@
|
||||||
*/
|
*/
|
||||||
|
|
||||||
#include "misc.h"
|
#include "misc.h"
|
||||||
|
#include "inttostr.h"
|
||||||
|
#include "utf8/utf8.h"
|
||||||
|
|
||||||
|
|
||||||
namespace pt
|
namespace pt
|
||||||
|
@ -50,6 +52,363 @@ void SetOverflow(bool * was_overflow, bool val)
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
void esc_to_json_uformat(wchar_t val, Stream & out)
|
||||||
|
{
|
||||||
|
char buf[10];
|
||||||
|
size_t len;
|
||||||
|
|
||||||
|
Toa((unsigned long)val, buf, sizeof(buf)/sizeof(char), 16, &len);
|
||||||
|
|
||||||
|
out << "\\u";
|
||||||
|
|
||||||
|
if( len < 4 )
|
||||||
|
{
|
||||||
|
for(size_t i=0 ; i < (4-len) ; ++i)
|
||||||
|
{
|
||||||
|
out << '0';
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
out << buf;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
/*
|
||||||
|
* return true if the val character was escaped and put to the out stream
|
||||||
|
* if the character is invalid for such a stream then only return true
|
||||||
|
* but not put it to the stream
|
||||||
|
*/
|
||||||
|
bool try_esc_to_json(wchar_t val, Stream & out)
|
||||||
|
{
|
||||||
|
bool status = false;
|
||||||
|
|
||||||
|
if( val == '\r' )
|
||||||
|
{
|
||||||
|
out << '\\' << 'r';
|
||||||
|
status = true;
|
||||||
|
}
|
||||||
|
else
|
||||||
|
if( val == '\n' )
|
||||||
|
{
|
||||||
|
out << '\\' << 'n';
|
||||||
|
status = true;
|
||||||
|
}
|
||||||
|
else
|
||||||
|
if( val == '\t' )
|
||||||
|
{
|
||||||
|
out << '\\' << 't';
|
||||||
|
status = true;
|
||||||
|
}
|
||||||
|
else
|
||||||
|
if( val == 0x08 )
|
||||||
|
{
|
||||||
|
out << '\\' << 'b';
|
||||||
|
status = true;
|
||||||
|
}
|
||||||
|
else
|
||||||
|
if( val == 0x0c )
|
||||||
|
{
|
||||||
|
out << '\\' << 'f';
|
||||||
|
status = true;
|
||||||
|
}
|
||||||
|
else
|
||||||
|
if( val == '\\' )
|
||||||
|
{
|
||||||
|
out << '\\' << '\\';
|
||||||
|
status = true;
|
||||||
|
}
|
||||||
|
else
|
||||||
|
if( val == '"' )
|
||||||
|
{
|
||||||
|
out << '\\' << '\"';
|
||||||
|
status = true;
|
||||||
|
}
|
||||||
|
else
|
||||||
|
if( val < 32 )
|
||||||
|
{
|
||||||
|
esc_to_json_uformat(val, out);
|
||||||
|
status = true;
|
||||||
|
}
|
||||||
|
|
||||||
|
return status;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
void esc_to_json(wchar_t val, Stream & out)
|
||||||
|
{
|
||||||
|
if( !try_esc_to_json(val, out) )
|
||||||
|
{
|
||||||
|
out << val;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
void esc_to_json(char val, Stream & out)
|
||||||
|
{
|
||||||
|
if( !try_esc_to_json((wchar_t)(unsigned char)val, out) )
|
||||||
|
{
|
||||||
|
out << val;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
void esc_to_json(const char * c, pt::Stream & out)
|
||||||
|
{
|
||||||
|
for(size_t i = 0 ; c[i] != 0 ; ++i)
|
||||||
|
{
|
||||||
|
esc_to_json(c[i], out);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
void esc_to_json(const char * c, std::size_t len, pt::Stream & out)
|
||||||
|
{
|
||||||
|
for(size_t i = 0 ; i < len ; ++i)
|
||||||
|
{
|
||||||
|
esc_to_json(c[i], out);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
void esc_to_json(const wchar_t * c, pt::Stream & out)
|
||||||
|
{
|
||||||
|
for(size_t i = 0 ; c[i] != 0 ; ++i)
|
||||||
|
{
|
||||||
|
esc_to_json(c[i], out);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
void esc_to_json(const wchar_t * c, size_t len, pt::Stream & out)
|
||||||
|
{
|
||||||
|
for(size_t i = 0 ; i < len ; ++i)
|
||||||
|
{
|
||||||
|
esc_to_json(c[i], out);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
void esc_to_json(const std::string & in, Stream & out)
|
||||||
|
{
|
||||||
|
esc_to_json(in.c_str(), in.size(), out);
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
void esc_to_json(const std::wstring & in, Stream & out)
|
||||||
|
{
|
||||||
|
esc_to_json(in.c_str(), in.size(), out);
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
/*
|
||||||
|
* return true if the val character was escaped and put to the out stream
|
||||||
|
* if the character is invalid for such a stream then only return true
|
||||||
|
* but not put it to the stream
|
||||||
|
*/
|
||||||
|
bool try_esc_to_xml(wchar_t val, Stream & out)
|
||||||
|
{
|
||||||
|
bool status = false;
|
||||||
|
|
||||||
|
if( val == 0 )
|
||||||
|
{
|
||||||
|
// null character is invalid in XML 1.0 and 1.1
|
||||||
|
// https://en.wikipedia.org/wiki/Valid_characters_in_XML
|
||||||
|
// return true but not put the char to the out stream
|
||||||
|
status = true;
|
||||||
|
}
|
||||||
|
else
|
||||||
|
if( val == '<')
|
||||||
|
{
|
||||||
|
out << "<";
|
||||||
|
status = true;
|
||||||
|
}
|
||||||
|
else
|
||||||
|
if( val == '>')
|
||||||
|
{
|
||||||
|
out << ">";
|
||||||
|
status = true;
|
||||||
|
}
|
||||||
|
else
|
||||||
|
if( val == '&')
|
||||||
|
{
|
||||||
|
out << "&";
|
||||||
|
status = true;
|
||||||
|
}
|
||||||
|
else
|
||||||
|
if( val == '"')
|
||||||
|
{
|
||||||
|
out << """;
|
||||||
|
status = true;
|
||||||
|
}
|
||||||
|
|
||||||
|
return status;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
void esc_to_xml(wchar_t val, Stream & out)
|
||||||
|
{
|
||||||
|
if( !try_esc_to_xml(val, out) )
|
||||||
|
{
|
||||||
|
out << val;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
void esc_to_xml(char val, Stream & out)
|
||||||
|
{
|
||||||
|
if( !try_esc_to_xml((wchar_t)(unsigned char)val, out) )
|
||||||
|
{
|
||||||
|
out << val;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
void esc_to_xml(const char * c, pt::Stream & out)
|
||||||
|
{
|
||||||
|
for(size_t i = 0 ; c[i] != 0 ; ++i)
|
||||||
|
{
|
||||||
|
esc_to_xml(c[i], out);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
void esc_to_xml(const char * c, std::size_t len, pt::Stream & out)
|
||||||
|
{
|
||||||
|
for(size_t i = 0 ; i < len ; ++i)
|
||||||
|
{
|
||||||
|
esc_to_xml(c[i], out);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
void esc_to_xml(const wchar_t * c, pt::Stream & out)
|
||||||
|
{
|
||||||
|
for(size_t i = 0 ; c[i] != 0 ; ++i)
|
||||||
|
{
|
||||||
|
esc_to_xml(c[i], out);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
void esc_to_xml(const wchar_t * c, size_t len, pt::Stream & out)
|
||||||
|
{
|
||||||
|
for(size_t i = 0 ; i < len ; ++i)
|
||||||
|
{
|
||||||
|
esc_to_xml(c[i], out);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
void esc_to_xml(const std::string & in, Stream & out)
|
||||||
|
{
|
||||||
|
esc_to_xml(in.c_str(), in.size(), out);
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
void esc_to_xml(const std::wstring & in, Stream & out)
|
||||||
|
{
|
||||||
|
esc_to_xml(in.c_str(), in.size(), out);
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
/*
|
||||||
|
* return true if the val character was escaped and put to the out stream
|
||||||
|
* if the character is invalid for such a stream then only return true
|
||||||
|
* but not put it to the stream
|
||||||
|
*/
|
||||||
|
bool try_esc_to_csv(wchar_t val, pt::Stream & out)
|
||||||
|
{
|
||||||
|
bool status = false;
|
||||||
|
|
||||||
|
if( val == 0 )
|
||||||
|
{
|
||||||
|
// null characters are invalid in text files
|
||||||
|
// return true but not put to the out stream
|
||||||
|
status = true;
|
||||||
|
}
|
||||||
|
else
|
||||||
|
if( val == '"' )
|
||||||
|
{
|
||||||
|
out << "\"\"";
|
||||||
|
status = true;
|
||||||
|
}
|
||||||
|
|
||||||
|
return status;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
void esc_to_csv(wchar_t val, pt::Stream & out)
|
||||||
|
{
|
||||||
|
if( !try_esc_to_csv(val, out) )
|
||||||
|
{
|
||||||
|
out << val;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
void esc_to_csv(char val, Stream & out)
|
||||||
|
{
|
||||||
|
if( !try_esc_to_csv((wchar_t)(unsigned char)val, out) )
|
||||||
|
{
|
||||||
|
out << val;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
void esc_to_csv(const char * c, pt::Stream & out)
|
||||||
|
{
|
||||||
|
for(size_t i = 0 ; c[i] != 0 ; ++i)
|
||||||
|
{
|
||||||
|
esc_to_csv(c[i], out);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
void esc_to_csv(const char * c, std::size_t len, pt::Stream & out)
|
||||||
|
{
|
||||||
|
for(size_t i = 0 ; i < len ; ++i)
|
||||||
|
{
|
||||||
|
esc_to_csv(c[i], out);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
void esc_to_csv(const wchar_t * c, pt::Stream & out)
|
||||||
|
{
|
||||||
|
for(size_t i = 0 ; c[i] != 0 ; ++i)
|
||||||
|
{
|
||||||
|
esc_to_csv(c[i], out);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
void esc_to_csv(const wchar_t * c, size_t len, pt::Stream & out)
|
||||||
|
{
|
||||||
|
for(size_t i = 0 ; i < len ; ++i)
|
||||||
|
{
|
||||||
|
esc_to_csv(c[i], out);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
void esc_to_csv(const std::string & in, Stream & out)
|
||||||
|
{
|
||||||
|
esc_to_csv(in.c_str(), in.size(), out);
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
void esc_to_csv(const std::wstring & in, Stream & out)
|
||||||
|
{
|
||||||
|
esc_to_csv(in.c_str(), in.size(), out);
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -5,7 +5,7 @@
|
||||||
*/
|
*/
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Copyright (c) 2017, Tomasz Sowa
|
* Copyright (c) 2017-2022, Tomasz Sowa
|
||||||
* All rights reserved.
|
* All rights reserved.
|
||||||
*
|
*
|
||||||
* Redistribution and use in source and binary forms, with or without
|
* Redistribution and use in source and binary forms, with or without
|
||||||
|
@ -40,6 +40,9 @@
|
||||||
|
|
||||||
#include <limits>
|
#include <limits>
|
||||||
#include "text.h"
|
#include "text.h"
|
||||||
|
#include "textstream/stream.h"
|
||||||
|
#include "textstream/types.h"
|
||||||
|
#include "utf8/utf8_stream.h"
|
||||||
|
|
||||||
|
|
||||||
namespace pt
|
namespace pt
|
||||||
|
@ -47,6 +50,78 @@ namespace pt
|
||||||
|
|
||||||
void SetOverflow(bool * was_overflow, bool val);
|
void SetOverflow(bool * was_overflow, bool val);
|
||||||
|
|
||||||
|
bool try_esc_to_json(wchar_t val, Stream & out);
|
||||||
|
void esc_to_json(wchar_t val, Stream & out);
|
||||||
|
void esc_to_json(char val, Stream & out);
|
||||||
|
void esc_to_json(const char * c, pt::Stream & out);
|
||||||
|
void esc_to_json(const char * c, std::size_t len, Stream & out);
|
||||||
|
void esc_to_json(const wchar_t * c, Stream & out);
|
||||||
|
void esc_to_json(const wchar_t * c, size_t len, pt::Stream & out);
|
||||||
|
void esc_to_json(const std::string & in, Stream & out);
|
||||||
|
void esc_to_json(const std::wstring & in, Stream & out);
|
||||||
|
|
||||||
|
bool try_esc_to_xml(wchar_t val, Stream & out);
|
||||||
|
void esc_to_xml(wchar_t c, pt::Stream & out);
|
||||||
|
void esc_to_xml(char c, pt::Stream & out);
|
||||||
|
void esc_to_xml(const char * c, pt::Stream & out);
|
||||||
|
void esc_to_xml(const char * c, std::size_t len, pt::Stream & out);
|
||||||
|
void esc_to_xml(const wchar_t * c, pt::Stream & out);
|
||||||
|
void esc_to_xml(const wchar_t * c, size_t len, pt::Stream & out);
|
||||||
|
void esc_to_xml(const std::string & in, Stream & out);
|
||||||
|
void esc_to_xml(const std::wstring & in, Stream & out);
|
||||||
|
|
||||||
|
bool try_esc_to_csv(wchar_t val, pt::Stream & out);
|
||||||
|
void esc_to_csv(wchar_t val, Stream & out);
|
||||||
|
void esc_to_csv(char c, pt::Stream & out);
|
||||||
|
void esc_to_csv(const char * c, std::size_t len, Stream & out);
|
||||||
|
void esc_to_csv(const char * c, pt::Stream & out);
|
||||||
|
void esc_to_csv(const char * c, std::size_t len, pt::Stream & out);
|
||||||
|
void esc_to_csv(const wchar_t * c, pt::Stream & out);
|
||||||
|
void esc_to_csv(const wchar_t * c, size_t len, pt::Stream & out);
|
||||||
|
void esc_to_csv(const std::string & in, Stream & out);
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
template<typename StreamType>
|
||||||
|
void esc_to_json(const StreamType & in, Stream & out)
|
||||||
|
{
|
||||||
|
typename StreamType::const_iterator i = in.begin();
|
||||||
|
typename StreamType::const_iterator end = in.end();
|
||||||
|
|
||||||
|
while( i != end )
|
||||||
|
{
|
||||||
|
wchar_t c = i.get_unicode_and_advance(end);
|
||||||
|
esc_to_json(c, out);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
template<typename StreamType>
|
||||||
|
void esc_to_xml(const StreamType & in, Stream & out)
|
||||||
|
{
|
||||||
|
typename StreamType::const_iterator i = in.begin();
|
||||||
|
typename StreamType::const_iterator end = in.end();
|
||||||
|
|
||||||
|
while( i != end )
|
||||||
|
{
|
||||||
|
wchar_t c = i.get_unicode_and_advance(end);
|
||||||
|
esc_to_xml(c, out);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
template<typename StreamType>
|
||||||
|
void esc_to_csv(const StreamType & in, Stream & out)
|
||||||
|
{
|
||||||
|
typename StreamType::const_iterator i = in.begin();
|
||||||
|
typename StreamType::const_iterator end = in.end();
|
||||||
|
|
||||||
|
while( i != end )
|
||||||
|
{
|
||||||
|
wchar_t c = i.get_unicode_and_advance(end);
|
||||||
|
esc_to_csv(c, out);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -44,10 +44,17 @@ namespace pt
|
||||||
{
|
{
|
||||||
|
|
||||||
|
|
||||||
|
// a new parser treats its input as UTF-8 until the flag is changed
CSVParser::CSVParser()
{
	this->input_as_utf8 = true;
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
CSVParser::Status CSVParser::parse_file(const char * file_name, Space & out_space)
|
CSVParser::Status CSVParser::parse_file(const char * file_name, Space & out_space)
|
||||||
{
|
{
|
||||||
|
clear_input_flags();
|
||||||
|
|
||||||
reading_from_file = true;
|
reading_from_file = true;
|
||||||
space = &out_space;
|
space = &out_space;
|
||||||
|
|
||||||
|
@ -98,11 +105,10 @@ CSVParser::Status CSVParser::parse_file(const std::wstring & file_name, Space &
|
||||||
|
|
||||||
CSVParser::Status CSVParser::parse(const char * str, Space & out_space)
|
CSVParser::Status CSVParser::parse(const char * str, Space & out_space)
|
||||||
{
|
{
|
||||||
reading_from_file = false;
|
clear_input_flags();
|
||||||
reading_from_wchar_string = false;
|
|
||||||
pchar_ascii = str;
|
pchar_ascii = str;
|
||||||
pchar_unicode = 0;
|
space = &out_space;
|
||||||
space = &out_space;
|
|
||||||
|
|
||||||
parse();
|
parse();
|
||||||
|
|
||||||
|
@ -119,11 +125,10 @@ CSVParser::Status CSVParser::parse(const std::string & str, Space & out_space)
|
||||||
|
|
||||||
CSVParser::Status CSVParser::parse(const wchar_t * str, Space & out_space)
|
CSVParser::Status CSVParser::parse(const wchar_t * str, Space & out_space)
|
||||||
{
|
{
|
||||||
reading_from_file = false;
|
clear_input_flags();
|
||||||
reading_from_wchar_string = true;
|
|
||||||
pchar_unicode = str;
|
pchar_unicode = str;
|
||||||
pchar_ascii = 0;
|
space = &out_space;
|
||||||
space = &out_space;
|
|
||||||
|
|
||||||
parse();
|
parse();
|
||||||
|
|
||||||
|
@ -285,132 +290,6 @@ bool CSVParser::read_non_quoted_value_to(std::wstring & value)
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
int CSVParser::read_utf8_char()
|
|
||||||
{
|
|
||||||
int c;
|
|
||||||
bool correct;
|
|
||||||
|
|
||||||
lastc = -1;
|
|
||||||
|
|
||||||
do
|
|
||||||
{
|
|
||||||
utf8_to_int(file, c, correct);
|
|
||||||
|
|
||||||
if( !file )
|
|
||||||
return lastc;
|
|
||||||
}
|
|
||||||
while( !correct );
|
|
||||||
|
|
||||||
lastc = c;
|
|
||||||
|
|
||||||
if( lastc == '\n' )
|
|
||||||
++line;
|
|
||||||
|
|
||||||
return lastc;
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
int CSVParser::read_ascii_char()
|
|
||||||
{
|
|
||||||
lastc = file.get();
|
|
||||||
|
|
||||||
if( lastc == '\n' )
|
|
||||||
++line;
|
|
||||||
|
|
||||||
return lastc;
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
int CSVParser::read_char_from_wchar_string()
|
|
||||||
{
|
|
||||||
if( *pchar_unicode == 0 )
|
|
||||||
lastc = -1;
|
|
||||||
else
|
|
||||||
lastc = *(pchar_unicode++);
|
|
||||||
|
|
||||||
if( lastc == '\n' )
|
|
||||||
++line;
|
|
||||||
|
|
||||||
return lastc;
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
int CSVParser::read_char_from_utf8_string()
|
|
||||||
{
|
|
||||||
int c;
|
|
||||||
bool correct;
|
|
||||||
|
|
||||||
lastc = -1;
|
|
||||||
|
|
||||||
do
|
|
||||||
{
|
|
||||||
size_t len = utf8_to_int(pchar_ascii, c, correct);
|
|
||||||
pchar_ascii += len;
|
|
||||||
}
|
|
||||||
while( *pchar_ascii && !correct );
|
|
||||||
|
|
||||||
if( correct )
|
|
||||||
lastc = c;
|
|
||||||
|
|
||||||
if( lastc == '\n' )
|
|
||||||
++line;
|
|
||||||
|
|
||||||
return lastc;
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
int CSVParser::read_char_from_ascii_string()
|
|
||||||
{
|
|
||||||
if( *pchar_ascii == 0 )
|
|
||||||
lastc = -1;
|
|
||||||
else
|
|
||||||
lastc = *(pchar_ascii++);
|
|
||||||
|
|
||||||
if( lastc == '\n' )
|
|
||||||
++line;
|
|
||||||
|
|
||||||
return lastc;
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
int CSVParser::read_char_no_escape()
|
|
||||||
{
|
|
||||||
if( reading_from_file )
|
|
||||||
{
|
|
||||||
if( input_as_utf8 )
|
|
||||||
return read_utf8_char();
|
|
||||||
else
|
|
||||||
return read_ascii_char();
|
|
||||||
}
|
|
||||||
else
|
|
||||||
{
|
|
||||||
if( reading_from_wchar_string )
|
|
||||||
{
|
|
||||||
return read_char_from_wchar_string();
|
|
||||||
}
|
|
||||||
else
|
|
||||||
{
|
|
||||||
if( input_as_utf8 )
|
|
||||||
return read_char_from_utf8_string();
|
|
||||||
else
|
|
||||||
return read_char_from_ascii_string();
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
int CSVParser::read_char()
|
|
||||||
{
|
|
||||||
return read_char_no_escape();
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
|
@ -38,9 +38,11 @@
|
||||||
#ifndef headerfile_picotools_csv_csvparser
|
#ifndef headerfile_picotools_csv_csvparser
|
||||||
#define headerfile_picotools_csv_csvparser
|
#define headerfile_picotools_csv_csvparser
|
||||||
|
|
||||||
#include "space/space.h"
|
|
||||||
#include <string>
|
#include <string>
|
||||||
#include <fstream>
|
#include <fstream>
|
||||||
|
#include "space/space.h"
|
||||||
|
#include "convert/baseparser.h"
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
namespace pt
|
namespace pt
|
||||||
|
@ -51,10 +53,12 @@ namespace pt
|
||||||
* https://datatracker.ietf.org/doc/html/rfc4180
|
* https://datatracker.ietf.org/doc/html/rfc4180
|
||||||
*
|
*
|
||||||
*/
|
*/
|
||||||
class CSVParser
|
class CSVParser : public BaseParser
|
||||||
{
|
{
|
||||||
public:
|
public:
|
||||||
|
|
||||||
|
CSVParser();
|
||||||
|
|
||||||
enum Status
|
enum Status
|
||||||
{
|
{
|
||||||
ok,
|
ok,
|
||||||
|
@ -85,53 +89,6 @@ protected:
|
||||||
|
|
||||||
Space * space;
|
Space * space;
|
||||||
|
|
||||||
/*
|
|
||||||
true if parse_file() method was called
|
|
||||||
false if parse() was called
|
|
||||||
*/
|
|
||||||
bool reading_from_file;
|
|
||||||
|
|
||||||
/*
|
|
||||||
true if parse(wchar_t *) or parse(std::wstring&) was called
|
|
||||||
*/
|
|
||||||
bool reading_from_wchar_string;
|
|
||||||
|
|
||||||
/*
|
|
||||||
pointers to the current character
|
|
||||||
if parse() is being used
|
|
||||||
*/
|
|
||||||
const char * pchar_ascii;
|
|
||||||
const wchar_t * pchar_unicode;
|
|
||||||
|
|
||||||
|
|
||||||
/*
|
|
||||||
last read char
|
|
||||||
or -1 if the end
|
|
||||||
*/
|
|
||||||
int lastc;
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
/*
|
|
||||||
a number of a line in which there is a syntax_error
|
|
||||||
*/
|
|
||||||
int line;
|
|
||||||
|
|
||||||
/*
|
|
||||||
current file
|
|
||||||
|
|
||||||
may it would be better to make a pointer?
|
|
||||||
if we parse only a string then there is no sense to have such an object
|
|
||||||
*/
|
|
||||||
std::ifstream file;
|
|
||||||
|
|
||||||
/*
|
|
||||||
input file is in UTF-8
|
|
||||||
default: true
|
|
||||||
*/
|
|
||||||
bool input_as_utf8;
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
void parse();
|
void parse();
|
||||||
|
@ -142,19 +99,6 @@ protected:
|
||||||
bool read_non_quoted_value_to(std::wstring & value);
|
bool read_non_quoted_value_to(std::wstring & value);
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
/*
|
|
||||||
* copied from SpaceParser
|
|
||||||
* may it would be better to have a class with those methods and inherit from it?
|
|
||||||
*/
|
|
||||||
int read_utf8_char();
|
|
||||||
int read_ascii_char();
|
|
||||||
int read_char_from_wchar_string();
|
|
||||||
int read_char_from_utf8_string();
|
|
||||||
int read_char_from_ascii_string();
|
|
||||||
int read_char_no_escape();
|
|
||||||
|
|
||||||
int read_char();
|
|
||||||
};
|
};
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
|
@ -0,0 +1,645 @@
|
||||||
|
/*
|
||||||
|
* This file is a part of PikoTools
|
||||||
|
* and is distributed under the (new) BSD licence.
|
||||||
|
* Author: Tomasz Sowa <t.sowa@ttmath.org>
|
||||||
|
*/
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Copyright (c) 2008-2021, Tomasz Sowa
|
||||||
|
* All rights reserved.
|
||||||
|
*
|
||||||
|
* Redistribution and use in source and binary forms, with or without
|
||||||
|
* modification, are permitted provided that the following conditions are met:
|
||||||
|
*
|
||||||
|
* * Redistributions of source code must retain the above copyright notice,
|
||||||
|
* this list of conditions and the following disclaimer.
|
||||||
|
*
|
||||||
|
* * Redistributions in binary form must reproduce the above copyright
|
||||||
|
* notice, this list of conditions and the following disclaimer in the
|
||||||
|
* documentation and/or other materials provided with the distribution.
|
||||||
|
*
|
||||||
|
* * Neither the name Tomasz Sowa nor the names of contributors to this
|
||||||
|
* project may be used to endorse or promote products derived
|
||||||
|
* from this software without specific prior written permission.
|
||||||
|
*
|
||||||
|
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||||
|
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||||
|
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||||
|
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||||
|
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||||
|
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||||
|
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||||
|
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||||
|
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||||
|
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
|
||||||
|
* THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
|
*/
|
||||||
|
|
||||||
|
#include "bbcodeparser.h"
|
||||||
|
|
||||||
|
|
||||||
|
namespace pt
|
||||||
|
{
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
// return true if both null terminated strings consist of the same characters
bool BBCODEParser::Equal(const wchar_t * str1, const wchar_t * str2)
{
	// walk both strings together up to the first difference or the end of str1
	for( ; *str1 != 0 && *str1 == *str2 ; ++str1, ++str2)
	{
	}

	// equal only when both strings stopped on the same character (the terminating zero)
	return *str1 == *str2;
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
/*
 * returns true if c can be a part of a bbcode tag name:
 * an ascii letter, an asterisk (used in [*]) or an underscore
 */
bool BBCODEParser::IsValidCharForName(int c)
{
    const bool is_lower_letter = (c >= 'a' && c <= 'z');
    const bool is_upper_letter = (c >= 'A' && c <= 'Z');
    const bool is_special      = (c == '*' || c == '_');

    return is_lower_letter || is_upper_letter || is_special;
}
|
||||||
|
|
||||||
|
|
||||||
|
// a bbcode tag starts with '['
bool BBCODEParser::IsOpeningTagMark(wchar_t c)
{
    if( c == '[' )
        return true;

    return false;
}
|
||||||
|
|
||||||
|
|
||||||
|
// a bbcode tag ends with ']'
bool BBCODEParser::IsClosingTagMark(wchar_t c)
{
    if( c == ']' )
        return true;

    return false;
}
|
||||||
|
|
||||||
|
// always false: no character is treated as a simple tag mark here,
// the argument is ignored
bool BBCODEParser::IsClosingXmlSimpleTagMark(wchar_t c)
{
    const bool is_simple_tag_mark = false;

    return is_simple_tag_mark;
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
// there are no commentaries in bbcode
|
||||||
|
bool BBCODEParser::IsOpeningCommentaryTagMark(const wchar_t *)
|
||||||
|
{
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
// always returns zero as the size of the opening commentary mark
size_t BBCODEParser::OpeningCommentaryTagMarkSize()
{
    const size_t mark_size = 0;

    return mark_size;
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
// nothing is skipped here, the method always reports false
bool BBCODEParser::SkipCommentaryTagIfExists()
{
    const bool commentary_skipped = false;

    return commentary_skipped;
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
// one enter will generate one <br>
|
||||||
|
// two enters or more will generate only two br (<br><br>)
|
||||||
|
void BBCODEParser::PutNormalText(const wchar_t * str, const wchar_t * end)
|
||||||
|
{
|
||||||
|
int br_len;
|
||||||
|
|
||||||
|
if( lastc != -1 )
|
||||||
|
{
|
||||||
|
// trimming last white characters at end of the user text
|
||||||
|
while( str<end && (IsWhite(*(end-1)) || *(end-1)==10) )
|
||||||
|
--end;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
while( str < end )
|
||||||
|
{
|
||||||
|
if( *str == 10 )
|
||||||
|
{
|
||||||
|
++str;
|
||||||
|
br_len = 1;
|
||||||
|
|
||||||
|
// skipping white characters without a new line character
|
||||||
|
while( str < end && IsWhite(*str) )
|
||||||
|
++str;
|
||||||
|
|
||||||
|
if( str < end && *str == 10 )
|
||||||
|
{
|
||||||
|
br_len = 2;
|
||||||
|
|
||||||
|
// skipping white characters with new line characters
|
||||||
|
while( str < end && (IsWhite(*str) || *str==10) )
|
||||||
|
++str;
|
||||||
|
}
|
||||||
|
|
||||||
|
if( !has_open_ol_tag && !has_open_ul_tag && !has_open_li_tag )
|
||||||
|
{
|
||||||
|
for(int i=0 ; i < br_len ; ++i)
|
||||||
|
(*out_string) += L"<br>\n";
|
||||||
|
}
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
PrintEscape(*str);
|
||||||
|
++str;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
// empty implementation: both references are left untouched
void BBCODEParser::ReadNormalTextSkipWhite(const wchar_t * & start, const wchar_t * & last_non_white)
{
    // nothing to do
}
|
||||||
|
|
||||||
|
|
||||||
|
void BBCODEParser::CheckExceptions()
|
||||||
|
{
|
||||||
|
if( stack_len >= 2 )
|
||||||
|
{
|
||||||
|
if( pstack[stack_len-1].type == Item::opening &&
|
||||||
|
pstack[stack_len-2].type == Item::opening &&
|
||||||
|
IsNameEqual(L"*", pstack[stack_len-1].name) &&
|
||||||
|
IsNameEqual(L"*", pstack[stack_len-2].name) )
|
||||||
|
{
|
||||||
|
// removing the last [*] from the stack
|
||||||
|
// </li> was put automatically
|
||||||
|
PopStack();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
/*
|
||||||
|
bbcode format:
|
||||||
|
[bbcodetag=value]some text[/bbcodetag]
|
||||||
|
the value can be quoted, e.g.
|
||||||
|
[bbcodetag="value"]some text[/bbcodetag], or
|
||||||
|
[bbcodetag='value']some text[/bbcodetag]
|
||||||
|
|
||||||
|
the third string below (in tags table) is 'html_argument' from Tags,
|
||||||
|
it can contain a special character % followed by a string which means:
|
||||||
|
%1 - "value" escaped as for html
|
||||||
|
%2 - "some text" escaped as for html
|
||||||
|
%u1 - "value" trimmed and escaped as for url-es
|
||||||
|
%u2 - "some text" trimmed and escaped as for url-es
|
||||||
|
%% - one %
|
||||||
|
|
||||||
|
if you are using %2 or %u2 then "some text" is not treated as bbcode, e.g.
|
||||||
|
[bbcodetag=value]some [b]text[/b][/bbcodetag] will produce:
|
||||||
|
<htmltag arg="value">some [b]text[/b]</htmltag> (the inner tags [b][/b] were not parsed)
|
||||||
|
|
||||||
|
also when using %2 or %u2 the closing bbcode tag is skipped
|
||||||
|
(if you want this tag then you can put it in 'html_argument')
|
||||||
|
|
||||||
|
and when using u (%u1 or %u2) the argument is trimmed from whitespaces and new lines
|
||||||
|
at the beginning and at the end
|
||||||
|
(because otherwise a space would be changed to %20 and this is probably not what you really want)
|
||||||
|
*/
|
||||||
|
/*
 * looks for a bbcode tag with the given name (case sensitive, compared with Equal())
 *
 * returns a pointer to the tag description from the static table
 * or a null pointer if there is no such a bbcode tag
 */
const BBCODEParser::Tags * BBCODEParser::FindTag(const wchar_t * tag)
{
    // columns: bbcode name, html tag name, html argument (with closing '>'), is inline tag
    static Tags tags[] = {
        {L"*",      L"li",      L">", false},
        {L"b",      L"em",      L">", true},
        {L"i",      L"span",    L" class=\"bbitalic\">", true},
        {L"u",      L"span",    L" class=\"bbunderline\">", true},
        {L"s",      L"span",    L" class=\"bbstrike\">", true},
        {L"code",   L"code",    L" class=\"bbcode\">", false},
        {L"list",   L"ul",      L" class=\"bblist\">", false},
        {L"color",  L"span",    L" class=\"bbcol%1\">", true},
        {L"url",    L"a",       L" href=\"%u1\">", true},
        {L"img",    L"img",     L" alt=\"%1\" src=\"%u2\">", true},
        {L"quote",  L"div",     L" class=\"bbquote\">\n<span class=\"bbquotewho\">%1</span><br>\n", false},
    };

    Tags * const tags_end = tags + sizeof(tags) / sizeof(tags[0]);

    for(Tags * current = tags ; current != tags_end ; ++current)
    {
        if( Equal(tag, current->bbcode) )
            return current;
    }

    return 0;
}
|
||||||
|
|
||||||
|
// the same as FindTag(const wchar_t *) but the name is taken from a std::wstring
const BBCODEParser::Tags * BBCODEParser::FindTag(const std::wstring & tag)
{
    const wchar_t * tag_name = tag.c_str();

    return FindTag(tag_name);
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
void BBCODEParser::PrintArgumentCheckQuotes(const wchar_t * & start, const wchar_t * & end)
|
||||||
|
{
|
||||||
|
// skipping white characters from the argument
|
||||||
|
while( start<end && IsWhite(*start) )
|
||||||
|
++start;
|
||||||
|
|
||||||
|
// skipping first '=' character if exists
|
||||||
|
if( start<end && *start == '=' )
|
||||||
|
++start;
|
||||||
|
|
||||||
|
// skipping white characters from the argument
|
||||||
|
// at the beginning
|
||||||
|
while( start<end && IsWhite(*start) )
|
||||||
|
++start;
|
||||||
|
|
||||||
|
// and at the end
|
||||||
|
while( start<end && IsWhite(*(end-1)) )
|
||||||
|
--end;
|
||||||
|
|
||||||
|
|
||||||
|
if( start<end && (*start=='\'' || *start=='\"') )
|
||||||
|
{
|
||||||
|
++start;
|
||||||
|
|
||||||
|
if( start<end && *(start-1) == *(end-1) )
|
||||||
|
--end;
|
||||||
|
|
||||||
|
// skipping white characters after a first quote char [url = " ww...."]
|
||||||
|
while( start<end && IsWhite(*start) )
|
||||||
|
++start;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
void BBCODEParser::PrintEncode(int c)
|
||||||
|
{
|
||||||
|
if( c == '&' )
|
||||||
|
{
|
||||||
|
(*out_string) += L"&";
|
||||||
|
}
|
||||||
|
else
|
||||||
|
if( (c>='a' && c<='z') ||
|
||||||
|
(c>='A' && c<='Z') ||
|
||||||
|
(c>='0' && c<='9') ||
|
||||||
|
(c=='_' || c=='?' || c=='.' || c==',' || c=='/' || c=='-' ||
|
||||||
|
c=='+' || c=='*' || c=='(' || c==')' || c=='=' || c==':')
|
||||||
|
)
|
||||||
|
{
|
||||||
|
(*out_string) += c;
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
wchar_t buffer[20];
|
||||||
|
swprintf(buffer, 20, L"%02X", c);
|
||||||
|
|
||||||
|
(*out_string) += '%';
|
||||||
|
(*out_string) += buffer;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
void BBCODEParser::PrintEscape(int c, bool change_quote)
|
||||||
|
{
|
||||||
|
if( c == '<' )
|
||||||
|
{
|
||||||
|
(*out_string) += L"<";
|
||||||
|
}
|
||||||
|
else
|
||||||
|
if( c == '>' )
|
||||||
|
{
|
||||||
|
(*out_string) += L">";
|
||||||
|
}
|
||||||
|
else
|
||||||
|
if( c == '&' )
|
||||||
|
{
|
||||||
|
(*out_string) += L"&";
|
||||||
|
}
|
||||||
|
else
|
||||||
|
if( c == '\"' && change_quote )
|
||||||
|
{
|
||||||
|
(*out_string) += L""";
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
(*out_string) += c;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
void BBCODEParser::PrintArgumentEncode(const wchar_t * start, const wchar_t * end)
|
||||||
|
{
|
||||||
|
PrintArgumentCheckQuotes(start, end);
|
||||||
|
TrimWhiteWithNewLines(start, end);
|
||||||
|
|
||||||
|
for( ; start<end ; ++start )
|
||||||
|
PrintEncode(*start);
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
void BBCODEParser::PrintArgumentEscape(const wchar_t * start, const wchar_t * end)
|
||||||
|
{
|
||||||
|
PrintArgumentCheckQuotes(start, end);
|
||||||
|
|
||||||
|
for( ; start<end ; ++start )
|
||||||
|
PrintEscape(*start, true); // quotes are escaped as well here
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
void BBCODEParser::CheckOpeningTag(const Tags * tag, const wchar_t * tag_name, bool & condition)
|
||||||
|
{
|
||||||
|
if( Equal(tag->html_tag, tag_name) )
|
||||||
|
{
|
||||||
|
if( condition )
|
||||||
|
{
|
||||||
|
PutClosingTag(tag);
|
||||||
|
(*out_string) += '\n';
|
||||||
|
}
|
||||||
|
|
||||||
|
condition = true;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
void BBCODEParser::CheckOpeningTag(const Tags * tag)
|
||||||
|
{
|
||||||
|
bool has_list_tag = has_open_ul_tag || has_open_ol_tag;
|
||||||
|
|
||||||
|
CheckOpeningTag(tag, L"li", has_open_li_tag);
|
||||||
|
CheckOpeningTag(tag, L"ul", has_open_ul_tag);
|
||||||
|
CheckOpeningTag(tag, L"ol", has_open_ol_tag);
|
||||||
|
|
||||||
|
if( has_open_li_tag && !has_list_tag )
|
||||||
|
{
|
||||||
|
(*out_string) += L"<ul>\n";
|
||||||
|
has_open_ul_tag = true;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
void BBCODEParser::PrintEscape(const wchar_t * start, const wchar_t * end, bool change_quote)
|
||||||
|
{
|
||||||
|
for( ; start < end ; ++start)
|
||||||
|
PrintEscape(*start, change_quote);
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
void BBCODEParser::PrintEncode(const wchar_t * start, const wchar_t * end)
|
||||||
|
{
|
||||||
|
for( ; start < end ; ++start)
|
||||||
|
PrintEncode(*start);
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
void BBCODEParser::PutOpeningTagFromEzc()
|
||||||
|
{
|
||||||
|
// this can be a tag from Ezc templates system
|
||||||
|
(*out_string) += '[';
|
||||||
|
(*out_string) += LastItem().name;
|
||||||
|
|
||||||
|
|
||||||
|
// FIXME
|
||||||
|
// const wchar_t * start = pchar;
|
||||||
|
//
|
||||||
|
// while( *pchar && *pchar!=']' )
|
||||||
|
// ++pchar;
|
||||||
|
//
|
||||||
|
// if( *pchar == ']' )
|
||||||
|
// ++pchar;
|
||||||
|
//
|
||||||
|
// Put(start, pchar);
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
void BBCODEParser::PutHtmlArgument1(const wchar_t * arg_start, const wchar_t * arg_end, bool has_u)
|
||||||
|
{
|
||||||
|
if( has_u )
|
||||||
|
PrintArgumentEncode(arg_start, arg_end);
|
||||||
|
else
|
||||||
|
PrintArgumentEscape(arg_start, arg_end);
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
void BBCODEParser::TrimWhiteWithNewLines(const wchar_t * & start, const wchar_t * & end)
|
||||||
|
{
|
||||||
|
while( start < end && (IsWhite(*start) || *start==10) )
|
||||||
|
++start;
|
||||||
|
|
||||||
|
while( start < end && (IsWhite(*(end-1)) || *(end-1)==10) )
|
||||||
|
--end;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
void BBCODEParser::PutHtmlArgument2(const Tags * tag, bool has_u)
|
||||||
|
{
|
||||||
|
//const wchar_t * start = pchar;
|
||||||
|
//const wchar_t * end = pchar;
|
||||||
|
bool first_tag_removed = false;
|
||||||
|
|
||||||
|
while( lastc != -1 )
|
||||||
|
{
|
||||||
|
if( IsOpeningTagMark(lastc) )
|
||||||
|
{
|
||||||
|
// FIXME
|
||||||
|
// if( IsClosingTagForLastItem() )
|
||||||
|
// {
|
||||||
|
// // the last tag is skipped when using patterns with %2 or %u2
|
||||||
|
//
|
||||||
|
// PopStack(); // removing opening tag from the stack
|
||||||
|
// first_tag_removed = true;
|
||||||
|
// break;
|
||||||
|
// }
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
read_char();
|
||||||
|
//end = pchar;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if( !first_tag_removed )
|
||||||
|
PopStack(); // user has forgotten to close the tag
|
||||||
|
|
||||||
|
if( has_u )
|
||||||
|
{
|
||||||
|
// FIXME
|
||||||
|
// TrimWhiteWithNewLines(start, end);
|
||||||
|
// PrintEncode(start, end);
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
// FIXME
|
||||||
|
// PrintEscape(start, end);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
void BBCODEParser::PutHtmlArgument(const Tags * tag, const wchar_t * arg_start, const wchar_t * arg_end)
|
||||||
|
{
|
||||||
|
const wchar_t * pattern = tag->html_argument;
|
||||||
|
bool has_u;
|
||||||
|
|
||||||
|
while( *pattern )
|
||||||
|
{
|
||||||
|
if( *pattern == '%' )
|
||||||
|
{
|
||||||
|
++pattern;
|
||||||
|
has_u = false;
|
||||||
|
|
||||||
|
if( *pattern == 'u' )
|
||||||
|
{
|
||||||
|
++pattern;
|
||||||
|
has_u = true;
|
||||||
|
}
|
||||||
|
|
||||||
|
if( *pattern == '1' )
|
||||||
|
{
|
||||||
|
++pattern;
|
||||||
|
PutHtmlArgument1(arg_start, arg_end, has_u);
|
||||||
|
}
|
||||||
|
else
|
||||||
|
if( *pattern == '2' )
|
||||||
|
{
|
||||||
|
++pattern;
|
||||||
|
PutHtmlArgument2(tag, has_u);
|
||||||
|
}
|
||||||
|
else
|
||||||
|
if( *pattern == '%' )
|
||||||
|
{
|
||||||
|
(*out_string) += '%';
|
||||||
|
++pattern;
|
||||||
|
}
|
||||||
|
// else unrecognized, will be printed next time as a normal character
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
(*out_string) += *pattern;
|
||||||
|
++pattern;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
void BBCODEParser::PutOpeningTagFromBBCode(const Tags * tag)
|
||||||
|
{
|
||||||
|
CheckOpeningTag(tag);
|
||||||
|
PutOpeningTagMark();
|
||||||
|
Put(tag->html_tag);
|
||||||
|
|
||||||
|
// FIXME
|
||||||
|
// const wchar_t * start = pchar;
|
||||||
|
//
|
||||||
|
// while( *pchar && *pchar != ']' )
|
||||||
|
// ++pchar;
|
||||||
|
//
|
||||||
|
// PutHtmlArgument(tag, start, pchar);
|
||||||
|
//
|
||||||
|
// if( *pchar == ']' )
|
||||||
|
// ++pchar;
|
||||||
|
|
||||||
|
if( !tag->inline_tag )
|
||||||
|
{
|
||||||
|
Put(10);
|
||||||
|
SkipWhiteLines();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
bool BBCODEParser::PutOpeningTag()
|
||||||
|
{
|
||||||
|
const Tags * tag = FindTag(LastItem().name);
|
||||||
|
|
||||||
|
if( !tag )
|
||||||
|
PutOpeningTagFromEzc();
|
||||||
|
else
|
||||||
|
PutOpeningTagFromBBCode(tag);
|
||||||
|
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
void BBCODEParser::PutClosingTag(const Tags * tag)
|
||||||
|
{
|
||||||
|
if( !tag )
|
||||||
|
return; // skipping the tag
|
||||||
|
|
||||||
|
PutOpeningTagMark();
|
||||||
|
(*out_string) += '/';
|
||||||
|
(*out_string) += tag->html_tag;
|
||||||
|
PutClosingTagMark();
|
||||||
|
|
||||||
|
if( !tag->inline_tag )
|
||||||
|
{
|
||||||
|
(*out_string) += L"\n";
|
||||||
|
SkipWhiteLines();
|
||||||
|
}
|
||||||
|
|
||||||
|
if( Equal(tag->html_tag, L"li") )
|
||||||
|
has_open_li_tag = false;
|
||||||
|
|
||||||
|
if( Equal(tag->html_tag, L"ol") )
|
||||||
|
has_open_ol_tag = false;
|
||||||
|
|
||||||
|
if( Equal(tag->html_tag, L"ul") )
|
||||||
|
has_open_ul_tag = false;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
void BBCODEParser::PutClosingTag(const wchar_t * tag_name)
|
||||||
|
{
|
||||||
|
const Tags * tag = FindTag(tag_name);
|
||||||
|
PutClosingTag(tag);
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
void BBCODEParser::Init()
|
||||||
|
{
|
||||||
|
has_open_li_tag = false;
|
||||||
|
has_open_ol_tag = false;
|
||||||
|
has_open_ul_tag = false;
|
||||||
|
|
||||||
|
SkipWhiteLines();
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
void BBCODEParser::Uninit()
|
||||||
|
{
|
||||||
|
if( has_open_li_tag )
|
||||||
|
(*out_string) += L"</li>\n";
|
||||||
|
|
||||||
|
if( has_open_ol_tag )
|
||||||
|
(*out_string) += L"</ol>\n";
|
||||||
|
|
||||||
|
if( has_open_ul_tag )
|
||||||
|
(*out_string) += L"</ul>\n";
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
}
|
||||||
|
|
|
@ -0,0 +1,128 @@
|
||||||
|
/*
|
||||||
|
* This file is a part of PikoTools
|
||||||
|
* and is distributed under the (new) BSD licence.
|
||||||
|
* Author: Tomasz Sowa <t.sowa@ttmath.org>
|
||||||
|
*/
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Copyright (c) 2008-2021, Tomasz Sowa
|
||||||
|
* All rights reserved.
|
||||||
|
*
|
||||||
|
* Redistribution and use in source and binary forms, with or without
|
||||||
|
* modification, are permitted provided that the following conditions are met:
|
||||||
|
*
|
||||||
|
* * Redistributions of source code must retain the above copyright notice,
|
||||||
|
* this list of conditions and the following disclaimer.
|
||||||
|
*
|
||||||
|
* * Redistributions in binary form must reproduce the above copyright
|
||||||
|
* notice, this list of conditions and the following disclaimer in the
|
||||||
|
* documentation and/or other materials provided with the distribution.
|
||||||
|
*
|
||||||
|
* * Neither the name Tomasz Sowa nor the names of contributors to this
|
||||||
|
* project may be used to endorse or promote products derived
|
||||||
|
* from this software without specific prior written permission.
|
||||||
|
*
|
||||||
|
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||||
|
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||||
|
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||||
|
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||||
|
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||||
|
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||||
|
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||||
|
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||||
|
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||||
|
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
|
||||||
|
* THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
|
*/
|
||||||
|
|
||||||
|
#ifndef headerfile_winix_core_bbcodeparser
|
||||||
|
#define headerfile_winix_core_bbcodeparser
|
||||||
|
|
||||||
|
#include "htmlparser.h"
|
||||||
|
|
||||||
|
namespace pt
|
||||||
|
{
|
||||||
|
|
||||||
|
|
||||||
|
class BBCODEParser : public HTMLParser
|
||||||
|
{
|
||||||
|
|
||||||
|
struct Tags
|
||||||
|
{
|
||||||
|
const wchar_t * bbcode;
|
||||||
|
const wchar_t * html_tag;
|
||||||
|
const wchar_t * html_argument; // with closing '>'
|
||||||
|
bool inline_tag;
|
||||||
|
};
|
||||||
|
|
||||||
|
|
||||||
|
/*
|
||||||
|
virtual methods
|
||||||
|
(from HTMLParser class)
|
||||||
|
*/
|
||||||
|
virtual void Init();
|
||||||
|
virtual void Uninit();
|
||||||
|
|
||||||
|
virtual bool IsOpeningTagMark(wchar_t c);
|
||||||
|
virtual bool IsClosingTagMark(wchar_t c);
|
||||||
|
virtual bool IsClosingXmlSimpleTagMark(wchar_t c);
|
||||||
|
|
||||||
|
virtual bool IsOpeningCommentaryTagMark(const wchar_t *);
|
||||||
|
virtual size_t OpeningCommentaryTagMarkSize();
|
||||||
|
|
||||||
|
virtual bool IsValidCharForName(int c);
|
||||||
|
virtual void CheckExceptions();
|
||||||
|
virtual bool SkipCommentaryTagIfExists();
|
||||||
|
|
||||||
|
virtual bool PutOpeningTag();
|
||||||
|
virtual void PutClosingTag(const wchar_t * tag);
|
||||||
|
|
||||||
|
virtual void PutNormalText(const wchar_t * str, const wchar_t * end);
|
||||||
|
virtual void ReadNormalTextSkipWhite(const wchar_t * & start, const wchar_t * & last_non_white);
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
/*
|
||||||
|
others
|
||||||
|
*/
|
||||||
|
bool Equal(const wchar_t * str1, const wchar_t * str2);
|
||||||
|
|
||||||
|
void PutHtmlArgument1(const wchar_t * arg_start, const wchar_t * arg_end, bool has_u);
|
||||||
|
void PutHtmlArgument2(const Tags * tag, bool has_u);
|
||||||
|
void PutHtmlArgument(const Tags * tag, const wchar_t * arg_start, const wchar_t * arg_end);
|
||||||
|
|
||||||
|
void PutOpeningTagFromEzc();
|
||||||
|
void PutOpeningTagFromBBCode(const Tags * tag);
|
||||||
|
|
||||||
|
const Tags * FindTag(const wchar_t * tag);
|
||||||
|
const Tags * FindTag(const std::wstring & tag);
|
||||||
|
void PrintArgumentCheckQuotes(const wchar_t * & start, const wchar_t * & end);
|
||||||
|
|
||||||
|
void PrintEscape(int c, bool change_quote = false);
|
||||||
|
void PrintEncode(int c);
|
||||||
|
|
||||||
|
void PrintEscape(const wchar_t * start, const wchar_t * end, bool change_quote = false);
|
||||||
|
void PrintEncode(const wchar_t * start, const wchar_t * end);
|
||||||
|
|
||||||
|
void PrintArgumentEncode(const wchar_t * start, const wchar_t * end);
|
||||||
|
void PrintArgumentEscape(const wchar_t * start, const wchar_t * end);
|
||||||
|
|
||||||
|
void PutClosingTag(const Tags * tag);
|
||||||
|
|
||||||
|
void CheckOpeningTag(const Tags * tag, const wchar_t * tag_name, bool & condition);
|
||||||
|
void CheckOpeningTag(const Tags * tag);
|
||||||
|
|
||||||
|
void TrimWhiteWithNewLines(const wchar_t * & start, const wchar_t * & end);
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
bool has_open_ol_tag; // has open html <ol> tag
|
||||||
|
bool has_open_ul_tag; // has open html <ul> tag
|
||||||
|
bool has_open_li_tag; // has open html <li> tag
|
||||||
|
};
|
||||||
|
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
#endif
|
File diff suppressed because it is too large
Load Diff
|
@ -0,0 +1,490 @@
|
||||||
|
/*
|
||||||
|
* This file is a part of PikoTools
|
||||||
|
* and is distributed under the (new) BSD licence.
|
||||||
|
* Author: Tomasz Sowa <t.sowa@ttmath.org>
|
||||||
|
*/
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Copyright (c) 2008-2022, Tomasz Sowa
|
||||||
|
* All rights reserved.
|
||||||
|
*
|
||||||
|
* Redistribution and use in source and binary forms, with or without
|
||||||
|
* modification, are permitted provided that the following conditions are met:
|
||||||
|
*
|
||||||
|
* * Redistributions of source code must retain the above copyright notice,
|
||||||
|
* this list of conditions and the following disclaimer.
|
||||||
|
*
|
||||||
|
* * Redistributions in binary form must reproduce the above copyright
|
||||||
|
* notice, this list of conditions and the following disclaimer in the
|
||||||
|
* documentation and/or other materials provided with the distribution.
|
||||||
|
*
|
||||||
|
* * Neither the name Tomasz Sowa nor the names of contributors to this
|
||||||
|
* project may be used to endorse or promote products derived
|
||||||
|
* from this software without specific prior written permission.
|
||||||
|
*
|
||||||
|
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||||
|
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||||
|
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||||
|
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||||
|
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||||
|
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||||
|
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||||
|
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||||
|
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||||
|
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
|
||||||
|
* THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
|
*/
|
||||||
|
|
||||||
|
#ifndef headerfile_picotools_html_htmlfilter
|
||||||
|
#define headerfile_picotools_html_htmlfilter
|
||||||
|
|
||||||
|
#include <string>
|
||||||
|
#include <map>
|
||||||
|
#include <vector>
|
||||||
|
#include <algorithm>
|
||||||
|
#include "convert/baseparser.h"
|
||||||
|
#include "space/space.h"
|
||||||
|
#include "textstream/stream.h"
|
||||||
|
|
||||||
|
|
||||||
|
namespace pt
|
||||||
|
{
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
// max length of a name of a html tag (with terminating null)
|
||||||
|
#define WINIX_HTMLFILTER_ITEM_NAME_MAXLEN 30
|
||||||
|
|
||||||
|
// max length of a html lang attribute (e.g. "en", "pl")
|
||||||
|
#define WINIX_HTMLFILTER_ITEM_LANG_MAXLEN 10
|
||||||
|
|
||||||
|
|
||||||
|
#define WINIX_HTMLFILTER_ATTR_NAME_MAXLEN 40
|
||||||
|
|
||||||
|
|
||||||
|
#define WINIX_HTMLFILTER_ATTR_VALUE_MAXLEN 500
|
||||||
|
|
||||||
|
|
||||||
|
// depth of the html tree
|
||||||
|
#define WINIX_HTMLFILTER_STACK_MAXLEN 100
|
||||||
|
|
||||||
|
// length of a buffer used for printing
|
||||||
|
// it should be at least: WINIX_HTMLFILTER_ITEM_NAME_MAXLEN+3
|
||||||
|
#define WINIX_HTMLFILTER_BUFFER_MAXLEN 2048
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
/*!
|
||||||
|
very lightweight filter for html
|
||||||
|
(without using any dynamic memory - some memory is allocated only at the beginning - in ctors)
|
||||||
|
this filter has O(n) complexity over the whole html string
|
||||||
|
|
||||||
|
such tags as: <script> <pre> <textarea> are treated in a special way
|
||||||
|
all characters between the opening and closing tag (<script>....</script>) are untouched
|
||||||
|
|
||||||
|
if the filter finds that there are not closed tags it will close them,
|
||||||
|
if the filter finds a closing tag which doesn't have an opening tag - it will skip it
|
||||||
|
|
||||||
|
tags which don't need to be closed: meta, input, br, img, link
|
||||||
|
look at CheckExceptions() method
|
||||||
|
|
||||||
|
the filter recognizes xml simple tags (with / at the end) such as: <br />
|
||||||
|
*/
|
||||||
|
class HTMLParser : public BaseParser
|
||||||
|
{
|
||||||
|
public:
|
||||||
|
|
||||||
|
|
||||||
|
/*
|
||||||
|
status of parsing
|
||||||
|
*/
|
||||||
|
enum Status { ok, cant_open_file, syntax_error };
|
||||||
|
|
||||||
|
|
||||||
|
enum OrphanMode
|
||||||
|
{
|
||||||
|
orphan_nbsp, // putting "&nbsp;" string
|
||||||
|
orphan_160space // putting 160 ascii code
|
||||||
|
};
|
||||||
|
|
||||||
|
|
||||||
|
// orphans for one language
|
||||||
|
struct Orphans
|
||||||
|
{
|
||||||
|
std::vector<std::wstring> tab;
|
||||||
|
size_t max_len;
|
||||||
|
};
|
||||||
|
|
||||||
|
|
||||||
|
struct Item
|
||||||
|
{
|
||||||
|
std::wstring name; // max size: WINIX_HTMLFILTER_ITEM_NAME_MAXLEN
|
||||||
|
|
||||||
|
enum Type
|
||||||
|
{
|
||||||
|
opening, /* sample: <h1> */
|
||||||
|
closing, /* sample: </h1> */
|
||||||
|
simple, /* sample: <br/> */
|
||||||
|
special, /* sample: <!doctype> */
|
||||||
|
none
|
||||||
|
} type;
|
||||||
|
|
||||||
|
bool is_commentary;
|
||||||
|
|
||||||
|
bool is_cdata;
|
||||||
|
|
||||||
|
// is a new line before this tag (or just a new line and some white characters)
|
||||||
|
bool new_line_before;
|
||||||
|
|
||||||
|
// is there a new line after this tag (or just some white characters and a new line)
|
||||||
|
bool new_line_after;
|
||||||
|
|
||||||
|
// is there a new line in the middle after this tag and before the next tag
|
||||||
|
bool new_line_in_the_middle;
|
||||||
|
|
||||||
|
// is there a white char (but not new line) before this tag
|
||||||
|
bool white_char_before;
|
||||||
|
|
||||||
|
// current orphans table
|
||||||
|
// (will be propagated)
|
||||||
|
Orphans * porphans;
|
||||||
|
|
||||||
|
// this item or one from its parents is a 'body' html tag
|
||||||
|
// (will be propagated)
|
||||||
|
bool has_body_tag;
|
||||||
|
|
||||||
|
size_t tree_index;
|
||||||
|
|
||||||
|
Space * space;
|
||||||
|
|
||||||
|
void Clear();
|
||||||
|
Item();
|
||||||
|
};
|
||||||
|
|
||||||
|
|
||||||
|
class ItemParsedListener
|
||||||
|
{
|
||||||
|
public:
|
||||||
|
|
||||||
|
ItemParsedListener() {}
|
||||||
|
|
||||||
|
virtual bool item_parsed(const Item & item) { return true; }
|
||||||
|
virtual ~ItemParsedListener() {}
|
||||||
|
|
||||||
|
};
|
||||||
|
|
||||||
|
|
||||||
|
/*
|
||||||
|
the last status of parsing, set by parse() methods
|
||||||
|
*/
|
||||||
|
Status status;
|
||||||
|
|
||||||
|
HTMLParser();
|
||||||
|
HTMLParser(const HTMLParser & f);
|
||||||
|
HTMLParser & operator=(const HTMLParser & f);
|
||||||
|
virtual ~HTMLParser();
|
||||||
|
|
||||||
|
void set_item_parsed_listener(ItemParsedListener * listener);
|
||||||
|
|
||||||
|
|
||||||
|
void parse_html(const wchar_t * in, Space & space, bool compact_mode = false);
|
||||||
|
|
||||||
|
Status parse_xml_file(const char * file_name, Space & out_space, bool compact_mode = false, bool clear_space = true);
|
||||||
|
Status parse_xml_file(const std::string & file_name, Space & out_space, bool compact_mode = false, bool clear_space = true);
|
||||||
|
Status parse_xml_file(const wchar_t * file_name, Space & out_space, bool compact_mode = false, bool clear_space = true);
|
||||||
|
Status parse_xml_file(const std::wstring & file_name, Space & out_space, bool compact_mode = false, bool clear_space = true);
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
// main methods used for filtering
|
||||||
|
void filter(const wchar_t * in, std::wstring & out, bool clear_out_string = true);
|
||||||
|
void filter(const std::wstring & in, std::wstring & out, bool clear_out_string = true);
|
||||||
|
|
||||||
|
void filter(const WTextStream & in, Stream & out, bool clear_out_stream = true);
|
||||||
|
|
||||||
|
HTMLParser::Status filter_file(const char * file_name, std::wstring & out, bool clear_out_stream = true);
|
||||||
|
HTMLParser::Status filter_file(const std::string & file_name, std::wstring & out, bool clear_out_stream = true);
|
||||||
|
HTMLParser::Status filter_file(const wchar_t * file_name, std::wstring & out, bool clear_out_stream = true);
|
||||||
|
HTMLParser::Status filter_file(const std::wstring & file_name, std::wstring & out, bool clear_out_stream = true);
|
||||||
|
|
||||||
|
|
||||||
|
/*
|
||||||
|
*
|
||||||
|
* returns a number of a last parsed line
|
||||||
|
* can be used to obtain the line in which there was a syntax error
|
||||||
|
*
|
||||||
|
*/
|
||||||
|
int get_last_parsed_line();
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
const static int WHITE_MODE_ORIGIN = 0;
|
||||||
|
const static int WHITE_MODE_SINGLE_LINE = 1;
|
||||||
|
const static int WHITE_MODE_TREE = 2;
|
||||||
|
|
||||||
|
|
||||||
|
// white chars mode
|
||||||
|
//
|
||||||
|
void white_chars_mode(int mode);
|
||||||
|
|
||||||
|
// if the line is wrap_line_ length (or longer) then insert a new line character (in a place of a white char)
|
||||||
|
// (only between html tags and only in <body> subtree)
|
||||||
|
// skipped in such tags: script, pre, textarea
|
||||||
|
// 0 - off
|
||||||
|
// lines are wrapped only in 'body' tag (useful for text in 'title' tag which is in 'head' section)
|
||||||
|
void WrapLine(size_t wrap_line_);
|
||||||
|
|
||||||
|
// first tabs in a tree
|
||||||
|
// default: 2 (spaces)
|
||||||
|
// set 0 to turn off
|
||||||
|
void InsertTabs(size_t tabsize);
|
||||||
|
|
||||||
|
// set a name of a html tag which will be used as 'nofilter' tag
|
||||||
|
// elements between such tags are not filtered (similarly as in <pre> and <textarea>)
|
||||||
|
// these tags (opening and closing) will not be placed in the html output
|
||||||
|
void SetNoFilterTag(const std::wstring & tag_name);
|
||||||
|
|
||||||
|
// orphans are checked only in 'body' tag
|
||||||
|
void AssignOrphans(const wchar_t * lang_code, const std::vector<std::wstring> & otab);
|
||||||
|
void AssignOrphans(const std::wstring & lang_code, const std::vector<std::wstring> & otab);
|
||||||
|
void ClearOrphans();
|
||||||
|
|
||||||
|
// check 'orphans' for the specific language
|
||||||
|
// if an orphan is detected then the non-break space ("&nbsp;" or ascii 160 code) will be put
|
||||||
|
// default disable (lang_none)
|
||||||
|
void OrphansMode(const std::wstring & orphan_mode);
|
||||||
|
|
||||||
|
// skipping some unsafe tags
|
||||||
|
// (script, iframe, frame, frameset, applet, head, meta, html, link, body, ...)
|
||||||
|
void SafeMode(bool safe_mode_);
|
||||||
|
|
||||||
|
// skip all html tags
|
||||||
|
// gives only text without markup
|
||||||
|
// but there can be commentaries
|
||||||
|
void SkipTags(bool skip_tags);
|
||||||
|
|
||||||
|
// skip commentaries
|
||||||
|
void SkipCommentaries(bool skip_commentaries);
|
||||||
|
|
||||||
|
// if true then entities such as &nbsp; are skipped
|
||||||
|
// this automatically turns on AnalyzeEntities
|
||||||
|
// in such a case FoundEntity callbacks are sent
|
||||||
|
void SkipEntities(bool skip_entities);
|
||||||
|
|
||||||
|
// analyze html entities such as &nbsp;
|
||||||
|
// virtual method: FoundEntity is called
|
||||||
|
// entities are analyzed in normal text and in attribute values such as <p class="a&nbsp;">
|
||||||
|
void AnalyzeEntities(bool analyze_entities);
|
||||||
|
|
||||||
|
|
||||||
|
protected:
|
||||||
|
|
||||||
|
/*
|
||||||
|
* true when parsing html input, false for parsing xml
|
||||||
|
*/
|
||||||
|
bool parsing_html;
|
||||||
|
|
||||||
|
|
||||||
|
bool xml_compact_mode;
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
// orphans for all languages
|
||||||
|
// map<language_code, Orphans>
|
||||||
|
typedef std::map<std::wstring, Orphans> OrphansTab;
|
||||||
|
OrphansTab orphans_tab;
|
||||||
|
|
||||||
|
// html <nofilter> tag name
|
||||||
|
std::wstring no_filter_tag;
|
||||||
|
|
||||||
|
ItemParsedListener * item_parsed_listener;
|
||||||
|
|
||||||
|
/*
|
||||||
|
true if the lastc was escaped (with a backslash)
|
||||||
|
we have to know if the last sequence was \" or just "
|
||||||
|
*/
|
||||||
|
bool char_was_escaped;
|
||||||
|
|
||||||
|
std::wstring escaped_chars_buffer;
|
||||||
|
size_t escaped_char_index;
|
||||||
|
|
||||||
|
/*
|
||||||
|
* filter mode, a method filter(...) was called
|
||||||
|
* in filter mode we do not unescape xml sequences such as &lt; &gt; ...
|
||||||
|
*/
|
||||||
|
bool filter_mode;
|
||||||
|
|
||||||
|
|
||||||
|
void clear_input_flags();
|
||||||
|
|
||||||
|
|
||||||
|
/*
|
||||||
|
virtual methods
|
||||||
|
*/
|
||||||
|
virtual void Init();
|
||||||
|
virtual void Uninit();
|
||||||
|
|
||||||
|
virtual bool IsOpeningTagMark(wchar_t c);
|
||||||
|
virtual bool IsClosingTagMark(wchar_t c);
|
||||||
|
virtual bool IsClosingTagIndicator(wchar_t c);
|
||||||
|
virtual bool IsSpecialTagIndicator(wchar_t c);
|
||||||
|
virtual bool IsXMLSpecialTagIndicator(wchar_t c);
|
||||||
|
virtual bool IsAttributeAssignmentMark(wchar_t c);
|
||||||
|
virtual bool IsClosingXmlSimpleTagMark(wchar_t c);
|
||||||
|
virtual bool IsStartingEntityMark(wchar_t c);
|
||||||
|
virtual bool IsEndingEntityMark(wchar_t c);
|
||||||
|
|
||||||
|
virtual bool IsValidCharForName(int c);
|
||||||
|
virtual bool IsValidCharForAttrName(int c);
|
||||||
|
virtual bool IsValidCharForEntityName(int c);
|
||||||
|
|
||||||
|
virtual void CheckSingleItemExceptions();
|
||||||
|
virtual void CheckWhiteCharsExceptions(Item & item);
|
||||||
|
virtual void CheckDifferentContentExceptions(Item & item);
|
||||||
|
|
||||||
|
virtual void Put(wchar_t c);
|
||||||
|
virtual void Put(const wchar_t * str, const wchar_t * end);
|
||||||
|
virtual void Put(const std::wstring & str);
|
||||||
|
virtual void AnalyzeEntitiesAndPut(const wchar_t * str, const wchar_t * end, std::wstring * out);
|
||||||
|
|
||||||
|
virtual void PutOpeningTagMark();
|
||||||
|
virtual void PutClosingTagMark();
|
||||||
|
virtual bool PutOpeningTag();
|
||||||
|
virtual void PutClosingTag(const Item & item);
|
||||||
|
|
||||||
|
virtual void ItemFound();
|
||||||
|
virtual void EntityFound(const wchar_t * str, const wchar_t * end);
|
||||||
|
|
||||||
|
/*
|
||||||
|
others
|
||||||
|
*/
|
||||||
|
void SetSomeDefaults();
|
||||||
|
|
||||||
|
Item & GetItem(size_t i);
|
||||||
|
Item & LastItem();
|
||||||
|
|
||||||
|
wchar_t ToLower(wchar_t c);
|
||||||
|
void ToLower(std::wstring & str);
|
||||||
|
|
||||||
|
bool IsNameEqual(const wchar_t * name1, const wchar_t * name2);
|
||||||
|
bool IsNameEqual(const wchar_t * name1, const std::wstring & name2);
|
||||||
|
bool IsNameEqual(const std::wstring & name1, const wchar_t * name2);
|
||||||
|
bool IsNameEqual(const std::wstring & name1, const std::wstring & name2);
|
||||||
|
|
||||||
|
bool IsNameEqual(const wchar_t * name1, const wchar_t * name2, size_t len);
|
||||||
|
bool IsNameEqual(const wchar_t * name1, const std::wstring & name2, size_t len);
|
||||||
|
bool IsNameEqual(const std::wstring & name1, const wchar_t * name2, size_t len);
|
||||||
|
bool IsNameEqual(const std::wstring & name1, const std::wstring & name2, size_t len);
|
||||||
|
|
||||||
|
bool IsLastTag(const wchar_t * name);
|
||||||
|
bool IsLastTag(const std::wstring & name);
|
||||||
|
bool IsTagSafe(const wchar_t * tag);
|
||||||
|
bool IsTagSafe(const std::wstring & tag);
|
||||||
|
|
||||||
|
int CheckOrphan(const wchar_t * str, const wchar_t * end, const std::wstring & orphan_str);
|
||||||
|
bool CheckOrphan(const wchar_t * str, const wchar_t * end, const std::vector<std::wstring> & orphans);
|
||||||
|
bool CheckOrphan(const wchar_t * str, const wchar_t * end);
|
||||||
|
|
||||||
|
bool IsWhite(int c);
|
||||||
|
void SkipWhite(std::wstring * out_string = nullptr);
|
||||||
|
void SkipWhiteLines(std::wstring * out_string = nullptr);
|
||||||
|
void SkipWhiteWithFirstNewLine();
|
||||||
|
|
||||||
|
int current_white_char_mode();
|
||||||
|
|
||||||
|
void ReadTextUntilClosingCommentary();
|
||||||
|
bool IsClosingTagForLastItem(bool put_closing_tag_as_well);
|
||||||
|
void ReadTextUntilClosingTag(bool put_closing_tag_as_well);
|
||||||
|
void SkipAndCheckClosingTag(std::wstring * remember_text = nullptr);
|
||||||
|
|
||||||
|
void PopStack();
|
||||||
|
bool PushStack();
|
||||||
|
void CheckStackPrintRest();
|
||||||
|
void AddForgottenTags();
|
||||||
|
void CheckClosingTags();
|
||||||
|
void ReadText(bool is_cdata);
|
||||||
|
bool PrintRest();
|
||||||
|
bool PrintOpeningItem();
|
||||||
|
void ReadItemName(std::wstring & name, bool clear_name = true);
|
||||||
|
void ReadItemAttrName();
|
||||||
|
void ReadItemAttrValueAdd(const std::wstring & str);
|
||||||
|
void ReadItemAttrValue(bool has_quote, wchar_t quote_char);
|
||||||
|
void ReadXMLItemAttrValue(bool has_quote, wchar_t quote_char);
|
||||||
|
|
||||||
|
bool ReadItemAttr();
|
||||||
|
void CheckItemLangAttr();
|
||||||
|
void PrintItemAttr();
|
||||||
|
void PutItemAttrToSpace();
|
||||||
|
|
||||||
|
void ReadItemClosing();
|
||||||
|
void ReadItemSpecial();
|
||||||
|
void ReadItemOpening();
|
||||||
|
bool ReadItem();
|
||||||
|
void ReadLoop();
|
||||||
|
void Read();
|
||||||
|
|
||||||
|
void CheckChar(wchar_t c);
|
||||||
|
|
||||||
|
bool PutNormalNonWhite(std::wstring & str, bool allow_put_new_line, bool allow_put_space, bool is_cdata);
|
||||||
|
void PutNormalWhite(bool & was_white_char, bool & was_new_line, std::wstring * result_text = nullptr);
|
||||||
|
|
||||||
|
void PutTabs(size_t len);
|
||||||
|
void PutNonBreakingSpace();
|
||||||
|
void CalcOrphansMaxLen(Orphans & orphans);
|
||||||
|
|
||||||
|
void AddItemToSpace();
|
||||||
|
void RemoveLastSpace(size_t index);
|
||||||
|
void AddSpaceToSpaceTree(const Space & space);
|
||||||
|
|
||||||
|
bool RemoveIfNeeded(size_t index);
|
||||||
|
|
||||||
|
bool check_escape_sequentions();
|
||||||
|
void read_xml_entity();
|
||||||
|
void read_char_from_entity_buffer();
|
||||||
|
int read_char() override;
|
||||||
|
|
||||||
|
Item empty;
|
||||||
|
Item * pstack; // stack pointer
|
||||||
|
size_t stack_len; // length of the stack
|
||||||
|
wchar_t * buffer; // buffer used when printing
|
||||||
|
std::wstring * out_string;
|
||||||
|
Stream * out_stream;
|
||||||
|
Space * out_space;
|
||||||
|
Space text_space_tmp;
|
||||||
|
|
||||||
|
std::vector<int> white_char_mode_tab;
|
||||||
|
|
||||||
|
//bool last_new_line;
|
||||||
|
bool new_item_has_new_line_before;
|
||||||
|
bool new_item_has_white_char_before;
|
||||||
|
int white_mode;
|
||||||
|
bool is_first_item;
|
||||||
|
size_t wrap_line; // insert a new line character into long lines
|
||||||
|
size_t tab_size;
|
||||||
|
OrphanMode orphan_mode;
|
||||||
|
std::wstring attr_name;
|
||||||
|
std::vector<std::wstring> attr_value;
|
||||||
|
std::wstring attr_value_temp;
|
||||||
|
std::wstring attr_value_lower;
|
||||||
|
bool attr_has_value;
|
||||||
|
std::wstring lang_code_lower;
|
||||||
|
size_t line_len; //length of the current line (without first spaces which create the html tree)
|
||||||
|
bool safe_mode; // skipping some unsafe tags
|
||||||
|
Orphans orphans_temp;
|
||||||
|
bool skip_tags;
|
||||||
|
bool skip_commentaries;
|
||||||
|
bool skip_entities;
|
||||||
|
bool analyze_entities;
|
||||||
|
std::wstring tmp_text;
|
||||||
|
std::wstring tmp_name;
|
||||||
|
};
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
#endif
|
|
@ -112,6 +112,7 @@ void FileLog::save_log(WTextStream * buffer)
|
||||||
if( log_stdout )
|
if( log_stdout )
|
||||||
{
|
{
|
||||||
wide_stream_to_utf8(*buffer, std::cout);
|
wide_stream_to_utf8(*buffer, std::cout);
|
||||||
|
std::cout.flush();
|
||||||
}
|
}
|
||||||
|
|
||||||
if( !log_file.empty() )
|
if( !log_file.empty() )
|
||||||
|
|
|
@ -5,7 +5,7 @@
|
||||||
*/
|
*/
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Copyright (c) 2018-2021, Tomasz Sowa
|
* Copyright (c) 2018-2022, Tomasz Sowa
|
||||||
* All rights reserved.
|
* All rights reserved.
|
||||||
*
|
*
|
||||||
* Redistribution and use in source and binary forms, with or without
|
* Redistribution and use in source and binary forms, with or without
|
||||||
|
@ -684,5 +684,51 @@ void Log::save_log()
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
Log & Log::put_multiline(const char * prefix, const char * msg)
|
||||||
|
{
|
||||||
|
put_multiline_generic(prefix, msg);
|
||||||
|
return *this;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
Log & Log::put_multiline(const wchar_t * prefix, const wchar_t * msg)
|
||||||
|
{
|
||||||
|
put_multiline_generic(prefix, msg);
|
||||||
|
return *this;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
Log & Log::put_multiline(const char * prefix, const std::string & msg)
|
||||||
|
{
|
||||||
|
put_multiline_generic(prefix, msg.c_str());
|
||||||
|
return *this;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
Log & Log::put_multiline(const wchar_t * prefix, const std::wstring & msg)
|
||||||
|
{
|
||||||
|
put_multiline_generic(prefix, msg.c_str());
|
||||||
|
return *this;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
Log & Log::put_multiline(const std::string & prefix, const std::string & msg)
|
||||||
|
{
|
||||||
|
put_multiline_generic(prefix.c_str(), msg.c_str());
|
||||||
|
return *this;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
Log & Log::put_multiline(const std::wstring & prefix, const std::wstring & msg)
|
||||||
|
{
|
||||||
|
put_multiline_generic(prefix.c_str(), msg.c_str());
|
||||||
|
return *this;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
} // namespace
|
} // namespace
|
||||||
|
|
||||||
|
|
|
@ -5,7 +5,7 @@
|
||||||
*/
|
*/
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Copyright (c) 2018-2021, Tomasz Sowa
|
* Copyright (c) 2018-2022, Tomasz Sowa
|
||||||
* All rights reserved.
|
* All rights reserved.
|
||||||
*
|
*
|
||||||
* Redistribution and use in source and binary forms, with or without
|
* Redistribution and use in source and binary forms, with or without
|
||||||
|
@ -156,6 +156,17 @@ public:
|
||||||
virtual Log & put_binary_blob(const char * blob, size_t blob_len);
|
virtual Log & put_binary_blob(const char * blob, size_t blob_len);
|
||||||
virtual Log & put_binary_blob(const std::string & blob);
|
virtual Log & put_binary_blob(const std::string & blob);
|
||||||
|
|
||||||
|
/*
|
||||||
|
* put multiline message
|
||||||
|
* first and last new line characters are trimmed
|
||||||
|
* at the beginning of each line a prefix is inserted
|
||||||
|
*/
|
||||||
|
virtual Log & put_multiline(const char * prefix, const char * msg);
|
||||||
|
virtual Log & put_multiline(const wchar_t * prefix, const wchar_t * msg);
|
||||||
|
virtual Log & put_multiline(const char * prefix, const std::string & msg);
|
||||||
|
virtual Log & put_multiline(const wchar_t * prefix, const std::wstring & msg);
|
||||||
|
virtual Log & put_multiline(const std::string & prefix, const std::string & msg);
|
||||||
|
virtual Log & put_multiline(const std::wstring & prefix, const std::wstring & msg);
|
||||||
|
|
||||||
|
|
||||||
protected:
|
protected:
|
||||||
|
@ -184,6 +195,10 @@ protected:
|
||||||
virtual void save_log();
|
virtual void save_log();
|
||||||
virtual void save_log_and_clear();
|
virtual void save_log_and_clear();
|
||||||
|
|
||||||
|
template<typename CharType>
|
||||||
|
void put_multiline_generic(const CharType * prefix, const CharType * msg);
|
||||||
|
|
||||||
|
|
||||||
};
|
};
|
||||||
|
|
||||||
|
|
||||||
|
@ -221,11 +236,61 @@ Log & Log::log_string_generic(const StringType & value, size_t max_size)
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
template<typename CharType>
|
||||||
|
void Log::put_multiline_generic(const CharType * prefix, const CharType * msg)
|
||||||
|
{
|
||||||
|
bool put_prefix = true;
|
||||||
|
bool was_new_line = false;
|
||||||
|
bool was_something_printed = false;
|
||||||
|
|
||||||
|
while( *msg )
|
||||||
|
{
|
||||||
|
if( static_cast<CharType>(*msg) == static_cast<CharType>('\n') )
|
||||||
|
{
|
||||||
|
was_new_line = true;
|
||||||
|
put_prefix = true;
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
if( was_new_line )
|
||||||
|
{
|
||||||
|
if( was_something_printed )
|
||||||
|
{
|
||||||
|
operator<<(logend);
|
||||||
|
}
|
||||||
|
|
||||||
|
was_new_line = false;
|
||||||
|
}
|
||||||
|
|
||||||
|
if( put_prefix )
|
||||||
|
{
|
||||||
|
operator<<(prefix);
|
||||||
|
put_prefix = false;
|
||||||
|
}
|
||||||
|
|
||||||
|
operator<<(*msg);
|
||||||
|
was_something_printed = true;
|
||||||
|
}
|
||||||
|
|
||||||
|
msg += 1;
|
||||||
|
}
|
||||||
|
|
||||||
|
if( was_something_printed )
|
||||||
|
{
|
||||||
|
operator<<(logend);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
} // namespace
|
} // namespace
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
|
||||||
|
|
|
@ -5,7 +5,7 @@
|
||||||
*/
|
*/
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Copyright (c) 2008-2021, Tomasz Sowa
|
* Copyright (c) 2008-2022, Tomasz Sowa
|
||||||
* All rights reserved.
|
* All rights reserved.
|
||||||
*
|
*
|
||||||
* Redistribution and use in source and binary forms, with or without
|
* Redistribution and use in source and binary forms, with or without
|
||||||
|
@ -188,6 +188,12 @@ Space::Space(const Space * space)
|
||||||
set(space);
|
set(space);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
Space::Space(const Date & date)
|
||||||
|
{
|
||||||
|
initialize();
|
||||||
|
set(date);
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
void Space::clear()
|
void Space::clear()
|
||||||
{
|
{
|
||||||
|
@ -427,6 +433,13 @@ void Space::set(Space && space)
|
||||||
move_from(std::move(space));
|
move_from(std::move(space));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
void Space::set(const Date & date)
|
||||||
|
{
|
||||||
|
initialize_value_wstring_if_needed();
|
||||||
|
WTextStream str;
|
||||||
|
date.SerializeISO(str);
|
||||||
|
str.to_str(value.value_wstring);
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
Space & Space::add(bool val)
|
Space & Space::add(bool val)
|
||||||
|
@ -528,6 +541,12 @@ Space & Space::add(Space && space)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
Space & Space::add(const Date & date)
|
||||||
|
{
|
||||||
|
return add_generic(date);
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
Space & Space::add_empty_space()
|
Space & Space::add_empty_space()
|
||||||
{
|
{
|
||||||
return add_generic(static_cast<Space*>(nullptr));
|
return add_generic(static_cast<Space*>(nullptr));
|
||||||
|
@ -643,6 +662,13 @@ Space & Space::add(const wchar_t * field, Space && space)
|
||||||
return *(insert_res.first->second);
|
return *(insert_res.first->second);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
Space & Space::add(const wchar_t * field, const Date & date)
|
||||||
|
{
|
||||||
|
return add_generic(field, date);
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
Space & Space::add_empty_space(const wchar_t * field)
|
Space & Space::add_empty_space(const wchar_t * field)
|
||||||
{
|
{
|
||||||
return add_generic(field, static_cast<Space*>(nullptr));
|
return add_generic(field, static_cast<Space*>(nullptr));
|
||||||
|
@ -746,6 +772,11 @@ Space & Space::add(const std::wstring & field, Space && space)
|
||||||
return add(field.c_str(), std::move(space));
|
return add(field.c_str(), std::move(space));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
Space & Space::add(const std::wstring & field, const Date & date)
|
||||||
|
{
|
||||||
|
return add_generic(field, date);
|
||||||
|
}
|
||||||
|
|
||||||
Space & Space::add_empty_space(const std::wstring & field)
|
Space & Space::add_empty_space(const std::wstring & field)
|
||||||
{
|
{
|
||||||
return add_generic(field, static_cast<Space*>(nullptr));
|
return add_generic(field, static_cast<Space*>(nullptr));
|
||||||
|
@ -827,8 +858,31 @@ bool Space::to_bool() const
|
||||||
if( type == type_bool )
|
if( type == type_bool )
|
||||||
return value.value_bool;
|
return value.value_bool;
|
||||||
|
|
||||||
long long val = to_long_long();
|
if( type == type_long )
|
||||||
return (val != 0) ? true : false;
|
return value.value_long != 0;
|
||||||
|
|
||||||
|
if( type == type_float )
|
||||||
|
return value.value_float != 0.0f;
|
||||||
|
|
||||||
|
if( type == type_double )
|
||||||
|
return value.value_double != 0.0;
|
||||||
|
|
||||||
|
if( type == type_long_double )
|
||||||
|
return value.value_long_double != 0.0L;
|
||||||
|
|
||||||
|
if( type == type_string )
|
||||||
|
return !value.value_string.empty();
|
||||||
|
|
||||||
|
if( type == type_wstring )
|
||||||
|
return !value.value_wstring.empty();
|
||||||
|
|
||||||
|
if( type == type_table )
|
||||||
|
return !value.value_table.empty();
|
||||||
|
|
||||||
|
if( type == type_object )
|
||||||
|
return !value.value_object.empty();
|
||||||
|
|
||||||
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
short Space::to_short() const
|
short Space::to_short() const
|
||||||
|
@ -1474,35 +1528,35 @@ void Space::serialize_to_space_to(std::wstring & str, bool pretty_print) const
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
std::string Space::serialize_to_json_str() const
|
std::string Space::serialize_to_json_str(bool pretty_print) const
|
||||||
{
|
{
|
||||||
std::string str;
|
std::string str;
|
||||||
serialize_to_json_to(str);
|
serialize_to_json_to(str, pretty_print);
|
||||||
return str;
|
return str;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
std::wstring Space::serialize_to_json_wstr() const
|
std::wstring Space::serialize_to_json_wstr(bool pretty_print) const
|
||||||
{
|
{
|
||||||
std::wstring str;
|
std::wstring str;
|
||||||
serialize_to_json_to(str);
|
serialize_to_json_to(str, pretty_print);
|
||||||
return str;
|
return str;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
void Space::serialize_to_json_to(std::string & str) const
|
void Space::serialize_to_json_to(std::string & str, bool pretty_print) const
|
||||||
{
|
{
|
||||||
TextStream stream;
|
TextStream stream;
|
||||||
serialize_to_json_stream(stream);
|
serialize_to_json_stream(stream, pretty_print);
|
||||||
|
|
||||||
stream.to_str(str);
|
stream.to_str(str);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
void Space::serialize_to_json_to(std::wstring & str) const
|
void Space::serialize_to_json_to(std::wstring & str, bool pretty_print) const
|
||||||
{
|
{
|
||||||
WTextStream stream;
|
WTextStream stream;
|
||||||
serialize_to_json_stream(stream);
|
serialize_to_json_stream(stream, pretty_print);
|
||||||
|
|
||||||
stream.to_str(str);
|
stream.to_str(str);
|
||||||
}
|
}
|
||||||
|
@ -2523,6 +2577,18 @@ void Space::remove_value_table(bool only_clear)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
Space * Space::get_table_item(size_t index)
|
||||||
|
{
|
||||||
|
if( is_table() && index < table_size() )
|
||||||
|
{
|
||||||
|
return value.value_table[index];
|
||||||
|
}
|
||||||
|
|
||||||
|
return nullptr;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
} // namespace
|
} // namespace
|
||||||
|
|
||||||
|
|
|
@ -5,7 +5,7 @@
|
||||||
*/
|
*/
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Copyright (c) 2010-2021, Tomasz Sowa
|
* Copyright (c) 2010-2022, Tomasz Sowa
|
||||||
* All rights reserved.
|
* All rights reserved.
|
||||||
*
|
*
|
||||||
* Redistribution and use in source and binary forms, with or without
|
* Redistribution and use in source and binary forms, with or without
|
||||||
|
@ -207,6 +207,7 @@ public:
|
||||||
Space(const std::string & str);
|
Space(const std::string & str);
|
||||||
Space(const std::wstring & str);
|
Space(const std::wstring & str);
|
||||||
Space(const Space * space);
|
Space(const Space * space);
|
||||||
|
Space(const Date & date);
|
||||||
|
|
||||||
|
|
||||||
void clear();
|
void clear();
|
||||||
|
@ -243,6 +244,7 @@ public:
|
||||||
void set(const Space & space);
|
void set(const Space & space);
|
||||||
void set(const Space * space);
|
void set(const Space * space);
|
||||||
void set(Space && space);
|
void set(Space && space);
|
||||||
|
void set(const Date & date);
|
||||||
|
|
||||||
|
|
||||||
// add a value to the table, change to table if needed, return the reference to the new inserted item
|
// add a value to the table, change to table if needed, return the reference to the new inserted item
|
||||||
|
@ -265,6 +267,7 @@ public:
|
||||||
Space & add(const Space & space);
|
Space & add(const Space & space);
|
||||||
Space & add(const Space * space);
|
Space & add(const Space * space);
|
||||||
Space & add(Space && space);
|
Space & add(Space && space);
|
||||||
|
Space & add(const Date & date);
|
||||||
Space & add_empty_space(); // IMPROVEME rename me to something better
|
Space & add_empty_space(); // IMPROVEME rename me to something better
|
||||||
|
|
||||||
|
|
||||||
|
@ -289,6 +292,7 @@ public:
|
||||||
Space & add(const wchar_t * field, const Space & space);
|
Space & add(const wchar_t * field, const Space & space);
|
||||||
Space & add(const wchar_t * field, const Space * space);
|
Space & add(const wchar_t * field, const Space * space);
|
||||||
Space & add(const wchar_t * field, Space && space);
|
Space & add(const wchar_t * field, Space && space);
|
||||||
|
Space & add(const wchar_t * field, const Date & date);
|
||||||
Space & add_empty_space(const wchar_t * field); // IMPROVEME rename me to something better
|
Space & add_empty_space(const wchar_t * field); // IMPROVEME rename me to something better
|
||||||
|
|
||||||
Space & add(const std::wstring & field, bool val);
|
Space & add(const std::wstring & field, bool val);
|
||||||
|
@ -310,6 +314,7 @@ public:
|
||||||
Space & add(const std::wstring & field, const Space & space);
|
Space & add(const std::wstring & field, const Space & space);
|
||||||
Space & add(const std::wstring & field, const Space * space);
|
Space & add(const std::wstring & field, const Space * space);
|
||||||
Space & add(const std::wstring & field, Space && space);
|
Space & add(const std::wstring & field, Space && space);
|
||||||
|
Space & add(const std::wstring & field, const Date & date);
|
||||||
Space & add_empty_space(const std::wstring & field); // IMPROVEME rename me to something better
|
Space & add_empty_space(const std::wstring & field); // IMPROVEME rename me to something better
|
||||||
|
|
||||||
|
|
||||||
|
@ -555,10 +560,10 @@ public:
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
std::string serialize_to_json_str() const;
|
std::string serialize_to_json_str(bool pretty_print = false) const;
|
||||||
std::wstring serialize_to_json_wstr() const;
|
std::wstring serialize_to_json_wstr(bool pretty_print = false) const;
|
||||||
void serialize_to_json_to(std::string & str) const;
|
void serialize_to_json_to(std::string & str, bool pretty_print = false) const;
|
||||||
void serialize_to_json_to(std::wstring & str) const;
|
void serialize_to_json_to(std::wstring & str, bool pretty_print = false) const;
|
||||||
|
|
||||||
|
|
||||||
template<typename StreamType>
|
template<typename StreamType>
|
||||||
|
@ -630,7 +635,7 @@ public:
|
||||||
bool has_value(const wchar_t * field, const std::wstring & val) const;
|
bool has_value(const wchar_t * field, const std::wstring & val) const;
|
||||||
|
|
||||||
|
|
||||||
|
Space * get_table_item(size_t index);
|
||||||
|
|
||||||
|
|
||||||
protected:
|
protected:
|
||||||
|
|
|
@ -5,7 +5,7 @@
|
||||||
*/
|
*/
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Copyright (c) 2012-2021, Tomasz Sowa
|
* Copyright (c) 2012-2022, Tomasz Sowa
|
||||||
* All rights reserved.
|
* All rights reserved.
|
||||||
*
|
*
|
||||||
* Redistribution and use in source and binary forms, with or without
|
* Redistribution and use in source and binary forms, with or without
|
||||||
|
@ -55,6 +55,10 @@ SpaceParser::SpaceParser()
|
||||||
space_end = '}';
|
space_end = '}';
|
||||||
option_delimiter = ',';
|
option_delimiter = ',';
|
||||||
input_as_utf8 = true;
|
input_as_utf8 = true;
|
||||||
|
object_items_limit = 0;
|
||||||
|
table_items_limit = 0;
|
||||||
|
all_items_limit = 0;
|
||||||
|
nested_levels_limit = 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
@ -71,14 +75,78 @@ int SpaceParser::get_last_parsed_line()
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
int SpaceParser::get_last_parsed_column()
|
||||||
|
{
|
||||||
|
return column;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
void SpaceParser::set_object_items_limit(size_t val)
|
||||||
|
{
|
||||||
|
this->object_items_limit = val;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
void SpaceParser::set_table_items_limit(size_t val)
|
||||||
|
{
|
||||||
|
this->table_items_limit = val;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
void SpaceParser::set_all_items_limit(size_t val)
|
||||||
|
{
|
||||||
|
this->all_items_limit = val;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
void SpaceParser::set_nested_level_limit(size_t val)
|
||||||
|
{
|
||||||
|
this->nested_levels_limit = val;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
size_t SpaceParser::get_object_items_limit()
|
||||||
|
{
|
||||||
|
return object_items_limit;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
size_t SpaceParser::get_table_items_limit()
|
||||||
|
{
|
||||||
|
return table_items_limit;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
size_t SpaceParser::get_all_items_limit()
|
||||||
|
{
|
||||||
|
return all_items_limit;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
size_t SpaceParser::get_nested_level_limit()
|
||||||
|
{
|
||||||
|
return nested_levels_limit;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
void SpaceParser::prepare_to_parsing()
|
||||||
|
{
|
||||||
|
clear_input_flags();
|
||||||
|
|
||||||
|
current_items_counter = 0;
|
||||||
|
current_nested_level = 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
SpaceParser::Status SpaceParser::parse_json_file(const char * file_name, Space & out_space, bool clear_space)
|
SpaceParser::Status SpaceParser::parse_json_file(const char * file_name, Space & out_space, bool clear_space)
|
||||||
{
|
{
|
||||||
|
prepare_to_parsing();
|
||||||
|
|
||||||
reading_from_file = true;
|
reading_from_file = true;
|
||||||
parsing_space = false;
|
parsing_space = false;
|
||||||
root_space = &out_space;
|
root_space = &out_space;
|
||||||
|
|
||||||
file.clear();
|
|
||||||
file.open(file_name, std::ios_base::binary | std::ios_base::in);
|
file.open(file_name, std::ios_base::binary | std::ios_base::in);
|
||||||
|
|
||||||
if( file )
|
if( file )
|
||||||
|
@ -125,11 +193,12 @@ SpaceParser::Status SpaceParser::parse_json_file(const std::wstring & file_name,
|
||||||
|
|
||||||
SpaceParser::Status SpaceParser::parse_space_file(const char * file_name, Space & out_space, bool clear_space)
|
SpaceParser::Status SpaceParser::parse_space_file(const char * file_name, Space & out_space, bool clear_space)
|
||||||
{
|
{
|
||||||
|
prepare_to_parsing();
|
||||||
|
|
||||||
reading_from_file = true;
|
reading_from_file = true;
|
||||||
parsing_space = true;
|
parsing_space = true;
|
||||||
root_space = &out_space;
|
root_space = &out_space;
|
||||||
|
|
||||||
file.clear();
|
|
||||||
file.open(file_name, std::ios_base::binary | std::ios_base::in);
|
file.open(file_name, std::ios_base::binary | std::ios_base::in);
|
||||||
|
|
||||||
if( file )
|
if( file )
|
||||||
|
@ -174,10 +243,9 @@ SpaceParser::Status SpaceParser::parse_space_file(const std::wstring & file_name
|
||||||
|
|
||||||
SpaceParser::Status SpaceParser::parse_json(const char * str, Space & out_space, bool clear_space)
|
SpaceParser::Status SpaceParser::parse_json(const char * str, Space & out_space, bool clear_space)
|
||||||
{
|
{
|
||||||
reading_from_file = false;
|
prepare_to_parsing();
|
||||||
reading_from_wchar_string = false;
|
|
||||||
pchar_ascii = str;
|
pchar_ascii = str;
|
||||||
pchar_unicode = 0;
|
|
||||||
parsing_space = false;
|
parsing_space = false;
|
||||||
root_space = &out_space;
|
root_space = &out_space;
|
||||||
|
|
||||||
|
@ -195,10 +263,9 @@ SpaceParser::Status SpaceParser::parse_json(const std::string & str, Space & out
|
||||||
|
|
||||||
SpaceParser::Status SpaceParser::parse_json(const wchar_t * str, Space & out_space, bool clear_space)
|
SpaceParser::Status SpaceParser::parse_json(const wchar_t * str, Space & out_space, bool clear_space)
|
||||||
{
|
{
|
||||||
reading_from_file = false;
|
prepare_to_parsing();
|
||||||
reading_from_wchar_string = true;
|
|
||||||
pchar_unicode = str;
|
pchar_unicode = str;
|
||||||
pchar_ascii = 0;
|
|
||||||
parsing_space = false;
|
parsing_space = false;
|
||||||
root_space = &out_space;
|
root_space = &out_space;
|
||||||
|
|
||||||
|
@ -215,14 +282,50 @@ SpaceParser::Status SpaceParser::parse_json(const std::wstring & str, Space & ou
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
SpaceParser::Status SpaceParser::parse_json(const pt::TextStream & str, Space & out_space, bool clear_space)
|
||||||
|
{
|
||||||
|
prepare_to_parsing();
|
||||||
|
|
||||||
|
pt::TextStream::const_iterator start = str.begin();
|
||||||
|
pt::TextStream::const_iterator end = str.end();
|
||||||
|
|
||||||
|
text_stream_iterator = &start;
|
||||||
|
text_stream_iterator_end = &end;
|
||||||
|
parsing_space = false;
|
||||||
|
root_space = &out_space;
|
||||||
|
|
||||||
|
parse_root_space(clear_space);
|
||||||
|
|
||||||
|
return status;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
SpaceParser::Status SpaceParser::parse_json(const pt::WTextStream & str, Space & out_space, bool clear_space)
|
||||||
|
{
|
||||||
|
prepare_to_parsing();
|
||||||
|
|
||||||
|
pt::WTextStream::const_iterator start = str.begin();
|
||||||
|
pt::WTextStream::const_iterator end = str.end();
|
||||||
|
|
||||||
|
wtext_stream_iterator = &start;
|
||||||
|
wtext_stream_iterator_end = &end;
|
||||||
|
parsing_space = false;
|
||||||
|
root_space = &out_space;
|
||||||
|
|
||||||
|
parse_root_space(clear_space);
|
||||||
|
|
||||||
|
return status;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
SpaceParser::Status SpaceParser::parse_space(const char * str, Space & out_space, bool clear_space)
|
SpaceParser::Status SpaceParser::parse_space(const char * str, Space & out_space, bool clear_space)
|
||||||
{
|
{
|
||||||
reading_from_file = false;
|
prepare_to_parsing();
|
||||||
reading_from_wchar_string = false;
|
|
||||||
pchar_ascii = str;
|
pchar_ascii = str;
|
||||||
pchar_unicode = 0;
|
|
||||||
parsing_space = true;
|
parsing_space = true;
|
||||||
root_space = &out_space;
|
root_space = &out_space;
|
||||||
|
|
||||||
|
@ -240,10 +343,9 @@ SpaceParser::Status SpaceParser::parse_space(const std::string & str, Space & ou
|
||||||
|
|
||||||
SpaceParser::Status SpaceParser::parse_space(const wchar_t * str, Space & out_space, bool clear_space)
|
SpaceParser::Status SpaceParser::parse_space(const wchar_t * str, Space & out_space, bool clear_space)
|
||||||
{
|
{
|
||||||
reading_from_file = false;
|
prepare_to_parsing();
|
||||||
reading_from_wchar_string = true;
|
|
||||||
pchar_unicode = str;
|
pchar_unicode = str;
|
||||||
pchar_ascii = 0;
|
|
||||||
parsing_space = true;
|
parsing_space = true;
|
||||||
root_space = &out_space;
|
root_space = &out_space;
|
||||||
|
|
||||||
|
@ -259,6 +361,41 @@ SpaceParser::Status SpaceParser::parse_space(const std::wstring & str, Space & o
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
/*
 * Parse text in the Space format stored in a pt::TextStream and put the result in out_space.
 *
 * str         - input stream, read through its const_iterator range [begin(), end())
 * out_space   - becomes the root space of the parsed tree
 * clear_space - forwarded unchanged to parse_root_space()
 *
 * Returns the parser status as left by parse_root_space().
 */
SpaceParser::Status SpaceParser::parse_space(const pt::TextStream & str, Space & out_space, bool clear_space)
{
	prepare_to_parsing();

	pt::TextStream::const_iterator start = str.begin();
	pt::TextStream::const_iterator end = str.end();

	text_stream_iterator = &start;
	text_stream_iterator_end = &end;
	parsing_space = true; // true means the Space format (not JSON) is being parsed
	root_space = &out_space;

	parse_root_space(clear_space);

	// start and end live on this stack frame: do not leave the members
	// pointing at destroyed objects once we return
	text_stream_iterator = nullptr;
	text_stream_iterator_end = nullptr;

	return status;
}
|
||||||
|
|
||||||
|
|
||||||
|
/*
 * Parse text in the Space format stored in a pt::WTextStream and put the result in out_space.
 *
 * str         - input stream, read through its const_iterator range [begin(), end())
 * out_space   - becomes the root space of the parsed tree
 * clear_space - forwarded unchanged to parse_root_space()
 *
 * Returns the parser status as left by parse_root_space().
 */
SpaceParser::Status SpaceParser::parse_space(const pt::WTextStream & str, Space & out_space, bool clear_space)
{
	prepare_to_parsing();

	pt::WTextStream::const_iterator start = str.begin();
	pt::WTextStream::const_iterator end = str.end();

	wtext_stream_iterator = &start;
	wtext_stream_iterator_end = &end;
	parsing_space = true; // true means the Space format (not JSON) is being parsed
	root_space = &out_space;

	parse_root_space(clear_space);

	// start and end live on this stack frame: do not leave the members
	// pointing at destroyed objects once we return
	wtext_stream_iterator = nullptr;
	wtext_stream_iterator_end = nullptr;

	return status;
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
@ -289,10 +426,13 @@ void SpaceParser::parse_root_space(bool clear_root_space)
|
||||||
parse(root_space, false, false);
|
parse(root_space, false, false);
|
||||||
}
|
}
|
||||||
|
|
||||||
skip_white();
|
if( status == ok )
|
||||||
|
{
|
||||||
|
skip_white();
|
||||||
|
|
||||||
if( lastc != -1 )
|
if( lastc != -1 )
|
||||||
status = syntax_error;
|
status = syntax_error;
|
||||||
|
}
|
||||||
|
|
||||||
token.clear();
|
token.clear();
|
||||||
}
|
}
|
||||||
|
@ -364,32 +504,45 @@ void SpaceParser::parse(Space * space, bool is_object_value, bool is_table_value
|
||||||
|
|
||||||
void SpaceParser::parse_space(Space * space)
|
void SpaceParser::parse_space(Space * space)
|
||||||
{
|
{
|
||||||
/*
|
if( nested_levels_limit == 0 || current_nested_level++ < nested_levels_limit )
|
||||||
* in Space format in global namespace the space start character is not required
|
|
||||||
*/
|
|
||||||
bool need_space_start_character = !parsing_space || space != root_space;
|
|
||||||
|
|
||||||
if( need_space_start_character )
|
|
||||||
{
|
{
|
||||||
read_char(); // inserting a next character after the space_start char to lastc
|
/*
|
||||||
}
|
* in Space format in global namespace the space start character is not required
|
||||||
|
*/
|
||||||
|
bool need_space_start_character = !parsing_space || space != root_space;
|
||||||
|
|
||||||
if( !space->is_object() )
|
if( need_space_start_character )
|
||||||
space->set_empty_object();
|
|
||||||
|
|
||||||
parse_key_value_pairs(space);
|
|
||||||
|
|
||||||
if( need_space_start_character )
|
|
||||||
{
|
|
||||||
if( lastc == space_end )
|
|
||||||
{
|
{
|
||||||
read_char();
|
read_char(); // inserting a next character after the space_start char to lastc
|
||||||
}
|
}
|
||||||
else
|
|
||||||
|
if( !space->is_object() )
|
||||||
|
space->set_empty_object();
|
||||||
|
|
||||||
|
parse_key_value_pairs(space);
|
||||||
|
|
||||||
|
if( status == ok )
|
||||||
{
|
{
|
||||||
status = syntax_error;
|
if( need_space_start_character )
|
||||||
|
{
|
||||||
|
if( lastc == space_end )
|
||||||
|
{
|
||||||
|
read_char();
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
status = syntax_error;
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
status = limit_nested_level_exceeded;
|
||||||
|
}
|
||||||
|
|
||||||
|
if( current_nested_level > 0 )
|
||||||
|
current_nested_level -= 1;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
@ -465,18 +618,31 @@ void SpaceParser::parse_floating_point_value(Space * space)
|
||||||
|
|
||||||
void SpaceParser::parse_table(Space * space)
|
void SpaceParser::parse_table(Space * space)
|
||||||
{
|
{
|
||||||
read_char(); // inserting a next character after the table_start char to lastc
|
if( nested_levels_limit == 0 || current_nested_level++ < nested_levels_limit )
|
||||||
space->set_empty_table();
|
|
||||||
parse_values_list(space);
|
|
||||||
|
|
||||||
if( lastc == table_end )
|
|
||||||
{
|
{
|
||||||
read_char();
|
read_char(); // inserting a next character after the table_start char to lastc
|
||||||
|
space->set_empty_table();
|
||||||
|
parse_values_list(space);
|
||||||
|
|
||||||
|
if( status == ok )
|
||||||
|
{
|
||||||
|
if( lastc == table_end )
|
||||||
|
{
|
||||||
|
read_char();
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
status = syntax_error;
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
status = syntax_error;
|
status = limit_nested_level_exceeded;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if( current_nested_level > 0 )
|
||||||
|
current_nested_level -= 1;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
@ -526,8 +692,23 @@ void SpaceParser::parse_key_value_pairs(Space * space)
|
||||||
{
|
{
|
||||||
read_char(); // inserting a next character after the separator to lastc
|
read_char(); // inserting a next character after the separator to lastc
|
||||||
|
|
||||||
Space & new_space = space->add(token.c_str(), new Space());
|
if( object_items_limit == 0 || !space->is_object() || (space->object_size() < object_items_limit) )
|
||||||
parse(&new_space, true, false);
|
{
|
||||||
|
Space & new_space = space->add(token.c_str(), new Space());
|
||||||
|
|
||||||
|
if( all_items_limit == 0 || current_items_counter++ < all_items_limit )
|
||||||
|
{
|
||||||
|
parse(&new_space, true, false);
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
status = limit_all_items_exceeded;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
status = limit_object_items_exceeded;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
|
@ -537,7 +718,11 @@ void SpaceParser::parse_key_value_pairs(Space * space)
|
||||||
}
|
}
|
||||||
|
|
||||||
is_first = false;
|
is_first = false;
|
||||||
skip_white();
|
|
||||||
|
if( status == ok )
|
||||||
|
{
|
||||||
|
skip_white();
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -578,12 +763,31 @@ void SpaceParser::parse_values_list(Space * space)
|
||||||
|
|
||||||
if( status == ok )
|
if( status == ok )
|
||||||
{
|
{
|
||||||
Space * new_space = &space->add(new Space());
|
if( table_items_limit == 0 || !space->is_table() || (space->table_size() < table_items_limit) )
|
||||||
parse(new_space, false, true);
|
{
|
||||||
|
Space * new_space = &space->add(new Space());
|
||||||
|
|
||||||
|
if( all_items_limit == 0 || current_items_counter++ < all_items_limit )
|
||||||
|
{
|
||||||
|
parse(new_space, false, true);
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
status = limit_all_items_exceeded;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
status = limit_table_items_exceeded;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
is_first = false;
|
is_first = false;
|
||||||
skip_white();
|
|
||||||
|
if( status == ok )
|
||||||
|
{
|
||||||
|
skip_white();
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -891,122 +1095,6 @@ void SpaceParser::read_key()
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
int SpaceParser::read_utf8_char()
|
|
||||||
{
|
|
||||||
int c;
|
|
||||||
bool correct;
|
|
||||||
|
|
||||||
lastc = -1;
|
|
||||||
|
|
||||||
do
|
|
||||||
{
|
|
||||||
utf8_to_int(file, c, correct);
|
|
||||||
|
|
||||||
if( !file )
|
|
||||||
return lastc;
|
|
||||||
}
|
|
||||||
while( !correct );
|
|
||||||
|
|
||||||
lastc = c;
|
|
||||||
|
|
||||||
if( lastc == '\n' )
|
|
||||||
++line;
|
|
||||||
|
|
||||||
return lastc;
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
int SpaceParser::read_ascii_char()
|
|
||||||
{
|
|
||||||
lastc = file.get();
|
|
||||||
|
|
||||||
if( lastc == '\n' )
|
|
||||||
++line;
|
|
||||||
|
|
||||||
return lastc;
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
int SpaceParser::read_char_from_wchar_string()
|
|
||||||
{
|
|
||||||
if( *pchar_unicode == 0 )
|
|
||||||
lastc = -1;
|
|
||||||
else
|
|
||||||
lastc = *(pchar_unicode++);
|
|
||||||
|
|
||||||
if( lastc == '\n' )
|
|
||||||
++line;
|
|
||||||
|
|
||||||
return lastc;
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
int SpaceParser::read_char_from_utf8_string()
|
|
||||||
{
|
|
||||||
int c;
|
|
||||||
bool correct;
|
|
||||||
|
|
||||||
lastc = -1;
|
|
||||||
|
|
||||||
do
|
|
||||||
{
|
|
||||||
size_t len = utf8_to_int(pchar_ascii, c, correct);
|
|
||||||
pchar_ascii += len;
|
|
||||||
}
|
|
||||||
while( *pchar_ascii && !correct );
|
|
||||||
|
|
||||||
if( correct )
|
|
||||||
lastc = c;
|
|
||||||
|
|
||||||
if( lastc == '\n' )
|
|
||||||
++line;
|
|
||||||
|
|
||||||
return lastc;
|
|
||||||
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
int SpaceParser::read_char_from_ascii_string()
|
|
||||||
{
|
|
||||||
if( *pchar_ascii == 0 )
|
|
||||||
lastc = -1;
|
|
||||||
else
|
|
||||||
lastc = *(pchar_ascii++);
|
|
||||||
|
|
||||||
if( lastc == '\n' )
|
|
||||||
++line;
|
|
||||||
|
|
||||||
return lastc;
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
int SpaceParser::read_char_no_escape()
|
|
||||||
{
|
|
||||||
if( reading_from_file )
|
|
||||||
{
|
|
||||||
if( input_as_utf8 )
|
|
||||||
return read_utf8_char();
|
|
||||||
else
|
|
||||||
return read_ascii_char();
|
|
||||||
}
|
|
||||||
else
|
|
||||||
{
|
|
||||||
if( reading_from_wchar_string )
|
|
||||||
{
|
|
||||||
return read_char_from_wchar_string();
|
|
||||||
}
|
|
||||||
else
|
|
||||||
{
|
|
||||||
if( input_as_utf8 )
|
|
||||||
return read_char_from_utf8_string();
|
|
||||||
else
|
|
||||||
return read_char_from_ascii_string();
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
bool SpaceParser::is_hex_digit(wchar_t c)
|
bool SpaceParser::is_hex_digit(wchar_t c)
|
||||||
{
|
{
|
||||||
|
|
|
@ -5,7 +5,7 @@
|
||||||
*/
|
*/
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Copyright (c) 2012-2021, Tomasz Sowa
|
* Copyright (c) 2012-2022, Tomasz Sowa
|
||||||
* All rights reserved.
|
* All rights reserved.
|
||||||
*
|
*
|
||||||
* Redistribution and use in source and binary forms, with or without
|
* Redistribution and use in source and binary forms, with or without
|
||||||
|
@ -40,6 +40,7 @@
|
||||||
|
|
||||||
#include <fstream>
|
#include <fstream>
|
||||||
#include "space.h"
|
#include "space.h"
|
||||||
|
#include "convert/baseparser.h"
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
@ -49,7 +50,7 @@ namespace pt
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
class SpaceParser
|
class SpaceParser : public BaseParser
|
||||||
{
|
{
|
||||||
public:
|
public:
|
||||||
|
|
||||||
|
@ -62,8 +63,23 @@ public:
|
||||||
|
|
||||||
/*
|
/*
|
||||||
status of parsing
|
status of parsing
|
||||||
|
ok - input stream has been parsed correctly
|
||||||
|
cant_open_file - the file could not be opened (returned only when parsing a file)
|
||||||
|
syntax_error - a syntax error in the input stream
|
||||||
|
limit_object_items_exceeded - limit of object items has been exceeded
|
||||||
|
limit_table_items_exceeded - limit of table items has been exceeded
|
||||||
|
limit_all_items_exceeded - limit of items (key/value pairs of objects or table items) throughout the whole tree has been exceeded
|
||||||
|
limit_nested_level_exceeded - limit of nested objects/tables has been exceeded
|
||||||
*/
|
*/
|
||||||
enum Status { ok, cant_open_file, syntax_error };
|
enum Status {
|
||||||
|
ok,
|
||||||
|
cant_open_file,
|
||||||
|
syntax_error,
|
||||||
|
limit_object_items_exceeded,
|
||||||
|
limit_table_items_exceeded,
|
||||||
|
limit_all_items_exceeded,
|
||||||
|
limit_nested_level_exceeded
|
||||||
|
};
|
||||||
|
|
||||||
|
|
||||||
/*
|
/*
|
||||||
|
@ -108,6 +124,8 @@ public:
|
||||||
Status parse_json(const wchar_t * str, Space & out_space, bool clear_space = true);
|
Status parse_json(const wchar_t * str, Space & out_space, bool clear_space = true);
|
||||||
Status parse_json(const std::wstring & str, Space & out_space, bool clear_space = true);
|
Status parse_json(const std::wstring & str, Space & out_space, bool clear_space = true);
|
||||||
|
|
||||||
|
Status parse_json(const pt::TextStream & str, Space & out_space, bool clear_space = true);
|
||||||
|
Status parse_json(const pt::WTextStream & str, Space & out_space, bool clear_space = true);
|
||||||
|
|
||||||
|
|
||||||
Status parse_space(const char * str, Space & out_space, bool clear_space = true);
|
Status parse_space(const char * str, Space & out_space, bool clear_space = true);
|
||||||
|
@ -115,6 +133,8 @@ public:
|
||||||
Status parse_space(const wchar_t * str, Space & out_space, bool clear_space = true);
|
Status parse_space(const wchar_t * str, Space & out_space, bool clear_space = true);
|
||||||
Status parse_space(const std::wstring & str, Space & out_space, bool clear_space = true);
|
Status parse_space(const std::wstring & str, Space & out_space, bool clear_space = true);
|
||||||
|
|
||||||
|
Status parse_space(const pt::TextStream & str, Space & out_space, bool clear_space = true);
|
||||||
|
Status parse_space(const pt::WTextStream & str, Space & out_space, bool clear_space = true);
|
||||||
|
|
||||||
|
|
||||||
/*
|
/*
|
||||||
|
@ -143,6 +163,45 @@ public:
|
||||||
*
|
*
|
||||||
*/
|
*/
|
||||||
int get_last_parsed_line();
|
int get_last_parsed_line();
|
||||||
|
int get_last_parsed_column();
|
||||||
|
|
||||||
|
|
||||||
|
/*
|
||||||
|
* get/set limit of object items in one object
|
||||||
|
* default: 0 (disabled)
|
||||||
|
*/
|
||||||
|
void set_object_items_limit(size_t val);
|
||||||
|
size_t get_object_items_limit();
|
||||||
|
|
||||||
|
|
||||||
|
/*
|
||||||
|
* get/set limit of items in one table
|
||||||
|
* default: 0 (disabled)
|
||||||
|
*
|
||||||
|
*/
|
||||||
|
void set_table_items_limit(size_t val);
|
||||||
|
size_t get_table_items_limit();
|
||||||
|
|
||||||
|
|
||||||
|
/*
|
||||||
|
* get/set limit of all items (object items and table items) throughout the whole tree
|
||||||
|
* default: 0 (disabled)
|
||||||
|
*
|
||||||
|
*/
|
||||||
|
void set_all_items_limit(size_t val);
|
||||||
|
size_t get_all_items_limit();
|
||||||
|
|
||||||
|
|
||||||
|
/*
|
||||||
|
* get/set nested level limit
|
||||||
|
* limit of nested objects and tables
|
||||||
|
* default: 0 (disabled)
|
||||||
|
*
|
||||||
|
*/
|
||||||
|
void set_nested_level_limit(size_t val);
|
||||||
|
size_t get_nested_level_limit();
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
private:
|
private:
|
||||||
|
@ -154,32 +213,6 @@ private:
|
||||||
Space * root_space;
|
Space * root_space;
|
||||||
|
|
||||||
|
|
||||||
/*
|
|
||||||
a number of a line in which there is a syntax_error
|
|
||||||
*/
|
|
||||||
int line;
|
|
||||||
|
|
||||||
/*
|
|
||||||
true if parse() method was called
|
|
||||||
false if ParseString() was called
|
|
||||||
*/
|
|
||||||
bool reading_from_file;
|
|
||||||
|
|
||||||
|
|
||||||
/*
|
|
||||||
pointers to the current character
|
|
||||||
if ParseString() is in used
|
|
||||||
*/
|
|
||||||
const char * pchar_ascii;
|
|
||||||
const wchar_t * pchar_unicode;
|
|
||||||
|
|
||||||
|
|
||||||
/*
|
|
||||||
true if ParseString(wchar_t *) or ParseString(std::wstring&) was called
|
|
||||||
*/
|
|
||||||
bool reading_from_wchar_string;
|
|
||||||
|
|
||||||
|
|
||||||
/*
|
/*
|
||||||
last read token
|
last read token
|
||||||
*/
|
*/
|
||||||
|
@ -222,13 +255,6 @@ private:
|
||||||
int option_delimiter;
|
int option_delimiter;
|
||||||
|
|
||||||
|
|
||||||
/*
|
|
||||||
last read char
|
|
||||||
or -1 if the end
|
|
||||||
*/
|
|
||||||
int lastc;
|
|
||||||
|
|
||||||
|
|
||||||
/*
|
/*
|
||||||
true if the lastc was escaped (with a backslash)
|
true if the lastc was escaped (with a backslash)
|
||||||
we have to know if the last sequence was \" or just "
|
we have to know if the last sequence was \" or just "
|
||||||
|
@ -236,22 +262,6 @@ private:
|
||||||
bool char_was_escaped;
|
bool char_was_escaped;
|
||||||
|
|
||||||
|
|
||||||
/*
|
|
||||||
current file
|
|
||||||
|
|
||||||
may it would be better to make a pointer?
|
|
||||||
if we parse only a string then there is no sense to have such an object
|
|
||||||
*/
|
|
||||||
std::ifstream file;
|
|
||||||
|
|
||||||
|
|
||||||
/*
|
|
||||||
input file is in UTF-8
|
|
||||||
default: true
|
|
||||||
*/
|
|
||||||
bool input_as_utf8;
|
|
||||||
|
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* if parsing_space is false then it means we are parsing JSON format
|
* if parsing_space is false then it means we are parsing JSON format
|
||||||
*
|
*
|
||||||
|
@ -259,6 +269,23 @@ private:
|
||||||
bool parsing_space;
|
bool parsing_space;
|
||||||
|
|
||||||
|
|
||||||
|
/*
|
||||||
|
* object_items_limit - limit of key/value pairs of one object
|
||||||
|
* table_items_limit - limit of items of one table
|
||||||
|
* all_items_limit - limit of all items of all objects and all tables
|
||||||
|
* nested_levels_limit - limit of nested objects/tables
|
||||||
|
*/
|
||||||
|
size_t object_items_limit;
|
||||||
|
size_t table_items_limit;
|
||||||
|
size_t all_items_limit;
|
||||||
|
size_t nested_levels_limit;
|
||||||
|
|
||||||
|
/*
|
||||||
|
* current_items_counter - number of items (key/value pairs of objects or table items) counted so far throughout the whole tree
|
||||||
|
* current_nested_level - current nested level of objects and tables
|
||||||
|
*/
|
||||||
|
size_t current_items_counter;
|
||||||
|
size_t current_nested_level;
|
||||||
|
|
||||||
void parse_root_space(bool clear_root_space);
|
void parse_root_space(bool clear_root_space);
|
||||||
void parse(Space * space, bool is_object_value, bool is_table_value);
|
void parse(Space * space, bool is_object_value, bool is_table_value);
|
||||||
|
@ -287,12 +314,6 @@ private:
|
||||||
void read_token_quoted(std::wstring & token);
|
void read_token_quoted(std::wstring & token);
|
||||||
void read_multiline_token_quoted(std::wstring & token);
|
void read_multiline_token_quoted(std::wstring & token);
|
||||||
|
|
||||||
int read_utf8_char();
|
|
||||||
int read_ascii_char();
|
|
||||||
int read_char_from_wchar_string();
|
|
||||||
int read_char_from_utf8_string();
|
|
||||||
int read_char_from_ascii_string();
|
|
||||||
int read_char_no_escape();
|
|
||||||
int read_char();
|
int read_char();
|
||||||
bool is_white(int c);
|
bool is_white(int c);
|
||||||
void skip_line();
|
void skip_line();
|
||||||
|
@ -306,6 +327,8 @@ private:
|
||||||
void read_unicode_floating_format();
|
void read_unicode_floating_format();
|
||||||
void read_unicode_code_point();
|
void read_unicode_code_point();
|
||||||
|
|
||||||
|
void prepare_to_parsing();
|
||||||
|
|
||||||
};
|
};
|
||||||
|
|
||||||
|
|
||||||
|
|
|
@ -5,7 +5,7 @@
|
||||||
*/
|
*/
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Copyright (c) 2012-2021, Tomasz Sowa
|
* Copyright (c) 2012-2022, Tomasz Sowa
|
||||||
* All rights reserved.
|
* All rights reserved.
|
||||||
*
|
*
|
||||||
* Redistribution and use in source and binary forms, with or without
|
* Redistribution and use in source and binary forms, with or without
|
||||||
|
@ -46,6 +46,7 @@
|
||||||
#include "membuffer/membuffer.h"
|
#include "membuffer/membuffer.h"
|
||||||
#include "types.h"
|
#include "types.h"
|
||||||
#include "utf8/utf8.h"
|
#include "utf8/utf8.h"
|
||||||
|
#include "utf8/utf8_stream.h"
|
||||||
|
|
||||||
// for snprintf
|
// for snprintf
|
||||||
#include <cstdio>
|
#include <cstdio>
|
||||||
|
@ -71,8 +72,67 @@ public:
|
||||||
typedef CharT char_type;
|
typedef CharT char_type;
|
||||||
|
|
||||||
typedef MemBuffer<char_type, stack_size, heap_block_size> buffer_type;
|
typedef MemBuffer<char_type, stack_size, heap_block_size> buffer_type;
|
||||||
typedef typename buffer_type::iterator iterator;
|
|
||||||
typedef typename buffer_type::const_iterator const_iterator;
|
|
||||||
|
class iterator
|
||||||
|
{
|
||||||
|
public:
|
||||||
|
|
||||||
|
typename buffer_type::iterator membuffer_iterator;
|
||||||
|
|
||||||
|
iterator();
|
||||||
|
iterator(const iterator & i);
|
||||||
|
iterator & operator=(const iterator & i);
|
||||||
|
|
||||||
|
iterator(const typename buffer_type::iterator & i);
|
||||||
|
iterator & operator=(const typename buffer_type::iterator & i);
|
||||||
|
|
||||||
|
bool operator==(const iterator & i) const;
|
||||||
|
bool operator!=(const iterator & i) const;
|
||||||
|
|
||||||
|
iterator & operator++(); // prefix ++
|
||||||
|
iterator operator++(int); // postfix ++
|
||||||
|
|
||||||
|
iterator & operator--(); // prefix --
|
||||||
|
iterator operator--(int); // postfix --
|
||||||
|
|
||||||
|
CharT & operator*();
|
||||||
|
|
||||||
|
wchar_t get_unicode_and_advance(const iterator & end);
|
||||||
|
};
|
||||||
|
|
||||||
|
|
||||||
|
class const_iterator
|
||||||
|
{
|
||||||
|
public:
|
||||||
|
|
||||||
|
typename buffer_type::const_iterator membuffer_const_iterator;
|
||||||
|
|
||||||
|
const_iterator();
|
||||||
|
const_iterator(const const_iterator & i);
|
||||||
|
const_iterator(const iterator & i);
|
||||||
|
const_iterator & operator=(const const_iterator & i);
|
||||||
|
const_iterator & operator=(const iterator & i);
|
||||||
|
|
||||||
|
const_iterator(const typename buffer_type::const_iterator & i);
|
||||||
|
const_iterator(const typename buffer_type::iterator & i);
|
||||||
|
const_iterator & operator=(const typename buffer_type::const_iterator & i);
|
||||||
|
const_iterator & operator=(const typename buffer_type::iterator & i);
|
||||||
|
|
||||||
|
bool operator==(const const_iterator & i) const;
|
||||||
|
bool operator!=(const const_iterator & i) const;
|
||||||
|
|
||||||
|
const_iterator & operator++(); // prefix ++
|
||||||
|
const_iterator operator++(int); // postfix ++
|
||||||
|
|
||||||
|
const_iterator & operator--(); // prefix --
|
||||||
|
const_iterator operator--(int); // postfix --
|
||||||
|
|
||||||
|
CharT operator*();
|
||||||
|
|
||||||
|
wchar_t get_unicode_and_advance(const const_iterator & end);
|
||||||
|
|
||||||
|
};
|
||||||
|
|
||||||
|
|
||||||
bool is_char_stream() const;
|
bool is_char_stream() const;
|
||||||
|
@ -112,7 +172,7 @@ public:
|
||||||
|
|
||||||
TextStreamBase & operator<<(char);
|
TextStreamBase & operator<<(char);
|
||||||
TextStreamBase & operator<<(unsigned char);
|
TextStreamBase & operator<<(unsigned char);
|
||||||
TextStreamBase & operator<<(wchar_t);
|
TextStreamBase & operator<<(wchar_t); // no surrogate pairs are used
|
||||||
TextStreamBase & operator<<(bool);
|
TextStreamBase & operator<<(bool);
|
||||||
TextStreamBase & operator<<(short);
|
TextStreamBase & operator<<(short);
|
||||||
TextStreamBase & operator<<(int);
|
TextStreamBase & operator<<(int);
|
||||||
|
@ -173,6 +233,272 @@ TextStreamBase<char_type, stack_size, heap_block_size>::TextStreamBase()
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
template<typename char_type, size_t stack_size, size_t heap_block_size>
|
||||||
|
TextStreamBase<char_type, stack_size, heap_block_size>::iterator::iterator()
|
||||||
|
{
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
template<typename char_type, size_t stack_size, size_t heap_block_size>
|
||||||
|
TextStreamBase<char_type, stack_size, heap_block_size>::iterator::iterator(const iterator & i) : membuffer_iterator(i)
|
||||||
|
{
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
template<typename char_type, size_t stack_size, size_t heap_block_size>
|
||||||
|
typename TextStreamBase<char_type, stack_size, heap_block_size>::iterator &
|
||||||
|
TextStreamBase<char_type, stack_size, heap_block_size>::iterator::operator=(const iterator & i)
|
||||||
|
{
|
||||||
|
membuffer_iterator = i;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
template<typename char_type, size_t stack_size, size_t heap_block_size>
|
||||||
|
TextStreamBase<char_type, stack_size, heap_block_size>::iterator::iterator(const typename buffer_type::iterator & i) : membuffer_iterator(i)
|
||||||
|
{
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
template<typename char_type, size_t stack_size, size_t heap_block_size>
|
||||||
|
typename TextStreamBase<char_type, stack_size, heap_block_size>::iterator &
|
||||||
|
TextStreamBase<char_type, stack_size, heap_block_size>::iterator::operator=(const typename buffer_type::iterator & i)
|
||||||
|
{
|
||||||
|
membuffer_iterator = i;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
template<typename char_type, size_t stack_size, size_t heap_block_size>
|
||||||
|
bool TextStreamBase<char_type, stack_size, heap_block_size>::iterator::operator==(const iterator & i) const
|
||||||
|
{
|
||||||
|
return membuffer_iterator == i.membuffer_iterator;
|
||||||
|
}
|
||||||
|
|
||||||
|
template<typename char_type, size_t stack_size, size_t heap_block_size>
|
||||||
|
bool TextStreamBase<char_type, stack_size, heap_block_size>::iterator::operator!=(const iterator & i) const
|
||||||
|
{
|
||||||
|
return membuffer_iterator != i.membuffer_iterator;
|
||||||
|
}
|
||||||
|
|
||||||
|
template<typename char_type, size_t stack_size, size_t heap_block_size>
|
||||||
|
typename TextStreamBase<char_type, stack_size, heap_block_size>::iterator &
|
||||||
|
TextStreamBase<char_type, stack_size, heap_block_size>::iterator::operator++()
|
||||||
|
{
|
||||||
|
++membuffer_iterator;
|
||||||
|
return *this;
|
||||||
|
}
|
||||||
|
|
||||||
|
template<typename char_type, size_t stack_size, size_t heap_block_size>
|
||||||
|
typename TextStreamBase<char_type, stack_size, heap_block_size>::iterator
|
||||||
|
TextStreamBase<char_type, stack_size, heap_block_size>::iterator::operator++(int)
|
||||||
|
{
|
||||||
|
const_iterator old(*this);
|
||||||
|
membuffer_iterator++;
|
||||||
|
return old;
|
||||||
|
}
|
||||||
|
|
||||||
|
template<typename char_type, size_t stack_size, size_t heap_block_size>
|
||||||
|
typename TextStreamBase<char_type, stack_size, heap_block_size>::iterator &
|
||||||
|
TextStreamBase<char_type, stack_size, heap_block_size>::iterator::operator--()
|
||||||
|
{
|
||||||
|
--membuffer_iterator;
|
||||||
|
return *this;
|
||||||
|
}
|
||||||
|
|
||||||
|
template<typename char_type, size_t stack_size, size_t heap_block_size>
|
||||||
|
typename TextStreamBase<char_type, stack_size, heap_block_size>::iterator
|
||||||
|
TextStreamBase<char_type, stack_size, heap_block_size>::iterator::operator--(int)
|
||||||
|
{
|
||||||
|
const_iterator old(*this);
|
||||||
|
membuffer_iterator--;
|
||||||
|
return old;
|
||||||
|
}
|
||||||
|
|
||||||
|
template<typename char_type, size_t stack_size, size_t heap_block_size>
|
||||||
|
char_type & TextStreamBase<char_type, stack_size, heap_block_size>::iterator::operator*()
|
||||||
|
{
|
||||||
|
return *membuffer_iterator;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
template<typename char_type, size_t stack_size, size_t heap_block_size>
|
||||||
|
wchar_t TextStreamBase<char_type, stack_size, heap_block_size>::iterator::get_unicode_and_advance(const iterator & end)
|
||||||
|
{
|
||||||
|
if( *this != end )
|
||||||
|
{
|
||||||
|
if constexpr (sizeof(char_type) == sizeof(char) )
|
||||||
|
{
|
||||||
|
int res;
|
||||||
|
bool correct;
|
||||||
|
utf8_to_int(*this, end, res, correct);
|
||||||
|
|
||||||
|
if( correct )
|
||||||
|
return static_cast<wchar_t>(res);
|
||||||
|
else
|
||||||
|
return static_cast<wchar_t>(0xFFFD); // U+FFFD "replacement character"
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
wchar_t c = operator*();
|
||||||
|
++membuffer_iterator;
|
||||||
|
return c;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
template<typename char_type, size_t stack_size, size_t heap_block_size>
|
||||||
|
TextStreamBase<char_type, stack_size, heap_block_size>::const_iterator::const_iterator()
|
||||||
|
{
|
||||||
|
}
|
||||||
|
|
||||||
|
template<typename char_type, size_t stack_size, size_t heap_block_size>
|
||||||
|
TextStreamBase<char_type, stack_size, heap_block_size>::const_iterator::const_iterator(const const_iterator & i) : membuffer_const_iterator(i.membuffer_const_iterator)
|
||||||
|
{
|
||||||
|
}
|
||||||
|
|
||||||
|
template<typename char_type, size_t stack_size, size_t heap_block_size>
|
||||||
|
TextStreamBase<char_type, stack_size, heap_block_size>::const_iterator::const_iterator(const iterator & i) : membuffer_const_iterator(i.membuffer_iterator)
|
||||||
|
{
|
||||||
|
}
|
||||||
|
|
||||||
|
template<typename char_type, size_t stack_size, size_t heap_block_size>
|
||||||
|
typename TextStreamBase<char_type, stack_size, heap_block_size>::const_iterator &
|
||||||
|
TextStreamBase<char_type, stack_size, heap_block_size>::const_iterator::operator=(const const_iterator & i)
|
||||||
|
{
|
||||||
|
membuffer_const_iterator = i.membuffer_const_iterator;
|
||||||
|
return *this;
|
||||||
|
}
|
||||||
|
|
||||||
|
template<typename char_type, size_t stack_size, size_t heap_block_size>
|
||||||
|
typename TextStreamBase<char_type, stack_size, heap_block_size>::const_iterator &
|
||||||
|
TextStreamBase<char_type, stack_size, heap_block_size>::const_iterator::operator=(const iterator & i)
|
||||||
|
{
|
||||||
|
membuffer_const_iterator = i.membuffer_iterator;
|
||||||
|
return *this;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
template<typename char_type, size_t stack_size, size_t heap_block_size>
|
||||||
|
TextStreamBase<char_type, stack_size, heap_block_size>::const_iterator::const_iterator(const typename buffer_type::const_iterator & i) : membuffer_const_iterator(i)
|
||||||
|
{
|
||||||
|
}
|
||||||
|
|
||||||
|
template<typename char_type, size_t stack_size, size_t heap_block_size>
|
||||||
|
TextStreamBase<char_type, stack_size, heap_block_size>::const_iterator::const_iterator(const typename buffer_type::iterator & i) : membuffer_const_iterator(i)
|
||||||
|
{
|
||||||
|
}
|
||||||
|
|
||||||
|
template<typename char_type, size_t stack_size, size_t heap_block_size>
|
||||||
|
typename TextStreamBase<char_type, stack_size, heap_block_size>::const_iterator &
|
||||||
|
TextStreamBase<char_type, stack_size, heap_block_size>::const_iterator::operator=(const typename buffer_type::const_iterator & i)
|
||||||
|
{
|
||||||
|
membuffer_const_iterator = i;
|
||||||
|
return *this;
|
||||||
|
}
|
||||||
|
|
||||||
|
template<typename char_type, size_t stack_size, size_t heap_block_size>
|
||||||
|
typename TextStreamBase<char_type, stack_size, heap_block_size>::const_iterator &
|
||||||
|
TextStreamBase<char_type, stack_size, heap_block_size>::const_iterator::operator=(const typename buffer_type::iterator & i)
|
||||||
|
{
|
||||||
|
membuffer_const_iterator = i;
|
||||||
|
return *this;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
template<typename char_type, size_t stack_size, size_t heap_block_size>
|
||||||
|
bool TextStreamBase<char_type, stack_size, heap_block_size>::const_iterator::operator==(const const_iterator & i) const
|
||||||
|
{
|
||||||
|
return membuffer_const_iterator == i.membuffer_const_iterator;
|
||||||
|
}
|
||||||
|
|
||||||
|
template<typename char_type, size_t stack_size, size_t heap_block_size>
|
||||||
|
bool TextStreamBase<char_type, stack_size, heap_block_size>::const_iterator::operator!=(const const_iterator & i) const
|
||||||
|
{
|
||||||
|
return membuffer_const_iterator != i.membuffer_const_iterator;
|
||||||
|
}
|
||||||
|
|
||||||
|
template<typename char_type, size_t stack_size, size_t heap_block_size>
|
||||||
|
typename TextStreamBase<char_type, stack_size, heap_block_size>::const_iterator &
|
||||||
|
TextStreamBase<char_type, stack_size, heap_block_size>::const_iterator::operator++()
|
||||||
|
{
|
||||||
|
++membuffer_const_iterator;
|
||||||
|
return *this;
|
||||||
|
}
|
||||||
|
|
||||||
|
template<typename char_type, size_t stack_size, size_t heap_block_size>
|
||||||
|
typename TextStreamBase<char_type, stack_size, heap_block_size>::const_iterator
|
||||||
|
TextStreamBase<char_type, stack_size, heap_block_size>::const_iterator::operator++(int)
|
||||||
|
{
|
||||||
|
const_iterator old(*this);
|
||||||
|
membuffer_const_iterator++;
|
||||||
|
return old;
|
||||||
|
}
|
||||||
|
|
||||||
|
template<typename char_type, size_t stack_size, size_t heap_block_size>
|
||||||
|
typename TextStreamBase<char_type, stack_size, heap_block_size>::const_iterator &
|
||||||
|
TextStreamBase<char_type, stack_size, heap_block_size>::const_iterator::operator--()
|
||||||
|
{
|
||||||
|
--membuffer_const_iterator;
|
||||||
|
return *this;
|
||||||
|
}
|
||||||
|
|
||||||
|
template<typename char_type, size_t stack_size, size_t heap_block_size>
|
||||||
|
typename TextStreamBase<char_type, stack_size, heap_block_size>::const_iterator
|
||||||
|
TextStreamBase<char_type, stack_size, heap_block_size>::const_iterator::operator--(int)
|
||||||
|
{
|
||||||
|
const_iterator old(*this);
|
||||||
|
membuffer_const_iterator--;
|
||||||
|
return old;
|
||||||
|
}
|
||||||
|
|
||||||
|
template<typename char_type, size_t stack_size, size_t heap_block_size>
|
||||||
|
char_type TextStreamBase<char_type, stack_size, heap_block_size>::const_iterator::operator*()
|
||||||
|
{
|
||||||
|
return *membuffer_const_iterator;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
template<typename char_type, size_t stack_size, size_t heap_block_size>
|
||||||
|
wchar_t TextStreamBase<char_type, stack_size, heap_block_size>::const_iterator::get_unicode_and_advance(const const_iterator & end)
|
||||||
|
{
|
||||||
|
if( *this != end )
|
||||||
|
{
|
||||||
|
if constexpr (sizeof(char_type) == sizeof(char) )
|
||||||
|
{
|
||||||
|
int res;
|
||||||
|
bool correct;
|
||||||
|
pt::utf8_to_int(*this, end, res, correct);
|
||||||
|
|
||||||
|
if( correct )
|
||||||
|
return static_cast<wchar_t>(res);
|
||||||
|
else
|
||||||
|
return static_cast<wchar_t>(0xFFFD); // U+FFFD "replacement character"
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
wchar_t c = operator*();
|
||||||
|
++membuffer_const_iterator;
|
||||||
|
return c;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
template<typename char_type, size_t stack_size, size_t heap_block_size>
|
template<typename char_type, size_t stack_size, size_t heap_block_size>
|
||||||
bool TextStreamBase<char_type, stack_size, heap_block_size>::is_char_stream() const
|
bool TextStreamBase<char_type, stack_size, heap_block_size>::is_char_stream() const
|
||||||
{
|
{
|
||||||
|
@ -433,10 +759,14 @@ template<typename char_type, size_t stack_size, size_t heap_block_size>
|
||||||
TextStreamBase<char_type, stack_size, heap_block_size> &
|
TextStreamBase<char_type, stack_size, heap_block_size> &
|
||||||
TextStreamBase<char_type, stack_size, heap_block_size>::operator<<(char v)
|
TextStreamBase<char_type, stack_size, heap_block_size>::operator<<(char v)
|
||||||
{
|
{
|
||||||
// IMPROVEME
|
if constexpr (sizeof(char_type) == sizeof(wchar_t) )
|
||||||
// if char_type == 1 then if v <= 127 then put that char but if (unsigned)v > 127 put replacement character
|
{
|
||||||
// if char_type > 1 then simply put that character
|
buffer.append(static_cast<char_type>(static_cast<unsigned char>(v)));
|
||||||
buffer.append(static_cast<char_type>(v));
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
buffer.append(v);
|
||||||
|
}
|
||||||
|
|
||||||
return *this;
|
return *this;
|
||||||
}
|
}
|
||||||
|
@ -446,9 +776,6 @@ template<typename char_type, size_t stack_size, size_t heap_block_size>
|
||||||
TextStreamBase<char_type, stack_size, heap_block_size> &
|
TextStreamBase<char_type, stack_size, heap_block_size> &
|
||||||
TextStreamBase<char_type, stack_size, heap_block_size>::operator<<(unsigned char v)
|
TextStreamBase<char_type, stack_size, heap_block_size>::operator<<(unsigned char v)
|
||||||
{
|
{
|
||||||
// IMPROVEME
|
|
||||||
// if char_type == 1 then if v <= 127 then put that char but if v > 127 put replacement character
|
|
||||||
// if char_type > 1 then simply put that character
|
|
||||||
buffer.append(static_cast<char_type>(v));
|
buffer.append(static_cast<char_type>(v));
|
||||||
|
|
||||||
return *this;
|
return *this;
|
||||||
|
@ -459,8 +786,14 @@ template<typename char_type, size_t stack_size, size_t heap_block_size>
|
||||||
TextStreamBase<char_type, stack_size, heap_block_size> &
|
TextStreamBase<char_type, stack_size, heap_block_size> &
|
||||||
TextStreamBase<char_type, stack_size, heap_block_size>::operator<<(wchar_t v)
|
TextStreamBase<char_type, stack_size, heap_block_size>::operator<<(wchar_t v)
|
||||||
{
|
{
|
||||||
// IMPROVEME add utf8/wide conversion, if v is from surrogate pair we can skip it
|
if constexpr (sizeof(char_type) == sizeof(wchar_t) )
|
||||||
buffer.append(static_cast<char_type>(v));
|
{
|
||||||
|
buffer.append(v);
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
pt::int_to_utf8(static_cast<int>(v), *this);
|
||||||
|
}
|
||||||
|
|
||||||
return *this;
|
return *this;
|
||||||
}
|
}
|
||||||
|
|
|
@ -45,6 +45,12 @@
|
||||||
namespace pt
|
namespace pt
|
||||||
{
|
{
|
||||||
|
|
||||||
|
/*
|
||||||
|
* public methods are also defined in utf8_stream.h
|
||||||
|
*
|
||||||
|
*/
|
||||||
|
|
||||||
|
|
||||||
/*!
|
/*!
|
||||||
UTF-8, a transformation format of ISO 10646
|
UTF-8, a transformation format of ISO 10646
|
||||||
http://tools.ietf.org/html/rfc3629
|
http://tools.ietf.org/html/rfc3629
|
||||||
|
@ -213,9 +219,7 @@ template<typename StreamType>
|
||||||
bool wide_stream_to_utf8(const Stream & stream, StreamType & utf8, bool clear = true, int mode = 1);
|
bool wide_stream_to_utf8(const Stream & stream, StreamType & utf8, bool clear = true, int mode = 1);
|
||||||
|
|
||||||
template<typename StreamTypeIn, typename StreamTypeOut>
|
template<typename StreamTypeIn, typename StreamTypeOut>
|
||||||
void wide_stream_to_utf8(StreamTypeIn & buffer, StreamTypeOut & utf8, int mode = 1); // not tested, IMPROVE ME add clear parameter, mode parameter is not used
|
void wide_stream_to_utf8(StreamTypeIn & buffer, StreamTypeOut & utf8, bool clear = true, int mode = 1); // not tested, IMPROVE ME mode parameter is not used
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
} // namespace
|
} // namespace
|
||||||
|
|
|
@ -0,0 +1,104 @@
|
||||||
|
/*
|
||||||
|
* This file is a part of PikoTools
|
||||||
|
* and is distributed under the (new) BSD licence.
|
||||||
|
* Author: Tomasz Sowa <t.sowa@ttmath.org>
|
||||||
|
*/
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Copyright (c) 2021-2022, Tomasz Sowa
|
||||||
|
* All rights reserved.
|
||||||
|
*
|
||||||
|
* Redistribution and use in source and binary forms, with or without
|
||||||
|
* modification, are permitted provided that the following conditions are met:
|
||||||
|
*
|
||||||
|
* * Redistributions of source code must retain the above copyright notice,
|
||||||
|
* this list of conditions and the following disclaimer.
|
||||||
|
*
|
||||||
|
* * Redistributions in binary form must reproduce the above copyright
|
||||||
|
* notice, this list of conditions and the following disclaimer in the
|
||||||
|
* documentation and/or other materials provided with the distribution.
|
||||||
|
*
|
||||||
|
* * Neither the name Tomasz Sowa nor the names of contributors to this
|
||||||
|
* project may be used to endorse or promote products derived
|
||||||
|
* from this software without specific prior written permission.
|
||||||
|
*
|
||||||
|
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||||
|
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||||
|
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||||
|
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||||
|
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||||
|
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||||
|
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||||
|
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||||
|
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||||
|
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
|
||||||
|
* THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
|
*/
|
||||||
|
|
||||||
|
#ifndef headerfile_picotools_utf8_utf8_stream
|
||||||
|
#define headerfile_picotools_utf8_utf8_stream
|
||||||
|
|
||||||
|
#include "textstream/textstream.h"
|
||||||
|
|
||||||
|
namespace pt
|
||||||
|
{
|
||||||
|
|
||||||
|
|
||||||
|
/*!
|
||||||
|
this function converts one UTF-8 character into one wide-character
|
||||||
|
|
||||||
|
input:
|
||||||
|
iterator_in - an TextStream iterator for reading from
|
||||||
|
iterator_end - an end iterator (can be returned by end() method from TextStream)
|
||||||
|
|
||||||
|
output:
|
||||||
|
res - an output character
|
||||||
|
correct - true if it is a correct character
|
||||||
|
|
||||||
|
the function returns how many characters have been used from the input stream
|
||||||
|
*/
|
||||||
|
template<typename StreamIteratorType>
|
||||||
|
size_t utf8_to_int(
|
||||||
|
StreamIteratorType & iterator_in,
|
||||||
|
const StreamIteratorType & iterator_end,
|
||||||
|
int & res,
|
||||||
|
bool & correct)
|
||||||
|
{
|
||||||
|
size_t i, len;
|
||||||
|
unsigned char uz;
|
||||||
|
|
||||||
|
res = 0;
|
||||||
|
correct = false;
|
||||||
|
|
||||||
|
if( iterator_in == iterator_end )
|
||||||
|
return 0;
|
||||||
|
|
||||||
|
uz = *iterator_in;
|
||||||
|
++iterator_in;
|
||||||
|
|
||||||
|
if( !private_namespace::utf8_to_int_first_octet(uz, len, res) )
|
||||||
|
return 1;
|
||||||
|
|
||||||
|
for(i=1 ; i<len ; ++i)
|
||||||
|
{
|
||||||
|
if( iterator_in == iterator_end )
|
||||||
|
return i;
|
||||||
|
|
||||||
|
uz = *iterator_in;
|
||||||
|
++iterator_in;
|
||||||
|
|
||||||
|
if( !private_namespace::utf8_to_int_add_next_octet(uz, res) )
|
||||||
|
return i;
|
||||||
|
}
|
||||||
|
|
||||||
|
if( utf8_check_range(res, len) )
|
||||||
|
correct = true;
|
||||||
|
|
||||||
|
return len;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
#endif
|
|
@ -47,6 +47,7 @@ namespace pt
|
||||||
{
|
{
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
template<typename StreamType>
|
template<typename StreamType>
|
||||||
void int_to_wide(int c, StreamType & res)
|
void int_to_wide(int c, StreamType & res)
|
||||||
{
|
{
|
||||||
|
@ -65,6 +66,7 @@ void int_to_wide(int c, StreamType & res)
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
/*!
|
/*!
|
||||||
converting UTF-8 string to a TextStreamBase<wchar_t,...> stream
|
converting UTF-8 string to a TextStreamBase<wchar_t,...> stream
|
||||||
(need to be tested)
|
(need to be tested)
|
||||||
|
@ -376,8 +378,11 @@ bool wide_stream_to_utf8(const Stream & stream, StreamType & utf8, bool clear, i
|
||||||
|
|
||||||
// not tested
|
// not tested
|
||||||
template<typename StreamTypeIn, typename StreamTypeOut>
|
template<typename StreamTypeIn, typename StreamTypeOut>
|
||||||
void wide_stream_to_utf8(StreamTypeIn & buffer, StreamTypeOut & utf8, int mode)
|
void wide_stream_to_utf8(StreamTypeIn & buffer, StreamTypeOut & utf8, bool clear, int mode)
|
||||||
{
|
{
|
||||||
|
if( clear )
|
||||||
|
utf8.clear();
|
||||||
|
|
||||||
private_namespace::wide_to_utf8_generic(buffer, mode, [&utf8](const char * utf8_buffer, std::size_t buffer_len){
|
private_namespace::wide_to_utf8_generic(buffer, mode, [&utf8](const char * utf8_buffer, std::size_t buffer_len){
|
||||||
utf8.write(utf8_buffer, buffer_len);
|
utf8.write(utf8_buffer, buffer_len);
|
||||||
});
|
});
|
||||||
|
@ -385,8 +390,6 @@ void wide_stream_to_utf8(StreamTypeIn & buffer, StreamTypeOut & utf8, int mode)
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
} // namespace pt
|
} // namespace pt
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
|
|
@ -1,6 +1,5 @@
|
||||||
# DO NOT DELETE
|
# DO NOT DELETE
|
||||||
|
|
||||||
./main.o: convert.h mainoptionsparser.h csvparser.h
|
|
||||||
./convert.o: convert.h test.h ../src/convert/convert.h
|
./convert.o: convert.h test.h ../src/convert/convert.h
|
||||||
./convert.o: ../src/convert/inttostr.h ../src/convert/patternreplacer.h
|
./convert.o: ../src/convert/inttostr.h ../src/convert/patternreplacer.h
|
||||||
./convert.o: ../src/textstream/textstream.h ../src/textstream/stream.h
|
./convert.o: ../src/textstream/textstream.h ../src/textstream/stream.h
|
||||||
|
@ -9,8 +8,18 @@
|
||||||
./convert.o: ../src/textstream/stream.h ../src/utf8/utf8_templates.h
|
./convert.o: ../src/textstream/stream.h ../src/utf8/utf8_templates.h
|
||||||
./convert.o: ../src/utf8/utf8_private.h ../src/date/date.h
|
./convert.o: ../src/utf8/utf8_private.h ../src/date/date.h
|
||||||
./convert.o: ../src/membuffer/membuffer.h ../src/textstream/types.h
|
./convert.o: ../src/membuffer/membuffer.h ../src/textstream/types.h
|
||||||
./convert.o: ../src/convert/strtoint.h ../src/convert/text.h
|
./convert.o: ../src/utf8/utf8_stream.h ../src/convert/strtoint.h
|
||||||
./convert.o: ../src/convert/misc.h ../src/convert/double.h
|
./convert.o: ../src/convert/text.h ../src/convert/misc.h
|
||||||
|
./convert.o: ../src/convert/double.h
|
||||||
|
./csvparser.o: csvparser.h ../src/csv/csvparser.h ../src/space/space.h
|
||||||
|
./csvparser.o: ../src/textstream/types.h ../src/convert/inttostr.h
|
||||||
|
./csvparser.o: ../src/utf8/utf8.h ../src/textstream/stream.h
|
||||||
|
./csvparser.o: ../src/utf8/utf8_templates.h ../src/utf8/utf8_private.h
|
||||||
|
./csvparser.o: ../src/convert/baseparser.h ../src/textstream/textstream.h
|
||||||
|
./csvparser.o: ../src/textstream/stream.h ../src/date/date.h
|
||||||
|
./csvparser.o: ../src/membuffer/membuffer.h ../src/textstream/types.h
|
||||||
|
./csvparser.o: ../src/utf8/utf8_stream.h test.h
|
||||||
|
./main.o: convert.h mainoptionsparser.h csvparser.h
|
||||||
./test.o: test.h
|
./test.o: test.h
|
||||||
./mainoptionsparser.o: mainoptionsparser.h test.h
|
./mainoptionsparser.o: mainoptionsparser.h test.h
|
||||||
./mainoptionsparser.o: ../src/mainoptions/mainoptionsparser.h
|
./mainoptionsparser.o: ../src/mainoptions/mainoptionsparser.h
|
||||||
|
@ -24,9 +33,6 @@
|
||||||
./mainoptionsparser.o: ../src/textstream/textstream.h
|
./mainoptionsparser.o: ../src/textstream/textstream.h
|
||||||
./mainoptionsparser.o: ../src/textstream/stream.h ../src/date/date.h
|
./mainoptionsparser.o: ../src/textstream/stream.h ../src/date/date.h
|
||||||
./mainoptionsparser.o: ../src/membuffer/membuffer.h ../src/textstream/types.h
|
./mainoptionsparser.o: ../src/membuffer/membuffer.h ../src/textstream/types.h
|
||||||
./mainoptionsparser.o: ../src/convert/strtoint.h ../src/convert/text.h
|
./mainoptionsparser.o: ../src/utf8/utf8_stream.h ../src/convert/strtoint.h
|
||||||
./mainoptionsparser.o: ../src/convert/misc.h ../src/convert/double.h
|
./mainoptionsparser.o: ../src/convert/text.h ../src/convert/misc.h
|
||||||
./csvparser.o: csvparser.h ../src/csv/csvparser.h ../src/space/space.h
|
./mainoptionsparser.o: ../src/convert/double.h
|
||||||
./csvparser.o: ../src/textstream/types.h ../src/convert/inttostr.h
|
|
||||||
./csvparser.o: ../src/utf8/utf8.h ../src/textstream/stream.h
|
|
||||||
./csvparser.o: ../src/utf8/utf8_templates.h ../src/utf8/utf8_private.h test.h
|
|
||||||
|
|
Loading…
Reference in New Issue