fix: correctly escape json/xml/csv wide strings
A wide string was first converted to utf-8 and then escaped to json/xml/csv, which is incorrect: it should first be escaped and then converted to utf-8. Add TextStreamBase<>::iterator and TextStreamBase<>::const_iterator as classes with a method wchar_t get_unicode_and_advance(const iterator & end) that returns one character from either a utf-8 stream or a wide stream. Let TextStreamBase<>::operator<<(wchar_t v) correctly use utf-8.
master
parent
fd1a8270cd
commit
6b97b1b74a
|
@ -13,6 +13,7 @@
|
|||
./convert/double.o: convert/inttostr.h utf8/utf8.h textstream/stream.h
|
||||
./convert/double.o: utf8/utf8_templates.h utf8/utf8_private.h date/date.h
|
||||
./convert/double.o: membuffer/membuffer.h textstream/types.h
|
||||
./convert/double.o: utf8/utf8_stream.h
|
||||
./convert/baseparser.o: ./convert/baseparser.h textstream/textstream.h
|
||||
./convert/baseparser.o: textstream/stream.h space/space.h textstream/types.h
|
||||
./convert/baseparser.o: convert/inttostr.h utf8/utf8.h textstream/stream.h
|
||||
|
@ -24,19 +25,19 @@
|
|||
./log/filelog.o: space/space.h textstream/types.h convert/inttostr.h
|
||||
./log/filelog.o: utf8/utf8.h textstream/stream.h utf8/utf8_templates.h
|
||||
./log/filelog.o: utf8/utf8_private.h date/date.h membuffer/membuffer.h
|
||||
./log/filelog.o: textstream/types.h
|
||||
./log/filelog.o: textstream/types.h utf8/utf8_stream.h
|
||||
./log/log.o: ./log/log.h textstream/textstream.h textstream/stream.h
|
||||
./log/log.o: space/space.h textstream/types.h convert/inttostr.h utf8/utf8.h
|
||||
./log/log.o: textstream/stream.h utf8/utf8_templates.h utf8/utf8_private.h
|
||||
./log/log.o: date/date.h membuffer/membuffer.h textstream/types.h
|
||||
./log/log.o: ./log/filelog.h
|
||||
./log/log.o: utf8/utf8_stream.h ./log/filelog.h
|
||||
./space/space.o: ./space/space.h textstream/types.h convert/inttostr.h
|
||||
./space/space.o: utf8/utf8.h textstream/stream.h utf8/utf8_templates.h
|
||||
./space/space.o: utf8/utf8_private.h convert/convert.h ./convert/inttostr.h
|
||||
./space/space.o: convert/patternreplacer.h textstream/textstream.h
|
||||
./space/space.o: textstream/stream.h space/space.h date/date.h
|
||||
./space/space.o: membuffer/membuffer.h textstream/types.h convert/strtoint.h
|
||||
./space/space.o: ./convert/text.h ./convert/misc.h utf8/utf8_stream.h
|
||||
./space/space.o: membuffer/membuffer.h textstream/types.h utf8/utf8_stream.h
|
||||
./space/space.o: convert/strtoint.h ./convert/text.h ./convert/misc.h
|
||||
./space/space.o: ./convert/double.h
|
||||
./space/spaceparser.o: ./space/spaceparser.h ./space/space.h
|
||||
./space/spaceparser.o: textstream/types.h convert/inttostr.h utf8/utf8.h
|
||||
|
@ -44,31 +45,32 @@
|
|||
./space/spaceparser.o: utf8/utf8_private.h convert/baseparser.h
|
||||
./space/spaceparser.o: textstream/textstream.h textstream/stream.h
|
||||
./space/spaceparser.o: space/space.h date/date.h membuffer/membuffer.h
|
||||
./space/spaceparser.o: textstream/types.h convert/strtoint.h ./convert/text.h
|
||||
./space/spaceparser.o: ./convert/misc.h utf8/utf8_stream.h
|
||||
./space/spaceparser.o: textstream/types.h utf8/utf8_stream.h
|
||||
./space/spaceparser.o: convert/strtoint.h ./convert/text.h ./convert/misc.h
|
||||
./utf8/utf8.o: ./utf8/utf8.h textstream/stream.h utf8/utf8_templates.h
|
||||
./utf8/utf8.o: utf8/utf8_private.h
|
||||
./utf8/utf8_private.o: utf8/utf8_private.h
|
||||
./csv/csvparser.o: ./csv/csvparser.h space/space.h textstream/types.h
|
||||
./csv/csvparser.o: convert/inttostr.h utf8/utf8.h textstream/stream.h
|
||||
./csv/csvparser.o: utf8/utf8_templates.h utf8/utf8_private.h
|
||||
./csv/csvparser.o: convert/baseparser.h textstream/textstream.h
|
||||
./csv/csvparser.o: textstream/stream.h date/date.h membuffer/membuffer.h
|
||||
./csv/csvparser.o: textstream/types.h
|
||||
./mainoptions/mainoptionsparser.o: ./mainoptions/mainoptionsparser.h
|
||||
./mainoptions/mainoptionsparser.o: space/space.h textstream/types.h
|
||||
./mainoptions/mainoptionsparser.o: convert/inttostr.h utf8/utf8.h
|
||||
./mainoptions/mainoptionsparser.o: textstream/stream.h utf8/utf8_templates.h
|
||||
./mainoptions/mainoptionsparser.o: utf8/utf8_private.h
|
||||
./html/htmlparser.o: ./html/htmlparser.h convert/baseparser.h
|
||||
./html/htmlparser.o: textstream/textstream.h textstream/stream.h
|
||||
./html/htmlparser.o: space/space.h textstream/types.h convert/inttostr.h
|
||||
./html/htmlparser.o: utf8/utf8.h textstream/stream.h utf8/utf8_templates.h
|
||||
./html/htmlparser.o: utf8/utf8_private.h date/date.h membuffer/membuffer.h
|
||||
./html/htmlparser.o: textstream/types.h convert/text.h
|
||||
./html/bbcodeparser.o: ./html/bbcodeparser.h ./html/htmlparser.h
|
||||
./html/bbcodeparser.o: convert/baseparser.h textstream/textstream.h
|
||||
./html/bbcodeparser.o: textstream/stream.h space/space.h textstream/types.h
|
||||
./html/bbcodeparser.o: convert/inttostr.h utf8/utf8.h textstream/stream.h
|
||||
./html/bbcodeparser.o: utf8/utf8_templates.h utf8/utf8_private.h date/date.h
|
||||
./html/bbcodeparser.o: membuffer/membuffer.h textstream/types.h
|
||||
./html/bbcodeparser.o: utf8/utf8_stream.h
|
||||
./html/htmlparser.o: ./html/htmlparser.h convert/baseparser.h
|
||||
./html/htmlparser.o: textstream/textstream.h textstream/stream.h
|
||||
./html/htmlparser.o: space/space.h textstream/types.h convert/inttostr.h
|
||||
./html/htmlparser.o: utf8/utf8.h textstream/stream.h utf8/utf8_templates.h
|
||||
./html/htmlparser.o: utf8/utf8_private.h date/date.h membuffer/membuffer.h
|
||||
./html/htmlparser.o: textstream/types.h utf8/utf8_stream.h convert/text.h
|
||||
./csv/csvparser.o: ./csv/csvparser.h space/space.h textstream/types.h
|
||||
./csv/csvparser.o: convert/inttostr.h utf8/utf8.h textstream/stream.h
|
||||
./csv/csvparser.o: utf8/utf8_templates.h utf8/utf8_private.h
|
||||
./csv/csvparser.o: convert/baseparser.h textstream/textstream.h
|
||||
./csv/csvparser.o: textstream/stream.h date/date.h membuffer/membuffer.h
|
||||
./csv/csvparser.o: textstream/types.h utf8/utf8_stream.h
|
||||
./mainoptions/mainoptionsparser.o: ./mainoptions/mainoptionsparser.h
|
||||
./mainoptions/mainoptionsparser.o: space/space.h textstream/types.h
|
||||
./mainoptions/mainoptionsparser.o: convert/inttostr.h utf8/utf8.h
|
||||
./mainoptions/mainoptionsparser.o: textstream/stream.h utf8/utf8_templates.h
|
||||
./mainoptions/mainoptionsparser.o: utf8/utf8_private.h
|
||||
|
|
|
@ -5,7 +5,7 @@
|
|||
*/
|
||||
|
||||
/*
|
||||
* Copyright (c) 2017-2021, Tomasz Sowa
|
||||
* Copyright (c) 2017-2022, Tomasz Sowa
|
||||
* All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
|
@ -51,62 +51,83 @@ void SetOverflow(bool * was_overflow, bool val)
|
|||
}
|
||||
|
||||
|
||||
void esc_to_json(char val, Stream & out)
|
||||
|
||||
void esc_to_json_uformat(wchar_t val, Stream & out)
|
||||
{
|
||||
if( (unsigned char)val < 32 )
|
||||
char buf[10];
|
||||
size_t len;
|
||||
|
||||
Toa((unsigned long)val, buf, sizeof(buf)/sizeof(char), 16, &len);
|
||||
|
||||
out << "\\u";
|
||||
|
||||
if( len < 4 )
|
||||
{
|
||||
char buf[10];
|
||||
size_t len;
|
||||
Toa((unsigned char)val, buf, sizeof(buf)/sizeof(char), 16, &len);
|
||||
|
||||
out << "\\u";
|
||||
|
||||
if( len < 4 )
|
||||
for(size_t i=0 ; i < (4-len) ; ++i)
|
||||
{
|
||||
for(size_t i=0 ; i < (4-len) ; ++i)
|
||||
{
|
||||
out << '0';
|
||||
}
|
||||
}
|
||||
|
||||
out << buf;
|
||||
}
|
||||
else
|
||||
{
|
||||
// CHECKME
|
||||
// \r \n \t are <32 and will be serialized as \u.... above
|
||||
|
||||
switch( val )
|
||||
{
|
||||
case 0: out << '\\'; out << '0'; break; // maybe it would be better to skip this character?
|
||||
case '\r': out << '\\'; out << 'r'; break;
|
||||
case '\n': out << '\\'; out << 'n'; break;
|
||||
case '\t': out << '\\'; out << 't'; break;
|
||||
case 0x08: out << '\\'; out << 'b'; break;
|
||||
case 0x0c: out << '\\'; out << 'f'; break;
|
||||
case '\\': out << '\\'; out << '\\'; break;
|
||||
case '"': out << '\\'; out << '\"'; break;
|
||||
default:
|
||||
out << val;
|
||||
out << '0';
|
||||
}
|
||||
}
|
||||
|
||||
out << buf;
|
||||
}
|
||||
|
||||
|
||||
void esc_to_json(wchar_t val, Stream & out)
|
||||
{
|
||||
char utf8_buf[10];
|
||||
std::size_t utf8_buf_len = sizeof(utf8_buf) / sizeof(char);
|
||||
|
||||
size_t len = int_to_utf8(static_cast<int>(val), utf8_buf, utf8_buf_len);
|
||||
|
||||
for(size_t a = 0 ; a < len ; ++a)
|
||||
if( val == '\r' )
|
||||
{
|
||||
esc_to_json(utf8_buf[a], out);
|
||||
out << '\\' << 'r';
|
||||
}
|
||||
else
|
||||
if( val == '\n' )
|
||||
{
|
||||
out << '\\' << 'n';
|
||||
}
|
||||
else
|
||||
if( val == '\t' )
|
||||
{
|
||||
out << '\\' << 't';
|
||||
}
|
||||
else
|
||||
if( val == 0x08 )
|
||||
{
|
||||
out << '\\' << 'b';
|
||||
}
|
||||
else
|
||||
if( val == 0x0c )
|
||||
{
|
||||
out << '\\' << 'f';
|
||||
}
|
||||
else
|
||||
if( val == '\\' )
|
||||
{
|
||||
out << '\\' << '\\';
|
||||
}
|
||||
else
|
||||
if( val == '"' )
|
||||
{
|
||||
out << '\\' << '\"';
|
||||
}
|
||||
else
|
||||
if( val < 32 )
|
||||
{
|
||||
esc_to_json_uformat(val, out);
|
||||
}
|
||||
else
|
||||
{
|
||||
out << val;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
void esc_to_json(char val, Stream & out)
|
||||
{
|
||||
esc_to_json((wchar_t)(unsigned char)val, out);
|
||||
}
|
||||
|
||||
|
||||
|
||||
void esc_to_json(const char * c, pt::Stream & out)
|
||||
{
|
||||
for(size_t i = 0 ; c[i] != 0 ; ++i)
|
||||
|
@ -159,10 +180,15 @@ void esc_to_json(const std::wstring & in, Stream & out)
|
|||
|
||||
|
||||
|
||||
void esc_to_xml(char val, Stream & out)
|
||||
void esc_to_xml(wchar_t val, Stream & out)
|
||||
{
|
||||
switch(val)
|
||||
{
|
||||
case 0:
|
||||
// null character is invalid in XML 1.0 and 1.1
|
||||
// https://en.wikipedia.org/wiki/Valid_characters_in_XML
|
||||
break;
|
||||
|
||||
case '<':
|
||||
out << "&lt;";
|
||||
break;
|
||||
|
@ -182,22 +208,13 @@ void esc_to_xml(char val, Stream & out)
|
|||
default:
|
||||
out << val;
|
||||
break;
|
||||
|
||||
// what about zero (null) character?
|
||||
}
|
||||
}
|
||||
|
||||
void esc_to_xml(wchar_t val, Stream & out)
|
||||
|
||||
void esc_to_xml(char val, Stream & out)
|
||||
{
|
||||
char utf8_buf[10];
|
||||
std::size_t utf8_buf_len = sizeof(utf8_buf) / sizeof(char);
|
||||
|
||||
size_t len = int_to_utf8(static_cast<int>(val), utf8_buf, utf8_buf_len);
|
||||
|
||||
for(size_t a = 0 ; a < len ; ++a)
|
||||
{
|
||||
esc_to_xml(utf8_buf[a], out);
|
||||
}
|
||||
esc_to_xml((wchar_t)(unsigned char)val, out);
|
||||
}
|
||||
|
||||
|
||||
|
@ -252,10 +269,14 @@ void esc_to_xml(const std::wstring & in, Stream & out)
|
|||
|
||||
|
||||
|
||||
void esc_to_csv(char c, pt::Stream & out)
|
||||
void esc_to_csv(wchar_t c, pt::Stream & out)
|
||||
{
|
||||
switch(c)
|
||||
{
|
||||
case 0:
|
||||
// null characters are invalid in text files
|
||||
break;
|
||||
|
||||
case '"':
|
||||
out << "\"\"";
|
||||
break;
|
||||
|
@ -263,27 +284,16 @@ void esc_to_csv(char c, pt::Stream & out)
|
|||
default:
|
||||
out << c;
|
||||
break;
|
||||
|
||||
// what about zero (null) character?
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
void esc_to_csv(wchar_t val, Stream & out)
|
||||
void esc_to_csv(char val, Stream & out)
|
||||
{
|
||||
char utf8_buf[10];
|
||||
std::size_t utf8_buf_len = sizeof(utf8_buf) / sizeof(char);
|
||||
|
||||
size_t len = int_to_utf8(static_cast<int>(val), utf8_buf, utf8_buf_len);
|
||||
|
||||
for(size_t a = 0 ; a < len ; ++a)
|
||||
{
|
||||
esc_to_csv(utf8_buf[a], out);
|
||||
}
|
||||
esc_to_csv((wchar_t)(unsigned char)val, out);
|
||||
}
|
||||
|
||||
|
||||
|
||||
void esc_to_csv(const char * c, pt::Stream & out)
|
||||
{
|
||||
for(size_t i = 0 ; c[i] != 0 ; ++i)
|
||||
|
|
|
@ -5,7 +5,7 @@
|
|||
*/
|
||||
|
||||
/*
|
||||
* Copyright (c) 2017-2021, Tomasz Sowa
|
||||
* Copyright (c) 2017-2022, Tomasz Sowa
|
||||
* All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
|
@ -50,8 +50,8 @@ namespace pt
|
|||
|
||||
void SetOverflow(bool * was_overflow, bool val);
|
||||
|
||||
void esc_to_json(char val, Stream & out);
|
||||
void esc_to_json(wchar_t val, Stream & out);
|
||||
void esc_to_json(char val, Stream & out);
|
||||
void esc_to_json(const char * c, pt::Stream & out);
|
||||
void esc_to_json(const char * c, std::size_t len, Stream & out);
|
||||
void esc_to_json(const wchar_t * c, Stream & out);
|
||||
|
@ -59,8 +59,8 @@ void esc_to_json(const wchar_t * c, size_t len, pt::Stream & out);
|
|||
void esc_to_json(const std::string & in, Stream & out);
|
||||
void esc_to_json(const std::wstring & in, Stream & out);
|
||||
|
||||
void esc_to_xml(char c, pt::Stream & out);
|
||||
void esc_to_xml(wchar_t c, pt::Stream & out);
|
||||
void esc_to_xml(char c, pt::Stream & out);
|
||||
void esc_to_xml(const char * c, pt::Stream & out);
|
||||
void esc_to_xml(const char * c, std::size_t len, pt::Stream & out);
|
||||
void esc_to_xml(const wchar_t * c, pt::Stream & out);
|
||||
|
@ -68,8 +68,8 @@ void esc_to_xml(const wchar_t * c, size_t len, pt::Stream & out);
|
|||
void esc_to_xml(const std::string & in, Stream & out);
|
||||
void esc_to_xml(const std::wstring & in, Stream & out);
|
||||
|
||||
void esc_to_csv(char c, pt::Stream & out);
|
||||
void esc_to_csv(wchar_t val, Stream & out);
|
||||
void esc_to_csv(char c, pt::Stream & out);
|
||||
void esc_to_csv(const char * c, std::size_t len, Stream & out);
|
||||
void esc_to_csv(const char * c, pt::Stream & out);
|
||||
void esc_to_csv(const char * c, std::size_t len, pt::Stream & out);
|
||||
|
@ -82,34 +82,13 @@ void esc_to_csv(const std::string & in, Stream & out);
|
|||
template<typename StreamType>
|
||||
void esc_to_json(const StreamType & in, Stream & out)
|
||||
{
|
||||
char utf8_buf[10];
|
||||
std::size_t utf8_buf_len = sizeof(utf8_buf) / sizeof(char);
|
||||
typename StreamType::const_iterator i = in.begin();
|
||||
typename StreamType::const_iterator end = in.end();
|
||||
int res;
|
||||
bool correct;
|
||||
|
||||
for( ; i != end ; ++i)
|
||||
while( i != end )
|
||||
{
|
||||
if( in.is_wchar_stream() && out.is_char_stream() )
|
||||
{
|
||||
std::size_t len = int_to_utf8(static_cast<int>(*i), utf8_buf, utf8_buf_len);
|
||||
esc_to_json(utf8_buf, len, out);
|
||||
}
|
||||
else
|
||||
if( in.is_char_stream() && out.is_wchar_stream() )
|
||||
{
|
||||
utf8_to_int(i, end, res, correct);
|
||||
|
||||
if( correct )
|
||||
esc_to_json(static_cast<wchar_t>(res), out); // IMPROVEME no surrogate pair used here (if sizeof(wchar_t) == 2)
|
||||
|
||||
// put replacement char if not correct?
|
||||
}
|
||||
else
|
||||
{
|
||||
esc_to_json(static_cast<wchar_t>(*i), out);
|
||||
}
|
||||
wchar_t c = i.get_unicode_and_advance(end);
|
||||
esc_to_json(c, out);
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -117,34 +96,13 @@ void esc_to_json(const StreamType & in, Stream & out)
|
|||
template<typename StreamType>
|
||||
void esc_to_xml(const StreamType & in, Stream & out)
|
||||
{
|
||||
char utf8_buf[10];
|
||||
std::size_t utf8_buf_len = sizeof(utf8_buf) / sizeof(char);
|
||||
typename StreamType::const_iterator i = in.begin();
|
||||
typename StreamType::const_iterator end = in.end();
|
||||
int res;
|
||||
bool correct;
|
||||
|
||||
for( ; i != end ; ++i)
|
||||
while( i != end )
|
||||
{
|
||||
if( in.is_wchar_stream() && out.is_char_stream() )
|
||||
{
|
||||
std::size_t len = int_to_utf8(static_cast<int>(*i), utf8_buf, utf8_buf_len);
|
||||
esc_to_xml(utf8_buf, len, out);
|
||||
}
|
||||
else
|
||||
if( in.is_char_stream() && out.is_wchar_stream() )
|
||||
{
|
||||
utf8_to_int(i, end, res, correct);
|
||||
|
||||
if( correct )
|
||||
esc_to_xml(static_cast<wchar_t>(res), out); // IMPROVEME no surrogate pair used here (if sizeof(wchar_t) == 2)
|
||||
|
||||
// put replacement char if not correct?
|
||||
}
|
||||
else
|
||||
{
|
||||
esc_to_xml(static_cast<wchar_t>(*i), out);
|
||||
}
|
||||
wchar_t c = i.get_unicode_and_advance(end);
|
||||
esc_to_xml(c, out);
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -152,34 +110,13 @@ void esc_to_xml(const StreamType & in, Stream & out)
|
|||
template<typename StreamType>
|
||||
void esc_to_csv(const StreamType & in, Stream & out)
|
||||
{
|
||||
char utf8_buf[10];
|
||||
std::size_t utf8_buf_len = sizeof(utf8_buf) / sizeof(char);
|
||||
typename StreamType::const_iterator i = in.begin();
|
||||
typename StreamType::const_iterator end = in.end();
|
||||
int res;
|
||||
bool correct;
|
||||
|
||||
for( ; i != end ; ++i)
|
||||
while( i != end )
|
||||
{
|
||||
if( in.is_wchar_stream() && out.is_char_stream() )
|
||||
{
|
||||
std::size_t len = int_to_utf8(static_cast<int>(*i), utf8_buf, utf8_buf_len);
|
||||
esc_to_csv(utf8_buf, len, out);
|
||||
}
|
||||
else
|
||||
if( in.is_char_stream() && out.is_wchar_stream() )
|
||||
{
|
||||
utf8_to_int(i, end, res, correct);
|
||||
|
||||
if( correct )
|
||||
esc_to_csv(static_cast<wchar_t>(res), out); // IMPROVEME no surrogate pair used here (if sizeof(wchar_t) == 2)
|
||||
|
||||
// put replacement char if not correct?
|
||||
}
|
||||
else
|
||||
{
|
||||
esc_to_csv(static_cast<wchar_t>(*i), out);
|
||||
}
|
||||
wchar_t c = i.get_unicode_and_advance(end);
|
||||
esc_to_csv(c, out);
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
@ -5,7 +5,7 @@
|
|||
*/
|
||||
|
||||
/*
|
||||
* Copyright (c) 2012-2021, Tomasz Sowa
|
||||
* Copyright (c) 2012-2022, Tomasz Sowa
|
||||
* All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
|
@ -46,6 +46,7 @@
|
|||
#include "membuffer/membuffer.h"
|
||||
#include "types.h"
|
||||
#include "utf8/utf8.h"
|
||||
#include "utf8/utf8_stream.h"
|
||||
|
||||
// for snprintf
|
||||
#include <cstdio>
|
||||
|
@ -71,8 +72,67 @@ public:
|
|||
typedef CharT char_type;
|
||||
|
||||
typedef MemBuffer<char_type, stack_size, heap_block_size> buffer_type;
|
||||
typedef typename buffer_type::iterator iterator;
|
||||
typedef typename buffer_type::const_iterator const_iterator;
|
||||
|
||||
|
||||
class iterator
|
||||
{
|
||||
public:
|
||||
|
||||
typename buffer_type::iterator membuffer_iterator;
|
||||
|
||||
iterator();
|
||||
iterator(const iterator & i);
|
||||
iterator & operator=(const iterator & i);
|
||||
|
||||
iterator(const typename buffer_type::iterator & i);
|
||||
iterator & operator=(const typename buffer_type::iterator & i);
|
||||
|
||||
bool operator==(const iterator & i) const;
|
||||
bool operator!=(const iterator & i) const;
|
||||
|
||||
iterator & operator++(); // prefix ++
|
||||
iterator operator++(int); // postfix ++
|
||||
|
||||
iterator & operator--(); // prefix --
|
||||
iterator operator--(int); // postfix --
|
||||
|
||||
CharT & operator*();
|
||||
|
||||
wchar_t get_unicode_and_advance(const iterator & end);
|
||||
};
|
||||
|
||||
|
||||
class const_iterator
|
||||
{
|
||||
public:
|
||||
|
||||
typename buffer_type::const_iterator membuffer_const_iterator;
|
||||
|
||||
const_iterator();
|
||||
const_iterator(const const_iterator & i);
|
||||
const_iterator(const iterator & i);
|
||||
const_iterator & operator=(const const_iterator & i);
|
||||
const_iterator & operator=(const iterator & i);
|
||||
|
||||
const_iterator(const typename buffer_type::const_iterator & i);
|
||||
const_iterator(const typename buffer_type::iterator & i);
|
||||
const_iterator & operator=(const typename buffer_type::const_iterator & i);
|
||||
const_iterator & operator=(const typename buffer_type::iterator & i);
|
||||
|
||||
bool operator==(const const_iterator & i) const;
|
||||
bool operator!=(const const_iterator & i) const;
|
||||
|
||||
const_iterator & operator++(); // prefix ++
|
||||
const_iterator operator++(int); // postfix ++
|
||||
|
||||
const_iterator & operator--(); // prefix --
|
||||
const_iterator operator--(int); // postfix --
|
||||
|
||||
CharT operator*();
|
||||
|
||||
wchar_t get_unicode_and_advance(const const_iterator & end);
|
||||
|
||||
};
|
||||
|
||||
|
||||
bool is_char_stream() const;
|
||||
|
@ -112,7 +172,7 @@ public:
|
|||
|
||||
TextStreamBase & operator<<(char);
|
||||
TextStreamBase & operator<<(unsigned char);
|
||||
TextStreamBase & operator<<(wchar_t);
|
||||
TextStreamBase & operator<<(wchar_t); // no surrogate pairs are used
|
||||
TextStreamBase & operator<<(bool);
|
||||
TextStreamBase & operator<<(short);
|
||||
TextStreamBase & operator<<(int);
|
||||
|
@ -173,6 +233,272 @@ TextStreamBase<char_type, stack_size, heap_block_size>::TextStreamBase()
|
|||
}
|
||||
|
||||
|
||||
|
||||
template<typename char_type, size_t stack_size, size_t heap_block_size>
|
||||
TextStreamBase<char_type, stack_size, heap_block_size>::iterator::iterator()
|
||||
{
|
||||
}
|
||||
|
||||
|
||||
template<typename char_type, size_t stack_size, size_t heap_block_size>
|
||||
TextStreamBase<char_type, stack_size, heap_block_size>::iterator::iterator(const iterator & i) : membuffer_iterator(i)
|
||||
{
|
||||
}
|
||||
|
||||
|
||||
template<typename char_type, size_t stack_size, size_t heap_block_size>
|
||||
TextStreamBase<char_type, stack_size, heap_block_size>::iterator &
|
||||
TextStreamBase<char_type, stack_size, heap_block_size>::iterator::operator=(const iterator & i)
|
||||
{
|
||||
membuffer_iterator = i;
|
||||
}
|
||||
|
||||
|
||||
template<typename char_type, size_t stack_size, size_t heap_block_size>
|
||||
TextStreamBase<char_type, stack_size, heap_block_size>::iterator::iterator(const typename buffer_type::iterator & i) : membuffer_iterator(i)
|
||||
{
|
||||
}
|
||||
|
||||
|
||||
template<typename char_type, size_t stack_size, size_t heap_block_size>
|
||||
TextStreamBase<char_type, stack_size, heap_block_size>::iterator &
|
||||
TextStreamBase<char_type, stack_size, heap_block_size>::iterator::operator=(const typename buffer_type::iterator & i)
|
||||
{
|
||||
membuffer_iterator = i;
|
||||
}
|
||||
|
||||
|
||||
|
||||
|
||||
template<typename char_type, size_t stack_size, size_t heap_block_size>
|
||||
bool TextStreamBase<char_type, stack_size, heap_block_size>::iterator::operator==(const iterator & i) const
|
||||
{
|
||||
return membuffer_iterator == i.membuffer_iterator;
|
||||
}
|
||||
|
||||
template<typename char_type, size_t stack_size, size_t heap_block_size>
|
||||
bool TextStreamBase<char_type, stack_size, heap_block_size>::iterator::operator!=(const iterator & i) const
|
||||
{
|
||||
return membuffer_iterator != i.membuffer_iterator;
|
||||
}
|
||||
|
||||
template<typename char_type, size_t stack_size, size_t heap_block_size>
|
||||
TextStreamBase<char_type, stack_size, heap_block_size>::iterator &
|
||||
TextStreamBase<char_type, stack_size, heap_block_size>::iterator::operator++()
|
||||
{
|
||||
++membuffer_iterator;
|
||||
return *this;
|
||||
}
|
||||
|
||||
template<typename char_type, size_t stack_size, size_t heap_block_size>
|
||||
TextStreamBase<char_type, stack_size, heap_block_size>::iterator
|
||||
TextStreamBase<char_type, stack_size, heap_block_size>::iterator::operator++(int)
|
||||
{
|
||||
const_iterator old(*this);
|
||||
membuffer_iterator++;
|
||||
return old;
|
||||
}
|
||||
|
||||
template<typename char_type, size_t stack_size, size_t heap_block_size>
|
||||
TextStreamBase<char_type, stack_size, heap_block_size>::iterator &
|
||||
TextStreamBase<char_type, stack_size, heap_block_size>::iterator::operator--()
|
||||
{
|
||||
--membuffer_iterator;
|
||||
return *this;
|
||||
}
|
||||
|
||||
template<typename char_type, size_t stack_size, size_t heap_block_size>
|
||||
TextStreamBase<char_type, stack_size, heap_block_size>::iterator
|
||||
TextStreamBase<char_type, stack_size, heap_block_size>::iterator::operator--(int)
|
||||
{
|
||||
const_iterator old(*this);
|
||||
membuffer_iterator--;
|
||||
return old;
|
||||
}
|
||||
|
||||
template<typename char_type, size_t stack_size, size_t heap_block_size>
|
||||
char_type & TextStreamBase<char_type, stack_size, heap_block_size>::iterator::operator*()
|
||||
{
|
||||
return *membuffer_iterator;
|
||||
}
|
||||
|
||||
|
||||
template<typename char_type, size_t stack_size, size_t heap_block_size>
|
||||
wchar_t TextStreamBase<char_type, stack_size, heap_block_size>::iterator::get_unicode_and_advance(const iterator & end)
|
||||
{
|
||||
if( *this != end )
|
||||
{
|
||||
if constexpr (sizeof(char_type) == sizeof(char) )
|
||||
{
|
||||
int res;
|
||||
bool correct;
|
||||
utf8_to_int(*this, end, res, correct);
|
||||
|
||||
if( correct )
|
||||
return static_cast<wchar_t>(res);
|
||||
else
|
||||
return static_cast<wchar_t>(0xFFFD); // U+FFFD "replacement character"
|
||||
}
|
||||
else
|
||||
{
|
||||
wchar_t c = operator*();
|
||||
++membuffer_iterator;
|
||||
return c;
|
||||
}
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
template<typename char_type, size_t stack_size, size_t heap_block_size>
|
||||
TextStreamBase<char_type, stack_size, heap_block_size>::const_iterator::const_iterator()
|
||||
{
|
||||
}
|
||||
|
||||
template<typename char_type, size_t stack_size, size_t heap_block_size>
|
||||
TextStreamBase<char_type, stack_size, heap_block_size>::const_iterator::const_iterator(const const_iterator & i) : membuffer_const_iterator(i.membuffer_const_iterator)
|
||||
{
|
||||
}
|
||||
|
||||
template<typename char_type, size_t stack_size, size_t heap_block_size>
|
||||
TextStreamBase<char_type, stack_size, heap_block_size>::const_iterator::const_iterator(const iterator & i) : membuffer_const_iterator(i.membuffer_iterator)
|
||||
{
|
||||
}
|
||||
|
||||
template<typename char_type, size_t stack_size, size_t heap_block_size>
|
||||
TextStreamBase<char_type, stack_size, heap_block_size>::const_iterator &
|
||||
TextStreamBase<char_type, stack_size, heap_block_size>::const_iterator::operator=(const const_iterator & i)
|
||||
{
|
||||
membuffer_const_iterator = i.membuffer_const_iterator;
|
||||
return *this;
|
||||
}
|
||||
|
||||
template<typename char_type, size_t stack_size, size_t heap_block_size>
|
||||
TextStreamBase<char_type, stack_size, heap_block_size>::const_iterator &
|
||||
TextStreamBase<char_type, stack_size, heap_block_size>::const_iterator::operator=(const iterator & i)
|
||||
{
|
||||
membuffer_const_iterator = i.membuffer_iterator;
|
||||
return *this;
|
||||
}
|
||||
|
||||
|
||||
|
||||
template<typename char_type, size_t stack_size, size_t heap_block_size>
|
||||
TextStreamBase<char_type, stack_size, heap_block_size>::const_iterator::const_iterator(const typename buffer_type::const_iterator & i) : membuffer_const_iterator(i)
|
||||
{
|
||||
}
|
||||
|
||||
template<typename char_type, size_t stack_size, size_t heap_block_size>
|
||||
TextStreamBase<char_type, stack_size, heap_block_size>::const_iterator::const_iterator(const typename buffer_type::iterator & i) : membuffer_const_iterator(i)
|
||||
{
|
||||
}
|
||||
|
||||
template<typename char_type, size_t stack_size, size_t heap_block_size>
|
||||
TextStreamBase<char_type, stack_size, heap_block_size>::const_iterator &
|
||||
TextStreamBase<char_type, stack_size, heap_block_size>::const_iterator::operator=(const typename buffer_type::const_iterator & i)
|
||||
{
|
||||
membuffer_const_iterator = i;
|
||||
return *this;
|
||||
}
|
||||
|
||||
template<typename char_type, size_t stack_size, size_t heap_block_size>
|
||||
TextStreamBase<char_type, stack_size, heap_block_size>::const_iterator &
|
||||
TextStreamBase<char_type, stack_size, heap_block_size>::const_iterator::operator=(const typename buffer_type::iterator & i)
|
||||
{
|
||||
membuffer_const_iterator = i;
|
||||
return *this;
|
||||
}
|
||||
|
||||
|
||||
|
||||
template<typename char_type, size_t stack_size, size_t heap_block_size>
|
||||
bool TextStreamBase<char_type, stack_size, heap_block_size>::const_iterator::operator==(const const_iterator & i) const
|
||||
{
|
||||
return membuffer_const_iterator == i.membuffer_const_iterator;
|
||||
}
|
||||
|
||||
template<typename char_type, size_t stack_size, size_t heap_block_size>
|
||||
bool TextStreamBase<char_type, stack_size, heap_block_size>::const_iterator::operator!=(const const_iterator & i) const
|
||||
{
|
||||
return membuffer_const_iterator != i.membuffer_const_iterator;
|
||||
}
|
||||
|
||||
template<typename char_type, size_t stack_size, size_t heap_block_size>
|
||||
TextStreamBase<char_type, stack_size, heap_block_size>::const_iterator &
|
||||
TextStreamBase<char_type, stack_size, heap_block_size>::const_iterator::operator++()
|
||||
{
|
||||
++membuffer_const_iterator;
|
||||
return *this;
|
||||
}
|
||||
|
||||
template<typename char_type, size_t stack_size, size_t heap_block_size>
|
||||
TextStreamBase<char_type, stack_size, heap_block_size>::const_iterator
|
||||
TextStreamBase<char_type, stack_size, heap_block_size>::const_iterator::operator++(int)
|
||||
{
|
||||
const_iterator old(*this);
|
||||
membuffer_const_iterator++;
|
||||
return old;
|
||||
}
|
||||
|
||||
template<typename char_type, size_t stack_size, size_t heap_block_size>
|
||||
TextStreamBase<char_type, stack_size, heap_block_size>::const_iterator &
|
||||
TextStreamBase<char_type, stack_size, heap_block_size>::const_iterator::operator--()
|
||||
{
|
||||
--membuffer_const_iterator;
|
||||
return *this;
|
||||
}
|
||||
|
||||
template<typename char_type, size_t stack_size, size_t heap_block_size>
|
||||
TextStreamBase<char_type, stack_size, heap_block_size>::const_iterator
|
||||
TextStreamBase<char_type, stack_size, heap_block_size>::const_iterator::operator--(int)
|
||||
{
|
||||
const_iterator old(*this);
|
||||
membuffer_const_iterator--;
|
||||
return old;
|
||||
}
|
||||
|
||||
template<typename char_type, size_t stack_size, size_t heap_block_size>
|
||||
char_type TextStreamBase<char_type, stack_size, heap_block_size>::const_iterator::operator*()
|
||||
{
|
||||
return *membuffer_const_iterator;
|
||||
}
|
||||
|
||||
|
||||
template<typename char_type, size_t stack_size, size_t heap_block_size>
|
||||
wchar_t TextStreamBase<char_type, stack_size, heap_block_size>::const_iterator::get_unicode_and_advance(const const_iterator & end)
|
||||
{
|
||||
if( *this != end )
|
||||
{
|
||||
if constexpr (sizeof(char_type) == sizeof(char) )
|
||||
{
|
||||
int res;
|
||||
bool correct;
|
||||
pt::utf8_to_int(*this, end, res, correct);
|
||||
|
||||
if( correct )
|
||||
return static_cast<wchar_t>(res);
|
||||
else
|
||||
return static_cast<wchar_t>(0xFFFD); // U+FFFD "replacement character"
|
||||
}
|
||||
else
|
||||
{
|
||||
wchar_t c = operator*();
|
||||
++membuffer_const_iterator;
|
||||
return c;
|
||||
}
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
template<typename char_type, size_t stack_size, size_t heap_block_size>
|
||||
bool TextStreamBase<char_type, stack_size, heap_block_size>::is_char_stream() const
|
||||
{
|
||||
|
@ -433,10 +759,14 @@ template<typename char_type, size_t stack_size, size_t heap_block_size>
|
|||
TextStreamBase<char_type, stack_size, heap_block_size> &
|
||||
TextStreamBase<char_type, stack_size, heap_block_size>::operator<<(char v)
|
||||
{
|
||||
// IMPROVEME
|
||||
// if char_type == 1 then if v <= 127 then put that char but if (unsigned)v > 127 put replacement character
|
||||
// if char_type > 1 then simply put that character
|
||||
buffer.append(static_cast<char_type>(v));
|
||||
if constexpr (sizeof(char_type) == sizeof(wchar_t) )
|
||||
{
|
||||
buffer.append(static_cast<char_type>(static_cast<unsigned char>(v)));
|
||||
}
|
||||
else
|
||||
{
|
||||
buffer.append(v);
|
||||
}
|
||||
|
||||
return *this;
|
||||
}
|
||||
|
@ -446,9 +776,6 @@ template<typename char_type, size_t stack_size, size_t heap_block_size>
|
|||
TextStreamBase<char_type, stack_size, heap_block_size> &
|
||||
TextStreamBase<char_type, stack_size, heap_block_size>::operator<<(unsigned char v)
|
||||
{
|
||||
// IMPROVEME
|
||||
// if char_type == 1 then if v <= 127 then put that char but if v > 127 put replacement character
|
||||
// if char_type > 1 then simply put that character
|
||||
buffer.append(static_cast<char_type>(v));
|
||||
|
||||
return *this;
|
||||
|
@ -459,8 +786,14 @@ template<typename char_type, size_t stack_size, size_t heap_block_size>
|
|||
TextStreamBase<char_type, stack_size, heap_block_size> &
|
||||
TextStreamBase<char_type, stack_size, heap_block_size>::operator<<(wchar_t v)
|
||||
{
|
||||
// IMPROVEME add utf8/wide conversion, if v is from surrogate pair we can skip it
|
||||
buffer.append(static_cast<char_type>(v));
|
||||
if constexpr (sizeof(char_type) == sizeof(wchar_t) )
|
||||
{
|
||||
buffer.append(v);
|
||||
}
|
||||
else
|
||||
{
|
||||
pt::int_to_utf8(static_cast<int>(v), *this);
|
||||
}
|
||||
|
||||
return *this;
|
||||
}
|
||||
|
|
|
@ -5,7 +5,7 @@
|
|||
*/
|
||||
|
||||
/*
|
||||
* Copyright (c) 2021, Tomasz Sowa
|
||||
* Copyright (c) 2021-2022, Tomasz Sowa
|
||||
* All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
|
@ -60,7 +60,7 @@ namespace pt
|
|||
template<typename StreamIteratorType>
|
||||
size_t utf8_to_int(
|
||||
StreamIteratorType & iterator_in,
|
||||
StreamIteratorType & iterator_end,
|
||||
const StreamIteratorType & iterator_end,
|
||||
int & res,
|
||||
bool & correct)
|
||||
{
|
||||
|
|
|
@ -19,7 +19,6 @@
|
|||
./csvparser.o: ../src/textstream/stream.h ../src/date/date.h
|
||||
./csvparser.o: ../src/membuffer/membuffer.h ../src/textstream/types.h test.h
|
||||
./main.o: convert.h mainoptionsparser.h csvparser.h
|
||||
./test.o: test.h
|
||||
./mainoptionsparser.o: mainoptionsparser.h test.h
|
||||
./mainoptionsparser.o: ../src/mainoptions/mainoptionsparser.h
|
||||
./mainoptionsparser.o: ../src/space/space.h ../src/textstream/types.h
|
||||
|
@ -35,3 +34,4 @@
|
|||
./mainoptionsparser.o: ../src/convert/strtoint.h ../src/convert/text.h
|
||||
./mainoptionsparser.o: ../src/convert/misc.h ../src/utf8/utf8_stream.h
|
||||
./mainoptionsparser.o: ../src/convert/double.h
|
||||
./test.o: test.h
|
||||
|
|
Loading…
Reference in New Issue