Browse Source

fix: correctly escape json/xml/csv wide strings

A wide string was first converted to utf-8 and then escaped to json/xml/csv,
which is incorrect. It should be escaped first and then converted to utf-8.

Add TextStreamBase<>::iterator and TextStreamBase<>::const_iterator as classes
with a method wchar_t get_unicode_and_advance(const iterator & end)
to return one character either from utf-8 stream or from wide stream.

Let TextStreamBase<>::operator<<(wchar_t v) correctly use utf-8.
master
Tomasz Sowa 10 months ago
parent
commit
6b97b1b74a
  1. 40
      src/Makefile.dep
  2. 142
      src/convert/misc.cpp
  3. 89
      src/convert/misc.h
  4. 359
      src/textstream/textstream.h
  5. 4
      src/utf8/utf8_stream.h
  6. 2
      tests/Makefile.dep

40
src/Makefile.dep

@ -13,6 +13,7 @@
./convert/double.o: convert/inttostr.h utf8/utf8.h textstream/stream.h
./convert/double.o: utf8/utf8_templates.h utf8/utf8_private.h date/date.h
./convert/double.o: membuffer/membuffer.h textstream/types.h
./convert/double.o: utf8/utf8_stream.h
./convert/baseparser.o: ./convert/baseparser.h textstream/textstream.h
./convert/baseparser.o: textstream/stream.h space/space.h textstream/types.h
./convert/baseparser.o: convert/inttostr.h utf8/utf8.h textstream/stream.h
@ -24,19 +25,19 @@
./log/filelog.o: space/space.h textstream/types.h convert/inttostr.h
./log/filelog.o: utf8/utf8.h textstream/stream.h utf8/utf8_templates.h
./log/filelog.o: utf8/utf8_private.h date/date.h membuffer/membuffer.h
./log/filelog.o: textstream/types.h
./log/filelog.o: textstream/types.h utf8/utf8_stream.h
./log/log.o: ./log/log.h textstream/textstream.h textstream/stream.h
./log/log.o: space/space.h textstream/types.h convert/inttostr.h utf8/utf8.h
./log/log.o: textstream/stream.h utf8/utf8_templates.h utf8/utf8_private.h
./log/log.o: date/date.h membuffer/membuffer.h textstream/types.h
./log/log.o: ./log/filelog.h
./log/log.o: utf8/utf8_stream.h ./log/filelog.h
./space/space.o: ./space/space.h textstream/types.h convert/inttostr.h
./space/space.o: utf8/utf8.h textstream/stream.h utf8/utf8_templates.h
./space/space.o: utf8/utf8_private.h convert/convert.h ./convert/inttostr.h
./space/space.o: convert/patternreplacer.h textstream/textstream.h
./space/space.o: textstream/stream.h space/space.h date/date.h
./space/space.o: membuffer/membuffer.h textstream/types.h convert/strtoint.h
./space/space.o: ./convert/text.h ./convert/misc.h utf8/utf8_stream.h
./space/space.o: membuffer/membuffer.h textstream/types.h utf8/utf8_stream.h
./space/space.o: convert/strtoint.h ./convert/text.h ./convert/misc.h
./space/space.o: ./convert/double.h
./space/spaceparser.o: ./space/spaceparser.h ./space/space.h
./space/spaceparser.o: textstream/types.h convert/inttostr.h utf8/utf8.h
@ -44,31 +45,32 @@
./space/spaceparser.o: utf8/utf8_private.h convert/baseparser.h
./space/spaceparser.o: textstream/textstream.h textstream/stream.h
./space/spaceparser.o: space/space.h date/date.h membuffer/membuffer.h
./space/spaceparser.o: textstream/types.h convert/strtoint.h ./convert/text.h
./space/spaceparser.o: ./convert/misc.h utf8/utf8_stream.h
./space/spaceparser.o: textstream/types.h utf8/utf8_stream.h
./space/spaceparser.o: convert/strtoint.h ./convert/text.h ./convert/misc.h
./utf8/utf8.o: ./utf8/utf8.h textstream/stream.h utf8/utf8_templates.h
./utf8/utf8.o: utf8/utf8_private.h
./utf8/utf8_private.o: utf8/utf8_private.h
./html/bbcodeparser.o: ./html/bbcodeparser.h ./html/htmlparser.h
./html/bbcodeparser.o: convert/baseparser.h textstream/textstream.h
./html/bbcodeparser.o: textstream/stream.h space/space.h textstream/types.h
./html/bbcodeparser.o: convert/inttostr.h utf8/utf8.h textstream/stream.h
./html/bbcodeparser.o: utf8/utf8_templates.h utf8/utf8_private.h date/date.h
./html/bbcodeparser.o: membuffer/membuffer.h textstream/types.h
./html/bbcodeparser.o: utf8/utf8_stream.h
./html/htmlparser.o: ./html/htmlparser.h convert/baseparser.h
./html/htmlparser.o: textstream/textstream.h textstream/stream.h
./html/htmlparser.o: space/space.h textstream/types.h convert/inttostr.h
./html/htmlparser.o: utf8/utf8.h textstream/stream.h utf8/utf8_templates.h
./html/htmlparser.o: utf8/utf8_private.h date/date.h membuffer/membuffer.h
./html/htmlparser.o: textstream/types.h utf8/utf8_stream.h convert/text.h
./csv/csvparser.o: ./csv/csvparser.h space/space.h textstream/types.h
./csv/csvparser.o: convert/inttostr.h utf8/utf8.h textstream/stream.h
./csv/csvparser.o: utf8/utf8_templates.h utf8/utf8_private.h
./csv/csvparser.o: convert/baseparser.h textstream/textstream.h
./csv/csvparser.o: textstream/stream.h date/date.h membuffer/membuffer.h
./csv/csvparser.o: textstream/types.h
./csv/csvparser.o: textstream/types.h utf8/utf8_stream.h
./mainoptions/mainoptionsparser.o: ./mainoptions/mainoptionsparser.h
./mainoptions/mainoptionsparser.o: space/space.h textstream/types.h
./mainoptions/mainoptionsparser.o: convert/inttostr.h utf8/utf8.h
./mainoptions/mainoptionsparser.o: textstream/stream.h utf8/utf8_templates.h
./mainoptions/mainoptionsparser.o: utf8/utf8_private.h
./html/htmlparser.o: ./html/htmlparser.h convert/baseparser.h
./html/htmlparser.o: textstream/textstream.h textstream/stream.h
./html/htmlparser.o: space/space.h textstream/types.h convert/inttostr.h
./html/htmlparser.o: utf8/utf8.h textstream/stream.h utf8/utf8_templates.h
./html/htmlparser.o: utf8/utf8_private.h date/date.h membuffer/membuffer.h
./html/htmlparser.o: textstream/types.h convert/text.h
./html/bbcodeparser.o: ./html/bbcodeparser.h ./html/htmlparser.h
./html/bbcodeparser.o: convert/baseparser.h textstream/textstream.h
./html/bbcodeparser.o: textstream/stream.h space/space.h textstream/types.h
./html/bbcodeparser.o: convert/inttostr.h utf8/utf8.h textstream/stream.h
./html/bbcodeparser.o: utf8/utf8_templates.h utf8/utf8_private.h date/date.h
./html/bbcodeparser.o: membuffer/membuffer.h textstream/types.h

142
src/convert/misc.cpp

@ -5,7 +5,7 @@
*/
/*
* Copyright (c) 2017-2021, Tomasz Sowa
* Copyright (c) 2017-2022, Tomasz Sowa
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
@ -51,62 +51,83 @@ void SetOverflow(bool * was_overflow, bool val)
}
void esc_to_json(char val, Stream & out)
void esc_to_json_uformat(wchar_t val, Stream & out)
{
if( (unsigned char)val < 32 )
{
char buf[10];
size_t len;
Toa((unsigned char)val, buf, sizeof(buf)/sizeof(char), 16, &len);
char buf[10];
size_t len;
out << "\\u";
Toa((unsigned long)val, buf, sizeof(buf)/sizeof(char), 16, &len);
if( len < 4 )
{
for(size_t i=0 ; i < (4-len) ; ++i)
{
out << '0';
}
}
out << "\\u";
out << buf;
}
else
if( len < 4 )
{
// CHECKME
// \r \n \t are <32 and will be serialized os \u.... above
switch( val )
for(size_t i=0 ; i < (4-len) ; ++i)
{
case 0: out << '\\'; out << '0'; break; // may to skip this character is better?
case '\r': out << '\\'; out << 'r'; break;
case '\n': out << '\\'; out << 'n'; break;
case '\t': out << '\\'; out << 't'; break;
case 0x08: out << '\\'; out << 'b'; break;
case 0x0c: out << '\\'; out << 'f'; break;
case '\\': out << '\\'; out << '\\'; break;
case '"': out << '\\'; out << '\"'; break;
default:
out << val;
out << '0';
}
}
out << buf;
}
void esc_to_json(wchar_t val, Stream & out)
{
char utf8_buf[10];
std::size_t utf8_buf_len = sizeof(utf8_buf) / sizeof(char);
size_t len = int_to_utf8(static_cast<int>(val), utf8_buf, utf8_buf_len);
for(size_t a = 0 ; a < len ; ++a)
if( val == '\r' )
{
out << '\\' << 'r';
}
else
if( val == '\n' )
{
out << '\\' << 'n';
}
else
if( val == '\t' )
{
out << '\\' << 't';
}
else
if( val == 0x08 )
{
out << '\\' << 'b';
}
else
if( val == 0x0c )
{
out << '\\' << 'f';
}
else
if( val == '\\' )
{
out << '\\' << '\\';
}
else
if( val == '"' )
{
out << '\\' << '\"';
}
else
if( val < 32 )
{
esc_to_json_uformat(val, out);
}
else
{
esc_to_json(utf8_buf[a], out);
out << val;
}
}
// Escapes one narrow character for JSON and writes the result to out.
// The character is first reinterpreted as unsigned char and only then widened,
// so the whole job is delegated to the wchar_t overload.
void esc_to_json(char val, Stream & out)
{
    const unsigned char byte_value = static_cast<unsigned char>(val);
    esc_to_json(static_cast<wchar_t>(byte_value), out);
}
void esc_to_json(const char * c, pt::Stream & out)
{
for(size_t i = 0 ; c[i] != 0 ; ++i)
@ -159,10 +180,15 @@ void esc_to_json(const std::wstring & in, Stream & out)
void esc_to_xml(char val, Stream & out)
void esc_to_xml(wchar_t val, Stream & out)
{
switch(val)
{
case 0:
// null character is invalid in XML 1.0 and 1.1
// https://en.wikipedia.org/wiki/Valid_characters_in_XML
break;
case '<':
out << "&lt;";
break;
@ -182,22 +208,13 @@ void esc_to_xml(char val, Stream & out)
default:
out << val;
break;
// what about zero (null) character?
}
}
void esc_to_xml(wchar_t val, Stream & out)
{
char utf8_buf[10];
std::size_t utf8_buf_len = sizeof(utf8_buf) / sizeof(char);
size_t len = int_to_utf8(static_cast<int>(val), utf8_buf, utf8_buf_len);
for(size_t a = 0 ; a < len ; ++a)
{
esc_to_xml(utf8_buf[a], out);
}
// Escapes one narrow character for XML and writes the result to out.
// The character is first reinterpreted as unsigned char and only then widened,
// so the whole job is delegated to the wchar_t overload.
void esc_to_xml(char val, Stream & out)
{
    const unsigned char byte_value = static_cast<unsigned char>(val);
    esc_to_xml(static_cast<wchar_t>(byte_value), out);
}
@ -252,10 +269,14 @@ void esc_to_xml(const std::wstring & in, Stream & out)
void esc_to_csv(char c, pt::Stream & out)
void esc_to_csv(wchar_t c, pt::Stream & out)
{
switch(c)
{
case 0:
// null characters are invalid in text files
break;
case '"':
out << "\"\"";
break;
@ -263,27 +284,16 @@ void esc_to_csv(char c, pt::Stream & out)
default:
out << c;
break;
// what about zero (null) character?
}
}
void esc_to_csv(wchar_t val, Stream & out)
void esc_to_csv(char val, Stream & out)
{
char utf8_buf[10];
std::size_t utf8_buf_len = sizeof(utf8_buf) / sizeof(char);
size_t len = int_to_utf8(static_cast<int>(val), utf8_buf, utf8_buf_len);
for(size_t a = 0 ; a < len ; ++a)
{
esc_to_csv(utf8_buf[a], out);
}
esc_to_csv((wchar_t)(unsigned char)val, out);
}
void esc_to_csv(const char * c, pt::Stream & out)
{
for(size_t i = 0 ; c[i] != 0 ; ++i)

89
src/convert/misc.h

@ -5,7 +5,7 @@
*/
/*
* Copyright (c) 2017-2021, Tomasz Sowa
* Copyright (c) 2017-2022, Tomasz Sowa
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
@ -50,8 +50,8 @@ namespace pt
void SetOverflow(bool * was_overflow, bool val);
void esc_to_json(char val, Stream & out);
void esc_to_json(wchar_t val, Stream & out);
void esc_to_json(char val, Stream & out);
void esc_to_json(const char * c, pt::Stream & out);
void esc_to_json(const char * c, std::size_t len, Stream & out);
void esc_to_json(const wchar_t * c, Stream & out);
@ -59,8 +59,8 @@ void esc_to_json(const wchar_t * c, size_t len, pt::Stream & out);
void esc_to_json(const std::string & in, Stream & out);
void esc_to_json(const std::wstring & in, Stream & out);
void esc_to_xml(char c, pt::Stream & out);
void esc_to_xml(wchar_t c, pt::Stream & out);
void esc_to_xml(char c, pt::Stream & out);
void esc_to_xml(const char * c, pt::Stream & out);
void esc_to_xml(const char * c, std::size_t len, pt::Stream & out);
void esc_to_xml(const wchar_t * c, pt::Stream & out);
@ -68,8 +68,8 @@ void esc_to_xml(const wchar_t * c, size_t len, pt::Stream & out);
void esc_to_xml(const std::string & in, Stream & out);
void esc_to_xml(const std::wstring & in, Stream & out);
void esc_to_csv(char c, pt::Stream & out);
void esc_to_csv(wchar_t val, Stream & out);
void esc_to_csv(char c, pt::Stream & out);
void esc_to_csv(const char * c, std::size_t len, Stream & out);
void esc_to_csv(const char * c, pt::Stream & out);
void esc_to_csv(const char * c, std::size_t len, pt::Stream & out);
@ -82,34 +82,13 @@ void esc_to_csv(const std::string & in, Stream & out);
template<typename StreamType>
void esc_to_json(const StreamType & in, Stream & out)
{
char utf8_buf[10];
std::size_t utf8_buf_len = sizeof(utf8_buf) / sizeof(char);
typename StreamType::const_iterator i = in.begin();
typename StreamType::const_iterator end = in.end();
int res;
bool correct;
for( ; i != end ; ++i)
while( i != end )
{
if( in.is_wchar_stream() && out.is_char_stream() )
{
std::size_t len = int_to_utf8(static_cast<int>(*i), utf8_buf, utf8_buf_len);
esc_to_json(utf8_buf, len, out);
}
else
if( in.is_char_stream() && out.is_wchar_stream() )
{
utf8_to_int(i, end, res, correct);
if( correct )
esc_to_json(static_cast<wchar_t>(res), out); // IMPROVEME no surrogate pair used here (if sizeof(wchar_t) == 2)
// put replacement char if not correct?
}
else
{
esc_to_json(static_cast<wchar_t>(*i), out);
}
wchar_t c = i.get_unicode_and_advance(end);
esc_to_json(c, out);
}
}
@ -117,34 +96,13 @@ void esc_to_json(const StreamType & in, Stream & out)
template<typename StreamType>
void esc_to_xml(const StreamType & in, Stream & out)
{
char utf8_buf[10];
std::size_t utf8_buf_len = sizeof(utf8_buf) / sizeof(char);
typename StreamType::const_iterator i = in.begin();
typename StreamType::const_iterator end = in.end();
int res;
bool correct;
for( ; i != end ; ++i)
while( i != end )
{
if( in.is_wchar_stream() && out.is_char_stream() )
{
std::size_t len = int_to_utf8(static_cast<int>(*i), utf8_buf, utf8_buf_len);
esc_to_xml(utf8_buf, len, out);
}
else
if( in.is_char_stream() && out.is_wchar_stream() )
{
utf8_to_int(i, end, res, correct);
if( correct )
esc_to_xml(static_cast<wchar_t>(res), out); // IMPROVEME no surrogate pair used here (if sizeof(wchar_t) == 2)
// put replacement char if not correct?
}
else
{
esc_to_xml(static_cast<wchar_t>(*i), out);
}
wchar_t c = i.get_unicode_and_advance(end);
esc_to_xml(c, out);
}
}
@ -152,34 +110,13 @@ void esc_to_xml(const StreamType & in, Stream & out)
template<typename StreamType>
void esc_to_csv(const StreamType & in, Stream & out)
{
char utf8_buf[10];
std::size_t utf8_buf_len = sizeof(utf8_buf) / sizeof(char);
typename StreamType::const_iterator i = in.begin();
typename StreamType::const_iterator end = in.end();
int res;
bool correct;
for( ; i != end ; ++i)
while( i != end )
{
if( in.is_wchar_stream() && out.is_char_stream() )
{
std::size_t len = int_to_utf8(static_cast<int>(*i), utf8_buf, utf8_buf_len);
esc_to_csv(utf8_buf, len, out);
}
else
if( in.is_char_stream() && out.is_wchar_stream() )
{
utf8_to_int(i, end, res, correct);
if( correct )
esc_to_csv(static_cast<wchar_t>(res), out); // IMPROVEME no surrogate pair used here (if sizeof(wchar_t) == 2)
// put replacement char if not correct?
}
else
{
esc_to_csv(static_cast<wchar_t>(*i), out);
}
wchar_t c = i.get_unicode_and_advance(end);
esc_to_csv(c, out);
}
}

359
src/textstream/textstream.h

@ -5,7 +5,7 @@
*/
/*
* Copyright (c) 2012-2021, Tomasz Sowa
* Copyright (c) 2012-2022, Tomasz Sowa
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
@ -46,6 +46,7 @@
#include "membuffer/membuffer.h"
#include "types.h"
#include "utf8/utf8.h"
#include "utf8/utf8_stream.h"
// for snprintf
#include <cstdio>
@ -71,8 +72,67 @@ public:
typedef CharT char_type;
typedef MemBuffer<char_type, stack_size, heap_block_size> buffer_type;
typedef typename buffer_type::iterator iterator;
typedef typename buffer_type::const_iterator const_iterator;
// Mutable iterator over the characters stored in the stream.
// It is a thin wrapper around buffer_type::iterator which additionally
// offers get_unicode_and_advance() for reading one character at a time.
class iterator
{
public:
// the wrapped buffer iterator; all operators below forward to it
typename buffer_type::iterator membuffer_iterator;
iterator();
iterator(const iterator & i);
iterator & operator=(const iterator & i);
// not explicit: a raw buffer iterator converts implicitly
iterator(const typename buffer_type::iterator & i);
iterator & operator=(const typename buffer_type::iterator & i);
bool operator==(const iterator & i) const;
bool operator!=(const iterator & i) const;
iterator & operator++(); // prefix ++
iterator operator++(int); // postfix ++
iterator & operator--(); // prefix --
iterator operator--(int); // postfix --
// reference to the character at the current position
CharT & operator*();
// returns one character (utf-8 decoded when the stream holds char-sized
// elements) and moves the iterator past it; returns 0 when already at end
wchar_t get_unicode_and_advance(const iterator & end);
};
// Read-only iterator over the characters stored in the stream.
// It is a thin wrapper around buffer_type::const_iterator which additionally
// offers get_unicode_and_advance() for reading one character at a time.
// It can be constructed and assigned from the mutable iterator types as well.
class const_iterator
{
public:
// the wrapped buffer iterator; all operators below forward to it
typename buffer_type::const_iterator membuffer_const_iterator;
const_iterator();
const_iterator(const const_iterator & i);
const_iterator(const iterator & i);
const_iterator & operator=(const const_iterator & i);
const_iterator & operator=(const iterator & i);
// not explicit: raw buffer iterators convert implicitly
const_iterator(const typename buffer_type::const_iterator & i);
const_iterator(const typename buffer_type::iterator & i);
const_iterator & operator=(const typename buffer_type::const_iterator & i);
const_iterator & operator=(const typename buffer_type::iterator & i);
bool operator==(const const_iterator & i) const;
bool operator!=(const const_iterator & i) const;
const_iterator & operator++(); // prefix ++
const_iterator operator++(int); // postfix ++
const_iterator & operator--(); // prefix --
const_iterator operator--(int); // postfix --
// copy of the character at the current position
CharT operator*();
// returns one character (utf-8 decoded when the stream holds char-sized
// elements) and moves the iterator past it; returns 0 when already at end
wchar_t get_unicode_and_advance(const const_iterator & end);
};
bool is_char_stream() const;
@ -112,7 +172,7 @@ public:
TextStreamBase & operator<<(char);
TextStreamBase & operator<<(unsigned char);
TextStreamBase & operator<<(wchar_t);
TextStreamBase & operator<<(wchar_t); // no surrogate pairs are used
TextStreamBase & operator<<(bool);
TextStreamBase & operator<<(short);
TextStreamBase & operator<<(int);
@ -173,6 +233,272 @@ TextStreamBase<char_type, stack_size, heap_block_size>::TextStreamBase()
}
// Default constructor: the wrapped buffer iterator is default-constructed.
template<typename char_type, size_t stack_size, size_t heap_block_size>
TextStreamBase<char_type, stack_size, heap_block_size>::iterator::iterator()
{
}
// Copy constructor: copies the buffer iterator wrapped by i.
// The member must be initialised from i.membuffer_iterator and not from i
// itself, as the wrapper declares no conversion to buffer_type::iterator.
template<typename char_type, size_t stack_size, size_t heap_block_size>
TextStreamBase<char_type, stack_size, heap_block_size>::iterator::iterator(const iterator & i) : membuffer_iterator(i.membuffer_iterator)
{
}
template<typename char_type, size_t stack_size, size_t heap_block_size>
TextStreamBase<char_type, stack_size, heap_block_size>::iterator &
TextStreamBase<char_type, stack_size, heap_block_size>::iterator::operator=(const iterator & i)
{
membuffer_iterator = i;
}
// Wraps a raw buffer iterator: the given iterator is copied into membuffer_iterator.
template<typename char_type, size_t stack_size, size_t heap_block_size>
TextStreamBase<char_type, stack_size, heap_block_size>::iterator::iterator(const typename buffer_type::iterator & i) : membuffer_iterator(i)
{
}
template<typename char_type, size_t stack_size, size_t heap_block_size>
TextStreamBase<char_type, stack_size, heap_block_size>::iterator &
TextStreamBase<char_type, stack_size, heap_block_size>::iterator::operator=(const typename buffer_type::iterator & i)
{
membuffer_iterator = i;
}
// Two iterators are equal when their wrapped buffer iterators compare equal.
template<typename char_type, size_t stack_size, size_t heap_block_size>
bool TextStreamBase<char_type, stack_size, heap_block_size>::iterator::operator==(const iterator & i) const
{
return membuffer_iterator == i.membuffer_iterator;
}
// Two iterators differ when their wrapped buffer iterators compare unequal.
template<typename char_type, size_t stack_size, size_t heap_block_size>
bool TextStreamBase<char_type, stack_size, heap_block_size>::iterator::operator!=(const iterator & i) const
{
return membuffer_iterator != i.membuffer_iterator;
}
template<typename char_type, size_t stack_size, size_t heap_block_size>
TextStreamBase<char_type, stack_size, heap_block_size>::iterator &
TextStreamBase<char_type, stack_size, heap_block_size>::iterator::operator++()
{
++membuffer_iterator;
return *this;
}
template<typename char_type, size_t stack_size, size_t heap_block_size>
TextStreamBase<char_type, stack_size, heap_block_size>::iterator
TextStreamBase<char_type, stack_size, heap_block_size>::iterator::operator++(int)
{
const_iterator old(*this);
membuffer_iterator++;
return old;
}
template<typename char_type, size_t stack_size, size_t heap_block_size>
TextStreamBase<char_type, stack_size, heap_block_size>::iterator &
TextStreamBase<char_type, stack_size, heap_block_size>::iterator::operator--()
{
--membuffer_iterator;
return *this;
}
template<typename char_type, size_t stack_size, size_t heap_block_size>
TextStreamBase<char_type, stack_size, heap_block_size>::iterator
TextStreamBase<char_type, stack_size, heap_block_size>::iterator::operator--(int)
{
const_iterator old(*this);
membuffer_iterator--;
return old;
}
// Dereference: returns a reference to the element the wrapped buffer iterator points at.
template<typename char_type, size_t stack_size, size_t heap_block_size>
char_type & TextStreamBase<char_type, stack_size, heap_block_size>::iterator::operator*()
{
return *membuffer_iterator;
}
// Reads one character at the current position and moves the iterator past it.
//
// - when the iterator already equals end nothing is read and 0 is returned,
// - when the stream elements are char-sized the input is decoded with
//   pt::utf8_to_int(); an incorrect sequence yields U+FFFD,
// - otherwise the current element is returned as is and the iterator
//   is advanced by one.
//
// The decoded value is narrowed to wchar_t with a plain static_cast.
// NOTE(review): how far pt::utf8_to_int() advances the iterator on an
// incorrect sequence is not visible here -- confirm against its definition.
//
// pt::utf8_to_int is called qualified, the same way as in
// const_iterator::get_unicode_and_advance().
template<typename char_type, size_t stack_size, size_t heap_block_size>
wchar_t TextStreamBase<char_type, stack_size, heap_block_size>::iterator::get_unicode_and_advance(const iterator & end)
{
    if( *this != end )
    {
        if constexpr (sizeof(char_type) == sizeof(char) )
        {
            int res;
            bool correct;

            pt::utf8_to_int(*this, end, res, correct);

            if( correct )
                return static_cast<wchar_t>(res);
            else
                return static_cast<wchar_t>(0xFFFD); // U+FFFD "replacement character"
        }
        else
        {
            wchar_t c = operator*();
            ++membuffer_iterator;
            return c;
        }
    }

    return 0;
}
// Default constructor: the wrapped buffer const_iterator is default-constructed.
template<typename char_type, size_t stack_size, size_t heap_block_size>
TextStreamBase<char_type, stack_size, heap_block_size>::const_iterator::const_iterator()
{
}
// Copy constructor: copies the buffer const_iterator wrapped by i.
template<typename char_type, size_t stack_size, size_t heap_block_size>
TextStreamBase<char_type, stack_size, heap_block_size>::const_iterator::const_iterator(const const_iterator & i) : membuffer_const_iterator(i.membuffer_const_iterator)
{
}
// Converting constructor from the mutable iterator: the wrapped buffer
// const_iterator is initialised from the buffer iterator held by i.
template<typename char_type, size_t stack_size, size_t heap_block_size>
TextStreamBase<char_type, stack_size, heap_block_size>::const_iterator::const_iterator(const iterator & i) : membuffer_const_iterator(i.membuffer_iterator)
{
}
template<typename char_type, size_t stack_size, size_t heap_block_size>
TextStreamBase<char_type, stack_size, heap_block_size>::const_iterator &
TextStreamBase<char_type, stack_size, heap_block_size>::const_iterator::operator=(const const_iterator & i)
{
membuffer_const_iterator = i.membuffer_const_iterator;
return *this;
}
template<typename char_type, size_t stack_size, size_t heap_block_size>
TextStreamBase<char_type, stack_size, heap_block_size>::const_iterator &
TextStreamBase<char_type, stack_size, heap_block_size>::const_iterator::operator=(const iterator & i)
{
membuffer_const_iterator = i.membuffer_iterator;
return *this;
}
// Wraps a raw buffer const_iterator: the given iterator is copied into membuffer_const_iterator.
template<typename char_type, size_t stack_size, size_t heap_block_size>
TextStreamBase<char_type, stack_size, heap_block_size>::const_iterator::const_iterator(const typename buffer_type::const_iterator & i) : membuffer_const_iterator(i)
{
}
// Wraps a raw mutable buffer iterator: membuffer_const_iterator is initialised from it.
template<typename char_type, size_t stack_size, size_t heap_block_size>
TextStreamBase<char_type, stack_size, heap_block_size>::const_iterator::const_iterator(const typename buffer_type::iterator & i) : membuffer_const_iterator(i)
{
}
template<typename char_type, size_t stack_size, size_t heap_block_size>
TextStreamBase<char_type, stack_size, heap_block_size>::const_iterator &
TextStreamBase<char_type, stack_size, heap_block_size>::const_iterator::operator=(const typename buffer_type::const_iterator & i)
{
membuffer_const_iterator = i;
return *this;
}
template<typename char_type, size_t stack_size, size_t heap_block_size>
TextStreamBase<char_type, stack_size, heap_block_size>::const_iterator &
TextStreamBase<char_type, stack_size, heap_block_size>::const_iterator::operator=(const typename buffer_type::iterator & i)
{
membuffer_const_iterator = i;
return *this;
}
// Two const_iterators are equal when their wrapped buffer iterators compare equal.
template<typename char_type, size_t stack_size, size_t heap_block_size>
bool TextStreamBase<char_type, stack_size, heap_block_size>::const_iterator::operator==(const const_iterator & i) const
{
return membuffer_const_iterator == i.membuffer_const_iterator;
}
// Two const_iterators differ when their wrapped buffer iterators compare unequal.
template<typename char_type, size_t stack_size, size_t heap_block_size>
bool TextStreamBase<char_type, stack_size, heap_block_size>::const_iterator::operator!=(const const_iterator & i) const
{
return membuffer_const_iterator != i.membuffer_const_iterator;
}
template<typename char_type, size_t stack_size, size_t heap_block_size>
TextStreamBase<char_type, stack_size, heap_block_size>::const_iterator &
TextStreamBase<char_type, stack_size, heap_block_size>::const_iterator::operator++()
{
++membuffer_const_iterator;
return *this;
}
template<typename char_type, size_t stack_size, size_t heap_block_size>
TextStreamBase<char_type, stack_size, heap_block_size>::const_iterator
TextStreamBase<char_type, stack_size, heap_block_size>::const_iterator::operator++(int)
{
const_iterator old(*this);
membuffer_const_iterator++;
return old;
}
template<typename char_type, size_t stack_size, size_t heap_block_size>
TextStreamBase<char_type, stack_size, heap_block_size>::const_iterator &
TextStreamBase<char_type, stack_size, heap_block_size>::const_iterator::operator--()
{
--membuffer_const_iterator;
return *this;
}
template<typename char_type, size_t stack_size, size_t heap_block_size>
TextStreamBase<char_type, stack_size, heap_block_size>::const_iterator
TextStreamBase<char_type, stack_size, heap_block_size>::const_iterator::operator--(int)
{
const_iterator old(*this);
membuffer_const_iterator--;
return old;
}
// Dereference: returns (by value) the element the wrapped buffer const_iterator points at.
template<typename char_type, size_t stack_size, size_t heap_block_size>
char_type TextStreamBase<char_type, stack_size, heap_block_size>::const_iterator::operator*()
{
return *membuffer_const_iterator;
}
// Fetches a single character from the current position and steps past it.
//
// At the end of the range nothing is consumed and 0 is returned.
// For char-sized elements the input is run through pt::utf8_to_int();
// if the decoder reports an incorrect sequence U+FFFD is returned instead.
// For wider elements the current element is handed back unchanged and the
// iterator moves forward by one.
template<typename char_type, size_t stack_size, size_t heap_block_size>
wchar_t TextStreamBase<char_type, stack_size, heap_block_size>::const_iterator::get_unicode_and_advance(const const_iterator & end)
{
    if( *this == end )
    {
        return 0;
    }

    if constexpr (sizeof(char_type) == sizeof(char) )
    {
        int code_point;
        bool is_valid;

        pt::utf8_to_int(*this, end, code_point, is_valid);

        // U+FFFD is the "replacement character"
        return is_valid ? static_cast<wchar_t>(code_point) : static_cast<wchar_t>(0xFFFD);
    }
    else
    {
        wchar_t current = **this;
        ++membuffer_const_iterator;

        return current;
    }
}
template<typename char_type, size_t stack_size, size_t heap_block_size>
bool TextStreamBase<char_type, stack_size, heap_block_size>::is_char_stream() const
{
@ -433,10 +759,14 @@ template<typename char_type, size_t stack_size, size_t heap_block_size>
TextStreamBase<char_type, stack_size, heap_block_size> &
TextStreamBase<char_type, stack_size, heap_block_size>::operator<<(char v)
{
// IMPROVEME
// if char_type == 1 then if v <= 127 then put that char but if (unsigned)v > 127 put replacement character
// if char_type > 1 then simply put that character
buffer.append(static_cast<char_type>(v));
if constexpr (sizeof(char_type) == sizeof(wchar_t) )
{
buffer.append(static_cast<char_type>(static_cast<unsigned char>(v)));
}
else
{
buffer.append(v);
}
return *this;
}
@ -446,9 +776,6 @@ template<typename char_type, size_t stack_size, size_t heap_block_size>
TextStreamBase<char_type, stack_size, heap_block_size> &
TextStreamBase<char_type, stack_size, heap_block_size>::operator<<(unsigned char v)
{
// IMPROVEME
// if char_type == 1 then if v <= 127 then put that char but if v > 127 put replacement character
// if char_type > 1 then simply put that character
buffer.append(static_cast<char_type>(v));
return *this;
@ -459,8 +786,14 @@ template<typename char_type, size_t stack_size, size_t heap_block_size>
TextStreamBase<char_type, stack_size, heap_block_size> &
TextStreamBase<char_type, stack_size, heap_block_size>::operator<<(wchar_t v)
{
// IMPROVEME add utf8/wide conversion, if v is from surrogate pair we can skip it
buffer.append(static_cast<char_type>(v));
if constexpr (sizeof(char_type) == sizeof(wchar_t) )
{
buffer.append(v);
}
else
{
pt::int_to_utf8(static_cast<int>(v), *this);
}
return *this;
}

4
src/utf8/utf8_stream.h

@ -5,7 +5,7 @@
*/
/*
* Copyright (c) 2021, Tomasz Sowa
* Copyright (c) 2021-2022, Tomasz Sowa
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
@ -60,7 +60,7 @@ namespace pt
template<typename StreamIteratorType>
size_t utf8_to_int(
StreamIteratorType & iterator_in,
StreamIteratorType & iterator_end,
const StreamIteratorType & iterator_end,
int & res,
bool & correct)
{

2
tests/Makefile.dep

@ -19,7 +19,6 @@
./csvparser.o: ../src/textstream/stream.h ../src/date/date.h
./csvparser.o: ../src/membuffer/membuffer.h ../src/textstream/types.h test.h
./main.o: convert.h mainoptionsparser.h csvparser.h
./test.o: test.h
./mainoptionsparser.o: mainoptionsparser.h test.h
./mainoptionsparser.o: ../src/mainoptions/mainoptionsparser.h
./mainoptionsparser.o: ../src/space/space.h ../src/textstream/types.h
@ -35,3 +34,4 @@
./mainoptionsparser.o: ../src/convert/strtoint.h ../src/convert/text.h
./mainoptionsparser.o: ../src/convert/misc.h ../src/utf8/utf8_stream.h
./mainoptionsparser.o: ../src/convert/double.h
./test.o: test.h

Loading…
Cancel
Save