fix: correctly escape json/xml/csv wide strings
A wide string was first converted to utf-8 and then escaped to json/xml/csv, which is incorrect: it should be escaped first and then converted to utf-8. Add TextStreamBase<>::iterator and TextStreamBase<>::const_iterator as classes with a method wchar_t get_unicode_and_advance(const iterator & end) which returns one character either from a utf-8 stream or from a wide stream. Let TextStreamBase<>::operator<<(wchar_t v) correctly use utf-8.
This commit is contained in:
@@ -5,7 +5,7 @@
|
||||
*/
|
||||
|
||||
/*
|
||||
* Copyright (c) 2012-2021, Tomasz Sowa
|
||||
* Copyright (c) 2012-2022, Tomasz Sowa
|
||||
* All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
@@ -46,6 +46,7 @@
|
||||
#include "membuffer/membuffer.h"
|
||||
#include "types.h"
|
||||
#include "utf8/utf8.h"
|
||||
#include "utf8/utf8_stream.h"
|
||||
|
||||
// for snprintf
|
||||
#include <cstdio>
|
||||
@@ -71,8 +72,67 @@ public:
|
||||
typedef CharT char_type;
|
||||
|
||||
typedef MemBuffer<char_type, stack_size, heap_block_size> buffer_type;
|
||||
typedef typename buffer_type::iterator iterator;
|
||||
typedef typename buffer_type::const_iterator const_iterator;
|
||||
|
||||
|
||||
class iterator
|
||||
{
|
||||
public:
|
||||
|
||||
typename buffer_type::iterator membuffer_iterator;
|
||||
|
||||
iterator();
|
||||
iterator(const iterator & i);
|
||||
iterator & operator=(const iterator & i);
|
||||
|
||||
iterator(const typename buffer_type::iterator & i);
|
||||
iterator & operator=(const typename buffer_type::iterator & i);
|
||||
|
||||
bool operator==(const iterator & i) const;
|
||||
bool operator!=(const iterator & i) const;
|
||||
|
||||
iterator & operator++(); // prefix ++
|
||||
iterator operator++(int); // postfix ++
|
||||
|
||||
iterator & operator--(); // prefix --
|
||||
iterator operator--(int); // postfix --
|
||||
|
||||
CharT & operator*();
|
||||
|
||||
wchar_t get_unicode_and_advance(const iterator & end);
|
||||
};
|
||||
|
||||
|
||||
class const_iterator
|
||||
{
|
||||
public:
|
||||
|
||||
typename buffer_type::const_iterator membuffer_const_iterator;
|
||||
|
||||
const_iterator();
|
||||
const_iterator(const const_iterator & i);
|
||||
const_iterator(const iterator & i);
|
||||
const_iterator & operator=(const const_iterator & i);
|
||||
const_iterator & operator=(const iterator & i);
|
||||
|
||||
const_iterator(const typename buffer_type::const_iterator & i);
|
||||
const_iterator(const typename buffer_type::iterator & i);
|
||||
const_iterator & operator=(const typename buffer_type::const_iterator & i);
|
||||
const_iterator & operator=(const typename buffer_type::iterator & i);
|
||||
|
||||
bool operator==(const const_iterator & i) const;
|
||||
bool operator!=(const const_iterator & i) const;
|
||||
|
||||
const_iterator & operator++(); // prefix ++
|
||||
const_iterator operator++(int); // postfix ++
|
||||
|
||||
const_iterator & operator--(); // prefix --
|
||||
const_iterator operator--(int); // postfix --
|
||||
|
||||
CharT operator*();
|
||||
|
||||
wchar_t get_unicode_and_advance(const const_iterator & end);
|
||||
|
||||
};
|
||||
|
||||
|
||||
bool is_char_stream() const;
|
||||
@@ -112,7 +172,7 @@ public:
|
||||
|
||||
TextStreamBase & operator<<(char);
|
||||
TextStreamBase & operator<<(unsigned char);
|
||||
TextStreamBase & operator<<(wchar_t);
|
||||
TextStreamBase & operator<<(wchar_t); // no surrogate pairs are used
|
||||
TextStreamBase & operator<<(bool);
|
||||
TextStreamBase & operator<<(short);
|
||||
TextStreamBase & operator<<(int);
|
||||
@@ -173,6 +233,272 @@ TextStreamBase<char_type, stack_size, heap_block_size>::TextStreamBase()
|
||||
}
|
||||
|
||||
|
||||
|
||||
template<typename char_type, size_t stack_size, size_t heap_block_size>
|
||||
TextStreamBase<char_type, stack_size, heap_block_size>::iterator::iterator()
|
||||
{
|
||||
}
|
||||
|
||||
|
||||
template<typename char_type, size_t stack_size, size_t heap_block_size>
|
||||
TextStreamBase<char_type, stack_size, heap_block_size>::iterator::iterator(const iterator & i) : membuffer_iterator(i)
|
||||
{
|
||||
}
|
||||
|
||||
|
||||
template<typename char_type, size_t stack_size, size_t heap_block_size>
|
||||
TextStreamBase<char_type, stack_size, heap_block_size>::iterator &
|
||||
TextStreamBase<char_type, stack_size, heap_block_size>::iterator::operator=(const iterator & i)
|
||||
{
|
||||
membuffer_iterator = i;
|
||||
}
|
||||
|
||||
|
||||
template<typename char_type, size_t stack_size, size_t heap_block_size>
|
||||
TextStreamBase<char_type, stack_size, heap_block_size>::iterator::iterator(const typename buffer_type::iterator & i) : membuffer_iterator(i)
|
||||
{
|
||||
}
|
||||
|
||||
|
||||
template<typename char_type, size_t stack_size, size_t heap_block_size>
|
||||
TextStreamBase<char_type, stack_size, heap_block_size>::iterator &
|
||||
TextStreamBase<char_type, stack_size, heap_block_size>::iterator::operator=(const typename buffer_type::iterator & i)
|
||||
{
|
||||
membuffer_iterator = i;
|
||||
}
|
||||
|
||||
|
||||
|
||||
|
||||
template<typename char_type, size_t stack_size, size_t heap_block_size>
|
||||
bool TextStreamBase<char_type, stack_size, heap_block_size>::iterator::operator==(const iterator & i) const
|
||||
{
|
||||
return membuffer_iterator == i.membuffer_iterator;
|
||||
}
|
||||
|
||||
template<typename char_type, size_t stack_size, size_t heap_block_size>
|
||||
bool TextStreamBase<char_type, stack_size, heap_block_size>::iterator::operator!=(const iterator & i) const
|
||||
{
|
||||
return membuffer_iterator != i.membuffer_iterator;
|
||||
}
|
||||
|
||||
template<typename char_type, size_t stack_size, size_t heap_block_size>
|
||||
TextStreamBase<char_type, stack_size, heap_block_size>::iterator &
|
||||
TextStreamBase<char_type, stack_size, heap_block_size>::iterator::operator++()
|
||||
{
|
||||
++membuffer_iterator;
|
||||
return *this;
|
||||
}
|
||||
|
||||
template<typename char_type, size_t stack_size, size_t heap_block_size>
|
||||
TextStreamBase<char_type, stack_size, heap_block_size>::iterator
|
||||
TextStreamBase<char_type, stack_size, heap_block_size>::iterator::operator++(int)
|
||||
{
|
||||
const_iterator old(*this);
|
||||
membuffer_iterator++;
|
||||
return old;
|
||||
}
|
||||
|
||||
template<typename char_type, size_t stack_size, size_t heap_block_size>
|
||||
TextStreamBase<char_type, stack_size, heap_block_size>::iterator &
|
||||
TextStreamBase<char_type, stack_size, heap_block_size>::iterator::operator--()
|
||||
{
|
||||
--membuffer_iterator;
|
||||
return *this;
|
||||
}
|
||||
|
||||
template<typename char_type, size_t stack_size, size_t heap_block_size>
|
||||
TextStreamBase<char_type, stack_size, heap_block_size>::iterator
|
||||
TextStreamBase<char_type, stack_size, heap_block_size>::iterator::operator--(int)
|
||||
{
|
||||
const_iterator old(*this);
|
||||
membuffer_iterator--;
|
||||
return old;
|
||||
}
|
||||
|
||||
template<typename char_type, size_t stack_size, size_t heap_block_size>
|
||||
char_type & TextStreamBase<char_type, stack_size, heap_block_size>::iterator::operator*()
|
||||
{
|
||||
return *membuffer_iterator;
|
||||
}
|
||||
|
||||
|
||||
template<typename char_type, size_t stack_size, size_t heap_block_size>
|
||||
wchar_t TextStreamBase<char_type, stack_size, heap_block_size>::iterator::get_unicode_and_advance(const iterator & end)
|
||||
{
|
||||
if( *this != end )
|
||||
{
|
||||
if constexpr (sizeof(char_type) == sizeof(char) )
|
||||
{
|
||||
int res;
|
||||
bool correct;
|
||||
utf8_to_int(*this, end, res, correct);
|
||||
|
||||
if( correct )
|
||||
return static_cast<wchar_t>(res);
|
||||
else
|
||||
return static_cast<wchar_t>(0xFFFD); // U+FFFD "replacement character"
|
||||
}
|
||||
else
|
||||
{
|
||||
wchar_t c = operator*();
|
||||
++membuffer_iterator;
|
||||
return c;
|
||||
}
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
template<typename char_type, size_t stack_size, size_t heap_block_size>
|
||||
TextStreamBase<char_type, stack_size, heap_block_size>::const_iterator::const_iterator()
|
||||
{
|
||||
}
|
||||
|
||||
template<typename char_type, size_t stack_size, size_t heap_block_size>
|
||||
TextStreamBase<char_type, stack_size, heap_block_size>::const_iterator::const_iterator(const const_iterator & i) : membuffer_const_iterator(i.membuffer_const_iterator)
|
||||
{
|
||||
}
|
||||
|
||||
template<typename char_type, size_t stack_size, size_t heap_block_size>
|
||||
TextStreamBase<char_type, stack_size, heap_block_size>::const_iterator::const_iterator(const iterator & i) : membuffer_const_iterator(i.membuffer_iterator)
|
||||
{
|
||||
}
|
||||
|
||||
template<typename char_type, size_t stack_size, size_t heap_block_size>
|
||||
TextStreamBase<char_type, stack_size, heap_block_size>::const_iterator &
|
||||
TextStreamBase<char_type, stack_size, heap_block_size>::const_iterator::operator=(const const_iterator & i)
|
||||
{
|
||||
membuffer_const_iterator = i.membuffer_const_iterator;
|
||||
return *this;
|
||||
}
|
||||
|
||||
template<typename char_type, size_t stack_size, size_t heap_block_size>
|
||||
TextStreamBase<char_type, stack_size, heap_block_size>::const_iterator &
|
||||
TextStreamBase<char_type, stack_size, heap_block_size>::const_iterator::operator=(const iterator & i)
|
||||
{
|
||||
membuffer_const_iterator = i.membuffer_iterator;
|
||||
return *this;
|
||||
}
|
||||
|
||||
|
||||
|
||||
template<typename char_type, size_t stack_size, size_t heap_block_size>
|
||||
TextStreamBase<char_type, stack_size, heap_block_size>::const_iterator::const_iterator(const typename buffer_type::const_iterator & i) : membuffer_const_iterator(i)
|
||||
{
|
||||
}
|
||||
|
||||
template<typename char_type, size_t stack_size, size_t heap_block_size>
|
||||
TextStreamBase<char_type, stack_size, heap_block_size>::const_iterator::const_iterator(const typename buffer_type::iterator & i) : membuffer_const_iterator(i)
|
||||
{
|
||||
}
|
||||
|
||||
template<typename char_type, size_t stack_size, size_t heap_block_size>
|
||||
TextStreamBase<char_type, stack_size, heap_block_size>::const_iterator &
|
||||
TextStreamBase<char_type, stack_size, heap_block_size>::const_iterator::operator=(const typename buffer_type::const_iterator & i)
|
||||
{
|
||||
membuffer_const_iterator = i;
|
||||
return *this;
|
||||
}
|
||||
|
||||
template<typename char_type, size_t stack_size, size_t heap_block_size>
|
||||
TextStreamBase<char_type, stack_size, heap_block_size>::const_iterator &
|
||||
TextStreamBase<char_type, stack_size, heap_block_size>::const_iterator::operator=(const typename buffer_type::iterator & i)
|
||||
{
|
||||
membuffer_const_iterator = i;
|
||||
return *this;
|
||||
}
|
||||
|
||||
|
||||
|
||||
template<typename char_type, size_t stack_size, size_t heap_block_size>
|
||||
bool TextStreamBase<char_type, stack_size, heap_block_size>::const_iterator::operator==(const const_iterator & i) const
|
||||
{
|
||||
return membuffer_const_iterator == i.membuffer_const_iterator;
|
||||
}
|
||||
|
||||
template<typename char_type, size_t stack_size, size_t heap_block_size>
|
||||
bool TextStreamBase<char_type, stack_size, heap_block_size>::const_iterator::operator!=(const const_iterator & i) const
|
||||
{
|
||||
return membuffer_const_iterator != i.membuffer_const_iterator;
|
||||
}
|
||||
|
||||
template<typename char_type, size_t stack_size, size_t heap_block_size>
|
||||
TextStreamBase<char_type, stack_size, heap_block_size>::const_iterator &
|
||||
TextStreamBase<char_type, stack_size, heap_block_size>::const_iterator::operator++()
|
||||
{
|
||||
++membuffer_const_iterator;
|
||||
return *this;
|
||||
}
|
||||
|
||||
template<typename char_type, size_t stack_size, size_t heap_block_size>
|
||||
TextStreamBase<char_type, stack_size, heap_block_size>::const_iterator
|
||||
TextStreamBase<char_type, stack_size, heap_block_size>::const_iterator::operator++(int)
|
||||
{
|
||||
const_iterator old(*this);
|
||||
membuffer_const_iterator++;
|
||||
return old;
|
||||
}
|
||||
|
||||
template<typename char_type, size_t stack_size, size_t heap_block_size>
|
||||
TextStreamBase<char_type, stack_size, heap_block_size>::const_iterator &
|
||||
TextStreamBase<char_type, stack_size, heap_block_size>::const_iterator::operator--()
|
||||
{
|
||||
--membuffer_const_iterator;
|
||||
return *this;
|
||||
}
|
||||
|
||||
template<typename char_type, size_t stack_size, size_t heap_block_size>
|
||||
TextStreamBase<char_type, stack_size, heap_block_size>::const_iterator
|
||||
TextStreamBase<char_type, stack_size, heap_block_size>::const_iterator::operator--(int)
|
||||
{
|
||||
const_iterator old(*this);
|
||||
membuffer_const_iterator--;
|
||||
return old;
|
||||
}
|
||||
|
||||
template<typename char_type, size_t stack_size, size_t heap_block_size>
|
||||
char_type TextStreamBase<char_type, stack_size, heap_block_size>::const_iterator::operator*()
|
||||
{
|
||||
return *membuffer_const_iterator;
|
||||
}
|
||||
|
||||
|
||||
template<typename char_type, size_t stack_size, size_t heap_block_size>
|
||||
wchar_t TextStreamBase<char_type, stack_size, heap_block_size>::const_iterator::get_unicode_and_advance(const const_iterator & end)
|
||||
{
|
||||
if( *this != end )
|
||||
{
|
||||
if constexpr (sizeof(char_type) == sizeof(char) )
|
||||
{
|
||||
int res;
|
||||
bool correct;
|
||||
pt::utf8_to_int(*this, end, res, correct);
|
||||
|
||||
if( correct )
|
||||
return static_cast<wchar_t>(res);
|
||||
else
|
||||
return static_cast<wchar_t>(0xFFFD); // U+FFFD "replacement character"
|
||||
}
|
||||
else
|
||||
{
|
||||
wchar_t c = operator*();
|
||||
++membuffer_const_iterator;
|
||||
return c;
|
||||
}
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
template<typename char_type, size_t stack_size, size_t heap_block_size>
|
||||
bool TextStreamBase<char_type, stack_size, heap_block_size>::is_char_stream() const
|
||||
{
|
||||
@@ -433,10 +759,14 @@ template<typename char_type, size_t stack_size, size_t heap_block_size>
|
||||
TextStreamBase<char_type, stack_size, heap_block_size> &
|
||||
TextStreamBase<char_type, stack_size, heap_block_size>::operator<<(char v)
|
||||
{
|
||||
// IMPROVEME
|
||||
// if char_type == 1 then if v <= 127 then put that char but if (unsigned)v > 127 put replacement character
|
||||
// if char_type > 1 then simply put that character
|
||||
buffer.append(static_cast<char_type>(v));
|
||||
if constexpr (sizeof(char_type) == sizeof(wchar_t) )
|
||||
{
|
||||
buffer.append(static_cast<char_type>(static_cast<unsigned char>(v)));
|
||||
}
|
||||
else
|
||||
{
|
||||
buffer.append(v);
|
||||
}
|
||||
|
||||
return *this;
|
||||
}
|
||||
@@ -446,9 +776,6 @@ template<typename char_type, size_t stack_size, size_t heap_block_size>
|
||||
TextStreamBase<char_type, stack_size, heap_block_size> &
|
||||
TextStreamBase<char_type, stack_size, heap_block_size>::operator<<(unsigned char v)
|
||||
{
|
||||
// IMPROVEME
|
||||
// if char_type == 1 then if v <= 127 then put that char but if v > 127 put replacement character
|
||||
// if char_type > 1 then simply put that character
|
||||
buffer.append(static_cast<char_type>(v));
|
||||
|
||||
return *this;
|
||||
@@ -459,8 +786,14 @@ template<typename char_type, size_t stack_size, size_t heap_block_size>
|
||||
TextStreamBase<char_type, stack_size, heap_block_size> &
|
||||
TextStreamBase<char_type, stack_size, heap_block_size>::operator<<(wchar_t v)
|
||||
{
|
||||
// IMPROVEME add utf8/wide conversion, if v is from surrogate pair we can skip it
|
||||
buffer.append(static_cast<char_type>(v));
|
||||
if constexpr (sizeof(char_type) == sizeof(wchar_t) )
|
||||
{
|
||||
buffer.append(v);
|
||||
}
|
||||
else
|
||||
{
|
||||
pt::int_to_utf8(static_cast<int>(v), *this);
|
||||
}
|
||||
|
||||
return *this;
|
||||
}
|
||||
|
Reference in New Issue
Block a user