make all utf8/wide functions available just by including utf8/utf8.h

while here:
- remove utf8/utf8_stream.h; now only utf8/utf8.h needs to be included
- add some new methods for converting from a utf8 stream to a wide stream/string
- make some improvements in TextStream:
  - don't use temporary objects to convert utf8/wide
  - add put_stream() which takes TextStreamBase<> as its argument
    (uses an iterator instead of get_char() for reading)
  - let operator<<(const Space & space) serialize to JSON instead of the Space format
This commit is contained in:
2022-07-30 03:31:18 +02:00
parent 84e9e6f98f
commit 663233fe2a
10 changed files with 255 additions and 176 deletions

View File

@@ -43,7 +43,6 @@
#include "membuffer/membuffer.h"
#include "types.h"
#include "utf8/utf8.h"
#include "utf8/utf8_stream.h"
// for snprintf
#include <cstdio>
@@ -213,6 +212,9 @@ public:
protected:
template<typename char_type_to, size_t stack_size_to, size_t heap_block_size_to>
void put_stream(const TextStreamBase<char_type_to, stack_size_to, heap_block_size_to> & stream);
void put_stream(const Stream & stream);
@@ -599,8 +601,9 @@ void TextStreamBase<char_type, stack_size, heap_block_size>::to_str(std::string
if constexpr (sizeof(char_type) == sizeof(char) )
{
const_iterator i = begin();
const_iterator i_end = end();
for( ; i != end() ; ++i)
for( ; i != i_end ; ++i)
str += *i;
}
else
@@ -629,10 +632,7 @@ void TextStreamBase<char_type, stack_size, heap_block_size>::to_str(std::wstring
}
else
{
// IMPROVE ME don't use a temporary object
std::string utf8;
to_str(utf8);
utf8_to_wide(utf8, str, false);
utf8_to_wide(*this, str, false);
}
}
@@ -715,7 +715,14 @@ template<typename char_type, size_t stack_size, size_t heap_block_size>
TextStreamBase<char_type, stack_size, heap_block_size> &
TextStreamBase<char_type, stack_size, heap_block_size>::operator<<(const std::string & str)
{
operator<<(str.c_str());
if constexpr ( sizeof(char_type) == sizeof(char) )
{
buffer.append(str.c_str(), str.size());
}
else
{
utf8_to_wide(str, *this, false);
}
return *this;
}
@@ -733,7 +740,7 @@ TextStreamBase<char_type, stack_size, heap_block_size>::operator<<(const wchar_t
}
else
{
wide_to_utf8(str, *this);
wide_to_utf8(str, *this, false);
}
return *this;
@@ -745,7 +752,14 @@ template<typename char_type, size_t stack_size, size_t heap_block_size>
TextStreamBase<char_type, stack_size, heap_block_size> &
TextStreamBase<char_type, stack_size, heap_block_size>::operator<<(const std::wstring & str)
{
operator<<(str.c_str());
if constexpr (sizeof(char_type) == sizeof(wchar_t) )
{
buffer.append(str.c_str(), str.size());
}
else
{
wide_to_utf8(str, *this, false);
}
return *this;
}
@@ -988,6 +1002,44 @@ TextStreamBase<char_type, stack_size, heap_block_size>::operator<<(const Stream
/*!
	appends the content of another TextStreamBase (possibly with a different
	character type and different buffer sizes) to this stream

	what is done depends on the character width of this stream and on what
	the argument reports through is_char_stream() / is_wchar_stream():
	 - char  <- char stream, or wchar_t <- wchar stream:
	     the items are copied one by one with operator<<
	 - wchar_t <- char stream:
	     the argument is passed to utf8_to_wide()
	 - char <- wchar stream:
	     the argument is passed to wide_stream_to_utf8()
	 - any other combination:
	     the text "such conversion is not implemented" is appended

	the last argument given to the conversion functions is false
	(presumably: do not clear this stream before writing - TODO confirm)

	NOTE(review): passing this very stream as the argument is not handled
	specially here

	input:
		stream - the stream to read from, it is not modified
*/
template<typename char_type, size_t stack_size, size_t heap_block_size>
template<typename arg_char_type, size_t arg_stack_size, size_t arg_heap_block_size>
void TextStreamBase<char_type, stack_size, heap_block_size>::put_stream(
	const TextStreamBase<arg_char_type, arg_stack_size, arg_heap_block_size> & stream
	)
{
	if( (sizeof(char_type) == sizeof(char) && stream.is_char_stream()) ||
		(sizeof(char_type) == sizeof(wchar_t) && stream.is_wchar_stream()) )
	{
		// from utf8 to utf8 or from wide to wide
		typename TextStreamBase<arg_char_type, arg_stack_size, arg_heap_block_size>::const_iterator i = stream.begin();

		// the end iterator is taken once before the loop instead of calling stream.end() on every iteration
		typename TextStreamBase<arg_char_type, arg_stack_size, arg_heap_block_size>::const_iterator i_end = stream.end();

		for( ; i != i_end ; ++i)
		{
			operator<<(*i);
		}
	}
	else
	if( sizeof(char_type) == sizeof(wchar_t) && stream.is_char_stream() )
	{
		// from utf8 to wide
		utf8_to_wide(stream, *this, false);
	}
	else
	if( sizeof(char_type) == sizeof(char) && stream.is_wchar_stream() )
	{
		// from wide to utf8
		wide_stream_to_utf8(stream, *this, false);
	}
	else
	{
		// no branch above matched, a diagnostic text is written into the stream
		operator<<("such conversion is not implemented");
	}
}
template<typename char_type, size_t stack_size, size_t heap_block_size>
void TextStreamBase<char_type, stack_size, heap_block_size>::put_stream(const Stream & stream)
{
@@ -1036,7 +1088,7 @@ template<typename char_type, size_t stack_size, size_t heap_block_size>
TextStreamBase<char_type, stack_size, heap_block_size> &
TextStreamBase<char_type, stack_size, heap_block_size>::operator<<(const Space & space)
{
space.serialize_to_space_stream(*this, true);
space.serialize_to_json_stream(*this, true);
return *this;
}
@@ -1059,11 +1111,6 @@ TextStreamBase<char_type, stack_size, heap_block_size> &
TextStreamBase<char_type, stack_size, heap_block_size>::operator<<(
const TextStreamBase<arg_char_type, arg_stack_size, arg_heap_block_size> & arg)
{
/*
* in the future we can have a faster implementation
* which uses iterators instead of get_char() and get_wchar() methods
*
*/
put_stream(arg);
return *this;
@@ -1101,6 +1148,35 @@ TextStreamBase<char_type, stack_size, heap_block_size>::fill_up_if_needed(wchar_
}
/*!
this function converts an UTF-8 stream into wide stream or wide string
(is declared in utf8/utf8.h)
input:
iterator_in - an TextStream iterator for reading from
iterator_end - an end iterator (can be returned by end() method from TextStream)
output:
out_stream - an output wide stream or wide string
this function returns false if there were some errors when converting
*/
template<size_t stack_size, size_t heap_block_size, typename StreamOrStringType>
bool utf8_to_wide(
const TextStreamBase<char, stack_size, heap_block_size> & utf8,
StreamOrStringType & out_stream,
bool clear_stream,
int mode
)
{
typename TextStreamBase<char, stack_size, heap_block_size>::const_iterator i_begin = utf8.begin();
return utf8_to_wide(i_begin, utf8.end(), out_stream, clear_stream, mode);
}
} // namespace