pikotools/src/textstream/textstream.h

835 lines
21 KiB
C++

/*
* This file is a part of PikoTools
* and is distributed under the (new) BSD licence.
* Author: Tomasz Sowa <t.sowa@ttmath.org>
*/
/*
* Copyright (c) 2012-2021, Tomasz Sowa
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* * Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
*
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* * Neither the name Tomasz Sowa nor the names of contributors to this
* project may be used to endorse or promote products derived
* from this software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
* THE POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef headerfile_picotools_textstream_textstream
#define headerfile_picotools_textstream_textstream
#include <string>
#include "stream.h"
#include "space/space.h"
#include "date/date.h"
#include "convert/inttostr.h"
#include "membuffer/membuffer.h"
#include "types.h"
#include "utf8/utf8.h"
// for snprintf
#include <cstdio>
namespace pt
{
/*
a special class representing a stream buffer
similar to std::ostringstream
CharT is the type of a single character kept in the buffer
(the implementation distinguishes char-sized and wchar_t-sized types)
*/
/*
TextStreamBase derives from Stream and keeps its characters in a MemBuffer
(see the buffer_type typedef below).
Template parameters:
CharT - type of a single character kept in the buffer
stack_size, heap_block_size - passed unchanged to MemBuffer
*/
template<typename CharT, size_t stack_size, size_t heap_block_size>
class TextStreamBase : public Stream
{
public:
// the constructor calls clear()
TextStreamBase();
typedef CharT char_type;
typedef MemBuffer<char_type, stack_size, heap_block_size> buffer_type;
typedef typename buffer_type::iterator iterator;
typedef typename buffer_type::const_iterator const_iterator;
// true when sizeof(char_type) is equal to sizeof(char)
bool is_char_stream() const;
// true when sizeof(char_type) is equal to sizeof(wchar_t)
bool is_wchar_stream() const;
// sets radix to 10, min_width_for_integers to 0 and clears the buffer
void clear();
// the methods below forward to the buffer methods with the same names
bool empty() const;
size_t size() const;
void reserve(size_t len);
size_t capacity() const;
iterator begin();
iterator end();
const_iterator begin() const;
const_iterator end() const;
// IMPROVE ME
// add cbegin(), cend(), rbegin(), rend(), crbegin(), crend()
// copy the content of the stream to str,
// str is cleared first when clear_string is true,
// when the character size of the stream differs from the character size of str
// then the conversion helpers (wide_stream_to_utf8 / utf8_to_wide) are used
void to_str(std::string & str, bool clear_string = true) const;
void to_str(std::wstring & str, bool clear_string = true) const;
// return the content of the stream as a new string
std::string to_str() const;
std::wstring to_wstr() const;
// return buffer[index] cast to char / wchar_t
// (these methods do not check the index by themselves)
char get_char(size_t index) const;
wchar_t get_wchar(size_t index) const;
// direct access to buffer[index]
char_type & operator[](size_t index);
char_type operator[](size_t index) const;
// appending null terminated strings and std strings,
// a string with a different character size is passed through utf8_to_wide / wide_to_utf8
TextStreamBase & operator<<(const char * str);
TextStreamBase & operator<<(const std::string & str);
TextStreamBase & operator<<(const wchar_t * str);
TextStreamBase & operator<<(const std::wstring & str);
// appending a single character
TextStreamBase & operator<<(char);
TextStreamBase & operator<<(unsigned char);
TextStreamBase & operator<<(wchar_t);
// appends '1' for true and '0' for false
TextStreamBase & operator<<(bool);
// integers are converted with Toa() using the radix member,
// see also int_min_width()
TextStreamBase & operator<<(short);
TextStreamBase & operator<<(int);
TextStreamBase & operator<<(long);
TextStreamBase & operator<<(long long);
TextStreamBase & operator<<(unsigned short);
TextStreamBase & operator<<(unsigned int);
TextStreamBase & operator<<(unsigned long);
TextStreamBase & operator<<(unsigned long long);
// floating point values are formatted with snprintf ("%g", "%Lg" for long double)
TextStreamBase & operator<<(float);
TextStreamBase & operator<<(double);
TextStreamBase & operator<<(long double);
TextStreamBase & operator<<(const void *); // printing a pointer
// appends the content of another stream (see put_stream())
TextStreamBase & operator<<(const Stream & stream);
// calls space.serialize_to_space_stream(*this, true)
TextStreamBase & operator<<(const Space & space);
// calls date.Serialize(*this)
TextStreamBase & operator<<(const Date & date);
// appends the content of another TextStreamBase (any character type and sizes)
template<typename arg_char_type, size_t arg_stack_size, size_t arg_heap_block_size>
TextStreamBase & operator<<(const TextStreamBase<arg_char_type, arg_stack_size, arg_heap_block_size> & arg);
// min width for integer output
// if the output value has fewer digits than min_width then leading zeroes are added
// (0 turns it off)
TextStreamBase & int_min_width(size_t min_width);
// append characters from buf, len is the number of characters in buf
TextStreamBase & write(const char * buf, size_t len);
TextStreamBase & write(const wchar_t * buf, size_t len);
// appends fill_up_char as many times as existing_length is lower than min_width_for_integers
TextStreamBase & fill_up_if_needed(wchar_t fill_up_char, size_t existing_length);
/*
raw access
*/
// base passed to Toa() when integers are printed (10 after clear())
int radix;
// value set by int_min_width() (0 after clear())
size_t min_width_for_integers;
// the container with the characters of the stream
buffer_type buffer;
protected:
// appends the content of stream, a conversion is chosen
// by comparing the character sizes of both streams
void put_stream(const Stream & stream);
// helpers for put_stream() used when the character sizes differ
void put_utf8_to_wide(const Stream & stream);
void put_wide_to_utf8(const Stream & stream);
};
/*
 * default constructor
 * the whole initialization is done by clear()
 */
template<typename char_type, size_t stack_size, size_t heap_block_size>
TextStreamBase<char_type, stack_size, heap_block_size>::TextStreamBase()
{
    this->clear();
}
/*
 * return true if a single character of this stream has the same size as char
 */
template<typename char_type, size_t stack_size, size_t heap_block_size>
bool TextStreamBase<char_type, stack_size, heap_block_size>::is_char_stream() const
{
    return sizeof(char_type) == sizeof(char);
}
/*
 * return true if a single character of this stream has the same size as wchar_t
 */
template<typename char_type, size_t stack_size, size_t heap_block_size>
bool TextStreamBase<char_type, stack_size, heap_block_size>::is_wchar_stream() const
{
    return sizeof(char_type) == sizeof(wchar_t);
}
/*
 * bring the stream back to its initial state:
 * an empty buffer, radix 10 and no minimal width for integers
 */
template<typename char_type, size_t stack_size, size_t heap_block_size>
void TextStreamBase<char_type, stack_size, heap_block_size>::clear()
{
    this->radix = 10;
    this->min_width_for_integers = 0;
    this->buffer.clear();
}
/*
 * return the result of buffer.empty()
 */
template<typename char_type, size_t stack_size, size_t heap_block_size>
bool TextStreamBase<char_type, stack_size, heap_block_size>::empty() const
{
    const bool nothing_stored = buffer.empty();

    return nothing_stored;
}
/*
 * return the result of buffer.size()
 */
template<typename char_type, size_t stack_size, size_t heap_block_size>
size_t TextStreamBase<char_type, stack_size, heap_block_size>::size() const
{
    const size_t stored = buffer.size();

    return stored;
}
/*
 * forward the request to buffer.reserve()
 *
 * len - the value passed to the buffer
 */
template<typename char_type, size_t stack_size, size_t heap_block_size>
void TextStreamBase<char_type, stack_size, heap_block_size>::reserve(size_t len)
{
    this->buffer.reserve(len);
}
/*
 * return the result of buffer.capacity()
 */
template<typename char_type, size_t stack_size, size_t heap_block_size>
size_t TextStreamBase<char_type, stack_size, heap_block_size>::capacity() const
{
    const size_t available = buffer.capacity();

    return available;
}
/*
 * return buffer.begin() (mutable version)
 */
template<typename char_type, size_t stack_size, size_t heap_block_size>
typename TextStreamBase<char_type, stack_size, heap_block_size>::iterator
TextStreamBase<char_type, stack_size, heap_block_size>::begin()
{
    return this->buffer.begin();
}
/*
 * return buffer.end() (mutable version)
 */
template<typename char_type, size_t stack_size, size_t heap_block_size>
typename TextStreamBase<char_type, stack_size, heap_block_size>::iterator
TextStreamBase<char_type, stack_size, heap_block_size>::end()
{
    return this->buffer.end();
}
/*
 * return buffer.begin() (const version)
 */
template<typename char_type, size_t stack_size, size_t heap_block_size>
typename TextStreamBase<char_type, stack_size, heap_block_size>::const_iterator
TextStreamBase<char_type, stack_size, heap_block_size>::begin() const
{
    return this->buffer.begin();
}
/*
 * return buffer.end() (const version)
 */
template<typename char_type, size_t stack_size, size_t heap_block_size>
typename TextStreamBase<char_type, stack_size, heap_block_size>::const_iterator
TextStreamBase<char_type, stack_size, heap_block_size>::end() const
{
    return this->buffer.end();
}
/*
 * copy the content of the stream to a std::string
 *
 * str          - the output string
 * clear_string - when true str is cleared before anything is added
 *
 * a stream with char-sized characters is copied character by character,
 * any other stream is passed to wide_stream_to_utf8()
 *
 * NOTE(review): with clear_string == false the second case relies on
 * wide_stream_to_utf8() appending to str instead of clearing it - confirm
 */
template<typename char_type, size_t stack_size, size_t heap_block_size>
void TextStreamBase<char_type, stack_size, heap_block_size>::to_str(std::string & str, bool clear_string) const
{
    if( clear_string )
    {
        str.clear();
    }

    const size_t wanted_capacity = str.size() + size();

    if( str.capacity() < wanted_capacity )
    {
        str.reserve(wanted_capacity);
    }

    if constexpr (sizeof(char_type) == sizeof(char) )
    {
        for(const_iterator it = begin() ; it != end() ; ++it)
        {
            str += *it;
        }
    }
    else
    {
        wide_stream_to_utf8(*this, str);
    }
}
/*
 * copy the content of the stream to a std::wstring
 *
 * str          - the output string
 * clear_string - when true str is cleared before anything is added
 *
 * a stream with wchar_t-sized characters is copied character by character,
 * any other stream is first copied to a temporary std::string
 * and then converted with utf8_to_wide()
 */
template<typename char_type, size_t stack_size, size_t heap_block_size>
void TextStreamBase<char_type, stack_size, heap_block_size>::to_str(std::wstring & str, bool clear_string) const
{
    if( clear_string )
    {
        str.clear();
    }

    const size_t wanted_capacity = str.size() + size();

    if( str.capacity() < wanted_capacity )
    {
        str.reserve(wanted_capacity);
    }

    if constexpr (sizeof(char_type) == sizeof(wchar_t) )
    {
        for(const_iterator it = begin() ; it != end() ; ++it)
        {
            str += *it;
        }
    }
    else
    {
        // IMPROVE ME don't use a temporary object
        std::string narrow_copy;
        to_str(narrow_copy);

        // false: do not clear str, the caller decided about it with clear_string
        utf8_to_wide(narrow_copy, str, false);
    }
}
/*
 * return the content of the stream as a new std::string
 * (uses the to_str(std::string &, bool) overload)
 */
template<typename char_type, size_t stack_size, size_t heap_block_size>
std::string TextStreamBase<char_type, stack_size, heap_block_size>::to_str() const
{
    std::string result;

    // the string is new so there is nothing to clear
    to_str(result, false);

    return result;
}
/*
 * return the content of the stream as a new std::wstring
 * (uses the to_str(std::wstring &, bool) overload)
 */
template<typename char_type, size_t stack_size, size_t heap_block_size>
std::wstring TextStreamBase<char_type, stack_size, heap_block_size>::to_wstr() const
{
    std::wstring result;

    // the string is new so there is nothing to clear
    to_str(result, false);

    return result;
}
/*
 * return the character from buffer[index] converted to char with static_cast
 * (the index is not checked here)
 */
template<typename char_type, size_t stack_size, size_t heap_block_size>
char TextStreamBase<char_type, stack_size, heap_block_size>::get_char(size_t index) const
{
    const char_type stored = buffer[index];

    return static_cast<char>(stored);
}
/*
 * return the character from buffer[index] converted to wchar_t with static_cast
 * (the index is not checked here)
 */
template<typename char_type, size_t stack_size, size_t heap_block_size>
wchar_t TextStreamBase<char_type, stack_size, heap_block_size>::get_wchar(size_t index) const
{
    const char_type stored = buffer[index];

    return static_cast<wchar_t>(stored);
}
/*
 * return a reference to buffer[index]
 * (the index is not checked here)
 */
template<typename char_type, size_t stack_size, size_t heap_block_size>
char_type & TextStreamBase<char_type, stack_size, heap_block_size>::operator[](size_t index)
{
    return this->buffer[index];
}
/*
 * return a copy of buffer[index]
 * (the index is not checked here)
 */
template<typename char_type, size_t stack_size, size_t heap_block_size>
char_type TextStreamBase<char_type, stack_size, heap_block_size>::operator[](size_t index) const
{
    return this->buffer[index];
}
/*
 * append a null terminated string
 *
 * for a stream with char-sized characters the string is copied as it is,
 * for other streams the string is passed to utf8_to_wide()
 * (str must not be a null pointer, it is dereferenced without a check)
 */
template<typename char_type, size_t stack_size, size_t heap_block_size>
TextStreamBase<char_type, stack_size, heap_block_size> &
TextStreamBase<char_type, stack_size, heap_block_size>::operator<<(const char * str)
{
    if constexpr ( sizeof(char_type) == sizeof(char) )
    {
        while( *str )
        {
            buffer.append(*str);
            ++str;
        }
    }
    else
    {
        // false: the existing content of the stream is kept
        utf8_to_wide(str, *this, false);
    }

    return *this;
}
/*
 * append a std::string
 *
 * for a stream with char-sized characters all str.size() characters are copied,
 * so a string with embedded '\0' characters is not cut at the first of them
 * (going through c_str() would stop there)
 *
 * for other streams the string is passed to utf8_to_wide() as a null terminated
 * string, in such a case the conversion still stops at the first '\0'
 */
template<typename char_type, size_t stack_size, size_t heap_block_size>
TextStreamBase<char_type, stack_size, heap_block_size> &
TextStreamBase<char_type, stack_size, heap_block_size>::operator<<(const std::string & str)
{
    if constexpr ( sizeof(char_type) == sizeof(char) )
    {
        for(size_t i=0 ; i < str.size() ; ++i)
        {
            buffer.append(str[i]);
        }
    }
    else
    {
        // false: the existing content of the stream is kept
        utf8_to_wide(str.c_str(), *this, false);
    }

    return *this;
}
/*
 * append a null terminated wide string
 *
 * for a stream with wchar_t-sized characters the string is copied as it is,
 * for other streams the string is passed to wide_to_utf8()
 * (str must not be a null pointer, it is dereferenced without a check)
 */
template<typename char_type, size_t stack_size, size_t heap_block_size>
TextStreamBase<char_type, stack_size, heap_block_size> &
TextStreamBase<char_type, stack_size, heap_block_size>::operator<<(const wchar_t * str)
{
    if constexpr (sizeof(char_type) == sizeof(wchar_t) )
    {
        while( *str )
        {
            buffer.append(*str);
            ++str;
        }
    }
    else
    {
        wide_to_utf8(str, *this);
    }

    return *this;
}
/*
 * append a std::wstring
 *
 * for a stream with wchar_t-sized characters all str.size() characters are copied,
 * so a string with embedded L'\0' characters is not cut at the first of them
 * (going through c_str() would stop there)
 *
 * for other streams the string is passed to wide_to_utf8() as a null terminated
 * string, in such a case the conversion still stops at the first L'\0'
 */
template<typename char_type, size_t stack_size, size_t heap_block_size>
TextStreamBase<char_type, stack_size, heap_block_size> &
TextStreamBase<char_type, stack_size, heap_block_size>::operator<<(const std::wstring & str)
{
    if constexpr (sizeof(char_type) == sizeof(wchar_t) )
    {
        for(size_t i=0 ; i < str.size() ; ++i)
        {
            buffer.append(str[i]);
        }
    }
    else
    {
        wide_to_utf8(str.c_str(), *this);
    }

    return *this;
}
/*
 * append a single character
 * (the value is converted to char_type with static_cast, nothing more is done)
 */
template<typename char_type, size_t stack_size, size_t heap_block_size>
TextStreamBase<char_type, stack_size, heap_block_size> &
TextStreamBase<char_type, stack_size, heap_block_size>::operator<<(char v)
{
    const char_type converted = static_cast<char_type>(v);
    buffer.append(converted);

    return *this;
}
/*
 * append a single character given as unsigned char
 * (the value is converted to char_type with static_cast, nothing more is done)
 */
template<typename char_type, size_t stack_size, size_t heap_block_size>
TextStreamBase<char_type, stack_size, heap_block_size> &
TextStreamBase<char_type, stack_size, heap_block_size>::operator<<(unsigned char v)
{
    const char_type converted = static_cast<char_type>(v);
    buffer.append(converted);

    return *this;
}
/*
 * append a single wide character
 *
 * in a stream with char-sized characters a value lower than 0x80 is stored
 * as one byte, a greater value is encoded with int_to_utf8() so the character
 * is no longer truncated to its lowest byte
 * (when int_to_utf8() returns zero nothing is added;
 * a single wchar_t cannot hold a surrogate pair so such pairs are not joined here)
 *
 * in other streams the value is converted to char_type with static_cast
 */
template<typename char_type, size_t stack_size, size_t heap_block_size>
TextStreamBase<char_type, stack_size, heap_block_size> &
TextStreamBase<char_type, stack_size, heap_block_size>::operator<<(wchar_t v)
{
    if constexpr ( sizeof(char_type) == sizeof(char) && sizeof(wchar_t) > sizeof(char) )
    {
        if( static_cast<unsigned long>(v) < 0x80 )
        {
            // plain ascii: one byte
            buffer.append(static_cast<char_type>(v));
        }
        else
        {
            char utf8_buf[10];
            size_t utf8_buf_len = sizeof(utf8_buf) / sizeof(char);
            size_t len = int_to_utf8(static_cast<int>(v), utf8_buf, utf8_buf_len);

            for(size_t u=0 ; u < len ; ++u)
            {
                buffer.append(static_cast<char_type>(utf8_buf[u]));
            }
        }
    }
    else
    {
        buffer.append(static_cast<char_type>(v));
    }

    return *this;
}
/*
 * append a bool value: '1' for true, '0' for false
 */
template<typename char_type, size_t stack_size, size_t heap_block_size>
TextStreamBase<char_type, stack_size, heap_block_size> &
TextStreamBase<char_type, stack_size, heap_block_size>::operator<<(bool v)
{
    char digit = '0';

    if( v )
    {
        digit = '1';
    }

    buffer.append(static_cast<char_type>(digit));

    return *this;
}
/*
 * append a short value
 * (the value is widened and printed by the long long overload)
 */
template<typename char_type, size_t stack_size, size_t heap_block_size>
TextStreamBase<char_type, stack_size, heap_block_size> &
TextStreamBase<char_type, stack_size, heap_block_size>::operator<<(short v)
{
    const long long widened = static_cast<long long>(v);

    return operator<<(widened);
}
/*
 * append an int value
 * (the value is widened and printed by the long long overload)
 */
template<typename char_type, size_t stack_size, size_t heap_block_size>
TextStreamBase<char_type, stack_size, heap_block_size> &
TextStreamBase<char_type, stack_size, heap_block_size>::operator<<(int v)
{
    const long long widened = static_cast<long long>(v);

    return operator<<(widened);
}
/*
 * append a long value
 * (the value is widened and printed by the long long overload)
 */
template<typename char_type, size_t stack_size, size_t heap_block_size>
TextStreamBase<char_type, stack_size, heap_block_size> &
TextStreamBase<char_type, stack_size, heap_block_size>::operator<<(long v)
{
    const long long widened = static_cast<long long>(v);

    return operator<<(widened);
}
/*
 * append a signed integer
 *
 * the value is converted with Toa() using the radix member,
 * if Toa() returns false nothing is added
 *
 * when min_width_for_integers is set the text is padded with '0' up to that width;
 * for a negative value the minus sign is written before the padding
 * (e.g. "-05" for width 3, not "0-5"), the sign counts towards the width
 */
template<typename char_type, size_t stack_size, size_t heap_block_size>
TextStreamBase<char_type, stack_size, heap_block_size> &
TextStreamBase<char_type, stack_size, heap_block_size>::operator<<(long long v)
{
    char_type buf[50];
    size_t len = sizeof(buf) / sizeof(char_type);
    size_t lenout;

    if( Toa(v, buf, len, radix, &lenout) )
    {
        // the text is checked too so this branch is used only when Toa() really emitted a sign
        if( v < 0 && lenout > 1 && buf[0] == '-' )
        {
            buffer.append(buf[0]);
            fill_up_if_needed('0', lenout);
            buffer.append(buf + 1, lenout - 1);
        }
        else
        {
            fill_up_if_needed('0', lenout);
            buffer.append(buf, lenout);
        }
    }

    return *this;
}
/*
 * append an unsigned short value
 * (the value is widened and printed by the unsigned long long overload)
 */
template<typename char_type, size_t stack_size, size_t heap_block_size>
TextStreamBase<char_type, stack_size, heap_block_size> &
TextStreamBase<char_type, stack_size, heap_block_size>::operator<<(unsigned short v)
{
    const unsigned long long widened = static_cast<unsigned long long>(v);

    return operator<<(widened);
}
/*
 * append an unsigned int value
 * (the value is widened and printed by the unsigned long long overload)
 */
template<typename char_type, size_t stack_size, size_t heap_block_size>
TextStreamBase<char_type, stack_size, heap_block_size> &
TextStreamBase<char_type, stack_size, heap_block_size>::operator<<(unsigned int v)
{
    const unsigned long long widened = static_cast<unsigned long long>(v);

    return operator<<(widened);
}
/*
 * append an unsigned long value
 * (the value is widened and printed by the unsigned long long overload)
 */
template<typename char_type, size_t stack_size, size_t heap_block_size>
TextStreamBase<char_type, stack_size, heap_block_size> &
TextStreamBase<char_type, stack_size, heap_block_size>::operator<<(unsigned long v)
{
    const unsigned long long widened = static_cast<unsigned long long>(v);

    return operator<<(widened);
}
/*
 * append an unsigned integer
 *
 * the value is converted with Toa() using the radix member
 * and padded with '0' according to min_width_for_integers,
 * if Toa() returns false nothing is added
 */
template<typename char_type, size_t stack_size, size_t heap_block_size>
TextStreamBase<char_type, stack_size, heap_block_size> &
TextStreamBase<char_type, stack_size, heap_block_size>::operator<<(unsigned long long v)
{
    char_type digits[50];
    size_t digits_capacity = sizeof(digits) / sizeof(char_type);
    size_t digits_len;

    if( !Toa(v, digits, digits_capacity, radix, &digits_len) )
    {
        return *this;
    }

    fill_up_if_needed('0', digits_len);
    buffer.append(digits, digits_len);

    return *this;
}
/*
 * append a float value
 * (the value is converted to double and printed by the double overload)
 */
template<typename char_type, size_t stack_size, size_t heap_block_size>
TextStreamBase<char_type, stack_size, heap_block_size> &
TextStreamBase<char_type, stack_size, heap_block_size>::operator<<(float v)
{
    const double promoted = static_cast<double>(v);

    return operator<<(promoted);
}
/*
 * append a double value
 * the text is created by snprintf() with the "%g" format
 * and then added with the const char * overload
 */
template<typename char_type, size_t stack_size, size_t heap_block_size>
TextStreamBase<char_type, stack_size, heap_block_size> &
TextStreamBase<char_type, stack_size, heap_block_size>::operator<<(double v)
{
    char text[100];

    // sizeof(char) is one so sizeof(text) is the number of characters
    snprintf(text, sizeof(text), "%g", v);

    return operator<<(text);
}
/*
 * append a long double value
 * the text is created by snprintf() with the "%Lg" format
 * and then added with the const char * overload
 */
template<typename char_type, size_t stack_size, size_t heap_block_size>
TextStreamBase<char_type, stack_size, heap_block_size> &
TextStreamBase<char_type, stack_size, heap_block_size>::operator<<(long double v)
{
    char text[100];

    // sizeof(char) is one so sizeof(text) is the number of characters
    snprintf(text, sizeof(text), "%Lg", v);

    return operator<<(text);
}
/*
 * append a pointer: "0x" and the address printed by Toa() in base 16
 * (radix and min_width_for_integers are not used here,
 * if Toa() returns false nothing is added)
 */
template<typename char_type, size_t stack_size, size_t heap_block_size>
TextStreamBase<char_type, stack_size, heap_block_size> &
TextStreamBase<char_type, stack_size, heap_block_size>::operator<<(const void * v)
{
    const size_t prefix_len = 2;
    char_type text[50];
    const size_t text_size = sizeof(text) / sizeof(char_type);
    size_t digits_len;

    text[0] = '0';
    text[1] = 'x';

    // IMPROVE ME add some minimal width?
    // the digits are written just after the prefix
    if( Toa(reinterpret_cast<unsigned long long>(v), text + prefix_len, text_size - prefix_len, 16, &digits_len) )
    {
        buffer.append(text, digits_len + prefix_len);
    }

    return *this;
}
/*
 * append len characters from str
 *
 * str - the input buffer, it does not have to be null terminated
 * len - the number of characters in str
 *
 * for a stream with char-sized characters the characters are copied as they are
 *
 * for other streams the characters are converted with utf8_to_wide();
 * that function is called here with a null terminated string so exactly len
 * characters are first copied to a temporary std::string - passing str directly
 * would ignore len and read until a '\0' is found
 * (the conversion still stops at a '\0' placed inside the first len characters)
 */
template<typename char_type, size_t stack_size, size_t heap_block_size>
TextStreamBase<char_type, stack_size, heap_block_size> &
TextStreamBase<char_type, stack_size, heap_block_size>::write(const char * str, size_t len)
{
    if constexpr ( sizeof(char_type) == sizeof(char) )
    {
        for(size_t i=0 ; i < len ; ++i)
            buffer.append(str[i]);
    }
    else
    {
        if( len > 0 )
        {
            const std::string bounded(str, len);

            // false: the existing content of the stream is kept
            utf8_to_wide(bounded.c_str(), *this, false);
        }
    }

    return *this;
}
/*
 * append len wide characters from str
 *
 * str - the input buffer, it does not have to be null terminated
 * len - the number of characters in str
 *
 * for a stream with wchar_t-sized characters the characters are copied as they are
 *
 * for other streams the characters are converted with wide_to_utf8();
 * that function is called here with a null terminated string so exactly len
 * characters are first copied to a temporary std::wstring - passing str directly
 * would ignore len and read until a L'\0' is found
 * (the conversion still stops at a L'\0' placed inside the first len characters)
 */
template<typename char_type, size_t stack_size, size_t heap_block_size>
TextStreamBase<char_type, stack_size, heap_block_size> &
TextStreamBase<char_type, stack_size, heap_block_size>::write(const wchar_t * str, size_t len)
{
    if constexpr (sizeof(char_type) == sizeof(wchar_t) )
    {
        for(size_t i=0 ; i < len ; ++i)
            buffer.append(str[i]);
    }
    else
    {
        if( len > 0 )
        {
            const std::wstring bounded(str, len);
            wide_to_utf8(bounded.c_str(), *this);
        }
    }

    return *this;
}
/*
 * append the content of another stream
 * (the whole work is done by put_stream())
 */
template<typename char_type, size_t stack_size, size_t heap_block_size>
TextStreamBase<char_type, stack_size, heap_block_size> &
TextStreamBase<char_type, stack_size, heap_block_size>::operator<<(const Stream & stream)
{
    this->put_stream(stream);

    return *this;
}
/*
 * append the content of stream to this stream
 *
 * the way of copying depends on the character size of this stream
 * and on the kind of the source reported by is_char_stream() / is_wchar_stream():
 * - the same kind: characters are copied one by one
 * - char source, wide destination: put_utf8_to_wide()
 * - wide source, char destination: put_wide_to_utf8()
 * - anything else: a short message is added instead of the content
 *
 * in the one by one loops the size of the source is read once before the loop,
 * so appending a stream to itself copies the existing content once
 * instead of looping without an end (the size was growing with every character added)
 */
template<typename char_type, size_t stack_size, size_t heap_block_size>
void TextStreamBase<char_type, stack_size, heap_block_size>::put_stream(const Stream & stream)
{
    if( sizeof(char_type) == sizeof(char) && stream.is_char_stream() )
    {
        // from utf8 to utf8
        const size_t source_size = stream.size();

        for(size_t i=0 ; i < source_size ; ++i)
        {
            char c = stream.get_char(i);
            operator<<(c);
        }
    }
    else
    if( sizeof(char_type) == sizeof(wchar_t) && stream.is_char_stream() )
    {
        // from utf8 to wide
        put_utf8_to_wide(stream);
    }
    else
    if( sizeof(char_type) == sizeof(char) && stream.is_wchar_stream() )
    {
        // from wide to utf8
        put_wide_to_utf8(stream);
    }
    else
    if( sizeof(char_type) == sizeof(wchar_t) && stream.is_wchar_stream() )
    {
        // from wide to wide
        const size_t source_size = stream.size();

        for(size_t i=0 ; i < source_size ; ++i)
        {
            wchar_t c = stream.get_wchar(i);
            operator<<(c);
        }
    }
    else
    {
        operator<<("such conversion is not implemented");
    }
}
// test me when sizeof(wchar_t) is 2
// or may move me to utf8 functions?
/*
 * read the source stream with utf8_to_int() and add each decoded value
 * to this stream with int_to_wide()
 *
 * a value reported as incorrect is replaced with U+FFFD,
 * the loop ends when utf8_to_int() returns zero
 */
template<typename char_type, size_t stack_size, size_t heap_block_size>
void TextStreamBase<char_type, stack_size, heap_block_size>::put_utf8_to_wide(const Stream & stream)
{
    size_t index = 0;
    int code_point;
    bool is_correct;

    for(;;)
    {
        const size_t consumed = utf8_to_int(stream, index, code_point, is_correct);

        if( consumed == 0 )
        {
            break;
        }

        if( !is_correct )
        {
            code_point = 0xFFFD; // U+FFFD "replacement character"
        }

        int_to_wide(code_point, *this);
        index += consumed;
    }
}
// test me when sizeof(wchar_t) is 2
// or may move me to utf8 functions?
/*
 * read the source stream with get_wchar(), encode each value with int_to_utf8()
 * and add the created bytes to this stream
 *
 * when a character is recognized by is_first_surrogate_char() and it is not
 * the last one then it is joined with the next character by surrogate_pair_to_int()
 */
template<typename char_type, size_t stack_size, size_t heap_block_size>
void TextStreamBase<char_type, stack_size, heap_block_size>::put_wide_to_utf8(const Stream & stream)
{
    char encoded[10];
    size_t encoded_capacity = sizeof(encoded) / sizeof(char);

    for(size_t pos = 0 ; pos < stream.size() ; ++pos)
    {
        int code_point = static_cast<int>(stream.get_wchar(pos));

        if( is_first_surrogate_char(code_point) && pos + 1 < stream.size() )
        {
            wchar_t first = static_cast<wchar_t>(code_point);
            wchar_t second = stream.get_wchar(++pos); // the second character is consumed here
            surrogate_pair_to_int(first, second, code_point);
        }

        size_t encoded_len = int_to_utf8(code_point, encoded, encoded_capacity);

        for(size_t k = 0 ; k < encoded_len ; ++k)
        {
            operator<<(encoded[k]);
        }
    }
}
/*
 * append a Space object
 * the text is produced by space.serialize_to_space_stream()
 * called with this stream and true as the second argument
 */
template<typename char_type, size_t stack_size, size_t heap_block_size>
TextStreamBase<char_type, stack_size, heap_block_size> &
TextStreamBase<char_type, stack_size, heap_block_size>::operator<<(const Space & space)
{
    TextStreamBase & self = *this;
    space.serialize_to_space_stream(self, true);

    return self;
}
/*
 * append a Date object
 * the text is produced by date.Serialize() called with this stream
 */
template<typename char_type, size_t stack_size, size_t heap_block_size>
TextStreamBase<char_type, stack_size, heap_block_size> &
TextStreamBase<char_type, stack_size, heap_block_size>::operator<<(const Date & date)
{
    TextStreamBase & self = *this;
    date.Serialize(self);

    return self;
}
/*
 * append the content of another TextStreamBase
 * (it can have a different character type and different buffer sizes)
 *
 * the argument is passed to put_stream() as a Stream
 */
template<typename char_type, size_t stack_size, size_t heap_block_size>
template<typename arg_char_type, size_t arg_stack_size, size_t arg_heap_block_size>
TextStreamBase<char_type, stack_size, heap_block_size> &
TextStreamBase<char_type, stack_size, heap_block_size>::operator<<(
const TextStreamBase<arg_char_type, arg_stack_size, arg_heap_block_size> & arg)
{
    /*
     * in the future we can have a faster implementation
     * which uses iterators instead of get_char() and get_wchar() methods
     *
     */
    this->put_stream(arg);

    return *this;
}
/*
 * set the minimal width used when integers are printed
 *
 * min_width - the new value of min_width_for_integers
 * returns a reference to this stream so the call can be chained
 */
template<typename char_type, size_t stack_size, size_t heap_block_size>
TextStreamBase<char_type, stack_size, heap_block_size> &
TextStreamBase<char_type, stack_size, heap_block_size>::int_min_width(size_t min_width)
{
    this->min_width_for_integers = min_width;

    return *this;
}
/*
 * add fill_up_char as many times as existing_length is lower than min_width_for_integers
 *
 * fill_up_char    - the character to add
 * existing_length - the length of the text which is going to be printed
 *
 * nothing is added when min_width_for_integers is zero
 * or when it is not greater than existing_length
 */
template<typename char_type, size_t stack_size, size_t heap_block_size>
TextStreamBase<char_type, stack_size, heap_block_size> &
TextStreamBase<char_type, stack_size, heap_block_size>::fill_up_if_needed(wchar_t fill_up_char, size_t existing_length)
{
    // both values are unsigned so this test also covers min_width_for_integers == 0
    if( min_width_for_integers > existing_length )
    {
        size_t missing = min_width_for_integers - existing_length;

        while( missing-- > 0 )
        {
            buffer.append(fill_up_char);
        }
    }

    return *this;
}
} // namespace
#endif