/* * This file is a part of PikoTools * and is distributed under the 2-Clause BSD licence. * Author: Tomasz Sowa */ /* * Copyright (c) 2012-2023, Tomasz Sowa * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: * * 1. Redistributions of source code must retain the above copyright notice, * this list of conditions and the following disclaimer. * * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE * POSSIBILITY OF SUCH DAMAGE. * */ #ifndef headerfile_pikotools_src_textstream_textstream #define headerfile_pikotools_src_textstream_textstream #include #include "stream.h" #include "space/space.h" #include "date/date.h" #include "convert/inttostr.h" #include "membuffer/membuffer.h" #include "types.h" #include "utf8/utf8.h" // for snprintf #include namespace pt { /* a special class representing a stream buffer similar to std::ostringstream StringType can be either std::string or std::wstring */ template class TextStreamBase : public Stream { public: TextStreamBase(); typedef CharT char_type; typedef MemBuffer buffer_type; class iterator { public: typename buffer_type::iterator membuffer_iterator; iterator(); iterator(const iterator & i); iterator & operator=(const iterator & i); iterator(const typename buffer_type::iterator & i); iterator & operator=(const typename buffer_type::iterator & i); bool operator==(const iterator & i) const; bool operator!=(const iterator & i) const; iterator & operator++(); // prefix ++ iterator operator++(int); // postfix ++ iterator & operator--(); // prefix -- iterator operator--(int); // postfix -- CharT & operator*(); wchar_t get_unicode_and_advance(const iterator & end); }; class const_iterator { public: typename buffer_type::const_iterator membuffer_const_iterator; const_iterator(); const_iterator(const const_iterator & i); const_iterator(const iterator & i); const_iterator & operator=(const const_iterator & i); const_iterator & operator=(const iterator & i); const_iterator(const typename buffer_type::const_iterator & i); const_iterator(const typename buffer_type::iterator & i); const_iterator & operator=(const typename buffer_type::const_iterator & i); const_iterator & operator=(const typename buffer_type::iterator & i); bool operator==(const const_iterator & i) const; bool operator!=(const const_iterator & i) const; const_iterator & operator++(); // prefix ++ const_iterator operator++(int); // postfix ++ const_iterator & operator--(); // prefix -- const_iterator operator--(int); // postfix -- CharT operator*(); wchar_t get_unicode_and_advance(const const_iterator & end); }; bool is_char_stream() const; bool is_wchar_stream() const; void clear(); bool empty() const; size_t size() const; void reserve(size_t len); size_t capacity() const; iterator begin(); iterator end(); const_iterator begin() const; const_iterator end() const; // IMPROVE ME // add cbegin(), cend(), rbegin(), rend(), crbegin(), crend() void to_str(std::string & str, bool clear_string = true) const; void to_str(std::wstring & str, bool clear_string = true) const; std::string to_str() const; std::wstring to_wstr() const; /* * returns true if the buffer was sufficient large and there were no convertion errors */ bool to_str(char * str, size_t max_buf_len) const; bool to_str(wchar_t * str, size_t max_buf_len) const; char get_char(size_t index) const; wchar_t get_wchar(size_t index) const; char_type & operator[](size_t index); char_type operator[](size_t index) const; TextStreamBase & operator<<(const char * str); TextStreamBase & operator<<(const std::string & str); TextStreamBase & operator<<(const wchar_t * str); TextStreamBase & operator<<(const std::wstring & str); TextStreamBase & operator<<(char); TextStreamBase & operator<<(unsigned char); TextStreamBase & operator<<(wchar_t); // no surrogate pairs are used TextStreamBase & operator<<(bool); TextStreamBase & operator<<(short); TextStreamBase & operator<<(int); TextStreamBase & operator<<(long); TextStreamBase & operator<<(long long); TextStreamBase & operator<<(unsigned short); TextStreamBase & operator<<(unsigned int); TextStreamBase & operator<<(unsigned long); TextStreamBase & operator<<(unsigned long long); TextStreamBase & operator<<(float); TextStreamBase & operator<<(double); TextStreamBase & operator<<(long double); TextStreamBase & operator<<(const void *); // printing a pointer TextStreamBase & operator<<(const Stream & stream); TextStreamBase & operator<<(const Space & space); TextStreamBase & operator<<(const Date & date); template TextStreamBase & operator<<(const TextStreamBase & arg); template bool operator==(const TextStreamBase & stream) const; template bool operator!=(const TextStreamBase & stream) const; // min width for integer output // if the output value has less digits then first zeroes are added // (0 turn off) TextStreamBase & int_min_width(size_t min_width); TextStreamBase & write(const char * buf, size_t len); TextStreamBase & write(const wchar_t * buf, size_t len); TextStreamBase & fill_up_if_needed(wchar_t fill_up_char, size_t existing_length); /* raw access */ int radix; size_t min_width_for_integers; buffer_type buffer; protected: template void put_stream(const TextStreamBase & stream); void put_stream(const Stream & stream); }; template TextStreamBase::TextStreamBase() { clear(); } template TextStreamBase::iterator::iterator() { } template TextStreamBase::iterator::iterator(const iterator & i) : membuffer_iterator(i) { } template typename TextStreamBase::iterator & TextStreamBase::iterator::operator=(const iterator & i) { membuffer_iterator = i; } template TextStreamBase::iterator::iterator(const typename buffer_type::iterator & i) : membuffer_iterator(i) { } template typename TextStreamBase::iterator & TextStreamBase::iterator::operator=(const typename buffer_type::iterator & i) { membuffer_iterator = i; } template bool TextStreamBase::iterator::operator==(const iterator & i) const { return membuffer_iterator == i.membuffer_iterator; } template bool TextStreamBase::iterator::operator!=(const iterator & i) const { return membuffer_iterator != i.membuffer_iterator; } template typename TextStreamBase::iterator & TextStreamBase::iterator::operator++() { ++membuffer_iterator; return *this; } template typename TextStreamBase::iterator TextStreamBase::iterator::operator++(int) { const_iterator old(*this); membuffer_iterator++; return old; } template typename TextStreamBase::iterator & TextStreamBase::iterator::operator--() { --membuffer_iterator; return *this; } template typename TextStreamBase::iterator TextStreamBase::iterator::operator--(int) { const_iterator old(*this); membuffer_iterator--; return old; } template char_type & TextStreamBase::iterator::operator*() { return *membuffer_iterator; } template wchar_t TextStreamBase::iterator::get_unicode_and_advance(const iterator & end) { if( *this != end ) { if constexpr (sizeof(char_type) == sizeof(char) ) { int res; bool correct; utf8_to_int(*this, end, res, correct); if( correct ) return static_cast(res); else return static_cast(0xFFFD); // U+FFFD "replacement character" } else { wchar_t c = operator*(); ++membuffer_iterator; return c; } } return 0; } template TextStreamBase::const_iterator::const_iterator() { } template TextStreamBase::const_iterator::const_iterator(const const_iterator & i) : membuffer_const_iterator(i.membuffer_const_iterator) { } template TextStreamBase::const_iterator::const_iterator(const iterator & i) : membuffer_const_iterator(i.membuffer_iterator) { } template typename TextStreamBase::const_iterator & TextStreamBase::const_iterator::operator=(const const_iterator & i) { membuffer_const_iterator = i.membuffer_const_iterator; return *this; } template typename TextStreamBase::const_iterator & TextStreamBase::const_iterator::operator=(const iterator & i) { membuffer_const_iterator = i.membuffer_iterator; return *this; } template TextStreamBase::const_iterator::const_iterator(const typename buffer_type::const_iterator & i) : membuffer_const_iterator(i) { } template TextStreamBase::const_iterator::const_iterator(const typename buffer_type::iterator & i) : membuffer_const_iterator(i) { } template typename TextStreamBase::const_iterator & TextStreamBase::const_iterator::operator=(const typename buffer_type::const_iterator & i) { membuffer_const_iterator = i; return *this; } template typename TextStreamBase::const_iterator & TextStreamBase::const_iterator::operator=(const typename buffer_type::iterator & i) { membuffer_const_iterator = i; return *this; } template bool TextStreamBase::const_iterator::operator==(const const_iterator & i) const { return membuffer_const_iterator == i.membuffer_const_iterator; } template bool TextStreamBase::const_iterator::operator!=(const const_iterator & i) const { return membuffer_const_iterator != i.membuffer_const_iterator; } template typename TextStreamBase::const_iterator & TextStreamBase::const_iterator::operator++() { ++membuffer_const_iterator; return *this; } template typename TextStreamBase::const_iterator TextStreamBase::const_iterator::operator++(int) { const_iterator old(*this); membuffer_const_iterator++; return old; } template typename TextStreamBase::const_iterator & TextStreamBase::const_iterator::operator--() { --membuffer_const_iterator; return *this; } template typename TextStreamBase::const_iterator TextStreamBase::const_iterator::operator--(int) { const_iterator old(*this); membuffer_const_iterator--; return old; } template char_type TextStreamBase::const_iterator::operator*() { return *membuffer_const_iterator; } template wchar_t TextStreamBase::const_iterator::get_unicode_and_advance(const const_iterator & end) { if( *this != end ) { if constexpr (sizeof(char_type) == sizeof(char) ) { int res; bool correct; pt::utf8_to_int(*this, end, res, correct); if( correct ) return static_cast(res); else return static_cast(0xFFFD); // U+FFFD "replacement character" } else { wchar_t c = operator*(); ++membuffer_const_iterator; return c; } } return 0; } template bool TextStreamBase::is_char_stream() const { if constexpr (sizeof(char_type) == sizeof(char) ) { return true; } return false; } template bool TextStreamBase::is_wchar_stream() const { if constexpr (sizeof(char_type) == sizeof(wchar_t) ) { return true; } return false; } template void TextStreamBase::clear() { radix = 10; min_width_for_integers = 0; buffer.clear(); } template bool TextStreamBase::empty() const { return buffer.empty(); } template size_t TextStreamBase::size() const { return buffer.size(); } template void TextStreamBase::reserve(size_t len) { buffer.reserve(len); } template size_t TextStreamBase::capacity() const { return buffer.capacity(); } template typename TextStreamBase::iterator TextStreamBase::begin() { return buffer.begin(); } template typename TextStreamBase::iterator TextStreamBase::end() { return buffer.end(); } template typename TextStreamBase::const_iterator TextStreamBase::begin() const { return buffer.begin(); } template typename TextStreamBase::const_iterator TextStreamBase::end() const { return buffer.end(); } template void TextStreamBase::to_str(std::string & str, bool clear_string) const { if( clear_string ) str.clear(); if( str.capacity() < str.size() + size() ) str.reserve(str.size() + size()); if constexpr (sizeof(char_type) == sizeof(char) ) { const_iterator i = begin(); const_iterator i_end = end(); for( ; i != i_end ; ++i) str += *i; } else { wide_stream_to_utf8(*this, str); } } template void TextStreamBase::to_str(std::wstring & str, bool clear_string) const { if( clear_string ) str.clear(); if( str.capacity() < str.size() + size() ) str.reserve(str.size() + size()); if constexpr (sizeof(char_type) == sizeof(wchar_t) ) { const_iterator i = begin(); for( ; i != end() ; ++i) str += *i; } else { utf8_to_wide(*this, str, false); } } template std::string TextStreamBase::to_str() const { std::string str; to_str(str, false); return str; } template std::wstring TextStreamBase::to_wstr() const { std::wstring str; to_str(str, false); return str; } template bool TextStreamBase::to_str(char * str, size_t max_buf_len) const { bool converted_correctly = false; if( max_buf_len > 0 ) { if constexpr (sizeof(char_type) == sizeof(char) ) { converted_correctly = true; const_iterator i = begin(); const_iterator i_end = end(); size_t len = 0; max_buf_len -= 1; // for terminating null character for( ; i != i_end ; ++i, ++len) { if( len < max_buf_len ) { str[len] = *i; } else { converted_correctly = false; break; } } str[len] = 0; } else { converted_correctly = wide_stream_to_utf8(*this, str, max_buf_len); } } return converted_correctly; } template bool TextStreamBase::to_str(wchar_t * str, size_t max_buf_len) const { bool converted_correctly = false; if( max_buf_len > 0 ) { if constexpr (sizeof(char_type) == sizeof(wchar_t) ) { converted_correctly = true; const_iterator i = begin(); const_iterator i_end = end(); size_t len = 0; max_buf_len -= 1; // for terminating null character for( ; i != i_end ; ++i, ++len) { if( len < max_buf_len ) { str[len] = *i; } else { converted_correctly = false; break; } } str[len] = 0; } else { converted_correctly = utf8_to_wide(*this, str, max_buf_len); } } return converted_correctly; } template char TextStreamBase::get_char(size_t index) const { return static_cast(buffer[index]); } template wchar_t TextStreamBase::get_wchar(size_t index) const { return static_cast(buffer[index]); } template char_type & TextStreamBase::operator[](size_t index) { return buffer[index]; } template char_type TextStreamBase::operator[](size_t index) const { return buffer[index]; } template TextStreamBase & TextStreamBase::operator<<(const char * str) { if constexpr ( sizeof(char_type) == sizeof(char) ) { for( ; *str ; ++str) buffer.append(*str); } else { utf8_to_wide(str, *this, false); } return *this; } template TextStreamBase & TextStreamBase::operator<<(const std::string & str) { if constexpr ( sizeof(char_type) == sizeof(char) ) { buffer.append(str.c_str(), str.size()); } else { utf8_to_wide(str, *this, false); } return *this; } template TextStreamBase & TextStreamBase::operator<<(const wchar_t * str) { if constexpr (sizeof(char_type) == sizeof(wchar_t) ) { for( ; *str ; ++str) buffer.append(*str); } else { wide_to_utf8(str, *this, false); } return *this; } template TextStreamBase & TextStreamBase::operator<<(const std::wstring & str) { if constexpr (sizeof(char_type) == sizeof(wchar_t) ) { buffer.append(str.c_str(), str.size()); } else { wide_to_utf8(str, *this, false); } return *this; } template TextStreamBase & TextStreamBase::operator<<(char v) { if constexpr (sizeof(char_type) == sizeof(wchar_t) ) { buffer.append(static_cast(static_cast(v))); } else { buffer.append(v); } return *this; } template TextStreamBase & TextStreamBase::operator<<(unsigned char v) { buffer.append(static_cast(v)); return *this; } template TextStreamBase & TextStreamBase::operator<<(wchar_t v) { if constexpr (sizeof(char_type) == sizeof(wchar_t) ) { buffer.append(v); } else { pt::int_to_utf8(static_cast(v), *this); } return *this; } template TextStreamBase & TextStreamBase::operator<<(bool v) { char c = v ? '1' : '0'; buffer.append(static_cast(c)); return *this; } template TextStreamBase & TextStreamBase::operator<<(short v) { return operator<<(static_cast(v)); } template TextStreamBase & TextStreamBase::operator<<(int v) { return operator<<(static_cast(v)); } template TextStreamBase & TextStreamBase::operator<<(long v) { return operator<<(static_cast(v)); } template TextStreamBase & TextStreamBase::operator<<(long long v) { char_type buf[50]; size_t len = sizeof(buf) / sizeof(char_type); size_t lenout; if( Toa(v, buf, len, radix, &lenout) ) { fill_up_if_needed('0', lenout); buffer.append(buf, lenout); } return *this; } template TextStreamBase & TextStreamBase::operator<<(unsigned short v) { return operator<<(static_cast(v)); } template TextStreamBase & TextStreamBase::operator<<(unsigned int v) { return operator<<(static_cast(v)); } template TextStreamBase & TextStreamBase::operator<<(unsigned long v) { return operator<<(static_cast(v)); } template TextStreamBase & TextStreamBase::operator<<(unsigned long long v) { char_type buf[50]; size_t len = sizeof(buf) / sizeof(char_type); size_t lenout; if( Toa(v, buf, len, radix, &lenout) ) { fill_up_if_needed('0', lenout); buffer.append(buf, lenout); } return *this; } template TextStreamBase & TextStreamBase::operator<<(float v) { return operator<<(static_cast(v)); } template TextStreamBase & TextStreamBase::operator<<(double v) { char buf[100]; snprintf(buf, sizeof(buf)/sizeof(char), "%g", v); return operator<<(buf); } template TextStreamBase & TextStreamBase::operator<<(long double v) { char buf[100]; snprintf(buf, sizeof(buf)/sizeof(char), "%Lg", v); return operator<<(buf); } template TextStreamBase & TextStreamBase::operator<<(const void * v) { char_type buf[50]; size_t len = sizeof(buf) / sizeof(char_type); size_t lenout; buf[0] = '0'; buf[1] = 'x'; // IMPROVE ME add some minimal width? if( Toa(reinterpret_cast(v), buf+2, len-2, 16, &lenout) ) buffer.append(buf, lenout+2); return *this; } template TextStreamBase & TextStreamBase::write(const char * str, size_t len) { if constexpr ( sizeof(char_type) == sizeof(char) ) { for(size_t i=0 ; i < len ; ++i) buffer.append(str[i]); } else { utf8_to_wide(str, *this, false); } return *this; } template TextStreamBase & TextStreamBase::write(const wchar_t * str, size_t len) { if constexpr (sizeof(char_type) == sizeof(wchar_t) ) { for(size_t i=0 ; i < len ; ++i) buffer.append(str[i]); } else { wide_to_utf8(str, *this); } return *this; } template TextStreamBase & TextStreamBase::operator<<(const Stream & stream) { put_stream(stream); return *this; } template template void TextStreamBase::put_stream( const TextStreamBase & stream ) { if( (sizeof(char_type) == sizeof(char) && stream.is_char_stream()) || (sizeof(char_type) == sizeof(wchar_t) && stream.is_wchar_stream()) ) { // from utf8 to utf8 or from wide to wide typename TextStreamBase::const_iterator i = stream.begin(); for( ; i != stream.end() ; ++i) { operator<<(*i); } } else if( sizeof(char_type) == sizeof(wchar_t) && stream.is_char_stream() ) { // from utf8 to wide utf8_to_wide(stream, *this, false); } else if( sizeof(char_type) == sizeof(char) && stream.is_wchar_stream() ) { // from wide to utf8 wide_stream_to_utf8(stream, *this, false); } else { operator<<("such conversion is not implemented"); } } template void TextStreamBase::put_stream(const Stream & stream) { if( sizeof(char_type) == sizeof(char) && stream.is_char_stream() ) { // from utf8 to utf8 for(size_t i=0 ; i < stream.size() ; ++i) { char c = stream.get_char(i); operator<<(c); } } else if( sizeof(char_type) == sizeof(wchar_t) && stream.is_char_stream() ) { // from utf8 to wide utf8_to_wide(stream, *this, false); } else if( sizeof(char_type) == sizeof(char) && stream.is_wchar_stream() ) { // from wide to utf8 wide_stream_to_utf8(stream, *this, false); } else if( sizeof(char_type) == sizeof(wchar_t) && stream.is_wchar_stream() ) { // from wide to wide for(size_t i=0 ; i < stream.size() ; ++i) { wchar_t c = stream.get_wchar(i); operator<<(c); } } else { operator<<("such conversion is not implemented"); } } template TextStreamBase & TextStreamBase::operator<<(const Space & space) { space.serialize_to_json_stream(*this, true); return *this; } template TextStreamBase & TextStreamBase::operator<<(const Date & date) { date.Serialize(*this); return *this; } template template TextStreamBase & TextStreamBase::operator<<( const TextStreamBase & arg) { put_stream(arg); return *this; } template template bool TextStreamBase::operator==(const TextStreamBase & stream) const { bool are_the_same = false; /* * at the moment we do not make any conversions for == and != operators * this may change in the future */ if( sizeof(char_type) == sizeof(arg_char_type) && size() == stream.size() ) { are_the_same = true; const_iterator i1 = begin(); const_iterator i2 = stream.begin(); const_iterator i1_end = end(); while( i1 != i1_end ) { if( *i1 != *i2 ) { are_the_same = false; break; } ++i1; ++i2; } } return are_the_same; } template template bool TextStreamBase::operator!=(const TextStreamBase & stream) const { return !operator==(stream); } template TextStreamBase & TextStreamBase::int_min_width(size_t min_width) { min_width_for_integers = min_width; return *this; } template TextStreamBase & TextStreamBase::fill_up_if_needed(wchar_t fill_up_char, size_t existing_length) { if( min_width_for_integers > 0 && min_width_for_integers > existing_length ) { for(size_t i = existing_length ; i < min_width_for_integers ; ++i) { buffer.append(fill_up_char); } } return *this; } /*! this function converts an UTF-8 stream into wide stream or wide string (is declared in utf8/utf8.h) input: iterator_in - an TextStream iterator for reading from iterator_end - an end iterator (can be returned by end() method from TextStream) output: out_stream - an output wide stream or wide string this function returns false if there were some errors when converting */ template bool utf8_to_wide( const TextStreamBase & utf8, StreamOrStringType & out_stream, bool clear_stream, int mode ) { typename TextStreamBase::const_iterator i_begin = utf8.begin(); return utf8_to_wide(i_begin, utf8.end(), out_stream, clear_stream, mode); } } // namespace #endif