diff --git a/log/filelog.cpp b/log/filelog.cpp
index 579a3ca..097bbd4 100644
--- a/log/filelog.cpp
+++ b/log/filelog.cpp
@@ -100,35 +100,6 @@ void FileLog::open_file()
 }
 
 
-void FileLog::PutBuffer(PT::WTextStream * buffer, std::ostream & out_stream)
-{
-	char utf8_buffer[256];
-	std::size_t buffer_len = sizeof(utf8_buffer) / sizeof(char);
-	std::size_t utf8_sequence_max_length = 10;
-	std::size_t index = 0;
-
-	PT::WTextStream::const_iterator i = buffer->begin();
-
-	while( i != buffer->end() )
-	{
-		if( index + utf8_sequence_max_length > buffer_len )
-		{
-			out_stream.write(utf8_buffer, index);
-			index = 0;
-		}
-
-		index += PT::IntToUTF8(*i, utf8_buffer + index, buffer_len - index);
-		++i;
-	}
-
-	if( index > 0 )
-	{
-		out_stream.write(utf8_buffer, index);
-	}
-}
-
-
-
 void FileLog::save_log(PT::WTextStream * buffer)
 {
 	if( buffer->empty() )
@@ -140,7 +111,7 @@ void FileLog::save_log(PT::WTextStream * buffer)
 	{
 		if( log_stdout )
 		{
-			PutBuffer(buffer, std::cout);
+			PT::WideToUTF8(*buffer, std::cout);
 		}
 
 		if( !log_file.empty() )
@@ -155,7 +126,7 @@ void FileLog::save_log(PT::WTextStream * buffer)
 
 			if( file )
 			{
-				PutBuffer(buffer, file);
+				PT::WideToUTF8(*buffer, file);
 				file.flush();
 			}
 		}
diff --git a/log/filelog.h b/log/filelog.h
index e1d0fd3..abf6555 100644
--- a/log/filelog.h
+++ b/log/filelog.h
@@ -82,7 +82,6 @@ protected:
 
 	virtual bool synchro_lock();
 	virtual void synchro_unlock();
-	virtual void PutBuffer(PT::WTextStream * buffer, std::ostream & out_stream);
 	virtual void open_file();
 
 };
diff --git a/log/log.cpp b/log/log.cpp
index 9c31c86..0e28e6b 100644
--- a/log/log.cpp
+++ b/log/log.cpp
@@ -115,7 +115,7 @@ Log & Log::operator<<(const char * s)
 {
 	if( buffer && file_log && s && current_level <= file_log->get_log_level() )
 	{
-		(*buffer) << s;
+		PT::UTF8ToWide(s, *buffer, false);
 	}
 
 return *this;
@@ -127,7 +127,7 @@ Log & Log::operator<<(const std::string & s)
 {
 	if( buffer && file_log && current_level <= file_log->get_log_level() )
 	{
-		(*buffer) << s;
+		PT::UTF8ToWide(s, *buffer, false);
 	}
 
 return *this;
@@ -139,7 +139,7 @@ Log & Log::operator<<(const std::string * s)
 {
-	if( buffer && file_log && current_level <= file_log->get_log_level() )
+	if( buffer && file_log && s && current_level <= file_log->get_log_level() ) // a null s is skipped (as in the const char * overload) instead of being dereferenced
 	{
-		(*buffer) << *s;
+		PT::UTF8ToWide(*s, *buffer, false);
 	}
 
 return *this;
diff --git a/mainspaceparser/Makefile.dep b/mainspaceparser/Makefile.dep
index bce7807..9893b1b 100644
--- a/mainspaceparser/Makefile.dep
+++ b/mainspaceparser/Makefile.dep
@@ -1,4 +1,7 @@
 # DO NOT DELETE
 
 mainspaceparser.o: mainspaceparser.h ../space/space.h ../textstream/types.h
-mainspaceparser.o: ../utf8/utf8.h
+mainspaceparser.o: ../utf8/utf8.h ../textstream/textstream.h ../date/date.h
+mainspaceparser.o: ../convert/convert.h ../convert/inttostr.h
+mainspaceparser.o: ../convert/strtoint.h ../convert/text.h ../convert/misc.h
+mainspaceparser.o: ../membuffer/membuffer.h ../textstream/types.h
diff --git a/space/Makefile.dep b/space/Makefile.dep
index ab26c66..adb4879 100644
--- a/space/Makefile.dep
+++ b/space/Makefile.dep
@@ -1,11 +1,19 @@
 # DO NOT DELETE
 
 jsontospaceparser.o: jsontospaceparser.h space.h ../textstream/types.h
-jsontospaceparser.o: ../utf8/utf8.h
+jsontospaceparser.o: ../utf8/utf8.h ../textstream/textstream.h
+jsontospaceparser.o: ../space/space.h ../date/date.h ../convert/convert.h
+jsontospaceparser.o: ../convert/inttostr.h ../convert/strtoint.h
+jsontospaceparser.o: ../convert/text.h ../convert/misc.h
+jsontospaceparser.o: ../membuffer/membuffer.h ../textstream/types.h
 space.o: space.h ../textstream/types.h ../utf8/utf8.h
 space.o: ../textstream/textstream.h ../space/space.h ../date/date.h
 space.o: ../convert/convert.h ../convert/inttostr.h ../convert/strtoint.h
 space.o: ../convert/text.h ../convert/misc.h ../membuffer/membuffer.h
 space.o: ../textstream/types.h
 spaceparser.o: spaceparser.h space.h ../textstream/types.h ../utf8/utf8.h
+spaceparser.o: ../textstream/textstream.h ../space/space.h ../date/date.h
+spaceparser.o: ../convert/convert.h ../convert/inttostr.h
+spaceparser.o: ../convert/strtoint.h ../convert/text.h ../convert/misc.h
+spaceparser.o: ../membuffer/membuffer.h ../textstream/types.h
 spacetojson.o: spacetojson.h space.h ../textstream/types.h
diff --git a/utf8/Makefile.dep b/utf8/Makefile.dep
index f2f3eba..ff68e4a 100644
--- a/utf8/Makefile.dep
+++ b/utf8/Makefile.dep
@@ -1,3 +1,6 @@
 # DO NOT DELETE
 
-utf8.o: utf8.h
+utf8.o: utf8.h ../textstream/textstream.h ../space/space.h
+utf8.o: ../textstream/types.h ../date/date.h ../convert/convert.h
+utf8.o: ../convert/inttostr.h ../convert/strtoint.h ../convert/text.h
+utf8.o: ../convert/misc.h ../membuffer/membuffer.h ../textstream/types.h
diff --git a/utf8/utf8.cpp b/utf8/utf8.cpp
index 5bee161..6f9ef38 100644
--- a/utf8/utf8.cpp
+++ b/utf8/utf8.cpp
@@ -5,7 +5,7 @@
  */
 
 /*
- * Copyright (c) 2010-2014, Tomasz Sowa
+ * Copyright (c) 2010-2018, Tomasz Sowa
  * All rights reserved.
  * 
  * Redistribution and use in source and binary forms, with or without
@@ -38,6 +38,7 @@
 
 #include "utf8.h"
 
+
 
 namespace PT
 {
@@ -264,6 +265,7 @@ static void IntToWide(int c, std::wstring & res)
 {
 	if( sizeof(wchar_t)==2 && c>0xffff )
 	{
+		// UTF16 surrogate pairs
 		c -= 0x10000;
 		res += static_cast<wchar_t>(((c >> 10) & 0x3FF) + 0xD800);
 		res += static_cast<wchar_t>((c & 0x3FF) + 0xDC00);
@@ -275,8 +277,71 @@ static void IntToWide(int c, std::wstring & res)
 }
 
 
+// Appends the code point c to res. When wchar_t is 16 bits wide and c is
+// above 0xFFFF the code point is written as an UTF-16 surrogate pair.
+static void IntToWide(int c, WTextStream & res)
+{
+	if( sizeof(wchar_t)==2 && c>0xffff )
+	{
+		// UTF16 surrogate pairs
+		c -= 0x10000;
+		res << static_cast<wchar_t>(((c >> 10) & 0x3FF) + 0xD800);
+		res << static_cast<wchar_t>((c & 0x3FF) + 0xDC00);
+	}
+	else
+	{
+		res << static_cast<wchar_t>(c);
+	}
+}
 
 
+// Decodes utf8_len bytes of UTF-8 from utf8 and calls convert_function(int)
+// for every decoded code point.
+// For an invalid sequence: if mode is 1 then U+FFFD is passed to convert_function,
+// otherwise nothing is passed; decoding continues after the sequence.
+// Returns true if there were no invalid sequences.
+// A template parameter is used for the callback because 'auto' in a function
+// parameter list is not standard C++ before C++20.
+template<typename ConvertFunction>
+static bool UTF8ToWideGeneric(const char * utf8, size_t utf8_len, int mode, ConvertFunction convert_function)
+{
+int z;
+size_t len;
+bool correct, was_error = false;
+
+	while( utf8_len > 0 )
+	{
+		if( (unsigned char)*utf8 <= 0x7f )
+		{
+			// small optimization
+			len = 1;
+			correct = true;
+			z = static_cast<unsigned char>(*utf8);
+		}
+		else
+		{
+			len = UTF8ToInt(utf8, utf8_len, z, correct); // the len will be different from zero
+		}
+
+		if( !correct )
+		{
+			if( mode == 1 )
+				convert_function(0xFFFD); // U+FFFD "replacement character"
+
+			was_error = true;
+		}
+		else
+		{
+			convert_function(z);
+		}
+
+		utf8 += len;
+		utf8_len -= len;
+	}
+
+return !was_error;
+}
+
 
 /*!
 	this function converts an utf8 string into wide string (std::wstring)
@@ -295,48 +360,20 @@ static void IntToWide(int c, std::wstring & res)
 */
 bool UTF8ToWide(const char * utf8, size_t utf8_len, std::wstring & res, bool clear, int mode)
 {
-int z;
-size_t len;
-bool correct, was_error = false;
-
 	if( clear )
 		res.clear();
 
-	while( utf8_len > 0 )
-	{
-		if( (unsigned char)*utf8 <= 0x7f )
-		{
-			// small optimization
-			len = 1;
-			correct = true;
-			z = static_cast<unsigned char>(*utf8);
-		}
-		else
-		{
-			len = UTF8ToInt(utf8, utf8_len, z, correct); // the len will be different from zero
-		}
+	bool status = UTF8ToWideGeneric(utf8, utf8_len, mode, [&res](int c) {
+		IntToWide(c, res);
+	});
 
-		if( !correct )
-		{
-			if( mode == 1 )
-				res += 0xFFFD; // U+FFFD "replacement character"
-
-			was_error = true;
-		}
-		else
-		{
-			IntToWide(z, res);
-		}
-
-		utf8 += len;
-		utf8_len -= len;
-	}
-
-return !was_error;
+	return status;
 }
 
+
+
 
 
 
 /*!
this function converts an utf8 string into wide string (std::wstring)
@@ -425,6 +452,67 @@ return !was_error;
 }
 
 
+// appends the UTF-8 data (utf8_len bytes) to res as wide characters, res is cleared first if clear is true
+// returns false if an invalid sequence was met (if mode is 1 then U+FFFD is put for it), need to be tested
+bool UTF8ToWide(const char * utf8, size_t utf8_len, WTextStream & res, bool clear, int mode)
+{
+	if( clear )
+		res.clear();
+
+	bool status = UTF8ToWideGeneric(utf8, utf8_len, mode, [&res](int c) {
+		IntToWide(c, res);
+	});
+
+	return status;
+}
+
+// the same conversion for a zero terminated string (the length is counted up to the first zero byte)
+// need to be tested
+bool UTF8ToWide(const char * utf8, WTextStream & res, bool clear, int mode)
+{
+size_t utf8_len = 0;
+
+	while( utf8[utf8_len] != 0 )
+		utf8_len += 1;
+
+return UTF8ToWide(utf8, utf8_len, res, clear, mode);
+}
+
+// the same conversion for an std::string (all utf8.size() bytes are converted)
+// need to be tested
+bool UTF8ToWide(const std::string & utf8, WTextStream & res, bool clear, int mode)
+{
+	return UTF8ToWide(utf8.c_str(), utf8.size(), res, clear, mode);
+}
+
+// the same conversion for an input stream, read until UTF8ToInt() returns zero, need to be tested
+// U+FFFD goes through IntToWide() so it is stored as a wide character (res << 0xFFFD would pass an int to the stream)
+bool UTF8ToWide(std::istream & utf8, WTextStream & res, bool clear, int mode)
+{
+int z;
+bool correct, was_error = false;
+
+	if( clear )
+		res.clear();
+
+	while( UTF8ToInt(utf8, z, correct) > 0 )
+	{
+		if( !correct )
+		{
+			if( mode == 1 )
+				IntToWide(0xFFFD, res); // U+FFFD "replacement character"
+
+			was_error = true;
+		}
+		else
+		{
+			IntToWide(z, res);
+		}
+	}
+
+return !was_error;
+}
+
 
 /*!
this function converts one wide character into UTF-8 sequence
@@ -1174,6 +1262,67 @@ return res;
 
 
 
+// Encodes the characters of buffer as UTF-8 and passes the bytes, collected in
+// a 256-byte local array, to write_function(const char *, std::size_t).
+// not tested
+// FIX ME it is not using surrogate pairs from input stream
+// and mode parameter
+// A template parameter is used for the callback because 'auto' in a function
+// parameter list is not standard C++ before C++20.
+template<typename WriteFunction>
+static void WideToUTF8Generic(PT::WTextStream & buffer, int mode, WriteFunction write_function)
+{
+	char utf8_buffer[256];
+	std::size_t buffer_len = sizeof(utf8_buffer) / sizeof(char);
+	std::size_t utf8_sequence_max_length = 10; // room kept free for the next character
+	std::size_t index = 0;
+
+	PT::WTextStream::const_iterator i = buffer.begin();
+
+	while( i != buffer.end() )
+	{
+		if( index + utf8_sequence_max_length > buffer_len )
+		{
+			// not enough room left, flush what has been collected so far
+			write_function(utf8_buffer, index);
+			index = 0;
+		}
+
+		index += PT::IntToUTF8(*i, utf8_buffer + index, buffer_len - index);
+		++i;
+	}
+
+	if( index > 0 )
+	{
+		write_function(utf8_buffer, index);
+	}
+}
+
+
+// Converts buffer to UTF-8 and appends the result to utf8
+// (utf8 is cleared first when clear is true).
+// not tested
+void WideToUTF8(PT::WTextStream & buffer, std::string & utf8, bool clear, int mode)
+{
+	if( clear )
+		utf8.clear();
+
+	WideToUTF8Generic(buffer, mode, [&utf8](const char * utf8_buffer, std::size_t buffer_len){
+		utf8.append(utf8_buffer, buffer_len);
+	});
+}
+
+
+// Converts buffer to UTF-8 and writes the result to the utf8 stream.
+// not tested
+void WideToUTF8(PT::WTextStream & buffer, std::ostream & utf8, int mode)
+{
+	WideToUTF8Generic(buffer, mode, [&utf8](const char * utf8_buffer, std::size_t buffer_len){
+		utf8.write(utf8_buffer, buffer_len);
+	});
+}
+
+
 
 } // namespace
 
diff --git a/utf8/utf8.h b/utf8/utf8.h
index 416d594..f87e15a 100644
--- a/utf8/utf8.h
+++ b/utf8/utf8.h
@@ -5,7 +5,7 @@
  */
 
 /*
- * Copyright (c) 2010-2014, Tomasz Sowa
+ * Copyright (c) 2010-2018, Tomasz Sowa
  * All rights reserved.
  * 
  * Redistribution and use in source and binary forms, with or without
@@ -40,11 +40,14 @@
 
 #include
 #include
+#include "textstream/textstream.h"
 
 
 namespace PT
 {
+
+
 
 /*!
UTF-8, a transformation format of ISO 10646
 	http://tools.ietf.org/html/rfc3629
@@ -83,6 +86,16 @@ bool UTF8ToWide(const std::string & utf8, std::wstring & res, bool cle
 bool UTF8ToWide(std::istream & utf8, std::wstring & res, bool clear = true, int mode = 1);
 
 
+/*!
+	converting an UTF-8 string to wide characters appended to a WTextStream (res is cleared first if clear is true)
+	returns false if an invalid sequence was met, if mode is 1 then U+FFFD is put instead of it (not tested yet)
+*/
+bool UTF8ToWide(const char * utf8, size_t utf8_len, WTextStream & res, bool clear = true, int mode = 1);
+bool UTF8ToWide(const char * utf8, WTextStream & res, bool clear = true, int mode = 1);
+bool UTF8ToWide(const std::string & utf8, WTextStream & res, bool clear = true, int mode = 1);
+bool UTF8ToWide(std::istream & utf8, WTextStream & res, bool clear = true, int mode = 1);
+
+
 /*!
 	converting one int character to UTF-8
 */
@@ -97,18 +110,22 @@ size_t IntToUTF8(int z, std::ostream & utf8);
 bool WideToUTF8(const wchar_t * wide_string, size_t string_len, std::string & utf8, bool clear = true, int mode = 1);
 bool WideToUTF8(const wchar_t * wide_string, std::string & utf8, bool clear = true, int mode = 1);
 bool WideToUTF8(const std::wstring & wide_string, std::string & utf8, bool clear = true, int mode = 1);
+void WideToUTF8(PT::WTextStream & buffer, std::string & utf8, bool clear = true, int mode = 1);// not tested yet, mode is currently ignored
 
 bool WideToUTF8(const wchar_t * wide_string, size_t string_len, std::ostream & utf8, int mode = 1);
 bool WideToUTF8(const wchar_t * wide_string, std::ostream & utf8, int mode = 1);
 bool WideToUTF8(const std::wstring & wide_string, std::ostream & utf8, int mode = 1);
+void WideToUTF8(PT::WTextStream & buffer, std::ostream & utf8, int mode = 1);// not tested yet, mode is currently ignored
 
 bool WideToUTF8(const wchar_t * wide_string, size_t string_len, char * utf8, size_t utf8_len, size_t & utf8_written, int mode = 1);
 bool WideToUTF8(const wchar_t * wide_string, char * utf8, size_t utf8_len, size_t & utf8_written, int mode = 1);
 bool WideToUTF8(const std::wstring & wide_string, char * utf8, size_t utf8_len, size_t & utf8_written, int mode = 1);
+// TODO implement: void WideToUTF8(PT::WTextStream & buffer, char * utf8, size_t utf8_len, size_t & utf8_written, int mode = 1);
 
 bool WideToUTF8(const wchar_t * wide_string, size_t string_len, char * utf8, size_t utf8_len, int mode = 1);
 bool WideToUTF8(const wchar_t * wide_string, char * utf8, size_t utf8_len, int mode = 1);
 bool WideToUTF8(const std::wstring & wide_string, char * utf8, size_t utf8_len, int mode = 1);
+// TODO implement: void WideToUTF8(PT::WTextStream & buffer, char * utf8, size_t utf8_len, int mode = 1);