Compare commits

...

4 Commits

Author SHA1 Message Date
Tomasz Sowa 663233fe2a make all utf8/wide functions available just by including utf8/utf8.h
while here:
- remove utf8/utf8_stream.h, now we only need utf8/utf8.h to include
- add some new methods for converting from a utf8 stream to wide stream/string
- do some improvements in TextStream:
  - don't use temporary objects to convert utf8/wide
  - add put_stream() which takes TextStreamBase<> as its argument
    (uses an iterator instead of get_char() for reading)
  - let operator<<(const Space & space) serialize to json and not to Space
2022-07-30 03:31:18 +02:00
Tomasz Sowa 84e9e6f98f add methods to Space that take a pointer to a string along with the length
Space::Space(const char * str, size_t len)
Space::Space(const wchar_t * str, size_t len)
Space::set(const char * str, size_t len)
Space::set(const wchar_t * str, size_t len)
Space::add_to_table(const char * val, size_t len)
Space::add_to_table(const wchar_t * val, size_t len)
Space::add(const wchar_t * field, const char * val, size_t len)
Space::add(const wchar_t * field, const wchar_t * val, size_t len)
Space::add(const std::wstring & field, const char * val, size_t len)
Space::add(const std::wstring & field, const wchar_t * val, size_t len)
2022-07-30 03:12:38 +02:00
Tomasz Sowa 9a596dd097 fix: return a correct value from Log::size and Log::capacity 2022-07-30 02:45:19 +02:00
Tomasz Sowa aa97fe2811 add methods for trimming \r\n from the end of a string
add:
void trim_last_new_lines(std::string & str, bool check_carriage_return_too = true);
void trim_last_new_lines(std::wstring & str, bool check_carriage_return_too = true);
2022-07-30 02:43:29 +02:00
16 changed files with 410 additions and 207 deletions

View File

@ -2,51 +2,45 @@
./convert/inttostr.o: ./convert/inttostr.h
./convert/misc.o: ./convert/misc.h ./convert/text.h textstream/stream.h
./convert/misc.o: textstream/types.h utf8/utf8_stream.h
./convert/misc.o: textstream/textstream.h textstream/stream.h space/space.h
./convert/misc.o: convert/inttostr.h utf8/utf8.h utf8/utf8_templates.h
./convert/misc.o: utf8/utf8_private.h date/date.h membuffer/membuffer.h
./convert/misc.o: textstream/types.h ./convert/inttostr.h
./convert/misc.o: textstream/types.h ./convert/inttostr.h utf8/utf8.h
./convert/misc.o: utf8/utf8_templates.h utf8/utf8_private.h
./convert/text.o: ./convert/text.h ./convert/text_private.h
./convert/double.o: ./convert/double.h textstream/textstream.h
./convert/double.o: textstream/stream.h space/space.h textstream/types.h
./convert/double.o: convert/inttostr.h utf8/utf8.h textstream/stream.h
./convert/double.o: utf8/utf8_templates.h utf8/utf8_private.h date/date.h
./convert/double.o: membuffer/membuffer.h textstream/types.h
./convert/double.o: utf8/utf8_stream.h
./convert/baseparser.o: ./convert/baseparser.h textstream/textstream.h
./convert/baseparser.o: textstream/stream.h space/space.h textstream/types.h
./convert/baseparser.o: convert/inttostr.h utf8/utf8.h textstream/stream.h
./convert/baseparser.o: utf8/utf8_templates.h utf8/utf8_private.h date/date.h
./convert/baseparser.o: membuffer/membuffer.h textstream/types.h
./convert/baseparser.o: utf8/utf8_stream.h
./date/date.o: ./date/date.h convert/inttostr.h
./log/filelog.o: ./log/filelog.h textstream/textstream.h textstream/stream.h
./log/filelog.o: space/space.h textstream/types.h convert/inttostr.h
./log/filelog.o: utf8/utf8.h textstream/stream.h utf8/utf8_templates.h
./log/filelog.o: utf8/utf8_private.h date/date.h membuffer/membuffer.h
./log/filelog.o: textstream/types.h utf8/utf8_stream.h
./log/filelog.o: textstream/types.h
./log/log.o: ./log/log.h textstream/textstream.h textstream/stream.h
./log/log.o: space/space.h textstream/types.h convert/inttostr.h utf8/utf8.h
./log/log.o: textstream/stream.h utf8/utf8_templates.h utf8/utf8_private.h
./log/log.o: date/date.h membuffer/membuffer.h textstream/types.h
./log/log.o: utf8/utf8_stream.h ./log/filelog.h
./log/log.o: ./log/filelog.h
./space/space.o: ./space/space.h textstream/types.h convert/inttostr.h
./space/space.o: utf8/utf8.h textstream/stream.h utf8/utf8_templates.h
./space/space.o: utf8/utf8_private.h convert/convert.h ./convert/inttostr.h
./space/space.o: convert/patternreplacer.h textstream/textstream.h
./space/space.o: textstream/stream.h space/space.h date/date.h
./space/space.o: membuffer/membuffer.h textstream/types.h utf8/utf8_stream.h
./space/space.o: convert/strtoint.h ./convert/text.h ./convert/misc.h
./space/space.o: ./convert/double.h
./space/space.o: membuffer/membuffer.h textstream/types.h convert/strtoint.h
./space/space.o: ./convert/text.h ./convert/misc.h ./convert/double.h
./space/spaceparser.o: ./space/spaceparser.h ./space/space.h
./space/spaceparser.o: textstream/types.h convert/inttostr.h utf8/utf8.h
./space/spaceparser.o: textstream/stream.h utf8/utf8_templates.h
./space/spaceparser.o: utf8/utf8_private.h convert/baseparser.h
./space/spaceparser.o: textstream/textstream.h textstream/stream.h
./space/spaceparser.o: space/space.h date/date.h membuffer/membuffer.h
./space/spaceparser.o: textstream/types.h utf8/utf8_stream.h
./space/spaceparser.o: convert/strtoint.h ./convert/text.h ./convert/misc.h
./space/spaceparser.o: textstream/types.h convert/strtoint.h ./convert/text.h
./space/spaceparser.o: ./convert/misc.h
./utf8/utf8.o: ./utf8/utf8.h textstream/stream.h utf8/utf8_templates.h
./utf8/utf8.o: utf8/utf8_private.h
./utf8/utf8_private.o: utf8/utf8_private.h
@ -55,7 +49,7 @@
./csv/csvparser.o: utf8/utf8_templates.h utf8/utf8_private.h
./csv/csvparser.o: convert/baseparser.h textstream/textstream.h
./csv/csvparser.o: textstream/stream.h date/date.h membuffer/membuffer.h
./csv/csvparser.o: textstream/types.h utf8/utf8_stream.h
./csv/csvparser.o: textstream/types.h
./mainoptions/mainoptionsparser.o: ./mainoptions/mainoptionsparser.h
./mainoptions/mainoptionsparser.o: space/space.h textstream/types.h
./mainoptions/mainoptionsparser.o: convert/inttostr.h utf8/utf8.h
@ -67,10 +61,9 @@
./html/bbcodeparser.o: convert/inttostr.h utf8/utf8.h textstream/stream.h
./html/bbcodeparser.o: utf8/utf8_templates.h utf8/utf8_private.h date/date.h
./html/bbcodeparser.o: membuffer/membuffer.h textstream/types.h
./html/bbcodeparser.o: utf8/utf8_stream.h
./html/htmlparser.o: ./html/htmlparser.h convert/baseparser.h
./html/htmlparser.o: textstream/textstream.h textstream/stream.h
./html/htmlparser.o: space/space.h textstream/types.h convert/inttostr.h
./html/htmlparser.o: utf8/utf8.h textstream/stream.h utf8/utf8_templates.h
./html/htmlparser.o: utf8/utf8_private.h date/date.h membuffer/membuffer.h
./html/htmlparser.o: textstream/types.h utf8/utf8_stream.h convert/text.h
./html/htmlparser.o: textstream/types.h convert/text.h

View File

@ -34,7 +34,6 @@
#include "baseparser.h"
#include "utf8/utf8.h"
#include "utf8/utf8_stream.h"
namespace pt

View File

@ -39,7 +39,6 @@
#include "text.h"
#include "textstream/stream.h"
#include "textstream/types.h"
#include "utf8/utf8_stream.h"
namespace pt

View File

@ -5,7 +5,7 @@
*/
/*
* Copyright (c) 2017-2021, Tomasz Sowa
* Copyright (c) 2017-2022, Tomasz Sowa
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
@ -532,6 +532,18 @@ void trim(std::wstring & str, wchar_t c)
}
/*
    removes the new line characters ('\n') from the end of a string,
    if check_carriage_return_too is true then the trailing carriage return
    characters ('\r') are removed as well

    the work is done by pt_private::trim_last_generic()
*/
void trim_last_new_lines(std::string & str, bool check_carriage_return_too)
{
    const wchar_t new_line = L'\n';
    const wchar_t carriage_return = L'\r';

    pt_private::trim_last_generic(str, new_line, check_carriage_return_too, carriage_return);
}
/*
    a wide string version: removes the new line characters (L'\n') from the end
    of a string, if check_carriage_return_too is true then the trailing
    carriage return characters (L'\r') are removed as well

    the work is done by pt_private::trim_last_generic()
*/
void trim_last_new_lines(std::wstring & str, bool check_carriage_return_too)
{
    const wchar_t new_line = L'\n';
    const wchar_t carriage_return = L'\r';

    pt_private::trim_last_generic(str, new_line, check_carriage_return_too, carriage_return);
}
}

View File

@ -201,6 +201,8 @@ void trim_last(std::wstring & str, wchar_t c);
void trim(std::string & str, wchar_t c);
void trim(std::wstring & str, wchar_t c);
void trim_last_new_lines(std::string & str, bool check_carriage_return_too = true);
void trim_last_new_lines(std::wstring & str, bool check_carriage_return_too = true);

View File

@ -5,7 +5,7 @@
*/
/*
* Copyright (c) 2021, Tomasz Sowa
* Copyright (c) 2021-2022, Tomasz Sowa
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
@ -367,24 +367,24 @@ size_t i;
/*
    removes characters from the end of a string

    input:
        s      - a string to trim (std::string, std::wstring or a type with
                 the same size(), operator[] and erase() methods)
        c      - the character to remove
        has_c2 - true if the c2 character should be removed as well
        c2     - an optional second character to remove (used only if has_c2 is true)

    trailing characters are removed as long as they are equal to 'c'
    (or to 'c2' if has_c2 is set), if the whole string consists of such
    characters then the string becomes empty
*/
template<typename StringType>
void trim_last_generic(StringType & s, wchar_t c, bool has_c2 = false, wchar_t c2 = 0)
{
    size_t new_size = s.size();

    // looking for the first character from the end which is neither 'c' nor 'c2'
    while( new_size > 0 && (s[new_size - 1] == c || (has_c2 && s[new_size - 1] == c2)) )
        --new_size;

    // deleting 'c' and 'c2' characters at the end,
    // erase() called with an index equal to size() does nothing
    s.erase(new_size, StringType::npos);
}

View File

@ -104,7 +104,7 @@ size_t Log::size() const
{
if( buffer )
{
buffer->size();
return buffer->size();
}
return 0;
@ -124,7 +124,7 @@ size_t Log::capacity() const
{
if( buffer )
{
buffer->capacity();
return buffer->capacity();
}
return 0;

View File

@ -161,12 +161,24 @@ Space::Space(const char * str)
set(str);
}
/*
    creates a Space from a char buffer with an explicit length:
    initialize() is called first and then the value is assigned with set(str, len)
    (set() accepts a null pointer too)
*/
Space::Space(const char * str, size_t len)
{
    this->initialize();
    this->set(str, len);
}
/*
    creates a Space from a wide string pointer:
    initialize() is called first and then the value is assigned with set(str)
*/
Space::Space(const wchar_t * str)
{
    this->initialize();
    this->set(str);
}
/*
    creates a Space from a wide char buffer with an explicit length:
    initialize() is called first and then the value is assigned with set(str, len)
    (set() accepts a null pointer too)
*/
Space::Space(const wchar_t * str, size_t len)
{
    this->initialize();
    this->set(str, len);
}
Space::Space(const std::string & str)
{
initialize();
@ -383,6 +395,19 @@ void Space::set(const char * str)
}
}
/*
    sets the value from a char buffer

    str - a pointer to the first character, can be a null pointer
    len - how many characters from str should be taken

    for a null pointer only initialize_value_null_if_needed() is called,
    otherwise the string value is prepared and 'len' characters are copied to it
*/
void Space::set(const char * str, size_t len)
{
    if( !str )
    {
        initialize_value_null_if_needed();
        return;
    }

    initialize_value_string_if_needed();
    value.value_string.assign(str, len);
}
void Space::set(const wchar_t * str)
{
if( str == nullptr )
@ -396,6 +421,19 @@ void Space::set(const wchar_t * str)
}
}
/*
    sets the value from a wide char buffer

    str - a pointer to the first character, can be a null pointer
    len - how many characters from str should be taken

    for a null pointer only initialize_value_null_if_needed() is called,
    otherwise the wide string value is prepared and 'len' characters are copied to it
*/
void Space::set(const wchar_t * str, size_t len)
{
    if( !str )
    {
        initialize_value_null_if_needed();
        return;
    }

    initialize_value_wstring_if_needed();
    value.value_wstring.assign(str, len);
}
void Space::set(const std::string & str)
{
initialize_value_string_if_needed();
@ -504,11 +542,21 @@ Space & Space::add(const char * val)
return add_generic(val);
}
/*
    adds a new item to the table, the item is created from
    the first 'len' characters pointed to by 'val'

    returns a reference to the item returned by add_generic_len()
*/
Space & Space::add_to_table(const char * val, size_t len)
{
    Space & new_item = add_generic_len(val, len);
    return new_item;
}
/*
    adds a wide string value, the work is forwarded to add_generic()

    returns the reference returned by add_generic()
*/
Space & Space::add(const wchar_t * val)
{
    Space & new_item = add_generic(val);
    return new_item;
}
/*
    adds a new item to the table, the item is created from
    the first 'len' wide characters pointed to by 'val'

    returns a reference to the item returned by add_generic_len()
*/
Space & Space::add_to_table(const wchar_t * val, size_t len)
{
    Space & new_item = add_generic_len(val, len);
    return new_item;
}
Space & Space::add(const std::string & val)
{
return add_generic(val);
@ -617,11 +665,21 @@ Space & Space::add(const wchar_t * field, const char * val)
return add_generic(field, val);
}
/*
    sets the object field 'field' to a value made from
    the first 'len' characters pointed to by 'val'

    returns a reference to the Space stored under the field
    (the reference comes from add_generic_len())
*/
Space & Space::add(const wchar_t * field, const char * val, size_t len)
{
    Space & field_value = add_generic_len(field, val, len);
    return field_value;
}
/*
    sets the object field 'field' to the wide string 'val',
    the work is forwarded to add_generic()

    returns the reference returned by add_generic()
*/
Space & Space::add(const wchar_t * field, const wchar_t * val)
{
    Space & field_value = add_generic(field, val);
    return field_value;
}
/*
    sets the object field 'field' to a value made from
    the first 'len' wide characters pointed to by 'val'

    returns a reference to the Space stored under the field
    (the reference comes from add_generic_len())
*/
Space & Space::add(const wchar_t * field, const wchar_t * val, size_t len)
{
    Space & field_value = add_generic_len(field, val, len);
    return field_value;
}
Space & Space::add(const wchar_t * field, const std::string & val)
{
return add_generic(field, val);
@ -676,92 +734,102 @@ Space & Space::add_empty_space(const wchar_t * field)
Space & Space::add(const std::wstring & field, bool val)
{
return add_generic(field, val);
return add_generic_string(field, val);
}
Space & Space::add(const std::wstring & field, short val)
{
return add_generic(field, val);
return add_generic_string(field, val);
}
Space & Space::add(const std::wstring & field, int val)
{
return add_generic(field, val);
return add_generic_string(field, val);
}
Space & Space::add(const std::wstring & field, long val)
{
return add_generic(field, val);
return add_generic_string(field, val);
}
Space & Space::add(const std::wstring & field, long long val)
{
return add_generic(field, val);
return add_generic_string(field, val);
}
Space & Space::add(const std::wstring & field, unsigned short val)
{
return add_generic(field, val);
return add_generic_string(field, val);
}
Space & Space::add(const std::wstring & field, unsigned int val)
{
return add_generic(field, val);
return add_generic_string(field, val);
}
Space & Space::add(const std::wstring & field, unsigned long val)
{
return add_generic(field, val);
return add_generic_string(field, val);
}
Space & Space::add(const std::wstring & field, unsigned long long val)
{
return add_generic(field, val);
return add_generic_string(field, val);
}
Space & Space::add(const std::wstring & field, float val)
{
return add_generic(field, val);
return add_generic_string(field, val);
}
Space & Space::add(const std::wstring & field, double val)
{
return add_generic(field, val);
return add_generic_string(field, val);
}
Space & Space::add(const std::wstring & field, long double val)
{
return add_generic(field, val);
return add_generic_string(field, val);
}
Space & Space::add(const std::wstring & field, const char * val)
{
return add_generic(field, val);
return add_generic_string(field, val);
}
/*
    sets the object field 'field' to a value made from
    the first 'len' characters pointed to by 'val'

    the same as add(const wchar_t * field, const char * val, size_t len)
    but the field name is given as a std::wstring
*/
Space & Space::add(const std::wstring & field, const char * val, size_t len)
{
    Space & field_value = add_generic_string_len(field, val, len);
    return field_value;
}
Space & Space::add(const std::wstring & field, const wchar_t * val)
{
return add_generic(field, val);
return add_generic_string(field, val);
}
/*
    sets the object field 'field' to a value made from
    the first 'len' wide characters pointed to by 'val'

    the same as add(const wchar_t * field, const wchar_t * val, size_t len)
    but the field name is given as a std::wstring
*/
Space & Space::add(const std::wstring & field, const wchar_t * val, size_t len)
{
    Space & field_value = add_generic_string_len(field, val, len);
    return field_value;
}
Space & Space::add(const std::wstring & field, const std::string & val)
{
return add_generic(field, val);
return add_generic_string(field, val);
}
Space & Space::add(const std::wstring & field, const std::wstring & val)
{
return add_generic(field, val);
return add_generic_string(field, val);
}
Space & Space::add(const std::wstring & field, const Space & space)
{
return add_generic(field, space);
return add_generic_string(field, space);
}
Space & Space::add(const std::wstring & field, const Space * space)
{
return add_generic(field, space);
return add_generic_string(field, space);
}
Space & Space::add(const std::wstring & field, Space && space)
@ -771,12 +839,12 @@ Space & Space::add(const std::wstring & field, Space && space)
Space & Space::add(const std::wstring & field, const Date & date)
{
return add_generic(field, date);
return add_generic_string(field, date);
}
Space & Space::add_empty_space(const std::wstring & field)
{
return add_generic(field, static_cast<Space*>(nullptr));
return add_generic_string(field, static_cast<Space*>(nullptr));
}

View File

@ -200,7 +200,9 @@ public:
Space(double val);
Space(long double val);
Space(const char * str);
Space(const char * str, size_t len);
Space(const wchar_t * str);
Space(const wchar_t * str, size_t len);
Space(const std::string & str);
Space(const std::wstring & str);
Space(const Space * space);
@ -235,7 +237,9 @@ public:
void set(double val);
void set(long double val);
void set(const char * str);
void set(const char * str, size_t len);
void set(const wchar_t * str);
void set(const wchar_t * str, size_t len);
void set(const std::string & str);
void set(const std::wstring & str);
void set(const Space & space);
@ -258,7 +262,9 @@ public:
Space & add(double val);
Space & add(long double val);
Space & add(const char * val);
Space & add_to_table(const char * val, size_t len); // the name add would collapse with add(const char * field, unsigned long val) below;
Space & add(const wchar_t * val);
Space & add_to_table(const wchar_t * val, size_t len); // the name add would collapse with add(const wchar_t * field, unsigned long val) below;
Space & add(const std::string & val);
Space & add(const std::wstring & val);
Space & add(const Space & space);
@ -283,7 +289,9 @@ public:
Space & add(const wchar_t * field, double val);
Space & add(const wchar_t * field, long double val);
Space & add(const wchar_t * field, const char * val);
Space & add(const wchar_t * field, const char * val, size_t len);
Space & add(const wchar_t * field, const wchar_t * val);
Space & add(const wchar_t * field, const wchar_t * val, size_t len);
Space & add(const wchar_t * field, const std::string & val);
Space & add(const wchar_t * field, const std::wstring & val);
Space & add(const wchar_t * field, const Space & space);
@ -306,6 +314,8 @@ public:
Space & add(const std::wstring & field, long double val);
Space & add(const std::wstring & field, const char * val);
Space & add(const std::wstring & field, const wchar_t * val);
Space & add(const std::wstring & field, const char * val, size_t len);
Space & add(const std::wstring & field, const wchar_t * val, size_t len);
Space & add(const std::wstring & field, const std::string & val);
Space & add(const std::wstring & field, const std::wstring & val);
Space & add(const std::wstring & field, const Space & space);
@ -653,6 +663,16 @@ protected:
return *value.value_table.back();
}
/*
    a helper for adding a value with an explicit length to the table

    val - a value passed to the Space(val, len) constructor
    len - the length passed to the Space(val, len) constructor

    the table is prepared with initialize_value_table_if_needed(),
    a new Space is allocated and put at the end of the table,
    a reference to this new Space is returned
*/
template<typename ArgType>
Space & add_generic_len(const ArgType & val, size_t len)
{
    initialize_value_table_if_needed();
    value.value_table.push_back(new Space(val, len));

    return *value.value_table.back();
}
template<typename ArgType>
Space & add_generic(const wchar_t * field, const ArgType & val)
@ -672,13 +692,35 @@ protected:
return *(insert_res.first->second);
}
/*
    a helper for setting an object field from a value with an explicit length

    field - the name of the field
    val   - a value passed to Space(val, len) or to set(val, len)
    len   - the length passed together with val

    if there was no such field then a new Space(val, len) is created for it,
    otherwise the existing Space is changed with set(val, len)

    returns a reference to the Space stored under the field
*/
template<typename ArgType>
Space & add_generic_len(const wchar_t * field, const ArgType & val, size_t len)
{
    initialize_value_object_if_needed();

    // the entry is inserted with a null pointer first, the pointer is replaced below
    auto [position, inserted] = value.value_object.insert(std::make_pair(field, nullptr));
    auto & field_space = position->second;

    if( !inserted )
    {
        // the field already exists, only its value is replaced
        field_space->set(val, len);
    }
    else
    {
        field_space = new Space(val, len);
    }

    return *field_space;
}
template<typename ArgType>
Space & add_generic(const std::wstring & field, const ArgType & val)
Space & add_generic_string(const std::wstring & field, const ArgType & val)
{
return add_generic(field.c_str(), val);
}
/*
    the same as add_generic_len(const wchar_t * field, val, len)
    but the field name is given as a std::wstring
    (the name is passed further as field.c_str())
*/
template<typename ArgType>
Space & add_generic_string_len(const std::wstring & field, const ArgType & val, size_t len)
{
    const wchar_t * field_name = field.c_str();
    return add_generic_len(field_name, val, len);
}
template<typename ArgType>
ArgType to_generic_numeric_signed_value() const

View File

@ -43,7 +43,6 @@
#include "membuffer/membuffer.h"
#include "types.h"
#include "utf8/utf8.h"
#include "utf8/utf8_stream.h"
// for snprintf
#include <cstdio>
@ -213,6 +212,9 @@ public:
protected:
template<typename char_type_to, size_t stack_size_to, size_t heap_block_size_to>
void put_stream(const TextStreamBase<char_type_to, stack_size_to, heap_block_size_to> & stream);
void put_stream(const Stream & stream);
@ -599,8 +601,9 @@ void TextStreamBase<char_type, stack_size, heap_block_size>::to_str(std::string
if constexpr (sizeof(char_type) == sizeof(char) )
{
const_iterator i = begin();
const_iterator i_end = end();
for( ; i != end() ; ++i)
for( ; i != i_end ; ++i)
str += *i;
}
else
@ -629,10 +632,7 @@ void TextStreamBase<char_type, stack_size, heap_block_size>::to_str(std::wstring
}
else
{
// IMPROVE ME don't use a temporary object
std::string utf8;
to_str(utf8);
utf8_to_wide(utf8, str, false);
utf8_to_wide(*this, str, false);
}
}
@ -715,7 +715,14 @@ template<typename char_type, size_t stack_size, size_t heap_block_size>
TextStreamBase<char_type, stack_size, heap_block_size> &
TextStreamBase<char_type, stack_size, heap_block_size>::operator<<(const std::string & str)
{
operator<<(str.c_str());
if constexpr ( sizeof(char_type) == sizeof(char) )
{
buffer.append(str.c_str(), str.size());
}
else
{
utf8_to_wide(str, *this, false);
}
return *this;
}
@ -733,7 +740,7 @@ TextStreamBase<char_type, stack_size, heap_block_size>::operator<<(const wchar_t
}
else
{
wide_to_utf8(str, *this);
wide_to_utf8(str, *this, false);
}
return *this;
@ -745,7 +752,14 @@ template<typename char_type, size_t stack_size, size_t heap_block_size>
TextStreamBase<char_type, stack_size, heap_block_size> &
TextStreamBase<char_type, stack_size, heap_block_size>::operator<<(const std::wstring & str)
{
operator<<(str.c_str());
if constexpr (sizeof(char_type) == sizeof(wchar_t) )
{
buffer.append(str.c_str(), str.size());
}
else
{
wide_to_utf8(str, *this, false);
}
return *this;
}
@ -988,6 +1002,44 @@ TextStreamBase<char_type, stack_size, heap_block_size>::operator<<(const Stream
/*
    appends the content of another TextStreamBase<> to this stream

    the kind of this stream is taken from sizeof(char_type) and the kind of
    the argument from its is_char_stream() / is_wchar_stream() methods:
    - utf8 to utf8 or wide to wide: characters are copied one by one with operator<<
    - utf8 to wide: utf8_to_wide() is used
    - wide to utf8: wide_stream_to_utf8() is used
    - in other cases a message that the conversion is not implemented is put to this stream
*/
template<typename char_type, size_t stack_size, size_t heap_block_size>
template<typename arg_char_type, size_t arg_stack_size, size_t arg_heap_block_size>
void TextStreamBase<char_type, stack_size, heap_block_size>::put_stream(
    const TextStreamBase<arg_char_type, arg_stack_size, arg_heap_block_size> & stream
)
{
    constexpr bool this_is_utf8 = (sizeof(char_type) == sizeof(char));
    constexpr bool this_is_wide = (sizeof(char_type) == sizeof(wchar_t));

    if( (this_is_utf8 && stream.is_char_stream()) || (this_is_wide && stream.is_wchar_stream()) )
    {
        // from utf8 to utf8 or from wide to wide
        for(auto it = stream.begin() ; it != stream.end() ; ++it)
        {
            operator<<(*it);
        }

        return;
    }

    if( this_is_wide && stream.is_char_stream() )
    {
        // from utf8 to wide
        utf8_to_wide(stream, *this, false);
        return;
    }

    if( this_is_utf8 && stream.is_wchar_stream() )
    {
        // from wide to utf8
        wide_stream_to_utf8(stream, *this, false);
        return;
    }

    operator<<("such conversion is not implemented");
}
template<typename char_type, size_t stack_size, size_t heap_block_size>
void TextStreamBase<char_type, stack_size, heap_block_size>::put_stream(const Stream & stream)
{
@ -1036,7 +1088,7 @@ template<typename char_type, size_t stack_size, size_t heap_block_size>
TextStreamBase<char_type, stack_size, heap_block_size> &
TextStreamBase<char_type, stack_size, heap_block_size>::operator<<(const Space & space)
{
space.serialize_to_space_stream(*this, true);
space.serialize_to_json_stream(*this, true);
return *this;
}
@ -1059,11 +1111,6 @@ TextStreamBase<char_type, stack_size, heap_block_size> &
TextStreamBase<char_type, stack_size, heap_block_size>::operator<<(
const TextStreamBase<arg_char_type, arg_stack_size, arg_heap_block_size> & arg)
{
/*
* in the future we can have a faster implementation
* which uses iterators instead of get_char() and get_wchar() methods
*
*/
put_stream(arg);
return *this;
@ -1101,6 +1148,35 @@ TextStreamBase<char_type, stack_size, heap_block_size>::fill_up_if_needed(wchar_
}
/*!
this function converts an UTF-8 stream into wide stream or wide string
(is declared in utf8/utf8.h)
input:
iterator_in - an TextStream iterator for reading from
iterator_end - an end iterator (can be returned by end() method from TextStream)
output:
out_stream - an output wide stream or wide string
this function returns false if there were some errors when converting
*/
template<size_t stack_size, size_t heap_block_size, typename StreamOrStringType>
bool utf8_to_wide(
const TextStreamBase<char, stack_size, heap_block_size> & utf8,
StreamOrStringType & out_stream,
bool clear_stream,
int mode
)
{
typename TextStreamBase<char, stack_size, heap_block_size>::const_iterator i_begin = utf8.begin();
return utf8_to_wide(i_begin, utf8.end(), out_stream, clear_stream, mode);
}
} // namespace

View File

@ -5,7 +5,7 @@
*/
/*
* Copyright (c) 2010-2021, Tomasz Sowa
* Copyright (c) 2010-2022, Tomasz Sowa
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
@ -128,6 +128,25 @@ bool surrogate_pair_to_int(int c1, int c2, int & z)
/*
    appends one character given as an int to a wide string

    if wchar_t has two bytes and the character is greater than 0xffff
    then the character is stored as a UTF-16 surrogate pair (two wchar_t values),
    otherwise the character is stored as a single wchar_t
*/
void int_to_wide(int c, std::wstring & res)
{
    const bool use_surrogate_pair = (sizeof(wchar_t) == 2) && (c > 0xffff);

    if( !use_surrogate_pair )
    {
        res += static_cast<wchar_t>(c);
        return;
    }

    // UTF16 surrogate pairs
    const int offset = c - 0x10000;
    const wchar_t high = static_cast<wchar_t>(((offset >> 10) & 0x3FF) + 0xD800);
    const wchar_t low = static_cast<wchar_t>((offset & 0x3FF) + 0xDC00);

    res += high;
    res += low;
}
/*!
this function converts one UTF-8 character into one wide-character
@ -312,7 +331,7 @@ unsigned char uz;
uz = utf8.get_char(stream_index + i);
if( !private_namespace::utf8_to_int_add_next_octet(uz, res) )
return i;
return i + 1;
}
if( utf8_check_range(res, len) )
@ -330,26 +349,6 @@ unsigned char uz;
/*
*/
/*
    a file-local helper: appends one character given as an int to a wide string,
    a character greater than 0xffff is stored as a UTF-16 surrogate pair
    when wchar_t has two bytes
*/
static void int_to_wide(int c, std::wstring & res)
{
    const bool single_unit = !(sizeof(wchar_t) == 2 && c > 0xffff);

    if( single_unit )
    {
        res.push_back(static_cast<wchar_t>(c));
        return;
    }

    // UTF16 surrogate pairs
    c -= 0x10000;
    res.push_back(static_cast<wchar_t>(((c >> 10) & 0x3FF) + 0xD800));
    res.push_back(static_cast<wchar_t>((c & 0x3FF) + 0xDC00));
}
/*!

View File

@ -5,7 +5,7 @@
*/
/*
* Copyright (c) 2010-2021, Tomasz Sowa
* Copyright (c) 2010-2022, Tomasz Sowa
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
@ -128,6 +128,9 @@ size_t utf8_to_int(const std::string & utf8, int & res, bool & correct
size_t utf8_to_int(std::istream & utf8, int & res, bool & correct);
size_t utf8_to_int(const Stream & utf8, size_t stream_index, int & res, bool & correct);
template<typename StreamIteratorType>
size_t utf8_to_int(StreamIteratorType & iterator_in, const StreamIteratorType & iterator_end, int & res, bool & correct);
/*!
converting one character from int to wide stream
@ -136,6 +139,11 @@ template<typename StreamType>
void int_to_wide(int c, StreamType & res);
/*!
converting one character from int to wide string
*/
void int_to_wide(int c, std::wstring & res);
/*!
converting UTF-8 string to a wide string
@ -157,8 +165,18 @@ bool utf8_to_wide(const std::string & utf8, StreamType & res, bool clear = true,
template<typename StreamType>
bool utf8_to_wide(std::istream & utf8, StreamType & res, bool clear = true, int mode = 1); // need to be tested
template<typename StreamType>
bool utf8_to_wide(const Stream & stream, StreamType & res, bool clear = true, int mode = 1);
template<typename StreamOrStringType>
bool utf8_to_wide(const Stream & stream, StreamOrStringType & res, bool clear = true, int mode = 1);
template<typename StreamIteratorType, typename StreamOrStringType>
bool utf8_to_wide(StreamIteratorType & iterator_in, const StreamIteratorType & iterator_end, StreamOrStringType & out_stream, bool clear_stream = true, int mode = 1);
template<typename CharT, size_t stack_size, size_t heap_block_size>
class TextStreamBase;
// defined at the end in textstream.h
template<size_t stack_size, size_t heap_block_size, typename StreamOrStringType>
bool utf8_to_wide(const TextStreamBase<char, stack_size, heap_block_size> & utf8, StreamOrStringType & out_stream, bool clear_stream = true, int mode = 1);
/*

View File

@ -5,7 +5,7 @@
*/
/*
* Copyright (c) 2021, Tomasz Sowa
* Copyright (c) 2021-2022, Tomasz Sowa
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
@ -56,7 +56,7 @@ namespace private_namespace
bool utf8_to_int_first_octet(unsigned char uz, size_t & len, int & res);
bool utf8_to_int_add_next_octet(unsigned char uz, int & res);
size_t wide_to_int(const wchar_t * wide_string, size_t string_len, int & z, bool & correct);
size_t wide_to_int(const wchar_t * wide_string, size_t string_len, int & z, bool & correct); // may these methods make public?
size_t wide_to_int(const wchar_t * wide_string, int & z, bool & correct);
size_t wide_one_to_utf8(const wchar_t * wide_string, size_t string_len, char * utf8, size_t utf8_len,

View File

@ -1,101 +0,0 @@
/*
* This file is a part of PikoTools
* and is distributed under the 2-Clause BSD licence.
* Author: Tomasz Sowa <t.sowa@ttmath.org>
*/
/*
* Copyright (c) 2021-2022, Tomasz Sowa
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* 1. Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
*
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*
*/
#ifndef headerfile_pikotools_src_utf8_utf8_stream
#define headerfile_pikotools_src_utf8_utf8_stream
#include "textstream/textstream.h"
namespace pt
{
/*!
    this function converts one UTF-8 character into one wide-character

    input:
        iterator_in - an TextStream iterator for reading from
                      (it is moved forward by every octet which was read)
        iterator_end - an end iterator (can be returned by end() method from TextStream)

    output:
        res - an output character
        correct - true if it is a correct character

    the function returns how many characters have been used from the input stream
*/
template<typename StreamIteratorType>
size_t utf8_to_int(
    StreamIteratorType & iterator_in,
    const StreamIteratorType & iterator_end,
    int & res,
    bool & correct)
{
    size_t i, len;
    unsigned char uz;

    res = 0;
    correct = false;

    if( iterator_in == iterator_end )
        return 0;

    uz = *iterator_in;
    ++iterator_in;

    if( !private_namespace::utf8_to_int_first_octet(uz, len, res) )
        return 1;

    for(i=1 ; i<len ; ++i)
    {
        // nothing more to read: 'i' octets have been used so far
        if( iterator_in == iterator_end )
            return i;

        uz = *iterator_in;
        ++iterator_in;

        // the incorrect octet has already been read from the stream
        // so it has to be counted as well ('i' octets before it plus this one)
        if( !private_namespace::utf8_to_int_add_next_octet(uz, res) )
            return i + 1;
    }

    if( utf8_check_range(res, len) )
        correct = true;

    return len;
}
}
#endif

View File

@ -5,7 +5,7 @@
*/
/*
* Copyright (c) 2021, Tomasz Sowa
* Copyright (c) 2021-2022, Tomasz Sowa
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
@ -62,6 +62,55 @@ void int_to_wide(int c, StreamType & res)
}
/*!
    this function converts one UTF-8 character into an int

    input:
        iterator_in - a stream iterator for reading from, it is moved forward by every
                      octet which was read (the stream can be of any kind,
                      only the *, ++ and == operators are used)
        iterator_end - an end iterator

    output:
        res - an output character
        correct - true if it is a correct character

    the function returns how many characters have been used from the input stream
*/
template<typename StreamIteratorType>
size_t utf8_to_int(StreamIteratorType & iterator_in, const StreamIteratorType & iterator_end, int & res, bool & correct)
{
    res = 0;
    correct = false;

    if( iterator_in == iterator_end )
        return 0;

    size_t len;
    unsigned char octet = *iterator_in;
    ++iterator_in;

    if( !private_namespace::utf8_to_int_first_octet(octet, len, res) )
        return 1;

    // the first octet has already been used
    size_t used = 1;

    while( used < len )
    {
        if( iterator_in == iterator_end )
            return used;

        octet = *iterator_in;
        ++iterator_in;
        ++used;

        // an incorrect octet is counted as used too
        if( !private_namespace::utf8_to_int_add_next_octet(octet, res) )
            return used;
    }

    if( utf8_check_range(res, len) )
        correct = true;

    return len;
}
/*!
@ -136,7 +185,7 @@ return !was_error;
/*
this function converts a UTF-8 stream into wide stream
this function converts a UTF-8 stream into a wide stream or a wide string
input:
stream - a UTF-8 stream for converting
@ -145,12 +194,12 @@ input:
1: put U+FFFD "replacement character" istead of the invalid character (default)
output:
res - a wide stream for the output sequence
res - a wide stream or a wide string for the output sequence
this function returns false if there were some errors when converting
*/
template<typename StreamType>
bool utf8_to_wide(const Stream & stream, StreamType & res, bool clear, int mode)
template<typename StreamOrStringType>
bool utf8_to_wide(const Stream & stream, StreamOrStringType & res, bool clear, int mode)
{
size_t len;
bool correct;
@ -172,7 +221,7 @@ bool utf8_to_wide(const Stream & stream, StreamType & res, bool clear, int mode)
if( !correct )
{
if( mode == 1 )
res << 0xFFFD; // U+FFFD "replacement character"
int_to_wide(0xFFFD, res); // U+FFFD "replacement character"
was_error = true;
}
@ -194,6 +243,51 @@ bool utf8_to_wide(const Stream & stream, StreamType & res, bool clear, int mode)
/*!
    this function converts a sequence of UTF-8 characters into a wide stream or a wide string

    input:
        iterator_in - a stream iterator for reading from, after the call it is equal to
                      iterator_end (the stream can be of any kind, only the operators
                      required by utf8_to_int() are used)
        iterator_end - an end iterator
        clear_stream - true if out_stream should be cleared before the conversion
        mode - what to do with an invalid character:
               1: put U+FFFD "replacement character" instead of the invalid character
               other values: the invalid character is skipped

    output:
        out_stream - an output wide stream or wide string (it has to have a clear() method
                     and an int_to_wide() function has to be available for it)

    this function returns false if there were some errors when converting
*/
template<typename StreamIteratorType, typename StreamOrStringType>
bool utf8_to_wide(StreamIteratorType & iterator_in, const StreamIteratorType & iterator_end, StreamOrStringType & out_stream, bool clear_stream, int mode)
{
    if( clear_stream )
        out_stream.clear();

    bool was_error = false;

    while( iterator_in != iterator_end )
    {
        int res;
        bool correct;

        // utf8_to_int() moves iterator_in forward by itself
        utf8_to_int(iterator_in, iterator_end, res, correct);

        if( correct )
        {
            int_to_wide(res, out_stream);
            continue;
        }

        if( mode == 1 )
            int_to_wide(0xFFFD, out_stream); // U+FFFD "replacement character"

        was_error = true;
    }

    return !was_error;
}
/*!
this function converts one wide character into UTF-8 stream
@ -387,6 +481,11 @@ void wide_stream_to_utf8(StreamTypeIn & buffer, StreamTypeOut & utf8, bool clear
} // namespace pt
#endif

View File

@ -8,17 +8,15 @@
./convert.o: ../src/textstream/stream.h ../src/utf8/utf8_templates.h
./convert.o: ../src/utf8/utf8_private.h ../src/date/date.h
./convert.o: ../src/membuffer/membuffer.h ../src/textstream/types.h
./convert.o: ../src/utf8/utf8_stream.h ../src/convert/strtoint.h
./convert.o: ../src/convert/text.h ../src/convert/misc.h
./convert.o: ../src/convert/double.h
./convert.o: ../src/convert/strtoint.h ../src/convert/text.h
./convert.o: ../src/convert/misc.h ../src/convert/double.h
./csvparser.o: csvparser.h ../src/csv/csvparser.h ../src/space/space.h
./csvparser.o: ../src/textstream/types.h ../src/convert/inttostr.h
./csvparser.o: ../src/utf8/utf8.h ../src/textstream/stream.h
./csvparser.o: ../src/utf8/utf8_templates.h ../src/utf8/utf8_private.h
./csvparser.o: ../src/convert/baseparser.h ../src/textstream/textstream.h
./csvparser.o: ../src/textstream/stream.h ../src/date/date.h
./csvparser.o: ../src/membuffer/membuffer.h ../src/textstream/types.h
./csvparser.o: ../src/utf8/utf8_stream.h test.h
./csvparser.o: ../src/membuffer/membuffer.h ../src/textstream/types.h test.h
./main.o: convert.h mainoptionsparser.h csvparser.h
./test.o: test.h
./mainoptionsparser.o: mainoptionsparser.h test.h
@ -33,6 +31,5 @@
./mainoptionsparser.o: ../src/textstream/textstream.h
./mainoptionsparser.o: ../src/textstream/stream.h ../src/date/date.h
./mainoptionsparser.o: ../src/membuffer/membuffer.h ../src/textstream/types.h
./mainoptionsparser.o: ../src/utf8/utf8_stream.h ../src/convert/strtoint.h
./mainoptionsparser.o: ../src/convert/text.h ../src/convert/misc.h
./mainoptionsparser.o: ../src/convert/double.h
./mainoptionsparser.o: ../src/convert/strtoint.h ../src/convert/text.h
./mainoptionsparser.o: ../src/convert/misc.h ../src/convert/double.h