use a char32_t character in the base Stream class
Add an operator<<(char32_t) to the Stream class; char32_t will be used as the main character type instead of wchar_t (this is needed on systems where sizeof(wchar_t) is equal to 2). While here: - add to utf8: size_t wide_to_int(const Stream & stream, size_t stream_index, int & res, bool & correct); template<typename StreamType, typename OutputFunction> bool wide_to_output_function(StreamType & buffer, OutputFunction output_function, int mode = 1); template<typename OutputFunction> bool wide_to_output_function_by_index(const Stream & stream, OutputFunction output_function, int mode) - add to convert/misc: bool try_esc_to_tex(char32_t c, pt::Stream & out); bool try_esc_to_html(char32_t c, pt::Stream & out)
This commit is contained in:
parent
2689c9fece
commit
c0838de3a4
|
@ -1,8 +1,7 @@
|
|||
# DO NOT DELETE
|
||||
|
||||
./convert/inttostr.o: ./convert/inttostr.h
|
||||
./convert/misc.o: ./convert/misc.h ./convert/text.h textstream/stream.h
|
||||
./convert/misc.o: textstream/types.h ./convert/inttostr.h utf8/utf8.h
|
||||
./convert/misc.o: ./convert/misc.h textstream/stream.h ./convert/inttostr.h
|
||||
./convert/text.o: ./convert/text.h ./convert/text_private.h
|
||||
./convert/double.o: ./convert/double.h textstream/textstream.h
|
||||
./convert/double.o: textstream/stream.h space/space.h convert/inttostr.h
|
||||
|
@ -29,15 +28,14 @@
|
|||
./space/space.o: textstream/stream.h space/space.h date/date.h
|
||||
./space/space.o: membuffer/membuffer.h textstream/types.h
|
||||
./space/space.o: textstream/stream_private.h convert/strtoint.h
|
||||
./space/space.o: ./convert/text.h ./convert/misc.h textstream/types.h
|
||||
./space/space.o: ./convert/double.h
|
||||
./space/space.o: ./convert/text.h ./convert/misc.h ./convert/double.h
|
||||
./space/spaceparser.o: ./space/spaceparser.h ./space/space.h
|
||||
./space/spaceparser.o: convert/inttostr.h utf8/utf8.h textstream/stream.h
|
||||
./space/spaceparser.o: convert/baseparser.h textstream/textstream.h
|
||||
./space/spaceparser.o: textstream/stream.h space/space.h date/date.h
|
||||
./space/spaceparser.o: membuffer/membuffer.h textstream/types.h
|
||||
./space/spaceparser.o: textstream/stream_private.h convert/strtoint.h
|
||||
./space/spaceparser.o: ./convert/text.h ./convert/misc.h textstream/types.h
|
||||
./space/spaceparser.o: ./convert/text.h ./convert/misc.h
|
||||
./space/keyvalueparser.o: ./space/keyvalueparser.h ./space/space.h
|
||||
./space/keyvalueparser.o: convert/inttostr.h utf8/utf8.h textstream/stream.h
|
||||
./space/keyvalueparser.o: convert/baseparser.h textstream/textstream.h
|
||||
|
@ -45,7 +43,6 @@
|
|||
./space/keyvalueparser.o: membuffer/membuffer.h textstream/types.h
|
||||
./space/keyvalueparser.o: textstream/stream_private.h convert/strtoint.h
|
||||
./space/keyvalueparser.o: ./convert/text.h ./convert/misc.h
|
||||
./space/keyvalueparser.o: textstream/types.h
|
||||
./textstream/stream_private.o: textstream/stream_private.h
|
||||
./utf8/utf8.o: ./utf8/utf8.h textstream/stream.h
|
||||
./csv/csvparser.o: ./csv/csvparser.h space/space.h convert/inttostr.h
|
||||
|
|
|
@ -5,7 +5,7 @@
|
|||
*/
|
||||
|
||||
/*
|
||||
* Copyright (c) 2017-2022, Tomasz Sowa
|
||||
* Copyright (c) 2017-2024, Tomasz Sowa
|
||||
* All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
|
@ -34,7 +34,6 @@
|
|||
|
||||
#include "misc.h"
|
||||
#include "inttostr.h"
|
||||
#include "utf8/utf8.h"
|
||||
|
||||
|
||||
namespace pt
|
||||
|
@ -49,13 +48,12 @@ void SetOverflow(bool * was_overflow, bool val)
|
|||
|
||||
|
||||
|
||||
void esc_to_json_uformat(wchar_t val, Stream & out)
|
||||
void esc_to_json_uformat(char32_t val, Stream & out)
|
||||
{
|
||||
char buf[10];
|
||||
char buf[17];
|
||||
size_t len;
|
||||
|
||||
Toa((unsigned long)val, buf, sizeof(buf)/sizeof(char), 16, &len);
|
||||
|
||||
out << "\\u";
|
||||
|
||||
if( len < 4 )
|
||||
|
@ -75,7 +73,7 @@ void esc_to_json_uformat(wchar_t val, Stream & out)
|
|||
* if the character is invalid for such a stream then only return true
|
||||
* but not put it to the stream
|
||||
*/
|
||||
bool try_esc_to_json(wchar_t val, Stream & out)
|
||||
bool try_esc_to_json(char32_t val, Stream & out)
|
||||
{
|
||||
bool status = false;
|
||||
|
||||
|
@ -133,7 +131,7 @@ bool try_esc_to_json(wchar_t val, Stream & out)
|
|||
|
||||
void esc_to_json(wchar_t val, Stream & out)
|
||||
{
|
||||
if( !try_esc_to_json(val, out) )
|
||||
if( !try_esc_to_json((char32_t)(val), out) )
|
||||
{
|
||||
out << val;
|
||||
}
|
||||
|
@ -142,7 +140,7 @@ void esc_to_json(wchar_t val, Stream & out)
|
|||
|
||||
void esc_to_json(char val, Stream & out)
|
||||
{
|
||||
if( !try_esc_to_json((wchar_t)(unsigned char)val, out) )
|
||||
if( !try_esc_to_json((char32_t)(unsigned char)val, out) )
|
||||
{
|
||||
out << val;
|
||||
}
|
||||
|
@ -204,7 +202,7 @@ void esc_to_json(const std::wstring & in, Stream & out)
|
|||
* if the character is invalid for such a stream then only return true
|
||||
* but not put it to the stream
|
||||
*/
|
||||
bool try_esc_to_xml(wchar_t val, Stream & out)
|
||||
bool try_esc_to_xml(char32_t val, Stream & out)
|
||||
{
|
||||
bool status = false;
|
||||
|
||||
|
@ -246,7 +244,7 @@ bool try_esc_to_xml(wchar_t val, Stream & out)
|
|||
|
||||
void esc_to_xml(wchar_t val, Stream & out)
|
||||
{
|
||||
if( !try_esc_to_xml(val, out) )
|
||||
if( !try_esc_to_xml((char32_t)val, out) )
|
||||
{
|
||||
out << val;
|
||||
}
|
||||
|
@ -255,7 +253,7 @@ void esc_to_xml(wchar_t val, Stream & out)
|
|||
|
||||
void esc_to_xml(char val, Stream & out)
|
||||
{
|
||||
if( !try_esc_to_xml((wchar_t)(unsigned char)val, out) )
|
||||
if( !try_esc_to_xml((char32_t)(unsigned char)val, out) )
|
||||
{
|
||||
out << val;
|
||||
}
|
||||
|
@ -318,7 +316,7 @@ void esc_to_xml(const std::wstring & in, Stream & out)
|
|||
* if the character is invalid for such a stream then only return true
|
||||
* but not put it to the stream
|
||||
*/
|
||||
bool try_esc_to_csv(wchar_t val, pt::Stream & out)
|
||||
bool try_esc_to_csv(char32_t val, pt::Stream & out)
|
||||
{
|
||||
bool status = false;
|
||||
|
||||
|
@ -341,7 +339,7 @@ bool try_esc_to_csv(wchar_t val, pt::Stream & out)
|
|||
|
||||
void esc_to_csv(wchar_t val, pt::Stream & out)
|
||||
{
|
||||
if( !try_esc_to_csv(val, out) )
|
||||
if( !try_esc_to_csv((char32_t)val, out) )
|
||||
{
|
||||
out << val;
|
||||
}
|
||||
|
@ -350,7 +348,7 @@ void esc_to_csv(wchar_t val, pt::Stream & out)
|
|||
|
||||
void esc_to_csv(char val, Stream & out)
|
||||
{
|
||||
if( !try_esc_to_csv((wchar_t)(unsigned char)val, out) )
|
||||
if( !try_esc_to_csv((char32_t)(unsigned char)val, out) )
|
||||
{
|
||||
out << val;
|
||||
}
|
||||
|
@ -405,6 +403,127 @@ void esc_to_csv(const std::wstring & in, Stream & out)
|
|||
}
|
||||
|
||||
|
||||
bool try_esc_to_tex(char32_t c, pt::Stream & out)
|
||||
{
|
||||
bool status = false;
|
||||
|
||||
switch(c)
|
||||
{
|
||||
case 0:
|
||||
status = true;
|
||||
break; // ignore the null character
|
||||
|
||||
case '{':
|
||||
out << "{\\char`{}";
|
||||
status = true;
|
||||
break;
|
||||
|
||||
case '}':
|
||||
out << "{\\char`}}";
|
||||
status = true;
|
||||
break;
|
||||
|
||||
case '\\':
|
||||
out << "{\\char`\\\\}";
|
||||
status = true;
|
||||
break;
|
||||
|
||||
case '#':
|
||||
out << "{\\#}";
|
||||
status = true;
|
||||
break;
|
||||
|
||||
case '$':
|
||||
out << "{\\$}";
|
||||
status = true;
|
||||
break;
|
||||
|
||||
case '%':
|
||||
out << "{\\%}";
|
||||
status = true;
|
||||
break;
|
||||
|
||||
case '&':
|
||||
out << "{\\&}";
|
||||
status = true;
|
||||
break;
|
||||
|
||||
case '~':
|
||||
out << "{\\~\\relax}";
|
||||
status = true;
|
||||
break;
|
||||
|
||||
case '^':
|
||||
out << "{\\^\\relax}";
|
||||
status = true;
|
||||
break;
|
||||
|
||||
case '<':
|
||||
out << "{\\char`<}";
|
||||
status = true;
|
||||
break;
|
||||
|
||||
case '>':
|
||||
out << "{\\char`>}";
|
||||
status = true;
|
||||
break;
|
||||
}
|
||||
|
||||
return status;
|
||||
}
|
||||
|
||||
|
||||
|
||||
bool try_esc_to_html(char32_t c, pt::Stream & out)
|
||||
{
|
||||
bool status = false;
|
||||
|
||||
switch(c)
|
||||
{
|
||||
case 0:
|
||||
out << L"�";
|
||||
status = true;
|
||||
break;
|
||||
|
||||
case '<':
|
||||
out << L"<";
|
||||
status = true;
|
||||
break;
|
||||
|
||||
case '>':
|
||||
out << L">";
|
||||
status = true;
|
||||
break;
|
||||
|
||||
case '&':
|
||||
out << L"&";
|
||||
status = true;
|
||||
break;
|
||||
|
||||
case '\"':
|
||||
out << L""";
|
||||
status = true;
|
||||
break;
|
||||
|
||||
case '\'':
|
||||
out << L"'"; // (it is "'" but IE8 has a problem with ') (' is valid in HTML5, but not HTML4)
|
||||
status = true;
|
||||
break;
|
||||
|
||||
case 10:
|
||||
out << L" ";
|
||||
status = true;
|
||||
break;
|
||||
|
||||
case 13:
|
||||
out << L" ";
|
||||
status = true;
|
||||
break;
|
||||
}
|
||||
|
||||
return status;
|
||||
}
|
||||
|
||||
|
||||
|
||||
}
|
||||
|
|
|
@ -5,7 +5,7 @@
|
|||
*/
|
||||
|
||||
/*
|
||||
* Copyright (c) 2017-2023, Tomasz Sowa
|
||||
* Copyright (c) 2017-2024, Tomasz Sowa
|
||||
* All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
|
@ -35,10 +35,7 @@
|
|||
#ifndef headerfile_pikotools_src_convert_misc
|
||||
#define headerfile_pikotools_src_convert_misc
|
||||
|
||||
#include <limits>
|
||||
#include "text.h"
|
||||
#include "textstream/stream.h"
|
||||
#include "textstream/types.h"
|
||||
|
||||
|
||||
namespace pt
|
||||
|
@ -46,7 +43,7 @@ namespace pt
|
|||
|
||||
void SetOverflow(bool * was_overflow, bool val);
|
||||
|
||||
bool try_esc_to_json(wchar_t val, Stream & out);
|
||||
bool try_esc_to_json(char32_t val, Stream & out);
|
||||
void esc_to_json(wchar_t val, Stream & out);
|
||||
void esc_to_json(char val, Stream & out);
|
||||
void esc_to_json(const char * c, pt::Stream & out);
|
||||
|
@ -56,7 +53,7 @@ void esc_to_json(const wchar_t * c, size_t len, pt::Stream & out);
|
|||
void esc_to_json(const std::string & in, Stream & out);
|
||||
void esc_to_json(const std::wstring & in, Stream & out);
|
||||
|
||||
bool try_esc_to_xml(wchar_t val, Stream & out);
|
||||
bool try_esc_to_xml(char32_t val, Stream & out);
|
||||
void esc_to_xml(wchar_t c, pt::Stream & out);
|
||||
void esc_to_xml(char c, pt::Stream & out);
|
||||
void esc_to_xml(const char * c, pt::Stream & out);
|
||||
|
@ -66,7 +63,7 @@ void esc_to_xml(const wchar_t * c, size_t len, pt::Stream & out);
|
|||
void esc_to_xml(const std::string & in, Stream & out);
|
||||
void esc_to_xml(const std::wstring & in, Stream & out);
|
||||
|
||||
bool try_esc_to_csv(wchar_t val, pt::Stream & out);
|
||||
bool try_esc_to_csv(char32_t val, pt::Stream & out);
|
||||
void esc_to_csv(wchar_t val, Stream & out);
|
||||
void esc_to_csv(char c, pt::Stream & out);
|
||||
void esc_to_csv(const char * c, std::size_t len, Stream & out);
|
||||
|
@ -77,6 +74,8 @@ void esc_to_csv(const wchar_t * c, size_t len, pt::Stream & out);
|
|||
void esc_to_csv(const std::string & in, Stream & out);
|
||||
void esc_to_csv(const std::wstring & in, Stream & out);
|
||||
|
||||
bool try_esc_to_tex(char32_t c, pt::Stream & out);
|
||||
bool try_esc_to_html(char32_t c, pt::Stream & out);
|
||||
|
||||
|
||||
template<typename StreamType>
|
||||
|
|
|
@ -5,7 +5,7 @@
|
|||
*/
|
||||
|
||||
/*
|
||||
* Copyright (c) 2018-2022, Tomasz Sowa
|
||||
* Copyright (c) 2018-2024, Tomasz Sowa
|
||||
* All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
|
@ -274,6 +274,17 @@ Log & Log::operator<<(wchar_t val)
|
|||
}
|
||||
|
||||
|
||||
/*
 * put a char32_t value to the log buffer
 *
 * the value is forwarded to the buffer only if the buffer and the file_log
 * are set and the current log level does not exceed the level of file_log,
 * otherwise the call is a no-op
 */
Log & Log::operator<<(char32_t val)
{
	// the && chain short-circuits so file_log is dereferenced only when it is set
	const bool should_write = buffer && file_log && current_level <= file_log->get_log_level();

	if( should_write )
		(*buffer) << val;

	return *this;
}
|
||||
|
||||
|
||||
Log & Log::operator<<(bool val)
|
||||
{
|
||||
if( can_put_log() )
|
||||
|
|
|
@ -100,6 +100,7 @@ public:
|
|||
Log & operator<<(char val);
|
||||
Log & operator<<(unsigned char val);
|
||||
Log & operator<<(wchar_t val);
|
||||
Log & operator<<(char32_t val);
|
||||
Log & operator<<(bool val);
|
||||
Log & operator<<(short val);
|
||||
Log & operator<<(int s);
|
||||
|
|
|
@ -5,7 +5,7 @@
|
|||
*/
|
||||
|
||||
/*
|
||||
* Copyright (c) 2021, Tomasz Sowa
|
||||
* Copyright (c) 2021-2024, Tomasz Sowa
|
||||
* All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
|
@ -107,6 +107,7 @@ public:
|
|||
|
||||
virtual Stream & operator<<(char) = 0;
|
||||
virtual Stream & operator<<(unsigned char) = 0;
|
||||
virtual Stream & operator<<(char32_t) = 0;
|
||||
virtual Stream & operator<<(wchar_t) = 0;
|
||||
virtual Stream & operator<<(bool) = 0;
|
||||
virtual Stream & operator<<(short) = 0;
|
||||
|
|
|
@ -5,7 +5,7 @@
|
|||
*/
|
||||
|
||||
/*
|
||||
* Copyright (c) 2012-2023, Tomasz Sowa
|
||||
* Copyright (c) 2012-2024, Tomasz Sowa
|
||||
* All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
|
@ -177,6 +177,7 @@ public:
|
|||
|
||||
TextStreamBase & operator<<(char);
|
||||
TextStreamBase & operator<<(unsigned char);
|
||||
TextStreamBase & operator<<(char32_t);
|
||||
TextStreamBase & operator<<(wchar_t); // no surrogate pairs are used
|
||||
TextStreamBase & operator<<(bool);
|
||||
TextStreamBase & operator<<(short);
|
||||
|
@ -893,6 +894,16 @@ return *this;
|
|||
}
|
||||
|
||||
|
||||
template<typename char_type, size_t stack_size, size_t heap_block_size>
|
||||
TextStreamBase<char_type, stack_size, heap_block_size> &
|
||||
TextStreamBase<char_type, stack_size, heap_block_size>::operator<<(char32_t v)
|
||||
{
|
||||
int_to_stream(static_cast<int>(v), *this);
|
||||
|
||||
return *this;
|
||||
}
|
||||
|
||||
|
||||
template<typename char_type, size_t stack_size, size_t heap_block_size>
|
||||
TextStreamBase<char_type, stack_size, heap_block_size> &
|
||||
TextStreamBase<char_type, stack_size, heap_block_size>::operator<<(wchar_t v)
|
||||
|
@ -1310,7 +1321,7 @@ TextStreamBase<char_type, stack_size, heap_block_size>::fill_up_if_needed(wchar_
|
|||
|
||||
|
||||
/*!
|
||||
this function converts an UTF-8 stream into wide stream or wide string
|
||||
this function converts an UTF-8 stream into a wide stream or a wide string
|
||||
(is declared in utf8/utf8.h)
|
||||
|
||||
input:
|
||||
|
|
|
@ -199,6 +199,59 @@ return wide_to_int(wide_string, min_str_len, z, correct);
|
|||
|
||||
|
||||
|
||||
// new function, needs to be tested a little more, especially when sizeof(wchar_t) is 2
|
||||
/*
|
||||
*
|
||||
* this function returns how many wide characters have been read from the stream
|
||||
* it returns zero only if the stream_index is equal to or greater than stream.size()
|
||||
*/
|
||||
size_t wide_to_int(const Stream & stream, size_t stream_index, int & res, bool & correct)
|
||||
{
|
||||
size_t len = 0;
|
||||
res = 0xFFFD; // U+FFFD "replacement character";
|
||||
correct = false;
|
||||
|
||||
if( stream_index < stream.size() )
|
||||
{
|
||||
int c = 0xFFFD;
|
||||
wchar_t w1 = stream.get_wchar(stream_index);
|
||||
|
||||
if( sizeof(wchar_t) == 2 && is_first_surrogate_char(w1) )
|
||||
{
|
||||
len += 1;
|
||||
|
||||
if( stream_index + 1 < stream.size() )
|
||||
{
|
||||
wchar_t w2 = stream.get_wchar(stream_index + 1);
|
||||
|
||||
if( surrogate_pair_to_int(w1, w2, c) )
|
||||
{
|
||||
len += 1;
|
||||
correct = true;
|
||||
}
|
||||
}
|
||||
}
|
||||
else
|
||||
if( is_correct_unicode_char(c) )
|
||||
{
|
||||
c = w1;
|
||||
correct = true;
|
||||
len += 1;
|
||||
}
|
||||
else
|
||||
{
|
||||
len += 1;
|
||||
}
|
||||
|
||||
res = c;
|
||||
}
|
||||
|
||||
return len;
|
||||
}
|
||||
|
||||
|
||||
|
||||
|
||||
/*
|
||||
* convert an int to a wide string
|
||||
*
|
||||
|
|
147
src/utf8/utf8.h
147
src/utf8/utf8.h
|
@ -117,6 +117,7 @@ bool int_to_stream(int c, pt::Stream & stream);
|
|||
*/
|
||||
size_t wide_to_int(const wchar_t * wide_string, size_t string_len, int & z, bool & correct);
|
||||
size_t wide_to_int(const wchar_t * wide_string, int & z, bool & correct);
|
||||
size_t wide_to_int(const Stream & utf8, size_t stream_index, int & res, bool & correct);
|
||||
|
||||
|
||||
/*
|
||||
|
@ -203,7 +204,7 @@ template<typename StreamOrStringType>
|
|||
bool utf8_to_wide(const Stream & stream, StreamOrStringType & res, bool clear = true, int mode = 1);
|
||||
|
||||
template<typename OutputFunction>
|
||||
bool utf8_to_output_function(const Stream & stream, OutputFunction output_function, int mode = 1);
|
||||
bool utf8_to_output_function_by_index(const Stream & stream, OutputFunction output_function, int mode = 1);
|
||||
|
||||
template<typename StreamIteratorType, typename StreamOrStringType>
|
||||
bool utf8_to_wide(StreamIteratorType & iterator_in, const StreamIteratorType & iterator_end, StreamOrStringType & out_stream, bool clear_stream = true, int mode = 1);
|
||||
|
@ -211,6 +212,9 @@ bool utf8_to_wide(StreamIteratorType & iterator_in, const StreamIteratorType & i
|
|||
template<typename StreamIteratorType, typename OutputFunction>
|
||||
bool utf8_to_output_function(StreamIteratorType & iterator_in, const StreamIteratorType & iterator_end, OutputFunction output_function, int mode = 1);
|
||||
|
||||
template<typename StreamType, typename OutputFunction>
|
||||
bool utf8_to_output_function(StreamType & stream, OutputFunction output_function, int mode = 1);
|
||||
|
||||
template<typename CharT, size_t stack_size, size_t heap_block_size>
|
||||
class TextStreamBase;
|
||||
|
||||
|
@ -249,6 +253,12 @@ template<typename StreamType>
|
|||
size_t int_to_utf8(int z, StreamType & utf8);
|
||||
|
||||
|
||||
template<typename StreamType, typename OutputFunction>
|
||||
bool wide_to_output_function(StreamType & buffer, OutputFunction output_function, int mode = 1);
|
||||
|
||||
template<typename OutputFunction>
|
||||
bool wide_to_output_function_by_index(const Stream & stream, OutputFunction output_function, int mode = 1);
|
||||
|
||||
/*
|
||||
* call an output_function for some sequence of wide characters from the stream buffer
|
||||
*
|
||||
|
@ -260,7 +270,7 @@ size_t int_to_utf8(int z, StreamType & utf8);
|
|||
* StreamType should have a const_iterator and begin() and end() methods
|
||||
*/
|
||||
template<typename StreamType, typename OutputFunction>
|
||||
bool wide_to_output_function(StreamType & buffer, OutputFunction output_function, int mode = 1);
|
||||
bool wide_to_output_function_utf8(StreamType & buffer, OutputFunction output_function, int mode = 1);
|
||||
|
||||
|
||||
/*
|
||||
|
@ -554,8 +564,113 @@ return !was_error;
|
|||
}
|
||||
|
||||
|
||||
|
||||
template<typename StreamType, typename OutputFunction>
|
||||
bool wide_to_output_function(StreamType & buffer, OutputFunction output_function, int mode)
|
||||
{
|
||||
bool was_error = false;
|
||||
typename StreamType::const_iterator i = buffer.begin();
|
||||
|
||||
while( i != buffer.end() )
|
||||
{
|
||||
int c = 0xFFFD; // U+FFFD "replacement character";
|
||||
bool is_correct_char = false;
|
||||
wchar_t w1 = *i;
|
||||
|
||||
if( sizeof(wchar_t) == 2 && is_first_surrogate_char(w1) )
|
||||
{
|
||||
++i;
|
||||
|
||||
if( i != buffer.end() )
|
||||
{
|
||||
wchar_t w2 = *i;
|
||||
|
||||
if( surrogate_pair_to_int(w1, w2, c) )
|
||||
{
|
||||
is_correct_char = true;
|
||||
++i;
|
||||
}
|
||||
else
|
||||
{
|
||||
was_error = true;
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
was_error = true;
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
c = w1;
|
||||
is_correct_char = true;
|
||||
++i;
|
||||
}
|
||||
|
||||
if( is_correct_char || mode == 1 )
|
||||
{
|
||||
output_function(c);
|
||||
}
|
||||
}
|
||||
|
||||
return !was_error;
|
||||
}
|
||||
|
||||
|
||||
/*
|
||||
* read characters from a wide stream and call an output_function
|
||||
* we are using a stream index to iterate over the stream
|
||||
*
|
||||
* input:
|
||||
* stream - a wide stream for converting
|
||||
* mode - what to do with errors when converting
|
||||
* 0: skip an invalid character
|
||||
* 1: put U+FFFD "replacement character" instead of the invalid character (default)
|
||||
*
|
||||
* output:
|
||||
* output_function - a function which takes one argument: an int (a character):
|
||||
* output_function(int c)
|
||||
*
|
||||
* this function returns false if there were some errors when converting
|
||||
*/
|
||||
template<typename OutputFunction>
|
||||
bool wide_to_output_function_by_index(const Stream & stream, OutputFunction output_function, int mode)
|
||||
{
|
||||
size_t len;
|
||||
bool correct;
|
||||
int z;
|
||||
size_t index = 0;
|
||||
bool was_error = false;
|
||||
|
||||
do
|
||||
{
|
||||
len = wide_to_int(stream, index, z, correct);
|
||||
|
||||
if( len > 0 )
|
||||
{
|
||||
if( !correct )
|
||||
{
|
||||
if( mode == 1 )
|
||||
output_function(0xFFFD); // U+FFFD "replacement character"
|
||||
|
||||
was_error = true;
|
||||
}
|
||||
else
|
||||
{
|
||||
output_function(z);
|
||||
}
|
||||
|
||||
index += len;
|
||||
}
|
||||
}
|
||||
while( len > 0 );
|
||||
|
||||
return !was_error;
|
||||
}
|
||||
|
||||
|
||||
template<typename StreamType, typename OutputFunction>
|
||||
bool wide_to_output_function_utf8(StreamType & buffer, OutputFunction output_function, int mode)
|
||||
{
|
||||
char utf8_buffer[256];
|
||||
std::size_t buffer_len = sizeof(utf8_buffer) / sizeof(char);
|
||||
|
@ -609,7 +724,7 @@ bool wide_to_output_function(StreamType & buffer, OutputFunction output_function
|
|||
else
|
||||
{
|
||||
c = w1;
|
||||
seems_to_be_correct = true; // we do not test utf8_check_range(...) here because it is tested in int_to_utf8(...) below
|
||||
seems_to_be_correct = true; // we do not test is_correct_unicode_char(...) here because it is tested in int_to_utf8(...) below
|
||||
++i;
|
||||
}
|
||||
|
||||
|
@ -646,7 +761,8 @@ bool wide_to_output_function(StreamType & buffer, OutputFunction output_function
|
|||
|
||||
/*
|
||||
* convert a UTF-8 stream into a wide stream or a wide string
|
||||
*
|
||||
* we are using a stream index to iterate over the stream
|
||||
*
|
||||
* input:
|
||||
* stream - a UTF-8 stream for converting
|
||||
* mode - what to do with errors when converting
|
||||
|
@ -664,7 +780,7 @@ bool utf8_to_wide(const Stream & stream, StreamOrStringType & res, bool clear, i
|
|||
if( clear )
|
||||
res.clear();
|
||||
|
||||
return utf8_to_output_function(stream, [&](int z) {
|
||||
return utf8_to_output_function_by_index(stream, [&](int z) {
|
||||
int_to_wide(z, res);
|
||||
}, mode);
|
||||
}
|
||||
|
@ -672,6 +788,7 @@ bool utf8_to_wide(const Stream & stream, StreamOrStringType & res, bool clear, i
|
|||
|
||||
/*
|
||||
* read characters from an UTF-8 stream and call an output_function
|
||||
* we are using a stream index to iterate over the stream
|
||||
*
|
||||
* input:
|
||||
* stream - a UTF-8 stream for converting
|
||||
|
@ -686,7 +803,7 @@ bool utf8_to_wide(const Stream & stream, StreamOrStringType & res, bool clear, i
|
|||
* this function returns false if there were some errors when converting
|
||||
*/
|
||||
template<typename OutputFunction>
|
||||
bool utf8_to_output_function(const Stream & stream, OutputFunction output_function, int mode)
|
||||
bool utf8_to_output_function_by_index(const Stream & stream, OutputFunction output_function, int mode)
|
||||
{
|
||||
size_t len;
|
||||
bool correct;
|
||||
|
@ -778,6 +895,18 @@ bool utf8_to_output_function(StreamIteratorType & iterator_in, const StreamItera
|
|||
|
||||
|
||||
|
||||
/*
 * read characters from an UTF-8 stream and call an output_function
 *
 * a convenience wrapper: takes the whole range [stream.begin(), stream.end())
 * and forwards it to the iterator based utf8_to_output_function(...)
 *
 * StreamType should have a const_iterator and begin() and end() methods
 */
template<typename StreamType, typename OutputFunction>
bool utf8_to_output_function(StreamType & stream, OutputFunction output_function, int mode)
{
	// the iterator overload takes the input iterator by a non-const reference
	// so it has to be a named lvalue
	typename StreamType::const_iterator iterator_in = stream.begin();
	const typename StreamType::const_iterator iterator_end = stream.end();

	return utf8_to_output_function(iterator_in, iterator_end, output_function, mode);
}
|
||||
|
||||
|
||||
|
||||
|
||||
/*
|
||||
* convert an UTF-8 stream into a wide string
|
||||
*
|
||||
|
@ -995,7 +1124,7 @@ bool wide_stream_to_utf8(StreamType & buffer, std::string & utf8, bool clear, in
|
|||
if( clear )
|
||||
utf8.clear();
|
||||
|
||||
return wide_to_output_function(buffer, [&utf8](const char * utf8_buffer, std::size_t buffer_len) -> bool {
|
||||
return wide_to_output_function_utf8(buffer, [&utf8](const char * utf8_buffer, std::size_t buffer_len) -> bool {
|
||||
utf8.append(utf8_buffer, buffer_len);
|
||||
return true;
|
||||
}, mode);
|
||||
|
@ -1061,7 +1190,7 @@ bool wide_stream_to_utf8(StreamTypeIn & buffer, StreamTypeOut & utf8, bool clear
|
|||
if( clear )
|
||||
utf8.clear();
|
||||
|
||||
return wide_to_output_function(buffer, [&utf8](const char * utf8_buffer, std::size_t buffer_len) -> bool {
|
||||
return wide_to_output_function_utf8(buffer, [&utf8](const char * utf8_buffer, std::size_t buffer_len) -> bool {
|
||||
utf8.write(utf8_buffer, buffer_len);
|
||||
return true;
|
||||
}, mode);
|
||||
|
@ -1093,7 +1222,7 @@ bool wide_stream_to_utf8(StreamType & buffer, char * utf8, std::size_t max_buffe
|
|||
buffer_ok = true;
|
||||
max_buffer_size -= 1; // for terminating null character
|
||||
|
||||
is_ok = wide_to_output_function(buffer, [&utf8, &max_buffer_size, &buffer_ok](const char * utf8_buffer, std::size_t buffer_len) -> bool {
|
||||
is_ok = wide_to_output_function_utf8(buffer, [&utf8, &max_buffer_size, &buffer_ok](const char * utf8_buffer, std::size_t buffer_len) -> bool {
|
||||
std::size_t i=0;
|
||||
|
||||
for( ; i < buffer_len ; ++i)
|
||||
|
|
|
@ -8,7 +8,7 @@
|
|||
./convert.o: ../src/membuffer/membuffer.h ../src/textstream/types.h
|
||||
./convert.o: ../src/textstream/stream_private.h ../src/convert/strtoint.h
|
||||
./convert.o: ../src/convert/text.h ../src/convert/misc.h
|
||||
./convert.o: ../src/textstream/types.h ../src/convert/double.h test.h
|
||||
./convert.o: ../src/convert/double.h test.h
|
||||
./csvparser.o: csvparser.h ../src/csv/csvparser.h ../src/space/space.h
|
||||
./csvparser.o: ../src/convert/inttostr.h ../src/utf8/utf8.h
|
||||
./csvparser.o: ../src/textstream/stream.h ../src/convert/baseparser.h
|
||||
|
@ -23,9 +23,8 @@
|
|||
./main.o: ../src/textstream/stream.h ../src/date/date.h
|
||||
./main.o: ../src/membuffer/membuffer.h ../src/textstream/types.h
|
||||
./main.o: ../src/textstream/stream_private.h ../src/convert/strtoint.h
|
||||
./main.o: ../src/convert/text.h ../src/convert/misc.h
|
||||
./main.o: ../src/textstream/types.h ../src/convert/double.h test.h
|
||||
./main.o: mainoptionsparser.h csvparser.h
|
||||
./main.o: ../src/convert/text.h ../src/convert/misc.h ../src/convert/double.h
|
||||
./main.o: test.h mainoptionsparser.h csvparser.h
|
||||
./test.o: test.h
|
||||
./mainoptionsparser.o: mainoptionsparser.h test.h
|
||||
./mainoptionsparser.o: ../src/mainoptions/mainoptionsparser.h
|
||||
|
@ -38,5 +37,4 @@
|
|||
./mainoptionsparser.o: ../src/membuffer/membuffer.h ../src/textstream/types.h
|
||||
./mainoptionsparser.o: ../src/textstream/stream_private.h
|
||||
./mainoptionsparser.o: ../src/convert/strtoint.h ../src/convert/text.h
|
||||
./mainoptionsparser.o: ../src/convert/misc.h ../src/textstream/types.h
|
||||
./mainoptionsparser.o: ../src/convert/double.h
|
||||
./mainoptionsparser.o: ../src/convert/misc.h ../src/convert/double.h
|
||||
|
|
Loading…
Reference in New Issue