use a char32_t character in the base Stream class

Add an operator<<(char32_t) to the Stream class. char32_t will be used
as the main character type instead of wchar_t (this is needed on systems
where sizeof(wchar_t) is equal to 2).

while here:
- add to utf8:
  size_t wide_to_int(const Stream & stream, size_t stream_index, int & res, bool & correct)
  template<typename StreamType, typename OutputFunction> bool wide_to_output_function(StreamType & buffer, OutputFunction output_function, int mode = 1)
  template<typename OutputFunction> bool wide_to_output_function_by_index(const Stream & stream, OutputFunction output_function, int mode)
- add to convert/misc:
  bool try_esc_to_tex(char32_t c, pt::Stream & out)
  bool try_esc_to_html(char32_t c, pt::Stream & out)
This commit is contained in:
2024-05-31 23:11:11 +02:00
parent 2689c9fece
commit c0838de3a4
10 changed files with 365 additions and 46 deletions

View File

@@ -5,7 +5,7 @@
*/
/*
* Copyright (c) 2017-2022, Tomasz Sowa
* Copyright (c) 2017-2024, Tomasz Sowa
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
@@ -34,7 +34,6 @@
#include "misc.h"
#include "inttostr.h"
#include "utf8/utf8.h"
namespace pt
@@ -49,13 +48,12 @@ void SetOverflow(bool * was_overflow, bool val)
void esc_to_json_uformat(wchar_t val, Stream & out)
void esc_to_json_uformat(char32_t val, Stream & out)
{
char buf[10];
char buf[17];
size_t len;
Toa((unsigned long)val, buf, sizeof(buf)/sizeof(char), 16, &len);
out << "\\u";
if( len < 4 )
@@ -75,7 +73,7 @@ void esc_to_json_uformat(wchar_t val, Stream & out)
* if the character is invalid for such a stream then only return true
* but not put it to the stream
*/
bool try_esc_to_json(wchar_t val, Stream & out)
bool try_esc_to_json(char32_t val, Stream & out)
{
bool status = false;
@@ -133,7 +131,7 @@ bool try_esc_to_json(wchar_t val, Stream & out)
void esc_to_json(wchar_t val, Stream & out)
{
if( !try_esc_to_json(val, out) )
if( !try_esc_to_json((char32_t)(val), out) )
{
out << val;
}
@@ -142,7 +140,7 @@ void esc_to_json(wchar_t val, Stream & out)
void esc_to_json(char val, Stream & out)
{
if( !try_esc_to_json((wchar_t)(unsigned char)val, out) )
if( !try_esc_to_json((char32_t)(unsigned char)val, out) )
{
out << val;
}
@@ -204,7 +202,7 @@ void esc_to_json(const std::wstring & in, Stream & out)
* if the character is invalid for such a stream then only return true
* but not put it to the stream
*/
bool try_esc_to_xml(wchar_t val, Stream & out)
bool try_esc_to_xml(char32_t val, Stream & out)
{
bool status = false;
@@ -246,7 +244,7 @@ bool try_esc_to_xml(wchar_t val, Stream & out)
void esc_to_xml(wchar_t val, Stream & out)
{
if( !try_esc_to_xml(val, out) )
if( !try_esc_to_xml((char32_t)val, out) )
{
out << val;
}
@@ -255,7 +253,7 @@ void esc_to_xml(wchar_t val, Stream & out)
void esc_to_xml(char val, Stream & out)
{
if( !try_esc_to_xml((wchar_t)(unsigned char)val, out) )
if( !try_esc_to_xml((char32_t)(unsigned char)val, out) )
{
out << val;
}
@@ -318,7 +316,7 @@ void esc_to_xml(const std::wstring & in, Stream & out)
* if the character is invalid for such a stream then only return true
* but not put it to the stream
*/
bool try_esc_to_csv(wchar_t val, pt::Stream & out)
bool try_esc_to_csv(char32_t val, pt::Stream & out)
{
bool status = false;
@@ -341,7 +339,7 @@ bool try_esc_to_csv(wchar_t val, pt::Stream & out)
void esc_to_csv(wchar_t val, pt::Stream & out)
{
if( !try_esc_to_csv(val, out) )
if( !try_esc_to_csv((char32_t)val, out) )
{
out << val;
}
@@ -350,7 +348,7 @@ void esc_to_csv(wchar_t val, pt::Stream & out)
void esc_to_csv(char val, Stream & out)
{
if( !try_esc_to_csv((wchar_t)(unsigned char)val, out) )
if( !try_esc_to_csv((char32_t)(unsigned char)val, out) )
{
out << val;
}
@@ -405,6 +403,127 @@ void esc_to_csv(const std::wstring & in, Stream & out)
}
/*
 * try to put a TeX escape sequence for the character c to the out stream
 *
 * return true if c has been handled here: either its escape sequence was
 * written to out or (only for the null character) it was dropped without
 * writing anything
 *
 * return false if c is not one of the characters listed below, nothing is
 * written to out in such a case
 */
bool try_esc_to_tex(char32_t c, pt::Stream & out)
{
	switch(c)
	{
	case 0:
		// the null character is skipped: reported as handled but not written
		return true;

	// characters written through \char
	case '{':
		out << "{\\char`{}";
		return true;

	case '}':
		out << "{\\char`}}";
		return true;

	case '\\':
		out << "{\\char`\\\\}";
		return true;

	case '<':
		out << "{\\char`<}";
		return true;

	case '>':
		out << "{\\char`>}";
		return true;

	// characters written as a backslash followed by the character itself
	case '#':
		out << "{\\#}";
		return true;

	case '$':
		out << "{\\$}";
		return true;

	case '%':
		out << "{\\%}";
		return true;

	case '&':
		out << "{\\&}";
		return true;

	// characters followed by \relax
	case '~':
		out << "{\\~\\relax}";
		return true;

	case '^':
		out << "{\\^\\relax}";
		return true;
	}

	// any other character: nothing has been written
	return false;
}
/*
 * try to put an HTML character reference for the character c to the out stream
 *
 * return true if a reference for c has been written to out
 *
 * return false if c is not one of the characters listed below, nothing is
 * written to out in such a case
 */
bool try_esc_to_html(char32_t c, pt::Stream & out)
{
	switch(c)
	{
	// named references
	case '<':
		out << L"&lt;";
		return true;

	case '>':
		out << L"&gt;";
		return true;

	case '&':
		out << L"&amp;";
		return true;

	case '\"':
		out << L"&quot;";
		return true;

	// numeric references
	case 0:
		out << L"&#0;";
		return true;

	case 10:
		out << L"&#10;";
		return true;

	case 13:
		out << L"&#13;";
		return true;

	case '\'':
		// the numeric form is used instead of "&apos;":
		// IE8 has a problem with &apos; (it is valid in HTML5 but not in HTML4)
		out << L"&#39;";
		return true;
	}

	// any other character: nothing has been written
	return false;
}
}