- added some conversion functions: esc_to_json(...), esc_to_xml(...), esc_to_csv(...) (convert/misc.h)

- BaseParser: added the ability to read from TextStream and WTextStream
- HTMLParser: added filter(const WTextStream & in, Stream & out, ...) method
- added utf8_stream.h with one method:
  template<typename StreamIteratorType>
  size_t utf8_to_int(
    StreamIteratorType & iterator_in,
    StreamIteratorType & iterator_end,
    int & res,
    bool & correct)
This commit is contained in:
2021-10-12 19:53:11 +02:00
parent 4902eb6037
commit 17d2c0fb25
13 changed files with 807 additions and 128 deletions

View File

@@ -5,7 +5,7 @@
*/
/*
* Copyright (c) 2017, Tomasz Sowa
* Copyright (c) 2017-2021, Tomasz Sowa
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
@@ -36,6 +36,8 @@
*/
#include "misc.h"
#include "inttostr.h"
#include "utf8/utf8.h"
namespace pt
@@ -49,6 +51,287 @@ void SetOverflow(bool * was_overflow, bool val)
}
/*
 * Write a single byte to 'out', escaped for use inside a JSON string.
 *
 * The two-character escapes (\r \n \t \b \f \\ \") are tried first.
 * Any other byte below 32 (including zero) is written as \u followed by
 * four hexadecimal digits. All remaining bytes, including those with the
 * high bit set, are written unchanged.
 *
 * val - the byte to escape
 * out - the stream which receives the escaped output
 */
void esc_to_json(char val, Stream & out)
{
    switch( val )
    {
    case '\r': out << '\\'; out << 'r';  break;
    case '\n': out << '\\'; out << 'n';  break;
    case '\t': out << '\\'; out << 't';  break;
    case 0x08: out << '\\'; out << 'b';  break;
    case 0x0c: out << '\\'; out << 'f';  break;
    case '\\': out << '\\'; out << '\\'; break;
    case '"':  out << '\\'; out << '\"'; break;

    default:
        if( static_cast<unsigned char>(val) < 32 )
        {
            // the remaining control characters (zero included) have no short
            // escape in JSON, "\0" is not a valid escape there
            char buf[10];
            size_t len = 0;

            Toa(static_cast<unsigned char>(val), buf, sizeof(buf) / sizeof(char), 16, &len);
            out << "\\u";

            // pad on the left so that \u is followed by four digits
            for(size_t i = len ; i < 4 ; ++i)
            {
                out << '0';
            }

            out << buf;
        }
        else
        {
            out << val;
        }
        break;
    }
}
/*
 * Escape a wide character for JSON.
 *
 * The character is first converted to bytes with int_to_utf8() and then
 * every produced byte is passed to esc_to_json(char, Stream &).
 */
void esc_to_json(wchar_t val, Stream & out)
{
    char encoded[10];
    std::size_t capacity = sizeof(encoded) / sizeof(char);
    const size_t produced = int_to_utf8(static_cast<int>(val), encoded, capacity);
    size_t index = 0;

    while( index < produced )
    {
        esc_to_json(encoded[index], out);
        index += 1;
    }
}
/*
 * Escape a zero-terminated string for JSON, one byte at a time.
 * The terminating zero is not written.
 */
void esc_to_json(const char * c, pt::Stream & out)
{
    while( *c != 0 )
    {
        esc_to_json(*c, out);
        ++c;
    }
}
/*
 * Escape exactly 'len' bytes starting at 'c' for JSON.
 * Zero bytes inside the range are escaped like any other byte.
 */
void esc_to_json(const char * c, std::size_t len, pt::Stream & out)
{
    for( ; len > 0 ; --len, ++c)
    {
        esc_to_json(*c, out);
    }
}
/*
 * Escape a zero-terminated wide string for JSON, one character at a time.
 * The terminating zero is not written.
 */
void esc_to_json(const wchar_t * c, pt::Stream & out)
{
    while( *c != 0 )
    {
        esc_to_json(*c, out);
        ++c;
    }
}
/*
 * Escape exactly 'len' wide characters starting at 'c' for JSON.
 */
void esc_to_json(const wchar_t * c, size_t len, pt::Stream & out)
{
    for( ; len > 0 ; --len, ++c)
    {
        esc_to_json(*c, out);
    }
}
void esc_to_json(const std::string & in, Stream & out)
{
esc_to_json(in.c_str(), in.size(), out);
}
void esc_to_json(const std::wstring & in, Stream & out)
{
esc_to_json(in.c_str(), in.size(), out);
}
/*
 * Write a single byte to 'out', escaped for XML.
 *
 * The characters < > & " are replaced with &lt; &gt; &amp; &quot;
 * and every other byte is written unchanged.
 */
void esc_to_xml(char val, Stream & out)
{
    if( val == '<' )
    {
        out << "&lt;";
    }
    else
    if( val == '>' )
    {
        out << "&gt;";
    }
    else
    if( val == '&' )
    {
        out << "&amp;";
    }
    else
    if( val == '"' )
    {
        out << "&quot;";
    }
    else
    {
        // open question: should the zero (null) character be treated specially?
        out << val;
    }
}
/*
 * Escape a wide character for XML.
 *
 * The character is first converted to bytes with int_to_utf8() and then
 * every produced byte is passed to esc_to_xml(char, Stream &).
 */
void esc_to_xml(wchar_t val, Stream & out)
{
    char encoded[10];
    std::size_t capacity = sizeof(encoded) / sizeof(char);
    const size_t produced = int_to_utf8(static_cast<int>(val), encoded, capacity);
    size_t index = 0;

    while( index < produced )
    {
        esc_to_xml(encoded[index], out);
        index += 1;
    }
}
/*
 * Escape a zero-terminated string for XML, one byte at a time.
 * The terminating zero is not written.
 */
void esc_to_xml(const char * c, pt::Stream & out)
{
    while( *c != 0 )
    {
        esc_to_xml(*c, out);
        ++c;
    }
}
/*
 * Escape exactly 'len' bytes starting at 'c' for XML.
 */
void esc_to_xml(const char * c, std::size_t len, pt::Stream & out)
{
    for( ; len > 0 ; --len, ++c)
    {
        esc_to_xml(*c, out);
    }
}
/*
 * Escape a zero-terminated wide string for XML, one character at a time.
 * The terminating zero is not written.
 */
void esc_to_xml(const wchar_t * c, pt::Stream & out)
{
    while( *c != 0 )
    {
        esc_to_xml(*c, out);
        ++c;
    }
}
/*
 * Escape exactly 'len' wide characters starting at 'c' for XML.
 */
void esc_to_xml(const wchar_t * c, size_t len, pt::Stream & out)
{
    for( ; len > 0 ; --len, ++c)
    {
        esc_to_xml(*c, out);
    }
}
void esc_to_xml(const std::string & in, Stream & out)
{
esc_to_xml(in.c_str(), in.size(), out);
}
void esc_to_xml(const std::wstring & in, Stream & out)
{
esc_to_xml(in.c_str(), in.size(), out);
}
/*
 * Write a single byte to 'out', escaped for CSV.
 *
 * A double quote is written twice, every other byte is written unchanged.
 * The surrounding quotes of a field are not added here.
 */
void esc_to_csv(char c, pt::Stream & out)
{
    if( c == '"' )
    {
        out << "\"\"";
    }
    else
    {
        // open question: should the zero (null) character be treated specially?
        out << c;
    }
}
/*
 * Escape a wide character for CSV.
 *
 * The character is first converted to bytes with int_to_utf8() and then
 * every produced byte is passed to esc_to_csv(char, Stream &).
 */
void esc_to_csv(wchar_t val, Stream & out)
{
    char encoded[10];
    std::size_t capacity = sizeof(encoded) / sizeof(char);
    const size_t produced = int_to_utf8(static_cast<int>(val), encoded, capacity);
    size_t index = 0;

    while( index < produced )
    {
        esc_to_csv(encoded[index], out);
        index += 1;
    }
}
/*
 * Escape a zero-terminated string for CSV, one byte at a time.
 * The terminating zero is not written.
 */
void esc_to_csv(const char * c, pt::Stream & out)
{
    while( *c != 0 )
    {
        esc_to_csv(*c, out);
        ++c;
    }
}
/*
 * Escape exactly 'len' bytes starting at 'c' for CSV.
 */
void esc_to_csv(const char * c, std::size_t len, pt::Stream & out)
{
    for( ; len > 0 ; --len, ++c)
    {
        esc_to_csv(*c, out);
    }
}
/*
 * Escape a zero-terminated wide string for CSV, one character at a time.
 * The terminating zero is not written.
 */
void esc_to_csv(const wchar_t * c, pt::Stream & out)
{
    while( *c != 0 )
    {
        esc_to_csv(*c, out);
        ++c;
    }
}
/*
 * Escape exactly 'len' wide characters starting at 'c' for CSV.
 */
void esc_to_csv(const wchar_t * c, size_t len, pt::Stream & out)
{
    for( ; len > 0 ; --len, ++c)
    {
        esc_to_csv(*c, out);
    }
}
void esc_to_csv(const std::string & in, Stream & out)
{
esc_to_csv(in.c_str(), in.size(), out);
}
void esc_to_csv(const std::wstring & in, Stream & out)
{
esc_to_csv(in.c_str(), in.size(), out);
}
}