Read the whole character from a multibyte string (as int/char32_t) and only then check whether it needs to be escaped. Also, don't use a temporary stream object when serializing between wide and char strings. While here: - add the try_esc_to_space(...) global function - add wide_to_output_function(const wchar_t * str, size_t len, OutputFunction output_function, int mode) - add wide_to_output_function(const wchar_t * str, OutputFunction output_function, int mode)
583 lines
8.8 KiB
C++
583 lines
8.8 KiB
C++
/*
|
|
* This file is a part of PikoTools
|
|
* and is distributed under the 2-Clause BSD licence.
|
|
* Author: Tomasz Sowa <t.sowa@ttmath.org>
|
|
*/
|
|
|
|
/*
|
|
* Copyright (c) 2017-2024, Tomasz Sowa
|
|
* All rights reserved.
|
|
*
|
|
* Redistribution and use in source and binary forms, with or without
|
|
* modification, are permitted provided that the following conditions are met:
|
|
*
|
|
* 1. Redistributions of source code must retain the above copyright notice,
|
|
* this list of conditions and the following disclaimer.
|
|
*
|
|
* 2. Redistributions in binary form must reproduce the above copyright
|
|
* notice, this list of conditions and the following disclaimer in the
|
|
* documentation and/or other materials provided with the distribution.
|
|
*
|
|
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
|
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
|
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
|
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
|
|
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
|
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
|
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
|
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
|
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
|
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
|
* POSSIBILITY OF SUCH DAMAGE.
|
|
*
|
|
*/
|
|
|
|
#include "misc.h"
|
|
#include "inttostr.h"
|
|
|
|
|
|
namespace pt
|
|
{
|
|
|
|
|
|
/*
 * store val in the flag pointed to by was_overflow
 *
 * was_overflow can be a null pointer, in such a case the function does nothing
 */
void SetOverflow(bool * was_overflow, bool val)
{
	if( was_overflow == nullptr )
	{
		return;
	}

	*was_overflow = val;
}
|
|
|
|
|
|
|
|
void esc_to_json_uformat(char32_t val, Stream & out)
|
|
{
|
|
char buf[17];
|
|
size_t len;
|
|
|
|
Toa((unsigned long)val, buf, sizeof(buf)/sizeof(char), 16, &len);
|
|
out << "\\u";
|
|
|
|
if( len < 4 )
|
|
{
|
|
for(size_t i=0 ; i < (4-len) ; ++i)
|
|
{
|
|
out << '0';
|
|
}
|
|
}
|
|
|
|
out << buf;
|
|
}
|
|
|
|
|
|
/*
|
|
* return true if the val character was escaped and put to the out stream
|
|
* if the character is invalid for such a stream then only return true
|
|
* but not put it to the stream
|
|
*/
|
|
bool try_esc_to_json(char32_t val, Stream & out)
|
|
{
|
|
bool status = false;
|
|
|
|
if( val == '\r' )
|
|
{
|
|
out << '\\' << 'r';
|
|
status = true;
|
|
}
|
|
else
|
|
if( val == '\n' )
|
|
{
|
|
out << '\\' << 'n';
|
|
status = true;
|
|
}
|
|
else
|
|
if( val == '\t' )
|
|
{
|
|
out << '\\' << 't';
|
|
status = true;
|
|
}
|
|
else
|
|
if( val == 0x08 )
|
|
{
|
|
out << '\\' << 'b';
|
|
status = true;
|
|
}
|
|
else
|
|
if( val == 0x0c )
|
|
{
|
|
out << '\\' << 'f';
|
|
status = true;
|
|
}
|
|
else
|
|
if( val == '\\' )
|
|
{
|
|
out << '\\' << '\\';
|
|
status = true;
|
|
}
|
|
else
|
|
if( val == '"' )
|
|
{
|
|
out << '\\' << '\"';
|
|
status = true;
|
|
}
|
|
else
|
|
if( val < 32 )
|
|
{
|
|
esc_to_json_uformat(val, out);
|
|
status = true;
|
|
}
|
|
|
|
return status;
|
|
}
|
|
|
|
|
|
void esc_to_json(wchar_t val, Stream & out)
|
|
{
|
|
if( !try_esc_to_json((char32_t)(val), out) )
|
|
{
|
|
out << val;
|
|
}
|
|
}
|
|
|
|
|
|
void esc_to_json(char val, Stream & out)
|
|
{
|
|
if( !try_esc_to_json((char32_t)(unsigned char)val, out) )
|
|
{
|
|
out << val;
|
|
}
|
|
}
|
|
|
|
|
|
|
|
void esc_to_json(const char * c, pt::Stream & out)
|
|
{
|
|
for(size_t i = 0 ; c[i] != 0 ; ++i)
|
|
{
|
|
esc_to_json(c[i], out);
|
|
}
|
|
}
|
|
|
|
|
|
void esc_to_json(const char * c, std::size_t len, pt::Stream & out)
|
|
{
|
|
for(size_t i = 0 ; i < len ; ++i)
|
|
{
|
|
esc_to_json(c[i], out);
|
|
}
|
|
}
|
|
|
|
|
|
void esc_to_json(const wchar_t * c, pt::Stream & out)
|
|
{
|
|
for(size_t i = 0 ; c[i] != 0 ; ++i)
|
|
{
|
|
esc_to_json(c[i], out);
|
|
}
|
|
}
|
|
|
|
|
|
void esc_to_json(const wchar_t * c, size_t len, pt::Stream & out)
|
|
{
|
|
for(size_t i = 0 ; i < len ; ++i)
|
|
{
|
|
esc_to_json(c[i], out);
|
|
}
|
|
}
|
|
|
|
|
|
void esc_to_json(const std::string & in, Stream & out)
|
|
{
|
|
esc_to_json(in.c_str(), in.size(), out);
|
|
}
|
|
|
|
|
|
void esc_to_json(const std::wstring & in, Stream & out)
|
|
{
|
|
esc_to_json(in.c_str(), in.size(), out);
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
* return true if the val character was escaped and put to the out stream
|
|
* if the character is invalid for such a stream then only return true
|
|
* but not put it to the stream
|
|
*/
|
|
bool try_esc_to_xml(char32_t val, Stream & out)
|
|
{
|
|
bool status = false;
|
|
|
|
if( val == 0 )
|
|
{
|
|
// null character is invalid in XML 1.0 and 1.1
|
|
// https://en.wikipedia.org/wiki/Valid_characters_in_XML
|
|
// return true but not put the char to the out stream
|
|
status = true;
|
|
}
|
|
else
|
|
if( val == '<')
|
|
{
|
|
out << "<";
|
|
status = true;
|
|
}
|
|
else
|
|
if( val == '>')
|
|
{
|
|
out << ">";
|
|
status = true;
|
|
}
|
|
else
|
|
if( val == '&')
|
|
{
|
|
out << "&";
|
|
status = true;
|
|
}
|
|
else
|
|
if( val == '"')
|
|
{
|
|
out << """;
|
|
status = true;
|
|
}
|
|
|
|
return status;
|
|
}
|
|
|
|
|
|
void esc_to_xml(wchar_t val, Stream & out)
|
|
{
|
|
if( !try_esc_to_xml((char32_t)val, out) )
|
|
{
|
|
out << val;
|
|
}
|
|
}
|
|
|
|
|
|
void esc_to_xml(char val, Stream & out)
|
|
{
|
|
if( !try_esc_to_xml((char32_t)(unsigned char)val, out) )
|
|
{
|
|
out << val;
|
|
}
|
|
}
|
|
|
|
|
|
void esc_to_xml(const char * c, pt::Stream & out)
|
|
{
|
|
for(size_t i = 0 ; c[i] != 0 ; ++i)
|
|
{
|
|
esc_to_xml(c[i], out);
|
|
}
|
|
}
|
|
|
|
|
|
void esc_to_xml(const char * c, std::size_t len, pt::Stream & out)
|
|
{
|
|
for(size_t i = 0 ; i < len ; ++i)
|
|
{
|
|
esc_to_xml(c[i], out);
|
|
}
|
|
}
|
|
|
|
|
|
void esc_to_xml(const wchar_t * c, pt::Stream & out)
|
|
{
|
|
for(size_t i = 0 ; c[i] != 0 ; ++i)
|
|
{
|
|
esc_to_xml(c[i], out);
|
|
}
|
|
}
|
|
|
|
|
|
void esc_to_xml(const wchar_t * c, size_t len, pt::Stream & out)
|
|
{
|
|
for(size_t i = 0 ; i < len ; ++i)
|
|
{
|
|
esc_to_xml(c[i], out);
|
|
}
|
|
}
|
|
|
|
|
|
void esc_to_xml(const std::string & in, Stream & out)
|
|
{
|
|
esc_to_xml(in.c_str(), in.size(), out);
|
|
}
|
|
|
|
|
|
void esc_to_xml(const std::wstring & in, Stream & out)
|
|
{
|
|
esc_to_xml(in.c_str(), in.size(), out);
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
/*
|
|
* return true if the val character was escaped and put to the out stream
|
|
* if the character is invalid for such a stream then only return true
|
|
* but not put it to the stream
|
|
*/
|
|
bool try_esc_to_csv(char32_t val, pt::Stream & out)
|
|
{
|
|
bool status = false;
|
|
|
|
if( val == 0 )
|
|
{
|
|
// null characters are invalid in text files
|
|
// return true but not put to the out stream
|
|
status = true;
|
|
}
|
|
else
|
|
if( val == '"' )
|
|
{
|
|
out << "\"\"";
|
|
status = true;
|
|
}
|
|
|
|
return status;
|
|
}
|
|
|
|
|
|
void esc_to_csv(wchar_t val, pt::Stream & out)
|
|
{
|
|
if( !try_esc_to_csv((char32_t)val, out) )
|
|
{
|
|
out << val;
|
|
}
|
|
}
|
|
|
|
|
|
void esc_to_csv(char val, Stream & out)
|
|
{
|
|
if( !try_esc_to_csv((char32_t)(unsigned char)val, out) )
|
|
{
|
|
out << val;
|
|
}
|
|
}
|
|
|
|
|
|
void esc_to_csv(const char * c, pt::Stream & out)
|
|
{
|
|
for(size_t i = 0 ; c[i] != 0 ; ++i)
|
|
{
|
|
esc_to_csv(c[i], out);
|
|
}
|
|
}
|
|
|
|
|
|
void esc_to_csv(const char * c, std::size_t len, pt::Stream & out)
|
|
{
|
|
for(size_t i = 0 ; i < len ; ++i)
|
|
{
|
|
esc_to_csv(c[i], out);
|
|
}
|
|
}
|
|
|
|
|
|
void esc_to_csv(const wchar_t * c, pt::Stream & out)
|
|
{
|
|
for(size_t i = 0 ; c[i] != 0 ; ++i)
|
|
{
|
|
esc_to_csv(c[i], out);
|
|
}
|
|
}
|
|
|
|
|
|
void esc_to_csv(const wchar_t * c, size_t len, pt::Stream & out)
|
|
{
|
|
for(size_t i = 0 ; i < len ; ++i)
|
|
{
|
|
esc_to_csv(c[i], out);
|
|
}
|
|
}
|
|
|
|
|
|
void esc_to_csv(const std::string & in, Stream & out)
|
|
{
|
|
esc_to_csv(in.c_str(), in.size(), out);
|
|
}
|
|
|
|
|
|
void esc_to_csv(const std::wstring & in, Stream & out)
|
|
{
|
|
esc_to_csv(in.c_str(), in.size(), out);
|
|
}
|
|
|
|
|
|
bool try_esc_to_tex(char32_t c, pt::Stream & out)
|
|
{
|
|
bool status = false;
|
|
|
|
switch(c)
|
|
{
|
|
case 0:
|
|
status = true;
|
|
break; // ignore the null character
|
|
|
|
case '{':
|
|
out << "{\\char`{}";
|
|
status = true;
|
|
break;
|
|
|
|
case '}':
|
|
out << "{\\char`}}";
|
|
status = true;
|
|
break;
|
|
|
|
case '\\':
|
|
out << "{\\char`\\\\}";
|
|
status = true;
|
|
break;
|
|
|
|
case '#':
|
|
out << "{\\#}";
|
|
status = true;
|
|
break;
|
|
|
|
case '$':
|
|
out << "{\\$}";
|
|
status = true;
|
|
break;
|
|
|
|
case '%':
|
|
out << "{\\%}";
|
|
status = true;
|
|
break;
|
|
|
|
case '&':
|
|
out << "{\\&}";
|
|
status = true;
|
|
break;
|
|
|
|
case '~':
|
|
out << "{\\~\\relax}";
|
|
status = true;
|
|
break;
|
|
|
|
case '^':
|
|
out << "{\\^\\relax}";
|
|
status = true;
|
|
break;
|
|
|
|
case '<':
|
|
out << "{\\char`<}";
|
|
status = true;
|
|
break;
|
|
|
|
case '>':
|
|
out << "{\\char`>}";
|
|
status = true;
|
|
break;
|
|
}
|
|
|
|
return status;
|
|
}
|
|
|
|
|
|
|
|
bool try_esc_to_html(char32_t c, pt::Stream & out)
|
|
{
|
|
bool status = false;
|
|
|
|
switch(c)
|
|
{
|
|
case 0:
|
|
out << L"�";
|
|
status = true;
|
|
break;
|
|
|
|
case '<':
|
|
out << L"<";
|
|
status = true;
|
|
break;
|
|
|
|
case '>':
|
|
out << L">";
|
|
status = true;
|
|
break;
|
|
|
|
case '&':
|
|
out << L"&";
|
|
status = true;
|
|
break;
|
|
|
|
case '\"':
|
|
out << L""";
|
|
status = true;
|
|
break;
|
|
|
|
case '\'':
|
|
out << L"'"; // (it is "'" but IE8 has a problem with ') (' is valid in HTML5, but not HTML4)
|
|
status = true;
|
|
break;
|
|
|
|
case 10:
|
|
out << L" ";
|
|
status = true;
|
|
break;
|
|
|
|
case 13:
|
|
out << L" ";
|
|
status = true;
|
|
break;
|
|
}
|
|
|
|
return status;
|
|
}
|
|
|
|
|
|
bool try_esc_to_space(char32_t c, pt::Stream & out)
|
|
{
|
|
bool status = false;
|
|
|
|
switch(c)
|
|
{
|
|
case 0:
|
|
out << '\\';
|
|
out << 'u' << '{' << '0' << '}';
|
|
status = true;
|
|
break;
|
|
|
|
case '\r': // 13
|
|
out << '\\';
|
|
out << 'r';
|
|
status = true;
|
|
break;
|
|
|
|
case '\n': // 10
|
|
out << '\\';
|
|
out << 'n';
|
|
status = true;
|
|
break;
|
|
|
|
case '\\':
|
|
out << '\\';
|
|
out << '\\';
|
|
status = true;
|
|
break;
|
|
|
|
case '"':
|
|
out << '\\';
|
|
out << '\"';
|
|
status = true;
|
|
break;
|
|
|
|
case '\b': // 8
|
|
out << '\\';
|
|
out << 'b';
|
|
status = true;
|
|
break;
|
|
|
|
case '\f': // 12
|
|
out << '\\';
|
|
out << 'f';
|
|
status = true;
|
|
break;
|
|
}
|
|
|
|
return status;
|
|
}
|
|
|
|
|
|
}
|
|
|