Files
pikotools/src/convert/misc.cpp
Tomasz Sowa f85f1dade5 improve the Space text conversion methods
Read the whole character from a multibyte string (as int/char32_t) and
then check if it needs to be escaped. Also don't use a tmp stream object
when serializing between wide/char strings.

while here:
- add try_esc_to_space(...) global function
- add wide_to_output_function(const wchar_t * str, size_t len, OutputFunction output_function, int mode)
- add wide_to_output_function(const wchar_t * str, OutputFunction output_function, int mode)
2024-06-19 04:46:00 +02:00

583 lines
8.8 KiB
C++

/*
* This file is a part of PikoTools
* and is distributed under the 2-Clause BSD licence.
* Author: Tomasz Sowa <t.sowa@ttmath.org>
*/
/*
* Copyright (c) 2017-2024, Tomasz Sowa
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* 1. Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
*
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*
*/
#include "misc.h"
#include "inttostr.h"
namespace pt
{
/*
 * Store val in *was_overflow.
 * A null was_overflow pointer is accepted: in that case nothing is stored.
 */
void SetOverflow(bool * was_overflow, bool val)
{
    if( was_overflow == nullptr )
        return;

    *was_overflow = val;
}
/*
 * Write one 16-bit code unit to the out stream as a JSON "\uXXXX" escape,
 * left-padded with zeros to at least four hexadecimal digits.
 *
 * NOTE(review): Toa() is declared elsewhere; it is used here on the
 * assumption that it writes a null-terminated base-16 string to buf and
 * stores the number of digits in len - confirm against inttostr.h
 */
static void esc_to_json_uformat_unit(unsigned long unit, Stream & out)
{
    char buf[17];
    size_t len;

    Toa(unit, buf, sizeof(buf)/sizeof(char), 16, &len);
    out << "\\u";

    // pad to four digits, a JSON \u escape always consumes exactly four
    for(size_t i = len ; i < 4 ; ++i)
    {
        out << '0';
    }

    out << buf;
}

/*
 * Write val to the out stream in the JSON \u escape notation.
 *
 * Values up to 0xFFFF are written as a single \uXXXX sequence.
 * Values from 0x10000 to 0x10FFFF do not fit in four hexadecimal digits,
 * so they are written as a UTF-16 surrogate pair (\uD8xx\uDCxx) as JSON
 * requires; writing them as "\u" followed by five or six digits would be
 * read back as a four digit escape followed by literal digits.
 *
 * Values above 0x10FFFF are not Unicode code points; they are passed
 * through the single sequence path unchanged.
 */
void esc_to_json_uformat(char32_t val, Stream & out)
{
    if( val >= 0x10000 && val <= 0x10FFFF )
    {
        unsigned long offset = (unsigned long)val - 0x10000;

        esc_to_json_uformat_unit(0xD800 + (offset >> 10), out);   // high surrogate: upper 10 bits
        esc_to_json_uformat_unit(0xDC00 + (offset & 0x3FF), out); // low surrogate: lower 10 bits
    }
    else
    {
        esc_to_json_uformat_unit((unsigned long)val, out);
    }
}
/*
 * Try to write val to the out stream as a JSON escape sequence.
 *
 * Return true if val required escaping and its escaped form was put
 * to the out stream. Return false if val needs no escaping; nothing is
 * written then and the caller should output the character itself.
 */
bool try_esc_to_json(char32_t val, Stream & out)
{
    switch( val )
    {
    case '\r':
        out << '\\' << 'r';
        return true;

    case '\n':
        out << '\\' << 'n';
        return true;

    case '\t':
        out << '\\' << 't';
        return true;

    case 0x08:
        out << '\\' << 'b';
        return true;

    case 0x0c:
        out << '\\' << 'f';
        return true;

    case '\\':
        out << '\\' << '\\';
        return true;

    case '"':
        out << '\\' << '\"';
        return true;

    default:
        break;
    }

    // the remaining characters below 32 have no short form, use the \u notation
    if( val < 32 )
    {
        esc_to_json_uformat(val, out);
        return true;
    }

    return false;
}
/*
 * Put a single wide character to the out stream for use in JSON text:
 * its escape sequence if try_esc_to_json() produced one,
 * otherwise the character itself.
 */
void esc_to_json(wchar_t val, Stream & out)
{
    const bool escaped = try_esc_to_json(static_cast<char32_t>(val), out);

    if( escaped )
        return;

    out << val;
}
/*
 * Put a single char to the out stream for use in JSON text:
 * its escape sequence if try_esc_to_json() produced one,
 * otherwise the char itself.
 *
 * The value goes through unsigned char first so that it is always
 * in the range 0-255 before being widened to char32_t.
 */
void esc_to_json(char val, Stream & out)
{
    const char32_t code = static_cast<char32_t>(static_cast<unsigned char>(val));

    if( try_esc_to_json(code, out) )
        return;

    out << val;
}
/*
 * JSON-escape a null-terminated char string and put it to the out stream.
 * Each char is passed to esc_to_json(char, Stream&) in turn;
 * the terminating null is not processed.
 */
void esc_to_json(const char * c, pt::Stream & out)
{
    const char * cursor = c;

    while( *cursor != 0 )
    {
        esc_to_json(*cursor, out);
        ++cursor;
    }
}
/*
 * JSON-escape exactly len chars starting at c and put them to the out stream.
 * Null chars inside the range do not stop the loop, they are passed
 * to esc_to_json(char, Stream&) like any other char.
 */
void esc_to_json(const char * c, std::size_t len, pt::Stream & out)
{
    const char * cursor = c;

    for(std::size_t remaining = len ; remaining != 0 ; --remaining)
    {
        esc_to_json(*cursor, out);
        ++cursor;
    }
}
/*
 * JSON-escape a null-terminated wide string and put it to the out stream.
 * Each character is passed to esc_to_json(wchar_t, Stream&) in turn;
 * the terminating null is not processed.
 */
void esc_to_json(const wchar_t * c, pt::Stream & out)
{
    const wchar_t * cursor = c;

    while( *cursor != 0 )
    {
        esc_to_json(*cursor, out);
        ++cursor;
    }
}
/*
 * JSON-escape exactly len wide characters starting at c
 * and put them to the out stream.
 * Null characters inside the range do not stop the loop.
 */
void esc_to_json(const wchar_t * c, size_t len, pt::Stream & out)
{
    const wchar_t * cursor = c;

    for(size_t remaining = len ; remaining != 0 ; --remaining)
    {
        esc_to_json(*cursor, out);
        ++cursor;
    }
}
/*
 * JSON-escape the whole std::string (all in.size() chars)
 * and put it to the out stream.
 */
void esc_to_json(const std::string & in, Stream & out)
{
    const char * text = in.c_str();
    const std::string::size_type text_len = in.size();

    esc_to_json(text, text_len, out);
}
/*
 * JSON-escape the whole std::wstring (all in.size() characters)
 * and put it to the out stream.
 */
void esc_to_json(const std::wstring & in, Stream & out)
{
    const wchar_t * text = in.c_str();
    const std::wstring::size_type text_len = in.size();

    esc_to_json(text, text_len, out);
}
/*
 * Try to write val to the out stream as an XML entity.
 *
 * Return true if val was handled here: either its entity was put to
 * the out stream (< > & ") or, for the null character, nothing was
 * written at all. Return false if val needs no escaping; the caller
 * should then output the character itself.
 */
bool try_esc_to_xml(char32_t val, Stream & out)
{
    switch( val )
    {
    case 0:
        // null character is invalid in XML 1.0 and 1.1
        // https://en.wikipedia.org/wiki/Valid_characters_in_XML
        // report it as handled but put nothing to the out stream
        return true;

    case '<':
        out << "&lt;";
        return true;

    case '>':
        out << "&gt;";
        return true;

    case '&':
        out << "&amp;";
        return true;

    case '"':
        out << "&quot;";
        return true;

    default:
        return false;
    }
}
/*
 * Put a single wide character to the out stream for use in XML text:
 * whatever try_esc_to_xml() wrote if it handled the character,
 * otherwise the character itself.
 */
void esc_to_xml(wchar_t val, Stream & out)
{
    const bool handled = try_esc_to_xml(static_cast<char32_t>(val), out);

    if( handled )
        return;

    out << val;
}
/*
 * Put a single char to the out stream for use in XML text:
 * whatever try_esc_to_xml() wrote if it handled the char,
 * otherwise the char itself.
 *
 * The value goes through unsigned char first so that it is always
 * in the range 0-255 before being widened to char32_t.
 */
void esc_to_xml(char val, Stream & out)
{
    const char32_t code = static_cast<char32_t>(static_cast<unsigned char>(val));

    if( try_esc_to_xml(code, out) )
        return;

    out << val;
}
/*
 * XML-escape a null-terminated char string and put it to the out stream.
 * Each char is passed to esc_to_xml(char, Stream&) in turn;
 * the terminating null is not processed.
 */
void esc_to_xml(const char * c, pt::Stream & out)
{
    const char * cursor = c;

    while( *cursor != 0 )
    {
        esc_to_xml(*cursor, out);
        ++cursor;
    }
}
/*
 * XML-escape exactly len chars starting at c and put them to the out stream.
 * Null chars inside the range do not stop the loop, they are passed
 * to esc_to_xml(char, Stream&) like any other char.
 */
void esc_to_xml(const char * c, std::size_t len, pt::Stream & out)
{
    const char * cursor = c;

    for(std::size_t remaining = len ; remaining != 0 ; --remaining)
    {
        esc_to_xml(*cursor, out);
        ++cursor;
    }
}
/*
 * XML-escape a null-terminated wide string and put it to the out stream.
 * Each character is passed to esc_to_xml(wchar_t, Stream&) in turn;
 * the terminating null is not processed.
 */
void esc_to_xml(const wchar_t * c, pt::Stream & out)
{
    const wchar_t * cursor = c;

    while( *cursor != 0 )
    {
        esc_to_xml(*cursor, out);
        ++cursor;
    }
}
/*
 * XML-escape exactly len wide characters starting at c
 * and put them to the out stream.
 * Null characters inside the range do not stop the loop.
 */
void esc_to_xml(const wchar_t * c, size_t len, pt::Stream & out)
{
    const wchar_t * cursor = c;

    for(size_t remaining = len ; remaining != 0 ; --remaining)
    {
        esc_to_xml(*cursor, out);
        ++cursor;
    }
}
/*
 * XML-escape the whole std::string (all in.size() chars)
 * and put it to the out stream.
 */
void esc_to_xml(const std::string & in, Stream & out)
{
    const char * text = in.c_str();
    const std::string::size_type text_len = in.size();

    esc_to_xml(text, text_len, out);
}
/*
 * XML-escape the whole std::wstring (all in.size() characters)
 * and put it to the out stream.
 */
void esc_to_xml(const std::wstring & in, Stream & out)
{
    const wchar_t * text = in.c_str();
    const std::wstring::size_type text_len = in.size();

    esc_to_xml(text, text_len, out);
}
/*
 * Try to write val to the out stream in its CSV-escaped form.
 *
 * Return true if val was handled here: a double quote is put to the
 * out stream as two double quotes, a null character is dropped
 * (nothing is written). Return false for every other character;
 * the caller should then output the character itself.
 */
bool try_esc_to_csv(char32_t val, pt::Stream & out)
{
    if( val == '"' )
    {
        out << "\"\"";
        return true;
    }

    // null characters are invalid in text files:
    // report them as handled but put nothing to the out stream
    return val == 0;
}
/*
 * Put a single wide character to the out stream for use in a CSV field:
 * whatever try_esc_to_csv() wrote if it handled the character,
 * otherwise the character itself.
 */
void esc_to_csv(wchar_t val, pt::Stream & out)
{
    const bool handled = try_esc_to_csv(static_cast<char32_t>(val), out);

    if( handled )
        return;

    out << val;
}
/*
 * Put a single char to the out stream for use in a CSV field:
 * whatever try_esc_to_csv() wrote if it handled the char,
 * otherwise the char itself.
 *
 * The value goes through unsigned char first so that it is always
 * in the range 0-255 before being widened to char32_t.
 */
void esc_to_csv(char val, Stream & out)
{
    const char32_t code = static_cast<char32_t>(static_cast<unsigned char>(val));

    if( try_esc_to_csv(code, out) )
        return;

    out << val;
}
/*
 * CSV-escape a null-terminated char string and put it to the out stream.
 * Each char is passed to esc_to_csv(char, Stream&) in turn;
 * the terminating null is not processed.
 */
void esc_to_csv(const char * c, pt::Stream & out)
{
    const char * cursor = c;

    while( *cursor != 0 )
    {
        esc_to_csv(*cursor, out);
        ++cursor;
    }
}
/*
 * CSV-escape exactly len chars starting at c and put them to the out stream.
 * Null chars inside the range do not stop the loop, they are passed
 * to esc_to_csv(char, Stream&) like any other char.
 */
void esc_to_csv(const char * c, std::size_t len, pt::Stream & out)
{
    const char * cursor = c;

    for(std::size_t remaining = len ; remaining != 0 ; --remaining)
    {
        esc_to_csv(*cursor, out);
        ++cursor;
    }
}
/*
 * CSV-escape a null-terminated wide string and put it to the out stream.
 * Each character is passed to esc_to_csv(wchar_t, Stream&) in turn;
 * the terminating null is not processed.
 */
void esc_to_csv(const wchar_t * c, pt::Stream & out)
{
    const wchar_t * cursor = c;

    while( *cursor != 0 )
    {
        esc_to_csv(*cursor, out);
        ++cursor;
    }
}
/*
 * CSV-escape exactly len wide characters starting at c
 * and put them to the out stream.
 * Null characters inside the range do not stop the loop.
 */
void esc_to_csv(const wchar_t * c, size_t len, pt::Stream & out)
{
    const wchar_t * cursor = c;

    for(size_t remaining = len ; remaining != 0 ; --remaining)
    {
        esc_to_csv(*cursor, out);
        ++cursor;
    }
}
/*
 * CSV-escape the whole std::string (all in.size() chars)
 * and put it to the out stream.
 */
void esc_to_csv(const std::string & in, Stream & out)
{
    const char * text = in.c_str();
    const std::string::size_type text_len = in.size();

    esc_to_csv(text, text_len, out);
}
/*
 * CSV-escape the whole std::wstring (all in.size() characters)
 * and put it to the out stream.
 */
void esc_to_csv(const std::wstring & in, Stream & out)
{
    const wchar_t * text = in.c_str();
    const std::wstring::size_type text_len = in.size();

    esc_to_csv(text, text_len, out);
}
/*
 * Try to write c to the out stream in a form escaped for TeX.
 *
 * Return true if c was handled here: either its replacement sequence
 * was put to the out stream or, for the null character, nothing was
 * written. Return false for characters not listed below; nothing is
 * written for them.
 */
bool try_esc_to_tex(char32_t c, pt::Stream & out)
{
    switch(c)
    {
    case 0:
        // ignore the null character
        return true;

    case '{':
        out << "{\\char`{}";
        return true;

    case '}':
        out << "{\\char`}}";
        return true;

    case '\\':
        out << "{\\char`\\\\}";
        return true;

    case '#':
        out << "{\\#}";
        return true;

    case '$':
        out << "{\\$}";
        return true;

    case '%':
        out << "{\\%}";
        return true;

    case '&':
        out << "{\\&}";
        return true;

    case '~':
        out << "{\\~\\relax}";
        return true;

    case '^':
        out << "{\\^\\relax}";
        return true;

    case '<':
        out << "{\\char`<}";
        return true;

    case '>':
        out << "{\\char`>}";
        return true;

    default:
        return false;
    }
}
/*
 * Try to write c to the out stream as an HTML entity.
 *
 * Return true if c is one of the characters listed below and its entity
 * (a wide string literal) was put to the out stream. Return false for
 * every other character; nothing is written for them.
 */
bool try_esc_to_html(char32_t c, pt::Stream & out)
{
    switch(c)
    {
    case 0:
        out << L"&#0;";
        return true;

    case '<':
        out << L"&lt;";
        return true;

    case '>':
        out << L"&gt;";
        return true;

    case '&':
        out << L"&amp;";
        return true;

    case '\"':
        out << L"&quot;";
        return true;

    case '\'':
        // it could be "&apos;" but IE8 has a problem with it
        // (&apos; is valid in HTML5, but not in HTML4)
        out << L"&#39;";
        return true;

    case 10:
        out << L"&#10;";
        return true;

    case 13:
        out << L"&#13;";
        return true;

    default:
        return false;
    }
}
/*
 * Try to write c to the out stream as an escape sequence of the Space
 * text format.
 *
 * Return true if c is one of the characters listed below and its
 * backslash sequence was put to the out stream, character by character.
 * Return false for every other character; nothing is written for them.
 */
bool try_esc_to_space(char32_t c, pt::Stream & out)
{
    switch(c)
    {
    case 0:
        out << '\\' << 'u' << '{' << '0' << '}';
        return true;

    case '\r': // 13
        out << '\\' << 'r';
        return true;

    case '\n': // 10
        out << '\\' << 'n';
        return true;

    case '\\':
        out << '\\' << '\\';
        return true;

    case '"':
        out << '\\' << '\"';
        return true;

    case '\b': // 8
        out << '\\' << 'b';
        return true;

    case '\f': // 12
        out << '\\' << 'f';
        return true;

    default:
        return false;
    }
}
}