fixed: use size() when serializing strings - this allows serializing a string which contains a null character

fixed: the null character is now printed as \u0000 in Space format (previously it was printed as \0, which is not correct in JSON); fixed: in serialize_string_buffer(const char * input_str, ...) a temporary fixed was used when copying the input string; added: support for surrogate pairs when reading the \uHHHH format; added: support for parsing the \u{H...} format (only when parsing the Space format)
2021-06-14 13:48:32 +02:00
parent 49c2b478c0
commit 4d70ae9e87
9 changed files with 262 additions and 39 deletions
--- a/src/utf8/utf8.cpp
+++ b/src/utf8/utf8.cpp
@@ -94,6 +94,43 @@ return false;



+bool is_surrogate_char(int c) // returns true when c lies anywhere in the surrogate range (either half of a pair)
+{
+	return (c>=0xD800 && c<=0xDFFF); // inclusive bounds: 0xD800 .. 0xDFFF
+}
+
+
+bool is_first_surrogate_char(int c) // returns true when c can be the first (high) half of a surrogate pair
+{
+	return (c>=0xD800 && c<=0xDBFF); // inclusive bounds: 0xD800 .. 0xDBFF (lower part of the surrogate range)
+}
+
+
+bool is_second_surrogate_char(int c) // returns true when c can be the second (low) half of a surrogate pair
+{
+	return (c>=0xDC00 && c<=0xDFFF); // inclusive bounds: 0xDC00 .. 0xDFFF (upper part of the surrogate range)
+}
+
+
+bool surrogate_pair_to_int(int c1, int c2, int & z) // combines the pair (c1, c2) into one code point stored in z; returns true on success
+{
+	z = 0xFFFD; // default result: U+FFFD "replacement character", left in z whenever false is returned
+
+	if( is_first_surrogate_char(c1) ) // c1 must be in 0xD800 .. 0xDBFF
+	{
+		if( is_second_surrogate_char(c2) ) // c2 must be in 0xDC00 .. 0xDFFF
+		{
+			z = 0x10000 + (((c1 & 0x3FF) << 10) | (c2 & 0x3FF)); // low 10 bits of c1 become the upper bits, low 10 bits of c2 the lower bits, offset by 0x10000
+			return true;
+		}
+	}
+
+	return false; // not a valid first/second pair; z holds 0xFFFD
+}
+
+
+
+
 /*!
 	this function converts one UTF-8 character into one wide-character

@@ -276,6 +313,7 @@ static void int_to_wide(int c, std::wstring & res)



+
 /*!
 	this function converts an utf8 string into wide string (std::wstring)