improve AcceptBaseParser parsing algorithm

remove AcceptParser - it is no longer needed because AcceptBaseParser can now prepare the table itself
2022-02-02 17:58:27 +01:00
parent d0d2cfb22c
commit 75daf37bbd
10 changed files with 230 additions and 207 deletions
--- a/winixd/utils/Makefile.dep
+++ b/winixd/utils/Makefile.dep
@@ -1,3 +1,53 @@
 # DO NOT DELETE

-acceptbaseparser.o: acceptbaseparser.h ../../../tito/src/misc.h
+acceptbaseparser.o: acceptbaseparser.h ../../../winix/winixd/core/header.h
+acceptbaseparser.o: ../../../winix/winixd/core/log.h
+acceptbaseparser.o: ../../../winix/winixd/core/logmanipulators.h
+acceptbaseparser.o: ../../../pikotools/src/log/log.h
+acceptbaseparser.o: ../../../pikotools/src/textstream/textstream.h
+acceptbaseparser.o: ../../../pikotools/src/textstream/stream.h
+acceptbaseparser.o: ../../../pikotools/src/space/space.h
+acceptbaseparser.o: ../../../pikotools/src/textstream/types.h
+acceptbaseparser.o: ../../../pikotools/src/convert/inttostr.h
+acceptbaseparser.o: ../../../pikotools/src/utf8/utf8.h
+acceptbaseparser.o: ../../../pikotools/src/textstream/stream.h
+acceptbaseparser.o: ../../../pikotools/src/utf8/utf8_templates.h
+acceptbaseparser.o: ../../../pikotools/src/utf8/utf8_private.h
+acceptbaseparser.o: ../../../pikotools/src/date/date.h
+acceptbaseparser.o: ../../../pikotools/src/membuffer/membuffer.h
+acceptbaseparser.o: ../../../pikotools/src/textstream/types.h
+acceptbaseparser.o: ../../../pikotools/src/log/filelog.h
+acceptbaseparser.o: ../../../morm/src/morm.h ../../../morm/src/morm_types.h
+acceptbaseparser.o: ../../../morm/src/model.h
+acceptbaseparser.o: ../../../morm/src/modelconnector.h
+acceptbaseparser.o: ../../../morm/src/clearer.h
+acceptbaseparser.o: ../../../morm/src/dbconnector.h
+acceptbaseparser.o: ../../../morm/src/queryresult.h ../../../morm/src/ft.h
+acceptbaseparser.o: ../../../morm/src/flatconnector.h
+acceptbaseparser.o: ../../../morm/src/dbexpression.h
+acceptbaseparser.o: ../../../morm/src/baseexpression.h
+acceptbaseparser.o: ../../../morm/src/modelenv.h
+acceptbaseparser.o: ../../../morm/src/modeldata.h
+acceptbaseparser.o: ../../../morm/src/cursorhelper.h
+acceptbaseparser.o: ../../../morm/src/finderhelper.h
+acceptbaseparser.o: ../../../morm/src/fieldvaluehelper.h
+acceptbaseparser.o: ../../../morm/src/wrapper.h
+acceptbaseparser.o: ../../../morm/src/spacewrapper.h
+acceptbaseparser.o: ../../../morm/src/baseobjectwrapper.h
+acceptbaseparser.o: ../../../morm/src/modelcontainerwrapper.h
+acceptbaseparser.o: ../../../pikotools/src/convert/text.h
+acceptbaseparser.o: ../../../morm/src/flatexpression.h
+acceptbaseparser.o: ../../../morm/src/finder.h ../../../morm/src/cursor.h
+acceptbaseparser.o: ../../../morm/src/jsonexpression.h
+acceptbaseparser.o: ../../../morm/src/postgresqlexpression.h
+acceptbaseparser.o: ../../../morm/src/jsonconnector.h
+acceptbaseparser.o: ../../../morm/src/postgresqlconnector.h
+acceptbaseparser.o: ../../../morm/src/postgresqlqueryresult.h
+acceptbaseparser.o: ../../../pikotools/src/convert/convert.h
+acceptbaseparser.o: ../../../pikotools/src/convert/inttostr.h
+acceptbaseparser.o: ../../../pikotools/src/convert/patternreplacer.h
+acceptbaseparser.o: ../../../pikotools/src/convert/strtoint.h
+acceptbaseparser.o: ../../../pikotools/src/convert/text.h
+acceptbaseparser.o: ../../../pikotools/src/convert/misc.h
+acceptbaseparser.o: ../../../pikotools/src/utf8/utf8_stream.h
+acceptbaseparser.o: ../../../pikotools/src/convert/double.h
--- a/winixd/utils/acceptbaseparser.cpp
+++ b/winixd/utils/acceptbaseparser.cpp
@@ -5,7 +5,7 @@
 */

 /* 
- * Copyright (c) 2008-2014, Tomasz Sowa
+ * Copyright (c) 2008-2022, Tomasz Sowa
 * All rights reserved.
 * 
 * Redistribution and use in source and binary forms, with or without
@@ -32,107 +32,167 @@
 * 
 */
 
-#include <wchar.h>
 #include "acceptbaseparser.h"
-#include "core/misc.h"
-
+#include <algorithm>
+#include "convert/convert.h"


 namespace Winix
 {


-
-
-bool AcceptBaseParser::IsWhite(int c)
+// Default constructor with an empty body.
+// The text pointer and q are not set here, they are assigned later by read() and read_loop().
+AcceptBaseParser::AcceptBaseParser()
 {
-	if( c==' ' || c=='\t' )
-		return true;
+}

-return false;
+// Destructor (declared virtual in the header) with an empty body.
+AcceptBaseParser::~AcceptBaseParser()
+{
 }


-void AcceptBaseParser::skip_white()
+
+// Returns true when c is the given delimiter.
+// A delimiter equal to zero means "not used" and never matches.
+bool AcceptBaseParser::is_delimiter(wchar_t c, wchar_t delimiter)
 {
-	while( IsWhite(*text) )
+	if( delimiter == 0 )
+		return false;
+
+	return c == delimiter;
+}
+
+
+// Returns true when c is equal to any of the three delimiters.
+// The delimiters are checked in the given order, a delimiter equal to zero is ignored.
+bool AcceptBaseParser::is_delimiter(wchar_t c, wchar_t delimiter1, wchar_t delimiter2, wchar_t delimiter3)
+{
+	if( is_delimiter(c, delimiter1) )
+		return true;
+
+	if( is_delimiter(c, delimiter2) )
+		return true;
+
+	return is_delimiter(c, delimiter3);
+}
+
+
+// Read characters from the current position (text) into token until one of the delimiters
+// or the terminating zero is found. On return text points at that delimiter or at the zero.
+// At most max_len characters are stored, the rest of a longer token is consumed but dropped.
+// pt::skip_white() is called first and pt::trim_white() at the end
+// (presumably they remove white characters around the token - the meaning of the two false flags is not visible here).
+void AcceptBaseParser::read_token(std::wstring & token, size_t max_len, wchar_t delimiter1, wchar_t delimiter2, wchar_t delimiter3)
+{
+	token.clear();
+	text = pt::skip_white(text, false, false);
+
+	for(;;)
+	{
+		const wchar_t current = *text;
+
+		if( current == 0 || is_delimiter(current, delimiter1, delimiter2, delimiter3) )
+			break;
+
+		if( token.size() < max_len )
+			token += current;
+
 		++text;
+	}
+
+	pt::trim_white(token);
 }


+// Read the name of the next entry (everything up to a ',' or ';' or the end of the string)
+// into the name member and report it with parsed_name() when it is not empty.
+void AcceptBaseParser::read_name()
+{
+	read_token(name, MAX_NAME_LENGTH, ',', ';');
+
+	if( name.empty() )
+		return;
+
+	parsed_name(name);
+}


 void AcceptBaseParser::read_parameter()
 {
-	param.clear();
-	skip_white();
+	param_value.clear();
+	read_token(param, MAX_PARAM_LENGTH, '=', ',', ';');

-	while( *text!=0 && *text!=',' && *text!=';' )
+	if( *text == '=' )
 	{
-		param += *text;
 		++text;
+		read_token(param_value, MAX_PARAM_VALUE_LENGTH, ';', ',');
 	}

-	TrimWhite(param);
+	if( !name.empty() && !param.empty() )
+	{
+		parsed_param(param, param_value);
+	}
+
+	if( param == L"q" && !param_value.empty() )
+	{
+		q = pt::to_double(param_value);
+	}
 }


-void AcceptBaseParser::ReadQ()
+// Main loop: read consecutive entries "name ; param = value ; ..." separated by commas
+// until the terminating zero of text is reached.
+// For each entry with a non-empty name and a weight q greater than zero parsed_name_q() is called
+// and, when header_values is not null and the table has less than max_len items
+// (max_len equal to zero means no limit), the name and the weight are appended to header_values.
+void AcceptBaseParser::read_loop(std::vector<HeaderValue> * header_values, size_t max_len)
 {
-	q = 1.0;
-	skip_white();
+	while( *text != 0 )
+	{
+		// the default weight, read_parameter() changes it when a q parameter is given
+		q = 1.0;
+		read_name();

-	if( *text != ';' )
-		return;
+		// each semicolon starts one parameter
+		while( *text == ';' )
+		{
+			++text;
+			read_parameter();
+		}

-	++text; // skipping a semicolon
+		if( !name.empty() && q > 0.0 )
+		{
+			// a weight cannot be greater than 1.0
+			if( q > 1.0 )
+				q = 1.0;

-	while( *text!=0 && *text!=',' && *text!='=' )
-		// skipping until ',' or '='
-		++text;
+			parsed_name_q(name, q);

-	if( *text==0 || *text==',' )
-		return;
+			if( header_values && (max_len == 0 || header_values->size() < max_len) )
+			{
+				header_values->resize(header_values->size() + 1);
+				header_values->back().value = name;
+				header_values->back().weight = q;
+			}
+		}

-	++text; // skipping '='
-
-	skip_white();
-	q = wcstod(text, (wchar_t**)&text);
+		// skipping the comma at which the entry has ended
+		if( *text != 0 )
+			++text;
+	}
 }


-void AcceptBaseParser::SkipParam()
+// Set str as the input string, call init() and run read_loop().
+// The name, param and param_value working buffers are cleared afterwards.
+// str is dereferenced by read_loop() without a null check.
+void AcceptBaseParser::read(const wchar_t * str, std::vector<HeaderValue> * header_values, size_t max_len)
 {
-	skip_white();
+	text = str;
+	init();

-	if( *text == ',' )
-		++text;
+	read_loop(header_values, max_len);
+
+	name.clear();
+	param.clear();
+	param_value.clear();
 }


-
+// Parse str without collecting a table of values (read() gets a null header_values pointer),
+// the parsed items are reported only through the virtual parsed_name(), parsed_param() and parsed_name_q() methods.
 void AcceptBaseParser::parse(const wchar_t * str)
 {
-	text = str;
-	init();
-	
-	while( *text != 0 )
-	{
-		read_parameter();
-		ReadQ();
-		SkipParam();
-		Param(param, q);
-	}
+	read(str, nullptr, 0);
 }


-
+// Overload for a std::wstring, str.c_str() is passed to parse(const wchar_t *).
 void AcceptBaseParser::parse(const std::wstring & str)
 {
 	parse(str.c_str());
 }


+// Parse str and collect the accepted values in header_values ordered by weight.
+//
+// str                 - a zero terminated list of values
+// header_values       - the output table, each value with a weight greater than zero is appended to it
+// max_len             - the maximum number of items in header_values (zero means no limit)
+// clear_header_values - when true header_values is emptied before parsing
+//
+// The table is sorted from the highest weight to the lowest one.
+// A stable sort is used so that values with the same weight keep the order in which they were given
+// (with std::sort the order of such values would be unspecified).
+void AcceptBaseParser::parse(const wchar_t * str, std::vector<HeaderValue> & header_values, size_t max_len, bool clear_header_values)
+{
+	if( clear_header_values )
+		header_values.clear();
+
+	AcceptBaseParser::read(str, &header_values, max_len);
+
+	// the comparator only reads its arguments so they are taken by const reference
+	std::stable_sort(header_values.begin(), header_values.end(), [](const HeaderValue & h1, const HeaderValue & h2) -> bool {
+		return h1.weight > h2.weight;
+	});
+}
+
+
+// Overload for a std::wstring, str.c_str() and the remaining arguments are passed
+// to the version taking a const wchar_t pointer.
+void AcceptBaseParser::parse(const std::wstring & str, std::vector<HeaderValue> & header_values, size_t max_len, bool clear_header_values)
+{
+	const wchar_t * input = str.c_str();
+	parse(input, header_values, max_len, clear_header_values);
+}
+
+

 } // namespace Winix

--- a/winixd/utils/acceptbaseparser.h
+++ b/winixd/utils/acceptbaseparser.h
@@ -5,7 +5,7 @@
 */

 /* 
- * Copyright (c) 2008-2018, Tomasz Sowa
+ * Copyright (c) 2008-2022, Tomasz Sowa
 * All rights reserved.
 * 
 * Redistribution and use in source and binary forms, with or without
@@ -32,44 +32,64 @@
 * 
 */
 
-#ifndef headerfile_winix_core_acceptbaseparser
-#define headerfile_winix_core_acceptbaseparser
+#ifndef headerfile_winix_utils_acceptbaseparser
+#define headerfile_winix_utils_acceptbaseparser

 #include <string>
-#include "core/winixbase.h"
+#include "core/header.h"


 namespace Winix
 {

-
-
-
-// sample (you must create your own class derived from this one):
-// object.parse(L"   text/html  ;  ,  ; q = 45, application  /   xhtml+xml  ; q  = 0.4   ,    application/xml   ;   q  =  0.9  ,  */*   ;  q   =  0.8    ");
-class AcceptBaseParser : public WinixBase
+// A parser for a comma separated list of names where each name can be followed
+// by ";param=value" parameters (the form used for example by an HTTP Accept header).
+// The q parameter is taken as the weight of a name, the weight is 1.0 when q is not given.
+// A derived class can override the private parsed_name(), parsed_param() and parsed_name_q() methods
+// to be informed about the parsed items. The parse() methods taking a std::vector<HeaderValue>
+// additionally return a table of names sorted by weight.
+class AcceptBaseParser
 {
 public:

+	// limits for the length of a name, a parameter name and a parameter value,
+	// characters above the limit are dropped when a token is read
+	static size_t constexpr MAX_NAME_LENGTH = 64;
+	static size_t constexpr MAX_PARAM_LENGTH = 32;
+	static size_t constexpr MAX_PARAM_VALUE_LENGTH = 32;
+
+	AcceptBaseParser();
+	virtual ~AcceptBaseParser();
+
+	/*
+	 * parse the string and report the items found through the virtual methods:
+	 * parsed_name() and parsed_param() are called while the string is being read,
+	 * parsed_name_q() is called only for names with a weight greater than zero,
+	 * a weight greater than 1.0 is reduced to 1.0
+	 */
+	// sample:
+	// object.parse(L"   text/html  , text/* ; q = 45, application  /   xhtml+xml  ; q  = 0.4  ; limit = 1 ,  application/xml ;  charset = UTF-8  ;  q  =  0.9 , */* ;  q =  0.8  ");
 	void parse(const wchar_t * str);
 	void parse(const std::wstring & str);

+	// parse the string as above and additionally append each name with a weight greater than zero to header_values,
+	// the table will have at most max_len items (zero means no limit) and is sorted from the highest weight to the lowest,
+	// if clear_header_values is true then header_values is emptied before parsing
+	void parse(const wchar_t * str, std::vector<HeaderValue> & header_values, size_t max_len, bool clear_header_values = true);
+	void parse(const std::wstring & str, std::vector<HeaderValue> & header_values, size_t max_len, bool clear_header_values = true);
+

 private:

-	virtual void init() {} ;
-	virtual void Param(const std::wstring & param, double q) = 0;
-
-	bool IsWhite(int c);
-	void skip_white();
+	bool is_delimiter(wchar_t c, wchar_t delimiter);
+	bool is_delimiter(wchar_t c, wchar_t delimiter1, wchar_t delimiter2, wchar_t delimiter3);
+	void read_token(std::wstring & token, size_t max_len, wchar_t delimiter1, wchar_t delimiter2 = 0, wchar_t delimiter3 = 0);
+	void read_name();
 	void read_parameter();
-	void ReadQ();
-	void SkipParam();
-	
+	void read_loop(std::vector<HeaderValue> * header_values, size_t max_len);
+	void read(const wchar_t * str, std::vector<HeaderValue> * header_values, size_t max_len);
+
+	// methods which can be overridden in a derived class (by default they do nothing):
+	// init() is called before the parsing starts,
+	// parsed_name() is called for each non-empty name,
+	// parsed_param() is called for each non-empty parameter of a non-empty name (param_value can be empty),
+	// parsed_name_q() is called for each non-empty name with a weight greater than zero (q is not greater than 1.0)
+	virtual void init() {} ;
+	virtual void parsed_name(const std::wstring & name) {};
+	virtual void parsed_param(const std::wstring & param, const std::wstring & param_value) {};
+	virtual void parsed_name_q(const std::wstring & name, double q) {};
 	
+	// the current position in the parsed string and the buffers for the entry being read,
+	// q is the weight of the current name
 	const wchar_t * text;
+	std::wstring name;
 	std::wstring param;
+	std::wstring param_value;
 	double q;
+
+
 };