improve AcceptBaseParser parsing algorithm

remove AcceptParser - it is no longer needed because AcceptBaseParser can now prepare the table of header values itself
This commit is contained in:
Tomasz Sowa 2022-02-02 17:58:27 +01:00
parent d0d2cfb22c
commit 75daf37bbd
10 changed files with 230 additions and 207 deletions

File diff suppressed because one or more lines are too long

View File

@ -145,7 +145,8 @@ app.o: ../../../winix/winixd/templates/indexpatterns.h
app.o: ../../../winix/winixd/templates/patterns.h
app.o: ../../../winix/winixd/templates/changepatterns.h compress.h
app.o: postparser.h httpsimpleparser.h cookieparser.h postmultiparser.h
app.o: acceptencodingparser.h acceptparser.h winixrequest.h
app.o: acceptencodingparser.h ../../../winix/winixd/utils/acceptbaseparser.h
app.o: ../../../winix/winixd/core/header.h winixrequest.h
app.o: ../../../winix/winixd/models/migration.h
basethread.o: basethread.h synchro.h winixmodeldeprecated.h
basethread.o: ../../../winix/winixd/core/winixbase.h

View File

@ -5,7 +5,7 @@
*/
/*
* Copyright (c) 2008-2014, Tomasz Sowa
* Copyright (c) 2008-2022, Tomasz Sowa
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
@ -60,7 +60,7 @@ public:
}
void ParseAndLog(const wchar_t * str)
void ParseAndLog(const wchar_t * str, Log & log)
{
parse(str);
@ -84,9 +84,9 @@ public:
}
void ParseAndLog(const std::wstring & str)
void ParseAndLog(const std::wstring & str, Log & log)
{
ParseAndLog(str.c_str());
ParseAndLog(str.c_str(), log);
}
@ -99,7 +99,7 @@ private:
}
void Param(const std::wstring & param, double q)
void parsed_name_q(const std::wstring & param, double q)
{
if( param == L"deflate" && q!=0.0 )
{

View File

@ -1,124 +0,0 @@
/*
* This file is a part of Winix
* and is distributed under the 2-Clause BSD licence.
* Author: Tomasz Sowa <t.sowa@ttmath.org>
*/
/*
* Copyright (c) 2022, Tomasz Sowa
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* 1. Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
*
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*
*/
#ifndef headerfile_winix_core_acceptparser
#define headerfile_winix_core_acceptparser
#include "utils/acceptbaseparser.h"
#include "log.h"
#include "header.h"
namespace Winix
{
class AcceptParser : public AcceptBaseParser
{
public:
static constexpr size_t MAX_CONTAINER_LENGTH = 16;
/*
* IMPROVEME add support for something like "text/html;level=1" (skip the level part)
*
* https://developer.mozilla.org/en-US/docs/Glossary/Quality_values
* Some syntax, like the one of Accept, allow additional specifiers like text/html;level=1.
* These increase the specificity of the value. Their use is extremely rare.
*
*
*/
void Parse(const wchar_t * str, std::vector<HeaderValue> & header_values, bool clear_header_values = true)
{
if( clear_header_values )
header_values.clear();
this->header_values = &header_values;
AcceptBaseParser::parse(str);
std::sort(header_values.begin(), header_values.end(), [](HeaderValue & h1, HeaderValue & h2) -> bool {
return h1.weight > h2.weight;
});
PutToLog(header_values);
}
void Parse(const std::wstring & str, std::vector<HeaderValue> & header_values, bool clear_header_values = true)
{
Parse(str.c_str(), header_values, clear_header_values);
}
private:
std::vector<HeaderValue> * header_values;
void Param(const std::wstring & param, double q)
{
if( header_values->size() < MAX_CONTAINER_LENGTH && q > 0.0 )
{
if( q > 1.0 )
q = 1.0;
header_values->resize(header_values->size() + 1);
header_values->back().value = param;
header_values->back().weight = q;
}
}
void PutToLog(std::vector<HeaderValue> & header_values)
{
if( !header_values.empty() )
{
log << log3 << "AP: " << Header::accept << " header consists of: ";
HeaderValue::log_values(header_values, log);
log << logend;
}
else
{
log << log3 << "AP: there is no " << Header::accept << " header" << logend;
}
}
};
} // namespace Winix
#endif

View File

@ -385,9 +385,6 @@ bool App::Init()
cookie_parser.set_dependency(&winix_model);
accept_encoding_parser.set_dependency(&winix_base);
accept_parser.set_dependency(&winix_base);
plugin.Call((Session*)0, WINIX_PLUGIN_INIT);
return true;
@ -935,6 +932,23 @@ void App::LogEnvironmentHTTPVariables()
}
/*
 * parsing the Accept http header into cur.request->accept_mime_types
 * (no more than 16 values are stored) and putting the result to the log
 *
 * NOTE(review): an empty list is logged as "there is no ... header",
 * the list may also be empty when the header was sent but no value
 * was accepted by the parser - confirm whether this matters
 */
void App::ParseAcceptHeader()
{
	auto & mime_types = cur.request->accept_mime_types;

	accept_base_parser.parse(cur.request->env_http_accept, mime_types, 16);

	if( mime_types.empty() )
	{
		log << log3 << "App: there is no " << Winix::Header::accept << " header" << logend;
		return;
	}

	log << log3 << "App: " << Winix::Header::accept << " header consists of: ";
	HeaderValue::log_values(mime_types, log);
	log << logend;
}
/*
* reading the request (without GET parameters in the URL)
@ -952,16 +966,16 @@ void App::ReadRequest()
ReadPostVars();
cookie_parser.Parse(cur.request->env_http_cookie, cur.request->cookie_tab);
accept_encoding_parser.ParseAndLog(cur.request->env_http_accept_encoding);
accept_parser.Parse(cur.request->env_http_accept, cur.request->accept_mime_types);
if( config.log_env_variables )
LogEnvironmentVariables();
if( config.log_env_http_variables )
LogEnvironmentHTTPVariables();
ParseAcceptHeader();
accept_encoding_parser.ParseAndLog(cur.request->env_http_accept_encoding, log);
cookie_parser.Parse(cur.request->env_http_cookie, cur.request->cookie_tab);
CheckIE();
CheckKonqueror();
CheckHtmx();

View File

@ -140,8 +140,8 @@ private:
std::string post_buffer;
CookieParser cookie_parser;
AcceptBaseParser accept_base_parser;
AcceptEncodingParser accept_encoding_parser;
AcceptParser accept_parser;
Compress compress;
FCGX_Request fcgi_request;
int fcgi_socket;
@ -240,6 +240,7 @@ private:
void LogEnvironmentVariables();
void LogEnvironmentHTTPVariables();
void ParseAcceptHeader();
void SetEnv(const char * name, std::wstring & env);
void ReadEnvVariables();

View File

@ -183,6 +183,7 @@ main.o: ../../../winix/winixd/core/httpsimpleparser.h
main.o: ../../../winix/winixd/core/cookieparser.h
main.o: ../../../winix/winixd/core/postmultiparser.h
main.o: ../../../winix/winixd/core/acceptencodingparser.h
main.o: ../../../winix/winixd/core/acceptparser.h
main.o: ../../../winix/winixd/utils/acceptbaseparser.h
main.o: ../../../winix/winixd/core/header.h
main.o: ../../../winix/winixd/core/winixrequest.h
main.o: ../../../winix/winixd/core/version.h

View File

@ -1,3 +1,53 @@
# DO NOT DELETE
acceptbaseparser.o: acceptbaseparser.h ../../../tito/src/misc.h
acceptbaseparser.o: acceptbaseparser.h ../../../winix/winixd/core/header.h
acceptbaseparser.o: ../../../winix/winixd/core/log.h
acceptbaseparser.o: ../../../winix/winixd/core/logmanipulators.h
acceptbaseparser.o: ../../../pikotools/src/log/log.h
acceptbaseparser.o: ../../../pikotools/src/textstream/textstream.h
acceptbaseparser.o: ../../../pikotools/src/textstream/stream.h
acceptbaseparser.o: ../../../pikotools/src/space/space.h
acceptbaseparser.o: ../../../pikotools/src/textstream/types.h
acceptbaseparser.o: ../../../pikotools/src/convert/inttostr.h
acceptbaseparser.o: ../../../pikotools/src/utf8/utf8.h
acceptbaseparser.o: ../../../pikotools/src/textstream/stream.h
acceptbaseparser.o: ../../../pikotools/src/utf8/utf8_templates.h
acceptbaseparser.o: ../../../pikotools/src/utf8/utf8_private.h
acceptbaseparser.o: ../../../pikotools/src/date/date.h
acceptbaseparser.o: ../../../pikotools/src/membuffer/membuffer.h
acceptbaseparser.o: ../../../pikotools/src/textstream/types.h
acceptbaseparser.o: ../../../pikotools/src/log/filelog.h
acceptbaseparser.o: ../../../morm/src/morm.h ../../../morm/src/morm_types.h
acceptbaseparser.o: ../../../morm/src/model.h
acceptbaseparser.o: ../../../morm/src/modelconnector.h
acceptbaseparser.o: ../../../morm/src/clearer.h
acceptbaseparser.o: ../../../morm/src/dbconnector.h
acceptbaseparser.o: ../../../morm/src/queryresult.h ../../../morm/src/ft.h
acceptbaseparser.o: ../../../morm/src/flatconnector.h
acceptbaseparser.o: ../../../morm/src/dbexpression.h
acceptbaseparser.o: ../../../morm/src/baseexpression.h
acceptbaseparser.o: ../../../morm/src/modelenv.h
acceptbaseparser.o: ../../../morm/src/modeldata.h
acceptbaseparser.o: ../../../morm/src/cursorhelper.h
acceptbaseparser.o: ../../../morm/src/finderhelper.h
acceptbaseparser.o: ../../../morm/src/fieldvaluehelper.h
acceptbaseparser.o: ../../../morm/src/wrapper.h
acceptbaseparser.o: ../../../morm/src/spacewrapper.h
acceptbaseparser.o: ../../../morm/src/baseobjectwrapper.h
acceptbaseparser.o: ../../../morm/src/modelcontainerwrapper.h
acceptbaseparser.o: ../../../pikotools/src/convert/text.h
acceptbaseparser.o: ../../../morm/src/flatexpression.h
acceptbaseparser.o: ../../../morm/src/finder.h ../../../morm/src/cursor.h
acceptbaseparser.o: ../../../morm/src/jsonexpression.h
acceptbaseparser.o: ../../../morm/src/postgresqlexpression.h
acceptbaseparser.o: ../../../morm/src/jsonconnector.h
acceptbaseparser.o: ../../../morm/src/postgresqlconnector.h
acceptbaseparser.o: ../../../morm/src/postgresqlqueryresult.h
acceptbaseparser.o: ../../../pikotools/src/convert/convert.h
acceptbaseparser.o: ../../../pikotools/src/convert/inttostr.h
acceptbaseparser.o: ../../../pikotools/src/convert/patternreplacer.h
acceptbaseparser.o: ../../../pikotools/src/convert/strtoint.h
acceptbaseparser.o: ../../../pikotools/src/convert/text.h
acceptbaseparser.o: ../../../pikotools/src/convert/misc.h
acceptbaseparser.o: ../../../pikotools/src/utf8/utf8_stream.h
acceptbaseparser.o: ../../../pikotools/src/convert/double.h

View File

@ -5,7 +5,7 @@
*/
/*
* Copyright (c) 2008-2014, Tomasz Sowa
* Copyright (c) 2008-2022, Tomasz Sowa
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
@ -32,100 +32,141 @@
*
*/
#include <wchar.h>
#include "acceptbaseparser.h"
#include "core/misc.h"
#include "convert/convert.h"
namespace Winix
{
bool AcceptBaseParser::IsWhite(int c)
AcceptBaseParser::AcceptBaseParser()
{
if( c==' ' || c=='\t' )
return true;
}
return false;
AcceptBaseParser::~AcceptBaseParser()
{
}
void AcceptBaseParser::skip_white()
bool AcceptBaseParser::is_delimiter(wchar_t c, wchar_t delimiter)
{
while( IsWhite(*text) )
return delimiter != 0 && delimiter == c;
}
/*
 * returns true if c is equal to one of the three given delimiters,
 * each delimiter is tested with the two-argument is_delimiter()
 */
bool AcceptBaseParser::is_delimiter(wchar_t c, wchar_t delimiter1, wchar_t delimiter2, wchar_t delimiter3)
{
	const wchar_t delimiters[] = { delimiter1, delimiter2, delimiter3 };

	for(wchar_t delimiter : delimiters)
	{
		if( is_delimiter(c, delimiter) )
			return true;
	}

	return false;
}
/*
 * reading one token from the current position (text) to the 'token' string
 *
 * The text pointer is first moved by pt::skip_white() (presumably it skips
 * leading white characters - see pikotools) and then advanced until the end
 * of the string or until one of the delimiters is found, the delimiter itself
 * is not consumed. Only the first max_len characters are stored in the token,
 * the rest is read and dropped. At the end pt::trim_white() is called on the token.
 */
void AcceptBaseParser::read_token(std::wstring & token, size_t max_len, wchar_t delimiter1, wchar_t delimiter2, wchar_t delimiter3)
{
	token.clear();
	text = pt::skip_white(text, false, false);

	for( ; *text != 0 && !is_delimiter(*text, delimiter1, delimiter2, delimiter3) ; ++text )
	{
		// characters over the limit are consumed but not stored
		if( token.size() < max_len )
			token += *text;
	}

	pt::trim_white(token);
}
/*
 * reading a name (up to ',' or ';', at most MAX_NAME_LENGTH characters are stored)
 * and calling the parsed_name() hook if the name is not empty
 */
void AcceptBaseParser::read_name()
{
	read_token(name, MAX_NAME_LENGTH, ',', ';');

	if( name.empty() )
		return;

	parsed_name(name);
}
void AcceptBaseParser::read_parameter()
{
param.clear();
skip_white();
param_value.clear();
read_token(param, MAX_PARAM_LENGTH, '=', ',', ';');
while( *text!=0 && *text!=',' && *text!=';' )
if( *text == '=' )
{
param += *text;
++text;
read_token(param_value, MAX_PARAM_VALUE_LENGTH, ';', ',');
}
TrimWhite(param);
if( !name.empty() && !param.empty() )
{
parsed_param(param, param_value);
}
if( param == L"q" && !param_value.empty() )
{
q = pt::to_double(param_value);
}
}
void AcceptBaseParser::ReadQ()
void AcceptBaseParser::read_loop(std::vector<HeaderValue> * header_values, size_t max_len)
{
while( *text != 0 )
{
q = 1.0;
skip_white();
read_name();
if( *text != ';' )
return;
++text; // skipping a semicolon
while( *text!=0 && *text!=',' && *text!='=' )
// skipping until ',' or '='
++text;
if( *text==0 || *text==',' )
return;
++text; // skipping '='
skip_white();
q = wcstod(text, (wchar_t**)&text);
}
void AcceptBaseParser::SkipParam()
while( *text == ';' )
{
skip_white();
if( *text == ',' )
++text;
read_parameter();
}
if( !name.empty() && q > 0.0 )
{
if( q > 1.0 )
q = 1.0;
parsed_name_q(name, q);
if( header_values && (max_len == 0 || header_values->size() < max_len) )
{
header_values->resize(header_values->size() + 1);
header_values->back().value = name;
header_values->back().weight = q;
}
}
if( *text != 0 )
++text;
}
}
void AcceptBaseParser::parse(const wchar_t * str)
void AcceptBaseParser::read(const wchar_t * str, std::vector<HeaderValue> * header_values, size_t max_len)
{
text = str;
init();
while( *text != 0 )
{
read_parameter();
ReadQ();
SkipParam();
Param(param, q);
}
read_loop(header_values, max_len);
name.clear();
param.clear();
param_value.clear();
}
/*
 * parsing the given string without filling any container:
 * read() is called with a null container pointer and a zero limit
 */
void AcceptBaseParser::parse(const wchar_t * str)
{
read(str, nullptr, 0);
}
void AcceptBaseParser::parse(const std::wstring & str)
{
@ -133,6 +174,25 @@ void AcceptBaseParser::parse(const std::wstring & str)
}
void AcceptBaseParser::parse(const wchar_t * str, std::vector<HeaderValue> & header_values, size_t max_len, bool clear_header_values)
{
if( clear_header_values )
header_values.clear();
AcceptBaseParser::read(str, &header_values, max_len);
std::sort(header_values.begin(), header_values.end(), [](HeaderValue & h1, HeaderValue & h2) -> bool {
return h1.weight > h2.weight;
});
}
/*
 * the same as parse(const wchar_t *, ...) but the header is given as a std::wstring
 */
void AcceptBaseParser::parse(const std::wstring & str, std::vector<HeaderValue> & header_values, size_t max_len, bool clear_header_values)
{
	const wchar_t * raw_str = str.c_str();

	parse(raw_str, header_values, max_len, clear_header_values);
}
} // namespace Winix

View File

@ -5,7 +5,7 @@
*/
/*
* Copyright (c) 2008-2018, Tomasz Sowa
* Copyright (c) 2008-2022, Tomasz Sowa
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
@ -32,44 +32,64 @@
*
*/
#ifndef headerfile_winix_core_acceptbaseparser
#define headerfile_winix_core_acceptbaseparser
#ifndef headerfile_winix_utils_acceptbaseparser
#define headerfile_winix_utils_acceptbaseparser
#include <string>
#include "core/winixbase.h"
#include "core/header.h"
namespace Winix
{
// sample (you must create your own class derived from this one):
// object.parse(L" text/html ; , ; q = 45, application / xhtml+xml ; q = 0.4 , application/xml ; q = 0.9 , */* ; q = 0.8 ");
class AcceptBaseParser : public WinixBase
class AcceptBaseParser
{
public:
static size_t constexpr MAX_NAME_LENGTH = 64;
static size_t constexpr MAX_PARAM_LENGTH = 32;
static size_t constexpr MAX_PARAM_VALUE_LENGTH = 32;
AcceptBaseParser();
virtual ~AcceptBaseParser();
/*
*
*
*
*
*/
// sample:
// object.parse(L" text/html , text/* ; q = 45, application / xhtml+xml ; q = 0.4 ; limit = 1 , application/xml ; charset = UTF-8 ; q = 0.9 , */* ; q = 0.8 ");
void parse(const wchar_t * str);
void parse(const std::wstring & str);
void parse(const wchar_t * str, std::vector<HeaderValue> & header_values, size_t max_len, bool clear_header_values = true);
void parse(const std::wstring & str, std::vector<HeaderValue> & header_values, size_t max_len, bool clear_header_values = true);
private:
virtual void init() {} ;
virtual void Param(const std::wstring & param, double q) = 0;
bool IsWhite(int c);
void skip_white();
bool is_delimiter(wchar_t c, wchar_t delimiter);
bool is_delimiter(wchar_t c, wchar_t delimiter1, wchar_t delimiter2, wchar_t delimiter3);
void read_token(std::wstring & token, size_t max_len, wchar_t delimiter1, wchar_t delimiter2 = 0, wchar_t delimiter3 = 0);
void read_name();
void read_parameter();
void ReadQ();
void SkipParam();
void read_loop(std::vector<HeaderValue> * header_values, size_t max_len);
void read(const wchar_t * str, std::vector<HeaderValue> * header_values, size_t max_len);
virtual void init() {} ;
virtual void parsed_name(const std::wstring & name) {};
virtual void parsed_param(const std::wstring & param, const std::wstring & param_value) {};
virtual void parsed_name_q(const std::wstring & name, double q) {};
const wchar_t * text;
std::wstring name;
std::wstring param;
std::wstring param_value;
double q;
};