scorpioengine/src/headers_parser.cpp

302 lines
6.6 KiB
C++

#include <iostream>
#include <utf8/utf8.h>
#include "headers_parser.h"
#include "string_functions.h"
/*
 * Parses the request head stored in client.input_buffer.
 *
 * The first line is parsed with ParseFirstHeader(); if it fails
 * client.close_connection is set to true and nothing more is parsed.
 * Then header lines are parsed one by one: each header with a non-empty name
 * is stored with client.in.Add(name, value).
 * A line without a colon is skipped.
 * Parsing stops at the empty line which terminates the header section,
 * so anything placed after that line in the buffer is never treated as a header.
 */
void HeadersParser::ParseHeaders(Client & client)
{
    header_index = 0;

    if( !ParseFirstHeader(client) )
    {
        std::cout << "incorrect first header, closing connection ------------------------------------" << std::endl;
        client.close_connection = true;
        return;
    }

    // we are testing header_index + 3 < client.input_buffer.size() because we know
    // that the \r\n\r\n sequence already exists in this string,
    // so it is not a problem to leave a few characters unparsed at the end of the string
    while( header_index + 3 < client.input_buffer.size() &&
           !IsHeadersEnding(client.input_buffer.c_str() + header_index) )
    {
        // an empty line ends the header section
        // (reading header_index + 1 is safe because of the loop condition)
        if( client.input_buffer[header_index] == '\r' &&
            client.input_buffer[header_index + 1] == '\n' )
        {
            break;
        }

        tmp_header.clear();
        tmp_value.clear();

        if( ParseHeaderKey(client) )
        {
            SkipWhite(client);
            ParseHeaderValue(client);
            TrimWhiteAtEnd(tmp_value);

            if( tmp_header.size() > 0 )
            {
                client.in.Add(tmp_header, tmp_value);
            }
            else
            {
                // std::cout is used here like in the other messages of this file,
                // narrow and wide output should not be mixed on the same stream
                std::cout << "skiping empty name for a header" << std::endl;
            }
        }
        else
        {
            // there was no colon in this line: ParseHeaderKey() has consumed the line
            // up to and including '\r' (or reached the end of the buffer),
            // skip the following '\n' so the next iteration starts at the beginning of a line
            if( header_index < client.input_buffer.size() &&
                client.input_buffer[header_index] == '\n' )
            {
                header_index += 1;
            }
        }
    }
}
// Returns true if the four characters starting at ptr are "\r\n\r\n".
// The caller has to make sure that at least three more characters
// are available after ptr[0]. Characters are compared from left to right
// and the comparison stops at the first one which does not match.
bool HeadersParser::IsHeadersEnding(const char * ptr)
{
    static const char ending[4] = { '\r', '\n', '\r', '\n' };

    for(int i = 0 ; i < 4 ; ++i)
    {
        if( ptr[i] != ending[i] )
            return false;
    }

    return true;
}
// Parses the first line of the request: the method name, the URL
// and the HTTP version (in this order, stopping at the first part which fails).
// No matter what the result is, white characters are skipped afterwards
// and if header_index then points at "\r\n" these two characters are consumed.
// Returns true only if all three parts have been parsed correctly.
bool HeadersParser::ParseFirstHeader(Client & client)
{
    bool all_parts_ok = false;

    if( ParseFirstHeaderMethodName(client) )
    {
        if( ParseFirstHeaderURL(client) )
        {
            all_parts_ok = ParseFirstHeaderHTTPVersion(client);
        }
    }

    SkipWhite(client);

    const bool has_two_chars = header_index + 1 < client.input_buffer.size();

    if( has_two_chars &&
        client.input_buffer[header_index]     == '\r' &&
        client.input_buffer[header_index + 1] == '\n' )
    {
        // go to the beginning of the next line
        header_index += 2;
    }

    return all_parts_ok;
}
// Reads the method name from the request line and stores it in client.http_method_str.
//
// Characters are taken from client.input_buffer starting at header_index
// up to the first white character, '\r' or the end of the buffer.
// Each byte is converted to wchar_t through unsigned char.
// White characters after the name are skipped.
//
// Returns false if:
//  - the name is empty (e.g. the line starts with a white character),
//    in such a case client.http_method_str is set to an empty string,
//  - the name is longer than 31 characters, in such a case
//    client.http_method_str is left unchanged.
bool HeadersParser::ParseFirstHeaderMethodName(Client & client)
{
    wchar_t method_name[32];
    const size_t method_name_capacity = sizeof(method_name) / sizeof(method_name[0]);
    size_t method_name_len = 0;

    while( header_index < client.input_buffer.size() &&
           client.input_buffer[header_index] != '\r' &&
           !IsWhite(client.input_buffer[header_index]) )
    {
        wchar_t c = static_cast<unsigned char>(client.input_buffer[header_index++]);
        method_name[method_name_len++] = c;

        // the last slot of the table is reserved for the terminating zero
        if( method_name_len >= method_name_capacity )
            return false;
    }

    method_name[method_name_len] = 0;
    client.http_method_str = method_name;

    // a request line without a method name is not correct
    if( method_name_len == 0 )
        return false;

    SkipWhite(client);

    return true;
}
/*
 * The generic URI syntax mandates that new URI schemes that provide for the representation
 * of character data in a URI must, in effect, represent characters from the unreserved set
 * without translation, and should convert all other characters to bytes according to UTF-8,
 * and then percent-encode those values. This requirement was introduced in January 2005
 * with the publication of RFC 3986. URI schemes introduced before this date are not affected.
 *
 *
 * Reads the URL from the request line and stores it (decoded) in client.url.
 *
 * Characters are taken from client.input_buffer starting at header_index
 * up to the first white character, '\r' or the end of the buffer:
 *  - '+' is changed to a space,
 *  - "%XY" (two hex digits) is changed to the byte with the value 0xXY,
 *  - any other character must be a printable ascii character (32..126).
 * The resulting bytes are converted from UTF-8 to a wide string with PT::UTF8ToWide().
 * White characters after the URL are skipped.
 *
 * Returns false if there is an incorrect character or an incorrect percent
 * sequence (parsing stops at that point), if the UTF-8 conversion fails
 * or if the resulting URL is empty.
 */
bool HeadersParser::ParseFirstHeaderURL(Client & client)
{
    client.url.clear();
    url_ascii.clear();

    while( header_index < client.input_buffer.size() &&
           client.input_buffer[header_index] != '\r' &&
           !IsWhite(client.input_buffer[header_index]) )
    {
        wchar_t c = static_cast<unsigned char>(client.input_buffer[header_index++]);

        if( c == '+' )
        {
            c = ' ';
        }
        else
        if( c == '%' )
        {
            // header_index points now at the first hex digit,
            // two digits are available if header_index + 1 is a valid index
            if( header_index + 1 >= client.input_buffer.size() )
                return false;

            wchar_t c1 = ToLower(static_cast<unsigned char>(client.input_buffer[header_index++]));
            wchar_t c2 = ToLower(static_cast<unsigned char>(client.input_buffer[header_index++]));

            if( !IsHexDigit(c1) || !IsHexDigit(c2) )
                return false;

            int v1 = HexDigitToValue(c1);
            int v2 = HexDigitToValue(c2);

            // NOTE(review): a percent sequence can produce any byte, including zero
            // and control characters -- confirm that the code using client.url copes with that
            c = static_cast<wchar_t>((v1 << 4) + v2);
        }
        else
        if( c < 32 || c >= 127 )
        {
            // only printable ascii characters can be given directly,
            // 127 (DEL) is a control character too
            return false;
        }

        url_ascii += static_cast<char>(c);
    }

    std::cout << "URL ASCII: " << url_ascii << "-----------------" << std::endl;

    bool utf8_correct = PT::UTF8ToWide(url_ascii, client.url);
    SkipWhite(client);
    url_ascii.clear();

    return utf8_correct && !client.url.empty();
}
// Parses the protocol version from the request line.
// The text at header_index has to be in the form of "HTTP/d.d" (d is a decimal digit).
// If so then these eight characters are consumed and the following white characters are skipped.
// client.http_version is set to http_version_1_0 or http_version_1_1,
// in all other cases it is set to http_version_unsupported.
// Returns true if the version is one of the two supported versions.
bool HeadersParser::ParseFirstHeaderHTTPVersion(Client & client)
{
    static const char prefix[5] = { 'H', 'T', 'T', 'P', '/' };

    client.http_version = http_version_unsupported;

    // eight characters are needed: "HTTP/" + digit + '.' + digit
    bool is_version_string = header_index + 7 < client.input_buffer.size();

    for(int i = 0 ; is_version_string && i < 5 ; ++i)
    {
        is_version_string = (client.input_buffer[header_index + i] == prefix[i]);
    }

    is_version_string = is_version_string &&
                        IsDecDigit(client.input_buffer[header_index + 5]) &&
                        client.input_buffer[header_index + 6] == '.' &&
                        IsDecDigit(client.input_buffer[header_index + 7]);

    if( is_version_string )
    {
        int major_digit = client.input_buffer[header_index + 5] - '0';
        int minor_digit = client.input_buffer[header_index + 7] - '0';

        if( major_digit == 1 && minor_digit == 0 )
        {
            client.http_version = http_version_1_0;
        }
        else
        if( major_digit == 1 && minor_digit == 1 )
        {
            client.http_version = http_version_1_1;
        }

        // the version string is consumed even if the version is not supported
        header_index += 8;
        SkipWhite(client);
    }

    return client.http_version != http_version_unsupported;
}
// Reads a header name into tmp_header (characters are added with ToLower()).
// Only characters from the range 32..126 are stored, other characters are silently skipped.
//
// Returns true if a colon was found, header_index points then at the character after the colon.
// Returns false if '\r' or the end of the buffer was reached first,
// header_index is then moved one position forward.
//
// ParseHeaderKey always increments header_index at least once.
bool HeadersParser::ParseHeaderKey(Client & client)
{
    for( ; header_index < client.input_buffer.size() ; header_index += 1 )
    {
        wchar_t c = (unsigned char)client.input_buffer[header_index];

        if( c == '\r' )
            break;

        if( c == ':' )
        {
            // the colon itself is consumed
            header_index += 1;
            return true;
        }

        // allow only ascii characters
        if( c >= 32 && c < 127 )
            tmp_header += ToLower(c);
    }

    // there was not a colon at the end of the name
    header_index += 1;

    return false;
}
// Reads a header value into tmp_value, starting at header_index.
//
// The value ends at "\r\n" (which is consumed) or at the end of the buffer.
// If "\r\n" is followed by a space or a tab then the value is continued
// in the next line: these three characters are consumed and a single space
// is put to tmp_value in their place (only if tmp_value is not empty), so the text
// from both lines is not glued together.
// Only characters from the range 32..126 are stored, other characters are silently skipped.
void HeadersParser::ParseHeaderValue(Client & client)
{
    while( header_index < client.input_buffer.size() )
    {
        if( header_index + 1 < client.input_buffer.size() &&
            client.input_buffer[header_index] == '\r' &&
            client.input_buffer[header_index+1] == '\n' )
        {
            const bool is_continued = header_index + 2 < client.input_buffer.size() &&
                                      (client.input_buffer[header_index+2] == ' ' ||
                                       client.input_buffer[header_index+2] == '\t');

            if( !is_continued )
            {
                header_index += 2;
                break;
            }

            // this line will be continued in the next line,
            // the line folding is replaced with one space
            header_index += 3;

            if( !tmp_value.empty() )
                tmp_value += static_cast<wchar_t>(' ');
        }
        else
        {
            wchar_t c = static_cast<unsigned char>(client.input_buffer[header_index]);

            if( c >= 32 && c < 127 )
            {
                // allow only ascii characters
                tmp_value += c;
            }

            header_index += 1;
        }
    }
}
// Moves header_index forward to the first character of client.input_buffer
// for which IsWhite() returns false (or to the end of the buffer).
void HeadersParser::SkipWhite(Client & client)
{
    for( ; header_index < client.input_buffer.size() ; header_index += 1 )
    {
        if( !IsWhite(client.input_buffer[header_index]) )
            break;
    }
}