scorpioengine/src/headers_parser.cpp

#include <iostream>
#include <utf8/utf8.h>
#include "headers_parser.h"
#include "string_functions.h"


/*
 * Parse the request line and all following header lines from client.input_buffer.
 *
 * header_index is reset to zero and is advanced while the input is consumed.
 * If the request line is incorrect then client.close_connection is set and
 * no header is parsed. Each header with a non-empty name is passed to
 * client.in.Add(name, value); the name has been lowercased by ParseHeaderKey.
 *
 * Parsing stops at the empty line which terminates the headers, so whatever
 * follows that line in the buffer is never treated as a header.
 */
void HeadersParser::ParseHeaders(Client & client)
{
	header_index = 0;

	if( !ParseFirstHeader(client) )
	{
		std::cout << "incorrect first header, closing connection ------------------------------------" << std::endl;
		client.close_connection = true;
		return;
	}

	// we are testing header_index + 3 < client.input_buffer.size() because we know
	// that the \r\n\r\n sequence already exists in this string
	// so it is not a problem that we leave some characters at the end of the string
	while( header_index + 3 < client.input_buffer.size() &&
		   !IsHeadersEnding(client.input_buffer.c_str() + header_index) )
	{
		// every iteration starts at the beginning of a line and the terminator of the
		// previous line has already been consumed, so an empty line at this position
		// ends the headers (header_index + 1 is a valid index thanks to the loop condition)
		if( client.input_buffer[header_index]   == '\r' &&
			client.input_buffer[header_index+1] == '\n' )
		{
			break;
		}

		tmp_header.clear();
		tmp_value.clear();

		if( ParseHeaderKey(client) )
		{
			SkipWhite(client);
			ParseHeaderValue(client);
			TrimWhiteAtEnd(tmp_value);

			if( tmp_header.size() > 0 )
			{
				client.in.Add(tmp_header, tmp_value);
			}
			else
			{
				// the narrow stream is used here (as for the message above), narrow and wide
				// output should not be mixed on the same standard output
				std::cout << "skiping empty name for a header" << std::endl;
			}
		}
		else
		{
			// the line has no colon: ParseHeaderKey stopped at '\r' and stepped over it,
			// skip the following '\n' so the next iteration starts at the beginning of a line
			if( header_index < client.input_buffer.size() &&
				client.input_buffer[header_index] == '\n' )
			{
				header_index += 1;
			}
		}
	}
}


// Return true if the four characters starting at ptr are exactly "\r\n\r\n".
// The characters are read one by one and reading stops at the first mismatch,
// ptr should point to at least four readable characters (ptr[0] .. ptr[3]).
bool HeadersParser::IsHeadersEnding(const char * ptr)
{
	if( ptr[0] != '\r' )
		return false;

	if( ptr[1] != '\n' )
		return false;

	if( ptr[2] != '\r' )
		return false;

	return ptr[3] == '\n';
}


// Parse the request line: method name, URL and HTTP version (in this order,
// the next part is parsed only if the previous one was parsed correctly).
// Regardless of the result white characters are skipped afterwards
// and a "\r\n" pair, if it is present at header_index, is consumed.
// Return true if all three parts were parsed correctly.
bool HeadersParser::ParseFirstHeader(Client & client)
{
	bool request_line_ok = false;

	if( ParseFirstHeaderMethodName(client) )
	{
		if( ParseFirstHeaderURL(client) )
		{
			request_line_ok = ParseFirstHeaderHTTPVersion(client);
		}
	}

	SkipWhite(client);

	const bool at_line_end = header_index + 1 < client.input_buffer.size() &&
							 client.input_buffer[header_index]   == '\r'   &&
							 client.input_buffer[header_index+1] == '\n';

	if( at_line_end )
		header_index += 2;

	return request_line_ok;
}


// Read the method name from the request line into client.http_method_str.
// Characters are taken up to the first white character, '\r' or the end of the buffer,
// white characters following the name are skipped.
// The name can have at most 31 characters, for a longer name false is returned
// and client.http_method_str is not changed.
bool HeadersParser::ParseFirstHeaderMethodName(Client & client)
{
	const size_t buffer_len = 32;
	wchar_t name_buffer[buffer_len];
	size_t name_len = 0;

	for(;;)
	{
		if( header_index >= client.input_buffer.size() )
			break;

		if( client.input_buffer[header_index] == '\r' )
			break;

		if( IsWhite(client.input_buffer[header_index]) )
			break;

		// the byte is read as unsigned so values above 127 do not become negative
		name_buffer[name_len] = static_cast<unsigned char>(client.input_buffer[header_index]);
		header_index += 1;
		name_len += 1;

		// one slot has to be left for the terminating zero
		if( name_len >= buffer_len )
			return false;
	}

	name_buffer[name_len] = 0;
	client.http_method_str = name_buffer;
	SkipWhite(client);

	return true;
}


/*
 * Parse the URL from the request line.
 *
 * The generic URI syntax mandates that new URI schemes that provide for the representation
 * of character data in a URI must, in effect, represent characters from the unreserved set
 * without translation, and should convert all other characters to bytes according to UTF-8,
 * and then percent-encode those values. This requirement was introduced in January 2005
 * with the publication of RFC 3986. URI schemes introduced before this date are not affected.
 *
 * Characters are read up to the first white character, '\r' or the end of the buffer:
 *  - '+' is changed to a space,
 *  - "%XY" (two hex digits) is changed to the byte with that value,
 *  - any other character has to be a printable ascii character (32..126),
 *    the same range which is accepted in header names and values.
 * The resulting bytes are collected in url_ascii and then converted with PT::UTF8ToWide()
 * to client.url. White characters after the URL are skipped.
 *
 * Return false if a percent escape is truncated or has a non-hex digit, if a raw character
 * is out of the range above, if PT::UTF8ToWide() reports an error or if the URL is empty.
 */
bool HeadersParser::ParseFirstHeaderURL(Client & client)
{
	client.url.clear();
	url_ascii.clear();

	while( 	header_index < client.input_buffer.size() &&
			client.input_buffer[header_index] != '\r' &&
			!IsWhite(client.input_buffer[header_index]) )
	{
		wchar_t c = static_cast<unsigned char>(client.input_buffer[header_index++]);

		if( c == '+' )
		{
			c = ' ';
		}
		else
		if( c == '%' )
		{
			// header_index points now at the first hex digit,
			// both digits (header_index and header_index + 1) have to be inside the buffer
			if( header_index + 1 >= client.input_buffer.size() )
				return false;

			wchar_t c1 = ToLower(static_cast<unsigned char>(client.input_buffer[header_index++]));
			wchar_t c2 = ToLower(static_cast<unsigned char>(client.input_buffer[header_index++]));

			if( !IsHexDigit(c1) || !IsHexDigit(c2) )
				return false;

			int v1 = HexDigitToValue(c1);
			int v2 = HexDigitToValue(c2);

			c = static_cast<wchar_t>((v1 << 4) + v2);
		}
		else
		if( c < 32 || c >= 127 )
		{
			// control characters (including DEL) and bytes above ascii
			// are not allowed in a raw form, they have to be percent-encoded
			return false;
		}

		url_ascii += static_cast<char>(c);
	}

	std::cout << "URL ASCII: " << url_ascii << "-----------------" << std::endl;

	bool utf8_correct = PT::UTF8ToWide(url_ascii, client.url);

	SkipWhite(client);
	url_ascii.clear();

	return utf8_correct && !client.url.empty();
}


// Parse the "HTTP/x.y" token (x and y are single decimal digits) at header_index
// and set client.http_version: http_version_1_0 for "1.0", http_version_1_1 for "1.1"
// and http_version_unsupported in all other cases.
// If the token has the correct form it is consumed together with the following
// white characters (also when the version is unsupported), otherwise header_index
// is not changed. Return true only for a supported version.
bool HeadersParser::ParseFirstHeaderHTTPVersion(Client & client)
{
	client.http_version = http_version_unsupported;

	// the whole token has eight characters: header_index .. header_index + 7
	if( header_index + 7 >= client.input_buffer.size() )
		return false;

	static const char prefix[] = "HTTP/";

	for(size_t i = 0 ; i < 5 ; ++i)
	{
		if( client.input_buffer[header_index + i] != prefix[i] )
			return false;
	}

	if( !IsDecDigit(client.input_buffer[header_index + 5]) )
		return false;

	if( client.input_buffer[header_index + 6] != '.' )
		return false;

	if( !IsDecDigit(client.input_buffer[header_index + 7]) )
		return false;

	const int major = client.input_buffer[header_index + 5] - '0';
	const int minor = client.input_buffer[header_index + 7] - '0';

	if( major == 1 && minor == 0 )
	{
		client.http_version = http_version_1_0;
	}
	else
	if( major == 1 && minor == 1 )
	{
		client.http_version = http_version_1_1;
	}

	header_index += 8;
	SkipWhite(client);

	return client.http_version != http_version_unsupported;
}


// Read a header name into tmp_header (characters are appended, tmp_header is not cleared here).
// Only printable ascii characters (32..126) are stored, each one converted with ToLower(),
// other characters are silently dropped.
// If a colon is found it is consumed and true is returned.
// If '\r' or the end of the buffer is reached first then header_index is moved
// one position further and false is returned.
// In both cases header_index is incremented at least once so the caller always makes progress.
bool HeadersParser::ParseHeaderKey(Client & client)
{
	for( ; header_index < client.input_buffer.size() ; header_index += 1 )
	{
		const char raw = client.input_buffer[header_index];

		if( raw == '\r' )
			break;

		if( raw == ':' )
		{
			header_index += 1;
			return true;
		}

		// the byte is read as unsigned so values above 127 do not become negative
		const wchar_t c = static_cast<unsigned char>(raw);
		const bool is_printable_ascii = !(c < 32 || c >= 127);

		if( is_printable_ascii )
			tmp_header += ToLower(c);
	}

	// there was not a colon at the end of the name
	header_index += 1;
	return false;
}


// Read a header value into tmp_value (characters are appended, tmp_value is not cleared here).
// Reading stops after the "\r\n" which terminates the line (the pair is consumed)
// or at the end of the buffer.
// If "\r\n" is followed by a space or a tab then the value is continued in the next line
// (line folding): the line break with all spaces and tabs after it is replaced by
// one space so the words from both lines are not glued together
// (no space is added if nothing has been read yet).
// Only printable ascii characters (32..126) are stored, other characters are silently dropped.
void HeadersParser::ParseHeaderValue(Client & client)
{
	while( header_index < client.input_buffer.size() )
	{
		if( header_index + 1 < client.input_buffer.size() &&
			client.input_buffer[header_index]   == '\r'   &&
			client.input_buffer[header_index+1] == '\n'	)
		{
			header_index += 2;

			if( header_index >= client.input_buffer.size() ||
				(client.input_buffer[header_index] != ' ' &&
				 client.input_buffer[header_index] != '\t') )
			{
				// a normal end of the header line
				break;
			}

			// this line will be continued in the next line,
			// skip the whole indentation of the continuation line
			while( header_index < client.input_buffer.size() &&
				   (client.input_buffer[header_index] == ' ' ||
					client.input_buffer[header_index] == '\t') )
			{
				header_index += 1;
			}

			if( !tmp_value.empty() )
				tmp_value += L' ';
		}
		else
		{
			wchar_t c = static_cast<unsigned char>(client.input_buffer[header_index]);

			if( c >= 32 && c < 127 )
			{
				// allow only ascii characters
				tmp_value += c;
			}

			header_index += 1;
		}
	}
}


// Move header_index forward to the first character for which IsWhite() returns false
// or to the end of client.input_buffer if there is no such character.
void HeadersParser::SkipWhite(Client & client)
{
	for( ; header_index < client.input_buffer.size() ; ++header_index )
	{
		if( !IsWhite(client.input_buffer[header_index]) )
			break;
	}
}