winix/winixd/utils/http.cpp

/*
 * This file is a part of Winix
 * and is distributed under the 2-Clause BSD licence.
 * Author: Tomasz Sowa <t.sowa@ttmath.org>
 */

/*
 * Copyright (c) 2010-2022, Tomasz Sowa
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are met:
 *
 * 1. Redistributions of source code must retain the above copyright notice,
 * this list of conditions and the following disclaimer.
 *
 * 2. Redistributions in binary form must reproduce the above copyright
 * notice, this list of conditions and the following disclaimer in the
 * documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 *
 */

#include "http.h"
#include "core/lock.h"
#include "core/log.h"
#include "utf8/utf8.h"
#include "core/header.h"


namespace Winix
{


/*
 * puts the object in an 'empty' state: every pointer is null,
 * the curl easy handle is not created here (see initialize_curl_if_needed())
 *
 * defaults: "Winix" as the user agent, 30 as the value passed later to
 * CURLOPT_TIMEOUT and CURLOPT_CONNECTTIMEOUT, response header names changed to lower case
 */
Http::Http()
{
	// curl related state
	curl = nullptr;
	http_headers = nullptr;
	conn_timeout = 30;
	browser_name = "Winix";

	// request body source used by fetch_read_function()
	read_function_input = nullptr;
	read_function_index = 0;

	// optional things added to a request
	additional_headers_to_send = nullptr;
	bearer_token = nullptr;

	// optional places where response headers are stored
	output_headers_space = nullptr;
	output_content_type = nullptr;
	change_header_names_to_lower = true;

	synchro = nullptr;
}


/*
 * frees the pending header list and the curl easy handle (if they exist)
 */
Http::~Http()
{
	this->uninitialize_curl();
}


/*
 * remembers a pointer to a space with additional request headers
 *
 * only the pointer is stored (nothing is copied), the headers are read
 * when a request is made (see add_additional_headers()), can be null
 *
 * returns *this so calls can be chained
 */
Http & Http::add_input_headers(pt::Space * headers)
{
	this->additional_headers_to_send = headers;
	return *this;
}


/*
 * sets the space which receives the response headers of next requests
 *
 * out_headers           - only the pointer is stored, can be null
 * change_names_to_lower - when true the header names are changed to lower case while parsing
 *
 * returns *this so calls can be chained
 */
Http & Http::get_output_headers_to(pt::Space * out_headers, bool change_names_to_lower)
{
	change_header_names_to_lower = change_names_to_lower;
	output_headers_space = out_headers;

	return *this;
}


/*
 * sets the string which receives the value of the content type response header
 *
 * only the pointer is stored, can be null
 * returns *this so calls can be chained
 */
Http & Http::get_output_content_type_to(std::wstring * out_content_type)
{
	output_content_type = out_content_type;
	return *this;
}


/*
 * remembers a bearer token for next requests
 *
 * only the pointer is stored (the characters are not copied),
 * a null pointer or an empty string means: do not send the Authorization header
 *
 * returns *this so calls can be chained
 */
Http & Http::add_bearer_token(const wchar_t * token)
{
	bearer_token = token;
	return *this;
}


/*
 * remembers a bearer token for next requests
 *
 * the token is not copied: a pointer returned by token.c_str() is stored,
 * so 'token' has to stay alive and unchanged as long as requests are made with it
 *
 * returns *this so calls can be chained
 */
Http & Http::add_bearer_token(const std::wstring & token)
{
	const wchar_t * token_chars = token.c_str();
	bearer_token = token_chars;

	return *this;
}


bool Http::get(const wchar_t * url, std::wstring & out, bool clear_str)
{
	std::string url_ascii;
	pt::TextStream out_stream;

	pt::wide_to_utf8(url, url_ascii);

	reset_headers();
	bool status = fetch_internal(url_ascii.c_str(), nullptr, out_stream);

	// IMPROVE pikotools, add: pt::UTF8ToInt(out_stream, out);
	std::string temp;
	out_stream.to_str(temp);
	pt::utf8_to_wide(temp, out, clear_str);

	return status;
}


/*
 * the same as get(const wchar_t *, std::wstring &, bool) but the url is given as a std::wstring
 */
bool Http::get(const std::wstring & url, std::wstring & out, bool clear_str)
{
	const wchar_t * url_chars = url.c_str();

	return get(url_chars, out, clear_str);
}


/*
 * the same as get(const wchar_t *, std::wstring &, bool) but the url is taken from a stream
 * (the stream is first copied to a temporary std::wstring)
 */
bool Http::get(const pt::WTextStream & url, std::wstring & out,  bool clear_str)
{
	std::wstring url_wide;
	url.to_str(url_wide);

	return get(url_wide.c_str(), out, clear_str);
}


bool Http::get(const wchar_t * url, pt::WTextStream & out, bool clear_stream)
{
	std::string url_ascii;
	pt::TextStream out_stream;

	pt::wide_to_utf8(url, url_ascii);

	reset_headers();
	bool status = fetch_internal(url_ascii.c_str(), nullptr, out_stream);

	// IMPROVE pikotools, add: pt::UTF8ToInt(out_stream, out);
	std::string temp;
	out_stream.to_str(temp);

	// similar, improve pikotools
	std::wstring temp_wide;
	pt::utf8_to_wide(temp, temp_wide);

	if( clear_stream )
	{
		out.clear();
	}

	out << temp_wide;

	return status;
}


/*
 * the same as get(const wchar_t *, pt::WTextStream &, bool) but the url is given as a std::wstring
 */
bool Http::get(const std::wstring & url, pt::WTextStream & out,  bool clear_stream)
{
	const wchar_t * url_chars = url.c_str();

	return get(url_chars, out, clear_stream);
}


/*
 * the same as get(const wchar_t *, pt::WTextStream &, bool) but the url is taken from a stream
 * (the stream is first copied to a temporary std::wstring)
 */
bool Http::get(const pt::WTextStream & url, pt::WTextStream & out, bool clear_stream)
{
	std::wstring url_wide;
	url.to_str(url_wide);

	return get(url_wide.c_str(), out, clear_stream);
}


bool Http::put(const wchar_t * url, const std::string & in, pt::WTextStream & out, bool clear_stream)
{
	std::string url_ascii;
	pt::TextStream out_stream;

	if( clear_stream )
	{
		out.clear();
	}

	pt::wide_to_utf8(url, url_ascii);

	reset_headers();
	bool status = fetch_internal(url_ascii.c_str(), &in, out_stream);
	out << out_stream;

	return status;
}


/*
 * the same as put(const wchar_t *, const std::string &, pt::WTextStream &, bool)
 * but the url is given as a std::wstring
 */
bool Http::put(const std::wstring & url, const std::string & in, pt::WTextStream & out, bool clear_stream)
{
	const wchar_t * url_chars = url.c_str();

	return put(url_chars, in, out, clear_stream);
}


/*
 * the same as put(const wchar_t *, const std::string &, pt::WTextStream &, bool)
 * but the request body is taken from a wide stream
 * (the stream is first copied to a temporary std::string with to_str())
 */
bool Http::put(const wchar_t * url, pt::WTextStream & in, pt::WTextStream & out, bool clear_stream)
{
	std::string request_body;
	in.to_str(request_body);

	const bool sent = put(url, request_body, out, clear_stream);

	return sent;
}


/*
 * returns the value of CURLINFO_RESPONSE_CODE read from the curl handle
 * or -1 if the handle has not been created yet
 */
long Http::get_status()
{
	if( !curl )
		return -1;

	long response_code = -1;
	curl_easy_getinfo(curl, CURLINFO_RESPONSE_CODE, &response_code);

	return response_code;
}


/*
 * frees the list of request headers (if there is one) and sets the pointer to null
 */
void Http::reset_headers()
{
	if( !http_headers )
		return;

	curl_slist_free_all(http_headers);
	http_headers = nullptr;
}


void Http::add_additional_headers()
{
	if( additional_headers_to_send && additional_headers_to_send->is_object() )
	{
		pt::WTextStream header;
		pt::Space::ObjectType::iterator i = additional_headers_to_send->value.value_object.begin();

		while( i != additional_headers_to_send->value.value_object.end() )
		{
			header.clear();

			header << i->first << ": ";
			header << i->second->to_wstr();
			add_header(header);

			++i;
		}
	}
}


void Http::add_bearer_token()
{
	if( bearer_token && bearer_token[0] != 0 )
	{
		pt::WTextStream header;
		header << L"Authorization: Bearer " << bearer_token;
		add_header(header);
	}
}


/*
 * adds a request header given as a stream
 *
 * the stream is copied to the temp_header member which is passed to the
 * std::wstring version of add_header(), the member is cleared afterwards
 */
void Http::add_header(const pt::WTextStream & header)
{
	header.to_str(this->temp_header);

	this->add_header(this->temp_header);
	this->temp_header.clear();
}


void Http::add_header(const std::wstring & header)
{
	pt::wide_to_utf8(header, temp_header_ascii);
	http_headers = curl_slist_append(http_headers, temp_header_ascii.c_str());
}


/*
 * creates the curl easy handle if it does not exist yet
 * (the 'curl' member stays null if curl_easy_init() fails)
 */
void Http::initialize_curl_if_needed()
{
	if( curl )
		return;

	/*
	 * curl_easy_init() is not thread safe before curl 7.84.0 version
	 *
	 * from https://curl.se/libcurl/c/curl_easy_init.html
	 * If you did not already call curl_global_init, curl_easy_init does it automatically.
	 * This may be lethal in multi-threaded cases, since curl_global_init is not thread-safe,
	 * and it may result in resource problems because there is no corresponding cleanup.
	 *
	 * but we have called curl_global_init() in main.cpp (InitCurlLibrary() method)
	 *
	 */
	curl = curl_easy_init();
}


/*
 * frees the list of request headers and then the curl easy handle (if it exists),
 * the handle pointer is set to null so it can be created again later
 */
void Http::uninitialize_curl()
{
	reset_headers();

	if( !curl )
		return;

	curl_easy_cleanup(curl);
	curl = nullptr;
}


// in can be pointer to const char *
bool Http::fetch_internal(const char * url, const std::string * in, pt::TextStream & out)
{
	initialize_curl_if_needed();

	if( output_headers_space )
		output_headers_space->clear();

	if( output_content_type )
		output_content_type->clear();

	if( !curl )
	{
		log << log1 << "Http: I can't initialize curl easy session" << logend;
		return false;
	}

	error_buf[0] = 0;

	read_function_input = in; // can be null
	read_function_index = 0;

	if( read_function_input )
	{
		curl_easy_setopt(curl, CURLOPT_READFUNCTION, 	fetch_read_function);
		curl_easy_setopt(curl, CURLOPT_READDATA,		this);
		curl_easy_setopt(curl, CURLOPT_POST,			1);

		/*
		 * do not set content-leght header here
		 * curl uses "Expect: 100-continue" and it collides if content-length is set
		 * https://daniel.haxx.se/blog/2020/02/27/expect-tweaks-in-curl/
		 *
		 */
	}

	curl_easy_setopt(curl, CURLOPT_WRITEFUNCTION,	fetch_write_function);
	curl_easy_setopt(curl, CURLOPT_WRITEDATA,		&out);
	curl_easy_setopt(curl, CURLOPT_URL,				url);
	curl_easy_setopt(curl, CURLOPT_USERAGENT,		browser_name.c_str());
	curl_easy_setopt(curl, CURLOPT_TIMEOUT,			conn_timeout);
	curl_easy_setopt(curl, CURLOPT_CONNECTTIMEOUT,	conn_timeout);
	curl_easy_setopt(curl, CURLOPT_ERRORBUFFER,		error_buf);
	curl_easy_setopt(curl, CURLOPT_FOLLOWLOCATION,	1);
	curl_easy_setopt(curl, CURLOPT_MAXREDIRS,		20);
	curl_easy_setopt(curl, CURLOPT_NOSIGNAL,		1);

	if( output_headers_space || output_content_type )
	{
		out_headers_stream.clear();
		curl_easy_setopt(curl, CURLOPT_HEADERFUNCTION,  fetch_header_function);
		curl_easy_setopt(curl, CURLOPT_HEADERDATA,		&out_headers_stream);
	}

	add_additional_headers();
	add_bearer_token();

	if( http_headers )
	{
		curl_easy_setopt(curl, CURLOPT_HTTPHEADER, 	http_headers);
	}

	CURLcode res = curl_easy_perform(curl);

	reset_headers();

	if( res == CURLE_OK )
	{
		parse_headers();
		out_headers_stream.clear();
	}
	else
	{
		log << log1 << "Http: fetching failed: " << error_buf << ", code: "
			<< static_cast<int>(res) << logend;

		return false;
	}

	return true;
}


/*
 * read callback for curl (CURLOPT_READFUNCTION): provides the next part of the request body
 *
 * ptr         - buffer to fill
 * size, nmemb - the buffer has size * nmemb bytes
 * userdata    - pointer to the Http object (set with CURLOPT_READDATA)
 *
 * returns the number of bytes copied to ptr, zero means there is no more data
 *
 * zero is also returned when userdata or the input string is null
 * (previously a null input string was dereferenced)
 */
size_t Http::fetch_read_function(char * ptr, size_t size, size_t nmemb, void * userdata)
{
	Http * http = static_cast<Http*>(userdata);

	if( !http || !http->read_function_input )
		return 0;

	const std::string & input = *http->read_function_input;

	if( http->read_function_index >= input.size() )
		return 0;

	// std::string::copy() copies at most size * nmemb characters and stops at the end of the string
	size_t len = input.copy(ptr, size * nmemb, http->read_function_index);
	http->read_function_index += len;

	return len;
}


size_t Http::fetch_write_function(char * ptr, size_t size, size_t nmemb, void * userdata)
{
	size_t len = size * nmemb;

	if( userdata )
	{
		pt::TextStream * out = reinterpret_cast<pt::TextStream*>(userdata);

		if( len > 0 )
			out->write(ptr, len);
	}

	return len;
}


size_t Http::fetch_header_function(char * ptr, size_t size, size_t nmemb, void * userdata)
{
	size_t len = size * nmemb;

	if( userdata  )
	{
		pt::TextStream * out = reinterpret_cast<pt::TextStream*>(userdata);

		if( len > 0 )
			out->write(ptr, len);
	}

	return len;
}


/*
 * moves the iterator forward while it points to a character for which
 * pt::is_white(c, false, false) is true, stops at the end of out_headers_stream
 */
void Http::skip_white(pt::TextStream::iterator & i)
{
	while( i != out_headers_stream.end() && pt::is_white(*i, false, false) )
	{
		++i;
	}
}


/*
 * reads a header name from out_headers_stream starting at 'i'
 *
 * reading stops:
 *  - on ':'  - the iterator is left just after the colon
 *  - on '\n' - the iterator is left on the new line character
 *  - at the end of the stream
 *
 * '\r' characters are skipped, the other characters are added to 'name'
 * (changed to lower case if change_header_names_to_lower is set)
 */
void Http::parse_header_name(pt::TextStream::iterator & i, std::string & name)
{
	name.clear();

	while( i != out_headers_stream.end() )
	{
		char c = *i;

		// the new line is not consumed here
		if( c == '\n' )
			return;

		++i;

		if( c == ':' )
			return;

		if( c == '\r' )
			continue;

		if( change_header_names_to_lower )
			c = pt::to_lower(c);

		name += c;
	}
}


/*
 * reads a header value from out_headers_stream starting at 'i'
 *
 * characters up to the nearest '\n' (or the end of the stream) are added to 'value',
 * '\r' characters are skipped, the iterator is left just after the new line character
 */
void Http::parse_header_value(pt::TextStream::iterator & i, std::string & value)
{
	value.clear();

	while( i != out_headers_stream.end() )
	{
		const char c = *i;
		++i;

		if( c == '\n' )
			break;

		if( c != '\r' )
			value += c;
	}
}


/*
 * parses the raw response headers collected in out_headers_stream
 *
 * nothing is done if neither an output space nor an output content type string is set,
 * otherwise each line is split to a name and a value, both are converted with pt::utf8_to_wide()
 * and then:
 *  - added to output_headers_space (if set)
 *  - copied to output_content_type (if set) when the name is equal (pt::is_equal_nc)
 *    to Header::content_type
 *
 * the temporary string members are cleared at the end
 */
void Http::parse_headers()
{
	if( !output_headers_space && !output_content_type )
		return;

	// the iterator is moved only by the parse_/skip_ methods
	for(pt::TextStream::iterator i = out_headers_stream.begin() ; i != out_headers_stream.end() ; )
	{
		parse_header_name(i, temp_header_ascii);
		skip_white(i);
		parse_header_value(i, temp_header_value_ascii);

		pt::utf8_to_wide(temp_header_ascii, temp_header);
		pt::utf8_to_wide(temp_header_value_ascii, temp_header_value);

		// both temp_header and temp_header_value will be empty
		// after the first header 'HTTP/1.1 100 Continue' (if exists)
		// (there is an empty line in such a case)
		if( temp_header.empty() && temp_header_value.empty() )
			continue;

		if( output_headers_space )
		{
			output_headers_space->add(temp_header, temp_header_value);
		}

		if( output_content_type && pt::is_equal_nc(temp_header, Header::content_type) )
		{
			*output_content_type = temp_header_value;
		}
	}

	temp_header_value_ascii.clear();
	temp_header_value.clear();
	temp_header_ascii.clear();
	temp_header.clear();
}


}