// allplacefinder/lib/finder.cpp

/*
* This file is a part of AllPlaceFinder library
* and is distributed under the 2-Clause BSD licence.
* Author: Tomasz Sowa <t.sowa@ttmath.org>
*/
/*
* Copyright (c) 2021-2024, Tomasz Sowa
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* 1. Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
*
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*
*/
#include <iostream>
#include <sys/stat.h>
#include "finder.h"
#include "utf8/utf8.h"
#include "misc.h"
#include "convert/convert.h"
#include "space/spaceparser.h"
namespace allplacefinder
{
/*
 * Default constructor: brings every member into its initial state
 * by delegating to clear().
 */
Finder::Finder()
{
    this->clear();
}
/*
 * Resets the object: empties the ways table, zeroes the node and way
 * counters, leaves the second phase and detaches the parameters pointer.
 */
void Finder::clear()
{
    ways_table.clear();

    nodes = 0;
    ways = 0;

    second_phase = false;
    params = nullptr;
}
/*
 * Validates the parameters needed for building the lookup directory
 * and then parses the osm file.
 *
 * The checks are made in this order: the osm file path must not be empty,
 * the lookup dir path must not be empty, the osm file must be openable
 * for reading and the lookup directory must exist. The first failing check
 * prints a message to std::cerr (open_file_for_reading is given its own
 * message) and the method returns false.
 *
 * On success the params object is remembered in this->params and the result
 * of parse_osm_file() is returned.
 */
bool Finder::create_lookup_dir(Params & params)
{
    // small local helper: print the message and give back a failure status
    const auto report_failure = [](const char * message) -> bool
    {
        std::cerr << message << std::endl;
        return false;
    };

    if( params.osm_file.empty() )
        return report_failure("you have to provide a path to osm file in --osm-file parameter");

    if( params.lookup_dir.empty() )
        return report_failure("you have to provide a path to lookup dir in --lookup-dir parameter");

    const bool osm_opened = open_file_for_reading(params.osm_file, osm_file_str, "I cannot open the osm file for reading");

    if( !osm_opened )
        return false;

    if( !dir_exists(params.lookup_dir) )
        return report_failure("There is no the lookup directory");

    // the parameter shadows the member, so the member is reached through this
    this->params = &params;

    return parse_osm_file();
}
/*
 * Creates the hints file from the osm file.
 *
 * params.osm_file and params.hints_file_name must not be empty and the hints
 * file must be openable for writing, otherwise false is returned (a message
 * is printed to std::cerr for the empty-parameter cases, open_file_for_writing
 * is given its own message).
 *
 * The osm file is parsed once. If that succeeded and
 * params.use_two_ways_hints_file_algorithm is set, the number of collected
 * entries in ways_table is printed and the file is parsed a second time with
 * second_phase set to true.
 *
 * Returns the status of the last parse_osm_file() call.
 */
bool Finder::create_hints_file(Params & params)
{
    if( params.osm_file.empty() )
    {
        std::cerr << "you have to provide a path to osm file in --osm-file parameter" << std::endl;
        return false;
    }

    if( params.hints_file_name.empty() )
    {
        std::cerr << "you have to provide a path to the hints file in --hints_file_name parameter" << std::endl;
        return false;
    }

    if( !open_file_for_writing(params.hints_file_name, hints_file, "I cannot open the hints file for writing") )
        return false;

    // the parameter shadows the member, so the member is reached through this
    this->params = &params;

    // Always begin with the first phase and an empty table: without this
    // a second call on the same object would start with second_phase still
    // set to true and with the ways collected by the previous run.
    second_phase = false;
    ways_table.clear();

    bool status = parse_osm_file();

    if( status && params.use_two_ways_hints_file_algorithm )
    {
        std::cout << "places found: " << ways_table.size() << std::endl;
        std::cout << "doing a second phase" << std::endl;

        second_phase = true;
        status = parse_osm_file();

        // do not leave the object in the second phase for later parsing
        second_phase = false;
    }

    return status;
}
/*
 * Parses the osm file pointed to by params->osm_file with pt::HTMLParser,
 * registering this object as the item-parsed listener.
 *
 * The nodes and ways counters are zeroed before parsing and printed to
 * std::cout afterwards (also when the parser reported an error).
 *
 * Returns false when the parser reported cant_open_file or syntax_error
 * (a message is printed to std::cerr in both cases), true otherwise.
 * Previously true was returned unconditionally, which let callers continue
 * (e.g. start a second pass) after a failed parse.
 */
bool Finder::parse_osm_file()
{
    pt::HTMLParser parser;
    pt::Space space;
    std::string file_path;
    bool parsed_ok = true;

    // the parser is given a utf-8 path
    pt::wide_to_utf8(params->osm_file, file_path);

    nodes = 0;
    ways = 0;

    parser.set_item_parsed_listener(this);
    pt::HTMLParser::Status status = parser.parse_xml_file(file_path, space, false);

    switch(status)
    {
    case pt::HTMLParser::cant_open_file:
        std::cerr << "I cannot open the input file: " << file_path << std::endl;
        parsed_ok = false;
        break;

    case pt::HTMLParser::syntax_error:
        std::cerr << "Syntax error: " << file_path << ":" << parser.get_last_parsed_line() << std::endl;
        parsed_ok = false;
        break;

    case pt::HTMLParser::ok:
        //std::cout << space.serialize_to_json_str(true) << std::endl;
        break;
    }

    std::cout << "nodes: " << nodes << std::endl;
    std::cout << "ways: " << ways << std::endl;

    return parsed_ok;
}
/*
 * Listener callback (this object is registered with
 * set_item_parsed_listener() in parse_osm_file()).
 *
 * "node" items increment the nodes counter and are passed to parse_node(),
 * "way" items increment the ways counter and are passed to parse_way().
 * Items with any other name are ignored.
 */
void Finder::item_parsed(const pt::HTMLParser::Item & item)
{
    if( item.name == L"node" )
    {
        ++nodes;
        parse_node(*item.space);
    }
    else
    if( item.name == L"way" )
    {
        ++ways;
        parse_way(*item.space);
    }
}
/*
 * Returns true when the item is named "node", "way", "relation" or "bounds".
 *
 * NOTE(review): presumably the parser uses the answer to drop such items
 * once they have been reported -- confirm against the listener interface.
 */
bool Finder::should_remove(const pt::HTMLParser::Item & item)
{
    static const wchar_t * const removable_names[] = { L"node", L"way", L"relation", L"bounds" };

    for(const wchar_t * candidate : removable_names)
    {
        if( item.name == candidate )
            return true;
    }

    return false;
}
/*
 * Handles one parsed "node" item.
 *
 * The "attr" space of the node must provide "id", "lat" and "lon";
 * a node without "attr" is silently ignored, a node missing one of the three
 * attributes or having a negative id is skipped with a note on std::cout.
 *
 * For an accepted node:
 *  - if params->create_lookup_dir is set, an {id, lat, lon} space is saved
 *    with save_file() under the "node" subdirectory (a failure is only
 *    reported on std::cout),
 *  - if params->create_hints_file and the two ways algorithm are set and
 *    the second phase is running, check_way() is called for the node.
 */
void Finder::parse_node(pt::Space & node)
{
    pt::Space * attributes = node.get_space(L"attr");

    if( !attributes )
        return;

    pt::Space * id_field = attributes->get_space(L"id");
    pt::Space * lat_field = attributes->get_space(L"lat");
    pt::Space * lon_field = attributes->get_space(L"lon");

    if( !id_field || !lat_field || !lon_field )
    {
        std::cout << "Skipping node" << std::endl;
        return;
    }

    long node_id = id_field->to_long();

    if( node_id < 0 )
    {
        std::cout << "Skipping node with negative id" << std::endl;
        return;
    }

    // coordinates are kept as text, exactly as they were read
    std::wstring latitude = lat_field->to_wstr();
    std::wstring longitude = lon_field->to_wstr();

    pt::Space record;
    record.add(L"id", node_id);
    record.add(L"lat", latitude);
    record.add(L"lon", longitude);

    if( params->create_lookup_dir && !save_file(L"node", node_id, record) )
    {
        std::cout << "I cannot save node file with id: " << node_id << std::endl;
    }

    const bool hints_second_pass = params->create_hints_file &&
                                   params->use_two_ways_hints_file_algorithm &&
                                   second_phase;

    if( hints_second_pass )
    {
        check_way(node_id, latitude, longitude);
    }
}
/*
 * Handles one parsed "way" item.
 *
 * The way needs an "attr" space with a non-negative "id" and a "childs"
 * space, otherwise it is silently ignored. The id is put into a fresh
 * output space which is then filled by parse_way_childs(); when that call
 * returns true the output space is printed to std::cout as one line of json.
 */
void Finder::parse_way(pt::Space & way)
{
    pt::Space * attributes = way.get_space(L"attr");

    if( !attributes )
        return;

    pt::Space * id_field = attributes->get_space(L"id");

    if( !id_field )
        return;

    long way_id = id_field->to_long();

    if( way_id < 0 )
        return;

    pt::Space record;
    record.add(L"id", way_id);

    pt::Space * children = way.get_space(L"childs");

    if( children && parse_way_childs(record, *children) )
    {
        std::string json;
        json = record.serialize_to_json_str(false);
        std::cout << json << std::endl;
    }
}
/*
 * Fills the output space from the children of a way.
 *
 * First parse_way_name() looks for the name and counts the "nd" children.
 * Only if a name was found and there is at least one "nd" child,
 * parse_way_middle_node() is called and its result is returned.
 * In every other case false is returned.
 */
bool Finder::parse_way_childs(pt::Space & out, pt::Space & childs)
{
    size_t nd_count = 0;

    if( !parse_way_name(out, childs, nd_count) )
        return false;

    if( nd_count == 0 )
        return false;

    return parse_way_middle_node(out, childs, nd_count);
}
/*
 * Walks through the children of a way (only if childs is a table):
 *  - every child named "nd" increments nd_len (nd_len is not zeroed here,
 *    the caller provides the starting value),
 *  - every child named "tag" whose "attr" space has k == "name" and a "v"
 *    value adds that value to the output space under "name".
 *
 * Returns true if at least one such name tag was found.
 */
bool Finder::parse_way_name(pt::Space & out, pt::Space & childs, size_t & nd_len)
{
    if( !childs.is_table() )
        return false;

    bool name_found = false;

    for(pt::Space * entry : childs.value.value_table)
    {
        std::wstring * kind = entry->get_wstr(L"name");

        if( !kind )
            continue;

        if( *kind == L"nd" )
        {
            ++nd_len;
            continue;
        }

        if( *kind != L"tag" )
            continue;

        pt::Space * attributes = entry->get_space(L"attr");

        if( !attributes )
            continue;

        std::wstring * key = attributes->get_wstr(L"k");
        std::wstring * val = attributes->get_wstr(L"v");

        if( key && val && *key == L"name" )
        {
            name_found = true;
            out.add(L"name", *val);
        }
    }

    return name_found;
}
/*
 * Finds the "nd" child with the index nd_len / 2 (counting only "nd"
 * children, from zero) and uses its "ref" attribute as the node id of the way.
 *
 * The ref is added to the output space under "node_id". Then:
 *  - when a hints file is created with the two ways algorithm, the output
 *    space is inserted into ways_table under the node id (first phase only)
 *    and false is returned,
 *  - otherwise the result of get_node() for that node id is returned.
 *
 * False is also returned if childs is not a table, if there is no such
 * "nd" child or if it has no "attr"/"ref".
 */
bool Finder::parse_way_middle_node(pt::Space & out, pt::Space & childs, size_t nd_len)
{
    if( !childs.is_table() )
        return false;

    const size_t middle = nd_len / 2;
    size_t nd_seen = 0;

    for(pt::Space * entry : childs.value.value_table)
    {
        std::wstring * kind = entry->get_wstr(L"name");

        if( !kind || *kind != L"nd" )
            continue;

        if( nd_seen != middle )
        {
            nd_seen += 1;
            continue;
        }

        // this is the middle nd, nothing after it is looked at
        pt::Space * attributes = entry->get_space(L"attr");
        std::wstring * ref = attributes ? attributes->get_wstr(L"ref") : nullptr;

        if( !ref )
            return false;

        out.add(L"node_id", *ref);
        long node_id = pt::to_l(ref->c_str());

        const bool two_pass_hints = params->create_hints_file && params->use_two_ways_hints_file_algorithm;

        if( !two_pass_hints )
            return get_node(node_id, out);

        if( !second_phase )
        {
            // insert() leaves an already existing entry for this node id untouched
            ways_table.insert(std::make_pair(node_id, out));
        }

        return false;
    }

    return false;
}
bool Finder::get_node(long node_id, pt::Space & out)
{
std::vector<std::wstring> subdirs;
create_id_str(node_id, subdirs);
std::wstring path;
path = params->lookup_dir;
path += L"/";
path += L"node";
for(std::wstring & str : subdirs)
{
path += L"/";
path += str;
}
pt::SpaceParser parser;
pt::Space node;
pt::SpaceParser::Status status = parser.parse_json_file(path, node);
if( status == pt::SpaceParser::ok )
{
std::wstring * lat = node.get_wstr(L"lat");
std::wstring * lon = node.get_wstr(L"lon");
if( lat && lon )
{
out.add(L"lat", *lat);
out.add(L"lon", *lon);
return true;
}
}
return false;
}
/*
 * Serializes the space to json and saves it in the lookup directory.
 *
 * The path is: lookup_dir + "/" + subdir + "/" + each part produced by
 * create_id_str() for the id. All parts except the last one are directories
 * and are created when missing, the last part is the file name.
 *
 * Returns false if a directory cannot be created, if the file cannot be
 * opened or if the stream is in a failed state after closing it.
 * If create_id_str() produced no parts, only lookup_dir/subdir is created
 * and true is returned without writing any file.
 */
bool Finder::save_file(const wchar_t * subdir, long id, pt::Space & space)
{
    std::vector<std::wstring> id_parts;
    create_id_str(id, id_parts);

    std::wstring location;
    location = params->lookup_dir;
    location += L"/";
    location += subdir;

    if( !create_dir_if_not_exists(location) )
        return false;

    if( id_parts.empty() )
        return true;

    const size_t last = id_parts.size() - 1;

    // everything before the last part is a directory level
    for(size_t level = 0 ; level < last ; ++level)
    {
        location += L"/";
        location += id_parts[level];

        if( !create_dir_if_not_exists(location) )
            return false;
    }

    location += L"/";
    location += id_parts[last];

    std::string location_utf8;
    pt::wide_to_utf8(location, location_utf8);

    std::ofstream file(location_utf8, std::ios_base::out | std::ios_base::binary);

    if( !file )
        return false;

    // text_stream is a member buffer, emptied before every use
    text_stream.clear();
    space.serialize_to_json_stream(text_stream, true);
    text_stream << '\n';
    save_stream(text_stream, file);

    // close explicitly so that a write error can be seen in the stream state
    file.close();

    return !file.fail();
}
bool Finder::check_way(long node_id, std::wstring & lat, std::wstring & lon)
{
bool found = false;
std::map<long, pt::Space>::iterator i = ways_table.find(node_id);
if( i != ways_table.end() )
{
pt::Space & way = i->second;
long id = way.to_long(L"id");
std::wstring * name = way.get_wstr(L"name");
long node_id = way.to_long(L"node_id");
if( name )
{
pt::TextStream str;
str << "{\"id\":" << id;
str << ",\"lat\":\"";
pt::esc_to_json(lat, str);
str << "\",\"lon\":\"";
pt::esc_to_json(lon, str);
str << "\",\"name\":\"";
pt::esc_to_json(*name, str);
str << "\",\"node_id\":" << node_id;
str << "}";
std::string str_utf8;
str.to_str(str_utf8);
hints_file << str_utf8 << std::endl;
}
found = true;
}
return found;
}
} // namespace