/* * This file is a part of AllPlaceFinder library * and is distributed under the 2-Clause BSD licence. * Author: Tomasz Sowa */ /* * Copyright (c) 2021-2024, Tomasz Sowa * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: * * 1. Redistributions of source code must retain the above copyright notice, * this list of conditions and the following disclaimer. * * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE * POSSIBILITY OF SUCH DAMAGE. * */ #include #include #include "finder.h" #include "utf8/utf8.h" #include "misc.h" #include "convert/convert.h" #include "space/spaceparser.h" namespace allplacefinder { Finder::Finder() { clear(); } void Finder::clear() { params = nullptr; second_phase = false; nodes = 0; ways = 0; ways_table.clear(); } bool Finder::create_lookup_dir(Params & params) { if( params.osm_file.empty() ) { std::cerr << "you have to provide a path to osm file in --osm-file parameter" << std::endl; return false; } if( params.lookup_dir.empty() ) { std::cerr << "you have to provide a path to lookup dir in --lookup-dir parameter" << std::endl; return false; } if( !open_file_for_reading(params.osm_file, osm_file_str, "I cannot open the osm file for reading") ) return false; if( !dir_exists(params.lookup_dir ) ) { std::cerr << "There is no the lookup directory" << std::endl; return false; } this->params = ¶ms; return parse_osm_file(); } bool Finder::create_hints_file(Params & params) { if( params.osm_file.empty() ) { std::cerr << "you have to provide a path to osm file in --osm-file parameter" << std::endl; return false; } if( params.hints_file_name.empty() ) { std::cerr << "you have to provide a path to the hints file in --hints_file_name parameter" << std::endl; return false; } if( !open_file_for_writing(params.hints_file_name, hints_file, "I cannot open the hints file for writing") ) return false; this->params = ¶ms; bool status = parse_osm_file(); if( status && params.use_two_ways_hints_file_algorithm ) { std::cout << "places found: " << ways_table.size() << std::endl; std::cout << "doing a second phase" << std::endl; second_phase = true; status = parse_osm_file(); } return status; } bool Finder::parse_osm_file() { pt::HTMLParser parser; pt::Space space; std::string file_path; pt::wide_to_utf8(params->osm_file, file_path); nodes = 0; ways = 0; parser.set_item_parsed_listener(this); pt::HTMLParser::Status status = parser.parse_xml_file(file_path, space, false); switch(status) { case pt::HTMLParser::cant_open_file: std::cerr << "I cannot open the input file: " << file_path << std::endl; break; case pt::HTMLParser::syntax_error: std::cerr << "Syntax error: " << file_path << ":" << parser.get_last_parsed_line() << std::endl; break; case pt::HTMLParser::ok: //std::cout << space.serialize_to_json_str(true) << std::endl; break; } std::cout << "nodes: " << nodes << std::endl; std::cout << "ways: " << ways << std::endl; return true; } void Finder::item_parsed(const pt::HTMLParser::Item & item) { if( item.name == L"node" ) { nodes += 1; parse_node(*item.space); } if( item.name == L"way" ) { ways += 1; parse_way(*item.space); } } bool Finder::should_remove(const pt::HTMLParser::Item & item) { return (item.name == L"node" || item.name == L"way" || item.name == L"relation" || item.name == L"bounds" ); } void Finder::parse_node(pt::Space & node) { pt::Space * attr = node.get_space(L"attr"); pt::Space out; if( attr ) { pt::Space * id = attr->get_space(L"id"); pt::Space * lat = attr->get_space(L"lat"); pt::Space * lon = attr->get_space(L"lon"); if( id && lat && lon ) { long id_long = id->to_long(); if( id_long >= 0 ) { out.add(L"id", id_long); std::wstring lat_str = lat->to_wstr(); std::wstring lon_str = lon->to_wstr(); out.add(L"lat", lat_str); out.add(L"lon", lon_str); if( params->create_lookup_dir ) { if( !save_file(L"node", id_long, out) ) { std::cout << "I cannot save node file with id: " << id_long << std::endl; } } if( params->create_hints_file ) { if( params->use_two_ways_hints_file_algorithm && second_phase ) { check_way(id_long, lat_str, lon_str); } } } else { std::cout << "Skipping node with negative id" << std::endl; } } else { std::cout << "Skipping node" << std::endl; } } //std::cout << out.serialize_to_json_str(true) << std::endl; } void Finder::parse_way(pt::Space & way) { pt::Space * attr = way.get_space(L"attr"); pt::Space out; if( attr ) { pt::Space * id = attr->get_space(L"id"); if( id ) { long id_long = id->to_long(); if( id_long >= 0 ) { out.add(L"id", id_long); pt::Space * childs = way.get_space(L"childs"); if( childs ) { if( parse_way_childs(out, *childs) ) { std::string str; str = out.serialize_to_json_str(false); std::cout << str << std::endl; } } } } } //std::cout << out.serialize_to_json_str(true) << std::endl; } bool Finder::parse_way_childs(pt::Space & out, pt::Space & childs) { size_t nd_len = 0; bool has_name = parse_way_name(out, childs, nd_len); bool has_node = false; if( nd_len > 0 && has_name ) { has_node = parse_way_middle_node(out, childs, nd_len); } return nd_len > 0 && has_name && has_node; } bool Finder::parse_way_name(pt::Space & out, pt::Space & childs, size_t & nd_len) { bool has_name = false; if( childs.is_table() ) { for(pt::Space * child : childs.value.value_table) { std::wstring * name = child->get_wstr(L"name"); if( name ) { if( *name == L"nd" ) { nd_len += 1; } if( *name == L"tag" ) { pt::Space * attr = child->get_space(L"attr"); if( attr ) { std::wstring * k = attr->get_wstr(L"k"); std::wstring * v = attr->get_wstr(L"v"); if( k && v && *k == L"name" ) { has_name = true; out.add(L"name", *v); } } } } } } return has_name; } bool Finder::parse_way_middle_node(pt::Space & out, pt::Space & childs, size_t nd_len) { bool has_node = false; size_t nd_mid = (nd_len / 2); size_t index = 0; if( childs.is_table() ) { for(pt::Space * child : childs.value.value_table) { std::wstring * name = child->get_wstr(L"name"); if( name && *name == L"nd" ) { if( index == nd_mid ) { pt::Space * attr = child->get_space(L"attr"); if( attr ) { std::wstring * ref = attr->get_wstr(L"ref"); if( ref ) { out.add(L"node_id", *ref); long node_id = pt::to_l(ref->c_str()); if( params->create_hints_file && params->use_two_ways_hints_file_algorithm ) { if( !second_phase ) { ways_table.insert(std::make_pair(node_id, out)); } } else { has_node = get_node(node_id, out); } } } break; } index += 1; } } } return has_node; } bool Finder::get_node(long node_id, pt::Space & out) { std::vector subdirs; create_id_str(node_id, subdirs); std::wstring path; path = params->lookup_dir; path += L"/"; path += L"node"; for(std::wstring & str : subdirs) { path += L"/"; path += str; } pt::SpaceParser parser; pt::Space node; pt::SpaceParser::Status status = parser.parse_json_file(path, node); if( status == pt::SpaceParser::ok ) { std::wstring * lat = node.get_wstr(L"lat"); std::wstring * lon = node.get_wstr(L"lon"); if( lat && lon ) { out.add(L"lat", *lat); out.add(L"lon", *lon); return true; } } return false; } bool Finder::save_file(const wchar_t * subdir, long id, pt::Space & space) { std::vector subdirs; //std::string p; create_id_str(id, subdirs); // for(size_t i = 0 ; i < path.size() ; ++i) // { // pt::wide_to_utf8(path[i], p); // std::cout << p << " / "; // } // // std::cout << std::endl; std::wstring path; path = params->lookup_dir; path += L"/"; path += subdir; if( !create_dir_if_not_exists(path) ) return false; for(size_t i = 0 ; i + 1 < subdirs.size() ; ++i) { path += L"/"; path += subdirs[i]; if( !create_dir_if_not_exists(path) ) return false; } if( !subdirs.empty() ) { path += L"/"; path += subdirs.back(); std::string path_utf8; pt::wide_to_utf8(path, path_utf8); std::ofstream str(path_utf8, std::ios_base::out | std::ios_base::binary); if( str ) { text_stream.clear(); space.serialize_to_json_stream(text_stream, true); text_stream << '\n'; save_stream(text_stream, str); } else { return false; } str.close(); if( !str ) return false; } return true; } bool Finder::check_way(long node_id, std::wstring & lat, std::wstring & lon) { bool found = false; std::map::iterator i = ways_table.find(node_id); if( i != ways_table.end() ) { pt::Space & way = i->second; long id = way.to_long(L"id"); std::wstring * name = way.get_wstr(L"name"); long node_id = way.to_long(L"node_id"); if( name ) { pt::TextStream str; str << "{\"id\":" << id; str << ",\"lat\":\""; pt::esc_to_json(lat, str); str << "\",\"lon\":\""; pt::esc_to_json(lon, str); str << "\",\"name\":\""; pt::esc_to_json(*name, str); str << "\",\"node_id\":" << node_id; str << "}"; std::string str_utf8; str.to_str(str_utf8); hints_file << str_utf8 << std::endl; } found = true; } return found; } } // namespace