diff --git a/src/convert/convert.h b/src/convert/convert.h index b52e83c..d3644f9 100644 --- a/src/convert/convert.h +++ b/src/convert/convert.h @@ -5,7 +5,7 @@ */ /* - * Copyright (c) 2012-2021, Tomasz Sowa + * Copyright (c) 2012-2022, Tomasz Sowa * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -32,8 +32,8 @@ * */ -#ifndef headerfile_pikotools_src_membuffer_convert_convert -#define headerfile_pikotools_src_membuffer_convert_convert +#ifndef headerfile_pikotools_src_convert_convert +#define headerfile_pikotools_src_convert_convert #include "inttostr.h" diff --git a/src/convert/double.h b/src/convert/double.h index adfa6ad..096b6ea 100644 --- a/src/convert/double.h +++ b/src/convert/double.h @@ -32,8 +32,8 @@ * */ -#ifndef headerfile_pikotools_src_membuffer_convert_double -#define headerfile_pikotools_src_membuffer_convert_double +#ifndef headerfile_pikotools_src_convert_double +#define headerfile_pikotools_src_convert_double #include diff --git a/src/date/date.cpp b/src/date/date.cpp index 0a579bf..5252d44 100644 --- a/src/date/date.cpp +++ b/src/date/date.cpp @@ -5,7 +5,7 @@ */ /* - * Copyright (c) 2012-2018, Tomasz Sowa + * Copyright (c) 2012-2022, Tomasz Sowa * All rights reserved. 
* * Redistribution and use in source and binary forms, with or without @@ -69,30 +69,24 @@ Date::Date(const tm & t) Date::Date(const char * str) { - // parsing can be break in the middle of the string (if errors) - // and some values would not be initialized - Clear(); Parse(str); } Date::Date(const wchar_t * str) { - Clear(); Parse(str); } Date::Date(const std::string & str) { - Clear(); Parse(str); } Date::Date(const std::wstring & str) { - Clear(); Parse(str); } @@ -296,17 +290,29 @@ bool Date::operator<=(const Date & d) const } -void Date::Clear() +void Date::ClearDate() { year = 1970; month = 1; day = 1; +} + + +void Date::ClearTime() +{ hour = 0; min = 0; sec = 0; } +void Date::Clear() +{ + ClearDate(); + ClearTime(); +} + + void Date::AssertRange(int & val, int val_min, int val_max) { if( val < val_min ) diff --git a/src/date/date.h b/src/date/date.h index 34b94ef..e824764 100644 --- a/src/date/date.h +++ b/src/date/date.h @@ -32,8 +32,8 @@ * */ -#ifndef headerfile_pikotools_src_mainparser_mainparser -#define headerfile_pikotools_src_mainparser_mainparser +#ifndef headerfile_pikotools_src_date_date +#define headerfile_pikotools_src_date_date #include #include @@ -64,7 +64,7 @@ public: /* the date */ - int year; // 1970 - ... 
+ int year; // 1970 - 10000 int month; // 1 - 12 int day; // 1 - 31 int hour; // 0 - 23 @@ -188,7 +188,21 @@ public: /* - set the Unix Epoch: 1970.01.01 00:00:00 + set the date to: 1970-01-01 + the time is not changed + */ + void ClearDate(); + + + /* + set the time to 00:00:00 + the date is not changed + */ + void ClearTime(); + + + /* + set the Unix Epoch: 1970-01-01 00:00:00 */ void Clear(); @@ -322,7 +336,7 @@ public: use IsCorrectDate() to check */ template - bool ParseDayMonthYear(const CStringType * str, const CStringType ** str_after = 0); + bool ParseDayMonthYear(const CStringType * str, const CStringType ** str_after = nullptr); template bool ParseDayMonthYear(const StringType & str); @@ -347,7 +361,7 @@ public: use IsCorrectDate() to check */ template - bool ParseYearMonthDay(const CStringType * str, const CStringType ** str_after = 0); + bool ParseYearMonthDay(const CStringType * str, const CStringType ** str_after = nullptr); template bool ParseYearMonthDay(const StringType & str); @@ -372,7 +386,7 @@ public: use IsCorrectDate() to check */ template - bool ParseMonthDay(const CStringType * str, const CStringType ** str_after = 0); + bool ParseMonthDay(const CStringType * str, const CStringType ** str_after = nullptr); template bool ParseMonthDay(const StringType & str); @@ -394,7 +408,7 @@ public: use IsCorrectDate() to check */ template - bool ParseHourMinSec(const CStringType * str, const CStringType ** str_after = 0); + bool ParseHourMinSec(const CStringType * str, const CStringType ** str_after = nullptr); template bool ParseHourMinSec(const StringType & str); @@ -416,34 +430,83 @@ public: use IsCorrectDate() to check */ template - bool ParseHourMin(const CStringType * str, const CStringType ** str_after = 0); + bool ParseHourMin(const CStringType * str, const CStringType ** str_after = nullptr); template bool ParseHourMin(const StringType & str); template - bool ParseZoneOffset(const CStringType * str, const CStringType ** str_after = 0); + bool 
ParseZoneOffset(const CStringType * str, const CStringType ** str_after = nullptr); template bool ParseZoneOffset(const StringType & str); /* - parsing hour and minutes (if exists) and seconds (if exists) - the input string can be as follows: + parsing a year and a month (if exists) and a day (if exists) + + the input string can be as follows: YYYY[sep]MM[sep]DD + the separator is optional, it can be: '.', '-', '/' or just white characters + (white characters before and after a separator are skipped) + + sample valid dates: + "2022" + " 2022" + "202212" + " 2022 12" + "2022-12" + " 2022 - 12" + "20221222" + " 20221222 " + " 2022 12 22 " + " 2022 12 22 " + "2022-12-22" + " 2022-12-22" + " 2022 - 12 - 22 " + + if the month is not provided then it is set to 01, + if the day is not provided then it is set to 01 too + + this method doesn't test if the values are correct + use IsCorrectDate() to check + */ + template + bool ParseDate(const CStringType * str, const CStringType ** str_after = nullptr); + + template + bool ParseDate(const StringType & str); + + + /* + parsing an hour and minutes (if exists) and seconds (if exists) + the input string can be as follows: HH[sep]MM[sep]SS + + separator is optional, it can be ':' or just white characters + (white characters before and after a separator are skipped) + "14" -- only an hour given (min and sec will be zero) "14:10" -- hour with minutes (sec will be zero) + "1410" -- the same as above "14:10:35" -- hour, minutes and seconds + "141035" -- the same as above + "14 10 35" -- the same as above white characters are ommited so these are valid strings too: " 14 : 10 : 35 " " 14 : 10 : 35some text " a white character means a space or a tab + a decimal fraction may be added to the lowest order time element present, e.g: + "14.500" = 14:30:00 + "14:10.500" = 14:10:30 + "14:10:35.500" = 14:10:35 (the seconds' fraction is skipped) + instead of dot you can use a comma too, + the fraction part can be from 1 to 9 digits + this 
method doesn't test if the values are correct use IsCorrectDate() to check */ template - bool ParseTime(const CStringType * str, const CStringType ** str_after = 0); + bool ParseTime(const CStringType * str, const CStringType ** str_after = nullptr); template bool ParseTime(const StringType & str); @@ -463,9 +526,11 @@ public: this method doesn't test if the values are correct use IsCorrectDate() to check + + the time is parsed with ParseTime() method, look there for an additional description about available time strings */ template - bool ParseMonthDayTime(const CStringType * str, const CStringType ** str_after = 0); + bool ParseMonthDayTime(const CStringType * str, const CStringType ** str_after = nullptr); template bool ParseMonthDayTime(const StringType & str); @@ -474,24 +539,48 @@ public: /* parsing year/month/day hour:min:sec the input strings can be as follows: + "20081012 141035" + "20081012T141035" + "20081012141035" "2008-10-12 14:10:35" "2008/10/12 14:10:35" "2008.10.12 14:10:35" "2008-10/12 14:10:35" + "2008-10-12 14:10:35.500" + "2008-10-12 14:10.500" + "2008-10-12 14.500" + "2008-10-12 14" + "2008-10 14" + "2008 14" + + months and days can be omitted - in such a case 01 is set, + similar min and sec can be omitted (they are assumed to be 00 in such a case) white characters are ommited so the input string can be: " 2008 - 10 / 12 14 : 10 : 35 " a white character means a space or a tab - as a separator for year/month/day can be '-' '/' or '.' - see ParseYearMonthDay() for details + as an optional separator for the date can be '-' '/' or '.' 
+ as an optional separator for the time is the ':' character - as a separator for hour:min:sec is the ':' character - see ParseHourMinSec() for details + at the end of the string there can be defined a time zone in the form of "+01:00", e.g: + "2008-10-12 14:10:35+01:00" + "2008-10-12 14:10:35-02:00" + or without a separator: + "2008-10-12 14:10:35+0100" + "2008-10-12 14:10:35-0200" + or just a 'Z' character indicating 00:00 zone + "2008-10-12 14:10:35Z" at the end the method checks if the values are correct (by using IsCorrectDate()) + + the format is similar to ISO 8601 + https://en.wikipedia.org/wiki/ISO_8601 + at the moment there is no support for week dates e.g. "2008-W01-3" and ordinal dates e.g. "2008-010" formats + + see ParseDate() and ParseTime() for more examples */ template bool Parse(const CStringType * str, const CStringType ** str_after, bool check_time_zone = true); @@ -520,7 +609,16 @@ private: void SkipWhite(const CStringType * & str); template - bool ReadInt(const CStringType * & str, int & result, size_t max_digits = 0); + bool IsDigit(const CStringType * str); + + template + bool ReadInt(const CStringType * & str, int & result, size_t max_digits = 0, size_t * digits_read = nullptr, int * digits_base = nullptr); + + template + bool ReadCommaInt(const CStringType * & str, bool & was_comma, int & result, size_t max_digits = 0, size_t * digits_read = nullptr, int * base = nullptr); + + template + bool ParseTimeValue(const CStringType * & str, int & value, bool & has_fraction, int & fraction, int * fraction_base); template bool SkipSeparator(const CStringType * & str, int separator, int separator2 = -1, int separator3 = -1); @@ -714,7 +812,6 @@ bool Date::ParseMonthDay(const StringType & str) } - template bool Date::ParseHourMinSec(const CStringType * str, const CStringType ** str_after) { @@ -777,39 +874,37 @@ bool Date::ParseZoneOffset(const CStringType * str, const CStringType ** str_aft is_sign = true; str += 1; + result = true; // if there are no 
digits we return true - if( ReadInt(str, offset_hour, 2) && offset_hour >= -12 && offset_hour <= 14 ) + if( IsDigit(str) ) { - SkipWhite(str); - SetAfter(str, str_after); + result = ReadInt(str, offset_hour, 2) && offset_hour >= -12 && offset_hour <= 14; - if( *str == ':' ) + if( result ) { - str += 1; - SkipWhite(str); - SetAfter(str, str_after); + SkipSeparator(str, ':'); + + if( IsDigit(str) ) + { + // offset_min is optional + result = ReadInt(str, offset_min, 2) && offset_min > -60 && offset_min < 60; + } + + if( result ) + { + time_t offset = (time_t)offset_hour * 60 * 60 + (time_t)offset_min * 60; + + if( is_sign ) + offset = -offset; + + FromTime(ToTime() - offset); + } } - - if( ReadInt(str, offset_min, 2) && offset_min > -60 && offset_min < 60 ) - { - SetAfter(str, str_after); - } - else - { - offset_min = 0; - } - - time_t offset = (time_t)offset_hour * 60 * 60 + (time_t)offset_min * 60; - result = true; - - if( is_sign ) - offset = -offset; - - FromTime(ToTime() - offset); } } -return result; + SetAfter(str, str_after); + return result; } @@ -821,47 +916,114 @@ bool Date::ParseZoneOffset(const StringType & str) +template +bool Date::ParseDate(const CStringType * str, const CStringType ** str_after) +{ + bool status = false; + ClearDate(); + + if( ReadInt(str, year, 4) ) + { + status = true; + SkipSeparator(str, '.', '-', '/'); + + if( IsDigit(str) ) + { + status = ReadInt(str, month, 2); + SkipSeparator(str, '.', '-', '/'); + + if( status && IsDigit(str) ) + { + status = ReadInt(str, day, 2); + } + } + } + + SetAfter(str, str_after); + return status; +} + + +template +bool Date::ParseDate(const StringType & str) +{ + return ParseDate(str.c_str()); +} + + + + + +template +bool Date::ParseTimeValue(const CStringType * & str, int & value, bool & has_fraction, int & fraction, int * fraction_base) +{ + bool status = false; + size_t digits_read = 0; + bool was_comma = false; + has_fraction = false; + fraction = 0; + *fraction_base = 1; + + if( ReadInt(str, 
value, 2) ) + { + status = ReadCommaInt(str, was_comma, fraction, 0, &digits_read, fraction_base); + + if( status ) + { + if( was_comma ) + has_fraction = true; + + SkipSeparator(str, ':'); + } + } + + return status; +} template bool Date::ParseTime(const CStringType * str, const CStringType ** str_after) { - if( !ReadInt(str, hour) ) - { - SetAfter(str, str_after); - return false; - } + bool status = false; + bool has_fraction = false; + int fraction = 0; + int fraction_base = 0; - min = 0; - sec = 0; + ClearTime(); - if( !SkipSeparator(str, ':') ) + if( ParseTimeValue(str, hour, has_fraction, fraction, &fraction_base) ) { - SetAfter(str, str_after); - return true; // only an hour given - } + status = true; - if( !ReadInt(str, min) ) - { - SetAfter(str, str_after); - return false; - } + if( has_fraction ) + { + min = (60L * fraction) / (long)(fraction_base); + int min_rem = (60L * fraction) % (long)(fraction_base); + sec = (min_rem * 60L) / (long)fraction_base; + } + else + if( IsDigit(str) ) + { + status = ParseTimeValue(str, min, has_fraction, fraction, &fraction_base); - if( !SkipSeparator(str, ':') ) - { - SetAfter(str, str_after); - return true; // only an hour and minutes given - } - - if( !ReadInt(str, sec) ) - { - SetAfter(str, str_after); - return false; + if( status ) + { + if( has_fraction ) + { + sec = (60L * fraction) / (long)(fraction_base); + } + else + if( IsDigit(str) ) + { + status = ParseTimeValue(str, sec, has_fraction, fraction, &fraction_base); + // ignore the seconds fraction if exists + } + } + } } SetAfter(str, str_after); - -return true; + return status; } @@ -903,43 +1065,44 @@ bool Date::Parse(const CStringType * str, const CStringType ** str_after, bool c { const CStringType * after; bool result = false; + Clear(); - if( ParseYearMonthDay(str, &after) ) + if( ParseDate(str, &after) ) + { + result = true; + SkipWhite(after); + + if( *after == 'T' ) { + after += 1; SkipWhite(after); + } - if( check_time_zone && *after == 'T' ) - { - // 
ISO 8601 format - // https://en.wikipedia.org/wiki/ISO_8601 - // at the moment skip the 'T' character only - after += 1; - } + if( IsDigit(after) ) + { + result = ParseTime(after, &after); - if( ParseHourMinSec(after, &after) ) + if( result && check_time_zone ) { SkipWhite(after); - result = true; - if( check_time_zone ) + if( *after == 'Z' ) { - if( *after == 'Z' ) - { - after += 1; - } - else - { - // we dont have to check errors here - ParseZoneOffset(after, &after); - } + after += 1; + } + else + if( *after == '-' || *after == '+' ) + { + result = ParseZoneOffset(after, &after); } } } + } - SetAfter(after, str_after); + SetAfter(after, str_after); - if( result ) - result = IsCorrectDate(); + if( result ) + result = IsCorrectDate(); return result; } @@ -980,34 +1143,80 @@ void Date::SkipWhite(const CStringType * & str) } +template +bool Date::IsDigit(const CStringType * str) +{ + return (*str >= '0' && *str <= '9'); +} + template -bool Date::ReadInt(const CStringType * & str, int & result, size_t max_digits) +bool Date::ReadInt(const CStringType * & str, int & result, size_t max_digits, size_t * digits_read, int * digits_base) { -bool something_read = false; - SkipWhite(str); result = 0; - size_t len = 0; + size_t read_chars = 0; + int base = 1; + bool skip_last_digits = false; - while( *str >= '0' && *str <= '9' && (max_digits == 0 || len < max_digits)) + if( max_digits == 0 ) + skip_last_digits = true; + + if( max_digits == 0 || max_digits > 9 ) + max_digits = 9; + + while( IsDigit(str) && read_chars < max_digits ) { result = result * 10 + (*str - '0'); + base = base * 10; str += 1; - len += 1; - something_read = true; + read_chars += 1; + } - if( result > 10000 ) + if( skip_last_digits ) + { + while( IsDigit(str) ) { - // we assumed the max year to be 10000 - return false; + str += 1; + read_chars += 1; } } -return something_read; + if( digits_read ) + *digits_read = read_chars; + + if( digits_base ) + *digits_base = base; + + return read_chars > 0 && 
read_chars <= max_digits; } +template +bool Date::ReadCommaInt(const CStringType * & str, bool & was_comma, int & result, size_t max_digits, size_t * digits_read, int * base) +{ + bool status = true; // the comma is optional so we return true if it does not exist + result = 0; + was_comma = false; + + if( digits_read ) + *digits_read = 0; + + if( base ) + *base = 1; + + if( *str == '.' || *str == ',' ) + { + str += 1; + was_comma = true; + + if( IsDigit(str) ) + status = ReadInt(str, result, max_digits, digits_read, base); + } + + return status; +} + template bool Date::SkipSeparator(const CStringType * & str, int separator, int separator2, int separator3) @@ -1017,22 +1226,25 @@ bool Date::SkipSeparator(const CStringType * & str, int separator, int separator if( *str == separator ) { str += 1; + SkipWhite(str); return true; } if( separator2 != -1 && *str == separator2 ) { str += 1; + SkipWhite(str); return true; } if( separator3 != -1 && *str == separator3 ) { str += 1; + SkipWhite(str); return true; } -return false; + return false; } diff --git a/src/html/htmlparser.h b/src/html/htmlparser.h index e8ffbaf..502c245 100644 --- a/src/html/htmlparser.h +++ b/src/html/htmlparser.h @@ -32,8 +32,8 @@ * */ -#ifndef headerfile_pikotools_src_html_htmlfilter -#define headerfile_pikotools_src_html_htmlfilter +#ifndef headerfile_pikotools_src_html_htmlparser +#define headerfile_pikotools_src_html_htmlparser #include #include diff --git a/src/space/spaceparser.h b/src/space/spaceparser.h index 5736573..cbbcd3d 100644 --- a/src/space/spaceparser.h +++ b/src/space/spaceparser.h @@ -32,8 +32,8 @@ * */ -#ifndef headerfile_pikotools_src_space_jsonspaceparser -#define headerfile_pikotools_src_space_jsonspaceparser +#ifndef headerfile_pikotools_src_space_spaceparser +#define headerfile_pikotools_src_space_spaceparser #include #include "space.h" diff --git a/src/textstream/types.h b/src/textstream/types.h index 3f996fa..c10a243 100644 --- a/src/textstream/types.h +++ 
b/src/textstream/types.h @@ -5,7 +5,7 @@ */ /* - * Copyright (c) 2012, Tomasz Sowa + * Copyright (c) 2012-2022, Tomasz Sowa * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -32,8 +32,8 @@ * */ -#ifndef headerfile_pikotools_src_space_types -#define headerfile_pikotools_src_space_types +#ifndef headerfile_pikotools_src_textstream_types +#define headerfile_pikotools_src_textstream_types