allow parsing a decimal time fraction in the ParseTime() method

while here:
- let ParseDate() parse formats without a separator, e.g. "20081012",
  and formats without the month or the day, e.g. "2008" or "200810"
- let ParseTime() parse a time without separators, e.g.:
  "141030", or "1410" or just "14"
- let the Parse(...) method use ParseDate() and ParseTime(),
  so that it parses a format similar to ISO 8601
This commit is contained in:
Tomasz Sowa 2022-12-23 02:15:11 +01:00
parent 3b3c04b85d
commit 379adf6a69
7 changed files with 346 additions and 128 deletions

View File

@ -5,7 +5,7 @@
*/
/*
* Copyright (c) 2012-2021, Tomasz Sowa
* Copyright (c) 2012-2022, Tomasz Sowa
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
@ -32,8 +32,8 @@
*
*/
#ifndef headerfile_pikotools_src_membuffer_convert_convert
#define headerfile_pikotools_src_membuffer_convert_convert
#ifndef headerfile_pikotools_src_convert_convert
#define headerfile_pikotools_src_convert_convert
#include "inttostr.h"

View File

@ -32,8 +32,8 @@
*
*/
#ifndef headerfile_pikotools_src_membuffer_convert_double
#define headerfile_pikotools_src_membuffer_convert_double
#ifndef headerfile_pikotools_src_convert_double
#define headerfile_pikotools_src_convert_double
#include <string>

View File

@ -5,7 +5,7 @@
*/
/*
* Copyright (c) 2012-2018, Tomasz Sowa
* Copyright (c) 2012-2022, Tomasz Sowa
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
@ -69,30 +69,24 @@ Date::Date(const tm & t)
Date::Date(const char * str)
{
    // Parse() may stop in the middle of the string when it meets an error,
    // so all fields are reset first and none of them is left uninitialized
    this->Clear();
    this->Parse(str);
}
Date::Date(const wchar_t * str)
{
    // reset all fields before parsing the wide string, the result of Parse() is not checked here
    this->Clear();
    this->Parse(str);
}
Date::Date(const std::string & str)
{
    // reset all fields before parsing the string, the result of Parse() is not checked here
    this->Clear();
    this->Parse(str);
}
Date::Date(const std::wstring & str)
{
    // reset all fields before parsing the wide string, the result of Parse() is not checked here
    this->Clear();
    this->Parse(str);
}
@ -296,17 +290,29 @@ bool Date::operator<=(const Date & d) const
}
void Date::Clear()
void Date::ClearDate()
{
    // set only the calendar part to 1970-01-01,
    // hour, min and sec are not touched here
    day   = 1;
    month = 1;
    year  = 1970;
}
void Date::ClearTime()
{
    // set only the time part to 00:00:00,
    // year, month and day are not touched here
    sec  = 0;
    min  = 0;
    hour = 0;
}
void Date::Clear()
{
ClearDate();
ClearTime();
}
void Date::AssertRange(int & val, int val_min, int val_max)
{
if( val < val_min )

View File

@ -32,8 +32,8 @@
*
*/
#ifndef headerfile_pikotools_src_mainparser_mainparser
#define headerfile_pikotools_src_mainparser_mainparser
#ifndef headerfile_pikotools_src_date_date
#define headerfile_pikotools_src_date_date
#include <ctime>
#include <string>
@ -64,7 +64,7 @@ public:
/*
the date
*/
int year; // 1970 - ...
int year; // 1970 - 10000
int month; // 1 - 12
int day; // 1 - 31
int hour; // 0 - 23
@ -188,7 +188,21 @@ public:
/*
set the Unix Epoch: 1970.01.01 00:00:00
set the date to: 1970-01-01
the time is not changed
*/
void ClearDate();
/*
set the time to 00:00:00
the date is not changed
*/
void ClearTime();
/*
set the Unix Epoch: 1970-01-01 00:00:00
*/
void Clear();
@ -322,7 +336,7 @@ public:
use IsCorrectDate() to check
*/
template<class CStringType>
bool ParseDayMonthYear(const CStringType * str, const CStringType ** str_after = 0);
bool ParseDayMonthYear(const CStringType * str, const CStringType ** str_after = nullptr);
template<class StringType>
bool ParseDayMonthYear(const StringType & str);
@ -347,7 +361,7 @@ public:
use IsCorrectDate() to check
*/
template<class CStringType>
bool ParseYearMonthDay(const CStringType * str, const CStringType ** str_after = 0);
bool ParseYearMonthDay(const CStringType * str, const CStringType ** str_after = nullptr);
template<class StringType>
bool ParseYearMonthDay(const StringType & str);
@ -372,7 +386,7 @@ public:
use IsCorrectDate() to check
*/
template<class CStringType>
bool ParseMonthDay(const CStringType * str, const CStringType ** str_after = 0);
bool ParseMonthDay(const CStringType * str, const CStringType ** str_after = nullptr);
template<class StringType>
bool ParseMonthDay(const StringType & str);
@ -394,7 +408,7 @@ public:
use IsCorrectDate() to check
*/
template<class CStringType>
bool ParseHourMinSec(const CStringType * str, const CStringType ** str_after = 0);
bool ParseHourMinSec(const CStringType * str, const CStringType ** str_after = nullptr);
template<class StringType>
bool ParseHourMinSec(const StringType & str);
@ -416,34 +430,83 @@ public:
use IsCorrectDate() to check
*/
template<class CStringType>
bool ParseHourMin(const CStringType * str, const CStringType ** str_after = 0);
bool ParseHourMin(const CStringType * str, const CStringType ** str_after = nullptr);
template<class StringType>
bool ParseHourMin(const StringType & str);
template<class CStringType>
bool ParseZoneOffset(const CStringType * str, const CStringType ** str_after = 0);
bool ParseZoneOffset(const CStringType * str, const CStringType ** str_after = nullptr);
template<class StringType>
bool ParseZoneOffset(const StringType & str);
/*
parsing hour and minutes (if exists) and seconds (if exists)
the input string can be as follows:
parsing a year and a month (if exists) and a day (if exists)
the input string can be as follows: YYYY[sep]MM[sep]DD
the separator is optional, it can be: '.', '-', '/' or just white characters
(white characters before and after a separator are skipped)
sample valid dates:
"2022"
" 2022"
"202212"
" 2022 12"
"2022-12"
" 2022 - 12"
"20221222"
" 20221222 "
" 2022 12 22 "
" 2022 12 22 "
"2022-12-22"
" 2022-12-22"
" 2022 - 12 - 22 "
if the month is not provided then it is set to 01,
if the day is not provided then it is set to 01 too
this method doesn't test if the values are correct
use IsCorrectDate() to check
*/
template<class CStringType>
bool ParseDate(const CStringType * str, const CStringType ** str_after = nullptr);
template<class StringType>
bool ParseDate(const StringType & str);
/*
parsing an hour and minutes (if exists) and seconds (if exists)
the input string can be as follows: HH[sep]MM[sep]SS
separator is optional, it can be ':' or just white characters
(white characters before and after a separator are skipped)
"14" -- only an hour given (min and sec will be zero)
"14:10" -- hour with minutes (sec will be zero)
"1410" -- the same as above
"14:10:35" -- hour, minutes and seconds
"141035" -- the same as above
"14 10 35" -- the same as above
white characters are omitted so these are valid strings too:
" 14 : 10 : 35 "
" 14 : 10 : 35some text "
a white character means a space or a tab
a decimal fraction may be added to the lowest order time element present, e.g:
"14.500" = 14:30:00
"14:10.500" = 14:10:30
"14:10:35.500" = 14:10:35 (the seconds' fraction is skipped)
instead of dot you can use a comma too,
the fraction part can be from 1 to 9 digits
this method doesn't test if the values are correct
use IsCorrectDate() to check
*/
template<class CStringType>
bool ParseTime(const CStringType * str, const CStringType ** str_after = 0);
bool ParseTime(const CStringType * str, const CStringType ** str_after = nullptr);
template<class StringType>
bool ParseTime(const StringType & str);
@ -463,9 +526,11 @@ public:
this method doesn't test if the values are correct
use IsCorrectDate() to check
the time is parsed with ParseTime() method, look there for an additional description about available time strings
*/
template<class CStringType>
bool ParseMonthDayTime(const CStringType * str, const CStringType ** str_after = 0);
bool ParseMonthDayTime(const CStringType * str, const CStringType ** str_after = nullptr);
template<class StringType>
bool ParseMonthDayTime(const StringType & str);
@ -474,24 +539,48 @@ public:
/*
parsing year/month/day hour:min:sec
the input strings can be as follows:
"20081012 141035"
"20081012T141035"
"20081012141035"
"2008-10-12 14:10:35"
"2008/10/12 14:10:35"
"2008.10.12 14:10:35"
"2008-10/12 14:10:35"
"2008-10-12 14:10:35.500"
"2008-10-12 14:10.500"
"2008-10-12 14.500"
"2008-10-12 14"
"2008-10 14"
"2008 14"
months and days can be omitted - in such a case 01 is set,
similar min and sec can be omitted (they are assumed to be 00 in such a case)
white characters are omitted
so the input string can be:
" 2008 - 10 / 12 14 : 10 : 35 "
a white character means a space or a tab
as a separator for year/month/day can be '-' '/' or '.'
see ParseYearMonthDay() for details
as an optional separator for the date can be '-' '/' or '.'
as an optional separator for the time is the ':' character
as a separator for hour:min:sec is the ':' character
see ParseHourMinSec() for details
at the end of the string a time zone can be defined in the form of "+01:00", e.g.:
"2008-10-12 14:10:35+01:00"
"2008-10-12 14:10:35-02:00"
or without a separator:
"2008-10-12 14:10:35+0100"
"2008-10-12 14:10:35-0200"
or just a 'Z' character indicating 00:00 zone
"2008-10-12 14:10:35Z"
at the end the method checks if the values are correct
(by using IsCorrectDate())
the format is similar to ISO 8601
https://en.wikipedia.org/wiki/ISO_8601
at the moment there is no support for week dates e.g. "2008-W01-3" and ordinal dates e.g. "2008-010" formats
see ParseDate() and ParseTime for more examples
*/
template<class CStringType>
bool Parse(const CStringType * str, const CStringType ** str_after, bool check_time_zone = true);
@ -520,7 +609,16 @@ private:
void SkipWhite(const CStringType * & str);
template<class CStringType>
bool ReadInt(const CStringType * & str, int & result, size_t max_digits = 0);
bool IsDigit(const CStringType * str);
template<class CStringType>
bool ReadInt(const CStringType * & str, int & result, size_t max_digits = 0, size_t * digits_read = nullptr, int * digits_base = nullptr);
template<class CStringType>
bool ReadCommaInt(const CStringType * & str, bool & was_comma, int & result, size_t max_digits = 0, size_t * digits_read = nullptr, int * base = nullptr);
template<class CStringType>
bool ParseTimeValue(const CStringType * & str, int & value, bool & has_fraction, int & fraction, int * fraction_base);
template<class CStringType>
bool SkipSeparator(const CStringType * & str, int separator, int separator2 = -1, int separator3 = -1);
@ -714,7 +812,6 @@ bool Date::ParseMonthDay(const StringType & str)
}
template<class CStringType>
bool Date::ParseHourMinSec(const CStringType * str, const CStringType ** str_after)
{
@ -777,39 +874,37 @@ bool Date::ParseZoneOffset(const CStringType * str, const CStringType ** str_aft
is_sign = true;
str += 1;
result = true; // if there are no digits we return true
if( ReadInt(str, offset_hour, 2) && offset_hour >= -12 && offset_hour <= 14 )
if( IsDigit(str) )
{
SkipWhite(str);
SetAfter(str, str_after);
result = ReadInt(str, offset_hour, 2) && offset_hour >= -12 && offset_hour <= 14;
if( *str == ':' )
if( result )
{
str += 1;
SkipWhite(str);
SetAfter(str, str_after);
SkipSeparator(str, ':');
if( IsDigit(str) )
{
// offset_min is optional
result = ReadInt(str, offset_min, 2) && offset_min > -60 && offset_min < 60;
}
if( result )
{
time_t offset = (time_t)offset_hour * 60 * 60 + (time_t)offset_min * 60;
if( is_sign )
offset = -offset;
FromTime(ToTime() - offset);
}
}
if( ReadInt(str, offset_min, 2) && offset_min > -60 && offset_min < 60 )
{
SetAfter(str, str_after);
}
else
{
offset_min = 0;
}
time_t offset = (time_t)offset_hour * 60 * 60 + (time_t)offset_min * 60;
result = true;
if( is_sign )
offset = -offset;
FromTime(ToTime() - offset);
}
}
return result;
SetAfter(str, str_after);
return result;
}
@ -821,47 +916,114 @@ bool Date::ParseZoneOffset(const StringType & str)
/*
    reads a year (up to four digits) and then, if they are present,
    a month and a day (up to two digits each),
    an optional '.', '-' or '/' separator is skipped after the year and after the month

    the date part is reset with ClearDate() before reading,
    str_after (if given) receives the position at which the parsing stopped

    returns false if the year cannot be read or if reading the month or the day fails
*/
template<class CStringType>
bool Date::ParseDate(const CStringType * str, const CStringType ** str_after)
{
    ClearDate();

    bool parsed = ReadInt(str, year, 4);

    if( parsed )
    {
        SkipSeparator(str, '.', '-', '/');
        const bool month_present = IsDigit(str);

        if( month_present )
        {
            parsed = ReadInt(str, month, 2);

            // the separator after the month is skipped even if the month was not read correctly
            SkipSeparator(str, '.', '-', '/');
        }

        if( month_present && parsed && IsDigit(str) )
            parsed = ReadInt(str, day, 2);
    }

    SetAfter(str, str_after);

    return parsed;
}
/*
    a convenience overload for string objects:
    the characters returned by str.c_str() are passed to the pointer version of ParseDate(),
    the position at which the parsing stopped is not reported
*/
template<class StringType>
bool Date::ParseDate(const StringType & str)
{
    const auto * text = str.c_str();

    return ParseDate(text);
}
/*
    reads a single time element (ReadInt() is asked for at most two digits)
    together with an optional decimal fraction which follows it directly,
    when both are read correctly an optional ':' separator is skipped as well

    str           - the input position, it is moved forward while reading
    value         - the integer part of the element
    has_fraction  - set to true if a '.' or ',' character was found after the value
    fraction      - the digits of the fraction read as an integer (zero if there is no fraction)
    fraction_base - an optional pointer, if given it is set to 1 at the beginning and then
                    to the base reported by ReadCommaInt()

    returns false if the value or the fraction cannot be read,
    in such a case has_fraction is false
*/
template<class CStringType>
bool Date::ParseTimeValue(const CStringType * & str, int & value, bool & has_fraction, int & fraction, int * fraction_base)
{
    has_fraction = false;
    fraction = 0;

    // the pointer is treated as optional, in the same way as the out pointers in ReadInt() and ReadCommaInt()
    if( fraction_base )
        *fraction_base = 1;

    if( !ReadInt(str, value, 2) )
        return false;

    bool was_comma = false;

    // the number of fraction digits is not needed here so a null pointer is passed for digits_read
    if( !ReadCommaInt(str, was_comma, fraction, 0, nullptr, fraction_base) )
        return false;

    has_fraction = was_comma;
    SkipSeparator(str, ':');

    return true;
}
template<class CStringType>
bool Date::ParseTime(const CStringType * str, const CStringType ** str_after)
{
if( !ReadInt(str, hour) )
{
SetAfter(str, str_after);
return false;
}
bool status = false;
bool has_fraction = false;
int fraction = 0;
int fraction_base = 0;
min = 0;
sec = 0;
ClearTime();
if( !SkipSeparator(str, ':') )
if( ParseTimeValue(str, hour, has_fraction, fraction, &fraction_base) )
{
SetAfter(str, str_after);
return true; // only an hour given
}
status = true;
if( !ReadInt(str, min) )
{
SetAfter(str, str_after);
return false;
}
if( has_fraction )
{
min = (60L * fraction) / (long)(fraction_base);
int min_rem = (60L * fraction) % (long)(fraction_base);
sec = (min_rem * 60L) / (long)fraction_base;
}
else
if( IsDigit(str) )
{
status = ParseTimeValue(str, min, has_fraction, fraction, &fraction_base);
if( !SkipSeparator(str, ':') )
{
SetAfter(str, str_after);
return true; // only an hour and minutes given
}
if( !ReadInt(str, sec) )
{
SetAfter(str, str_after);
return false;
if( status )
{
if( has_fraction )
{
sec = (60L * fraction) / (long)(fraction_base);
}
else
if( IsDigit(str) )
{
status = ParseTimeValue(str, sec, has_fraction, fraction, &fraction_base);
// ignore the seconds fraction if exists
}
}
}
}
SetAfter(str, str_after);
return true;
return status;
}
@ -903,43 +1065,44 @@ bool Date::Parse(const CStringType * str, const CStringType ** str_after, bool c
{
const CStringType * after;
bool result = false;
Clear();
if( ParseYearMonthDay(str, &after) )
if( ParseDate(str, &after) )
{
result = true;
SkipWhite(after);
if( *after == 'T' )
{
after += 1;
SkipWhite(after);
}
if( check_time_zone && *after == 'T' )
{
// ISO 8601 format
// https://en.wikipedia.org/wiki/ISO_8601
// at the moment skip the 'T' character only
after += 1;
}
if( IsDigit(after) )
{
result = ParseTime(after, &after);
if( ParseHourMinSec(after, &after) )
if( result && check_time_zone )
{
SkipWhite(after);
result = true;
if( check_time_zone )
if( *after == 'Z' )
{
if( *after == 'Z' )
{
after += 1;
}
else
{
// we dont have to check errors here
ParseZoneOffset(after, &after);
}
after += 1;
}
else
if( *after == '-' || *after == '+' )
{
result = ParseZoneOffset(after, &after);
}
}
}
}
SetAfter(after, str_after);
SetAfter(after, str_after);
if( result )
result = IsCorrectDate();
if( result )
result = IsCorrectDate();
return result;
}
@ -980,34 +1143,80 @@ void Date::SkipWhite(const CStringType * & str)
}
/*
    returns true if the character pointed to by str is a decimal digit ('0' - '9'),
    only this one character is examined
*/
template<class CStringType>
bool Date::IsDigit(const CStringType * str)
{
    const CStringType c = *str;

    return !(c < '0' || c > '9');
}
template<class CStringType>
bool Date::ReadInt(const CStringType * & str, int & result, size_t max_digits)
bool Date::ReadInt(const CStringType * & str, int & result, size_t max_digits, size_t * digits_read, int * digits_base)
{
bool something_read = false;
SkipWhite(str);
result = 0;
size_t len = 0;
size_t read_chars = 0;
int base = 1;
bool skip_last_digits = false;
while( *str >= '0' && *str <= '9' && (max_digits == 0 || len < max_digits))
if( max_digits == 0 )
skip_last_digits = true;
if( max_digits == 0 || max_digits > 9 )
max_digits = 9;
while( IsDigit(str) && read_chars < max_digits )
{
result = result * 10 + (*str - '0');
base = base * 10;
str += 1;
len += 1;
something_read = true;
read_chars += 1;
}
if( result > 10000 )
if( skip_last_digits )
{
while( IsDigit(str) )
{
// we assumed the max year to be 10000
return false;
str += 1;
read_chars += 1;
}
}
return something_read;
if( digits_read )
*digits_read = read_chars;
if( digits_base )
*digits_base = base;
return read_chars > 0 && read_chars <= max_digits;
}
/*
    reads an optional fraction: a '.' or ',' character followed by digits

    was_comma is set to true if the '.' or ',' character was found (it is consumed then),
    result receives the digits read by ReadInt() or zero if there are none,
    digits_read and base are optional: they are set to 0 and 1 at the beginning
    and then they are passed to ReadInt()

    the fraction is optional so true is returned if there is no '.' or ',' character
    or if no digit follows it, otherwise the status from ReadInt() is returned
*/
template<class CStringType>
bool Date::ReadCommaInt(const CStringType * & str, bool & was_comma, int & result, size_t max_digits, size_t * digits_read, int * base)
{
    was_comma = false;
    result = 0;

    if( digits_read )
        *digits_read = 0;

    if( base )
        *base = 1;

    if( *str != '.' && *str != ',' )
        return true;

    str += 1;
    was_comma = true;

    if( !IsDigit(str) )
        return true;

    return ReadInt(str, result, max_digits, digits_read, base);
}
template<class CStringType>
bool Date::SkipSeparator(const CStringType * & str, int separator, int separator2, int separator3)
@ -1017,22 +1226,25 @@ bool Date::SkipSeparator(const CStringType * & str, int separator, int separator
if( *str == separator )
{
str += 1;
SkipWhite(str);
return true;
}
if( separator2 != -1 && *str == separator2 )
{
str += 1;
SkipWhite(str);
return true;
}
if( separator3 != -1 && *str == separator3 )
{
str += 1;
SkipWhite(str);
return true;
}
return false;
return false;
}

View File

@ -32,8 +32,8 @@
*
*/
#ifndef headerfile_pikotools_src_html_htmlfilter
#define headerfile_pikotools_src_html_htmlfilter
#ifndef headerfile_pikotools_src_html_htmlparser
#define headerfile_pikotools_src_html_htmlparser
#include <string>
#include <map>

View File

@ -32,8 +32,8 @@
*
*/
#ifndef headerfile_pikotools_src_space_jsonspaceparser
#define headerfile_pikotools_src_space_jsonspaceparser
#ifndef headerfile_pikotools_src_space_spaceparser
#define headerfile_pikotools_src_space_spaceparser
#include <fstream>
#include "space.h"

View File

@ -5,7 +5,7 @@
*/
/*
* Copyright (c) 2012, Tomasz Sowa
* Copyright (c) 2012-2022, Tomasz Sowa
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
@ -32,8 +32,8 @@
*
*/
#ifndef headerfile_pikotools_src_space_types
#define headerfile_pikotools_src_space_types
#ifndef headerfile_pikotools_src_textstream_types
#define headerfile_pikotools_src_textstream_types