diff --git a/README.md b/README.md index c7fa8a8..4a67623 100644 --- a/README.md +++ b/README.md @@ -22,7 +22,7 @@ It also adds support for parsing dates in DMY format, with the `parse_with_prefe "2017-11-25T22:34:50Z", // rfc2822 "Wed, 02 Jun 2021 06:31:39 GMT", -// yyyy-mm-dd hh:mm:ss +// yyyy-mm-dd hh:mm:ss (separator: space or ISO 8601 'T') "2014-04-26 05:24:37 PM", "2021-04-30 21:14", "2021-04-30 21:14:10", @@ -30,6 +30,9 @@ It also adds support for parsing dates in DMY format, with the `parse_with_prefe "2014-04-26 17:24:37.123", "2014-04-26 17:24:37.3186369", "2012-08-03 18:31:59.257000000", +"2020-01-15T08:00", +"2020-01-15T08:00:00", +"2020-01-15T08:00:00.123456", // yyyy-mm-dd hh:mm:ss z "2017-11-25 13:31:15 PST", "2017-11-25 13:31 PST", diff --git a/src/datetime.rs b/src/datetime.rs index f09df79..ef5f71e 100644 --- a/src/datetime.rs +++ b/src/datetime.rs @@ -181,7 +181,7 @@ where .map(Ok) } - // yyyy-mm-dd hh:mm:ss + // yyyy-mm-dd hh:mm:ss (separator is space OR ISO 8601 'T') // - 2014-04-26 05:24:37 PM // - 2021-04-30 21:14 // - 2021-04-30 21:14:10 @@ -189,22 +189,47 @@ where // - 2014-04-26 17:24:37.123 // - 2014-04-26 17:24:37.3186369 // - 2012-08-03 18:31:59.257000000 + // - 2020-01-15T08:00 + // - 2020-01-15T08:00:00 + // - 2020-01-15T08:00:00.123456 #[inline] fn ymd_hms(&self, input: &str) -> Option<Result<DateTime<Utc>>> { let re: &Regex = regex! { - r"^\d{4}-\d{2}-\d{2}\s+\d{2}:\d{2}(:\d{2})?(\.\d{1,9})?\s*(am|pm|AM|PM)?$" + r"^\d{4}-\d{2}-\d{2}[T\s]+\d{2}:\d{2}(:\d{2})?(\.\d{1,9})?\s*(am|pm|AM|PM)?$" }; if !re.is_match(input) { return None; } + // For ASCII input, byte 10 is the date/time separator. The regex guarantees the input // has at least 16 bytes, so the index cannot panic, and byte 10 is 'T' only for // the ISO 8601 form (whitespace need not be ASCII: `\s` is Unicode-aware by default), so picking the format-string family on this single byte // avoids doubling the trial-parse chain for the common space case. 
+ let (fmt_hms, fmt_hm, fmt_hms_f, fmt_ims_p, fmt_im_p) = if input.as_bytes()[10] == b'T' { + ( + "%Y-%m-%dT%H:%M:%S", + "%Y-%m-%dT%H:%M", + "%Y-%m-%dT%H:%M:%S%.f", + "%Y-%m-%dT%I:%M:%S %P", + "%Y-%m-%dT%I:%M %P", + ) + } else { + ( + "%Y-%m-%d %H:%M:%S", + "%Y-%m-%d %H:%M", + "%Y-%m-%d %H:%M:%S%.f", + "%Y-%m-%d %I:%M:%S %P", + "%Y-%m-%d %I:%M %P", + ) + }; + self.tz - .datetime_from_str(input, "%Y-%m-%d %H:%M:%S") - .or_else(|_| self.tz.datetime_from_str(input, "%Y-%m-%d %H:%M")) - .or_else(|_| self.tz.datetime_from_str(input, "%Y-%m-%d %H:%M:%S%.f")) - .or_else(|_| self.tz.datetime_from_str(input, "%Y-%m-%d %I:%M:%S %P")) - .or_else(|_| self.tz.datetime_from_str(input, "%Y-%m-%d %I:%M %P")) + .datetime_from_str(input, fmt_hms) + .or_else(|_| self.tz.datetime_from_str(input, fmt_hm)) + .or_else(|_| self.tz.datetime_from_str(input, fmt_hms_f)) + .or_else(|_| self.tz.datetime_from_str(input, fmt_ims_p)) + .or_else(|_| self.tz.datetime_from_str(input, fmt_im_p)) .ok() .map(|parsed| parsed.with_timezone(&Utc)) .map(Ok) @@ -811,6 +836,22 @@ mod tests { "2012-08-03 18:31:59.257000000", Utc.ymd(2012, 8, 3).and_hms_nano(18, 31, 59, 257000000), ), + // ISO 8601 with 'T' separator and no timezone (naive wall-clock). + // Must agree with the space-separated form on the same wall-clock instant. + ("2020-01-15T08:00", Utc.ymd(2020, 1, 15).and_hms(8, 0, 0)), + ("2020-01-15T08:00:00", Utc.ymd(2020, 1, 15).and_hms(8, 0, 0)), + ( + "2020-01-15T08:00:00.123", + Utc.ymd(2020, 1, 15).and_hms_milli(8, 0, 0, 123), + ), + ( + "2020-01-15T08:00:00.123456", + Utc.ymd(2020, 1, 15).and_hms_micro(8, 0, 0, 123456), + ), + ( + "2020-01-15T08:00:00.123456789", + Utc.ymd(2020, 1, 15).and_hms_nano(8, 0, 0, 123456789), + ), ]; for &(input, want) in test_cases.iter() { @@ -822,6 +863,11 @@ mod tests { ) } assert!(parse.ymd_hms("not-date-time").is_none()); + + // T and space separators must produce the same instant. 
+ let t_form = parse.ymd_hms("2020-01-15T08:00:00").unwrap().unwrap(); + let space_form = parse.ymd_hms("2020-01-15 08:00:00").unwrap().unwrap(); + assert_eq!(t_form, space_form, "T-separator vs space disagree"); } #[test] diff --git a/src/lib.rs b/src/lib.rs index b06e23c..ae9ac83 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -756,9 +756,16 @@ mod tests { #[test] fn parse_unambiguous_dmy() { + // `parse()` uses Local timezone and pads date-only inputs with the + // current time of day, so the resulting UTC date can roll by ±1 day + // depending on host TZ and the moment the test runs. Assert on the + // Local date — that's what `parse()` actually models for this input. assert_eq!( - super::parse("31/3/22").unwrap().date(), - Utc.ymd(2022, 3, 31) + super::parse("31/3/22") + .unwrap() + .with_timezone(&Local) + .date(), + Local.ymd(2022, 3, 31) ); assert_eq!( super::parse_with_preference("3/31/22", true) @@ -773,4 +780,50 @@ mod tests { Utc.ymd(2021, 7, 31) ); } + + // Regression: ISO 8601 with 'T' separator and no timezone (e.g. Python's + // datetime.isoformat() without astimezone) must parse via the naive + // wall-clock path, matching the equivalent space-separated form. + #[test] + fn parse_iso_t_no_tz() { + // Bare T, no fractional, no tz. + let got = super::parse_with_preference("2020-01-15T08:00:00", false).unwrap(); + assert_eq!(got, Utc.ymd(2020, 1, 15).and_hms(8, 0, 0)); + + // T, no seconds, no tz. + let got = super::parse_with_preference("2020-01-15T08:00", false).unwrap(); + assert_eq!(got, Utc.ymd(2020, 1, 15).and_hms(8, 0, 0)); + + // T with millisecond + microsecond + nanosecond precision. 
+ for (input, want) in [ + ( + "2020-01-15T08:00:00.123", + Utc.ymd(2020, 1, 15).and_hms_milli(8, 0, 0, 123), + ), + ( + "2020-01-15T08:00:00.123456", + Utc.ymd(2020, 1, 15).and_hms_micro(8, 0, 0, 123456), + ), + ( + "2020-01-15T08:00:00.123456789", + Utc.ymd(2020, 1, 15).and_hms_nano(8, 0, 0, 123456789), + ), + ] { + assert_eq!( + super::parse_with_preference(input, false).unwrap(), + want, + "parse_iso_t_no_tz/{input}" + ); + } + + // T-form and space-form must produce the same instant. + assert_eq!( + super::parse_with_preference("2020-01-15T08:00:00", false).unwrap(), + super::parse_with_preference("2020-01-15 08:00:00", false).unwrap(), + ); + + // Existing tz-bearing T-forms must continue to parse (no regression). + assert!(super::parse_with_preference("2020-01-15T08:00:00Z", false).is_ok()); + assert!(super::parse_with_preference("2020-01-15T08:00:00+00:00", false).is_ok()); + } }