Skip to content

Commit

Permalink
Merge branch 'main' into cdf_delta_spark_tests
Browse files Browse the repository at this point in the history
  • Loading branch information
OussamaSaoudi-db authored Jan 8, 2025
2 parents 18e97a0 + cbb52a4 commit c8a01ac
Show file tree
Hide file tree
Showing 3 changed files with 74 additions and 4 deletions.
59 changes: 55 additions & 4 deletions kernel/src/expressions/scalars.rs
Original file line number Diff line number Diff line change
Expand Up @@ -388,13 +388,22 @@ impl PrimitiveType {
let days = date.signed_duration_since(DateTime::UNIX_EPOCH).num_days() as i32;
Ok(Scalar::Date(days))
}
// NOTE: Timestamp and TimestampNtz are parsed in the same way, as microsecond since unix epoch.
// NOTE: Timestamp and TimestampNtz are both parsed into microsecond since unix epoch.
// They may both have the format `{year}-{month}-{day} {hour}:{minute}:{second}`.
// Timestamps may additionally be encoded as an ISO 8601 formatted string such as
// `1970-01-01T00:00:00.123456Z`.
//
// The difference arises mostly in how they are to be handled on the engine side - i.e. timestampNTZ
// is not adjusted to UTC; this is just so we can (de-)serialize it as a date string.
// https://github.com/delta-io/delta/blob/master/PROTOCOL.md#partition-value-serialization
Timestamp | TimestampNtz => {
let timestamp = NaiveDateTime::parse_from_str(raw, "%Y-%m-%d %H:%M:%S%.f")
.map_err(|_| self.parse_error(raw))?;
TimestampNtz | Timestamp => {
let mut timestamp = NaiveDateTime::parse_from_str(raw, "%Y-%m-%d %H:%M:%S%.f");

if timestamp.is_err() && *self == Timestamp {
// Note: `%+` specifies the ISO 8601 / RFC 3339 format
timestamp = NaiveDateTime::parse_from_str(raw, "%+");
}
let timestamp = timestamp.map_err(|_| self.parse_error(raw))?;
let timestamp = Utc.from_utc_datetime(&timestamp);
let micros = timestamp
.signed_duration_since(DateTime::UNIX_EPOCH)
Expand Down Expand Up @@ -576,4 +585,46 @@ mod tests {
assert_eq!(&format!("{}", column_op), "3.1415927 IN Column(item)");
assert_eq!(&format!("{}", column_not_op), "'Cool' NOT IN Column(item)");
}
#[test]
fn test_timestamp_parse() {
    // Each entry pairs a raw partition-value string with the number of
    // microseconds since the unix epoch that it is expected to parse to.
    // The first two entries use the ISO 8601 `T...Z` form; the remaining
    // ones use the `{date} {time}` form.
    let cases = [
        ("1971-07-22T03:06:40.678910Z", 49000000678910),
        ("1971-07-22T03:06:40Z", 49000000000000),
        ("2011-01-11 13:06:07", 1294751167000000),
        ("2011-01-11 13:06:07.123456", 1294751167123456),
        ("1970-01-01 00:00:00", 0),
    ];
    for (raw, expected_micros) in cases {
        let parsed = PrimitiveType::Timestamp.parse_scalar(raw).unwrap();
        assert_eq!(parsed, Scalar::Timestamp(expected_micros));
    }
}
#[test]
fn test_timestamp_ntz_parse() {
    // Each entry pairs a raw `{date} {time}` string with the number of
    // microseconds since the unix epoch that it is expected to parse to.
    let cases = [
        ("2011-01-11 13:06:07", 1294751167000000),
        ("2011-01-11 13:06:07.123456", 1294751167123456),
        ("1970-01-01 00:00:00", 0),
    ];
    for (raw, expected_micros) in cases {
        let parsed = PrimitiveType::TimestampNtz.parse_scalar(raw).unwrap();
        assert_eq!(parsed, Scalar::TimestampNtz(expected_micros));
    }
}

#[test]
fn test_timestamp_parse_fails() {
    // TimestampNtz must reject the ISO 8601 `T...Z` strings as well as a
    // date-only string.
    let ntz = PrimitiveType::TimestampNtz;
    for raw in [
        "1971-07-22T03:06:40.678910Z",
        "1971-07-22T03:06:40Z",
        "1971-07-22",
    ] {
        assert!(ntz.parse_scalar(raw).is_err());
    }

    // Timestamp must reject a date-only string.
    let ts = PrimitiveType::Timestamp;
    assert!(ts.parse_scalar("1971-07-22").is_err());
}
}
Binary file not shown.
19 changes: 19 additions & 0 deletions kernel/tests/read.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1062,3 +1062,22 @@ fn predicate_references_invalid_missing_column() -> Result<(), Box<dyn std::erro
.expect_err("unknown column");
Ok(())
}

// Note: This test is disabled for windows because it creates a directory with name
// `time=1971-07-22T03:06:40.000000Z`. This is disallowed in windows due to having a `:` in
// the name.
#[cfg(not(windows))]
#[test]
fn timestamp_partitioned_table() -> Result<(), Box<dyn std::error::Error>> {
    // Load the test data for this table; the table itself sits in a
    // sub-directory named after the test inside the returned directory.
    let table_name = "timestamp-partitioned-table";
    let fixture_dir = common::load_test_data("./tests/data", table_name).unwrap();
    let table_path = fixture_dir.path().join(table_name);

    // Rendered table that reading the data is compared against.
    let expected_rows = vec![
        "+----+-----+---+----------------------+",
        "| id | x | s | time |",
        "+----+-----+---+----------------------+",
        "| 1 | 0.5 | | 1971-07-22T03:06:40Z |",
        "+----+-----+---+----------------------+",
    ];

    read_table_data_str(table_path.to_str().unwrap(), None, None, expected_rows)
}

0 comments on commit c8a01ac

Please sign in to comment.