diff --git a/kernel/src/actions/mod.rs b/kernel/src/actions/mod.rs index 5edc2a2b1..630eedc81 100644 --- a/kernel/src/actions/mod.rs +++ b/kernel/src/actions/mod.rs @@ -442,12 +442,6 @@ struct Remove { #[cfg_attr(test, serde(skip_serializing_if = "Option::is_none"))] pub(crate) size: Option, - /// Contains [statistics] (e.g., count, min/max values for columns) about the data in this logical file encoded as a JSON string. - /// - /// [statistics]: https://github.com/delta-io/delta/blob/master/PROTOCOL.md#Per-file-Statistics - #[cfg_attr(test, serde(skip_serializing_if = "Option::is_none"))] - pub(crate) stats: Option, - /// Map containing metadata about this logical file. #[cfg_attr(test, serde(skip_serializing_if = "Option::is_none"))] pub(crate) tags: Option>, @@ -639,7 +633,6 @@ mod tests { StructField::new("extendedFileMetadata", DataType::BOOLEAN, true), partition_values_field(), StructField::new("size", DataType::LONG, true), - StructField::new("stats", DataType::STRING, true), tags_field(), deletion_vector_field(), StructField::new("baseRowId", DataType::LONG, true), diff --git a/kernel/src/actions/visitors.rs b/kernel/src/actions/visitors.rs index 5f2dab4d2..957befe80 100644 --- a/kernel/src/actions/visitors.rs +++ b/kernel/src/actions/visitors.rs @@ -255,7 +255,7 @@ impl RemoveVisitor { getters: &[&'a dyn GetData<'a>], ) -> DeltaResult { require!( - getters.len() == 15, + getters.len() == 14, Error::InternalError(format!( "Wrong number of RemoveVisitor getters: {}", getters.len() @@ -272,15 +272,13 @@ impl RemoveVisitor { let size: Option = getters[5].get_opt(row_index, "remove.size")?; - let stats: Option = getters[6].get_opt(row_index, "remove.stats")?; - - // TODO(nick) tags are skipped in getters[7] + // TODO(nick) tags are skipped in getters[6] - let deletion_vector = visit_deletion_vector_at(row_index, &getters[8..])?; + let deletion_vector = visit_deletion_vector_at(row_index, &getters[7..])?; - let base_row_id: Option = getters[13].get_opt(row_index, "remove.baseRowId")?; + let base_row_id: Option = getters[12].get_opt(row_index, "remove.baseRowId")?; let default_row_commit_version: Option = - getters[14].get_opt(row_index, "remove.defaultRowCommitVersion")?; + getters[13].get_opt(row_index, "remove.defaultRowCommitVersion")?; Ok(Remove { path, @@ -289,7 +287,6 @@ impl RemoveVisitor { extended_file_metadata, partition_values, size, - stats, tags: None, deletion_vector, base_row_id, @@ -635,40 +632,6 @@ mod tests { } } - #[test] - fn test_parse_remove() { - let engine = SyncEngine::new(); - let json_handler = engine.get_json_handler(); - let json_strings: StringArray = vec![ - r#"{"commitInfo":{"timestamp":1670892998177,"operation":"DELETE","operationParameters":{"mode":"Append"},"isolationLevel":"Serializable","isBlindAppend":true,"operationMetrics":{"numFiles":"1","numOutputRows":"1","numOutputBytes":"1356"},"engineInfo":"Apache-Spark/3.3.1 Delta-Lake/2.2.0","txnId":"046a258f-45e3-4657-b0bf-abfb0f76681c"}}"#, - r#"{"remove":{"path":"part-00003-f525f459-34f9-46f5-82d6-d42121d883fd.c000.snappy.parquet","deletionTimestamp":1670892998135,"dataChange":true,"size":452,"stats":"{\"numRecords\":1,\"minValues\":{\"c3\":5},\"maxValues\":{\"c3\":5},\"nullCount\":{\"c3\":0}}"}}"#, - ] - .into(); - let output_schema = get_log_schema().clone(); - let batch = json_handler - .parse_json(string_array_to_engine_data(json_strings), output_schema) - .unwrap(); - let mut remove_visitor = RemoveVisitor::default(); - remove_visitor.visit_rows_of(batch.as_ref()).unwrap(); - let expected_remove: Remove = Remove { - path: "part-00003-f525f459-34f9-46f5-82d6-d42121d883fd.c000.snappy.parquet".into(), - deletion_timestamp: Some(1670892998135), - data_change: true, - size: Some(452), - stats: Some("{\"numRecords\":1,\"minValues\":{\"c3\":5},\"maxValues\":{\"c3\":5},\"nullCount\":{\"c3\":0}}".into()), - ..Default::default() - }; - assert_eq!( - remove_visitor.removes.len(), - 1, - "Unexpected number of remove actions" - ); - assert_eq!( - remove_visitor.removes[0], expected_remove, - "Unexpected remove action" - ); - } - #[test] fn test_parse_remove_partitioned() { let engine = SyncEngine::new(); diff --git a/kernel/src/schema.rs b/kernel/src/schema.rs index 83f5f3435..d2ff65193 100644 --- a/kernel/src/schema.rs +++ b/kernel/src/schema.rs @@ -223,7 +223,7 @@ pub struct StructType { pub type_name: String, /// The type of element stored in this array // We use indexmap to preserve the order of fields as they are defined in the schema - // while also allowing for fast lookup by name. The alternative to do a linear search + // while also allowing for fast lookup by name. The alternative is to do a linear search // for each field by name would be potentially quite expensive for large schemas. pub fields: IndexMap, }