-
Notifications
You must be signed in to change notification settings - Fork 56
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
feat: read partition_values
in RemoveVisitor
and remove break
in RowVisitor
for RemoveVisitor
#633
Changes from all commits
05b9b2d
8a531c4
6c92831
f668193
7b43b35
bd932d6
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -267,7 +267,8 @@ impl RemoveVisitor { | |
let extended_file_metadata: Option<bool> = | ||
getters[3].get_opt(row_index, "remove.extendedFileMetadata")?; | ||
|
||
// TODO(nick) handle partition values in getters[4] | ||
let partition_values: Option<HashMap<_, _>> = | ||
getters[4].get_opt(row_index, "remove.partitionValues")?; | ||
|
||
let size: Option<i64> = getters[5].get_opt(row_index, "remove.size")?; | ||
|
||
|
@@ -284,7 +285,7 @@ impl RemoveVisitor { | |
data_change, | ||
deletion_timestamp, | ||
extended_file_metadata, | ||
partition_values: None, | ||
partition_values, | ||
size, | ||
tags: None, | ||
deletion_vector, | ||
|
@@ -305,10 +306,9 @@ impl RowVisitor for RemoveVisitor { | |
} | ||
fn visit<'a>(&mut self, row_count: usize, getters: &[&'a dyn GetData<'a>]) -> DeltaResult<()> { | ||
for i in 0..row_count { | ||
// Since path column is required, use it to detect presence of an Remove action | ||
// Since path column is required, use it to detect presence of a Remove action | ||
if let Some(path) = getters[0].get_opt(i, "remove.path")? { | ||
self.removes.push(Self::visit_remove(i, path, getters)?); | ||
break; | ||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
woah I didn't realize the break was there from the beginning
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I mean, this has been dead code for a while, but what in the world? 🤦
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
good find lol! |
||
} | ||
} | ||
Ok(()) | ||
|
@@ -603,11 +603,7 @@ mod tests { | |
modification_time: 1670892998135, | ||
data_change: true, | ||
stats: Some("{\"numRecords\":1,\"minValues\":{\"c3\":5},\"maxValues\":{\"c3\":5},\"nullCount\":{\"c3\":0}}".into()), | ||
tags: None, | ||
deletion_vector: None, | ||
base_row_id: None, | ||
default_row_commit_version: None, | ||
clustering_provider: None, | ||
..Default::default() | ||
}; | ||
let add2 = Add { | ||
path: "c1=5/c2=b/part-00007-4e73fa3b-2c88-424a-8051-f8b54328ffdb.c000.snappy.parquet".into(), | ||
|
@@ -630,11 +626,51 @@ mod tests { | |
..add1.clone() | ||
}; | ||
let expected = vec![add1, add2, add3]; | ||
assert_eq!(add_visitor.adds.len(), expected.len()); | ||
for (add, expected) in add_visitor.adds.into_iter().zip(expected.into_iter()) { | ||
assert_eq!(add, expected); | ||
} | ||
} | ||
|
||
// Checks that `RemoveVisitor` reads `remove.partitionValues` from a parsed log batch | ||
// and surfaces it as `Remove::partition_values`. | ||
#[test] | ||
fn test_parse_remove_partitioned() { | ||
sebastiantia marked this conversation as resolved.
Show resolved
Hide resolved
|
||
let engine = SyncEngine::new(); | ||
let json_handler = engine.get_json_handler(); | ||
// Three log lines: a protocol action, a metaData action declaring partition columns | ||
// c1 and c2, and a single remove action that carries `partitionValues`. | ||
let json_strings: StringArray = vec![ | ||
r#"{"protocol":{"minReaderVersion":1,"minWriterVersion":2}}"#, | ||
r#"{"metaData":{"id":"aff5cb91-8cd9-4195-aef9-446908507302","format":{"provider":"parquet","options":{}},"schemaString":"{\"type\":\"struct\",\"fields\":[{\"name\":\"c1\",\"type\":\"integer\",\"nullable\":true,\"metadata\":{}},{\"name\":\"c2\",\"type\":\"string\",\"nullable\":true,\"metadata\":{}},{\"name\":\"c3\",\"type\":\"integer\",\"nullable\":true,\"metadata\":{}}]}","partitionColumns":["c1","c2"],"configuration":{},"createdTime":1670892997849}}"#, | ||
r#"{"remove":{"path":"c1=4/c2=c/part-00003-f525f459-34f9-46f5-82d6-d42121d883fd.c000.snappy.parquet","deletionTimestamp":1670892998135,"dataChange":true,"partitionValues":{"c1":"4","c2":"c"},"size":452}}"#, | ||
] | ||
.into(); | ||
// Parse the JSON lines using the schema returned by `get_log_schema()`. | ||
let output_schema = get_log_schema().clone(); | ||
let batch = json_handler | ||
.parse_json(string_array_to_engine_data(json_strings), output_schema) | ||
.unwrap(); | ||
// Run the visitor over every row of the parsed batch. | ||
let mut remove_visitor = RemoveVisitor::default(); | ||
remove_visitor.visit_rows_of(batch.as_ref()).unwrap(); | ||
// Expected action mirrors the remove JSON line above; fields not listed here take | ||
// their values from `Remove`'s `Default` impl. | ||
let expected_remove = Remove { | ||
path: "c1=4/c2=c/part-00003-f525f459-34f9-46f5-82d6-d42121d883fd.c000.snappy.parquet" | ||
.into(), | ||
deletion_timestamp: Some(1670892998135), | ||
data_change: true, | ||
partition_values: Some(HashMap::from([ | ||
("c1".to_string(), "4".to_string()), | ||
("c2".to_string(), "c".to_string()), | ||
])), | ||
size: Some(452), | ||
..Default::default() | ||
}; | ||
// Of the three input rows, exactly one `Remove` is expected to be collected. | ||
assert_eq!( | ||
remove_visitor.removes.len(), | ||
1, | ||
"Unexpected number of remove actions" | ||
); | ||
// The collected action must equal the expected one, partition values included. | ||
assert_eq!( | ||
remove_visitor.removes[0], expected_remove, | ||
"Unexpected remove action" | ||
); | ||
} | ||
|
||
#[test] | ||
fn test_parse_txn() { | ||
let engine = SyncEngine::new(); | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
late drive-by -- long line? (surprised format check didn't complain)