From 155069a60d0a3663c2110b55c95f34675688c2c6 Mon Sep 17 00:00:00 2001 From: Emil Ernerfeldt Date: Tue, 14 Jan 2025 18:02:31 +0100 Subject: [PATCH 1/9] Add helpers for downcasting arrow arrays --- crates/store/re_chunk/src/arrow2_util.rs | 18 ++++++++++++++++++ crates/store/re_chunk/src/arrow_util.rs | 18 ++++++++++++++++++ 2 files changed, 36 insertions(+) diff --git a/crates/store/re_chunk/src/arrow2_util.rs b/crates/store/re_chunk/src/arrow2_util.rs index cebfa8505e31..ece51d48fc38 100644 --- a/crates/store/re_chunk/src/arrow2_util.rs +++ b/crates/store/re_chunk/src/arrow2_util.rs @@ -13,6 +13,24 @@ use itertools::Itertools; use crate::TransportChunk; +/// Downcast an arrow array to another array, without having to go via `Any`. +/// +/// This is shorter, but also better: it means we don't accidentally downcast +/// an arrow2 array to an arrow1 array, or vice versa. +pub trait Arrow2ArrayDowncastRef { + /// Downcast an arrow array to another array, without having to go via `Any`. + /// + /// This is shorter, but also better: it means we don't accidentally downcast + /// an arrow2 array to an arrow1 array, or vice versa. + fn downcast_array_ref(&self) -> Option<&T>; +} + +impl Arrow2ArrayDowncastRef for dyn Arrow2Array { + fn downcast_array_ref(&self) -> Option<&T> { + self.as_any().downcast_ref() + } +} + // --- /// Returns true if the given `list_array` is semantically empty. diff --git a/crates/store/re_chunk/src/arrow_util.rs b/crates/store/re_chunk/src/arrow_util.rs index c3d80b207b9b..7cfa68b6d869 100644 --- a/crates/store/re_chunk/src/arrow_util.rs +++ b/crates/store/re_chunk/src/arrow_util.rs @@ -7,6 +7,24 @@ use itertools::Itertools; // --- +/// Downcast an arrow array to another array, without having to go via `Any`. +/// +/// This is shorter, but also better: it means we don't accidentally downcast +/// an arrow2 array to an arrow1 array, or vice versa. +pub trait ArrowArrayDowncastRef { + /// Downcast an arrow array to another array, without having to go via `Any`. + /// + /// This is shorter, but also better: it means we don't accidentally downcast + /// an arrow2 array to an arrow1 array, or vice versa. + fn downcast_array_ref(&self) -> Option<&T>; +} + +impl ArrowArrayDowncastRef for dyn Array { + fn downcast_array_ref(&self) -> Option<&T> { + self.as_any().downcast_ref() + } +} + #[inline] pub fn into_arrow_ref(array: impl Array + 'static) -> ArrayRef { std::sync::Arc::new(array) From 752b770072f89d6e42c9d92d9a385f1c6f28d066 Mon Sep 17 00:00:00 2001 From: Emil Ernerfeldt Date: Tue, 14 Jan 2025 18:24:17 +0100 Subject: [PATCH 2/9] Move arrow utils to new crate re_arrow_util --- Cargo.lock | 15 +++++++ Cargo.toml | 3 +- clippy.toml | 18 ++++---- crates/store/re_chunk/Cargo.toml | 1 + crates/store/re_chunk/src/batcher.rs | 3 +- crates/store/re_chunk/src/builder.rs | 3 +- .../re_chunk/src/concat_record_batches.rs | 40 +++++++++++++++++ crates/store/re_chunk/src/lib.rs | 3 +- crates/store/re_chunk/src/merge.rs | 7 ++- crates/store/re_chunk/src/migration.rs | 6 +-- crates/store/re_chunk/src/shuffle.rs | 2 +- crates/store/re_chunk/src/slice.rs | 4 +- crates/store/re_chunk/src/transport.rs | 10 ++--- crates/store/re_chunk/tests/memory_test.rs | 2 +- crates/store/re_chunk_store/Cargo.toml | 1 + crates/store/re_chunk_store/src/writes.rs | 2 +- crates/store/re_dataframe/Cargo.toml | 1 + crates/store/re_dataframe/src/lib.rs | 2 +- crates/store/re_dataframe/src/query.rs | 5 ++- crates/store/re_grpc_client/Cargo.toml | 1 + crates/store/re_grpc_client/src/lib.rs | 4 +- crates/utils/re_arrow_util/Cargo.toml | 31 +++++++++++++ crates/utils/re_arrow_util/README.md | 10 +++++ .../re_arrow_util}/src/arrow2_util.rs | 45 +------------------ .../re_arrow_util}/src/arrow_util.rs | 2 +- crates/utils/re_arrow_util/src/lib.rs | 7 +++ 26 files changed, 147 insertions(+), 81 deletions(-) create mode 100644 crates/store/re_chunk/src/concat_record_batches.rs create mode 100644 crates/utils/re_arrow_util/Cargo.toml create mode 100644 crates/utils/re_arrow_util/README.md rename crates/{store/re_chunk => utils/re_arrow_util}/src/arrow2_util.rs (91%) rename crates/{store/re_chunk => utils/re_arrow_util}/src/arrow_util.rs (99%) create mode 100644 crates/utils/re_arrow_util/src/lib.rs diff --git a/Cargo.lock b/Cargo.lock index 85eeabf7fead..3b781e0df7fc 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -5610,6 +5610,17 @@ dependencies = [ "simdutf8", ] +[[package]] +name = "re_arrow_util" +version = "0.22.0-alpha.1+dev" +dependencies = [ + "arrow", + "itertools 0.13.0", + "re_arrow2", + "re_log", + "re_tracing", +] + [[package]] name = "re_blueprint_tree" version = "0.22.0-alpha.1+dev" @@ -5692,6 +5703,7 @@ dependencies = [ "nohash-hasher", "rand", "re_arrow2", + "re_arrow_util", "re_byte_size", "re_error", "re_format", @@ -5725,6 +5737,7 @@ dependencies = [ "parking_lot", "rand", "re_arrow2", + "re_arrow_util", "re_byte_size", "re_chunk", "re_format", @@ -5904,6 +5917,7 @@ dependencies = [ "nohash-hasher", "rayon", "re_arrow2", + "re_arrow_util", "re_chunk", "re_chunk_store", "re_log", @@ -6010,6 +6024,7 @@ name = "re_grpc_client" version = "0.22.0-alpha.1+dev" dependencies = [ "arrow", + "re_arrow_util", "re_chunk", "re_error", "re_log", diff --git a/Cargo.toml b/Cargo.toml index 7574f393ff2d..c5ca94e282a8 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -72,6 +72,8 @@ rerun-cli = { path = "crates/top/rerun-cli", version = "=0.22.0-alpha.1", defaul # crates/utils: re_analytics = { path = "crates/utils/re_analytics", version = "=0.22.0-alpha.1", default-features = false } +re_arrow_util = { path = "crates/utils/re_arrow_util", version = "=0.22.0-alpha.1", default-features = false } +re_byte_size = { path = "crates/utils/re_byte_size", version = "=0.22.0-alpha.1", default-features = false } re_capabilities = { path = "crates/utils/re_capabilities", version = "=0.22.0-alpha.1", default-features = false } re_case = { path = "crates/utils/re_case", version = "=0.22.0-alpha.1", default-features = false } re_crash_handler = { path = "crates/utils/re_crash_handler", version = "=0.22.0-alpha.1", default-features = false } @@ -80,7 +82,6 @@ re_format = { path = "crates/utils/re_format", version = "=0.22.0-alpha.1", defa re_int_histogram = { path = "crates/utils/re_int_histogram", version = "=0.22.0-alpha.1", default-features = false } re_log = { path = "crates/utils/re_log", version = "=0.22.0-alpha.1", default-features = false } re_memory = { path = "crates/utils/re_memory", version = "=0.22.0-alpha.1", default-features = false } -re_byte_size = { path = "crates/utils/re_byte_size", version = "=0.22.0-alpha.1", default-features = false } re_smart_channel = { path = "crates/utils/re_smart_channel", version = "=0.22.0-alpha.1", default-features = false } re_string_interner = { path = "crates/utils/re_string_interner", version = "=0.22.0-alpha.1", default-features = false } re_tracing = { path = "crates/utils/re_tracing", version = "=0.22.0-alpha.1", default-features = false } diff --git a/clippy.toml b/clippy.toml index 01e482ba6bdd..7a3595366c79 100644 --- a/clippy.toml +++ b/clippy.toml @@ -50,19 +50,19 @@ disallowed-methods = [ { path = "std::panic::catch_unwind", reason = "We compile with `panic = 'abort'`" }, { path = "std::thread::spawn", reason = "Use `std::thread::Builder` and name the thread" }, - { path = "arrow::compute::concat", reason = "Use `re_chunk::arrow_util::concat_arrays` instead, which has better memory management" }, - { path = "arrow::compute::filter", reason = "Use `re_chunk::arrow_util::filter_array` instead" }, - { path = "arrow::compute::take", reason = "Use `re_chunk::arrow_util::take_array` instead" }, + { path = "arrow::compute::concat", reason = "Use `re_arrow_util::arrow_util::concat_arrays` instead, which has better memory management" }, + { path = "arrow::compute::filter", reason = "Use `re_arrow_util::arrow_util::filter_array` instead" }, + { path = "arrow::compute::take", reason = "Use `re_arrow_util::arrow_util::take_array` instead" }, { path = "arrow::datatypes::Schema::new", reason = "Use `arrow::datatypes::Schema::new_with_metadata` instead. There is usually some metadata you want to preserve." }, # Specify both `arrow2` and `re_arrow2` -- clippy gets lost in all the package renaming happening. - { path = "arrow2::compute::concatenate::concatenate", reason = "Use `re_chunk::arrow2_util::concat_arrays` instead, which has proper early outs" }, - { path = "arrow2::compute::filter::filter", reason = "Use `re_chunk::arrow2_util::filter_array` instead, which has proper early outs" }, - { path = "arrow2::compute::take::take", reason = "Use `re_chunk::arrow2_util::take_array` instead, which has proper early outs" }, - { path = "re_arrow2::compute::concatenate::concatenate", reason = "Use `re_chunk::arrow2_util::concat_arrays` instead, which has proper early outs" }, - { path = "re_arrow2::compute::filter::filter", reason = "Use `re_chunk::arrow2_util::filter_array` instead, which has proper early outs" }, - { path = "re_arrow2::compute::take::take", reason = "Use `re_chunk::arrow2_util::take_array` instead, which has proper early outs" }, + { path = "arrow2::compute::concatenate::concatenate", reason = "Use `re_arrow_util::arrow2_util::concat_arrays` instead, which has proper early outs" }, + { path = "arrow2::compute::filter::filter", reason = "Use `re_arrow_util::arrow2_util::filter_array` instead, which has proper early outs" }, + { path = "arrow2::compute::take::take", reason = "Use `re_arrow_util::arrow2_util::take_array` instead, which has proper early outs" }, + { path = "re_arrow2::compute::concatenate::concatenate", reason = "Use `re_arrow_util::arrow2_util::concat_arrays` instead, which has proper early outs" }, + { path = "re_arrow2::compute::filter::filter", reason = "Use `re_arrow_util::arrow2_util::filter_array` instead, which has proper early outs" }, + { path = "re_arrow2::compute::take::take", reason = "Use `re_arrow_util::arrow2_util::take_array` instead, which has proper early outs" }, # There are many things that aren't allowed on wasm, # but we cannot disable them all here (because of e.g. https://github.com/rust-lang/rust-clippy/issues/10406) diff --git a/crates/store/re_chunk/Cargo.toml b/crates/store/re_chunk/Cargo.toml index 38a9e5b8c963..d3e285ca2a53 100644 --- a/crates/store/re_chunk/Cargo.toml +++ b/crates/store/re_chunk/Cargo.toml @@ -37,6 +37,7 @@ arrow = ["arrow2/arrow"] [dependencies] # Rerun +re_arrow_util.workspace = true re_byte_size.workspace = true re_error.workspace = true re_format.workspace = true diff --git a/crates/store/re_chunk/src/batcher.rs b/crates/store/re_chunk/src/batcher.rs index b1f4a61d0afe..db897b8267f5 100644 --- a/crates/store/re_chunk/src/batcher.rs +++ b/crates/store/re_chunk/src/batcher.rs @@ -9,11 +9,12 @@ use arrow::buffer::ScalarBuffer as ArrowScalarBuffer; use crossbeam::channel::{Receiver, Sender}; use nohash_hasher::IntMap; +use re_arrow_util::arrow_util; use re_byte_size::SizeBytes as _; use re_log_types::{EntityPath, ResolvedTimeRange, TimeInt, TimePoint, Timeline}; use re_types_core::ComponentDescriptor; -use crate::{arrow_util, chunk::ChunkComponents, Chunk, ChunkId, ChunkResult, RowId, TimeColumn}; +use crate::{chunk::ChunkComponents, Chunk, ChunkId, ChunkResult, RowId, TimeColumn}; // --- diff --git a/crates/store/re_chunk/src/builder.rs b/crates/store/re_chunk/src/builder.rs index e08e20132fab..fe8f40228d78 100644 --- a/crates/store/re_chunk/src/builder.rs +++ b/crates/store/re_chunk/src/builder.rs @@ -2,10 +2,11 @@ use arrow::{array::ArrayRef, datatypes::DataType as ArrowDatatype}; use itertools::Itertools; use nohash_hasher::IntMap; +use re_arrow_util::arrow_util; use re_log_types::{EntityPath, TimeInt, TimePoint, Timeline}; use re_types_core::{AsComponents, ComponentBatch, ComponentDescriptor, SerializedComponentBatch}; -use crate::{arrow_util, chunk::ChunkComponents, Chunk, ChunkId, ChunkResult, RowId, TimeColumn}; +use crate::{chunk::ChunkComponents, Chunk, ChunkId, ChunkResult, RowId, TimeColumn}; // --- diff --git a/crates/store/re_chunk/src/concat_record_batches.rs b/crates/store/re_chunk/src/concat_record_batches.rs new file mode 100644 index 000000000000..beae93c6a655 --- /dev/null +++ b/crates/store/re_chunk/src/concat_record_batches.rs @@ -0,0 +1,40 @@ +use crate::TransportChunk; + +use arrow::datatypes::Schema as ArrowSchema; +use arrow2::chunk::Chunk as Arrow2Chunk; + +/// Concatenate multiple [`TransportChunk`]s into one. +/// +/// This is a temporary method that we use while waiting to migrate towards `arrow-rs`. +/// * `arrow2` doesn't have a `RecordBatch` type, therefore we emulate that using our `TransportChunk`s. +/// * `arrow-rs` does have one, and it natively supports concatenation. +pub fn concatenate_record_batches( + schema: impl Into, + batches: &[TransportChunk], +) -> anyhow::Result { + let schema: ArrowSchema = schema.into(); + anyhow::ensure!( + batches + .iter() + .all(|batch| batch.schema_ref().as_ref() == &schema), + "concatenate_record_batches: all batches must have the same schema" + ); + + let mut output_columns = Vec::new(); + + if !batches.is_empty() { + for (i, _field) in schema.fields.iter().enumerate() { + let arrays: Option> = batches.iter().map(|batch| batch.column(i)).collect(); + let arrays = arrays.ok_or_else(|| { + anyhow::anyhow!("concatenate_record_batches: all batches must have the same schema") + })?; + let array = re_arrow_util::arrow2_util::concat_arrays(&arrays)?; + output_columns.push(array); + } + } + + Ok(TransportChunk::new( + schema, + Arrow2Chunk::new(output_columns), + )) +} diff --git a/crates/store/re_chunk/src/lib.rs b/crates/store/re_chunk/src/lib.rs index b4edc54d1ce6..30ff3c5820da 100644 --- a/crates/store/re_chunk/src/lib.rs +++ b/crates/store/re_chunk/src/lib.rs @@ -4,10 +4,9 @@ #![doc = document_features::document_features!()] //! -pub mod arrow2_util; -pub mod arrow_util; mod builder; mod chunk; +pub mod concat_record_batches; mod helpers; mod id; mod iter; diff --git a/crates/store/re_chunk/src/merge.rs b/crates/store/re_chunk/src/merge.rs index af80cfef1343..4a10e3483aee 100644 --- a/crates/store/re_chunk/src/merge.rs +++ b/crates/store/re_chunk/src/merge.rs @@ -4,10 +4,9 @@ use arrow2::array::{Array as Arrow2Array, ListArray as Arrow2ListArray}; use itertools::{izip, Itertools}; use nohash_hasher::IntMap; -use crate::{ - arrow2_util, arrow_util, chunk::ChunkComponents, Chunk, ChunkError, ChunkId, ChunkResult, - TimeColumn, -}; +use re_arrow_util::{arrow2_util, arrow_util}; + +use crate::{chunk::ChunkComponents, Chunk, ChunkError, ChunkId, ChunkResult, TimeColumn}; // --- diff --git a/crates/store/re_chunk/src/migration.rs b/crates/store/re_chunk/src/migration.rs index fe50b9a53e20..5ff67e45838b 100644 --- a/crates/store/re_chunk/src/migration.rs +++ b/crates/store/re_chunk/src/migration.rs @@ -1,8 +1,9 @@ use arrow2::array::{Array, Utf8Array}; - use itertools::Itertools; use nohash_hasher::IntMap; +use re_arrow_util::arrow2_util; + use crate::Chunk; impl Chunk { @@ -78,8 +79,7 @@ impl Chunk { .map(|a| a.as_deref() as Option<&dyn Array>) .collect_vec(); - if let Some(list_array_patched) = - crate::arrow2_util::arrays_to_list_array_opt(&arrays) + if let Some(list_array_patched) = arrow2_util::arrays_to_list_array_opt(&arrays) { *list_array = list_array_patched; } diff --git a/crates/store/re_chunk/src/shuffle.rs b/crates/store/re_chunk/src/shuffle.rs index 4b15f425def6..ef2ccb586a17 100644 --- a/crates/store/re_chunk/src/shuffle.rs +++ b/crates/store/re_chunk/src/shuffle.rs @@ -279,7 +279,7 @@ impl Chunk { ArrowOffsets::try_from_lengths(sorted_arrays.iter().map(|array| array.len())) .unwrap(); #[allow(clippy::unwrap_used)] // these are slices of the same outer array - let values = crate::arrow2_util::concat_arrays(&sorted_arrays).unwrap(); + let values = re_arrow_util::arrow2_util::concat_arrays(&sorted_arrays).unwrap(); let validity = original .validity() .map(|validity| swaps.iter().map(|&from| validity.get_bit(from)).collect()); diff --git a/crates/store/re_chunk/src/slice.rs b/crates/store/re_chunk/src/slice.rs index 782f97cacb0b..bb81462ac704 100644 --- a/crates/store/re_chunk/src/slice.rs +++ b/crates/store/re_chunk/src/slice.rs @@ -5,10 +5,12 @@ use arrow2::array::{ use itertools::Itertools; use nohash_hasher::IntSet; +use re_arrow_util::arrow2_util; +use re_arrow_util::arrow_util; use re_log_types::Timeline; use re_types_core::{ComponentDescriptor, ComponentName}; -use crate::{arrow2_util, arrow_util, Chunk, RowId, TimeColumn}; +use crate::{Chunk, RowId, TimeColumn}; // --- diff --git a/crates/store/re_chunk/src/transport.rs b/crates/store/re_chunk/src/transport.rs index 3fe57fad8d09..ab06ce84a36c 100644 --- a/crates/store/re_chunk/src/transport.rs +++ b/crates/store/re_chunk/src/transport.rs @@ -16,14 +16,12 @@ use itertools::Itertools; use nohash_hasher::IntMap; use tap::Tap as _; +use re_arrow_util::arrow_util::into_arrow_ref; use re_byte_size::SizeBytes as _; use re_log_types::{EntityPath, Timeline}; use re_types_core::{Component as _, ComponentDescriptor, Loggable as _}; -use crate::{ - arrow_util::into_arrow_ref, chunk::ChunkComponents, Chunk, ChunkError, ChunkId, ChunkResult, - RowId, TimeColumn, -}; +use crate::{chunk::ChunkComponents, Chunk, ChunkError, ChunkId, ChunkResult, RowId, TimeColumn}; pub type ArrowMetadata = std::collections::HashMap; @@ -791,13 +789,13 @@ impl Chunk { #[cfg(test)] mod tests { use nohash_hasher::IntMap; + + use re_arrow_util::arrow2_util; use re_log_types::{ example_components::{MyColor, MyPoint}, Timeline, }; - use crate::arrow2_util; - use super::*; #[test] diff --git a/crates/store/re_chunk/tests/memory_test.rs b/crates/store/re_chunk/tests/memory_test.rs index 84ae71fd25a3..fac5e7bbcb31 100644 --- a/crates/store/re_chunk/tests/memory_test.rs +++ b/crates/store/re_chunk/tests/memory_test.rs @@ -62,7 +62,7 @@ use arrow2::{ offset::Offsets as Arrow2Offsets, }; use itertools::Itertools; -use re_chunk::arrow2_util; +use re_arrow_util::arrow2_util; // --- concat --- diff --git a/crates/store/re_chunk_store/Cargo.toml b/crates/store/re_chunk_store/Cargo.toml index b4d65bf4e554..eb9a6c598e3a 100644 --- a/crates/store/re_chunk_store/Cargo.toml +++ b/crates/store/re_chunk_store/Cargo.toml @@ -27,6 +27,7 @@ deadlock_detection = ["parking_lot/deadlock_detection"] [dependencies] # Rerun dependencies: +re_arrow_util.workspace = true re_byte_size.workspace = true re_chunk.workspace = true re_format.workspace = true diff --git a/crates/store/re_chunk_store/src/writes.rs b/crates/store/re_chunk_store/src/writes.rs index 45cbadc43bb5..01688903247a 100644 --- a/crates/store/re_chunk_store/src/writes.rs +++ b/crates/store/re_chunk_store/src/writes.rs @@ -387,7 +387,7 @@ impl ChunkStore { }); { let is_semantically_empty = - re_chunk::arrow2_util::is_list_array_semantically_empty(list_array); + re_arrow_util::arrow2_util::is_list_array_semantically_empty(list_array); column_metadata_state.is_semantically_empty &= is_semantically_empty; } diff --git a/crates/store/re_dataframe/Cargo.toml b/crates/store/re_dataframe/Cargo.toml index 185614733cc8..af5cce5c268a 100644 --- a/crates/store/re_dataframe/Cargo.toml +++ b/crates/store/re_dataframe/Cargo.toml @@ -26,6 +26,7 @@ default = [] [dependencies] # Rerun dependencies: +re_arrow_util.workspace = true re_chunk.workspace = true re_chunk_store.workspace = true re_log.workspace = true diff --git a/crates/store/re_dataframe/src/lib.rs b/crates/store/re_dataframe/src/lib.rs index 964ec466c789..2ab290381603 100644 --- a/crates/store/re_dataframe/src/lib.rs +++ b/crates/store/re_dataframe/src/lib.rs @@ -9,7 +9,7 @@ pub use self::query::QueryHandle; #[doc(no_inline)] pub use self::external::arrow2::chunk::Chunk as Arrow2Chunk; #[doc(no_inline)] -pub use self::external::re_chunk::{arrow2_util::concatenate_record_batches, TransportChunk}; +pub use self::external::re_chunk::{concat_record_batches::concatenate_record_batches, TransportChunk}; #[doc(no_inline)] pub use self::external::re_chunk_store::{ ChunkStoreConfig, ChunkStoreHandle, ColumnSelector, ComponentColumnSelector, Index, IndexRange, diff --git a/crates/store/re_dataframe/src/query.rs b/crates/store/re_dataframe/src/query.rs index 955e2099b944..35bf12a115c9 100644 --- a/crates/store/re_dataframe/src/query.rs +++ b/crates/store/re_dataframe/src/query.rs @@ -261,7 +261,7 @@ impl QueryHandle { archetype_name: descr.archetype_name, archetype_field_name: descr.archetype_field_name, }, - re_chunk::arrow2_util::new_list_array_of_empties( + re_arrow_util::arrow2_util::new_list_array_of_empties( child_datatype, chunk.num_rows(), ), @@ -1324,7 +1324,8 @@ mod tests { use std::sync::Arc; use re_chunk::{ - arrow2_util::concatenate_record_batches, Chunk, ChunkId, RowId, TimePoint, TransportChunk, + concat_record_batches::concatenate_record_batches, Chunk, ChunkId, RowId, TimePoint, + TransportChunk, }; use re_chunk_store::{ ChunkStore, ChunkStoreConfig, ChunkStoreHandle, ResolvedTimeRange, TimeInt, diff --git a/crates/store/re_grpc_client/Cargo.toml b/crates/store/re_grpc_client/Cargo.toml index 7b167c564a5a..b3eed2d127c3 100644 --- a/crates/store/re_grpc_client/Cargo.toml +++ b/crates/store/re_grpc_client/Cargo.toml @@ -20,6 +20,7 @@ all-features = true [dependencies] +re_arrow_util.workspace = true re_chunk.workspace = true re_error.workspace = true re_log.workspace = true diff --git a/crates/store/re_grpc_client/src/lib.rs b/crates/store/re_grpc_client/src/lib.rs index 0be4b33f743e..d95afa235cce 100644 --- a/crates/store/re_grpc_client/src/lib.rs +++ b/crates/store/re_grpc_client/src/lib.rs @@ -439,7 +439,7 @@ async fn stream_catalog_async( let data_arrays = sliced.iter().map(|e| Some(e.as_ref())).collect::>(); #[allow(clippy::unwrap_used)] // we know we've given the right field type let data_field_array: arrow2::array::ListArray = - re_chunk::arrow2_util::arrays_to_list_array( + re_arrow_util::arrow2_util::arrays_to_list_array( data_field_inner.data_type().clone().into(), &data_arrays, ) @@ -500,7 +500,7 @@ async fn stream_catalog_async( let rec_id_field = ArrowField::new("item", ArrowDataType::Utf8, true); #[allow(clippy::unwrap_used)] // we know we've given the right field type - let uris = re_chunk::arrow2_util::arrays_to_list_array( + let uris = re_arrow_util::arrow2_util::arrays_to_list_array( rec_id_field.data_type().clone().into(), &recording_id_arrays, ) diff --git a/crates/utils/re_arrow_util/Cargo.toml b/crates/utils/re_arrow_util/Cargo.toml new file mode 100644 index 000000000000..68a36945c59d --- /dev/null +++ b/crates/utils/re_arrow_util/Cargo.toml @@ -0,0 +1,31 @@ +[package] +name = "re_arrow_util" +authors.workspace = true +description = "Helpers for working with arrow." +edition.workspace = true +homepage.workspace = true +include.workspace = true +license.workspace = true +publish = true +readme = "README.md" +repository.workspace = true +rust-version.workspace = true +version.workspace = true + +[lints] +workspace = true + +[package.metadata.docs.rs] +all-features = true + + +[features] + + +[dependencies] +re_log.workspace = true +re_tracing.workspace = true + +arrow.workspace = true +arrow2.workspace = true +itertools.workspace = true diff --git a/crates/utils/re_arrow_util/README.md b/crates/utils/re_arrow_util/README.md new file mode 100644 index 000000000000..6508f218a3ad --- /dev/null +++ b/crates/utils/re_arrow_util/README.md @@ -0,0 +1,10 @@ +# re_arrow_util + +Part of the [`rerun`](https://github.com/rerun-io/rerun) family of crates. + +[![Latest version](https://img.shields.io/crates/v/re_arrow_util.svg)](https://crates.io/crates/re_arrow_util?speculative-link) +[![Documentation](https://docs.rs/re_arrow_util/badge.svg)](https://docs.rs/re_arrow_util?speculative-link) +![MIT](https://img.shields.io/badge/license-MIT-blue.svg) +![Apache](https://img.shields.io/badge/license-Apache-blue.svg) + +Helpers for working with arrow diff --git a/crates/store/re_chunk/src/arrow2_util.rs b/crates/utils/re_arrow_util/src/arrow2_util.rs similarity index 91% rename from crates/store/re_chunk/src/arrow2_util.rs rename to crates/utils/re_arrow_util/src/arrow2_util.rs index ece51d48fc38..b2f64fd61800 100644 --- a/crates/store/re_chunk/src/arrow2_util.rs +++ b/crates/utils/re_arrow_util/src/arrow2_util.rs @@ -1,4 +1,3 @@ -use arrow::datatypes::Schema as ArrowSchema; use arrow2::{ array::{ Array as Arrow2Array, BooleanArray as Arrow2BooleanArray, @@ -9,9 +8,7 @@ use arrow2::{ datatypes::DataType as Arrow2Datatype, offset::Offsets as ArrowOffsets, }; -use itertools::Itertools; - -use crate::TransportChunk; +use itertools::Itertools as _; /// Downcast an arrow array to another array, without having to go via `Any`. /// @@ -454,43 +451,3 @@ pub fn take_array( .unwrap() .clone() } - -// --- - -use arrow2::chunk::Chunk as Arrow2Chunk; - -/// Concatenate multiple [`TransportChunk`]s into one. -/// -/// This is a temporary method that we use while waiting to migrate towards `arrow-rs`. -/// * `arrow2` doesn't have a `RecordBatch` type, therefore we emulate that using our `TransportChunk`s. -/// * `arrow-rs` does have one, and it natively supports concatenation. -pub fn concatenate_record_batches( - schema: impl Into, - batches: &[TransportChunk], -) -> anyhow::Result { - let schema: ArrowSchema = schema.into(); - anyhow::ensure!( - batches - .iter() - .all(|batch| batch.schema_ref().as_ref() == &schema), - "concatenate_record_batches: all batches must have the same schema" - ); - - let mut output_columns = Vec::new(); - - if !batches.is_empty() { - for (i, _field) in schema.fields.iter().enumerate() { - let arrays: Option> = batches.iter().map(|batch| batch.column(i)).collect(); - let arrays = arrays.ok_or_else(|| { - anyhow::anyhow!("concatenate_record_batches: all batches must have the same schema") - })?; - let array = concat_arrays(&arrays)?; - output_columns.push(array); - } - } - - Ok(TransportChunk::new( - schema, - Arrow2Chunk::new(output_columns), - )) -} diff --git a/crates/store/re_chunk/src/arrow_util.rs b/crates/utils/re_arrow_util/src/arrow_util.rs similarity index 99% rename from crates/store/re_chunk/src/arrow_util.rs rename to crates/utils/re_arrow_util/src/arrow_util.rs index 7cfa68b6d869..010f30fd351e 100644 --- a/crates/store/re_chunk/src/arrow_util.rs +++ b/crates/utils/re_arrow_util/src/arrow_util.rs @@ -3,7 +3,7 @@ use arrow::{ buffer::{NullBuffer, OffsetBuffer}, datatypes::{DataType, Field}, }; -use itertools::Itertools; +use itertools::Itertools as _; // --- diff --git a/crates/utils/re_arrow_util/src/lib.rs b/crates/utils/re_arrow_util/src/lib.rs new file mode 100644 index 000000000000..20e0ffe898ef --- /dev/null +++ b/crates/utils/re_arrow_util/src/lib.rs @@ -0,0 +1,7 @@ +//! Helpers for working with arrow + +pub mod arrow2_util; +pub mod arrow_util; + +pub use arrow2_util::Arrow2ArrayDowncastRef; +pub use arrow_util::ArrowArrayDowncastRef; From 98681f5a7a91f42a07108baed1838eb0b0456946 Mon Sep 17 00:00:00 2001 From: Emil Ernerfeldt Date: Tue, 14 Jan 2025 18:29:14 +0100 Subject: [PATCH 3/9] Call it `downcast_array2_ref` for arrow2 --- crates/utils/re_arrow_util/src/arrow2_util.rs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/crates/utils/re_arrow_util/src/arrow2_util.rs b/crates/utils/re_arrow_util/src/arrow2_util.rs index b2f64fd61800..ebb023029491 100644 --- a/crates/utils/re_arrow_util/src/arrow2_util.rs +++ b/crates/utils/re_arrow_util/src/arrow2_util.rs @@ -19,11 +19,11 @@ pub trait Arrow2ArrayDowncastRef { /// /// This is shorter, but also better: it means we don't accidentally downcast /// an arrow2 array to an arrow1 array, or vice versa. - fn downcast_array_ref(&self) -> Option<&T>; + fn downcast_array2_ref(&self) -> Option<&T>; } impl Arrow2ArrayDowncastRef for dyn Arrow2Array { - fn downcast_array_ref(&self) -> Option<&T> { + fn downcast_array2_ref(&self) -> Option<&T> { self.as_any().downcast_ref() } } From cb7efd7f0a8fee96650dcddc289ff6c4f8082a55 Mon Sep 17 00:00:00 2001 From: Emil Ernerfeldt Date: Tue, 14 Jan 2025 18:38:43 +0100 Subject: [PATCH 4/9] Implement downcast_array_ref for all arrays, not just dyn Array --- crates/utils/re_arrow_util/src/arrow2_util.rs | 13 ++++++++++++- crates/utils/re_arrow_util/src/arrow_util.rs | 13 ++++++++++++- 2 files changed, 24 insertions(+), 2 deletions(-) diff --git a/crates/utils/re_arrow_util/src/arrow2_util.rs b/crates/utils/re_arrow_util/src/arrow2_util.rs index ebb023029491..dcf5c45c61ea 100644 --- a/crates/utils/re_arrow_util/src/arrow2_util.rs +++ b/crates/utils/re_arrow_util/src/arrow2_util.rs @@ -10,6 +10,8 @@ use arrow2::{ }; use itertools::Itertools as _; +// --------------------------------------------------------------------------------- + /// Downcast an arrow array to another array, without having to go via `Any`. /// /// This is shorter, but also better: it means we don't accidentally downcast @@ -28,7 +30,16 @@ impl Arrow2ArrayDowncastRef for dyn Arrow2Array { } } -// --- +impl Arrow2ArrayDowncastRef for A +where + A: Arrow2Array, +{ + fn downcast_array2_ref(&self) -> Option<&T> { + self.as_any().downcast_ref() + } +} + +// --------------------------------------------------------------------------------- /// Returns true if the given `list_array` is semantically empty. /// diff --git a/crates/utils/re_arrow_util/src/arrow_util.rs b/crates/utils/re_arrow_util/src/arrow_util.rs index 010f30fd351e..f240b109c833 100644 --- a/crates/utils/re_arrow_util/src/arrow_util.rs +++ b/crates/utils/re_arrow_util/src/arrow_util.rs @@ -5,7 +5,7 @@ use arrow::{ }; use itertools::Itertools as _; -// --- +// --------------------------------------------------------------------------------- /// Downcast an arrow array to another array, without having to go via `Any`. /// @@ -25,6 +25,17 @@ impl ArrowArrayDowncastRef for dyn Array { } } +impl ArrowArrayDowncastRef for A +where + A: Array, +{ + fn downcast_array_ref(&self) -> Option<&T> { + self.as_any().downcast_ref() + } +} + +// --------------------------------------------------------------------------------- + #[inline] pub fn into_arrow_ref(array: impl Array + 'static) -> ArrayRef { std::sync::Arc::new(array) From dd6f35921fd59cf88141bde435d4e6c4771b387f Mon Sep 17 00:00:00 2001 From: Emil Ernerfeldt Date: Tue, 14 Jan 2025 18:39:02 +0100 Subject: [PATCH 5/9] Use downcast_array2_ref in a bunch of places --- Cargo.lock | 3 ++ crates/store/re_chunk/src/iter.rs | 28 ++++++++---------- crates/store/re_chunk/src/merge.rs | 8 ++---- crates/store/re_chunk/src/migration.rs | 4 +-- crates/store/re_chunk/src/slice.rs | 4 +-- crates/store/re_chunk/src/transport.rs | 5 ++-- crates/store/re_chunk/tests/memory_test.rs | 32 +++++++-------------- crates/store/re_dataframe/src/query.rs | 4 +-- crates/store/re_grpc_client/src/lib.rs | 7 ++--- crates/store/re_query/Cargo.toml | 1 + crates/store/re_query/examples/latest_at.rs | 4 +-- crates/top/rerun_c/Cargo.toml | 1 + crates/top/rerun_c/src/lib.rs | 11 +++---- rerun_py/Cargo.toml | 1 + rerun_py/src/arrow.rs | 3 +- rerun_py/src/remote.rs | 4 +-- rerun_py/src/video.rs | 7 +++-- 17 files changed, 59 insertions(+), 68 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 3b781e0df7fc..4c86e4f6e93c 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -6241,6 +6241,7 @@ dependencies = [ "paste", "rand", "re_arrow2", + "re_arrow_util", "re_byte_size", "re_chunk", "re_chunk_store", @@ -7385,6 +7386,7 @@ dependencies = [ "once_cell", "parking_lot", "re_arrow2", + "re_arrow_util", "re_log", "re_sdk", "re_video", @@ -7408,6 +7410,7 @@ dependencies = [ "pyo3-build-config", "rand", "re_arrow2", + "re_arrow_util", "re_build_info", "re_build_tools", "re_chunk", diff --git a/crates/store/re_chunk/src/iter.rs b/crates/store/re_chunk/src/iter.rs index f86d28919172..46ee88a42b27 100644 --- a/crates/store/re_chunk/src/iter.rs +++ b/crates/store/re_chunk/src/iter.rs @@ -12,6 +12,7 @@ use arrow2::{ }; use itertools::{izip, Itertools}; +use re_arrow_util::Arrow2ArrayDowncastRef as _; use re_log_types::{TimeInt, TimePoint, Timeline}; use re_types_core::{ArrowBuffer, ArrowString, Component, ComponentName}; @@ -250,8 +251,7 @@ impl Chunk { let Some(struct_array) = list_array .values() - .as_any() - .downcast_ref::() + .downcast_array2_ref::() else { if cfg!(debug_assertions) { panic!("downcast failed for {component_name}, data discarded"); @@ -317,7 +317,7 @@ fn slice_as_native<'a, T: arrow2::types::NativeType + arrow::datatypes::ArrowNat array: &'a dyn Arrow2Array, component_offsets: impl Iterator + 'a, ) -> impl Iterator + 'a { - let Some(values) = array.as_any().downcast_ref::>() else { + let Some(values) = array.downcast_array2_ref::>() else { if cfg!(debug_assertions) { panic!("downcast failed for {component_name}, data discarded"); } else { @@ -373,7 +373,7 @@ fn slice_as_array_native< where [T; N]: bytemuck::Pod, { - let Some(fixed_size_list_array) = array.as_any().downcast_ref::() + let Some(fixed_size_list_array) = array.downcast_array2_ref::() else { if cfg!(debug_assertions) { panic!("downcast failed for {component_name}, data discarded"); @@ -385,8 +385,7 @@ where let Some(values) = fixed_size_list_array .values() - .as_any() - .downcast_ref::>() + .downcast_array2_ref::>() else { if cfg!(debug_assertions) { panic!("downcast failed for {component_name}, data discarded"); @@ -445,7 +444,7 @@ fn slice_as_buffer_native<'a, T: arrow2::types::NativeType + arrow::datatypes::A array: &'a dyn Arrow2Array, component_offsets: impl Iterator + 'a, ) -> impl Iterator>> + 'a { - let Some(inner_list_array) = array.as_any().downcast_ref::>() else { + let Some(inner_list_array) = array.downcast_array2_ref::>() else { if cfg!(debug_assertions) { panic!("downcast failed for {component_name}, data discarded"); } else { @@ -456,8 +455,7 @@ fn slice_as_buffer_native<'a, T: arrow2::types::NativeType + arrow::datatypes::A let Some(values) = inner_list_array .values() - .as_any() - .downcast_ref::>() + .downcast_array2_ref::>() else { if cfg!(debug_assertions) { panic!("downcast failed for {component_name}, data discarded"); @@ -524,7 +522,7 @@ fn slice_as_array_list_native< where [T; N]: bytemuck::Pod, { - let Some(inner_list_array) = array.as_any().downcast_ref::>() else { + let Some(inner_list_array) = array.downcast_array2_ref::>() else { if cfg!(debug_assertions) { panic!("downcast failed for {component_name}, data discarded"); } else { @@ -538,8 +536,7 @@ where let Some(fixed_size_list_array) = inner_list_array .values() - .as_any() - .downcast_ref::() + .downcast_array2_ref::() else { if cfg!(debug_assertions) { panic!("downcast failed for {component_name}, data discarded"); @@ -551,8 +548,7 @@ where let Some(values) = fixed_size_list_array .values() - .as_any() - .downcast_ref::>() + .downcast_array2_ref::>() else { if cfg!(debug_assertions) { panic!("downcast failed for {component_name}, data discarded"); @@ -618,7 +614,7 @@ impl ChunkComponentSlicer for String { array: &'a dyn Arrow2Array, component_offsets: impl Iterator + 'a, ) -> impl Iterator> + 'a { - let Some(utf8_array) = array.as_any().downcast_ref::>() else { + let Some(utf8_array) = array.downcast_array2_ref::>() else { if cfg!(debug_assertions) { panic!("downcast failed for {component_name}, data discarded"); } else { @@ -650,7 +646,7 @@ impl ChunkComponentSlicer for bool { array: &'a dyn Arrow2Array, component_offsets: impl Iterator + 'a, ) -> impl Iterator> + 'a { - let Some(values) = array.as_any().downcast_ref::() else { + let Some(values) = array.downcast_array2_ref::() else { if cfg!(debug_assertions) { panic!("downcast failed for {component_name}, data discarded"); } else { diff --git a/crates/store/re_chunk/src/merge.rs b/crates/store/re_chunk/src/merge.rs index 4a10e3483aee..4995d51494f7 100644 --- a/crates/store/re_chunk/src/merge.rs +++ b/crates/store/re_chunk/src/merge.rs @@ -4,7 +4,7 @@ use arrow2::array::{Array as Arrow2Array, ListArray as Arrow2ListArray}; use itertools::{izip, Itertools}; use nohash_hasher::IntMap; -use re_arrow_util::{arrow2_util, arrow_util}; +use re_arrow_util::{arrow2_util, arrow_util, Arrow2ArrayDowncastRef as _}; use crate::{chunk::ChunkComponents, Chunk, ChunkError, ChunkId, ChunkResult, TimeColumn}; @@ -108,8 +108,7 @@ impl Chunk { let list_array = arrow2_util::concat_arrays(&[lhs_list_array, rhs_list_array]).ok()?; let list_array = list_array - .as_any() - .downcast_ref::>()? + .downcast_array2_ref::>()? .clone(); Some((component_desc.clone(), list_array)) @@ -150,8 +149,7 @@ impl Chunk { let list_array = arrow2_util::concat_arrays(&[lhs_list_array, rhs_list_array]).ok()?; let list_array = list_array - .as_any() - .downcast_ref::>()? + .downcast_array2_ref::>()? .clone(); Some((component_desc.clone(), list_array)) diff --git a/crates/store/re_chunk/src/migration.rs b/crates/store/re_chunk/src/migration.rs index 5ff67e45838b..65866002f858 100644 --- a/crates/store/re_chunk/src/migration.rs +++ b/crates/store/re_chunk/src/migration.rs @@ -2,7 +2,7 @@ use arrow2::array::{Array, Utf8Array}; use itertools::Itertools; use nohash_hasher::IntMap; -use re_arrow_util::arrow2_util; +use re_arrow_util::{arrow2_util, Arrow2ArrayDowncastRef as _}; use crate::Chunk; @@ -52,7 +52,7 @@ impl Chunk { .iter() .map(|utf8_array| { utf8_array.map(|array| { - let Some(array) = array.as_any().downcast_ref::>() + let Some(array) = array.downcast_array2_ref::>() else { // Unreachable, just avoiding unwraps. return array; diff --git a/crates/store/re_chunk/src/slice.rs b/crates/store/re_chunk/src/slice.rs index bb81462ac704..dc233225bf34 100644 --- a/crates/store/re_chunk/src/slice.rs +++ b/crates/store/re_chunk/src/slice.rs @@ -7,6 +7,7 @@ use nohash_hasher::IntSet; use re_arrow_util::arrow2_util; use re_arrow_util::arrow_util; +use re_arrow_util::Arrow2ArrayDowncastRef as _; use re_log_types::Timeline; use re_types_core::{ComponentDescriptor, ComponentName}; @@ -388,8 +389,7 @@ impl Chunk { #[allow(clippy::unwrap_used)] filtered .with_validity(None) - .as_any() - .downcast_ref::>() + .downcast_array2_ref::>() // Unwrap: cannot possibly fail -- going from a ListArray back to a ListArray. .unwrap() .clone() diff --git a/crates/store/re_chunk/src/transport.rs b/crates/store/re_chunk/src/transport.rs index ab06ce84a36c..05e113158873 100644 --- a/crates/store/re_chunk/src/transport.rs +++ b/crates/store/re_chunk/src/transport.rs @@ -16,7 +16,7 @@ use itertools::Itertools; use nohash_hasher::IntMap; use tap::Tap as _; -use re_arrow_util::arrow_util::into_arrow_ref; +use re_arrow_util::{arrow_util::into_arrow_ref, Arrow2ArrayDowncastRef as _}; use re_byte_size::SizeBytes as _; use re_log_types::{EntityPath, Timeline}; use re_types_core::{Component as _, ComponentDescriptor, Loggable as _}; @@ -714,8 +714,7 @@ impl Chunk { for (field, column) in transport.components() { let column = column - .as_any() - .downcast_ref::>() + .downcast_array2_ref::>() .ok_or_else(|| ChunkError::Malformed { reason: format!( "The outer array in a chunked component batch must be a sparse list, got {:?}", diff --git a/crates/store/re_chunk/tests/memory_test.rs b/crates/store/re_chunk/tests/memory_test.rs index fac5e7bbcb31..5352508cf310 100644 --- a/crates/store/re_chunk/tests/memory_test.rs +++ b/crates/store/re_chunk/tests/memory_test.rs @@ -62,7 +62,7 @@ use arrow2::{ offset::Offsets as Arrow2Offsets, }; use itertools::Itertools; -use re_arrow_util::arrow2_util; +use re_arrow_util::{arrow2_util, Arrow2ArrayDowncastRef as _}; // --- concat --- @@ -140,12 +140,10 @@ fn concat_single_is_noop() { { let unconcatenated = unconcatenated - .as_any() - .downcast_ref::>() + .downcast_array2_ref::>() .unwrap(); let concatenated = concatenated - .as_any() - .downcast_ref::>() + .downcast_array2_ref::>() .unwrap(); assert!( @@ -203,13 +201,11 @@ fn filter_does_allocate() { { let unfiltered = unfiltered .values() - .as_any() - .downcast_ref::>() + .downcast_array2_ref::>() .unwrap(); let filtered = filtered .values() - .as_any() - .downcast_ref::>() + .downcast_array2_ref::>() .unwrap(); assert!( @@ -269,13 +265,11 @@ fn filter_empty_or_full_is_noop() { { let unfiltered = unfiltered .values() - .as_any() - .downcast_ref::>() + .downcast_array2_ref::>() .unwrap(); let filtered = filtered .values() - .as_any() - .downcast_ref::>() + .downcast_array2_ref::>() .unwrap(); assert!( @@ -338,13 +332,11 @@ fn take_does_not_allocate() { { let untaken = untaken .values() - .as_any() - .downcast_ref::>() + .downcast_array2_ref::>() .unwrap(); let taken = taken .values() - .as_any() - .downcast_ref::>() + .downcast_array2_ref::>() .unwrap(); assert!( @@ -400,13 +392,11 @@ fn take_empty_or_full_is_noop() { { let untaken = untaken .values() - .as_any() - .downcast_ref::>() + .downcast_array2_ref::>() .unwrap(); let taken = taken .values() - .as_any() - .downcast_ref::>() + .downcast_array2_ref::>() .unwrap(); assert!( diff --git a/crates/store/re_dataframe/src/query.rs b/crates/store/re_dataframe/src/query.rs index 35bf12a115c9..4834b631dff4 100644 --- a/crates/store/re_dataframe/src/query.rs +++ b/crates/store/re_dataframe/src/query.rs @@ -19,6 +19,7 @@ use arrow2::{ use itertools::Itertools; use nohash_hasher::{IntMap, IntSet}; +use re_arrow_util::Arrow2ArrayDowncastRef as _; use re_chunk::{ external::arrow::array::ArrayRef, Chunk, ComponentName, EntityPath, RangeQuery, RowId, TimeInt, Timeline, UnitChunkShared, @@ -518,8 +519,7 @@ impl QueryHandle { let values = list_array .values() - .as_any() - .downcast_ref::()?; + .downcast_array2_ref::()?; let indices = Arrow2PrimitiveArray::from_vec( values diff --git a/crates/store/re_grpc_client/src/lib.rs b/crates/store/re_grpc_client/src/lib.rs index d95afa235cce..62ee365c1a9e 100644 --- a/crates/store/re_grpc_client/src/lib.rs +++ b/crates/store/re_grpc_client/src/lib.rs @@ -3,6 +3,7 @@ mod address; pub use address::{InvalidRedapAddress, RedapAddress}; +use re_arrow_util::Arrow2ArrayDowncastRef as _; use re_chunk::external::arrow2; use re_log_encoding::codec::wire::decoder::Decode; use re_log_types::external::re_types_core::ComponentDescriptor; @@ -280,8 +281,7 @@ pub fn store_info_from_catalog_chunk( reason: "no application_id field found".to_owned(), }))?; let app_id = data - .as_any() - .downcast_ref::>() + .downcast_array2_ref::>() .ok_or(StreamError::ChunkError(re_chunk::ChunkError::Malformed { reason: format!("application_id must be a utf8 array: {:?}", tc.schema_ref()), }))? @@ -294,8 +294,7 @@ pub fn store_info_from_catalog_chunk( reason: "no start_time field found".to_owned(), }))?; let start_time = data - .as_any() - .downcast_ref::() + .downcast_array2_ref::() .ok_or(StreamError::ChunkError(re_chunk::ChunkError::Malformed { reason: format!("start_time must be an int64 array: {:?}", tc.schema_ref()), }))? diff --git a/crates/store/re_query/Cargo.toml b/crates/store/re_query/Cargo.toml index 389d3e749ff5..c41b0c74bb32 100644 --- a/crates/store/re_query/Cargo.toml +++ b/crates/store/re_query/Cargo.toml @@ -28,6 +28,7 @@ codegen = [] [dependencies] # Rerun dependencies: +re_arrow_util.workspace = true re_byte_size.workspace = true re_chunk.workspace = true re_chunk_store.workspace = true diff --git a/crates/store/re_query/examples/latest_at.rs b/crates/store/re_query/examples/latest_at.rs index 2cfc8136dbb1..749858f089d5 100644 --- a/crates/store/re_query/examples/latest_at.rs +++ b/crates/store/re_query/examples/latest_at.rs @@ -4,6 +4,7 @@ use anyhow::Context; use arrow2::array::PrimitiveArray as Arrow2PrimitiveArray; use itertools::Itertools; +use re_arrow_util::Arrow2ArrayDowncastRef as _; use re_chunk::{Chunk, RowId}; use re_chunk_store::{ChunkStore, ChunkStoreHandle, LatestAtQuery}; use re_log_types::example_components::{MyColor, MyLabel, MyPoint, MyPoints}; @@ -77,8 +78,7 @@ fn main() -> anyhow::Result<()> { .component_batch_raw_arrow2(&MyColor::name()) .context("invalid")?; let colors = colors - .as_any() - .downcast_ref::>() + .downcast_array2_ref::>() .context("invalid")?; let colors = colors .values() diff --git a/crates/top/rerun_c/Cargo.toml b/crates/top/rerun_c/Cargo.toml index 978d6cde38c5..d8368f7102f0 100644 --- a/crates/top/rerun_c/Cargo.toml +++ b/crates/top/rerun_c/Cargo.toml @@ -35,6 +35,7 @@ test = false [dependencies] +re_arrow_util.workspace = true re_log = { workspace = true, features = ["setup"] } re_sdk = { workspace = true, features = ["data_loaders"] } re_video.workspace = true diff --git a/crates/top/rerun_c/src/lib.rs b/crates/top/rerun_c/src/lib.rs index 2da8b11d97f4..f5e0bafe322a 100644 --- a/crates/top/rerun_c/src/lib.rs +++ b/crates/top/rerun_c/src/lib.rs @@ -14,11 +14,11 @@ mod video; use std::ffi::{c_char, c_uchar, CString}; -use component_type_registry::COMPONENT_TYPES; -use once_cell::sync::Lazy; - use arrow::array::ArrayRef as ArrowArrayRef; use arrow_utils::arrow_array_from_c_ffi; +use once_cell::sync::Lazy; + +use re_arrow_util::Arrow2ArrayDowncastRef as _; use re_sdk::{ external::nohash_hasher::IntMap, log::{Chunk, ChunkId, PendingRow, TimeColumn}, @@ -26,6 +26,8 @@ use re_sdk::{ ComponentDescriptor, EntityPath, RecordingStream, RecordingStreamBuilder, StoreKind, TimePoint, Timeline, }; + +use component_type_registry::COMPONENT_TYPES; use recording_streams::{recording_stream, RECORDING_STREAMS}; // ---------------------------------------------------------------------------- @@ -997,8 +999,7 @@ fn rr_recording_stream_send_columns_impl( let component_values_untyped = unsafe { arrow_array_from_c_ffi(array, datatype) }?; let component_values = component_values_untyped - .as_any() - .downcast_ref::>() + .downcast_array2_ref::>() .ok_or_else(|| { CError::new( CErrorCode::ArrowFfiArrayImportError, diff --git a/rerun_py/Cargo.toml b/rerun_py/Cargo.toml index 99cdf915700a..0cc66e0e91a2 100644 --- a/rerun_py/Cargo.toml +++ b/rerun_py/Cargo.toml @@ -58,6 +58,7 @@ web_viewer = [ [dependencies] +re_arrow_util.workspace = true re_build_info.workspace = true re_chunk = { workspace = true, features = ["arrow"] } re_chunk_store.workspace = true diff --git a/rerun_py/src/arrow.rs b/rerun_py/src/arrow.rs index 87a6968e5f07..16dc761d3066 100644 --- a/rerun_py/src/arrow.rs +++ b/rerun_py/src/arrow.rs @@ -17,6 +17,7 @@ use pyo3::{ Bound, PyAny, PyResult, }; +use re_arrow_util::Arrow2ArrayDowncastRef as _; use re_chunk::{Chunk, ChunkError, ChunkId, PendingRow, RowId, TimeColumn, TransportChunk}; use re_log_types::TimePoint; use re_sdk::{external::nohash_hasher::IntMap, ComponentDescriptor, EntityPath, Timeline}; @@ -161,7 +162,7 @@ pub fn build_chunk_from_components( .into_iter() .zip(fields) .map(|(value, field)| { - let batch = if let Some(batch) = value.as_any().downcast_ref::>() { + let batch = if let Some(batch) = value.downcast_array2_ref::>() { batch.clone() } else { let offsets = Offsets::try_from_lengths(std::iter::repeat(1).take(value.len())) diff --git a/rerun_py/src/remote.rs b/rerun_py/src/remote.rs index b7aa30b30d1a..448c56f8015d 100644 --- a/rerun_py/src/remote.rs +++ b/rerun_py/src/remote.rs @@ -15,6 +15,7 @@ use pyo3::{ types::PyDict, Bound, PyResult, }; +use re_arrow_util::Arrow2ArrayDowncastRef as _; use re_chunk::{Chunk, TransportChunk}; use re_chunk_store::ChunkStore; use re_dataframe::{ChunkStoreHandle, QueryExpression, SparseFillStrategy, ViewContentsSelector}; @@ -350,8 +351,7 @@ impl PyStorageNodeClient { .find(|(field, _data)| field.name() == "rerun_recording_id") .map(|(_field, data)| data) .ok_or(PyRuntimeError::new_err("No rerun_recording_id"))? - .as_any() - .downcast_ref::>() + .downcast_array2_ref::>() .ok_or(PyRuntimeError::new_err("Recording Id is not a string"))? .value(0) .to_owned(); diff --git a/rerun_py/src/video.rs b/rerun_py/src/video.rs index 42b3ad08e174..0ada08d02c29 100644 --- a/rerun_py/src/video.rs +++ b/rerun_py/src/video.rs @@ -1,6 +1,8 @@ #![allow(unsafe_op_in_unsafe_fn)] // False positive due to #[pyfunction] macro use pyo3::{exceptions::PyRuntimeError, pyfunction, Bound, PyAny, PyResult}; + +use re_arrow_util::Arrow2ArrayDowncastRef as _; use re_sdk::ComponentDescriptor; use re_video::VideoLoadError; @@ -26,9 +28,8 @@ pub fn asset_video_read_frame_timestamps_ns( let video_bytes_arrow_array = array_to_rust(video_bytes_arrow_array, &component_descr)?.0; let video_bytes_arrow_uint8_array = video_bytes_arrow_array - .as_any() - .downcast_ref::>() - .and_then(|arr| arr.values().as_any().downcast_ref::()) + .downcast_array2_ref::>() + .and_then(|arr| arr.values().downcast_array2_ref::()) .ok_or_else(|| { PyRuntimeError::new_err(format!( "Expected arrow array to be a list with a single uint8 array, instead it has the datatype {:?}", From 910544b933e077b03196ffdef11b53fc7a431e37 Mon Sep 17 00:00:00 2001 From: Emil Ernerfeldt Date: Tue, 14 Jan 2025 19:02:27 +0100 Subject: [PATCH 6/9] Use downcast_array_ref --- Cargo.lock | 5 +++ crates/store/re_chunk/src/chunk.rs | 34 +++++++------------ crates/store/re_chunk/src/merge.rs | 7 ++-- crates/store/re_chunk/src/transport.rs | 10 +++--- crates/store/re_dataframe/src/lib.rs | 4 ++- crates/store/re_format_arrow/Cargo.toml | 6 ++-- crates/store/re_format_arrow/src/lib.rs | 7 ++-- crates/store/re_log_types/Cargo.toml | 1 + .../re_log_types/src/example_components.rs | 19 ++++------- crates/store/re_types_core/Cargo.toml | 1 + crates/store/re_types_core/src/tuid.rs | 6 ++-- crates/utils/re_arrow_util/src/arrow_util.rs | 7 ++-- crates/viewer/re_ui/Cargo.toml | 1 + crates/viewer/re_ui/src/arrow_ui.rs | 6 ++-- crates/viewer/re_view_dataframe/Cargo.toml | 1 + .../src/display_record_batch.rs | 10 +++--- rerun_py/src/dataframe.rs | 7 ++-- 17 files changed, 65 insertions(+), 67 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 4c86e4f6e93c..bdbbb475fccb 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -6015,6 +6015,7 @@ dependencies = [ "arrow", "comfy-table", "itertools 0.13.0", + "re_arrow_util", "re_tuid", "re_types_core", ] @@ -6138,6 +6139,7 @@ dependencies = [ "num-derive", "num-traits", "re_arrow2", + "re_arrow_util", "re_build_info", "re_byte_size", "re_format", @@ -6600,6 +6602,7 @@ dependencies = [ "nohash-hasher", "once_cell", "re_arrow2", + "re_arrow_util", "re_byte_size", "re_case", "re_error", @@ -6628,6 +6631,7 @@ dependencies = [ "once_cell", "parking_lot", "rand", + "re_arrow_util", "re_entity_db", "re_format", "re_log", @@ -6722,6 +6726,7 @@ dependencies = [ "egui", "egui_table", "itertools 0.13.0", + "re_arrow_util", "re_chunk_store", "re_dataframe", "re_error", diff --git a/crates/store/re_chunk/src/chunk.rs b/crates/store/re_chunk/src/chunk.rs index c47f0c4663a5..72827a1b81e5 100644 --- a/crates/store/re_chunk/src/chunk.rs +++ b/crates/store/re_chunk/src/chunk.rs @@ -15,6 +15,7 @@ use arrow2::{ use itertools::{izip, Itertools}; use nohash_hasher::IntMap; +use re_arrow_util::ArrowArrayDowncastRef as _; use re_byte_size::SizeBytes as _; use re_log_types::{EntityPath, ResolvedTimeRange, Time, TimeInt, TimePoint, Timeline}; use re_types_core::{ @@ -445,8 +446,7 @@ impl Chunk { let row_ids = ::to_arrow(&row_ids) // Unwrap: native RowIds cannot fail to serialize. .unwrap() - .as_any() - .downcast_ref::() + .downcast_array_ref::() // Unwrap: RowId schema is known in advance to be a struct array -- always. .unwrap() .clone(); @@ -501,8 +501,7 @@ impl Chunk { let row_ids = ::to_arrow(&row_ids) // Unwrap: native RowIds cannot fail to serialize. .unwrap() - .as_any() - .downcast_ref::() + .downcast_array_ref::() // Unwrap: RowId schema is known in advance to be a struct array -- always. .unwrap() .clone(); @@ -876,8 +875,7 @@ impl Chunk { .map_err(|err| ChunkError::Malformed { reason: format!("RowIds failed to serialize: {err}"), })? - .as_any() - .downcast_ref::() + .downcast_array_ref::() // NOTE: impossible, but better safe than sorry. .ok_or_else(|| ChunkError::Malformed { reason: "RowIds failed to downcast".to_owned(), @@ -1131,21 +1129,18 @@ impl TimeColumn { } // Sequence timelines are i64, but time columns are nanoseconds (also as i64). - if let Some(times) = array.as_any().downcast_ref::() { + if let Some(times) = array.downcast_array_ref::() { Ok(times.values().clone()) - } else if let Some(times) = array - .as_any() - .downcast_ref::() + } else if let Some(times) = + array.downcast_array_ref::() { Ok(times.values().clone()) - } else if let Some(times) = array - .as_any() - .downcast_ref::() + } else if let Some(times) = + array.downcast_array_ref::() { Ok(times.values().clone()) - } else if let Some(times) = array - .as_any() - .downcast_ref::() + } else if let Some(times) = + array.downcast_array_ref::() { Ok(times.values().clone()) } else { @@ -1224,13 +1219,10 @@ impl Chunk { }; #[allow(clippy::unwrap_used)] - let times = times.as_any().downcast_ref::().unwrap(); // sanity checked + let times = times.downcast_array_ref::().unwrap(); // sanity checked #[allow(clippy::unwrap_used)] - let counters = counters - .as_any() - .downcast_ref::() - .unwrap(); // sanity checked + let counters = counters.downcast_array_ref::().unwrap(); // sanity checked (times, counters) } diff --git a/crates/store/re_chunk/src/merge.rs b/crates/store/re_chunk/src/merge.rs index 4995d51494f7..16d4e1ad5bb8 100644 --- a/crates/store/re_chunk/src/merge.rs +++ b/crates/store/re_chunk/src/merge.rs @@ -4,7 +4,9 @@ use arrow2::array::{Array as Arrow2Array, ListArray as Arrow2ListArray}; use itertools::{izip, Itertools}; use nohash_hasher::IntMap; -use re_arrow_util::{arrow2_util, arrow_util, Arrow2ArrayDowncastRef as _}; +use re_arrow_util::{ + arrow2_util, arrow_util, Arrow2ArrayDowncastRef as _, ArrowArrayDowncastRef as _, +}; use crate::{chunk::ChunkComponents, Chunk, ChunkError, ChunkId, ChunkResult, TimeColumn}; @@ -51,8 +53,7 @@ impl Chunk { #[allow(clippy::unwrap_used)] // concatenating 2 RowId arrays must yield another RowId array row_ids - .as_any() - .downcast_ref::() + .downcast_array_ref::() .unwrap() .clone() }; diff --git a/crates/store/re_chunk/src/transport.rs b/crates/store/re_chunk/src/transport.rs index 05e113158873..5f0a9c17e162 100644 --- a/crates/store/re_chunk/src/transport.rs +++ b/crates/store/re_chunk/src/transport.rs @@ -2,8 +2,7 @@ use std::sync::Arc; use arrow::{ array::{ - Array as ArrowArray, ArrayRef as ArrowArrayRef, RecordBatch as ArrowRecordBatch, - StructArray as ArrowStructArray, + ArrayRef as ArrowArrayRef, RecordBatch as ArrowRecordBatch, StructArray as ArrowStructArray, }, datatypes::{Field as ArrowField, Schema as ArrowSchema, SchemaRef as ArrowSchemaRef}, }; @@ -16,7 +15,9 @@ use itertools::Itertools; use nohash_hasher::IntMap; use tap::Tap as _; -use re_arrow_util::{arrow_util::into_arrow_ref, Arrow2ArrayDowncastRef as _}; +use re_arrow_util::{ + arrow_util::into_arrow_ref, Arrow2ArrayDowncastRef as _, ArrowArrayDowncastRef as _, +}; use re_byte_size::SizeBytes as _; use re_log_types::{EntityPath, Timeline}; use re_types_core::{Component as _, ComponentDescriptor, Loggable as _}; @@ -648,8 +649,7 @@ impl Chunk { }; ArrowArrayRef::from(row_ids.clone()) - .as_any() - .downcast_ref::() + .downcast_array_ref::() .ok_or_else(|| ChunkError::Malformed { reason: format!( "RowId data has the wrong datatype: expected {:?} but got {:?} instead", diff --git a/crates/store/re_dataframe/src/lib.rs b/crates/store/re_dataframe/src/lib.rs index 2ab290381603..58e2d4c4dddf 100644 --- a/crates/store/re_dataframe/src/lib.rs +++ b/crates/store/re_dataframe/src/lib.rs @@ -9,7 +9,9 @@ pub use self::query::QueryHandle; #[doc(no_inline)] pub use self::external::arrow2::chunk::Chunk as Arrow2Chunk; #[doc(no_inline)] -pub use self::external::re_chunk::{concat_record_batches::concatenate_record_batches, TransportChunk}; +pub use self::external::re_chunk::{ + concat_record_batches::concatenate_record_batches, TransportChunk, +}; #[doc(no_inline)] pub use self::external::re_chunk_store::{ ChunkStoreConfig, ChunkStoreHandle, ColumnSelector, ComponentColumnSelector, Index, IndexRange, diff --git a/crates/store/re_format_arrow/Cargo.toml b/crates/store/re_format_arrow/Cargo.toml index 1801d1154b3d..6287819d6ecb 100644 --- a/crates/store/re_format_arrow/Cargo.toml +++ b/crates/store/re_format_arrow/Cargo.toml @@ -20,11 +20,13 @@ all-features = true [dependencies] -arrow.workspace = true -itertools.workspace = true +re_arrow_util.workspace = true re_tuid.workspace = true re_types_core.workspace = true # tuid serialization +arrow.workspace = true +itertools.workspace = true + # native dependencies: [target.'cfg(not(target_arch = "wasm32"))'.dependencies] comfy-table = { workspace = true, features = ["tty"] } diff --git a/crates/store/re_format_arrow/src/lib.rs b/crates/store/re_format_arrow/src/lib.rs index 423f1d8db865..28af79ae8be5 100644 --- a/crates/store/re_format_arrow/src/lib.rs +++ b/crates/store/re_format_arrow/src/lib.rs @@ -8,8 +8,9 @@ use arrow::{ util::display::{ArrayFormatter, FormatOptions}, }; use comfy_table::{presets, Cell, Row, Table}; - use itertools::Itertools as _; + +use re_arrow_util::ArrowArrayDowncastRef as _; use re_tuid::Tuid; use re_types_core::Loggable as _; @@ -55,9 +56,7 @@ fn parse_tuid(array: &dyn Array, index: usize) -> Option { match array.data_type() { // Legacy MsgId lists: just grab the first value, they're all identical - DataType::List(_) => { - parse_inner(&array.as_any().downcast_ref::()?.value(index), 0) - } + DataType::List(_) => parse_inner(&array.downcast_array_ref::()?.value(index), 0), // New control columns: it's not a list to begin with! _ => parse_inner(array, index), } diff --git a/crates/store/re_log_types/Cargo.toml b/crates/store/re_log_types/Cargo.toml index fcf98b9794e7..2437bbf3e5bf 100644 --- a/crates/store/re_log_types/Cargo.toml +++ b/crates/store/re_log_types/Cargo.toml @@ -42,6 +42,7 @@ serde = [ [dependencies] # Rerun +re_arrow_util.workspace = true re_build_info.workspace = true re_byte_size.workspace = true re_format.workspace = true diff --git a/crates/store/re_log_types/src/example_components.rs b/crates/store/re_log_types/src/example_components.rs index 43cd251bde33..06e1da14a4c8 100644 --- a/crates/store/re_log_types/src/example_components.rs +++ b/crates/store/re_log_types/src/example_components.rs @@ -2,6 +2,7 @@ use std::sync::Arc; +use re_arrow_util::ArrowArrayDowncastRef as _; use re_byte_size::SizeBytes; use re_types_core::{Component, ComponentDescriptor, DeserializationError, Loggable}; @@ -116,8 +117,7 @@ impl Loggable for MyPoint { data: &dyn arrow::array::Array, ) -> re_types_core::DeserializationResult>> { let array = data - .as_any() - .downcast_ref::() + .downcast_array_ref::() .ok_or(DeserializationError::downcast_error::< arrow::array::StructArray, >())?; @@ -126,14 +126,12 @@ impl Loggable for MyPoint { let y_array = array.columns()[1].as_ref(); let xs = x_array - .as_any() - .downcast_ref::() + .downcast_array_ref::() .ok_or(DeserializationError::downcast_error::< arrow::array::Float32Array, >())?; let ys = y_array - .as_any() - .downcast_ref::() + .downcast_array_ref::() .ok_or(DeserializationError::downcast_error::< arrow::array::Float32Array, >())?; @@ -235,8 +233,7 @@ impl Loggable for MyPoint64 { data: &dyn arrow::array::Array, ) -> re_types_core::DeserializationResult>> { let array = data - .as_any() - .downcast_ref::() + .downcast_array_ref::() .ok_or(DeserializationError::downcast_error::< arrow::array::StructArray, >())?; @@ -245,14 +242,12 @@ impl Loggable for MyPoint64 { let y_array = array.columns()[1].as_ref(); let xs = x_array - .as_any() - .downcast_ref::() + .downcast_array_ref::() .ok_or(DeserializationError::downcast_error::< arrow::array::Float64Array, >())?; let ys = y_array - .as_any() - .downcast_ref::() + .downcast_array_ref::() .ok_or(DeserializationError::downcast_error::< arrow::array::Float64Array, >())?; diff --git a/crates/store/re_types_core/Cargo.toml b/crates/store/re_types_core/Cargo.toml index 2b83921d9012..f37d8d987a83 100644 --- a/crates/store/re_types_core/Cargo.toml +++ b/crates/store/re_types_core/Cargo.toml @@ -34,6 +34,7 @@ serde = ["dep:serde", "re_string_interner/serde"] [dependencies] # Rerun +re_arrow_util.workspace = true re_byte_size.workspace = true re_case.workspace = true re_error.workspace = true diff --git a/crates/store/re_types_core/src/tuid.rs b/crates/store/re_types_core/src/tuid.rs index 1bcbd9d6e22a..0075f2346e3e 100644 --- a/crates/store/re_types_core/src/tuid.rs +++ b/crates/store/re_types_core/src/tuid.rs @@ -5,6 +5,7 @@ use arrow::{ datatypes::{DataType, Field, Fields}, }; +use re_arrow_util::ArrowArrayDowncastRef as _; use re_tuid::Tuid; use crate::{DeserializationError, Loggable}; @@ -79,7 +80,7 @@ impl Loggable for Tuid { // NOTE: Unwrap is safe everywhere below, datatype is checked above. // NOTE: We don't even look at the validity, our datatype says we don't care. - let array = array.as_any().downcast_ref::().unwrap(); + let array = array.downcast_array_ref::().unwrap(); // TODO(cmc): Can we rely on the fields ordering from the datatype? I would assume not // since we generally cannot rely on anything when it comes to arrow… @@ -99,8 +100,7 @@ impl Loggable for Tuid { let get_buffer = |field_index: usize| { array.columns()[field_index] - .as_any() - .downcast_ref::() + .downcast_array_ref::() .unwrap() .values() }; diff --git a/crates/utils/re_arrow_util/src/arrow_util.rs b/crates/utils/re_arrow_util/src/arrow_util.rs index f240b109c833..a8a5cd298822 100644 --- a/crates/utils/re_arrow_util/src/arrow_util.rs +++ b/crates/utils/re_arrow_util/src/arrow_util.rs @@ -19,16 +19,13 @@ pub trait ArrowArrayDowncastRef { fn downcast_array_ref(&self) -> Option<&T>; } -impl ArrowArrayDowncastRef for dyn Array { +impl ArrowArrayDowncastRef for &dyn Array { fn downcast_array_ref(&self) -> Option<&T> { self.as_any().downcast_ref() } } -impl ArrowArrayDowncastRef for A -where - A: Array, -{ +impl ArrowArrayDowncastRef for ArrayRef { fn downcast_array_ref(&self) -> Option<&T> { self.as_any().downcast_ref() } diff --git a/crates/viewer/re_ui/Cargo.toml b/crates/viewer/re_ui/Cargo.toml index f166efe51cf2..126ebfe9d6f3 100644 --- a/crates/viewer/re_ui/Cargo.toml +++ b/crates/viewer/re_ui/Cargo.toml @@ -34,6 +34,7 @@ arrow = ["dep:arrow"] [dependencies] +re_arrow_util.workspace = true re_entity_db.workspace = true # syntax-highlighting for InstancePath. TODO(emilk): move InstancePath re_format.workspace = true re_log.workspace = true diff --git a/crates/viewer/re_ui/src/arrow_ui.rs b/crates/viewer/re_ui/src/arrow_ui.rs index 778c91fe64ba..4333279b9730 100644 --- a/crates/viewer/re_ui/src/arrow_ui.rs +++ b/crates/viewer/re_ui/src/arrow_ui.rs @@ -1,6 +1,8 @@ use arrow::util::display::{ArrayFormatter, FormatOptions}; use itertools::Itertools as _; +use re_arrow_util::ArrowArrayDowncastRef as _; + use crate::UiLayout; pub fn arrow_ui(ui: &mut egui::Ui, ui_layout: UiLayout, array: &dyn arrow::array::Array) { @@ -16,14 +18,14 @@ pub fn arrow_ui(ui: &mut egui::Ui, ui_layout: UiLayout, array: &dyn arrow::array // Special-treat text. // Note: we match on the raw data here, so this works for any component containing text. - if let Some(utf8) = array.as_any().downcast_ref::() { + if let Some(utf8) = array.downcast_array_ref::() { if utf8.values().len() == 1 { let string = utf8.value(0); ui_layout.data_label(ui, string); return; } } - if let Some(utf8) = array.as_any().downcast_ref::() { + if let Some(utf8) = array.downcast_array_ref::() { if utf8.values().len() == 1 { let string = utf8.value(0); ui_layout.data_label(ui, string); diff --git a/crates/viewer/re_view_dataframe/Cargo.toml b/crates/viewer/re_view_dataframe/Cargo.toml index ab6613059ce6..129b3c75e566 100644 --- a/crates/viewer/re_view_dataframe/Cargo.toml +++ b/crates/viewer/re_view_dataframe/Cargo.toml @@ -19,6 +19,7 @@ workspace = true all-features = true [dependencies] +re_arrow_util.workspace = true re_chunk_store.workspace = true re_dataframe.workspace = true re_error.workspace = true diff --git a/crates/viewer/re_view_dataframe/src/display_record_batch.rs b/crates/viewer/re_view_dataframe/src/display_record_batch.rs index b774c5f80f90..99d8d65fd735 100644 --- a/crates/viewer/re_view_dataframe/src/display_record_batch.rs +++ b/crates/viewer/re_view_dataframe/src/display_record_batch.rs @@ -11,6 +11,7 @@ use arrow::{ }; use thiserror::Error; +use re_arrow_util::ArrowArrayDowncastRef as _; use re_chunk_store::{ColumnDescriptor, ComponentColumnDescriptor, LatestAtQuery}; use re_dataframe::external::re_chunk::{TimeColumn, TimeColumnError}; use re_log_types::{EntityPath, TimeInt, Timeline}; @@ -52,21 +53,18 @@ impl ComponentData { ArrowDataType::Null => Ok(Self::Null), ArrowDataType::List(_) => Ok(Self::ListArray( column_data - .as_any() - .downcast_ref::() + .downcast_array_ref::() .expect("`data_type` checked, failure is a bug in re_dataframe") .clone(), )), ArrowDataType::Dictionary(_, _) => { let dict = column_data - .as_any() - .downcast_ref::() + .downcast_array_ref::() .expect("`data_type` checked, failure is a bug in re_dataframe") .clone(); let values = dict .values() - .as_any() - .downcast_ref::() + .downcast_array_ref::() .expect("`data_type` checked, failure is a bug in re_dataframe") .clone(); Ok(Self::DictionaryArray { dict, values }) diff --git a/rerun_py/src/dataframe.rs b/rerun_py/src/dataframe.rs index fd7772b3a4be..79b8ba2f82d2 100644 --- a/rerun_py/src/dataframe.rs +++ b/rerun_py/src/dataframe.rs @@ -8,7 +8,7 @@ use std::{ }; use arrow::{ - array::{make_array, Array, ArrayData, Int64Array, RecordBatchIterator, RecordBatchReader}, + array::{make_array, ArrayData, Int64Array, RecordBatchIterator, RecordBatchReader}, pyarrow::PyArrowType, }; use numpy::PyArrayMethods as _; @@ -18,6 +18,7 @@ use pyo3::{ types::{PyDict, PyTuple}, }; +use re_arrow_util::ArrowArrayDowncastRef as _; use re_chunk_store::{ ChunkStore, ChunkStoreConfig, ChunkStoreHandle, ColumnDescriptor, ColumnSelector, ComponentColumnDescriptor, ComponentColumnSelector, QueryExpression, SparseFillStrategy, @@ -344,7 +345,7 @@ impl IndexValuesLike<'_> { Self::PyArrow(array) => { let array = make_array(array.0.clone()); - let int_array = array.as_any().downcast_ref::().ok_or_else(|| { + let int_array = array.downcast_array_ref::().ok_or_else(|| { PyTypeError::new_err("pyarrow.Array for IndexValuesLike must be of type int64.") })?; @@ -393,7 +394,7 @@ impl IndexValuesLike<'_> { let array = make_array(chunk.0.clone()); let int_array = - array.as_any().downcast_ref::().ok_or_else(|| { + array.downcast_array_ref::().ok_or_else(|| { PyTypeError::new_err( "pyarrow.Array for IndexValuesLike must be of type int64.", ) From dcbf2b72a7f075201b059218a800b88bfe3bcd3e Mon Sep 17 00:00:00 2001 From: Emil Ernerfeldt Date: Tue, 14 Jan 2025 19:14:43 +0100 Subject: [PATCH 7/9] Update ARCHITECTURE.md --- ARCHITECTURE.md | 1 + 1 file changed, 1 insertion(+) diff --git a/ARCHITECTURE.md b/ARCHITECTURE.md index ccfe5dcc570d..a4976c373956 100644 --- a/ARCHITECTURE.md +++ b/ARCHITECTURE.md @@ -210,6 +210,7 @@ Update instructions: | Crate | Description | |--------------------|--------------------------------------------------------------------------------------| | re_analytics | Rerun's analytics SDK | +| re_arrow_util | Helpers for working with arrow | | re_byte_size | Calculate the heap-allocated size of values at runtime | | re_capabilities | Capability tokens | | re_case | Case conversions, the way Rerun likes them | From abf808e5f14c181c833578196547b7846255f556 Mon Sep 17 00:00:00 2001 From: Emil Ernerfeldt Date: Tue, 14 Jan 2025 19:18:09 +0100 Subject: [PATCH 8/9] toml fmt --- crates/viewer/re_ui/Cargo.toml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/crates/viewer/re_ui/Cargo.toml b/crates/viewer/re_ui/Cargo.toml index 126ebfe9d6f3..461293cc5246 100644 --- a/crates/viewer/re_ui/Cargo.toml +++ b/crates/viewer/re_ui/Cargo.toml @@ -35,10 +35,10 @@ arrow = ["dep:arrow"] [dependencies] re_arrow_util.workspace = true -re_entity_db.workspace = true # syntax-highlighting for InstancePath. TODO(emilk): move InstancePath +re_entity_db.workspace = true # syntax-highlighting for InstancePath. TODO(emilk): move InstancePath re_format.workspace = true re_log.workspace = true -re_log_types.workspace = true # syntax-highlighting for EntityPath +re_log_types.workspace = true # syntax-highlighting for EntityPath re_tracing.workspace = true eframe = { workspace = true, default-features = false, features = ["wgpu"] } From 93adae2cfe3a39637f422199b575fe933c7b160e Mon Sep 17 00:00:00 2001 From: Emil Ernerfeldt Date: Wed, 15 Jan 2025 05:42:41 +0100 Subject: [PATCH 9/9] Update re_arrow2 --- Cargo.lock | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Cargo.lock b/Cargo.lock index 8370f6352a8b..950cec6514a2 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -5584,7 +5584,7 @@ dependencies = [ [[package]] name = "re_arrow2" version = "0.18.1" -source = "git+https://github.com/rerun-io/re_arrow2.git?branch=main#e8576708a1b41b493980ecb995e808aefcfa1fbc" +source = "git+https://github.com/rerun-io/re_arrow2.git?branch=main#61ac48418df229584155c5f669477bedf76d4505" dependencies = [ "ahash", "arrow-array",