diff --git a/duckdb b/duckdb index 89bcc3e..0024e5d 160000 --- a/duckdb +++ b/duckdb @@ -1 +1 @@ -Subproject commit 89bcc3e2ce739b1b470afa79818ee03c8cf96fe8 +Subproject commit 0024e5d4beba0185733df68642775e3f38e089cb diff --git a/src/delta_utils.cpp b/src/delta_utils.cpp index 9cf8337..2b21527 100644 --- a/src/delta_utils.cpp +++ b/src/delta_utils.cpp @@ -638,9 +638,9 @@ uintptr_t PredicateVisitor::VisitFilter(const string &col_name, const TableFilte void LoggerCallback::Initialize(DatabaseInstance &db_p) { auto &instance = GetInstance(); unique_lock lck(instance.lock); - if (instance.db.expired()) { - instance.db = db_p.shared_from_this(); - } + if (instance.db.expired()) { + instance.db = db_p.shared_from_this(); + } } void LoggerCallback::CallbackEvent(ffi::Event event) { diff --git a/src/functions/delta_scan.cpp b/src/functions/delta_scan.cpp index 0593d77..3c287a9 100644 --- a/src/functions/delta_scan.cpp +++ b/src/functions/delta_scan.cpp @@ -711,12 +711,12 @@ void DeltaMultiFileReader::BindOptions(MultiFileReaderOptions &options, MultiFil void DeltaMultiFileReader::FinalizeBind(const MultiFileReaderOptions &file_options, const MultiFileReaderBindData &options, const string &filename, - const vector &local_names, const vector &global_types, - const vector &global_names, + const vector &local_columns, + const vector &global_columns, const vector &global_column_ids, MultiFileReaderData &reader_data, ClientContext &context, optional_ptr global_state) { - MultiFileReader::FinalizeBind(file_options, options, filename, local_names, global_types, global_names, - global_column_ids, reader_data, context, global_state); + MultiFileReader::FinalizeBind(file_options, options, filename, local_columns, global_columns, global_column_ids, + reader_data, context, global_state); // Handle custom delta option set in MultiFileReaderOptions::custom_options auto file_number_opt = file_options.custom_options.find("delta_file_number"); @@ -744,9 +744,9 @@ void DeltaMultiFileReader::FinalizeBind(const MultiFileReaderOptions &file_optio if (IsRowIdColumnId(col_id)) { continue; } - auto col_partition_entry = file_metadata.partition_map.find(global_names[col_id]); + auto col_partition_entry = file_metadata.partition_map.find(global_columns[col_id].name); if (col_partition_entry != file_metadata.partition_map.end()) { - auto ¤t_type = global_types[col_id]; + auto ¤t_type = global_columns[col_id].type; if (current_type == LogicalType::BLOB) { reader_data.constant_map.emplace_back(i, Value::BLOB_RAW(col_partition_entry->second)); } else { @@ -813,11 +813,11 @@ void DeltaMultiFileReaderGlobalState::SetColumnIdx(const string &column, idx_t i throw IOException("Unknown column '%s' found as required by the DeltaMultiFileReader"); } -unique_ptr DeltaMultiFileReader::InitializeGlobalState( - duckdb::ClientContext &context, const duckdb::MultiFileReaderOptions &file_options, - const duckdb::MultiFileReaderBindData &bind_data, const duckdb::MultiFileList &file_list, - const vector &global_types, const vector &global_names, - const vector &global_column_ids) { +unique_ptr +DeltaMultiFileReader::InitializeGlobalState(ClientContext &context, const MultiFileReaderOptions &file_options, + const MultiFileReaderBindData &bind_data, const MultiFileList &file_list, + const vector &global_columns, + const vector &global_column_ids) { vector extra_columns; vector> mapped_columns; @@ -829,7 +829,7 @@ unique_ptr DeltaMultiFileReader::InitializeGlobalSta continue; } - auto &global_name = global_names[global_id]; + auto &global_name = global_columns[global_id].name; selected_columns.insert({global_name, i}); } @@ -881,17 +881,16 @@ unique_ptr DeltaMultiFileReader::InitializeGlobalSta // This code is duplicated from MultiFileReader::CreateNameMapping the difference is that for columns that are not found // in the parquet files, we just add null constant columns -static void CustomMulfiFileNameMapping(const string &file_name, const vector &local_types, - const vector &local_names, const vector &global_types, - const vector &global_names, const vector &global_column_ids, - MultiFileReaderData &reader_data, const string &initial_file, +static void CustomMulfiFileNameMapping(const string &file_name, + const vector &local_columns, + const vector &global_columns, + const vector &global_column_ids, MultiFileReaderData &reader_data, + const string &initial_file, optional_ptr global_state) { - D_ASSERT(global_types.size() == global_names.size()); - D_ASSERT(local_types.size() == local_names.size()); // we have expected types: create a map of name -> column index case_insensitive_map_t name_map; - for (idx_t col_idx = 0; col_idx < local_names.size(); col_idx++) { - name_map[local_names[col_idx]] = col_idx; + for (idx_t col_idx = 0; col_idx < local_columns.size(); col_idx++) { + name_map[local_columns[col_idx].name] = col_idx; } for (idx_t i = 0; i < global_column_ids.size(); i++) { // check if this is a constant column @@ -908,32 +907,32 @@ static void CustomMulfiFileNameMapping(const string &file_name, const vector= global_types.size()) { + if (global_id >= global_columns.size()) { throw InternalException( "MultiFileReader::CreatePositionalMapping - global_id is out of range in global_types for this file"); } - auto &global_name = global_names[global_id]; + auto &global_name = global_columns[global_id].name; auto entry = name_map.find(global_name); if (entry == name_map.end()) { string candidate_names; - for (auto &local_name : local_names) { + for (auto &local_column : local_columns) { if (!candidate_names.empty()) { candidate_names += ", "; } - candidate_names += local_name; + candidate_names += local_column.name; } // FIXME: this override is pretty hacky: for missing columns we just insert NULL constants - auto &global_type = global_types[global_id]; + auto &global_type = global_columns[global_id].type; Value val(global_type); reader_data.constant_map.push_back({i, val}); continue; } // we found the column in the local file - check if the types are the same auto local_id = entry->second; - D_ASSERT(global_id < global_types.size()); - D_ASSERT(local_id < local_types.size()); - auto &global_type = global_types[global_id]; - auto &local_type = local_types[local_id]; + D_ASSERT(global_id < global_columns.size()); + D_ASSERT(local_id < local_columns.size()); + auto &global_type = global_columns[global_id].type; + auto &local_type = local_columns[local_id].type; if (global_type != local_type) { reader_data.cast_map[local_id] = global_type; } @@ -945,15 +944,16 @@ static void CustomMulfiFileNameMapping(const string &file_name, const vector &local_types, - const vector &local_names, const vector &global_types, - const vector &global_names, - const vector &global_column_ids, - MultiFileReaderData &reader_data, const string &initial_file, - optional_ptr global_state) { +void DeltaMultiFileReader::CreateColumnMapping(const string &file_name, + const vector &local_columns, + const vector &global_columns, + const vector &global_column_ids, + MultiFileReaderData &reader_data, + const MultiFileReaderBindData &bind_data, const string &initial_file, + optional_ptr global_state) { // First call the base implementation to do most mapping - CustomMulfiFileNameMapping(file_name, local_types, local_names, global_types, global_names, global_column_ids, - reader_data, initial_file, global_state); + CustomMulfiFileNameMapping(file_name, local_columns, global_columns, global_column_ids, reader_data, initial_file, + global_state); // Then we handle delta specific mapping D_ASSERT(global_state); @@ -965,8 +965,8 @@ void DeltaMultiFileReader::CreateNameMapping(const string &file_name, const vect // Build the name map case_insensitive_map_t name_map; - for (idx_t col_idx = 0; col_idx < local_names.size(); col_idx++) { - name_map[local_names[col_idx]] = col_idx; + for (idx_t col_idx = 0; col_idx < local_columns.size(); col_idx++) { + name_map[local_columns[col_idx].name] = col_idx; } // Lookup the required column in the local map diff --git a/src/include/functions/delta_scan.hpp b/src/include/functions/delta_scan.hpp index 86edd2e..f363920 100644 --- a/src/include/functions/delta_scan.hpp +++ b/src/include/functions/delta_scan.hpp @@ -141,21 +141,21 @@ struct DeltaMultiFileReader : public MultiFileReader { void BindOptions(MultiFileReaderOptions &options, MultiFileList &files, vector &return_types, vector &names, MultiFileReaderBindData &bind_data) override; - void CreateNameMapping(const string &file_name, const vector &local_types, - const vector &local_names, const vector &global_types, - const vector &global_names, const vector &global_column_ids, - MultiFileReaderData &reader_data, const string &initial_file, - optional_ptr global_state) override; + void CreateColumnMapping(const string &file_name, const vector &local_columns, + const vector &global_columns, + const vector &global_column_ids, MultiFileReaderData &reader_data, + const MultiFileReaderBindData &bind_data, const string &initial_file, + optional_ptr global_state) override; unique_ptr InitializeGlobalState(ClientContext &context, const MultiFileReaderOptions &file_options, const MultiFileReaderBindData &bind_data, const MultiFileList &file_list, - const vector &global_types, const vector &global_names, + const vector &global_columns, const vector &global_column_ids) override; void FinalizeBind(const MultiFileReaderOptions &file_options, const MultiFileReaderBindData &options, - const string &filename, const vector &local_names, - const vector &global_types, const vector &global_names, + const string &filename, const vector &local_columns, + const vector &global_columns, const vector &global_column_ids, MultiFileReaderData &reader_data, ClientContext &context, optional_ptr global_state) override;