Skip to content

Commit

Permalink
bump duckdb, apply patches
Browse files Browse the repository at this point in the history
  • Loading branch information
samansmink committed Jan 16, 2025
1 parent 2e99af7 commit ef568c6
Show file tree
Hide file tree
Showing 4 changed files with 51 additions and 51 deletions.
2 changes: 1 addition & 1 deletion duckdb
Submodule duckdb updated 302 files
6 changes: 3 additions & 3 deletions src/delta_utils.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -638,9 +638,9 @@ uintptr_t PredicateVisitor::VisitFilter(const string &col_name, const TableFilte
void LoggerCallback::Initialize(DatabaseInstance &db_p) {
auto &instance = GetInstance();
unique_lock<mutex> lck(instance.lock);
if (instance.db.expired()) {
instance.db = db_p.shared_from_this();
}
if (instance.db.expired()) {
instance.db = db_p.shared_from_this();
}
}

void LoggerCallback::CallbackEvent(ffi::Event event) {
Expand Down
78 changes: 39 additions & 39 deletions src/functions/delta_scan.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -711,12 +711,12 @@ void DeltaMultiFileReader::BindOptions(MultiFileReaderOptions &options, MultiFil

void DeltaMultiFileReader::FinalizeBind(const MultiFileReaderOptions &file_options,
const MultiFileReaderBindData &options, const string &filename,
const vector<string> &local_names, const vector<LogicalType> &global_types,
const vector<string> &global_names,
const vector<MultiFileReaderColumnDefinition> &local_columns,
const vector<MultiFileReaderColumnDefinition> &global_columns,
const vector<ColumnIndex> &global_column_ids, MultiFileReaderData &reader_data,
ClientContext &context, optional_ptr<MultiFileReaderGlobalState> global_state) {
MultiFileReader::FinalizeBind(file_options, options, filename, local_names, global_types, global_names,
global_column_ids, reader_data, context, global_state);
MultiFileReader::FinalizeBind(file_options, options, filename, local_columns, global_columns, global_column_ids,
reader_data, context, global_state);

// Handle custom delta option set in MultiFileReaderOptions::custom_options
auto file_number_opt = file_options.custom_options.find("delta_file_number");
Expand Down Expand Up @@ -744,9 +744,9 @@ void DeltaMultiFileReader::FinalizeBind(const MultiFileReaderOptions &file_optio
if (IsRowIdColumnId(col_id)) {
continue;
}
auto col_partition_entry = file_metadata.partition_map.find(global_names[col_id]);
auto col_partition_entry = file_metadata.partition_map.find(global_columns[col_id].name);
if (col_partition_entry != file_metadata.partition_map.end()) {
auto &current_type = global_types[col_id];
auto &current_type = global_columns[col_id].type;
if (current_type == LogicalType::BLOB) {
reader_data.constant_map.emplace_back(i, Value::BLOB_RAW(col_partition_entry->second));
} else {
Expand Down Expand Up @@ -813,11 +813,11 @@ void DeltaMultiFileReaderGlobalState::SetColumnIdx(const string &column, idx_t i
throw IOException("Unknown column '%s' found as required by the DeltaMultiFileReader");
}

unique_ptr<MultiFileReaderGlobalState> DeltaMultiFileReader::InitializeGlobalState(
duckdb::ClientContext &context, const duckdb::MultiFileReaderOptions &file_options,
const duckdb::MultiFileReaderBindData &bind_data, const duckdb::MultiFileList &file_list,
const vector<duckdb::LogicalType> &global_types, const vector<std::string> &global_names,
const vector<ColumnIndex> &global_column_ids) {
unique_ptr<MultiFileReaderGlobalState>
DeltaMultiFileReader::InitializeGlobalState(ClientContext &context, const MultiFileReaderOptions &file_options,
const MultiFileReaderBindData &bind_data, const MultiFileList &file_list,
const vector<MultiFileReaderColumnDefinition> &global_columns,
const vector<ColumnIndex> &global_column_ids) {
vector<LogicalType> extra_columns;
vector<pair<string, idx_t>> mapped_columns;

Expand All @@ -829,7 +829,7 @@ unique_ptr<MultiFileReaderGlobalState> DeltaMultiFileReader::InitializeGlobalSta
continue;
}

auto &global_name = global_names[global_id];
auto &global_name = global_columns[global_id].name;
selected_columns.insert({global_name, i});
}

Expand Down Expand Up @@ -881,17 +881,16 @@ unique_ptr<MultiFileReaderGlobalState> DeltaMultiFileReader::InitializeGlobalSta

// This code is duplicated from MultiFileReader::CreateNameMapping the difference is that for columns that are not found
// in the parquet files, we just add null constant columns
static void CustomMulfiFileNameMapping(const string &file_name, const vector<LogicalType> &local_types,
const vector<string> &local_names, const vector<LogicalType> &global_types,
const vector<string> &global_names, const vector<ColumnIndex> &global_column_ids,
MultiFileReaderData &reader_data, const string &initial_file,
static void CustomMulfiFileNameMapping(const string &file_name,
const vector<MultiFileReaderColumnDefinition> &local_columns,
const vector<MultiFileReaderColumnDefinition> &global_columns,
const vector<ColumnIndex> &global_column_ids, MultiFileReaderData &reader_data,
const string &initial_file,
optional_ptr<MultiFileReaderGlobalState> global_state) {
D_ASSERT(global_types.size() == global_names.size());
D_ASSERT(local_types.size() == local_names.size());
// we have expected types: create a map of name -> column index
case_insensitive_map_t<idx_t> name_map;
for (idx_t col_idx = 0; col_idx < local_names.size(); col_idx++) {
name_map[local_names[col_idx]] = col_idx;
for (idx_t col_idx = 0; col_idx < local_columns.size(); col_idx++) {
name_map[local_columns[col_idx].name] = col_idx;
}
for (idx_t i = 0; i < global_column_ids.size(); i++) {
// check if this is a constant column
Expand All @@ -908,32 +907,32 @@ static void CustomMulfiFileNameMapping(const string &file_name, const vector<Log
}
// not constant - look up the column in the name map
auto global_id = global_column_ids[i].GetPrimaryIndex();
if (global_id >= global_types.size()) {
if (global_id >= global_columns.size()) {
throw InternalException(
"MultiFileReader::CreatePositionalMapping - global_id is out of range in global_types for this file");
}
auto &global_name = global_names[global_id];
auto &global_name = global_columns[global_id].name;
auto entry = name_map.find(global_name);
if (entry == name_map.end()) {
string candidate_names;
for (auto &local_name : local_names) {
for (auto &local_column : local_columns) {
if (!candidate_names.empty()) {
candidate_names += ", ";
}
candidate_names += local_name;
candidate_names += local_column.name;
}
// FIXME: this override is pretty hacky: for missing columns we just insert NULL constants
auto &global_type = global_types[global_id];
auto &global_type = global_columns[global_id].type;
Value val(global_type);
reader_data.constant_map.push_back({i, val});
continue;
}
// we found the column in the local file - check if the types are the same
auto local_id = entry->second;
D_ASSERT(global_id < global_types.size());
D_ASSERT(local_id < local_types.size());
auto &global_type = global_types[global_id];
auto &local_type = local_types[local_id];
D_ASSERT(global_id < global_columns.size());
D_ASSERT(local_id < local_columns.size());
auto &global_type = global_columns[global_id].type;
auto &local_type = local_columns[local_id].type;
if (global_type != local_type) {
reader_data.cast_map[local_id] = global_type;
}
Expand All @@ -945,15 +944,16 @@ static void CustomMulfiFileNameMapping(const string &file_name, const vector<Log
reader_data.empty_columns = reader_data.column_ids.empty();
}

void DeltaMultiFileReader::CreateNameMapping(const string &file_name, const vector<LogicalType> &local_types,
const vector<string> &local_names, const vector<LogicalType> &global_types,
const vector<string> &global_names,
const vector<ColumnIndex> &global_column_ids,
MultiFileReaderData &reader_data, const string &initial_file,
optional_ptr<MultiFileReaderGlobalState> global_state) {
void DeltaMultiFileReader::CreateColumnMapping(const string &file_name,
const vector<MultiFileReaderColumnDefinition> &local_columns,
const vector<MultiFileReaderColumnDefinition> &global_columns,
const vector<ColumnIndex> &global_column_ids,
MultiFileReaderData &reader_data,
const MultiFileReaderBindData &bind_data, const string &initial_file,
optional_ptr<MultiFileReaderGlobalState> global_state) {
// First call the base implementation to do most mapping
CustomMulfiFileNameMapping(file_name, local_types, local_names, global_types, global_names, global_column_ids,
reader_data, initial_file, global_state);
CustomMulfiFileNameMapping(file_name, local_columns, global_columns, global_column_ids, reader_data, initial_file,
global_state);

// Then we handle delta specific mapping
D_ASSERT(global_state);
Expand All @@ -965,8 +965,8 @@ void DeltaMultiFileReader::CreateNameMapping(const string &file_name, const vect

// Build the name map
case_insensitive_map_t<idx_t> name_map;
for (idx_t col_idx = 0; col_idx < local_names.size(); col_idx++) {
name_map[local_names[col_idx]] = col_idx;
for (idx_t col_idx = 0; col_idx < local_columns.size(); col_idx++) {
name_map[local_columns[col_idx].name] = col_idx;
}

// Lookup the required column in the local map
Expand Down
16 changes: 8 additions & 8 deletions src/include/functions/delta_scan.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -141,21 +141,21 @@ struct DeltaMultiFileReader : public MultiFileReader {
void BindOptions(MultiFileReaderOptions &options, MultiFileList &files, vector<LogicalType> &return_types,
vector<string> &names, MultiFileReaderBindData &bind_data) override;

void CreateNameMapping(const string &file_name, const vector<LogicalType> &local_types,
const vector<string> &local_names, const vector<LogicalType> &global_types,
const vector<string> &global_names, const vector<ColumnIndex> &global_column_ids,
MultiFileReaderData &reader_data, const string &initial_file,
optional_ptr<MultiFileReaderGlobalState> global_state) override;
void CreateColumnMapping(const string &file_name, const vector<MultiFileReaderColumnDefinition> &local_columns,
const vector<MultiFileReaderColumnDefinition> &global_columns,
const vector<ColumnIndex> &global_column_ids, MultiFileReaderData &reader_data,
const MultiFileReaderBindData &bind_data, const string &initial_file,
optional_ptr<MultiFileReaderGlobalState> global_state) override;

unique_ptr<MultiFileReaderGlobalState>
InitializeGlobalState(ClientContext &context, const MultiFileReaderOptions &file_options,
const MultiFileReaderBindData &bind_data, const MultiFileList &file_list,
const vector<LogicalType> &global_types, const vector<string> &global_names,
const vector<MultiFileReaderColumnDefinition> &global_columns,
const vector<ColumnIndex> &global_column_ids) override;

void FinalizeBind(const MultiFileReaderOptions &file_options, const MultiFileReaderBindData &options,
const string &filename, const vector<string> &local_names,
const vector<LogicalType> &global_types, const vector<string> &global_names,
const string &filename, const vector<MultiFileReaderColumnDefinition> &local_columns,
const vector<MultiFileReaderColumnDefinition> &global_columns,
const vector<ColumnIndex> &global_column_ids, MultiFileReaderData &reader_data,
ClientContext &context, optional_ptr<MultiFileReaderGlobalState> global_state) override;

Expand Down

0 comments on commit ef568c6

Please sign in to comment.