From 1261ea985c0b9707b3924bec2c64ddecd35efbb6 Mon Sep 17 00:00:00 2001 From: Sam Ansmink Date: Thu, 24 Oct 2024 16:09:57 +0200 Subject: [PATCH 01/45] Support attaching delta tables as catalogs --- .github/regression/micro.csv | 5 + .github/workflows/LocalTesting.yml | 6 + .gitignore | 2 +- CMakeLists.txt | 8 +- benchmark/benchmark.Makefile | 17 +- .../snapshot_performance/delta_scan.benchmark | 16 ++ .../delta_scan_filter.benchmark | 16 ++ .../snapshot_no_pin.benchmark | 19 ++ .../snapshot_no_pin_filter.benchmark | 19 ++ .../snapshot_pin.benchmark | 19 ++ .../snapshot_pin_filter.benchmark | 19 ++ .../tpcds/sf1/local/delta_attach/load.sql | 24 +++ .../sf1/local/delta_attach/q01.benchmark | 7 + .../sf1/local/delta_attach/q02.benchmark | 7 + .../sf1/local/delta_attach/q03.benchmark | 7 + .../sf1/local/delta_attach/q04.benchmark | 7 + .../sf1/local/delta_attach/q05.benchmark | 7 + .../sf1/local/delta_attach/q06.benchmark | 7 + .../sf1/local/delta_attach/q07.benchmark | 7 + .../sf1/local/delta_attach/q08.benchmark | 7 + .../sf1/local/delta_attach/q09.benchmark | 7 + .../sf1/local/delta_attach/q10.benchmark | 7 + .../sf1/local/delta_attach/q11.benchmark | 7 + .../sf1/local/delta_attach/q12.benchmark | 7 + .../sf1/local/delta_attach/q13.benchmark | 7 + .../sf1/local/delta_attach/q14.benchmark | 7 + .../sf1/local/delta_attach/q15.benchmark | 7 + .../sf1/local/delta_attach/q16.benchmark | 7 + .../sf1/local/delta_attach/q17.benchmark | 7 + .../sf1/local/delta_attach/q18.benchmark | 7 + .../sf1/local/delta_attach/q19.benchmark | 7 + .../sf1/local/delta_attach/q20.benchmark | 7 + .../sf1/local/delta_attach/q21.benchmark | 7 + .../sf1/local/delta_attach/q22.benchmark | 7 + .../sf1/local/delta_attach/q23.benchmark | 7 + .../sf1/local/delta_attach/q24.benchmark | 7 + .../sf1/local/delta_attach/q25.benchmark | 7 + .../sf1/local/delta_attach/q26.benchmark | 7 + .../sf1/local/delta_attach/q27.benchmark | 7 + .../sf1/local/delta_attach/q28.benchmark | 7 + 
.../sf1/local/delta_attach/q29.benchmark | 7 + .../sf1/local/delta_attach/q30.benchmark | 7 + .../sf1/local/delta_attach/q31.benchmark | 7 + .../sf1/local/delta_attach/q32.benchmark | 7 + .../sf1/local/delta_attach/q33.benchmark | 7 + .../sf1/local/delta_attach/q34.benchmark | 7 + .../sf1/local/delta_attach/q35.benchmark | 7 + .../sf1/local/delta_attach/q36.benchmark | 7 + .../sf1/local/delta_attach/q37.benchmark | 7 + .../sf1/local/delta_attach/q38.benchmark | 7 + .../sf1/local/delta_attach/q39.benchmark | 7 + .../sf1/local/delta_attach/q40.benchmark | 7 + .../sf1/local/delta_attach/q41.benchmark | 7 + .../sf1/local/delta_attach/q42.benchmark | 7 + .../sf1/local/delta_attach/q43.benchmark | 7 + .../sf1/local/delta_attach/q44.benchmark | 7 + .../sf1/local/delta_attach/q45.benchmark | 7 + .../sf1/local/delta_attach/q46.benchmark | 7 + .../sf1/local/delta_attach/q47.benchmark | 7 + .../sf1/local/delta_attach/q48.benchmark | 7 + .../sf1/local/delta_attach/q49.benchmark | 7 + .../sf1/local/delta_attach/q50.benchmark | 7 + .../sf1/local/delta_attach/q51.benchmark | 7 + .../sf1/local/delta_attach/q52.benchmark | 7 + .../sf1/local/delta_attach/q53.benchmark | 7 + .../sf1/local/delta_attach/q54.benchmark | 7 + .../sf1/local/delta_attach/q55.benchmark | 7 + .../sf1/local/delta_attach/q56.benchmark | 7 + .../sf1/local/delta_attach/q57.benchmark | 7 + .../sf1/local/delta_attach/q58.benchmark | 7 + .../sf1/local/delta_attach/q59.benchmark | 7 + .../sf1/local/delta_attach/q60.benchmark | 7 + .../sf1/local/delta_attach/q61.benchmark | 7 + .../sf1/local/delta_attach/q62.benchmark | 7 + .../sf1/local/delta_attach/q63.benchmark | 7 + .../sf1/local/delta_attach/q64.benchmark | 7 + .../sf1/local/delta_attach/q65.benchmark | 7 + .../sf1/local/delta_attach/q66.benchmark | 7 + .../sf1/local/delta_attach/q67.benchmark | 7 + .../sf1/local/delta_attach/q68.benchmark | 7 + .../sf1/local/delta_attach/q69.benchmark | 7 + .../sf1/local/delta_attach/q70.benchmark | 7 + 
.../sf1/local/delta_attach/q71.benchmark | 7 + .../sf1/local/delta_attach/q72.benchmark | 7 + .../sf1/local/delta_attach/q73.benchmark | 7 + .../sf1/local/delta_attach/q74.benchmark | 7 + .../sf1/local/delta_attach/q75.benchmark | 7 + .../sf1/local/delta_attach/q76.benchmark | 7 + .../sf1/local/delta_attach/q77.benchmark | 7 + .../sf1/local/delta_attach/q78.benchmark | 7 + .../sf1/local/delta_attach/q79.benchmark | 7 + .../sf1/local/delta_attach/q80.benchmark | 7 + .../sf1/local/delta_attach/q81.benchmark | 7 + .../sf1/local/delta_attach/q82.benchmark | 7 + .../sf1/local/delta_attach/q83.benchmark | 7 + .../sf1/local/delta_attach/q84.benchmark | 7 + .../sf1/local/delta_attach/q85.benchmark | 7 + .../sf1/local/delta_attach/q86.benchmark | 7 + .../sf1/local/delta_attach/q87.benchmark | 7 + .../sf1/local/delta_attach/q88.benchmark | 7 + .../sf1/local/delta_attach/q89.benchmark | 7 + .../sf1/local/delta_attach/q90.benchmark | 7 + .../sf1/local/delta_attach/q91.benchmark | 7 + .../sf1/local/delta_attach/q92.benchmark | 7 + .../sf1/local/delta_attach/q93.benchmark | 7 + .../sf1/local/delta_attach/q94.benchmark | 7 + .../sf1/local/delta_attach/q95.benchmark | 7 + .../sf1/local/delta_attach/q96.benchmark | 7 + .../sf1/local/delta_attach/q97.benchmark | 7 + .../sf1/local/delta_attach/q98.benchmark | 7 + .../sf1/local/delta_attach/q99.benchmark | 7 + .../local/delta_attach/tpcds_sf1.benchmark.in | 17 ++ .../tpcds/sf1/local/delta_attach_pin/load.sql | 24 +++ .../sf1/local/delta_attach_pin/q01.benchmark | 7 + .../sf1/local/delta_attach_pin/q02.benchmark | 7 + .../sf1/local/delta_attach_pin/q03.benchmark | 7 + .../sf1/local/delta_attach_pin/q04.benchmark | 7 + .../sf1/local/delta_attach_pin/q05.benchmark | 7 + .../sf1/local/delta_attach_pin/q06.benchmark | 7 + .../sf1/local/delta_attach_pin/q07.benchmark | 7 + .../sf1/local/delta_attach_pin/q08.benchmark | 7 + .../sf1/local/delta_attach_pin/q09.benchmark | 7 + .../sf1/local/delta_attach_pin/q10.benchmark | 7 + 
.../sf1/local/delta_attach_pin/q11.benchmark | 7 + .../sf1/local/delta_attach_pin/q12.benchmark | 7 + .../sf1/local/delta_attach_pin/q13.benchmark | 7 + .../sf1/local/delta_attach_pin/q14.benchmark | 7 + .../sf1/local/delta_attach_pin/q15.benchmark | 7 + .../sf1/local/delta_attach_pin/q16.benchmark | 7 + .../sf1/local/delta_attach_pin/q17.benchmark | 7 + .../sf1/local/delta_attach_pin/q18.benchmark | 7 + .../sf1/local/delta_attach_pin/q19.benchmark | 7 + .../sf1/local/delta_attach_pin/q20.benchmark | 7 + .../sf1/local/delta_attach_pin/q21.benchmark | 7 + .../sf1/local/delta_attach_pin/q22.benchmark | 7 + .../sf1/local/delta_attach_pin/q23.benchmark | 7 + .../sf1/local/delta_attach_pin/q24.benchmark | 7 + .../sf1/local/delta_attach_pin/q25.benchmark | 7 + .../sf1/local/delta_attach_pin/q26.benchmark | 7 + .../sf1/local/delta_attach_pin/q27.benchmark | 7 + .../sf1/local/delta_attach_pin/q28.benchmark | 7 + .../sf1/local/delta_attach_pin/q29.benchmark | 7 + .../sf1/local/delta_attach_pin/q30.benchmark | 7 + .../sf1/local/delta_attach_pin/q31.benchmark | 7 + .../sf1/local/delta_attach_pin/q32.benchmark | 7 + .../sf1/local/delta_attach_pin/q33.benchmark | 7 + .../sf1/local/delta_attach_pin/q34.benchmark | 7 + .../sf1/local/delta_attach_pin/q35.benchmark | 7 + .../sf1/local/delta_attach_pin/q36.benchmark | 7 + .../sf1/local/delta_attach_pin/q37.benchmark | 7 + .../sf1/local/delta_attach_pin/q38.benchmark | 7 + .../sf1/local/delta_attach_pin/q39.benchmark | 7 + .../sf1/local/delta_attach_pin/q40.benchmark | 7 + .../sf1/local/delta_attach_pin/q41.benchmark | 7 + .../sf1/local/delta_attach_pin/q42.benchmark | 7 + .../sf1/local/delta_attach_pin/q43.benchmark | 7 + .../sf1/local/delta_attach_pin/q44.benchmark | 7 + .../sf1/local/delta_attach_pin/q45.benchmark | 7 + .../sf1/local/delta_attach_pin/q46.benchmark | 7 + .../sf1/local/delta_attach_pin/q47.benchmark | 7 + .../sf1/local/delta_attach_pin/q48.benchmark | 7 + .../sf1/local/delta_attach_pin/q49.benchmark | 7 + 
.../sf1/local/delta_attach_pin/q50.benchmark | 7 + .../sf1/local/delta_attach_pin/q51.benchmark | 7 + .../sf1/local/delta_attach_pin/q52.benchmark | 7 + .../sf1/local/delta_attach_pin/q53.benchmark | 7 + .../sf1/local/delta_attach_pin/q54.benchmark | 7 + .../sf1/local/delta_attach_pin/q55.benchmark | 7 + .../sf1/local/delta_attach_pin/q56.benchmark | 7 + .../sf1/local/delta_attach_pin/q57.benchmark | 7 + .../sf1/local/delta_attach_pin/q58.benchmark | 7 + .../sf1/local/delta_attach_pin/q59.benchmark | 7 + .../sf1/local/delta_attach_pin/q60.benchmark | 7 + .../sf1/local/delta_attach_pin/q61.benchmark | 7 + .../sf1/local/delta_attach_pin/q62.benchmark | 7 + .../sf1/local/delta_attach_pin/q63.benchmark | 7 + .../sf1/local/delta_attach_pin/q64.benchmark | 7 + .../sf1/local/delta_attach_pin/q65.benchmark | 7 + .../sf1/local/delta_attach_pin/q66.benchmark | 7 + .../sf1/local/delta_attach_pin/q67.benchmark | 7 + .../sf1/local/delta_attach_pin/q68.benchmark | 7 + .../sf1/local/delta_attach_pin/q69.benchmark | 7 + .../sf1/local/delta_attach_pin/q70.benchmark | 7 + .../sf1/local/delta_attach_pin/q71.benchmark | 7 + .../sf1/local/delta_attach_pin/q72.benchmark | 7 + .../sf1/local/delta_attach_pin/q73.benchmark | 7 + .../sf1/local/delta_attach_pin/q74.benchmark | 7 + .../sf1/local/delta_attach_pin/q75.benchmark | 7 + .../sf1/local/delta_attach_pin/q76.benchmark | 7 + .../sf1/local/delta_attach_pin/q77.benchmark | 7 + .../sf1/local/delta_attach_pin/q78.benchmark | 7 + .../sf1/local/delta_attach_pin/q79.benchmark | 7 + .../sf1/local/delta_attach_pin/q80.benchmark | 7 + .../sf1/local/delta_attach_pin/q81.benchmark | 7 + .../sf1/local/delta_attach_pin/q82.benchmark | 7 + .../sf1/local/delta_attach_pin/q83.benchmark | 7 + .../sf1/local/delta_attach_pin/q84.benchmark | 7 + .../sf1/local/delta_attach_pin/q85.benchmark | 7 + .../sf1/local/delta_attach_pin/q86.benchmark | 7 + .../sf1/local/delta_attach_pin/q87.benchmark | 7 + .../sf1/local/delta_attach_pin/q88.benchmark | 7 + 
.../sf1/local/delta_attach_pin/q89.benchmark | 7 + .../sf1/local/delta_attach_pin/q90.benchmark | 7 + .../sf1/local/delta_attach_pin/q91.benchmark | 7 + .../sf1/local/delta_attach_pin/q92.benchmark | 7 + .../sf1/local/delta_attach_pin/q93.benchmark | 7 + .../sf1/local/delta_attach_pin/q94.benchmark | 7 + .../sf1/local/delta_attach_pin/q95.benchmark | 7 + .../sf1/local/delta_attach_pin/q96.benchmark | 7 + .../sf1/local/delta_attach_pin/q97.benchmark | 7 + .../sf1/local/delta_attach_pin/q98.benchmark | 7 + .../sf1/local/delta_attach_pin/q99.benchmark | 7 + .../delta_attach_pin/tpcds_sf1.benchmark.in | 17 ++ .../tpch/sf1/local/delta_attach/load.sql | 8 + .../tpch/sf1/local/delta_attach/q01.benchmark | 7 + .../tpch/sf1/local/delta_attach/q02.benchmark | 7 + .../tpch/sf1/local/delta_attach/q03.benchmark | 7 + .../tpch/sf1/local/delta_attach/q04.benchmark | 7 + .../tpch/sf1/local/delta_attach/q05.benchmark | 7 + .../tpch/sf1/local/delta_attach/q06.benchmark | 7 + .../tpch/sf1/local/delta_attach/q07.benchmark | 7 + .../tpch/sf1/local/delta_attach/q08.benchmark | 7 + .../tpch/sf1/local/delta_attach/q09.benchmark | 7 + .../tpch/sf1/local/delta_attach/q10.benchmark | 7 + .../tpch/sf1/local/delta_attach/q11.benchmark | 7 + .../tpch/sf1/local/delta_attach/q12.benchmark | 7 + .../tpch/sf1/local/delta_attach/q13.benchmark | 7 + .../tpch/sf1/local/delta_attach/q14.benchmark | 7 + .../tpch/sf1/local/delta_attach/q15.benchmark | 7 + .../tpch/sf1/local/delta_attach/q16.benchmark | 7 + .../tpch/sf1/local/delta_attach/q17.benchmark | 7 + .../tpch/sf1/local/delta_attach/q18.benchmark | 7 + .../tpch/sf1/local/delta_attach/q19.benchmark | 7 + .../tpch/sf1/local/delta_attach/q20.benchmark | 7 + .../tpch/sf1/local/delta_attach/q21.benchmark | 7 + .../tpch/sf1/local/delta_attach/q22.benchmark | 7 + .../delta_attach/tpch_sf1_delta.benchmark.in | 17 ++ duckdb | 2 +- extension-ci-tools | 2 +- scripts/generate_test_data.py | 13 +- scripts/plot.py | 3 +- src/delta_extension.cpp | 38 ++++ 
src/functions/delta_scan.cpp | 96 ++++++--- src/include/delta_extension.hpp | 2 + src/include/delta_utils.hpp | 55 ++++++ src/include/functions/delta_scan.hpp | 27 ++- src/include/storage/delta_catalog.hpp | 81 ++++++++ src/include/storage/delta_schema_entry.hpp | 52 +++++ src/include/storage/delta_table_entry.hpp | 36 ++++ src/include/storage/delta_transaction.hpp | 45 +++++ .../storage/delta_transaction_manager.hpp | 33 ++++ src/storage/delta_catalog.cpp | 108 ++++++++++ src/storage/delta_schema_entry.cpp | 184 ++++++++++++++++++ src/storage/delta_table_entry.cpp | 74 +++++++ src/storage/delta_transaction.cpp | 42 ++++ src/storage/delta_transaction_manager.cpp | 38 ++++ test/sql/dat/attach.test | 125 ++++++++++++ 257 files changed, 2808 insertions(+), 41 deletions(-) create mode 100644 .github/regression/micro.csv create mode 100644 benchmark/micro/snapshot_performance/delta_scan.benchmark create mode 100644 benchmark/micro/snapshot_performance/delta_scan_filter.benchmark create mode 100644 benchmark/micro/snapshot_performance/snapshot_no_pin.benchmark create mode 100644 benchmark/micro/snapshot_performance/snapshot_no_pin_filter.benchmark create mode 100644 benchmark/micro/snapshot_performance/snapshot_pin.benchmark create mode 100644 benchmark/micro/snapshot_performance/snapshot_pin_filter.benchmark create mode 100644 benchmark/tpcds/sf1/local/delta_attach/load.sql create mode 100644 benchmark/tpcds/sf1/local/delta_attach/q01.benchmark create mode 100644 benchmark/tpcds/sf1/local/delta_attach/q02.benchmark create mode 100644 benchmark/tpcds/sf1/local/delta_attach/q03.benchmark create mode 100644 benchmark/tpcds/sf1/local/delta_attach/q04.benchmark create mode 100644 benchmark/tpcds/sf1/local/delta_attach/q05.benchmark create mode 100644 benchmark/tpcds/sf1/local/delta_attach/q06.benchmark create mode 100644 benchmark/tpcds/sf1/local/delta_attach/q07.benchmark create mode 100644 benchmark/tpcds/sf1/local/delta_attach/q08.benchmark create mode 100644 
benchmark/tpcds/sf1/local/delta_attach/q09.benchmark create mode 100644 benchmark/tpcds/sf1/local/delta_attach/q10.benchmark create mode 100644 benchmark/tpcds/sf1/local/delta_attach/q11.benchmark create mode 100644 benchmark/tpcds/sf1/local/delta_attach/q12.benchmark create mode 100644 benchmark/tpcds/sf1/local/delta_attach/q13.benchmark create mode 100644 benchmark/tpcds/sf1/local/delta_attach/q14.benchmark create mode 100644 benchmark/tpcds/sf1/local/delta_attach/q15.benchmark create mode 100644 benchmark/tpcds/sf1/local/delta_attach/q16.benchmark create mode 100644 benchmark/tpcds/sf1/local/delta_attach/q17.benchmark create mode 100644 benchmark/tpcds/sf1/local/delta_attach/q18.benchmark create mode 100644 benchmark/tpcds/sf1/local/delta_attach/q19.benchmark create mode 100644 benchmark/tpcds/sf1/local/delta_attach/q20.benchmark create mode 100644 benchmark/tpcds/sf1/local/delta_attach/q21.benchmark create mode 100644 benchmark/tpcds/sf1/local/delta_attach/q22.benchmark create mode 100644 benchmark/tpcds/sf1/local/delta_attach/q23.benchmark create mode 100644 benchmark/tpcds/sf1/local/delta_attach/q24.benchmark create mode 100644 benchmark/tpcds/sf1/local/delta_attach/q25.benchmark create mode 100644 benchmark/tpcds/sf1/local/delta_attach/q26.benchmark create mode 100644 benchmark/tpcds/sf1/local/delta_attach/q27.benchmark create mode 100644 benchmark/tpcds/sf1/local/delta_attach/q28.benchmark create mode 100644 benchmark/tpcds/sf1/local/delta_attach/q29.benchmark create mode 100644 benchmark/tpcds/sf1/local/delta_attach/q30.benchmark create mode 100644 benchmark/tpcds/sf1/local/delta_attach/q31.benchmark create mode 100644 benchmark/tpcds/sf1/local/delta_attach/q32.benchmark create mode 100644 benchmark/tpcds/sf1/local/delta_attach/q33.benchmark create mode 100644 benchmark/tpcds/sf1/local/delta_attach/q34.benchmark create mode 100644 benchmark/tpcds/sf1/local/delta_attach/q35.benchmark create mode 100644 benchmark/tpcds/sf1/local/delta_attach/q36.benchmark 
create mode 100644 benchmark/tpcds/sf1/local/delta_attach/q37.benchmark create mode 100644 benchmark/tpcds/sf1/local/delta_attach/q38.benchmark create mode 100644 benchmark/tpcds/sf1/local/delta_attach/q39.benchmark create mode 100644 benchmark/tpcds/sf1/local/delta_attach/q40.benchmark create mode 100644 benchmark/tpcds/sf1/local/delta_attach/q41.benchmark create mode 100644 benchmark/tpcds/sf1/local/delta_attach/q42.benchmark create mode 100644 benchmark/tpcds/sf1/local/delta_attach/q43.benchmark create mode 100644 benchmark/tpcds/sf1/local/delta_attach/q44.benchmark create mode 100644 benchmark/tpcds/sf1/local/delta_attach/q45.benchmark create mode 100644 benchmark/tpcds/sf1/local/delta_attach/q46.benchmark create mode 100644 benchmark/tpcds/sf1/local/delta_attach/q47.benchmark create mode 100644 benchmark/tpcds/sf1/local/delta_attach/q48.benchmark create mode 100644 benchmark/tpcds/sf1/local/delta_attach/q49.benchmark create mode 100644 benchmark/tpcds/sf1/local/delta_attach/q50.benchmark create mode 100644 benchmark/tpcds/sf1/local/delta_attach/q51.benchmark create mode 100644 benchmark/tpcds/sf1/local/delta_attach/q52.benchmark create mode 100644 benchmark/tpcds/sf1/local/delta_attach/q53.benchmark create mode 100644 benchmark/tpcds/sf1/local/delta_attach/q54.benchmark create mode 100644 benchmark/tpcds/sf1/local/delta_attach/q55.benchmark create mode 100644 benchmark/tpcds/sf1/local/delta_attach/q56.benchmark create mode 100644 benchmark/tpcds/sf1/local/delta_attach/q57.benchmark create mode 100644 benchmark/tpcds/sf1/local/delta_attach/q58.benchmark create mode 100644 benchmark/tpcds/sf1/local/delta_attach/q59.benchmark create mode 100644 benchmark/tpcds/sf1/local/delta_attach/q60.benchmark create mode 100644 benchmark/tpcds/sf1/local/delta_attach/q61.benchmark create mode 100644 benchmark/tpcds/sf1/local/delta_attach/q62.benchmark create mode 100644 benchmark/tpcds/sf1/local/delta_attach/q63.benchmark create mode 100644 
benchmark/tpcds/sf1/local/delta_attach/q64.benchmark create mode 100644 benchmark/tpcds/sf1/local/delta_attach/q65.benchmark create mode 100644 benchmark/tpcds/sf1/local/delta_attach/q66.benchmark create mode 100644 benchmark/tpcds/sf1/local/delta_attach/q67.benchmark create mode 100644 benchmark/tpcds/sf1/local/delta_attach/q68.benchmark create mode 100644 benchmark/tpcds/sf1/local/delta_attach/q69.benchmark create mode 100644 benchmark/tpcds/sf1/local/delta_attach/q70.benchmark create mode 100644 benchmark/tpcds/sf1/local/delta_attach/q71.benchmark create mode 100644 benchmark/tpcds/sf1/local/delta_attach/q72.benchmark create mode 100644 benchmark/tpcds/sf1/local/delta_attach/q73.benchmark create mode 100644 benchmark/tpcds/sf1/local/delta_attach/q74.benchmark create mode 100644 benchmark/tpcds/sf1/local/delta_attach/q75.benchmark create mode 100644 benchmark/tpcds/sf1/local/delta_attach/q76.benchmark create mode 100644 benchmark/tpcds/sf1/local/delta_attach/q77.benchmark create mode 100644 benchmark/tpcds/sf1/local/delta_attach/q78.benchmark create mode 100644 benchmark/tpcds/sf1/local/delta_attach/q79.benchmark create mode 100644 benchmark/tpcds/sf1/local/delta_attach/q80.benchmark create mode 100644 benchmark/tpcds/sf1/local/delta_attach/q81.benchmark create mode 100644 benchmark/tpcds/sf1/local/delta_attach/q82.benchmark create mode 100644 benchmark/tpcds/sf1/local/delta_attach/q83.benchmark create mode 100644 benchmark/tpcds/sf1/local/delta_attach/q84.benchmark create mode 100644 benchmark/tpcds/sf1/local/delta_attach/q85.benchmark create mode 100644 benchmark/tpcds/sf1/local/delta_attach/q86.benchmark create mode 100644 benchmark/tpcds/sf1/local/delta_attach/q87.benchmark create mode 100644 benchmark/tpcds/sf1/local/delta_attach/q88.benchmark create mode 100644 benchmark/tpcds/sf1/local/delta_attach/q89.benchmark create mode 100644 benchmark/tpcds/sf1/local/delta_attach/q90.benchmark create mode 100644 benchmark/tpcds/sf1/local/delta_attach/q91.benchmark 
create mode 100644 benchmark/tpcds/sf1/local/delta_attach/q92.benchmark create mode 100644 benchmark/tpcds/sf1/local/delta_attach/q93.benchmark create mode 100644 benchmark/tpcds/sf1/local/delta_attach/q94.benchmark create mode 100644 benchmark/tpcds/sf1/local/delta_attach/q95.benchmark create mode 100644 benchmark/tpcds/sf1/local/delta_attach/q96.benchmark create mode 100644 benchmark/tpcds/sf1/local/delta_attach/q97.benchmark create mode 100644 benchmark/tpcds/sf1/local/delta_attach/q98.benchmark create mode 100644 benchmark/tpcds/sf1/local/delta_attach/q99.benchmark create mode 100644 benchmark/tpcds/sf1/local/delta_attach/tpcds_sf1.benchmark.in create mode 100644 benchmark/tpcds/sf1/local/delta_attach_pin/load.sql create mode 100644 benchmark/tpcds/sf1/local/delta_attach_pin/q01.benchmark create mode 100644 benchmark/tpcds/sf1/local/delta_attach_pin/q02.benchmark create mode 100644 benchmark/tpcds/sf1/local/delta_attach_pin/q03.benchmark create mode 100644 benchmark/tpcds/sf1/local/delta_attach_pin/q04.benchmark create mode 100644 benchmark/tpcds/sf1/local/delta_attach_pin/q05.benchmark create mode 100644 benchmark/tpcds/sf1/local/delta_attach_pin/q06.benchmark create mode 100644 benchmark/tpcds/sf1/local/delta_attach_pin/q07.benchmark create mode 100644 benchmark/tpcds/sf1/local/delta_attach_pin/q08.benchmark create mode 100644 benchmark/tpcds/sf1/local/delta_attach_pin/q09.benchmark create mode 100644 benchmark/tpcds/sf1/local/delta_attach_pin/q10.benchmark create mode 100644 benchmark/tpcds/sf1/local/delta_attach_pin/q11.benchmark create mode 100644 benchmark/tpcds/sf1/local/delta_attach_pin/q12.benchmark create mode 100644 benchmark/tpcds/sf1/local/delta_attach_pin/q13.benchmark create mode 100644 benchmark/tpcds/sf1/local/delta_attach_pin/q14.benchmark create mode 100644 benchmark/tpcds/sf1/local/delta_attach_pin/q15.benchmark create mode 100644 benchmark/tpcds/sf1/local/delta_attach_pin/q16.benchmark create mode 100644 
benchmark/tpcds/sf1/local/delta_attach_pin/q17.benchmark create mode 100644 benchmark/tpcds/sf1/local/delta_attach_pin/q18.benchmark create mode 100644 benchmark/tpcds/sf1/local/delta_attach_pin/q19.benchmark create mode 100644 benchmark/tpcds/sf1/local/delta_attach_pin/q20.benchmark create mode 100644 benchmark/tpcds/sf1/local/delta_attach_pin/q21.benchmark create mode 100644 benchmark/tpcds/sf1/local/delta_attach_pin/q22.benchmark create mode 100644 benchmark/tpcds/sf1/local/delta_attach_pin/q23.benchmark create mode 100644 benchmark/tpcds/sf1/local/delta_attach_pin/q24.benchmark create mode 100644 benchmark/tpcds/sf1/local/delta_attach_pin/q25.benchmark create mode 100644 benchmark/tpcds/sf1/local/delta_attach_pin/q26.benchmark create mode 100644 benchmark/tpcds/sf1/local/delta_attach_pin/q27.benchmark create mode 100644 benchmark/tpcds/sf1/local/delta_attach_pin/q28.benchmark create mode 100644 benchmark/tpcds/sf1/local/delta_attach_pin/q29.benchmark create mode 100644 benchmark/tpcds/sf1/local/delta_attach_pin/q30.benchmark create mode 100644 benchmark/tpcds/sf1/local/delta_attach_pin/q31.benchmark create mode 100644 benchmark/tpcds/sf1/local/delta_attach_pin/q32.benchmark create mode 100644 benchmark/tpcds/sf1/local/delta_attach_pin/q33.benchmark create mode 100644 benchmark/tpcds/sf1/local/delta_attach_pin/q34.benchmark create mode 100644 benchmark/tpcds/sf1/local/delta_attach_pin/q35.benchmark create mode 100644 benchmark/tpcds/sf1/local/delta_attach_pin/q36.benchmark create mode 100644 benchmark/tpcds/sf1/local/delta_attach_pin/q37.benchmark create mode 100644 benchmark/tpcds/sf1/local/delta_attach_pin/q38.benchmark create mode 100644 benchmark/tpcds/sf1/local/delta_attach_pin/q39.benchmark create mode 100644 benchmark/tpcds/sf1/local/delta_attach_pin/q40.benchmark create mode 100644 benchmark/tpcds/sf1/local/delta_attach_pin/q41.benchmark create mode 100644 benchmark/tpcds/sf1/local/delta_attach_pin/q42.benchmark create mode 100644 
benchmark/tpcds/sf1/local/delta_attach_pin/q43.benchmark create mode 100644 benchmark/tpcds/sf1/local/delta_attach_pin/q44.benchmark create mode 100644 benchmark/tpcds/sf1/local/delta_attach_pin/q45.benchmark create mode 100644 benchmark/tpcds/sf1/local/delta_attach_pin/q46.benchmark create mode 100644 benchmark/tpcds/sf1/local/delta_attach_pin/q47.benchmark create mode 100644 benchmark/tpcds/sf1/local/delta_attach_pin/q48.benchmark create mode 100644 benchmark/tpcds/sf1/local/delta_attach_pin/q49.benchmark create mode 100644 benchmark/tpcds/sf1/local/delta_attach_pin/q50.benchmark create mode 100644 benchmark/tpcds/sf1/local/delta_attach_pin/q51.benchmark create mode 100644 benchmark/tpcds/sf1/local/delta_attach_pin/q52.benchmark create mode 100644 benchmark/tpcds/sf1/local/delta_attach_pin/q53.benchmark create mode 100644 benchmark/tpcds/sf1/local/delta_attach_pin/q54.benchmark create mode 100644 benchmark/tpcds/sf1/local/delta_attach_pin/q55.benchmark create mode 100644 benchmark/tpcds/sf1/local/delta_attach_pin/q56.benchmark create mode 100644 benchmark/tpcds/sf1/local/delta_attach_pin/q57.benchmark create mode 100644 benchmark/tpcds/sf1/local/delta_attach_pin/q58.benchmark create mode 100644 benchmark/tpcds/sf1/local/delta_attach_pin/q59.benchmark create mode 100644 benchmark/tpcds/sf1/local/delta_attach_pin/q60.benchmark create mode 100644 benchmark/tpcds/sf1/local/delta_attach_pin/q61.benchmark create mode 100644 benchmark/tpcds/sf1/local/delta_attach_pin/q62.benchmark create mode 100644 benchmark/tpcds/sf1/local/delta_attach_pin/q63.benchmark create mode 100644 benchmark/tpcds/sf1/local/delta_attach_pin/q64.benchmark create mode 100644 benchmark/tpcds/sf1/local/delta_attach_pin/q65.benchmark create mode 100644 benchmark/tpcds/sf1/local/delta_attach_pin/q66.benchmark create mode 100644 benchmark/tpcds/sf1/local/delta_attach_pin/q67.benchmark create mode 100644 benchmark/tpcds/sf1/local/delta_attach_pin/q68.benchmark create mode 100644 
benchmark/tpcds/sf1/local/delta_attach_pin/q69.benchmark create mode 100644 benchmark/tpcds/sf1/local/delta_attach_pin/q70.benchmark create mode 100644 benchmark/tpcds/sf1/local/delta_attach_pin/q71.benchmark create mode 100644 benchmark/tpcds/sf1/local/delta_attach_pin/q72.benchmark create mode 100644 benchmark/tpcds/sf1/local/delta_attach_pin/q73.benchmark create mode 100644 benchmark/tpcds/sf1/local/delta_attach_pin/q74.benchmark create mode 100644 benchmark/tpcds/sf1/local/delta_attach_pin/q75.benchmark create mode 100644 benchmark/tpcds/sf1/local/delta_attach_pin/q76.benchmark create mode 100644 benchmark/tpcds/sf1/local/delta_attach_pin/q77.benchmark create mode 100644 benchmark/tpcds/sf1/local/delta_attach_pin/q78.benchmark create mode 100644 benchmark/tpcds/sf1/local/delta_attach_pin/q79.benchmark create mode 100644 benchmark/tpcds/sf1/local/delta_attach_pin/q80.benchmark create mode 100644 benchmark/tpcds/sf1/local/delta_attach_pin/q81.benchmark create mode 100644 benchmark/tpcds/sf1/local/delta_attach_pin/q82.benchmark create mode 100644 benchmark/tpcds/sf1/local/delta_attach_pin/q83.benchmark create mode 100644 benchmark/tpcds/sf1/local/delta_attach_pin/q84.benchmark create mode 100644 benchmark/tpcds/sf1/local/delta_attach_pin/q85.benchmark create mode 100644 benchmark/tpcds/sf1/local/delta_attach_pin/q86.benchmark create mode 100644 benchmark/tpcds/sf1/local/delta_attach_pin/q87.benchmark create mode 100644 benchmark/tpcds/sf1/local/delta_attach_pin/q88.benchmark create mode 100644 benchmark/tpcds/sf1/local/delta_attach_pin/q89.benchmark create mode 100644 benchmark/tpcds/sf1/local/delta_attach_pin/q90.benchmark create mode 100644 benchmark/tpcds/sf1/local/delta_attach_pin/q91.benchmark create mode 100644 benchmark/tpcds/sf1/local/delta_attach_pin/q92.benchmark create mode 100644 benchmark/tpcds/sf1/local/delta_attach_pin/q93.benchmark create mode 100644 benchmark/tpcds/sf1/local/delta_attach_pin/q94.benchmark create mode 100644 
benchmark/tpcds/sf1/local/delta_attach_pin/q95.benchmark create mode 100644 benchmark/tpcds/sf1/local/delta_attach_pin/q96.benchmark create mode 100644 benchmark/tpcds/sf1/local/delta_attach_pin/q97.benchmark create mode 100644 benchmark/tpcds/sf1/local/delta_attach_pin/q98.benchmark create mode 100644 benchmark/tpcds/sf1/local/delta_attach_pin/q99.benchmark create mode 100644 benchmark/tpcds/sf1/local/delta_attach_pin/tpcds_sf1.benchmark.in create mode 100644 benchmark/tpch/sf1/local/delta_attach/load.sql create mode 100644 benchmark/tpch/sf1/local/delta_attach/q01.benchmark create mode 100644 benchmark/tpch/sf1/local/delta_attach/q02.benchmark create mode 100644 benchmark/tpch/sf1/local/delta_attach/q03.benchmark create mode 100644 benchmark/tpch/sf1/local/delta_attach/q04.benchmark create mode 100644 benchmark/tpch/sf1/local/delta_attach/q05.benchmark create mode 100644 benchmark/tpch/sf1/local/delta_attach/q06.benchmark create mode 100644 benchmark/tpch/sf1/local/delta_attach/q07.benchmark create mode 100644 benchmark/tpch/sf1/local/delta_attach/q08.benchmark create mode 100644 benchmark/tpch/sf1/local/delta_attach/q09.benchmark create mode 100644 benchmark/tpch/sf1/local/delta_attach/q10.benchmark create mode 100644 benchmark/tpch/sf1/local/delta_attach/q11.benchmark create mode 100644 benchmark/tpch/sf1/local/delta_attach/q12.benchmark create mode 100644 benchmark/tpch/sf1/local/delta_attach/q13.benchmark create mode 100644 benchmark/tpch/sf1/local/delta_attach/q14.benchmark create mode 100644 benchmark/tpch/sf1/local/delta_attach/q15.benchmark create mode 100644 benchmark/tpch/sf1/local/delta_attach/q16.benchmark create mode 100644 benchmark/tpch/sf1/local/delta_attach/q17.benchmark create mode 100644 benchmark/tpch/sf1/local/delta_attach/q18.benchmark create mode 100644 benchmark/tpch/sf1/local/delta_attach/q19.benchmark create mode 100644 benchmark/tpch/sf1/local/delta_attach/q20.benchmark create mode 100644 
benchmark/tpch/sf1/local/delta_attach/q21.benchmark create mode 100644 benchmark/tpch/sf1/local/delta_attach/q22.benchmark create mode 100644 benchmark/tpch/sf1/local/delta_attach/tpch_sf1_delta.benchmark.in create mode 100644 src/include/storage/delta_catalog.hpp create mode 100644 src/include/storage/delta_schema_entry.hpp create mode 100644 src/include/storage/delta_table_entry.hpp create mode 100644 src/include/storage/delta_transaction.hpp create mode 100644 src/include/storage/delta_transaction_manager.hpp create mode 100644 src/storage/delta_catalog.cpp create mode 100644 src/storage/delta_schema_entry.cpp create mode 100644 src/storage/delta_table_entry.cpp create mode 100644 src/storage/delta_transaction.cpp create mode 100644 src/storage/delta_transaction_manager.cpp create mode 100644 test/sql/dat/attach.test diff --git a/.github/regression/micro.csv b/.github/regression/micro.csv new file mode 100644 index 0000000..2a4ef0f --- /dev/null +++ b/.github/regression/micro.csv @@ -0,0 +1,5 @@ +benchmark/micro/snapshot_performance/delta_scan.benchmark +benchmark/micro/snapshot_performance/snapshot_no_pin.benchmark +benchmark/micro/snapshot_performance/snapshot_no_pin_filter.benchmark +benchmark/micro/snapshot_performance/snapshot_pin.benchmark +benchmark/micro/snapshot_performance/snapshot_pin_filter.benchmark \ No newline at end of file diff --git a/.github/workflows/LocalTesting.yml b/.github/workflows/LocalTesting.yml index 7735508..fe2fe02 100644 --- a/.github/workflows/LocalTesting.yml +++ b/.github/workflows/LocalTesting.yml @@ -288,6 +288,12 @@ jobs: run: | python ./duckdb/scripts/regression_test_runner.py --old=duckdb_delta/build/release/benchmark/benchmark_runner --new=build/release/benchmark/benchmark_runner --benchmarks=.github/regression/tpcds_sf1_local.csv --verbose --threads=2 --root-dir=. 
+ - name: Regression Test Micro + if: always() + shell: bash + run: | + python ./duckdb/scripts/regression_test_runner.py --old=duckdb_delta/build/release/benchmark/benchmark_runner --new=build/release/benchmark/benchmark_runner --benchmarks=.github/regression/micro.csv --verbose --threads=2 --root-dir=. + - name: Test benchmark makefile shell: bash run: | diff --git a/.gitignore b/.gitignore index bc1caa6..e9e3e7f 100644 --- a/.gitignore +++ b/.gitignore @@ -11,5 +11,5 @@ test/python/__pycache__/ data/generated __azurite*__.json __blobstorage__ -.venv +venv .vscode \ No newline at end of file diff --git a/CMakeLists.txt b/CMakeLists.txt index f4f9267..fd9255d 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -15,7 +15,13 @@ set(EXTENSION_SOURCES src/delta_extension.cpp src/delta_functions.cpp src/delta_utils.cpp - src/functions/delta_scan.cpp) + src/functions/delta_scan.cpp + src/storage/delta_catalog.cpp + src/storage/delta_schema_entry.cpp + src/storage/delta_table_entry.cpp + src/storage/delta_transaction.cpp + src/storage/delta_transaction_manager.cpp +) ### Custom config # TODO: figure out if we really need this? 
diff --git a/benchmark/benchmark.Makefile b/benchmark/benchmark.Makefile index fc59109..4387d5e 100644 --- a/benchmark/benchmark.Makefile +++ b/benchmark/benchmark.Makefile @@ -26,6 +26,8 @@ plot: # TPCH SF1 on delta table bench-run-tpch-sf1-delta: bench-output-dir ./build/release/benchmark/benchmark_runner --root-dir './' 'benchmark/tpch/sf1/local/delta/$(BENCHMARK_PATTERN)' 2>&1 | tee benchmark_results/tpch-sf1-delta.csv +bench-run-tpch-sf1-delta-attach: bench-output-dir + ./build/release/benchmark/benchmark_runner --root-dir './' 'benchmark/tpch/sf1/local/delta_attach/$(BENCHMARK_PATTERN)' 2>&1 | tee benchmark_results/tpch-sf1-delta-attach.csv # TPCH SF1 on parquet files bench-run-tpch-sf1-parquet: bench-output-dir ./build/release/benchmark/benchmark_runner 'benchmark/tpch/sf1-parquet/$(BENCHMARK_PATTERN)' 2>&1 | tee benchmark_results/tpch-sf1-parquet.csv @@ -33,7 +35,7 @@ bench-run-tpch-sf1-parquet: bench-output-dir bench-run-tpch-sf1-duckdb: bench-output-dir ./build/release/benchmark/benchmark_runner --root-dir './' 'benchmark/tpch/sf1/local/duckdb/$(BENCHMARK_PATTERN)' 2>&1 | tee benchmark_results/tpch-sf1-duckdb.csv # COMPARES TPCH SF1 on parquet file vs on delta files vs on duckdb files -bench-run-tpch-sf1: bench-run-tpch-sf1-delta bench-run-tpch-sf1-parquet +bench-run-tpch-sf1: bench-run-tpch-sf1-delta bench-run-tpch-sf1-parquet bench-run-tpch-sf1-delta-attach ### # TPCDS @@ -42,6 +44,10 @@ bench-run-tpch-sf1: bench-run-tpch-sf1-delta bench-run-tpch-sf1-parquet # TPCDS SF1 on delta table bench-run-tpcds-sf1-delta: bench-output-dir ./build/release/benchmark/benchmark_runner --root-dir './' 'benchmark/tpcds/sf1/$(IO_MODE)/delta/$(BENCHMARK_PATTERN)' 2>&1 | tee benchmark_results/tpcds-sf1-delta-$(IO_MODE).csv +bench-run-tpcds-sf1-delta-attach: bench-output-dir + ./build/release/benchmark/benchmark_runner --root-dir './' 'benchmark/tpcds/sf1/$(IO_MODE)/delta_attach/$(BENCHMARK_PATTERN)' 2>&1 | tee benchmark_results/tpcds-sf1-delta-attach-$(IO_MODE).csv
+bench-run-tpcds-sf1-delta-attach-pin: bench-output-dir + ./build/release/benchmark/benchmark_runner --root-dir './' 'benchmark/tpcds/sf1/$(IO_MODE)/delta_attach_pin/$(BENCHMARK_PATTERN)' 2>&1 | tee benchmark_results/tpcds-sf1-delta-attach-pin-$(IO_MODE).csv # TPCDS SF1 on parquet files bench-run-tpcds-sf1-parquet: bench-output-dir ./build/release/benchmark/benchmark_runner --root-dir './' 'benchmark/tpcds/sf1/$(IO_MODE)/parquet/$(BENCHMARK_PATTERN)' 2>&1 | tee benchmark_results/tpcds-sf1-parquet-$(IO_MODE).csv @@ -50,4 +56,11 @@ bench-run-tpcds-sf1-duckdb: bench-output-dir ./build/release/benchmark/benchmark_runner --root-dir './' 'benchmark/tpcds/sf1/$(IO_MODE)/duckdb/$(BENCHMARK_PATTERN)' 2>&1 | tee benchmark_results/tpcds-sf1-duckdb-$(IO_MODE).csv # COMPARES TPCDS SF1 on parquet file vs on delta files -bench-run-tpcds-sf1: bench-run-tpcds-sf1-delta bench-run-tpcds-sf1-parquet bench-run-tpcds-sf1-duckdb +bench-run-tpcds-sf1: bench-run-tpcds-sf1-delta bench-run-tpcds-sf1-parquet bench-run-tpcds-sf1-duckdb bench-run-tpcds-sf1-delta-attach bench-run-tpcds-sf1-delta-attach-pin + +### +# MICRO +### + +bench-run-snapshot-performance: bench-output-dir + ./build/release/benchmark/benchmark_runner --root-dir './' 'benchmark/micro/snapshot_performance/.*' 2>&1 | tee benchmark_results/snapshot-performance.csv diff --git a/benchmark/micro/snapshot_performance/delta_scan.benchmark b/benchmark/micro/snapshot_performance/delta_scan.benchmark new file mode 100644 index 0000000..f6d4502 --- /dev/null +++ b/benchmark/micro/snapshot_performance/delta_scan.benchmark @@ -0,0 +1,16 @@ +# name: benchmark/micro/snapshot_performance/delta_scan.benchmark +# description: Reference result to compare attach functions to +# group: [aggregate] + +name delta_scan reference +group snapshot_performance + +require delta + +require parquet + +run +SELECT COUNT(*) FROM delta_scan('./data/generated/delta_rs_tpch_sf1_100_splits/lineitem/delta_lake') + +result I +6001215 \ No newline at end of file 
diff --git a/benchmark/micro/snapshot_performance/delta_scan_filter.benchmark b/benchmark/micro/snapshot_performance/delta_scan_filter.benchmark new file mode 100644 index 0000000..f97dce5 --- /dev/null +++ b/benchmark/micro/snapshot_performance/delta_scan_filter.benchmark @@ -0,0 +1,16 @@ +# name: benchmark/micro/snapshot_performance/delta_scan_filter.benchmark +# description: Reference result to compare attach functions to +# group: [aggregate] + +name delta_scan reference +group snapshot_performance + +require delta + +require parquet + +run +SELECT COUNT(*) FROM delta_scan('./data/generated/delta_rs_tpch_sf1_100_splits/lineitem/delta_lake') where l_orderkey is not null + +result I +6001215 \ No newline at end of file diff --git a/benchmark/micro/snapshot_performance/snapshot_no_pin.benchmark b/benchmark/micro/snapshot_performance/snapshot_no_pin.benchmark new file mode 100644 index 0000000..4ec17f3 --- /dev/null +++ b/benchmark/micro/snapshot_performance/snapshot_no_pin.benchmark @@ -0,0 +1,19 @@ +# name: benchmark/micro/snapshot_performance/snapshot_no_pin.benchmark +# description: Performance of reading from a table with many log entries +# group: [aggregate] + +name Snapshot no pin +group snapshot_performance + +require delta + +require parquet + +load +ATTACH './data/generated/delta_rs_tpch_sf1_100_splits/lineitem/delta_lake' as lineitem_no_pin (TYPE delta); + +run +SELECT COUNT(*) FROM lineitem_no_pin + +result I +6001215 \ No newline at end of file diff --git a/benchmark/micro/snapshot_performance/snapshot_no_pin_filter.benchmark b/benchmark/micro/snapshot_performance/snapshot_no_pin_filter.benchmark new file mode 100644 index 0000000..f24e1ab --- /dev/null +++ b/benchmark/micro/snapshot_performance/snapshot_no_pin_filter.benchmark @@ -0,0 +1,19 @@ +# name: benchmark/micro/snapshot_performance/snapshot_no_pin_filter.benchmark +# description: Performance of reading from a table with many log entries +# group: [aggregate] + +name Snapshot no pin filter +group
snapshot_performance + +require delta + +require parquet + +load +ATTACH './data/generated/delta_rs_tpch_sf1_100_splits/lineitem/delta_lake' as lineitem_no_pin (TYPE delta); + +run +SELECT COUNT(*) FROM lineitem_no_pin where l_orderkey is not null + +result I +6001215 \ No newline at end of file diff --git a/benchmark/micro/snapshot_performance/snapshot_pin.benchmark b/benchmark/micro/snapshot_performance/snapshot_pin.benchmark new file mode 100644 index 0000000..3e4dbaf --- /dev/null +++ b/benchmark/micro/snapshot_performance/snapshot_pin.benchmark @@ -0,0 +1,19 @@ +# name: benchmark/micro/snapshot_performance/snapshot_pin.benchmark +# description: Performance of reading from a table with many log entries +# group: [aggregate] + +name Snapshot pin +group snapshot_performance + +require delta + +require parquet + +load +ATTACH './data/generated/delta_rs_tpch_sf1_100_splits/lineitem/delta_lake' as lineitem_pin (TYPE delta, PIN_SNAPSHOT); + +run +SELECT COUNT(*) FROM lineitem_pin + +result I +6001215 \ No newline at end of file diff --git a/benchmark/micro/snapshot_performance/snapshot_pin_filter.benchmark b/benchmark/micro/snapshot_performance/snapshot_pin_filter.benchmark new file mode 100644 index 0000000..ee74eae --- /dev/null +++ b/benchmark/micro/snapshot_performance/snapshot_pin_filter.benchmark @@ -0,0 +1,19 @@ +# name: benchmark/micro/snapshot_performance/snapshot_pin_filter.benchmark +# description: Performance of reading from a table with many log entries +# group: [aggregate] + +name Snapshot pin filter +group snapshot_performance + +require delta + +require parquet + +load +ATTACH './data/generated/delta_rs_tpch_sf1_100_splits/lineitem/delta_lake' as lineitem_pin (TYPE delta, PIN_SNAPSHOT); + +run +SELECT COUNT(*) FROM lineitem_pin where l_orderkey is not null + +result I +6001215 \ No newline at end of file diff --git a/benchmark/tpcds/sf1/local/delta_attach/load.sql b/benchmark/tpcds/sf1/local/delta_attach/load.sql new file mode 100644 index 
0000000..3acd7b5 --- /dev/null +++ b/benchmark/tpcds/sf1/local/delta_attach/load.sql @@ -0,0 +1,24 @@ +ATTACH './data/generated/tpcds_sf1/call_center/delta_lake' as call_center (TYPE delta); +ATTACH './data/generated/tpcds_sf1/catalog_page/delta_lake' as catalog_page (TYPE delta); +ATTACH './data/generated/tpcds_sf1/catalog_returns/delta_lake' as catalog_returns (TYPE delta); +ATTACH './data/generated/tpcds_sf1/catalog_sales/delta_lake' as catalog_sales (TYPE delta); +ATTACH './data/generated/tpcds_sf1/customer/delta_lake' as customer (TYPE delta); +ATTACH './data/generated/tpcds_sf1/customer_demographics/delta_lake' as customer_demographics (TYPE delta); +ATTACH './data/generated/tpcds_sf1/customer_address/delta_lake' as customer_address (TYPE delta); +ATTACH './data/generated/tpcds_sf1/date_dim/delta_lake' as date_dim (TYPE delta); +ATTACH './data/generated/tpcds_sf1/household_demographics/delta_lake' as household_demographics (TYPE delta); +ATTACH './data/generated/tpcds_sf1/inventory/delta_lake' as inventory (TYPE delta); +ATTACH './data/generated/tpcds_sf1/income_band/delta_lake' as income_band (TYPE delta); +ATTACH './data/generated/tpcds_sf1/item/delta_lake' as item (TYPE delta); +ATTACH './data/generated/tpcds_sf1/promotion/delta_lake' as promotion (TYPE delta); +ATTACH './data/generated/tpcds_sf1/reason/delta_lake' as reason (TYPE delta); +ATTACH './data/generated/tpcds_sf1/ship_mode/delta_lake' as ship_mode (TYPE delta); +ATTACH './data/generated/tpcds_sf1/store/delta_lake' as store (TYPE delta); +ATTACH './data/generated/tpcds_sf1/store_returns/delta_lake' as store_returns (TYPE delta); +ATTACH './data/generated/tpcds_sf1/store_sales/delta_lake' as store_sales (TYPE delta); +ATTACH './data/generated/tpcds_sf1/time_dim/delta_lake' as time_dim (TYPE delta); +ATTACH './data/generated/tpcds_sf1/warehouse/delta_lake' as warehouse (TYPE delta); +ATTACH './data/generated/tpcds_sf1/web_page/delta_lake' as web_page (TYPE delta); +ATTACH 
'./data/generated/tpcds_sf1/web_returns/delta_lake' as web_returns (TYPE delta); +ATTACH './data/generated/tpcds_sf1/web_sales/delta_lake' as web_sales (TYPE delta); +ATTACH './data/generated/tpcds_sf1/web_site/delta_lake' as web_site (TYPE delta); \ No newline at end of file diff --git a/benchmark/tpcds/sf1/local/delta_attach/q01.benchmark b/benchmark/tpcds/sf1/local/delta_attach/q01.benchmark new file mode 100644 index 0000000..36a66b1 --- /dev/null +++ b/benchmark/tpcds/sf1/local/delta_attach/q01.benchmark @@ -0,0 +1,7 @@ +# name: benchmark/tpcds/sf1/local/delta_attach/q01.benchmark +# description: Run query 01 from the TPC-DS benchmark +# group: [sf1] + +template benchmark/tpcds/sf1/local/delta_attach/tpcds_sf1.benchmark.in +QUERY_NUMBER=1 +QUERY_NUMBER_PADDED=01 diff --git a/benchmark/tpcds/sf1/local/delta_attach/q02.benchmark b/benchmark/tpcds/sf1/local/delta_attach/q02.benchmark new file mode 100644 index 0000000..62a0ca0 --- /dev/null +++ b/benchmark/tpcds/sf1/local/delta_attach/q02.benchmark @@ -0,0 +1,7 @@ +# name: benchmark/tpcds/sf1/local/delta_attach/q02.benchmark +# description: Run query 02 from the TPC-DS benchmark +# group: [sf1] + +template benchmark/tpcds/sf1/local/delta_attach/tpcds_sf1.benchmark.in +QUERY_NUMBER=2 +QUERY_NUMBER_PADDED=02 diff --git a/benchmark/tpcds/sf1/local/delta_attach/q03.benchmark b/benchmark/tpcds/sf1/local/delta_attach/q03.benchmark new file mode 100644 index 0000000..830f785 --- /dev/null +++ b/benchmark/tpcds/sf1/local/delta_attach/q03.benchmark @@ -0,0 +1,7 @@ +# name: benchmark/tpcds/sf1/local/delta_attach/q03.benchmark +# description: Run query 03 from the TPC-DS benchmark +# group: [sf1] + +template benchmark/tpcds/sf1/local/delta_attach/tpcds_sf1.benchmark.in +QUERY_NUMBER=3 +QUERY_NUMBER_PADDED=03 diff --git a/benchmark/tpcds/sf1/local/delta_attach/q04.benchmark b/benchmark/tpcds/sf1/local/delta_attach/q04.benchmark new file mode 100644 index 0000000..df53a2b --- /dev/null +++ 
b/benchmark/tpcds/sf1/local/delta_attach/q04.benchmark @@ -0,0 +1,7 @@ +# name: benchmark/tpcds/sf1/local/delta_attach/q04.benchmark +# description: Run query 04 from the TPC-DS benchmark +# group: [sf1] + +template benchmark/tpcds/sf1/local/delta_attach/tpcds_sf1.benchmark.in +QUERY_NUMBER=4 +QUERY_NUMBER_PADDED=04 diff --git a/benchmark/tpcds/sf1/local/delta_attach/q05.benchmark b/benchmark/tpcds/sf1/local/delta_attach/q05.benchmark new file mode 100644 index 0000000..aae5d6c --- /dev/null +++ b/benchmark/tpcds/sf1/local/delta_attach/q05.benchmark @@ -0,0 +1,7 @@ +# name: benchmark/tpcds/sf1/local/delta_attach/q05.benchmark +# description: Run query 05 from the TPC-DS benchmark +# group: [sf1] + +template benchmark/tpcds/sf1/local/delta_attach/tpcds_sf1.benchmark.in +QUERY_NUMBER=5 +QUERY_NUMBER_PADDED=05 diff --git a/benchmark/tpcds/sf1/local/delta_attach/q06.benchmark b/benchmark/tpcds/sf1/local/delta_attach/q06.benchmark new file mode 100644 index 0000000..f6ec98b --- /dev/null +++ b/benchmark/tpcds/sf1/local/delta_attach/q06.benchmark @@ -0,0 +1,7 @@ +# name: benchmark/tpcds/sf1/local/delta_attach/q06.benchmark +# description: Run query 06 from the TPC-DS benchmark +# group: [sf1] + +template benchmark/tpcds/sf1/local/delta_attach/tpcds_sf1.benchmark.in +QUERY_NUMBER=6 +QUERY_NUMBER_PADDED=06 diff --git a/benchmark/tpcds/sf1/local/delta_attach/q07.benchmark b/benchmark/tpcds/sf1/local/delta_attach/q07.benchmark new file mode 100644 index 0000000..1e2563b --- /dev/null +++ b/benchmark/tpcds/sf1/local/delta_attach/q07.benchmark @@ -0,0 +1,7 @@ +# name: benchmark/tpcds/sf1/local/delta_attach/q07.benchmark +# description: Run query 07 from the TPC-DS benchmark +# group: [sf1] + +template benchmark/tpcds/sf1/local/delta_attach/tpcds_sf1.benchmark.in +QUERY_NUMBER=7 +QUERY_NUMBER_PADDED=07 diff --git a/benchmark/tpcds/sf1/local/delta_attach/q08.benchmark b/benchmark/tpcds/sf1/local/delta_attach/q08.benchmark new file mode 100644 index 0000000..26c426a --- /dev/null 
+++ b/benchmark/tpcds/sf1/local/delta_attach/q08.benchmark @@ -0,0 +1,7 @@ +# name: benchmark/tpcds/sf1/local/delta_attach/q08.benchmark +# description: Run query 08 from the TPC-DS benchmark +# group: [sf1] + +template benchmark/tpcds/sf1/local/delta_attach/tpcds_sf1.benchmark.in +QUERY_NUMBER=8 +QUERY_NUMBER_PADDED=08 diff --git a/benchmark/tpcds/sf1/local/delta_attach/q09.benchmark b/benchmark/tpcds/sf1/local/delta_attach/q09.benchmark new file mode 100644 index 0000000..c5a388a --- /dev/null +++ b/benchmark/tpcds/sf1/local/delta_attach/q09.benchmark @@ -0,0 +1,7 @@ +# name: benchmark/tpcds/sf1/local/delta_attach/q09.benchmark +# description: Run query 09 from the TPC-DS benchmark +# group: [sf1] + +template benchmark/tpcds/sf1/local/delta_attach/tpcds_sf1.benchmark.in +QUERY_NUMBER=9 +QUERY_NUMBER_PADDED=09 diff --git a/benchmark/tpcds/sf1/local/delta_attach/q10.benchmark b/benchmark/tpcds/sf1/local/delta_attach/q10.benchmark new file mode 100644 index 0000000..627c022 --- /dev/null +++ b/benchmark/tpcds/sf1/local/delta_attach/q10.benchmark @@ -0,0 +1,7 @@ +# name: benchmark/tpcds/sf1/local/delta_attach/q10.benchmark +# description: Run query 10 from the TPC-DS benchmark +# group: [sf1] + +template benchmark/tpcds/sf1/local/delta_attach/tpcds_sf1.benchmark.in +QUERY_NUMBER=10 +QUERY_NUMBER_PADDED=10 diff --git a/benchmark/tpcds/sf1/local/delta_attach/q11.benchmark b/benchmark/tpcds/sf1/local/delta_attach/q11.benchmark new file mode 100644 index 0000000..36909cd --- /dev/null +++ b/benchmark/tpcds/sf1/local/delta_attach/q11.benchmark @@ -0,0 +1,7 @@ +# name: benchmark/tpcds/sf1/local/delta_attach/q11.benchmark +# description: Run query 11 from the TPC-DS benchmark +# group: [sf1] + +template benchmark/tpcds/sf1/local/delta_attach/tpcds_sf1.benchmark.in +QUERY_NUMBER=11 +QUERY_NUMBER_PADDED=11 diff --git a/benchmark/tpcds/sf1/local/delta_attach/q12.benchmark b/benchmark/tpcds/sf1/local/delta_attach/q12.benchmark new file mode 100644 index 0000000..0e26df5 --- 
/dev/null +++ b/benchmark/tpcds/sf1/local/delta_attach/q12.benchmark @@ -0,0 +1,7 @@ +# name: benchmark/tpcds/sf1/local/delta_attach/q12.benchmark +# description: Run query 12 from the TPC-DS benchmark +# group: [sf1] + +template benchmark/tpcds/sf1/local/delta_attach/tpcds_sf1.benchmark.in +QUERY_NUMBER=12 +QUERY_NUMBER_PADDED=12 diff --git a/benchmark/tpcds/sf1/local/delta_attach/q13.benchmark b/benchmark/tpcds/sf1/local/delta_attach/q13.benchmark new file mode 100644 index 0000000..fad0aed --- /dev/null +++ b/benchmark/tpcds/sf1/local/delta_attach/q13.benchmark @@ -0,0 +1,7 @@ +# name: benchmark/tpcds/sf1/local/delta_attach/q13.benchmark +# description: Run query 13 from the TPC-DS benchmark +# group: [sf1] + +template benchmark/tpcds/sf1/local/delta_attach/tpcds_sf1.benchmark.in +QUERY_NUMBER=13 +QUERY_NUMBER_PADDED=13 diff --git a/benchmark/tpcds/sf1/local/delta_attach/q14.benchmark b/benchmark/tpcds/sf1/local/delta_attach/q14.benchmark new file mode 100644 index 0000000..bbb12a3 --- /dev/null +++ b/benchmark/tpcds/sf1/local/delta_attach/q14.benchmark @@ -0,0 +1,7 @@ +# name: benchmark/tpcds/sf1/local/delta_attach/q14.benchmark +# description: Run query 14 from the TPC-DS benchmark +# group: [sf1] + +template benchmark/tpcds/sf1/local/delta_attach/tpcds_sf1.benchmark.in +QUERY_NUMBER=14 +QUERY_NUMBER_PADDED=14 diff --git a/benchmark/tpcds/sf1/local/delta_attach/q15.benchmark b/benchmark/tpcds/sf1/local/delta_attach/q15.benchmark new file mode 100644 index 0000000..b5ae728 --- /dev/null +++ b/benchmark/tpcds/sf1/local/delta_attach/q15.benchmark @@ -0,0 +1,7 @@ +# name: benchmark/tpcds/sf1/local/delta_attach/q15.benchmark +# description: Run query 15 from the TPC-DS benchmark +# group: [sf1] + +template benchmark/tpcds/sf1/local/delta_attach/tpcds_sf1.benchmark.in +QUERY_NUMBER=15 +QUERY_NUMBER_PADDED=15 diff --git a/benchmark/tpcds/sf1/local/delta_attach/q16.benchmark b/benchmark/tpcds/sf1/local/delta_attach/q16.benchmark new file mode 100644 index 
0000000..e3f7494 --- /dev/null +++ b/benchmark/tpcds/sf1/local/delta_attach/q16.benchmark @@ -0,0 +1,7 @@ +# name: benchmark/tpcds/sf1/local/delta_attach/q16.benchmark +# description: Run query 16 from the TPC-DS benchmark +# group: [sf1] + +template benchmark/tpcds/sf1/local/delta_attach/tpcds_sf1.benchmark.in +QUERY_NUMBER=16 +QUERY_NUMBER_PADDED=16 diff --git a/benchmark/tpcds/sf1/local/delta_attach/q17.benchmark b/benchmark/tpcds/sf1/local/delta_attach/q17.benchmark new file mode 100644 index 0000000..a15c322 --- /dev/null +++ b/benchmark/tpcds/sf1/local/delta_attach/q17.benchmark @@ -0,0 +1,7 @@ +# name: benchmark/tpcds/sf1/local/delta_attach/q17.benchmark +# description: Run query 17 from the TPC-DS benchmark +# group: [sf1] + +template benchmark/tpcds/sf1/local/delta_attach/tpcds_sf1.benchmark.in +QUERY_NUMBER=17 +QUERY_NUMBER_PADDED=17 diff --git a/benchmark/tpcds/sf1/local/delta_attach/q18.benchmark b/benchmark/tpcds/sf1/local/delta_attach/q18.benchmark new file mode 100644 index 0000000..b176fcc --- /dev/null +++ b/benchmark/tpcds/sf1/local/delta_attach/q18.benchmark @@ -0,0 +1,7 @@ +# name: benchmark/tpcds/sf1/local/delta_attach/q18.benchmark +# description: Run query 18 from the TPC-DS benchmark +# group: [sf1] + +template benchmark/tpcds/sf1/local/delta_attach/tpcds_sf1.benchmark.in +QUERY_NUMBER=18 +QUERY_NUMBER_PADDED=18 diff --git a/benchmark/tpcds/sf1/local/delta_attach/q19.benchmark b/benchmark/tpcds/sf1/local/delta_attach/q19.benchmark new file mode 100644 index 0000000..d53f444 --- /dev/null +++ b/benchmark/tpcds/sf1/local/delta_attach/q19.benchmark @@ -0,0 +1,7 @@ +# name: benchmark/tpcds/sf1/local/delta_attach/q19.benchmark +# description: Run query 19 from the TPC-DS benchmark +# group: [sf1] + +template benchmark/tpcds/sf1/local/delta_attach/tpcds_sf1.benchmark.in +QUERY_NUMBER=19 +QUERY_NUMBER_PADDED=19 diff --git a/benchmark/tpcds/sf1/local/delta_attach/q20.benchmark b/benchmark/tpcds/sf1/local/delta_attach/q20.benchmark new file mode 
100644 index 0000000..7479dea --- /dev/null +++ b/benchmark/tpcds/sf1/local/delta_attach/q20.benchmark @@ -0,0 +1,7 @@ +# name: benchmark/tpcds/sf1/local/delta_attach/q20.benchmark +# description: Run query 20 from the TPC-DS benchmark +# group: [sf1] + +template benchmark/tpcds/sf1/local/delta_attach/tpcds_sf1.benchmark.in +QUERY_NUMBER=20 +QUERY_NUMBER_PADDED=20 diff --git a/benchmark/tpcds/sf1/local/delta_attach/q21.benchmark b/benchmark/tpcds/sf1/local/delta_attach/q21.benchmark new file mode 100644 index 0000000..0ba2492 --- /dev/null +++ b/benchmark/tpcds/sf1/local/delta_attach/q21.benchmark @@ -0,0 +1,7 @@ +# name: benchmark/tpcds/sf1/local/delta_attach/q21.benchmark +# description: Run query 21 from the TPC-DS benchmark +# group: [sf1] + +template benchmark/tpcds/sf1/local/delta_attach/tpcds_sf1.benchmark.in +QUERY_NUMBER=21 +QUERY_NUMBER_PADDED=21 diff --git a/benchmark/tpcds/sf1/local/delta_attach/q22.benchmark b/benchmark/tpcds/sf1/local/delta_attach/q22.benchmark new file mode 100644 index 0000000..458432c --- /dev/null +++ b/benchmark/tpcds/sf1/local/delta_attach/q22.benchmark @@ -0,0 +1,7 @@ +# name: benchmark/tpcds/sf1/local/delta_attach/q22.benchmark +# description: Run query 22 from the TPC-DS benchmark +# group: [sf1] + +template benchmark/tpcds/sf1/local/delta_attach/tpcds_sf1.benchmark.in +QUERY_NUMBER=22 +QUERY_NUMBER_PADDED=22 diff --git a/benchmark/tpcds/sf1/local/delta_attach/q23.benchmark b/benchmark/tpcds/sf1/local/delta_attach/q23.benchmark new file mode 100644 index 0000000..c0b5fff --- /dev/null +++ b/benchmark/tpcds/sf1/local/delta_attach/q23.benchmark @@ -0,0 +1,7 @@ +# name: benchmark/tpcds/sf1/local/delta_attach/q23.benchmark +# description: Run query 23 from the TPC-DS benchmark +# group: [sf1] + +template benchmark/tpcds/sf1/local/delta_attach/tpcds_sf1.benchmark.in +QUERY_NUMBER=23 +QUERY_NUMBER_PADDED=23 diff --git a/benchmark/tpcds/sf1/local/delta_attach/q24.benchmark b/benchmark/tpcds/sf1/local/delta_attach/q24.benchmark new 
file mode 100644 index 0000000..c0c0c9b --- /dev/null +++ b/benchmark/tpcds/sf1/local/delta_attach/q24.benchmark @@ -0,0 +1,7 @@ +# name: benchmark/tpcds/sf1/local/delta_attach/q24.benchmark +# description: Run query 24 from the TPC-DS benchmark +# group: [sf1] + +template benchmark/tpcds/sf1/local/delta_attach/tpcds_sf1.benchmark.in +QUERY_NUMBER=24 +QUERY_NUMBER_PADDED=24 diff --git a/benchmark/tpcds/sf1/local/delta_attach/q25.benchmark b/benchmark/tpcds/sf1/local/delta_attach/q25.benchmark new file mode 100644 index 0000000..60e8e71 --- /dev/null +++ b/benchmark/tpcds/sf1/local/delta_attach/q25.benchmark @@ -0,0 +1,7 @@ +# name: benchmark/tpcds/sf1/local/delta_attach/q25.benchmark +# description: Run query 25 from the TPC-DS benchmark +# group: [sf1] + +template benchmark/tpcds/sf1/local/delta_attach/tpcds_sf1.benchmark.in +QUERY_NUMBER=25 +QUERY_NUMBER_PADDED=25 diff --git a/benchmark/tpcds/sf1/local/delta_attach/q26.benchmark b/benchmark/tpcds/sf1/local/delta_attach/q26.benchmark new file mode 100644 index 0000000..86b5909 --- /dev/null +++ b/benchmark/tpcds/sf1/local/delta_attach/q26.benchmark @@ -0,0 +1,7 @@ +# name: benchmark/tpcds/sf1/local/delta_attach/q26.benchmark +# description: Run query 26 from the TPC-DS benchmark +# group: [sf1] + +template benchmark/tpcds/sf1/local/delta_attach/tpcds_sf1.benchmark.in +QUERY_NUMBER=26 +QUERY_NUMBER_PADDED=26 diff --git a/benchmark/tpcds/sf1/local/delta_attach/q27.benchmark b/benchmark/tpcds/sf1/local/delta_attach/q27.benchmark new file mode 100644 index 0000000..6cc1dd2 --- /dev/null +++ b/benchmark/tpcds/sf1/local/delta_attach/q27.benchmark @@ -0,0 +1,7 @@ +# name: benchmark/tpcds/sf1/local/delta_attach/q27.benchmark +# description: Run query 27 from the TPC-DS benchmark +# group: [sf1] + +template benchmark/tpcds/sf1/local/delta_attach/tpcds_sf1.benchmark.in +QUERY_NUMBER=27 +QUERY_NUMBER_PADDED=27 diff --git a/benchmark/tpcds/sf1/local/delta_attach/q28.benchmark 
b/benchmark/tpcds/sf1/local/delta_attach/q28.benchmark new file mode 100644 index 0000000..1753fca --- /dev/null +++ b/benchmark/tpcds/sf1/local/delta_attach/q28.benchmark @@ -0,0 +1,7 @@ +# name: benchmark/tpcds/sf1/local/delta_attach/q28.benchmark +# description: Run query 28 from the TPC-DS benchmark +# group: [sf1] + +template benchmark/tpcds/sf1/local/delta_attach/tpcds_sf1.benchmark.in +QUERY_NUMBER=28 +QUERY_NUMBER_PADDED=28 diff --git a/benchmark/tpcds/sf1/local/delta_attach/q29.benchmark b/benchmark/tpcds/sf1/local/delta_attach/q29.benchmark new file mode 100644 index 0000000..52e04c2 --- /dev/null +++ b/benchmark/tpcds/sf1/local/delta_attach/q29.benchmark @@ -0,0 +1,7 @@ +# name: benchmark/tpcds/sf1/local/delta_attach/q29.benchmark +# description: Run query 29 from the TPC-DS benchmark +# group: [sf1] + +template benchmark/tpcds/sf1/local/delta_attach/tpcds_sf1.benchmark.in +QUERY_NUMBER=29 +QUERY_NUMBER_PADDED=29 diff --git a/benchmark/tpcds/sf1/local/delta_attach/q30.benchmark b/benchmark/tpcds/sf1/local/delta_attach/q30.benchmark new file mode 100644 index 0000000..ed21eb1 --- /dev/null +++ b/benchmark/tpcds/sf1/local/delta_attach/q30.benchmark @@ -0,0 +1,7 @@ +# name: benchmark/tpcds/sf1/local/delta_attach/q30.benchmark +# description: Run query 30 from the TPC-DS benchmark +# group: [sf1] + +template benchmark/tpcds/sf1/local/delta_attach/tpcds_sf1.benchmark.in +QUERY_NUMBER=30 +QUERY_NUMBER_PADDED=30 diff --git a/benchmark/tpcds/sf1/local/delta_attach/q31.benchmark b/benchmark/tpcds/sf1/local/delta_attach/q31.benchmark new file mode 100644 index 0000000..cdfce45 --- /dev/null +++ b/benchmark/tpcds/sf1/local/delta_attach/q31.benchmark @@ -0,0 +1,7 @@ +# name: benchmark/tpcds/sf1/local/delta_attach/q31.benchmark +# description: Run query 31 from the TPC-DS benchmark +# group: [sf1] + +template benchmark/tpcds/sf1/local/delta_attach/tpcds_sf1.benchmark.in +QUERY_NUMBER=31 +QUERY_NUMBER_PADDED=31 diff --git 
a/benchmark/tpcds/sf1/local/delta_attach/q32.benchmark b/benchmark/tpcds/sf1/local/delta_attach/q32.benchmark new file mode 100644 index 0000000..fea4738 --- /dev/null +++ b/benchmark/tpcds/sf1/local/delta_attach/q32.benchmark @@ -0,0 +1,7 @@ +# name: benchmark/tpcds/sf1/local/delta_attach/q32.benchmark +# description: Run query 32 from the TPC-DS benchmark +# group: [sf1] + +template benchmark/tpcds/sf1/local/delta_attach/tpcds_sf1.benchmark.in +QUERY_NUMBER=32 +QUERY_NUMBER_PADDED=32 diff --git a/benchmark/tpcds/sf1/local/delta_attach/q33.benchmark b/benchmark/tpcds/sf1/local/delta_attach/q33.benchmark new file mode 100644 index 0000000..1cd8f37 --- /dev/null +++ b/benchmark/tpcds/sf1/local/delta_attach/q33.benchmark @@ -0,0 +1,7 @@ +# name: benchmark/tpcds/sf1/local/delta_attach/q33.benchmark +# description: Run query 33 from the TPC-DS benchmark +# group: [sf1] + +template benchmark/tpcds/sf1/local/delta_attach/tpcds_sf1.benchmark.in +QUERY_NUMBER=33 +QUERY_NUMBER_PADDED=33 diff --git a/benchmark/tpcds/sf1/local/delta_attach/q34.benchmark b/benchmark/tpcds/sf1/local/delta_attach/q34.benchmark new file mode 100644 index 0000000..be18764 --- /dev/null +++ b/benchmark/tpcds/sf1/local/delta_attach/q34.benchmark @@ -0,0 +1,7 @@ +# name: benchmark/tpcds/sf1/local/delta_attach/q34.benchmark +# description: Run query 34 from the TPC-DS benchmark +# group: [sf1] + +template benchmark/tpcds/sf1/local/delta_attach/tpcds_sf1.benchmark.in +QUERY_NUMBER=34 +QUERY_NUMBER_PADDED=34 diff --git a/benchmark/tpcds/sf1/local/delta_attach/q35.benchmark b/benchmark/tpcds/sf1/local/delta_attach/q35.benchmark new file mode 100644 index 0000000..1c22f98 --- /dev/null +++ b/benchmark/tpcds/sf1/local/delta_attach/q35.benchmark @@ -0,0 +1,7 @@ +# name: benchmark/tpcds/sf1/local/delta_attach/q35.benchmark +# description: Run query 35 from the TPC-DS benchmark +# group: [sf1] + +template benchmark/tpcds/sf1/local/delta_attach/tpcds_sf1.benchmark.in +QUERY_NUMBER=35 +QUERY_NUMBER_PADDED=35 
diff --git a/benchmark/tpcds/sf1/local/delta_attach/q36.benchmark b/benchmark/tpcds/sf1/local/delta_attach/q36.benchmark new file mode 100644 index 0000000..ea57684 --- /dev/null +++ b/benchmark/tpcds/sf1/local/delta_attach/q36.benchmark @@ -0,0 +1,7 @@ +# name: benchmark/tpcds/sf1/local/delta_attach/q36.benchmark +# description: Run query 36 from the TPC-DS benchmark +# group: [sf1] + +template benchmark/tpcds/sf1/local/delta_attach/tpcds_sf1.benchmark.in +QUERY_NUMBER=36 +QUERY_NUMBER_PADDED=36 diff --git a/benchmark/tpcds/sf1/local/delta_attach/q37.benchmark b/benchmark/tpcds/sf1/local/delta_attach/q37.benchmark new file mode 100644 index 0000000..0a4e760 --- /dev/null +++ b/benchmark/tpcds/sf1/local/delta_attach/q37.benchmark @@ -0,0 +1,7 @@ +# name: benchmark/tpcds/sf1/local/delta_attach/q37.benchmark +# description: Run query 37 from the TPC-DS benchmark +# group: [sf1] + +template benchmark/tpcds/sf1/local/delta_attach/tpcds_sf1.benchmark.in +QUERY_NUMBER=37 +QUERY_NUMBER_PADDED=37 diff --git a/benchmark/tpcds/sf1/local/delta_attach/q38.benchmark b/benchmark/tpcds/sf1/local/delta_attach/q38.benchmark new file mode 100644 index 0000000..1dd0897 --- /dev/null +++ b/benchmark/tpcds/sf1/local/delta_attach/q38.benchmark @@ -0,0 +1,7 @@ +# name: benchmark/tpcds/sf1/local/delta_attach/q38.benchmark +# description: Run query 38 from the TPC-DS benchmark +# group: [sf1] + +template benchmark/tpcds/sf1/local/delta_attach/tpcds_sf1.benchmark.in +QUERY_NUMBER=38 +QUERY_NUMBER_PADDED=38 diff --git a/benchmark/tpcds/sf1/local/delta_attach/q39.benchmark b/benchmark/tpcds/sf1/local/delta_attach/q39.benchmark new file mode 100644 index 0000000..5cb5b1b --- /dev/null +++ b/benchmark/tpcds/sf1/local/delta_attach/q39.benchmark @@ -0,0 +1,7 @@ +# name: benchmark/tpcds/sf1/local/delta_attach/q39.benchmark +# description: Run query 39 from the TPC-DS benchmark +# group: [sf1] + +template benchmark/tpcds/sf1/local/delta_attach/tpcds_sf1.benchmark.in +QUERY_NUMBER=39 
+QUERY_NUMBER_PADDED=39 diff --git a/benchmark/tpcds/sf1/local/delta_attach/q40.benchmark b/benchmark/tpcds/sf1/local/delta_attach/q40.benchmark new file mode 100644 index 0000000..0708d81 --- /dev/null +++ b/benchmark/tpcds/sf1/local/delta_attach/q40.benchmark @@ -0,0 +1,7 @@ +# name: benchmark/tpcds/sf1/local/delta_attach/q40.benchmark +# description: Run query 40 from the TPC-DS benchmark +# group: [sf1] + +template benchmark/tpcds/sf1/local/delta_attach/tpcds_sf1.benchmark.in +QUERY_NUMBER=40 +QUERY_NUMBER_PADDED=40 diff --git a/benchmark/tpcds/sf1/local/delta_attach/q41.benchmark b/benchmark/tpcds/sf1/local/delta_attach/q41.benchmark new file mode 100644 index 0000000..83e027a --- /dev/null +++ b/benchmark/tpcds/sf1/local/delta_attach/q41.benchmark @@ -0,0 +1,7 @@ +# name: benchmark/tpcds/sf1/local/delta_attach/q41.benchmark +# description: Run query 41 from the TPC-DS benchmark +# group: [sf1] + +template benchmark/tpcds/sf1/local/delta_attach/tpcds_sf1.benchmark.in +QUERY_NUMBER=41 +QUERY_NUMBER_PADDED=41 diff --git a/benchmark/tpcds/sf1/local/delta_attach/q42.benchmark b/benchmark/tpcds/sf1/local/delta_attach/q42.benchmark new file mode 100644 index 0000000..051fe20 --- /dev/null +++ b/benchmark/tpcds/sf1/local/delta_attach/q42.benchmark @@ -0,0 +1,7 @@ +# name: benchmark/tpcds/sf1/local/delta_attach/q42.benchmark +# description: Run query 42 from the TPC-DS benchmark +# group: [sf1] + +template benchmark/tpcds/sf1/local/delta_attach/tpcds_sf1.benchmark.in +QUERY_NUMBER=42 +QUERY_NUMBER_PADDED=42 diff --git a/benchmark/tpcds/sf1/local/delta_attach/q43.benchmark b/benchmark/tpcds/sf1/local/delta_attach/q43.benchmark new file mode 100644 index 0000000..4a1c981 --- /dev/null +++ b/benchmark/tpcds/sf1/local/delta_attach/q43.benchmark @@ -0,0 +1,7 @@ +# name: benchmark/tpcds/sf1/local/delta_attach/q43.benchmark +# description: Run query 43 from the TPC-DS benchmark +# group: [sf1] + +template benchmark/tpcds/sf1/local/delta_attach/tpcds_sf1.benchmark.in 
+QUERY_NUMBER=43 +QUERY_NUMBER_PADDED=43 diff --git a/benchmark/tpcds/sf1/local/delta_attach/q44.benchmark b/benchmark/tpcds/sf1/local/delta_attach/q44.benchmark new file mode 100644 index 0000000..42c2a2b --- /dev/null +++ b/benchmark/tpcds/sf1/local/delta_attach/q44.benchmark @@ -0,0 +1,7 @@ +# name: benchmark/tpcds/sf1/local/delta_attach/q44.benchmark +# description: Run query 44 from the TPC-DS benchmark +# group: [sf1] + +template benchmark/tpcds/sf1/local/delta_attach/tpcds_sf1.benchmark.in +QUERY_NUMBER=44 +QUERY_NUMBER_PADDED=44 diff --git a/benchmark/tpcds/sf1/local/delta_attach/q45.benchmark b/benchmark/tpcds/sf1/local/delta_attach/q45.benchmark new file mode 100644 index 0000000..fc25799 --- /dev/null +++ b/benchmark/tpcds/sf1/local/delta_attach/q45.benchmark @@ -0,0 +1,7 @@ +# name: benchmark/tpcds/sf1/local/delta_attach/q45.benchmark +# description: Run query 45 from the TPC-DS benchmark +# group: [sf1] + +template benchmark/tpcds/sf1/local/delta_attach/tpcds_sf1.benchmark.in +QUERY_NUMBER=45 +QUERY_NUMBER_PADDED=45 diff --git a/benchmark/tpcds/sf1/local/delta_attach/q46.benchmark b/benchmark/tpcds/sf1/local/delta_attach/q46.benchmark new file mode 100644 index 0000000..bed3d26 --- /dev/null +++ b/benchmark/tpcds/sf1/local/delta_attach/q46.benchmark @@ -0,0 +1,7 @@ +# name: benchmark/tpcds/sf1/local/delta_attach/q46.benchmark +# description: Run query 46 from the TPC-DS benchmark +# group: [sf1] + +template benchmark/tpcds/sf1/local/delta_attach/tpcds_sf1.benchmark.in +QUERY_NUMBER=46 +QUERY_NUMBER_PADDED=46 diff --git a/benchmark/tpcds/sf1/local/delta_attach/q47.benchmark b/benchmark/tpcds/sf1/local/delta_attach/q47.benchmark new file mode 100644 index 0000000..fd4631e --- /dev/null +++ b/benchmark/tpcds/sf1/local/delta_attach/q47.benchmark @@ -0,0 +1,7 @@ +# name: benchmark/tpcds/sf1/local/delta_attach/q47.benchmark +# description: Run query 47 from the TPC-DS benchmark +# group: [sf1] + +template 
benchmark/tpcds/sf1/local/delta_attach/tpcds_sf1.benchmark.in +QUERY_NUMBER=47 +QUERY_NUMBER_PADDED=47 diff --git a/benchmark/tpcds/sf1/local/delta_attach/q48.benchmark b/benchmark/tpcds/sf1/local/delta_attach/q48.benchmark new file mode 100644 index 0000000..f23dcc2 --- /dev/null +++ b/benchmark/tpcds/sf1/local/delta_attach/q48.benchmark @@ -0,0 +1,7 @@ +# name: benchmark/tpcds/sf1/local/delta_attach/q48.benchmark +# description: Run query 48 from the TPC-DS benchmark +# group: [sf1] + +template benchmark/tpcds/sf1/local/delta_attach/tpcds_sf1.benchmark.in +QUERY_NUMBER=48 +QUERY_NUMBER_PADDED=48 diff --git a/benchmark/tpcds/sf1/local/delta_attach/q49.benchmark b/benchmark/tpcds/sf1/local/delta_attach/q49.benchmark new file mode 100644 index 0000000..18288fb --- /dev/null +++ b/benchmark/tpcds/sf1/local/delta_attach/q49.benchmark @@ -0,0 +1,7 @@ +# name: benchmark/tpcds/sf1/local/delta_attach/q49.benchmark +# description: Run query 49 from the TPC-DS benchmark +# group: [sf1] + +template benchmark/tpcds/sf1/local/delta_attach/tpcds_sf1.benchmark.in +QUERY_NUMBER=49 +QUERY_NUMBER_PADDED=49 diff --git a/benchmark/tpcds/sf1/local/delta_attach/q50.benchmark b/benchmark/tpcds/sf1/local/delta_attach/q50.benchmark new file mode 100644 index 0000000..40eb2cb --- /dev/null +++ b/benchmark/tpcds/sf1/local/delta_attach/q50.benchmark @@ -0,0 +1,7 @@ +# name: benchmark/tpcds/sf1/local/delta_attach/q50.benchmark +# description: Run query 50 from the TPC-DS benchmark +# group: [sf1] + +template benchmark/tpcds/sf1/local/delta_attach/tpcds_sf1.benchmark.in +QUERY_NUMBER=50 +QUERY_NUMBER_PADDED=50 diff --git a/benchmark/tpcds/sf1/local/delta_attach/q51.benchmark b/benchmark/tpcds/sf1/local/delta_attach/q51.benchmark new file mode 100644 index 0000000..1e0357b --- /dev/null +++ b/benchmark/tpcds/sf1/local/delta_attach/q51.benchmark @@ -0,0 +1,7 @@ +# name: benchmark/tpcds/sf1/local/delta_attach/q51.benchmark +# description: Run query 51 from the TPC-DS benchmark +# group: [sf1] + 
+template benchmark/tpcds/sf1/local/delta_attach/tpcds_sf1.benchmark.in +QUERY_NUMBER=51 +QUERY_NUMBER_PADDED=51 diff --git a/benchmark/tpcds/sf1/local/delta_attach/q52.benchmark b/benchmark/tpcds/sf1/local/delta_attach/q52.benchmark new file mode 100644 index 0000000..f0eee1e --- /dev/null +++ b/benchmark/tpcds/sf1/local/delta_attach/q52.benchmark @@ -0,0 +1,7 @@ +# name: benchmark/tpcds/sf1/local/delta_attach/q52.benchmark +# description: Run query 52 from the TPC-DS benchmark +# group: [sf1] + +template benchmark/tpcds/sf1/local/delta_attach/tpcds_sf1.benchmark.in +QUERY_NUMBER=52 +QUERY_NUMBER_PADDED=52 diff --git a/benchmark/tpcds/sf1/local/delta_attach/q53.benchmark b/benchmark/tpcds/sf1/local/delta_attach/q53.benchmark new file mode 100644 index 0000000..a4469ee --- /dev/null +++ b/benchmark/tpcds/sf1/local/delta_attach/q53.benchmark @@ -0,0 +1,7 @@ +# name: benchmark/tpcds/sf1/local/delta_attach/q53.benchmark +# description: Run query 53 from the TPC-DS benchmark +# group: [sf1] + +template benchmark/tpcds/sf1/local/delta_attach/tpcds_sf1.benchmark.in +QUERY_NUMBER=53 +QUERY_NUMBER_PADDED=53 diff --git a/benchmark/tpcds/sf1/local/delta_attach/q54.benchmark b/benchmark/tpcds/sf1/local/delta_attach/q54.benchmark new file mode 100644 index 0000000..ab97b6a --- /dev/null +++ b/benchmark/tpcds/sf1/local/delta_attach/q54.benchmark @@ -0,0 +1,7 @@ +# name: benchmark/tpcds/sf1/local/delta_attach/q54.benchmark +# description: Run query 54 from the TPC-DS benchmark +# group: [sf1] + +template benchmark/tpcds/sf1/local/delta_attach/tpcds_sf1.benchmark.in +QUERY_NUMBER=54 +QUERY_NUMBER_PADDED=54 diff --git a/benchmark/tpcds/sf1/local/delta_attach/q55.benchmark b/benchmark/tpcds/sf1/local/delta_attach/q55.benchmark new file mode 100644 index 0000000..784ddb5 --- /dev/null +++ b/benchmark/tpcds/sf1/local/delta_attach/q55.benchmark @@ -0,0 +1,7 @@ +# name: benchmark/tpcds/sf1/local/delta_attach/q55.benchmark +# description: Run query 55 from the TPC-DS benchmark +# group: 
[sf1] + +template benchmark/tpcds/sf1/local/delta_attach/tpcds_sf1.benchmark.in +QUERY_NUMBER=55 +QUERY_NUMBER_PADDED=55 diff --git a/benchmark/tpcds/sf1/local/delta_attach/q56.benchmark b/benchmark/tpcds/sf1/local/delta_attach/q56.benchmark new file mode 100644 index 0000000..c9e6305 --- /dev/null +++ b/benchmark/tpcds/sf1/local/delta_attach/q56.benchmark @@ -0,0 +1,7 @@ +# name: benchmark/tpcds/sf1/local/delta_attach/q56.benchmark +# description: Run query 56 from the TPC-DS benchmark +# group: [sf1] + +template benchmark/tpcds/sf1/local/delta_attach/tpcds_sf1.benchmark.in +QUERY_NUMBER=56 +QUERY_NUMBER_PADDED=56 diff --git a/benchmark/tpcds/sf1/local/delta_attach/q57.benchmark b/benchmark/tpcds/sf1/local/delta_attach/q57.benchmark new file mode 100644 index 0000000..97a9f31 --- /dev/null +++ b/benchmark/tpcds/sf1/local/delta_attach/q57.benchmark @@ -0,0 +1,7 @@ +# name: benchmark/tpcds/sf1/local/delta_attach/q57.benchmark +# description: Run query 57 from the TPC-DS benchmark +# group: [sf1] + +template benchmark/tpcds/sf1/local/delta_attach/tpcds_sf1.benchmark.in +QUERY_NUMBER=57 +QUERY_NUMBER_PADDED=57 diff --git a/benchmark/tpcds/sf1/local/delta_attach/q58.benchmark b/benchmark/tpcds/sf1/local/delta_attach/q58.benchmark new file mode 100644 index 0000000..c33493b --- /dev/null +++ b/benchmark/tpcds/sf1/local/delta_attach/q58.benchmark @@ -0,0 +1,7 @@ +# name: benchmark/tpcds/sf1/local/delta_attach/q58.benchmark +# description: Run query 58 from the TPC-DS benchmark +# group: [sf1] + +template benchmark/tpcds/sf1/local/delta_attach/tpcds_sf1.benchmark.in +QUERY_NUMBER=58 +QUERY_NUMBER_PADDED=58 diff --git a/benchmark/tpcds/sf1/local/delta_attach/q59.benchmark b/benchmark/tpcds/sf1/local/delta_attach/q59.benchmark new file mode 100644 index 0000000..d8c036b --- /dev/null +++ b/benchmark/tpcds/sf1/local/delta_attach/q59.benchmark @@ -0,0 +1,7 @@ +# name: benchmark/tpcds/sf1/local/delta_attach/q59.benchmark +# description: Run query 59 from the TPC-DS benchmark 
+# group: [sf1] + +template benchmark/tpcds/sf1/local/delta_attach/tpcds_sf1.benchmark.in +QUERY_NUMBER=59 +QUERY_NUMBER_PADDED=59 diff --git a/benchmark/tpcds/sf1/local/delta_attach/q60.benchmark b/benchmark/tpcds/sf1/local/delta_attach/q60.benchmark new file mode 100644 index 0000000..1918bd5 --- /dev/null +++ b/benchmark/tpcds/sf1/local/delta_attach/q60.benchmark @@ -0,0 +1,7 @@ +# name: benchmark/tpcds/sf1/local/delta_attach/q60.benchmark +# description: Run query 60 from the TPC-DS benchmark +# group: [sf1] + +template benchmark/tpcds/sf1/local/delta_attach/tpcds_sf1.benchmark.in +QUERY_NUMBER=60 +QUERY_NUMBER_PADDED=60 diff --git a/benchmark/tpcds/sf1/local/delta_attach/q61.benchmark b/benchmark/tpcds/sf1/local/delta_attach/q61.benchmark new file mode 100644 index 0000000..32f762d --- /dev/null +++ b/benchmark/tpcds/sf1/local/delta_attach/q61.benchmark @@ -0,0 +1,7 @@ +# name: benchmark/tpcds/sf1/local/delta_attach/q61.benchmark +# description: Run query 61 from the TPC-DS benchmark +# group: [sf1] + +template benchmark/tpcds/sf1/local/delta_attach/tpcds_sf1.benchmark.in +QUERY_NUMBER=61 +QUERY_NUMBER_PADDED=61 diff --git a/benchmark/tpcds/sf1/local/delta_attach/q62.benchmark b/benchmark/tpcds/sf1/local/delta_attach/q62.benchmark new file mode 100644 index 0000000..9a373f1 --- /dev/null +++ b/benchmark/tpcds/sf1/local/delta_attach/q62.benchmark @@ -0,0 +1,7 @@ +# name: benchmark/tpcds/sf1/local/delta_attach/q62.benchmark +# description: Run query 62 from the TPC-DS benchmark +# group: [sf1] + +template benchmark/tpcds/sf1/local/delta_attach/tpcds_sf1.benchmark.in +QUERY_NUMBER=62 +QUERY_NUMBER_PADDED=62 diff --git a/benchmark/tpcds/sf1/local/delta_attach/q63.benchmark b/benchmark/tpcds/sf1/local/delta_attach/q63.benchmark new file mode 100644 index 0000000..e7f6b2f --- /dev/null +++ b/benchmark/tpcds/sf1/local/delta_attach/q63.benchmark @@ -0,0 +1,7 @@ +# name: benchmark/tpcds/sf1/local/delta_attach/q63.benchmark +# description: Run query 63 from the TPC-DS 
benchmark +# group: [sf1] + +template benchmark/tpcds/sf1/local/delta_attach/tpcds_sf1.benchmark.in +QUERY_NUMBER=63 +QUERY_NUMBER_PADDED=63 diff --git a/benchmark/tpcds/sf1/local/delta_attach/q64.benchmark b/benchmark/tpcds/sf1/local/delta_attach/q64.benchmark new file mode 100644 index 0000000..80034a7 --- /dev/null +++ b/benchmark/tpcds/sf1/local/delta_attach/q64.benchmark @@ -0,0 +1,7 @@ +# name: benchmark/tpcds/sf1/local/delta_attach/q64.benchmark +# description: Run query 64 from the TPC-DS benchmark +# group: [sf1] + +template benchmark/tpcds/sf1/local/delta_attach/tpcds_sf1.benchmark.in +QUERY_NUMBER=64 +QUERY_NUMBER_PADDED=64 diff --git a/benchmark/tpcds/sf1/local/delta_attach/q65.benchmark b/benchmark/tpcds/sf1/local/delta_attach/q65.benchmark new file mode 100644 index 0000000..c128a3e --- /dev/null +++ b/benchmark/tpcds/sf1/local/delta_attach/q65.benchmark @@ -0,0 +1,7 @@ +# name: benchmark/tpcds/sf1/local/delta_attach/q65.benchmark +# description: Run query 65 from the TPC-DS benchmark +# group: [sf1] + +template benchmark/tpcds/sf1/local/delta_attach/tpcds_sf1.benchmark.in +QUERY_NUMBER=65 +QUERY_NUMBER_PADDED=65 diff --git a/benchmark/tpcds/sf1/local/delta_attach/q66.benchmark b/benchmark/tpcds/sf1/local/delta_attach/q66.benchmark new file mode 100644 index 0000000..587aa3d --- /dev/null +++ b/benchmark/tpcds/sf1/local/delta_attach/q66.benchmark @@ -0,0 +1,7 @@ +# name: benchmark/tpcds/sf1/local/delta_attach/q66.benchmark +# description: Run query 66 from the TPC-DS benchmark +# group: [sf1] + +template benchmark/tpcds/sf1/local/delta_attach/tpcds_sf1.benchmark.in +QUERY_NUMBER=66 +QUERY_NUMBER_PADDED=66 diff --git a/benchmark/tpcds/sf1/local/delta_attach/q67.benchmark b/benchmark/tpcds/sf1/local/delta_attach/q67.benchmark new file mode 100644 index 0000000..6e4d504 --- /dev/null +++ b/benchmark/tpcds/sf1/local/delta_attach/q67.benchmark @@ -0,0 +1,7 @@ +# name: benchmark/tpcds/sf1/local/delta_attach/q67.benchmark +# description: Run query 67 from 
the TPC-DS benchmark +# group: [sf1] + +template benchmark/tpcds/sf1/local/delta_attach/tpcds_sf1.benchmark.in +QUERY_NUMBER=67 +QUERY_NUMBER_PADDED=67 diff --git a/benchmark/tpcds/sf1/local/delta_attach/q68.benchmark b/benchmark/tpcds/sf1/local/delta_attach/q68.benchmark new file mode 100644 index 0000000..8f9b7de --- /dev/null +++ b/benchmark/tpcds/sf1/local/delta_attach/q68.benchmark @@ -0,0 +1,7 @@ +# name: benchmark/tpcds/sf1/local/delta_attach/q68.benchmark +# description: Run query 68 from the TPC-DS benchmark +# group: [sf1] + +template benchmark/tpcds/sf1/local/delta_attach/tpcds_sf1.benchmark.in +QUERY_NUMBER=68 +QUERY_NUMBER_PADDED=68 diff --git a/benchmark/tpcds/sf1/local/delta_attach/q69.benchmark b/benchmark/tpcds/sf1/local/delta_attach/q69.benchmark new file mode 100644 index 0000000..8027996 --- /dev/null +++ b/benchmark/tpcds/sf1/local/delta_attach/q69.benchmark @@ -0,0 +1,7 @@ +# name: benchmark/tpcds/sf1/local/delta_attach/q69.benchmark +# description: Run query 69 from the TPC-DS benchmark +# group: [sf1] + +template benchmark/tpcds/sf1/local/delta_attach/tpcds_sf1.benchmark.in +QUERY_NUMBER=69 +QUERY_NUMBER_PADDED=69 diff --git a/benchmark/tpcds/sf1/local/delta_attach/q70.benchmark b/benchmark/tpcds/sf1/local/delta_attach/q70.benchmark new file mode 100644 index 0000000..f64fc6e --- /dev/null +++ b/benchmark/tpcds/sf1/local/delta_attach/q70.benchmark @@ -0,0 +1,7 @@ +# name: benchmark/tpcds/sf1/local/delta_attach/q70.benchmark +# description: Run query 70 from the TPC-DS benchmark +# group: [sf1] + +template benchmark/tpcds/sf1/local/delta_attach/tpcds_sf1.benchmark.in +QUERY_NUMBER=70 +QUERY_NUMBER_PADDED=70 diff --git a/benchmark/tpcds/sf1/local/delta_attach/q71.benchmark b/benchmark/tpcds/sf1/local/delta_attach/q71.benchmark new file mode 100644 index 0000000..e34c7b5 --- /dev/null +++ b/benchmark/tpcds/sf1/local/delta_attach/q71.benchmark @@ -0,0 +1,7 @@ +# name: benchmark/tpcds/sf1/local/delta_attach/q71.benchmark +# description: Run query 
71 from the TPC-DS benchmark +# group: [sf1] + +template benchmark/tpcds/sf1/local/delta_attach/tpcds_sf1.benchmark.in +QUERY_NUMBER=71 +QUERY_NUMBER_PADDED=71 diff --git a/benchmark/tpcds/sf1/local/delta_attach/q72.benchmark b/benchmark/tpcds/sf1/local/delta_attach/q72.benchmark new file mode 100644 index 0000000..bedf26e --- /dev/null +++ b/benchmark/tpcds/sf1/local/delta_attach/q72.benchmark @@ -0,0 +1,7 @@ +# name: benchmark/tpcds/sf1/local/delta_attach/q72.benchmark +# description: Run query 72 from the TPC-DS benchmark +# group: [sf1] + +template benchmark/tpcds/sf1/local/delta_attach/tpcds_sf1.benchmark.in +QUERY_NUMBER=72 +QUERY_NUMBER_PADDED=72 diff --git a/benchmark/tpcds/sf1/local/delta_attach/q73.benchmark b/benchmark/tpcds/sf1/local/delta_attach/q73.benchmark new file mode 100644 index 0000000..a53877d --- /dev/null +++ b/benchmark/tpcds/sf1/local/delta_attach/q73.benchmark @@ -0,0 +1,7 @@ +# name: benchmark/tpcds/sf1/local/delta_attach/q73.benchmark +# description: Run query 73 from the TPC-DS benchmark +# group: [sf1] + +template benchmark/tpcds/sf1/local/delta_attach/tpcds_sf1.benchmark.in +QUERY_NUMBER=73 +QUERY_NUMBER_PADDED=73 diff --git a/benchmark/tpcds/sf1/local/delta_attach/q74.benchmark b/benchmark/tpcds/sf1/local/delta_attach/q74.benchmark new file mode 100644 index 0000000..da3dbd3 --- /dev/null +++ b/benchmark/tpcds/sf1/local/delta_attach/q74.benchmark @@ -0,0 +1,7 @@ +# name: benchmark/tpcds/sf1/local/delta_attach/q74.benchmark +# description: Run query 74 from the TPC-DS benchmark +# group: [sf1] + +template benchmark/tpcds/sf1/local/delta_attach/tpcds_sf1.benchmark.in +QUERY_NUMBER=74 +QUERY_NUMBER_PADDED=74 diff --git a/benchmark/tpcds/sf1/local/delta_attach/q75.benchmark b/benchmark/tpcds/sf1/local/delta_attach/q75.benchmark new file mode 100644 index 0000000..0970243 --- /dev/null +++ b/benchmark/tpcds/sf1/local/delta_attach/q75.benchmark @@ -0,0 +1,7 @@ +# name: benchmark/tpcds/sf1/local/delta_attach/q75.benchmark +# description: 
Run query 75 from the TPC-DS benchmark +# group: [sf1] + +template benchmark/tpcds/sf1/local/delta_attach/tpcds_sf1.benchmark.in +QUERY_NUMBER=75 +QUERY_NUMBER_PADDED=75 diff --git a/benchmark/tpcds/sf1/local/delta_attach/q76.benchmark b/benchmark/tpcds/sf1/local/delta_attach/q76.benchmark new file mode 100644 index 0000000..3c1d28a --- /dev/null +++ b/benchmark/tpcds/sf1/local/delta_attach/q76.benchmark @@ -0,0 +1,7 @@ +# name: benchmark/tpcds/sf1/local/delta_attach/q76.benchmark +# description: Run query 76 from the TPC-DS benchmark +# group: [sf1] + +template benchmark/tpcds/sf1/local/delta_attach/tpcds_sf1.benchmark.in +QUERY_NUMBER=76 +QUERY_NUMBER_PADDED=76 diff --git a/benchmark/tpcds/sf1/local/delta_attach/q77.benchmark b/benchmark/tpcds/sf1/local/delta_attach/q77.benchmark new file mode 100644 index 0000000..d7bc196 --- /dev/null +++ b/benchmark/tpcds/sf1/local/delta_attach/q77.benchmark @@ -0,0 +1,7 @@ +# name: benchmark/tpcds/sf1/local/delta_attach/q77.benchmark +# description: Run query 77 from the TPC-DS benchmark +# group: [sf1] + +template benchmark/tpcds/sf1/local/delta_attach/tpcds_sf1.benchmark.in +QUERY_NUMBER=77 +QUERY_NUMBER_PADDED=77 diff --git a/benchmark/tpcds/sf1/local/delta_attach/q78.benchmark b/benchmark/tpcds/sf1/local/delta_attach/q78.benchmark new file mode 100644 index 0000000..6f5d6f2 --- /dev/null +++ b/benchmark/tpcds/sf1/local/delta_attach/q78.benchmark @@ -0,0 +1,7 @@ +# name: benchmark/tpcds/sf1/local/delta_attach/q78.benchmark +# description: Run query 78 from the TPC-DS benchmark +# group: [sf1] + +template benchmark/tpcds/sf1/local/delta_attach/tpcds_sf1.benchmark.in +QUERY_NUMBER=78 +QUERY_NUMBER_PADDED=78 diff --git a/benchmark/tpcds/sf1/local/delta_attach/q79.benchmark b/benchmark/tpcds/sf1/local/delta_attach/q79.benchmark new file mode 100644 index 0000000..1d1f213 --- /dev/null +++ b/benchmark/tpcds/sf1/local/delta_attach/q79.benchmark @@ -0,0 +1,7 @@ +# name: benchmark/tpcds/sf1/local/delta_attach/q79.benchmark +# 
description: Run query 79 from the TPC-DS benchmark +# group: [sf1] + +template benchmark/tpcds/sf1/local/delta_attach/tpcds_sf1.benchmark.in +QUERY_NUMBER=79 +QUERY_NUMBER_PADDED=79 diff --git a/benchmark/tpcds/sf1/local/delta_attach/q80.benchmark b/benchmark/tpcds/sf1/local/delta_attach/q80.benchmark new file mode 100644 index 0000000..8245c3a --- /dev/null +++ b/benchmark/tpcds/sf1/local/delta_attach/q80.benchmark @@ -0,0 +1,7 @@ +# name: benchmark/tpcds/sf1/local/delta_attach/q80.benchmark +# description: Run query 80 from the TPC-DS benchmark +# group: [sf1] + +template benchmark/tpcds/sf1/local/delta_attach/tpcds_sf1.benchmark.in +QUERY_NUMBER=80 +QUERY_NUMBER_PADDED=80 diff --git a/benchmark/tpcds/sf1/local/delta_attach/q81.benchmark b/benchmark/tpcds/sf1/local/delta_attach/q81.benchmark new file mode 100644 index 0000000..7f4595e --- /dev/null +++ b/benchmark/tpcds/sf1/local/delta_attach/q81.benchmark @@ -0,0 +1,7 @@ +# name: benchmark/tpcds/sf1/local/delta_attach/q81.benchmark +# description: Run query 81 from the TPC-DS benchmark +# group: [sf1] + +template benchmark/tpcds/sf1/local/delta_attach/tpcds_sf1.benchmark.in +QUERY_NUMBER=81 +QUERY_NUMBER_PADDED=81 diff --git a/benchmark/tpcds/sf1/local/delta_attach/q82.benchmark b/benchmark/tpcds/sf1/local/delta_attach/q82.benchmark new file mode 100644 index 0000000..fe631d4 --- /dev/null +++ b/benchmark/tpcds/sf1/local/delta_attach/q82.benchmark @@ -0,0 +1,7 @@ +# name: benchmark/tpcds/sf1/local/delta_attach/q82.benchmark +# description: Run query 82 from the TPC-DS benchmark +# group: [sf1] + +template benchmark/tpcds/sf1/local/delta_attach/tpcds_sf1.benchmark.in +QUERY_NUMBER=82 +QUERY_NUMBER_PADDED=82 diff --git a/benchmark/tpcds/sf1/local/delta_attach/q83.benchmark b/benchmark/tpcds/sf1/local/delta_attach/q83.benchmark new file mode 100644 index 0000000..a0e776a --- /dev/null +++ b/benchmark/tpcds/sf1/local/delta_attach/q83.benchmark @@ -0,0 +1,7 @@ +# name: 
benchmark/tpcds/sf1/local/delta_attach/q83.benchmark +# description: Run query 83 from the TPC-DS benchmark +# group: [sf1] + +template benchmark/tpcds/sf1/local/delta_attach/tpcds_sf1.benchmark.in +QUERY_NUMBER=83 +QUERY_NUMBER_PADDED=83 diff --git a/benchmark/tpcds/sf1/local/delta_attach/q84.benchmark b/benchmark/tpcds/sf1/local/delta_attach/q84.benchmark new file mode 100644 index 0000000..37b0eff --- /dev/null +++ b/benchmark/tpcds/sf1/local/delta_attach/q84.benchmark @@ -0,0 +1,7 @@ +# name: benchmark/tpcds/sf1/local/delta_attach/q84.benchmark +# description: Run query 84 from the TPC-DS benchmark +# group: [sf1] + +template benchmark/tpcds/sf1/local/delta_attach/tpcds_sf1.benchmark.in +QUERY_NUMBER=84 +QUERY_NUMBER_PADDED=84 diff --git a/benchmark/tpcds/sf1/local/delta_attach/q85.benchmark b/benchmark/tpcds/sf1/local/delta_attach/q85.benchmark new file mode 100644 index 0000000..335038c --- /dev/null +++ b/benchmark/tpcds/sf1/local/delta_attach/q85.benchmark @@ -0,0 +1,7 @@ +# name: benchmark/tpcds/sf1/local/delta_attach/q85.benchmark +# description: Run query 85 from the TPC-DS benchmark +# group: [sf1] + +template benchmark/tpcds/sf1/local/delta_attach/tpcds_sf1.benchmark.in +QUERY_NUMBER=85 +QUERY_NUMBER_PADDED=85 diff --git a/benchmark/tpcds/sf1/local/delta_attach/q86.benchmark b/benchmark/tpcds/sf1/local/delta_attach/q86.benchmark new file mode 100644 index 0000000..4eb4b7a --- /dev/null +++ b/benchmark/tpcds/sf1/local/delta_attach/q86.benchmark @@ -0,0 +1,7 @@ +# name: benchmark/tpcds/sf1/local/delta_attach/q86.benchmark +# description: Run query 86 from the TPC-DS benchmark +# group: [sf1] + +template benchmark/tpcds/sf1/local/delta_attach/tpcds_sf1.benchmark.in +QUERY_NUMBER=86 +QUERY_NUMBER_PADDED=86 diff --git a/benchmark/tpcds/sf1/local/delta_attach/q87.benchmark b/benchmark/tpcds/sf1/local/delta_attach/q87.benchmark new file mode 100644 index 0000000..1247c5b --- /dev/null +++ b/benchmark/tpcds/sf1/local/delta_attach/q87.benchmark @@ -0,0 +1,7 @@ 
+# name: benchmark/tpcds/sf1/local/delta_attach/q87.benchmark +# description: Run query 87 from the TPC-DS benchmark +# group: [sf1] + +template benchmark/tpcds/sf1/local/delta_attach/tpcds_sf1.benchmark.in +QUERY_NUMBER=87 +QUERY_NUMBER_PADDED=87 diff --git a/benchmark/tpcds/sf1/local/delta_attach/q88.benchmark b/benchmark/tpcds/sf1/local/delta_attach/q88.benchmark new file mode 100644 index 0000000..162bf62 --- /dev/null +++ b/benchmark/tpcds/sf1/local/delta_attach/q88.benchmark @@ -0,0 +1,7 @@ +# name: benchmark/tpcds/sf1/local/delta_attach/q88.benchmark +# description: Run query 88 from the TPC-DS benchmark +# group: [sf1] + +template benchmark/tpcds/sf1/local/delta_attach/tpcds_sf1.benchmark.in +QUERY_NUMBER=88 +QUERY_NUMBER_PADDED=88 diff --git a/benchmark/tpcds/sf1/local/delta_attach/q89.benchmark b/benchmark/tpcds/sf1/local/delta_attach/q89.benchmark new file mode 100644 index 0000000..39823df --- /dev/null +++ b/benchmark/tpcds/sf1/local/delta_attach/q89.benchmark @@ -0,0 +1,7 @@ +# name: benchmark/tpcds/sf1/local/delta_attach/q89.benchmark +# description: Run query 89 from the TPC-DS benchmark +# group: [sf1] + +template benchmark/tpcds/sf1/local/delta_attach/tpcds_sf1.benchmark.in +QUERY_NUMBER=89 +QUERY_NUMBER_PADDED=89 diff --git a/benchmark/tpcds/sf1/local/delta_attach/q90.benchmark b/benchmark/tpcds/sf1/local/delta_attach/q90.benchmark new file mode 100644 index 0000000..ba0cedf --- /dev/null +++ b/benchmark/tpcds/sf1/local/delta_attach/q90.benchmark @@ -0,0 +1,7 @@ +# name: benchmark/tpcds/sf1/local/delta_attach/q90.benchmark +# description: Run query 90 from the TPC-DS benchmark +# group: [sf1] + +template benchmark/tpcds/sf1/local/delta_attach/tpcds_sf1.benchmark.in +QUERY_NUMBER=90 +QUERY_NUMBER_PADDED=90 diff --git a/benchmark/tpcds/sf1/local/delta_attach/q91.benchmark b/benchmark/tpcds/sf1/local/delta_attach/q91.benchmark new file mode 100644 index 0000000..b8e20a6 --- /dev/null +++ b/benchmark/tpcds/sf1/local/delta_attach/q91.benchmark @@ -0,0 
+1,7 @@ +# name: benchmark/tpcds/sf1/local/delta_attach/q91.benchmark +# description: Run query 91 from the TPC-DS benchmark +# group: [sf1] + +template benchmark/tpcds/sf1/local/delta_attach/tpcds_sf1.benchmark.in +QUERY_NUMBER=91 +QUERY_NUMBER_PADDED=91 diff --git a/benchmark/tpcds/sf1/local/delta_attach/q92.benchmark b/benchmark/tpcds/sf1/local/delta_attach/q92.benchmark new file mode 100644 index 0000000..21e2e16 --- /dev/null +++ b/benchmark/tpcds/sf1/local/delta_attach/q92.benchmark @@ -0,0 +1,7 @@ +# name: benchmark/tpcds/sf1/local/delta_attach/q92.benchmark +# description: Run query 92 from the TPC-DS benchmark +# group: [sf1] + +template benchmark/tpcds/sf1/local/delta_attach/tpcds_sf1.benchmark.in +QUERY_NUMBER=92 +QUERY_NUMBER_PADDED=92 diff --git a/benchmark/tpcds/sf1/local/delta_attach/q93.benchmark b/benchmark/tpcds/sf1/local/delta_attach/q93.benchmark new file mode 100644 index 0000000..479c894 --- /dev/null +++ b/benchmark/tpcds/sf1/local/delta_attach/q93.benchmark @@ -0,0 +1,7 @@ +# name: benchmark/tpcds/sf1/local/delta_attach/q93.benchmark +# description: Run query 93 from the TPC-DS benchmark +# group: [sf1] + +template benchmark/tpcds/sf1/local/delta_attach/tpcds_sf1.benchmark.in +QUERY_NUMBER=93 +QUERY_NUMBER_PADDED=93 diff --git a/benchmark/tpcds/sf1/local/delta_attach/q94.benchmark b/benchmark/tpcds/sf1/local/delta_attach/q94.benchmark new file mode 100644 index 0000000..0395902 --- /dev/null +++ b/benchmark/tpcds/sf1/local/delta_attach/q94.benchmark @@ -0,0 +1,7 @@ +# name: benchmark/tpcds/sf1/local/delta_attach/q94.benchmark +# description: Run query 94 from the TPC-DS benchmark +# group: [sf1] + +template benchmark/tpcds/sf1/local/delta_attach/tpcds_sf1.benchmark.in +QUERY_NUMBER=94 +QUERY_NUMBER_PADDED=94 diff --git a/benchmark/tpcds/sf1/local/delta_attach/q95.benchmark b/benchmark/tpcds/sf1/local/delta_attach/q95.benchmark new file mode 100644 index 0000000..b268027 --- /dev/null +++ b/benchmark/tpcds/sf1/local/delta_attach/q95.benchmark 
@@ -0,0 +1,7 @@ +# name: benchmark/tpcds/sf1/local/delta_attach/q95.benchmark +# description: Run query 95 from the TPC-DS benchmark +# group: [sf1] + +template benchmark/tpcds/sf1/local/delta_attach/tpcds_sf1.benchmark.in +QUERY_NUMBER=95 +QUERY_NUMBER_PADDED=95 diff --git a/benchmark/tpcds/sf1/local/delta_attach/q96.benchmark b/benchmark/tpcds/sf1/local/delta_attach/q96.benchmark new file mode 100644 index 0000000..b393858 --- /dev/null +++ b/benchmark/tpcds/sf1/local/delta_attach/q96.benchmark @@ -0,0 +1,7 @@ +# name: benchmark/tpcds/sf1/local/delta_attach/q96.benchmark +# description: Run query 96 from the TPC-DS benchmark +# group: [sf1] + +template benchmark/tpcds/sf1/local/delta_attach/tpcds_sf1.benchmark.in +QUERY_NUMBER=96 +QUERY_NUMBER_PADDED=96 diff --git a/benchmark/tpcds/sf1/local/delta_attach/q97.benchmark b/benchmark/tpcds/sf1/local/delta_attach/q97.benchmark new file mode 100644 index 0000000..6793c9c --- /dev/null +++ b/benchmark/tpcds/sf1/local/delta_attach/q97.benchmark @@ -0,0 +1,7 @@ +# name: benchmark/tpcds/sf1/local/delta_attach/q97.benchmark +# description: Run query 97 from the TPC-DS benchmark +# group: [sf1] + +template benchmark/tpcds/sf1/local/delta_attach/tpcds_sf1.benchmark.in +QUERY_NUMBER=97 +QUERY_NUMBER_PADDED=97 diff --git a/benchmark/tpcds/sf1/local/delta_attach/q98.benchmark b/benchmark/tpcds/sf1/local/delta_attach/q98.benchmark new file mode 100644 index 0000000..88113c0 --- /dev/null +++ b/benchmark/tpcds/sf1/local/delta_attach/q98.benchmark @@ -0,0 +1,7 @@ +# name: benchmark/tpcds/sf1/local/delta_attach/q98.benchmark +# description: Run query 98 from the TPC-DS benchmark +# group: [sf1] + +template benchmark/tpcds/sf1/local/delta_attach/tpcds_sf1.benchmark.in +QUERY_NUMBER=98 +QUERY_NUMBER_PADDED=98 diff --git a/benchmark/tpcds/sf1/local/delta_attach/q99.benchmark b/benchmark/tpcds/sf1/local/delta_attach/q99.benchmark new file mode 100644 index 0000000..13fc449 --- /dev/null +++ 
b/benchmark/tpcds/sf1/local/delta_attach/q99.benchmark @@ -0,0 +1,7 @@ +# name: benchmark/tpcds/sf1/local/delta_attach/q99.benchmark +# description: Run query 99 from the TPC-DS benchmark +# group: [sf1] + +template benchmark/tpcds/sf1/local/delta_attach/tpcds_sf1.benchmark.in +QUERY_NUMBER=99 +QUERY_NUMBER_PADDED=99 diff --git a/benchmark/tpcds/sf1/local/delta_attach/tpcds_sf1.benchmark.in b/benchmark/tpcds/sf1/local/delta_attach/tpcds_sf1.benchmark.in new file mode 100644 index 0000000..480092b --- /dev/null +++ b/benchmark/tpcds/sf1/local/delta_attach/tpcds_sf1.benchmark.in @@ -0,0 +1,17 @@ +# name: ${FILE_PATH} +# description: ${DESCRIPTION} +# group: [tpcds-sf1] + +name DSQ${QUERY_NUMBER_PADDED} +group tpcds +subgroup sf1 + +require delta + +require parquet + +load benchmark/tpcds/sf1/local/delta_attach/load.sql + +run duckdb/extension/tpcds/dsdgen/queries/${QUERY_NUMBER_PADDED}.sql + +result duckdb/extension/tpcds/dsdgen/answers/sf1/${QUERY_NUMBER_PADDED}.csv diff --git a/benchmark/tpcds/sf1/local/delta_attach_pin/load.sql b/benchmark/tpcds/sf1/local/delta_attach_pin/load.sql new file mode 100644 index 0000000..065d09a --- /dev/null +++ b/benchmark/tpcds/sf1/local/delta_attach_pin/load.sql @@ -0,0 +1,24 @@ +ATTACH './data/generated/tpcds_sf1/call_center/delta_lake' as call_center (TYPE delta, PIN_SNAPSHOT); +ATTACH './data/generated/tpcds_sf1/catalog_page/delta_lake' as catalog_page (TYPE delta, PIN_SNAPSHOT); +ATTACH './data/generated/tpcds_sf1/catalog_returns/delta_lake' as catalog_returns (TYPE delta, PIN_SNAPSHOT); +ATTACH './data/generated/tpcds_sf1/catalog_sales/delta_lake' as catalog_sales (TYPE delta, PIN_SNAPSHOT); +ATTACH './data/generated/tpcds_sf1/customer/delta_lake' as customer (TYPE delta, PIN_SNAPSHOT); +ATTACH './data/generated/tpcds_sf1/customer_demographics/delta_lake' as customer_demographics (TYPE delta, PIN_SNAPSHOT); +ATTACH './data/generated/tpcds_sf1/customer_address/delta_lake' as customer_address (TYPE delta, PIN_SNAPSHOT); +ATTACH 
'./data/generated/tpcds_sf1/date_dim/delta_lake' as date_dim (TYPE delta, PIN_SNAPSHOT); +ATTACH './data/generated/tpcds_sf1/household_demographics/delta_lake' as household_demographics (TYPE delta, PIN_SNAPSHOT); +ATTACH './data/generated/tpcds_sf1/inventory/delta_lake' as inventory (TYPE delta, PIN_SNAPSHOT); +ATTACH './data/generated/tpcds_sf1/income_band/delta_lake' as income_band (TYPE delta, PIN_SNAPSHOT); +ATTACH './data/generated/tpcds_sf1/item/delta_lake' as item (TYPE delta, PIN_SNAPSHOT); +ATTACH './data/generated/tpcds_sf1/promotion/delta_lake' as promotion (TYPE delta, PIN_SNAPSHOT); +ATTACH './data/generated/tpcds_sf1/reason/delta_lake' as reason (TYPE delta, PIN_SNAPSHOT); +ATTACH './data/generated/tpcds_sf1/ship_mode/delta_lake' as ship_mode (TYPE delta, PIN_SNAPSHOT); +ATTACH './data/generated/tpcds_sf1/store/delta_lake' as store (TYPE delta, PIN_SNAPSHOT); +ATTACH './data/generated/tpcds_sf1/store_returns/delta_lake' as store_returns (TYPE delta, PIN_SNAPSHOT); +ATTACH './data/generated/tpcds_sf1/store_sales/delta_lake' as store_sales (TYPE delta, PIN_SNAPSHOT); +ATTACH './data/generated/tpcds_sf1/time_dim/delta_lake' as time_dim (TYPE delta, PIN_SNAPSHOT); +ATTACH './data/generated/tpcds_sf1/warehouse/delta_lake' as warehouse (TYPE delta, PIN_SNAPSHOT); +ATTACH './data/generated/tpcds_sf1/web_page/delta_lake' as web_page (TYPE delta, PIN_SNAPSHOT); +ATTACH './data/generated/tpcds_sf1/web_returns/delta_lake' as web_returns (TYPE delta, PIN_SNAPSHOT); +ATTACH './data/generated/tpcds_sf1/web_sales/delta_lake' as web_sales (TYPE delta, PIN_SNAPSHOT); +ATTACH './data/generated/tpcds_sf1/web_site/delta_lake' as web_site (TYPE delta, PIN_SNAPSHOT); \ No newline at end of file diff --git a/benchmark/tpcds/sf1/local/delta_attach_pin/q01.benchmark b/benchmark/tpcds/sf1/local/delta_attach_pin/q01.benchmark new file mode 100644 index 0000000..fabac15 --- /dev/null +++ b/benchmark/tpcds/sf1/local/delta_attach_pin/q01.benchmark @@ -0,0 +1,7 @@ +# name: 
benchmark/tpcds/sf1/local/delta_attach_pin/q01.benchmark +# description: Run query 01 from the TPC-DS benchmark +# group: [sf1] + +template benchmark/tpcds/sf1/local/delta_attach_pin/tpcds_sf1.benchmark.in +QUERY_NUMBER=1 +QUERY_NUMBER_PADDED=01 diff --git a/benchmark/tpcds/sf1/local/delta_attach_pin/q02.benchmark b/benchmark/tpcds/sf1/local/delta_attach_pin/q02.benchmark new file mode 100644 index 0000000..7862a35 --- /dev/null +++ b/benchmark/tpcds/sf1/local/delta_attach_pin/q02.benchmark @@ -0,0 +1,7 @@ +# name: benchmark/tpcds/sf1/local/delta_attach_pin/q02.benchmark +# description: Run query 02 from the TPC-DS benchmark +# group: [sf1] + +template benchmark/tpcds/sf1/local/delta_attach_pin/tpcds_sf1.benchmark.in +QUERY_NUMBER=2 +QUERY_NUMBER_PADDED=02 diff --git a/benchmark/tpcds/sf1/local/delta_attach_pin/q03.benchmark b/benchmark/tpcds/sf1/local/delta_attach_pin/q03.benchmark new file mode 100644 index 0000000..a6e53ab --- /dev/null +++ b/benchmark/tpcds/sf1/local/delta_attach_pin/q03.benchmark @@ -0,0 +1,7 @@ +# name: benchmark/tpcds/sf1/local/delta_attach_pin/q03.benchmark +# description: Run query 03 from the TPC-DS benchmark +# group: [sf1] + +template benchmark/tpcds/sf1/local/delta_attach_pin/tpcds_sf1.benchmark.in +QUERY_NUMBER=3 +QUERY_NUMBER_PADDED=03 diff --git a/benchmark/tpcds/sf1/local/delta_attach_pin/q04.benchmark b/benchmark/tpcds/sf1/local/delta_attach_pin/q04.benchmark new file mode 100644 index 0000000..ad4bb1d --- /dev/null +++ b/benchmark/tpcds/sf1/local/delta_attach_pin/q04.benchmark @@ -0,0 +1,7 @@ +# name: benchmark/tpcds/sf1/local/delta_attach_pin/q04.benchmark +# description: Run query 04 from the TPC-DS benchmark +# group: [sf1] + +template benchmark/tpcds/sf1/local/delta_attach_pin/tpcds_sf1.benchmark.in +QUERY_NUMBER=4 +QUERY_NUMBER_PADDED=04 diff --git a/benchmark/tpcds/sf1/local/delta_attach_pin/q05.benchmark b/benchmark/tpcds/sf1/local/delta_attach_pin/q05.benchmark new file mode 100644 index 0000000..44daae0 --- /dev/null +++ 
b/benchmark/tpcds/sf1/local/delta_attach_pin/q05.benchmark @@ -0,0 +1,7 @@ +# name: benchmark/tpcds/sf1/local/delta_attach_pin/q05.benchmark +# description: Run query 05 from the TPC-DS benchmark +# group: [sf1] + +template benchmark/tpcds/sf1/local/delta_attach_pin/tpcds_sf1.benchmark.in +QUERY_NUMBER=5 +QUERY_NUMBER_PADDED=05 diff --git a/benchmark/tpcds/sf1/local/delta_attach_pin/q06.benchmark b/benchmark/tpcds/sf1/local/delta_attach_pin/q06.benchmark new file mode 100644 index 0000000..e4a0231 --- /dev/null +++ b/benchmark/tpcds/sf1/local/delta_attach_pin/q06.benchmark @@ -0,0 +1,7 @@ +# name: benchmark/tpcds/sf1/local/delta_attach_pin/q06.benchmark +# description: Run query 06 from the TPC-DS benchmark +# group: [sf1] + +template benchmark/tpcds/sf1/local/delta_attach_pin/tpcds_sf1.benchmark.in +QUERY_NUMBER=6 +QUERY_NUMBER_PADDED=06 diff --git a/benchmark/tpcds/sf1/local/delta_attach_pin/q07.benchmark b/benchmark/tpcds/sf1/local/delta_attach_pin/q07.benchmark new file mode 100644 index 0000000..4585bcc --- /dev/null +++ b/benchmark/tpcds/sf1/local/delta_attach_pin/q07.benchmark @@ -0,0 +1,7 @@ +# name: benchmark/tpcds/sf1/local/delta_attach_pin/q07.benchmark +# description: Run query 07 from the TPC-DS benchmark +# group: [sf1] + +template benchmark/tpcds/sf1/local/delta_attach_pin/tpcds_sf1.benchmark.in +QUERY_NUMBER=7 +QUERY_NUMBER_PADDED=07 diff --git a/benchmark/tpcds/sf1/local/delta_attach_pin/q08.benchmark b/benchmark/tpcds/sf1/local/delta_attach_pin/q08.benchmark new file mode 100644 index 0000000..1a561d1 --- /dev/null +++ b/benchmark/tpcds/sf1/local/delta_attach_pin/q08.benchmark @@ -0,0 +1,7 @@ +# name: benchmark/tpcds/sf1/local/delta_attach_pin/q08.benchmark +# description: Run query 08 from the TPC-DS benchmark +# group: [sf1] + +template benchmark/tpcds/sf1/local/delta_attach_pin/tpcds_sf1.benchmark.in +QUERY_NUMBER=8 +QUERY_NUMBER_PADDED=08 diff --git a/benchmark/tpcds/sf1/local/delta_attach_pin/q09.benchmark 
b/benchmark/tpcds/sf1/local/delta_attach_pin/q09.benchmark new file mode 100644 index 0000000..d906a6a --- /dev/null +++ b/benchmark/tpcds/sf1/local/delta_attach_pin/q09.benchmark @@ -0,0 +1,7 @@ +# name: benchmark/tpcds/sf1/local/delta_attach_pin/q09.benchmark +# description: Run query 09 from the TPC-DS benchmark +# group: [sf1] + +template benchmark/tpcds/sf1/local/delta_attach_pin/tpcds_sf1.benchmark.in +QUERY_NUMBER=9 +QUERY_NUMBER_PADDED=09 diff --git a/benchmark/tpcds/sf1/local/delta_attach_pin/q10.benchmark b/benchmark/tpcds/sf1/local/delta_attach_pin/q10.benchmark new file mode 100644 index 0000000..6903f11 --- /dev/null +++ b/benchmark/tpcds/sf1/local/delta_attach_pin/q10.benchmark @@ -0,0 +1,7 @@ +# name: benchmark/tpcds/sf1/local/delta_attach_pin/q10.benchmark +# description: Run query 10 from the TPC-DS benchmark +# group: [sf1] + +template benchmark/tpcds/sf1/local/delta_attach_pin/tpcds_sf1.benchmark.in +QUERY_NUMBER=10 +QUERY_NUMBER_PADDED=10 diff --git a/benchmark/tpcds/sf1/local/delta_attach_pin/q11.benchmark b/benchmark/tpcds/sf1/local/delta_attach_pin/q11.benchmark new file mode 100644 index 0000000..7ff3f46 --- /dev/null +++ b/benchmark/tpcds/sf1/local/delta_attach_pin/q11.benchmark @@ -0,0 +1,7 @@ +# name: benchmark/tpcds/sf1/local/delta_attach_pin/q11.benchmark +# description: Run query 11 from the TPC-DS benchmark +# group: [sf1] + +template benchmark/tpcds/sf1/local/delta_attach_pin/tpcds_sf1.benchmark.in +QUERY_NUMBER=11 +QUERY_NUMBER_PADDED=11 diff --git a/benchmark/tpcds/sf1/local/delta_attach_pin/q12.benchmark b/benchmark/tpcds/sf1/local/delta_attach_pin/q12.benchmark new file mode 100644 index 0000000..0f9cdbe --- /dev/null +++ b/benchmark/tpcds/sf1/local/delta_attach_pin/q12.benchmark @@ -0,0 +1,7 @@ +# name: benchmark/tpcds/sf1/local/delta_attach_pin/q12.benchmark +# description: Run query 12 from the TPC-DS benchmark +# group: [sf1] + +template benchmark/tpcds/sf1/local/delta_attach_pin/tpcds_sf1.benchmark.in +QUERY_NUMBER=12 
+QUERY_NUMBER_PADDED=12 diff --git a/benchmark/tpcds/sf1/local/delta_attach_pin/q13.benchmark b/benchmark/tpcds/sf1/local/delta_attach_pin/q13.benchmark new file mode 100644 index 0000000..1c2e511 --- /dev/null +++ b/benchmark/tpcds/sf1/local/delta_attach_pin/q13.benchmark @@ -0,0 +1,7 @@ +# name: benchmark/tpcds/sf1/local/delta_attach_pin/q13.benchmark +# description: Run query 13 from the TPC-DS benchmark +# group: [sf1] + +template benchmark/tpcds/sf1/local/delta_attach_pin/tpcds_sf1.benchmark.in +QUERY_NUMBER=13 +QUERY_NUMBER_PADDED=13 diff --git a/benchmark/tpcds/sf1/local/delta_attach_pin/q14.benchmark b/benchmark/tpcds/sf1/local/delta_attach_pin/q14.benchmark new file mode 100644 index 0000000..730ecbe --- /dev/null +++ b/benchmark/tpcds/sf1/local/delta_attach_pin/q14.benchmark @@ -0,0 +1,7 @@ +# name: benchmark/tpcds/sf1/local/delta_attach_pin/q14.benchmark +# description: Run query 14 from the TPC-DS benchmark +# group: [sf1] + +template benchmark/tpcds/sf1/local/delta_attach_pin/tpcds_sf1.benchmark.in +QUERY_NUMBER=14 +QUERY_NUMBER_PADDED=14 diff --git a/benchmark/tpcds/sf1/local/delta_attach_pin/q15.benchmark b/benchmark/tpcds/sf1/local/delta_attach_pin/q15.benchmark new file mode 100644 index 0000000..92f872b --- /dev/null +++ b/benchmark/tpcds/sf1/local/delta_attach_pin/q15.benchmark @@ -0,0 +1,7 @@ +# name: benchmark/tpcds/sf1/local/delta_attach_pin/q15.benchmark +# description: Run query 15 from the TPC-DS benchmark +# group: [sf1] + +template benchmark/tpcds/sf1/local/delta_attach_pin/tpcds_sf1.benchmark.in +QUERY_NUMBER=15 +QUERY_NUMBER_PADDED=15 diff --git a/benchmark/tpcds/sf1/local/delta_attach_pin/q16.benchmark b/benchmark/tpcds/sf1/local/delta_attach_pin/q16.benchmark new file mode 100644 index 0000000..0b3c74f --- /dev/null +++ b/benchmark/tpcds/sf1/local/delta_attach_pin/q16.benchmark @@ -0,0 +1,7 @@ +# name: benchmark/tpcds/sf1/local/delta_attach_pin/q16.benchmark +# description: Run query 16 from the TPC-DS benchmark +# group: [sf1] + 
+template benchmark/tpcds/sf1/local/delta_attach_pin/tpcds_sf1.benchmark.in +QUERY_NUMBER=16 +QUERY_NUMBER_PADDED=16 diff --git a/benchmark/tpcds/sf1/local/delta_attach_pin/q17.benchmark b/benchmark/tpcds/sf1/local/delta_attach_pin/q17.benchmark new file mode 100644 index 0000000..cb96118 --- /dev/null +++ b/benchmark/tpcds/sf1/local/delta_attach_pin/q17.benchmark @@ -0,0 +1,7 @@ +# name: benchmark/tpcds/sf1/local/delta_attach_pin/q17.benchmark +# description: Run query 17 from the TPC-DS benchmark +# group: [sf1] + +template benchmark/tpcds/sf1/local/delta_attach_pin/tpcds_sf1.benchmark.in +QUERY_NUMBER=17 +QUERY_NUMBER_PADDED=17 diff --git a/benchmark/tpcds/sf1/local/delta_attach_pin/q18.benchmark b/benchmark/tpcds/sf1/local/delta_attach_pin/q18.benchmark new file mode 100644 index 0000000..dc2d5f3 --- /dev/null +++ b/benchmark/tpcds/sf1/local/delta_attach_pin/q18.benchmark @@ -0,0 +1,7 @@ +# name: benchmark/tpcds/sf1/local/delta_attach_pin/q18.benchmark +# description: Run query 18 from the TPC-DS benchmark +# group: [sf1] + +template benchmark/tpcds/sf1/local/delta_attach_pin/tpcds_sf1.benchmark.in +QUERY_NUMBER=18 +QUERY_NUMBER_PADDED=18 diff --git a/benchmark/tpcds/sf1/local/delta_attach_pin/q19.benchmark b/benchmark/tpcds/sf1/local/delta_attach_pin/q19.benchmark new file mode 100644 index 0000000..eec75af --- /dev/null +++ b/benchmark/tpcds/sf1/local/delta_attach_pin/q19.benchmark @@ -0,0 +1,7 @@ +# name: benchmark/tpcds/sf1/local/delta_attach_pin/q19.benchmark +# description: Run query 19 from the TPC-DS benchmark +# group: [sf1] + +template benchmark/tpcds/sf1/local/delta_attach_pin/tpcds_sf1.benchmark.in +QUERY_NUMBER=19 +QUERY_NUMBER_PADDED=19 diff --git a/benchmark/tpcds/sf1/local/delta_attach_pin/q20.benchmark b/benchmark/tpcds/sf1/local/delta_attach_pin/q20.benchmark new file mode 100644 index 0000000..ace71c4 --- /dev/null +++ b/benchmark/tpcds/sf1/local/delta_attach_pin/q20.benchmark @@ -0,0 +1,7 @@ +# name: 
benchmark/tpcds/sf1/local/delta_attach_pin/q20.benchmark +# description: Run query 20 from the TPC-DS benchmark +# group: [sf1] + +template benchmark/tpcds/sf1/local/delta_attach_pin/tpcds_sf1.benchmark.in +QUERY_NUMBER=20 +QUERY_NUMBER_PADDED=20 diff --git a/benchmark/tpcds/sf1/local/delta_attach_pin/q21.benchmark b/benchmark/tpcds/sf1/local/delta_attach_pin/q21.benchmark new file mode 100644 index 0000000..bf79d67 --- /dev/null +++ b/benchmark/tpcds/sf1/local/delta_attach_pin/q21.benchmark @@ -0,0 +1,7 @@ +# name: benchmark/tpcds/sf1/local/delta_attach_pin/q21.benchmark +# description: Run query 21 from the TPC-DS benchmark +# group: [sf1] + +template benchmark/tpcds/sf1/local/delta_attach_pin/tpcds_sf1.benchmark.in +QUERY_NUMBER=21 +QUERY_NUMBER_PADDED=21 diff --git a/benchmark/tpcds/sf1/local/delta_attach_pin/q22.benchmark b/benchmark/tpcds/sf1/local/delta_attach_pin/q22.benchmark new file mode 100644 index 0000000..e6faf31 --- /dev/null +++ b/benchmark/tpcds/sf1/local/delta_attach_pin/q22.benchmark @@ -0,0 +1,7 @@ +# name: benchmark/tpcds/sf1/local/delta_attach_pin/q22.benchmark +# description: Run query 22 from the TPC-DS benchmark +# group: [sf1] + +template benchmark/tpcds/sf1/local/delta_attach_pin/tpcds_sf1.benchmark.in +QUERY_NUMBER=22 +QUERY_NUMBER_PADDED=22 diff --git a/benchmark/tpcds/sf1/local/delta_attach_pin/q23.benchmark b/benchmark/tpcds/sf1/local/delta_attach_pin/q23.benchmark new file mode 100644 index 0000000..68fd0a2 --- /dev/null +++ b/benchmark/tpcds/sf1/local/delta_attach_pin/q23.benchmark @@ -0,0 +1,7 @@ +# name: benchmark/tpcds/sf1/local/delta_attach_pin/q23.benchmark +# description: Run query 23 from the TPC-DS benchmark +# group: [sf1] + +template benchmark/tpcds/sf1/local/delta_attach_pin/tpcds_sf1.benchmark.in +QUERY_NUMBER=23 +QUERY_NUMBER_PADDED=23 diff --git a/benchmark/tpcds/sf1/local/delta_attach_pin/q24.benchmark b/benchmark/tpcds/sf1/local/delta_attach_pin/q24.benchmark new file mode 100644 index 0000000..96de8d1 --- /dev/null 
+++ b/benchmark/tpcds/sf1/local/delta_attach_pin/q24.benchmark @@ -0,0 +1,7 @@ +# name: benchmark/tpcds/sf1/local/delta_attach_pin/q24.benchmark +# description: Run query 24 from the TPC-DS benchmark +# group: [sf1] + +template benchmark/tpcds/sf1/local/delta_attach_pin/tpcds_sf1.benchmark.in +QUERY_NUMBER=24 +QUERY_NUMBER_PADDED=24 diff --git a/benchmark/tpcds/sf1/local/delta_attach_pin/q25.benchmark b/benchmark/tpcds/sf1/local/delta_attach_pin/q25.benchmark new file mode 100644 index 0000000..027fcf3 --- /dev/null +++ b/benchmark/tpcds/sf1/local/delta_attach_pin/q25.benchmark @@ -0,0 +1,7 @@ +# name: benchmark/tpcds/sf1/local/delta_attach_pin/q25.benchmark +# description: Run query 25 from the TPC-DS benchmark +# group: [sf1] + +template benchmark/tpcds/sf1/local/delta_attach_pin/tpcds_sf1.benchmark.in +QUERY_NUMBER=25 +QUERY_NUMBER_PADDED=25 diff --git a/benchmark/tpcds/sf1/local/delta_attach_pin/q26.benchmark b/benchmark/tpcds/sf1/local/delta_attach_pin/q26.benchmark new file mode 100644 index 0000000..961b4ad --- /dev/null +++ b/benchmark/tpcds/sf1/local/delta_attach_pin/q26.benchmark @@ -0,0 +1,7 @@ +# name: benchmark/tpcds/sf1/local/delta_attach_pin/q26.benchmark +# description: Run query 26 from the TPC-DS benchmark +# group: [sf1] + +template benchmark/tpcds/sf1/local/delta_attach_pin/tpcds_sf1.benchmark.in +QUERY_NUMBER=26 +QUERY_NUMBER_PADDED=26 diff --git a/benchmark/tpcds/sf1/local/delta_attach_pin/q27.benchmark b/benchmark/tpcds/sf1/local/delta_attach_pin/q27.benchmark new file mode 100644 index 0000000..b8bf0c1 --- /dev/null +++ b/benchmark/tpcds/sf1/local/delta_attach_pin/q27.benchmark @@ -0,0 +1,7 @@ +# name: benchmark/tpcds/sf1/local/delta_attach_pin/q27.benchmark +# description: Run query 27 from the TPC-DS benchmark +# group: [sf1] + +template benchmark/tpcds/sf1/local/delta_attach_pin/tpcds_sf1.benchmark.in +QUERY_NUMBER=27 +QUERY_NUMBER_PADDED=27 diff --git a/benchmark/tpcds/sf1/local/delta_attach_pin/q28.benchmark 
b/benchmark/tpcds/sf1/local/delta_attach_pin/q28.benchmark new file mode 100644 index 0000000..624550a --- /dev/null +++ b/benchmark/tpcds/sf1/local/delta_attach_pin/q28.benchmark @@ -0,0 +1,7 @@ +# name: benchmark/tpcds/sf1/local/delta_attach_pin/q28.benchmark +# description: Run query 28 from the TPC-DS benchmark +# group: [sf1] + +template benchmark/tpcds/sf1/local/delta_attach_pin/tpcds_sf1.benchmark.in +QUERY_NUMBER=28 +QUERY_NUMBER_PADDED=28 diff --git a/benchmark/tpcds/sf1/local/delta_attach_pin/q29.benchmark b/benchmark/tpcds/sf1/local/delta_attach_pin/q29.benchmark new file mode 100644 index 0000000..6174019 --- /dev/null +++ b/benchmark/tpcds/sf1/local/delta_attach_pin/q29.benchmark @@ -0,0 +1,7 @@ +# name: benchmark/tpcds/sf1/local/delta_attach_pin/q29.benchmark +# description: Run query 29 from the TPC-DS benchmark +# group: [sf1] + +template benchmark/tpcds/sf1/local/delta_attach_pin/tpcds_sf1.benchmark.in +QUERY_NUMBER=29 +QUERY_NUMBER_PADDED=29 diff --git a/benchmark/tpcds/sf1/local/delta_attach_pin/q30.benchmark b/benchmark/tpcds/sf1/local/delta_attach_pin/q30.benchmark new file mode 100644 index 0000000..d3230d4 --- /dev/null +++ b/benchmark/tpcds/sf1/local/delta_attach_pin/q30.benchmark @@ -0,0 +1,7 @@ +# name: benchmark/tpcds/sf1/local/delta_attach_pin/q30.benchmark +# description: Run query 30 from the TPC-DS benchmark +# group: [sf1] + +template benchmark/tpcds/sf1/local/delta_attach_pin/tpcds_sf1.benchmark.in +QUERY_NUMBER=30 +QUERY_NUMBER_PADDED=30 diff --git a/benchmark/tpcds/sf1/local/delta_attach_pin/q31.benchmark b/benchmark/tpcds/sf1/local/delta_attach_pin/q31.benchmark new file mode 100644 index 0000000..02f1ee3 --- /dev/null +++ b/benchmark/tpcds/sf1/local/delta_attach_pin/q31.benchmark @@ -0,0 +1,7 @@ +# name: benchmark/tpcds/sf1/local/delta_attach_pin/q31.benchmark +# description: Run query 31 from the TPC-DS benchmark +# group: [sf1] + +template benchmark/tpcds/sf1/local/delta_attach_pin/tpcds_sf1.benchmark.in +QUERY_NUMBER=31 
+QUERY_NUMBER_PADDED=31 diff --git a/benchmark/tpcds/sf1/local/delta_attach_pin/q32.benchmark b/benchmark/tpcds/sf1/local/delta_attach_pin/q32.benchmark new file mode 100644 index 0000000..a798c44 --- /dev/null +++ b/benchmark/tpcds/sf1/local/delta_attach_pin/q32.benchmark @@ -0,0 +1,7 @@ +# name: benchmark/tpcds/sf1/local/delta_attach_pin/q32.benchmark +# description: Run query 32 from the TPC-DS benchmark +# group: [sf1] + +template benchmark/tpcds/sf1/local/delta_attach_pin/tpcds_sf1.benchmark.in +QUERY_NUMBER=32 +QUERY_NUMBER_PADDED=32 diff --git a/benchmark/tpcds/sf1/local/delta_attach_pin/q33.benchmark b/benchmark/tpcds/sf1/local/delta_attach_pin/q33.benchmark new file mode 100644 index 0000000..420af50 --- /dev/null +++ b/benchmark/tpcds/sf1/local/delta_attach_pin/q33.benchmark @@ -0,0 +1,7 @@ +# name: benchmark/tpcds/sf1/local/delta_attach_pin/q33.benchmark +# description: Run query 33 from the TPC-DS benchmark +# group: [sf1] + +template benchmark/tpcds/sf1/local/delta_attach_pin/tpcds_sf1.benchmark.in +QUERY_NUMBER=33 +QUERY_NUMBER_PADDED=33 diff --git a/benchmark/tpcds/sf1/local/delta_attach_pin/q34.benchmark b/benchmark/tpcds/sf1/local/delta_attach_pin/q34.benchmark new file mode 100644 index 0000000..55998b5 --- /dev/null +++ b/benchmark/tpcds/sf1/local/delta_attach_pin/q34.benchmark @@ -0,0 +1,7 @@ +# name: benchmark/tpcds/sf1/local/delta_attach_pin/q34.benchmark +# description: Run query 34 from the TPC-DS benchmark +# group: [sf1] + +template benchmark/tpcds/sf1/local/delta_attach_pin/tpcds_sf1.benchmark.in +QUERY_NUMBER=34 +QUERY_NUMBER_PADDED=34 diff --git a/benchmark/tpcds/sf1/local/delta_attach_pin/q35.benchmark b/benchmark/tpcds/sf1/local/delta_attach_pin/q35.benchmark new file mode 100644 index 0000000..9af5004 --- /dev/null +++ b/benchmark/tpcds/sf1/local/delta_attach_pin/q35.benchmark @@ -0,0 +1,7 @@ +# name: benchmark/tpcds/sf1/local/delta_attach_pin/q35.benchmark +# description: Run query 35 from the TPC-DS benchmark +# group: [sf1] + 
+template benchmark/tpcds/sf1/local/delta_attach_pin/tpcds_sf1.benchmark.in +QUERY_NUMBER=35 +QUERY_NUMBER_PADDED=35 diff --git a/benchmark/tpcds/sf1/local/delta_attach_pin/q36.benchmark b/benchmark/tpcds/sf1/local/delta_attach_pin/q36.benchmark new file mode 100644 index 0000000..7391980 --- /dev/null +++ b/benchmark/tpcds/sf1/local/delta_attach_pin/q36.benchmark @@ -0,0 +1,7 @@ +# name: benchmark/tpcds/sf1/local/delta_attach_pin/q36.benchmark +# description: Run query 36 from the TPC-DS benchmark +# group: [sf1] + +template benchmark/tpcds/sf1/local/delta_attach_pin/tpcds_sf1.benchmark.in +QUERY_NUMBER=36 +QUERY_NUMBER_PADDED=36 diff --git a/benchmark/tpcds/sf1/local/delta_attach_pin/q37.benchmark b/benchmark/tpcds/sf1/local/delta_attach_pin/q37.benchmark new file mode 100644 index 0000000..c6e6635 --- /dev/null +++ b/benchmark/tpcds/sf1/local/delta_attach_pin/q37.benchmark @@ -0,0 +1,7 @@ +# name: benchmark/tpcds/sf1/local/delta_attach_pin/q37.benchmark +# description: Run query 37 from the TPC-DS benchmark +# group: [sf1] + +template benchmark/tpcds/sf1/local/delta_attach_pin/tpcds_sf1.benchmark.in +QUERY_NUMBER=37 +QUERY_NUMBER_PADDED=37 diff --git a/benchmark/tpcds/sf1/local/delta_attach_pin/q38.benchmark b/benchmark/tpcds/sf1/local/delta_attach_pin/q38.benchmark new file mode 100644 index 0000000..703d1e7 --- /dev/null +++ b/benchmark/tpcds/sf1/local/delta_attach_pin/q38.benchmark @@ -0,0 +1,7 @@ +# name: benchmark/tpcds/sf1/local/delta_attach_pin/q38.benchmark +# description: Run query 38 from the TPC-DS benchmark +# group: [sf1] + +template benchmark/tpcds/sf1/local/delta_attach_pin/tpcds_sf1.benchmark.in +QUERY_NUMBER=38 +QUERY_NUMBER_PADDED=38 diff --git a/benchmark/tpcds/sf1/local/delta_attach_pin/q39.benchmark b/benchmark/tpcds/sf1/local/delta_attach_pin/q39.benchmark new file mode 100644 index 0000000..38c1957 --- /dev/null +++ b/benchmark/tpcds/sf1/local/delta_attach_pin/q39.benchmark @@ -0,0 +1,7 @@ +# name: 
benchmark/tpcds/sf1/local/delta_attach_pin/q39.benchmark +# description: Run query 39 from the TPC-DS benchmark +# group: [sf1] + +template benchmark/tpcds/sf1/local/delta_attach_pin/tpcds_sf1.benchmark.in +QUERY_NUMBER=39 +QUERY_NUMBER_PADDED=39 diff --git a/benchmark/tpcds/sf1/local/delta_attach_pin/q40.benchmark b/benchmark/tpcds/sf1/local/delta_attach_pin/q40.benchmark new file mode 100644 index 0000000..46fbc31 --- /dev/null +++ b/benchmark/tpcds/sf1/local/delta_attach_pin/q40.benchmark @@ -0,0 +1,7 @@ +# name: benchmark/tpcds/sf1/local/delta_attach_pin/q40.benchmark +# description: Run query 40 from the TPC-DS benchmark +# group: [sf1] + +template benchmark/tpcds/sf1/local/delta_attach_pin/tpcds_sf1.benchmark.in +QUERY_NUMBER=40 +QUERY_NUMBER_PADDED=40 diff --git a/benchmark/tpcds/sf1/local/delta_attach_pin/q41.benchmark b/benchmark/tpcds/sf1/local/delta_attach_pin/q41.benchmark new file mode 100644 index 0000000..0de449f --- /dev/null +++ b/benchmark/tpcds/sf1/local/delta_attach_pin/q41.benchmark @@ -0,0 +1,7 @@ +# name: benchmark/tpcds/sf1/local/delta_attach_pin/q41.benchmark +# description: Run query 41 from the TPC-DS benchmark +# group: [sf1] + +template benchmark/tpcds/sf1/local/delta_attach_pin/tpcds_sf1.benchmark.in +QUERY_NUMBER=41 +QUERY_NUMBER_PADDED=41 diff --git a/benchmark/tpcds/sf1/local/delta_attach_pin/q42.benchmark b/benchmark/tpcds/sf1/local/delta_attach_pin/q42.benchmark new file mode 100644 index 0000000..453995f --- /dev/null +++ b/benchmark/tpcds/sf1/local/delta_attach_pin/q42.benchmark @@ -0,0 +1,7 @@ +# name: benchmark/tpcds/sf1/local/delta_attach_pin/q42.benchmark +# description: Run query 42 from the TPC-DS benchmark +# group: [sf1] + +template benchmark/tpcds/sf1/local/delta_attach_pin/tpcds_sf1.benchmark.in +QUERY_NUMBER=42 +QUERY_NUMBER_PADDED=42 diff --git a/benchmark/tpcds/sf1/local/delta_attach_pin/q43.benchmark b/benchmark/tpcds/sf1/local/delta_attach_pin/q43.benchmark new file mode 100644 index 0000000..b1ce9a5 --- /dev/null 
+++ b/benchmark/tpcds/sf1/local/delta_attach_pin/q43.benchmark @@ -0,0 +1,7 @@ +# name: benchmark/tpcds/sf1/local/delta_attach_pin/q43.benchmark +# description: Run query 43 from the TPC-DS benchmark +# group: [sf1] + +template benchmark/tpcds/sf1/local/delta_attach_pin/tpcds_sf1.benchmark.in +QUERY_NUMBER=43 +QUERY_NUMBER_PADDED=43 diff --git a/benchmark/tpcds/sf1/local/delta_attach_pin/q44.benchmark b/benchmark/tpcds/sf1/local/delta_attach_pin/q44.benchmark new file mode 100644 index 0000000..ffb4344 --- /dev/null +++ b/benchmark/tpcds/sf1/local/delta_attach_pin/q44.benchmark @@ -0,0 +1,7 @@ +# name: benchmark/tpcds/sf1/local/delta_attach_pin/q44.benchmark +# description: Run query 44 from the TPC-DS benchmark +# group: [sf1] + +template benchmark/tpcds/sf1/local/delta_attach_pin/tpcds_sf1.benchmark.in +QUERY_NUMBER=44 +QUERY_NUMBER_PADDED=44 diff --git a/benchmark/tpcds/sf1/local/delta_attach_pin/q45.benchmark b/benchmark/tpcds/sf1/local/delta_attach_pin/q45.benchmark new file mode 100644 index 0000000..1e2c35b --- /dev/null +++ b/benchmark/tpcds/sf1/local/delta_attach_pin/q45.benchmark @@ -0,0 +1,7 @@ +# name: benchmark/tpcds/sf1/local/delta_attach_pin/q45.benchmark +# description: Run query 45 from the TPC-DS benchmark +# group: [sf1] + +template benchmark/tpcds/sf1/local/delta_attach_pin/tpcds_sf1.benchmark.in +QUERY_NUMBER=45 +QUERY_NUMBER_PADDED=45 diff --git a/benchmark/tpcds/sf1/local/delta_attach_pin/q46.benchmark b/benchmark/tpcds/sf1/local/delta_attach_pin/q46.benchmark new file mode 100644 index 0000000..e51ed26 --- /dev/null +++ b/benchmark/tpcds/sf1/local/delta_attach_pin/q46.benchmark @@ -0,0 +1,7 @@ +# name: benchmark/tpcds/sf1/local/delta_attach_pin/q46.benchmark +# description: Run query 46 from the TPC-DS benchmark +# group: [sf1] + +template benchmark/tpcds/sf1/local/delta_attach_pin/tpcds_sf1.benchmark.in +QUERY_NUMBER=46 +QUERY_NUMBER_PADDED=46 diff --git a/benchmark/tpcds/sf1/local/delta_attach_pin/q47.benchmark 
b/benchmark/tpcds/sf1/local/delta_attach_pin/q47.benchmark new file mode 100644 index 0000000..4b62cb4 --- /dev/null +++ b/benchmark/tpcds/sf1/local/delta_attach_pin/q47.benchmark @@ -0,0 +1,7 @@ +# name: benchmark/tpcds/sf1/local/delta_attach_pin/q47.benchmark +# description: Run query 47 from the TPC-DS benchmark +# group: [sf1] + +template benchmark/tpcds/sf1/local/delta_attach_pin/tpcds_sf1.benchmark.in +QUERY_NUMBER=47 +QUERY_NUMBER_PADDED=47 diff --git a/benchmark/tpcds/sf1/local/delta_attach_pin/q48.benchmark b/benchmark/tpcds/sf1/local/delta_attach_pin/q48.benchmark new file mode 100644 index 0000000..d7ed70f --- /dev/null +++ b/benchmark/tpcds/sf1/local/delta_attach_pin/q48.benchmark @@ -0,0 +1,7 @@ +# name: benchmark/tpcds/sf1/local/delta_attach_pin/q48.benchmark +# description: Run query 48 from the TPC-DS benchmark +# group: [sf1] + +template benchmark/tpcds/sf1/local/delta_attach_pin/tpcds_sf1.benchmark.in +QUERY_NUMBER=48 +QUERY_NUMBER_PADDED=48 diff --git a/benchmark/tpcds/sf1/local/delta_attach_pin/q49.benchmark b/benchmark/tpcds/sf1/local/delta_attach_pin/q49.benchmark new file mode 100644 index 0000000..224ed96 --- /dev/null +++ b/benchmark/tpcds/sf1/local/delta_attach_pin/q49.benchmark @@ -0,0 +1,7 @@ +# name: benchmark/tpcds/sf1/local/delta_attach_pin/q49.benchmark +# description: Run query 49 from the TPC-DS benchmark +# group: [sf1] + +template benchmark/tpcds/sf1/local/delta_attach_pin/tpcds_sf1.benchmark.in +QUERY_NUMBER=49 +QUERY_NUMBER_PADDED=49 diff --git a/benchmark/tpcds/sf1/local/delta_attach_pin/q50.benchmark b/benchmark/tpcds/sf1/local/delta_attach_pin/q50.benchmark new file mode 100644 index 0000000..39904e9 --- /dev/null +++ b/benchmark/tpcds/sf1/local/delta_attach_pin/q50.benchmark @@ -0,0 +1,7 @@ +# name: benchmark/tpcds/sf1/local/delta_attach_pin/q50.benchmark +# description: Run query 50 from the TPC-DS benchmark +# group: [sf1] + +template benchmark/tpcds/sf1/local/delta_attach_pin/tpcds_sf1.benchmark.in +QUERY_NUMBER=50 
+QUERY_NUMBER_PADDED=50 diff --git a/benchmark/tpcds/sf1/local/delta_attach_pin/q51.benchmark b/benchmark/tpcds/sf1/local/delta_attach_pin/q51.benchmark new file mode 100644 index 0000000..693c7e2 --- /dev/null +++ b/benchmark/tpcds/sf1/local/delta_attach_pin/q51.benchmark @@ -0,0 +1,7 @@ +# name: benchmark/tpcds/sf1/local/delta_attach_pin/q51.benchmark +# description: Run query 51 from the TPC-DS benchmark +# group: [sf1] + +template benchmark/tpcds/sf1/local/delta_attach_pin/tpcds_sf1.benchmark.in +QUERY_NUMBER=51 +QUERY_NUMBER_PADDED=51 diff --git a/benchmark/tpcds/sf1/local/delta_attach_pin/q52.benchmark b/benchmark/tpcds/sf1/local/delta_attach_pin/q52.benchmark new file mode 100644 index 0000000..54020c7 --- /dev/null +++ b/benchmark/tpcds/sf1/local/delta_attach_pin/q52.benchmark @@ -0,0 +1,7 @@ +# name: benchmark/tpcds/sf1/local/delta_attach_pin/q52.benchmark +# description: Run query 52 from the TPC-DS benchmark +# group: [sf1] + +template benchmark/tpcds/sf1/local/delta_attach_pin/tpcds_sf1.benchmark.in +QUERY_NUMBER=52 +QUERY_NUMBER_PADDED=52 diff --git a/benchmark/tpcds/sf1/local/delta_attach_pin/q53.benchmark b/benchmark/tpcds/sf1/local/delta_attach_pin/q53.benchmark new file mode 100644 index 0000000..158a5de --- /dev/null +++ b/benchmark/tpcds/sf1/local/delta_attach_pin/q53.benchmark @@ -0,0 +1,7 @@ +# name: benchmark/tpcds/sf1/local/delta_attach_pin/q53.benchmark +# description: Run query 53 from the TPC-DS benchmark +# group: [sf1] + +template benchmark/tpcds/sf1/local/delta_attach_pin/tpcds_sf1.benchmark.in +QUERY_NUMBER=53 +QUERY_NUMBER_PADDED=53 diff --git a/benchmark/tpcds/sf1/local/delta_attach_pin/q54.benchmark b/benchmark/tpcds/sf1/local/delta_attach_pin/q54.benchmark new file mode 100644 index 0000000..48e4562 --- /dev/null +++ b/benchmark/tpcds/sf1/local/delta_attach_pin/q54.benchmark @@ -0,0 +1,7 @@ +# name: benchmark/tpcds/sf1/local/delta_attach_pin/q54.benchmark +# description: Run query 54 from the TPC-DS benchmark +# group: [sf1] + 
+template benchmark/tpcds/sf1/local/delta_attach_pin/tpcds_sf1.benchmark.in +QUERY_NUMBER=54 +QUERY_NUMBER_PADDED=54 diff --git a/benchmark/tpcds/sf1/local/delta_attach_pin/q55.benchmark b/benchmark/tpcds/sf1/local/delta_attach_pin/q55.benchmark new file mode 100644 index 0000000..6df4d49 --- /dev/null +++ b/benchmark/tpcds/sf1/local/delta_attach_pin/q55.benchmark @@ -0,0 +1,7 @@ +# name: benchmark/tpcds/sf1/local/delta_attach_pin/q55.benchmark +# description: Run query 55 from the TPC-DS benchmark +# group: [sf1] + +template benchmark/tpcds/sf1/local/delta_attach_pin/tpcds_sf1.benchmark.in +QUERY_NUMBER=55 +QUERY_NUMBER_PADDED=55 diff --git a/benchmark/tpcds/sf1/local/delta_attach_pin/q56.benchmark b/benchmark/tpcds/sf1/local/delta_attach_pin/q56.benchmark new file mode 100644 index 0000000..b84fb56 --- /dev/null +++ b/benchmark/tpcds/sf1/local/delta_attach_pin/q56.benchmark @@ -0,0 +1,7 @@ +# name: benchmark/tpcds/sf1/local/delta_attach_pin/q56.benchmark +# description: Run query 56 from the TPC-DS benchmark +# group: [sf1] + +template benchmark/tpcds/sf1/local/delta_attach_pin/tpcds_sf1.benchmark.in +QUERY_NUMBER=56 +QUERY_NUMBER_PADDED=56 diff --git a/benchmark/tpcds/sf1/local/delta_attach_pin/q57.benchmark b/benchmark/tpcds/sf1/local/delta_attach_pin/q57.benchmark new file mode 100644 index 0000000..c4a69a1 --- /dev/null +++ b/benchmark/tpcds/sf1/local/delta_attach_pin/q57.benchmark @@ -0,0 +1,7 @@ +# name: benchmark/tpcds/sf1/local/delta_attach_pin/q57.benchmark +# description: Run query 57 from the TPC-DS benchmark +# group: [sf1] + +template benchmark/tpcds/sf1/local/delta_attach_pin/tpcds_sf1.benchmark.in +QUERY_NUMBER=57 +QUERY_NUMBER_PADDED=57 diff --git a/benchmark/tpcds/sf1/local/delta_attach_pin/q58.benchmark b/benchmark/tpcds/sf1/local/delta_attach_pin/q58.benchmark new file mode 100644 index 0000000..ebe0655 --- /dev/null +++ b/benchmark/tpcds/sf1/local/delta_attach_pin/q58.benchmark @@ -0,0 +1,7 @@ +# name: 
benchmark/tpcds/sf1/local/delta_attach_pin/q58.benchmark +# description: Run query 58 from the TPC-DS benchmark +# group: [sf1] + +template benchmark/tpcds/sf1/local/delta_attach_pin/tpcds_sf1.benchmark.in +QUERY_NUMBER=58 +QUERY_NUMBER_PADDED=58 diff --git a/benchmark/tpcds/sf1/local/delta_attach_pin/q59.benchmark b/benchmark/tpcds/sf1/local/delta_attach_pin/q59.benchmark new file mode 100644 index 0000000..0a81d8b --- /dev/null +++ b/benchmark/tpcds/sf1/local/delta_attach_pin/q59.benchmark @@ -0,0 +1,7 @@ +# name: benchmark/tpcds/sf1/local/delta_attach_pin/q59.benchmark +# description: Run query 59 from the TPC-DS benchmark +# group: [sf1] + +template benchmark/tpcds/sf1/local/delta_attach_pin/tpcds_sf1.benchmark.in +QUERY_NUMBER=59 +QUERY_NUMBER_PADDED=59 diff --git a/benchmark/tpcds/sf1/local/delta_attach_pin/q60.benchmark b/benchmark/tpcds/sf1/local/delta_attach_pin/q60.benchmark new file mode 100644 index 0000000..986d2df --- /dev/null +++ b/benchmark/tpcds/sf1/local/delta_attach_pin/q60.benchmark @@ -0,0 +1,7 @@ +# name: benchmark/tpcds/sf1/local/delta_attach_pin/q60.benchmark +# description: Run query 60 from the TPC-DS benchmark +# group: [sf1] + +template benchmark/tpcds/sf1/local/delta_attach_pin/tpcds_sf1.benchmark.in +QUERY_NUMBER=60 +QUERY_NUMBER_PADDED=60 diff --git a/benchmark/tpcds/sf1/local/delta_attach_pin/q61.benchmark b/benchmark/tpcds/sf1/local/delta_attach_pin/q61.benchmark new file mode 100644 index 0000000..fdb8bb7 --- /dev/null +++ b/benchmark/tpcds/sf1/local/delta_attach_pin/q61.benchmark @@ -0,0 +1,7 @@ +# name: benchmark/tpcds/sf1/local/delta_attach_pin/q61.benchmark +# description: Run query 61 from the TPC-DS benchmark +# group: [sf1] + +template benchmark/tpcds/sf1/local/delta_attach_pin/tpcds_sf1.benchmark.in +QUERY_NUMBER=61 +QUERY_NUMBER_PADDED=61 diff --git a/benchmark/tpcds/sf1/local/delta_attach_pin/q62.benchmark b/benchmark/tpcds/sf1/local/delta_attach_pin/q62.benchmark new file mode 100644 index 0000000..b4454e7 --- /dev/null 
+++ b/benchmark/tpcds/sf1/local/delta_attach_pin/q62.benchmark @@ -0,0 +1,7 @@ +# name: benchmark/tpcds/sf1/local/delta_attach_pin/q62.benchmark +# description: Run query 62 from the TPC-DS benchmark +# group: [sf1] + +template benchmark/tpcds/sf1/local/delta_attach_pin/tpcds_sf1.benchmark.in +QUERY_NUMBER=62 +QUERY_NUMBER_PADDED=62 diff --git a/benchmark/tpcds/sf1/local/delta_attach_pin/q63.benchmark b/benchmark/tpcds/sf1/local/delta_attach_pin/q63.benchmark new file mode 100644 index 0000000..72805e6 --- /dev/null +++ b/benchmark/tpcds/sf1/local/delta_attach_pin/q63.benchmark @@ -0,0 +1,7 @@ +# name: benchmark/tpcds/sf1/local/delta_attach_pin/q63.benchmark +# description: Run query 63 from the TPC-DS benchmark +# group: [sf1] + +template benchmark/tpcds/sf1/local/delta_attach_pin/tpcds_sf1.benchmark.in +QUERY_NUMBER=63 +QUERY_NUMBER_PADDED=63 diff --git a/benchmark/tpcds/sf1/local/delta_attach_pin/q64.benchmark b/benchmark/tpcds/sf1/local/delta_attach_pin/q64.benchmark new file mode 100644 index 0000000..a3b7267 --- /dev/null +++ b/benchmark/tpcds/sf1/local/delta_attach_pin/q64.benchmark @@ -0,0 +1,7 @@ +# name: benchmark/tpcds/sf1/local/delta_attach_pin/q64.benchmark +# description: Run query 64 from the TPC-DS benchmark +# group: [sf1] + +template benchmark/tpcds/sf1/local/delta_attach_pin/tpcds_sf1.benchmark.in +QUERY_NUMBER=64 +QUERY_NUMBER_PADDED=64 diff --git a/benchmark/tpcds/sf1/local/delta_attach_pin/q65.benchmark b/benchmark/tpcds/sf1/local/delta_attach_pin/q65.benchmark new file mode 100644 index 0000000..4c50bd9 --- /dev/null +++ b/benchmark/tpcds/sf1/local/delta_attach_pin/q65.benchmark @@ -0,0 +1,7 @@ +# name: benchmark/tpcds/sf1/local/delta_attach_pin/q65.benchmark +# description: Run query 65 from the TPC-DS benchmark +# group: [sf1] + +template benchmark/tpcds/sf1/local/delta_attach_pin/tpcds_sf1.benchmark.in +QUERY_NUMBER=65 +QUERY_NUMBER_PADDED=65 diff --git a/benchmark/tpcds/sf1/local/delta_attach_pin/q66.benchmark 
b/benchmark/tpcds/sf1/local/delta_attach_pin/q66.benchmark new file mode 100644 index 0000000..2e9ede8 --- /dev/null +++ b/benchmark/tpcds/sf1/local/delta_attach_pin/q66.benchmark @@ -0,0 +1,7 @@ +# name: benchmark/tpcds/sf1/local/delta_attach_pin/q66.benchmark +# description: Run query 66 from the TPC-DS benchmark +# group: [sf1] + +template benchmark/tpcds/sf1/local/delta_attach_pin/tpcds_sf1.benchmark.in +QUERY_NUMBER=66 +QUERY_NUMBER_PADDED=66 diff --git a/benchmark/tpcds/sf1/local/delta_attach_pin/q67.benchmark b/benchmark/tpcds/sf1/local/delta_attach_pin/q67.benchmark new file mode 100644 index 0000000..f50871c --- /dev/null +++ b/benchmark/tpcds/sf1/local/delta_attach_pin/q67.benchmark @@ -0,0 +1,7 @@ +# name: benchmark/tpcds/sf1/local/delta_attach_pin/q67.benchmark +# description: Run query 67 from the TPC-DS benchmark +# group: [sf1] + +template benchmark/tpcds/sf1/local/delta_attach_pin/tpcds_sf1.benchmark.in +QUERY_NUMBER=67 +QUERY_NUMBER_PADDED=67 diff --git a/benchmark/tpcds/sf1/local/delta_attach_pin/q68.benchmark b/benchmark/tpcds/sf1/local/delta_attach_pin/q68.benchmark new file mode 100644 index 0000000..f2c8808 --- /dev/null +++ b/benchmark/tpcds/sf1/local/delta_attach_pin/q68.benchmark @@ -0,0 +1,7 @@ +# name: benchmark/tpcds/sf1/local/delta_attach_pin/q68.benchmark +# description: Run query 68 from the TPC-DS benchmark +# group: [sf1] + +template benchmark/tpcds/sf1/local/delta_attach_pin/tpcds_sf1.benchmark.in +QUERY_NUMBER=68 +QUERY_NUMBER_PADDED=68 diff --git a/benchmark/tpcds/sf1/local/delta_attach_pin/q69.benchmark b/benchmark/tpcds/sf1/local/delta_attach_pin/q69.benchmark new file mode 100644 index 0000000..7b7f4d2 --- /dev/null +++ b/benchmark/tpcds/sf1/local/delta_attach_pin/q69.benchmark @@ -0,0 +1,7 @@ +# name: benchmark/tpcds/sf1/local/delta_attach_pin/q69.benchmark +# description: Run query 69 from the TPC-DS benchmark +# group: [sf1] + +template benchmark/tpcds/sf1/local/delta_attach_pin/tpcds_sf1.benchmark.in +QUERY_NUMBER=69 
+QUERY_NUMBER_PADDED=69 diff --git a/benchmark/tpcds/sf1/local/delta_attach_pin/q70.benchmark b/benchmark/tpcds/sf1/local/delta_attach_pin/q70.benchmark new file mode 100644 index 0000000..98d7bad --- /dev/null +++ b/benchmark/tpcds/sf1/local/delta_attach_pin/q70.benchmark @@ -0,0 +1,7 @@ +# name: benchmark/tpcds/sf1/local/delta_attach_pin/q70.benchmark +# description: Run query 70 from the TPC-DS benchmark +# group: [sf1] + +template benchmark/tpcds/sf1/local/delta_attach_pin/tpcds_sf1.benchmark.in +QUERY_NUMBER=70 +QUERY_NUMBER_PADDED=70 diff --git a/benchmark/tpcds/sf1/local/delta_attach_pin/q71.benchmark b/benchmark/tpcds/sf1/local/delta_attach_pin/q71.benchmark new file mode 100644 index 0000000..ad7b7a9 --- /dev/null +++ b/benchmark/tpcds/sf1/local/delta_attach_pin/q71.benchmark @@ -0,0 +1,7 @@ +# name: benchmark/tpcds/sf1/local/delta_attach_pin/q71.benchmark +# description: Run query 71 from the TPC-DS benchmark +# group: [sf1] + +template benchmark/tpcds/sf1/local/delta_attach_pin/tpcds_sf1.benchmark.in +QUERY_NUMBER=71 +QUERY_NUMBER_PADDED=71 diff --git a/benchmark/tpcds/sf1/local/delta_attach_pin/q72.benchmark b/benchmark/tpcds/sf1/local/delta_attach_pin/q72.benchmark new file mode 100644 index 0000000..cb4496b --- /dev/null +++ b/benchmark/tpcds/sf1/local/delta_attach_pin/q72.benchmark @@ -0,0 +1,7 @@ +# name: benchmark/tpcds/sf1/local/delta_attach_pin/q72.benchmark +# description: Run query 72 from the TPC-DS benchmark +# group: [sf1] + +template benchmark/tpcds/sf1/local/delta_attach_pin/tpcds_sf1.benchmark.in +QUERY_NUMBER=72 +QUERY_NUMBER_PADDED=72 diff --git a/benchmark/tpcds/sf1/local/delta_attach_pin/q73.benchmark b/benchmark/tpcds/sf1/local/delta_attach_pin/q73.benchmark new file mode 100644 index 0000000..a291e47 --- /dev/null +++ b/benchmark/tpcds/sf1/local/delta_attach_pin/q73.benchmark @@ -0,0 +1,7 @@ +# name: benchmark/tpcds/sf1/local/delta_attach_pin/q73.benchmark +# description: Run query 73 from the TPC-DS benchmark +# group: [sf1] + 
+template benchmark/tpcds/sf1/local/delta_attach_pin/tpcds_sf1.benchmark.in +QUERY_NUMBER=73 +QUERY_NUMBER_PADDED=73 diff --git a/benchmark/tpcds/sf1/local/delta_attach_pin/q74.benchmark b/benchmark/tpcds/sf1/local/delta_attach_pin/q74.benchmark new file mode 100644 index 0000000..d470aa9 --- /dev/null +++ b/benchmark/tpcds/sf1/local/delta_attach_pin/q74.benchmark @@ -0,0 +1,7 @@ +# name: benchmark/tpcds/sf1/local/delta_attach_pin/q74.benchmark +# description: Run query 74 from the TPC-DS benchmark +# group: [sf1] + +template benchmark/tpcds/sf1/local/delta_attach_pin/tpcds_sf1.benchmark.in +QUERY_NUMBER=74 +QUERY_NUMBER_PADDED=74 diff --git a/benchmark/tpcds/sf1/local/delta_attach_pin/q75.benchmark b/benchmark/tpcds/sf1/local/delta_attach_pin/q75.benchmark new file mode 100644 index 0000000..8795c49 --- /dev/null +++ b/benchmark/tpcds/sf1/local/delta_attach_pin/q75.benchmark @@ -0,0 +1,7 @@ +# name: benchmark/tpcds/sf1/local/delta_attach_pin/q75.benchmark +# description: Run query 75 from the TPC-DS benchmark +# group: [sf1] + +template benchmark/tpcds/sf1/local/delta_attach_pin/tpcds_sf1.benchmark.in +QUERY_NUMBER=75 +QUERY_NUMBER_PADDED=75 diff --git a/benchmark/tpcds/sf1/local/delta_attach_pin/q76.benchmark b/benchmark/tpcds/sf1/local/delta_attach_pin/q76.benchmark new file mode 100644 index 0000000..a4082b6 --- /dev/null +++ b/benchmark/tpcds/sf1/local/delta_attach_pin/q76.benchmark @@ -0,0 +1,7 @@ +# name: benchmark/tpcds/sf1/local/delta_attach_pin/q76.benchmark +# description: Run query 76 from the TPC-DS benchmark +# group: [sf1] + +template benchmark/tpcds/sf1/local/delta_attach_pin/tpcds_sf1.benchmark.in +QUERY_NUMBER=76 +QUERY_NUMBER_PADDED=76 diff --git a/benchmark/tpcds/sf1/local/delta_attach_pin/q77.benchmark b/benchmark/tpcds/sf1/local/delta_attach_pin/q77.benchmark new file mode 100644 index 0000000..2ace6b6 --- /dev/null +++ b/benchmark/tpcds/sf1/local/delta_attach_pin/q77.benchmark @@ -0,0 +1,7 @@ +# name: 
benchmark/tpcds/sf1/local/delta_attach_pin/q77.benchmark +# description: Run query 77 from the TPC-DS benchmark +# group: [sf1] + +template benchmark/tpcds/sf1/local/delta_attach_pin/tpcds_sf1.benchmark.in +QUERY_NUMBER=77 +QUERY_NUMBER_PADDED=77 diff --git a/benchmark/tpcds/sf1/local/delta_attach_pin/q78.benchmark b/benchmark/tpcds/sf1/local/delta_attach_pin/q78.benchmark new file mode 100644 index 0000000..8c7e396 --- /dev/null +++ b/benchmark/tpcds/sf1/local/delta_attach_pin/q78.benchmark @@ -0,0 +1,7 @@ +# name: benchmark/tpcds/sf1/local/delta_attach_pin/q78.benchmark +# description: Run query 78 from the TPC-DS benchmark +# group: [sf1] + +template benchmark/tpcds/sf1/local/delta_attach_pin/tpcds_sf1.benchmark.in +QUERY_NUMBER=78 +QUERY_NUMBER_PADDED=78 diff --git a/benchmark/tpcds/sf1/local/delta_attach_pin/q79.benchmark b/benchmark/tpcds/sf1/local/delta_attach_pin/q79.benchmark new file mode 100644 index 0000000..b977a62 --- /dev/null +++ b/benchmark/tpcds/sf1/local/delta_attach_pin/q79.benchmark @@ -0,0 +1,7 @@ +# name: benchmark/tpcds/sf1/local/delta_attach_pin/q79.benchmark +# description: Run query 79 from the TPC-DS benchmark +# group: [sf1] + +template benchmark/tpcds/sf1/local/delta_attach_pin/tpcds_sf1.benchmark.in +QUERY_NUMBER=79 +QUERY_NUMBER_PADDED=79 diff --git a/benchmark/tpcds/sf1/local/delta_attach_pin/q80.benchmark b/benchmark/tpcds/sf1/local/delta_attach_pin/q80.benchmark new file mode 100644 index 0000000..67df3e5 --- /dev/null +++ b/benchmark/tpcds/sf1/local/delta_attach_pin/q80.benchmark @@ -0,0 +1,7 @@ +# name: benchmark/tpcds/sf1/local/delta_attach_pin/q80.benchmark +# description: Run query 80 from the TPC-DS benchmark +# group: [sf1] + +template benchmark/tpcds/sf1/local/delta_attach_pin/tpcds_sf1.benchmark.in +QUERY_NUMBER=80 +QUERY_NUMBER_PADDED=80 diff --git a/benchmark/tpcds/sf1/local/delta_attach_pin/q81.benchmark b/benchmark/tpcds/sf1/local/delta_attach_pin/q81.benchmark new file mode 100644 index 0000000..d240ada --- /dev/null 
+++ b/benchmark/tpcds/sf1/local/delta_attach_pin/q81.benchmark @@ -0,0 +1,7 @@ +# name: benchmark/tpcds/sf1/local/delta_attach_pin/q81.benchmark +# description: Run query 81 from the TPC-DS benchmark +# group: [sf1] + +template benchmark/tpcds/sf1/local/delta_attach_pin/tpcds_sf1.benchmark.in +QUERY_NUMBER=81 +QUERY_NUMBER_PADDED=81 diff --git a/benchmark/tpcds/sf1/local/delta_attach_pin/q82.benchmark b/benchmark/tpcds/sf1/local/delta_attach_pin/q82.benchmark new file mode 100644 index 0000000..e8f98c3 --- /dev/null +++ b/benchmark/tpcds/sf1/local/delta_attach_pin/q82.benchmark @@ -0,0 +1,7 @@ +# name: benchmark/tpcds/sf1/local/delta_attach_pin/q82.benchmark +# description: Run query 82 from the TPC-DS benchmark +# group: [sf1] + +template benchmark/tpcds/sf1/local/delta_attach_pin/tpcds_sf1.benchmark.in +QUERY_NUMBER=82 +QUERY_NUMBER_PADDED=82 diff --git a/benchmark/tpcds/sf1/local/delta_attach_pin/q83.benchmark b/benchmark/tpcds/sf1/local/delta_attach_pin/q83.benchmark new file mode 100644 index 0000000..5508996 --- /dev/null +++ b/benchmark/tpcds/sf1/local/delta_attach_pin/q83.benchmark @@ -0,0 +1,7 @@ +# name: benchmark/tpcds/sf1/local/delta_attach_pin/q83.benchmark +# description: Run query 83 from the TPC-DS benchmark +# group: [sf1] + +template benchmark/tpcds/sf1/local/delta_attach_pin/tpcds_sf1.benchmark.in +QUERY_NUMBER=83 +QUERY_NUMBER_PADDED=83 diff --git a/benchmark/tpcds/sf1/local/delta_attach_pin/q84.benchmark b/benchmark/tpcds/sf1/local/delta_attach_pin/q84.benchmark new file mode 100644 index 0000000..05404d1 --- /dev/null +++ b/benchmark/tpcds/sf1/local/delta_attach_pin/q84.benchmark @@ -0,0 +1,7 @@ +# name: benchmark/tpcds/sf1/local/delta_attach_pin/q84.benchmark +# description: Run query 84 from the TPC-DS benchmark +# group: [sf1] + +template benchmark/tpcds/sf1/local/delta_attach_pin/tpcds_sf1.benchmark.in +QUERY_NUMBER=84 +QUERY_NUMBER_PADDED=84 diff --git a/benchmark/tpcds/sf1/local/delta_attach_pin/q85.benchmark 
b/benchmark/tpcds/sf1/local/delta_attach_pin/q85.benchmark new file mode 100644 index 0000000..46592c3 --- /dev/null +++ b/benchmark/tpcds/sf1/local/delta_attach_pin/q85.benchmark @@ -0,0 +1,7 @@ +# name: benchmark/tpcds/sf1/local/delta_attach_pin/q85.benchmark +# description: Run query 85 from the TPC-DS benchmark +# group: [sf1] + +template benchmark/tpcds/sf1/local/delta_attach_pin/tpcds_sf1.benchmark.in +QUERY_NUMBER=85 +QUERY_NUMBER_PADDED=85 diff --git a/benchmark/tpcds/sf1/local/delta_attach_pin/q86.benchmark b/benchmark/tpcds/sf1/local/delta_attach_pin/q86.benchmark new file mode 100644 index 0000000..72ca931 --- /dev/null +++ b/benchmark/tpcds/sf1/local/delta_attach_pin/q86.benchmark @@ -0,0 +1,7 @@ +# name: benchmark/tpcds/sf1/local/delta_attach_pin/q86.benchmark +# description: Run query 86 from the TPC-DS benchmark +# group: [sf1] + +template benchmark/tpcds/sf1/local/delta_attach_pin/tpcds_sf1.benchmark.in +QUERY_NUMBER=86 +QUERY_NUMBER_PADDED=86 diff --git a/benchmark/tpcds/sf1/local/delta_attach_pin/q87.benchmark b/benchmark/tpcds/sf1/local/delta_attach_pin/q87.benchmark new file mode 100644 index 0000000..44525a3 --- /dev/null +++ b/benchmark/tpcds/sf1/local/delta_attach_pin/q87.benchmark @@ -0,0 +1,7 @@ +# name: benchmark/tpcds/sf1/local/delta_attach_pin/q87.benchmark +# description: Run query 87 from the TPC-DS benchmark +# group: [sf1] + +template benchmark/tpcds/sf1/local/delta_attach_pin/tpcds_sf1.benchmark.in +QUERY_NUMBER=87 +QUERY_NUMBER_PADDED=87 diff --git a/benchmark/tpcds/sf1/local/delta_attach_pin/q88.benchmark b/benchmark/tpcds/sf1/local/delta_attach_pin/q88.benchmark new file mode 100644 index 0000000..f6c7875 --- /dev/null +++ b/benchmark/tpcds/sf1/local/delta_attach_pin/q88.benchmark @@ -0,0 +1,7 @@ +# name: benchmark/tpcds/sf1/local/delta_attach_pin/q88.benchmark +# description: Run query 88 from the TPC-DS benchmark +# group: [sf1] + +template benchmark/tpcds/sf1/local/delta_attach_pin/tpcds_sf1.benchmark.in +QUERY_NUMBER=88 
+QUERY_NUMBER_PADDED=88 diff --git a/benchmark/tpcds/sf1/local/delta_attach_pin/q89.benchmark b/benchmark/tpcds/sf1/local/delta_attach_pin/q89.benchmark new file mode 100644 index 0000000..2f9a540 --- /dev/null +++ b/benchmark/tpcds/sf1/local/delta_attach_pin/q89.benchmark @@ -0,0 +1,7 @@ +# name: benchmark/tpcds/sf1/local/delta_attach_pin/q89.benchmark +# description: Run query 89 from the TPC-DS benchmark +# group: [sf1] + +template benchmark/tpcds/sf1/local/delta_attach_pin/tpcds_sf1.benchmark.in +QUERY_NUMBER=89 +QUERY_NUMBER_PADDED=89 diff --git a/benchmark/tpcds/sf1/local/delta_attach_pin/q90.benchmark b/benchmark/tpcds/sf1/local/delta_attach_pin/q90.benchmark new file mode 100644 index 0000000..72193ac --- /dev/null +++ b/benchmark/tpcds/sf1/local/delta_attach_pin/q90.benchmark @@ -0,0 +1,7 @@ +# name: benchmark/tpcds/sf1/local/delta_attach_pin/q90.benchmark +# description: Run query 90 from the TPC-DS benchmark +# group: [sf1] + +template benchmark/tpcds/sf1/local/delta_attach_pin/tpcds_sf1.benchmark.in +QUERY_NUMBER=90 +QUERY_NUMBER_PADDED=90 diff --git a/benchmark/tpcds/sf1/local/delta_attach_pin/q91.benchmark b/benchmark/tpcds/sf1/local/delta_attach_pin/q91.benchmark new file mode 100644 index 0000000..5706ce5 --- /dev/null +++ b/benchmark/tpcds/sf1/local/delta_attach_pin/q91.benchmark @@ -0,0 +1,7 @@ +# name: benchmark/tpcds/sf1/local/delta_attach_pin/q91.benchmark +# description: Run query 91 from the TPC-DS benchmark +# group: [sf1] + +template benchmark/tpcds/sf1/local/delta_attach_pin/tpcds_sf1.benchmark.in +QUERY_NUMBER=91 +QUERY_NUMBER_PADDED=91 diff --git a/benchmark/tpcds/sf1/local/delta_attach_pin/q92.benchmark b/benchmark/tpcds/sf1/local/delta_attach_pin/q92.benchmark new file mode 100644 index 0000000..582d72d --- /dev/null +++ b/benchmark/tpcds/sf1/local/delta_attach_pin/q92.benchmark @@ -0,0 +1,7 @@ +# name: benchmark/tpcds/sf1/local/delta_attach_pin/q92.benchmark +# description: Run query 92 from the TPC-DS benchmark +# group: [sf1] + 
+template benchmark/tpcds/sf1/local/delta_attach_pin/tpcds_sf1.benchmark.in +QUERY_NUMBER=92 +QUERY_NUMBER_PADDED=92 diff --git a/benchmark/tpcds/sf1/local/delta_attach_pin/q93.benchmark b/benchmark/tpcds/sf1/local/delta_attach_pin/q93.benchmark new file mode 100644 index 0000000..15af12b --- /dev/null +++ b/benchmark/tpcds/sf1/local/delta_attach_pin/q93.benchmark @@ -0,0 +1,7 @@ +# name: benchmark/tpcds/sf1/local/delta_attach_pin/q93.benchmark +# description: Run query 93 from the TPC-DS benchmark +# group: [sf1] + +template benchmark/tpcds/sf1/local/delta_attach_pin/tpcds_sf1.benchmark.in +QUERY_NUMBER=93 +QUERY_NUMBER_PADDED=93 diff --git a/benchmark/tpcds/sf1/local/delta_attach_pin/q94.benchmark b/benchmark/tpcds/sf1/local/delta_attach_pin/q94.benchmark new file mode 100644 index 0000000..3545dc4 --- /dev/null +++ b/benchmark/tpcds/sf1/local/delta_attach_pin/q94.benchmark @@ -0,0 +1,7 @@ +# name: benchmark/tpcds/sf1/local/delta_attach_pin/q94.benchmark +# description: Run query 94 from the TPC-DS benchmark +# group: [sf1] + +template benchmark/tpcds/sf1/local/delta_attach_pin/tpcds_sf1.benchmark.in +QUERY_NUMBER=94 +QUERY_NUMBER_PADDED=94 diff --git a/benchmark/tpcds/sf1/local/delta_attach_pin/q95.benchmark b/benchmark/tpcds/sf1/local/delta_attach_pin/q95.benchmark new file mode 100644 index 0000000..f230bba --- /dev/null +++ b/benchmark/tpcds/sf1/local/delta_attach_pin/q95.benchmark @@ -0,0 +1,7 @@ +# name: benchmark/tpcds/sf1/local/delta_attach_pin/q95.benchmark +# description: Run query 95 from the TPC-DS benchmark +# group: [sf1] + +template benchmark/tpcds/sf1/local/delta_attach_pin/tpcds_sf1.benchmark.in +QUERY_NUMBER=95 +QUERY_NUMBER_PADDED=95 diff --git a/benchmark/tpcds/sf1/local/delta_attach_pin/q96.benchmark b/benchmark/tpcds/sf1/local/delta_attach_pin/q96.benchmark new file mode 100644 index 0000000..e721966 --- /dev/null +++ b/benchmark/tpcds/sf1/local/delta_attach_pin/q96.benchmark @@ -0,0 +1,7 @@ +# name: 
benchmark/tpcds/sf1/local/delta_attach_pin/q96.benchmark +# description: Run query 96 from the TPC-DS benchmark +# group: [sf1] + +template benchmark/tpcds/sf1/local/delta_attach_pin/tpcds_sf1.benchmark.in +QUERY_NUMBER=96 +QUERY_NUMBER_PADDED=96 diff --git a/benchmark/tpcds/sf1/local/delta_attach_pin/q97.benchmark b/benchmark/tpcds/sf1/local/delta_attach_pin/q97.benchmark new file mode 100644 index 0000000..fc40c93 --- /dev/null +++ b/benchmark/tpcds/sf1/local/delta_attach_pin/q97.benchmark @@ -0,0 +1,7 @@ +# name: benchmark/tpcds/sf1/local/delta_attach_pin/q97.benchmark +# description: Run query 97 from the TPC-DS benchmark +# group: [sf1] + +template benchmark/tpcds/sf1/local/delta_attach_pin/tpcds_sf1.benchmark.in +QUERY_NUMBER=97 +QUERY_NUMBER_PADDED=97 diff --git a/benchmark/tpcds/sf1/local/delta_attach_pin/q98.benchmark b/benchmark/tpcds/sf1/local/delta_attach_pin/q98.benchmark new file mode 100644 index 0000000..4a8bc99 --- /dev/null +++ b/benchmark/tpcds/sf1/local/delta_attach_pin/q98.benchmark @@ -0,0 +1,7 @@ +# name: benchmark/tpcds/sf1/local/delta_attach_pin/q98.benchmark +# description: Run query 98 from the TPC-DS benchmark +# group: [sf1] + +template benchmark/tpcds/sf1/local/delta_attach_pin/tpcds_sf1.benchmark.in +QUERY_NUMBER=98 +QUERY_NUMBER_PADDED=98 diff --git a/benchmark/tpcds/sf1/local/delta_attach_pin/q99.benchmark b/benchmark/tpcds/sf1/local/delta_attach_pin/q99.benchmark new file mode 100644 index 0000000..ddec43c --- /dev/null +++ b/benchmark/tpcds/sf1/local/delta_attach_pin/q99.benchmark @@ -0,0 +1,7 @@ +# name: benchmark/tpcds/sf1/local/delta_attach_pin/q99.benchmark +# description: Run query 99 from the TPC-DS benchmark +# group: [sf1] + +template benchmark/tpcds/sf1/local/delta_attach_pin/tpcds_sf1.benchmark.in +QUERY_NUMBER=99 +QUERY_NUMBER_PADDED=99 diff --git a/benchmark/tpcds/sf1/local/delta_attach_pin/tpcds_sf1.benchmark.in b/benchmark/tpcds/sf1/local/delta_attach_pin/tpcds_sf1.benchmark.in new file mode 100644 index 
0000000..db2f631 --- /dev/null +++ b/benchmark/tpcds/sf1/local/delta_attach_pin/tpcds_sf1.benchmark.in @@ -0,0 +1,17 @@ +# name: ${FILE_PATH} +# description: ${DESCRIPTION} +# group: [tpcds-sf1] + +name DSQ${QUERY_NUMBER_PADDED} +group tpcds +subgroup sf1 + +require delta + +require parquet + +load benchmark/tpcds/sf1/local/delta_attach_pin/load.sql + +run duckdb/extension/tpcds/dsdgen/queries/${QUERY_NUMBER_PADDED}.sql + +result duckdb/extension/tpcds/dsdgen/answers/sf1/${QUERY_NUMBER_PADDED}.csv diff --git a/benchmark/tpch/sf1/local/delta_attach/load.sql b/benchmark/tpch/sf1/local/delta_attach/load.sql new file mode 100644 index 0000000..cbeac72 --- /dev/null +++ b/benchmark/tpch/sf1/local/delta_attach/load.sql @@ -0,0 +1,8 @@ +ATTACH './data/generated/tpch_sf1/customer/delta_lake' as customer (TYPE delta); +ATTACH './data/generated/tpch_sf1/lineitem/delta_lake' as lineitem (TYPE delta); +ATTACH './data/generated/tpch_sf1/nation/delta_lake' as nation (TYPE delta); +ATTACH './data/generated/tpch_sf1/orders/delta_lake' as orders (TYPE delta); +ATTACH './data/generated/tpch_sf1/part/delta_lake' as part (TYPE delta); +ATTACH './data/generated/tpch_sf1/partsupp/delta_lake' as partsupp (TYPE delta); +ATTACH './data/generated/tpch_sf1/region/delta_lake' as region (TYPE delta); +ATTACH './data/generated/tpch_sf1/supplier/delta_lake' as supplier (TYPE delta); \ No newline at end of file diff --git a/benchmark/tpch/sf1/local/delta_attach/q01.benchmark b/benchmark/tpch/sf1/local/delta_attach/q01.benchmark new file mode 100644 index 0000000..fc1f938 --- /dev/null +++ b/benchmark/tpch/sf1/local/delta_attach/q01.benchmark @@ -0,0 +1,7 @@ +# name: benchmark/tpch/sf1/local/delta_attach/q01.benchmark +# description: Run query 01 from the TPC-H benchmark +# group: [sf1-parquet] + +template benchmark/tpch/sf1/local/delta_attach/tpch_sf1_delta.benchmark.in +QUERY_NUMBER=1 +QUERY_NUMBER_PADDED=01 diff --git a/benchmark/tpch/sf1/local/delta_attach/q02.benchmark 
b/benchmark/tpch/sf1/local/delta_attach/q02.benchmark new file mode 100644 index 0000000..84c3bbc --- /dev/null +++ b/benchmark/tpch/sf1/local/delta_attach/q02.benchmark @@ -0,0 +1,7 @@ +# name: benchmark/tpch/sf1/local/delta_attach/q02.benchmark +# description: Run query 02 from the TPC-H benchmark +# group: [sf1-parquet] + +template benchmark/tpch/sf1/local/delta_attach/tpch_sf1_delta.benchmark.in +QUERY_NUMBER=2 +QUERY_NUMBER_PADDED=02 diff --git a/benchmark/tpch/sf1/local/delta_attach/q03.benchmark b/benchmark/tpch/sf1/local/delta_attach/q03.benchmark new file mode 100644 index 0000000..18d2608 --- /dev/null +++ b/benchmark/tpch/sf1/local/delta_attach/q03.benchmark @@ -0,0 +1,7 @@ +# name: benchmark/tpch/sf1/local/delta_attach/q03.benchmark +# description: Run query 03 from the TPC-H benchmark +# group: [sf1-parquet] + +template benchmark/tpch/sf1/local/delta_attach/tpch_sf1_delta.benchmark.in +QUERY_NUMBER=3 +QUERY_NUMBER_PADDED=03 diff --git a/benchmark/tpch/sf1/local/delta_attach/q04.benchmark b/benchmark/tpch/sf1/local/delta_attach/q04.benchmark new file mode 100644 index 0000000..a9ab91e --- /dev/null +++ b/benchmark/tpch/sf1/local/delta_attach/q04.benchmark @@ -0,0 +1,7 @@ +# name: benchmark/tpch/sf1/local/delta_attach/q04.benchmark +# description: Run query 04 from the TPC-H benchmark +# group: [sf1-parquet] + +template benchmark/tpch/sf1/local/delta_attach/tpch_sf1_delta.benchmark.in +QUERY_NUMBER=4 +QUERY_NUMBER_PADDED=04 diff --git a/benchmark/tpch/sf1/local/delta_attach/q05.benchmark b/benchmark/tpch/sf1/local/delta_attach/q05.benchmark new file mode 100644 index 0000000..d276a76 --- /dev/null +++ b/benchmark/tpch/sf1/local/delta_attach/q05.benchmark @@ -0,0 +1,7 @@ +# name: benchmark/tpch/sf1/local/delta_attach/q05.benchmark +# description: Run query 05 from the TPC-H benchmark +# group: [sf1-parquet] + +template benchmark/tpch/sf1/local/delta_attach/tpch_sf1_delta.benchmark.in +QUERY_NUMBER=5 +QUERY_NUMBER_PADDED=05 diff --git 
a/benchmark/tpch/sf1/local/delta_attach/q06.benchmark b/benchmark/tpch/sf1/local/delta_attach/q06.benchmark new file mode 100644 index 0000000..a525c49 --- /dev/null +++ b/benchmark/tpch/sf1/local/delta_attach/q06.benchmark @@ -0,0 +1,7 @@ +# name: benchmark/tpch/sf1/local/delta_attach/q06.benchmark +# description: Run query 06 from the TPC-H benchmark +# group: [sf1-parquet] + +template benchmark/tpch/sf1/local/delta_attach/tpch_sf1_delta.benchmark.in +QUERY_NUMBER=6 +QUERY_NUMBER_PADDED=06 diff --git a/benchmark/tpch/sf1/local/delta_attach/q07.benchmark b/benchmark/tpch/sf1/local/delta_attach/q07.benchmark new file mode 100644 index 0000000..42177a3 --- /dev/null +++ b/benchmark/tpch/sf1/local/delta_attach/q07.benchmark @@ -0,0 +1,7 @@ +# name: benchmark/tpch/sf1/local/delta_attach/q07.benchmark +# description: Run query 07 from the TPC-H benchmark +# group: [sf1-parquet] + +template benchmark/tpch/sf1/local/delta_attach/tpch_sf1_delta.benchmark.in +QUERY_NUMBER=7 +QUERY_NUMBER_PADDED=07 diff --git a/benchmark/tpch/sf1/local/delta_attach/q08.benchmark b/benchmark/tpch/sf1/local/delta_attach/q08.benchmark new file mode 100644 index 0000000..3f4f74d --- /dev/null +++ b/benchmark/tpch/sf1/local/delta_attach/q08.benchmark @@ -0,0 +1,7 @@ +# name: benchmark/tpch/sf1/local/delta_attach/q08.benchmark +# description: Run query 08 from the TPC-H benchmark +# group: [sf1-parquet] + +template benchmark/tpch/sf1/local/delta_attach/tpch_sf1_delta.benchmark.in +QUERY_NUMBER=8 +QUERY_NUMBER_PADDED=08 diff --git a/benchmark/tpch/sf1/local/delta_attach/q09.benchmark b/benchmark/tpch/sf1/local/delta_attach/q09.benchmark new file mode 100644 index 0000000..cb80c45 --- /dev/null +++ b/benchmark/tpch/sf1/local/delta_attach/q09.benchmark @@ -0,0 +1,7 @@ +# name: benchmark/tpch/sf1/local/delta_attach/q09.benchmark +# description: Run query 09 from the TPC-H benchmark +# group: [sf1-parquet] + +template benchmark/tpch/sf1/local/delta_attach/tpch_sf1_delta.benchmark.in +QUERY_NUMBER=9 
+QUERY_NUMBER_PADDED=09 diff --git a/benchmark/tpch/sf1/local/delta_attach/q10.benchmark b/benchmark/tpch/sf1/local/delta_attach/q10.benchmark new file mode 100644 index 0000000..a800252 --- /dev/null +++ b/benchmark/tpch/sf1/local/delta_attach/q10.benchmark @@ -0,0 +1,7 @@ +# name: benchmark/tpch/sf1/local/delta_attach/q10.benchmark +# description: Run query 10 from the TPC-H benchmark +# group: [sf1-parquet] + +template benchmark/tpch/sf1/local/delta_attach/tpch_sf1_delta.benchmark.in +QUERY_NUMBER=10 +QUERY_NUMBER_PADDED=10 diff --git a/benchmark/tpch/sf1/local/delta_attach/q11.benchmark b/benchmark/tpch/sf1/local/delta_attach/q11.benchmark new file mode 100644 index 0000000..11baed8 --- /dev/null +++ b/benchmark/tpch/sf1/local/delta_attach/q11.benchmark @@ -0,0 +1,7 @@ +# name: benchmark/tpch/sf1/local/delta_attach/q11.benchmark +# description: Run query 11 from the TPC-H benchmark +# group: [sf1-parquet] + +template benchmark/tpch/sf1/local/delta_attach/tpch_sf1_delta.benchmark.in +QUERY_NUMBER=11 +QUERY_NUMBER_PADDED=11 diff --git a/benchmark/tpch/sf1/local/delta_attach/q12.benchmark b/benchmark/tpch/sf1/local/delta_attach/q12.benchmark new file mode 100644 index 0000000..e115292 --- /dev/null +++ b/benchmark/tpch/sf1/local/delta_attach/q12.benchmark @@ -0,0 +1,7 @@ +# name: benchmark/tpch/sf1/local/delta_attach/q12.benchmark +# description: Run query 12 from the TPC-H benchmark +# group: [sf1-parquet] + +template benchmark/tpch/sf1/local/delta_attach/tpch_sf1_delta.benchmark.in +QUERY_NUMBER=12 +QUERY_NUMBER_PADDED=12 diff --git a/benchmark/tpch/sf1/local/delta_attach/q13.benchmark b/benchmark/tpch/sf1/local/delta_attach/q13.benchmark new file mode 100644 index 0000000..729c4f2 --- /dev/null +++ b/benchmark/tpch/sf1/local/delta_attach/q13.benchmark @@ -0,0 +1,7 @@ +# name: benchmark/tpch/sf1/local/delta_attach/q13.benchmark +# description: Run query 13 from the TPC-H benchmark +# group: [sf1-parquet] + +template 
benchmark/tpch/sf1/local/delta_attach/tpch_sf1_delta.benchmark.in +QUERY_NUMBER=13 +QUERY_NUMBER_PADDED=13 diff --git a/benchmark/tpch/sf1/local/delta_attach/q14.benchmark b/benchmark/tpch/sf1/local/delta_attach/q14.benchmark new file mode 100644 index 0000000..923abb0 --- /dev/null +++ b/benchmark/tpch/sf1/local/delta_attach/q14.benchmark @@ -0,0 +1,7 @@ +# name: benchmark/tpch/sf1/local/delta_attach/q14.benchmark +# description: Run query 14 from the TPC-H benchmark +# group: [sf1-parquet] + +template benchmark/tpch/sf1/local/delta_attach/tpch_sf1_delta.benchmark.in +QUERY_NUMBER=14 +QUERY_NUMBER_PADDED=14 diff --git a/benchmark/tpch/sf1/local/delta_attach/q15.benchmark b/benchmark/tpch/sf1/local/delta_attach/q15.benchmark new file mode 100644 index 0000000..c16240c --- /dev/null +++ b/benchmark/tpch/sf1/local/delta_attach/q15.benchmark @@ -0,0 +1,7 @@ +# name: benchmark/tpch/sf1/local/delta_attach/q15.benchmark +# description: Run query 15 from the TPC-H benchmark +# group: [sf1-parquet] + +template benchmark/tpch/sf1/local/delta_attach/tpch_sf1_delta.benchmark.in +QUERY_NUMBER=15 +QUERY_NUMBER_PADDED=15 diff --git a/benchmark/tpch/sf1/local/delta_attach/q16.benchmark b/benchmark/tpch/sf1/local/delta_attach/q16.benchmark new file mode 100644 index 0000000..83db3e0 --- /dev/null +++ b/benchmark/tpch/sf1/local/delta_attach/q16.benchmark @@ -0,0 +1,7 @@ +# name: benchmark/tpch/sf1/local/delta_attach/q16.benchmark +# description: Run query 16 from the TPC-H benchmark +# group: [sf1-parquet] + +template benchmark/tpch/sf1/local/delta_attach/tpch_sf1_delta.benchmark.in +QUERY_NUMBER=16 +QUERY_NUMBER_PADDED=16 diff --git a/benchmark/tpch/sf1/local/delta_attach/q17.benchmark b/benchmark/tpch/sf1/local/delta_attach/q17.benchmark new file mode 100644 index 0000000..28dbc83 --- /dev/null +++ b/benchmark/tpch/sf1/local/delta_attach/q17.benchmark @@ -0,0 +1,7 @@ +# name: benchmark/tpch/sf1/local/delta_attach/q17.benchmark +# description: Run query 17 from the TPC-H benchmark 
+# group: [sf1-parquet] + +template benchmark/tpch/sf1/local/delta_attach/tpch_sf1_delta.benchmark.in +QUERY_NUMBER=17 +QUERY_NUMBER_PADDED=17 diff --git a/benchmark/tpch/sf1/local/delta_attach/q18.benchmark b/benchmark/tpch/sf1/local/delta_attach/q18.benchmark new file mode 100644 index 0000000..1e969a7 --- /dev/null +++ b/benchmark/tpch/sf1/local/delta_attach/q18.benchmark @@ -0,0 +1,7 @@ +# name: benchmark/tpch/sf1/local/delta_attach/q18.benchmark +# description: Run query 18 from the TPC-H benchmark +# group: [sf1-parquet] + +template benchmark/tpch/sf1/local/delta_attach/tpch_sf1_delta.benchmark.in +QUERY_NUMBER=18 +QUERY_NUMBER_PADDED=18 diff --git a/benchmark/tpch/sf1/local/delta_attach/q19.benchmark b/benchmark/tpch/sf1/local/delta_attach/q19.benchmark new file mode 100644 index 0000000..259c78a --- /dev/null +++ b/benchmark/tpch/sf1/local/delta_attach/q19.benchmark @@ -0,0 +1,7 @@ +# name: benchmark/tpch/sf1/local/delta_attach/q19.benchmark +# description: Run query 19 from the TPC-H benchmark +# group: [sf1-parquet] + +template benchmark/tpch/sf1/local/delta_attach/tpch_sf1_delta.benchmark.in +QUERY_NUMBER=19 +QUERY_NUMBER_PADDED=19 diff --git a/benchmark/tpch/sf1/local/delta_attach/q20.benchmark b/benchmark/tpch/sf1/local/delta_attach/q20.benchmark new file mode 100644 index 0000000..82d1c3a --- /dev/null +++ b/benchmark/tpch/sf1/local/delta_attach/q20.benchmark @@ -0,0 +1,7 @@ +# name: benchmark/tpch/sf1/local/delta_attach/q20.benchmark +# description: Run query 20 from the TPC-H benchmark +# group: [sf1-parquet] + +template benchmark/tpch/sf1/local/delta_attach/tpch_sf1_delta.benchmark.in +QUERY_NUMBER=20 +QUERY_NUMBER_PADDED=20 diff --git a/benchmark/tpch/sf1/local/delta_attach/q21.benchmark b/benchmark/tpch/sf1/local/delta_attach/q21.benchmark new file mode 100644 index 0000000..bd30f2a --- /dev/null +++ b/benchmark/tpch/sf1/local/delta_attach/q21.benchmark @@ -0,0 +1,7 @@ +# name: benchmark/tpch/sf1/local/delta_attach/q21.benchmark +# description: 
Run query 21 from the TPC-H benchmark +# group: [sf1-parquet] + +template benchmark/tpch/sf1/local/delta_attach/tpch_sf1_delta.benchmark.in +QUERY_NUMBER=21 +QUERY_NUMBER_PADDED=21 diff --git a/benchmark/tpch/sf1/local/delta_attach/q22.benchmark b/benchmark/tpch/sf1/local/delta_attach/q22.benchmark new file mode 100644 index 0000000..410457e --- /dev/null +++ b/benchmark/tpch/sf1/local/delta_attach/q22.benchmark @@ -0,0 +1,7 @@ +# name: benchmark/tpch/sf1/local/delta_attach/q22.benchmark +# description: Run query 22 from the TPC-H benchmark +# group: [sf1-parquet] + +template benchmark/tpch/sf1/local/delta_attach/tpch_sf1_delta.benchmark.in +QUERY_NUMBER=22 +QUERY_NUMBER_PADDED=22 diff --git a/benchmark/tpch/sf1/local/delta_attach/tpch_sf1_delta.benchmark.in b/benchmark/tpch/sf1/local/delta_attach/tpch_sf1_delta.benchmark.in new file mode 100644 index 0000000..fb8c916 --- /dev/null +++ b/benchmark/tpch/sf1/local/delta_attach/tpch_sf1_delta.benchmark.in @@ -0,0 +1,17 @@ +# name: ${FILE_PATH} +# description: ${DESCRIPTION} +# group: [sf1] + +name Q${QUERY_NUMBER_PADDED} +group tpch +subgroup sf1 + +require delta + +require parquet + +load benchmark/tpch/sf1/local/delta_attach/load.sql + +run duckdb/extension/tpch/dbgen/queries/q${QUERY_NUMBER_PADDED}.sql + +result duckdb/extension/tpch/dbgen/answers/sf1/q${QUERY_NUMBER_PADDED}.csv \ No newline at end of file diff --git a/duckdb b/duckdb index f680b7d..9f3db54 160000 --- a/duckdb +++ b/duckdb @@ -1 +1 @@ -Subproject commit f680b7d08f56183391b581077d4baf589e1cc8bd +Subproject commit 9f3db54cc308f38afee235bd6b7bdf61142e4995 diff --git a/extension-ci-tools b/extension-ci-tools index f5594c6..b46e390 160000 --- a/extension-ci-tools +++ b/extension-ci-tools @@ -1 +1 @@ -Subproject commit f5594c61803daee122a5245afb817966e1a4545c +Subproject commit b46e39024cb2fc96fcec258a7a1304d510bbe914 diff --git a/scripts/generate_test_data.py b/scripts/generate_test_data.py index b817faa..e3ab444 100644 --- 
a/scripts/generate_test_data.py +++ b/scripts/generate_test_data.py @@ -32,7 +32,7 @@ def generate_test_data_delta_rs_multi(path, init, tables, splits = 1): os.makedirs(f"{generated_path}") - # First we write a DuckDB file TODO: this should go in 10 appends as well? + # First we write a DuckDB file TODO: this should go in N appends as well? con = duckdb.connect(f"{generated_path}/duckdb.db") con.sql(init) @@ -40,14 +40,21 @@ def generate_test_data_delta_rs_multi(path, init, tables, splits = 1): # Then we write the parquet files for table in tables: total_count = con.sql(f"select count(*) from ({table['query']})").fetchall()[0][0] - tuples_per_file = math.ceil(total_count / splits) + # At least 1 tuple per file + if total_count < splits: + splits = total_count + tuples_per_file = total_count // splits + remainder = total_count % splits file_no = 0 + write_from = 0 while file_no < splits: os.makedirs(f"{generated_path}/{table['name']}/parquet", exist_ok=True) # Write DuckDB's reference data - con.sql(f"COPY ({table['query']} where rowid >= {(file_no) * tuples_per_file} and rowid < {(file_no+1) * tuples_per_file}) to '{generated_path}/{table['name']}/parquet/data_{file_no}.parquet' (FORMAT parquet)") + write_to = write_from + tuples_per_file + (1 if file_no < remainder else 0) + con.sql(f"COPY ({table['query']} where rowid >= {write_from} and rowid < {write_to}) to '{generated_path}/{table['name']}/parquet/data_{file_no}.parquet' (FORMAT parquet)") file_no += 1 + write_from = write_to for table in tables: con = duckdb.connect(f"{generated_path}/duckdb.db") diff --git a/scripts/plot.py b/scripts/plot.py index 9090f3f..2ca01bc 100644 --- a/scripts/plot.py +++ b/scripts/plot.py @@ -5,6 +5,7 @@ parser = argparse.ArgumentParser(description='Plot the results in ./benchmark_results') parser.add_argument('-p','--pattern', help='Pattern to match result csv files to', required=False, default='*.csv') parser.add_argument('-w','--width', help='Width of graph, adjust to fit data', 
required=False, default=20) +parser.add_argument('-n','--name', help='name of the graph ', required=False, default='') args = vars(parser.parse_args()) ### Parse Query Results @@ -34,5 +35,5 @@ import numpy as np plt.rcParams["figure.figsize"] = [int(args['width']), 5] -fig = benchmark_results.pivot(index='benchmark', columns='config', values='timing').plot(kind='bar', title='', ylabel='runtime [s]').get_figure() +fig = benchmark_results.pivot(index='benchmark', columns='config', values='timing').plot(kind='bar', title=args['name'], ylabel='runtime [s]').get_figure() fig.savefig('benchmark_results/result.png') \ No newline at end of file diff --git a/src/delta_extension.cpp b/src/delta_extension.cpp index 1a316d9..ce650aa 100644 --- a/src/delta_extension.cpp +++ b/src/delta_extension.cpp @@ -6,14 +6,52 @@ #include "duckdb.hpp" #include "duckdb/common/exception.hpp" #include "duckdb/main/extension_util.hpp" +#include "duckdb/storage/storage_extension.hpp" +#include "storage/delta_catalog.hpp" +#include "storage/delta_transaction_manager.hpp" namespace duckdb { +static unique_ptr DeltaCatalogAttach(StorageExtensionInfo *storage_info, ClientContext &context, + AttachedDatabase &db, const string &name, AttachInfo &info, + AccessMode access_mode) { + + auto res = make_uniq(db, info.path, access_mode); + + for (const auto& option : info.options) { + if (StringUtil::Lower(option.first) == "pin_snapshot") { + res->use_cache = option.second.GetValue(); + } + } + + res->SetDefaultTable(DEFAULT_SCHEMA, DEFAULT_DELTA_TABLE); + + return std::move(res); +} + +static unique_ptr CreateTransactionManager(StorageExtensionInfo *storage_info, AttachedDatabase &db, + Catalog &catalog) { + auto &delta_catalog = catalog.Cast(); + return make_uniq(db, delta_catalog); +} + +class DeltaStorageExtension : public StorageExtension { +public: + DeltaStorageExtension() { + attach = DeltaCatalogAttach; + create_transaction_manager = CreateTransactionManager; + } +}; + static void 
LoadInternal(DatabaseInstance &instance) { // Load functions for (const auto &function : DeltaFunctions::GetTableFunctions(instance)) { ExtensionUtil::RegisterFunction(instance, function); } + + // Register the "single table" delta catalog (to ATTACH a single delta table) + auto &config = DBConfig::GetConfig(instance); + config.storage_extensions["delta"] = make_uniq(); } void DeltaExtension::Load(DuckDB &db) { diff --git a/src/functions/delta_scan.cpp b/src/functions/delta_scan.cpp index 65eb34f..3a85989 100644 --- a/src/functions/delta_scan.cpp +++ b/src/functions/delta_scan.cpp @@ -19,7 +19,9 @@ #include #include #include +#include #include +#include namespace duckdb { @@ -387,22 +389,42 @@ string DeltaSnapshot::ToDeltaPath(const string &raw_path) { } void DeltaSnapshot::Bind(vector &return_types, vector &names) { - if (!initialized) { - InitializeFiles(); + if (have_bound) { + names = this->names; + return_types = this->types; + return; + } + + if (!initialized_snapshot) { + InitializeSnapshot(); } - auto schema = SchemaVisitor::VisitSnapshotSchema(snapshot.get()); + + unique_ptr schema; + + { + auto snapshot_ref = snapshot->GetLockingRef(); + schema = SchemaVisitor::VisitSnapshotSchema(snapshot_ref.GetPtr()); + } + for (const auto &field: *schema) { names.push_back(field.first); return_types.push_back(field.second); } // Store the bound names for resolving the complex filter pushdown later + have_bound = true; this->names = names; + this->types = return_types; } string DeltaSnapshot::GetFile(idx_t i) { - if (!initialized) { - InitializeFiles(); + if (!initialized_snapshot) { + InitializeSnapshot(); } + + if(!initialized_scan) { + InitializeScan(); + } + // We already have this file if (i < resolved_files.size()) { return resolved_files[i]; @@ -432,35 +454,46 @@ string DeltaSnapshot::GetFile(idx_t i) { return resolved_files[i]; } -void DeltaSnapshot::InitializeFiles() { +void DeltaSnapshot::InitializeSnapshot() { auto path_slice = 
KernelUtils::ToDeltaString(paths[0]); - // Register engine auto interface_builder = CreateBuilder(context, paths[0]); extern_engine = TryUnpackKernelResult( ffi::builder_build(interface_builder)); - // Initialize Snapshot - snapshot = TryUnpackKernelResult(ffi::snapshot(path_slice, extern_engine.get())); + if (!snapshot) { + snapshot = make_shared_ptr(TryUnpackKernelResult(ffi::snapshot(path_slice, extern_engine.get()))); + } + + initialized_snapshot = true; +} + +void DeltaSnapshot::InitializeScan() { + auto snapshot_ref = snapshot->GetLockingRef(); // Create Scan PredicateVisitor visitor(names, &table_filters); - scan = TryUnpackKernelResult(ffi::scan(snapshot.get(), extern_engine.get(), &visitor)); + scan = TryUnpackKernelResult(ffi::scan(snapshot_ref.GetPtr(), extern_engine.get(), &visitor)); // Create GlobalState global_state = ffi::get_global_scan_state(scan.get()); // Set version - this->version = ffi::version(snapshot.get()); + this->version = ffi::version(snapshot_ref.GetPtr()); // Create scan data iterator scan_data_iterator = TryUnpackKernelResult(ffi::kernel_scan_data_init(extern_engine.get(), scan.get())); - initialized = true; + initialized_scan = true; } unique_ptr DeltaSnapshot::ComplexFilterPushdown(ClientContext &context, const MultiFileReaderOptions &options, MultiFilePushdownInfo &info, vector> &filters) { FilterCombiner combiner(context); + + if (filters.empty()) { + return nullptr; + } + for (const auto &filter : filters) { combiner.AddFilter(filter->Copy()); } @@ -471,6 +504,9 @@ unique_ptr DeltaSnapshot::ComplexFilterPushdown(ClientContext &co filtered_list->table_filters = std::move(filterstmp); filtered_list->names = names; + // Copy over the snapshot, this avoids reparsing metadata + filtered_list->snapshot = snapshot; + return std::move(filtered_list); } @@ -484,16 +520,11 @@ vector DeltaSnapshot::GetAllFiles() { } FileExpandResult DeltaSnapshot::GetExpandResult() { - // GetFile(1) will ensure at least the first 2 files are expanded if 
they are available - GetFile(1); - - if (resolved_files.size() > 1) { - return FileExpandResult::MULTIPLE_FILES; - } else if (resolved_files.size() == 1) { - return FileExpandResult::SINGLE_FILE; - } - - return FileExpandResult::NO_FILES; + // We avoid exposing the ExpandResult to DuckDB here because we want to materialize the Snapshot as late as possible: + // materializing too early (GetExpandResult is called *before* filter pushdown by the Parquet scanner), will lead into + // needing to create 2 scans of the snapshot TODO: we need to investigate if this is actually a sensible decision with + // some benchmarking, its currently based on intuition. + return FileExpandResult::MULTIPLE_FILES; } idx_t DeltaSnapshot::GetTotalFileCount() { @@ -529,8 +560,14 @@ unique_ptr DeltaSnapshot::GetCardinality(ClientContext &context) return nullptr; } -unique_ptr DeltaMultiFileReader::CreateInstance() { - return std::move(make_uniq()); +unique_ptr DeltaMultiFileReader::CreateInstance(const TableFunction &table_function) { + auto result = make_uniq(); + + if (table_function.function_info) { + result->snapshot = table_function.function_info->Cast().snapshot; + } + + return std::move(result); } bool DeltaMultiFileReader::Bind(MultiFileReaderOptions &options, MultiFileList &files, @@ -618,11 +655,20 @@ void DeltaMultiFileReader::FinalizeBind(const MultiFileReaderOptions &file_optio } } -unique_ptr DeltaMultiFileReader::CreateFileList(ClientContext &context, const vector& paths, FileGlobOptions options) { +shared_ptr DeltaMultiFileReader::CreateFileList(ClientContext &context, const vector& paths, FileGlobOptions options) { if (paths.size() != 1) { throw BinderException("'delta_scan' only supports single path as input"); } + + if (snapshot) { + // TODO: assert that we are querying the same path as this injected snapshot + // This takes the kernel snapshot from the delta snapshot and ensures we use that snapshot for reading + if (snapshot) { + return snapshot; + } + } + return 
make_uniq(context, paths[0]); } diff --git a/src/include/delta_extension.hpp b/src/include/delta_extension.hpp index d6b13f2..89fbc2d 100644 --- a/src/include/delta_extension.hpp +++ b/src/include/delta_extension.hpp @@ -2,6 +2,8 @@ #include "duckdb.hpp" +#define DEFAULT_DELTA_TABLE "delta_table" + namespace duckdb { class DeltaExtension : public Extension { diff --git a/src/include/delta_utils.hpp b/src/include/delta_utils.hpp index 9b33c5c..7b14386 100644 --- a/src/include/delta_utils.hpp +++ b/src/include/delta_utils.hpp @@ -108,6 +108,61 @@ typedef TemplatedUniqueKernelPointer KernelScan typedef TemplatedUniqueKernelPointer KernelGlobalScanState; typedef TemplatedUniqueKernelPointer KernelScanDataIterator; +template +struct SharedKernelPointer; + +// A reference to a SharedKernelPointer, only 1 can be handed out at the same time +template +struct SharedKernelRef { + friend struct SharedKernelPointer; +public: + KernelType* GetPtr() { + return owning_pointer.kernel_ptr.get(); + } + ~SharedKernelRef() { + owning_pointer.lock.unlock(); + } + +protected: + SharedKernelRef(SharedKernelPointer& owning_pointer_p) : owning_pointer(owning_pointer_p) { + owning_pointer.lock.lock(); + } + +protected: + // The pointer that owns this ref + SharedKernelPointer& owning_pointer; +}; + +// Wrapper around ffi objects to share between threads +template +struct SharedKernelPointer { + friend struct SharedKernelRef; +public: + SharedKernelPointer(TemplatedUniqueKernelPointer unique_kernel_ptr) : kernel_ptr(unique_kernel_ptr) {} + SharedKernelPointer(KernelType* ptr) : kernel_ptr(ptr){} + SharedKernelPointer(){} + + SharedKernelPointer(SharedKernelPointer&& other) : SharedKernelPointer() { + other.lock.lock(); + lock.lock(); + kernel_ptr = std::move(other.kernel_ptr); + lock.lock(); + other.lock.lock(); + } + + // Returns a reference to the underlying kernel object. 
The SharedKernelPointer to this object will be locked for the + // lifetime of this reference + SharedKernelRef GetLockingRef() { + return SharedKernelRef(*this); + } + +protected: + TemplatedUniqueKernelPointer kernel_ptr; + mutex lock; +}; + +typedef SharedKernelPointer SharedKernelSnapshot; + struct KernelUtils { static ffi::KernelStringSlice ToDeltaString(const string &str); static string FromDeltaString(const struct ffi::KernelStringSlice slice); diff --git a/src/include/functions/delta_scan.hpp b/src/include/functions/delta_scan.hpp index aac35cc..d467d10 100644 --- a/src/include/functions/delta_scan.hpp +++ b/src/include/functions/delta_scan.hpp @@ -12,6 +12,12 @@ #include "duckdb/common/multi_file_reader.hpp" namespace duckdb { +struct DeltaSnapshot; + +struct DeltaFunctionInfo : public TableFunctionInfo { + shared_ptr snapshot; + string expected_path; +}; struct DeltaFileMetaData { DeltaFileMetaData() {}; @@ -57,8 +63,8 @@ struct DeltaSnapshot : public MultiFileList { string GetFile(idx_t i) override; protected: - // TODO: How to guarantee we only call this after the filter pushdown? - void InitializeFiles(); + void InitializeSnapshot(); + void InitializeScan(); template T TryUnpackKernelResult(ffi::ExternResult result) { @@ -70,7 +76,8 @@ struct DeltaSnapshot : public MultiFileList { idx_t version; //! Delta Kernel Structures - KernelSnapshot snapshot; + shared_ptr snapshot; + KernelExternEngine extern_engine; KernelScan scan; KernelGlobalScanState global_state; @@ -78,12 +85,16 @@ struct DeltaSnapshot : public MultiFileList { //! Names vector names; + vector types; + bool have_bound = false; //! Metadata map for files vector> metadata; //! 
Current file list resolution state - bool initialized = false; + bool initialized_snapshot = false; + bool initialized_scan = false; + bool files_exhausted = false; vector resolved_files; TableFilterSet table_filters; @@ -103,9 +114,9 @@ struct DeltaMultiFileReaderGlobalState : public MultiFileReaderGlobalState { }; struct DeltaMultiFileReader : public MultiFileReader { - static unique_ptr CreateInstance(); + static unique_ptr CreateInstance(const TableFunction &table_function); //! Return a DeltaSnapshot - unique_ptr CreateFileList(ClientContext &context, const vector &paths, + shared_ptr CreateFileList(ClientContext &context, const vector &paths, FileGlobOptions options) override; //! Override the regular parquet bind using the MultiFileReader Bind. The bind from these are what DuckDB's file @@ -141,6 +152,10 @@ struct DeltaMultiFileReader : public MultiFileReader { //! Override the ParseOption call to parse delta_scan specific options bool ParseOption(const string &key, const Value &val, MultiFileReaderOptions &options, ClientContext &context) override; + + // A snapshot can be injected into the multifilereader, this ensures the GetMultiFileList can return this snapshot + // (note that the path should match the one passed to CreateFileList) + shared_ptr snapshot; }; } // namespace duckdb diff --git a/src/include/storage/delta_catalog.hpp b/src/include/storage/delta_catalog.hpp new file mode 100644 index 0000000..faeb00c --- /dev/null +++ b/src/include/storage/delta_catalog.hpp @@ -0,0 +1,81 @@ +//===----------------------------------------------------------------------===// +// DuckDB +// +// storage/delta_catalog.hpp +// +// +//===----------------------------------------------------------------------===// + +#pragma once + +#include "delta_schema_entry.hpp" +#include "duckdb/catalog/catalog.hpp" +#include "duckdb/function/table_function.hpp" +#include "duckdb/common/enums/access_mode.hpp" + +namespace duckdb { +class DeltaSchemaEntry; + +class 
DeltaClearCacheFunction : public TableFunction { +public: + DeltaClearCacheFunction(); + + static void ClearCacheOnSetting(ClientContext &context, SetScope scope, Value ¶meter); +}; + +class DeltaCatalog : public Catalog { +public: + explicit DeltaCatalog(AttachedDatabase &db_p, const string &internal_name, AccessMode access_mode); + ~DeltaCatalog(); + + string path; + AccessMode access_mode; + bool use_cache; + +public: + void Initialize(bool load_builtin) override; + string GetCatalogType() override { + return "delta"; + } + + optional_ptr CreateSchema(CatalogTransaction transaction, CreateSchemaInfo &info) override; + + void ScanSchemas(ClientContext &context, std::function callback) override; + + optional_ptr GetSchema(CatalogTransaction transaction, const string &schema_name, + OnEntryNotFound if_not_found, + QueryErrorContext error_context = QueryErrorContext()) override; + + unique_ptr PlanInsert(ClientContext &context, LogicalInsert &op, + unique_ptr plan) override; + unique_ptr PlanCreateTableAs(ClientContext &context, LogicalCreateTable &op, + unique_ptr plan) override; + unique_ptr PlanDelete(ClientContext &context, LogicalDelete &op, + unique_ptr plan) override; + unique_ptr PlanUpdate(ClientContext &context, LogicalUpdate &op, + unique_ptr plan) override; + unique_ptr BindCreateIndex(Binder &binder, CreateStatement &stmt, TableCatalogEntry &table, + unique_ptr plan) override; + + DatabaseSize GetDatabaseSize(ClientContext &context) override; + + optional_idx GetCatalogVersion(ClientContext &context) override; + + bool InMemory() override; + string GetDBPath() override; + + bool UseCachedSnapshot(); + + DeltaSchemaEntry& GetMainSchema() { + return *main_schema; + } + +private: + void DropSchema(ClientContext &context, DropInfo &info) override; + +private: + unique_ptr main_schema; + string default_schema; +}; + +} // namespace duckdb diff --git a/src/include/storage/delta_schema_entry.hpp b/src/include/storage/delta_schema_entry.hpp new file mode 100644 
index 0000000..c8a8d09 --- /dev/null +++ b/src/include/storage/delta_schema_entry.hpp @@ -0,0 +1,52 @@ +//===----------------------------------------------------------------------===// +// DuckDB +// +// storage/delta_schema_entry.hpp +// +// +//===----------------------------------------------------------------------===// + +#pragma once + +#include "duckdb/catalog/catalog_entry/schema_catalog_entry.hpp" +#include "storage/delta_table_entry.hpp" + +namespace duckdb { +class DeltaTransaction; + +class DeltaSchemaEntry : public SchemaCatalogEntry { +public: + DeltaSchemaEntry(Catalog &catalog, CreateSchemaInfo &info); + ~DeltaSchemaEntry() override; + +public: + optional_ptr CreateTable(CatalogTransaction transaction, BoundCreateTableInfo &info) override; + optional_ptr CreateFunction(CatalogTransaction transaction, CreateFunctionInfo &info) override; + optional_ptr CreateIndex(CatalogTransaction transaction, CreateIndexInfo &info, + TableCatalogEntry &table) override; + optional_ptr CreateView(CatalogTransaction transaction, CreateViewInfo &info) override; + optional_ptr CreateSequence(CatalogTransaction transaction, CreateSequenceInfo &info) override; + optional_ptr CreateTableFunction(CatalogTransaction transaction, + CreateTableFunctionInfo &info) override; + optional_ptr CreateCopyFunction(CatalogTransaction transaction, + CreateCopyFunctionInfo &info) override; + optional_ptr CreatePragmaFunction(CatalogTransaction transaction, + CreatePragmaFunctionInfo &info) override; + optional_ptr CreateCollation(CatalogTransaction transaction, CreateCollationInfo &info) override; + optional_ptr CreateType(CatalogTransaction transaction, CreateTypeInfo &info) override; + void Alter(CatalogTransaction transaction, AlterInfo &info) override; + void Scan(ClientContext &context, CatalogType type, const std::function &callback) override; + void Scan(CatalogType type, const std::function &callback) override; + void DropEntry(ClientContext &context, DropInfo &info) override; + 
optional_ptr GetEntry(CatalogTransaction transaction, CatalogType type, const string &name) override; + + optional_ptr GetCachedTable(); + +private: + //! Delta tables may be cached in the SchemaEntry. Since the TableEntry holds the snapshot, this allows sharing a snapshot + //! between different scans. + unique_ptr cached_table; + mutex lock; +}; + +} // namespace duckdb diff --git a/src/include/storage/delta_table_entry.hpp b/src/include/storage/delta_table_entry.hpp new file mode 100644 index 0000000..c131694 --- /dev/null +++ b/src/include/storage/delta_table_entry.hpp @@ -0,0 +1,36 @@ +//===----------------------------------------------------------------------===// +// DuckDB +// +// storage/delta_table_entry.hpp +// +// +//===----------------------------------------------------------------------===// + +#pragma once + +#include "duckdb/catalog/catalog_entry/table_catalog_entry.hpp" +#include "duckdb/parser/parsed_data/create_table_info.hpp" + +namespace duckdb { +struct DeltaSnapshot; + +class DeltaTableEntry : public TableCatalogEntry { +public: + DeltaTableEntry(Catalog &catalog, SchemaCatalogEntry &schema, CreateTableInfo &info); + ~DeltaTableEntry(); + +public: + unique_ptr GetStatistics(ClientContext &context, column_t column_id) override; + + TableFunction GetScanFunction(ClientContext &context, unique_ptr &bind_data) override; + + TableStorageInfo GetStorageInfo(ClientContext &context) override; + + void BindUpdateConstraints(Binder &binder, LogicalGet &get, LogicalProjection &proj, LogicalUpdate &update, + ClientContext &context) override; + +public: + shared_ptr snapshot; +}; + +} // namespace duckdb diff --git a/src/include/storage/delta_transaction.hpp b/src/include/storage/delta_transaction.hpp new file mode 100644 index 0000000..3a004ef --- /dev/null +++ b/src/include/storage/delta_transaction.hpp @@ -0,0 +1,45 @@ +//===----------------------------------------------------------------------===// +// DuckDB +// +// storage/delta_transaction.hpp +// 
+// +//===----------------------------------------------------------------------===// + +#pragma once + +#include "duckdb/transaction/transaction.hpp" + +namespace duckdb { +class DeltaCatalog; +class DeltaSchemaEntry; +class DeltaTableEntry; +struct DeltaSnapshot; + +enum class DeltaTransactionState { TRANSACTION_NOT_YET_STARTED, TRANSACTION_STARTED, TRANSACTION_FINISHED }; + +class DeltaTransaction : public Transaction { +public: + DeltaTransaction(DeltaCatalog &delta_catalog, TransactionManager &manager, ClientContext &context); + ~DeltaTransaction() override; + + void Start(); + void Commit(); + void Rollback(); + + static DeltaTransaction &Get(ClientContext &context, Catalog &catalog); + AccessMode GetAccessMode() const; + + void SetReadWrite() override { + throw NotImplementedException("Can not start read-write transaction"); + }; +public: + unique_ptr table_entry; + +private: + // DeltaConnection connection; + DeltaTransactionState transaction_state; + AccessMode access_mode; +}; + +} // namespace duckdb diff --git a/src/include/storage/delta_transaction_manager.hpp b/src/include/storage/delta_transaction_manager.hpp new file mode 100644 index 0000000..3957982 --- /dev/null +++ b/src/include/storage/delta_transaction_manager.hpp @@ -0,0 +1,33 @@ +//===----------------------------------------------------------------------===// +// DuckDB +// +// storage/delta_transaction_manager.hpp +// +// +//===----------------------------------------------------------------------===// + +#pragma once + +#include "duckdb/transaction/transaction_manager.hpp" +#include "storage/delta_catalog.hpp" +#include "storage/delta_transaction.hpp" + +namespace duckdb { + +class DeltaTransactionManager : public TransactionManager { +public: + DeltaTransactionManager(AttachedDatabase &db_p, DeltaCatalog &delta_catalog); + + Transaction &StartTransaction(ClientContext &context) override; + ErrorData CommitTransaction(ClientContext &context, Transaction &transaction) override; + void 
RollbackTransaction(Transaction &transaction) override; + + void Checkpoint(ClientContext &context, bool force = false) override; + +private: + DeltaCatalog &delta_catalog; + mutex transaction_lock; + reference_map_t> transactions; +}; + +} // namespace duckdb diff --git a/src/storage/delta_catalog.cpp b/src/storage/delta_catalog.cpp new file mode 100644 index 0000000..4e57d23 --- /dev/null +++ b/src/storage/delta_catalog.cpp @@ -0,0 +1,108 @@ +#include "storage/delta_catalog.hpp" +#include "storage/delta_schema_entry.hpp" +#include "storage/delta_transaction.hpp" +#include "duckdb/storage/database_size.hpp" +#include "duckdb/parser/parsed_data/drop_info.hpp" +#include "duckdb/parser/parsed_data/create_schema_info.hpp" +#include "duckdb/main/attached_database.hpp" + +#include "functions/delta_scan.hpp" +#include "storage/delta_transaction_manager.hpp" + +namespace duckdb { + +DeltaCatalog::DeltaCatalog(AttachedDatabase &db_p, const string &path, AccessMode access_mode) + : Catalog(db_p), path(path), access_mode(access_mode), use_cache(false) { +} + +DeltaCatalog::~DeltaCatalog() = default; + +void DeltaCatalog::Initialize(bool load_builtin) { + CreateSchemaInfo info; + main_schema = make_uniq(*this, info); +} + +optional_ptr DeltaCatalog::CreateSchema(CatalogTransaction transaction, CreateSchemaInfo &info) { + throw BinderException("Delta tables do not support creating new schemas"); +} + +void DeltaCatalog::DropSchema(ClientContext &context, DropInfo &info) { + throw BinderException("Delta tables do not support dropping schemas"); +} + +void DeltaCatalog::ScanSchemas(ClientContext &context, std::function callback) { + callback(*main_schema); +} + +optional_ptr DeltaCatalog::GetSchema(CatalogTransaction transaction, const string &schema_name, + OnEntryNotFound if_not_found, QueryErrorContext error_context) { + if (schema_name == DEFAULT_SCHEMA || schema_name == INVALID_SCHEMA) { + return main_schema.get(); + } + if (if_not_found == OnEntryNotFound::RETURN_NULL) { + 
return nullptr; + } + return nullptr; +} + +bool DeltaCatalog::InMemory() { + return false; +} + +string DeltaCatalog::GetDBPath() { + return path; +} + +bool DeltaCatalog::UseCachedSnapshot() { + return use_cache; +} + +optional_idx DeltaCatalog::GetCatalogVersion(ClientContext &context) { + auto &delta_transaction = DeltaTransaction::Get(context, *this); + + // Option 1: snapshot is cached table-wide + auto cached_snapshot = main_schema->GetCachedTable(); + if (cached_snapshot) { + return cached_snapshot->snapshot->version; + } + + // Option 2: snapshot is cached in transaction + if (delta_transaction.table_entry) { + return delta_transaction.table_entry->snapshot->version; + } + + // FIXME: this is not allowed + return optional_idx::Invalid(); +} + +DatabaseSize DeltaCatalog::GetDatabaseSize(ClientContext &context) { + if (default_schema.empty()) { + throw InvalidInputException("Attempting to fetch the database size - but no database was provided " + "in the connection string"); + } + DatabaseSize size; + return size; +} + +unique_ptr DeltaCatalog::PlanInsert(ClientContext &context, LogicalInsert &op, + unique_ptr plan) { + throw NotImplementedException("DeltaCatalog does not support inserts"); +} +unique_ptr DeltaCatalog::PlanCreateTableAs(ClientContext &context, LogicalCreateTable &op, + unique_ptr plan) { + throw NotImplementedException("DeltaCatalog does not support creating new tables"); +} +unique_ptr DeltaCatalog::PlanDelete(ClientContext &context, LogicalDelete &op, + unique_ptr plan) { + throw NotImplementedException("DeltaCatalog does not support deletes"); +} +unique_ptr DeltaCatalog::PlanUpdate(ClientContext &context, LogicalUpdate &op, + unique_ptr plan) { + throw NotImplementedException("DeltaCatalog does not support updates"); +} +unique_ptr DeltaCatalog::BindCreateIndex(Binder &binder, CreateStatement &stmt, TableCatalogEntry &table, + unique_ptr plan) { + throw NotImplementedException("DeltaCatalog does not support creating indices"); +} + +} // 
namespace duckdb diff --git a/src/storage/delta_schema_entry.cpp b/src/storage/delta_schema_entry.cpp new file mode 100644 index 0000000..7e15c5b --- /dev/null +++ b/src/storage/delta_schema_entry.cpp @@ -0,0 +1,184 @@ +#include "storage/delta_schema_entry.hpp" + +#include "functions/delta_scan.hpp" +#include "storage/delta_catalog.hpp" + +#include "delta_extension.hpp" + +#include "storage/delta_table_entry.hpp" +#include "storage/delta_transaction.hpp" +#include "duckdb/parser/parsed_data/create_view_info.hpp" +#include "duckdb/parser/parsed_data/create_index_info.hpp" +#include "duckdb/planner/parsed_data/bound_create_table_info.hpp" +#include "duckdb/parser/parsed_data/drop_info.hpp" +#include "duckdb/parser/constraints/list.hpp" +#include "duckdb/common/unordered_set.hpp" +#include "duckdb/parser/parsed_data/alter_info.hpp" +#include "duckdb/parser/parsed_data/alter_table_info.hpp" +#include "duckdb/parser/parsed_expression_iterator.hpp" + + +namespace duckdb { + +DeltaSchemaEntry::DeltaSchemaEntry(Catalog &catalog, CreateSchemaInfo &info) + : SchemaCatalogEntry(catalog, info) { +} + +DeltaSchemaEntry::~DeltaSchemaEntry() { +} + +DeltaTransaction &GetDeltaTransaction(CatalogTransaction transaction) { + if (!transaction.transaction) { + throw InternalException("No transaction!?"); + } + return transaction.transaction->Cast(); +} + +optional_ptr DeltaSchemaEntry::CreateTable(CatalogTransaction transaction, BoundCreateTableInfo &info) { + throw BinderException("Delta tables do not support creating tables"); +} + +optional_ptr DeltaSchemaEntry::CreateFunction(CatalogTransaction transaction, CreateFunctionInfo &info) { + throw BinderException("Delta tables do not support creating functions"); +} + +void DeltaUnqualifyColumnRef(ParsedExpression &expr) { + if (expr.type == ExpressionType::COLUMN_REF) { + auto &colref = expr.Cast(); + auto name = std::move(colref.column_names.back()); + colref.column_names = {std::move(name)}; + return; + } + 
ParsedExpressionIterator::EnumerateChildren(expr, DeltaUnqualifyColumnRef); +} + +optional_ptr DeltaSchemaEntry::CreateIndex(CatalogTransaction transaction, CreateIndexInfo &info, + TableCatalogEntry &table) { + throw NotImplementedException("CreateIndex"); +} + +string GetDeltaCreateView(CreateViewInfo &info) { + throw NotImplementedException("GetCreateView"); +} + +optional_ptr DeltaSchemaEntry::CreateView(CatalogTransaction transaction, CreateViewInfo &info) { + throw BinderException("Delta tables do not support creating views"); +} + +optional_ptr DeltaSchemaEntry::CreateType(CatalogTransaction transaction, CreateTypeInfo &info) { + throw BinderException("Delta databases do not support creating types"); +} + +optional_ptr DeltaSchemaEntry::CreateSequence(CatalogTransaction transaction, CreateSequenceInfo &info) { + throw BinderException("Delta databases do not support creating sequences"); +} + +optional_ptr DeltaSchemaEntry::CreateTableFunction(CatalogTransaction transaction, + CreateTableFunctionInfo &info) { + throw BinderException("Delta databases do not support creating table functions"); +} + +optional_ptr DeltaSchemaEntry::CreateCopyFunction(CatalogTransaction transaction, + CreateCopyFunctionInfo &info) { + throw BinderException("Delta databases do not support creating copy functions"); +} + +optional_ptr DeltaSchemaEntry::CreatePragmaFunction(CatalogTransaction transaction, + CreatePragmaFunctionInfo &info) { + throw BinderException("Delta databases do not support creating pragma functions"); +} + +optional_ptr DeltaSchemaEntry::CreateCollation(CatalogTransaction transaction, CreateCollationInfo &info) { + throw BinderException("Delta databases do not support creating collations"); +} + +void DeltaSchemaEntry::Alter(CatalogTransaction transaction, AlterInfo &info) { + throw NotImplementedException("Delta tables do not support altering"); +} + +bool CatalogTypeIsSupported(CatalogType type) { + switch (type) { + case CatalogType::TABLE_ENTRY: + return 
true; + default: + return false; + } +} + +static unique_ptr CreateTableEntry(ClientContext &context, DeltaCatalog &delta_catalog, DeltaSchemaEntry &schema_entry) { + auto snapshot = make_shared_ptr(context, delta_catalog.GetDBPath()); + + // Get the names and types from the delta snapshot + vector return_types; + vector names; + snapshot->Bind(return_types, names); + + CreateTableInfo table_info; + for (idx_t i = 0; i < return_types.size(); i++) { + table_info.columns.AddColumn(ColumnDefinition(names[i], return_types[i])); + } + table_info.table = DEFAULT_DELTA_TABLE; + auto table_entry = make_uniq(delta_catalog, schema_entry, table_info); + table_entry->snapshot = std::move(snapshot); + + return table_entry; +} + +void DeltaSchemaEntry::Scan(ClientContext &context, CatalogType type, + const std::function &callback) { + if (!CatalogTypeIsSupported(type)) { + auto transaction = catalog.GetCatalogTransaction(context); + auto default_table = GetEntry(transaction, type, DEFAULT_DELTA_TABLE); + if (default_table) { + callback(*default_table); + } + } + +} +void DeltaSchemaEntry::Scan(CatalogType type, const std::function &callback) { + throw NotImplementedException("Scan without context not supported"); +} + +void DeltaSchemaEntry::DropEntry(ClientContext &context, DropInfo &info) { + throw NotImplementedException("Delta tables do not support dropping"); +} + +optional_ptr DeltaSchemaEntry::GetEntry(CatalogTransaction transaction, CatalogType type, + const string &name) { + if (!transaction.HasContext()) { + throw NotImplementedException("Can not DeltaSchemaEntry::GetEntry without context"); + } + auto &context = transaction.GetContext(); + + if (type == CatalogType::TABLE_ENTRY && name == DEFAULT_DELTA_TABLE) { + auto &delta_transaction = GetDeltaTransaction(transaction); + auto &delta_catalog = catalog.Cast(); + + if (delta_transaction.table_entry) { + return *delta_transaction.table_entry; + } + + if (delta_catalog.UseCachedSnapshot()) { + unique_lock l(lock); + if 
(!cached_table) { + cached_table = CreateTableEntry(context, delta_catalog, *this); + } + return *cached_table; + } + + delta_transaction.table_entry = CreateTableEntry(context, delta_catalog, *this); + return *delta_transaction.table_entry; + } + + return nullptr; +} + +optional_ptr DeltaSchemaEntry::GetCachedTable() { + lock_guard lck(lock); + if (cached_table) { + return *cached_table; + } + return nullptr; +} + +} // namespace duckdb diff --git a/src/storage/delta_table_entry.cpp b/src/storage/delta_table_entry.cpp new file mode 100644 index 0000000..f82caa4 --- /dev/null +++ b/src/storage/delta_table_entry.cpp @@ -0,0 +1,74 @@ +#include "storage/delta_catalog.hpp" +#include "storage/delta_schema_entry.hpp" +#include "storage/delta_table_entry.hpp" + +#include "delta_utils.hpp" +#include "functions/delta_scan.hpp" + +#include "storage/delta_transaction.hpp" +#include "duckdb/storage/statistics/base_statistics.hpp" +#include "duckdb/storage/table_storage_info.hpp" +#include "duckdb/main/extension_util.hpp" +#include "duckdb/main/database.hpp" +#include "duckdb/main/secret/secret_manager.hpp" +#include "duckdb/catalog/catalog_entry/table_function_catalog_entry.hpp" +#include "duckdb/parser/tableref/table_function_ref.hpp" +#include "../../duckdb/third_party/catch/catch.hpp" +#include "functions/delta_scan.hpp" + +#include + +namespace duckdb { + +DeltaTableEntry::DeltaTableEntry(Catalog &catalog, SchemaCatalogEntry &schema, CreateTableInfo &info) + : TableCatalogEntry(catalog, schema, info) { + this->internal = false; +} + +DeltaTableEntry::~DeltaTableEntry() = default; + +unique_ptr DeltaTableEntry::GetStatistics(ClientContext &context, column_t column_id) { + return nullptr; +} + +void DeltaTableEntry::BindUpdateConstraints(Binder &binder, LogicalGet &, LogicalProjection &, LogicalUpdate &, + ClientContext &) { + throw NotImplementedException("BindUpdateConstraints for delta table"); +} + +TableFunction DeltaTableEntry::GetScanFunction(ClientContext &context, 
unique_ptr &bind_data) { + auto &db = DatabaseInstance::GetDatabase(context); + auto &delta_function_set = ExtensionUtil::GetTableFunction(db, "delta_scan"); + + auto delta_scan_function = delta_function_set.functions.GetFunctionByArguments(context, {LogicalType::VARCHAR}); + auto &delta_catalog = catalog.Cast(); + + // Copy over the internal kernel snapshot + auto function_info = make_shared_ptr(); + + function_info->snapshot = this->snapshot; + delta_scan_function.function_info = std::move(function_info); + + vector inputs = {delta_catalog.GetDBPath()}; + named_parameter_map_t param_map; + vector return_types; + vector names; + TableFunctionRef empty_ref; + + + TableFunctionBindInput bind_input(inputs, param_map, return_types, names, nullptr, nullptr, delta_scan_function, + empty_ref); + + auto result = delta_scan_function.bind(context, bind_input, return_types, names); + bind_data = std::move(result); + + return delta_scan_function; +} + +TableStorageInfo DeltaTableEntry::GetStorageInfo(ClientContext &context) { + TableStorageInfo result; + // TODO fill info + return result; +} + +} // namespace duckdb diff --git a/src/storage/delta_transaction.cpp b/src/storage/delta_transaction.cpp new file mode 100644 index 0000000..3846c47 --- /dev/null +++ b/src/storage/delta_transaction.cpp @@ -0,0 +1,42 @@ +#include "storage/delta_transaction.hpp" +#include "storage/delta_catalog.hpp" +#include "duckdb/parser/parsed_data/create_view_info.hpp" +#include "duckdb/catalog/catalog_entry/index_catalog_entry.hpp" +#include "duckdb/catalog/catalog_entry/view_catalog_entry.hpp" +#include "functions/delta_scan.hpp" +#include "storage/delta_table_entry.hpp" + +namespace duckdb { + +DeltaTransaction::DeltaTransaction(DeltaCatalog &delta_catalog, TransactionManager &manager, ClientContext &context) + : Transaction(manager, context), access_mode(delta_catalog.access_mode) { +} + +DeltaTransaction::~DeltaTransaction() { +} + +void DeltaTransaction::Start() { + transaction_state = 
DeltaTransactionState::TRANSACTION_NOT_YET_STARTED; +} +void DeltaTransaction::Commit() { + if (transaction_state == DeltaTransactionState::TRANSACTION_STARTED) { + transaction_state = DeltaTransactionState::TRANSACTION_FINISHED; + // NOP: we only support read-only transactions currently + } +} +void DeltaTransaction::Rollback() { + if (transaction_state == DeltaTransactionState::TRANSACTION_STARTED) { + transaction_state = DeltaTransactionState::TRANSACTION_FINISHED; + // NOP: we only support read-only transactions currently + } +} + +DeltaTransaction &DeltaTransaction::Get(ClientContext &context, Catalog &catalog) { + return Transaction::Get(context, catalog).Cast(); +} + +AccessMode DeltaTransaction::GetAccessMode() const { + return access_mode; +} + +} // namespace duckdb diff --git a/src/storage/delta_transaction_manager.cpp b/src/storage/delta_transaction_manager.cpp new file mode 100644 index 0000000..4d64f77 --- /dev/null +++ b/src/storage/delta_transaction_manager.cpp @@ -0,0 +1,38 @@ +#include "storage/delta_transaction_manager.hpp" +#include "duckdb/main/attached_database.hpp" + +namespace duckdb { + +DeltaTransactionManager::DeltaTransactionManager(AttachedDatabase &db_p, DeltaCatalog &delta_catalog) + : TransactionManager(db_p), delta_catalog(delta_catalog) { +} + +Transaction &DeltaTransactionManager::StartTransaction(ClientContext &context) { + auto transaction = make_uniq(delta_catalog, *this, context); + transaction->Start(); + auto &result = *transaction; + lock_guard l(transaction_lock); + transactions[result] = std::move(transaction); + return result; +} + +ErrorData DeltaTransactionManager::CommitTransaction(ClientContext &context, Transaction &transaction) { + auto &delta_transaction = transaction.Cast(); + delta_transaction.Commit(); + lock_guard l(transaction_lock); + transactions.erase(transaction); + return ErrorData(); +} + +void DeltaTransactionManager::RollbackTransaction(Transaction &transaction) { + auto &delta_transaction = 
transaction.Cast(); + delta_transaction.Rollback(); + lock_guard l(transaction_lock); + transactions.erase(transaction); +} + +void DeltaTransactionManager::Checkpoint(ClientContext &context, bool force) { + // NOP +} + +} // namespace duckdb diff --git a/test/sql/dat/attach.test b/test/sql/dat/attach.test new file mode 100644 index 0000000..de6615d --- /dev/null +++ b/test/sql/dat/attach.test @@ -0,0 +1,125 @@ +# name: test/sql/dat/attach.test +# description: Test attaching a delta table +# group: [dat] + +require parquet + +require delta + +require-env DAT_PATH + +statement ok +ATTACH '${DAT_PATH}/out/reader_tests/generated/all_primitive_types/delta' as dt (TYPE delta) + +# We can query the table by the catalog name +query I +select utf8 from dt +---- +0 +1 +2 +3 +4 + +# We can query the table using the catalog name + the constant `delta_table` as name +query I +select utf8 from dt.delta_table +---- +0 +1 +2 +3 +4 + +# We can query the table using the catalog name + default schema + the constant `delta_table` as name +query I +select utf8 from dt.main.delta_table +---- +0 +1 +2 +3 +4 + +# Now we create a different table that is actually called dt +statement ok +create table dt as select 1 as id, 2 as utf8 + +# This is now ambiguous! +statement error +from dt +---- +Catalog Error: Ambiguity detected for 'dt': this could either refer to the 'Table' 'dt', or the attached catalog 'dt' which has a default table. To avoid this error, either detach the catalog and reattach under a different name, or use a fully qualified name for the 'Table': 'memory.main.dt' or for the Catalog Default Table: 'dt.main.delta_table'. 
+ +# Join the two tables using fully qualified names +query III +SELECT + id, + dt1.utf8, + dt2.utf8 +FROM + memory.main.dt as dt1 +LEFT JOIN + dt.main.delta_table as dt2 +ON + dt1.utf8 = dt2.utf8 +---- +1 2 2 + + +# You shouldn't be doing this, but its technically possible: we mount the single-table-catalog +statement ok +use dt + +# We can still query the delta catalog default table by its name +query I +select utf8 from dt +---- +0 +1 +2 +3 +4 + +# Or by the default delta table name (`delta_table`) +query I +select utf8 from delta_table +---- +0 +1 +2 +3 +4 + +# Or by specifying the default schema +query I +select utf8 from main.delta_table +---- +0 +1 +2 +3 +4 + +statement ok +USE memory + +statement ok +DROP TABLE main.dt + +statement ok +DETACH dt + +# Test the PIN_SNAPSHOT option: the snapshot is now pinned on attaching +statement ok +ATTACH '${DAT_PATH}/out/reader_tests/generated/all_primitive_types/delta' as dt (TYPE delta, PIN_SNAPSHOT) + +# This query will now reuse +query I +select utf8 from dt +---- +0 +1 +2 +3 +4 \ No newline at end of file From cc4eea647800e80353ee73e794ad29b0d0edc052 Mon Sep 17 00:00:00 2001 From: Sam Ansmink Date: Fri, 25 Oct 2024 16:41:41 +0200 Subject: [PATCH 02/45] minor fixes --- benchmark/benchmark.Makefile | 2 +- duckdb | 2 +- src/storage/delta_catalog.cpp | 3 +-- 3 files changed, 3 insertions(+), 4 deletions(-) diff --git a/benchmark/benchmark.Makefile b/benchmark/benchmark.Makefile index 4387d5e..3e3bf9c 100644 --- a/benchmark/benchmark.Makefile +++ b/benchmark/benchmark.Makefile @@ -35,7 +35,7 @@ bench-run-tpch-sf1-parquet: bench-output-dir bench-run-tpch-sf1-duckdb: bench-output-dir ./build/release/benchmark/benchmark_runner --root-dir './' 'benchmark/tpch/sf1/local/duckdb/$(BENCHMARK_PATTERN)' 2>&1 | tee benchmark_results/tpch-sf1-duckdb.csv # COMPARES TPCH SF1 on parquet file vs on delta files vs on duckdb files -bench-run-tpch-sf1: bench-run-tpch-sf1-delta bench-run-tpch-sf1-parquet bench-run-tpch-sf1-attach 
+bench-run-tpch-sf1: bench-run-tpch-sf1-delta bench-run-tpch-sf1-parquet bench-run-tpch-sf1-delta-attach ### # TPCDS diff --git a/duckdb b/duckdb index 9f3db54..c651907 160000 --- a/duckdb +++ b/duckdb @@ -1 +1 @@ -Subproject commit 9f3db54cc308f38afee235bd6b7bdf61142e4995 +Subproject commit c65190792003312ad96e4b8bdf16430a26da2f01 diff --git a/src/storage/delta_catalog.cpp b/src/storage/delta_catalog.cpp index 4e57d23..1e8ac4e 100644 --- a/src/storage/delta_catalog.cpp +++ b/src/storage/delta_catalog.cpp @@ -71,8 +71,7 @@ optional_idx DeltaCatalog::GetCatalogVersion(ClientContext &context) { return delta_transaction.table_entry->snapshot->version; } - // FIXME: this is not allowed - return optional_idx::Invalid(); + return {}; } DatabaseSize DeltaCatalog::GetDatabaseSize(ClientContext &context) { From 3c19a20f010f00c61cecf215062d189f8ac47e2e Mon Sep 17 00:00:00 2001 From: Sam Ansmink Date: Wed, 6 Nov 2024 10:57:38 +0100 Subject: [PATCH 03/45] bump duckdb to main --- .github/workflows/MainDistributionPipeline.yml | 10 +++++----- duckdb | 2 +- extension-ci-tools | 2 +- 3 files changed, 7 insertions(+), 7 deletions(-) diff --git a/.github/workflows/MainDistributionPipeline.yml b/.github/workflows/MainDistributionPipeline.yml index 496db97..61a2a2d 100644 --- a/.github/workflows/MainDistributionPipeline.yml +++ b/.github/workflows/MainDistributionPipeline.yml @@ -14,10 +14,10 @@ concurrency: jobs: duckdb-stable-build: name: Build extension binaries - uses: duckdb/extension-ci-tools/.github/workflows/_extension_distribution.yml@v1.1.2 + uses: duckdb/extension-ci-tools/.github/workflows/_extension_distribution.yml@main with: - duckdb_version: v1.1.2 - ci_tools_version: v1.1.2 + duckdb_version: main + ci_tools_version: main extension_name: delta enable_rust: true exclude_archs: 'wasm_mvp;wasm_eh;wasm_threads;windows_amd64_rtools' @@ -25,10 +25,10 @@ jobs: duckdb-stable-deploy: name: Deploy extension binaries needs: duckdb-stable-build - uses: 
duckdb/extension-ci-tools/.github/workflows/_extension_deploy.yml@v1.1.2 + uses: duckdb/extension-ci-tools/.github/workflows/_extension_deploy.yml@main secrets: inherit with: extension_name: delta - duckdb_version: v1.1.2 + duckdb_version: main exclude_archs: 'wasm_mvp;wasm_eh;wasm_threads;windows_amd64_rtools' deploy_latest: ${{ startsWith(github.ref, 'refs/tags/v') || github.ref == 'refs/heads/main' }} diff --git a/duckdb b/duckdb index c651907..844d35b 160000 --- a/duckdb +++ b/duckdb @@ -1 +1 @@ -Subproject commit c65190792003312ad96e4b8bdf16430a26da2f01 +Subproject commit 844d35b7c5e400b0cc2578f683f603881af14944 diff --git a/extension-ci-tools b/extension-ci-tools index b46e390..83f847f 160000 --- a/extension-ci-tools +++ b/extension-ci-tools @@ -1 +1 @@ -Subproject commit b46e39024cb2fc96fcec258a7a1304d510bbe914 +Subproject commit 83f847f8467a760f6c66dc7996c13300210220a8 From 2374e61797bd89917fb3f12e2184e97f6dc24e09 Mon Sep 17 00:00:00 2001 From: Sam Ansmink Date: Thu, 7 Nov 2024 16:57:19 +0100 Subject: [PATCH 04/45] bump to recent nightly --- .github/workflows/MainDistributionPipeline.yml | 3 ++- duckdb | 2 +- extension-ci-tools | 2 +- 3 files changed, 4 insertions(+), 3 deletions(-) diff --git a/.github/workflows/MainDistributionPipeline.yml b/.github/workflows/MainDistributionPipeline.yml index 61a2a2d..b84fedb 100644 --- a/.github/workflows/MainDistributionPipeline.yml +++ b/.github/workflows/MainDistributionPipeline.yml @@ -16,7 +16,8 @@ jobs: name: Build extension binaries uses: duckdb/extension-ci-tools/.github/workflows/_extension_distribution.yml@main with: - duckdb_version: main + # pip install duckdb==1.1.4.dev1594 + duckdb_version: 0ccf3c25cc ci_tools_version: main extension_name: delta enable_rust: true diff --git a/duckdb b/duckdb index 844d35b..0ccf3c2 160000 --- a/duckdb +++ b/duckdb @@ -1 +1 @@ -Subproject commit 844d35b7c5e400b0cc2578f683f603881af14944 +Subproject commit 0ccf3c25ccbb25fb90616e77b38f6d138f82950d diff --git 
a/extension-ci-tools b/extension-ci-tools index 83f847f..3e987be 160000 --- a/extension-ci-tools +++ b/extension-ci-tools @@ -1 +1 @@ -Subproject commit 83f847f8467a760f6c66dc7996c13300210220a8 +Subproject commit 3e987be862c95d0f7fc674fa242c97ce3a37ee04 From 56200f870309531e9ab422813cfa3c7fb44fdc47 Mon Sep 17 00:00:00 2001 From: Sam Ansmink Date: Fri, 8 Nov 2024 11:09:41 +0100 Subject: [PATCH 05/45] disable mingw build --- .github/workflows/MainDistributionPipeline.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/MainDistributionPipeline.yml b/.github/workflows/MainDistributionPipeline.yml index b84fedb..0f4294f 100644 --- a/.github/workflows/MainDistributionPipeline.yml +++ b/.github/workflows/MainDistributionPipeline.yml @@ -21,7 +21,7 @@ jobs: ci_tools_version: main extension_name: delta enable_rust: true - exclude_archs: 'wasm_mvp;wasm_eh;wasm_threads;windows_amd64_rtools' + exclude_archs: 'wasm_mvp;wasm_eh;wasm_threads;windows_amd64_mingw' duckdb-stable-deploy: name: Deploy extension binaries @@ -31,5 +31,5 @@ jobs: with: extension_name: delta duckdb_version: main - exclude_archs: 'wasm_mvp;wasm_eh;wasm_threads;windows_amd64_rtools' + exclude_archs: 'wasm_mvp;wasm_eh;wasm_threads;windows_amd64_mingw' deploy_latest: ${{ startsWith(github.ref, 'refs/tags/v') || github.ref == 'refs/heads/main' }} From ecfca609000788591f0337ce6ccfe2c51f634854 Mon Sep 17 00:00:00 2001 From: Sam Ansmink Date: Fri, 8 Nov 2024 11:13:58 +0100 Subject: [PATCH 06/45] small ci fixes --- .github/workflows/LocalTesting.yml | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/.github/workflows/LocalTesting.yml b/.github/workflows/LocalTesting.yml index fe2fe02..b3a897d 100644 --- a/.github/workflows/LocalTesting.yml +++ b/.github/workflows/LocalTesting.yml @@ -210,12 +210,14 @@ jobs: - name: Build shell: bash - run: make generate-data + run: | + make generate-data + make release - name: Test shell: bash run: | - 
GENERATED_DATA_AVAILABLE=1 make test + GENERATED_DATA_AVAILABLE=1 make test_release regression-test-benchmark-runner: name: Performance Regression Tests @@ -280,19 +282,19 @@ jobs: if: always() shell: bash run: | - python3 ./duckdb/scripts/regression_test_runner.py --old=duckdb_delta/build/release/benchmark/benchmark_runner --new=build/release/benchmark/benchmark_runner --benchmarks=.github/regression/tpch_sf1_local.csv --verbose --threads=2 --root-dir=. + python3 ./duckdb/scripts/regression/test_runner.py --old=duckdb_delta/build/release/benchmark/benchmark_runner --new=build/release/benchmark/benchmark_runner --benchmarks=.github/regression/tpch_sf1_local.csv --verbose --threads=2 --root-dir=. - name: Regression Test TPC-DS if: always() shell: bash run: | - python ./duckdb/scripts/regression_test_runner.py --old=duckdb_delta/build/release/benchmark/benchmark_runner --new=build/release/benchmark/benchmark_runner --benchmarks=.github/regression/tpcds_sf1_local.csv --verbose --threads=2 --root-dir=. + python ./duckdb/scripts/regression/test_runner.py --old=duckdb_delta/build/release/benchmark/benchmark_runner --new=build/release/benchmark/benchmark_runner --benchmarks=.github/regression/tpcds_sf1_local.csv --verbose --threads=2 --root-dir=. - name: Regression Test Micro if: always() shell: bash run: | - python ./duckdb/scripts/regression_test_runner.py --old=duckdb_delta/build/release/benchmark/benchmark_runner --new=build/release/benchmark/benchmark_runner --benchmarks=.github/regression/micro.csv --verbose --threads=2 --root-dir=. + python ./duckdb/scripts/regression/test_runner.py --old=duckdb_delta/build/release/benchmark/benchmark_runner --new=build/release/benchmark/benchmark_runner --benchmarks=.github/regression/micro.csv --verbose --threads=2 --root-dir=. 
- name: Test benchmark makefile shell: bash From 5cf23c84fd533e95a827e8f93dc147377eb4d650 Mon Sep 17 00:00:00 2001 From: Sam Ansmink Date: Fri, 8 Nov 2024 11:17:30 +0100 Subject: [PATCH 07/45] make python3 available --- .github/workflows/MainDistributionPipeline.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/.github/workflows/MainDistributionPipeline.yml b/.github/workflows/MainDistributionPipeline.yml index 0f4294f..a3112af 100644 --- a/.github/workflows/MainDistributionPipeline.yml +++ b/.github/workflows/MainDistributionPipeline.yml @@ -22,6 +22,7 @@ jobs: extension_name: delta enable_rust: true exclude_archs: 'wasm_mvp;wasm_eh;wasm_threads;windows_amd64_mingw' + extra_toolchains: 'python3' duckdb-stable-deploy: name: Deploy extension binaries From 77373bc72da7aba764462bc3b6d9e61459bbc407 Mon Sep 17 00:00:00 2001 From: Sam Ansmink Date: Fri, 8 Nov 2024 11:20:01 +0100 Subject: [PATCH 08/45] bump vcpkg --- .github/workflows/MainDistributionPipeline.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/.github/workflows/MainDistributionPipeline.yml b/.github/workflows/MainDistributionPipeline.yml index a3112af..7f03bff 100644 --- a/.github/workflows/MainDistributionPipeline.yml +++ b/.github/workflows/MainDistributionPipeline.yml @@ -23,6 +23,7 @@ jobs: enable_rust: true exclude_archs: 'wasm_mvp;wasm_eh;wasm_threads;windows_amd64_mingw' extra_toolchains: 'python3' + vcpkg_commit: c82f74667287d3dc386bce81e44964370c91a289 duckdb-stable-deploy: name: Deploy extension binaries From ca12e53ef60705ef86a10849fa2f2186eaf58e1e Mon Sep 17 00:00:00 2001 From: Sam Ansmink Date: Fri, 8 Nov 2024 12:46:46 +0100 Subject: [PATCH 09/45] fix mismatching duckdb version --- .github/workflows/MainDistributionPipeline.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/MainDistributionPipeline.yml b/.github/workflows/MainDistributionPipeline.yml index 7f03bff..f43fd4e 100644 --- a/.github/workflows/MainDistributionPipeline.yml +++ 
b/.github/workflows/MainDistributionPipeline.yml @@ -32,6 +32,6 @@ jobs: secrets: inherit with: extension_name: delta - duckdb_version: main + duckdb_version: 0ccf3c25cc exclude_archs: 'wasm_mvp;wasm_eh;wasm_threads;windows_amd64_mingw' deploy_latest: ${{ startsWith(github.ref, 'refs/tags/v') || github.ref == 'refs/heads/main' }} From 296f51494d7a40041ae4ccd79fe12795066161ee Mon Sep 17 00:00:00 2001 From: Sam Ansmink Date: Tue, 12 Nov 2024 12:22:56 +0100 Subject: [PATCH 10/45] format --- CMakeLists.txt | 226 ++++++++++-------- src/delta_extension.cpp | 34 +-- src/delta_utils.cpp | 12 +- src/functions/delta_scan.cpp | 182 +++++++------- src/functions/expression_functions.cpp | 46 ++++ src/include/delta_utils.hpp | 77 +++--- src/include/functions/delta_scan.hpp | 60 ++--- .../functions/expression_functions.hpp | 0 src/include/storage/delta_catalog.hpp | 14 +- src/include/storage/delta_schema_entry.hpp | 10 +- src/include/storage/delta_table_entry.hpp | 4 +- src/include/storage/delta_transaction.hpp | 9 +- src/storage/delta_catalog.cpp | 63 ++--- src/storage/delta_schema_entry.cpp | 125 +++++----- src/storage/delta_table_entry.cpp | 11 +- src/storage/delta_transaction.cpp | 4 +- 16 files changed, 487 insertions(+), 390 deletions(-) create mode 100644 src/functions/expression_functions.cpp create mode 100644 src/include/functions/expression_functions.hpp diff --git a/CMakeLists.txt b/CMakeLists.txt index 351f307..50df657 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -1,7 +1,7 @@ cmake_minimum_required(VERSION 2.8.12) include(ExternalProject) -### Core config +# Core config set(TARGET_NAME delta) set(EXTENSION_NAME ${TARGET_NAME}_extension) @@ -12,30 +12,43 @@ project(${TARGET_NAME}) include_directories(src/include) set(EXTENSION_SOURCES - src/delta_extension.cpp - src/delta_functions.cpp - src/delta_utils.cpp - src/functions/delta_scan.cpp - src/storage/delta_catalog.cpp - src/storage/delta_schema_entry.cpp - src/storage/delta_table_entry.cpp - 
src/storage/delta_transaction.cpp - src/storage/delta_transaction_manager.cpp -) - -### Custom config -# TODO: figure out if we really need this? + src/delta_extension.cpp + src/delta_functions.cpp + src/delta_utils.cpp + src/functions/delta_scan.cpp + src/storage/delta_catalog.cpp + src/storage/delta_schema_entry.cpp + src/storage/delta_table_entry.cpp + src/storage/delta_transaction.cpp + src/storage/delta_transaction_manager.cpp) + +# Custom config TODO: figure out if we really need this? if(APPLE) - set(PLATFORM_LIBS m c System resolv "-framework Corefoundation -framework SystemConfiguration -framework Security") + set(PLATFORM_LIBS + m + c + System + resolv + "-framework Corefoundation -framework SystemConfiguration -framework Security" + ) elseif(UNIX) - set(PLATFORM_LIBS m c resolv) + set(PLATFORM_LIBS m c resolv) elseif(WIN32) - set(PLATFORM_LIBS ntdll ncrypt secur32 ws2_32 userenv bcrypt msvcrt advapi32 RuntimeObject) + set(PLATFORM_LIBS + ntdll + ncrypt + secur32 + ws2_32 + userenv + bcrypt + msvcrt + advapi32 + RuntimeObject) else() - message(STATUS "UNKNOWN OS") + message(STATUS "UNKNOWN OS") endif() -### Setup delta-kernel-rs dependency +# Setup delta-kernel-rs dependency set(KERNEL_NAME delta_kernel) # Set default ExternalProject root directory @@ -46,40 +59,50 @@ set(RUST_ENV_VARS "") # Propagate arch to rust build for CI set(RUST_PLATFORM_TARGET "") if("${OS_NAME}" STREQUAL "linux") - if ("${OS_ARCH}" STREQUAL "arm64") - set(RUST_PLATFORM_TARGET "aarch64-unknown-linux-gnu") - elseif("${CMAKE_CXX_COMPILER}" MATCHES "aarch64") - set(RUST_ENV_VARS ${RUST_ENV_VARS} CFLAGS_aarch64_unknown_linux_gnu=--sysroot=/usr/aarch64-linux-gnu) - set(RUST_ENV_VARS ${RUST_ENV_VARS} CARGO_TARGET_AARCH64_UNKNOWN_LINUX_GNU_LINKER=aarch64-linux-gnu-gcc) - set(RUST_ENV_VARS ${RUST_ENV_VARS} OPENSSL_LIB_DIR=${CMAKE_BINARY_DIR}/vcpkg_installed/${VCPKG_TARGET_TRIPLET}/lib) - set(RUST_ENV_VARS ${RUST_ENV_VARS} 
OPENSSL_INCLUDE_DIR=${CMAKE_BINARY_DIR}/vcpkg_installed/${VCPKG_TARGET_TRIPLET}/include) - set(RUST_PLATFORM_TARGET "aarch64-unknown-linux-gnu") - else() - set(RUST_PLATFORM_TARGET "x86_64-unknown-linux-gnu") - endif() + if("${OS_ARCH}" STREQUAL "arm64") + set(RUST_PLATFORM_TARGET "aarch64-unknown-linux-gnu") + elseif("${CMAKE_CXX_COMPILER}" MATCHES "aarch64") + set(RUST_ENV_VARS + ${RUST_ENV_VARS} + CFLAGS_aarch64_unknown_linux_gnu=--sysroot=/usr/aarch64-linux-gnu) + set(RUST_ENV_VARS + ${RUST_ENV_VARS} + CARGO_TARGET_AARCH64_UNKNOWN_LINUX_GNU_LINKER=aarch64-linux-gnu-gcc) + set(RUST_ENV_VARS + ${RUST_ENV_VARS} + OPENSSL_LIB_DIR=${CMAKE_BINARY_DIR}/vcpkg_installed/${VCPKG_TARGET_TRIPLET}/lib + ) + set(RUST_ENV_VARS + ${RUST_ENV_VARS} + OPENSSL_INCLUDE_DIR=${CMAKE_BINARY_DIR}/vcpkg_installed/${VCPKG_TARGET_TRIPLET}/include + ) + set(RUST_PLATFORM_TARGET "aarch64-unknown-linux-gnu") + else() + set(RUST_PLATFORM_TARGET "x86_64-unknown-linux-gnu") + endif() elseif("${OS_NAME}" STREQUAL "osx") - if ("${OSX_BUILD_ARCH}" STREQUAL "arm64") - set(RUST_PLATFORM_TARGET "aarch64-apple-darwin") - elseif ("${OSX_BUILD_ARCH}" STREQUAL "x86_64") - set(RUST_PLATFORM_TARGET "x86_64-apple-darwin") - elseif ("${OS_ARCH}" STREQUAL "arm64") - set(RUST_PLATFORM_TARGET "aarch64-apple-darwin") - endif() + if("${OSX_BUILD_ARCH}" STREQUAL "arm64") + set(RUST_PLATFORM_TARGET "aarch64-apple-darwin") + elseif("${OSX_BUILD_ARCH}" STREQUAL "x86_64") + set(RUST_PLATFORM_TARGET "x86_64-apple-darwin") + elseif("${OS_ARCH}" STREQUAL "arm64") + set(RUST_PLATFORM_TARGET "aarch64-apple-darwin") + endif() elseif(WIN32) - if (MINGW AND "${OS_ARCH}" STREQUAL "arm64") - set(RUST_PLATFORM_TARGET "aarch64-pc-windows-gnu") - elseif (MINGW AND "${OS_ARCH}" STREQUAL "amd64") - set(RUST_PLATFORM_TARGET "x86_64-pc-windows-gnu") - elseif (MSVC AND "${OS_ARCH}" STREQUAL "arm64") - set(RUST_PLATFORM_TARGET "aarch64-pc-windows-msvc") - elseif (MSVC AND "${OS_ARCH}" STREQUAL "amd64") - set(RUST_PLATFORM_TARGET 
"x86_64-pc-windows-msvc") - endif() + if(MINGW AND "${OS_ARCH}" STREQUAL "arm64") + set(RUST_PLATFORM_TARGET "aarch64-pc-windows-gnu") + elseif(MINGW AND "${OS_ARCH}" STREQUAL "amd64") + set(RUST_PLATFORM_TARGET "x86_64-pc-windows-gnu") + elseif(MSVC AND "${OS_ARCH}" STREQUAL "arm64") + set(RUST_PLATFORM_TARGET "aarch64-pc-windows-msvc") + elseif(MSVC AND "${OS_ARCH}" STREQUAL "amd64") + set(RUST_PLATFORM_TARGET "x86_64-pc-windows-msvc") + endif() endif() # We currently only support the predefined targets. -if ("${RUST_PLATFORM_TARGET}" STREQUAL "") - message(FATAL_ERROR "Failed to detect the correct platform") +if("${RUST_PLATFORM_TARGET}" STREQUAL "") + message(FATAL_ERROR "Failed to detect the correct platform") endif() set(RUST_PLATFORM_PARAM "--target=${RUST_PLATFORM_TARGET}") @@ -92,69 +115,84 @@ string(STRIP "${RUST_ENV_VARS}" RUST_ENV_VARS) set(RUST_UNSET_ENV_VARS --unset=CC --unset=CXX --unset=LD) # Define all the relevant delta-kernel-rs paths/names -set(DELTA_KERNEL_LIBNAME "${CMAKE_STATIC_LIBRARY_PREFIX}delta_kernel_ffi${CMAKE_STATIC_LIBRARY_SUFFIX}") -set(DELTA_KERNEL_LIBPATH_DEBUG "${CMAKE_BINARY_DIR}/rust/src/delta_kernel/target/${RUST_PLATFORM_TARGET}/debug/${DELTA_KERNEL_LIBNAME}") -set(DELTA_KERNEL_LIBPATH_RELEASE "${CMAKE_BINARY_DIR}/rust/src/delta_kernel/target/${RUST_PLATFORM_TARGET}/release/${DELTA_KERNEL_LIBNAME}") -set(DELTA_KERNEL_FFI_HEADER_PATH "${CMAKE_BINARY_DIR}/rust/src/delta_kernel/target/ffi-headers") -set(DELTA_KERNEL_FFI_HEADER_C "${CMAKE_BINARY_DIR}/rust/src/delta_kernel/target/ffi-headers/delta_kernel_ffi.h") -set(DELTA_KERNEL_FFI_HEADER_CXX "${CMAKE_BINARY_DIR}/rust/src/delta_kernel/target/ffi-headers/delta_kernel_ffi.hpp") +set(DELTA_KERNEL_LIBNAME + "${CMAKE_STATIC_LIBRARY_PREFIX}delta_kernel_ffi${CMAKE_STATIC_LIBRARY_SUFFIX}" +) +set(DELTA_KERNEL_LIBPATH_DEBUG + "${CMAKE_BINARY_DIR}/rust/src/delta_kernel/target/${RUST_PLATFORM_TARGET}/debug/${DELTA_KERNEL_LIBNAME}" +) +set(DELTA_KERNEL_LIBPATH_RELEASE + 
"${CMAKE_BINARY_DIR}/rust/src/delta_kernel/target/${RUST_PLATFORM_TARGET}/release/${DELTA_KERNEL_LIBNAME}" +) +set(DELTA_KERNEL_FFI_HEADER_PATH + "${CMAKE_BINARY_DIR}/rust/src/delta_kernel/target/ffi-headers") +set(DELTA_KERNEL_FFI_HEADER_C + "${CMAKE_BINARY_DIR}/rust/src/delta_kernel/target/ffi-headers/delta_kernel_ffi.h" +) +set(DELTA_KERNEL_FFI_HEADER_CXX + "${CMAKE_BINARY_DIR}/rust/src/delta_kernel/target/ffi-headers/delta_kernel_ffi.hpp" +) # Add rust_example as a CMake target ExternalProject_Add( - ${KERNEL_NAME} - GIT_REPOSITORY "https://github.com/delta-incubator/delta-kernel-rs" - # WARNING: the FFI headers are currently pinned due to the C linkage issue of the c++ headers. Currently, when bumping - # the kernel version, the produced header in ./src/include/delta_kernel_ffi.hpp should be also bumped, applying the fix - GIT_TAG v0.4.0 - # Prints the env variables passed to the cargo build to the terminal, useful in debugging because passing them - # through CMake is an error-prone mess - CONFIGURE_COMMAND ${CMAKE_COMMAND} -E env ${RUST_UNSET_ENV_VARS} ${RUST_ENV_VARS} env - UPDATE_COMMAND "" - BUILD_IN_SOURCE 1 - # Build debug build - BUILD_COMMAND - ${CMAKE_COMMAND} -E env ${RUST_UNSET_ENV_VARS} ${RUST_ENV_VARS} - cargo build --package delta_kernel_ffi --workspace --all-features ${RUST_PLATFORM_PARAM} - # Build release build - COMMAND - ${CMAKE_COMMAND} -E env ${RUST_UNSET_ENV_VARS} ${RUST_ENV_VARS} - cargo build --package delta_kernel_ffi --workspace --all-features --release ${RUST_PLATFORM_PARAM} - # Build DATs - COMMAND - ${CMAKE_COMMAND} -E env ${RUST_UNSET_ENV_VARS} ${RUST_ENV_VARS} - cargo build --manifest-path=${CMAKE_BINARY_DIR}/rust/src/delta_kernel/acceptance/Cargo.toml - # Define the byproducts, required for building with Ninja - BUILD_BYPRODUCTS "${DELTA_KERNEL_LIBPATH_DEBUG}" - BUILD_BYPRODUCTS "${DELTA_KERNEL_LIBPATH_RELEASE}" - BUILD_BYPRODUCTS "${DELTA_KERNEL_FFI_HEADER_C}" - BUILD_BYPRODUCTS "${DELTA_KERNEL_FFI_HEADER_CXX}" - 
INSTALL_COMMAND "" - LOG_BUILD ON) + ${KERNEL_NAME} + GIT_REPOSITORY "https://github.com/delta-incubator/delta-kernel-rs" + # WARNING: the FFI headers are currently pinned due to the C linkage issue of + # the c++ headers. Currently, when bumping the kernel version, the produced + # header in ./src/include/delta_kernel_ffi.hpp should be also bumped, applying + # the fix + GIT_TAG v0.4.0 + # Prints the env variables passed to the cargo build to the terminal, useful + # in debugging because passing them through CMake is an error-prone mess + CONFIGURE_COMMAND ${CMAKE_COMMAND} -E env ${RUST_UNSET_ENV_VARS} + ${RUST_ENV_VARS} env + UPDATE_COMMAND "" + BUILD_IN_SOURCE 1 + # Build debug build + BUILD_COMMAND + ${CMAKE_COMMAND} -E env ${RUST_UNSET_ENV_VARS} ${RUST_ENV_VARS} cargo build + --package delta_kernel_ffi --workspace --all-features ${RUST_PLATFORM_PARAM} + # Build release build + COMMAND + ${CMAKE_COMMAND} -E env ${RUST_UNSET_ENV_VARS} ${RUST_ENV_VARS} cargo build + --package delta_kernel_ffi --workspace --all-features --release + ${RUST_PLATFORM_PARAM} + # Build DATs + COMMAND + ${CMAKE_COMMAND} -E env ${RUST_UNSET_ENV_VARS} ${RUST_ENV_VARS} cargo build + --manifest-path=${CMAKE_BINARY_DIR}/rust/src/delta_kernel/acceptance/Cargo.toml + # Define the byproducts, required for building with Ninja + BUILD_BYPRODUCTS "${DELTA_KERNEL_LIBPATH_DEBUG}" + BUILD_BYPRODUCTS "${DELTA_KERNEL_LIBPATH_RELEASE}" + BUILD_BYPRODUCTS "${DELTA_KERNEL_FFI_HEADER_C}" + BUILD_BYPRODUCTS "${DELTA_KERNEL_FFI_HEADER_CXX}" + INSTALL_COMMAND "" + LOG_BUILD ON) build_static_extension(${TARGET_NAME} ${EXTENSION_SOURCES}) build_loadable_extension(${TARGET_NAME} " " ${EXTENSION_SOURCES}) -# TODO: when C linkage issue is resolved, we should switch back to using the generated headers -#include_directories(${DELTA_KERNEL_FFI_HEADER_PATH}) +# TODO: when C linkage issue is resolved, we should switch back to using the +# generated headers include_directories(${DELTA_KERNEL_FFI_HEADER_PATH}) # Hides 
annoying linker warnings -set(CMAKE_OSX_DEPLOYMENT_TARGET 13.3 CACHE STRING "Minimum OS X deployment version" FORCE) +set(CMAKE_OSX_DEPLOYMENT_TARGET + 13.3 + CACHE STRING "Minimum OS X deployment version" FORCE) # Add the default client add_compile_definitions(DEFINE_DEFAULT_ENGINE) # Link delta-kernal-rs to static lib -target_link_libraries(${EXTENSION_NAME} - debug ${DELTA_KERNEL_LIBPATH_DEBUG} - optimized ${DELTA_KERNEL_LIBPATH_RELEASE} - ${PLATFORM_LIBS}) +target_link_libraries( + ${EXTENSION_NAME} debug ${DELTA_KERNEL_LIBPATH_DEBUG} optimized + ${DELTA_KERNEL_LIBPATH_RELEASE} ${PLATFORM_LIBS}) add_dependencies(${EXTENSION_NAME} delta_kernel) # Link delta-kernal-rs to dynamic lib -target_link_libraries(${LOADABLE_EXTENSION_NAME} - debug ${DELTA_KERNEL_LIBPATH_DEBUG} - optimized ${DELTA_KERNEL_LIBPATH_RELEASE} - ${PLATFORM_LIBS}) +target_link_libraries( + ${LOADABLE_EXTENSION_NAME} debug ${DELTA_KERNEL_LIBPATH_DEBUG} optimized + ${DELTA_KERNEL_LIBPATH_RELEASE} ${PLATFORM_LIBS}) add_dependencies(${LOADABLE_EXTENSION_NAME} delta_kernel) install( diff --git a/src/delta_extension.cpp b/src/delta_extension.cpp index 97d1b53..36003a3 100644 --- a/src/delta_extension.cpp +++ b/src/delta_extension.cpp @@ -13,18 +13,18 @@ namespace duckdb { static unique_ptr DeltaCatalogAttach(StorageExtensionInfo *storage_info, ClientContext &context, - AttachedDatabase &db, const string &name, AttachInfo &info, - AccessMode access_mode) { + AttachedDatabase &db, const string &name, AttachInfo &info, + AccessMode access_mode) { - auto res = make_uniq(db, info.path, access_mode); + auto res = make_uniq(db, info.path, access_mode); - for (const auto& option : info.options) { - if (StringUtil::Lower(option.first) == "pin_snapshot") { - res->use_cache = option.second.GetValue(); - } - } + for (const auto &option : info.options) { + if (StringUtil::Lower(option.first) == "pin_snapshot") { + res->use_cache = option.second.GetValue(); + } + } - res->SetDefaultTable(DEFAULT_SCHEMA, 
DEFAULT_DELTA_TABLE); + res->SetDefaultTable(DEFAULT_SCHEMA, DEFAULT_DELTA_TABLE); return std::move(res); } @@ -44,14 +44,14 @@ class DeltaStorageExtension : public StorageExtension { }; static void LoadInternal(DatabaseInstance &instance) { - // Load functions - for (const auto &function : DeltaFunctions::GetTableFunctions(instance)) { - ExtensionUtil::RegisterFunction(instance, function); - } - - // Register the "single table" delta catalog (to ATTACH a single delta table) - auto &config = DBConfig::GetConfig(instance); - config.storage_extensions["delta"] = make_uniq(); + // Load functions + for (const auto &function : DeltaFunctions::GetTableFunctions(instance)) { + ExtensionUtil::RegisterFunction(instance, function); + } + + // Register the "single table" delta catalog (to ATTACH a single delta table) + auto &config = DBConfig::GetConfig(instance); + config.storage_extensions["delta"] = make_uniq(); } void DeltaExtension::Load(DuckDB &db) { diff --git a/src/delta_utils.cpp b/src/delta_utils.cpp index 035d300..1a8ff04 100644 --- a/src/delta_utils.cpp +++ b/src/delta_utils.cpp @@ -13,11 +13,11 @@ unique_ptr SchemaVisitor::VisitSnapshotSchema(ffi::Sha ffi::EngineSchemaVisitor visitor; visitor.data = &state; - visitor.make_field_list = (uintptr_t (*)(void *, uintptr_t))&MakeFieldList; - visitor.visit_struct = (void (*)(void *, uintptr_t, ffi::KernelStringSlice, uintptr_t))&VisitStruct; - visitor.visit_array = (void (*)(void *, uintptr_t, ffi::KernelStringSlice, bool, uintptr_t))&VisitArray; - visitor.visit_map = (void (*)(void *, uintptr_t, ffi::KernelStringSlice, bool, uintptr_t))&VisitMap; - visitor.visit_decimal = (void (*)(void *, uintptr_t, ffi::KernelStringSlice, uint8_t, uint8_t))&VisitDecimal; + visitor.make_field_list = (uintptr_t(*)(void *, uintptr_t)) & MakeFieldList; + visitor.visit_struct = (void (*)(void *, uintptr_t, ffi::KernelStringSlice, uintptr_t)) & VisitStruct; + visitor.visit_array = (void (*)(void *, uintptr_t, ffi::KernelStringSlice, bool, 
uintptr_t)) & VisitArray; + visitor.visit_map = (void (*)(void *, uintptr_t, ffi::KernelStringSlice, bool, uintptr_t)) & VisitMap; + visitor.visit_decimal = (void (*)(void *, uintptr_t, ffi::KernelStringSlice, uint8_t, uint8_t)) & VisitDecimal; visitor.visit_string = VisitSimpleType(); visitor.visit_long = VisitSimpleType(); visitor.visit_integer = VisitSimpleType(); @@ -176,7 +176,7 @@ vector KernelUtils::FromDeltaBoolSlice(const struct ffi::KernelBoolSlice s PredicateVisitor::PredicateVisitor(const vector &column_names, optional_ptr filters) { predicate = this; - visitor = (uintptr_t (*)(void *, ffi::KernelExpressionVisitorState *))&VisitPredicate; + visitor = (uintptr_t(*)(void *, ffi::KernelExpressionVisitorState *)) & VisitPredicate; if (filters) { for (auto &filter : filters->filters) { diff --git a/src/functions/delta_scan.cpp b/src/functions/delta_scan.cpp index d88d597..4e35b17 100644 --- a/src/functions/delta_scan.cpp +++ b/src/functions/delta_scan.cpp @@ -278,7 +278,8 @@ static ffi::EngineBuilder *CreateBuilder(ClientContext &context, const string &p } if (StringUtil::StartsWith(endpoint, "http://")) { - ffi::set_builder_option(builder, KernelUtils::ToDeltaString("allow_http"), KernelUtils::ToDeltaString("true")); + ffi::set_builder_option(builder, KernelUtils::ToDeltaString("allow_http"), + KernelUtils::ToDeltaString("true")); } ffi::set_builder_option(builder, KernelUtils::ToDeltaString("aws_endpoint"), KernelUtils::ToDeltaString(endpoint)); @@ -363,7 +364,8 @@ static ffi::EngineBuilder *CreateBuilder(ClientContext &context, const string &p } // Set the use_emulator option for when the azurite test server is used if (account_name == "devstoreaccount1" || connection_string.find("devstoreaccount1") != string::npos) { - ffi::set_builder_option(builder, KernelUtils::ToDeltaString("use_emulator"), KernelUtils::ToDeltaString("true")); + ffi::set_builder_option(builder, KernelUtils::ToDeltaString("use_emulator"), + KernelUtils::ToDeltaString("true")); } if 
(!account_name.empty()) { ffi::set_builder_option(builder, KernelUtils::ToDeltaString("account_name"), @@ -373,7 +375,8 @@ static ffi::EngineBuilder *CreateBuilder(ClientContext &context, const string &p ffi::set_builder_option(builder, KernelUtils::ToDeltaString("azure_endpoint"), KernelUtils::ToDeltaString(endpoint)); } - ffi::set_builder_option(builder, KernelUtils::ToDeltaString("container_name"), KernelUtils::ToDeltaString(bucket)); + ffi::set_builder_option(builder, KernelUtils::ToDeltaString("container_name"), + KernelUtils::ToDeltaString(bucket)); } return builder; } @@ -412,46 +415,46 @@ string DeltaSnapshot::ToDeltaPath(const string &raw_path) { } void DeltaSnapshot::Bind(vector &return_types, vector &names) { - if (have_bound) { - names = this->names; - return_types = this->types; - return; - } - - if (!initialized_snapshot) { - InitializeSnapshot(); - } - - unique_ptr schema; - - { - auto snapshot_ref = snapshot->GetLockingRef(); - schema = SchemaVisitor::VisitSnapshotSchema(snapshot_ref.GetPtr()); - } - - for (const auto &field: *schema) { - names.push_back(field.first); - return_types.push_back(field.second); - } - // Store the bound names for resolving the complex filter pushdown later - have_bound = true; - this->names = names; - this->types = return_types; + if (have_bound) { + names = this->names; + return_types = this->types; + return; + } + + if (!initialized_snapshot) { + InitializeSnapshot(); + } + + unique_ptr schema; + + { + auto snapshot_ref = snapshot->GetLockingRef(); + schema = SchemaVisitor::VisitSnapshotSchema(snapshot_ref.GetPtr()); + } + + for (const auto &field : *schema) { + names.push_back(field.first); + return_types.push_back(field.second); + } + // Store the bound names for resolving the complex filter pushdown later + have_bound = true; + this->names = names; + this->types = return_types; } string DeltaSnapshot::GetFile(idx_t i) { - if (!initialized_snapshot) { - InitializeSnapshot(); - } + if (!initialized_snapshot) { + 
InitializeSnapshot(); + } - if(!initialized_scan) { - InitializeScan(); - } + if (!initialized_scan) { + InitializeScan(); + } - // We already have this file - if (i < resolved_files.size()) { - return resolved_files[i]; - } + // We already have this file + if (i < resolved_files.size()) { + return resolved_files[i]; + } if (files_exhausted) { return ""; @@ -478,59 +481,62 @@ string DeltaSnapshot::GetFile(idx_t i) { } void DeltaSnapshot::InitializeSnapshot() { - auto path_slice = KernelUtils::ToDeltaString(paths[0]); + auto path_slice = KernelUtils::ToDeltaString(paths[0]); - auto interface_builder = CreateBuilder(context, paths[0]); - extern_engine = TryUnpackKernelResult( ffi::builder_build(interface_builder)); + auto interface_builder = CreateBuilder(context, paths[0]); + extern_engine = TryUnpackKernelResult(ffi::builder_build(interface_builder)); - if (!snapshot) { - snapshot = make_shared_ptr(TryUnpackKernelResult(ffi::snapshot(path_slice, extern_engine.get()))); - } + if (!snapshot) { + snapshot = make_shared_ptr( + TryUnpackKernelResult(ffi::snapshot(path_slice, extern_engine.get()))); + } - initialized_snapshot = true; + initialized_snapshot = true; } void DeltaSnapshot::InitializeScan() { - auto snapshot_ref = snapshot->GetLockingRef(); + auto snapshot_ref = snapshot->GetLockingRef(); - // Create Scan - PredicateVisitor visitor(names, &table_filters); - scan = TryUnpackKernelResult(ffi::scan(snapshot_ref.GetPtr(), extern_engine.get(), &visitor)); + // Create Scan + PredicateVisitor visitor(names, &table_filters); + scan = TryUnpackKernelResult(ffi::scan(snapshot_ref.GetPtr(), extern_engine.get(), &visitor)); // Create GlobalState global_state = ffi::get_global_scan_state(scan.get()); - // Set version - this->version = ffi::version(snapshot_ref.GetPtr()); + // Set version + this->version = ffi::version(snapshot_ref.GetPtr()); // Create scan data iterator scan_data_iterator = TryUnpackKernelResult(ffi::kernel_scan_data_init(extern_engine.get(), 
scan.get())); - initialized_scan = true; + initialized_scan = true; } -unique_ptr DeltaSnapshot::ComplexFilterPushdown(ClientContext &context, const MultiFileReaderOptions &options, MultiFilePushdownInfo &info, - vector> &filters) { - FilterCombiner combiner(context); +unique_ptr DeltaSnapshot::ComplexFilterPushdown(ClientContext &context, + const MultiFileReaderOptions &options, + MultiFilePushdownInfo &info, + vector> &filters) { + FilterCombiner combiner(context); - if (filters.empty()) { - return nullptr; - } + if (filters.empty()) { + return nullptr; + } - for (const auto &filter : filters) { - combiner.AddFilter(filter->Copy()); - } - auto filterstmp = combiner.GenerateTableScanFilters(info.column_ids); + for (const auto &filter : filters) { + combiner.AddFilter(filter->Copy()); + } + auto filterstmp = combiner.GenerateTableScanFilters(info.column_ids); // TODO: can/should we figure out if this filtered anything? auto filtered_list = make_uniq(context, paths[0]); filtered_list->table_filters = std::move(filterstmp); filtered_list->names = names; - // Copy over the snapshot, this avoids reparsing metadata - filtered_list->snapshot = snapshot; + // Copy over the snapshot, this avoids reparsing metadata + filtered_list->snapshot = snapshot; - return std::move(filtered_list); + return std::move(filtered_list); } vector DeltaSnapshot::GetAllFiles() { @@ -543,11 +549,11 @@ vector DeltaSnapshot::GetAllFiles() { } FileExpandResult DeltaSnapshot::GetExpandResult() { - // We avoid exposing the ExpandResult to DuckDB here because we want to materialize the Snapshot as late as possible: - // materializing too early (GetExpandResult is called *before* filter pushdown by the Parquet scanner), will lead into - // needing to create 2 scans of the snapshot TODO: we need to investigate if this is actually a sensible decision with - // some benchmarking, its currently based on intuition. 
- return FileExpandResult::MULTIPLE_FILES; + // We avoid exposing the ExpandResult to DuckDB here because we want to materialize the Snapshot as late as + // possible: materializing too early (GetExpandResult is called *before* filter pushdown by the Parquet scanner), + // will lead into needing to create 2 scans of the snapshot TODO: we need to investigate if this is actually a + // sensible decision with some benchmarking, its currently based on intuition. + return FileExpandResult::MULTIPLE_FILES; } idx_t DeltaSnapshot::GetTotalFileCount() { @@ -584,13 +590,13 @@ unique_ptr DeltaSnapshot::GetCardinality(ClientContext &context) } unique_ptr DeltaMultiFileReader::CreateInstance(const TableFunction &table_function) { - auto result = make_uniq(); + auto result = make_uniq(); - if (table_function.function_info) { - result->snapshot = table_function.function_info->Cast().snapshot; - } + if (table_function.function_info) { + result->snapshot = table_function.function_info->Cast().snapshot; + } - return std::move(result); + return std::move(result); } bool DeltaMultiFileReader::Bind(MultiFileReaderOptions &options, MultiFileList &files, @@ -683,21 +689,21 @@ void DeltaMultiFileReader::FinalizeBind(const MultiFileReaderOptions &file_optio } } -shared_ptr DeltaMultiFileReader::CreateFileList(ClientContext &context, const vector& paths, FileGlobOptions options) { - if (paths.size() != 1) { - throw BinderException("'delta_scan' only supports single path as input"); - } - +shared_ptr DeltaMultiFileReader::CreateFileList(ClientContext &context, const vector &paths, + FileGlobOptions options) { + if (paths.size() != 1) { + throw BinderException("'delta_scan' only supports single path as input"); + } - if (snapshot) { - // TODO: assert that we are querying the same path as this injected snapshot - // This takes the kernel snapshot from the delta snapshot and ensures we use that snapshot for reading - if (snapshot) { - return snapshot; - } - } + if (snapshot) { + // TODO: assert 
that we are querying the same path as this injected snapshot + // This takes the kernel snapshot from the delta snapshot and ensures we use that snapshot for reading + if (snapshot) { + return snapshot; + } + } - return make_uniq(context, paths[0]); + return make_uniq(context, paths[0]); } // Generate the correct Selection Vector Based on the Raw delta KernelBoolSlice dv and the row_id_column diff --git a/src/functions/expression_functions.cpp b/src/functions/expression_functions.cpp new file mode 100644 index 0000000..373e42f --- /dev/null +++ b/src/functions/expression_functions.cpp @@ -0,0 +1,46 @@ +#include +#include + +#include "duckdb/function/scalar_function.hpp" +#include "duckdb/planner/expression/bound_constant_expression.hpp" + +#include "delta_utils.hpp" +#include "delta_functions.hpp" + +namespace duckdb { + +static void GetDeltaTestExpression(DataChunk &input, ExpressionState &state, Vector &output) { + output.SetVectorType(VectorType::CONSTANT_VECTOR); + + auto test_expression = ffi::get_testing_kernel_expression(); + ExpressionVisitor visitor; + + auto result = visitor.VisitKernelExpression(&test_expression); + if (result->size() != 1) { + throw InternalException("Unexpected result: expected single expression"); + } + + auto &expr = result->back(); + if (expr->GetExpressionType() != ExpressionType::CONJUNCTION_AND) { + throw InternalException("Unexpected result: expected single top level Conjuntion"); + } + + vector result_to_string; + for (auto &expr : expr->Cast().children) { + result_to_string.push_back(expr->ToString()); + } + + output.SetValue(0, Value::LIST(result_to_string)); +}; + +ScalarFunctionSet DeltaFunctions::GetExpressionFunction(DatabaseInstance &instance) { + ScalarFunctionSet result; + result.name = "get_delta_test_expression"; + + ScalarFunction getvar({}, LogicalType::LIST(LogicalType::VARCHAR), GetDeltaTestExpression, nullptr, nullptr); + result.AddFunction(getvar); + + return result; +} + +} // namespace duckdb \ No newline at 
end of file diff --git a/src/include/delta_utils.hpp b/src/include/delta_utils.hpp index 24806a4..8760862 100644 --- a/src/include/delta_utils.hpp +++ b/src/include/delta_utils.hpp @@ -115,57 +115,64 @@ typedef TemplatedUniqueKernelPointer KernelScan typedef TemplatedUniqueKernelPointer KernelGlobalScanState; typedef TemplatedUniqueKernelPointer KernelScanDataIterator; -template +template struct SharedKernelPointer; // A reference to a SharedKernelPointer, only 1 can be handed out at the same time -template +template struct SharedKernelRef { - friend struct SharedKernelPointer; + friend struct SharedKernelPointer; + public: - KernelType* GetPtr() { - return owning_pointer.kernel_ptr.get(); - } - ~SharedKernelRef() { - owning_pointer.lock.unlock(); - } + KernelType *GetPtr() { + return owning_pointer.kernel_ptr.get(); + } + ~SharedKernelRef() { + owning_pointer.lock.unlock(); + } protected: - SharedKernelRef(SharedKernelPointer& owning_pointer_p) : owning_pointer(owning_pointer_p) { - owning_pointer.lock.lock(); - } + SharedKernelRef(SharedKernelPointer &owning_pointer_p) + : owning_pointer(owning_pointer_p) { + owning_pointer.lock.lock(); + } protected: - // The pointer that owns this ref - SharedKernelPointer& owning_pointer; + // The pointer that owns this ref + SharedKernelPointer &owning_pointer; }; // Wrapper around ffi objects to share between threads -template +template struct SharedKernelPointer { - friend struct SharedKernelRef; + friend struct SharedKernelRef; + public: - SharedKernelPointer(TemplatedUniqueKernelPointer unique_kernel_ptr) : kernel_ptr(unique_kernel_ptr) {} - SharedKernelPointer(KernelType* ptr) : kernel_ptr(ptr){} - SharedKernelPointer(){} - - SharedKernelPointer(SharedKernelPointer&& other) : SharedKernelPointer() { - other.lock.lock(); - lock.lock(); - kernel_ptr = std::move(other.kernel_ptr); - lock.lock(); - other.lock.lock(); - } - - // Returns a reference to the underlying kernel object. 
The SharedKernelPointer to this object will be locked for the - // lifetime of this reference - SharedKernelRef GetLockingRef() { - return SharedKernelRef(*this); - } + SharedKernelPointer(TemplatedUniqueKernelPointer unique_kernel_ptr) + : kernel_ptr(unique_kernel_ptr) { + } + SharedKernelPointer(KernelType *ptr) : kernel_ptr(ptr) { + } + SharedKernelPointer() { + } + + SharedKernelPointer(SharedKernelPointer &&other) : SharedKernelPointer() { + other.lock.lock(); + lock.lock(); + kernel_ptr = std::move(other.kernel_ptr); + lock.lock(); + other.lock.lock(); + } + + // Returns a reference to the underlying kernel object. The SharedKernelPointer to this object will be locked for + // the lifetime of this reference + SharedKernelRef GetLockingRef() { + return SharedKernelRef(*this); + } protected: - TemplatedUniqueKernelPointer kernel_ptr; - mutex lock; + TemplatedUniqueKernelPointer kernel_ptr; + mutex lock; }; typedef SharedKernelPointer SharedKernelSnapshot; diff --git a/src/include/functions/delta_scan.hpp b/src/include/functions/delta_scan.hpp index eb2de6e..32662a2 100644 --- a/src/include/functions/delta_scan.hpp +++ b/src/include/functions/delta_scan.hpp @@ -15,8 +15,8 @@ namespace duckdb { struct DeltaSnapshot; struct DeltaFunctionInfo : public TableFunctionInfo { - shared_ptr snapshot; - string expected_path; + shared_ptr snapshot; + string expected_path; }; struct DeltaFileMetaData { @@ -63,8 +63,8 @@ struct DeltaSnapshot : public MultiFileList { string GetFile(idx_t i) override; protected: - void InitializeSnapshot(); - void InitializeScan(); + void InitializeSnapshot(); + void InitializeScan(); template T TryUnpackKernelResult(ffi::ExternResult result) { @@ -76,29 +76,29 @@ struct DeltaSnapshot : public MultiFileList { public: idx_t version; - //! Delta Kernel Structures - shared_ptr snapshot; + //! 
Delta Kernel Structures + shared_ptr snapshot; - KernelExternEngine extern_engine; - KernelScan scan; - KernelGlobalScanState global_state; - KernelScanDataIterator scan_data_iterator; + KernelExternEngine extern_engine; + KernelScan scan; + KernelGlobalScanState global_state; + KernelScanDataIterator scan_data_iterator; - //! Names - vector names; - vector types; - bool have_bound = false; + //! Names + vector names; + vector types; + bool have_bound = false; //! Metadata map for files vector> metadata; - //! Current file list resolution state - bool initialized_snapshot = false; - bool initialized_scan = false; + //! Current file list resolution state + bool initialized_snapshot = false; + bool initialized_scan = false; - bool files_exhausted = false; - vector resolved_files; - TableFilterSet table_filters; + bool files_exhausted = false; + vector resolved_files; + TableFilterSet table_filters; ClientContext &context; }; @@ -116,10 +116,10 @@ struct DeltaMultiFileReaderGlobalState : public MultiFileReaderGlobalState { }; struct DeltaMultiFileReader : public MultiFileReader { - static unique_ptr CreateInstance(const TableFunction &table_function); - //! Return a DeltaSnapshot - shared_ptr CreateFileList(ClientContext &context, const vector &paths, - FileGlobOptions options) override; + static unique_ptr CreateInstance(const TableFunction &table_function); + //! Return a DeltaSnapshot + shared_ptr CreateFileList(ClientContext &context, const vector &paths, + FileGlobOptions options) override; //! Override the regular parquet bind using the MultiFileReader Bind. The bind from these are what DuckDB's file //! readers will try read @@ -153,13 +153,13 @@ struct DeltaMultiFileReader : public MultiFileReader { const MultiFileReaderData &reader_data, DataChunk &chunk, optional_ptr global_state) override; - //! 
Override the ParseOption call to parse delta_scan specific options - bool ParseOption(const string &key, const Value &val, MultiFileReaderOptions &options, - ClientContext &context) override; + //! Override the ParseOption call to parse delta_scan specific options + bool ParseOption(const string &key, const Value &val, MultiFileReaderOptions &options, + ClientContext &context) override; - // A snapshot can be injected into the multifilereader, this ensures the GetMultiFileList can return this snapshot - // (note that the path should match the one passed to CreateFileList) - shared_ptr snapshot; + // A snapshot can be injected into the multifilereader, this ensures the GetMultiFileList can return this snapshot + // (note that the path should match the one passed to CreateFileList) + shared_ptr snapshot; }; } // namespace duckdb diff --git a/src/include/functions/expression_functions.hpp b/src/include/functions/expression_functions.hpp new file mode 100644 index 0000000..e69de29 diff --git a/src/include/storage/delta_catalog.hpp b/src/include/storage/delta_catalog.hpp index faeb00c..17549dd 100644 --- a/src/include/storage/delta_catalog.hpp +++ b/src/include/storage/delta_catalog.hpp @@ -30,7 +30,7 @@ class DeltaCatalog : public Catalog { string path; AccessMode access_mode; - bool use_cache; + bool use_cache; public: void Initialize(bool load_builtin) override; @@ -59,22 +59,22 @@ class DeltaCatalog : public Catalog { DatabaseSize GetDatabaseSize(ClientContext &context) override; - optional_idx GetCatalogVersion(ClientContext &context) override; + optional_idx GetCatalogVersion(ClientContext &context) override; bool InMemory() override; string GetDBPath() override; - bool UseCachedSnapshot(); + bool UseCachedSnapshot(); - DeltaSchemaEntry& GetMainSchema() { - return *main_schema; - } + DeltaSchemaEntry &GetMainSchema() { + return *main_schema; + } private: void DropSchema(ClientContext &context, DropInfo &info) override; private: - unique_ptr main_schema; + 
unique_ptr main_schema; string default_schema; }; diff --git a/src/include/storage/delta_schema_entry.hpp b/src/include/storage/delta_schema_entry.hpp index c8a8d09..dc41a4c 100644 --- a/src/include/storage/delta_schema_entry.hpp +++ b/src/include/storage/delta_schema_entry.hpp @@ -40,13 +40,13 @@ class DeltaSchemaEntry : public SchemaCatalogEntry { void DropEntry(ClientContext &context, DropInfo &info) override; optional_ptr GetEntry(CatalogTransaction transaction, CatalogType type, const string &name) override; - optional_ptr GetCachedTable(); + optional_ptr GetCachedTable(); private: - //! Delta tables may be cached in the SchemaEntry. Since the TableEntry holds the snapshot, this allows sharing a snapshot - //! between different scans. - unique_ptr cached_table; - mutex lock; + //! Delta tables may be cached in the SchemaEntry. Since the TableEntry holds the snapshot, this allows sharing a + //! snapshot between different scans. + unique_ptr cached_table; + mutex lock; }; } // namespace duckdb diff --git a/src/include/storage/delta_table_entry.hpp b/src/include/storage/delta_table_entry.hpp index c131694..5263e88 100644 --- a/src/include/storage/delta_table_entry.hpp +++ b/src/include/storage/delta_table_entry.hpp @@ -17,7 +17,7 @@ struct DeltaSnapshot; class DeltaTableEntry : public TableCatalogEntry { public: DeltaTableEntry(Catalog &catalog, SchemaCatalogEntry &schema, CreateTableInfo &info); - ~DeltaTableEntry(); + ~DeltaTableEntry(); public: unique_ptr GetStatistics(ClientContext &context, column_t column_id) override; @@ -30,7 +30,7 @@ class DeltaTableEntry : public TableCatalogEntry { ClientContext &context) override; public: - shared_ptr snapshot; + shared_ptr snapshot; }; } // namespace duckdb diff --git a/src/include/storage/delta_transaction.hpp b/src/include/storage/delta_transaction.hpp index 3a004ef..b9d369c 100644 --- a/src/include/storage/delta_transaction.hpp +++ b/src/include/storage/delta_transaction.hpp @@ -30,11 +30,12 @@ class 
DeltaTransaction : public Transaction { static DeltaTransaction &Get(ClientContext &context, Catalog &catalog); AccessMode GetAccessMode() const; - void SetReadWrite() override { - throw NotImplementedException("Can not start read-write transaction"); - }; + void SetReadWrite() override { + throw NotImplementedException("Can not start read-write transaction"); + }; + public: - unique_ptr table_entry; + unique_ptr table_entry; private: // DeltaConnection connection; diff --git a/src/storage/delta_catalog.cpp b/src/storage/delta_catalog.cpp index 1e8ac4e..53b1195 100644 --- a/src/storage/delta_catalog.cpp +++ b/src/storage/delta_catalog.cpp @@ -18,31 +18,32 @@ DeltaCatalog::DeltaCatalog(AttachedDatabase &db_p, const string &path, AccessMod DeltaCatalog::~DeltaCatalog() = default; void DeltaCatalog::Initialize(bool load_builtin) { - CreateSchemaInfo info; - main_schema = make_uniq(*this, info); + CreateSchemaInfo info; + main_schema = make_uniq(*this, info); } optional_ptr DeltaCatalog::CreateSchema(CatalogTransaction transaction, CreateSchemaInfo &info) { - throw BinderException("Delta tables do not support creating new schemas"); + throw BinderException("Delta tables do not support creating new schemas"); } void DeltaCatalog::DropSchema(ClientContext &context, DropInfo &info) { - throw BinderException("Delta tables do not support dropping schemas"); + throw BinderException("Delta tables do not support dropping schemas"); } void DeltaCatalog::ScanSchemas(ClientContext &context, std::function callback) { - callback(*main_schema); + callback(*main_schema); } optional_ptr DeltaCatalog::GetSchema(CatalogTransaction transaction, const string &schema_name, - OnEntryNotFound if_not_found, QueryErrorContext error_context) { - if (schema_name == DEFAULT_SCHEMA || schema_name == INVALID_SCHEMA) { - return main_schema.get(); - } - if (if_not_found == OnEntryNotFound::RETURN_NULL) { - return nullptr; - } - return nullptr; + OnEntryNotFound if_not_found, + QueryErrorContext 
error_context) { + if (schema_name == DEFAULT_SCHEMA || schema_name == INVALID_SCHEMA) { + return main_schema.get(); + } + if (if_not_found == OnEntryNotFound::RETURN_NULL) { + return nullptr; + } + return nullptr; } bool DeltaCatalog::InMemory() { @@ -54,24 +55,24 @@ string DeltaCatalog::GetDBPath() { } bool DeltaCatalog::UseCachedSnapshot() { - return use_cache; + return use_cache; } optional_idx DeltaCatalog::GetCatalogVersion(ClientContext &context) { - auto &delta_transaction = DeltaTransaction::Get(context, *this); + auto &delta_transaction = DeltaTransaction::Get(context, *this); - // Option 1: snapshot is cached table-wide - auto cached_snapshot = main_schema->GetCachedTable(); - if (cached_snapshot) { - return cached_snapshot->snapshot->version; - } + // Option 1: snapshot is cached table-wide + auto cached_snapshot = main_schema->GetCachedTable(); + if (cached_snapshot) { + return cached_snapshot->snapshot->version; + } - // Option 2: snapshot is cached in transaction - if (delta_transaction.table_entry) { - return delta_transaction.table_entry->snapshot->version; - } + // Option 2: snapshot is cached in transaction + if (delta_transaction.table_entry) { + return delta_transaction.table_entry->snapshot->version; + } - return {}; + return {}; } DatabaseSize DeltaCatalog::GetDatabaseSize(ClientContext &context) { @@ -84,23 +85,23 @@ DatabaseSize DeltaCatalog::GetDatabaseSize(ClientContext &context) { } unique_ptr DeltaCatalog::PlanInsert(ClientContext &context, LogicalInsert &op, - unique_ptr plan) { + unique_ptr plan) { throw NotImplementedException("DeltaCatalog does not support inserts"); } unique_ptr DeltaCatalog::PlanCreateTableAs(ClientContext &context, LogicalCreateTable &op, - unique_ptr plan) { + unique_ptr plan) { throw NotImplementedException("DeltaCatalog does not support creating new tables"); } unique_ptr DeltaCatalog::PlanDelete(ClientContext &context, LogicalDelete &op, - unique_ptr plan) { + unique_ptr plan) { throw 
NotImplementedException("DeltaCatalog does not support deletes"); } unique_ptr DeltaCatalog::PlanUpdate(ClientContext &context, LogicalUpdate &op, - unique_ptr plan) { + unique_ptr plan) { throw NotImplementedException("DeltaCatalog does not support updates"); } -unique_ptr DeltaCatalog::BindCreateIndex(Binder &binder, CreateStatement &stmt, TableCatalogEntry &table, - unique_ptr plan) { +unique_ptr DeltaCatalog::BindCreateIndex(Binder &binder, CreateStatement &stmt, + TableCatalogEntry &table, unique_ptr plan) { throw NotImplementedException("DeltaCatalog does not support creating indices"); } diff --git a/src/storage/delta_schema_entry.cpp b/src/storage/delta_schema_entry.cpp index 7e15c5b..61348d4 100644 --- a/src/storage/delta_schema_entry.cpp +++ b/src/storage/delta_schema_entry.cpp @@ -17,11 +17,9 @@ #include "duckdb/parser/parsed_data/alter_table_info.hpp" #include "duckdb/parser/parsed_expression_iterator.hpp" - namespace duckdb { -DeltaSchemaEntry::DeltaSchemaEntry(Catalog &catalog, CreateSchemaInfo &info) - : SchemaCatalogEntry(catalog, info) { +DeltaSchemaEntry::DeltaSchemaEntry(Catalog &catalog, CreateSchemaInfo &info) : SchemaCatalogEntry(catalog, info) { } DeltaSchemaEntry::~DeltaSchemaEntry() { @@ -35,7 +33,7 @@ DeltaTransaction &GetDeltaTransaction(CatalogTransaction transaction) { } optional_ptr DeltaSchemaEntry::CreateTable(CatalogTransaction transaction, BoundCreateTableInfo &info) { - throw BinderException("Delta tables do not support creating tables"); + throw BinderException("Delta tables do not support creating tables"); } optional_ptr DeltaSchemaEntry::CreateFunction(CatalogTransaction transaction, CreateFunctionInfo &info) { @@ -53,7 +51,7 @@ void DeltaUnqualifyColumnRef(ParsedExpression &expr) { } optional_ptr DeltaSchemaEntry::CreateIndex(CatalogTransaction transaction, CreateIndexInfo &info, - TableCatalogEntry &table) { + TableCatalogEntry &table) { throw NotImplementedException("CreateIndex"); } @@ -62,7 +60,7 @@ string 
GetDeltaCreateView(CreateViewInfo &info) { } optional_ptr DeltaSchemaEntry::CreateView(CatalogTransaction transaction, CreateViewInfo &info) { - throw BinderException("Delta tables do not support creating views"); + throw BinderException("Delta tables do not support creating views"); } optional_ptr DeltaSchemaEntry::CreateType(CatalogTransaction transaction, CreateTypeInfo &info) { @@ -74,26 +72,27 @@ optional_ptr DeltaSchemaEntry::CreateSequence(CatalogTransaction t } optional_ptr DeltaSchemaEntry::CreateTableFunction(CatalogTransaction transaction, - CreateTableFunctionInfo &info) { + CreateTableFunctionInfo &info) { throw BinderException("Delta databases do not support creating table functions"); } optional_ptr DeltaSchemaEntry::CreateCopyFunction(CatalogTransaction transaction, - CreateCopyFunctionInfo &info) { + CreateCopyFunctionInfo &info) { throw BinderException("Delta databases do not support creating copy functions"); } optional_ptr DeltaSchemaEntry::CreatePragmaFunction(CatalogTransaction transaction, - CreatePragmaFunctionInfo &info) { + CreatePragmaFunctionInfo &info) { throw BinderException("Delta databases do not support creating pragma functions"); } -optional_ptr DeltaSchemaEntry::CreateCollation(CatalogTransaction transaction, CreateCollationInfo &info) { +optional_ptr DeltaSchemaEntry::CreateCollation(CatalogTransaction transaction, + CreateCollationInfo &info) { throw BinderException("Delta databases do not support creating collations"); } void DeltaSchemaEntry::Alter(CatalogTransaction transaction, AlterInfo &info) { - throw NotImplementedException("Delta tables do not support altering"); + throw NotImplementedException("Delta tables do not support altering"); } bool CatalogTypeIsSupported(CatalogType type) { @@ -105,80 +104,80 @@ bool CatalogTypeIsSupported(CatalogType type) { } } -static unique_ptr CreateTableEntry(ClientContext &context, DeltaCatalog &delta_catalog, DeltaSchemaEntry &schema_entry) { - auto snapshot = make_shared_ptr(context, 
delta_catalog.GetDBPath()); +static unique_ptr CreateTableEntry(ClientContext &context, DeltaCatalog &delta_catalog, + DeltaSchemaEntry &schema_entry) { + auto snapshot = make_shared_ptr(context, delta_catalog.GetDBPath()); - // Get the names and types from the delta snapshot - vector return_types; - vector names; - snapshot->Bind(return_types, names); + // Get the names and types from the delta snapshot + vector return_types; + vector names; + snapshot->Bind(return_types, names); - CreateTableInfo table_info; - for (idx_t i = 0; i < return_types.size(); i++) { - table_info.columns.AddColumn(ColumnDefinition(names[i], return_types[i])); - } - table_info.table = DEFAULT_DELTA_TABLE; - auto table_entry = make_uniq(delta_catalog, schema_entry, table_info); - table_entry->snapshot = std::move(snapshot); + CreateTableInfo table_info; + for (idx_t i = 0; i < return_types.size(); i++) { + table_info.columns.AddColumn(ColumnDefinition(names[i], return_types[i])); + } + table_info.table = DEFAULT_DELTA_TABLE; + auto table_entry = make_uniq(delta_catalog, schema_entry, table_info); + table_entry->snapshot = std::move(snapshot); - return table_entry; + return table_entry; } void DeltaSchemaEntry::Scan(ClientContext &context, CatalogType type, - const std::function &callback) { + const std::function &callback) { if (!CatalogTypeIsSupported(type)) { - auto transaction = catalog.GetCatalogTransaction(context); + auto transaction = catalog.GetCatalogTransaction(context); auto default_table = GetEntry(transaction, type, DEFAULT_DELTA_TABLE); - if (default_table) { - callback(*default_table); - } + if (default_table) { + callback(*default_table); + } } - } void DeltaSchemaEntry::Scan(CatalogType type, const std::function &callback) { throw NotImplementedException("Scan without context not supported"); } void DeltaSchemaEntry::DropEntry(ClientContext &context, DropInfo &info) { - throw NotImplementedException("Delta tables do not support dropping"); + throw 
NotImplementedException("Delta tables do not support dropping"); } optional_ptr DeltaSchemaEntry::GetEntry(CatalogTransaction transaction, CatalogType type, - const string &name) { - if (!transaction.HasContext()) { - throw NotImplementedException("Can not DeltaSchemaEntry::GetEntry without context"); - } - auto &context = transaction.GetContext(); - - if (type == CatalogType::TABLE_ENTRY && name == DEFAULT_DELTA_TABLE) { - auto &delta_transaction = GetDeltaTransaction(transaction); - auto &delta_catalog = catalog.Cast(); - - if (delta_transaction.table_entry) { - return *delta_transaction.table_entry; - } - - if (delta_catalog.UseCachedSnapshot()) { - unique_lock l(lock); - if (!cached_table) { - cached_table = CreateTableEntry(context, delta_catalog, *this); - } - return *cached_table; - } - - delta_transaction.table_entry = CreateTableEntry(context, delta_catalog, *this); - return *delta_transaction.table_entry; - } + const string &name) { + if (!transaction.HasContext()) { + throw NotImplementedException("Can not DeltaSchemaEntry::GetEntry without context"); + } + auto &context = transaction.GetContext(); + + if (type == CatalogType::TABLE_ENTRY && name == DEFAULT_DELTA_TABLE) { + auto &delta_transaction = GetDeltaTransaction(transaction); + auto &delta_catalog = catalog.Cast(); + + if (delta_transaction.table_entry) { + return *delta_transaction.table_entry; + } + + if (delta_catalog.UseCachedSnapshot()) { + unique_lock l(lock); + if (!cached_table) { + cached_table = CreateTableEntry(context, delta_catalog, *this); + } + return *cached_table; + } + + delta_transaction.table_entry = CreateTableEntry(context, delta_catalog, *this); + return *delta_transaction.table_entry; + } - return nullptr; + return nullptr; } optional_ptr DeltaSchemaEntry::GetCachedTable() { - lock_guard lck(lock); - if (cached_table) { - return *cached_table; - } - return nullptr; + lock_guard lck(lock); + if (cached_table) { + return *cached_table; + } + return nullptr; } } // namespace 
duckdb diff --git a/src/storage/delta_table_entry.cpp b/src/storage/delta_table_entry.cpp index f82caa4..6f7f829 100644 --- a/src/storage/delta_table_entry.cpp +++ b/src/storage/delta_table_entry.cpp @@ -32,7 +32,7 @@ unique_ptr DeltaTableEntry::GetStatistics(ClientContext &context } void DeltaTableEntry::BindUpdateConstraints(Binder &binder, LogicalGet &, LogicalProjection &, LogicalUpdate &, - ClientContext &) { + ClientContext &) { throw NotImplementedException("BindUpdateConstraints for delta table"); } @@ -43,11 +43,11 @@ TableFunction DeltaTableEntry::GetScanFunction(ClientContext &context, unique_pt auto delta_scan_function = delta_function_set.functions.GetFunctionByArguments(context, {LogicalType::VARCHAR}); auto &delta_catalog = catalog.Cast(); - // Copy over the internal kernel snapshot - auto function_info = make_shared_ptr(); + // Copy over the internal kernel snapshot + auto function_info = make_shared_ptr(); - function_info->snapshot = this->snapshot; - delta_scan_function.function_info = std::move(function_info); + function_info->snapshot = this->snapshot; + delta_scan_function.function_info = std::move(function_info); vector inputs = {delta_catalog.GetDBPath()}; named_parameter_map_t param_map; @@ -55,7 +55,6 @@ TableFunction DeltaTableEntry::GetScanFunction(ClientContext &context, unique_pt vector names; TableFunctionRef empty_ref; - TableFunctionBindInput bind_input(inputs, param_map, return_types, names, nullptr, nullptr, delta_scan_function, empty_ref); diff --git a/src/storage/delta_transaction.cpp b/src/storage/delta_transaction.cpp index 3846c47..2af1a46 100644 --- a/src/storage/delta_transaction.cpp +++ b/src/storage/delta_transaction.cpp @@ -27,7 +27,7 @@ void DeltaTransaction::Commit() { void DeltaTransaction::Rollback() { if (transaction_state == DeltaTransactionState::TRANSACTION_STARTED) { transaction_state = DeltaTransactionState::TRANSACTION_FINISHED; - // NOP: we only support read-only transactions currently + // NOP: we only 
support read-only transactions currently } } @@ -36,7 +36,7 @@ DeltaTransaction &DeltaTransaction::Get(ClientContext &context, Catalog &catalog } AccessMode DeltaTransaction::GetAccessMode() const { - return access_mode; + return access_mode; } } // namespace duckdb From f240e459aacc141ef9043b877baa56df3945c106 Mon Sep 17 00:00:00 2001 From: Sam Ansmink Date: Tue, 12 Nov 2024 13:03:02 +0100 Subject: [PATCH 11/45] add ci tools version --- .github/workflows/MainDistributionPipeline.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/.github/workflows/MainDistributionPipeline.yml b/.github/workflows/MainDistributionPipeline.yml index f43fd4e..0be63d2 100644 --- a/.github/workflows/MainDistributionPipeline.yml +++ b/.github/workflows/MainDistributionPipeline.yml @@ -32,6 +32,7 @@ jobs: secrets: inherit with: extension_name: delta + ci_tools_version: main duckdb_version: 0ccf3c25cc exclude_archs: 'wasm_mvp;wasm_eh;wasm_threads;windows_amd64_mingw' deploy_latest: ${{ startsWith(github.ref, 'refs/tags/v') || github.ref == 'refs/heads/main' }} From febbb798e3b0e7d8b60b6c48956655a041e15f6a Mon Sep 17 00:00:00 2001 From: Sam Ansmink Date: Tue, 12 Nov 2024 15:08:15 +0100 Subject: [PATCH 12/45] also skip old arch label --- .github/workflows/MainDistributionPipeline.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/MainDistributionPipeline.yml b/.github/workflows/MainDistributionPipeline.yml index 0be63d2..fb259e8 100644 --- a/.github/workflows/MainDistributionPipeline.yml +++ b/.github/workflows/MainDistributionPipeline.yml @@ -21,7 +21,7 @@ jobs: ci_tools_version: main extension_name: delta enable_rust: true - exclude_archs: 'wasm_mvp;wasm_eh;wasm_threads;windows_amd64_mingw' + exclude_archs: 'wasm_mvp;wasm_eh;wasm_threads;windows_amd64_rtools;windows_amd64_mingw' extra_toolchains: 'python3' vcpkg_commit: c82f74667287d3dc386bce81e44964370c91a289 @@ -34,5 +34,5 @@ jobs: extension_name: delta ci_tools_version: main duckdb_version: 
0ccf3c25cc - exclude_archs: 'wasm_mvp;wasm_eh;wasm_threads;windows_amd64_mingw' + exclude_archs: 'wasm_mvp;wasm_eh;wasm_threads;windows_amd64_rtools;windows_amd64_mingw' deploy_latest: ${{ startsWith(github.ref, 'refs/tags/v') || github.ref == 'refs/heads/main' }} From a2364a1e2546054a73b8928d027e896a6f8c11c8 Mon Sep 17 00:00:00 2001 From: Sam Ansmink Date: Thu, 14 Nov 2024 11:00:47 +0100 Subject: [PATCH 13/45] enable deploy for feature --- .github/workflows/MainDistributionPipeline.yml | 1 - 1 file changed, 1 deletion(-) diff --git a/.github/workflows/MainDistributionPipeline.yml b/.github/workflows/MainDistributionPipeline.yml index fb259e8..22af8c2 100644 --- a/.github/workflows/MainDistributionPipeline.yml +++ b/.github/workflows/MainDistributionPipeline.yml @@ -35,4 +35,3 @@ jobs: ci_tools_version: main duckdb_version: 0ccf3c25cc exclude_archs: 'wasm_mvp;wasm_eh;wasm_threads;windows_amd64_rtools;windows_amd64_mingw' - deploy_latest: ${{ startsWith(github.ref, 'refs/tags/v') || github.ref == 'refs/heads/main' }} From ad12b3650d41ffe3dffb0965cfca090421da002e Mon Sep 17 00:00:00 2001 From: Sam Ansmink Date: Thu, 14 Nov 2024 17:05:28 +0100 Subject: [PATCH 14/45] add table function tostring --- duckdb | 2 +- src/functions/delta_scan.cpp | 18 +++++++++++++----- src/include/functions/delta_scan.hpp | 1 + src/storage/delta_table_entry.cpp | 1 + 4 files changed, 16 insertions(+), 6 deletions(-) diff --git a/duckdb b/duckdb index 0ccf3c2..7fb238e 160000 --- a/duckdb +++ b/duckdb @@ -1 +1 @@ -Subproject commit 0ccf3c25ccbb25fb90616e77b38f6d138f82950d +Subproject commit 7fb238e1d2625fdc34a3057f3dffa7dfc32e0c5a diff --git a/src/functions/delta_scan.cpp b/src/functions/delta_scan.cpp index 4e35b17..3053fbf 100644 --- a/src/functions/delta_scan.cpp +++ b/src/functions/delta_scan.cpp @@ -980,11 +980,17 @@ bool DeltaMultiFileReader::ParseOption(const string &key, const Value &val, Mult return MultiFileReader::ParseOption(key, val, options, context); } -// -// 
DeltaMultiFileReaderBindData::DeltaMultiFileReaderBindData(DeltaSnapshot & delta_snapshot): -// current_snapshot(delta_snapshot){ -// -//} + +static InsertionOrderPreservingMap DeltaFunctionToString(TableFunctionToStringInput &input) { + InsertionOrderPreservingMap result; + + if (input.table_function.function_info) { + auto& table_info = input.table_function.function_info->Cast(); + result["Table"] = table_info.table_name; + } + + return result; +} TableFunctionSet DeltaFunctions::GetDeltaScanFunction(DatabaseInstance &instance) { // Parquet extension needs to be loaded for this to make sense @@ -1007,6 +1013,8 @@ TableFunctionSet DeltaFunctions::GetDeltaScanFunction(DatabaseInstance &instance function.table_scan_progress = nullptr; function.get_bind_info = nullptr; + function.to_string = DeltaFunctionToString; + // Schema param is just confusing here function.named_parameters.erase("schema"); diff --git a/src/include/functions/delta_scan.hpp b/src/include/functions/delta_scan.hpp index 32662a2..72636e8 100644 --- a/src/include/functions/delta_scan.hpp +++ b/src/include/functions/delta_scan.hpp @@ -17,6 +17,7 @@ struct DeltaSnapshot; struct DeltaFunctionInfo : public TableFunctionInfo { shared_ptr snapshot; string expected_path; + string table_name; }; struct DeltaFileMetaData { diff --git a/src/storage/delta_table_entry.cpp b/src/storage/delta_table_entry.cpp index 6f7f829..6dfb19d 100644 --- a/src/storage/delta_table_entry.cpp +++ b/src/storage/delta_table_entry.cpp @@ -47,6 +47,7 @@ TableFunction DeltaTableEntry::GetScanFunction(ClientContext &context, unique_pt auto function_info = make_shared_ptr(); function_info->snapshot = this->snapshot; + function_info->table_name = delta_catalog.GetName(); delta_scan_function.function_info = std::move(function_info); vector inputs = {delta_catalog.GetDBPath()}; From 1fe8e0d69ca222024aaee7c632ffd0f6a1ab9ff2 Mon Sep 17 00:00:00 2001 From: Sam Ansmink Date: Wed, 20 Nov 2024 09:59:43 +0100 Subject: [PATCH 15/45] bump duckdb 
to 1.1.4.dev2005 --- .github/workflows/MainDistributionPipeline.yml | 6 +++--- duckdb | 2 +- extension-ci-tools | 2 +- 3 files changed, 5 insertions(+), 5 deletions(-) diff --git a/.github/workflows/MainDistributionPipeline.yml b/.github/workflows/MainDistributionPipeline.yml index 22af8c2..3449ca6 100644 --- a/.github/workflows/MainDistributionPipeline.yml +++ b/.github/workflows/MainDistributionPipeline.yml @@ -16,8 +16,8 @@ jobs: name: Build extension binaries uses: duckdb/extension-ci-tools/.github/workflows/_extension_distribution.yml@main with: - # pip install duckdb==1.1.4.dev1594 - duckdb_version: 0ccf3c25cc + # pip install duckdb==1.1.4.dev2005 + duckdb_version: b470dea7ee ci_tools_version: main extension_name: delta enable_rust: true @@ -33,5 +33,5 @@ jobs: with: extension_name: delta ci_tools_version: main - duckdb_version: 0ccf3c25cc + duckdb_version: b470dea7ee exclude_archs: 'wasm_mvp;wasm_eh;wasm_threads;windows_amd64_rtools;windows_amd64_mingw' diff --git a/duckdb b/duckdb index 0ccf3c2..b470dea 160000 --- a/duckdb +++ b/duckdb @@ -1 +1 @@ -Subproject commit 0ccf3c25ccbb25fb90616e77b38f6d138f82950d +Subproject commit b470dea7ee47dc2debcc37a4e94976f8eff6670c diff --git a/extension-ci-tools b/extension-ci-tools index 3e987be..916d4ef 160000 --- a/extension-ci-tools +++ b/extension-ci-tools @@ -1 +1 @@ -Subproject commit 3e987be862c95d0f7fc674fa242c97ce3a37ee04 +Subproject commit 916d4ef4371068ca98a007378b52582c3e46b4e5 From 35114aebc34e0824a38babb7fbcd1d15be18c0c5 Mon Sep 17 00:00:00 2001 From: Sam Ansmink Date: Wed, 20 Nov 2024 10:14:13 +0100 Subject: [PATCH 16/45] fix upstream MultiFileReader API changes --- src/functions/delta_scan.cpp | 16 ++++++++-------- src/include/functions/delta_scan.hpp | 6 +++--- 2 files changed, 11 insertions(+), 11 deletions(-) diff --git a/src/functions/delta_scan.cpp b/src/functions/delta_scan.cpp index 3053fbf..8021a29 100644 --- a/src/functions/delta_scan.cpp +++ b/src/functions/delta_scan.cpp @@ -526,7 +526,7 @@ 
unique_ptr DeltaSnapshot::ComplexFilterPushdown(ClientContext &co for (const auto &filter : filters) { combiner.AddFilter(filter->Copy()); } - auto filterstmp = combiner.GenerateTableScanFilters(info.column_ids); + auto filterstmp = combiner.GenerateTableScanFilters(info.column_indexes); // TODO: can/should we figure out if this filtered anything? auto filtered_list = make_uniq(context, paths[0]); @@ -643,7 +643,7 @@ void DeltaMultiFileReader::BindOptions(MultiFileReaderOptions &options, MultiFil void DeltaMultiFileReader::FinalizeBind(const MultiFileReaderOptions &file_options, const MultiFileReaderBindData &options, const string &filename, const vector &local_names, const vector &global_types, - const vector &global_names, const vector &global_column_ids, + const vector &global_names, const vector &global_column_ids, MultiFileReaderData &reader_data, ClientContext &context, optional_ptr global_state) { MultiFileReader::FinalizeBind(file_options, options, filename, local_names, global_types, global_names, @@ -671,7 +671,7 @@ void DeltaMultiFileReader::FinalizeBind(const MultiFileReaderOptions &file_optio if (!file_metadata->partition_map.empty()) { for (idx_t i = 0; i < global_column_ids.size(); i++) { - column_t col_id = global_column_ids[i]; + column_t col_id = global_column_ids[i].GetPrimaryIndex(); if (IsRowIdColumnId(col_id)) { continue; } @@ -749,14 +749,14 @@ unique_ptr DeltaMultiFileReader::InitializeGlobalSta duckdb::ClientContext &context, const duckdb::MultiFileReaderOptions &file_options, const duckdb::MultiFileReaderBindData &bind_data, const duckdb::MultiFileList &file_list, const vector &global_types, const vector &global_names, - const vector &global_column_ids) { + const vector &global_column_ids) { vector extra_columns; vector> mapped_columns; // Create a map of the columns that are in the projection case_insensitive_map_t selected_columns; for (idx_t i = 0; i < global_column_ids.size(); i++) { - auto global_id = global_column_ids[i]; + auto 
global_id = global_column_ids[i].GetPrimaryIndex(); if (IsRowIdColumnId(global_id)) { continue; } @@ -815,7 +815,7 @@ unique_ptr DeltaMultiFileReader::InitializeGlobalSta // in the parquet files, we just add null constant columns static void CustomMulfiFileNameMapping(const string &file_name, const vector &local_types, const vector &local_names, const vector &global_types, - const vector &global_names, const vector &global_column_ids, + const vector &global_names, const vector &global_column_ids, MultiFileReaderData &reader_data, const string &initial_file, optional_ptr global_state) { D_ASSERT(global_types.size() == global_names.size()); @@ -839,7 +839,7 @@ static void CustomMulfiFileNameMapping(const string &file_name, const vector= global_types.size()) { throw InternalException( "MultiFileReader::CreatePositionalMapping - global_id is out of range in global_types for this file"); @@ -880,7 +880,7 @@ static void CustomMulfiFileNameMapping(const string &file_name, const vector &local_types, const vector &local_names, const vector &global_types, const vector &global_names, - const vector &global_column_ids, + const vector &global_column_ids, MultiFileReaderData &reader_data, const string &initial_file, optional_ptr global_state) { // First call the base implementation to do most mapping diff --git a/src/include/functions/delta_scan.hpp b/src/include/functions/delta_scan.hpp index 72636e8..5bc981c 100644 --- a/src/include/functions/delta_scan.hpp +++ b/src/include/functions/delta_scan.hpp @@ -133,7 +133,7 @@ struct DeltaMultiFileReader : public MultiFileReader { void CreateNameMapping(const string &file_name, const vector &local_types, const vector &local_names, const vector &global_types, - const vector &global_names, const vector &global_column_ids, + const vector &global_names, const vector &global_column_ids, MultiFileReaderData &reader_data, const string &initial_file, optional_ptr global_state) override; @@ -141,12 +141,12 @@ struct DeltaMultiFileReader : 
public MultiFileReader { InitializeGlobalState(ClientContext &context, const MultiFileReaderOptions &file_options, const MultiFileReaderBindData &bind_data, const MultiFileList &file_list, const vector &global_types, const vector &global_names, - const vector &global_column_ids) override; + const vector &global_column_ids) override; void FinalizeBind(const MultiFileReaderOptions &file_options, const MultiFileReaderBindData &options, const string &filename, const vector &local_names, const vector &global_types, const vector &global_names, - const vector &global_column_ids, MultiFileReaderData &reader_data, + const vector &global_column_ids, MultiFileReaderData &reader_data, ClientContext &context, optional_ptr global_state) override; //! Override the FinalizeChunk method From 6f25451dc798521f0782f6e5b196ec54b2210f79 Mon Sep 17 00:00:00 2001 From: Sam Ansmink Date: Wed, 20 Nov 2024 10:17:38 +0100 Subject: [PATCH 17/45] add explain table name test --- test/sql/dat/attach.test | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/test/sql/dat/attach.test b/test/sql/dat/attach.test index de6615d..14ece4b 100644 --- a/test/sql/dat/attach.test +++ b/test/sql/dat/attach.test @@ -122,4 +122,10 @@ select utf8 from dt 1 2 3 -4 \ No newline at end of file +4 + +# Test that the explain output contains the table name +query II +explain from dt +---- +physical_plan :.*Table: dt.* \ No newline at end of file From 35e9d529482422e4213d452ac862ffa5f39fbee0 Mon Sep 17 00:00:00 2001 From: Sam Ansmink Date: Wed, 20 Nov 2024 10:18:02 +0100 Subject: [PATCH 18/45] format --- src/functions/delta_scan.cpp | 20 ++++++++++---------- src/include/functions/delta_scan.hpp | 2 +- src/storage/delta_table_entry.cpp | 2 +- 3 files changed, 12 insertions(+), 12 deletions(-) diff --git a/src/functions/delta_scan.cpp b/src/functions/delta_scan.cpp index 8021a29..377c5c0 100644 --- a/src/functions/delta_scan.cpp +++ b/src/functions/delta_scan.cpp @@ -643,9 +643,9 @@ void 
DeltaMultiFileReader::BindOptions(MultiFileReaderOptions &options, MultiFil void DeltaMultiFileReader::FinalizeBind(const MultiFileReaderOptions &file_options, const MultiFileReaderBindData &options, const string &filename, const vector &local_names, const vector &global_types, - const vector &global_names, const vector &global_column_ids, - MultiFileReaderData &reader_data, ClientContext &context, - optional_ptr global_state) { + const vector &global_names, + const vector &global_column_ids, MultiFileReaderData &reader_data, + ClientContext &context, optional_ptr global_state) { MultiFileReader::FinalizeBind(file_options, options, filename, local_names, global_types, global_names, global_column_ids, reader_data, context, global_state); @@ -982,14 +982,14 @@ bool DeltaMultiFileReader::ParseOption(const string &key, const Value &val, Mult } static InsertionOrderPreservingMap DeltaFunctionToString(TableFunctionToStringInput &input) { - InsertionOrderPreservingMap result; + InsertionOrderPreservingMap result; - if (input.table_function.function_info) { - auto& table_info = input.table_function.function_info->Cast(); - result["Table"] = table_info.table_name; - } + if (input.table_function.function_info) { + auto &table_info = input.table_function.function_info->Cast(); + result["Table"] = table_info.table_name; + } - return result; + return result; } TableFunctionSet DeltaFunctions::GetDeltaScanFunction(DatabaseInstance &instance) { @@ -1013,7 +1013,7 @@ TableFunctionSet DeltaFunctions::GetDeltaScanFunction(DatabaseInstance &instance function.table_scan_progress = nullptr; function.get_bind_info = nullptr; - function.to_string = DeltaFunctionToString; + function.to_string = DeltaFunctionToString; // Schema param is just confusing here function.named_parameters.erase("schema"); diff --git a/src/include/functions/delta_scan.hpp b/src/include/functions/delta_scan.hpp index 5bc981c..e9e89da 100644 --- a/src/include/functions/delta_scan.hpp +++ 
b/src/include/functions/delta_scan.hpp @@ -17,7 +17,7 @@ struct DeltaSnapshot; struct DeltaFunctionInfo : public TableFunctionInfo { shared_ptr snapshot; string expected_path; - string table_name; + string table_name; }; struct DeltaFileMetaData { diff --git a/src/storage/delta_table_entry.cpp b/src/storage/delta_table_entry.cpp index 6dfb19d..be6ea58 100644 --- a/src/storage/delta_table_entry.cpp +++ b/src/storage/delta_table_entry.cpp @@ -47,7 +47,7 @@ TableFunction DeltaTableEntry::GetScanFunction(ClientContext &context, unique_pt auto function_info = make_shared_ptr(); function_info->snapshot = this->snapshot; - function_info->table_name = delta_catalog.GetName(); + function_info->table_name = delta_catalog.GetName(); delta_scan_function.function_info = std::move(function_info); vector inputs = {delta_catalog.GetDBPath()}; From bc3434f547e14a0e3bb52c405b06604b1f5e8226 Mon Sep 17 00:00:00 2001 From: Sam Ansmink Date: Wed, 20 Nov 2024 14:54:14 +0100 Subject: [PATCH 19/45] add filtered files to explain output --- scripts/generate_test_data.py | 2 +- src/functions/delta_scan.cpp | 50 +++++++++- .../generated/file_skipping_all_types.test | 92 ++++++++++++++----- 3 files changed, 117 insertions(+), 27 deletions(-) diff --git a/scripts/generate_test_data.py b/scripts/generate_test_data.py index e3ab444..001b9b2 100644 --- a/scripts/generate_test_data.py +++ b/scripts/generate_test_data.py @@ -180,7 +180,7 @@ def generate_test_data_pyspark(name, current_path, input_path, delete_predicate ## Partitioned table with all types we can file skip on for type in ["bool", "int", "tinyint", "smallint", "bigint", "float", "double", "varchar"]: - query = f"CREATE table test_table as select i::{type} as value, i::{type} as part from range(0,2) tbl(i)" + query = f"CREATE table test_table as select i::{type} as value1, (i)::{type} as value2, (i)::{type} as value3, i::{type} as part from range(0,5) tbl(i)" generate_test_data_delta_rs(f"test_file_skipping/{type}", query, "part") ## 
Simple table with deletion vector diff --git a/src/functions/delta_scan.cpp b/src/functions/delta_scan.cpp index 377c5c0..aeed39f 100644 --- a/src/functions/delta_scan.cpp +++ b/src/functions/delta_scan.cpp @@ -15,6 +15,7 @@ #include "duckdb/parser/parsed_expression.hpp" #include "duckdb/planner/binder.hpp" #include "duckdb/planner/operator/logical_get.hpp" +#include "duckdb/main/query_profiler.hpp" #include #include @@ -523,12 +524,12 @@ unique_ptr DeltaSnapshot::ComplexFilterPushdown(ClientContext &co return nullptr; } - for (const auto &filter : filters) { - combiner.AddFilter(filter->Copy()); + for (auto riter = filters.rbegin(); riter != filters.rend(); ++riter) { + combiner.AddFilter(riter->get()->Copy()); } + auto filterstmp = combiner.GenerateTableScanFilters(info.column_indexes); - // TODO: can/should we figure out if this filtered anything? auto filtered_list = make_uniq(context, paths[0]); filtered_list->table_filters = std::move(filterstmp); filtered_list->names = names; @@ -536,6 +537,49 @@ unique_ptr DeltaSnapshot::ComplexFilterPushdown(ClientContext &co // Copy over the snapshot, this avoids reparsing metadata filtered_list->snapshot = snapshot; + auto &profiler = QueryProfiler::Get(context); + + // Note: this is potentially quite expensive: we are creating 2 scans of the snapshot and fully materializing both + // file lists Therefore this is only done when profile is enabled. 
This is enable by default in debug mode or for + // EXPLAIN ANALYZE queries + if (profiler.IsEnabled()) { + auto old_total = GetTotalFileCount(); + auto new_total = filtered_list->GetTotalFileCount(); + + if (old_total != new_total) { + string filters_info; + bool first_item = true; + for (auto &f : filtered_list->table_filters.filters) { + auto &column_index = f.first; + auto &filter = f.second; + if (column_index < names.size()) { + if (!first_item) { + filters_info += "\n"; + } + first_item = false; + auto &col_name = names[column_index]; + filters_info += filter->ToString(col_name); + } + } + + info.extra_info.file_filters = filters_info; + } + + if (!info.extra_info.total_files.IsValid()) { + info.extra_info.total_files = old_total; + } else if (info.extra_info.total_files.GetIndex() < old_total) { + throw InternalException( + "Error encountered when analyzing filtered out files for delta scan: total_files inconsistent!"); + } + + if (!info.extra_info.filtered_files.IsValid() || info.extra_info.filtered_files.GetIndex() >= new_total) { + info.extra_info.filtered_files = new_total; + } else { + throw InternalException( + "Error encountered when analyzing filtered out files for delta scan: filtered_files inconsistent!"); + } + } + return std::move(filtered_list); } diff --git a/test/sql/generated/file_skipping_all_types.test b/test/sql/generated/file_skipping_all_types.test index e4348e8..77e1516 100644 --- a/test/sql/generated/file_skipping_all_types.test +++ b/test/sql/generated/file_skipping_all_types.test @@ -8,37 +8,83 @@ require delta require-env GENERATED_DATA_AVAILABLE -# TODO: this doesn't appear to skip files yet -# TODO: add tests once https://github.com/duckdb/duckdb/pull/12488 is available +foreach type float double -query I -select value -from delta_scan('./data/generated/test_file_skipping/bool/delta_lake') -where part != false -order by value +# using column to skip files +query II +EXPLAIN ANALYZE SELECT value1, value2, value3 +FROM 
delta_scan('./data/generated/test_file_skipping/${type}/delta_lake') +WHERE + value1 > 0.5 and + value2 > 2.5 and + value3 < 3.5 ---- -true - -foreach type bool int tinyint smallint bigint varchar +analyzed_plan :.*File Filters:.*value1>0.5.*value2>2.5.*value3<3.5.*Scanning Files: 1/5.* -query I -select value -from delta_scan('./data/generated/test_file_skipping/${type}/delta_lake') -where part != 0 -order by value +# FIXME: Partition columns currently don't cause file skipping yet +query II +EXPLAIN ANALYZE SELECT part +FROM delta_scan('./data/generated/test_file_skipping/${type}/delta_lake') +WHERE part > 0.5 ---- -1 +analyzed_plan :.*File Filters:.* endloop -foreach type float double +# use bool column to skip files +query II +EXPLAIN ANALYZE SELECT * +FROM delta_scan('./data/generated/test_file_skipping/bool/delta_lake') +WHERE value1=false +---- +analyzed_plan :.*File Filters:.*value1=false.*Scanning Files: 1/2.* + +# FIXME: Partition columns currently don't cause file skipping yet +query II +EXPLAIN ANALYZE SELECT part +FROM delta_scan('./data/generated/test_file_skipping/bool/delta_lake') +WHERE part=false +---- +analyzed_plan :.*File Filters:.* + +foreach type int tinyint smallint bigint -query I -select value -from delta_scan('./data/generated/test_file_skipping/${type}/delta_lake') -where part > 0.5 -order by value +# using column to skip files +query II +EXPLAIN ANALYZE SELECT value1, value2, value3 +FROM delta_scan('./data/generated/test_file_skipping/${type}/delta_lake') +WHERE + value1 > 1 and + value2 > 2 and + value3 < 4 ---- -1.0 +analyzed_plan :.*File Filters:.*value1>1.*value2>2.*value3<4.*Scanning Files: 1/5.* + +# FIXME: Partition columns currently don't cause file skipping yet +query II +EXPLAIN ANALYZE SELECT part +FROM delta_scan('./data/generated/test_file_skipping/${type}/delta_lake') +WHERE part = 0 +---- +analyzed_plan :.*File Filters:.* endloop + +# using column to skip files +query II +EXPLAIN ANALYZE SELECT value1, value2, value3 
+FROM delta_scan('./data/generated/test_file_skipping/varchar/delta_lake') +WHERE + value1 = '2' and + value2 = '2' and + value3 = '2' +---- +analyzed_plan :.*File Filters:.*value1='2'.*value2='2'.*value3='2'.*Scanning Files: 1/5.* + +# FIXME: Partition columns currently don't cause file skipping yet +query II +EXPLAIN ANALYZE SELECT part +FROM delta_scan('./data/generated/test_file_skipping/varchar/delta_lake') +WHERE part = '0' +---- +analyzed_plan :.*File Filters:.* From c08e66b322753cdeeaff295b4e96d9287a707283 Mon Sep 17 00:00:00 2001 From: Sam Ansmink Date: Wed, 20 Nov 2024 15:17:25 +0100 Subject: [PATCH 20/45] hide behind option --- src/delta_extension.cpp | 5 ++ src/functions/delta_scan.cpp | 63 ++++++++++--------- .../generated/file_skipping_all_types.test | 44 +++++++++++++ 3 files changed, 83 insertions(+), 29 deletions(-) diff --git a/src/delta_extension.cpp b/src/delta_extension.cpp index 36003a3..0c21ade 100644 --- a/src/delta_extension.cpp +++ b/src/delta_extension.cpp @@ -52,6 +52,11 @@ static void LoadInternal(DatabaseInstance &instance) { // Register the "single table" delta catalog (to ATTACH a single delta table) auto &config = DBConfig::GetConfig(instance); config.storage_extensions["delta"] = make_uniq(); + + config.AddExtensionOption("delta_scan_explain_files_filtered", + "Adds the filtered files to the explain output. Warning: this may change performance of " + "delta scan during explain analyze queries.", + LogicalType::BOOLEAN, Value(true)); } void DeltaExtension::Load(DuckDB &db) { diff --git a/src/functions/delta_scan.cpp b/src/functions/delta_scan.cpp index aeed39f..fb4bbe4 100644 --- a/src/functions/delta_scan.cpp +++ b/src/functions/delta_scan.cpp @@ -543,40 +543,45 @@ unique_ptr DeltaSnapshot::ComplexFilterPushdown(ClientContext &co // file lists Therefore this is only done when profile is enabled. 
This is enable by default in debug mode or for // EXPLAIN ANALYZE queries if (profiler.IsEnabled()) { - auto old_total = GetTotalFileCount(); - auto new_total = filtered_list->GetTotalFileCount(); - - if (old_total != new_total) { - string filters_info; - bool first_item = true; - for (auto &f : filtered_list->table_filters.filters) { - auto &column_index = f.first; - auto &filter = f.second; - if (column_index < names.size()) { - if (!first_item) { - filters_info += "\n"; + Value result; + if (!context.TryGetCurrentSetting("delta_scan_explain_files_filtered", result)) { + throw InternalException("Failed to find 'delta_scan_explain_files_filtered' option!"); + } else if (result.GetValue()) { + auto old_total = GetTotalFileCount(); + auto new_total = filtered_list->GetTotalFileCount(); + + if (old_total != new_total) { + string filters_info; + bool first_item = true; + for (auto &f : filtered_list->table_filters.filters) { + auto &column_index = f.first; + auto &filter = f.second; + if (column_index < names.size()) { + if (!first_item) { + filters_info += "\n"; + } + first_item = false; + auto &col_name = names[column_index]; + filters_info += filter->ToString(col_name); } - first_item = false; - auto &col_name = names[column_index]; - filters_info += filter->ToString(col_name); } - } - info.extra_info.file_filters = filters_info; - } + info.extra_info.file_filters = filters_info; + } - if (!info.extra_info.total_files.IsValid()) { - info.extra_info.total_files = old_total; - } else if (info.extra_info.total_files.GetIndex() < old_total) { - throw InternalException( - "Error encountered when analyzing filtered out files for delta scan: total_files inconsistent!"); - } + if (!info.extra_info.total_files.IsValid()) { + info.extra_info.total_files = old_total; + } else if (info.extra_info.total_files.GetIndex() < old_total) { + throw InternalException( + "Error encountered when analyzing filtered out files for delta scan: total_files inconsistent!"); + } - if 
(!info.extra_info.filtered_files.IsValid() || info.extra_info.filtered_files.GetIndex() >= new_total) { - info.extra_info.filtered_files = new_total; - } else { - throw InternalException( - "Error encountered when analyzing filtered out files for delta scan: filtered_files inconsistent!"); + if (!info.extra_info.filtered_files.IsValid() || info.extra_info.filtered_files.GetIndex() >= new_total) { + info.extra_info.filtered_files = new_total; + } else { + throw InternalException( + "Error encountered when analyzing filtered out files for delta scan: filtered_files inconsistent!"); + } } } diff --git a/test/sql/generated/file_skipping_all_types.test b/test/sql/generated/file_skipping_all_types.test index 77e1516..e2b90ea 100644 --- a/test/sql/generated/file_skipping_all_types.test +++ b/test/sql/generated/file_skipping_all_types.test @@ -21,6 +21,16 @@ WHERE ---- analyzed_plan :.*File Filters:.*value1>0.5.*value2>2.5.*value3<3.5.*Scanning Files: 1/5.* +query III +SELECT value1, value2, value3 +FROM delta_scan('./data/generated/test_file_skipping/${type}/delta_lake') +WHERE + value1 > 0.5 and + value2 > 2.5 and + value3 < 3.5 +---- +3.0 3.0 3.0 + # FIXME: Partition columns currently don't cause file skipping yet query II EXPLAIN ANALYZE SELECT part @@ -60,6 +70,16 @@ WHERE ---- analyzed_plan :.*File Filters:.*value1>1.*value2>2.*value3<4.*Scanning Files: 1/5.* +query III +SELECT value1, value2, value3 +FROM delta_scan('./data/generated/test_file_skipping/${type}/delta_lake') +WHERE + value1 > 1 and + value2 > 2 and + value3 < 4 +---- +3 3 3 + # FIXME: Partition columns currently don't cause file skipping yet query II EXPLAIN ANALYZE SELECT part @@ -81,6 +101,16 @@ WHERE ---- analyzed_plan :.*File Filters:.*value1='2'.*value2='2'.*value3='2'.*Scanning Files: 1/5.* +query III +SELECT value1, value2, value3 +FROM delta_scan('./data/generated/test_file_skipping/varchar/delta_lake') +WHERE + value1 = '2' and + value2 = '2' and + value3 = '2' +---- +2 2 2 + # FIXME: 
Partition columns currently don't cause file skipping yet query II EXPLAIN ANALYZE SELECT part @@ -88,3 +118,17 @@ FROM delta_scan('./data/generated/test_file_skipping/varchar/delta_lake') WHERE part = '0' ---- analyzed_plan :.*File Filters:.* + +# We can remove this from output if precise operator timing is crucial +statement ok +set delta_scan_explain_files_filtered = false; + +query II +EXPLAIN ANALYZE SELECT value1, value2, value3 +FROM delta_scan('./data/generated/test_file_skipping/varchar/delta_lake') +WHERE + value1 = '2' and + value2 = '2' and + value3 = '2' +---- +analyzed_plan :.*File Filters:.* \ No newline at end of file From ac80e3a80110d80316d44aaf7616fe785ebd05a0 Mon Sep 17 00:00:00 2001 From: Sam Ansmink Date: Thu, 5 Dec 2024 10:03:10 +0100 Subject: [PATCH 21/45] bump-kernel --- CMakeLists.txt | 2 +- src/include/delta_kernel_ffi.hpp | 314 ++++++++++++++++++++++++------- 2 files changed, 251 insertions(+), 65 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 50df657..1113da8 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -141,7 +141,7 @@ ExternalProject_Add( # the c++ headers. 
Currently, when bumping the kernel version, the produced # header in ./src/include/delta_kernel_ffi.hpp should be also bumped, applying # the fix - GIT_TAG v0.4.0 + GIT_TAG v0.5.0 # Prints the env variables passed to the cargo build to the terminal, useful # in debugging because passing them through CMake is an error-prone mess CONFIGURE_COMMAND ${CMAKE_COMMAND} -E env ${RUST_UNSET_ENV_VARS} diff --git a/src/include/delta_kernel_ffi.hpp b/src/include/delta_kernel_ffi.hpp index 6f1401e..3b6a615 100644 --- a/src/include/delta_kernel_ffi.hpp +++ b/src/include/delta_kernel_ffi.hpp @@ -3,8 +3,8 @@ #include #include #include -#include #include +#include namespace ffi { @@ -40,6 +40,7 @@ enum class KernelError { MalformedJsonError, MissingMetadataError, MissingProtocolError, + InvalidProtocolError, MissingMetadataAndProtocolError, ParseError, JoinFailureError, @@ -52,6 +53,12 @@ enum class KernelError { InternalError, InvalidExpression, InvalidLogPath, + InvalidCommitInfo, + FileAlreadyExists, + MissingCommitInfo, + UnsupportedError, + ParseIntervalError, + ChangeDataFeedUnsupported, }; struct CStringMap; @@ -73,6 +80,8 @@ struct ExclusiveFileReadResultIterator; struct KernelExpressionVisitorState; +struct SharedExpression; + struct SharedExternEngine; struct SharedGlobalScanState; @@ -179,19 +188,20 @@ struct ExternResult { /// Intentionally not Copy, Clone, Send, nor Sync. /// /// Whoever instantiates the struct must ensure it does not outlive the data it points to. The -/// compiler cannot help us here, because raw pointers don't have lifetimes. To reduce the risk of -/// accidental misuse, it is recommended to only instantiate this struct as a function arg, by -/// converting a string slice `Into` a `KernelStringSlice`. That way, the borrowed reference at call -/// site protects the `KernelStringSlice` until the function returns. 
Meanwhile, the callee should -/// assume that the slice is only valid until the function returns, and must not retain any -/// references to the slice or its data that could outlive the function call. -/// -/// ``` -/// # use delta_kernel_ffi::KernelStringSlice; -/// fn wants_slice(slice: KernelStringSlice) { } -/// let msg = String::from("hello"); -/// wants_slice(msg.into()); +/// compiler cannot help us here, because raw pointers don't have lifetimes. A good rule of thumb is +/// to always use the [`kernel_string_slice`] macro to create string slices, and to avoid returning +/// a string slice from a code block or function (since the move risks over-extending its lifetime): +/// +/// ```ignore +/// # // Ignored because this code is pub(crate) and doc tests cannot compile it +/// let dangling_slice = { +/// let tmp = String::from("tmp"); +/// kernel_string_slice!(tmp) +/// } /// ``` +/// +/// Meanwhile, the callee must assume that the slice is only valid until the function returns, and +/// must not retain any references to the slice or its data that might outlive the function call. struct KernelStringSlice { const char *ptr; uintptr_t len; @@ -205,22 +215,6 @@ using NullableCvoid = void *; /// function is that `kernel_str` is _only_ valid until the return from this function using AllocateStringFn = NullableCvoid (*)(KernelStringSlice kernel_str); -struct FileMeta { - KernelStringSlice path; - int64_t last_modified; - uintptr_t size; -}; - -/// Model iterators. This allows an engine to specify iteration however it likes, and we simply wrap -/// the engine functions. The engine retains ownership of the iterator. -struct EngineIterator { - void *data; - /// A function that should advance the iterator and return the next time from the data - /// If the iterator is complete, it should return null. It should be safe to - /// call `get_next()` multiple times if it returns null. 
- const void *(*get_next)(void *data); -}; - /// ABI-compatible struct for ArrowArray from C Data Interface /// See /// @@ -278,6 +272,182 @@ struct ArrowFFIData { }; #endif +struct FileMeta { + KernelStringSlice path; + int64_t last_modified; + uintptr_t size; +}; + +/// Model iterators. This allows an engine to specify iteration however it likes, and we simply wrap +/// the engine functions. The engine retains ownership of the iterator. +struct EngineIterator { + void *data; + /// A function that should advance the iterator and return the next time from the data + /// If the iterator is complete, it should return null. It should be safe to + /// call `get_next()` multiple times if it returns null. + const void *(*get_next)(void *data); +}; + +template +using VisitLiteralFn = void (*)(void *data, uintptr_t sibling_list_id, T value); + +using VisitVariadicFn = void (*)(void *data, uintptr_t sibling_list_id, uintptr_t child_list_id); + +using VisitUnaryFn = void (*)(void *data, uintptr_t sibling_list_id, uintptr_t child_list_id); + +using VisitBinaryOpFn = void (*)(void *data, uintptr_t sibling_list_id, uintptr_t child_list_id); + +/// The [`EngineExpressionVisitor`] defines a visitor system to allow engines to build their own +/// representation of a kernel expression. +/// +/// The model is list based. When the kernel needs a list, it will ask engine to allocate one of a +/// particular size. Once allocated the engine returns an `id`, which can be any integer identifier +/// ([`usize`]) the engine wants, and will be passed back to the engine to identify the list in the +/// future. +/// +/// Every expression the kernel visits belongs to some list of "sibling" elements. The schema +/// itself is a list of schema elements, and every complex type (struct expression, array, variadic, etc) +/// contains a list of "child" elements. +/// 1. Before visiting any complex expression type, the kernel asks the engine to allocate a list to +/// hold its children +/// 2. 
When visiting any expression element, the kernel passes its parent's "child list" as the +/// "sibling list" the element should be appended to: +/// - For a struct literal, first visit each struct field and visit each value +/// - For a struct expression, visit each sub expression. +/// - For an array literal, visit each of the elements. +/// - For a variadic `and` or `or` expression, visit each sub-expression. +/// - For a binary operator expression, visit the left and right operands. +/// - For a unary `is null` or `not` expression, visit the sub-expression. +/// 3. When visiting a complex expression, the kernel also passes the "child list" containing +/// that element's (already-visited) children. +/// 4. The [`visit_expression`] method returns the id of the list of top-level columns +/// +/// WARNING: The visitor MUST NOT retain internal references to string slices or binary data passed +/// to visitor methods +/// TODO: Visit type information in struct field and null. This will likely involve using the schema +/// visitor. Note that struct literals are currently in flux, and may change significantly. Here is the relevant +/// issue: https://github.com/delta-io/delta-kernel-rs/issues/412 +struct EngineExpressionVisitor { + /// An opaque engine state pointer + void *data; + /// Creates a new expression list, optionally reserving capacity up front + uintptr_t (*make_field_list)(void *data, uintptr_t reserve); + /// Visit a 32bit `integer` belonging to the list identified by `sibling_list_id`. + VisitLiteralFn visit_literal_int; + /// Visit a 64bit `long` belonging to the list identified by `sibling_list_id`. + VisitLiteralFn visit_literal_long; + /// Visit a 16bit `short` belonging to the list identified by `sibling_list_id`. + VisitLiteralFn visit_literal_short; + /// Visit an 8bit `byte` belonging to the list identified by `sibling_list_id`. + VisitLiteralFn visit_literal_byte; + /// Visit a 32bit `float` belonging to the list identified by `sibling_list_id`. 
+ VisitLiteralFn visit_literal_float; + /// Visit a 64bit `double` belonging to the list identified by `sibling_list_id`. + VisitLiteralFn visit_literal_double; + /// Visit a `string` belonging to the list identified by `sibling_list_id`. + VisitLiteralFn visit_literal_string; + /// Visit a `boolean` belonging to the list identified by `sibling_list_id`. + VisitLiteralFn visit_literal_bool; + /// Visit a 64bit timestamp belonging to the list identified by `sibling_list_id`. + /// The timestamp is microsecond precision and adjusted to UTC. + VisitLiteralFn visit_literal_timestamp; + /// Visit a 64bit timestamp belonging to the list identified by `sibling_list_id`. + /// The timestamp is microsecond precision with no timezone. + VisitLiteralFn visit_literal_timestamp_ntz; + /// Visit a 32bit intger `date` representing days since UNIX epoch 1970-01-01. The `date` belongs + /// to the list identified by `sibling_list_id`. + VisitLiteralFn visit_literal_date; + /// Visit binary data at the `buffer` with length `len` belonging to the list identified by + /// `sibling_list_id`. + void (*visit_literal_binary)(void *data, uintptr_t sibling_list_id, const uint8_t *buffer, uintptr_t len); + /// Visit a 128bit `decimal` value with the given precision and scale. The 128bit integer + /// is split into the most significant 64 bits in `value_ms`, and the least significant 64 + /// bits in `value_ls`. The `decimal` belongs to the list identified by `sibling_list_id`. + void (*visit_literal_decimal)(void *data, uintptr_t sibling_list_id, uint64_t value_ms, uint64_t value_ls, + uint8_t precision, uint8_t scale); + /// Visit a struct literal belonging to the list identified by `sibling_list_id`. + /// The field names of the struct are in a list identified by `child_field_list_id`. + /// The values of the struct are in a list identified by `child_value_list_id`. 
+ void (*visit_literal_struct)(void *data, uintptr_t sibling_list_id, uintptr_t child_field_list_id, + uintptr_t child_value_list_id); + /// Visit an array literal belonging to the list identified by `sibling_list_id`. + /// The values of the array are in a list identified by `child_list_id`. + void (*visit_literal_array)(void *data, uintptr_t sibling_list_id, uintptr_t child_list_id); + /// Visits a null value belonging to the list identified by `sibling_list_id. + void (*visit_literal_null)(void *data, uintptr_t sibling_list_id); + /// Visits an `and` expression belonging to the list identified by `sibling_list_id`. + /// The sub-expressions of the array are in a list identified by `child_list_id` + VisitVariadicFn visit_and; + /// Visits an `or` expression belonging to the list identified by `sibling_list_id`. + /// The sub-expressions of the array are in a list identified by `child_list_id` + VisitVariadicFn visit_or; + /// Visits a `not` expression belonging to the list identified by `sibling_list_id`. + /// The sub-expression will be in a _one_ item list identified by `child_list_id` + VisitUnaryFn visit_not; + /// Visits a `is_null` expression belonging to the list identified by `sibling_list_id`. + /// The sub-expression will be in a _one_ item list identified by `child_list_id` + VisitUnaryFn visit_is_null; + /// Visits the `LessThan` binary operator belonging to the list identified by `sibling_list_id`. + /// The operands will be in a _two_ item list identified by `child_list_id` + VisitBinaryOpFn visit_lt; + /// Visits the `LessThanOrEqual` binary operator belonging to the list identified by `sibling_list_id`. + /// The operands will be in a _two_ item list identified by `child_list_id` + VisitBinaryOpFn visit_le; + /// Visits the `GreaterThan` binary operator belonging to the list identified by `sibling_list_id`. 
+ /// The operands will be in a _two_ item list identified by `child_list_id` + VisitBinaryOpFn visit_gt; + /// Visits the `GreaterThanOrEqual` binary operator belonging to the list identified by `sibling_list_id`. + /// The operands will be in a _two_ item list identified by `child_list_id` + VisitBinaryOpFn visit_ge; + /// Visits the `Equal` binary operator belonging to the list identified by `sibling_list_id`. + /// The operands will be in a _two_ item list identified by `child_list_id` + VisitBinaryOpFn visit_eq; + /// Visits the `NotEqual` binary operator belonging to the list identified by `sibling_list_id`. + /// The operands will be in a _two_ item list identified by `child_list_id` + VisitBinaryOpFn visit_ne; + /// Visits the `Distinct` binary operator belonging to the list identified by `sibling_list_id`. + /// The operands will be in a _two_ item list identified by `child_list_id` + VisitBinaryOpFn visit_distinct; + /// Visits the `In` binary operator belonging to the list identified by `sibling_list_id`. + /// The operands will be in a _two_ item list identified by `child_list_id` + VisitBinaryOpFn visit_in; + /// Visits the `NotIn` binary operator belonging to the list identified by `sibling_list_id`. + /// The operands will be in a _two_ item list identified by `child_list_id` + VisitBinaryOpFn visit_not_in; + /// Visits the `Add` binary operator belonging to the list identified by `sibling_list_id`. + /// The operands will be in a _two_ item list identified by `child_list_id` + VisitBinaryOpFn visit_add; + /// Visits the `Minus` binary operator belonging to the list identified by `sibling_list_id`. + /// The operands will be in a _two_ item list identified by `child_list_id` + VisitBinaryOpFn visit_minus; + /// Visits the `Multiply` binary operator belonging to the list identified by `sibling_list_id`. 
+ /// The operands will be in a _two_ item list identified by `child_list_id` + VisitBinaryOpFn visit_multiply; + /// Visits the `Divide` binary operator belonging to the list identified by `sibling_list_id`. + /// The operands will be in a _two_ item list identified by `child_list_id` + VisitBinaryOpFn visit_divide; + /// Visits the `column` belonging to the list identified by `sibling_list_id`. + void (*visit_column)(void *data, uintptr_t sibling_list_id, KernelStringSlice name); + /// Visits a `StructExpression` belonging to the list identified by `sibling_list_id`. + /// The sub-expressions of the `StructExpression` are in a list identified by `child_list_id` + void (*visit_struct_expr)(void *data, uintptr_t sibling_list_id, uintptr_t child_list_id); +}; + +// This trickery is from https://github.com/mozilla/cbindgen/issues/402#issuecomment-578680163 +struct im_an_unused_struct_that_tricks_msvc_into_compilation { + ExternResult field; + ExternResult field2; + ExternResult field3; + ExternResult> field4; + ExternResult> field5; + ExternResult field6; + ExternResult field7; + ExternResult> field8; + ExternResult> field9; + ExternResult> field10; + ExternResult field11; +}; + /// A predicate that can be used to skip data when scanning. 
/// /// When invoking [`scan::scan`], The engine provides a pointer to the (engine's native) predicate, @@ -305,21 +475,6 @@ struct Stats { using CScanCallback = void (*)(NullableCvoid engine_context, KernelStringSlice path, int64_t size, const Stats *stats, const DvInfo *dv_info, const CStringMap *partition_map); -// This trickery is from https://github.com/mozilla/cbindgen/issues/402#issuecomment-578680163 -struct im_an_unused_struct_that_tricks_msvc_into_compilation { - ExternResult field; - ExternResult field2; - ExternResult field3; - ExternResult> field4; - ExternResult> field5; - ExternResult field6; - ExternResult field7; - ExternResult> field8; - ExternResult> field9; - ExternResult> field10; - ExternResult field11; -}; - /// The `EngineSchemaVisitor` defines a visitor system to allow engines to build their own /// representation of a schema from a particular schema within kernel. /// @@ -498,6 +653,32 @@ bool string_slice_next(Handle data, NullableCvoid engine_co /// Caller is responsible for (at most once) passing a valid pointer to a [`StringSliceIterator`] void free_string_slice_data(Handle data); +/// Get the number of rows in an engine data +/// +/// # Safety +/// `data_handle` must be a valid pointer to a kernel allocated `ExclusiveEngineData` +uintptr_t engine_data_length(Handle *data); + +/// Allow an engine to "unwrap" an [`ExclusiveEngineData`] into the raw pointer for the case it wants +/// to use its own engine data format +/// +/// # Safety +/// +/// `data_handle` must be a valid pointer to a kernel allocated `ExclusiveEngineData`. The Engine must +/// ensure the handle outlives the returned pointer. +void *get_raw_engine_data(Handle data); + +#if defined(DEFINE_DEFAULT_ENGINE) +/// Get an [`ArrowFFIData`] to allow binding to the arrow [C Data +/// Interface](https://arrow.apache.org/docs/format/CDataInterface.html). This includes the data and +/// the schema. 
If this function returns an `Ok` variant the _engine_ must free the returned struct. +/// +/// # Safety +/// data_handle must be a valid ExclusiveEngineData as read by the +/// [`delta_kernel::engine::default::DefaultEngine`] obtained from `get_default_engine`. +ExternResult get_raw_arrow_data(Handle data, Handle engine); +#endif + /// Call the engine back with the next `EngingeData` batch read by Parquet/Json handler. The /// _engine_ "owns" the data that is passed into the `engine_visitor`, since it is allocated by the /// `Engine` being used for log-replay. If the engine wants the kernel to free this data, it _must_ @@ -565,38 +746,31 @@ uintptr_t visit_expression_literal_double(KernelExpressionVisitorState *state, d uintptr_t visit_expression_literal_bool(KernelExpressionVisitorState *state, bool value); -/// Get the number of rows in an engine data +/// Free the memory the passed SharedExpression /// /// # Safety -/// `data_handle` must be a valid pointer to a kernel allocated `ExclusiveEngineData` -uintptr_t engine_data_length(Handle *data); +/// Engine is responsible for passing a valid SharedExpression +void free_kernel_predicate(Handle data); -/// Allow an engine to "unwrap" an [`ExclusiveEngineData`] into the raw pointer for the case it wants -/// to use its own engine data format -/// -/// # Safety +/// Visit the expression of the passed [`SharedExpression`] Handle using the provided `visitor`. +/// See the documentation of [`EngineExpressionVisitor`] for a description of how this visitor +/// works. /// -/// `data_handle` must be a valid pointer to a kernel allocated `ExclusiveEngineData`. The Engine must -/// ensure the handle outlives the returned pointer. -void *get_raw_engine_data(Handle data); - -#if defined(DEFINE_DEFAULT_ENGINE) -/// Get an [`ArrowFFIData`] to allow binding to the arrow [C Data -/// Interface](https://arrow.apache.org/docs/format/CDataInterface.html). This includes the data and -/// the schema. 
+/// This method returns the id that the engine generated for the top level expression /// /// # Safety -/// data_handle must be a valid ExclusiveEngineData as read by the -/// [`delta_kernel::engine::default::DefaultEngine`] obtained from `get_default_engine`. -ExternResult get_raw_arrow_data(Handle data, Handle engine); -#endif +/// +/// The caller must pass a valid SharedExpression Handle and expression visitor +uintptr_t visit_expression(const Handle *expression, EngineExpressionVisitor *visitor); /// Drops a scan. /// # Safety /// Caller is responsible for passing a [valid][Handle#Validity] scan handle. void free_scan(Handle scan); -/// Get a [`Scan`] over the table specified by the passed snapshot. +/// Get a [`Scan`] over the table specified by the passed snapshot. It is the responsibility of the +/// _engine_ to free this scan when complete by calling [`free_scan`]. +/// /// # Safety /// /// Caller is responsible for passing a valid snapshot pointer, and engine pointer @@ -650,6 +824,10 @@ void free_global_scan_state(Handle state); ExternResult> kernel_scan_data_init(Handle engine, Handle scan); +/// Call the provided `engine_visitor` on the next scan data item. The visitor will be provided with +/// a selection vector and engine data. It is the responsibility of the _engine_ to free these when +/// it is finished by calling [`free_bool_slice`] and [`free_engine_data`] respectively. +/// /// # Safety /// /// The iterator must be valid (returned by [kernel_scan_data_init]) and not yet freed by @@ -706,6 +884,14 @@ void visit_scan_data(Handle data, KernelBoolSlice selection /// Caller is responsible for passing a valid snapshot handle and schema visitor. uintptr_t visit_schema(Handle snapshot, EngineSchemaVisitor *visitor); +/// Constructs a kernel expression that is passed back as a SharedExpression handle. The expected +/// output expression can be found in `ffi/tests/test_expression_visitor/expected.txt`. 
+/// +/// # Safety +/// The caller is responsible for freeing the retured memory, either by calling +/// [`free_kernel_predicate`], or [`Handle::drop_handle`] +Handle get_testing_kernel_expression(); + } // extern "C" } // namespace ffi From 582202e5c83b8a4cc73590fb01f6dd87c1fab89a Mon Sep 17 00:00:00 2001 From: Sam Ansmink Date: Thu, 5 Dec 2024 10:10:50 +0100 Subject: [PATCH 22/45] fix enum util --- src/delta_utils.cpp | 69 ++++++++++++++++++++++++++------------------- 1 file changed, 40 insertions(+), 29 deletions(-) diff --git a/src/delta_utils.cpp b/src/delta_utils.cpp index 1a8ff04..ae42676 100644 --- a/src/delta_utils.cpp +++ b/src/delta_utils.cpp @@ -107,37 +107,48 @@ ffi::EngineError *DuckDBEngineError::AllocateError(ffi::KernelError etype, ffi:: string DuckDBEngineError::KernelErrorEnumToString(ffi::KernelError err) { const char *KERNEL_ERROR_ENUM_STRINGS[] = { "UnknownError", - "FFIError", - "ArrowError", - "EngineDataTypeError", - "ExtractError", - "GenericError", - "IOErrorError", - "ParquetError", - "ObjectStoreError", - "ObjectStorePathError", - "Reqwest", - "FileNotFoundError", - "MissingColumnError", - "UnexpectedColumnTypeError", - "MissingDataError", - "MissingVersionError", - "DeletionVectorError", - "InvalidUrlError", - "MalformedJsonError", - "MissingMetadataError", - "MissingProtocolError", - "MissingMetadataAndProtocolError", - "ParseError", - "JoinFailureError", - "Utf8Error", - "ParseIntError", - "InvalidColumnMappingMode", - "InvalidTableLocation", - "InvalidDecimalError", + "FFIError", + "ArrowError", + "EngineDataTypeError", + "ExtractError", + "GenericError", + "IOErrorError", + "ParquetError", + "ObjectStoreError", + "ObjectStorePathError", + "ReqwestError", + "FileNotFoundError", + "MissingColumnError", + "UnexpectedColumnTypeError", + "MissingDataError", + "MissingVersionError", + "DeletionVectorError", + "InvalidUrlError", + "MalformedJsonError", + "MissingMetadataError", + "MissingProtocolError", + "InvalidProtocolError", + 
"MissingMetadataAndProtocolError", + "ParseError", + "JoinFailureError", + "Utf8Error", + "ParseIntError", + "InvalidColumnMappingModeError", + "InvalidTableLocationError", + "InvalidDecimalError", + "InvalidStructDataError", + "InternalError", + "InvalidExpression", + "InvalidLogPath", + "InvalidCommitInfo", + "FileAlreadyExists", + "MissingCommitInfo", + "UnsupportedError", + "ParseIntervalError", + "ChangeDataFeedUnsupported" }; - static_assert(sizeof(KERNEL_ERROR_ENUM_STRINGS) / sizeof(char *) - 1 == (int)ffi::KernelError::InvalidDecimalError, + static_assert(sizeof(KERNEL_ERROR_ENUM_STRINGS) / sizeof(char *) - 1 == (int)ffi::KernelError::ChangeDataFeedUnsupported, "KernelErrorEnumStrings mismatched with kernel"); if ((int)err < sizeof(KERNEL_ERROR_ENUM_STRINGS) / sizeof(char *)) { From 14e2e4580c6e5ff65e093d18adf39bc29bb4905c Mon Sep 17 00:00:00 2001 From: Sam Ansmink Date: Thu, 5 Dec 2024 10:13:16 +0100 Subject: [PATCH 23/45] checkout v4 --- .github/workflows/CloudTesting.yml | 2 +- .github/workflows/LocalTesting.yml | 6 +++--- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/.github/workflows/CloudTesting.yml b/.github/workflows/CloudTesting.yml index 93c627a..593694e 100644 --- a/.github/workflows/CloudTesting.yml +++ b/.github/workflows/CloudTesting.yml @@ -26,7 +26,7 @@ jobs: sudo apt-get update -y -qq sudo apt-get install -y -qq ninja-build make gcc-multilib g++-multilib zip unzip build-essential checkinstall curl libz-dev openssh-client - - uses: actions/checkout@v3 + - uses: actions/checkout@v4 with: fetch-depth: 0 submodules: 'true' diff --git a/.github/workflows/LocalTesting.yml b/.github/workflows/LocalTesting.yml index b3a897d..e8ff44b 100644 --- a/.github/workflows/LocalTesting.yml +++ b/.github/workflows/LocalTesting.yml @@ -24,7 +24,7 @@ jobs: ACTIONS_ALLOW_USE_UNSECURE_NODE_VERSION: true steps: - - uses: actions/checkout@v3 + - uses: actions/checkout@v4 with: fetch-depth: 0 submodules: 'true' @@ -90,7 +90,7 @@ jobs: 
VCPKG_TOOLCHAIN_PATH: ${{ github.workspace }}/vcpkg/scripts/buildsystems/vcpkg.cmake steps: - - uses: actions/checkout@v3 + - uses: actions/checkout@v4 with: fetch-depth: 0 submodules: 'true' @@ -179,7 +179,7 @@ jobs: VCPKG_TOOLCHAIN_PATH: ${{ github.workspace }}/vcpkg/scripts/buildsystems/vcpkg.cmake steps: - - uses: actions/checkout@v3 + - uses: actions/checkout@v4 with: fetch-depth: 0 submodules: 'true' From 307049a6e7b4104dc594fdffc2e0abd795305c3b Mon Sep 17 00:00:00 2001 From: Sam Ansmink Date: Thu, 5 Dec 2024 10:19:40 +0100 Subject: [PATCH 24/45] fix old manylinux based ci job --- .github/workflows/LocalTesting.yml | 29 +++++++++++++++++------------ 1 file changed, 17 insertions(+), 12 deletions(-) diff --git a/.github/workflows/LocalTesting.yml b/.github/workflows/LocalTesting.yml index e8ff44b..515bbfb 100644 --- a/.github/workflows/LocalTesting.yml +++ b/.github/workflows/LocalTesting.yml @@ -14,7 +14,6 @@ jobs: azurite-tests-linux: name: Azurite (local azure test server) tests (Linux) runs-on: ubuntu-latest - container: 'quay.io/pypa/manylinux2014_x86_64' env: VCPKG_TARGET_TRIPLET: 'x64-linux' GEN: Ninja @@ -29,27 +28,33 @@ jobs: fetch-depth: 0 submodules: 'true' + - name: Install Ninja + shell: bash + run: sudo apt-get update -y -qq && sudo apt-get install -y -qq ninja-build + + - name: Setup Ccache + uses: hendrikmuhs/ccache-action@main + with: + key: ${{ github.job }} + + - name: Setup vcpkg + uses: lukka/run-vcpkg@v11.1 + with: + vcpkgGitCommitId: a1a1cbc975abf909a6c8985a6a2b8fe20bbd9bd6 + + - uses: actions/setup-node@v4 + - name: install Azure test service run: | - yum install -y nodejs npm npm install -g azurite echo -e "[azure-cli]\nname=Azure CLI\nbaseurl=https://packages.microsoft.com/yumrepos/azure-cli\nenabled=1\ngpgcheck=1\ngpgkey=https://packages.microsoft.com/keys/microsoft.asc" | tee /etc/yum.repos.d/azure-cli.repo - yum install -y azure-cli - - - name: Setup ManyLinux2014 - run: | - ./duckdb/scripts/setup_manylinux2014.sh general aws-cli 
ccache ssh python_alias openssl + sudo apt-get install -y azure-cli - name: Setup Rust for manylinux (dtolnay/rust-toolchain doesn't work due to curl being old here) run: | curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs | sh -s -- -y echo "$HOME/.cargo/bin" >> $GITHUB_PATH - - name: Setup vcpkg - uses: lukka/run-vcpkg@v11.1 - with: - vcpkgGitCommitId: a1a1cbc975abf909a6c8985a6a2b8fe20bbd9bd6 - - name: Handle OpenSSL dependency for rust build run: | echo "OPENSSL_ROOT_DIR=`pwd`/build/release/vcpkg_installed/x64-linux" >> $GITHUB_ENV From 526f5f3373dc064243cb938f6f99fe22b9c735ca Mon Sep 17 00:00:00 2001 From: Sam Ansmink Date: Thu, 5 Dec 2024 10:27:54 +0100 Subject: [PATCH 25/45] remove old line of azurite initialization --- .github/workflows/LocalTesting.yml | 1 - 1 file changed, 1 deletion(-) diff --git a/.github/workflows/LocalTesting.yml b/.github/workflows/LocalTesting.yml index 515bbfb..34457eb 100644 --- a/.github/workflows/LocalTesting.yml +++ b/.github/workflows/LocalTesting.yml @@ -47,7 +47,6 @@ jobs: - name: install Azure test service run: | npm install -g azurite - echo -e "[azure-cli]\nname=Azure CLI\nbaseurl=https://packages.microsoft.com/yumrepos/azure-cli\nenabled=1\ngpgcheck=1\ngpgkey=https://packages.microsoft.com/keys/microsoft.asc" | tee /etc/yum.repos.d/azure-cli.repo sudo apt-get install -y azure-cli - name: Setup Rust for manylinux (dtolnay/rust-toolchain doesn't work due to curl being old here) From 0c87fd63ef774cbbf61750ce2cb60be85d8dcd21 Mon Sep 17 00:00:00 2001 From: Sam Ansmink Date: Thu, 19 Dec 2024 10:50:42 +0100 Subject: [PATCH 26/45] minor cleanup --- src/delta_utils.cpp | 9 +++------ src/functions/delta_scan.cpp | 13 ++----------- src/include/delta_utils.hpp | 8 ++++---- 3 files changed, 9 insertions(+), 21 deletions(-) diff --git a/src/delta_utils.cpp b/src/delta_utils.cpp index ae42676..9e709ec 100644 --- a/src/delta_utils.cpp +++ b/src/delta_utils.cpp @@ -79,18 +79,15 @@ uintptr_t 
SchemaVisitor::MakeFieldListImpl(uintptr_t capacity_hint) { void SchemaVisitor::AppendToList(uintptr_t id, ffi::KernelStringSlice name, LogicalType &&child) { auto it = inflight_lists.find(id); if (it == inflight_lists.end()) { - // TODO... some error... - throw InternalException("WEIRD SHIT"); - } else { - it->second->emplace_back(std::make_pair(string(name.ptr, name.len), std::move(child))); + throw InternalException("Unhandled error in SchemaVisitor::AppendToList child"); } + it->second->emplace_back(std::make_pair(string(name.ptr, name.len), std::move(child))); } unique_ptr SchemaVisitor::TakeFieldList(uintptr_t id) { auto it = inflight_lists.find(id); if (it == inflight_lists.end()) { - // TODO: Raise some kind of error. - throw InternalException("WEIRD SHIT 2"); + throw InternalException("Unhandled error in SchemaVisitor::TakeFieldList"); } auto rval = std::move(it->second); inflight_lists.erase(it); diff --git a/src/functions/delta_scan.cpp b/src/functions/delta_scan.cpp index fb4bbe4..e3e9458 100644 --- a/src/functions/delta_scan.cpp +++ b/src/functions/delta_scan.cpp @@ -1,4 +1,5 @@ #include "functions/delta_scan.hpp" +#include "storage/delta_catalog.hpp" #include "delta_functions.hpp" #include "duckdb/catalog/catalog_entry/table_function_catalog_entry.hpp" @@ -16,13 +17,9 @@ #include "duckdb/planner/binder.hpp" #include "duckdb/planner/operator/logical_get.hpp" #include "duckdb/main/query_profiler.hpp" +#include "duckdb/main/client_data.hpp" -#include -#include #include -#include -#include -#include namespace duckdb { @@ -473,11 +470,6 @@ string DeltaSnapshot::GetFile(idx_t i) { } } - // The kernel scan visitor should have resolved a file OR returned - if (i >= resolved_files.size()) { - throw IOException("Delta Kernel seems to have failed to resolve a new file"); - } - return resolved_files[i]; } @@ -770,7 +762,6 @@ static SelectionVector DuckSVFromDeltaSV(const ffi::KernelBoolSlice &dv, Vector for (idx_t i = 0; i < count; i++) { auto row_id = 
row_ids[data.sel->get_index(i)]; - // TODO: why are deletion vectors not spanning whole data? if (row_id >= dv.len || dv.ptr[row_id]) { result.data()[current_select] = i; current_select++; diff --git a/src/include/delta_utils.hpp b/src/include/delta_utils.hpp index 8760862..4e8b670 100644 --- a/src/include/delta_utils.hpp +++ b/src/include/delta_utils.hpp @@ -189,11 +189,11 @@ struct KernelUtils { if (result.err._0) { auto error_cast = static_cast(result.err._0); error_cast->Throw(from_where); - } else { - throw IOException("Hit DeltaKernel FFI error (from: %s): Hit error, but error was nullptr", - from_where.c_str()); } - } else if (result.tag == ffi::ExternResult::Tag::Ok) { + throw IOException("Hit DeltaKernel FFI error (from: %s): Hit error, but error was nullptr", + from_where.c_str()); + } + if (result.tag == ffi::ExternResult::Tag::Ok) { return result.ok._0; } throw IOException("Invalid error ExternResult tag found!"); From 4a32608dbf0f6dd956c61bfe767278268801f636 Mon Sep 17 00:00:00 2001 From: Sam Ansmink Date: Thu, 19 Dec 2024 11:25:38 +0100 Subject: [PATCH 27/45] add locking and parallel test --- Makefile | 8 ++ src/functions/delta_scan.cpp | 61 +++++++++++---- src/include/functions/delta_scan.hpp | 16 +++- src/storage/delta_catalog.cpp | 4 +- test/sql/generated/attach_parallel.test | 100 ++++++++++++++++++++++++ 5 files changed, 167 insertions(+), 22 deletions(-) create mode 100644 test/sql/generated/attach_parallel.test diff --git a/Makefile b/Makefile index 8cc8bc9..7eb8376 100644 --- a/Makefile +++ b/Makefile @@ -4,6 +4,14 @@ PROJ_DIR := $(dir $(abspath $(lastword $(MAKEFILE_LIST)))) EXT_NAME=deltatable EXT_CONFIG=${PROJ_DIR}extension_config.cmake +ifeq ($(SANITIZER_MODE), thread) + EXT_DEBUG_FLAGS:=-DENABLE_THREAD_SANITIZER=1 +endif + +ifneq ("${CUSTOM_LINKER}", "") + EXT_DEBUG_FLAGS:=${EXT_DEBUG_FLAGS} -DCUSTOM_LINKER=${CUSTOM_LINKER} +endif + # Set test paths test_release: export 
DELTA_KERNEL_TESTS_PATH=./build/release/rust/src/delta_kernel/kernel/tests/data test_release: export DAT_PATH=./build/release/rust/src/delta_kernel/acceptance/tests/dat diff --git a/src/functions/delta_scan.cpp b/src/functions/delta_scan.cpp index fb4bbe4..9e1c36e 100644 --- a/src/functions/delta_scan.cpp +++ b/src/functions/delta_scan.cpp @@ -49,7 +49,7 @@ string url_decode(string input) { return result; } -static void visit_callback(ffi::NullableCvoid engine_context, struct ffi::KernelStringSlice path, int64_t size, +void DeltaSnapshot::VisitCallback(ffi::NullableCvoid engine_context, struct ffi::KernelStringSlice path, int64_t size, const ffi::Stats *stats, const ffi::DvInfo *dv_info, const struct ffi::CStringMap *partition_values) { auto context = (DeltaSnapshot *)engine_context; @@ -94,9 +94,9 @@ static void visit_callback(ffi::NullableCvoid engine_context, struct ffi::Kernel context->metadata.back()->partition_map = std::move(constant_map); } -static void visit_data(void *engine_context, ffi::ExclusiveEngineData *engine_data, +void DeltaSnapshot::VisitData(void *engine_context, ffi::ExclusiveEngineData *engine_data, const struct ffi::KernelBoolSlice selection_vec) { - ffi::visit_scan_data(engine_data, selection_vec, engine_context, visit_callback); + ffi::visit_scan_data(engine_data, selection_vec, engine_context, VisitCallback); } string ParseAccountNameFromEndpoint(const string &endpoint) { @@ -386,7 +386,7 @@ DeltaSnapshot::DeltaSnapshot(ClientContext &context_p, const string &path) : MultiFileList({ToDeltaPath(path)}, FileGlobOptions::ALLOW_EMPTY), context(context_p) { } -string DeltaSnapshot::GetPath() { +string DeltaSnapshot::GetPath() const { return GetPaths()[0]; } @@ -416,6 +416,8 @@ string DeltaSnapshot::ToDeltaPath(const string &raw_path) { } void DeltaSnapshot::Bind(vector &return_types, vector &names) { + unique_lock lck(lock); + if (have_bound) { names = this->names; return_types = this->types; @@ -443,7 +445,7 @@ void DeltaSnapshot::Bind(vector 
&return_types, vector &name this->types = return_types; } -string DeltaSnapshot::GetFile(idx_t i) { +string DeltaSnapshot::GetFileInternal(idx_t i) { if (!initialized_snapshot) { InitializeSnapshot(); } @@ -462,7 +464,7 @@ string DeltaSnapshot::GetFile(idx_t i) { } while (i >= resolved_files.size()) { - auto have_scan_data_res = ffi::kernel_scan_data_next(scan_data_iterator.get(), this, visit_data); + auto have_scan_data_res = ffi::kernel_scan_data_next(scan_data_iterator.get(), this, VisitData); auto have_scan_data = TryUnpackKernelResult(have_scan_data_res); @@ -481,6 +483,12 @@ string DeltaSnapshot::GetFile(idx_t i) { return resolved_files[i]; } +string DeltaSnapshot::GetFile(idx_t i) { + // TODO: profile this: we should be able to use atomics here to optimize + unique_lock lck(lock); + return GetFileInternal(i); +} + void DeltaSnapshot::InitializeSnapshot() { auto path_slice = KernelUtils::ToDeltaString(paths[0]); @@ -535,13 +543,17 @@ unique_ptr DeltaSnapshot::ComplexFilterPushdown(ClientContext &co filtered_list->names = names; // Copy over the snapshot, this avoids reparsing metadata - filtered_list->snapshot = snapshot; + { + unique_lock lck(lock); + filtered_list->snapshot = snapshot; + } auto &profiler = QueryProfiler::Get(context); // Note: this is potentially quite expensive: we are creating 2 scans of the snapshot and fully materializing both // file lists Therefore this is only done when profile is enabled. 
This is enable by default in debug mode or for // EXPLAIN ANALYZE queries + // TODO: check locking behaviour below if (profiler.IsEnabled()) { Value result; if (!context.TryGetCurrentSetting("delta_scan_explain_files_filtered", result)) { @@ -589,9 +601,10 @@ unique_ptr DeltaSnapshot::ComplexFilterPushdown(ClientContext &co } vector DeltaSnapshot::GetAllFiles() { + unique_lock lck(lock); idx_t i = resolved_files.size(); // TODO: this can probably be improved - while (!GetFile(i).empty()) { + while (!GetFileInternal(i).empty()) { i++; } return resolved_files; @@ -606,9 +619,9 @@ FileExpandResult DeltaSnapshot::GetExpandResult() { } idx_t DeltaSnapshot::GetTotalFileCount() { - // TODO: this can probably be improved + unique_lock lck(lock); idx_t i = resolved_files.size(); - while (!GetFile(i).empty()) { + while (!GetFileInternal(i).empty()) { i++; } return resolved_files.size(); @@ -618,6 +631,9 @@ unique_ptr DeltaSnapshot::GetCardinality(ClientContext &context) // This also ensures all files are expanded auto total_file_count = DeltaSnapshot::GetTotalFileCount(); + // TODO: internalize above + unique_lock lck(lock); + if (total_file_count == 0) { return make_uniq(0, 0); } @@ -638,6 +654,17 @@ unique_ptr DeltaSnapshot::GetCardinality(ClientContext &context) return nullptr; } + +idx_t DeltaSnapshot::GetVersion() { + unique_lock lck(lock); + return version; +} + +DeltaFileMetaData &DeltaSnapshot::GetMetaData(idx_t index) const { + unique_lock lck(lock); + return *metadata[index]; +} + unique_ptr DeltaMultiFileReader::CreateInstance(const TableFunction &table_function) { auto result = make_uniq(); @@ -716,16 +743,16 @@ void DeltaMultiFileReader::FinalizeBind(const MultiFileReaderOptions &file_optio // Get the metadata for this file D_ASSERT(global_state->file_list); const auto &snapshot = dynamic_cast(*global_state->file_list); - auto &file_metadata = snapshot.metadata[reader_data.file_list_idx.GetIndex()]; + auto &file_metadata = 
snapshot.GetMetaData(reader_data.file_list_idx.GetIndex()); - if (!file_metadata->partition_map.empty()) { + if (!file_metadata.partition_map.empty()) { for (idx_t i = 0; i < global_column_ids.size(); i++) { column_t col_id = global_column_ids[i].GetPrimaryIndex(); if (IsRowIdColumnId(col_id)) { continue; } - auto col_partition_entry = file_metadata->partition_map.find(global_names[col_id]); - if (col_partition_entry != file_metadata->partition_map.end()) { + auto col_partition_entry = file_metadata.partition_map.find(global_names[col_id]); + if (col_partition_entry != file_metadata.partition_map.end()) { auto ¤t_type = global_types[col_id]; if (current_type == LogicalType::BLOB) { reader_data.constant_map.emplace_back(i, Value::BLOB_RAW(col_partition_entry->second)); @@ -977,15 +1004,15 @@ void DeltaMultiFileReader::FinalizeChunk(ClientContext &context, const MultiFile // Get the metadata for this file const auto &snapshot = dynamic_cast(*global_state->file_list); - auto &metadata = snapshot.metadata[reader_data.file_list_idx.GetIndex()]; + auto &metadata = snapshot.GetMetaData(reader_data.file_list_idx.GetIndex()); - if (metadata->selection_vector.ptr && chunk.size() != 0) { + if (metadata.selection_vector.ptr && chunk.size() != 0) { D_ASSERT(delta_global_state.file_row_number_idx != DConstants::INVALID_INDEX); auto &file_row_number_column = chunk.data[delta_global_state.file_row_number_idx]; // Construct the selection vector using the file_row_number column and the raw selection vector from delta idx_t select_count; - auto sv = DuckSVFromDeltaSV(metadata->selection_vector, file_row_number_column, chunk.size(), select_count); + auto sv = DuckSVFromDeltaSV(metadata.selection_vector, file_row_number_column, chunk.size(), select_count); chunk.Slice(sv, select_count); } diff --git a/src/include/functions/delta_scan.hpp b/src/include/functions/delta_scan.hpp index e9e89da..fe842d3 100644 --- a/src/include/functions/delta_scan.hpp +++ 
b/src/include/functions/delta_scan.hpp @@ -43,7 +43,7 @@ struct DeltaFileMetaData { //! The DeltaSnapshot implements the MultiFileList API to allow injecting it into the regular DuckDB parquet scan struct DeltaSnapshot : public MultiFileList { DeltaSnapshot(ClientContext &context, const string &path); - string GetPath(); + string GetPath() const; static string ToDuckDBPath(const string &raw_path); static string ToDeltaPath(const string &raw_path); @@ -58,12 +58,15 @@ struct DeltaSnapshot : public MultiFileList { idx_t GetTotalFileCount() override; unique_ptr GetCardinality(ClientContext &context) override; + idx_t GetVersion(); + DeltaFileMetaData &GetMetaData(idx_t index) const; protected: //! Get the i-th expanded file string GetFile(idx_t i) override; protected: + string GetFileInternal(idx_t i); void InitializeSnapshot(); void InitializeScan(); @@ -73,8 +76,15 @@ struct DeltaSnapshot : public MultiFileList { result, StringUtil::Format("While trying to read from delta table: '%s'", paths[0])); } - // TODO: change back to protected -public: + static void VisitData(void *engine_context, ffi::ExclusiveEngineData *engine_data, + const struct ffi::KernelBoolSlice selection_vec); + static void VisitCallback(ffi::NullableCvoid engine_context, struct ffi::KernelStringSlice path, int64_t size, + const ffi::Stats *stats, const ffi::DvInfo *dv_info, + const struct ffi::CStringMap *partition_values); + +protected: + mutable mutex lock; + idx_t version; //! 
Delta Kernel Structures diff --git a/src/storage/delta_catalog.cpp b/src/storage/delta_catalog.cpp index 53b1195..44e03e7 100644 --- a/src/storage/delta_catalog.cpp +++ b/src/storage/delta_catalog.cpp @@ -64,12 +64,12 @@ optional_idx DeltaCatalog::GetCatalogVersion(ClientContext &context) { // Option 1: snapshot is cached table-wide auto cached_snapshot = main_schema->GetCachedTable(); if (cached_snapshot) { - return cached_snapshot->snapshot->version; + return cached_snapshot->snapshot->GetVersion(); } // Option 2: snapshot is cached in transaction if (delta_transaction.table_entry) { - return delta_transaction.table_entry->snapshot->version; + return delta_transaction.table_entry->snapshot->GetVersion(); } return {}; diff --git a/test/sql/generated/attach_parallel.test b/test/sql/generated/attach_parallel.test new file mode 100644 index 0000000..37a5fbb --- /dev/null +++ b/test/sql/generated/attach_parallel.test @@ -0,0 +1,100 @@ +# name: test/sql/generated/attach_parallel.test +# description: Test attaching a delta table and reading from it in parallel +# group: [dat] + +require parquet + +require delta + +require-env GENERATED_DATA_AVAILABLE + +statement ok +pragma threads=10; + +statement ok +ATTACH 'data/generated/simple_partitioned/delta_lake/' as dt (TYPE delta) + +statement ok +ATTACH 'data/generated/simple_partitioned/delta_lake/' as dt_pinned (TYPE delta, PIN_SNAPSHOT) + +concurrentloop threadid 0 20 + +query I +WITH RECURSIVE ctename AS ( + SELECT *, 1 as recursiondepth + FROM dt + UNION ALL + SELECT * EXCLUDE (c2.recursiondepth), c2.recursiondepth + 1 as recursiondepth + FROM ctename as c2 + WHERE c2.recursiondepth < 8 +) +SELECT count(i) FROM ctename; +---- +80 + +query I +WITH RECURSIVE ctename AS ( + SELECT *, 1 as recursiondepth + FROM dt_pinned + UNION ALL + SELECT * EXCLUDE (c2.recursiondepth), c2.recursiondepth + 1 as recursiondepth + FROM ctename as c2 + WHERE c2.recursiondepth < 8 +) +SELECT count(i) FROM ctename; +---- +80 + +endloop + 
+concurrentloop threadid 0 20 + +query I +SELECT count(i) FROM dt UNION ALL +SELECT count(i) FROM dt UNION ALL +SELECT count(i) FROM dt UNION ALL +SELECT count(i) FROM dt UNION ALL +SELECT count(i) FROM dt UNION ALL +SELECT count(i) FROM dt UNION ALL +SELECT count(i) FROM dt UNION ALL +SELECT count(i) FROM dt UNION ALL +SELECT count(i) FROM dt UNION ALL +SELECT count(i) FROM dt +---- +10 +10 +10 +10 +10 +10 +10 +10 +10 +10 + +query I +SELECT count(i) FROM dt_pinned UNION ALL +SELECT count(i) FROM dt_pinned UNION ALL +SELECT count(i) FROM dt_pinned UNION ALL +SELECT count(i) FROM dt_pinned UNION ALL +SELECT count(i) FROM dt_pinned UNION ALL +SELECT count(i) FROM dt_pinned UNION ALL +SELECT count(i) FROM dt_pinned UNION ALL +SELECT count(i) FROM dt_pinned UNION ALL +SELECT count(i) FROM dt_pinned UNION ALL +SELECT count(i) FROM dt_pinned +---- +10 +10 +10 +10 +10 +10 +10 +10 +10 +10 + + + +endloop \ No newline at end of file From f945b663ca850e267a91912a152d2d74c42c15bc Mon Sep 17 00:00:00 2001 From: Sam Ansmink Date: Thu, 19 Dec 2024 13:34:17 +0100 Subject: [PATCH 28/45] disable micro benchmarks for now --- .github/workflows/LocalTesting.yml | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/.github/workflows/LocalTesting.yml b/.github/workflows/LocalTesting.yml index 34457eb..5c425b8 100644 --- a/.github/workflows/LocalTesting.yml +++ b/.github/workflows/LocalTesting.yml @@ -294,11 +294,12 @@ jobs: run: | python ./duckdb/scripts/regression/test_runner.py --old=duckdb_delta/build/release/benchmark/benchmark_runner --new=build/release/benchmark/benchmark_runner --benchmarks=.github/regression/tpcds_sf1_local.csv --verbose --threads=2 --root-dir=. 
- - name: Regression Test Micro - if: always() - shell: bash - run: | - python ./duckdb/scripts/regression/test_runner.py --old=duckdb_delta/build/release/benchmark/benchmark_runner --new=build/release/benchmark/benchmark_runner --benchmarks=.github/regression/micro.csv --verbose --threads=2 --root-dir=. + # FIXME: re-enable +# - name: Regression Test Micro +# if: always() +# shell: bash +# run: | +# python ./duckdb/scripts/regression/test_runner.py --old=duckdb_delta/build/release/benchmark/benchmark_runner --new=build/release/benchmark/benchmark_runner --benchmarks=.github/regression/micro.csv --verbose --threads=2 --root-dir=. - name: Test benchmark makefile shell: bash From a9f197786126ad37b7d36d514a54f2ece9b4405e Mon Sep 17 00:00:00 2001 From: Sam Ansmink Date: Thu, 19 Dec 2024 15:26:32 +0100 Subject: [PATCH 29/45] correctly set endpoint for gcs --- src/functions/delta_scan.cpp | 17 ++++++++++------- 1 file changed, 10 insertions(+), 7 deletions(-) diff --git a/src/functions/delta_scan.cpp b/src/functions/delta_scan.cpp index 9e1c36e..cb5ca50 100644 --- a/src/functions/delta_scan.cpp +++ b/src/functions/delta_scan.cpp @@ -278,13 +278,16 @@ static ffi::EngineBuilder *CreateBuilder(ClientContext &context, const string &p } } - if (StringUtil::StartsWith(endpoint, "http://")) { - ffi::set_builder_option(builder, KernelUtils::ToDeltaString("allow_http"), - KernelUtils::ToDeltaString("true")); - } - ffi::set_builder_option(builder, KernelUtils::ToDeltaString("aws_endpoint"), - KernelUtils::ToDeltaString(endpoint)); - } + if (StringUtil::StartsWith(endpoint, "http://")) { + ffi::set_builder_option(builder, KernelUtils::ToDeltaString("allow_http"), + KernelUtils::ToDeltaString("true")); + } + ffi::set_builder_option(builder, KernelUtils::ToDeltaString("aws_endpoint"), + KernelUtils::ToDeltaString(endpoint)); + } else if (StringUtil::StartsWith(path, "gs://") || StringUtil::StartsWith(path, "gcs://")) { + ffi::set_builder_option(builder, 
KernelUtils::ToDeltaString("aws_endpoint"), + KernelUtils::ToDeltaString("https://storage.googleapis.com")); + } ffi::set_builder_option(builder, KernelUtils::ToDeltaString("aws_region"), KernelUtils::ToDeltaString(region)); From 3da4e72fa22d990f4b3fe4da57d6194d5bbc9a6e Mon Sep 17 00:00:00 2001 From: Sam Ansmink Date: Thu, 19 Dec 2024 15:26:54 +0100 Subject: [PATCH 30/45] correctly set endpoint for gcs --- src/functions/delta_scan.cpp | 69 ++++++++++++++++++------------------ 1 file changed, 34 insertions(+), 35 deletions(-) diff --git a/src/functions/delta_scan.cpp b/src/functions/delta_scan.cpp index cb5ca50..f66a72f 100644 --- a/src/functions/delta_scan.cpp +++ b/src/functions/delta_scan.cpp @@ -241,42 +241,41 @@ static ffi::EngineBuilder *CreateBuilder(ClientContext &context, const string &p // Here you would need to add the logic for setting the builder options for Azure // This is just a placeholder and will need to be replaced with the actual logic if (secret_type == "s3" || secret_type == "gcs" || secret_type == "r2") { + string key_id, secret, session_token, region, endpoint, url_style; + bool use_ssl = true; + secret_reader.TryGetSecretKey("key_id", key_id); + secret_reader.TryGetSecretKey("secret", secret); + secret_reader.TryGetSecretKey("session_token", session_token); + secret_reader.TryGetSecretKey("region", region); + secret_reader.TryGetSecretKey("endpoint", endpoint); + secret_reader.TryGetSecretKey("url_style", url_style); + secret_reader.TryGetSecretKey("use_ssl", use_ssl); + + if (key_id.empty() && secret.empty()) { + ffi::set_builder_option(builder, KernelUtils::ToDeltaString("skip_signature"), + KernelUtils::ToDeltaString("true")); + } - string key_id, secret, session_token, region, endpoint, url_style; - bool use_ssl = true; - secret_reader.TryGetSecretKey("key_id", key_id); - secret_reader.TryGetSecretKey("secret", secret); - secret_reader.TryGetSecretKey("session_token", session_token); - secret_reader.TryGetSecretKey("region", region); - 
secret_reader.TryGetSecretKey("endpoint", endpoint); - secret_reader.TryGetSecretKey("url_style", url_style); - secret_reader.TryGetSecretKey("use_ssl", use_ssl); - - if (key_id.empty() && secret.empty()) { - ffi::set_builder_option(builder, KernelUtils::ToDeltaString("skip_signature"), - KernelUtils::ToDeltaString("true")); - } - - if (!key_id.empty()) { - ffi::set_builder_option(builder, KernelUtils::ToDeltaString("aws_access_key_id"), - KernelUtils::ToDeltaString(key_id)); - } - if (!secret.empty()) { - ffi::set_builder_option(builder, KernelUtils::ToDeltaString("aws_secret_access_key"), - KernelUtils::ToDeltaString(secret)); - } - if (!session_token.empty()) { - ffi::set_builder_option(builder, KernelUtils::ToDeltaString("aws_session_token"), - KernelUtils::ToDeltaString(session_token)); - } - if (!endpoint.empty() && endpoint != "s3.amazonaws.com") { - if (!StringUtil::StartsWith(endpoint, "https://") && !StringUtil::StartsWith(endpoint, "http://")) { - if (use_ssl) { - endpoint = "https://" + endpoint; - } else { - endpoint = "http://" + endpoint; - } - } + if (!key_id.empty()) { + ffi::set_builder_option(builder, KernelUtils::ToDeltaString("aws_access_key_id"), + KernelUtils::ToDeltaString(key_id)); + } + if (!secret.empty()) { + ffi::set_builder_option(builder, KernelUtils::ToDeltaString("aws_secret_access_key"), + KernelUtils::ToDeltaString(secret)); + } + if (!session_token.empty()) { + ffi::set_builder_option(builder, KernelUtils::ToDeltaString("aws_session_token"), + KernelUtils::ToDeltaString(session_token)); + } + if (!endpoint.empty() && endpoint != "s3.amazonaws.com") { + if (!StringUtil::StartsWith(endpoint, "https://") && !StringUtil::StartsWith(endpoint, "http://")) { + if (use_ssl) { + endpoint = "https://" + endpoint; + } else { + endpoint = "http://" + endpoint; + } + } if (StringUtil::StartsWith(endpoint, "http://")) { ffi::set_builder_option(builder, KernelUtils::ToDeltaString("allow_http"), From 041a838179c521df2e8998e6dd73afe3a3381433 Mon 
Sep 17 00:00:00 2001 From: Sam Ansmink Date: Thu, 19 Dec 2024 11:56:40 +0100 Subject: [PATCH 31/45] bump to v0.6.0, build kernel either debug or release not both --- CMakeLists.txt | 25 +- Makefile | 2 +- src/delta_utils.cpp | 6 +- src/include/delta_kernel_ffi.hpp | 763 +++++++++++++++++++------------ 4 files changed, 484 insertions(+), 312 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 1113da8..3f0dfcc 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -118,11 +118,8 @@ set(RUST_UNSET_ENV_VARS --unset=CC --unset=CXX --unset=LD) set(DELTA_KERNEL_LIBNAME "${CMAKE_STATIC_LIBRARY_PREFIX}delta_kernel_ffi${CMAKE_STATIC_LIBRARY_SUFFIX}" ) -set(DELTA_KERNEL_LIBPATH_DEBUG - "${CMAKE_BINARY_DIR}/rust/src/delta_kernel/target/${RUST_PLATFORM_TARGET}/debug/${DELTA_KERNEL_LIBNAME}" -) -set(DELTA_KERNEL_LIBPATH_RELEASE - "${CMAKE_BINARY_DIR}/rust/src/delta_kernel/target/${RUST_PLATFORM_TARGET}/release/${DELTA_KERNEL_LIBNAME}" +set(DELTA_KERNEL_LIBPATH + "${CMAKE_BINARY_DIR}/rust/src/delta_kernel/target/${RUST_PLATFORM_TARGET}/$,debug,release>/${DELTA_KERNEL_LIBNAME}" ) set(DELTA_KERNEL_FFI_HEADER_PATH "${CMAKE_BINARY_DIR}/rust/src/delta_kernel/target/ffi-headers") @@ -141,7 +138,7 @@ ExternalProject_Add( # the c++ headers. 
Currently, when bumping the kernel version, the produced # header in ./src/include/delta_kernel_ffi.hpp should be also bumped, applying # the fix - GIT_TAG v0.5.0 + GIT_TAG v0.6.0 # Prints the env variables passed to the cargo build to the terminal, useful # in debugging because passing them through CMake is an error-prone mess CONFIGURE_COMMAND ${CMAKE_COMMAND} -E env ${RUST_UNSET_ENV_VARS} @@ -151,19 +148,13 @@ ExternalProject_Add( # Build debug build BUILD_COMMAND ${CMAKE_COMMAND} -E env ${RUST_UNSET_ENV_VARS} ${RUST_ENV_VARS} cargo build - --package delta_kernel_ffi --workspace --all-features ${RUST_PLATFORM_PARAM} - # Build release build - COMMAND - ${CMAKE_COMMAND} -E env ${RUST_UNSET_ENV_VARS} ${RUST_ENV_VARS} cargo build - --package delta_kernel_ffi --workspace --all-features --release - ${RUST_PLATFORM_PARAM} + --package delta_kernel_ffi --workspace $<$:--release> --all-features ${RUST_PLATFORM_PARAM} # Build DATs COMMAND ${CMAKE_COMMAND} -E env ${RUST_UNSET_ENV_VARS} ${RUST_ENV_VARS} cargo build --manifest-path=${CMAKE_BINARY_DIR}/rust/src/delta_kernel/acceptance/Cargo.toml # Define the byproducts, required for building with Ninja - BUILD_BYPRODUCTS "${DELTA_KERNEL_LIBPATH_DEBUG}" - BUILD_BYPRODUCTS "${DELTA_KERNEL_LIBPATH_RELEASE}" + BUILD_BYPRODUCTS "${DELTA_KERNEL_LIBPATH}" BUILD_BYPRODUCTS "${DELTA_KERNEL_FFI_HEADER_C}" BUILD_BYPRODUCTS "${DELTA_KERNEL_FFI_HEADER_CXX}" INSTALL_COMMAND "" @@ -185,14 +176,12 @@ add_compile_definitions(DEFINE_DEFAULT_ENGINE) # Link delta-kernal-rs to static lib target_link_libraries( - ${EXTENSION_NAME} debug ${DELTA_KERNEL_LIBPATH_DEBUG} optimized - ${DELTA_KERNEL_LIBPATH_RELEASE} ${PLATFORM_LIBS}) + ${EXTENSION_NAME} ${DELTA_KERNEL_LIBPATH} ${PLATFORM_LIBS}) add_dependencies(${EXTENSION_NAME} delta_kernel) # Link delta-kernal-rs to dynamic lib target_link_libraries( - ${LOADABLE_EXTENSION_NAME} debug ${DELTA_KERNEL_LIBPATH_DEBUG} optimized - ${DELTA_KERNEL_LIBPATH_RELEASE} ${PLATFORM_LIBS}) + ${LOADABLE_EXTENSION_NAME} 
${DELTA_KERNEL_LIBPATH} ${PLATFORM_LIBS}) add_dependencies(${LOADABLE_EXTENSION_NAME} delta_kernel) install( diff --git a/Makefile b/Makefile index 7eb8376..4a361b3 100644 --- a/Makefile +++ b/Makefile @@ -20,7 +20,7 @@ test_debug: export DELTA_KERNEL_TESTS_PATH=./build/debug/rust/src/delta_kernel/k test_debug: export DAT_PATH=./build/debug/rust/src/delta_kernel/acceptance/tests/dat # Core extensions that we need for testing -CORE_EXTENSIONS='tpcds;tpch;aws;azure;httpfs' +#CORE_EXTENSIONS='tpcds;tpch;aws;azure;httpfs' # Set this flag during building to enable the benchmark runner ifeq (${BUILD_BENCHMARK}, 1) diff --git a/src/delta_utils.cpp b/src/delta_utils.cpp index ae42676..84e71fa 100644 --- a/src/delta_utils.cpp +++ b/src/delta_utils.cpp @@ -145,10 +145,12 @@ string DuckDBEngineError::KernelErrorEnumToString(ffi::KernelError err) { "MissingCommitInfo", "UnsupportedError", "ParseIntervalError", - "ChangeDataFeedUnsupported" + "ChangeDataFeedUnsupported", + "ChangeDataFeedIncompatibleSchema", + "InvalidCheckpoint" }; - static_assert(sizeof(KERNEL_ERROR_ENUM_STRINGS) / sizeof(char *) - 1 == (int)ffi::KernelError::ChangeDataFeedUnsupported, + static_assert(sizeof(KERNEL_ERROR_ENUM_STRINGS) / sizeof(char *) - 1 == (int)ffi::KernelError::InvalidCheckpoint, "KernelErrorEnumStrings mismatched with kernel"); if ((int)err < sizeof(KERNEL_ERROR_ENUM_STRINGS) / sizeof(char *)) { diff --git a/src/include/delta_kernel_ffi.hpp b/src/include/delta_kernel_ffi.hpp index 3b6a615..ec9db0c 100644 --- a/src/include/delta_kernel_ffi.hpp +++ b/src/include/delta_kernel_ffi.hpp @@ -9,56 +9,99 @@ namespace ffi { enum class KernelError { - UnknownError, - FFIError, + UnknownError, + FFIError, #if (defined(DEFINE_DEFAULT_ENGINE) || defined(DEFINE_SYNC_ENGINE)) - ArrowError, + ArrowError, #endif - EngineDataTypeError, - ExtractError, - GenericError, - IOErrorError, + EngineDataTypeError, + ExtractError, + GenericError, + IOErrorError, #if (defined(DEFINE_DEFAULT_ENGINE) || 
defined(DEFINE_SYNC_ENGINE)) - ParquetError, + ParquetError, #endif #if defined(DEFINE_DEFAULT_ENGINE) - ObjectStoreError, + ObjectStoreError, #endif #if defined(DEFINE_DEFAULT_ENGINE) - ObjectStorePathError, + ObjectStorePathError, #endif #if defined(DEFINE_DEFAULT_ENGINE) - ReqwestError, + ReqwestError, #endif - FileNotFoundError, - MissingColumnError, - UnexpectedColumnTypeError, - MissingDataError, - MissingVersionError, - DeletionVectorError, - InvalidUrlError, - MalformedJsonError, - MissingMetadataError, - MissingProtocolError, - InvalidProtocolError, - MissingMetadataAndProtocolError, - ParseError, - JoinFailureError, - Utf8Error, - ParseIntError, - InvalidColumnMappingModeError, - InvalidTableLocationError, - InvalidDecimalError, - InvalidStructDataError, - InternalError, - InvalidExpression, - InvalidLogPath, - InvalidCommitInfo, - FileAlreadyExists, - MissingCommitInfo, - UnsupportedError, - ParseIntervalError, - ChangeDataFeedUnsupported, + FileNotFoundError, + MissingColumnError, + UnexpectedColumnTypeError, + MissingDataError, + MissingVersionError, + DeletionVectorError, + InvalidUrlError, + MalformedJsonError, + MissingMetadataError, + MissingProtocolError, + InvalidProtocolError, + MissingMetadataAndProtocolError, + ParseError, + JoinFailureError, + Utf8Error, + ParseIntError, + InvalidColumnMappingModeError, + InvalidTableLocationError, + InvalidDecimalError, + InvalidStructDataError, + InternalError, + InvalidExpression, + InvalidLogPath, + InvalidCommitInfo, + FileAlreadyExists, + MissingCommitInfo, + UnsupportedError, + ParseIntervalError, + ChangeDataFeedUnsupported, + ChangeDataFeedIncompatibleSchema, + InvalidCheckpoint, +}; + +/// Definitions of level verbosity. Verbose Levels are "greater than" less verbose ones. So +/// Level::ERROR is the lowest, and Level::TRACE the highest. +enum class Level { + ERROR = 0, + WARN = 1, + INFO = 2, + DEBUGGING = 3, + TRACE = 4, +}; + +/// Format to use for log lines. 
These correspond to the formats from [`tracing_subscriber` +/// formats](https://docs.rs/tracing-subscriber/latest/tracing_subscriber/fmt/format/index.html). +enum class LogLineFormat { + /// The default formatter. This emits human-readable, single-line logs for each event that + /// occurs, with the context displayed before the formatted representation of the event. + /// Example: + /// `2022-02-15T18:40:14.289898Z INFO fmt: preparing to shave yaks number_of_yaks=3` + FULL, + /// A variant of the FULL formatter, optimized for short line lengths. Fields from the context + /// are appended to the fields of the formatted event, and targets are not shown. + /// Example: + /// `2022-02-17T19:51:05.809287Z INFO fmt_compact: preparing to shave yaks number_of_yaks=3` + COMPACT, + /// Emits excessively pretty, multi-line logs, optimized for human readability. This is + /// primarily intended to be used in local development and debugging, or for command-line + /// applications, where automated analysis and compact storage of logs is less of a priority + /// than readability and visual appeal. + /// Example: + /// ```ignore + /// 2022-02-15T18:44:24.535324Z INFO fmt_pretty: preparing to shave yaks, number_of_yaks: 3 + /// at examples/examples/fmt-pretty.rs:16 on main + /// ``` + PRETTY, + /// Outputs newline-delimited JSON logs. This is intended for production use with systems where + /// structured logs are consumed as JSON by analysis and viewing tools. The JSON output is not + /// optimized for human readability. + /// Example: + /// `{"timestamp":"2022-02-15T18:47:10.821315Z","level":"INFO","fields":{"message":"preparing to shave yaks","number_of_yaks":3},"target":"fmt_json"}` + JSON, }; struct CStringMap; @@ -100,15 +143,15 @@ struct StringSliceIterator; /// receives a `KernelBoolSlice` as a return value from a kernel method, engine is responsible /// to free that slice, by calling [super::free_bool_slice] exactly once. 
struct KernelBoolSlice { - bool *ptr; - uintptr_t len; + bool *ptr; + uintptr_t len; }; /// An owned slice of u64 row indexes allocated by the kernel. The engine is responsible for /// freeing this slice by calling [super::free_row_indexes] once. struct KernelRowIndexArray { - uint64_t *ptr; - uintptr_t len; + uint64_t *ptr; + uintptr_t len; }; /// Represents an object that crosses the FFI boundary and which outlives the scope that created @@ -143,8 +186,8 @@ struct KernelRowIndexArray { /// NOTE: Because the underlying type is always [`Sync`], multi-threaded external code can /// freely access shared (non-mutable) handles. /// -template -using Handle = H *; +template +using Handle = H*; /// An error that can be returned to the engine. Engines that wish to associate additional /// information can define and use any type that is [pointer @@ -153,31 +196,31 @@ using Handle = H *; /// of a [standard layout](https://en.cppreference.com/w/cpp/language/data_members#Standard-layout) /// class. struct EngineError { - KernelError etype; + KernelError etype; }; /// Semantics: Kernel will always immediately return the leaked engine error to the engine (if it /// allocated one at all), and engine is responsible for freeing it. -template +template struct ExternResult { - enum class Tag { - Ok, - Err, - }; - - struct Ok_Body { - T _0; - }; - - struct Err_Body { - EngineError *_0; - }; - - Tag tag; - union { - Ok_Body ok; - Err_Body err; - }; + enum class Tag { + Ok, + Err, + }; + + struct Ok_Body { + T _0; + }; + + struct Err_Body { + EngineError *_0; + }; + + Tag tag; + union { + Ok_Body ok; + Err_Body err; + }; }; /// A non-owned slice of a UTF8 string, intended for arg-passing between kernel and engine. The @@ -203,17 +246,17 @@ struct ExternResult { /// Meanwhile, the callee must assume that the slice is only valid until the function returns, and /// must not retain any references to the slice or its data that might outlive the function call. 
struct KernelStringSlice { - const char *ptr; - uintptr_t len; + const char *ptr; + uintptr_t len; }; -using AllocateErrorFn = EngineError *(*)(KernelError etype, KernelStringSlice msg); +using AllocateErrorFn = EngineError*(*)(KernelError etype, KernelStringSlice msg); -using NullableCvoid = void *; +using NullableCvoid = void*; /// Allow engines to allocate strings of their own type. the contract of calling a passed allocate /// function is that `kernel_str` is _only_ valid until the return from this function -using AllocateStringFn = NullableCvoid (*)(KernelStringSlice kernel_str); +using AllocateStringFn = NullableCvoid(*)(KernelStringSlice kernel_str); /// ABI-compatible struct for ArrowArray from C Data Interface /// See @@ -226,16 +269,16 @@ using AllocateStringFn = NullableCvoid (*)(KernelStringSlice kernel_str); /// } /// ``` struct FFI_ArrowArray { - int64_t length; - int64_t null_count; - int64_t offset; - int64_t n_buffers; - int64_t n_children; - const void **buffers; - FFI_ArrowArray **children; - FFI_ArrowArray *dictionary; - void (*release)(FFI_ArrowArray *arg1); - void *private_data; + int64_t length; + int64_t null_count; + int64_t offset; + int64_t n_buffers; + int64_t n_children; + const void **buffers; + FFI_ArrowArray **children; + FFI_ArrowArray *dictionary; + void (*release)(FFI_ArrowArray *arg1); + void *private_data; }; /// ABI-compatible struct for `ArrowSchema` from C Data Interface @@ -250,16 +293,16 @@ struct FFI_ArrowArray { /// ``` /// struct FFI_ArrowSchema { - const char *format; - const char *name; - const char *metadata; - /// Refer to [Arrow Flags](https://arrow.apache.org/docs/format/CDataInterface.html#c.ArrowSchema.flags) - int64_t flags; - int64_t n_children; - FFI_ArrowSchema **children; - FFI_ArrowSchema *dictionary; - void (*release)(FFI_ArrowSchema *arg1); - void *private_data; + const char *format; + const char *name; + const char *metadata; + /// Refer to [Arrow 
Flags](https://arrow.apache.org/docs/format/CDataInterface.html#c.ArrowSchema.flags) + int64_t flags; + int64_t n_children; + FFI_ArrowSchema **children; + FFI_ArrowSchema *dictionary; + void (*release)(FFI_ArrowSchema *arg1); + void *private_data; }; #if defined(DEFINE_DEFAULT_ENGINE) @@ -267,35 +310,35 @@ struct FFI_ArrowSchema { /// Interface](https://arrow.apache.org/docs/format/CDataInterface.html). This includes the data and /// the schema. struct ArrowFFIData { - FFI_ArrowArray array; - FFI_ArrowSchema schema; + FFI_ArrowArray array; + FFI_ArrowSchema schema; }; #endif struct FileMeta { - KernelStringSlice path; - int64_t last_modified; - uintptr_t size; + KernelStringSlice path; + int64_t last_modified; + uintptr_t size; }; /// Model iterators. This allows an engine to specify iteration however it likes, and we simply wrap /// the engine functions. The engine retains ownership of the iterator. struct EngineIterator { - void *data; - /// A function that should advance the iterator and return the next time from the data - /// If the iterator is complete, it should return null. It should be safe to - /// call `get_next()` multiple times if it returns null. - const void *(*get_next)(void *data); + void *data; + /// A function that should advance the iterator and return the next time from the data + /// If the iterator is complete, it should return null. It should be safe to + /// call `get_next()` multiple times if it returns null. 
+ const void *(*get_next)(void *data); }; -template -using VisitLiteralFn = void (*)(void *data, uintptr_t sibling_list_id, T value); +template +using VisitLiteralFn = void(*)(void *data, uintptr_t sibling_list_id, T value); -using VisitVariadicFn = void (*)(void *data, uintptr_t sibling_list_id, uintptr_t child_list_id); +using VisitVariadicFn = void(*)(void *data, uintptr_t sibling_list_id, uintptr_t child_list_id); -using VisitUnaryFn = void (*)(void *data, uintptr_t sibling_list_id, uintptr_t child_list_id); +using VisitUnaryFn = void(*)(void *data, uintptr_t sibling_list_id, uintptr_t child_list_id); -using VisitBinaryOpFn = void (*)(void *data, uintptr_t sibling_list_id, uintptr_t child_list_id); +using VisitBinaryOpFn = void(*)(void *data, uintptr_t sibling_list_id, uintptr_t child_list_id); /// The [`EngineExpressionVisitor`] defines a visitor system to allow engines to build their own /// representation of a kernel expression. @@ -328,126 +371,154 @@ using VisitBinaryOpFn = void (*)(void *data, uintptr_t sibling_list_id, uintptr_ /// visitor. Note that struct literals are currently in flux, and may change significantly. Here is the relevant /// issue: https://github.com/delta-io/delta-kernel-rs/issues/412 struct EngineExpressionVisitor { - /// An opaque engine state pointer - void *data; - /// Creates a new expression list, optionally reserving capacity up front - uintptr_t (*make_field_list)(void *data, uintptr_t reserve); - /// Visit a 32bit `integer` belonging to the list identified by `sibling_list_id`. - VisitLiteralFn visit_literal_int; - /// Visit a 64bit `long` belonging to the list identified by `sibling_list_id`. - VisitLiteralFn visit_literal_long; - /// Visit a 16bit `short` belonging to the list identified by `sibling_list_id`. - VisitLiteralFn visit_literal_short; - /// Visit an 8bit `byte` belonging to the list identified by `sibling_list_id`. 
- VisitLiteralFn visit_literal_byte; - /// Visit a 32bit `float` belonging to the list identified by `sibling_list_id`. - VisitLiteralFn visit_literal_float; - /// Visit a 64bit `double` belonging to the list identified by `sibling_list_id`. - VisitLiteralFn visit_literal_double; - /// Visit a `string` belonging to the list identified by `sibling_list_id`. - VisitLiteralFn visit_literal_string; - /// Visit a `boolean` belonging to the list identified by `sibling_list_id`. - VisitLiteralFn visit_literal_bool; - /// Visit a 64bit timestamp belonging to the list identified by `sibling_list_id`. - /// The timestamp is microsecond precision and adjusted to UTC. - VisitLiteralFn visit_literal_timestamp; - /// Visit a 64bit timestamp belonging to the list identified by `sibling_list_id`. - /// The timestamp is microsecond precision with no timezone. - VisitLiteralFn visit_literal_timestamp_ntz; - /// Visit a 32bit intger `date` representing days since UNIX epoch 1970-01-01. The `date` belongs - /// to the list identified by `sibling_list_id`. - VisitLiteralFn visit_literal_date; - /// Visit binary data at the `buffer` with length `len` belonging to the list identified by - /// `sibling_list_id`. - void (*visit_literal_binary)(void *data, uintptr_t sibling_list_id, const uint8_t *buffer, uintptr_t len); - /// Visit a 128bit `decimal` value with the given precision and scale. The 128bit integer - /// is split into the most significant 64 bits in `value_ms`, and the least significant 64 - /// bits in `value_ls`. The `decimal` belongs to the list identified by `sibling_list_id`. - void (*visit_literal_decimal)(void *data, uintptr_t sibling_list_id, uint64_t value_ms, uint64_t value_ls, - uint8_t precision, uint8_t scale); - /// Visit a struct literal belonging to the list identified by `sibling_list_id`. - /// The field names of the struct are in a list identified by `child_field_list_id`. - /// The values of the struct are in a list identified by `child_value_list_id`. 
- void (*visit_literal_struct)(void *data, uintptr_t sibling_list_id, uintptr_t child_field_list_id, - uintptr_t child_value_list_id); - /// Visit an array literal belonging to the list identified by `sibling_list_id`. - /// The values of the array are in a list identified by `child_list_id`. - void (*visit_literal_array)(void *data, uintptr_t sibling_list_id, uintptr_t child_list_id); - /// Visits a null value belonging to the list identified by `sibling_list_id. - void (*visit_literal_null)(void *data, uintptr_t sibling_list_id); - /// Visits an `and` expression belonging to the list identified by `sibling_list_id`. - /// The sub-expressions of the array are in a list identified by `child_list_id` - VisitVariadicFn visit_and; - /// Visits an `or` expression belonging to the list identified by `sibling_list_id`. - /// The sub-expressions of the array are in a list identified by `child_list_id` - VisitVariadicFn visit_or; - /// Visits a `not` expression belonging to the list identified by `sibling_list_id`. - /// The sub-expression will be in a _one_ item list identified by `child_list_id` - VisitUnaryFn visit_not; - /// Visits a `is_null` expression belonging to the list identified by `sibling_list_id`. - /// The sub-expression will be in a _one_ item list identified by `child_list_id` - VisitUnaryFn visit_is_null; - /// Visits the `LessThan` binary operator belonging to the list identified by `sibling_list_id`. - /// The operands will be in a _two_ item list identified by `child_list_id` - VisitBinaryOpFn visit_lt; - /// Visits the `LessThanOrEqual` binary operator belonging to the list identified by `sibling_list_id`. - /// The operands will be in a _two_ item list identified by `child_list_id` - VisitBinaryOpFn visit_le; - /// Visits the `GreaterThan` binary operator belonging to the list identified by `sibling_list_id`. 
- /// The operands will be in a _two_ item list identified by `child_list_id` - VisitBinaryOpFn visit_gt; - /// Visits the `GreaterThanOrEqual` binary operator belonging to the list identified by `sibling_list_id`. - /// The operands will be in a _two_ item list identified by `child_list_id` - VisitBinaryOpFn visit_ge; - /// Visits the `Equal` binary operator belonging to the list identified by `sibling_list_id`. - /// The operands will be in a _two_ item list identified by `child_list_id` - VisitBinaryOpFn visit_eq; - /// Visits the `NotEqual` binary operator belonging to the list identified by `sibling_list_id`. - /// The operands will be in a _two_ item list identified by `child_list_id` - VisitBinaryOpFn visit_ne; - /// Visits the `Distinct` binary operator belonging to the list identified by `sibling_list_id`. - /// The operands will be in a _two_ item list identified by `child_list_id` - VisitBinaryOpFn visit_distinct; - /// Visits the `In` binary operator belonging to the list identified by `sibling_list_id`. - /// The operands will be in a _two_ item list identified by `child_list_id` - VisitBinaryOpFn visit_in; - /// Visits the `NotIn` binary operator belonging to the list identified by `sibling_list_id`. - /// The operands will be in a _two_ item list identified by `child_list_id` - VisitBinaryOpFn visit_not_in; - /// Visits the `Add` binary operator belonging to the list identified by `sibling_list_id`. - /// The operands will be in a _two_ item list identified by `child_list_id` - VisitBinaryOpFn visit_add; - /// Visits the `Minus` binary operator belonging to the list identified by `sibling_list_id`. - /// The operands will be in a _two_ item list identified by `child_list_id` - VisitBinaryOpFn visit_minus; - /// Visits the `Multiply` binary operator belonging to the list identified by `sibling_list_id`. 
- /// The operands will be in a _two_ item list identified by `child_list_id` - VisitBinaryOpFn visit_multiply; - /// Visits the `Divide` binary operator belonging to the list identified by `sibling_list_id`. - /// The operands will be in a _two_ item list identified by `child_list_id` - VisitBinaryOpFn visit_divide; - /// Visits the `column` belonging to the list identified by `sibling_list_id`. - void (*visit_column)(void *data, uintptr_t sibling_list_id, KernelStringSlice name); - /// Visits a `StructExpression` belonging to the list identified by `sibling_list_id`. - /// The sub-expressions of the `StructExpression` are in a list identified by `child_list_id` - void (*visit_struct_expr)(void *data, uintptr_t sibling_list_id, uintptr_t child_list_id); + /// An opaque engine state pointer + void *data; + /// Creates a new expression list, optionally reserving capacity up front + uintptr_t (*make_field_list)(void *data, uintptr_t reserve); + /// Visit a 32bit `integer` belonging to the list identified by `sibling_list_id`. + VisitLiteralFn visit_literal_int; + /// Visit a 64bit `long` belonging to the list identified by `sibling_list_id`. + VisitLiteralFn visit_literal_long; + /// Visit a 16bit `short` belonging to the list identified by `sibling_list_id`. + VisitLiteralFn visit_literal_short; + /// Visit an 8bit `byte` belonging to the list identified by `sibling_list_id`. + VisitLiteralFn visit_literal_byte; + /// Visit a 32bit `float` belonging to the list identified by `sibling_list_id`. + VisitLiteralFn visit_literal_float; + /// Visit a 64bit `double` belonging to the list identified by `sibling_list_id`. + VisitLiteralFn visit_literal_double; + /// Visit a `string` belonging to the list identified by `sibling_list_id`. + VisitLiteralFn visit_literal_string; + /// Visit a `boolean` belonging to the list identified by `sibling_list_id`. + VisitLiteralFn visit_literal_bool; + /// Visit a 64bit timestamp belonging to the list identified by `sibling_list_id`. 
+ /// The timestamp is microsecond precision and adjusted to UTC. + VisitLiteralFn visit_literal_timestamp; + /// Visit a 64bit timestamp belonging to the list identified by `sibling_list_id`. + /// The timestamp is microsecond precision with no timezone. + VisitLiteralFn visit_literal_timestamp_ntz; + /// Visit a 32bit integer `date` representing days since UNIX epoch 1970-01-01. The `date` belongs + /// to the list identified by `sibling_list_id`. + VisitLiteralFn visit_literal_date; + /// Visit binary data at the `buffer` with length `len` belonging to the list identified by + /// `sibling_list_id`. + void (*visit_literal_binary)(void *data, + uintptr_t sibling_list_id, + const uint8_t *buffer, + uintptr_t len); + /// Visit a 128bit `decimal` value with the given precision and scale. The 128bit integer + /// is split into the most significant 64 bits in `value_ms`, and the least significant 64 + /// bits in `value_ls`. The `decimal` belongs to the list identified by `sibling_list_id`. + void (*visit_literal_decimal)(void *data, + uintptr_t sibling_list_id, + uint64_t value_ms, + uint64_t value_ls, + uint8_t precision, + uint8_t scale); + /// Visit a struct literal belonging to the list identified by `sibling_list_id`. + /// The field names of the struct are in a list identified by `child_field_list_id`. + /// The values of the struct are in a list identified by `child_value_list_id`. + void (*visit_literal_struct)(void *data, + uintptr_t sibling_list_id, + uintptr_t child_field_list_id, + uintptr_t child_value_list_id); + /// Visit an array literal belonging to the list identified by `sibling_list_id`. + /// The values of the array are in a list identified by `child_list_id`. + void (*visit_literal_array)(void *data, uintptr_t sibling_list_id, uintptr_t child_list_id); + /// Visits a null value belonging to the list identified by `sibling_list_id`.
+ void (*visit_literal_null)(void *data, uintptr_t sibling_list_id); + /// Visits an `and` expression belonging to the list identified by `sibling_list_id`. + /// The sub-expressions of the array are in a list identified by `child_list_id` + VisitVariadicFn visit_and; + /// Visits an `or` expression belonging to the list identified by `sibling_list_id`. + /// The sub-expressions of the array are in a list identified by `child_list_id` + VisitVariadicFn visit_or; + /// Visits a `not` expression belonging to the list identified by `sibling_list_id`. + /// The sub-expression will be in a _one_ item list identified by `child_list_id` + VisitUnaryFn visit_not; + /// Visits a `is_null` expression belonging to the list identified by `sibling_list_id`. + /// The sub-expression will be in a _one_ item list identified by `child_list_id` + VisitUnaryFn visit_is_null; + /// Visits the `LessThan` binary operator belonging to the list identified by `sibling_list_id`. + /// The operands will be in a _two_ item list identified by `child_list_id` + VisitBinaryOpFn visit_lt; + /// Visits the `LessThanOrEqual` binary operator belonging to the list identified by `sibling_list_id`. + /// The operands will be in a _two_ item list identified by `child_list_id` + VisitBinaryOpFn visit_le; + /// Visits the `GreaterThan` binary operator belonging to the list identified by `sibling_list_id`. + /// The operands will be in a _two_ item list identified by `child_list_id` + VisitBinaryOpFn visit_gt; + /// Visits the `GreaterThanOrEqual` binary operator belonging to the list identified by `sibling_list_id`. + /// The operands will be in a _two_ item list identified by `child_list_id` + VisitBinaryOpFn visit_ge; + /// Visits the `Equal` binary operator belonging to the list identified by `sibling_list_id`. 
+ /// The operands will be in a _two_ item list identified by `child_list_id` + VisitBinaryOpFn visit_eq; + /// Visits the `NotEqual` binary operator belonging to the list identified by `sibling_list_id`. + /// The operands will be in a _two_ item list identified by `child_list_id` + VisitBinaryOpFn visit_ne; + /// Visits the `Distinct` binary operator belonging to the list identified by `sibling_list_id`. + /// The operands will be in a _two_ item list identified by `child_list_id` + VisitBinaryOpFn visit_distinct; + /// Visits the `In` binary operator belonging to the list identified by `sibling_list_id`. + /// The operands will be in a _two_ item list identified by `child_list_id` + VisitBinaryOpFn visit_in; + /// Visits the `NotIn` binary operator belonging to the list identified by `sibling_list_id`. + /// The operands will be in a _two_ item list identified by `child_list_id` + VisitBinaryOpFn visit_not_in; + /// Visits the `Add` binary operator belonging to the list identified by `sibling_list_id`. + /// The operands will be in a _two_ item list identified by `child_list_id` + VisitBinaryOpFn visit_add; + /// Visits the `Minus` binary operator belonging to the list identified by `sibling_list_id`. + /// The operands will be in a _two_ item list identified by `child_list_id` + VisitBinaryOpFn visit_minus; + /// Visits the `Multiply` binary operator belonging to the list identified by `sibling_list_id`. + /// The operands will be in a _two_ item list identified by `child_list_id` + VisitBinaryOpFn visit_multiply; + /// Visits the `Divide` binary operator belonging to the list identified by `sibling_list_id`. + /// The operands will be in a _two_ item list identified by `child_list_id` + VisitBinaryOpFn visit_divide; + /// Visits the `column` belonging to the list identified by `sibling_list_id`. 
+ void (*visit_column)(void *data, uintptr_t sibling_list_id, KernelStringSlice name); + /// Visits a `StructExpression` belonging to the list identified by `sibling_list_id`. + /// The sub-expressions of the `StructExpression` are in a list identified by `child_list_id` + void (*visit_struct_expr)(void *data, uintptr_t sibling_list_id, uintptr_t child_list_id); }; // This trickery is from https://github.com/mozilla/cbindgen/issues/402#issuecomment-578680163 struct im_an_unused_struct_that_tricks_msvc_into_compilation { - ExternResult field; - ExternResult field2; - ExternResult field3; - ExternResult> field4; - ExternResult> field5; - ExternResult field6; - ExternResult field7; - ExternResult> field8; - ExternResult> field9; - ExternResult> field10; - ExternResult field11; + ExternResult field; + ExternResult field2; + ExternResult field3; + ExternResult> field4; + ExternResult> field5; + ExternResult field6; + ExternResult field7; + ExternResult> field8; + ExternResult> field9; + ExternResult> field10; + ExternResult field11; }; +/// An `Event` can generally be thought of as a "log message". It contains all the relevant bits such +/// that an engine can generate a log message in its format +struct Event { + /// The log message associated with the event + KernelStringSlice message; + /// Level that the event was emitted at + Level level; + /// A string that specifies in what part of the system the event occurred + KernelStringSlice target; + /// source file line number where the event occurred, or 0 (zero) if unknown + uint32_t line; + /// file where the event occurred. If unknown the slice `ptr` will be null and the len will be 0 + KernelStringSlice file; +}; + +using TracingEventFn = void(*)(Event event); + +using TracingLogLineFn = void(*)(KernelStringSlice line); + /// A predicate that can be used to skip data when scanning.
/// /// When invoking [`scan::scan`], The engine provides a pointer to the (engine's native) predicate, @@ -459,21 +530,25 @@ struct im_an_unused_struct_that_tricks_msvc_into_compilation { /// kernel each retain ownership of their respective objects, with no need to coordinate memory /// lifetimes with the other. struct EnginePredicate { - void *predicate; - uintptr_t (*visitor)(void *predicate, KernelExpressionVisitorState *state); + void *predicate; + uintptr_t (*visitor)(void *predicate, KernelExpressionVisitorState *state); }; /// Give engines an easy way to consume stats struct Stats { - /// For any file where the deletion vector is not present (see [`DvInfo::has_vector`]), the - /// `num_records` statistic must be present and accurate, and must equal the number of records - /// in the data file. In the presence of Deletion Vectors the statistics may be somewhat - /// outdated, i.e. not reflecting deleted rows yet. - uint64_t num_records; + /// For any file where the deletion vector is not present (see [`DvInfo::has_vector`]), the + /// `num_records` statistic must be present and accurate, and must equal the number of records + /// in the data file. In the presence of Deletion Vectors the statistics may be somewhat + /// outdated, i.e. not reflecting deleted rows yet. + uint64_t num_records; }; -using CScanCallback = void (*)(NullableCvoid engine_context, KernelStringSlice path, int64_t size, const Stats *stats, - const DvInfo *dv_info, const CStringMap *partition_map); +using CScanCallback = void(*)(NullableCvoid engine_context, + KernelStringSlice path, + int64_t size, + const Stats *stats, + const DvInfo *dv_info, + const CStringMap *partition_map); /// The `EngineSchemaVisitor` defines a visitor system to allow engines to build their own /// representation of a schema from a particular schema within kernel. @@ -501,49 +576,61 @@ using CScanCallback = void (*)(NullableCvoid engine_context, KernelStringSlice p /// that element's (already-visited) children. 
/// 4. The [`visit_schema`] method returns the id of the list of top-level columns struct EngineSchemaVisitor { - /// opaque state pointer - void *data; - /// Creates a new field list, optionally reserving capacity up front - uintptr_t (*make_field_list)(void *data, uintptr_t reserve); - /// Indicate that the schema contains a `Struct` type. The top level of a Schema is always a - /// `Struct`. The fields of the `Struct` are in the list identified by `child_list_id`. - void (*visit_struct)(void *data, uintptr_t sibling_list_id, KernelStringSlice name, uintptr_t child_list_id); - /// Indicate that the schema contains an Array type. `child_list_id` will be a _one_ item list - /// with the array's element type - void (*visit_array)(void *data, uintptr_t sibling_list_id, KernelStringSlice name, bool contains_null, - uintptr_t child_list_id); - /// Indicate that the schema contains an Map type. `child_list_id` will be a _two_ item list - /// where the first element is the map's key type and the second element is the - /// map's value type - void (*visit_map)(void *data, uintptr_t sibling_list_id, KernelStringSlice name, bool value_contains_null, - uintptr_t child_list_id); - /// visit a `decimal` with the specified `precision` and `scale` - void (*visit_decimal)(void *data, uintptr_t sibling_list_id, KernelStringSlice name, uint8_t precision, - uint8_t scale); - /// Visit a `string` belonging to the list identified by `sibling_list_id`. - void (*visit_string)(void *data, uintptr_t sibling_list_id, KernelStringSlice name); - /// Visit a `long` belonging to the list identified by `sibling_list_id`. - void (*visit_long)(void *data, uintptr_t sibling_list_id, KernelStringSlice name); - /// Visit an `integer` belonging to the list identified by `sibling_list_id`. - void (*visit_integer)(void *data, uintptr_t sibling_list_id, KernelStringSlice name); - /// Visit a `short` belonging to the list identified by `sibling_list_id`. 
- void (*visit_short)(void *data, uintptr_t sibling_list_id, KernelStringSlice name); - /// Visit a `byte` belonging to the list identified by `sibling_list_id`. - void (*visit_byte)(void *data, uintptr_t sibling_list_id, KernelStringSlice name); - /// Visit a `float` belonging to the list identified by `sibling_list_id`. - void (*visit_float)(void *data, uintptr_t sibling_list_id, KernelStringSlice name); - /// Visit a `double` belonging to the list identified by `sibling_list_id`. - void (*visit_double)(void *data, uintptr_t sibling_list_id, KernelStringSlice name); - /// Visit a `boolean` belonging to the list identified by `sibling_list_id`. - void (*visit_boolean)(void *data, uintptr_t sibling_list_id, KernelStringSlice name); - /// Visit `binary` belonging to the list identified by `sibling_list_id`. - void (*visit_binary)(void *data, uintptr_t sibling_list_id, KernelStringSlice name); - /// Visit a `date` belonging to the list identified by `sibling_list_id`. - void (*visit_date)(void *data, uintptr_t sibling_list_id, KernelStringSlice name); - /// Visit a `timestamp` belonging to the list identified by `sibling_list_id`. - void (*visit_timestamp)(void *data, uintptr_t sibling_list_id, KernelStringSlice name); - /// Visit a `timestamp` with no timezone belonging to the list identified by `sibling_list_id`. - void (*visit_timestamp_ntz)(void *data, uintptr_t sibling_list_id, KernelStringSlice name); + /// opaque state pointer + void *data; + /// Creates a new field list, optionally reserving capacity up front + uintptr_t (*make_field_list)(void *data, uintptr_t reserve); + /// Indicate that the schema contains a `Struct` type. The top level of a Schema is always a + /// `Struct`. The fields of the `Struct` are in the list identified by `child_list_id`. + void (*visit_struct)(void *data, + uintptr_t sibling_list_id, + KernelStringSlice name, + uintptr_t child_list_id); + /// Indicate that the schema contains an Array type. 
`child_list_id` will be a _one_ item list + /// with the array's element type + void (*visit_array)(void *data, + uintptr_t sibling_list_id, + KernelStringSlice name, + bool contains_null, + uintptr_t child_list_id); + /// Indicate that the schema contains an Map type. `child_list_id` will be a _two_ item list + /// where the first element is the map's key type and the second element is the + /// map's value type + void (*visit_map)(void *data, + uintptr_t sibling_list_id, + KernelStringSlice name, + bool value_contains_null, + uintptr_t child_list_id); + /// visit a `decimal` with the specified `precision` and `scale` + void (*visit_decimal)(void *data, + uintptr_t sibling_list_id, + KernelStringSlice name, + uint8_t precision, + uint8_t scale); + /// Visit a `string` belonging to the list identified by `sibling_list_id`. + void (*visit_string)(void *data, uintptr_t sibling_list_id, KernelStringSlice name); + /// Visit a `long` belonging to the list identified by `sibling_list_id`. + void (*visit_long)(void *data, uintptr_t sibling_list_id, KernelStringSlice name); + /// Visit an `integer` belonging to the list identified by `sibling_list_id`. + void (*visit_integer)(void *data, uintptr_t sibling_list_id, KernelStringSlice name); + /// Visit a `short` belonging to the list identified by `sibling_list_id`. + void (*visit_short)(void *data, uintptr_t sibling_list_id, KernelStringSlice name); + /// Visit a `byte` belonging to the list identified by `sibling_list_id`. + void (*visit_byte)(void *data, uintptr_t sibling_list_id, KernelStringSlice name); + /// Visit a `float` belonging to the list identified by `sibling_list_id`. + void (*visit_float)(void *data, uintptr_t sibling_list_id, KernelStringSlice name); + /// Visit a `double` belonging to the list identified by `sibling_list_id`. + void (*visit_double)(void *data, uintptr_t sibling_list_id, KernelStringSlice name); + /// Visit a `boolean` belonging to the list identified by `sibling_list_id`. 
+ void (*visit_boolean)(void *data, uintptr_t sibling_list_id, KernelStringSlice name); + /// Visit `binary` belonging to the list identified by `sibling_list_id`. + void (*visit_binary)(void *data, uintptr_t sibling_list_id, KernelStringSlice name); + /// Visit a `date` belonging to the list identified by `sibling_list_id`. + void (*visit_date)(void *data, uintptr_t sibling_list_id, KernelStringSlice name); + /// Visit a `timestamp` belonging to the list identified by `sibling_list_id`. + void (*visit_timestamp)(void *data, uintptr_t sibling_list_id, KernelStringSlice name); + /// Visit a `timestamp` with no timezone belonging to the list identified by `sibling_list_id`. + void (*visit_timestamp_ntz)(void *data, uintptr_t sibling_list_id, KernelStringSlice name); }; extern "C" { @@ -572,7 +659,8 @@ void free_engine_data(Handle engine_data); /// /// # Safety /// Caller is responsible for passing a valid path pointer. -ExternResult get_engine_builder(KernelStringSlice path, AllocateErrorFn allocate_error); +ExternResult get_engine_builder(KernelStringSlice path, + AllocateErrorFn allocate_error); #endif #if defined(DEFINE_DEFAULT_ENGINE) @@ -599,7 +687,8 @@ ExternResult> builder_build(EngineBuilder *builder); /// # Safety /// /// Caller is responsible for passing a valid path pointer. -ExternResult> get_default_engine(KernelStringSlice path, AllocateErrorFn allocate_error); +ExternResult> get_default_engine(KernelStringSlice path, + AllocateErrorFn allocate_error); #endif #if defined(DEFINE_SYNC_ENGINE) @@ -619,7 +708,8 @@ void free_engine(Handle engine); /// # Safety /// /// Caller is responsible for passing valid handles and path pointer. 
-ExternResult> snapshot(KernelStringSlice path, Handle engine); +ExternResult> snapshot(KernelStringSlice path, + Handle engine); /// # Safety /// @@ -645,7 +735,8 @@ NullableCvoid snapshot_table_root(Handle snapshot, AllocateStrin /// /// The iterator must be valid (returned by [kernel_scan_data_init]) and not yet freed by /// [kernel_scan_data_free]. The visitor function pointer must be non-null. -bool string_slice_next(Handle data, NullableCvoid engine_context, +bool string_slice_next(Handle data, + NullableCvoid engine_context, void (*engine_visitor)(NullableCvoid engine_context, KernelStringSlice slice)); /// # Safety @@ -676,7 +767,8 @@ void *get_raw_engine_data(Handle data); /// # Safety /// data_handle must be a valid ExclusiveEngineData as read by the /// [`delta_kernel::engine::default::DefaultEngine`] obtained from `get_default_engine`. -ExternResult get_raw_arrow_data(Handle data, Handle engine); +ExternResult get_raw_arrow_data(Handle data, + Handle engine); #endif /// Call the engine back with the next `EngingeData` batch read by Parquet/Json handler. The @@ -688,7 +780,8 @@ ExternResult get_raw_arrow_data(Handle data /// /// The iterator must be valid (returned by [`read_parquet_file`]) and not yet freed by /// [`free_read_result_iter`]. The visitor function pointer must be non-null. 
-ExternResult read_result_next(Handle data, NullableCvoid engine_context, +ExternResult read_result_next(Handle data, + NullableCvoid engine_context, void (*engine_visitor)(NullableCvoid engine_context, Handle engine_data)); @@ -703,8 +796,9 @@ void free_read_result_iter(Handle data); /// /// # Safety /// Caller is responsible for calling with a valid `ExternEngineHandle` and `FileMeta` -ExternResult> -read_parquet_file(Handle engine, const FileMeta *file, Handle physical_schema); +ExternResult> read_parquet_file(Handle engine, + const FileMeta *file, + Handle physical_schema); uintptr_t visit_expression_and(KernelExpressionVisitorState *state, EngineIterator *children); @@ -720,7 +814,8 @@ uintptr_t visit_expression_eq(KernelExpressionVisitorState *state, uintptr_t a, /// # Safety /// The string slice must be valid -ExternResult visit_expression_column(KernelExpressionVisitorState *state, KernelStringSlice name, +ExternResult visit_expression_column(KernelExpressionVisitorState *state, + KernelStringSlice name, AllocateErrorFn allocate_error); uintptr_t visit_expression_not(KernelExpressionVisitorState *state, uintptr_t inner_expr); @@ -729,7 +824,8 @@ uintptr_t visit_expression_is_null(KernelExpressionVisitorState *state, uintptr_ /// # Safety /// The string slice must be valid -ExternResult visit_expression_literal_string(KernelExpressionVisitorState *state, KernelStringSlice value, +ExternResult visit_expression_literal_string(KernelExpressionVisitorState *state, + KernelStringSlice value, AllocateErrorFn allocate_error); uintptr_t visit_expression_literal_int(KernelExpressionVisitorState *state, int32_t value); @@ -761,7 +857,84 @@ void free_kernel_predicate(Handle data); /// # Safety /// /// The caller must pass a valid SharedExpression Handle and expression visitor -uintptr_t visit_expression(const Handle *expression, EngineExpressionVisitor *visitor); +uintptr_t visit_expression(const Handle *expression, + EngineExpressionVisitor *visitor); + +/// Enable 
getting called back for tracing (logging) events in the kernel. `max_level` specifies +/// that only events `<=` to the specified level should be reported. More verbose Levels are "greater +/// than" less verbose ones. So Level::ERROR is the lowest, and Level::TRACE the highest. +/// +/// Note that setting up such a call back can only be done ONCE. Calling any of +/// `enable_event_tracing`, `enable_log_line_tracing`, or `enable_formatted_log_line_tracing` more +/// than once is a no-op. +/// +/// Returns `true` if the callback was setup successfully, false on failure (i.e. if called a second +/// time) +/// +/// [`event`] based tracing gives an engine maximal flexibility in formatting event log +/// lines. Kernel can also format events for the engine. If this is desired call +/// [`enable_log_line_tracing`] instead of this method. +/// +/// # Safety +/// Caller must pass a valid function pointer for the callback +bool enable_event_tracing(TracingEventFn callback, + Level max_level); + +/// Enable getting called back with log lines in the kernel using default settings: +/// - FULL format +/// - include ansi color +/// - include timestamps +/// - include level +/// - include target +/// +/// `max_level` specifies that only logs `<=` to the specified level should be reported. More +/// verbose Levels are "greater than" less verbose ones. So Level::ERROR is the lowest, and +/// Level::TRACE the highest. +/// +/// Log lines passed to the callback will already have a newline at the end. +/// +/// Note that setting up such a call back can only be done ONCE. Calling any of +/// `enable_event_tracing`, `enable_log_line_tracing`, or `enable_formatted_log_line_tracing` more +/// than once is a no-op. +/// +/// Returns `true` if the callback was setup successfully, false on failure (i.e. 
if called a second +/// time) +/// +/// Log line based tracing is simple for an engine as it can just log the passed string, but does +/// not provide flexibility for an engine to format events. If the engine wants to use a specific +/// format for events it should call [`enable_event_tracing`] instead of this function. +/// +/// # Safety +/// Caller must pass a valid function pointer for the callback +bool enable_log_line_tracing(TracingLogLineFn callback, Level max_level); + +/// Enable getting called back with log lines in the kernel. This variant allows specifying +/// formatting options for the log lines. See [`enable_log_line_tracing`] for general info on +/// getting called back for log lines. +/// +/// Note that setting up such a call back can only be done ONCE. Calling any of +/// `enable_event_tracing`, `enable_log_line_tracing`, or `enable_formatted_log_line_tracing` more +/// than once is a no-op. +/// +/// Returns `true` if the callback was setup successfully, false on failure (i.e. if called a second +/// time) +/// +/// Options that can be set: +/// - `format`: see [`LogLineFormat`] +/// - `ansi`: should the formatter use ansi escapes for color +/// - `with_time`: should the formatter include a timestamp in the log message +/// - `with_level`: should the formatter include the level in the log message +/// - `with_target`: should the formatter include what part of the system the event occurred +/// +/// # Safety +/// Caller must pass a valid function pointer for the callback +bool enable_formatted_log_line_tracing(TracingLogLineFn callback, + Level max_level, + LogLineFormat format, + bool ansi, + bool with_time, + bool with_level, + bool with_target); /// Drops a scan. 
/// # Safety @@ -774,7 +947,8 @@ void free_scan(Handle scan); /// # Safety /// /// Caller is responsible for passing a valid snapshot pointer, and engine pointer -ExternResult> scan(Handle snapshot, Handle engine, +ExternResult> scan(Handle snapshot, + Handle engine, EnginePredicate *predicate); /// Get the global state for a scan. See the docs for [`delta_kernel::scan::state::GlobalScanState`] @@ -832,7 +1006,8 @@ ExternResult> kernel_scan_data_init(Handle kernel_scan_data_next(Handle data, NullableCvoid engine_context, +ExternResult kernel_scan_data_next(Handle data, + NullableCvoid engine_context, void (*engine_visitor)(NullableCvoid engine_context, Handle engine_data, KernelBoolSlice selection_vector)); @@ -850,20 +1025,24 @@ void free_kernel_scan_data(Handle data); /// # Safety /// /// The engine is responsible for providing a valid [`CStringMap`] pointer and [`KernelStringSlice`] -NullableCvoid get_from_map(const CStringMap *map, KernelStringSlice key, AllocateStringFn allocate_fn); +NullableCvoid get_from_map(const CStringMap *map, + KernelStringSlice key, + AllocateStringFn allocate_fn); /// Get a selection vector out of a [`DvInfo`] struct /// /// # Safety /// Engine is responsible for providing valid pointers for each argument -ExternResult selection_vector_from_dv(const DvInfo *dv_info, Handle engine, +ExternResult selection_vector_from_dv(const DvInfo *dv_info, + Handle engine, Handle state); /// Get a vector of row indexes out of a [`DvInfo`] struct /// /// # Safety /// Engine is responsible for providing valid pointers for each argument -ExternResult row_indexes_from_dv(const DvInfo *dv_info, Handle engine, +ExternResult row_indexes_from_dv(const DvInfo *dv_info, + Handle engine, Handle state); /// Shim for ffi to call visit_scan_data. 
This will generally be called when iterating through scan @@ -871,7 +1050,9 @@ ExternResult row_indexes_from_dv(const DvInfo *dv_info, Han /// /// # Safety /// engine is responsbile for passing a valid [`ExclusiveEngineData`] and selection vector. -void visit_scan_data(Handle data, KernelBoolSlice selection_vec, NullableCvoid engine_context, +void visit_scan_data(Handle data, + KernelBoolSlice selection_vec, + NullableCvoid engine_context, CScanCallback callback); /// Visit the schema of the passed `SnapshotHandle`, using the provided `visitor`. See the @@ -892,6 +1073,6 @@ uintptr_t visit_schema(Handle snapshot, EngineSchemaVisitor *vis /// [`free_kernel_predicate`], or [`Handle::drop_handle`] Handle get_testing_kernel_expression(); -} // extern "C" +} // extern "C" -} // namespace ffi +} // namespace ffi From 6c3c795f798a339ab63c5c6bb355ffab26030b99 Mon Sep 17 00:00:00 2001 From: Sam Ansmink Date: Thu, 19 Dec 2024 13:38:19 +0100 Subject: [PATCH 32/45] add missing windows lib --- CMakeLists.txt | 1 + 1 file changed, 1 insertion(+) diff --git a/CMakeLists.txt b/CMakeLists.txt index 3f0dfcc..3fc459e 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -36,6 +36,7 @@ elseif(UNIX) elseif(WIN32) set(PLATFORM_LIBS ntdll + crypt32 ncrypt secur32 ws2_32 From 829103eca2bbc6717e05be022a6c451e6eff282f Mon Sep 17 00:00:00 2001 From: Sam Ansmink Date: Thu, 19 Dec 2024 16:02:56 +0100 Subject: [PATCH 33/45] add missing test --- test/sql/cloud/gcs.test | 22 ++++++++++++++++++++++ 1 file changed, 22 insertions(+) create mode 100644 test/sql/cloud/gcs.test diff --git a/test/sql/cloud/gcs.test b/test/sql/cloud/gcs.test new file mode 100644 index 0000000..5a12aff --- /dev/null +++ b/test/sql/cloud/gcs.test @@ -0,0 +1,22 @@ +# name: test/sql/cloud/minio_local/gcs_r2.test +# description: test delta extension with GCS and R2 +# group: [aws] + +require httpfs + +require parquet + +require delta + +statement ok +CREATE SECRET ( + TYPE GCS, + KEY_ID 'SOME_KEY', + SECRET 'SOME SECRET' +); + 
+statement error +select col1, col2 from delta_scan('gcs://some-bucket/some-table'); +---- +403 + From 9876698fc6fe019c2f67bef4e02207b267282007 Mon Sep 17 00:00:00 2001 From: Sam Ansmink Date: Thu, 19 Dec 2024 17:21:34 +0100 Subject: [PATCH 34/45] add kernel expression test function based test --- CMakeLists.txt | 1 + src/delta_extension.cpp | 13 +- src/delta_functions.cpp | 14 +- src/delta_utils.cpp | 299 ++++++++++++++++++++++++++++- src/include/delta_functions.hpp | 9 +- src/include/delta_utils.hpp | 114 ++++++++++- test/sql/main/test_expression.test | 48 +++++ 7 files changed, 485 insertions(+), 13 deletions(-) create mode 100644 test/sql/main/test_expression.test diff --git a/CMakeLists.txt b/CMakeLists.txt index 1113da8..da8dfd2 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -16,6 +16,7 @@ set(EXTENSION_SOURCES src/delta_functions.cpp src/delta_utils.cpp src/functions/delta_scan.cpp + src/functions/expression_functions.cpp src/storage/delta_catalog.cpp src/storage/delta_schema_entry.cpp src/storage/delta_table_entry.cpp diff --git a/src/delta_extension.cpp b/src/delta_extension.cpp index 0c21ade..5695acc 100644 --- a/src/delta_extension.cpp +++ b/src/delta_extension.cpp @@ -44,10 +44,15 @@ class DeltaStorageExtension : public StorageExtension { }; static void LoadInternal(DatabaseInstance &instance) { - // Load functions - for (const auto &function : DeltaFunctions::GetTableFunctions(instance)) { - ExtensionUtil::RegisterFunction(instance, function); - } + // Load Table functions + for (const auto &function : DeltaFunctions::GetTableFunctions(instance)) { + ExtensionUtil::RegisterFunction(instance, function); + } + + // Load Scalar functions + for (const auto &function : DeltaFunctions::GetScalarFunctions(instance)) { + ExtensionUtil::RegisterFunction(instance, function); + } // Register the "single table" delta catalog (to ATTACH a single delta table) auto &config = DBConfig::GetConfig(instance); diff --git a/src/delta_functions.cpp 
b/src/delta_functions.cpp index e79894b..8feb6aa 100644 --- a/src/delta_functions.cpp +++ b/src/delta_functions.cpp @@ -2,8 +2,7 @@ #include "duckdb.hpp" #include "duckdb/main/extension_util.hpp" - -#include +#include "duckdb/parser/parsed_data/create_scalar_function_info.hpp" namespace duckdb { @@ -15,4 +14,13 @@ vector DeltaFunctions::GetTableFunctions(DatabaseInstance &ins return functions; } -}; // namespace duckdb +vector DeltaFunctions::GetScalarFunctions(DatabaseInstance &instance) { + vector functions; + + functions.push_back(GetExpressionFunction(instance)); + + return functions; +} + + +}; diff --git a/src/delta_utils.cpp b/src/delta_utils.cpp index ae42676..6b3c03c 100644 --- a/src/delta_utils.cpp +++ b/src/delta_utils.cpp @@ -1,16 +1,309 @@ #include "delta_utils.hpp" +#include + #include "duckdb.hpp" #include "duckdb/main/extension_util.hpp" #include #include +#include "duckdb/parser/expression/conjunction_expression.hpp" +#include "duckdb/parser/expression/comparison_expression.hpp" +#include "duckdb/parser/expression/function_expression.hpp" +#include "duckdb/parser/expression/operator_expression.hpp" +#include "duckdb/common/types/decimal.hpp" + namespace duckdb { -unique_ptr SchemaVisitor::VisitSnapshotSchema(ffi::SharedSnapshot *snapshot) { - SchemaVisitor state; - ffi::EngineSchemaVisitor visitor; +void ExpressionVisitor::VisitComparisonExpression(void *state, uintptr_t sibling_list_id, uintptr_t child_list_id) { + auto state_cast = static_cast(state); + + auto children = state_cast->TakeFieldList(child_list_id); + if (!children) { + return; + } + + auto &lhs = children->at(0); + auto &rhs = children->at(1); + unique_ptr expression = make_uniq(ExpressionType::COMPARE_LESSTHAN, std::move(lhs), std::move(rhs)); + state_cast->AppendToList(sibling_list_id, std::move(expression)); +} + +unique_ptr>> ExpressionVisitor::VisitKernelExpression(const ffi::Handle *expression) { + ExpressionVisitor state; + ffi::EngineExpressionVisitor visitor; + + 
visitor.data = &state; + visitor.make_field_list = (uintptr_t (*)(void*, uintptr_t)) &MakeFieldList; + + // Templated primitive functions + visitor.visit_literal_bool = VisitPrimitiveLiteral(); + visitor.visit_literal_byte = VisitPrimitiveLiteralByte; + visitor.visit_literal_short = VisitPrimitiveLiteralShort; + visitor.visit_literal_int = VisitPrimitiveLiteralInt; + visitor.visit_literal_long = VisitPrimitiveLiteralLong; + visitor.visit_literal_float = VisitPrimitiveLiteralFloat; + visitor.visit_literal_double = VisitPrimitiveLiteralDouble; + + visitor.visit_literal_decimal = VisitDecimalLiteral; + + // Custom Implementations + visitor.visit_literal_timestamp = &VisitTimestampLiteral; + visitor.visit_literal_timestamp_ntz = &VisitTimestampNtzLiteral; + visitor.visit_literal_date = &VisitDateLiteral; + + visitor.visit_literal_string = &VisitStringLiteral; + + visitor.visit_literal_binary = &VisitBinaryLiteral; + visitor.visit_literal_null = &VisitNullLiteral; + visitor.visit_literal_array = &VisitArrayLiteral; + + visitor.visit_and = VisitVariadicExpression(); + visitor.visit_or = VisitVariadicExpression(); + + visitor.visit_lt = VisitBinaryExpression(); + visitor.visit_le = VisitBinaryExpression(); + visitor.visit_gt = VisitBinaryExpression(); + visitor.visit_ge = VisitBinaryExpression(); + + visitor.visit_eq = VisitBinaryExpression(); + visitor.visit_ne = VisitBinaryExpression(); + visitor.visit_distinct = VisitBinaryExpression(); + + visitor.visit_in = VisitVariadicExpression(); + visitor.visit_not_in = VisitVariadicExpression(); + + visitor.visit_add = VisitAdditionExpression; + visitor.visit_minus = VisitSubctractionExpression; + visitor.visit_multiply = VisitMultiplyExpression; + visitor.visit_divide = VisitDivideExpression; + + visitor.visit_column = &VisitColumnExpression; + visitor.visit_struct_expr = &VisitStructExpression; + + visitor.visit_literal_struct = &VisitStructLiteral; + + visitor.visit_not = &VisitNotExpression; + visitor.visit_is_null = 
&VisitIsNullExpression; + + uintptr_t result = ffi::visit_expression(expression, &visitor); + + if (state.error.HasError()) { + state.error.Throw(); + } + + return state.TakeFieldList(result); +} + +void ExpressionVisitor::VisitAdditionExpression(void *state, uintptr_t sibling_list_id, uintptr_t child_list_id) { + auto state_cast = static_cast(state); + auto children = state_cast->TakeFieldList(child_list_id); + if (!children) { + return; + } + unique_ptr expression = make_uniq("+", std::move(*children), nullptr, nullptr, false, true); + state_cast->AppendToList(sibling_list_id, std::move(expression)); +} + +void ExpressionVisitor::VisitSubctractionExpression(void *state, uintptr_t sibling_list_id, uintptr_t child_list_id) { + auto state_cast = static_cast(state); + auto children = state_cast->TakeFieldList(child_list_id); + if (!children) { + return; + } + unique_ptr expression = make_uniq("-", std::move(*children), nullptr, nullptr, false, true); + state_cast->AppendToList(sibling_list_id, std::move(expression)); +} + +void ExpressionVisitor::VisitDivideExpression(void *state, uintptr_t sibling_list_id, uintptr_t child_list_id) { + auto state_cast = static_cast(state); + auto children = state_cast->TakeFieldList(child_list_id); + if (!children) { + return; + } + unique_ptr expression = make_uniq("/", std::move(*children), nullptr, nullptr, false, true); + state_cast->AppendToList(sibling_list_id, std::move(expression)); +} + +void ExpressionVisitor::VisitMultiplyExpression(void *state, uintptr_t sibling_list_id, uintptr_t child_list_id) { + auto state_cast = static_cast(state); + auto children = state_cast->TakeFieldList(child_list_id); + if (!children) { + return; + } + unique_ptr expression = make_uniq("*", std::move(*children), nullptr, nullptr, false, true); + state_cast->AppendToList(sibling_list_id, std::move(expression)); +} + +void ExpressionVisitor::VisitPrimitiveLiteralBool(void* state, uintptr_t sibling_list_id, bool value) { + auto expression = 
make_uniq(Value::BOOLEAN(value)); + static_cast(state)->AppendToList(sibling_list_id, std::move(expression)); +} +void ExpressionVisitor::VisitPrimitiveLiteralByte(void* state, uintptr_t sibling_list_id, int8_t value) { + auto expression = make_uniq(Value::TINYINT(value)); + static_cast(state)->AppendToList(sibling_list_id, std::move(expression)); +} +void ExpressionVisitor::VisitPrimitiveLiteralShort(void* state, uintptr_t sibling_list_id, int16_t value) { + auto expression = make_uniq(Value::SMALLINT(value)); + static_cast(state)->AppendToList(sibling_list_id, std::move(expression)); +} +void ExpressionVisitor::VisitPrimitiveLiteralInt(void* state, uintptr_t sibling_list_id, int32_t value) { + auto expression = make_uniq(Value::INTEGER(value)); + static_cast(state)->AppendToList(sibling_list_id, std::move(expression)); +} +void ExpressionVisitor::VisitPrimitiveLiteralLong(void* state, uintptr_t sibling_list_id, int64_t value) { + auto expression = make_uniq(Value::BIGINT(value)); + static_cast(state)->AppendToList(sibling_list_id, std::move(expression)); +} +void ExpressionVisitor::VisitPrimitiveLiteralFloat(void* state, uintptr_t sibling_list_id, float value) { + auto expression = make_uniq(Value::FLOAT(value)); + static_cast(state)->AppendToList(sibling_list_id, std::move(expression)); +} +void ExpressionVisitor::VisitPrimitiveLiteralDouble(void* state, uintptr_t sibling_list_id, double value) { + auto expression = make_uniq(Value::DOUBLE(value)); + static_cast(state)->AppendToList(sibling_list_id, std::move(expression)); +} + +void ExpressionVisitor::VisitTimestampLiteral(void* state, uintptr_t sibling_list_id, int64_t value) { + auto expression = make_uniq(Value::TIMESTAMPTZ(static_cast(value))); + static_cast(state)->AppendToList(sibling_list_id, std::move(expression)); +} + +void ExpressionVisitor::VisitTimestampNtzLiteral(void* state, uintptr_t sibling_list_id, int64_t value) { + auto expression = make_uniq(Value::TIMESTAMP(static_cast(value))); + 
static_cast(state)->AppendToList(sibling_list_id, std::move(expression)); +} + +void ExpressionVisitor::VisitDateLiteral(void* state, uintptr_t sibling_list_id, int32_t value) { + auto expression = make_uniq(Value::DATE(static_cast(value))); + static_cast(state)->AppendToList(sibling_list_id, std::move(expression)); +} + +void ExpressionVisitor::VisitStringLiteral(void* state, uintptr_t sibling_list_id, ffi::KernelStringSlice value) { + auto expression = make_uniq(Value(string(value.ptr, value.len))); + static_cast(state)->AppendToList(sibling_list_id, std::move(expression)); +} +void ExpressionVisitor::VisitBinaryLiteral(void* state, uintptr_t sibling_list_id, const uint8_t *buffer, uintptr_t len) { + auto expression = make_uniq(Value::BLOB(buffer, len)); + static_cast(state)->AppendToList(sibling_list_id, std::move(expression)); +} +void ExpressionVisitor::VisitNullLiteral(void* state, uintptr_t sibling_list_id) { + auto expression = make_uniq(Value()); + static_cast(state)->AppendToList(sibling_list_id, std::move(expression)); +} +void ExpressionVisitor::VisitArrayLiteral(void* state, uintptr_t sibling_list_id, uintptr_t child_id) { + auto state_cast = static_cast(state); + auto children = state_cast->TakeFieldList(child_id); + if (!children) { + return; + } + unique_ptr expression = make_uniq("list_value", std::move(*children)); + state_cast->AppendToList(sibling_list_id, std::move(expression)); +} + +void ExpressionVisitor::VisitStructLiteral(void *state, uintptr_t sibling_list_id, uintptr_t child_field_list_value, uintptr_t child_value_list_id) { + auto state_cast = static_cast(state); + + auto children_keys = state_cast->TakeFieldList(child_field_list_value); + auto children_values = state_cast->TakeFieldList(child_value_list_id); + if (!children_values || !children_keys) { + return; + } + + if (children_values->size() != children_keys->size()) { + state_cast->error = ErrorData("Size of Keys and Values vector do not match in 
ExpressionVisitor::VisitStructLiteral"); + return; + } + + for (idx_t i = 0; i < children_keys->size(); i++) { + (*children_values)[i]->alias = (*children_keys)[i]->ToString(); + } + + unique_ptr expression = make_uniq("struct_pack", std::move(*children_values)); + state_cast->AppendToList(sibling_list_id, std::move(expression)); +} + +void ExpressionVisitor::VisitNotExpression(void *state, uintptr_t sibling_list_id, uintptr_t child_list_id) { + auto state_cast = static_cast(state); + auto children = state_cast->TakeFieldList(child_list_id); + if (!children) { + return; + } + unique_ptr expression = make_uniq("NOT", std::move(*children), nullptr, nullptr, false, true); + state_cast->AppendToList(sibling_list_id, std::move(expression)); +} + +void ExpressionVisitor::VisitIsNullExpression(void *state, uintptr_t sibling_list_id, uintptr_t child_list_id) { + auto state_cast = static_cast(state); + auto children = state_cast->TakeFieldList(child_list_id); + if (!children) { + return; + } + + children->push_back(make_uniq(Value())); + unique_ptr expression = make_uniq("IS", std::move(*children), nullptr, nullptr, false, true); + state_cast->AppendToList(sibling_list_id, std::move(expression)); +} + +// FIXME: this is not 100% correct yet: value_ms is ignored +void ExpressionVisitor::VisitDecimalLiteral(void *state, uintptr_t sibling_list_id, uint64_t value_ms, uint64_t value_ls, uint8_t precision, uint8_t scale) { + try { + if (precision >= Decimal::MAX_WIDTH_INT64 || value_ls > (uint64_t)NumericLimits::Maximum()) { + throw NotImplementedException("ExpressionVisitor::VisitDecimalLiteral HugeInt decimals"); + } + auto expression = make_uniq(Value::DECIMAL(42, 18, 10)); + static_cast(state)->AppendToList(sibling_list_id, std::move(expression)); + } catch (Exception &e) { + static_cast(state)->error = ErrorData(e); + } +} + +void ExpressionVisitor::VisitColumnExpression(void *state, uintptr_t sibling_list_id, ffi::KernelStringSlice name) { + auto expression = 
make_uniq(string(name.ptr, name.len)); + static_cast(state)->AppendToList(sibling_list_id, std::move(expression)); +} +void ExpressionVisitor::VisitStructExpression(void *state, uintptr_t sibling_list_id, uintptr_t child_list_id) { + static_cast(state)->AppendToList(sibling_list_id, std::move(make_uniq(Value(42)))); +} + +uintptr_t ExpressionVisitor::MakeFieldList(ExpressionVisitor* state, uintptr_t capacity_hint) { + return state->MakeFieldListImpl(capacity_hint); +} +uintptr_t ExpressionVisitor::MakeFieldListImpl(uintptr_t capacity_hint) { + uintptr_t id = next_id++; + auto list = make_uniq(); + if (capacity_hint > 0) { + list->reserve(capacity_hint); + } + inflight_lists.emplace(id, std::move(list)); + return id; +} + +void ExpressionVisitor::AppendToList(uintptr_t id, unique_ptr child) { + auto it = inflight_lists.find(id); + if (it == inflight_lists.end()) { + error = ErrorData("ExpressionVisitor::AppendToList could not find " + Value::UBIGINT(id).ToString()); + return; + } + + it->second->emplace_back(std::move(child)); +} + +unique_ptr ExpressionVisitor::TakeFieldList(uintptr_t id) { + auto it = inflight_lists.find(id); + if (it == inflight_lists.end()) { + error = ErrorData("ExpressionVisitor::TakeFieldList could not find " + Value::UBIGINT(id).ToString()); + return nullptr; + } + auto rval = std::move(it->second); + inflight_lists.erase(it); + return rval; +} + +unique_ptr SchemaVisitor::VisitSnapshotSchema(ffi::SharedSnapshot* snapshot) { + SchemaVisitor state; + ffi::EngineSchemaVisitor visitor; visitor.data = &state; visitor.make_field_list = (uintptr_t(*)(void *, uintptr_t)) & MakeFieldList; diff --git a/src/include/delta_functions.hpp b/src/include/delta_functions.hpp index 4f819cb..b68d5c6 100644 --- a/src/include/delta_functions.hpp +++ b/src/include/delta_functions.hpp @@ -14,9 +14,14 @@ namespace duckdb { class DeltaFunctions { public: - static vector GetTableFunctions(DatabaseInstance &instance); + static vector GetTableFunctions(DatabaseInstance 
&instance); + static vector GetScalarFunctions(DatabaseInstance &instance); private: - static TableFunctionSet GetDeltaScanFunction(DatabaseInstance &instance); + //! Table Functions + static TableFunctionSet GetDeltaScanFunction(DatabaseInstance &instance); + + //! Scalar Functions + static ScalarFunctionSet GetExpressionFunction(DatabaseInstance &instance); }; } // namespace duckdb diff --git a/src/include/delta_utils.hpp b/src/include/delta_utils.hpp index 8760862..5332bd8 100644 --- a/src/include/delta_utils.hpp +++ b/src/include/delta_utils.hpp @@ -4,7 +4,11 @@ #include "duckdb/common/enum_util.hpp" #include "duckdb/planner/filter/conjunction_filter.hpp" #include "duckdb/planner/filter/constant_filter.hpp" - +#include "duckdb/planner/expression.hpp" +#include "duckdb/parser/expression/constant_expression.hpp" +#include "duckdb/parser/expression/conjunction_expression.hpp" +#include "duckdb/common/error_data.hpp" +#include "duckdb/parser/expression/comparison_expression.hpp" #include #include @@ -12,6 +16,114 @@ namespace duckdb { +class ExpressionVisitor : public ffi::EngineExpressionVisitor { + using FieldList = vector>; + +public: + unique_ptr>> VisitKernelExpression(const ffi::Handle* expression); + +private: + unordered_map> inflight_lists; + uintptr_t next_id = 1; + + ErrorData error; + + + // Literals + template + static ffi::VisitLiteralFn VisitPrimitiveLiteral() { + return (ffi::VisitLiteralFn) &VisitPrimitiveLiteral; + } + template + static void VisitPrimitiveLiteral(void* state, uintptr_t sibling_list_id, CPP_TYPE value) { + auto state_cast = static_cast(state); + auto duckdb_value = CREATE_VALUE_FUN(value); + auto expression = make_uniq(duckdb_value); + state_cast->AppendToList(sibling_list_id, std::move(expression)); + } + + static void VisitPrimitiveLiteralBool(void* state, uintptr_t sibling_list_id, bool value); + static void VisitPrimitiveLiteralByte(void* state, uintptr_t sibling_list_id, int8_t value); + static void 
VisitPrimitiveLiteralShort(void* state, uintptr_t sibling_list_id, int16_t value); + static void VisitPrimitiveLiteralInt(void* state, uintptr_t sibling_list_id, int32_t value); + static void VisitPrimitiveLiteralLong(void* state, uintptr_t sibling_list_id, int64_t value); + static void VisitPrimitiveLiteralFloat(void* state, uintptr_t sibling_list_id, float value); + static void VisitPrimitiveLiteralDouble(void* state, uintptr_t sibling_list_id, double value); + + static void VisitTimestampLiteral(void* state, uintptr_t sibling_list_id, int64_t value); + static void VisitTimestampNtzLiteral(void* state, uintptr_t sibling_list_id, int64_t value); + static void VisitDateLiteral(void* state, uintptr_t sibling_list_id, int32_t value); + static void VisitStringLiteral(void* state, uintptr_t sibling_list_id, ffi::KernelStringSlice value); + static void VisitBinaryLiteral(void* state, uintptr_t sibling_list_id, const uint8_t *buffer, uintptr_t len); + static void VisitNullLiteral(void* state, uintptr_t sibling_list_id); + static void VisitArrayLiteral(void* state, uintptr_t sibling_list_id, uintptr_t child_id); + static void VisitStructLiteral(void *data, uintptr_t sibling_list_id, uintptr_t child_field_list_value, uintptr_t child_value_list_id); + static void VisitDecimalLiteral(void *state, uintptr_t sibling_list_id, uint64_t value_ms, uint64_t value_ls, uint8_t precision, uint8_t scale); + static void VisitColumnExpression(void *state, uintptr_t sibling_list_id, ffi::KernelStringSlice name); + static void VisitStructExpression(void *state, uintptr_t sibling_list_id, uintptr_t child_list_id); + static void VisitNotExpression(void *state, uintptr_t sibling_list_id, uintptr_t child_list_id); + static void VisitIsNullExpression(void *state, uintptr_t sibling_list_id, uintptr_t child_list_id); + + template + static ffi::VisitVariadicFn VisitUnaryExpression() { + return &VisitVariadicExpression; + } + template + static ffi::VisitVariadicFn VisitBinaryExpression() { + return 
&VisitBinaryExpression; + } + template + static ffi::VisitVariadicFn VisitVariadicExpression() { + return &VisitVariadicExpression; + } + + template + static void VisitVariadicExpression(void *state, uintptr_t sibling_list_id, uintptr_t child_list_id) { + auto state_cast = static_cast(state); + auto children = state_cast->TakeFieldList(child_list_id); + if (!children) { + state_cast->AppendToList(sibling_list_id, std::move(make_uniq(Value(42)))); + return; + } + unique_ptr expression = make_uniq(EXPRESSION_TYPE, std::move(*children)); + state_cast->AppendToList(sibling_list_id, std::move(expression)); + } + + static void VisitAdditionExpression(void *state, uintptr_t sibling_list_id, uintptr_t child_list_id); + static void VisitSubctractionExpression(void *state, uintptr_t sibling_list_id, uintptr_t child_list_id); + static void VisitDivideExpression(void *state, uintptr_t sibling_list_id, uintptr_t child_list_id); + static void VisitMultiplyExpression(void *state, uintptr_t sibling_list_id, uintptr_t child_list_id); + + template + static void VisitBinaryExpression(void *state, uintptr_t sibling_list_id, uintptr_t child_list_id) { + auto state_cast = static_cast(state); + auto children = state_cast->TakeFieldList(child_list_id); + if (!children) { + state_cast->AppendToList(sibling_list_id, std::move(make_uniq(Value(42)))); + return; + } + + if (children->size() != 2) { + state_cast->AppendToList(sibling_list_id, std::move(make_uniq(Value(42)))); + state_cast->error = ErrorData("INCORRECT SIZE IN VISIT_BINARY_EXPRESSION" + EnumUtil::ToString(EXPRESSION_TYPE)); + return; + } + + auto &lhs = children->at(0); + auto &rhs = children->at(1); + unique_ptr expression = make_uniq(EXPRESSION_TYPE, std::move(lhs), std::move(rhs)); + state_cast->AppendToList(sibling_list_id, std::move(expression)); + } + + static void VisitComparisonExpression(void *state, uintptr_t sibling_list_id, uintptr_t child_list_id); + + // List functions + static uintptr_t 
MakeFieldList(ExpressionVisitor* state, uintptr_t capacity_hint); + void AppendToList(uintptr_t id, unique_ptr child); + uintptr_t MakeFieldListImpl(uintptr_t capacity_hint); + unique_ptr TakeFieldList(uintptr_t id); +}; + // SchemaVisitor is used to parse the schema of a Delta table from the Kernel class SchemaVisitor { public: diff --git a/test/sql/main/test_expression.test b/test/sql/main/test_expression.test new file mode 100644 index 0000000..4de05a7 --- /dev/null +++ b/test/sql/main/test_expression.test @@ -0,0 +1,48 @@ +# name: test/sql/main/test_expression.test +# description: Test the get_delta_test_expression function +# group: [delta_generated] + +require parquet + +require delta + +# TODO still broken: +# - Decimal +# - StructExpression +query I +SELECT unnest(get_delta_test_expression()) +---- +127 +-128 +3.4028235e+38 +-3.4028235e+38 +1.7976931348623157e+308 +-1.7976931348623157e+308 +2147483647 +-2147483648 +9223372036854775807 +-9223372036854775808 +'hello expressions' +'1970-01-01 00:00:00.00005+00'::TIMESTAMP WITH TIME ZONE +'1970-01-01 00:00:00.0001'::TIMESTAMP +'1970-02-02'::DATE +'\x00\x00\xDE\xAD\xBE\xEF\xCA\xFE'::BLOB +0.0000000042 +NULL +struct_pack("'top'" := struct_pack("'a'" := 500, "'b'" := list_value(5, 0))) +list_value(5, 0) +42 +not((col is NULL)) +(0 IN (0)) +(0 + 0) +(0 - 0) +(0 = 0) +(0 != 0) +(0 NOT IN (0)) +(0 / 0) +(0 * 0) +(0 < 0) +(0 <= 0) +(0 > 0) +(0 >= 0) +(0 IS DISTINCT FROM 0) \ No newline at end of file From 252bad8d62b4348d34179e92c0e0c63fe3715ff6 Mon Sep 17 00:00:00 2001 From: Sam Ansmink Date: Thu, 19 Dec 2024 17:29:16 +0100 Subject: [PATCH 35/45] format --- CMakeLists.txt | 11 +- src/delta_extension.cpp | 18 +- src/delta_functions.cpp | 9 +- src/delta_utils.cpp | 502 +++++++++--------- src/functions/delta_scan.cpp | 139 +++-- src/include/delta_functions.hpp | 12 +- src/include/delta_kernel_ffi.hpp | 728 +++++++++++++-------------- src/include/delta_utils.hpp | 210 ++++---- src/include/functions/delta_scan.hpp | 18 +- 
9 files changed, 806 insertions(+), 841 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 13ce508..65c13ad 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -150,7 +150,8 @@ ExternalProject_Add( # Build debug build BUILD_COMMAND ${CMAKE_COMMAND} -E env ${RUST_UNSET_ENV_VARS} ${RUST_ENV_VARS} cargo build - --package delta_kernel_ffi --workspace $<$:--release> --all-features ${RUST_PLATFORM_PARAM} + --package delta_kernel_ffi --workspace $<$:--release> + --all-features ${RUST_PLATFORM_PARAM} # Build DATs COMMAND ${CMAKE_COMMAND} -E env ${RUST_UNSET_ENV_VARS} ${RUST_ENV_VARS} cargo build @@ -177,13 +178,13 @@ set(CMAKE_OSX_DEPLOYMENT_TARGET add_compile_definitions(DEFINE_DEFAULT_ENGINE) # Link delta-kernal-rs to static lib -target_link_libraries( - ${EXTENSION_NAME} ${DELTA_KERNEL_LIBPATH} ${PLATFORM_LIBS}) +target_link_libraries(${EXTENSION_NAME} ${DELTA_KERNEL_LIBPATH} + ${PLATFORM_LIBS}) add_dependencies(${EXTENSION_NAME} delta_kernel) # Link delta-kernal-rs to dynamic lib -target_link_libraries( - ${LOADABLE_EXTENSION_NAME} ${DELTA_KERNEL_LIBPATH} ${PLATFORM_LIBS}) +target_link_libraries(${LOADABLE_EXTENSION_NAME} ${DELTA_KERNEL_LIBPATH} + ${PLATFORM_LIBS}) add_dependencies(${LOADABLE_EXTENSION_NAME} delta_kernel) install( diff --git a/src/delta_extension.cpp b/src/delta_extension.cpp index 5695acc..50ce93d 100644 --- a/src/delta_extension.cpp +++ b/src/delta_extension.cpp @@ -44,15 +44,15 @@ class DeltaStorageExtension : public StorageExtension { }; static void LoadInternal(DatabaseInstance &instance) { - // Load Table functions - for (const auto &function : DeltaFunctions::GetTableFunctions(instance)) { - ExtensionUtil::RegisterFunction(instance, function); - } - - // Load Scalar functions - for (const auto &function : DeltaFunctions::GetScalarFunctions(instance)) { - ExtensionUtil::RegisterFunction(instance, function); - } + // Load Table functions + for (const auto &function : DeltaFunctions::GetTableFunctions(instance)) { + 
ExtensionUtil::RegisterFunction(instance, function); + } + + // Load Scalar functions + for (const auto &function : DeltaFunctions::GetScalarFunctions(instance)) { + ExtensionUtil::RegisterFunction(instance, function); + } // Register the "single table" delta catalog (to ATTACH a single delta table) auto &config = DBConfig::GetConfig(instance); diff --git a/src/delta_functions.cpp b/src/delta_functions.cpp index 8feb6aa..922d0d2 100644 --- a/src/delta_functions.cpp +++ b/src/delta_functions.cpp @@ -15,12 +15,11 @@ vector DeltaFunctions::GetTableFunctions(DatabaseInstance &ins } vector DeltaFunctions::GetScalarFunctions(DatabaseInstance &instance) { - vector functions; + vector functions; - functions.push_back(GetExpressionFunction(instance)); + functions.push_back(GetExpressionFunction(instance)); - return functions; + return functions; } - -}; +}; // namespace duckdb diff --git a/src/delta_utils.cpp b/src/delta_utils.cpp index 93a84ca..920237d 100644 --- a/src/delta_utils.cpp +++ b/src/delta_utils.cpp @@ -13,297 +13,309 @@ #include "duckdb/parser/expression/operator_expression.hpp" #include "duckdb/common/types/decimal.hpp" - namespace duckdb { void ExpressionVisitor::VisitComparisonExpression(void *state, uintptr_t sibling_list_id, uintptr_t child_list_id) { - auto state_cast = static_cast(state); + auto state_cast = static_cast(state); - auto children = state_cast->TakeFieldList(child_list_id); - if (!children) { - return; - } + auto children = state_cast->TakeFieldList(child_list_id); + if (!children) { + return; + } - auto &lhs = children->at(0); - auto &rhs = children->at(1); - unique_ptr expression = make_uniq(ExpressionType::COMPARE_LESSTHAN, std::move(lhs), std::move(rhs)); - state_cast->AppendToList(sibling_list_id, std::move(expression)); + auto &lhs = children->at(0); + auto &rhs = children->at(1); + unique_ptr expression = + make_uniq(ExpressionType::COMPARE_LESSTHAN, std::move(lhs), std::move(rhs)); + state_cast->AppendToList(sibling_list_id, 
std::move(expression)); } -unique_ptr>> ExpressionVisitor::VisitKernelExpression(const ffi::Handle *expression) { - ExpressionVisitor state; - ffi::EngineExpressionVisitor visitor; +unique_ptr>> +ExpressionVisitor::VisitKernelExpression(const ffi::Handle *expression) { + ExpressionVisitor state; + ffi::EngineExpressionVisitor visitor; - visitor.data = &state; - visitor.make_field_list = (uintptr_t (*)(void*, uintptr_t)) &MakeFieldList; + visitor.data = &state; + visitor.make_field_list = (uintptr_t(*)(void *, uintptr_t)) & MakeFieldList; - // Templated primitive functions - visitor.visit_literal_bool = VisitPrimitiveLiteral(); - visitor.visit_literal_byte = VisitPrimitiveLiteralByte; - visitor.visit_literal_short = VisitPrimitiveLiteralShort; - visitor.visit_literal_int = VisitPrimitiveLiteralInt; - visitor.visit_literal_long = VisitPrimitiveLiteralLong; - visitor.visit_literal_float = VisitPrimitiveLiteralFloat; - visitor.visit_literal_double = VisitPrimitiveLiteralDouble; + // Templated primitive functions + visitor.visit_literal_bool = VisitPrimitiveLiteral(); + visitor.visit_literal_byte = VisitPrimitiveLiteralByte; + visitor.visit_literal_short = VisitPrimitiveLiteralShort; + visitor.visit_literal_int = VisitPrimitiveLiteralInt; + visitor.visit_literal_long = VisitPrimitiveLiteralLong; + visitor.visit_literal_float = VisitPrimitiveLiteralFloat; + visitor.visit_literal_double = VisitPrimitiveLiteralDouble; - visitor.visit_literal_decimal = VisitDecimalLiteral; + visitor.visit_literal_decimal = VisitDecimalLiteral; - // Custom Implementations - visitor.visit_literal_timestamp = &VisitTimestampLiteral; - visitor.visit_literal_timestamp_ntz = &VisitTimestampNtzLiteral; - visitor.visit_literal_date = &VisitDateLiteral; + // Custom Implementations + visitor.visit_literal_timestamp = &VisitTimestampLiteral; + visitor.visit_literal_timestamp_ntz = &VisitTimestampNtzLiteral; + visitor.visit_literal_date = &VisitDateLiteral; - visitor.visit_literal_string = 
&VisitStringLiteral; + visitor.visit_literal_string = &VisitStringLiteral; - visitor.visit_literal_binary = &VisitBinaryLiteral; - visitor.visit_literal_null = &VisitNullLiteral; - visitor.visit_literal_array = &VisitArrayLiteral; + visitor.visit_literal_binary = &VisitBinaryLiteral; + visitor.visit_literal_null = &VisitNullLiteral; + visitor.visit_literal_array = &VisitArrayLiteral; - visitor.visit_and = VisitVariadicExpression(); - visitor.visit_or = VisitVariadicExpression(); + visitor.visit_and = VisitVariadicExpression(); + visitor.visit_or = VisitVariadicExpression(); - visitor.visit_lt = VisitBinaryExpression(); - visitor.visit_le = VisitBinaryExpression(); - visitor.visit_gt = VisitBinaryExpression(); - visitor.visit_ge = VisitBinaryExpression(); + visitor.visit_lt = VisitBinaryExpression(); + visitor.visit_le = VisitBinaryExpression(); + visitor.visit_gt = VisitBinaryExpression(); + visitor.visit_ge = VisitBinaryExpression(); - visitor.visit_eq = VisitBinaryExpression(); - visitor.visit_ne = VisitBinaryExpression(); - visitor.visit_distinct = VisitBinaryExpression(); + visitor.visit_eq = VisitBinaryExpression(); + visitor.visit_ne = VisitBinaryExpression(); + visitor.visit_distinct = VisitBinaryExpression(); - visitor.visit_in = VisitVariadicExpression(); - visitor.visit_not_in = VisitVariadicExpression(); + visitor.visit_in = VisitVariadicExpression(); + visitor.visit_not_in = VisitVariadicExpression(); - visitor.visit_add = VisitAdditionExpression; - visitor.visit_minus = VisitSubctractionExpression; - visitor.visit_multiply = VisitMultiplyExpression; - visitor.visit_divide = VisitDivideExpression; + visitor.visit_add = VisitAdditionExpression; + visitor.visit_minus = VisitSubctractionExpression; + visitor.visit_multiply = VisitMultiplyExpression; + visitor.visit_divide = VisitDivideExpression; - visitor.visit_column = &VisitColumnExpression; - visitor.visit_struct_expr = &VisitStructExpression; + visitor.visit_column = &VisitColumnExpression; + 
visitor.visit_struct_expr = &VisitStructExpression; - visitor.visit_literal_struct = &VisitStructLiteral; + visitor.visit_literal_struct = &VisitStructLiteral; - visitor.visit_not = &VisitNotExpression; - visitor.visit_is_null = &VisitIsNullExpression; + visitor.visit_not = &VisitNotExpression; + visitor.visit_is_null = &VisitIsNullExpression; - uintptr_t result = ffi::visit_expression(expression, &visitor); + uintptr_t result = ffi::visit_expression(expression, &visitor); - if (state.error.HasError()) { - state.error.Throw(); - } + if (state.error.HasError()) { + state.error.Throw(); + } - return state.TakeFieldList(result); + return state.TakeFieldList(result); } void ExpressionVisitor::VisitAdditionExpression(void *state, uintptr_t sibling_list_id, uintptr_t child_list_id) { - auto state_cast = static_cast(state); - auto children = state_cast->TakeFieldList(child_list_id); - if (!children) { - return; - } - unique_ptr expression = make_uniq("+", std::move(*children), nullptr, nullptr, false, true); - state_cast->AppendToList(sibling_list_id, std::move(expression)); + auto state_cast = static_cast(state); + auto children = state_cast->TakeFieldList(child_list_id); + if (!children) { + return; + } + unique_ptr expression = + make_uniq("+", std::move(*children), nullptr, nullptr, false, true); + state_cast->AppendToList(sibling_list_id, std::move(expression)); } void ExpressionVisitor::VisitSubctractionExpression(void *state, uintptr_t sibling_list_id, uintptr_t child_list_id) { - auto state_cast = static_cast(state); - auto children = state_cast->TakeFieldList(child_list_id); - if (!children) { - return; - } - unique_ptr expression = make_uniq("-", std::move(*children), nullptr, nullptr, false, true); - state_cast->AppendToList(sibling_list_id, std::move(expression)); + auto state_cast = static_cast(state); + auto children = state_cast->TakeFieldList(child_list_id); + if (!children) { + return; + } + unique_ptr expression = + make_uniq("-", std::move(*children), 
nullptr, nullptr, false, true); + state_cast->AppendToList(sibling_list_id, std::move(expression)); } void ExpressionVisitor::VisitDivideExpression(void *state, uintptr_t sibling_list_id, uintptr_t child_list_id) { - auto state_cast = static_cast(state); - auto children = state_cast->TakeFieldList(child_list_id); - if (!children) { - return; - } - unique_ptr expression = make_uniq("/", std::move(*children), nullptr, nullptr, false, true); - state_cast->AppendToList(sibling_list_id, std::move(expression)); + auto state_cast = static_cast(state); + auto children = state_cast->TakeFieldList(child_list_id); + if (!children) { + return; + } + unique_ptr expression = + make_uniq("/", std::move(*children), nullptr, nullptr, false, true); + state_cast->AppendToList(sibling_list_id, std::move(expression)); } void ExpressionVisitor::VisitMultiplyExpression(void *state, uintptr_t sibling_list_id, uintptr_t child_list_id) { - auto state_cast = static_cast(state); - auto children = state_cast->TakeFieldList(child_list_id); - if (!children) { - return; - } - unique_ptr expression = make_uniq("*", std::move(*children), nullptr, nullptr, false, true); - state_cast->AppendToList(sibling_list_id, std::move(expression)); + auto state_cast = static_cast(state); + auto children = state_cast->TakeFieldList(child_list_id); + if (!children) { + return; + } + unique_ptr expression = + make_uniq("*", std::move(*children), nullptr, nullptr, false, true); + state_cast->AppendToList(sibling_list_id, std::move(expression)); } -void ExpressionVisitor::VisitPrimitiveLiteralBool(void* state, uintptr_t sibling_list_id, bool value) { - auto expression = make_uniq(Value::BOOLEAN(value)); - static_cast(state)->AppendToList(sibling_list_id, std::move(expression)); +void ExpressionVisitor::VisitPrimitiveLiteralBool(void *state, uintptr_t sibling_list_id, bool value) { + auto expression = make_uniq(Value::BOOLEAN(value)); + static_cast(state)->AppendToList(sibling_list_id, std::move(expression)); } -void 
ExpressionVisitor::VisitPrimitiveLiteralByte(void* state, uintptr_t sibling_list_id, int8_t value) { - auto expression = make_uniq(Value::TINYINT(value)); - static_cast(state)->AppendToList(sibling_list_id, std::move(expression)); +void ExpressionVisitor::VisitPrimitiveLiteralByte(void *state, uintptr_t sibling_list_id, int8_t value) { + auto expression = make_uniq(Value::TINYINT(value)); + static_cast(state)->AppendToList(sibling_list_id, std::move(expression)); } -void ExpressionVisitor::VisitPrimitiveLiteralShort(void* state, uintptr_t sibling_list_id, int16_t value) { - auto expression = make_uniq(Value::SMALLINT(value)); - static_cast(state)->AppendToList(sibling_list_id, std::move(expression)); +void ExpressionVisitor::VisitPrimitiveLiteralShort(void *state, uintptr_t sibling_list_id, int16_t value) { + auto expression = make_uniq(Value::SMALLINT(value)); + static_cast(state)->AppendToList(sibling_list_id, std::move(expression)); } -void ExpressionVisitor::VisitPrimitiveLiteralInt(void* state, uintptr_t sibling_list_id, int32_t value) { - auto expression = make_uniq(Value::INTEGER(value)); - static_cast(state)->AppendToList(sibling_list_id, std::move(expression)); +void ExpressionVisitor::VisitPrimitiveLiteralInt(void *state, uintptr_t sibling_list_id, int32_t value) { + auto expression = make_uniq(Value::INTEGER(value)); + static_cast(state)->AppendToList(sibling_list_id, std::move(expression)); } -void ExpressionVisitor::VisitPrimitiveLiteralLong(void* state, uintptr_t sibling_list_id, int64_t value) { - auto expression = make_uniq(Value::BIGINT(value)); - static_cast(state)->AppendToList(sibling_list_id, std::move(expression)); +void ExpressionVisitor::VisitPrimitiveLiteralLong(void *state, uintptr_t sibling_list_id, int64_t value) { + auto expression = make_uniq(Value::BIGINT(value)); + static_cast(state)->AppendToList(sibling_list_id, std::move(expression)); } -void ExpressionVisitor::VisitPrimitiveLiteralFloat(void* state, uintptr_t sibling_list_id, 
float value) { - auto expression = make_uniq(Value::FLOAT(value)); - static_cast(state)->AppendToList(sibling_list_id, std::move(expression)); +void ExpressionVisitor::VisitPrimitiveLiteralFloat(void *state, uintptr_t sibling_list_id, float value) { + auto expression = make_uniq(Value::FLOAT(value)); + static_cast(state)->AppendToList(sibling_list_id, std::move(expression)); } -void ExpressionVisitor::VisitPrimitiveLiteralDouble(void* state, uintptr_t sibling_list_id, double value) { - auto expression = make_uniq(Value::DOUBLE(value)); - static_cast(state)->AppendToList(sibling_list_id, std::move(expression)); +void ExpressionVisitor::VisitPrimitiveLiteralDouble(void *state, uintptr_t sibling_list_id, double value) { + auto expression = make_uniq(Value::DOUBLE(value)); + static_cast(state)->AppendToList(sibling_list_id, std::move(expression)); } -void ExpressionVisitor::VisitTimestampLiteral(void* state, uintptr_t sibling_list_id, int64_t value) { - auto expression = make_uniq(Value::TIMESTAMPTZ(static_cast(value))); - static_cast(state)->AppendToList(sibling_list_id, std::move(expression)); +void ExpressionVisitor::VisitTimestampLiteral(void *state, uintptr_t sibling_list_id, int64_t value) { + auto expression = make_uniq(Value::TIMESTAMPTZ(static_cast(value))); + static_cast(state)->AppendToList(sibling_list_id, std::move(expression)); } -void ExpressionVisitor::VisitTimestampNtzLiteral(void* state, uintptr_t sibling_list_id, int64_t value) { - auto expression = make_uniq(Value::TIMESTAMP(static_cast(value))); - static_cast(state)->AppendToList(sibling_list_id, std::move(expression)); +void ExpressionVisitor::VisitTimestampNtzLiteral(void *state, uintptr_t sibling_list_id, int64_t value) { + auto expression = make_uniq(Value::TIMESTAMP(static_cast(value))); + static_cast(state)->AppendToList(sibling_list_id, std::move(expression)); } -void ExpressionVisitor::VisitDateLiteral(void* state, uintptr_t sibling_list_id, int32_t value) { - auto expression = 
make_uniq(Value::DATE(static_cast(value))); - static_cast(state)->AppendToList(sibling_list_id, std::move(expression)); +void ExpressionVisitor::VisitDateLiteral(void *state, uintptr_t sibling_list_id, int32_t value) { + auto expression = make_uniq(Value::DATE(static_cast(value))); + static_cast(state)->AppendToList(sibling_list_id, std::move(expression)); } -void ExpressionVisitor::VisitStringLiteral(void* state, uintptr_t sibling_list_id, ffi::KernelStringSlice value) { - auto expression = make_uniq(Value(string(value.ptr, value.len))); - static_cast(state)->AppendToList(sibling_list_id, std::move(expression)); +void ExpressionVisitor::VisitStringLiteral(void *state, uintptr_t sibling_list_id, ffi::KernelStringSlice value) { + auto expression = make_uniq(Value(string(value.ptr, value.len))); + static_cast(state)->AppendToList(sibling_list_id, std::move(expression)); } -void ExpressionVisitor::VisitBinaryLiteral(void* state, uintptr_t sibling_list_id, const uint8_t *buffer, uintptr_t len) { - auto expression = make_uniq(Value::BLOB(buffer, len)); - static_cast(state)->AppendToList(sibling_list_id, std::move(expression)); +void ExpressionVisitor::VisitBinaryLiteral(void *state, uintptr_t sibling_list_id, const uint8_t *buffer, + uintptr_t len) { + auto expression = make_uniq(Value::BLOB(buffer, len)); + static_cast(state)->AppendToList(sibling_list_id, std::move(expression)); } -void ExpressionVisitor::VisitNullLiteral(void* state, uintptr_t sibling_list_id) { - auto expression = make_uniq(Value()); - static_cast(state)->AppendToList(sibling_list_id, std::move(expression)); +void ExpressionVisitor::VisitNullLiteral(void *state, uintptr_t sibling_list_id) { + auto expression = make_uniq(Value()); + static_cast(state)->AppendToList(sibling_list_id, std::move(expression)); } -void ExpressionVisitor::VisitArrayLiteral(void* state, uintptr_t sibling_list_id, uintptr_t child_id) { - auto state_cast = static_cast(state); - auto children = 
state_cast->TakeFieldList(child_id); - if (!children) { - return; - } - unique_ptr expression = make_uniq("list_value", std::move(*children)); - state_cast->AppendToList(sibling_list_id, std::move(expression)); +void ExpressionVisitor::VisitArrayLiteral(void *state, uintptr_t sibling_list_id, uintptr_t child_id) { + auto state_cast = static_cast(state); + auto children = state_cast->TakeFieldList(child_id); + if (!children) { + return; + } + unique_ptr expression = make_uniq("list_value", std::move(*children)); + state_cast->AppendToList(sibling_list_id, std::move(expression)); } -void ExpressionVisitor::VisitStructLiteral(void *state, uintptr_t sibling_list_id, uintptr_t child_field_list_value, uintptr_t child_value_list_id) { - auto state_cast = static_cast(state); +void ExpressionVisitor::VisitStructLiteral(void *state, uintptr_t sibling_list_id, uintptr_t child_field_list_value, + uintptr_t child_value_list_id) { + auto state_cast = static_cast(state); - auto children_keys = state_cast->TakeFieldList(child_field_list_value); - auto children_values = state_cast->TakeFieldList(child_value_list_id); - if (!children_values || !children_keys) { - return; - } + auto children_keys = state_cast->TakeFieldList(child_field_list_value); + auto children_values = state_cast->TakeFieldList(child_value_list_id); + if (!children_values || !children_keys) { + return; + } - if (children_values->size() != children_keys->size()) { - state_cast->error = ErrorData("Size of Keys and Values vector do not match in ExpressionVisitor::VisitStructLiteral"); - return; - } + if (children_values->size() != children_keys->size()) { + state_cast->error = + ErrorData("Size of Keys and Values vector do not match in ExpressionVisitor::VisitStructLiteral"); + return; + } - for (idx_t i = 0; i < children_keys->size(); i++) { - (*children_values)[i]->alias = (*children_keys)[i]->ToString(); - } + for (idx_t i = 0; i < children_keys->size(); i++) { + (*children_values)[i]->alias = 
(*children_keys)[i]->ToString(); + } - unique_ptr expression = make_uniq("struct_pack", std::move(*children_values)); - state_cast->AppendToList(sibling_list_id, std::move(expression)); + unique_ptr expression = make_uniq("struct_pack", std::move(*children_values)); + state_cast->AppendToList(sibling_list_id, std::move(expression)); } void ExpressionVisitor::VisitNotExpression(void *state, uintptr_t sibling_list_id, uintptr_t child_list_id) { - auto state_cast = static_cast(state); - auto children = state_cast->TakeFieldList(child_list_id); - if (!children) { - return; - } - unique_ptr expression = make_uniq("NOT", std::move(*children), nullptr, nullptr, false, true); - state_cast->AppendToList(sibling_list_id, std::move(expression)); + auto state_cast = static_cast(state); + auto children = state_cast->TakeFieldList(child_list_id); + if (!children) { + return; + } + unique_ptr expression = + make_uniq("NOT", std::move(*children), nullptr, nullptr, false, true); + state_cast->AppendToList(sibling_list_id, std::move(expression)); } void ExpressionVisitor::VisitIsNullExpression(void *state, uintptr_t sibling_list_id, uintptr_t child_list_id) { - auto state_cast = static_cast(state); - auto children = state_cast->TakeFieldList(child_list_id); - if (!children) { - return; - } + auto state_cast = static_cast(state); + auto children = state_cast->TakeFieldList(child_list_id); + if (!children) { + return; + } - children->push_back(make_uniq(Value())); - unique_ptr expression = make_uniq("IS", std::move(*children), nullptr, nullptr, false, true); - state_cast->AppendToList(sibling_list_id, std::move(expression)); + children->push_back(make_uniq(Value())); + unique_ptr expression = + make_uniq("IS", std::move(*children), nullptr, nullptr, false, true); + state_cast->AppendToList(sibling_list_id, std::move(expression)); } // FIXME: this is not 100% correct yet: value_ms is ignored -void ExpressionVisitor::VisitDecimalLiteral(void *state, uintptr_t sibling_list_id, uint64_t 
value_ms, uint64_t value_ls, uint8_t precision, uint8_t scale) { - try { - if (precision >= Decimal::MAX_WIDTH_INT64 || value_ls > (uint64_t)NumericLimits::Maximum()) { - throw NotImplementedException("ExpressionVisitor::VisitDecimalLiteral HugeInt decimals"); - } - auto expression = make_uniq(Value::DECIMAL(42, 18, 10)); - static_cast(state)->AppendToList(sibling_list_id, std::move(expression)); - } catch (Exception &e) { - static_cast(state)->error = ErrorData(e); - } +void ExpressionVisitor::VisitDecimalLiteral(void *state, uintptr_t sibling_list_id, uint64_t value_ms, + uint64_t value_ls, uint8_t precision, uint8_t scale) { + try { + if (precision >= Decimal::MAX_WIDTH_INT64 || value_ls > (uint64_t)NumericLimits::Maximum()) { + throw NotImplementedException("ExpressionVisitor::VisitDecimalLiteral HugeInt decimals"); + } + auto expression = make_uniq(Value::DECIMAL(42, 18, 10)); + static_cast(state)->AppendToList(sibling_list_id, std::move(expression)); + } catch (Exception &e) { + static_cast(state)->error = ErrorData(e); + } } void ExpressionVisitor::VisitColumnExpression(void *state, uintptr_t sibling_list_id, ffi::KernelStringSlice name) { - auto expression = make_uniq(string(name.ptr, name.len)); - static_cast(state)->AppendToList(sibling_list_id, std::move(expression)); + auto expression = make_uniq(string(name.ptr, name.len)); + static_cast(state)->AppendToList(sibling_list_id, std::move(expression)); } void ExpressionVisitor::VisitStructExpression(void *state, uintptr_t sibling_list_id, uintptr_t child_list_id) { - static_cast(state)->AppendToList(sibling_list_id, std::move(make_uniq(Value(42)))); + static_cast(state)->AppendToList(sibling_list_id, + std::move(make_uniq(Value(42)))); } -uintptr_t ExpressionVisitor::MakeFieldList(ExpressionVisitor* state, uintptr_t capacity_hint) { - return state->MakeFieldListImpl(capacity_hint); +uintptr_t ExpressionVisitor::MakeFieldList(ExpressionVisitor *state, uintptr_t capacity_hint) { + return 
state->MakeFieldListImpl(capacity_hint); } uintptr_t ExpressionVisitor::MakeFieldListImpl(uintptr_t capacity_hint) { - uintptr_t id = next_id++; - auto list = make_uniq(); - if (capacity_hint > 0) { - list->reserve(capacity_hint); - } - inflight_lists.emplace(id, std::move(list)); - return id; + uintptr_t id = next_id++; + auto list = make_uniq(); + if (capacity_hint > 0) { + list->reserve(capacity_hint); + } + inflight_lists.emplace(id, std::move(list)); + return id; } void ExpressionVisitor::AppendToList(uintptr_t id, unique_ptr child) { - auto it = inflight_lists.find(id); - if (it == inflight_lists.end()) { - error = ErrorData("ExpressionVisitor::AppendToList could not find " + Value::UBIGINT(id).ToString()); - return; - } + auto it = inflight_lists.find(id); + if (it == inflight_lists.end()) { + error = ErrorData("ExpressionVisitor::AppendToList could not find " + Value::UBIGINT(id).ToString()); + return; + } - it->second->emplace_back(std::move(child)); + it->second->emplace_back(std::move(child)); } unique_ptr ExpressionVisitor::TakeFieldList(uintptr_t id) { - auto it = inflight_lists.find(id); - if (it == inflight_lists.end()) { - error = ErrorData("ExpressionVisitor::TakeFieldList could not find " + Value::UBIGINT(id).ToString()); - return nullptr; - } - auto rval = std::move(it->second); - inflight_lists.erase(it); - return rval; + auto it = inflight_lists.find(id); + if (it == inflight_lists.end()) { + error = ErrorData("ExpressionVisitor::TakeFieldList could not find " + Value::UBIGINT(id).ToString()); + return nullptr; + } + auto rval = std::move(it->second); + inflight_lists.erase(it); + return rval; } -unique_ptr SchemaVisitor::VisitSnapshotSchema(ffi::SharedSnapshot* snapshot) { - SchemaVisitor state; - ffi::EngineSchemaVisitor visitor; +unique_ptr SchemaVisitor::VisitSnapshotSchema(ffi::SharedSnapshot *snapshot) { + SchemaVisitor state; + ffi::EngineSchemaVisitor visitor; visitor.data = &state; visitor.make_field_list = (uintptr_t(*)(void *, 
uintptr_t)) & MakeFieldList; @@ -395,50 +407,48 @@ ffi::EngineError *DuckDBEngineError::AllocateError(ffi::KernelError etype, ffi:: } string DuckDBEngineError::KernelErrorEnumToString(ffi::KernelError err) { - const char *KERNEL_ERROR_ENUM_STRINGS[] = { - "UnknownError", - "FFIError", - "ArrowError", - "EngineDataTypeError", - "ExtractError", - "GenericError", - "IOErrorError", - "ParquetError", - "ObjectStoreError", - "ObjectStorePathError", - "ReqwestError", - "FileNotFoundError", - "MissingColumnError", - "UnexpectedColumnTypeError", - "MissingDataError", - "MissingVersionError", - "DeletionVectorError", - "InvalidUrlError", - "MalformedJsonError", - "MissingMetadataError", - "MissingProtocolError", - "InvalidProtocolError", - "MissingMetadataAndProtocolError", - "ParseError", - "JoinFailureError", - "Utf8Error", - "ParseIntError", - "InvalidColumnMappingModeError", - "InvalidTableLocationError", - "InvalidDecimalError", - "InvalidStructDataError", - "InternalError", - "InvalidExpression", - "InvalidLogPath", - "InvalidCommitInfo", - "FileAlreadyExists", - "MissingCommitInfo", - "UnsupportedError", - "ParseIntervalError", - "ChangeDataFeedUnsupported", - "ChangeDataFeedIncompatibleSchema", - "InvalidCheckpoint" - }; + const char *KERNEL_ERROR_ENUM_STRINGS[] = {"UnknownError", + "FFIError", + "ArrowError", + "EngineDataTypeError", + "ExtractError", + "GenericError", + "IOErrorError", + "ParquetError", + "ObjectStoreError", + "ObjectStorePathError", + "ReqwestError", + "FileNotFoundError", + "MissingColumnError", + "UnexpectedColumnTypeError", + "MissingDataError", + "MissingVersionError", + "DeletionVectorError", + "InvalidUrlError", + "MalformedJsonError", + "MissingMetadataError", + "MissingProtocolError", + "InvalidProtocolError", + "MissingMetadataAndProtocolError", + "ParseError", + "JoinFailureError", + "Utf8Error", + "ParseIntError", + "InvalidColumnMappingModeError", + "InvalidTableLocationError", + "InvalidDecimalError", + "InvalidStructDataError", + 
"InternalError", + "InvalidExpression", + "InvalidLogPath", + "InvalidCommitInfo", + "FileAlreadyExists", + "MissingCommitInfo", + "UnsupportedError", + "ParseIntervalError", + "ChangeDataFeedUnsupported", + "ChangeDataFeedIncompatibleSchema", + "InvalidCheckpoint"}; static_assert(sizeof(KERNEL_ERROR_ENUM_STRINGS) / sizeof(char *) - 1 == (int)ffi::KernelError::InvalidCheckpoint, "KernelErrorEnumStrings mismatched with kernel"); diff --git a/src/functions/delta_scan.cpp b/src/functions/delta_scan.cpp index 0018923..6836abb 100644 --- a/src/functions/delta_scan.cpp +++ b/src/functions/delta_scan.cpp @@ -47,8 +47,8 @@ string url_decode(string input) { } void DeltaSnapshot::VisitCallback(ffi::NullableCvoid engine_context, struct ffi::KernelStringSlice path, int64_t size, - const ffi::Stats *stats, const ffi::DvInfo *dv_info, - const struct ffi::CStringMap *partition_values) { + const ffi::Stats *stats, const ffi::DvInfo *dv_info, + const struct ffi::CStringMap *partition_values) { auto context = (DeltaSnapshot *)engine_context; auto path_string = context->GetPath(); StringUtil::RTrim(path_string, "/"); @@ -92,7 +92,7 @@ void DeltaSnapshot::VisitCallback(ffi::NullableCvoid engine_context, struct ffi: } void DeltaSnapshot::VisitData(void *engine_context, ffi::ExclusiveEngineData *engine_data, - const struct ffi::KernelBoolSlice selection_vec) { + const struct ffi::KernelBoolSlice selection_vec) { ffi::visit_scan_data(engine_data, selection_vec, engine_context, VisitCallback); } @@ -238,52 +238,52 @@ static ffi::EngineBuilder *CreateBuilder(ClientContext &context, const string &p // Here you would need to add the logic for setting the builder options for Azure // This is just a placeholder and will need to be replaced with the actual logic if (secret_type == "s3" || secret_type == "gcs" || secret_type == "r2") { - string key_id, secret, session_token, region, endpoint, url_style; - bool use_ssl = true; - secret_reader.TryGetSecretKey("key_id", key_id); - 
secret_reader.TryGetSecretKey("secret", secret); - secret_reader.TryGetSecretKey("session_token", session_token); - secret_reader.TryGetSecretKey("region", region); - secret_reader.TryGetSecretKey("endpoint", endpoint); - secret_reader.TryGetSecretKey("url_style", url_style); - secret_reader.TryGetSecretKey("use_ssl", use_ssl); - - if (key_id.empty() && secret.empty()) { - ffi::set_builder_option(builder, KernelUtils::ToDeltaString("skip_signature"), - KernelUtils::ToDeltaString("true")); - } - - if (!key_id.empty()) { - ffi::set_builder_option(builder, KernelUtils::ToDeltaString("aws_access_key_id"), - KernelUtils::ToDeltaString(key_id)); - } - if (!secret.empty()) { - ffi::set_builder_option(builder, KernelUtils::ToDeltaString("aws_secret_access_key"), - KernelUtils::ToDeltaString(secret)); - } - if (!session_token.empty()) { - ffi::set_builder_option(builder, KernelUtils::ToDeltaString("aws_session_token"), - KernelUtils::ToDeltaString(session_token)); - } - if (!endpoint.empty() && endpoint != "s3.amazonaws.com") { - if (!StringUtil::StartsWith(endpoint, "https://") && !StringUtil::StartsWith(endpoint, "http://")) { - if (use_ssl) { - endpoint = "https://" + endpoint; - } else { - endpoint = "http://" + endpoint; - } - } - - if (StringUtil::StartsWith(endpoint, "http://")) { - ffi::set_builder_option(builder, KernelUtils::ToDeltaString("allow_http"), - KernelUtils::ToDeltaString("true")); - } - ffi::set_builder_option(builder, KernelUtils::ToDeltaString("aws_endpoint"), - KernelUtils::ToDeltaString(endpoint)); - } else if (StringUtil::StartsWith(path, "gs://") || StringUtil::StartsWith(path, "gcs://")) { - ffi::set_builder_option(builder, KernelUtils::ToDeltaString("aws_endpoint"), - KernelUtils::ToDeltaString("https://storage.googleapis.com")); - } + string key_id, secret, session_token, region, endpoint, url_style; + bool use_ssl = true; + secret_reader.TryGetSecretKey("key_id", key_id); + secret_reader.TryGetSecretKey("secret", secret); + 
secret_reader.TryGetSecretKey("session_token", session_token); + secret_reader.TryGetSecretKey("region", region); + secret_reader.TryGetSecretKey("endpoint", endpoint); + secret_reader.TryGetSecretKey("url_style", url_style); + secret_reader.TryGetSecretKey("use_ssl", use_ssl); + + if (key_id.empty() && secret.empty()) { + ffi::set_builder_option(builder, KernelUtils::ToDeltaString("skip_signature"), + KernelUtils::ToDeltaString("true")); + } + + if (!key_id.empty()) { + ffi::set_builder_option(builder, KernelUtils::ToDeltaString("aws_access_key_id"), + KernelUtils::ToDeltaString(key_id)); + } + if (!secret.empty()) { + ffi::set_builder_option(builder, KernelUtils::ToDeltaString("aws_secret_access_key"), + KernelUtils::ToDeltaString(secret)); + } + if (!session_token.empty()) { + ffi::set_builder_option(builder, KernelUtils::ToDeltaString("aws_session_token"), + KernelUtils::ToDeltaString(session_token)); + } + if (!endpoint.empty() && endpoint != "s3.amazonaws.com") { + if (!StringUtil::StartsWith(endpoint, "https://") && !StringUtil::StartsWith(endpoint, "http://")) { + if (use_ssl) { + endpoint = "https://" + endpoint; + } else { + endpoint = "http://" + endpoint; + } + } + + if (StringUtil::StartsWith(endpoint, "http://")) { + ffi::set_builder_option(builder, KernelUtils::ToDeltaString("allow_http"), + KernelUtils::ToDeltaString("true")); + } + ffi::set_builder_option(builder, KernelUtils::ToDeltaString("aws_endpoint"), + KernelUtils::ToDeltaString(endpoint)); + } else if (StringUtil::StartsWith(path, "gs://") || StringUtil::StartsWith(path, "gcs://")) { + ffi::set_builder_option(builder, KernelUtils::ToDeltaString("aws_endpoint"), + KernelUtils::ToDeltaString("https://storage.googleapis.com")); + } ffi::set_builder_option(builder, KernelUtils::ToDeltaString("aws_region"), KernelUtils::ToDeltaString(region)); @@ -415,7 +415,7 @@ string DeltaSnapshot::ToDeltaPath(const string &raw_path) { } void DeltaSnapshot::Bind(vector &return_types, vector &names) { - 
unique_lock lck(lock); + unique_lock lck(lock); if (have_bound) { names = this->names; @@ -478,9 +478,9 @@ string DeltaSnapshot::GetFileInternal(idx_t i) { } string DeltaSnapshot::GetFile(idx_t i) { - // TODO: profile this: we should be able to use atomics here to optimize - unique_lock lck(lock); - return GetFileInternal(i); + // TODO: profile this: we should be able to use atomics here to optimize + unique_lock lck(lock); + return GetFileInternal(i); } void DeltaSnapshot::InitializeSnapshot() { @@ -537,17 +537,17 @@ unique_ptr DeltaSnapshot::ComplexFilterPushdown(ClientContext &co filtered_list->names = names; // Copy over the snapshot, this avoids reparsing metadata - { - unique_lock lck(lock); - filtered_list->snapshot = snapshot; - } + { + unique_lock lck(lock); + filtered_list->snapshot = snapshot; + } auto &profiler = QueryProfiler::Get(context); // Note: this is potentially quite expensive: we are creating 2 scans of the snapshot and fully materializing both // file lists Therefore this is only done when profile is enabled. 
This is enable by default in debug mode or for // EXPLAIN ANALYZE queries - // TODO: check locking behaviour below + // TODO: check locking behaviour below if (profiler.IsEnabled()) { Value result; if (!context.TryGetCurrentSetting("delta_scan_explain_files_filtered", result)) { @@ -595,7 +595,7 @@ unique_ptr DeltaSnapshot::ComplexFilterPushdown(ClientContext &co } vector DeltaSnapshot::GetAllFiles() { - unique_lock lck(lock); + unique_lock lck(lock); idx_t i = resolved_files.size(); // TODO: this can probably be improved while (!GetFileInternal(i).empty()) { @@ -613,7 +613,7 @@ FileExpandResult DeltaSnapshot::GetExpandResult() { } idx_t DeltaSnapshot::GetTotalFileCount() { - unique_lock lck(lock); + unique_lock lck(lock); idx_t i = resolved_files.size(); while (!GetFileInternal(i).empty()) { i++; @@ -625,8 +625,8 @@ unique_ptr DeltaSnapshot::GetCardinality(ClientContext &context) // This also ensures all files are expanded auto total_file_count = DeltaSnapshot::GetTotalFileCount(); - // TODO: internalize above - unique_lock lck(lock); + // TODO: internalize above + unique_lock lck(lock); if (total_file_count == 0) { return make_uniq(0, 0); @@ -648,15 +648,14 @@ unique_ptr DeltaSnapshot::GetCardinality(ClientContext &context) return nullptr; } - idx_t DeltaSnapshot::GetVersion() { - unique_lock lck(lock); - return version; + unique_lock lck(lock); + return version; } DeltaFileMetaData &DeltaSnapshot::GetMetaData(idx_t index) const { - unique_lock lck(lock); - return *metadata[index]; + unique_lock lck(lock); + return *metadata[index]; } unique_ptr DeltaMultiFileReader::CreateInstance(const TableFunction &table_function) { @@ -737,7 +736,7 @@ void DeltaMultiFileReader::FinalizeBind(const MultiFileReaderOptions &file_optio // Get the metadata for this file D_ASSERT(global_state->file_list); const auto &snapshot = dynamic_cast(*global_state->file_list); - auto &file_metadata = snapshot.GetMetaData(reader_data.file_list_idx.GetIndex()); + auto &file_metadata = 
snapshot.GetMetaData(reader_data.file_list_idx.GetIndex()); if (!file_metadata.partition_map.empty()) { for (idx_t i = 0; i < global_column_ids.size(); i++) { @@ -997,7 +996,7 @@ void DeltaMultiFileReader::FinalizeChunk(ClientContext &context, const MultiFile // Get the metadata for this file const auto &snapshot = dynamic_cast(*global_state->file_list); - auto &metadata = snapshot.GetMetaData(reader_data.file_list_idx.GetIndex()); + auto &metadata = snapshot.GetMetaData(reader_data.file_list_idx.GetIndex()); if (metadata.selection_vector.ptr && chunk.size() != 0) { D_ASSERT(delta_global_state.file_row_number_idx != DConstants::INVALID_INDEX); @@ -1005,7 +1004,7 @@ void DeltaMultiFileReader::FinalizeChunk(ClientContext &context, const MultiFile // Construct the selection vector using the file_row_number column and the raw selection vector from delta idx_t select_count; - auto sv = DuckSVFromDeltaSV(metadata.selection_vector, file_row_number_column, chunk.size(), select_count); + auto sv = DuckSVFromDeltaSV(metadata.selection_vector, file_row_number_column, chunk.size(), select_count); chunk.Slice(sv, select_count); } diff --git a/src/include/delta_functions.hpp b/src/include/delta_functions.hpp index b68d5c6..0753551 100644 --- a/src/include/delta_functions.hpp +++ b/src/include/delta_functions.hpp @@ -14,14 +14,14 @@ namespace duckdb { class DeltaFunctions { public: - static vector GetTableFunctions(DatabaseInstance &instance); - static vector GetScalarFunctions(DatabaseInstance &instance); + static vector GetTableFunctions(DatabaseInstance &instance); + static vector GetScalarFunctions(DatabaseInstance &instance); private: - //! Table Functions - static TableFunctionSet GetDeltaScanFunction(DatabaseInstance &instance); + //! Table Functions + static TableFunctionSet GetDeltaScanFunction(DatabaseInstance &instance); - //! Scalar Functions - static ScalarFunctionSet GetExpressionFunction(DatabaseInstance &instance); + //! 
Scalar Functions + static ScalarFunctionSet GetExpressionFunction(DatabaseInstance &instance); }; } // namespace duckdb diff --git a/src/include/delta_kernel_ffi.hpp b/src/include/delta_kernel_ffi.hpp index ec9db0c..5bb6cae 100644 --- a/src/include/delta_kernel_ffi.hpp +++ b/src/include/delta_kernel_ffi.hpp @@ -9,99 +9,100 @@ namespace ffi { enum class KernelError { - UnknownError, - FFIError, + UnknownError, + FFIError, #if (defined(DEFINE_DEFAULT_ENGINE) || defined(DEFINE_SYNC_ENGINE)) - ArrowError, + ArrowError, #endif - EngineDataTypeError, - ExtractError, - GenericError, - IOErrorError, + EngineDataTypeError, + ExtractError, + GenericError, + IOErrorError, #if (defined(DEFINE_DEFAULT_ENGINE) || defined(DEFINE_SYNC_ENGINE)) - ParquetError, + ParquetError, #endif #if defined(DEFINE_DEFAULT_ENGINE) - ObjectStoreError, + ObjectStoreError, #endif #if defined(DEFINE_DEFAULT_ENGINE) - ObjectStorePathError, + ObjectStorePathError, #endif #if defined(DEFINE_DEFAULT_ENGINE) - ReqwestError, + ReqwestError, #endif - FileNotFoundError, - MissingColumnError, - UnexpectedColumnTypeError, - MissingDataError, - MissingVersionError, - DeletionVectorError, - InvalidUrlError, - MalformedJsonError, - MissingMetadataError, - MissingProtocolError, - InvalidProtocolError, - MissingMetadataAndProtocolError, - ParseError, - JoinFailureError, - Utf8Error, - ParseIntError, - InvalidColumnMappingModeError, - InvalidTableLocationError, - InvalidDecimalError, - InvalidStructDataError, - InternalError, - InvalidExpression, - InvalidLogPath, - InvalidCommitInfo, - FileAlreadyExists, - MissingCommitInfo, - UnsupportedError, - ParseIntervalError, - ChangeDataFeedUnsupported, - ChangeDataFeedIncompatibleSchema, - InvalidCheckpoint, + FileNotFoundError, + MissingColumnError, + UnexpectedColumnTypeError, + MissingDataError, + MissingVersionError, + DeletionVectorError, + InvalidUrlError, + MalformedJsonError, + MissingMetadataError, + MissingProtocolError, + InvalidProtocolError, + 
MissingMetadataAndProtocolError, + ParseError, + JoinFailureError, + Utf8Error, + ParseIntError, + InvalidColumnMappingModeError, + InvalidTableLocationError, + InvalidDecimalError, + InvalidStructDataError, + InternalError, + InvalidExpression, + InvalidLogPath, + InvalidCommitInfo, + FileAlreadyExists, + MissingCommitInfo, + UnsupportedError, + ParseIntervalError, + ChangeDataFeedUnsupported, + ChangeDataFeedIncompatibleSchema, + InvalidCheckpoint, }; /// Definitions of level verbosity. Verbose Levels are "greater than" less verbose ones. So /// Level::ERROR is the lowest, and Level::TRACE the highest. enum class Level { - ERROR = 0, - WARN = 1, - INFO = 2, - DEBUGGING = 3, - TRACE = 4, + ERROR = 0, + WARN = 1, + INFO = 2, + DEBUGGING = 3, + TRACE = 4, }; /// Format to use for log lines. These correspond to the formats from [`tracing_subscriber` /// formats](https://docs.rs/tracing-subscriber/latest/tracing_subscriber/fmt/format/index.html). enum class LogLineFormat { - /// The default formatter. This emits human-readable, single-line logs for each event that - /// occurs, with the context displayed before the formatted representation of the event. - /// Example: - /// `2022-02-15T18:40:14.289898Z INFO fmt: preparing to shave yaks number_of_yaks=3` - FULL, - /// A variant of the FULL formatter, optimized for short line lengths. Fields from the context - /// are appended to the fields of the formatted event, and targets are not shown. - /// Example: - /// `2022-02-17T19:51:05.809287Z INFO fmt_compact: preparing to shave yaks number_of_yaks=3` - COMPACT, - /// Emits excessively pretty, multi-line logs, optimized for human readability. This is - /// primarily intended to be used in local development and debugging, or for command-line - /// applications, where automated analysis and compact storage of logs is less of a priority - /// than readability and visual appeal. 
- /// Example: - /// ```ignore - /// 2022-02-15T18:44:24.535324Z INFO fmt_pretty: preparing to shave yaks, number_of_yaks: 3 - /// at examples/examples/fmt-pretty.rs:16 on main - /// ``` - PRETTY, - /// Outputs newline-delimited JSON logs. This is intended for production use with systems where - /// structured logs are consumed as JSON by analysis and viewing tools. The JSON output is not - /// optimized for human readability. - /// Example: - /// `{"timestamp":"2022-02-15T18:47:10.821315Z","level":"INFO","fields":{"message":"preparing to shave yaks","number_of_yaks":3},"target":"fmt_json"}` - JSON, + /// The default formatter. This emits human-readable, single-line logs for each event that + /// occurs, with the context displayed before the formatted representation of the event. + /// Example: + /// `2022-02-15T18:40:14.289898Z INFO fmt: preparing to shave yaks number_of_yaks=3` + FULL, + /// A variant of the FULL formatter, optimized for short line lengths. Fields from the context + /// are appended to the fields of the formatted event, and targets are not shown. + /// Example: + /// `2022-02-17T19:51:05.809287Z INFO fmt_compact: preparing to shave yaks number_of_yaks=3` + COMPACT, + /// Emits excessively pretty, multi-line logs, optimized for human readability. This is + /// primarily intended to be used in local development and debugging, or for command-line + /// applications, where automated analysis and compact storage of logs is less of a priority + /// than readability and visual appeal. + /// Example: + /// ```ignore + /// 2022-02-15T18:44:24.535324Z INFO fmt_pretty: preparing to shave yaks, number_of_yaks: 3 + /// at examples/examples/fmt-pretty.rs:16 on main + /// ``` + PRETTY, + /// Outputs newline-delimited JSON logs. This is intended for production use with systems where + /// structured logs are consumed as JSON by analysis and viewing tools. The JSON output is not + /// optimized for human readability. 
+ /// Example: + /// `{"timestamp":"2022-02-15T18:47:10.821315Z","level":"INFO","fields":{"message":"preparing to shave + /// yaks","number_of_yaks":3},"target":"fmt_json"}` + JSON, }; struct CStringMap; @@ -143,15 +144,15 @@ struct StringSliceIterator; /// receives a `KernelBoolSlice` as a return value from a kernel method, engine is responsible /// to free that slice, by calling [super::free_bool_slice] exactly once. struct KernelBoolSlice { - bool *ptr; - uintptr_t len; + bool *ptr; + uintptr_t len; }; /// An owned slice of u64 row indexes allocated by the kernel. The engine is responsible for /// freeing this slice by calling [super::free_row_indexes] once. struct KernelRowIndexArray { - uint64_t *ptr; - uintptr_t len; + uint64_t *ptr; + uintptr_t len; }; /// Represents an object that crosses the FFI boundary and which outlives the scope that created @@ -186,8 +187,8 @@ struct KernelRowIndexArray { /// NOTE: Because the underlying type is always [`Sync`], multi-threaded external code can /// freely access shared (non-mutable) handles. /// -template -using Handle = H*; +template +using Handle = H *; /// An error that can be returned to the engine. Engines that wish to associate additional /// information can define and use any type that is [pointer @@ -196,31 +197,31 @@ using Handle = H*; /// of a [standard layout](https://en.cppreference.com/w/cpp/language/data_members#Standard-layout) /// class. struct EngineError { - KernelError etype; + KernelError etype; }; /// Semantics: Kernel will always immediately return the leaked engine error to the engine (if it /// allocated one at all), and engine is responsible for freeing it. 
-template +template struct ExternResult { - enum class Tag { - Ok, - Err, - }; - - struct Ok_Body { - T _0; - }; - - struct Err_Body { - EngineError *_0; - }; - - Tag tag; - union { - Ok_Body ok; - Err_Body err; - }; + enum class Tag { + Ok, + Err, + }; + + struct Ok_Body { + T _0; + }; + + struct Err_Body { + EngineError *_0; + }; + + Tag tag; + union { + Ok_Body ok; + Err_Body err; + }; }; /// A non-owned slice of a UTF8 string, intended for arg-passing between kernel and engine. The @@ -246,17 +247,17 @@ struct ExternResult { /// Meanwhile, the callee must assume that the slice is only valid until the function returns, and /// must not retain any references to the slice or its data that might outlive the function call. struct KernelStringSlice { - const char *ptr; - uintptr_t len; + const char *ptr; + uintptr_t len; }; -using AllocateErrorFn = EngineError*(*)(KernelError etype, KernelStringSlice msg); +using AllocateErrorFn = EngineError *(*)(KernelError etype, KernelStringSlice msg); -using NullableCvoid = void*; +using NullableCvoid = void *; /// Allow engines to allocate strings of their own type. 
the contract of calling a passed allocate /// function is that `kernel_str` is _only_ valid until the return from this function -using AllocateStringFn = NullableCvoid(*)(KernelStringSlice kernel_str); +using AllocateStringFn = NullableCvoid (*)(KernelStringSlice kernel_str); /// ABI-compatible struct for ArrowArray from C Data Interface /// See @@ -269,16 +270,16 @@ using AllocateStringFn = NullableCvoid(*)(KernelStringSlice kernel_str); /// } /// ``` struct FFI_ArrowArray { - int64_t length; - int64_t null_count; - int64_t offset; - int64_t n_buffers; - int64_t n_children; - const void **buffers; - FFI_ArrowArray **children; - FFI_ArrowArray *dictionary; - void (*release)(FFI_ArrowArray *arg1); - void *private_data; + int64_t length; + int64_t null_count; + int64_t offset; + int64_t n_buffers; + int64_t n_children; + const void **buffers; + FFI_ArrowArray **children; + FFI_ArrowArray *dictionary; + void (*release)(FFI_ArrowArray *arg1); + void *private_data; }; /// ABI-compatible struct for `ArrowSchema` from C Data Interface @@ -293,16 +294,16 @@ struct FFI_ArrowArray { /// ``` /// struct FFI_ArrowSchema { - const char *format; - const char *name; - const char *metadata; - /// Refer to [Arrow Flags](https://arrow.apache.org/docs/format/CDataInterface.html#c.ArrowSchema.flags) - int64_t flags; - int64_t n_children; - FFI_ArrowSchema **children; - FFI_ArrowSchema *dictionary; - void (*release)(FFI_ArrowSchema *arg1); - void *private_data; + const char *format; + const char *name; + const char *metadata; + /// Refer to [Arrow Flags](https://arrow.apache.org/docs/format/CDataInterface.html#c.ArrowSchema.flags) + int64_t flags; + int64_t n_children; + FFI_ArrowSchema **children; + FFI_ArrowSchema *dictionary; + void (*release)(FFI_ArrowSchema *arg1); + void *private_data; }; #if defined(DEFINE_DEFAULT_ENGINE) @@ -310,35 +311,35 @@ struct FFI_ArrowSchema { /// Interface](https://arrow.apache.org/docs/format/CDataInterface.html). 
This includes the data and /// the schema. struct ArrowFFIData { - FFI_ArrowArray array; - FFI_ArrowSchema schema; + FFI_ArrowArray array; + FFI_ArrowSchema schema; }; #endif struct FileMeta { - KernelStringSlice path; - int64_t last_modified; - uintptr_t size; + KernelStringSlice path; + int64_t last_modified; + uintptr_t size; }; /// Model iterators. This allows an engine to specify iteration however it likes, and we simply wrap /// the engine functions. The engine retains ownership of the iterator. struct EngineIterator { - void *data; - /// A function that should advance the iterator and return the next time from the data - /// If the iterator is complete, it should return null. It should be safe to - /// call `get_next()` multiple times if it returns null. - const void *(*get_next)(void *data); + void *data; + /// A function that should advance the iterator and return the next time from the data + /// If the iterator is complete, it should return null. It should be safe to + /// call `get_next()` multiple times if it returns null. 
+ const void *(*get_next)(void *data); }; -template -using VisitLiteralFn = void(*)(void *data, uintptr_t sibling_list_id, T value); +template +using VisitLiteralFn = void (*)(void *data, uintptr_t sibling_list_id, T value); -using VisitVariadicFn = void(*)(void *data, uintptr_t sibling_list_id, uintptr_t child_list_id); +using VisitVariadicFn = void (*)(void *data, uintptr_t sibling_list_id, uintptr_t child_list_id); -using VisitUnaryFn = void(*)(void *data, uintptr_t sibling_list_id, uintptr_t child_list_id); +using VisitUnaryFn = void (*)(void *data, uintptr_t sibling_list_id, uintptr_t child_list_id); -using VisitBinaryOpFn = void(*)(void *data, uintptr_t sibling_list_id, uintptr_t child_list_id); +using VisitBinaryOpFn = void (*)(void *data, uintptr_t sibling_list_id, uintptr_t child_list_id); /// The [`EngineExpressionVisitor`] defines a visitor system to allow engines to build their own /// representation of a kernel expression. @@ -371,153 +372,144 @@ using VisitBinaryOpFn = void(*)(void *data, uintptr_t sibling_list_id, uintptr_t /// visitor. Note that struct literals are currently in flux, and may change significantly. Here is the relevant /// issue: https://github.com/delta-io/delta-kernel-rs/issues/412 struct EngineExpressionVisitor { - /// An opaque engine state pointer - void *data; - /// Creates a new expression list, optionally reserving capacity up front - uintptr_t (*make_field_list)(void *data, uintptr_t reserve); - /// Visit a 32bit `integer` belonging to the list identified by `sibling_list_id`. - VisitLiteralFn visit_literal_int; - /// Visit a 64bit `long` belonging to the list identified by `sibling_list_id`. - VisitLiteralFn visit_literal_long; - /// Visit a 16bit `short` belonging to the list identified by `sibling_list_id`. - VisitLiteralFn visit_literal_short; - /// Visit an 8bit `byte` belonging to the list identified by `sibling_list_id`. 
- VisitLiteralFn visit_literal_byte; - /// Visit a 32bit `float` belonging to the list identified by `sibling_list_id`. - VisitLiteralFn visit_literal_float; - /// Visit a 64bit `double` belonging to the list identified by `sibling_list_id`. - VisitLiteralFn visit_literal_double; - /// Visit a `string` belonging to the list identified by `sibling_list_id`. - VisitLiteralFn visit_literal_string; - /// Visit a `boolean` belonging to the list identified by `sibling_list_id`. - VisitLiteralFn visit_literal_bool; - /// Visit a 64bit timestamp belonging to the list identified by `sibling_list_id`. - /// The timestamp is microsecond precision and adjusted to UTC. - VisitLiteralFn visit_literal_timestamp; - /// Visit a 64bit timestamp belonging to the list identified by `sibling_list_id`. - /// The timestamp is microsecond precision with no timezone. - VisitLiteralFn visit_literal_timestamp_ntz; - /// Visit a 32bit intger `date` representing days since UNIX epoch 1970-01-01. The `date` belongs - /// to the list identified by `sibling_list_id`. - VisitLiteralFn visit_literal_date; - /// Visit binary data at the `buffer` with length `len` belonging to the list identified by - /// `sibling_list_id`. - void (*visit_literal_binary)(void *data, - uintptr_t sibling_list_id, - const uint8_t *buffer, - uintptr_t len); - /// Visit a 128bit `decimal` value with the given precision and scale. The 128bit integer - /// is split into the most significant 64 bits in `value_ms`, and the least significant 64 - /// bits in `value_ls`. The `decimal` belongs to the list identified by `sibling_list_id`. - void (*visit_literal_decimal)(void *data, - uintptr_t sibling_list_id, - uint64_t value_ms, - uint64_t value_ls, - uint8_t precision, - uint8_t scale); - /// Visit a struct literal belonging to the list identified by `sibling_list_id`. - /// The field names of the struct are in a list identified by `child_field_list_id`. 
- /// The values of the struct are in a list identified by `child_value_list_id`. - void (*visit_literal_struct)(void *data, - uintptr_t sibling_list_id, - uintptr_t child_field_list_id, - uintptr_t child_value_list_id); - /// Visit an array literal belonging to the list identified by `sibling_list_id`. - /// The values of the array are in a list identified by `child_list_id`. - void (*visit_literal_array)(void *data, uintptr_t sibling_list_id, uintptr_t child_list_id); - /// Visits a null value belonging to the list identified by `sibling_list_id. - void (*visit_literal_null)(void *data, uintptr_t sibling_list_id); - /// Visits an `and` expression belonging to the list identified by `sibling_list_id`. - /// The sub-expressions of the array are in a list identified by `child_list_id` - VisitVariadicFn visit_and; - /// Visits an `or` expression belonging to the list identified by `sibling_list_id`. - /// The sub-expressions of the array are in a list identified by `child_list_id` - VisitVariadicFn visit_or; - /// Visits a `not` expression belonging to the list identified by `sibling_list_id`. - /// The sub-expression will be in a _one_ item list identified by `child_list_id` - VisitUnaryFn visit_not; - /// Visits a `is_null` expression belonging to the list identified by `sibling_list_id`. - /// The sub-expression will be in a _one_ item list identified by `child_list_id` - VisitUnaryFn visit_is_null; - /// Visits the `LessThan` binary operator belonging to the list identified by `sibling_list_id`. - /// The operands will be in a _two_ item list identified by `child_list_id` - VisitBinaryOpFn visit_lt; - /// Visits the `LessThanOrEqual` binary operator belonging to the list identified by `sibling_list_id`. - /// The operands will be in a _two_ item list identified by `child_list_id` - VisitBinaryOpFn visit_le; - /// Visits the `GreaterThan` binary operator belonging to the list identified by `sibling_list_id`. 
- /// The operands will be in a _two_ item list identified by `child_list_id` - VisitBinaryOpFn visit_gt; - /// Visits the `GreaterThanOrEqual` binary operator belonging to the list identified by `sibling_list_id`. - /// The operands will be in a _two_ item list identified by `child_list_id` - VisitBinaryOpFn visit_ge; - /// Visits the `Equal` binary operator belonging to the list identified by `sibling_list_id`. - /// The operands will be in a _two_ item list identified by `child_list_id` - VisitBinaryOpFn visit_eq; - /// Visits the `NotEqual` binary operator belonging to the list identified by `sibling_list_id`. - /// The operands will be in a _two_ item list identified by `child_list_id` - VisitBinaryOpFn visit_ne; - /// Visits the `Distinct` binary operator belonging to the list identified by `sibling_list_id`. - /// The operands will be in a _two_ item list identified by `child_list_id` - VisitBinaryOpFn visit_distinct; - /// Visits the `In` binary operator belonging to the list identified by `sibling_list_id`. - /// The operands will be in a _two_ item list identified by `child_list_id` - VisitBinaryOpFn visit_in; - /// Visits the `NotIn` binary operator belonging to the list identified by `sibling_list_id`. - /// The operands will be in a _two_ item list identified by `child_list_id` - VisitBinaryOpFn visit_not_in; - /// Visits the `Add` binary operator belonging to the list identified by `sibling_list_id`. - /// The operands will be in a _two_ item list identified by `child_list_id` - VisitBinaryOpFn visit_add; - /// Visits the `Minus` binary operator belonging to the list identified by `sibling_list_id`. - /// The operands will be in a _two_ item list identified by `child_list_id` - VisitBinaryOpFn visit_minus; - /// Visits the `Multiply` binary operator belonging to the list identified by `sibling_list_id`. 
- /// The operands will be in a _two_ item list identified by `child_list_id` - VisitBinaryOpFn visit_multiply; - /// Visits the `Divide` binary operator belonging to the list identified by `sibling_list_id`. - /// The operands will be in a _two_ item list identified by `child_list_id` - VisitBinaryOpFn visit_divide; - /// Visits the `column` belonging to the list identified by `sibling_list_id`. - void (*visit_column)(void *data, uintptr_t sibling_list_id, KernelStringSlice name); - /// Visits a `StructExpression` belonging to the list identified by `sibling_list_id`. - /// The sub-expressions of the `StructExpression` are in a list identified by `child_list_id` - void (*visit_struct_expr)(void *data, uintptr_t sibling_list_id, uintptr_t child_list_id); + /// An opaque engine state pointer + void *data; + /// Creates a new expression list, optionally reserving capacity up front + uintptr_t (*make_field_list)(void *data, uintptr_t reserve); + /// Visit a 32bit `integer` belonging to the list identified by `sibling_list_id`. + VisitLiteralFn visit_literal_int; + /// Visit a 64bit `long` belonging to the list identified by `sibling_list_id`. + VisitLiteralFn visit_literal_long; + /// Visit a 16bit `short` belonging to the list identified by `sibling_list_id`. + VisitLiteralFn visit_literal_short; + /// Visit an 8bit `byte` belonging to the list identified by `sibling_list_id`. + VisitLiteralFn visit_literal_byte; + /// Visit a 32bit `float` belonging to the list identified by `sibling_list_id`. + VisitLiteralFn visit_literal_float; + /// Visit a 64bit `double` belonging to the list identified by `sibling_list_id`. + VisitLiteralFn visit_literal_double; + /// Visit a `string` belonging to the list identified by `sibling_list_id`. + VisitLiteralFn visit_literal_string; + /// Visit a `boolean` belonging to the list identified by `sibling_list_id`. + VisitLiteralFn visit_literal_bool; + /// Visit a 64bit timestamp belonging to the list identified by `sibling_list_id`. 
+ /// The timestamp is microsecond precision and adjusted to UTC. + VisitLiteralFn visit_literal_timestamp; + /// Visit a 64bit timestamp belonging to the list identified by `sibling_list_id`. + /// The timestamp is microsecond precision with no timezone. + VisitLiteralFn visit_literal_timestamp_ntz; + /// Visit a 32bit intger `date` representing days since UNIX epoch 1970-01-01. The `date` belongs + /// to the list identified by `sibling_list_id`. + VisitLiteralFn visit_literal_date; + /// Visit binary data at the `buffer` with length `len` belonging to the list identified by + /// `sibling_list_id`. + void (*visit_literal_binary)(void *data, uintptr_t sibling_list_id, const uint8_t *buffer, uintptr_t len); + /// Visit a 128bit `decimal` value with the given precision and scale. The 128bit integer + /// is split into the most significant 64 bits in `value_ms`, and the least significant 64 + /// bits in `value_ls`. The `decimal` belongs to the list identified by `sibling_list_id`. + void (*visit_literal_decimal)(void *data, uintptr_t sibling_list_id, uint64_t value_ms, uint64_t value_ls, + uint8_t precision, uint8_t scale); + /// Visit a struct literal belonging to the list identified by `sibling_list_id`. + /// The field names of the struct are in a list identified by `child_field_list_id`. + /// The values of the struct are in a list identified by `child_value_list_id`. + void (*visit_literal_struct)(void *data, uintptr_t sibling_list_id, uintptr_t child_field_list_id, + uintptr_t child_value_list_id); + /// Visit an array literal belonging to the list identified by `sibling_list_id`. + /// The values of the array are in a list identified by `child_list_id`. + void (*visit_literal_array)(void *data, uintptr_t sibling_list_id, uintptr_t child_list_id); + /// Visits a null value belonging to the list identified by `sibling_list_id. 
+ void (*visit_literal_null)(void *data, uintptr_t sibling_list_id); + /// Visits an `and` expression belonging to the list identified by `sibling_list_id`. + /// The sub-expressions of the array are in a list identified by `child_list_id` + VisitVariadicFn visit_and; + /// Visits an `or` expression belonging to the list identified by `sibling_list_id`. + /// The sub-expressions of the array are in a list identified by `child_list_id` + VisitVariadicFn visit_or; + /// Visits a `not` expression belonging to the list identified by `sibling_list_id`. + /// The sub-expression will be in a _one_ item list identified by `child_list_id` + VisitUnaryFn visit_not; + /// Visits a `is_null` expression belonging to the list identified by `sibling_list_id`. + /// The sub-expression will be in a _one_ item list identified by `child_list_id` + VisitUnaryFn visit_is_null; + /// Visits the `LessThan` binary operator belonging to the list identified by `sibling_list_id`. + /// The operands will be in a _two_ item list identified by `child_list_id` + VisitBinaryOpFn visit_lt; + /// Visits the `LessThanOrEqual` binary operator belonging to the list identified by `sibling_list_id`. + /// The operands will be in a _two_ item list identified by `child_list_id` + VisitBinaryOpFn visit_le; + /// Visits the `GreaterThan` binary operator belonging to the list identified by `sibling_list_id`. + /// The operands will be in a _two_ item list identified by `child_list_id` + VisitBinaryOpFn visit_gt; + /// Visits the `GreaterThanOrEqual` binary operator belonging to the list identified by `sibling_list_id`. + /// The operands will be in a _two_ item list identified by `child_list_id` + VisitBinaryOpFn visit_ge; + /// Visits the `Equal` binary operator belonging to the list identified by `sibling_list_id`. 
+ /// The operands will be in a _two_ item list identified by `child_list_id` + VisitBinaryOpFn visit_eq; + /// Visits the `NotEqual` binary operator belonging to the list identified by `sibling_list_id`. + /// The operands will be in a _two_ item list identified by `child_list_id` + VisitBinaryOpFn visit_ne; + /// Visits the `Distinct` binary operator belonging to the list identified by `sibling_list_id`. + /// The operands will be in a _two_ item list identified by `child_list_id` + VisitBinaryOpFn visit_distinct; + /// Visits the `In` binary operator belonging to the list identified by `sibling_list_id`. + /// The operands will be in a _two_ item list identified by `child_list_id` + VisitBinaryOpFn visit_in; + /// Visits the `NotIn` binary operator belonging to the list identified by `sibling_list_id`. + /// The operands will be in a _two_ item list identified by `child_list_id` + VisitBinaryOpFn visit_not_in; + /// Visits the `Add` binary operator belonging to the list identified by `sibling_list_id`. + /// The operands will be in a _two_ item list identified by `child_list_id` + VisitBinaryOpFn visit_add; + /// Visits the `Minus` binary operator belonging to the list identified by `sibling_list_id`. + /// The operands will be in a _two_ item list identified by `child_list_id` + VisitBinaryOpFn visit_minus; + /// Visits the `Multiply` binary operator belonging to the list identified by `sibling_list_id`. + /// The operands will be in a _two_ item list identified by `child_list_id` + VisitBinaryOpFn visit_multiply; + /// Visits the `Divide` binary operator belonging to the list identified by `sibling_list_id`. + /// The operands will be in a _two_ item list identified by `child_list_id` + VisitBinaryOpFn visit_divide; + /// Visits the `column` belonging to the list identified by `sibling_list_id`. 
+ void (*visit_column)(void *data, uintptr_t sibling_list_id, KernelStringSlice name); + /// Visits a `StructExpression` belonging to the list identified by `sibling_list_id`. + /// The sub-expressions of the `StructExpression` are in a list identified by `child_list_id` + void (*visit_struct_expr)(void *data, uintptr_t sibling_list_id, uintptr_t child_list_id); }; // This trickery is from https://github.com/mozilla/cbindgen/issues/402#issuecomment-578680163 struct im_an_unused_struct_that_tricks_msvc_into_compilation { - ExternResult field; - ExternResult field2; - ExternResult field3; - ExternResult> field4; - ExternResult> field5; - ExternResult field6; - ExternResult field7; - ExternResult> field8; - ExternResult> field9; - ExternResult> field10; - ExternResult field11; + ExternResult field; + ExternResult field2; + ExternResult field3; + ExternResult> field4; + ExternResult> field5; + ExternResult field6; + ExternResult field7; + ExternResult> field8; + ExternResult> field9; + ExternResult> field10; + ExternResult field11; }; /// An `Event` can generally be thought of a "log message". It contains all the relevant bits such /// that an engine can generate a log message in its format struct Event { - /// The log message associated with the event - KernelStringSlice message; - /// Level that the event was emitted at - Level level; - /// A string that specifies in what part of the system the event occurred - KernelStringSlice target; - /// source file line number where the event occurred, or 0 (zero) if unknown - uint32_t line; - /// file where the event occurred. 
If unknown the slice `ptr` will be null and the len will be 0 - KernelStringSlice file; + /// The log message associated with the event + KernelStringSlice message; + /// Level that the event was emitted at + Level level; + /// A string that specifies in what part of the system the event occurred + KernelStringSlice target; + /// source file line number where the event occurred, or 0 (zero) if unknown + uint32_t line; + /// file where the event occurred. If unknown the slice `ptr` will be null and the len will be 0 + KernelStringSlice file; }; -using TracingEventFn = void(*)(Event event); +using TracingEventFn = void (*)(Event event); -using TracingLogLineFn = void(*)(KernelStringSlice line); +using TracingLogLineFn = void (*)(KernelStringSlice line); /// A predicate that can be used to skip data when scanning. /// @@ -530,25 +522,21 @@ using TracingLogLineFn = void(*)(KernelStringSlice line); /// kernel each retain ownership of their respective objects, with no need to coordinate memory /// lifetimes with the other. struct EnginePredicate { - void *predicate; - uintptr_t (*visitor)(void *predicate, KernelExpressionVisitorState *state); + void *predicate; + uintptr_t (*visitor)(void *predicate, KernelExpressionVisitorState *state); }; /// Give engines an easy way to consume stats struct Stats { - /// For any file where the deletion vector is not present (see [`DvInfo::has_vector`]), the - /// `num_records` statistic must be present and accurate, and must equal the number of records - /// in the data file. In the presence of Deletion Vectors the statistics may be somewhat - /// outdated, i.e. not reflecting deleted rows yet. - uint64_t num_records; + /// For any file where the deletion vector is not present (see [`DvInfo::has_vector`]), the + /// `num_records` statistic must be present and accurate, and must equal the number of records + /// in the data file. In the presence of Deletion Vectors the statistics may be somewhat + /// outdated, i.e. 
not reflecting deleted rows yet. + uint64_t num_records; }; -using CScanCallback = void(*)(NullableCvoid engine_context, - KernelStringSlice path, - int64_t size, - const Stats *stats, - const DvInfo *dv_info, - const CStringMap *partition_map); +using CScanCallback = void (*)(NullableCvoid engine_context, KernelStringSlice path, int64_t size, const Stats *stats, + const DvInfo *dv_info, const CStringMap *partition_map); /// The `EngineSchemaVisitor` defines a visitor system to allow engines to build their own /// representation of a schema from a particular schema within kernel. @@ -576,61 +564,49 @@ using CScanCallback = void(*)(NullableCvoid engine_context, /// that element's (already-visited) children. /// 4. The [`visit_schema`] method returns the id of the list of top-level columns struct EngineSchemaVisitor { - /// opaque state pointer - void *data; - /// Creates a new field list, optionally reserving capacity up front - uintptr_t (*make_field_list)(void *data, uintptr_t reserve); - /// Indicate that the schema contains a `Struct` type. The top level of a Schema is always a - /// `Struct`. The fields of the `Struct` are in the list identified by `child_list_id`. - void (*visit_struct)(void *data, - uintptr_t sibling_list_id, - KernelStringSlice name, - uintptr_t child_list_id); - /// Indicate that the schema contains an Array type. `child_list_id` will be a _one_ item list - /// with the array's element type - void (*visit_array)(void *data, - uintptr_t sibling_list_id, - KernelStringSlice name, - bool contains_null, - uintptr_t child_list_id); - /// Indicate that the schema contains an Map type. 
`child_list_id` will be a _two_ item list - /// where the first element is the map's key type and the second element is the - /// map's value type - void (*visit_map)(void *data, - uintptr_t sibling_list_id, - KernelStringSlice name, - bool value_contains_null, - uintptr_t child_list_id); - /// visit a `decimal` with the specified `precision` and `scale` - void (*visit_decimal)(void *data, - uintptr_t sibling_list_id, - KernelStringSlice name, - uint8_t precision, - uint8_t scale); - /// Visit a `string` belonging to the list identified by `sibling_list_id`. - void (*visit_string)(void *data, uintptr_t sibling_list_id, KernelStringSlice name); - /// Visit a `long` belonging to the list identified by `sibling_list_id`. - void (*visit_long)(void *data, uintptr_t sibling_list_id, KernelStringSlice name); - /// Visit an `integer` belonging to the list identified by `sibling_list_id`. - void (*visit_integer)(void *data, uintptr_t sibling_list_id, KernelStringSlice name); - /// Visit a `short` belonging to the list identified by `sibling_list_id`. - void (*visit_short)(void *data, uintptr_t sibling_list_id, KernelStringSlice name); - /// Visit a `byte` belonging to the list identified by `sibling_list_id`. - void (*visit_byte)(void *data, uintptr_t sibling_list_id, KernelStringSlice name); - /// Visit a `float` belonging to the list identified by `sibling_list_id`. - void (*visit_float)(void *data, uintptr_t sibling_list_id, KernelStringSlice name); - /// Visit a `double` belonging to the list identified by `sibling_list_id`. - void (*visit_double)(void *data, uintptr_t sibling_list_id, KernelStringSlice name); - /// Visit a `boolean` belonging to the list identified by `sibling_list_id`. - void (*visit_boolean)(void *data, uintptr_t sibling_list_id, KernelStringSlice name); - /// Visit `binary` belonging to the list identified by `sibling_list_id`. 
- void (*visit_binary)(void *data, uintptr_t sibling_list_id, KernelStringSlice name); - /// Visit a `date` belonging to the list identified by `sibling_list_id`. - void (*visit_date)(void *data, uintptr_t sibling_list_id, KernelStringSlice name); - /// Visit a `timestamp` belonging to the list identified by `sibling_list_id`. - void (*visit_timestamp)(void *data, uintptr_t sibling_list_id, KernelStringSlice name); - /// Visit a `timestamp` with no timezone belonging to the list identified by `sibling_list_id`. - void (*visit_timestamp_ntz)(void *data, uintptr_t sibling_list_id, KernelStringSlice name); + /// opaque state pointer + void *data; + /// Creates a new field list, optionally reserving capacity up front + uintptr_t (*make_field_list)(void *data, uintptr_t reserve); + /// Indicate that the schema contains a `Struct` type. The top level of a Schema is always a + /// `Struct`. The fields of the `Struct` are in the list identified by `child_list_id`. + void (*visit_struct)(void *data, uintptr_t sibling_list_id, KernelStringSlice name, uintptr_t child_list_id); + /// Indicate that the schema contains an Array type. `child_list_id` will be a _one_ item list + /// with the array's element type + void (*visit_array)(void *data, uintptr_t sibling_list_id, KernelStringSlice name, bool contains_null, + uintptr_t child_list_id); + /// Indicate that the schema contains an Map type. `child_list_id` will be a _two_ item list + /// where the first element is the map's key type and the second element is the + /// map's value type + void (*visit_map)(void *data, uintptr_t sibling_list_id, KernelStringSlice name, bool value_contains_null, + uintptr_t child_list_id); + /// visit a `decimal` with the specified `precision` and `scale` + void (*visit_decimal)(void *data, uintptr_t sibling_list_id, KernelStringSlice name, uint8_t precision, + uint8_t scale); + /// Visit a `string` belonging to the list identified by `sibling_list_id`. 
+ void (*visit_string)(void *data, uintptr_t sibling_list_id, KernelStringSlice name); + /// Visit a `long` belonging to the list identified by `sibling_list_id`. + void (*visit_long)(void *data, uintptr_t sibling_list_id, KernelStringSlice name); + /// Visit an `integer` belonging to the list identified by `sibling_list_id`. + void (*visit_integer)(void *data, uintptr_t sibling_list_id, KernelStringSlice name); + /// Visit a `short` belonging to the list identified by `sibling_list_id`. + void (*visit_short)(void *data, uintptr_t sibling_list_id, KernelStringSlice name); + /// Visit a `byte` belonging to the list identified by `sibling_list_id`. + void (*visit_byte)(void *data, uintptr_t sibling_list_id, KernelStringSlice name); + /// Visit a `float` belonging to the list identified by `sibling_list_id`. + void (*visit_float)(void *data, uintptr_t sibling_list_id, KernelStringSlice name); + /// Visit a `double` belonging to the list identified by `sibling_list_id`. + void (*visit_double)(void *data, uintptr_t sibling_list_id, KernelStringSlice name); + /// Visit a `boolean` belonging to the list identified by `sibling_list_id`. + void (*visit_boolean)(void *data, uintptr_t sibling_list_id, KernelStringSlice name); + /// Visit `binary` belonging to the list identified by `sibling_list_id`. + void (*visit_binary)(void *data, uintptr_t sibling_list_id, KernelStringSlice name); + /// Visit a `date` belonging to the list identified by `sibling_list_id`. + void (*visit_date)(void *data, uintptr_t sibling_list_id, KernelStringSlice name); + /// Visit a `timestamp` belonging to the list identified by `sibling_list_id`. + void (*visit_timestamp)(void *data, uintptr_t sibling_list_id, KernelStringSlice name); + /// Visit a `timestamp` with no timezone belonging to the list identified by `sibling_list_id`. 
+ void (*visit_timestamp_ntz)(void *data, uintptr_t sibling_list_id, KernelStringSlice name); }; extern "C" { @@ -659,8 +635,7 @@ void free_engine_data(Handle engine_data); /// /// # Safety /// Caller is responsible for passing a valid path pointer. -ExternResult get_engine_builder(KernelStringSlice path, - AllocateErrorFn allocate_error); +ExternResult get_engine_builder(KernelStringSlice path, AllocateErrorFn allocate_error); #endif #if defined(DEFINE_DEFAULT_ENGINE) @@ -687,8 +662,7 @@ ExternResult> builder_build(EngineBuilder *builder); /// # Safety /// /// Caller is responsible for passing a valid path pointer. -ExternResult> get_default_engine(KernelStringSlice path, - AllocateErrorFn allocate_error); +ExternResult> get_default_engine(KernelStringSlice path, AllocateErrorFn allocate_error); #endif #if defined(DEFINE_SYNC_ENGINE) @@ -708,8 +682,7 @@ void free_engine(Handle engine); /// # Safety /// /// Caller is responsible for passing valid handles and path pointer. -ExternResult> snapshot(KernelStringSlice path, - Handle engine); +ExternResult> snapshot(KernelStringSlice path, Handle engine); /// # Safety /// @@ -735,8 +708,7 @@ NullableCvoid snapshot_table_root(Handle snapshot, AllocateStrin /// /// The iterator must be valid (returned by [kernel_scan_data_init]) and not yet freed by /// [kernel_scan_data_free]. The visitor function pointer must be non-null. -bool string_slice_next(Handle data, - NullableCvoid engine_context, +bool string_slice_next(Handle data, NullableCvoid engine_context, void (*engine_visitor)(NullableCvoid engine_context, KernelStringSlice slice)); /// # Safety @@ -767,8 +739,7 @@ void *get_raw_engine_data(Handle data); /// # Safety /// data_handle must be a valid ExclusiveEngineData as read by the /// [`delta_kernel::engine::default::DefaultEngine`] obtained from `get_default_engine`. 
-ExternResult get_raw_arrow_data(Handle data, - Handle engine); +ExternResult get_raw_arrow_data(Handle data, Handle engine); #endif /// Call the engine back with the next `EngingeData` batch read by Parquet/Json handler. The @@ -780,8 +751,7 @@ ExternResult get_raw_arrow_data(Handle data, /// /// The iterator must be valid (returned by [`read_parquet_file`]) and not yet freed by /// [`free_read_result_iter`]. The visitor function pointer must be non-null. -ExternResult read_result_next(Handle data, - NullableCvoid engine_context, +ExternResult read_result_next(Handle data, NullableCvoid engine_context, void (*engine_visitor)(NullableCvoid engine_context, Handle engine_data)); @@ -796,9 +766,8 @@ void free_read_result_iter(Handle data); /// /// # Safety /// Caller is responsible for calling with a valid `ExternEngineHandle` and `FileMeta` -ExternResult> read_parquet_file(Handle engine, - const FileMeta *file, - Handle physical_schema); +ExternResult> +read_parquet_file(Handle engine, const FileMeta *file, Handle physical_schema); uintptr_t visit_expression_and(KernelExpressionVisitorState *state, EngineIterator *children); @@ -814,8 +783,7 @@ uintptr_t visit_expression_eq(KernelExpressionVisitorState *state, uintptr_t a, /// # Safety /// The string slice must be valid -ExternResult visit_expression_column(KernelExpressionVisitorState *state, - KernelStringSlice name, +ExternResult visit_expression_column(KernelExpressionVisitorState *state, KernelStringSlice name, AllocateErrorFn allocate_error); uintptr_t visit_expression_not(KernelExpressionVisitorState *state, uintptr_t inner_expr); @@ -824,8 +792,7 @@ uintptr_t visit_expression_is_null(KernelExpressionVisitorState *state, uintptr_ /// # Safety /// The string slice must be valid -ExternResult visit_expression_literal_string(KernelExpressionVisitorState *state, - KernelStringSlice value, +ExternResult visit_expression_literal_string(KernelExpressionVisitorState *state, KernelStringSlice value, AllocateErrorFn 
allocate_error); uintptr_t visit_expression_literal_int(KernelExpressionVisitorState *state, int32_t value); @@ -857,8 +824,7 @@ void free_kernel_predicate(Handle data); /// # Safety /// /// The caller must pass a valid SharedExpression Handle and expression visitor -uintptr_t visit_expression(const Handle *expression, - EngineExpressionVisitor *visitor); +uintptr_t visit_expression(const Handle *expression, EngineExpressionVisitor *visitor); /// Enable getting called back for tracing (logging) events in the kernel. `max_level` specifies /// that only events `<=` to the specified level should be reported. More verbose Levels are "greater @@ -877,8 +843,7 @@ uintptr_t visit_expression(const Handle *expression, /// /// # Safety /// Caller must pass a valid function pointer for the callback -bool enable_event_tracing(TracingEventFn callback, - Level max_level); +bool enable_event_tracing(TracingEventFn callback, Level max_level); /// Enable getting called back with log lines in the kernel using default settings: /// - FULL format @@ -928,13 +893,8 @@ bool enable_log_line_tracing(TracingLogLineFn callback, Level max_level); /// /// # Safety /// Caller must pass a valid function pointer for the callback -bool enable_formatted_log_line_tracing(TracingLogLineFn callback, - Level max_level, - LogLineFormat format, - bool ansi, - bool with_time, - bool with_level, - bool with_target); +bool enable_formatted_log_line_tracing(TracingLogLineFn callback, Level max_level, LogLineFormat format, bool ansi, + bool with_time, bool with_level, bool with_target); /// Drops a scan. /// # Safety @@ -947,8 +907,7 @@ void free_scan(Handle scan); /// # Safety /// /// Caller is responsible for passing a valid snapshot pointer, and engine pointer -ExternResult> scan(Handle snapshot, - Handle engine, +ExternResult> scan(Handle snapshot, Handle engine, EnginePredicate *predicate); /// Get the global state for a scan. 
See the docs for [`delta_kernel::scan::state::GlobalScanState`] @@ -1006,8 +965,7 @@ ExternResult> kernel_scan_data_init(Handle kernel_scan_data_next(Handle data, - NullableCvoid engine_context, +ExternResult kernel_scan_data_next(Handle data, NullableCvoid engine_context, void (*engine_visitor)(NullableCvoid engine_context, Handle engine_data, KernelBoolSlice selection_vector)); @@ -1025,24 +983,20 @@ void free_kernel_scan_data(Handle data); /// # Safety /// /// The engine is responsible for providing a valid [`CStringMap`] pointer and [`KernelStringSlice`] -NullableCvoid get_from_map(const CStringMap *map, - KernelStringSlice key, - AllocateStringFn allocate_fn); +NullableCvoid get_from_map(const CStringMap *map, KernelStringSlice key, AllocateStringFn allocate_fn); /// Get a selection vector out of a [`DvInfo`] struct /// /// # Safety /// Engine is responsible for providing valid pointers for each argument -ExternResult selection_vector_from_dv(const DvInfo *dv_info, - Handle engine, +ExternResult selection_vector_from_dv(const DvInfo *dv_info, Handle engine, Handle state); /// Get a vector of row indexes out of a [`DvInfo`] struct /// /// # Safety /// Engine is responsible for providing valid pointers for each argument -ExternResult row_indexes_from_dv(const DvInfo *dv_info, - Handle engine, +ExternResult row_indexes_from_dv(const DvInfo *dv_info, Handle engine, Handle state); /// Shim for ffi to call visit_scan_data. This will generally be called when iterating through scan @@ -1050,9 +1004,7 @@ ExternResult row_indexes_from_dv(const DvInfo *dv_info, /// /// # Safety /// engine is responsbile for passing a valid [`ExclusiveEngineData`] and selection vector. -void visit_scan_data(Handle data, - KernelBoolSlice selection_vec, - NullableCvoid engine_context, +void visit_scan_data(Handle data, KernelBoolSlice selection_vec, NullableCvoid engine_context, CScanCallback callback); /// Visit the schema of the passed `SnapshotHandle`, using the provided `visitor`. 
See the @@ -1073,6 +1025,6 @@ uintptr_t visit_schema(Handle snapshot, EngineSchemaVisitor *vis /// [`free_kernel_predicate`], or [`Handle::drop_handle`] Handle get_testing_kernel_expression(); -} // extern "C" +} // extern "C" -} // namespace ffi +} // namespace ffi diff --git a/src/include/delta_utils.hpp b/src/include/delta_utils.hpp index 5fa1fdf..540bdb6 100644 --- a/src/include/delta_utils.hpp +++ b/src/include/delta_utils.hpp @@ -17,111 +17,115 @@ namespace duckdb { class ExpressionVisitor : public ffi::EngineExpressionVisitor { - using FieldList = vector>; + using FieldList = vector>; public: - unique_ptr>> VisitKernelExpression(const ffi::Handle* expression); + unique_ptr>> + VisitKernelExpression(const ffi::Handle *expression); private: - unordered_map> inflight_lists; - uintptr_t next_id = 1; - - ErrorData error; - - - // Literals - template - static ffi::VisitLiteralFn VisitPrimitiveLiteral() { - return (ffi::VisitLiteralFn) &VisitPrimitiveLiteral; - } - template - static void VisitPrimitiveLiteral(void* state, uintptr_t sibling_list_id, CPP_TYPE value) { - auto state_cast = static_cast(state); - auto duckdb_value = CREATE_VALUE_FUN(value); - auto expression = make_uniq(duckdb_value); - state_cast->AppendToList(sibling_list_id, std::move(expression)); - } - - static void VisitPrimitiveLiteralBool(void* state, uintptr_t sibling_list_id, bool value); - static void VisitPrimitiveLiteralByte(void* state, uintptr_t sibling_list_id, int8_t value); - static void VisitPrimitiveLiteralShort(void* state, uintptr_t sibling_list_id, int16_t value); - static void VisitPrimitiveLiteralInt(void* state, uintptr_t sibling_list_id, int32_t value); - static void VisitPrimitiveLiteralLong(void* state, uintptr_t sibling_list_id, int64_t value); - static void VisitPrimitiveLiteralFloat(void* state, uintptr_t sibling_list_id, float value); - static void VisitPrimitiveLiteralDouble(void* state, uintptr_t sibling_list_id, double value); - - static void 
VisitTimestampLiteral(void* state, uintptr_t sibling_list_id, int64_t value); - static void VisitTimestampNtzLiteral(void* state, uintptr_t sibling_list_id, int64_t value); - static void VisitDateLiteral(void* state, uintptr_t sibling_list_id, int32_t value); - static void VisitStringLiteral(void* state, uintptr_t sibling_list_id, ffi::KernelStringSlice value); - static void VisitBinaryLiteral(void* state, uintptr_t sibling_list_id, const uint8_t *buffer, uintptr_t len); - static void VisitNullLiteral(void* state, uintptr_t sibling_list_id); - static void VisitArrayLiteral(void* state, uintptr_t sibling_list_id, uintptr_t child_id); - static void VisitStructLiteral(void *data, uintptr_t sibling_list_id, uintptr_t child_field_list_value, uintptr_t child_value_list_id); - static void VisitDecimalLiteral(void *state, uintptr_t sibling_list_id, uint64_t value_ms, uint64_t value_ls, uint8_t precision, uint8_t scale); - static void VisitColumnExpression(void *state, uintptr_t sibling_list_id, ffi::KernelStringSlice name); - static void VisitStructExpression(void *state, uintptr_t sibling_list_id, uintptr_t child_list_id); - static void VisitNotExpression(void *state, uintptr_t sibling_list_id, uintptr_t child_list_id); - static void VisitIsNullExpression(void *state, uintptr_t sibling_list_id, uintptr_t child_list_id); - - template - static ffi::VisitVariadicFn VisitUnaryExpression() { - return &VisitVariadicExpression; - } - template - static ffi::VisitVariadicFn VisitBinaryExpression() { - return &VisitBinaryExpression; - } - template - static ffi::VisitVariadicFn VisitVariadicExpression() { - return &VisitVariadicExpression; - } - - template - static void VisitVariadicExpression(void *state, uintptr_t sibling_list_id, uintptr_t child_list_id) { - auto state_cast = static_cast(state); - auto children = state_cast->TakeFieldList(child_list_id); - if (!children) { - state_cast->AppendToList(sibling_list_id, std::move(make_uniq(Value(42)))); - return; - } - unique_ptr 
expression = make_uniq(EXPRESSION_TYPE, std::move(*children)); - state_cast->AppendToList(sibling_list_id, std::move(expression)); - } - - static void VisitAdditionExpression(void *state, uintptr_t sibling_list_id, uintptr_t child_list_id); - static void VisitSubctractionExpression(void *state, uintptr_t sibling_list_id, uintptr_t child_list_id); - static void VisitDivideExpression(void *state, uintptr_t sibling_list_id, uintptr_t child_list_id); - static void VisitMultiplyExpression(void *state, uintptr_t sibling_list_id, uintptr_t child_list_id); - - template - static void VisitBinaryExpression(void *state, uintptr_t sibling_list_id, uintptr_t child_list_id) { - auto state_cast = static_cast(state); - auto children = state_cast->TakeFieldList(child_list_id); - if (!children) { - state_cast->AppendToList(sibling_list_id, std::move(make_uniq(Value(42)))); - return; - } - - if (children->size() != 2) { - state_cast->AppendToList(sibling_list_id, std::move(make_uniq(Value(42)))); - state_cast->error = ErrorData("INCORRECT SIZE IN VISIT_BINARY_EXPRESSION" + EnumUtil::ToString(EXPRESSION_TYPE)); - return; - } - - auto &lhs = children->at(0); - auto &rhs = children->at(1); - unique_ptr expression = make_uniq(EXPRESSION_TYPE, std::move(lhs), std::move(rhs)); - state_cast->AppendToList(sibling_list_id, std::move(expression)); - } - - static void VisitComparisonExpression(void *state, uintptr_t sibling_list_id, uintptr_t child_list_id); - - // List functions - static uintptr_t MakeFieldList(ExpressionVisitor* state, uintptr_t capacity_hint); - void AppendToList(uintptr_t id, unique_ptr child); - uintptr_t MakeFieldListImpl(uintptr_t capacity_hint); - unique_ptr TakeFieldList(uintptr_t id); + unordered_map> inflight_lists; + uintptr_t next_id = 1; + + ErrorData error; + + // Literals + template + static ffi::VisitLiteralFn VisitPrimitiveLiteral() { + return (ffi::VisitLiteralFn)&VisitPrimitiveLiteral; + } + template + static void VisitPrimitiveLiteral(void *state, uintptr_t 
sibling_list_id, CPP_TYPE value) { + auto state_cast = static_cast(state); + auto duckdb_value = CREATE_VALUE_FUN(value); + auto expression = make_uniq(duckdb_value); + state_cast->AppendToList(sibling_list_id, std::move(expression)); + } + + static void VisitPrimitiveLiteralBool(void *state, uintptr_t sibling_list_id, bool value); + static void VisitPrimitiveLiteralByte(void *state, uintptr_t sibling_list_id, int8_t value); + static void VisitPrimitiveLiteralShort(void *state, uintptr_t sibling_list_id, int16_t value); + static void VisitPrimitiveLiteralInt(void *state, uintptr_t sibling_list_id, int32_t value); + static void VisitPrimitiveLiteralLong(void *state, uintptr_t sibling_list_id, int64_t value); + static void VisitPrimitiveLiteralFloat(void *state, uintptr_t sibling_list_id, float value); + static void VisitPrimitiveLiteralDouble(void *state, uintptr_t sibling_list_id, double value); + + static void VisitTimestampLiteral(void *state, uintptr_t sibling_list_id, int64_t value); + static void VisitTimestampNtzLiteral(void *state, uintptr_t sibling_list_id, int64_t value); + static void VisitDateLiteral(void *state, uintptr_t sibling_list_id, int32_t value); + static void VisitStringLiteral(void *state, uintptr_t sibling_list_id, ffi::KernelStringSlice value); + static void VisitBinaryLiteral(void *state, uintptr_t sibling_list_id, const uint8_t *buffer, uintptr_t len); + static void VisitNullLiteral(void *state, uintptr_t sibling_list_id); + static void VisitArrayLiteral(void *state, uintptr_t sibling_list_id, uintptr_t child_id); + static void VisitStructLiteral(void *data, uintptr_t sibling_list_id, uintptr_t child_field_list_value, + uintptr_t child_value_list_id); + static void VisitDecimalLiteral(void *state, uintptr_t sibling_list_id, uint64_t value_ms, uint64_t value_ls, + uint8_t precision, uint8_t scale); + static void VisitColumnExpression(void *state, uintptr_t sibling_list_id, ffi::KernelStringSlice name); + static void 
VisitStructExpression(void *state, uintptr_t sibling_list_id, uintptr_t child_list_id); + static void VisitNotExpression(void *state, uintptr_t sibling_list_id, uintptr_t child_list_id); + static void VisitIsNullExpression(void *state, uintptr_t sibling_list_id, uintptr_t child_list_id); + + template + static ffi::VisitVariadicFn VisitUnaryExpression() { + return &VisitVariadicExpression; + } + template + static ffi::VisitVariadicFn VisitBinaryExpression() { + return &VisitBinaryExpression; + } + template + static ffi::VisitVariadicFn VisitVariadicExpression() { + return &VisitVariadicExpression; + } + + template + static void VisitVariadicExpression(void *state, uintptr_t sibling_list_id, uintptr_t child_list_id) { + auto state_cast = static_cast(state); + auto children = state_cast->TakeFieldList(child_list_id); + if (!children) { + state_cast->AppendToList(sibling_list_id, std::move(make_uniq(Value(42)))); + return; + } + unique_ptr expression = make_uniq(EXPRESSION_TYPE, std::move(*children)); + state_cast->AppendToList(sibling_list_id, std::move(expression)); + } + + static void VisitAdditionExpression(void *state, uintptr_t sibling_list_id, uintptr_t child_list_id); + static void VisitSubctractionExpression(void *state, uintptr_t sibling_list_id, uintptr_t child_list_id); + static void VisitDivideExpression(void *state, uintptr_t sibling_list_id, uintptr_t child_list_id); + static void VisitMultiplyExpression(void *state, uintptr_t sibling_list_id, uintptr_t child_list_id); + + template + static void VisitBinaryExpression(void *state, uintptr_t sibling_list_id, uintptr_t child_list_id) { + auto state_cast = static_cast(state); + auto children = state_cast->TakeFieldList(child_list_id); + if (!children) { + state_cast->AppendToList(sibling_list_id, std::move(make_uniq(Value(42)))); + return; + } + + if (children->size() != 2) { + state_cast->AppendToList(sibling_list_id, std::move(make_uniq(Value(42)))); + state_cast->error = + ErrorData("INCORRECT SIZE IN 
VISIT_BINARY_EXPRESSION" + EnumUtil::ToString(EXPRESSION_TYPE)); + return; + } + + auto &lhs = children->at(0); + auto &rhs = children->at(1); + unique_ptr expression = + make_uniq(EXPRESSION_TYPE, std::move(lhs), std::move(rhs)); + state_cast->AppendToList(sibling_list_id, std::move(expression)); + } + + static void VisitComparisonExpression(void *state, uintptr_t sibling_list_id, uintptr_t child_list_id); + + // List functions + static uintptr_t MakeFieldList(ExpressionVisitor *state, uintptr_t capacity_hint); + void AppendToList(uintptr_t id, unique_ptr child); + uintptr_t MakeFieldListImpl(uintptr_t capacity_hint); + unique_ptr TakeFieldList(uintptr_t id); }; // SchemaVisitor is used to parse the schema of a Delta table from the Kernel @@ -303,9 +307,9 @@ struct KernelUtils { error_cast->Throw(from_where); } throw IOException("Hit DeltaKernel FFI error (from: %s): Hit error, but error was nullptr", - from_where.c_str()); + from_where.c_str()); } - if (result.tag == ffi::ExternResult::Tag::Ok) { + if (result.tag == ffi::ExternResult::Tag::Ok) { return result.ok._0; } throw IOException("Invalid error ExternResult tag found!"); diff --git a/src/include/functions/delta_scan.hpp b/src/include/functions/delta_scan.hpp index fe842d3..86edd2e 100644 --- a/src/include/functions/delta_scan.hpp +++ b/src/include/functions/delta_scan.hpp @@ -58,15 +58,15 @@ struct DeltaSnapshot : public MultiFileList { idx_t GetTotalFileCount() override; unique_ptr GetCardinality(ClientContext &context) override; - idx_t GetVersion(); - DeltaFileMetaData &GetMetaData(idx_t index) const; + idx_t GetVersion(); + DeltaFileMetaData &GetMetaData(idx_t index) const; protected: //! 
Get the i-th expanded file string GetFile(idx_t i) override; protected: - string GetFileInternal(idx_t i); + string GetFileInternal(idx_t i); void InitializeSnapshot(); void InitializeScan(); @@ -76,14 +76,14 @@ struct DeltaSnapshot : public MultiFileList { result, StringUtil::Format("While trying to read from delta table: '%s'", paths[0])); } - static void VisitData(void *engine_context, ffi::ExclusiveEngineData *engine_data, - const struct ffi::KernelBoolSlice selection_vec); - static void VisitCallback(ffi::NullableCvoid engine_context, struct ffi::KernelStringSlice path, int64_t size, - const ffi::Stats *stats, const ffi::DvInfo *dv_info, - const struct ffi::CStringMap *partition_values); + static void VisitData(void *engine_context, ffi::ExclusiveEngineData *engine_data, + const struct ffi::KernelBoolSlice selection_vec); + static void VisitCallback(ffi::NullableCvoid engine_context, struct ffi::KernelStringSlice path, int64_t size, + const ffi::Stats *stats, const ffi::DvInfo *dv_info, + const struct ffi::CStringMap *partition_values); protected: - mutable mutex lock; + mutable mutex lock; idx_t version; From 1ed221c311d48b4af52a77d7519b3dbab6229bff Mon Sep 17 00:00:00 2001 From: Sam Ansmink Date: Thu, 19 Dec 2024 18:41:24 +0100 Subject: [PATCH 36/45] fix booleans --- src/delta_utils.cpp | 2 +- test/sql/main/test_expression.test | 2 ++ 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/src/delta_utils.cpp b/src/delta_utils.cpp index 920237d..d47d99f 100644 --- a/src/delta_utils.cpp +++ b/src/delta_utils.cpp @@ -39,7 +39,7 @@ ExpressionVisitor::VisitKernelExpression(const ffi::Handle(); + visitor.visit_literal_bool = VisitPrimitiveLiteralBool; visitor.visit_literal_byte = VisitPrimitiveLiteralByte; visitor.visit_literal_short = VisitPrimitiveLiteralShort; visitor.visit_literal_int = VisitPrimitiveLiteralInt; diff --git a/test/sql/main/test_expression.test b/test/sql/main/test_expression.test index 4de05a7..8eca5d4 100644 --- 
a/test/sql/main/test_expression.test +++ b/test/sql/main/test_expression.test @@ -23,6 +23,8 @@ SELECT unnest(get_delta_test_expression()) 9223372036854775807 -9223372036854775808 'hello expressions' +true +false '1970-01-01 00:00:00.00005+00'::TIMESTAMP WITH TIME ZONE '1970-01-01 00:00:00.0001'::TIMESTAMP '1970-02-02'::DATE From 23f4e9ce6e56643a82fcad711540bbc283e87815 Mon Sep 17 00:00:00 2001 From: Sam Ansmink Date: Fri, 20 Dec 2024 12:26:10 +0100 Subject: [PATCH 37/45] fix optional_idx issue --- src/storage/delta_catalog.cpp | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/src/storage/delta_catalog.cpp b/src/storage/delta_catalog.cpp index 44e03e7..5ecba54 100644 --- a/src/storage/delta_catalog.cpp +++ b/src/storage/delta_catalog.cpp @@ -60,19 +60,24 @@ bool DeltaCatalog::UseCachedSnapshot() { optional_idx DeltaCatalog::GetCatalogVersion(ClientContext &context) { auto &delta_transaction = DeltaTransaction::Get(context, *this); + idx_t version = DConstants::INVALID_INDEX; // Option 1: snapshot is cached table-wide auto cached_snapshot = main_schema->GetCachedTable(); if (cached_snapshot) { - return cached_snapshot->snapshot->GetVersion(); + version = cached_snapshot->snapshot->GetVersion(); } // Option 2: snapshot is cached in transaction if (delta_transaction.table_entry) { - return delta_transaction.table_entry->snapshot->GetVersion(); + version = delta_transaction.table_entry->snapshot->GetVersion(); } - return {}; + if (version != DConstants::INVALID_INDEX) { + return version; + } + + return optional_idx::Invalid(); } DatabaseSize DeltaCatalog::GetDatabaseSize(ClientContext &context) { From 6810aa3c24fb28146ff3c26ed2d26cf86f6b18f9 Mon Sep 17 00:00:00 2001 From: Sam Ansmink Date: Mon, 13 Jan 2025 10:50:45 +0100 Subject: [PATCH 38/45] bump duckdb to main --- .github/workflows/MainDistributionPipeline.yml | 6 ++---- duckdb | 2 +- extension-ci-tools | 2 +- 3 files changed, 4 insertions(+), 6 deletions(-) diff --git 
a/.github/workflows/MainDistributionPipeline.yml b/.github/workflows/MainDistributionPipeline.yml index 3449ca6..a42cbc0 100644 --- a/.github/workflows/MainDistributionPipeline.yml +++ b/.github/workflows/MainDistributionPipeline.yml @@ -16,14 +16,12 @@ jobs: name: Build extension binaries uses: duckdb/extension-ci-tools/.github/workflows/_extension_distribution.yml@main with: - # pip install duckdb==1.1.4.dev2005 - duckdb_version: b470dea7ee + duckdb_version: main ci_tools_version: main extension_name: delta enable_rust: true exclude_archs: 'wasm_mvp;wasm_eh;wasm_threads;windows_amd64_rtools;windows_amd64_mingw' extra_toolchains: 'python3' - vcpkg_commit: c82f74667287d3dc386bce81e44964370c91a289 duckdb-stable-deploy: name: Deploy extension binaries @@ -32,6 +30,6 @@ jobs: secrets: inherit with: extension_name: delta + duckdb_version: main ci_tools_version: main - duckdb_version: b470dea7ee exclude_archs: 'wasm_mvp;wasm_eh;wasm_threads;windows_amd64_rtools;windows_amd64_mingw' diff --git a/duckdb b/duckdb index b470dea..89bcc3e 160000 --- a/duckdb +++ b/duckdb @@ -1 +1 @@ -Subproject commit b470dea7ee47dc2debcc37a4e94976f8eff6670c +Subproject commit 89bcc3e2ce739b1b470afa79818ee03c8cf96fe8 diff --git a/extension-ci-tools b/extension-ci-tools index 916d4ef..4317e39 160000 --- a/extension-ci-tools +++ b/extension-ci-tools @@ -1 +1 @@ -Subproject commit 916d4ef4371068ca98a007378b52582c3e46b4e5 +Subproject commit 4317e39099f4b71d614f00d044aaec651bec6fc9 From e39f308b148923db424e5184f4472d174abad85d Mon Sep 17 00:00:00 2001 From: Sam Ansmink Date: Thu, 12 Dec 2024 14:26:37 +0100 Subject: [PATCH 39/45] poc hooking up logging --- src/delta_extension.cpp | 4 ++++ src/delta_utils.cpp | 48 +++++++++++++++++++++++++++++++++++++ src/include/delta_utils.hpp | 20 ++++++++++++++++ 3 files changed, 72 insertions(+) diff --git a/src/delta_extension.cpp b/src/delta_extension.cpp index 50ce93d..596f9ab 100644 --- a/src/delta_extension.cpp +++ b/src/delta_extension.cpp @@ -2,6 +2,7 @@ 
#include "delta_extension.hpp" +#include "delta_utils.hpp" #include "delta_functions.hpp" #include "duckdb.hpp" #include "duckdb/common/exception.hpp" @@ -62,6 +63,9 @@ static void LoadInternal(DatabaseInstance &instance) { "Adds the filtered files to the explain output. Warning: this may change performance of " "delta scan during explain analyze queries.", LogicalType::BOOLEAN, Value(true)); + + LoggerCallback::Initialize(instance); + ffi::enable_event_tracing(LoggerCallback::CallbackEvent, ffi::Level::TRACE); } void DeltaExtension::Load(DuckDB &db) { diff --git a/src/delta_utils.cpp b/src/delta_utils.cpp index d47d99f..89d7e5c 100644 --- a/src/delta_utils.cpp +++ b/src/delta_utils.cpp @@ -4,6 +4,7 @@ #include "duckdb.hpp" #include "duckdb/main/extension_util.hpp" +#include "duckdb/main/database.hpp" #include #include @@ -634,4 +635,51 @@ uintptr_t PredicateVisitor::VisitFilter(const string &col_name, const TableFilte } } +void LoggerCallback::Initialize(DatabaseInstance &db_p) { + auto &instance = GetInstance(); + unique_lock lck(instance.lock); + instance.db = db_p.shared_from_this(); +} + +void LoggerCallback::CallbackEvent(ffi::Event event) { + auto &instance = GetInstance(); + auto db_locked = instance.db.lock(); + if (db_locked) { + auto transformed_log_level = GetDuckDBLogLevel(event.level); + auto log_type = KernelUtils::FromDeltaString(event.target); + string constructed_log_message; + Logger::Log(log_type.c_str(), *db_locked, transformed_log_level, [&]() { + auto message = KernelUtils::FromDeltaString(event.message); + auto file = KernelUtils::FromDeltaString(event.file); + + if (!file.empty()) { + constructed_log_message = StringUtil::Format("%s@%u : %s ", file, event.line, message); + } else { + constructed_log_message = message; + } + + return constructed_log_message.c_str(); + }); + } +} + +LogLevel LoggerCallback::GetDuckDBLogLevel(ffi::Level level) { + switch (level) { + case ffi::Level::TRACE: + case ffi::Level::DEBUGGING: + return 
LogLevel::DEBUGGING; + case ffi::Level::INFO: + return LogLevel::INFO; + case ffi::Level::WARN: + return LogLevel::WARN; + case ffi::Level::ERROR: + return LogLevel::ERROR; + } +} + +LoggerCallback &LoggerCallback::GetInstance() { + static LoggerCallback instance; + return instance; +} + }; // namespace duckdb diff --git a/src/include/delta_utils.hpp b/src/include/delta_utils.hpp index 540bdb6..b34c197 100644 --- a/src/include/delta_utils.hpp +++ b/src/include/delta_utils.hpp @@ -15,6 +15,7 @@ // TODO: clean up this file as we go namespace duckdb { +class DatabaseInstance; class ExpressionVisitor : public ffi::EngineExpressionVisitor { using FieldList = vector>; @@ -336,4 +337,23 @@ class PredicateVisitor : public ffi::EnginePredicate { uintptr_t VisitFilter(const string &col_name, const TableFilter &filter, ffi::KernelExpressionVisitorState *state); }; +// Singleton class to forward logs to DuckDB +class LoggerCallback { +public: + //! Singleton GetInstance + static LoggerCallback& GetInstance(); + static void Initialize(DatabaseInstance &db); + static void CallbackLogLine(ffi::KernelStringSlice log_line); + static void CallbackEvent(ffi::Event log_line); + + + static LogLevel GetDuckDBLogLevel(ffi::Level); + +protected: + LoggerCallback() {} + + mutex lock; + weak_ptr db; +}; + } // namespace duckdb From 020eb46b3165c2e31842b9486694492340d2a78b Mon Sep 17 00:00:00 2001 From: Sam Ansmink Date: Mon, 13 Jan 2025 13:16:18 +0100 Subject: [PATCH 40/45] finish delta kernel logging forwarding --- src/delta_extension.cpp | 9 +++-- src/delta_utils.cpp | 37 +++++++++++++++----- src/include/delta_utils.hpp | 5 ++- test/sql/delta_kernel_rs/logging.test | 49 +++++++++++++++++++++++++++ 4 files changed, 88 insertions(+), 12 deletions(-) create mode 100644 test/sql/delta_kernel_rs/logging.test diff --git a/src/delta_extension.cpp b/src/delta_extension.cpp index 596f9ab..2d9c03f 100644 --- a/src/delta_extension.cpp +++ b/src/delta_extension.cpp @@ -10,6 +10,7 @@ #include 
"duckdb/storage/storage_extension.hpp" #include "storage/delta_catalog.hpp" #include "storage/delta_transaction_manager.hpp" +#include "duckdb/main/config.hpp" namespace duckdb { @@ -60,12 +61,16 @@ static void LoadInternal(DatabaseInstance &instance) { config.storage_extensions["delta"] = make_uniq(); config.AddExtensionOption("delta_scan_explain_files_filtered", - "Adds the filtered files to the explain output. Warning: this may change performance of " + "Adds the filtered files to the explain output. Warning: this may impact performance of " "delta scan during explain analyze queries.", LogicalType::BOOLEAN, Value(true)); + config.AddExtensionOption("delta_kernel_logging", + "Forwards the internal logging of the Delta Kernel to the duckdb logger. Warning: this may impact " + "performance even with DuckDB logging disabled.", + LogicalType::BOOLEAN, Value(false), LoggerCallback::DuckDBSettingCallBack); + LoggerCallback::Initialize(instance); - ffi::enable_event_tracing(LoggerCallback::CallbackEvent, ffi::Level::TRACE); } void DeltaExtension::Load(DuckDB &db) { diff --git a/src/delta_utils.cpp b/src/delta_utils.cpp index 89d7e5c..e8ed7c9 100644 --- a/src/delta_utils.cpp +++ b/src/delta_utils.cpp @@ -172,7 +172,7 @@ void ExpressionVisitor::VisitPrimitiveLiteralDouble(void *state, uintptr_t sibli } void ExpressionVisitor::VisitTimestampLiteral(void *state, uintptr_t sibling_list_id, int64_t value) { - auto expression = make_uniq(Value::TIMESTAMPTZ(static_cast(value))); + auto expression = make_uniq(Value::TIMESTAMPTZ(timestamp_tz_t(value))); static_cast(state)->AppendToList(sibling_list_id, std::move(expression)); } @@ -646,14 +646,13 @@ void LoggerCallback::CallbackEvent(ffi::Event event) { auto db_locked = instance.db.lock(); if (db_locked) { auto transformed_log_level = GetDuckDBLogLevel(event.level); - auto log_type = KernelUtils::FromDeltaString(event.target); string constructed_log_message; - Logger::Log(log_type.c_str(), *db_locked, transformed_log_level, 
[&]() { + Logger::Log("delta.Kernel", *db_locked, transformed_log_level, [&]() { + auto log_type = KernelUtils::FromDeltaString(event.target); auto message = KernelUtils::FromDeltaString(event.message); auto file = KernelUtils::FromDeltaString(event.file); - if (!file.empty()) { - constructed_log_message = StringUtil::Format("%s@%u : %s ", file, event.line, message); + constructed_log_message = StringUtil::Format("[%s] %s@%u : %s ", log_type, file, event.line, message); } else { constructed_log_message = message; } @@ -666,14 +665,15 @@ void LoggerCallback::CallbackEvent(ffi::Event event) { LogLevel LoggerCallback::GetDuckDBLogLevel(ffi::Level level) { switch (level) { case ffi::Level::TRACE: + return LogLevel::LOG_TRACE; case ffi::Level::DEBUGGING: - return LogLevel::DEBUGGING; + return LogLevel::LOG_DEBUG; case ffi::Level::INFO: - return LogLevel::INFO; + return LogLevel::LOG_INFO; case ffi::Level::WARN: - return LogLevel::WARN; + return LogLevel::LOG_WARN; case ffi::Level::ERROR: - return LogLevel::ERROR; + return LogLevel::LOG_ERROR; } } @@ -682,4 +682,23 @@ LoggerCallback &LoggerCallback::GetInstance() { return instance; } +void LoggerCallback::DuckDBSettingCallBack(ClientContext &context, SetScope scope, Value ¶meter) { + Value current_setting; + auto res = context.TryGetCurrentSetting("delta_kernel_logging", current_setting); + + if (res.GetScope() == SettingScope::INVALID) { + throw InternalException("Failed to find setting 'delta_kernel_logging'"); + } + + if (current_setting.GetValue() && !parameter.GetValue()) { + throw InvalidInputException("Can not disable 'delta_kernel_logging' after enabling it. 
You can disable DuckDB " + "logging with SET enable_logging=false, but there will still be some performance overhead from 'delta_kernel_logging'" + "that can only be mitigated by restarting DuckDB"); + } + + if (!current_setting.GetValue() && parameter.GetValue()) { + ffi::enable_event_tracing(LoggerCallback::CallbackEvent, ffi::Level::TRACE); + } +} + }; // namespace duckdb diff --git a/src/include/delta_utils.hpp b/src/include/delta_utils.hpp index b34c197..f6d9671 100644 --- a/src/include/delta_utils.hpp +++ b/src/include/delta_utils.hpp @@ -340,10 +340,13 @@ class PredicateVisitor : public ffi::EnginePredicate { // Singleton class to forward logs to DuckDB class LoggerCallback { public: + + //! The Callback for the DuckDB setting to hook up Delta Kernel Logging to the DuckDB logger + static void DuckDBSettingCallBack(ClientContext &context, SetScope scope, Value ¶meter); + //! Singleton GetInstance static LoggerCallback& GetInstance(); static void Initialize(DatabaseInstance &db); - static void CallbackLogLine(ffi::KernelStringSlice log_line); static void CallbackEvent(ffi::Event log_line); diff --git a/test/sql/delta_kernel_rs/logging.test b/test/sql/delta_kernel_rs/logging.test new file mode 100644 index 0000000..1909eba --- /dev/null +++ b/test/sql/delta_kernel_rs/logging.test @@ -0,0 +1,49 @@ +# name: test/sql/delta_kernel_rs/logging.test +# description: test that delta kernel log entries are properly propagated to the DuckDB logger +# group: [delta_kernel_rs] + +require parquet + +require delta + +require-env DELTA_KERNEL_TESTS_PATH + +statement ok +set enable_logging=true; + +statement ok +SELECT * FROM delta_scan('${DELTA_KERNEL_TESTS_PATH}/basic_partitioned') + +# No kernel logging available yet: we need to set delta_kernel_logging=true +query I +SELECT count(*) FROM duckdb_logs WHERE starts_with(type, 'delta.Kernel') +---- +0 + +statement ok +set delta_kernel_logging=true; + +statement ok +set logging_level = 'TRACE'; + +statement ok +SELECT * FROM 
delta_scan('${DELTA_KERNEL_TESTS_PATH}/basic_partitioned') + +# Now we have log! +query I +SELECT count(*) > 50 FROM duckdb_logs WHERE starts_with(type, 'delta.Kernel') +---- +true + +statement ok +set delta_kernel_logging=true; + +statement error +set delta_kernel_logging=false; +---- +Invalid Input Error: Can not disable 'delta_kernel_logging' after enabling it + +mode output_result + +statement ok +FROM duckdb_logs WHERE starts_with(type, 'delta.Kernel') \ No newline at end of file From e5ee3e1b6677c81cec16aa17720141a19a223a83 Mon Sep 17 00:00:00 2001 From: Sam Ansmink Date: Mon, 13 Jan 2025 13:16:52 +0100 Subject: [PATCH 41/45] format --- src/delta_extension.cpp | 11 ++-- src/delta_utils.cpp | 103 +++++++++++++++++----------------- src/include/delta_utils.hpp | 23 ++++---- src/storage/delta_catalog.cpp | 12 ++-- 4 files changed, 75 insertions(+), 74 deletions(-) diff --git a/src/delta_extension.cpp b/src/delta_extension.cpp index 2d9c03f..dac8888 100644 --- a/src/delta_extension.cpp +++ b/src/delta_extension.cpp @@ -65,12 +65,13 @@ static void LoadInternal(DatabaseInstance &instance) { "delta scan during explain analyze queries.", LogicalType::BOOLEAN, Value(true)); - config.AddExtensionOption("delta_kernel_logging", - "Forwards the internal logging of the Delta Kernel to the duckdb logger. Warning: this may impact " - "performance even with DuckDB logging disabled.", - LogicalType::BOOLEAN, Value(false), LoggerCallback::DuckDBSettingCallBack); + config.AddExtensionOption( + "delta_kernel_logging", + "Forwards the internal logging of the Delta Kernel to the duckdb logger. 
Warning: this may impact " + "performance even with DuckDB logging disabled.", + LogicalType::BOOLEAN, Value(false), LoggerCallback::DuckDBSettingCallBack); - LoggerCallback::Initialize(instance); + LoggerCallback::Initialize(instance); } void DeltaExtension::Load(DuckDB &db) { diff --git a/src/delta_utils.cpp b/src/delta_utils.cpp index e8ed7c9..f76c6e3 100644 --- a/src/delta_utils.cpp +++ b/src/delta_utils.cpp @@ -636,69 +636,70 @@ uintptr_t PredicateVisitor::VisitFilter(const string &col_name, const TableFilte } void LoggerCallback::Initialize(DatabaseInstance &db_p) { - auto &instance = GetInstance(); - unique_lock lck(instance.lock); - instance.db = db_p.shared_from_this(); + auto &instance = GetInstance(); + unique_lock lck(instance.lock); + instance.db = db_p.shared_from_this(); } void LoggerCallback::CallbackEvent(ffi::Event event) { - auto &instance = GetInstance(); - auto db_locked = instance.db.lock(); - if (db_locked) { - auto transformed_log_level = GetDuckDBLogLevel(event.level); - string constructed_log_message; - Logger::Log("delta.Kernel", *db_locked, transformed_log_level, [&]() { - auto log_type = KernelUtils::FromDeltaString(event.target); - auto message = KernelUtils::FromDeltaString(event.message); - auto file = KernelUtils::FromDeltaString(event.file); - if (!file.empty()) { - constructed_log_message = StringUtil::Format("[%s] %s@%u : %s ", log_type, file, event.line, message); - } else { - constructed_log_message = message; - } - - return constructed_log_message.c_str(); - }); - } + auto &instance = GetInstance(); + auto db_locked = instance.db.lock(); + if (db_locked) { + auto transformed_log_level = GetDuckDBLogLevel(event.level); + string constructed_log_message; + Logger::Log("delta.Kernel", *db_locked, transformed_log_level, [&]() { + auto log_type = KernelUtils::FromDeltaString(event.target); + auto message = KernelUtils::FromDeltaString(event.message); + auto file = KernelUtils::FromDeltaString(event.file); + if (!file.empty()) { + 
constructed_log_message = StringUtil::Format("[%s] %s@%u : %s ", log_type, file, event.line, message); + } else { + constructed_log_message = message; + } + + return constructed_log_message.c_str(); + }); + } } LogLevel LoggerCallback::GetDuckDBLogLevel(ffi::Level level) { - switch (level) { - case ffi::Level::TRACE: - return LogLevel::LOG_TRACE; - case ffi::Level::DEBUGGING: - return LogLevel::LOG_DEBUG; - case ffi::Level::INFO: - return LogLevel::LOG_INFO; - case ffi::Level::WARN: - return LogLevel::LOG_WARN; - case ffi::Level::ERROR: - return LogLevel::LOG_ERROR; - } + switch (level) { + case ffi::Level::TRACE: + return LogLevel::LOG_TRACE; + case ffi::Level::DEBUGGING: + return LogLevel::LOG_DEBUG; + case ffi::Level::INFO: + return LogLevel::LOG_INFO; + case ffi::Level::WARN: + return LogLevel::LOG_WARN; + case ffi::Level::ERROR: + return LogLevel::LOG_ERROR; + } } LoggerCallback &LoggerCallback::GetInstance() { - static LoggerCallback instance; - return instance; + static LoggerCallback instance; + return instance; } void LoggerCallback::DuckDBSettingCallBack(ClientContext &context, SetScope scope, Value ¶meter) { - Value current_setting; - auto res = context.TryGetCurrentSetting("delta_kernel_logging", current_setting); - - if (res.GetScope() == SettingScope::INVALID) { - throw InternalException("Failed to find setting 'delta_kernel_logging'"); - } - - if (current_setting.GetValue() && !parameter.GetValue()) { - throw InvalidInputException("Can not disable 'delta_kernel_logging' after enabling it. 
You can disable DuckDB " - "logging with SET enable_logging=false, but there will still be some performance overhead from 'delta_kernel_logging'" - "that can only be mitigated by restarting DuckDB"); - } - - if (!current_setting.GetValue() && parameter.GetValue()) { - ffi::enable_event_tracing(LoggerCallback::CallbackEvent, ffi::Level::TRACE); - } + Value current_setting; + auto res = context.TryGetCurrentSetting("delta_kernel_logging", current_setting); + + if (res.GetScope() == SettingScope::INVALID) { + throw InternalException("Failed to find setting 'delta_kernel_logging'"); + } + + if (current_setting.GetValue() && !parameter.GetValue()) { + throw InvalidInputException("Can not disable 'delta_kernel_logging' after enabling it. You can disable DuckDB " + "logging with SET enable_logging=false, but there will still be some performance " + "overhead from 'delta_kernel_logging'" + "that can only be mitigated by restarting DuckDB"); + } + + if (!current_setting.GetValue() && parameter.GetValue()) { + ffi::enable_event_tracing(LoggerCallback::CallbackEvent, ffi::Level::TRACE); + } } }; // namespace duckdb diff --git a/src/include/delta_utils.hpp b/src/include/delta_utils.hpp index f6d9671..5bf4e1d 100644 --- a/src/include/delta_utils.hpp +++ b/src/include/delta_utils.hpp @@ -340,23 +340,22 @@ class PredicateVisitor : public ffi::EnginePredicate { // Singleton class to forward logs to DuckDB class LoggerCallback { public: + //! The Callback for the DuckDB setting to hook up Delta Kernel Logging to the DuckDB logger + static void DuckDBSettingCallBack(ClientContext &context, SetScope scope, Value ¶meter); - //! The Callback for the DuckDB setting to hook up Delta Kernel Logging to the DuckDB logger - static void DuckDBSettingCallBack(ClientContext &context, SetScope scope, Value ¶meter); + //! Singleton GetInstance + static LoggerCallback &GetInstance(); + static void Initialize(DatabaseInstance &db); + static void CallbackEvent(ffi::Event log_line); - //! 
Singleton GetInstance - static LoggerCallback& GetInstance(); - static void Initialize(DatabaseInstance &db); - static void CallbackEvent(ffi::Event log_line); - - - static LogLevel GetDuckDBLogLevel(ffi::Level); + static LogLevel GetDuckDBLogLevel(ffi::Level); protected: - LoggerCallback() {} + LoggerCallback() { + } - mutex lock; - weak_ptr db; + mutex lock; + weak_ptr db; }; } // namespace duckdb diff --git a/src/storage/delta_catalog.cpp b/src/storage/delta_catalog.cpp index 5ecba54..fb301e2 100644 --- a/src/storage/delta_catalog.cpp +++ b/src/storage/delta_catalog.cpp @@ -60,22 +60,22 @@ bool DeltaCatalog::UseCachedSnapshot() { optional_idx DeltaCatalog::GetCatalogVersion(ClientContext &context) { auto &delta_transaction = DeltaTransaction::Get(context, *this); - idx_t version = DConstants::INVALID_INDEX; + idx_t version = DConstants::INVALID_INDEX; // Option 1: snapshot is cached table-wide auto cached_snapshot = main_schema->GetCachedTable(); if (cached_snapshot) { - version = cached_snapshot->snapshot->GetVersion(); + version = cached_snapshot->snapshot->GetVersion(); } // Option 2: snapshot is cached in transaction if (delta_transaction.table_entry) { - version = delta_transaction.table_entry->snapshot->GetVersion(); + version = delta_transaction.table_entry->snapshot->GetVersion(); } - if (version != DConstants::INVALID_INDEX) { - return version; - } + if (version != DConstants::INVALID_INDEX) { + return version; + } return optional_idx::Invalid(); } From 07955a1b05dc2c620feff3d390abcec6b2b6a7b1 Mon Sep 17 00:00:00 2001 From: Sam Ansmink Date: Mon, 13 Jan 2025 13:22:53 +0100 Subject: [PATCH 42/45] remove forgotten print statement --- test/sql/delta_kernel_rs/logging.test | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) diff --git a/test/sql/delta_kernel_rs/logging.test b/test/sql/delta_kernel_rs/logging.test index 1909eba..68fd5b1 100644 --- a/test/sql/delta_kernel_rs/logging.test +++ b/test/sql/delta_kernel_rs/logging.test @@ -41,9 +41,4 @@ set 
delta_kernel_logging=true; statement error set delta_kernel_logging=false; ---- -Invalid Input Error: Can not disable 'delta_kernel_logging' after enabling it - -mode output_result - -statement ok -FROM duckdb_logs WHERE starts_with(type, 'delta.Kernel') \ No newline at end of file +Invalid Input Error: Can not disable 'delta_kernel_logging' after enabling it \ No newline at end of file From 89ebdb7267f9158c34a77e4480027cb283515427 Mon Sep 17 00:00:00 2001 From: Sam Ansmink Date: Mon, 13 Jan 2025 13:28:24 +0100 Subject: [PATCH 43/45] re-enable building test extensions --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 4a361b3..7eb8376 100644 --- a/Makefile +++ b/Makefile @@ -20,7 +20,7 @@ test_debug: export DELTA_KERNEL_TESTS_PATH=./build/debug/rust/src/delta_kernel/k test_debug: export DAT_PATH=./build/debug/rust/src/delta_kernel/acceptance/tests/dat # Core extensions that we need for testing -#CORE_EXTENSIONS='tpcds;tpch;aws;azure;httpfs' +CORE_EXTENSIONS='tpcds;tpch;aws;azure;httpfs' # Set this flag during building to enable the benchmark runner ifeq (${BUILD_BENCHMARK}, 1) From 0ece6669c56af62f181b6ee57d12978b27a3634b Mon Sep 17 00:00:00 2001 From: Sam Ansmink Date: Mon, 13 Jan 2025 13:38:16 +0100 Subject: [PATCH 44/45] fix thread-safety issue for logging singleton --- src/delta_utils.cpp | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/delta_utils.cpp b/src/delta_utils.cpp index f76c6e3..9cf8337 100644 --- a/src/delta_utils.cpp +++ b/src/delta_utils.cpp @@ -638,7 +638,9 @@ uintptr_t PredicateVisitor::VisitFilter(const string &col_name, const TableFilte void LoggerCallback::Initialize(DatabaseInstance &db_p) { auto &instance = GetInstance(); unique_lock lck(instance.lock); - instance.db = db_p.shared_from_this(); + if (instance.db.expired()) { + instance.db = db_p.shared_from_this(); + } } void LoggerCallback::CallbackEvent(ffi::Event event) { From 
7e99954d72ac1f9d9159c658202b7577949002b2 Mon Sep 17 00:00:00 2001 From: Sam Ansmink Date: Mon, 13 Jan 2025 16:29:13 +0100 Subject: [PATCH 45/45] skip musl --- .github/workflows/MainDistributionPipeline.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/MainDistributionPipeline.yml b/.github/workflows/MainDistributionPipeline.yml index a42cbc0..45d26e7 100644 --- a/.github/workflows/MainDistributionPipeline.yml +++ b/.github/workflows/MainDistributionPipeline.yml @@ -20,7 +20,7 @@ jobs: ci_tools_version: main extension_name: delta enable_rust: true - exclude_archs: 'wasm_mvp;wasm_eh;wasm_threads;windows_amd64_rtools;windows_amd64_mingw' + exclude_archs: 'wasm_mvp;wasm_eh;wasm_threads;windows_amd64_rtools;windows_amd64_mingw;linux_amd64_musl' extra_toolchains: 'python3' duckdb-stable-deploy: @@ -32,4 +32,4 @@ jobs: extension_name: delta duckdb_version: main ci_tools_version: main - exclude_archs: 'wasm_mvp;wasm_eh;wasm_threads;windows_amd64_rtools;windows_amd64_mingw' + exclude_archs: 'wasm_mvp;wasm_eh;wasm_threads;windows_amd64_rtools;windows_amd64_mingw;linux_amd64_musl'