From 99c33c3ff34b4c850f524c3e2f3544455165c62e Mon Sep 17 00:00:00 2001 From: Steinthor Palsson Date: Wed, 25 Oct 2023 18:07:18 -0400 Subject: [PATCH] Prefer staging file formats when staging --- dlt/normalize/normalize.py | 2 +- dlt/pipeline/pipeline.py | 5 ++++- 2 files changed, 5 insertions(+), 2 deletions(-) diff --git a/dlt/normalize/normalize.py b/dlt/normalize/normalize.py index 6ce24cd3ea..4c30d1a679 100644 --- a/dlt/normalize/normalize.py +++ b/dlt/normalize/normalize.py @@ -84,7 +84,7 @@ def w_normalize_files( def _get_load_storage(file_format: TLoaderFileFormat) -> LoadStorage: # TODO: capabilities.supported_*_formats can be None, it should have defaults - supported_formats = list(set(destination_caps.supported_loader_file_formats or []) | set(destination_caps.supported_staging_file_formats or [])) + supported_formats = destination_caps.supported_loader_file_formats or [] if file_format == "parquet": if file_format in supported_formats: supported_formats.append("arrow") # TODO: Hack to make load storage use the correct writer diff --git a/dlt/pipeline/pipeline.py b/dlt/pipeline/pipeline.py index 1bd9ed61df..f034f4b1f2 100644 --- a/dlt/pipeline/pipeline.py +++ b/dlt/pipeline/pipeline.py @@ -954,7 +954,7 @@ def _get_destination_capabilities(self) -> DestinationCapabilitiesContext: ) return self.destination.capabilities() - def _get_staging_capabilities(self) -> DestinationCapabilitiesContext: + def _get_staging_capabilities(self) -> Optional[DestinationCapabilitiesContext]: return self.staging.capabilities() if self.staging is not None else None def _validate_pipeline_name(self) -> None: @@ -1016,6 +1016,9 @@ def _maybe_destination_capabilities(self, loader_file_format: TLoaderFileFormat DestinationReference.to_name(self.destination), DestinationReference.to_name(self.staging) if self.staging else None, destination_caps, stage_caps, loader_file_format) + caps.supported_loader_file_formats = ( + destination_caps.supported_staging_file_formats if stage_caps else None + ) or destination_caps.supported_loader_file_formats yield caps finally: if injected_caps: