From ef19c92891a1e417c9a2f34265523c40aa6179f8 Mon Sep 17 00:00:00 2001 From: AstrakhantsevaAA Date: Tue, 31 Oct 2023 16:26:15 +0100 Subject: [PATCH 01/16] del source name --- .../examples/nested_mongo_data/__init__.py | 0 .../nested_mongo_data/code/.dlt/config.toml | 2 + .../nested_mongo_data/code/.dlt/secrets.toml | 2 + .../nested_mongo_data/code/__init__.py | 0 .../code/mongodb-snippets.py | 41 +++++++++++++ .../docs/examples/nested_mongo_data/index.md | 59 +++++++++++++++++++ 6 files changed, 104 insertions(+) create mode 100644 docs/website/docs/examples/nested_mongo_data/__init__.py create mode 100644 docs/website/docs/examples/nested_mongo_data/code/.dlt/config.toml create mode 100644 docs/website/docs/examples/nested_mongo_data/code/.dlt/secrets.toml create mode 100644 docs/website/docs/examples/nested_mongo_data/code/__init__.py create mode 100644 docs/website/docs/examples/nested_mongo_data/code/mongodb-snippets.py create mode 100644 docs/website/docs/examples/nested_mongo_data/index.md diff --git a/docs/website/docs/examples/nested_mongo_data/__init__.py b/docs/website/docs/examples/nested_mongo_data/__init__.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/docs/website/docs/examples/nested_mongo_data/code/.dlt/config.toml b/docs/website/docs/examples/nested_mongo_data/code/.dlt/config.toml new file mode 100644 index 0000000000..be627e6c11 --- /dev/null +++ b/docs/website/docs/examples/nested_mongo_data/code/.dlt/config.toml @@ -0,0 +1,2 @@ +# @@@DLT_SNIPPET_START example +# @@@DLT_SNIPPET_END example diff --git a/docs/website/docs/examples/nested_mongo_data/code/.dlt/secrets.toml b/docs/website/docs/examples/nested_mongo_data/code/.dlt/secrets.toml new file mode 100644 index 0000000000..be627e6c11 --- /dev/null +++ b/docs/website/docs/examples/nested_mongo_data/code/.dlt/secrets.toml @@ -0,0 +1,2 @@ +# @@@DLT_SNIPPET_START example +# @@@DLT_SNIPPET_END example diff --git a/docs/website/docs/examples/nested_mongo_data/code/__init__.py 
b/docs/website/docs/examples/nested_mongo_data/code/__init__.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/docs/website/docs/examples/nested_mongo_data/code/mongodb-snippets.py b/docs/website/docs/examples/nested_mongo_data/code/mongodb-snippets.py new file mode 100644 index 0000000000..2392dcae5a --- /dev/null +++ b/docs/website/docs/examples/nested_mongo_data/code/mongodb-snippets.py @@ -0,0 +1,41 @@ +from tests.utils import skipifgithubfork + + +@skipifgithubfork +def incremental_snippet() -> None: + + # @@@DLT_SNIPPET_START example + # @@@DLT_SNIPPET_START markdown_source + from typing import Iterator, Optional, Dict, Any, Tuple + + import dlt + from dlt.common import pendulum + from dlt.common.time import ensure_pendulum_datetime + from dlt.common.typing import TDataItem, TDataItems, TAnyDateTime + from dlt.extract.source import DltResource + from dlt.sources.helpers.requests import client + + @dlt.source + def mongodb_source( + credentials: Dict[str, str]=dlt.secrets.value, + start_date: Optional[TAnyDateTime] = pendulum.datetime(year=2000, month=1, day=1), # noqa: B008 + end_date: Optional[TAnyDateTime] = None, + ): + pass + + # @@@DLT_SNIPPET_END markdown_source + + # @@@DLT_SNIPPET_START markdown_pipeline + __name__ = "__main__" # @@@DLT_REMOVE + if __name__ == "__main__": + # create dlt pipeline + pipeline = dlt.pipeline( + pipeline_name="pipeline_name", destination="duckdb", dataset_name="dataset_name" + ) + + load_info = pipeline.run(mongodb_source()) + print(load_info) + # @@@DLT_SNIPPET_END markdown_pipeline + # @@@DLT_SNIPPET_END example + + diff --git a/docs/website/docs/examples/nested_mongo_data/index.md b/docs/website/docs/examples/nested_mongo_data/index.md new file mode 100644 index 0000000000..cda02cc206 --- /dev/null +++ b/docs/website/docs/examples/nested_mongo_data/index.md @@ -0,0 +1,59 @@ +--- +title: Control nested MongoDB data +description: Learn how +keywords: [incremental loading, example] +--- + +import Header 
from '../_examples-header.md'; + +
+ +## Control nested MongoDB data + +In this example, you'll find a Python script that + +We'll learn: + + +### Loading code + + +```py +from typing import Iterator, Optional, Dict, Any, Tuple + +import dlt +from dlt.common import pendulum +from dlt.common.time import ensure_pendulum_datetime +from dlt.common.typing import TDataItem, TDataItems, TAnyDateTime +from dlt.extract.source import DltResource +from dlt.sources.helpers.requests import client + +@dlt.source +def mongodb_source( + credentials: Dict[str, str]=dlt.secrets.value, + start_date: Optional[TAnyDateTime] = pendulum.datetime(year=2000, month=1, day=1), # noqa: B008 + end_date: Optional[TAnyDateTime] = None, +): + pass +``` + + +Run the pipeline: + + + +```py +if __name__ == "__main__": + # create dlt pipeline + pipeline = dlt.pipeline( + pipeline_name="pipeline_name", destination="duckdb", dataset_name="dataset_name" + ) + + load_info = pipeline.run(mongodb_source()) + print(load_info) +``` + + From d8299cc7e3c971b50cde538afbd78db8fdcce601 Mon Sep 17 00:00:00 2001 From: AstrakhantsevaAA Date: Tue, 7 Nov 2023 14:06:49 +0100 Subject: [PATCH 02/16] add mongodb example --- docs/examples/chess_production/chess.py | 3 +- .../docs/examples/chess_production/index.md | 1 - .../__init__.py | 0 .../code/__init__.py | 0 .../nested_data/code/nested_data-snippets.py | 115 ++++++++++++++++++ .../docs/examples/nested_data/index.md | 98 +++++++++++++++ .../nested_mongo_data/code/.dlt/config.toml | 2 - .../nested_mongo_data/code/.dlt/secrets.toml | 2 - .../code/mongodb-snippets.py | 41 ------- .../docs/examples/nested_mongo_data/index.md | 59 --------- docs/website/sidebars.js | 1 + 11 files changed, 215 insertions(+), 107 deletions(-) rename docs/website/docs/examples/{nested_mongo_data => nested_data}/__init__.py (100%) rename docs/website/docs/examples/{nested_mongo_data => nested_data}/code/__init__.py (100%) create mode 100644 docs/website/docs/examples/nested_data/code/nested_data-snippets.py create mode 100644 
docs/website/docs/examples/nested_data/index.md delete mode 100644 docs/website/docs/examples/nested_mongo_data/code/.dlt/config.toml delete mode 100644 docs/website/docs/examples/nested_mongo_data/code/.dlt/secrets.toml delete mode 100644 docs/website/docs/examples/nested_mongo_data/code/mongodb-snippets.py delete mode 100644 docs/website/docs/examples/nested_mongo_data/index.md diff --git a/docs/examples/chess_production/chess.py b/docs/examples/chess_production/chess.py index 0ff5ce7c7f..79b573fe43 100644 --- a/docs/examples/chess_production/chess.py +++ b/docs/examples/chess_production/chess.py @@ -3,7 +3,6 @@ import dlt from dlt.common import sleep -from dlt.common.runtime.slack import send_slack_message from dlt.common.typing import StrAny, TDataItems from dlt.sources.helpers.requests import client @@ -161,4 +160,4 @@ def load_data_with_retry(pipeline, data): ) # get data for a few famous players data = chess(chess_url="https://api.chess.com/pub/", max_players=MAX_PLAYERS) - load_data_with_retry(pipeline, data) + load_data_with_retry(pipeline, data) \ No newline at end of file diff --git a/docs/website/docs/examples/chess_production/index.md b/docs/website/docs/examples/chess_production/index.md index f821600c67..c8278f8676 100644 --- a/docs/website/docs/examples/chess_production/index.md +++ b/docs/website/docs/examples/chess_production/index.md @@ -32,7 +32,6 @@ from typing import Any, Iterator import dlt from dlt.common import sleep -from dlt.common.runtime.slack import send_slack_message from dlt.common.typing import StrAny, TDataItems from dlt.sources.helpers.requests import client diff --git a/docs/website/docs/examples/nested_mongo_data/__init__.py b/docs/website/docs/examples/nested_data/__init__.py similarity index 100% rename from docs/website/docs/examples/nested_mongo_data/__init__.py rename to docs/website/docs/examples/nested_data/__init__.py diff --git a/docs/website/docs/examples/nested_mongo_data/code/__init__.py 
b/docs/website/docs/examples/nested_data/code/__init__.py similarity index 100% rename from docs/website/docs/examples/nested_mongo_data/code/__init__.py rename to docs/website/docs/examples/nested_data/code/__init__.py diff --git a/docs/website/docs/examples/nested_data/code/nested_data-snippets.py b/docs/website/docs/examples/nested_data/code/nested_data-snippets.py new file mode 100644 index 0000000000..eb8a81d9e9 --- /dev/null +++ b/docs/website/docs/examples/nested_data/code/nested_data-snippets.py @@ -0,0 +1,115 @@ + +def transformers_snippet() -> None: + CHUNK_SIZE = 10000 + # @@@DLT_SNIPPET_START example + # @@@DLT_SNIPPET_START nested_data + from itertools import islice + from typing import TYPE_CHECKING, Any, Dict, Iterator, List, Optional, Tuple + + import dlt + from bson.decimal128 import Decimal128 + from bson.objectid import ObjectId + from dlt.common.time import ensure_pendulum_datetime + from dlt.common.typing import TDataItem + from dlt.common.utils import map_nested_in_place + from pendulum import _datetime + from pymongo import ASCENDING, DESCENDING, MongoClient + + # You can limit how deep dlt goes when generating child tables. + # By default, the library will descend and generate child tables + # for all nested lists, without a limit. + # In this example, we specify that we only want to generate child tables up to level 2, + # so there will be only one level of child tables within child tables. 
+ @dlt.source(max_table_nesting=2) + def mongodb_collection( + connection_url: str = dlt.secrets.value, + database: Optional[str] = dlt.config.value, + collection: str = dlt.config.value, + incremental: Optional[dlt.sources.incremental] = None, # type: ignore[type-arg] + write_disposition: Optional[str] = dlt.config.value, + parallel: Optional[bool] = dlt.config.value, + ) -> Any: + # set up mongo client + client = MongoClient( + connection_url, uuidRepresentation="standard", tz_aware=True + ) + mongo_database = client.get_default_database() if not database else client[database] + collection_obj = mongo_database[collection] + + def collection_documents( + client, + collection, + incremental: Optional[dlt.sources.incremental[Any]] = None, + ) -> Iterator[TDataItem]: + LoaderClass = CollectionLoader + + loader = LoaderClass(client, collection, incremental=incremental) + for data in loader.load_documents(): + yield data + + return dlt.resource( # type: ignore + collection_documents, + name=collection_obj.name, + primary_key="_id", + write_disposition=write_disposition, + )(client, collection_obj, incremental=incremental, parallel=parallel) + + # @@@DLT_SNIPPET_END nested_data + + class CollectionLoader: + def __init__( + self, + client, + collection, + incremental: Optional[dlt.sources.incremental[Any]] = None, + ) -> None: + self.client = client + self.collection = collection + self.incremental = incremental + if incremental: + self.cursor_field = incremental.cursor_path + self.last_value = incremental.last_value + else: + self.cursor_column = None + self.last_value = None + + @property + def _filter_op(self) -> Dict[str, Any]: + if not self.incremental or not self.last_value: + return {} + if self.incremental.last_value_func is max: + return {self.cursor_field: {"$gte": self.last_value}} + elif self.incremental.last_value_func is min: + return {self.cursor_field: {"$lt": self.last_value}} + return {} + + def load_documents(self) -> Iterator[TDataItem]: + cursor = 
self.collection.find(self._filter_op) + while docs_slice := list(islice(cursor, CHUNK_SIZE)): + yield map_nested_in_place(convert_mongo_objs, docs_slice) + + def convert_mongo_objs(value: Any) -> Any: + if isinstance(value, (ObjectId, Decimal128)): + return str(value) + if isinstance(value, _datetime.datetime): + return ensure_pendulum_datetime(value) + return value + + # @@@DLT_SNIPPET_START nested_data_run + + __name__ = "__main__" # @@@DLT_REMOVE + if __name__ == "__main__": + # build duck db pipeline + pipeline = dlt.pipeline( + pipeline_name="mongodb_pipeline", destination="duckdb", dataset_name="pokemon_data" + ) + source_data = mongodb_collection() + source_data.max_table_nesting = 1 + + load_info = pipeline.run(source_data) + print(load_info) + # @@@DLT_SNIPPET_END nested_data_run + # @@@DLT_SNIPPET_END example + + # test assertions + row_counts = pipeline.last_trace.last_normalize_info.row_counts diff --git a/docs/website/docs/examples/nested_data/index.md b/docs/website/docs/examples/nested_data/index.md new file mode 100644 index 0000000000..e882082cc6 --- /dev/null +++ b/docs/website/docs/examples/nested_data/index.md @@ -0,0 +1,98 @@ +--- +title: Control nested data +description: Learn how control nested data +keywords: [incremental loading, example] +--- + +import Header from '../_examples-header.md'; + +
+ +## Control nested data + +In this example, you'll find a Python script that + +We'll learn: + +### Install pymongo + +```shell + pip install "pymongo>=4.3.3" +``` + +### Loading code + + +```py +from itertools import islice +from typing import TYPE_CHECKING, Any, Dict, Iterator, List, Optional, Tuple + +import dlt +from bson.decimal128 import Decimal128 +from bson.objectid import ObjectId +from dlt.common.time import ensure_pendulum_datetime +from dlt.common.typing import TDataItem +from dlt.common.utils import map_nested_in_place +from pendulum import _datetime +from pymongo import ASCENDING, DESCENDING, MongoClient + +# You can limit how deep dlt goes when generating child tables. +# By default, the library will descend and generate child tables +# for all nested lists, without a limit. +# In this example, we specify that we only want to generate child tables up to level 2, +# so there will be only one level of child tables within child tables. +@dlt.source(max_table_nesting=2) +def mongodb_collection( + connection_url: str = dlt.secrets.value, + database: Optional[str] = dlt.config.value, + collection: str = dlt.config.value, + incremental: Optional[dlt.sources.incremental] = None, # type: ignore[type-arg] + write_disposition: Optional[str] = dlt.config.value, + parallel: Optional[bool] = dlt.config.value, +) -> Any: + # set up mongo client + client = MongoClient( + connection_url, uuidRepresentation="standard", tz_aware=True + ) + mongo_database = client.get_default_database() if not database else client[database] + collection_obj = mongo_database[collection] + + def collection_documents( + client, + collection, + incremental: Optional[dlt.sources.incremental[Any]] = None, + ) -> Iterator[TDataItem]: + LoaderClass = CollectionLoader + + loader = LoaderClass(client, collection, incremental=incremental) + for data in loader.load_documents(): + yield data + + return dlt.resource( # type: ignore + collection_documents, + name=collection_obj.name, + 
primary_key="_id", + write_disposition=write_disposition, + )(client, collection_obj, incremental=incremental, parallel=parallel) +``` + + +### Run the pipeline + + +```py +if __name__ == "__main__": + # build duck db pipeline + pipeline = dlt.pipeline( + pipeline_name="mongodb_pipeline", destination="duckdb", dataset_name="pokemon_data" + ) + source_data = mongodb_collection() + source_data.max_table_nesting = 1 + + load_info = pipeline.run(source_data) + print(load_info) +``` + diff --git a/docs/website/docs/examples/nested_mongo_data/code/.dlt/config.toml b/docs/website/docs/examples/nested_mongo_data/code/.dlt/config.toml deleted file mode 100644 index be627e6c11..0000000000 --- a/docs/website/docs/examples/nested_mongo_data/code/.dlt/config.toml +++ /dev/null @@ -1,2 +0,0 @@ -# @@@DLT_SNIPPET_START example -# @@@DLT_SNIPPET_END example diff --git a/docs/website/docs/examples/nested_mongo_data/code/.dlt/secrets.toml b/docs/website/docs/examples/nested_mongo_data/code/.dlt/secrets.toml deleted file mode 100644 index be627e6c11..0000000000 --- a/docs/website/docs/examples/nested_mongo_data/code/.dlt/secrets.toml +++ /dev/null @@ -1,2 +0,0 @@ -# @@@DLT_SNIPPET_START example -# @@@DLT_SNIPPET_END example diff --git a/docs/website/docs/examples/nested_mongo_data/code/mongodb-snippets.py b/docs/website/docs/examples/nested_mongo_data/code/mongodb-snippets.py deleted file mode 100644 index 2392dcae5a..0000000000 --- a/docs/website/docs/examples/nested_mongo_data/code/mongodb-snippets.py +++ /dev/null @@ -1,41 +0,0 @@ -from tests.utils import skipifgithubfork - - -@skipifgithubfork -def incremental_snippet() -> None: - - # @@@DLT_SNIPPET_START example - # @@@DLT_SNIPPET_START markdown_source - from typing import Iterator, Optional, Dict, Any, Tuple - - import dlt - from dlt.common import pendulum - from dlt.common.time import ensure_pendulum_datetime - from dlt.common.typing import TDataItem, TDataItems, TAnyDateTime - from dlt.extract.source import DltResource - from 
dlt.sources.helpers.requests import client - - @dlt.source - def mongodb_source( - credentials: Dict[str, str]=dlt.secrets.value, - start_date: Optional[TAnyDateTime] = pendulum.datetime(year=2000, month=1, day=1), # noqa: B008 - end_date: Optional[TAnyDateTime] = None, - ): - pass - - # @@@DLT_SNIPPET_END markdown_source - - # @@@DLT_SNIPPET_START markdown_pipeline - __name__ = "__main__" # @@@DLT_REMOVE - if __name__ == "__main__": - # create dlt pipeline - pipeline = dlt.pipeline( - pipeline_name="pipeline_name", destination="duckdb", dataset_name="dataset_name" - ) - - load_info = pipeline.run(mongodb_source()) - print(load_info) - # @@@DLT_SNIPPET_END markdown_pipeline - # @@@DLT_SNIPPET_END example - - diff --git a/docs/website/docs/examples/nested_mongo_data/index.md b/docs/website/docs/examples/nested_mongo_data/index.md deleted file mode 100644 index cda02cc206..0000000000 --- a/docs/website/docs/examples/nested_mongo_data/index.md +++ /dev/null @@ -1,59 +0,0 @@ ---- -title: Control nested MongoDB data -description: Learn how -keywords: [incremental loading, example] ---- - -import Header from '../_examples-header.md'; - -
- -## Control nested MongoDB data - -In this example, you'll find a Python script that - -We'll learn: - - -### Loading code - - -```py -from typing import Iterator, Optional, Dict, Any, Tuple - -import dlt -from dlt.common import pendulum -from dlt.common.time import ensure_pendulum_datetime -from dlt.common.typing import TDataItem, TDataItems, TAnyDateTime -from dlt.extract.source import DltResource -from dlt.sources.helpers.requests import client - -@dlt.source -def mongodb_source( - credentials: Dict[str, str]=dlt.secrets.value, - start_date: Optional[TAnyDateTime] = pendulum.datetime(year=2000, month=1, day=1), # noqa: B008 - end_date: Optional[TAnyDateTime] = None, -): - pass -``` - - -Run the pipeline: - - - -```py -if __name__ == "__main__": - # create dlt pipeline - pipeline = dlt.pipeline( - pipeline_name="pipeline_name", destination="duckdb", dataset_name="dataset_name" - ) - - load_info = pipeline.run(mongodb_source()) - print(load_info) -``` - - diff --git a/docs/website/sidebars.js b/docs/website/sidebars.js index 69fe9d2be6..1546a26029 100644 --- a/docs/website/sidebars.js +++ b/docs/website/sidebars.js @@ -241,6 +241,7 @@ const sidebars = { 'examples/incremental_loading/index', 'examples/connector_x_arrow/index', 'examples/chess_production/index', + 'examples/nested_data/index', ], }, { From 0ea33e236b79fdc8ab0f19ce8510102f99e10c10 Mon Sep 17 00:00:00 2001 From: AstrakhantsevaAA Date: Tue, 7 Nov 2023 17:55:21 +0100 Subject: [PATCH 03/16] add three exa,ples how to control nested data --- .../nested_data/code/.dlt/config.toml | 2 + .../nested_data/code/nested_data-snippets.py | 59 +++++++++++++++---- 2 files changed, 51 insertions(+), 10 deletions(-) create mode 100644 docs/website/docs/examples/nested_data/code/.dlt/config.toml diff --git a/docs/website/docs/examples/nested_data/code/.dlt/config.toml b/docs/website/docs/examples/nested_data/code/.dlt/config.toml new file mode 100644 index 0000000000..be627e6c11 --- /dev/null +++ 
b/docs/website/docs/examples/nested_data/code/.dlt/config.toml @@ -0,0 +1,2 @@ +# @@@DLT_SNIPPET_START example +# @@@DLT_SNIPPET_END example diff --git a/docs/website/docs/examples/nested_data/code/nested_data-snippets.py b/docs/website/docs/examples/nested_data/code/nested_data-snippets.py index eb8a81d9e9..331b86d20a 100644 --- a/docs/website/docs/examples/nested_data/code/nested_data-snippets.py +++ b/docs/website/docs/examples/nested_data/code/nested_data-snippets.py @@ -1,5 +1,4 @@ - -def transformers_snippet() -> None: +def nested_data_snippet() -> None: CHUNK_SIZE = 10000 # @@@DLT_SNIPPET_START example # @@@DLT_SNIPPET_START nested_data @@ -27,7 +26,6 @@ def mongodb_collection( collection: str = dlt.config.value, incremental: Optional[dlt.sources.incremental] = None, # type: ignore[type-arg] write_disposition: Optional[str] = dlt.config.value, - parallel: Optional[bool] = dlt.config.value, ) -> Any: # set up mongo client client = MongoClient( @@ -52,7 +50,7 @@ def collection_documents( name=collection_obj.name, primary_key="_id", write_disposition=write_disposition, - )(client, collection_obj, incremental=incremental, parallel=parallel) + )(client, collection_obj, incremental=incremental) # @@@DLT_SNIPPET_END nested_data @@ -97,19 +95,60 @@ def convert_mongo_objs(value: Any) -> Any: # @@@DLT_SNIPPET_START nested_data_run - __name__ = "__main__" # @@@DLT_REMOVE + __name__ = "__main__" # @@@DLT_REMOVE if __name__ == "__main__": # build duck db pipeline pipeline = dlt.pipeline( - pipeline_name="mongodb_pipeline", destination="duckdb", dataset_name="pokemon_data" + pipeline_name="mongodb_pipeline", + destination="duckdb", + dataset_name="not_unpacked_data", + ) + source_data = mongodb_collection(collection="movies", write_disposition="replace") + source_data.max_table_nesting = 0 + + load_info = pipeline.run(source_data) + print(load_info) + + # test assertions + tables = pipeline.last_trace.last_normalize_info.row_counts + tables.pop("_dlt_pipeline_state") + 
assert len(tables) == 1, pipeline.last_trace.last_normalize_info + + pipeline = dlt.pipeline( + pipeline_name="mongodb_pipeline", + destination="duckdb", + dataset_name="unpacked_data", ) - source_data = mongodb_collection() - source_data.max_table_nesting = 1 + source_data = mongodb_collection(collection="movies", write_disposition="replace") + source_data.max_table_nesting = 2 load_info = pipeline.run(source_data) print(load_info) + + # test assertions + tables = pipeline.last_trace.last_normalize_info.row_counts + tables.pop("_dlt_pipeline_state") + assert len(tables) == 7, pipeline.last_trace.last_normalize_info + + pipeline = dlt.pipeline( + pipeline_name="mongodb_pipeline", + destination="duckdb", + dataset_name="unpacked_data_without_cast", + ) + source_data = mongodb_collection(collection="movies", write_disposition="replace") + source_data.max_table_nesting = 2 + source_data.movies.apply_hints(columns={"cast": {"data_type": "complex"}}) + + load_info = pipeline.run(source_data) + print(load_info) + + # test assertions + tables = pipeline.last_trace.last_normalize_info.row_counts + tables.pop("_dlt_pipeline_state") + assert len(tables) == 6, pipeline.last_trace.last_normalize_info + + # @@@DLT_SNIPPET_END nested_data_run # @@@DLT_SNIPPET_END example - # test assertions - row_counts = pipeline.last_trace.last_normalize_info.row_counts + From c5419d4589b63a089fb6f0a73bf6975ff6b1e97d Mon Sep 17 00:00:00 2001 From: AstrakhantsevaAA Date: Tue, 7 Nov 2023 18:33:29 +0100 Subject: [PATCH 04/16] update description --- .../nested_data/code/nested_data-snippets.py | 58 +++++++++---------- .../docs/examples/nested_data/index.md | 56 +++++++++++++++--- 2 files changed, 75 insertions(+), 39 deletions(-) diff --git a/docs/website/docs/examples/nested_data/code/nested_data-snippets.py b/docs/website/docs/examples/nested_data/code/nested_data-snippets.py index 331b86d20a..c4cb16ea70 100644 --- a/docs/website/docs/examples/nested_data/code/nested_data-snippets.py +++ 
b/docs/website/docs/examples/nested_data/code/nested_data-snippets.py @@ -1,5 +1,4 @@ def nested_data_snippet() -> None: - CHUNK_SIZE = 10000 # @@@DLT_SNIPPET_START example # @@@DLT_SNIPPET_START nested_data from itertools import islice @@ -14,6 +13,8 @@ def nested_data_snippet() -> None: from pendulum import _datetime from pymongo import ASCENDING, DESCENDING, MongoClient + CHUNK_SIZE = 10000 + # You can limit how deep dlt goes when generating child tables. # By default, the library will descend and generate child tables # for all nested lists, without a limit. @@ -97,58 +98,55 @@ def convert_mongo_objs(value: Any) -> Any: __name__ = "__main__" # @@@DLT_REMOVE if __name__ == "__main__": - # build duck db pipeline + # When we created the source, we set max_table_nesting to 2. + # This ensures that the generated tables do not have more than two + # levels of nesting, even if the original data structure is more deeply nested. pipeline = dlt.pipeline( pipeline_name="mongodb_pipeline", destination="duckdb", - dataset_name="not_unpacked_data", + dataset_name="unpacked_data", ) source_data = mongodb_collection(collection="movies", write_disposition="replace") - source_data.max_table_nesting = 0 - load_info = pipeline.run(source_data) print(load_info) - - # test assertions - tables = pipeline.last_trace.last_normalize_info.row_counts - tables.pop("_dlt_pipeline_state") - assert len(tables) == 1, pipeline.last_trace.last_normalize_info - + tables = pipeline.last_trace.last_normalize_info.row_counts # @@@DLT_REMOVE + tables.pop("_dlt_pipeline_state") # @@@DLT_REMOVE + assert len(tables) == 7, pipeline.last_trace.last_normalize_info # @@@DLT_REMOVE + + # The second method involves setting the max_table_nesting attribute directly + # on the source data object. + # This allows for dynamic control over the maximum nesting + # level for a specific data source. + # Here the nesting level is adjusted before running the pipeline. 
pipeline = dlt.pipeline( pipeline_name="mongodb_pipeline", destination="duckdb", - dataset_name="unpacked_data", + dataset_name="not_unpacked_data", ) source_data = mongodb_collection(collection="movies", write_disposition="replace") - source_data.max_table_nesting = 2 - + source_data.max_table_nesting = 0 load_info = pipeline.run(source_data) print(load_info) - - # test assertions - tables = pipeline.last_trace.last_normalize_info.row_counts - tables.pop("_dlt_pipeline_state") - assert len(tables) == 7, pipeline.last_trace.last_normalize_info - + tables = pipeline.last_trace.last_normalize_info.row_counts # @@@DLT_REMOVE + tables.pop("_dlt_pipeline_state") # @@@DLT_REMOVE + assert len(tables) == 1, pipeline.last_trace.last_normalize_info # @@@DLT_REMOVE + + # The third method involves applying data type hints to specific columns in the data. + # In this case, we tell dlt that column 'cast' (containing a list of actors) + # in 'movies' table should have type complex which means + # that it will be loaded as JSON/struct and not as child table. 
pipeline = dlt.pipeline( pipeline_name="mongodb_pipeline", destination="duckdb", dataset_name="unpacked_data_without_cast", ) source_data = mongodb_collection(collection="movies", write_disposition="replace") - source_data.max_table_nesting = 2 source_data.movies.apply_hints(columns={"cast": {"data_type": "complex"}}) - load_info = pipeline.run(source_data) print(load_info) - - # test assertions - tables = pipeline.last_trace.last_normalize_info.row_counts - tables.pop("_dlt_pipeline_state") - assert len(tables) == 6, pipeline.last_trace.last_normalize_info - + tables = pipeline.last_trace.last_normalize_info.row_counts # @@@DLT_REMOVE + tables.pop("_dlt_pipeline_state") # @@@DLT_REMOVE + assert len(tables) == 6, pipeline.last_trace.last_normalize_info # @@@DLT_REMOVE # @@@DLT_SNIPPET_END nested_data_run # @@@DLT_SNIPPET_END example - - diff --git a/docs/website/docs/examples/nested_data/index.md b/docs/website/docs/examples/nested_data/index.md index e882082cc6..a59fefa882 100644 --- a/docs/website/docs/examples/nested_data/index.md +++ b/docs/website/docs/examples/nested_data/index.md @@ -7,15 +7,21 @@ keywords: [incremental loading, example] import Header from '../_examples-header.md';
## Control nested data -In this example, you'll find a Python script that +In this example, you'll find a Python script that demonstrates how to control nested data using the `dlt` library. -We'll learn: +We'll learn how to: +- [Adjust maximal nesting level in three ways:](../../general-usage/source#reduce-the-nesting-level-of-generated-tables) + - Limit Nesting Levels with dlt decorator. + - Dynamic nesting level adjustment. + - Apply data type hints. +- Work with [MongoDB](../../dlt-ecosystem/verified-sources/mongodb) in Python and `dlt`. +- Enable [incremental loading](../../general-usage/incremental-loading) for efficient data extraction. ### Install pymongo @@ -39,6 +45,8 @@ from dlt.common.utils import map_nested_in_place from pendulum import _datetime from pymongo import ASCENDING, DESCENDING, MongoClient +CHUNK_SIZE = 10000 + # You can limit how deep dlt goes when generating child tables. # By default, the library will descend and generate child tables # for all nested lists, without a limit. @@ -51,7 +59,6 @@ def mongodb_collection( collection: str = dlt.config.value, incremental: Optional[dlt.sources.incremental] = None, # type: ignore[type-arg] write_disposition: Optional[str] = dlt.config.value, - parallel: Optional[bool] = dlt.config.value, ) -> Any: # set up mongo client client = MongoClient( @@ -76,7 +83,7 @@ def mongodb_collection( name=collection_obj.name, primary_key="_id", write_disposition=write_disposition, - )(client, collection_obj, incremental=incremental, parallel=parallel) + )(client, collection_obj, incremental=incremental) ``` @@ -85,13 +92,44 @@ def mongodb_collection( ```py if __name__ == "__main__": - # build duck db pipeline + # When we created the source, we set max_table_nesting to 2. + # This ensures that the generated tables do not have more than two + # levels of nesting, even if the original data structure is more deeply nested. 
+ pipeline = dlt.pipeline( + pipeline_name="mongodb_pipeline", + destination="duckdb", + dataset_name="unpacked_data", + ) + source_data = mongodb_collection(collection="movies", write_disposition="replace") + load_info = pipeline.run(source_data) + print(load_info) + + # The second method involves setting the max_table_nesting attribute directly + # on the source data object. + # This allows for dynamic control over the maximum nesting + # level for a specific data source. + # Here the nesting level is adjusted before running the pipeline. pipeline = dlt.pipeline( - pipeline_name="mongodb_pipeline", destination="duckdb", dataset_name="pokemon_data" + pipeline_name="mongodb_pipeline", + destination="duckdb", + dataset_name="not_unpacked_data", ) - source_data = mongodb_collection() - source_data.max_table_nesting = 1 + source_data = mongodb_collection(collection="movies", write_disposition="replace") + source_data.max_table_nesting = 0 + load_info = pipeline.run(source_data) + print(load_info) + # The third method involves applying data type hints to specific columns in the data. + # In this case, we tell dlt that column 'cast' (containing a list of actors) + # in 'movies' table should have type complex which means + # that it will be loaded as JSON/struct and not as child table. 
+ pipeline = dlt.pipeline( + pipeline_name="mongodb_pipeline", + destination="duckdb", + dataset_name="unpacked_data_without_cast", + ) + source_data = mongodb_collection(collection="movies", write_disposition="replace") + source_data.movies.apply_hints(columns={"cast": {"data_type": "complex"}}) load_info = pipeline.run(source_data) print(load_info) ``` From 7bb08a348d2cb19af44ac8b5505126387c2eaed1 Mon Sep 17 00:00:00 2001 From: AstrakhantsevaAA Date: Tue, 7 Nov 2023 18:45:27 +0100 Subject: [PATCH 05/16] linter --- .../nested_data/code/nested_data-snippets.py | 40 +++++++++++++------ .../docs/examples/nested_data/index.md | 6 +-- 2 files changed, 30 insertions(+), 16 deletions(-) diff --git a/docs/website/docs/examples/nested_data/code/nested_data-snippets.py b/docs/website/docs/examples/nested_data/code/nested_data-snippets.py index c4cb16ea70..487925d300 100644 --- a/docs/website/docs/examples/nested_data/code/nested_data-snippets.py +++ b/docs/website/docs/examples/nested_data/code/nested_data-snippets.py @@ -2,16 +2,17 @@ def nested_data_snippet() -> None: # @@@DLT_SNIPPET_START example # @@@DLT_SNIPPET_START nested_data from itertools import islice - from typing import TYPE_CHECKING, Any, Dict, Iterator, List, Optional, Tuple + from typing import Any, Dict, Iterator, Optional - import dlt from bson.decimal128 import Decimal128 from bson.objectid import ObjectId + from pendulum import _datetime + from pymongo import MongoClient + + import dlt from dlt.common.time import ensure_pendulum_datetime from dlt.common.typing import TDataItem from dlt.common.utils import map_nested_in_place - from pendulum import _datetime - from pymongo import ASCENDING, DESCENDING, MongoClient CHUNK_SIZE = 10000 @@ -32,7 +33,9 @@ def mongodb_collection( client = MongoClient( connection_url, uuidRepresentation="standard", tz_aware=True ) - mongo_database = client.get_default_database() if not database else client[database] + mongo_database = ( + client.get_default_database() if not 
database else client[database] + ) collection_obj = mongo_database[collection] def collection_documents( @@ -43,8 +46,7 @@ def collection_documents( LoaderClass = CollectionLoader loader = LoaderClass(client, collection, incremental=incremental) - for data in loader.load_documents(): - yield data + yield from loader.load_documents() return dlt.resource( # type: ignore collection_documents, @@ -106,12 +108,16 @@ def convert_mongo_objs(value: Any) -> Any: destination="duckdb", dataset_name="unpacked_data", ) - source_data = mongodb_collection(collection="movies", write_disposition="replace") + source_data = mongodb_collection( + collection="movies", write_disposition="replace" + ) load_info = pipeline.run(source_data) print(load_info) tables = pipeline.last_trace.last_normalize_info.row_counts # @@@DLT_REMOVE tables.pop("_dlt_pipeline_state") # @@@DLT_REMOVE - assert len(tables) == 7, pipeline.last_trace.last_normalize_info # @@@DLT_REMOVE + assert ( + len(tables) == 7 + ), pipeline.last_trace.last_normalize_info # @@@DLT_REMOVE # The second method involves setting the max_table_nesting attribute directly # on the source data object. @@ -123,13 +129,17 @@ def convert_mongo_objs(value: Any) -> Any: destination="duckdb", dataset_name="not_unpacked_data", ) - source_data = mongodb_collection(collection="movies", write_disposition="replace") + source_data = mongodb_collection( + collection="movies", write_disposition="replace" + ) source_data.max_table_nesting = 0 load_info = pipeline.run(source_data) print(load_info) tables = pipeline.last_trace.last_normalize_info.row_counts # @@@DLT_REMOVE tables.pop("_dlt_pipeline_state") # @@@DLT_REMOVE - assert len(tables) == 1, pipeline.last_trace.last_normalize_info # @@@DLT_REMOVE + assert ( + len(tables) == 1 + ), pipeline.last_trace.last_normalize_info # @@@DLT_REMOVE # The third method involves applying data type hints to specific columns in the data. 
# In this case, we tell dlt that column 'cast' (containing a list of actors) @@ -140,13 +150,17 @@ def convert_mongo_objs(value: Any) -> Any: destination="duckdb", dataset_name="unpacked_data_without_cast", ) - source_data = mongodb_collection(collection="movies", write_disposition="replace") + source_data = mongodb_collection( + collection="movies", write_disposition="replace" + ) source_data.movies.apply_hints(columns={"cast": {"data_type": "complex"}}) load_info = pipeline.run(source_data) print(load_info) tables = pipeline.last_trace.last_normalize_info.row_counts # @@@DLT_REMOVE tables.pop("_dlt_pipeline_state") # @@@DLT_REMOVE - assert len(tables) == 6, pipeline.last_trace.last_normalize_info # @@@DLT_REMOVE + assert ( + len(tables) == 6 + ), pipeline.last_trace.last_normalize_info # @@@DLT_REMOVE # @@@DLT_SNIPPET_END nested_data_run # @@@DLT_SNIPPET_END example diff --git a/docs/website/docs/examples/nested_data/index.md b/docs/website/docs/examples/nested_data/index.md index a59fefa882..3874e60a02 100644 --- a/docs/website/docs/examples/nested_data/index.md +++ b/docs/website/docs/examples/nested_data/index.md @@ -1,5 +1,5 @@ --- -title: Control nested data +title: Control nested MongoDB data description: Learn how control nested data keywords: [incremental loading, example] --- @@ -7,7 +7,7 @@ keywords: [incremental loading, example] import Header from '../_examples-header.md';
@@ -17,7 +17,7 @@ In this example, you'll find a Python script that demonstrates how to control ne We'll learn how to: - [Adjust maximal nesting level in three ways:](../../general-usage/source#reduce-the-nesting-level-of-generated-tables) - - Limit Nesting Levels with dlt decorator. + - Limit nesting levels with dlt decorator. - Dynamic nesting level adjustment. - Apply data type hints. - Work with [MongoDB](../../dlt-ecosystem/verified-sources/mongodb) in Python and `dlt`. From f0c8ad045602528ea82eead5834338db64a5a8e3 Mon Sep 17 00:00:00 2001 From: AstrakhantsevaAA Date: Tue, 7 Nov 2023 18:55:10 +0100 Subject: [PATCH 06/16] small fixes --- .../nested_data/code/nested_data-snippets.py | 20 +++++-------------- .../docs/examples/nested_data/index.md | 2 +- 2 files changed, 6 insertions(+), 16 deletions(-) diff --git a/docs/website/docs/examples/nested_data/code/nested_data-snippets.py b/docs/website/docs/examples/nested_data/code/nested_data-snippets.py index 487925d300..cda248a4fc 100644 --- a/docs/website/docs/examples/nested_data/code/nested_data-snippets.py +++ b/docs/website/docs/examples/nested_data/code/nested_data-snippets.py @@ -30,12 +30,8 @@ def mongodb_collection( write_disposition: Optional[str] = dlt.config.value, ) -> Any: # set up mongo client - client = MongoClient( - connection_url, uuidRepresentation="standard", tz_aware=True - ) - mongo_database = ( - client.get_default_database() if not database else client[database] - ) + client = MongoClient(connection_url, uuidRepresentation="standard", tz_aware=True) + mongo_database = client.get_default_database() if not database else client[database] collection_obj = mongo_database[collection] def collection_documents( @@ -115,9 +111,7 @@ def convert_mongo_objs(value: Any) -> Any: print(load_info) tables = pipeline.last_trace.last_normalize_info.row_counts # @@@DLT_REMOVE tables.pop("_dlt_pipeline_state") # @@@DLT_REMOVE - assert ( - len(tables) == 7 - ), pipeline.last_trace.last_normalize_info # 
@@@DLT_REMOVE + assert (len(tables) == 7), pipeline.last_trace.last_normalize_info # @@@DLT_REMOVE # The second method involves setting the max_table_nesting attribute directly # on the source data object. @@ -137,9 +131,7 @@ def convert_mongo_objs(value: Any) -> Any: print(load_info) tables = pipeline.last_trace.last_normalize_info.row_counts # @@@DLT_REMOVE tables.pop("_dlt_pipeline_state") # @@@DLT_REMOVE - assert ( - len(tables) == 1 - ), pipeline.last_trace.last_normalize_info # @@@DLT_REMOVE + assert (len(tables) == 1), pipeline.last_trace.last_normalize_info # @@@DLT_REMOVE # The third method involves applying data type hints to specific columns in the data. # In this case, we tell dlt that column 'cast' (containing a list of actors) @@ -158,9 +150,7 @@ def convert_mongo_objs(value: Any) -> Any: print(load_info) tables = pipeline.last_trace.last_normalize_info.row_counts # @@@DLT_REMOVE tables.pop("_dlt_pipeline_state") # @@@DLT_REMOVE - assert ( - len(tables) == 6 - ), pipeline.last_trace.last_normalize_info # @@@DLT_REMOVE + assert (len(tables) == 6), pipeline.last_trace.last_normalize_info # @@@DLT_REMOVE # @@@DLT_SNIPPET_END nested_data_run # @@@DLT_SNIPPET_END example diff --git a/docs/website/docs/examples/nested_data/index.md b/docs/website/docs/examples/nested_data/index.md index 3874e60a02..34da7e4ea8 100644 --- a/docs/website/docs/examples/nested_data/index.md +++ b/docs/website/docs/examples/nested_data/index.md @@ -7,7 +7,7 @@ keywords: [incremental loading, example] import Header from '../_examples-header.md';
From 7d7e74f9bf57a7919e0b6ffa4e90a8028353ecba Mon Sep 17 00:00:00 2001 From: AstrakhantsevaAA Date: Tue, 7 Nov 2023 19:00:18 +0100 Subject: [PATCH 07/16] add generated files --- docs/examples/nested_data/.dlt/config.toml | 0 docs/examples/nested_data/.dlt/secrets.toml | 2 + docs/examples/nested_data/__init__.py | 0 docs/examples/nested_data/nested_data.py | 138 ++++++++++++++++++ .../nested_data/code/.dlt/secrets.toml | 4 + .../docs/examples/nested_data/index.md | 28 ++-- 6 files changed, 160 insertions(+), 12 deletions(-) create mode 100644 docs/examples/nested_data/.dlt/config.toml create mode 100644 docs/examples/nested_data/.dlt/secrets.toml create mode 100644 docs/examples/nested_data/__init__.py create mode 100644 docs/examples/nested_data/nested_data.py create mode 100644 docs/website/docs/examples/nested_data/code/.dlt/secrets.toml diff --git a/docs/examples/nested_data/.dlt/config.toml b/docs/examples/nested_data/.dlt/config.toml new file mode 100644 index 0000000000..e69de29bb2 diff --git a/docs/examples/nested_data/.dlt/secrets.toml b/docs/examples/nested_data/.dlt/secrets.toml new file mode 100644 index 0000000000..791942a3ed --- /dev/null +++ b/docs/examples/nested_data/.dlt/secrets.toml @@ -0,0 +1,2 @@ +[mongodb_pipeline.sources] +connection_url="" \ No newline at end of file diff --git a/docs/examples/nested_data/__init__.py b/docs/examples/nested_data/__init__.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/docs/examples/nested_data/nested_data.py b/docs/examples/nested_data/nested_data.py new file mode 100644 index 0000000000..d6699179d6 --- /dev/null +++ b/docs/examples/nested_data/nested_data.py @@ -0,0 +1,138 @@ +from itertools import islice +from typing import Any, Dict, Iterator, Optional + +from bson.decimal128 import Decimal128 +from bson.objectid import ObjectId +from pendulum import _datetime +from pymongo import MongoClient + +import dlt +from dlt.common.time import ensure_pendulum_datetime +from dlt.common.typing import 
TDataItem +from dlt.common.utils import map_nested_in_place + +CHUNK_SIZE = 10000 + +# You can limit how deep dlt goes when generating child tables. +# By default, the library will descend and generate child tables +# for all nested lists, without a limit. +# In this example, we specify that we only want to generate child tables up to level 2, +# so there will be only one level of child tables within child tables. +@dlt.source(max_table_nesting=2) +def mongodb_collection( + connection_url: str = dlt.secrets.value, + database: Optional[str] = dlt.config.value, + collection: str = dlt.config.value, + incremental: Optional[dlt.sources.incremental] = None, # type: ignore[type-arg] + write_disposition: Optional[str] = dlt.config.value, +) -> Any: + # set up mongo client + client = MongoClient(connection_url, uuidRepresentation="standard", tz_aware=True) + mongo_database = client.get_default_database() if not database else client[database] + collection_obj = mongo_database[collection] + + def collection_documents( + client, + collection, + incremental: Optional[dlt.sources.incremental[Any]] = None, + ) -> Iterator[TDataItem]: + LoaderClass = CollectionLoader + + loader = LoaderClass(client, collection, incremental=incremental) + yield from loader.load_documents() + + return dlt.resource( # type: ignore + collection_documents, + name=collection_obj.name, + primary_key="_id", + write_disposition=write_disposition, + )(client, collection_obj, incremental=incremental) + + +class CollectionLoader: + def __init__( + self, + client, + collection, + incremental: Optional[dlt.sources.incremental[Any]] = None, + ) -> None: + self.client = client + self.collection = collection + self.incremental = incremental + if incremental: + self.cursor_field = incremental.cursor_path + self.last_value = incremental.last_value + else: + self.cursor_column = None + self.last_value = None + + @property + def _filter_op(self) -> Dict[str, Any]: + if not self.incremental or not self.last_value: + 
return {} + if self.incremental.last_value_func is max: + return {self.cursor_field: {"$gte": self.last_value}} + elif self.incremental.last_value_func is min: + return {self.cursor_field: {"$lt": self.last_value}} + return {} + + def load_documents(self) -> Iterator[TDataItem]: + cursor = self.collection.find(self._filter_op) + while docs_slice := list(islice(cursor, CHUNK_SIZE)): + yield map_nested_in_place(convert_mongo_objs, docs_slice) + +def convert_mongo_objs(value: Any) -> Any: + if isinstance(value, (ObjectId, Decimal128)): + return str(value) + if isinstance(value, _datetime.datetime): + return ensure_pendulum_datetime(value) + return value + + +if __name__ == "__main__": + # When we created the source, we set max_table_nesting to 2. + # This ensures that the generated tables do not have more than two + # levels of nesting, even if the original data structure is more deeply nested. + pipeline = dlt.pipeline( + pipeline_name="mongodb_pipeline", + destination="duckdb", + dataset_name="unpacked_data", + ) + source_data = mongodb_collection( + collection="movies", write_disposition="replace" + ) + load_info = pipeline.run(source_data) + print(load_info) + + # The second method involves setting the max_table_nesting attribute directly + # on the source data object. + # This allows for dynamic control over the maximum nesting + # level for a specific data source. + # Here the nesting level is adjusted before running the pipeline. + pipeline = dlt.pipeline( + pipeline_name="mongodb_pipeline", + destination="duckdb", + dataset_name="not_unpacked_data", + ) + source_data = mongodb_collection( + collection="movies", write_disposition="replace" + ) + source_data.max_table_nesting = 0 + load_info = pipeline.run(source_data) + print(load_info) + + # The third method involves applying data type hints to specific columns in the data. 
+ # In this case, we tell dlt that column 'cast' (containing a list of actors) + # in 'movies' table should have type complex which means + # that it will be loaded as JSON/struct and not as child table. + pipeline = dlt.pipeline( + pipeline_name="mongodb_pipeline", + destination="duckdb", + dataset_name="unpacked_data_without_cast", + ) + source_data = mongodb_collection( + collection="movies", write_disposition="replace" + ) + source_data.movies.apply_hints(columns={"cast": {"data_type": "complex"}}) + load_info = pipeline.run(source_data) + print(load_info) diff --git a/docs/website/docs/examples/nested_data/code/.dlt/secrets.toml b/docs/website/docs/examples/nested_data/code/.dlt/secrets.toml new file mode 100644 index 0000000000..e119d46d91 --- /dev/null +++ b/docs/website/docs/examples/nested_data/code/.dlt/secrets.toml @@ -0,0 +1,4 @@ +# @@@DLT_SNIPPET_START example +[mongodb_pipeline.sources] +connection_url="" +# @@@DLT_SNIPPET_END example diff --git a/docs/website/docs/examples/nested_data/index.md b/docs/website/docs/examples/nested_data/index.md index 34da7e4ea8..8d978b2618 100644 --- a/docs/website/docs/examples/nested_data/index.md +++ b/docs/website/docs/examples/nested_data/index.md @@ -34,16 +34,17 @@ We'll learn how to: ```py from itertools import islice -from typing import TYPE_CHECKING, Any, Dict, Iterator, List, Optional, Tuple +from typing import Any, Dict, Iterator, Optional -import dlt from bson.decimal128 import Decimal128 from bson.objectid import ObjectId +from pendulum import _datetime +from pymongo import MongoClient + +import dlt from dlt.common.time import ensure_pendulum_datetime from dlt.common.typing import TDataItem from dlt.common.utils import map_nested_in_place -from pendulum import _datetime -from pymongo import ASCENDING, DESCENDING, MongoClient CHUNK_SIZE = 10000 @@ -61,9 +62,7 @@ def mongodb_collection( write_disposition: Optional[str] = dlt.config.value, ) -> Any: # set up mongo client - client = MongoClient( - 
connection_url, uuidRepresentation="standard", tz_aware=True - ) + client = MongoClient(connection_url, uuidRepresentation="standard", tz_aware=True) mongo_database = client.get_default_database() if not database else client[database] collection_obj = mongo_database[collection] @@ -75,8 +74,7 @@ def mongodb_collection( LoaderClass = CollectionLoader loader = LoaderClass(client, collection, incremental=incremental) - for data in loader.load_documents(): - yield data + yield from loader.load_documents() return dlt.resource( # type: ignore collection_documents, @@ -100,7 +98,9 @@ if __name__ == "__main__": destination="duckdb", dataset_name="unpacked_data", ) - source_data = mongodb_collection(collection="movies", write_disposition="replace") + source_data = mongodb_collection( + collection="movies", write_disposition="replace" + ) load_info = pipeline.run(source_data) print(load_info) @@ -114,7 +114,9 @@ if __name__ == "__main__": destination="duckdb", dataset_name="not_unpacked_data", ) - source_data = mongodb_collection(collection="movies", write_disposition="replace") + source_data = mongodb_collection( + collection="movies", write_disposition="replace" + ) source_data.max_table_nesting = 0 load_info = pipeline.run(source_data) print(load_info) @@ -128,7 +130,9 @@ if __name__ == "__main__": destination="duckdb", dataset_name="unpacked_data_without_cast", ) - source_data = mongodb_collection(collection="movies", write_disposition="replace") + source_data = mongodb_collection( + collection="movies", write_disposition="replace" + ) source_data.movies.apply_hints(columns={"cast": {"data_type": "complex"}}) load_info = pipeline.run(source_data) print(load_info) From 686285e2f0ecfdc7a693f141038df3a0108a2470 Mon Sep 17 00:00:00 2001 From: AstrakhantsevaAA Date: Wed, 8 Nov 2023 11:21:19 +0100 Subject: [PATCH 08/16] add mongo dependency --- .github/workflows/test_doc_snippets.yml | 2 +- pyproject.toml | 3 +++ 2 files changed, 4 insertions(+), 1 deletion(-) diff --git 
a/.github/workflows/test_doc_snippets.yml b/.github/workflows/test_doc_snippets.yml index 554c2efba2..89b3724be3 100644 --- a/.github/workflows/test_doc_snippets.yml +++ b/.github/workflows/test_doc_snippets.yml @@ -57,7 +57,7 @@ jobs: - name: Install dependencies # if: steps.cached-poetry-dependencies.outputs.cache-hit != 'true' - run: poetry install --no-interaction -E duckdb -E weaviate -E parquet --with docs --without airflow + run: poetry install --no-interaction -E duckdb -E weaviate -E parquet --with docs --with mongodb --without airflow - name: Run linter and tests run: make test-and-lint-snippets diff --git a/pyproject.toml b/pyproject.toml index 2c327ce3b9..a0b58eb980 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -163,6 +163,9 @@ pypdf2 = "^3.0.1" pydoc-markdown = "^4.8.2" connectorx="0.3.1" +[tool.poetry.group.mongodb.dependencies] +pymongo = ">=4.3.3" + [build-system] requires = ["poetry-core>=1.0.8"] build-backend = "poetry.core.masonry.api" \ No newline at end of file From 2f2ab49c37d81664b61e9b6b4e32485b485f43fd Mon Sep 17 00:00:00 2001 From: AstrakhantsevaAA Date: Wed, 8 Nov 2023 11:48:00 +0100 Subject: [PATCH 09/16] update lock file --- poetry.lock | 128 ++++++++++++++++++++++++++++++++++++++++++---------- 1 file changed, 103 insertions(+), 25 deletions(-) diff --git a/poetry.lock b/poetry.lock index 72c24349c6..d715259136 100644 --- a/poetry.lock +++ b/poetry.lock @@ -3289,7 +3289,6 @@ files = [ {file = "greenlet-2.0.2-cp27-cp27m-win32.whl", hash = "sha256:6c3acb79b0bfd4fe733dff8bc62695283b57949ebcca05ae5c129eb606ff2d74"}, {file = "greenlet-2.0.2-cp27-cp27m-win_amd64.whl", hash = "sha256:283737e0da3f08bd637b5ad058507e578dd462db259f7f6e4c5c365ba4ee9343"}, {file = "greenlet-2.0.2-cp27-cp27mu-manylinux2010_x86_64.whl", hash = "sha256:d27ec7509b9c18b6d73f2f5ede2622441de812e7b1a80bbd446cb0633bd3d5ae"}, - {file = "greenlet-2.0.2-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:d967650d3f56af314b72df7089d96cda1083a7fc2da05b375d2bc48c82ab3f3c"}, 
{file = "greenlet-2.0.2-cp310-cp310-macosx_11_0_x86_64.whl", hash = "sha256:30bcf80dda7f15ac77ba5af2b961bdd9dbc77fd4ac6105cee85b0d0a5fcf74df"}, {file = "greenlet-2.0.2-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:26fbfce90728d82bc9e6c38ea4d038cba20b7faf8a0ca53a9c07b67318d46088"}, {file = "greenlet-2.0.2-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:9190f09060ea4debddd24665d6804b995a9c122ef5917ab26e1566dcc712ceeb"}, @@ -3298,7 +3297,6 @@ files = [ {file = "greenlet-2.0.2-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:76ae285c8104046b3a7f06b42f29c7b73f77683df18c49ab5af7983994c2dd91"}, {file = "greenlet-2.0.2-cp310-cp310-win_amd64.whl", hash = "sha256:2d4686f195e32d36b4d7cf2d166857dbd0ee9f3d20ae349b6bf8afc8485b3645"}, {file = "greenlet-2.0.2-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:c4302695ad8027363e96311df24ee28978162cdcdd2006476c43970b384a244c"}, - {file = "greenlet-2.0.2-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:d4606a527e30548153be1a9f155f4e283d109ffba663a15856089fb55f933e47"}, {file = "greenlet-2.0.2-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:c48f54ef8e05f04d6eff74b8233f6063cb1ed960243eacc474ee73a2ea8573ca"}, {file = "greenlet-2.0.2-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:a1846f1b999e78e13837c93c778dcfc3365902cfb8d1bdb7dd73ead37059f0d0"}, {file = "greenlet-2.0.2-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:3a06ad5312349fec0ab944664b01d26f8d1f05009566339ac6f63f56589bc1a2"}, @@ -3328,7 +3326,6 @@ files = [ {file = "greenlet-2.0.2-cp37-cp37m-win32.whl", hash = "sha256:3f6ea9bd35eb450837a3d80e77b517ea5bc56b4647f5502cd28de13675ee12f7"}, {file = "greenlet-2.0.2-cp37-cp37m-win_amd64.whl", hash = "sha256:7492e2b7bd7c9b9916388d9df23fa49d9b88ac0640db0a5b4ecc2b653bf451e3"}, {file = "greenlet-2.0.2-cp38-cp38-macosx_10_15_x86_64.whl", hash = 
"sha256:b864ba53912b6c3ab6bcb2beb19f19edd01a6bfcbdfe1f37ddd1778abfe75a30"}, - {file = "greenlet-2.0.2-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:1087300cf9700bbf455b1b97e24db18f2f77b55302a68272c56209d5587c12d1"}, {file = "greenlet-2.0.2-cp38-cp38-manylinux2010_x86_64.whl", hash = "sha256:ba2956617f1c42598a308a84c6cf021a90ff3862eddafd20c3333d50f0edb45b"}, {file = "greenlet-2.0.2-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:fc3a569657468b6f3fb60587e48356fe512c1754ca05a564f11366ac9e306526"}, {file = "greenlet-2.0.2-cp38-cp38-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:8eab883b3b2a38cc1e050819ef06a7e6344d4a990d24d45bc6f2cf959045a45b"}, @@ -3337,7 +3334,6 @@ files = [ {file = "greenlet-2.0.2-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:b0ef99cdbe2b682b9ccbb964743a6aca37905fda5e0452e5ee239b1654d37f2a"}, {file = "greenlet-2.0.2-cp38-cp38-win32.whl", hash = "sha256:b80f600eddddce72320dbbc8e3784d16bd3fb7b517e82476d8da921f27d4b249"}, {file = "greenlet-2.0.2-cp38-cp38-win_amd64.whl", hash = "sha256:4d2e11331fc0c02b6e84b0d28ece3a36e0548ee1a1ce9ddde03752d9b79bba40"}, - {file = "greenlet-2.0.2-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:8512a0c38cfd4e66a858ddd1b17705587900dd760c6003998e9472b77b56d417"}, {file = "greenlet-2.0.2-cp39-cp39-macosx_11_0_x86_64.whl", hash = "sha256:88d9ab96491d38a5ab7c56dd7a3cc37d83336ecc564e4e8816dbed12e5aaefc8"}, {file = "greenlet-2.0.2-cp39-cp39-manylinux2010_x86_64.whl", hash = "sha256:561091a7be172ab497a3527602d467e2b3fbe75f9e783d8b8ce403fa414f71a6"}, {file = "greenlet-2.0.2-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:971ce5e14dc5e73715755d0ca2975ac88cfdaefcaab078a284fea6cfabf866df"}, @@ -4292,16 +4288,6 @@ files = [ {file = "MarkupSafe-2.1.3-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:5bbe06f8eeafd38e5d0a4894ffec89378b6c6a625ff57e3028921f8ff59318ac"}, {file = "MarkupSafe-2.1.3-cp311-cp311-win32.whl", hash = 
"sha256:dd15ff04ffd7e05ffcb7fe79f1b98041b8ea30ae9234aed2a9168b5797c3effb"}, {file = "MarkupSafe-2.1.3-cp311-cp311-win_amd64.whl", hash = "sha256:134da1eca9ec0ae528110ccc9e48041e0828d79f24121a1a146161103c76e686"}, - {file = "MarkupSafe-2.1.3-cp312-cp312-macosx_10_9_universal2.whl", hash = "sha256:f698de3fd0c4e6972b92290a45bd9b1536bffe8c6759c62471efaa8acb4c37bc"}, - {file = "MarkupSafe-2.1.3-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:aa57bd9cf8ae831a362185ee444e15a93ecb2e344c8e52e4d721ea3ab6ef1823"}, - {file = "MarkupSafe-2.1.3-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:ffcc3f7c66b5f5b7931a5aa68fc9cecc51e685ef90282f4a82f0f5e9b704ad11"}, - {file = "MarkupSafe-2.1.3-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:47d4f1c5f80fc62fdd7777d0d40a2e9dda0a05883ab11374334f6c4de38adffd"}, - {file = "MarkupSafe-2.1.3-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:1f67c7038d560d92149c060157d623c542173016c4babc0c1913cca0564b9939"}, - {file = "MarkupSafe-2.1.3-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:9aad3c1755095ce347e26488214ef77e0485a3c34a50c5a5e2471dff60b9dd9c"}, - {file = "MarkupSafe-2.1.3-cp312-cp312-musllinux_1_1_i686.whl", hash = "sha256:14ff806850827afd6b07a5f32bd917fb7f45b046ba40c57abdb636674a8b559c"}, - {file = "MarkupSafe-2.1.3-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:8f9293864fe09b8149f0cc42ce56e3f0e54de883a9de90cd427f191c346eb2e1"}, - {file = "MarkupSafe-2.1.3-cp312-cp312-win32.whl", hash = "sha256:715d3562f79d540f251b99ebd6d8baa547118974341db04f5ad06d5ea3eb8007"}, - {file = "MarkupSafe-2.1.3-cp312-cp312-win_amd64.whl", hash = "sha256:1b8dd8c3fd14349433c79fa8abeb573a55fc0fdd769133baac1f5e07abf54aeb"}, {file = "MarkupSafe-2.1.3-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:8e254ae696c88d98da6555f5ace2279cf7cd5b3f52be2b5cf97feafe883b58d2"}, {file = 
"MarkupSafe-2.1.3-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:cb0932dc158471523c9637e807d9bfb93e06a95cbf010f1a38b98623b929ef2b"}, {file = "MarkupSafe-2.1.3-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:9402b03f1a1b4dc4c19845e5c749e3ab82d5078d16a2a4c2cd2df62d57bb0707"}, @@ -5951,6 +5937,108 @@ dev = ["coverage[toml] (==5.0.4)", "cryptography (>=3.4.0)", "pre-commit", "pyte docs = ["sphinx (>=4.5.0,<5.0.0)", "sphinx-rtd-theme", "zope.interface"] tests = ["coverage[toml] (==5.0.4)", "pytest (>=6.0.0,<7.0.0)"] +[[package]] +name = "pymongo" +version = "4.6.0" +description = "Python driver for MongoDB " +optional = false +python-versions = ">=3.7" +files = [ + {file = "pymongo-4.6.0-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:c011bd5ad03cc096f99ffcfdd18a1817354132c1331bed7a837a25226659845f"}, + {file = "pymongo-4.6.0-cp310-cp310-manylinux1_i686.whl", hash = "sha256:5e63146dbdb1eac207464f6e0cfcdb640c9c5ff0f57b754fa96fe252314a1dc6"}, + {file = "pymongo-4.6.0-cp310-cp310-manylinux2014_aarch64.whl", hash = "sha256:2972dd1f1285866aba027eff2f4a2bbf8aa98563c2ced14cb34ee5602b36afdf"}, + {file = "pymongo-4.6.0-cp310-cp310-manylinux2014_i686.whl", hash = "sha256:a0be99b599da95b7a90a918dd927b20c434bea5e1c9b3efc6a3c6cd67c23f813"}, + {file = "pymongo-4.6.0-cp310-cp310-manylinux2014_ppc64le.whl", hash = "sha256:9b0f98481ad5dc4cb430a60bbb8869f05505283b9ae1c62bdb65eb5e020ee8e3"}, + {file = "pymongo-4.6.0-cp310-cp310-manylinux2014_s390x.whl", hash = "sha256:256c503a75bd71cf7fb9ebf889e7e222d49c6036a48aad5a619f98a0adf0e0d7"}, + {file = "pymongo-4.6.0-cp310-cp310-manylinux2014_x86_64.whl", hash = "sha256:b4ad70d7cac4ca0c7b31444a0148bd3af01a2662fa12b1ad6f57cd4a04e21766"}, + {file = "pymongo-4.6.0-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:5717a308a703dda2886a5796a07489c698b442f5e409cf7dc2ac93de8d61d764"}, + {file = 
"pymongo-4.6.0-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:a8f7f9feecae53fa18d6a3ea7c75f9e9a1d4d20e5c3f9ce3fba83f07bcc4eee2"}, + {file = "pymongo-4.6.0-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:128b1485753106c54af481789cdfea12b90a228afca0b11fb3828309a907e10e"}, + {file = "pymongo-4.6.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:d3077a31633beef77d057c6523f5de7271ddef7bde5e019285b00c0cc9cac1e3"}, + {file = "pymongo-4.6.0-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:ebf02c32afa6b67e5861a27183dd98ed88419a94a2ab843cc145fb0bafcc5b28"}, + {file = "pymongo-4.6.0-cp310-cp310-win32.whl", hash = "sha256:b14dd73f595199f4275bed4fb509277470d9b9059310537e3b3daba12b30c157"}, + {file = "pymongo-4.6.0-cp310-cp310-win_amd64.whl", hash = "sha256:8adf014f2779992eba3b513e060d06f075f0ab2fb3ad956f413a102312f65cdf"}, + {file = "pymongo-4.6.0-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:ba51129fcc510824b6ca6e2ce1c27e3e4d048b6e35d3ae6f7e517bed1b8b25ce"}, + {file = "pymongo-4.6.0-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:2973f113e079fb98515722cd728e1820282721ec9fd52830e4b73cabdbf1eb28"}, + {file = "pymongo-4.6.0-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:af425f323fce1b07755edd783581e7283557296946212f5b1a934441718e7528"}, + {file = "pymongo-4.6.0-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:1ec71ac633b126c0775ed4604ca8f56c3540f5c21a1220639f299e7a544b55f9"}, + {file = "pymongo-4.6.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:3ec6c20385c5a58e16b1ea60c5e4993ea060540671d7d12664f385f2fb32fe79"}, + {file = "pymongo-4.6.0-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:85f2cdc400ee87f5952ebf2a117488f2525a3fb2e23863a8efe3e4ee9e54e4d1"}, + {file 
= "pymongo-4.6.0-cp311-cp311-win32.whl", hash = "sha256:7fc2bb8a74dcfcdd32f89528e38dcbf70a3a6594963d60dc9595e3b35b66e414"}, + {file = "pymongo-4.6.0-cp311-cp311-win_amd64.whl", hash = "sha256:6695d7136a435c1305b261a9ddb9b3ecec9863e05aab3935b96038145fd3a977"}, + {file = "pymongo-4.6.0-cp312-cp312-macosx_10_9_universal2.whl", hash = "sha256:d603edea1ff7408638b2504905c032193b7dcee7af269802dbb35bc8c3310ed5"}, + {file = "pymongo-4.6.0-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:79f41576b3022c2fe9780ae3e44202b2438128a25284a8ddfa038f0785d87019"}, + {file = "pymongo-4.6.0-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:49f2af6cf82509b15093ce3569229e0d53c90ad8ae2eef940652d4cf1f81e045"}, + {file = "pymongo-4.6.0-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:ecd9e1fa97aa11bf67472220285775fa15e896da108f425e55d23d7540a712ce"}, + {file = "pymongo-4.6.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:2d2be5c9c3488fa8a70f83ed925940f488eac2837a996708d98a0e54a861f212"}, + {file = "pymongo-4.6.0-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:8ab6bcc8e424e07c1d4ba6df96f7fb963bcb48f590b9456de9ebd03b88084fe8"}, + {file = "pymongo-4.6.0-cp312-cp312-win32.whl", hash = "sha256:47aa128be2e66abd9d1a9b0437c62499d812d291f17b55185cb4aa33a5f710a4"}, + {file = "pymongo-4.6.0-cp312-cp312-win_amd64.whl", hash = "sha256:014e7049dd019a6663747ca7dae328943e14f7261f7c1381045dfc26a04fa330"}, + {file = "pymongo-4.6.0-cp37-cp37m-manylinux1_i686.whl", hash = "sha256:288c21ab9531b037f7efa4e467b33176bc73a0c27223c141b822ab4a0e66ff2a"}, + {file = "pymongo-4.6.0-cp37-cp37m-manylinux1_x86_64.whl", hash = "sha256:747c84f4e690fbe6999c90ac97246c95d31460d890510e4a3fa61b7d2b87aa34"}, + {file = "pymongo-4.6.0-cp37-cp37m-manylinux2014_aarch64.whl", hash = "sha256:055f5c266e2767a88bb585d01137d9c7f778b0195d3dbf4a487ef0638be9b651"}, + {file = 
"pymongo-4.6.0-cp37-cp37m-manylinux2014_i686.whl", hash = "sha256:82e620842e12e8cb4050d2643a81c8149361cd82c0a920fa5a15dc4ca8a4000f"}, + {file = "pymongo-4.6.0-cp37-cp37m-manylinux2014_ppc64le.whl", hash = "sha256:6b18276f14b4b6d92e707ab6db19b938e112bd2f1dc3f9f1a628df58e4fd3f0d"}, + {file = "pymongo-4.6.0-cp37-cp37m-manylinux2014_s390x.whl", hash = "sha256:680fa0fc719e1a3dcb81130858368f51d83667d431924d0bcf249644bce8f303"}, + {file = "pymongo-4.6.0-cp37-cp37m-manylinux2014_x86_64.whl", hash = "sha256:3919708594b86d0f5cdc713eb6fccd3f9b9532af09ea7a5d843c933825ef56c4"}, + {file = "pymongo-4.6.0-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:db082f728160369d9a6ed2e722438291558fc15ce06d0a7d696a8dad735c236b"}, + {file = "pymongo-4.6.0-cp37-cp37m-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:1e4ed21029d80c4f62605ab16398fe1ce093fff4b5f22d114055e7d9fbc4adb0"}, + {file = "pymongo-4.6.0-cp37-cp37m-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:9bea9138b0fc6e2218147e9c6ce1ff76ff8e29dc00bb1b64842bd1ca107aee9f"}, + {file = "pymongo-4.6.0-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:4a0269811661ba93c472c8a60ea82640e838c2eb148d252720a09b5123f2c2fe"}, + {file = "pymongo-4.6.0-cp37-cp37m-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:6d6a1b1361f118e7fefa17ae3114e77f10ee1b228b20d50c47c9f351346180c8"}, + {file = "pymongo-4.6.0-cp37-cp37m-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:7e3b0127b260d4abae7b62203c4c7ef0874c901b55155692353db19de4b18bc4"}, + {file = "pymongo-4.6.0-cp37-cp37m-manylinux_2_5_x86_64.manylinux1_x86_64.whl", hash = "sha256:a49aca4d961823b2846b739380c847e8964ff7ae0f0a683992b9d926054f0d6d"}, + {file = "pymongo-4.6.0-cp37-cp37m-win32.whl", hash = "sha256:09c7de516b08c57647176b9fc21d929d628e35bcebc7422220c89ae40b62126a"}, + {file = "pymongo-4.6.0-cp37-cp37m-win_amd64.whl", hash = 
"sha256:81dd1308bd5630d2bb5980f00aa163b986b133f1e9ed66c66ce2a5bc3572e891"}, + {file = "pymongo-4.6.0-cp38-cp38-macosx_11_0_universal2.whl", hash = "sha256:2f8c04277d879146eacda920476e93d520eff8bec6c022ac108cfa6280d84348"}, + {file = "pymongo-4.6.0-cp38-cp38-manylinux1_i686.whl", hash = "sha256:5802acc012bbb4bce4dff92973dff76482f30ef35dd4cb8ab5b0e06aa8f08c80"}, + {file = "pymongo-4.6.0-cp38-cp38-manylinux1_x86_64.whl", hash = "sha256:ccd785fafa1c931deff6a7116e9a0d402d59fabe51644b0d0c268295ff847b25"}, + {file = "pymongo-4.6.0-cp38-cp38-manylinux2014_aarch64.whl", hash = "sha256:fe03bf25fae4b95d8afe40004a321df644400fdcba4c8e5e1a19c1085b740888"}, + {file = "pymongo-4.6.0-cp38-cp38-manylinux2014_i686.whl", hash = "sha256:2ca0ba501898b2ec31e6c3acf90c31910944f01d454ad8e489213a156ccf1bda"}, + {file = "pymongo-4.6.0-cp38-cp38-manylinux2014_ppc64le.whl", hash = "sha256:10a379fb60f1b2406ae57b8899bacfe20567918c8e9d2d545e1b93628fcf2050"}, + {file = "pymongo-4.6.0-cp38-cp38-manylinux2014_s390x.whl", hash = "sha256:a4dc1319d0c162919ee7f4ee6face076becae2abbd351cc14f1fe70af5fb20d9"}, + {file = "pymongo-4.6.0-cp38-cp38-manylinux2014_x86_64.whl", hash = "sha256:ddef295aaf80cefb0c1606f1995899efcb17edc6b327eb6589e234e614b87756"}, + {file = "pymongo-4.6.0-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:518c90bdd6e842c446d01a766b9136fec5ec6cc94f3b8c3f8b4a332786ee6b64"}, + {file = "pymongo-4.6.0-cp38-cp38-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:b80a4ee19b3442c57c38afa978adca546521a8822d663310b63ae2a7d7b13f3a"}, + {file = "pymongo-4.6.0-cp38-cp38-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:eb438a8bf6b695bf50d57e6a059ff09652a07968b2041178b3744ea785fcef9b"}, + {file = "pymongo-4.6.0-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:e3db7d833a7c38c317dc95b54e27f1d27012e031b45a7c24e360b53197d5f6e7"}, + {file = 
"pymongo-4.6.0-cp38-cp38-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:3729b8db02063da50eeb3db88a27670d85953afb9a7f14c213ac9e3dca93034b"}, + {file = "pymongo-4.6.0-cp38-cp38-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:39a1cd5d383b37285641d5a7a86be85274466ae336a61b51117155936529f9b3"}, + {file = "pymongo-4.6.0-cp38-cp38-manylinux_2_5_x86_64.manylinux1_x86_64.whl", hash = "sha256:7b0e6361754ac596cd16bfc6ed49f69ffcd9b60b7bc4bcd3ea65c6a83475e4ff"}, + {file = "pymongo-4.6.0-cp38-cp38-win32.whl", hash = "sha256:806e094e9e85d8badc978af8c95b69c556077f11844655cb8cd2d1758769e521"}, + {file = "pymongo-4.6.0-cp38-cp38-win_amd64.whl", hash = "sha256:1394c4737b325166a65ae7c145af1ebdb9fb153ebedd37cf91d676313e4a67b8"}, + {file = "pymongo-4.6.0-cp39-cp39-macosx_10_9_universal2.whl", hash = "sha256:a8273e1abbcff1d7d29cbbb1ea7e57d38be72f1af3c597c854168508b91516c2"}, + {file = "pymongo-4.6.0-cp39-cp39-manylinux1_i686.whl", hash = "sha256:e16ade71c93f6814d095d25cd6d28a90d63511ea396bd96e9ffcb886b278baaa"}, + {file = "pymongo-4.6.0-cp39-cp39-manylinux1_x86_64.whl", hash = "sha256:325701ae7b56daa5b0692305b7cb505ca50f80a1288abb32ff420a8a209b01ca"}, + {file = "pymongo-4.6.0-cp39-cp39-manylinux2014_aarch64.whl", hash = "sha256:cc94f9fea17a5af8cf1a343597711a26b0117c0b812550d99934acb89d526ed2"}, + {file = "pymongo-4.6.0-cp39-cp39-manylinux2014_i686.whl", hash = "sha256:21812453354b151200034750cd30b0140e82ec2a01fd4357390f67714a1bfbde"}, + {file = "pymongo-4.6.0-cp39-cp39-manylinux2014_ppc64le.whl", hash = "sha256:0634994b026336195778e5693583c060418d4ab453eff21530422690a97e1ee8"}, + {file = "pymongo-4.6.0-cp39-cp39-manylinux2014_s390x.whl", hash = "sha256:ad4f66fbb893b55f96f03020e67dcab49ffde0177c6565ccf9dec4fdf974eb61"}, + {file = "pymongo-4.6.0-cp39-cp39-manylinux2014_x86_64.whl", hash = "sha256:2703a9f8f5767986b4f51c259ff452cc837c5a83c8ed5f5361f6e49933743b2f"}, + {file = 
"pymongo-4.6.0-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:6bafea6061d63059d8bc2ffc545e2f049221c8a4457d236c5cd6a66678673eab"}, + {file = "pymongo-4.6.0-cp39-cp39-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:f28ae33dc5a0b9cee06e95fd420e42155d83271ab75964baf747ce959cac5f52"}, + {file = "pymongo-4.6.0-cp39-cp39-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:d16a534da0e39785687b7295e2fcf9a339f4a20689024983d11afaa4657f8507"}, + {file = "pymongo-4.6.0-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:ef67fedd863ffffd4adfd46d9d992b0f929c7f61a8307366d664d93517f2c78e"}, + {file = "pymongo-4.6.0-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:05c30fd35cc97f14f354916b45feea535d59060ef867446b5c3c7f9b609dd5dc"}, + {file = "pymongo-4.6.0-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:1c63e3a2e8fb815c4b1f738c284a4579897e37c3cfd95fdb199229a1ccfb638a"}, + {file = "pymongo-4.6.0-cp39-cp39-manylinux_2_5_x86_64.manylinux1_x86_64.whl", hash = "sha256:e5e193f89f4f8c1fe273f9a6e6df915092c9f2af6db2d1afb8bd53855025c11f"}, + {file = "pymongo-4.6.0-cp39-cp39-win32.whl", hash = "sha256:a09bfb51953930e7e838972ddf646c5d5f984992a66d79da6ba7f6a8d8a890cd"}, + {file = "pymongo-4.6.0-cp39-cp39-win_amd64.whl", hash = "sha256:107a234dc55affc5802acb3b6d83cbb8c87355b38a9457fcd8806bdeb8bce161"}, + {file = "pymongo-4.6.0.tar.gz", hash = "sha256:fb1c56d891f9e34303c451998ef62ba52659648bb0d75b03c5e4ac223a3342c2"}, +] + +[package.dependencies] +dnspython = ">=1.16.0,<3.0.0" + +[package.extras] +aws = ["pymongo-auth-aws (<2.0.0)"] +encryption = ["certifi", "pymongo[aws]", "pymongocrypt (>=1.6.0,<2.0.0)"] +gssapi = ["pykerberos", "winkerberos (>=0.5.0)"] +ocsp = ["certifi", "cryptography (>=2.5)", "pyopenssl (>=17.2.0)", "requests (<3.0.0)", "service-identity (>=18.1.0)"] +snappy = ["python-snappy"] +test = ["pytest (>=7)"] +zstd = ["zstandard"] + 
[[package]] name = "pymysql" version = "1.1.0" @@ -6318,7 +6406,6 @@ files = [ {file = "PyYAML-6.0.1-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:69b023b2b4daa7548bcfbd4aa3da05b3a74b772db9e23b982788168117739938"}, {file = "PyYAML-6.0.1-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:81e0b275a9ecc9c0c0c07b4b90ba548307583c125f54d5b6946cfee6360c733d"}, {file = "PyYAML-6.0.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:ba336e390cd8e4d1739f42dfe9bb83a3cc2e80f567d8805e11b46f4a943f5515"}, - {file = "PyYAML-6.0.1-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:326c013efe8048858a6d312ddd31d56e468118ad4cdeda36c719bf5bb6192290"}, {file = "PyYAML-6.0.1-cp310-cp310-win32.whl", hash = "sha256:bd4af7373a854424dabd882decdc5579653d7868b8fb26dc7d0e99f823aa5924"}, {file = "PyYAML-6.0.1-cp310-cp310-win_amd64.whl", hash = "sha256:fd1592b3fdf65fff2ad0004b5e363300ef59ced41c2e6b3a99d4089fa8c5435d"}, {file = "PyYAML-6.0.1-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:6965a7bc3cf88e5a1c3bd2e0b5c22f8d677dc88a455344035f03399034eb3007"}, @@ -6326,15 +6413,8 @@ files = [ {file = "PyYAML-6.0.1-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:42f8152b8dbc4fe7d96729ec2b99c7097d656dc1213a3229ca5383f973a5ed6d"}, {file = "PyYAML-6.0.1-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:062582fca9fabdd2c8b54a3ef1c978d786e0f6b3a1510e0ac93ef59e0ddae2bc"}, {file = "PyYAML-6.0.1-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:d2b04aac4d386b172d5b9692e2d2da8de7bfb6c387fa4f801fbf6fb2e6ba4673"}, - {file = "PyYAML-6.0.1-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:e7d73685e87afe9f3b36c799222440d6cf362062f78be1013661b00c5c6f678b"}, {file = "PyYAML-6.0.1-cp311-cp311-win32.whl", hash = "sha256:1635fd110e8d85d55237ab316b5b011de701ea0f29d07611174a1b42f1444741"}, {file = "PyYAML-6.0.1-cp311-cp311-win_amd64.whl", hash = 
"sha256:bf07ee2fef7014951eeb99f56f39c9bb4af143d8aa3c21b1677805985307da34"}, - {file = "PyYAML-6.0.1-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:855fb52b0dc35af121542a76b9a84f8d1cd886ea97c84703eaa6d88e37a2ad28"}, - {file = "PyYAML-6.0.1-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:40df9b996c2b73138957fe23a16a4f0ba614f4c0efce1e9406a184b6d07fa3a9"}, - {file = "PyYAML-6.0.1-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:6c22bec3fbe2524cde73d7ada88f6566758a8f7227bfbf93a408a9d86bcc12a0"}, - {file = "PyYAML-6.0.1-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:8d4e9c88387b0f5c7d5f281e55304de64cf7f9c0021a3525bd3b1c542da3b0e4"}, - {file = "PyYAML-6.0.1-cp312-cp312-win32.whl", hash = "sha256:d483d2cdf104e7c9fa60c544d92981f12ad66a457afae824d146093b8c294c54"}, - {file = "PyYAML-6.0.1-cp312-cp312-win_amd64.whl", hash = "sha256:0d3304d8c0adc42be59c5f8a4d9e3d7379e6955ad754aa9d6ab7a398b59dd1df"}, {file = "PyYAML-6.0.1-cp36-cp36m-macosx_10_9_x86_64.whl", hash = "sha256:50550eb667afee136e9a77d6dc71ae76a44df8b3e51e41b77f6de2932bfe0f47"}, {file = "PyYAML-6.0.1-cp36-cp36m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:1fe35611261b29bd1de0070f0b2f47cb6ff71fa6595c077e42bd0c419fa27b98"}, {file = "PyYAML-6.0.1-cp36-cp36m-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:704219a11b772aea0d8ecd7058d0082713c3562b4e271b849ad7dc4a5c90c13c"}, @@ -6351,7 +6431,6 @@ files = [ {file = "PyYAML-6.0.1-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a0cd17c15d3bb3fa06978b4e8958dcdc6e0174ccea823003a106c7d4d7899ac5"}, {file = "PyYAML-6.0.1-cp38-cp38-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:28c119d996beec18c05208a8bd78cbe4007878c6dd15091efb73a30e90539696"}, {file = "PyYAML-6.0.1-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:7e07cbde391ba96ab58e532ff4803f79c4129397514e1413a7dc761ccd755735"}, - {file = "PyYAML-6.0.1-cp38-cp38-musllinux_1_1_x86_64.whl", 
hash = "sha256:49a183be227561de579b4a36efbb21b3eab9651dd81b1858589f796549873dd6"}, {file = "PyYAML-6.0.1-cp38-cp38-win32.whl", hash = "sha256:184c5108a2aca3c5b3d3bf9395d50893a7ab82a38004c8f61c258d4428e80206"}, {file = "PyYAML-6.0.1-cp38-cp38-win_amd64.whl", hash = "sha256:1e2722cc9fbb45d9b87631ac70924c11d3a401b2d7f410cc0e3bbf249f2dca62"}, {file = "PyYAML-6.0.1-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:9eb6caa9a297fc2c2fb8862bc5370d0303ddba53ba97e71f08023b6cd73d16a8"}, @@ -6359,7 +6438,6 @@ files = [ {file = "PyYAML-6.0.1-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:5773183b6446b2c99bb77e77595dd486303b4faab2b086e7b17bc6bef28865f6"}, {file = "PyYAML-6.0.1-cp39-cp39-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:b786eecbdf8499b9ca1d697215862083bd6d2a99965554781d0d8d1ad31e13a0"}, {file = "PyYAML-6.0.1-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:bc1bf2925a1ecd43da378f4db9e4f799775d6367bdb94671027b73b393a7c42c"}, - {file = "PyYAML-6.0.1-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:04ac92ad1925b2cff1db0cfebffb6ffc43457495c9b3c39d3fcae417d7125dc5"}, {file = "PyYAML-6.0.1-cp39-cp39-win32.whl", hash = "sha256:faca3bdcf85b2fc05d06ff3fbc1f83e1391b3e724afa3feba7d13eeab355484c"}, {file = "PyYAML-6.0.1-cp39-cp39-win_amd64.whl", hash = "sha256:510c9deebc5c0225e8c96813043e62b680ba2f9c50a08d3724c7f28a747d1486"}, {file = "PyYAML-6.0.1.tar.gz", hash = "sha256:bfdf460b1736c775f2ba9f6a92bca30bc2095067b8a9d77876d1fad6cc3b4a43"}, @@ -8205,4 +8283,4 @@ weaviate = ["weaviate-client"] [metadata] lock-version = "2.0" python-versions = ">=3.8.1,<3.13" -content-hash = "2686fa0fe1e5f2e796b472f4be28af52e65b5d6bb0bcbbd8df784fd6bc63cf80" +content-hash = "32151571085c79cba33c4b83a8a9f6d8f789c15ca8004bae5cc715a50d4bf960" From 6b65e4c2bf35d5315c7f925c6a28f491cb3b0b79 Mon Sep 17 00:00:00 2001 From: AstrakhantsevaAA Date: Wed, 8 Nov 2023 12:06:39 +0100 Subject: [PATCH 10/16] fix typing --- 
docs/examples/nested_data/nested_data.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/docs/examples/nested_data/nested_data.py b/docs/examples/nested_data/nested_data.py index d6699179d6..b475b15c12 100644 --- a/docs/examples/nested_data/nested_data.py +++ b/docs/examples/nested_data/nested_data.py @@ -32,8 +32,8 @@ def mongodb_collection( collection_obj = mongo_database[collection] def collection_documents( - client, - collection, + client: Any, + collection: str, incremental: Optional[dlt.sources.incremental[Any]] = None, ) -> Iterator[TDataItem]: LoaderClass = CollectionLoader @@ -52,8 +52,8 @@ def collection_documents( class CollectionLoader: def __init__( self, - client, - collection, + client: Any, + collection: str, incremental: Optional[dlt.sources.incremental[Any]] = None, ) -> None: self.client = client From 9886f5fca62fd65fd24bd456a9376de31abb1b42 Mon Sep 17 00:00:00 2001 From: AstrakhantsevaAA Date: Wed, 8 Nov 2023 12:12:21 +0100 Subject: [PATCH 11/16] fix typing --- docs/examples/nested_data/nested_data.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/docs/examples/nested_data/nested_data.py b/docs/examples/nested_data/nested_data.py index b475b15c12..3e4a1295c3 100644 --- a/docs/examples/nested_data/nested_data.py +++ b/docs/examples/nested_data/nested_data.py @@ -27,13 +27,13 @@ def mongodb_collection( write_disposition: Optional[str] = dlt.config.value, ) -> Any: # set up mongo client - client = MongoClient(connection_url, uuidRepresentation="standard", tz_aware=True) + client: Any = MongoClient(connection_url, uuidRepresentation="standard", tz_aware=True) mongo_database = client.get_default_database() if not database else client[database] collection_obj = mongo_database[collection] def collection_documents( client: Any, - collection: str, + collection: Any, incremental: Optional[dlt.sources.incremental[Any]] = None, ) -> Iterator[TDataItem]: LoaderClass = CollectionLoader @@ -53,7 +53,7 @@ 
class CollectionLoader: def __init__( self, client: Any, - collection: str, + collection: Any, incremental: Optional[dlt.sources.incremental[Any]] = None, ) -> None: self.client = client From 9ef7a7e29418bc6fb27c5c4185272bae29f0aaf1 Mon Sep 17 00:00:00 2001 From: AstrakhantsevaAA Date: Wed, 8 Nov 2023 12:14:02 +0100 Subject: [PATCH 12/16] fix typing --- .../examples/nested_data/code/nested_data-snippets.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/docs/website/docs/examples/nested_data/code/nested_data-snippets.py b/docs/website/docs/examples/nested_data/code/nested_data-snippets.py index cda248a4fc..d6328fc2c5 100644 --- a/docs/website/docs/examples/nested_data/code/nested_data-snippets.py +++ b/docs/website/docs/examples/nested_data/code/nested_data-snippets.py @@ -30,13 +30,13 @@ def mongodb_collection( write_disposition: Optional[str] = dlt.config.value, ) -> Any: # set up mongo client - client = MongoClient(connection_url, uuidRepresentation="standard", tz_aware=True) + client: Any = MongoClient(connection_url, uuidRepresentation="standard", tz_aware=True) mongo_database = client.get_default_database() if not database else client[database] collection_obj = mongo_database[collection] def collection_documents( - client, - collection, + client: Any, + collection: Any, incremental: Optional[dlt.sources.incremental[Any]] = None, ) -> Iterator[TDataItem]: LoaderClass = CollectionLoader @@ -56,8 +56,8 @@ def collection_documents( class CollectionLoader: def __init__( self, - client, - collection, + client: Any, + collection: Any, incremental: Optional[dlt.sources.incremental[Any]] = None, ) -> None: self.client = client From 143b86f55bb7d7f990f4ee43d073c5ac26e86c16 Mon Sep 17 00:00:00 2001 From: AstrakhantsevaAA Date: Wed, 8 Nov 2023 12:29:19 +0100 Subject: [PATCH 13/16] add env var --- .github/workflows/test_doc_snippets.yml | 2 ++ 1 file changed, 2 insertions(+) diff --git a/.github/workflows/test_doc_snippets.yml 
b/.github/workflows/test_doc_snippets.yml index 89b3724be3..61e80ec872 100644 --- a/.github/workflows/test_doc_snippets.yml +++ b/.github/workflows/test_doc_snippets.yml @@ -22,6 +22,8 @@ env: SOURCES__ZENDESK__CREDENTIALS: ${{ secrets.ZENDESK__CREDENTIALS }} # Slack hook for chess in production example RUNTIME__SLACK_INCOMING_HOOK: ${{ secrets.RUNTIME__SLACK_INCOMING_HOOK }} + # Mongodb url for nested data example + MONGODB_PIPELINE__SOURCES__CONNECTION_URL: ${{ secrets.MONGODB_PIPELINE__SOURCES__CONNECTION_URL }} jobs: run_lint: From 3409b5d42976657f9bf79e58121892a93c78ad22 Mon Sep 17 00:00:00 2001 From: AstrakhantsevaAA Date: Wed, 8 Nov 2023 14:18:30 +0100 Subject: [PATCH 14/16] move mongo to docs --- poetry.lock | 2 +- pyproject.toml | 2 -- 2 files changed, 1 insertion(+), 3 deletions(-) diff --git a/poetry.lock b/poetry.lock index d715259136..fecaee1be3 100644 --- a/poetry.lock +++ b/poetry.lock @@ -8283,4 +8283,4 @@ weaviate = ["weaviate-client"] [metadata] lock-version = "2.0" python-versions = ">=3.8.1,<3.13" -content-hash = "32151571085c79cba33c4b83a8a9f6d8f789c15ca8004bae5cc715a50d4bf960" +content-hash = "82acaeeda4bb9354378d8647e4f20ce05ebca324c7fb20a9902c97eac2121b7a" diff --git a/pyproject.toml b/pyproject.toml index a0b58eb980..47ffd3e9c4 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -162,8 +162,6 @@ pymysql = "^1.1.0" pypdf2 = "^3.0.1" pydoc-markdown = "^4.8.2" connectorx="0.3.1" - -[tool.poetry.group.mongodb.dependencies] pymongo = ">=4.3.3" [build-system] From 52404766685388d079ac2ccd14cfa79e79efe920 Mon Sep 17 00:00:00 2001 From: AstrakhantsevaAA Date: Wed, 8 Nov 2023 14:20:38 +0100 Subject: [PATCH 15/16] delete with mongo --- .github/workflows/test_doc_snippets.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/test_doc_snippets.yml b/.github/workflows/test_doc_snippets.yml index 61e80ec872..ad7d544219 100644 --- a/.github/workflows/test_doc_snippets.yml +++ b/.github/workflows/test_doc_snippets.yml @@ 
-59,7 +59,7 @@ jobs: - name: Install dependencies # if: steps.cached-poetry-dependencies.outputs.cache-hit != 'true' - run: poetry install --no-interaction -E duckdb -E weaviate -E parquet --with docs --with mongodb --without airflow + run: poetry install --no-interaction -E duckdb -E weaviate -E parquet --with docs --without airflow - name: Run linter and tests run: make test-and-lint-snippets From fa5d56b348ec339ed778ac84af41edee22c1054b Mon Sep 17 00:00:00 2001 From: AstrakhantsevaAA Date: Wed, 8 Nov 2023 14:24:48 +0100 Subject: [PATCH 16/16] fix typo --- docs/website/docs/examples/nested_data/index.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/website/docs/examples/nested_data/index.md b/docs/website/docs/examples/nested_data/index.md index 8d978b2618..dd5db5009c 100644 --- a/docs/website/docs/examples/nested_data/index.md +++ b/docs/website/docs/examples/nested_data/index.md @@ -16,7 +16,7 @@ import Header from '../_examples-header.md'; In this example, you'll find a Python script that demonstrates how to control nested data using the `dlt` library. We'll learn how to: -- [Adjust maximal nesting level in three ways:](../../general-usage/source#reduce-the-nesting-level-of-generated-tables) +- [Adjust maximum nesting level in three ways:](../../general-usage/source#reduce-the-nesting-level-of-generated-tables) - Limit nesting levels with dlt decorator. - Dynamic nesting level adjustment. - Apply data type hints.