Skip to content

Commit

Permalink
Merge pull request #552 from dlt-hub/rfix/dependency-fixes
Browse files Browse the repository at this point in the history
moves non essential dependencies to extras
  • Loading branch information
rudolfix authored Aug 13, 2023
2 parents feb3e44 + e455c8a commit f765eb8
Show file tree
Hide file tree
Showing 30 changed files with 1,645 additions and 1,248 deletions.
33 changes: 14 additions & 19 deletions .github/workflows/test_common.yml
Original file line number Diff line number Diff line change
Expand Up @@ -26,12 +26,13 @@ jobs:
python-version: ["3.11.x"]
# Test all python versions on ubuntu only
include:
- os: "ubuntu-latest"
python-version: "3.8.x"
- os: "ubuntu-latest"
python-version: "3.9.x"
- os: "ubuntu-latest"
python-version: "3.10.x"
- python-version: "3.8.x"
os: "ubuntu-latest"
- python-version: "3.9.x"
os: "ubuntu-latest"
- python-version: "3.10.x"
os: "ubuntu-latest"

defaults:
run:
shell: bash
Expand All @@ -54,12 +55,6 @@ jobs:
virtualenvs-in-project: true
installer-parallel: true

# - name: Get pip cache dir
# id: pip-cache
# run: |
# echo "::set-output name=dir::$(poetry env info -p)"
# echo "$(poetry env info -p)"

- name: Load cached venv
id: cached-poetry-dependencies
uses: actions/cache@v3
Expand All @@ -68,28 +63,28 @@ jobs:
path: .venv
key: venv-${{ runner.os }}-${{ steps.setup-python.outputs.python-version }}-${{ hashFiles('**/poetry.lock') }}

- name: Install dependencies
run: poetry install --no-interaction -E pyarrow
- name: Install dependencies + sentry
run: poetry install --no-interaction -E pyarrow && pip install sentry-sdk

- run: |
poetry run pytest tests/common tests/normalize tests/reflection tests/sources tests/cli/common
poetry run pytest tests/common tests/normalize tests/reflection tests/sources
if: runner.os != 'Windows'
name: Run tests Linux/MAC
- run: |
poetry run pytest tests/common tests/normalize tests/reflection tests/sources tests/cli/common -m "not forked"
poetry run pytest tests/common tests/normalize tests/reflection tests/sources -m "not forked"
if: runner.os == 'Windows'
name: Run tests Windows
shell: cmd
- name: Install extra dependencies
run: poetry install --no-interaction -E duckdb -E pyarrow
run: poetry install --no-interaction -E duckdb -E cli

- run: |
poetry run pytest tests/extract tests/pipeline
poetry run pytest tests/extract tests/pipeline tests/cli/common
if: runner.os != 'Windows'
name: Run extra tests Linux/MAC
- run: |
poetry run pytest tests/extract tests/pipeline -m "not forked"
poetry run pytest tests/extract tests/pipeline tests/cli/common
if: runner.os == 'Windows'
name: Run extra tests Windows
shell: cmd
Expand Down
8 changes: 1 addition & 7 deletions .github/workflows/test_destinations.yml
Original file line number Diff line number Diff line change
Expand Up @@ -63,12 +63,6 @@ jobs:
virtualenvs-in-project: true
installer-parallel: true

# - name: Get pip cache dir
# id: pip-cache
# run: |
# echo "::set-output name=dir::$(poetry env info -p)"
# echo "$(poetry env info -p)"

- name: Load cached venv
id: cached-poetry-dependencies
uses: actions/cache@v3
Expand All @@ -79,7 +73,7 @@ jobs:

- name: Install dependencies
# if: steps.cached-poetry-dependencies.outputs.cache-hit != 'true'
run: poetry install --no-interaction -E redshift -E gs -E s3 -E pyarrow -E duckdb
run: poetry install --no-interaction -E redshift -E gs -E s3 -E pyarrow -E duckdb -E cli

# - name: Install self
# run: poetry install --no-interaction
Expand Down
8 changes: 5 additions & 3 deletions .github/workflows/test_local_destinations.yml
Original file line number Diff line number Diff line change
Expand Up @@ -42,7 +42,9 @@ jobs:
image: postgres
# Provide the password for postgres
env:
POSTGRES_PASSWORD: postgres
POSTGRES_DB: dlt_data
POSTGRES_USER: loader
POSTGRES_PASSWORD: loader
ports:
- 5432:5432
# Set health checks to wait until postgres has started
Expand Down Expand Up @@ -76,9 +78,9 @@ jobs:
key: venv-${{ runner.os }}-${{ steps.setup-python.outputs.python-version }}-${{ hashFiles('**/poetry.lock') }}-redshift

- name: Install dependencies
run: poetry install --no-interaction -E postgres -E duckdb -E pyarrow -E filesystem
run: poetry install --no-interaction -E postgres -E duckdb -E pyarrow -E filesystem -E cli

- run: poetry run pytest tests/load tests/cli
name: Run tests Linux
env:
DESTINATION__POSTGRES__CREDENTIALS: postgresql://postgres:postgres@localhost:5432/postgres
DESTINATION__POSTGRES__CREDENTIALS: postgresql://loader:loader@localhost:5432/dlt_data
82 changes: 55 additions & 27 deletions dlt/cli/_dlt.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,11 +12,17 @@

import dlt.cli.echo as fmt
from dlt.cli import utils
from dlt.pipeline.exceptions import CannotRestorePipelineException

from dlt.cli.init_command import init_command, list_verified_sources_command, DLT_INIT_DOCS_URL, DEFAULT_VERIFIED_SOURCES_REPO
from dlt.cli.deploy_command import PipelineWasNotRun, deploy_command, DLT_DEPLOY_DOCS_URL, DeploymentMethods, COMMAND_DEPLOY_REPO_LOCATION, SecretFormats
from dlt.cli.pipeline_command import pipeline_command, DLT_PIPELINE_COMMAND_DOCS_URL
from dlt.cli.telemetry_command import DLT_TELEMETRY_DOCS_URL, change_telemetry_status_command, telemetry_status_command
from dlt.pipeline.exceptions import CannotRestorePipelineException

try:
from dlt.cli import deploy_command
from dlt.cli.deploy_command import PipelineWasNotRun, DLT_DEPLOY_DOCS_URL, DeploymentMethods, COMMAND_DEPLOY_REPO_LOCATION, SecretFormats
except ModuleNotFoundError:
pass


@utils.track_command("init", False, "source_name", "destination_name")
Expand Down Expand Up @@ -52,7 +58,13 @@ def deploy_command_wrapper(pipeline_script_path: str, deployment_method: str, re

from git import InvalidGitRepositoryError, NoSuchPathError
try:
deploy_command(pipeline_script_path=pipeline_script_path, deployment_method=deployment_method, repo_location=repo_location, branch=branch, **kwargs)
deploy_command.deploy_command(
pipeline_script_path=pipeline_script_path,
deployment_method=deployment_method,
repo_location=repo_location,
branch=branch,
**kwargs
)
except (CannotRestorePipelineException, PipelineWasNotRun) as ex:
click.secho(str(ex), err=True, fg="red")
fmt.note("You must run the pipeline locally successfully at least once in order to deploy it.")
Expand Down Expand Up @@ -190,22 +202,32 @@ def main() -> int:
init_cmd.add_argument("--branch", default=None, help="Advanced. Uses specific branch of the init repository to fetch the template.")
init_cmd.add_argument("--generic", default=False, action="store_true", help="When present, uses a generic template with all the dlt loading code included. Otherwise a debug template is used that can be immediately run to get familiar with the dlt sources.")

# deploy
deploy_cmd = subparsers.add_parser("deploy", help="Creates a deployment package for a selected pipeline script")
deploy_cmd.add_argument("pipeline_script_path", metavar="pipeline-script-path", help="Path to a pipeline script")
deploy_cmd.add_argument("--schedule", required=False, help="A schedule with which to run the pipeline, in cron format. Example: '*/30 * * * *' will run the pipeline every 30 minutes.")
deploy_cmd.add_argument("--location", default=COMMAND_DEPLOY_REPO_LOCATION, help="Advanced. Uses a specific url or local path to pipelines repository.")
deploy_cmd.add_argument("--branch", default=None, help="Advanced. Uses specific branch of the deploy repository to fetch the template.")
deploy_sub_parsers = deploy_cmd.add_subparsers(dest="deployment_method")

# deploy github actions
deploy_github_cmd = deploy_sub_parsers.add_parser(DeploymentMethods.github_actions.value, help="Deploys the pipeline to Github Actions")
deploy_github_cmd.add_argument("--run-manually", default=True, action="store_true", help="Allows the pipeline to be run manually from Github Actions UI.")
deploy_github_cmd.add_argument("--run-on-push", default=False, action="store_true", help="Runs the pipeline with every push to the repository.")

# deploy airflow composer
deploy_airflow_cmd = deploy_sub_parsers.add_parser(DeploymentMethods.airflow_composer.value, help="Deploys the pipeline to Airflow")
deploy_airflow_cmd.add_argument("--secrets-format", default=SecretFormats.toml.value, choices=[v.value for v in SecretFormats], required=False, help="Format of the secrets")
# deploy command requires additional dependencies
try:
# make sure the name is defined
_ = deploy_command
deploy_comm = argparse.ArgumentParser(formatter_class=argparse.ArgumentDefaultsHelpFormatter, add_help=False)
deploy_comm.add_argument("--location", default=COMMAND_DEPLOY_REPO_LOCATION, help="Advanced. Uses a specific url or local path to pipelines repository.")
deploy_comm.add_argument("--branch", help="Advanced. Uses specific branch of the deploy repository to fetch the template.")

deploy_cmd = subparsers.add_parser("deploy", help="Creates a deployment package for a selected pipeline script")
deploy_cmd.add_argument("pipeline_script_path", metavar="pipeline-script-path", help="Path to a pipeline script")
deploy_sub_parsers = deploy_cmd.add_subparsers(dest="deployment_method")

# deploy github actions
deploy_github_cmd = deploy_sub_parsers.add_parser(DeploymentMethods.github_actions.value, help="Deploys the pipeline to Github Actions", parents=[deploy_comm])
deploy_github_cmd.add_argument("--schedule", required=True, help="A schedule with which to run the pipeline, in cron format. Example: '*/30 * * * *' will run the pipeline every 30 minutes. Remember to enclose the scheduler expression in quotation marks!")
deploy_github_cmd.add_argument("--run-manually", default=True, action="store_true", help="Allows the pipeline to be run manually from Github Actions UI.")
deploy_github_cmd.add_argument("--run-on-push", default=False, action="store_true", help="Runs the pipeline with every push to the repository.")

# deploy airflow composer
deploy_airflow_cmd = deploy_sub_parsers.add_parser(DeploymentMethods.airflow_composer.value, help="Deploys the pipeline to Airflow", parents=[deploy_comm])
deploy_airflow_cmd.add_argument("--secrets-format", default=SecretFormats.toml.value, choices=[v.value for v in SecretFormats], required=False, help="Format of the secrets")
except NameError:
# create placeholder command
deploy_cmd = subparsers.add_parser("deploy", help='Install additional dependencies with pip install "dlt[cli]" to create deployment packages', add_help=False)
deploy_cmd.add_argument("--help", "-h", nargs="?", const=True)
deploy_cmd.add_argument("pipeline_script_path", metavar="pipeline-script-path", nargs=argparse.REMAINDER)

schema = subparsers.add_parser("schema", help="Shows, converts and upgrades schemas")
schema.add_argument("file", help="Schema file name, in yaml or json format, will autodetect based on extension")
Expand Down Expand Up @@ -282,14 +304,20 @@ def main() -> int:
else:
return init_command_wrapper(args.source, args.destination, args.generic, args.location, args.branch)
elif args.command == "deploy":
deploy_args = vars(args)
return deploy_command_wrapper(
pipeline_script_path=deploy_args.pop("pipeline_script_path"),
deployment_method=deploy_args.pop("deployment_method"),
repo_location=deploy_args.pop("location"),
branch=deploy_args.pop("branch"),
**deploy_args
)
try:
deploy_args = vars(args)
return deploy_command_wrapper(
pipeline_script_path=deploy_args.pop("pipeline_script_path"),
deployment_method=deploy_args.pop("deployment_method"),
repo_location=deploy_args.pop("location"),
branch=deploy_args.pop("branch"),
**deploy_args
)
except (NameError, KeyError):
fmt.warning("Please install additional command line dependencies to use deploy command:")
fmt.secho('pip install "dlt[cli]"', bold=True)
fmt.echo("We ask you to install those dependencies separately to keep our core library small and make it work everywhere.")
return -1
elif args.command == "telemetry":
return telemetry_status_command_wrapper()
else:
Expand Down
37 changes: 33 additions & 4 deletions dlt/cli/deploy_command.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@
from dlt.cli import utils
from dlt.cli import echo as fmt
from dlt.cli.deploy_command_helpers import (PipelineWasNotRun, BaseDeployment, ask_files_overwrite, generate_pip_freeze, github_origin_to_url, serialize_templated_yaml,
wrap_template_str)
wrap_template_str, get_schedule_description)

from dlt.version import DLT_PKG_NAME

Expand Down Expand Up @@ -49,14 +49,33 @@ def deploy_command(pipeline_script_path: str, deployment_method: str, repo_locat
deployment_class = AirflowDeployment
else:
raise ValueError(f"Deployment method '{deployment_method}' is not supported. Only {', '.join([m.value for m in DeploymentMethods])} are available.'")

# command no longer needed
kwargs.pop("command", None)
deployment_class(pipeline_script_path=pipeline_script_path, location=repo_location, branch=branch, **kwargs).run_deployment()


class GithubActionDeployment(BaseDeployment):
def __init__(
self,
pipeline_script_path: str,
location: str,
schedule: Optional[str],
run_on_push: bool = False,
run_manually: bool = False,
branch: Optional[str] = None,
):
super().__init__(pipeline_script_path, location, branch)
self.schedule = schedule
self.run_on_push = run_on_push
self.run_manually = run_manually
self.schedule_description: Optional[str]

def _generate_workflow(self, *args: Optional[Any]) -> None:
self.deployment_method = DeploymentMethods.github_actions.value
# validate schedule
self.schedule_description = get_schedule_description(self.schedule)
if self.schedule_description is None:
# TODO: move that check to _dlt and some intelligent help message on missing arg
raise ValueError(
f"Setting 'schedule' for '{self.deployment_method}' is required! Use deploy command as 'dlt deploy chess.py {self.deployment_method} --schedule \"*/30 * * * *\"'."
)
Expand Down Expand Up @@ -92,7 +111,7 @@ def _create_new_workflow(self) -> Any:
workflow["name"] = f"Run {self.state['pipeline_name']} pipeline from {self.pipeline_script_path}"
if self.run_on_push is False:
del workflow["on"]["push"]
if self.run_on_dispatch is False:
if self.run_manually is False:
del workflow["on"]["workflow_dispatch"]
workflow["on"]["schedule"] = [{"cron": self.schedule}]
workflow["env"] = {}
Expand Down Expand Up @@ -127,7 +146,7 @@ def _echo_instructions(self, *args: Optional[Any]) -> None:
))
fmt.echo("* The schedule with which the pipeline is run is: %s.%s%s" % (
fmt.bold(self.schedule_description),
" You can also run the pipeline manually." if self.run_on_dispatch else "",
" You can also run the pipeline manually." if self.run_manually else "",
" Pipeline will also run on each push to the repository." if self.run_on_push else "",
))
fmt.echo(
Expand Down Expand Up @@ -166,6 +185,16 @@ def _echo_instructions(self, *args: Optional[Any]) -> None:


class AirflowDeployment(BaseDeployment):
def __init__(
self,
pipeline_script_path: str,
location: str,
branch: Optional[str] = None,
secrets_format: Optional[str] = None,
):
super().__init__(pipeline_script_path, location, branch)
self.secrets_format = secrets_format

def _generate_workflow(self, *args: Optional[Any]) -> None:
self.deployment_method = DeploymentMethods.airflow_composer.value

Expand Down
Loading

0 comments on commit f765eb8

Please sign in to comment.