Skip to content

Commit

Permalink
Tests don't pass, but they are there #1055
Browse files Browse the repository at this point in the history
Signed-off-by: Marcel Coetzee <[email protected]>
  • Loading branch information
Pipboyguy committed Mar 18, 2024
1 parent 6cdf086 commit 6606f5e
Show file tree
Hide file tree
Showing 3 changed files with 41 additions and 136 deletions.
118 changes: 10 additions & 108 deletions tests/load/clickhouse/test_clickhouse_configuration.py
Original file line number Diff line number Diff line change
@@ -1,26 +1,18 @@
import os
import pytest
from pathlib import Path
from dlt.common.libs.sql_alchemy import make_url
from dlt.destinations.impl.clickhouse.configuration import ClickhouseCredentials


pytest.importorskip("clickhouse")

from dlt.common.configuration.resolve import resolve_configuration
from dlt.common.configuration.exceptions import ConfigurationValueError
from dlt.common.libs.sql_alchemy import make_url
from dlt.common.utils import digest128

from dlt.destinations.impl.clickhouse.configuration import (
ClickhouseCredentials,
ClickhouseClientConfiguration,
)
from dlt.destinations.impl.snowflake.configuration import (
SnowflakeClientConfiguration,
SnowflakeCredentials,
)

from tests.common.configuration.utils import environment


def test_connection_string_with_all_params() -> None:
url = "snowflake://user1:pass1@host1/db1?warehouse=warehouse1&role=role1&private_key=cGs%3D&private_key_passphrase=paphr"
url = "clickhouse://user1:pass1@host1:9000/db1"

creds = ClickhouseCredentials()
creds.parse_native_representation(url)
Expand All @@ -29,110 +21,20 @@ def test_connection_string_with_all_params() -> None:
assert creds.username == "user1"
assert creds.password == "pass1"
assert creds.host == "host1"
assert creds.warehouse == "warehouse1"
assert creds.role == "role1"
assert creds.private_key == "cGs="
assert creds.private_key_passphrase == "paphr"
assert creds.port == 9000

expected = make_url(url)

# Test URL components regardless of query param order
assert make_url(creds.to_native_representation()) == expected


def test_to_connector_params() -> None:
    """Check `SnowflakeCredentials.to_connector_params` for both stored key formats.

    The same credentials are built twice, once from the PEM key file and once from
    the base64 encoded DER key file. In both cases the connector parameters must
    carry the private key as `bytes`, and every remaining parameter must match the
    expected user/database/account/warehouse/role mapping with `password=None`.
    """
    key_files = (
        # PEM key
        "./tests/common/cases/secrets/encrypted-private-key",
        # base64 encoded DER key
        "./tests/common/cases/secrets/encrypted-private-key-base64",
    )
    expected_params = {
        "user": "user1",
        "database": "db1",
        "account": "host1",
        "password": None,
        "warehouse": "warehouse1",
        "role": "role1",
    }

    for key_file in key_files:
        key_text = Path(key_file).read_text("utf8")

        credentials = SnowflakeCredentials()
        credentials.private_key = key_text  # type: ignore[assignment]
        credentials.private_key_passphrase = "12345"  # type: ignore[assignment]
        credentials.username = "user1"
        credentials.database = "db1"
        credentials.host = "host1"
        credentials.warehouse = "warehouse1"
        credentials.role = "role1"

        connector_params = credentials.to_connector_params()

        # The key is checked by type only and then dropped, so the remaining
        # parameters can be compared with a plain dict equality.
        assert isinstance(connector_params["private_key"], bytes)
        del connector_params["private_key"]

        assert connector_params == expected_params


def test_snowflake_credentials_native_value(environment) -> None:
    """Check how a native Snowflake URL combines with secrets taken from the environment.

    Covered cases, in order:
    * a URL with neither password nor private key fails with `ConfigurationValueError`,
    * a password supplied through `CREDENTIALS__PASSWORD` completes the credentials,
    * a password embedded in the URL takes precedence over the environment value,
    * a private key supplied through `CREDENTIALS__PRIVATE_KEY` completes the credentials.

    NOTE(review): the `environment` fixture presumably isolates `os.environ` between
    tests - confirm against `tests.common.configuration.utils`.
    """
    url_without_secret = "snowflake://user1@host1/db1?warehouse=warehouse1&role=role1"
    url_with_password = "snowflake://user1:pass1@host1/db1?warehouse=warehouse1&role=role1"

    # no password and no private key anywhere: resolution must fail
    with pytest.raises(ConfigurationValueError):
        resolve_configuration(SnowflakeCredentials(), explicit_value=url_without_secret)

    # set password via env
    os.environ["CREDENTIALS__PASSWORD"] = "pass"
    resolved = resolve_configuration(SnowflakeCredentials(), explicit_value=url_without_secret)
    assert resolved.is_resolved()
    assert resolved.password == "pass"

    # but if password is specified - it is final
    resolved = resolve_configuration(SnowflakeCredentials(), explicit_value=url_with_password)
    assert resolved.is_resolved()
    assert resolved.password == "pass1"

    # set PK via env
    os.environ.pop("CREDENTIALS__PASSWORD")
    os.environ["CREDENTIALS__PRIVATE_KEY"] = "pk"
    resolved = resolve_configuration(SnowflakeCredentials(), explicit_value=url_without_secret)
    assert resolved.is_resolved()
    assert resolved.private_key == "pk"


def test_snowflake_configuration() -> None:
def test_clickhouse_configuration() -> None:
# default configuration yields an empty fingerprint
assert SnowflakeClientConfiguration().fingerprint() == ""
assert ClickhouseClientConfiguration().fingerprint() == ""
# based on host
c = resolve_configuration(
SnowflakeCredentials(),
explicit_value="snowflake://user1:pass@host1/db1?warehouse=warehouse1&role=role1",
explicit_value="clickhouse://user1:pass1@host1:9000/db1",
)
assert SnowflakeClientConfiguration(credentials=c).fingerprint() == digest128("host1")
47 changes: 24 additions & 23 deletions tests/load/clickhouse/test_clickhouse_table_builder.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,33 +3,34 @@
import pytest
import sqlfluff

from dlt.common.utils import uniq_id
from dlt.common.schema import Schema
from dlt.destinations.impl.snowflake.snowflake import SnowflakeClient
from dlt.destinations.impl.snowflake.configuration import (
SnowflakeClientConfiguration,
SnowflakeCredentials,
from dlt.common.utils import uniq_id
from dlt.destinations.impl.clickhouse.clickhouse import ClickhouseClient
from dlt.destinations.impl.clickhouse.configuration import (
ClickhouseCredentials,
ClickhouseClientConfiguration,
)
from dlt.destinations.exceptions import DestinationSchemaWillNotUpdate

from tests.load.utils import TABLE_UPDATE, empty_schema


@pytest.fixture
def snowflake_client(empty_schema: Schema) -> SnowflakeClient:
# return client without opening connection
creds = SnowflakeCredentials()
return SnowflakeClient(
def clickhouse_client(empty_schema: Schema) -> ClickhouseClient:
# Return a client without opening connection.
creds = ClickhouseCredentials()
return ClickhouseClient(
empty_schema,
SnowflakeClientConfiguration(dataset_name="test_" + uniq_id(), credentials=creds),
ClickhouseClientConfiguration(dataset_name=f"test_{uniq_id()}", credentials=creds),
)


def test_create_table(snowflake_client: SnowflakeClient) -> None:
statements = snowflake_client._get_table_update_sql("event_test_table", TABLE_UPDATE, False)
# A bare `pytest.mark.usefixtures(...)` expression only builds a marker object and
# discards it. pytest applies module-wide marks only when they are bound to the
# `pytestmark` global, so assign it to make every test here use `empty_schema`.
pytestmark = pytest.mark.usefixtures("empty_schema")


def test_create_table(clickhouse_client: ClickhouseClient) -> None:
statements = clickhouse_client._get_table_update_sql("event_test_table", TABLE_UPDATE, False)
assert len(statements) == 1
sql = statements[0]
sqlfluff.parse(sql, dialect="snowflake")
sqlfluff.parse(sql, dialect="clickhouse")

assert sql.strip().startswith("CREATE TABLE")
assert "EVENT_TEST_TABLE" in sql
Expand All @@ -45,13 +46,13 @@ def test_create_table(snowflake_client: SnowflakeClient) -> None:
assert '"COL10" DATE NOT NULL' in sql


def test_alter_table(snowflake_client: SnowflakeClient) -> None:
statements = snowflake_client._get_table_update_sql("event_test_table", TABLE_UPDATE, True)
def test_alter_table(clickhouse_client: ClickhouseClient) -> None:
statements = clickhouse_client._get_table_update_sql("event_test_table", TABLE_UPDATE, True)
assert len(statements) == 1
sql = statements[0]

# TODO: sqlfluff doesn't parse snowflake multi ADD COLUMN clause correctly
# sqlfluff.parse(sql, dialect='snowflake')
# TODO: sqlfluff doesn't parse clickhouse multi ADD COLUMN clause correctly
# sqlfluff.parse(sql, dialect='clickhouse')

assert sql.startswith("ALTER TABLE")
assert sql.count("ALTER TABLE") == 1
Expand All @@ -70,24 +71,24 @@ def test_alter_table(snowflake_client: SnowflakeClient) -> None:

mod_table = deepcopy(TABLE_UPDATE)
mod_table.pop(0)
sql = snowflake_client._get_table_update_sql("event_test_table", mod_table, True)[0]
sql = clickhouse_client._get_table_update_sql("event_test_table", mod_table, True)[0]

assert '"COL1"' not in sql
assert '"COL2" FLOAT NOT NULL' in sql


def test_create_table_with_partition_and_cluster(snowflake_client: SnowflakeClient) -> None:
def test_create_table_with_partition_and_cluster(clickhouse_client: ClickhouseClient) -> None:
mod_update = deepcopy(TABLE_UPDATE)
# timestamp
mod_update[3]["partition"] = True
mod_update[4]["cluster"] = True
mod_update[1]["cluster"] = True
statements = snowflake_client._get_table_update_sql("event_test_table", mod_update, False)
statements = clickhouse_client._get_table_update_sql("event_test_table", mod_update, False)
assert len(statements) == 1
sql = statements[0]

# TODO: Can't parse cluster by
# sqlfluff.parse(sql, dialect="snowflake")
# sqlfluff.parse(sql, dialect="clickhouse")

# the CLUSTER BY clause must come last in the statement
assert sql.endswith('CLUSTER BY ("COL2","COL5")')
12 changes: 7 additions & 5 deletions tests/load/clickhouse/test_utls.py
Original file line number Diff line number Diff line change
Expand Up @@ -78,8 +78,7 @@ def test_convert_gs_url_to_http_with_endpoint() -> None:
gcs_url = "gcs://my-bucket/path/to/file.txt"
expected_http_url = "http://my-bucket.custom-endpoint.com/path/to/file.txt"
assert (
convert_storage_to_http_scheme(gcs_url, endpoint="custom-endpoint.com")
== expected_http_url
convert_storage_to_http_scheme(gcs_url, endpoint="custom-endpoint.com") == expected_http_url
)


Expand All @@ -103,7 +102,9 @@ def test_render_with_credentials_jsonl() -> None:
access_key_id = "test_access_key"
secret_access_key = "test_secret_key"
file_format = "jsonl"
expected_output = """s3('https://example.com/data.jsonl','test_access_key','test_secret_key','JSONEachRow')"""
expected_output = (
"""s3('https://example.com/data.jsonl','test_access_key','test_secret_key','JSONEachRow')"""
)
assert (
render_s3_table_function(url, access_key_id, secret_access_key, file_format) # type: ignore[arg-type]
== expected_output
Expand All @@ -115,7 +116,9 @@ def test_render_with_credentials_parquet() -> None:
access_key_id = "test_access_key"
secret_access_key = "test_secret_key"
file_format = "parquet"
expected_output = """s3('https://example.com/data.parquet','test_access_key','test_secret_key','Parquet')"""
expected_output = (
"""s3('https://example.com/data.parquet','test_access_key','test_secret_key','Parquet')"""
)
assert (
render_s3_table_function(url, access_key_id, secret_access_key, file_format) # type: ignore[arg-type]
== expected_output
Expand All @@ -129,7 +132,6 @@ def test_render_without_credentials() -> None:
assert render_s3_table_function(url, file_format=file_format) == expected_output # type: ignore[arg-type]



def test_render_invalid_file_format() -> None:
url = "https://example.com/data.unknown"
access_key_id = "test_access_key"
Expand Down

0 comments on commit 6606f5e

Please sign in to comment.