From 27afd2665714b69fd8075dccdc628c6e95f48799 Mon Sep 17 00:00:00 2001 From: Keshav Mohta Date: Fri, 3 Jan 2025 16:31:34 +0530 Subject: [PATCH 01/16] feature: implemented ssl configuration --- .../examples/workflows/cassandra.yaml | 5 +++ .../source/database/cassandra/connection.py | 35 ++++++++++++++++- .../database/cassandraConnection.json | 8 ++++ .../locales/en-US/Database/Cassandra.md | 39 ++++++++++++++++--- 4 files changed, 80 insertions(+), 7 deletions(-) diff --git a/ingestion/src/metadata/examples/workflows/cassandra.yaml b/ingestion/src/metadata/examples/workflows/cassandra.yaml index 04a93fbede25..f6d208bf8bee 100644 --- a/ingestion/src/metadata/examples/workflows/cassandra.yaml +++ b/ingestion/src/metadata/examples/workflows/cassandra.yaml @@ -13,6 +13,11 @@ source: # token: # requestTimeout: # connectTimeout: + # sslMode: allow + # sslConfig: + # caCertificate: "CA certificate content" + # sslCertificate: "SSL certificate content" + # sslKey: "SSL key content" hostPort: localhost:9042 sourceConfig: config: diff --git a/ingestion/src/metadata/ingestion/source/database/cassandra/connection.py b/ingestion/src/metadata/ingestion/source/database/cassandra/connection.py index 54053633105e..188b51ee2b03 100644 --- a/ingestion/src/metadata/ingestion/source/database/cassandra/connection.py +++ b/ingestion/src/metadata/ingestion/source/database/cassandra/connection.py @@ -12,7 +12,10 @@ """ Source connection handler """ +import os from functools import partial +from ssl import CERT_REQUIRED, PROTOCOL_TLS, SSLContext +from tempfile import NamedTemporaryFile from typing import Optional from cassandra.auth import PlainTextAuthProvider @@ -34,6 +37,7 @@ from metadata.generated.schema.entity.services.connections.testConnectionResult import ( TestConnectionResult, ) +from metadata.generated.schema.security.ssl.verifySSLConfig import SslMode from metadata.ingestion.connections.test_connections import test_connection_steps from metadata.ingestion.ometa.ometa_api import OpenMetadata from metadata.ingestion.source.database.cassandra.queries import ( @@ -77,7 +81,36 @@ def get_connection(connection: CassandraConnection): password=connection.authType.password.get_secret_value(), ) - cluster = Cluster(**cluster_config) + ssl_context = None + if connection.sslMode != SslMode.disable: + ssl_context = SSLContext(PROTOCOL_TLS) + + # Load CA certificate directly into memory + ssl_context.load_verify_locations( + cadata=open( + connection.sslConfig.root.caCertificate.get_secret_value() + ).read() + ) + + ssl_context.verify_mode = CERT_REQUIRED + + # Create temporary files since the load_cert_chain function requires + # file paths for the certfile and keyfile + with NamedTemporaryFile(delete=False, mode="w") as certfile, NamedTemporaryFile( + delete=False, mode="w" + ) as keyfile: + certfile.write(connection.sslConfig.root.sslCertificate.get_secret_value()) + certfile_path = certfile.name + + keyfile.write(connection.sslConfig.root.sslKey.get_secret_value()) + keyfile_path = keyfile.name + + ssl_context.load_cert_chain(certfile=certfile_path, keyfile=keyfile_path) + + # Delete temporary files + os.remove(certfile_path), os.remove(keyfile_path) + + cluster = Cluster(**cluster_config, ssl_context=ssl_context) session = cluster.connect() return session diff --git a/openmetadata-spec/src/main/resources/json/schema/entity/services/connections/database/cassandraConnection.json b/openmetadata-spec/src/main/resources/json/schema/entity/services/connections/database/cassandraConnection.json index 25c0c64cbec3..90c325a46b8d 100644 --- a/openmetadata-spec/src/main/resources/json/schema/entity/services/connections/database/cassandraConnection.json +++ b/openmetadata-spec/src/main/resources/json/schema/entity/services/connections/database/cassandraConnection.json @@ -49,6 +49,14 @@ "description": "Optional name to give to the database in OpenMetadata. If left blank, we will use default as the database name.", "type": "string" }, + "sslMode": { + "title": "SSL Mode", + "$ref": "../../../../security/ssl/verifySSLConfig.json#/definitions/sslMode" + }, + "sslConfig": { + "title": "SSL Configuration", + "$ref": "../../../../security/ssl/verifySSLConfig.json#/definitions/sslConfig" + }, "supportsMetadataExtraction": { "title": "Supports Metadata Extraction", "$ref": "../connectionBasicType.json#/definitions/supportsMetadataExtraction" diff --git a/openmetadata-ui/src/main/resources/ui/public/locales/en-US/Database/Cassandra.md b/openmetadata-ui/src/main/resources/ui/public/locales/en-US/Database/Cassandra.md index d1473f0d03b2..fc87be9d82a8 100644 --- a/openmetadata-ui/src/main/resources/ui/public/locales/en-US/Database/Cassandra.md +++ b/openmetadata-ui/src/main/resources/ui/public/locales/en-US/Database/Cassandra.md @@ -29,13 +29,27 @@ $$ $$section ### Cloud Config $(id="cloudConfig") - - Configuration for connecting to DataStax Astra DB in the cloud. +Configuration settings required when connecting to DataStax Astra DB in the cloud environment. These settings help establish and maintain secure connections to your cloud-hosted Cassandra database. +$$ + +$$section +### Connect Timeout $(id="connectTimeout") +Specifies the timeout duration in seconds for establishing new connections to Cassandra. This setting helps control how long the system should wait when attempting to create a new connection before timing out. +$$ + +$$section +### Request Timeout $(id="requestTimeout") +Defines the timeout duration in seconds for individual Cassandra requests. This setting determines how long each query or operation should wait for a response before timing out. +$$ + +$$section +### Token $(id="token") +The authentication token required for connecting to DataStax Astra DB. This token serves as the security credential for accessing your cloud database instance. +$$ - - connectTimeout: Timeout in seconds for establishing new connections to Cassandra. - - requestTimeout: Timeout in seconds for individual Cassandra requests. - - token: The Astra DB application token used for authentication. - - secureConnectBundle: File path to the Secure Connect Bundle (.zip) used for a secure connection to DataStax Astra DB. +$$section +### Secure Connect Bundle $(id="secureConnectBundle") +The file path to the Secure Connect Bundle (.zip) file. This bundle contains the necessary certificates and configuration files required to establish a secure connection to your DataStax Astra DB instance. $$ $$section @@ -46,3 +60,16 @@ Database Service > Database > Schema > Table ``` In the case of Cassandra, we won't have a Keyspace/Database as such. If you'd like to see your data in a database named something other than `default`, you can specify the name in this field. $$ + +$$section +### SSL Mode $(id="sslMode") +SSL Mode to connect to Cassandra instance. By default, SSL is disabled. +$$ + +$$section +### SSL Configuration $(id="sslConfig") +SSL Configuration for the Cassandra connection. This is required when SSL Mode is enabled. +- `CA Certificate`: Path to the CA certificate file. +- `SSL Certificate`: Path to the client certificate file. +- `SSL Key`: Path to the client private key file. +$$ \ No newline at end of file From b9084fabdc3933581d35f929b1ad292cd2df8753 Mon Sep 17 00:00:00 2001 From: Keshav Mohta Date: Fri, 3 Jan 2025 16:48:41 +0530 Subject: [PATCH 02/16] fix: caCertificate content read as file --- .../ingestion/source/database/cassandra/connection.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/ingestion/src/metadata/ingestion/source/database/cassandra/connection.py b/ingestion/src/metadata/ingestion/source/database/cassandra/connection.py index 188b51ee2b03..3a81e6becfc2 100644 --- a/ingestion/src/metadata/ingestion/source/database/cassandra/connection.py +++ b/ingestion/src/metadata/ingestion/source/database/cassandra/connection.py @@ -87,9 +87,7 @@ def get_connection(connection: CassandraConnection): # Load CA certificate directly into memory ssl_context.load_verify_locations( - cadata=open( - connection.sslConfig.root.caCertificate.get_secret_value() - ).read() + cadata=connection.sslConfig.root.caCertificate.get_secret_value() ) ssl_context.verify_mode = CERT_REQUIRED From 3b196cdb2c0967a42296e7ec886d4ff229e70741 Mon Sep 17 00:00:00 2001 From: Keshav Mohta Date: Fri, 3 Jan 2025 16:50:15 +0530 Subject: [PATCH 03/16] fix: cassandra helpers file file level comment --- .../src/metadata/ingestion/source/database/cassandra/helpers.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ingestion/src/metadata/ingestion/source/database/cassandra/helpers.py b/ingestion/src/metadata/ingestion/source/database/cassandra/helpers.py index bd8fc548b25e..a142506233a1 100644 --- a/ingestion/src/metadata/ingestion/source/database/cassandra/helpers.py +++ b/ingestion/src/metadata/ingestion/source/database/cassandra/helpers.py @@ -9,7 +9,7 @@ # See the License for the specific language governing permissions and # limitations under the License. """ -Iceberg source helpers. +Cassandra source helpers. """ from __future__ import annotations From 57c611569d8af67db17b75d26d940e168a8d4b07 Mon Sep 17 00:00:00 2001 From: Keshav Mohta Date: Fri, 3 Jan 2025 17:02:42 +0530 Subject: [PATCH 04/16] fix: python & java checkstyle changes --- .../metadata/ingestion/source/database/cassandra/connection.py | 3 ++- .../java/org/openmetadata/service/jdbi3/MlModelRepository.java | 3 ++- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/ingestion/src/metadata/ingestion/source/database/cassandra/connection.py b/ingestion/src/metadata/ingestion/source/database/cassandra/connection.py index 3a81e6becfc2..3cf837a3ec71 100644 --- a/ingestion/src/metadata/ingestion/source/database/cassandra/connection.py +++ b/ingestion/src/metadata/ingestion/source/database/cassandra/connection.py @@ -106,7 +106,8 @@ def get_connection(connection: CassandraConnection): ssl_context.load_cert_chain(certfile=certfile_path, keyfile=keyfile_path) # Delete temporary files - os.remove(certfile_path), os.remove(keyfile_path) + os.remove(certfile_path) + os.remove(keyfile_path) cluster = Cluster(**cluster_config, ssl_context=ssl_context) session = cluster.connect() diff --git a/openmetadata-service/src/main/java/org/openmetadata/service/jdbi3/MlModelRepository.java b/openmetadata-service/src/main/java/org/openmetadata/service/jdbi3/MlModelRepository.java index 2937030e0e63..e9ada22bd098 100644 --- a/openmetadata-service/src/main/java/org/openmetadata/service/jdbi3/MlModelRepository.java +++ b/openmetadata-service/src/main/java/org/openmetadata/service/jdbi3/MlModelRepository.java @@ -211,7 +211,8 @@ private void setMlFeatureSourcesLineage(MlModel mlModel) { .getFeatureSources() .forEach( mlFeatureSource -> { - EntityReference targetEntity = getEntityReference(mlFeatureSource.getDataSource(), Include.ALL); + EntityReference targetEntity = + getEntityReference(mlFeatureSource.getDataSource(), Include.ALL); if (targetEntity != null) { addRelationship( targetEntity.getId(), From da281b7b1d8fca1fb03f4bc21decc76b15a4337a Mon Sep 17 00:00:00 2001 From: Keshav Mohta Date: Mon, 6 Jan 2025 10:06:53 +0530 Subject: [PATCH 05/16] refactor: used ssl manager for cassandra ssl and changes in docs --- .../source/database/cassandra/connection.py | 31 ++---- ingestion/src/metadata/utils/ssl_manager.py | 43 ++++++-- .../connectors/database/cassandra/index.md | 28 +++++ .../connectors/database/cassandra/yaml.md | 101 ++++++++++++++++++ .../database/cassandraConnection.json | 4 + .../locales/en-US/Database/Cassandra.md | 5 + .../database/cassandraConnection.ts | 39 ++++++- 7 files changed, 218 insertions(+), 33 deletions(-) diff --git a/ingestion/src/metadata/ingestion/source/database/cassandra/connection.py b/ingestion/src/metadata/ingestion/source/database/cassandra/connection.py index 3cf837a3ec71..5f554a1c3cc3 100644 --- a/ingestion/src/metadata/ingestion/source/database/cassandra/connection.py +++ b/ingestion/src/metadata/ingestion/source/database/cassandra/connection.py @@ -12,10 +12,8 @@ """ Source connection handler """ -import os from functools import partial from ssl import CERT_REQUIRED, PROTOCOL_TLS, SSLContext -from tempfile import NamedTemporaryFile from typing import Optional from cassandra.auth import PlainTextAuthProvider @@ -83,31 +81,14 @@ def get_connection(connection: CassandraConnection): ssl_context = None if connection.sslMode != SslMode.disable: - ssl_context = SSLContext(PROTOCOL_TLS) - - # Load CA certificate directly into memory - ssl_context.load_verify_locations( - cadata=connection.sslConfig.root.caCertificate.get_secret_value() - ) + ssl_args = connection.connectionArguments.root["ssl_args"] + ssl_context = SSLContext(PROTOCOL_TLS) + ssl_context.load_verify_locations(cadata=ssl_args["ssl_ca"]) ssl_context.verify_mode = CERT_REQUIRED - - # Create temporary files since the load_cert_chain function requires - # file paths for the certfile and keyfile - with NamedTemporaryFile(delete=False, mode="w") as certfile, NamedTemporaryFile( - delete=False, mode="w" - ) as keyfile: - certfile.write(connection.sslConfig.root.sslCertificate.get_secret_value()) - certfile_path = certfile.name - - keyfile.write(connection.sslConfig.root.sslKey.get_secret_value()) - keyfile_path = keyfile.name - - ssl_context.load_cert_chain(certfile=certfile_path, keyfile=keyfile_path) - - # Delete temporary files - os.remove(certfile_path) - os.remove(keyfile_path) + ssl_context.load_cert_chain( + certfile=ssl_args["ssl_cert"], keyfile=ssl_args["ssl_key"] + ) cluster = Cluster(**cluster_config, ssl_context=ssl_context) session = cluster.connect() diff --git a/ingestion/src/metadata/utils/ssl_manager.py b/ingestion/src/metadata/utils/ssl_manager.py index 1e6d7a0af5ec..c8fefc36fddb 100644 --- a/ingestion/src/metadata/utils/ssl_manager.py +++ b/ingestion/src/metadata/utils/ssl_manager.py @@ -19,6 +19,9 @@ from functools import singledispatch, singledispatchmethod from typing import Optional, Union, cast +from metadata.generated.schema.entity.services.connections.database.cassandraConnection import ( + CassandraConnection, +) from pydantic import SecretStr from metadata.generated.schema.entity.services.connections.connectionBasicType import ( @@ -201,9 +204,24 @@ def _(self, connection): connection = cast(KafkaConnection, connection) connection.schemaRegistryConfig["ssl.ca.location"] = self.ca_file_path connection.schemaRegistryConfig["ssl.key.location"] = self.key_file_path - connection.schemaRegistryConfig[ - "ssl.certificate.location" - ] = self.cert_file_path + connection.schemaRegistryConfig["ssl.certificate.location"] = ( + self.cert_file_path + ) + return connection + + @setup_ssl.register(CassandraConnection) + def _(self, connection): + # Use the temporary file paths for SSL configuration + connection = cast(CassandraConnection, connection) + ssl_args = {} + if connection.sslConfig.root.caCertificate: + ssl_args["ssl_ca"] = self.ca_file_path + if connection.sslConfig.root.sslCertificate: + ssl_args["ssl_cert"] = self.cert_file_path + if connection.sslConfig.root.sslKey: + ssl_args["ssl_key"] = self.key_file_path + + connection.connectionArguments.root["ssl_args"] = ssl_args return connection @@ -216,9 +234,9 @@ def check_ssl_and_init(_) -> Optional[SSLManager]: def _(connection) -> Union[SSLManager, None]: service_connection = cast(MatillionConnection, connection) if service_connection.connection: - ssl: Optional[ - verifySSLConfig.SslConfig - ] = service_connection.connection.sslConfig + ssl: Optional[verifySSLConfig.SslConfig] = ( + service_connection.connection.sslConfig + ) if ssl and ssl.root.caCertificate: ssl_dict: dict[str, Union[CustomSecretStr, None]] = { "ca": ssl.root.caCertificate @@ -289,6 +307,19 @@ def _(connection): return None +@check_ssl_and_init.register(CassandraConnection) +def _(connection): + service_connection = cast(CassandraConnection, connection) + ssl: Optional[verifySSLConfig.SslConfig] = service_connection.sslConfig + if ssl and (ssl.root.caCertificate or ssl.root.sslCertificate or ssl.root.sslKey): + return SSLManager( + ca=ssl.root.caCertificate, + cert=ssl.root.sslCertificate, + key=ssl.root.sslKey, + ) + return None + + def get_ssl_connection(service_config): try: # To be cleaned up as part of https://github.com/open-metadata/OpenMetadata/issues/15913 diff --git a/openmetadata-docs/content/v1.7.x-SNAPSHOT/connectors/database/cassandra/index.md b/openmetadata-docs/content/v1.7.x-SNAPSHOT/connectors/database/cassandra/index.md index becb02db5e68..d626e31c08f5 100644 --- a/openmetadata-docs/content/v1.7.x-SNAPSHOT/connectors/database/cassandra/index.md +++ b/openmetadata-docs/content/v1.7.x-SNAPSHOT/connectors/database/cassandra/index.md @@ -18,6 +18,7 @@ Configure and schedule Cassandra metadata workflows from the OpenMetadata UI: - [Requirements](#requirements) - [Metadata Ingestion](#metadata-ingestion) +- [Enable Security](#securing-cassandra-connection-with-ssl-in-openmetadata) {% partial file="/v1.7/connectors/ingestion-modes-tiles.md" variables={yamlPath: "/connectors/database/cassandra/yaml"} /%} @@ -59,6 +60,21 @@ Configuration for connecting to DataStax Astra DB in the cloud. - **token**: The Astra DB application token used for authentication. - **secureConnectBundle**: File path to the Secure Connect Bundle (.zip) used for a secure connection to DataStax Astra DB. +**SSL Modes** + +There are a couple of types of SSL modes that Cassandra supports which can be added to ConnectionArguments, they are as follows: +- **disable**: SSL is disabled and the connection is not encrypted. +- **allow**: SSL is used if the server requires it. +- **prefer**: SSL is used if the server supports it. +- **require**: SSL is required. +- **verify-ca**: SSL must be used and the server certificate must be verified. +- **verify-full**: SSL must be used. The server certificate must be verified, and the server hostname must match the hostname attribute on the certificate. + +**SSL Configuration** + +In order to integrate SSL in the Metadata Ingestion Config, the user will have to add the SSL config under sslConfig which is placed in the source. + +{% partial file="/v1.7/connectors/database/advanced-configuration.md" /%} {% /extraContent %} @@ -70,6 +86,18 @@ Configuration for connecting to DataStax Astra DB in the cloud. {% /stepsContainer %} +## Securing Cassandra Connection with SSL in OpenMetadata + +To establish secure connections between OpenMetadata and a Cassandra database, you can use any SSL mode provided by Cassandra, except disable. + +Under `Advanced Config`, after selecting the SSL mode, provide the CA certificate, SSL certificate and SSL key. + +{% image + src="/images/v1.7/connectors/ssl_connection.png" + alt="SSL Configuration" + height="450px" + caption="SSL Configuration" /%} + {% partial file="/v1.7/connectors/troubleshooting.md" /%} {% partial file="/v1.7/connectors/database/related.md" /%} diff --git a/openmetadata-docs/content/v1.7.x-SNAPSHOT/connectors/database/cassandra/yaml.md b/openmetadata-docs/content/v1.7.x-SNAPSHOT/connectors/database/cassandra/yaml.md index 8b86ae89ef02..b3b53171209f 100644 --- a/openmetadata-docs/content/v1.7.x-SNAPSHOT/connectors/database/cassandra/yaml.md +++ b/openmetadata-docs/content/v1.7.x-SNAPSHOT/connectors/database/cassandra/yaml.md @@ -17,6 +17,8 @@ Configure and schedule Cassandra metadata workflows from the OpenMetadata UI: - [Requirements](#requirements) - [Metadata Ingestion](#metadata-ingestion) +- [Enable Security](#securing-cassandra-connection-with-ssl-in-openmetadata) + {% partial file="/v1.7/connectors/ingestion-modes-tiles.md" variables={yamlPath: "/connectors/database/cassandra/yaml"} /%} @@ -102,8 +104,96 @@ Configuration for connecting to DataStax Astra DB in the cloud. {% partial file="/v1.7/connectors/yaml/workflow-config-def.md" /%} +#### Advanced Configuration + +{% codeInfo srNumber=6 %} + +**Connection Arguments (Optional)**: Enter the details for any additional connection arguments such as security or protocol configs that can be sent to database during the connection. These details must be added as Key-Value pairs. + {% /codeInfo %} +{% codeInfo srNumber=7 %} + +The sslConfig and sslMode are used to configure the SSL (Secure Sockets Layer) connection between your application and the PostgreSQL server. + +- **caCertificate**: Provide the path to ssl ca file. + +- **sslCertificate**: Provide the path to ssl client certificate file (ssl_cert). + +- **sslKey**: Provide the path to ssl client certificate file (ssl_key). + +**sslMode**: This field controls whether a secure SSL/TLS connection will be negotiated with the server. There are several modes you can choose: + +disable: No SSL/TLS encryption will be used; the data sent over the network is not encrypted. +allow: The driver will try to negotiate a non-SSL connection but if the server insists on SSL, it will switch to SSL. +prefer (the default): The driver will try to negotiate an SSL connection but if the server does not support SSL, it will switch to a non-SSL connection. +require: The driver will try to negotiate an SSL connection. If the server does not support SSL, the driver will not fall back to a non-SSL connection. +verify-ca: The driver will negotiate an SSL connection and verify that the server certificate is issued by a trusted certificate authority (CA). +verify-full: The driver will negotiate an SSL connection, verify that the server certificate is issued by a trusted CA and check that the server host name matches the one in the certificate. + +{% /codeInfo %} + +{% /codeInfoContainer %} + +{% codeBlock fileName="filename.yaml" %} + +```yaml {% isCodeBlock=true %} +source: + type: postgres + serviceName: local_postgres + serviceConnection: + config: + type: Postgres +``` +```yaml {% srNumber=1 %} + username: username +``` +```yaml {% srNumber=2 %} + authType: + password: +``` +```yaml {% srNumber=3 %} + authType: + awsConfig: + awsAccessKeyId: access key id + awsSecretAccessKey: access secret key + awsRegion: aws region name +``` +```yaml {% srNumber=4 %} + hostPort: localhost:5432 +``` +```yaml {% srNumber=5 %} + database: database +``` +```yaml {% srNumber=6 %} + ingestAllDatabases: true +``` + + +{% partial file="/v1.7/connectors/yaml/database/source-config.md" /%} + +{% partial file="/v1.7/connectors/yaml/ingestion-sink.md" /%} + +{% partial file="/v1.7/connectors/yaml/workflow-config.md" /%} + +{% /codeBlock %} + +{% /codePreview %} + +{% partial file="/v1.7/connectors/yaml/ingestion-cli.md" /%} + +{% partial file="/v1.7/connectors/yaml/query-usage.md" variables={connector: "postgres"} /%} + +{% partial file="/v1.7/connectors/yaml/lineage.md" variables={connector: "postgres"} /%} + +{% partial file="/v1.7/connectors/yaml/data-profiler.md" variables={connector: "postgres"} /%} + +{% partial file="/v1.7/connectors/yaml/auto-classification.md" variables={connector: "postgres"} /%} + +{% partial file="/v1.7/connectors/yaml/data-quality.md" /%} + + +{% /codeInfo %} {% /codeInfoContainer %} @@ -135,6 +225,17 @@ source: requestTimeout: connectTimeout: ``` +```yaml {% srNumber=6 %} + # connectionArguments: + # key: value +``` +```yaml {% srNumber=7 %} + # sslConfig: + # caCertificate: "path/to/ca/certificate" + # sslCertificate: "path/to/ssl/certificate" + # sslKey: "path/to/ssl/key" + # sslMode: disable #allow prefer require verify-ca verify-full +``` {% partial file="/v1.7/connectors/yaml/database/source-config.md" /%} diff --git a/openmetadata-spec/src/main/resources/json/schema/entity/services/connections/database/cassandraConnection.json b/openmetadata-spec/src/main/resources/json/schema/entity/services/connections/database/cassandraConnection.json index 90c325a46b8d..10b1c43baafc 100644 --- a/openmetadata-spec/src/main/resources/json/schema/entity/services/connections/database/cassandraConnection.json +++ b/openmetadata-spec/src/main/resources/json/schema/entity/services/connections/database/cassandraConnection.json @@ -49,6 +49,10 @@ "description": "Optional name to give to the database in OpenMetadata. If left blank, we will use default as the database name.", "type": "string" }, + "connectionArguments": { + "title": "Connection Arguments", + "$ref": "../connectionBasicType.json#/definitions/connectionArguments" + }, "sslMode": { "title": "SSL Mode", "$ref": "../../../../security/ssl/verifySSLConfig.json#/definitions/sslMode" diff --git a/openmetadata-ui/src/main/resources/ui/public/locales/en-US/Database/Cassandra.md b/openmetadata-ui/src/main/resources/ui/public/locales/en-US/Database/Cassandra.md index fc87be9d82a8..9d5155efc6d4 100644 --- a/openmetadata-ui/src/main/resources/ui/public/locales/en-US/Database/Cassandra.md +++ b/openmetadata-ui/src/main/resources/ui/public/locales/en-US/Database/Cassandra.md @@ -61,6 +61,11 @@ Database Service > Database > Schema > Table In the case of Cassandra, we won't have a Keyspace/Database as such. If you'd like to see your data in a database named something other than `default`, you can specify the name in this field. $$ +$$section +### Connection Arguments $(id="connectionArguments") +Additional connection arguments that can be sent to the service during connection. +$$ + $$section ### SSL Mode $(id="sslMode") SSL Mode to connect to Cassandra instance. By default, SSL is disabled. diff --git a/openmetadata-ui/src/main/resources/ui/src/generated/entity/services/connections/database/cassandraConnection.ts b/openmetadata-ui/src/main/resources/ui/src/generated/entity/services/connections/database/cassandraConnection.ts index e2825fedb9e4..d26b18066c55 100644 --- a/openmetadata-ui/src/main/resources/ui/src/generated/entity/services/connections/database/cassandraConnection.ts +++ b/openmetadata-ui/src/main/resources/ui/src/generated/entity/services/connections/database/cassandraConnection.ts @@ -1,5 +1,5 @@ /* - * Copyright 2024 Collate. + * Copyright 2025 Collate. * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at @@ -17,7 +17,8 @@ export interface CassandraConnection { /** * Choose Auth Config Type. */ - authType?: AuthConfigurationType; + authType?: AuthConfigurationType; + connectionArguments?: { [key: string]: any }; /** * Optional name to give to the database in OpenMetadata. If left blank, we will use default * as the database name. @@ -28,6 +29,8 @@ export interface CassandraConnection { * host when using the `cassandra+srv` scheme. */ hostPort?: string; + sslConfig?: Config; + sslMode?: SSLMode; supportsMetadataExtraction?: boolean; /** * Service Type @@ -82,6 +85,38 @@ export interface DataStaxAstraDBConfiguration { [property: string]: any; } +/** + * Client SSL configuration + * + * OpenMetadata Client configured to validate SSL certificates. + */ +export interface Config { + /** + * The CA certificate used for SSL validation. + */ + caCertificate?: string; + /** + * The SSL certificate used for client authentication. + */ + sslCertificate?: string; + /** + * The private key associated with the SSL certificate. + */ + sslKey?: string; +} + +/** + * SSL Mode to connect to database. + */ +export enum SSLMode { + Allow = "allow", + Disable = "disable", + Prefer = "prefer", + Require = "require", + VerifyCA = "verify-ca", + VerifyFull = "verify-full", +} + /** * Service Type * From 333324847149af60a6eafe3b0ef93e7dca01b1e6 Mon Sep 17 00:00:00 2001 From: Keshav Mohta Date: Mon, 6 Jan 2025 10:11:35 +0530 Subject: [PATCH 06/16] fix: formatting --- ingestion/src/metadata/utils/ssl_manager.py | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/ingestion/src/metadata/utils/ssl_manager.py b/ingestion/src/metadata/utils/ssl_manager.py index c8fefc36fddb..e1a804922381 100644 --- a/ingestion/src/metadata/utils/ssl_manager.py +++ b/ingestion/src/metadata/utils/ssl_manager.py @@ -19,9 +19,6 @@ from functools import singledispatch, singledispatchmethod from typing import Optional, Union, cast -from metadata.generated.schema.entity.services.connections.database.cassandraConnection import ( - CassandraConnection, -) from pydantic import SecretStr from metadata.generated.schema.entity.services.connections.connectionBasicType import ( @@ -30,6 +27,9 @@ from metadata.generated.schema.entity.services.connections.dashboard.qlikSenseConnection import ( QlikSenseConnection, ) +from metadata.generated.schema.entity.services.connections.database.cassandraConnection import ( + CassandraConnection, +) from metadata.generated.schema.entity.services.connections.database.dorisConnection import ( DorisConnection, ) @@ -204,9 +204,9 @@ def _(self, connection): connection = cast(KafkaConnection, connection) connection.schemaRegistryConfig["ssl.ca.location"] = self.ca_file_path connection.schemaRegistryConfig["ssl.key.location"] = self.key_file_path - connection.schemaRegistryConfig["ssl.certificate.location"] = ( - self.cert_file_path - ) + connection.schemaRegistryConfig[ + "ssl.certificate.location" + ] = self.cert_file_path return connection @setup_ssl.register(CassandraConnection) @@ -234,9 +234,9 @@ def check_ssl_and_init(_) -> Optional[SSLManager]: def _(connection) -> Union[SSLManager, None]: service_connection = cast(MatillionConnection, connection) if service_connection.connection: - ssl: Optional[verifySSLConfig.SslConfig] = ( - service_connection.connection.sslConfig - ) + ssl: Optional[ + verifySSLConfig.SslConfig + ] = service_connection.connection.sslConfig if ssl and ssl.root.caCertificate: ssl_dict: dict[str, Union[CustomSecretStr, None]] = { "ca": ssl.root.caCertificate From 343dd74ccbd0c9ad9ce33634500813290e1b2fba Mon Sep 17 00:00:00 2001 From: Keshav Mohta Date: Mon, 6 Jan 2025 11:43:44 +0530 Subject: [PATCH 07/16] fixes: ssl_context.load_verify_locations method & doc changes --- .../source/database/cassandra/connection.py | 3 +- ingestion/src/metadata/utils/ssl_manager.py | 3 + .../connectors/database/cassandra/index.md | 2 - .../connectors/database/cassandra/yaml.md | 60 ------------------- 4 files changed, 4 insertions(+), 64 deletions(-) diff --git a/ingestion/src/metadata/ingestion/source/database/cassandra/connection.py b/ingestion/src/metadata/ingestion/source/database/cassandra/connection.py index 5f554a1c3cc3..95d2e1217bff 100644 --- a/ingestion/src/metadata/ingestion/source/database/cassandra/connection.py +++ b/ingestion/src/metadata/ingestion/source/database/cassandra/connection.py @@ -82,9 +82,8 @@ def get_connection(connection: CassandraConnection): ssl_context = None if connection.sslMode != SslMode.disable: ssl_args = connection.connectionArguments.root["ssl_args"] - ssl_context = SSLContext(PROTOCOL_TLS) - ssl_context.load_verify_locations(cadata=ssl_args["ssl_ca"]) + ssl_context.load_verify_locations(cafile=ssl_args["ssl_ca"]) ssl_context.verify_mode = CERT_REQUIRED ssl_context.load_cert_chain( certfile=ssl_args["ssl_cert"], keyfile=ssl_args["ssl_key"] diff --git a/ingestion/src/metadata/utils/ssl_manager.py b/ingestion/src/metadata/utils/ssl_manager.py index e1a804922381..578fabdd1e1b 100644 --- a/ingestion/src/metadata/utils/ssl_manager.py +++ b/ingestion/src/metadata/utils/ssl_manager.py @@ -221,6 +221,9 @@ def _(self, connection): if connection.sslConfig.root.sslKey: ssl_args["ssl_key"] = self.key_file_path + connection.connectionArguments = ( + connection.connectionArguments or init_empty_connection_arguments() + ) connection.connectionArguments.root["ssl_args"] = ssl_args return connection diff --git a/openmetadata-docs/content/v1.7.x-SNAPSHOT/connectors/database/cassandra/index.md b/openmetadata-docs/content/v1.7.x-SNAPSHOT/connectors/database/cassandra/index.md index d626e31c08f5..cd1607ec0111 100644 --- a/openmetadata-docs/content/v1.7.x-SNAPSHOT/connectors/database/cassandra/index.md +++ b/openmetadata-docs/content/v1.7.x-SNAPSHOT/connectors/database/cassandra/index.md @@ -74,8 +74,6 @@ There are a couple of types of SSL modes that Cassandra supports which can be ad In order to integrate SSL in the Metadata Ingestion Config, the user will have to add the SSL config under sslConfig which is placed in the source. -{% partial file="/v1.7/connectors/database/advanced-configuration.md" /%} - {% /extraContent %} {% partial file="/v1.7/connectors/test-connection.md" /%} diff --git a/openmetadata-docs/content/v1.7.x-SNAPSHOT/connectors/database/cassandra/yaml.md b/openmetadata-docs/content/v1.7.x-SNAPSHOT/connectors/database/cassandra/yaml.md index b3b53171209f..8eb3ad08759a 100644 --- a/openmetadata-docs/content/v1.7.x-SNAPSHOT/connectors/database/cassandra/yaml.md +++ b/openmetadata-docs/content/v1.7.x-SNAPSHOT/connectors/database/cassandra/yaml.md @@ -133,66 +133,6 @@ verify-full: The driver will negotiate an SSL connection, verify that the server {% /codeInfo %} -{% /codeInfoContainer %} - -{% codeBlock fileName="filename.yaml" %} - -```yaml {% isCodeBlock=true %} -source: - type: postgres - serviceName: local_postgres - serviceConnection: - config: - type: Postgres -``` -```yaml {% srNumber=1 %} - username: username -``` -```yaml {% srNumber=2 %} - authType: - password: -``` -```yaml {% srNumber=3 %} - authType: - awsConfig: - awsAccessKeyId: access key id - awsSecretAccessKey: access secret key - awsRegion: aws region name -``` -```yaml {% srNumber=4 %} - hostPort: localhost:5432 -``` -```yaml {% srNumber=5 %} - database: database -``` -```yaml {% srNumber=6 %} - ingestAllDatabases: true -``` - - -{% partial file="/v1.7/connectors/yaml/database/source-config.md" /%} - -{% partial file="/v1.7/connectors/yaml/ingestion-sink.md" /%} - -{% partial file="/v1.7/connectors/yaml/workflow-config.md" /%} - -{% /codeBlock %} - -{% /codePreview %} - -{% partial file="/v1.7/connectors/yaml/ingestion-cli.md" /%} - -{% partial file="/v1.7/connectors/yaml/query-usage.md" variables={connector: "postgres"} /%} - -{% partial file="/v1.7/connectors/yaml/lineage.md" variables={connector: "postgres"} /%} - -{% partial file="/v1.7/connectors/yaml/data-profiler.md" variables={connector: "postgres"} /%} - -{% partial file="/v1.7/connectors/yaml/auto-classification.md" variables={connector: "postgres"} /%} - -{% partial file="/v1.7/connectors/yaml/data-quality.md" /%} - - {% /codeInfo %} {% /codeInfoContainer %} From 20098d8cf31c8a1959922fab10aca0b9ea8e58fe Mon Sep 17 00:00:00 2001 From: Keshav Mohta Date: Mon, 6 Jan 2025 11:56:27 +0530 Subject: [PATCH 08/16] fixes: added enable security in cassandra yaml.md --- .../connectors/database/cassandra/yaml.md | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/openmetadata-docs/content/v1.7.x-SNAPSHOT/connectors/database/cassandra/yaml.md b/openmetadata-docs/content/v1.7.x-SNAPSHOT/connectors/database/cassandra/yaml.md index 8eb3ad08759a..cd40db4958a3 100644 --- a/openmetadata-docs/content/v1.7.x-SNAPSHOT/connectors/database/cassandra/yaml.md +++ b/openmetadata-docs/content/v1.7.x-SNAPSHOT/connectors/database/cassandra/yaml.md @@ -189,3 +189,17 @@ source: {% /codePreview %} {% partial file="/v1.7/connectors/yaml/ingestion-cli.md" /%} + +## Securing Cassandra Connection with SSL in OpenMetadata + +To establish secure connections between OpenMetadata and a Cassandra database, you can use any SSL mode provided by Cassandra, except disable. + +Under `Advanced Config`, after selecting the SSL mode, provide the CA certificate, SSL certificate and SSL key. + +```yaml + sslMode: allow + sslConfig: + caCertificate: "/path/to/ca/certificate" + sslCertificate: "/path/to/ssl/certificate" + sslKey: "/path/to/ssl/key" +``` \ No newline at end of file From 4ff680c8569f3ea31d5fe93816f611a793ea84d8 Mon Sep 17 00:00:00 2001 From: Keshav Mohta Date: Mon, 6 Jan 2025 13:25:31 +0530 Subject: [PATCH 09/16] refactor: cassandra tests according to ssl config --- ingestion/tests/unit/test_ssl_manager.py | 68 +++++++++++++++++++ .../unit/topology/database/test_cassandra.py | 6 ++ 2 files changed, 74 insertions(+) diff --git a/ingestion/tests/unit/test_ssl_manager.py b/ingestion/tests/unit/test_ssl_manager.py index c38382162f1c..5cb22c471628 100644 --- a/ingestion/tests/unit/test_ssl_manager.py +++ b/ingestion/tests/unit/test_ssl_manager.py @@ -18,6 +18,7 @@ OpenMetadataJWTClientConfig, ) from metadata.ingestion.ometa.ometa_api import OpenMetadata +from metadata.ingestion.source.database.cassandra.metadata import CassandraSource from metadata.ingestion.source.messaging.kafka.metadata import KafkaSource from metadata.utils.ssl_manager import SSLManager @@ -124,3 +125,70 @@ def test_init(self, mock_ssl_manager, test_connection): kafka_source_with_ssl.service_connection.schemaRegistrySSL.root.sslCertificate.get_secret_value(), "sslCertificateData", ) + + +class CassandraSourceSSLTest(TestCase): + @patch("metadata.ingestion.source.database.cassandra.connection.get_connection") + @patch( + "metadata.ingestion.source.database.cassandra.metadata.CassandraSource.test_connection" + ) + def test_init(self, get_connection, test_connection): + get_connection.return_value = True + test_connection.return_value = True + config = WorkflowSource( + **{ + "type": "cassandra", + "serviceName": "local_cassandra", + "serviceConnection": { + "config": { + "type": "Cassandra", + "hostPort": "localhost:9042", + } + }, + "sourceConfig": {"config": {"type": "DatabaseMetadata"}}, + } + ) + metadata = OpenMetadata( + OpenMetadataConnection( + hostPort="http://localhost:8585/api", + authProvider="openmetadata", + securityConfig=OpenMetadataJWTClientConfig(jwtToken="token"), + ) + ) + cassandra_source = CassandraSource(config, metadata) + self.assertIsNone(cassandra_source.ssl_manager) + + config_with_ssl = WorkflowSource( + **{ + "type": "cassandra", + "serviceName": "local_cassandra", + "serviceConnection": { + "config": { + "type": "Cassandra", + "hostPort": "localhost:9042", + "sslConfig": { + "caCertificate": "caCertificateData", + "sslKey": "sslKeyData", + "sslCertificate": "sslCertificateData", + }, + "sslMode": "allow", + }, + }, + "sourceConfig": {"config": {"type": "DatabaseMetadata"}}, + } + ) + cassandra_source_with_ssl = CassandraSource(config_with_ssl, metadata) + + self.assertIsNotNone(cassandra_source_with_ssl.ssl_manager) + self.assertEqual( + cassandra_source_with_ssl.service_connection.sslConfig.root.caCertificate.get_secret_value(), + "caCertificateData", + ) + self.assertEqual( + cassandra_source_with_ssl.service_connection.sslConfig.root.sslKey.get_secret_value(), + "sslKeyData", + ) + self.assertEqual( + cassandra_source_with_ssl.service_connection.sslConfig.root.sslCertificate.get_secret_value(), + "sslCertificateData", + ) diff --git a/ingestion/tests/unit/topology/database/test_cassandra.py b/ingestion/tests/unit/topology/database/test_cassandra.py index 21edaf77e4c6..82cc9aeb2489 100644 --- a/ingestion/tests/unit/topology/database/test_cassandra.py +++ b/ingestion/tests/unit/topology/database/test_cassandra.py @@ -51,6 +51,12 @@ "username": "cassandra", "authType": {"password": "cassandra"}, "hostPort": "localhost:9042", + "sslMode": "allow", + "sslConfig": { + "caCertificate": "CA certificate content", + "sslCertificate": "SSL certificate content", + "sslKey": "SSL key content", + }, }, }, "sourceConfig": { From 0a595d4459e1c23e69e67b24db1032e1d6bb69bf Mon Sep 17 00:00:00 2001 From: Keshav Mohta Date: Mon, 6 Jan 2025 15:45:35 +0530 Subject: [PATCH 10/16] refactor: optimize helper code --- .../source/database/cassandra/helpers.py | 21 +++++++++---------- 1 file changed, 10 insertions(+), 11 deletions(-) diff --git a/ingestion/src/metadata/ingestion/source/database/cassandra/helpers.py b/ingestion/src/metadata/ingestion/source/database/cassandra/helpers.py index a142506233a1..832d721876af 100644 --- a/ingestion/src/metadata/ingestion/source/database/cassandra/helpers.py +++ b/ingestion/src/metadata/ingestion/source/database/cassandra/helpers.py @@ -57,6 +57,7 @@ def parse(cls, field) -> Column: data_type = None array_data_type = None raw_data_type = "" + for letter in field.type: if letter == "<": if raw_data_type in ("", "frozen"): @@ -71,31 +72,29 @@ def parse(cls, field) -> Column: array_data_type = cls.datatype_mapping.get( raw_data_type.lower(), DataType.UNKNOWN ) + raw_data_type = "" - if data_type != DataType.ARRAY: + if data_type != DataType.ARRAY or array_data_type: break - elif letter != ">": - raw_data_type += letter - elif letter == ">": if not array_data_type and data_type: array_data_type = cls.datatype_mapping.get( raw_data_type.lower(), DataType.UNKNOWN ) break - else: - if not data_type: - data_type = cls.datatype_mapping.get( - field.type.lower(), DataType.UNKNOWN - ) + + else: + raw_data_type += letter + + if not data_type: + data_type = cls.datatype_mapping.get(field.type.lower(), DataType.UNKNOWN) column_def = { "name": field.column_name, "dataTypeDisplay": field.type, "dataType": data_type, + "arrayDataType": array_data_type, } - if array_data_type: - column_def["arrayDataType"] = array_data_type return Column(**column_def) From 39ecd716b6a26a4640f7025b6b5a3e2384f0ed7f Mon Sep 17 00:00:00 2001 From: Keshav Mohta Date: Mon, 6 Jan 2025 16:26:05 +0530 Subject: [PATCH 11/16] refactor: move ssl code to ssl manager file --- .../source/database/cassandra/connection.py | 17 ++++--------- ingestion/src/metadata/utils/ssl_manager.py | 24 +++++++++---------- 2 files changed, 16 insertions(+), 25 deletions(-) diff --git a/ingestion/src/metadata/ingestion/source/database/cassandra/connection.py b/ingestion/src/metadata/ingestion/source/database/cassandra/connection.py index 95d2e1217bff..564b8358e60a 100644 --- a/ingestion/src/metadata/ingestion/source/database/cassandra/connection.py +++ b/ingestion/src/metadata/ingestion/source/database/cassandra/connection.py @@ -13,7 +13,6 @@ Source connection handler """ from functools import partial -from ssl import CERT_REQUIRED, PROTOCOL_TLS, SSLContext from typing import Optional from cassandra.auth import PlainTextAuthProvider @@ -35,7 +34,6 @@ from metadata.generated.schema.entity.services.connections.testConnectionResult import ( TestConnectionResult, ) -from metadata.generated.schema.security.ssl.verifySSLConfig import SslMode from metadata.ingestion.connections.test_connections import test_connection_steps from metadata.ingestion.ometa.ometa_api import OpenMetadata from metadata.ingestion.source.database.cassandra.queries import ( @@ -79,17 +77,10 @@ def get_connection(connection: CassandraConnection): password=connection.authType.password.get_secret_value(), ) - ssl_context = None - if connection.sslMode != SslMode.disable: - ssl_args = connection.connectionArguments.root["ssl_args"] - ssl_context = SSLContext(PROTOCOL_TLS) - ssl_context.load_verify_locations(cafile=ssl_args["ssl_ca"]) - ssl_context.verify_mode = CERT_REQUIRED - ssl_context.load_cert_chain( - certfile=ssl_args["ssl_cert"], keyfile=ssl_args["ssl_key"] - ) - - cluster = Cluster(**cluster_config, ssl_context=ssl_context) + cluster = Cluster( + **cluster_config, + ssl_context=connection.connectionArguments.root.get("ssl_context"), + ) session = cluster.connect() return session diff --git a/ingestion/src/metadata/utils/ssl_manager.py b/ingestion/src/metadata/utils/ssl_manager.py index 578fabdd1e1b..8853b4d15f21 100644 --- a/ingestion/src/metadata/utils/ssl_manager.py +++ b/ingestion/src/metadata/utils/ssl_manager.py @@ -17,6 +17,7 @@ import tempfile import traceback from functools import singledispatch, singledispatchmethod +from ssl import CERT_REQUIRED, PROTOCOL_TLSv1_2, SSLContext from typing import Optional, Union, cast from pydantic import SecretStr @@ -58,6 +59,7 @@ MatillionConnection, ) from metadata.generated.schema.security.ssl import verifySSLConfig +from metadata.generated.schema.security.ssl.verifySSLConfig import SslMode from metadata.ingestion.connections.builders import init_empty_connection_arguments from metadata.ingestion.models.custom_pydantic import CustomSecretStr from metadata.ingestion.source.connections import get_connection @@ -211,20 +213,20 @@ def _(self, connection): @setup_ssl.register(CassandraConnection) def _(self, connection): - # Use the temporary file paths for SSL configuration connection = cast(CassandraConnection, connection) - ssl_args = {} - if connection.sslConfig.root.caCertificate: - ssl_args["ssl_ca"] = self.ca_file_path - if connection.sslConfig.root.sslCertificate: - ssl_args["ssl_cert"] = self.cert_file_path - if connection.sslConfig.root.sslKey: - ssl_args["ssl_key"] = self.key_file_path + + if connection.sslMode != SslMode.disable: + ssl_context = SSLContext(PROTOCOL_TLSv1_2) + ssl_context.load_verify_locations(cafile=self.ca_file_path) + ssl_context.verify_mode = CERT_REQUIRED + ssl_context.load_cert_chain( + certfile=self.cert_file_path, keyfile=self.key_file_path + ) connection.connectionArguments = ( connection.connectionArguments or init_empty_connection_arguments() ) - connection.connectionArguments.root["ssl_args"] = ssl_args + connection.connectionArguments.root["ssl_context"] = ssl_context return connection @@ -316,9 +318,7 @@ def _(connection): ssl: Optional[verifySSLConfig.SslConfig] = service_connection.sslConfig if ssl and (ssl.root.caCertificate or ssl.root.sslCertificate or ssl.root.sslKey): return SSLManager( - ca=ssl.root.caCertificate, - cert=ssl.root.sslCertificate, - key=ssl.root.sslKey, + ca=ssl.root.caCertificate, cert=ssl.root.sslCertificate, key=ssl.root.sslKey ) return None From 85ad169304c5c6abd01a357d429a48a13053af3a Mon Sep 17 00:00:00 2001 From: Keshav Mohta Date: Mon, 6 Jan 2025 16:33:05 +0530 Subject: [PATCH 12/16] fixes: ssl_context=None in setup_ssl for cassandra --- ingestion/src/metadata/utils/ssl_manager.py | 1 + 1 file changed, 1 insertion(+) diff --git a/ingestion/src/metadata/utils/ssl_manager.py b/ingestion/src/metadata/utils/ssl_manager.py index 8853b4d15f21..2ab6a81cf7fc 100644 --- a/ingestion/src/metadata/utils/ssl_manager.py +++ b/ingestion/src/metadata/utils/ssl_manager.py @@ -215,6 +215,7 @@ def _(self, connection): def _(self, connection): connection = cast(CassandraConnection, connection) + ssl_context = None if connection.sslMode != SslMode.disable: ssl_context = SSLContext(PROTOCOL_TLSv1_2) ssl_context.load_verify_locations(cafile=self.ca_file_path) From 95bbff3afa9377b071f68b65837bec9ff1300cc0 Mon Sep 17 00:00:00 2001 From: Keshav Mohta Date: Mon, 6 Jan 2025 17:04:41 +0530 Subject: [PATCH 13/16] fix: connection arguments without ssl --- .../ingestion/source/database/cassandra/connection.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/ingestion/src/metadata/ingestion/source/database/cassandra/connection.py b/ingestion/src/metadata/ingestion/source/database/cassandra/connection.py index 564b8358e60a..e4836265a57f 100644 --- a/ingestion/src/metadata/ingestion/source/database/cassandra/connection.py +++ b/ingestion/src/metadata/ingestion/source/database/cassandra/connection.py @@ -34,6 +34,7 @@ from metadata.generated.schema.entity.services.connections.testConnectionResult import ( TestConnectionResult, ) +from metadata.ingestion.connections.builders import init_empty_connection_arguments from metadata.ingestion.connections.test_connections import test_connection_steps from metadata.ingestion.ometa.ometa_api import OpenMetadata from metadata.ingestion.source.database.cassandra.queries import ( @@ -77,6 +78,10 @@ def get_connection(connection: CassandraConnection): password=connection.authType.password.get_secret_value(), ) + connection.connectionArguments = ( + connection.connectionArguments or init_empty_connection_arguments() + ) + cluster = Cluster( **cluster_config, ssl_context=connection.connectionArguments.root.get("ssl_context"), From de5934d13bc486b4bc94674272e780149b7b37b4 Mon Sep 17 00:00:00 2001 From: Keshav Mohta Date: Mon, 6 Jan 2025 17:31:43 +0530 Subject: [PATCH 14/16] fix: revert back to deault protocol for cassandra ssl connection --- ingestion/src/metadata/utils/ssl_manager.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/ingestion/src/metadata/utils/ssl_manager.py b/ingestion/src/metadata/utils/ssl_manager.py index 2ab6a81cf7fc..8ba6aadd326c 100644 --- a/ingestion/src/metadata/utils/ssl_manager.py +++ b/ingestion/src/metadata/utils/ssl_manager.py @@ -17,7 +17,7 @@ import tempfile import traceback from functools import singledispatch, singledispatchmethod -from ssl import CERT_REQUIRED, PROTOCOL_TLSv1_2, SSLContext +from ssl import CERT_REQUIRED, SSLContext from typing import Optional, Union, cast from pydantic import SecretStr @@ -217,7 +217,7 @@ def _(self, connection): ssl_context = None if connection.sslMode != SslMode.disable: - ssl_context = SSLContext(PROTOCOL_TLSv1_2) + ssl_context = SSLContext() ssl_context.load_verify_locations(cafile=self.ca_file_path) ssl_context.verify_mode = CERT_REQUIRED ssl_context.load_cert_chain( From 2b0b48886bd670d78a12053e5c475504c45855c8 Mon Sep 17 00:00:00 2001 From: Keshav Mohta Date: Mon, 6 Jan 2025 20:35:00 +0530 Subject: [PATCH 15/16] fix: remove ssl config from topology test --- ingestion/tests/unit/topology/database/test_cassandra.py | 6 ------ 1 file changed, 6 deletions(-) diff --git a/ingestion/tests/unit/topology/database/test_cassandra.py b/ingestion/tests/unit/topology/database/test_cassandra.py index 82cc9aeb2489..21edaf77e4c6 100644 --- a/ingestion/tests/unit/topology/database/test_cassandra.py +++ b/ingestion/tests/unit/topology/database/test_cassandra.py @@ -51,12 +51,6 @@ "username": "cassandra", "authType": {"password": "cassandra"}, "hostPort": "localhost:9042", - "sslMode": "allow", - "sslConfig": { - "caCertificate": "CA certificate content", - "sslCertificate": "SSL certificate content", - "sslKey": "SSL key content", - }, }, }, "sourceConfig": { From 4a891834017b5092ff2ed0bbca152698517ad5ec Mon Sep 17 00:00:00 2001 From: Keshav Mohta Date: Tue, 7 Jan 2025 11:57:10 +0530 Subject: [PATCH 16/16] fix: used setup_ssl.side_effect --- ingestion/tests/unit/test_ssl_manager.py | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/ingestion/tests/unit/test_ssl_manager.py b/ingestion/tests/unit/test_ssl_manager.py index 5cb22c471628..80516cb383ce 100644 --- a/ingestion/tests/unit/test_ssl_manager.py +++ b/ingestion/tests/unit/test_ssl_manager.py @@ -128,13 +128,16 @@ def test_init(self, mock_ssl_manager, test_connection): class CassandraSourceSSLTest(TestCase): - @patch("metadata.ingestion.source.database.cassandra.connection.get_connection") + @patch("metadata.utils.ssl_manager.SSLManager.setup_ssl") @patch( "metadata.ingestion.source.database.cassandra.metadata.CassandraSource.test_connection" ) - def test_init(self, get_connection, test_connection): + @patch("metadata.ingestion.source.database.cassandra.connection.get_connection") + def test_init(self, get_connection, test_connection, setup_ssl): get_connection.return_value = True test_connection.return_value = True + setup_ssl.side_effect = lambda x: x + config = WorkflowSource( **{ "type": "cassandra",