Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[DPE-5964] - TLS certificate renewal #17

Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 4 additions & 4 deletions poetry.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

1 change: 1 addition & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -62,6 +62,7 @@ allure-pytest-collection-report = {git = "https://github.com/canonical/data-plat
# see also: https://github.com/juju/python-libjuju/issues/1184
websockets = "<14.0"
tenacity = "*"
pydantic = "==2.9.1"

[tool.coverage.run]
branch = true
Expand Down
8 changes: 6 additions & 2 deletions src/events/tls.py
Original file line number Diff line number Diff line change
Expand Up @@ -131,12 +131,16 @@ def _on_certificate_available(self, event: CertificateAvailableEvent):
if not cert or not private_key:
logger.error("Missing certificate or private key")
raise Exception("Missing certificate or private key")

self.charm.set_status(Status.TLS_NOT_READY)
# write certificates to disk
self.charm.tls_manager.write_certificate(cert, private_key)

# Updating certificates no need to do a rolling restart
if self.charm.state.unit_server.tls_state == TLSState.TLS:

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

what of the case where the peer-cert was issued but this hook was invoked for the client-cert? what would the state of tls be?

logger.debug(f"Updated certificate for {cert_type}")
return

if self.charm.state.unit_server.certs_ready:
self.charm.set_status(Status.TLS_NOT_READY)

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Left a comment on the other PR about this

# we do not restart if the cluster has not started yet
if self.charm.state.cluster.initial_cluster_state == "existing":
self.charm.rolling_restart()
Expand Down
2 changes: 1 addition & 1 deletion src/literals.py
Original file line number Diff line number Diff line change
Expand Up @@ -55,7 +55,7 @@ class Status(Enum):
NO_PEER_RELATION = StatusLevel(MaintenanceStatus("no peer relation available"), "DEBUG")
MISSING_TLS_RELATION = StatusLevel(BlockedStatus("missing tls relation"), "DEBUG")
MISSING_CERTIFICATES = StatusLevel(MaintenanceStatus("missing certificates"), "DEBUG")
TLS_NOT_READY = StatusLevel(MaintenanceStatus("tls not ready"), "DEBUG")
TLS_NOT_READY = StatusLevel(MaintenanceStatus("enabling/disabling TLS"), "DEBUG")
PEER_URL_NOT_SET = StatusLevel(MaintenanceStatus("peer-url not set"), "DEBUG")
CLIENT_TLS_MISSING = StatusLevel(BlockedStatus("client tls relation missing"), "DEBUG")
PEER_TLS_MISSING = StatusLevel(BlockedStatus("peer tls relation missing"), "DEBUG")
Expand Down
15 changes: 13 additions & 2 deletions tests/integration/helpers.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@
from pytest_operator.plugin import OpsTest

from literals import CLIENT_PORT, SNAP_NAME
from managers.tls import CertType

logger = logging.getLogger(__name__)

Expand Down Expand Up @@ -93,14 +94,14 @@ def get_cluster_endpoints(
)


async def get_juju_leader_unit_name(ops_test: OpsTest, app_name: str = APP_NAME) -> str:
async def get_juju_leader_unit_name(ops_test: OpsTest, app_name: str = APP_NAME) -> str | None:
    """Return the name of the first unit of ``app_name`` that reports leadership.

    Units are checked in the order the model lists them. ``None`` is returned
    when no unit reports being the leader.
    """
    candidates = ops_test.model.applications[app_name].units
    for candidate in candidates:
        reports_leader = await candidate.is_leader_from_status()
        if reports_leader:
            return candidate.name
    return None


async def get_secret_by_label(ops_test: OpsTest, label: str) -> Dict[str, str]:
async def get_secret_by_label(ops_test: OpsTest, label: str) -> Dict[str, str] | None:
secrets_raw = await ops_test.juju("list-secrets")
secret_ids = [
secret_line.split()[0] for secret_line in secrets_raw[1].split("\n")[1:] if secret_line
Expand All @@ -114,3 +115,13 @@ async def get_secret_by_label(ops_test: OpsTest, label: str) -> Dict[str, str]:

if label == secret_data[secret_id].get("label"):
return secret_data[secret_id]["content"]["Data"]


def get_certificate_from_unit(model: str, unit: str, cert_type: CertType) -> str | None:
    """Retrieve a certificate from a unit.

    Runs ``juju ssh`` against ``unit`` in ``model`` and reads the PEM file named
    after ``cert_type.value`` from the charmed-etcd snap TLS directory.

    Args:
        model: Juju model name passed to ``--model``.
        unit: name of the unit to read the certificate from.
        cert_type: certificate kind; its ``value`` selects the ``.pem`` file.

    Returns:
        The certificate text when the command succeeds and its standard output
        starts with a PEM certificate header, otherwise ``None``.
    """
    # Argument list instead of a shell string: no shell quoting involved.
    command = [
        "juju",
        "ssh",
        f"--model={model}",
        unit,
        f"cat /var/snap/charmed-etcd/common/tls/{cert_type.value}.pem",
    ]
    try:
        result = subprocess.run(command, capture_output=True, text=True, check=False)
    except OSError:
        # The juju client could not be started; report "no certificate" as before.
        return None

    # Only stdout is inspected, so diagnostics written to stderr can no longer
    # hide the PEM header or end up inside the returned certificate text.
    output = result.stdout.strip()
    if result.returncode == 0 and output.startswith("-----BEGIN CERTIFICATE-----"):
        return output

    return None
75 changes: 73 additions & 2 deletions tests/integration/test_tls.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,15 +3,18 @@
# See LICENSE file for licensing details.

import logging
from time import sleep

import pytest
from juju.application import Application
from pytest_operator.plugin import OpsTest

from literals import INTERNAL_USER, PEER_RELATION
from managers.tls import CertType

from .helpers import (
APP_NAME,
get_certificate_from_unit,
get_cluster_endpoints,
get_cluster_members,
get_juju_leader_unit_name,
Expand All @@ -26,6 +29,7 @@
NUM_UNITS = 3
TEST_KEY = "test_key"
TEST_VALUE = "42"
EXPIRATION_WAITING_TIME = 90


@pytest.mark.runner(["self-hosted", "linux", "X64", "jammy", "large"])
Expand All @@ -38,7 +42,7 @@ async def test_build_and_deploy_with_tls(ops_test: OpsTest) -> None:
"""
assert ops_test.model is not None
# Deploy the TLS charm
tls_config = {"ca-common-name": "etcd"}
tls_config = {"ca-common-name": "etcd", "certificate-validity": "1m"}
await ops_test.model.deploy(TLS_NAME, channel="edge", config=tls_config)
# Build and deploy charm from local source folder
etcd_charm = await ops_test.build_charm(".")
Expand All @@ -48,24 +52,30 @@ async def test_build_and_deploy_with_tls(ops_test: OpsTest) -> None:
await ops_test.model.deploy(etcd_charm, num_units=NUM_UNITS)

# enable TLS and check if the cluster is still accessible
logger.info("Integrating the TLS certificates")
await ops_test.model.integrate(f"{APP_NAME}:peer-certificates", TLS_NAME)
await ops_test.model.integrate(f"{APP_NAME}:client-certificates", TLS_NAME)
await ops_test.model.wait_for_idle(apps=[APP_NAME], status="active", timeout=1000)

logger.info("Cluster is active and idle")
# check if all units have been added to the cluster
endpoints = get_cluster_endpoints(ops_test, APP_NAME, tls_enabled=True)
leader_unit = await get_juju_leader_unit_name(ops_test, APP_NAME)
assert leader_unit

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

was there a case when this was false?


cluster_members = get_cluster_members(model, leader_unit, endpoints, tls_enabled=True)
assert len(cluster_members) == NUM_UNITS
for cluster_member in cluster_members:
assert cluster_member["clientURLs"][0].startswith("https")
assert cluster_member["peerURLs"][0].startswith("https")
logger.info("Cluster members all have https URLs")

# make sure data can be written to the cluster
secret = await get_secret_by_label(ops_test, label=f"{PEER_RELATION}.{APP_NAME}.app")
assert secret

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Please add messages to your assertions so that failures are self-explanatory.
For example:

assert secret, f"Secret {PEER_RELATION}.{APP_NAME}.app is missing."

password = secret.get(f"{INTERNAL_USER}-password")

logger.info("Writing/reading data to/from the cluster")
assert (
put_key(
model,
Expand Down Expand Up @@ -101,7 +111,8 @@ async def test_turning_off_tls(ops_test: OpsTest) -> None:
model = ops_test.model_full_name
assert model is not None

# enable TLS and check if the cluster is still accessible
# disable TLS and check if the cluster is still accessible
logger.info("Disabling TLS by removing the TLS certificates")
etcd_app: Application = ops_test.model.applications[APP_NAME] # type: ignore
await etcd_app.remove_relation("peer-certificates", f"{TLS_NAME}:certificates")
await etcd_app.remove_relation("client-certificates", f"{TLS_NAME}:certificates")
Expand All @@ -110,14 +121,17 @@ async def test_turning_off_tls(ops_test: OpsTest) -> None:

endpoints = get_cluster_endpoints(ops_test, APP_NAME)
leader_unit = await get_juju_leader_unit_name(ops_test, APP_NAME)
assert leader_unit
cluster_members = get_cluster_members(model, leader_unit, endpoints)
assert len(cluster_members) == NUM_UNITS

for cluster_member in cluster_members:
assert cluster_member["clientURLs"][0].startswith("http://")
assert cluster_member["peerURLs"][0].startswith("http://")
logger.info("Cluster members all have http URLs")

secret = await get_secret_by_label(ops_test, label=f"{PEER_RELATION}.{APP_NAME}.app")
assert secret
password = secret.get(f"{INTERNAL_USER}-password")
assert (
get_key(
Expand All @@ -130,6 +144,7 @@ async def test_turning_off_tls(ops_test: OpsTest) -> None:
)
== TEST_VALUE
)
logger.info(f"Cluster is still accessible: key {TEST_KEY} has value {TEST_VALUE}")


@pytest.mark.runner(["self-hosted", "linux", "X64", "jammy", "large"])
Expand All @@ -141,21 +156,25 @@ async def test_turning_on_tls(ops_test: OpsTest) -> None:
assert model is not None

# enable TLS and check if the cluster is still accessible
logger.info("Enabling TLS by adding the TLS certificates")
await ops_test.model.integrate(f"{APP_NAME}:peer-certificates", TLS_NAME)
await ops_test.model.integrate(f"{APP_NAME}:client-certificates", TLS_NAME)

await ops_test.model.wait_for_idle(apps=[APP_NAME], status="active", timeout=1000)

endpoints = get_cluster_endpoints(ops_test, APP_NAME, tls_enabled=True)
leader_unit = await get_juju_leader_unit_name(ops_test, APP_NAME)
assert leader_unit
cluster_members = get_cluster_members(model, leader_unit, endpoints, tls_enabled=True)
assert len(cluster_members) == NUM_UNITS

for cluster_member in cluster_members:
assert cluster_member["clientURLs"][0].startswith("https")
assert cluster_member["peerURLs"][0].startswith("https")
logger.info("Cluster members all have https URLs")

secret = await get_secret_by_label(ops_test, label=f"{PEER_RELATION}.{APP_NAME}.app")
assert secret
password = secret.get(f"{INTERNAL_USER}-password")
assert (
get_key(
Expand All @@ -169,3 +188,55 @@ async def test_turning_on_tls(ops_test: OpsTest) -> None:
)
== TEST_VALUE
)
logger.info(f"Cluster is still accessible: key {TEST_KEY} has value {TEST_VALUE}")


@pytest.mark.runner(["self-hosted", "linux", "X64", "jammy", "large"])
@pytest.mark.group(1)
@pytest.mark.abort_on_fail
async def test_certificate_expiration(ops_test: OpsTest) -> None:
assert ops_test.model
model = ops_test.model_full_name
assert model is not None

leader_unit = await get_juju_leader_unit_name(ops_test, APP_NAME)
assert leader_unit

# get current certificate
logger.info("Reading the current certificate from leader unit")
current_certificate = get_certificate_from_unit(model, leader_unit, cert_type=CertType.CLIENT)
assert current_certificate

# wait for certificate to expire
logger.info(f"Waiting for the certificate to expire {EXPIRATION_WAITING_TIME}s")
sleep(EXPIRATION_WAITING_TIME)

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Can we add a test on the status of the application / units? Does it change?

# get new certificate
logger.info("Reading the new certificate from leader unit")
new_certificate = get_certificate_from_unit(model, leader_unit, cert_type=CertType.CLIENT)
assert new_certificate

# check that the certificate has been updated
assert current_certificate != new_certificate
logger.info("Certificate has been updated")

# check that the cluster is still accessible
secret = await get_secret_by_label(ops_test, label=f"{PEER_RELATION}.{APP_NAME}.app")
assert secret

password = secret.get(f"{INTERNAL_USER}-password")
endpoints = get_cluster_endpoints(ops_test, APP_NAME, tls_enabled=True)

assert (
get_key(
model,
leader_unit,
endpoints,
user=INTERNAL_USER,
password=password,
key=TEST_KEY,
tls_enabled=True,
)
== TEST_VALUE
)
logger.info(f"Cluster is still accessible: key {TEST_KEY} has value {TEST_VALUE}")
Loading