From c8c901ed372850791c9f2157e9bc85fd6628e3aa Mon Sep 17 00:00:00 2001 From: Daniele Manni Date: Thu, 29 Aug 2024 18:05:32 +0200 Subject: [PATCH] [#IOPID-1927] Alert on service preference poison queue for fn-elt (#1132) Co-authored-by: Greta Quadrati <75862507+gquadrati@users.noreply.github.com> --- .../elt/_modules/function_apps/data.tf | 5 ++ .../function_apps/function_app_elt.tf | 6 +- .../elt/_modules/function_apps/locals.tf | 2 + .../elt/_modules/function_apps/monitor.tf | 58 +++++++++++++++++++ 4 files changed, 68 insertions(+), 3 deletions(-) create mode 100644 src/domains/elt/_modules/function_apps/monitor.tf diff --git a/src/domains/elt/_modules/function_apps/data.tf b/src/domains/elt/_modules/function_apps/data.tf index a2063c665..5d7f83ede 100644 --- a/src/domains/elt/_modules/function_apps/data.tf +++ b/src/domains/elt/_modules/function_apps/data.tf @@ -127,3 +127,8 @@ data "azurerm_storage_account" "storage_assets_cdn" { name = replace(format("%s-stcdnassets", var.project), "-", "") resource_group_name = local.resource_group_name_common } + +data "azurerm_storage_account" "function_elt_internal_storage" { + name = module.function_elt.storage_account_internal_function_name + resource_group_name = var.resource_group_name +} diff --git a/src/domains/elt/_modules/function_apps/function_app_elt.tf b/src/domains/elt/_modules/function_apps/function_app_elt.tf index 8cefecf26..1a36dda80 100644 --- a/src/domains/elt/_modules/function_apps/function_app_elt.tf +++ b/src/domains/elt/_modules/function_apps/function_app_elt.tf @@ -94,7 +94,7 @@ locals { MESSAGES_FAILURE_QUEUE_NAME = "pdnd-io-cosmosdb-messages-failure" MESSAGE_STATUS_FAILURE_QUEUE_NAME = "pdnd-io-cosmosdb-message-status-failure" SERVICES_FAILURE_QUEUE_NAME = "pdnd-io-cosmosdb-services-failure" - SERVICE_PREFERENCES_FAILURE_QUEUE_NAME = "pdnd-io-cosmosdb-service-preferences-failure" + SERVICE_PREFERENCES_FAILURE_QUEUE_NAME = local.service_preferences_failure_queue_name PROFILES_FAILURE_QUEUE_NAME = 
"pdnd-io-cosmosdb-profiles-failure" INTERNAL_TEST_FISCAL_CODES = module.tests.test_users.all @@ -166,8 +166,8 @@ module "function_elt" { "${local.function_elt.app_settings.MESSAGE_STATUS_FAILURE_QUEUE_NAME}-poison", local.function_elt.app_settings.SERVICES_FAILURE_QUEUE_NAME, "${local.function_elt.app_settings.SERVICES_FAILURE_QUEUE_NAME}-poison", - local.function_elt.app_settings.SERVICE_PREFERENCES_FAILURE_QUEUE_NAME, - "${local.function_elt.app_settings.SERVICE_PREFERENCES_FAILURE_QUEUE_NAME}-poison", + local.service_preferences_failure_queue_name, + "${local.service_preferences_failure_queue_name}-poison", local.function_elt.app_settings.PROFILES_FAILURE_QUEUE_NAME, "${local.function_elt.app_settings.PROFILES_FAILURE_QUEUE_NAME}-poison" ], diff --git a/src/domains/elt/_modules/function_apps/locals.tf b/src/domains/elt/_modules/function_apps/locals.tf index 6e23393a3..0c76a15dd 100644 --- a/src/domains/elt/_modules/function_apps/locals.tf +++ b/src/domains/elt/_modules/function_apps/locals.tf @@ -15,4 +15,6 @@ locals { event_hub_connection = "${format("%s-evh-ns", var.project)}.servicebus.windows.net:9093" pn_service_id = "01G40DWQGKY5GRWSNM4303VNRP" + + service_preferences_failure_queue_name = "pdnd-io-cosmosdb-service-preferences-failure" } diff --git a/src/domains/elt/_modules/function_apps/monitor.tf b/src/domains/elt/_modules/function_apps/monitor.tf new file mode 100644 index 000000000..bc4b2c271 --- /dev/null +++ b/src/domains/elt/_modules/function_apps/monitor.tf @@ -0,0 +1,58 @@ +data "azurerm_monitor_action_group" "quarantine_error_action_group" { + resource_group_name = local.resource_group_name_common + name = "${replace(var.project, "-", "")}quarantineerror" +} + +resource "azurerm_monitor_diagnostic_setting" "queue_diagnostic_setting" { + name = "${var.project}-fnelt-internal-st-queue-ds-01" + target_resource_id = "${data.azurerm_storage_account.function_elt_internal_storage.id}/queueServices/default" + log_analytics_workspace_id = 
data.azurerm_application_insights.application_insights.workspace_id + + enabled_log { + category = "StorageWrite" + } + + metric { + category = "Capacity" + enabled = false + } + metric { + category = "Transaction" + enabled = false + } +} + + +resource "azurerm_monitor_scheduled_query_rules_alert_v2" "service_preferences_failure_alert_rule" { + enabled = true + name = "[CITIZEN-AUTH | iopfneltsdt] Failures on pdnd-io-cosmosdb-service-preferences-failure-poison" + resource_group_name = var.resource_group_name + location = var.location + + scopes = [data.azurerm_storage_account.function_elt_internal_storage.id] + description = "Permanent failures processing Service Preferences export to PDND. REQUIRED MANUAL ACTION" + severity = 1 + auto_mitigation_enabled = false + + window_duration = "PT15M" # Select the interval that's used to group the data points by using the aggregation type function. Choose an Aggregation granularity (period) that's greater than the Frequency of evaluation to reduce the likelihood of missing the first evaluation period of an added time series. + evaluation_frequency = "PT15M" # Select how often the alert rule is to be run. Select a frequency that's smaller than the aggregation granularity to generate a sliding window for the evaluation. + + criteria { + query = <<-QUERY + StorageQueueLogs + | where OperationName contains "PutMessage" + | where Uri contains "${local.service_preferences_failure_queue_name}-poison" + QUERY + operator = "GreaterThan" + threshold = 0 + time_aggregation_method = "Count" + } + + action { + action_groups = [ + data.azurerm_monitor_action_group.quarantine_error_action_group.id, + ] + } + + tags = var.tags +} \ No newline at end of file