From 917948796e451c2442886accc2de507f2322b7a4 Mon Sep 17 00:00:00 2001 From: gquadrati Date: Thu, 24 Oct 2024 14:47:47 +0200 Subject: [PATCH 1/3] add alert rule for profile deletion poison queue + move to error action group --- .../elt/_modules/function_apps/monitor.tf | 42 ++++++++++++++++++- 1 file changed, 40 insertions(+), 2 deletions(-) diff --git a/src/domains/elt/_modules/function_apps/monitor.tf b/src/domains/elt/_modules/function_apps/monitor.tf index 5ea114d5e..6997e7980 100644 --- a/src/domains/elt/_modules/function_apps/monitor.tf +++ b/src/domains/elt/_modules/function_apps/monitor.tf @@ -2,6 +2,10 @@ data "azurerm_monitor_action_group" "quarantine_error_action_group" { resource_group_name = local.resource_group_name_common name = "${replace(var.project, "-", "")}quarantineerror" } +data "azurerm_monitor_action_group" "error_action_group" { + resource_group_name = local.resource_group_name_common + name = "${replace(var.project, "-", "")}error" +} resource "azurerm_monitor_diagnostic_setting" "queue_diagnostic_setting" { name = "${var.project}-fnelt-internal-st-queue-ds-01" @@ -50,7 +54,7 @@ resource "azurerm_monitor_scheduled_query_rules_alert_v2" "service_preferences_f action { action_groups = [ - data.azurerm_monitor_action_group.quarantine_error_action_group.id, + data.azurerm_monitor_action_group.error_action_group.id, ] } @@ -84,7 +88,41 @@ resource "azurerm_monitor_scheduled_query_rules_alert_v2" "profiles_failure_aler action { action_groups = [ - data.azurerm_monitor_action_group.quarantine_error_action_group.id, + data.azurerm_monitor_action_group.error_action_group.id, + ] + } + + tags = var.tags +} + +resource "azurerm_monitor_scheduled_query_rules_alert_v2" "profile_deletion_failure_alert_rule" { + enabled = true + name = "[CITIZEN-AUTH | iopfneltsdt] Failures on ${local.profile_deletion_failure_queue_name}-poison" + resource_group_name = var.resource_group_name + location = var.location + + scopes = 
[data.azurerm_storage_account.function_elt_internal_storage.id] + description = "Permanent failures processing Profiles export to PDND. REQUIRED MANUAL ACTION" + severity = 1 + auto_mitigation_enabled = false + + window_duration = "PT15M" # Select the interval that's used to group the data points by using the aggregation type function. Choose an Aggregation granularity (period) that's greater than the Frequency of evaluation to reduce the likelihood of missing the first evaluation period of an added time series. + evaluation_frequency = "PT15M" # Select how often the alert rule is to be run. Select a frequency that's smaller than the aggregation granularity to generate a sliding window for the evaluation. + + criteria { + query = <<-QUERY + StorageQueueLogs + | where OperationName contains "PutMessage" + | where Uri contains "${local.profile_deletion_failure_queue_name}-poison" + QUERY + operator = "GreaterThan" + threshold = 0 + time_aggregation_method = "Count" + } + + action { + action_groups = [ + data.azurerm_monitor_action_group.error_action_group.id, ] } From 0f572205360aa482e3b56f12dfd9ed09f66f84a6 Mon Sep 17 00:00:00 2001 From: gquadrati Date: Thu, 24 Oct 2024 14:53:26 +0200 Subject: [PATCH 2/3] remove duplicated data --- src/domains/elt/_modules/function_apps/monitor.tf | 4 ---- 1 file changed, 4 deletions(-) diff --git a/src/domains/elt/_modules/function_apps/monitor.tf b/src/domains/elt/_modules/function_apps/monitor.tf index 6997e7980..0c65029ed 100644 --- a/src/domains/elt/_modules/function_apps/monitor.tf +++ b/src/domains/elt/_modules/function_apps/monitor.tf @@ -2,10 +2,6 @@ data "azurerm_monitor_action_group" "quarantine_error_action_group" { resource_group_name = local.resource_group_name_common name = "${replace(var.project, "-", "")}quarantineerror" } -data "azurerm_monitor_action_group" "error_action_group" { - resource_group_name = local.resource_group_name_common - name = "${replace(var.project, "-", "")}error" -} resource 
"azurerm_monitor_diagnostic_setting" "queue_diagnostic_setting" { name = "${var.project}-fnelt-internal-st-queue-ds-01" From 28c6d474c133a078a89eea7e73493bd8df7f40ca Mon Sep 17 00:00:00 2001 From: gquadrati Date: Thu, 24 Oct 2024 14:55:44 +0200 Subject: [PATCH 3/3] move quarantine_error_action_group to data.tf --- src/domains/elt/_modules/function_apps/data.tf | 5 +++++ src/domains/elt/_modules/function_apps/monitor.tf | 5 ----- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/src/domains/elt/_modules/function_apps/data.tf b/src/domains/elt/_modules/function_apps/data.tf index d6b75f966..1d7db463f 100644 --- a/src/domains/elt/_modules/function_apps/data.tf +++ b/src/domains/elt/_modules/function_apps/data.tf @@ -8,6 +8,11 @@ data "azurerm_monitor_action_group" "error_action_group" { resource_group_name = local.resource_group_name_common } +data "azurerm_monitor_action_group" "quarantine_error_action_group" { + name = "${replace(var.project, "-", "")}quarantineerror" + resource_group_name = local.resource_group_name_common +} + data "azurerm_monitor_action_group" "io_com_action_group" { name = "io-p-com-error-ag-01" resource_group_name = "io-p-itn-msgs-rg-01" diff --git a/src/domains/elt/_modules/function_apps/monitor.tf b/src/domains/elt/_modules/function_apps/monitor.tf index 0c65029ed..ed6b5dfc1 100644 --- a/src/domains/elt/_modules/function_apps/monitor.tf +++ b/src/domains/elt/_modules/function_apps/monitor.tf @@ -1,8 +1,3 @@ -data "azurerm_monitor_action_group" "quarantine_error_action_group" { - resource_group_name = local.resource_group_name_common - name = "${replace(var.project, "-", "")}quarantineerror" -} - resource "azurerm_monitor_diagnostic_setting" "queue_diagnostic_setting" { name = "${var.project}-fnelt-internal-st-queue-ds-01" target_resource_id = "${data.azurerm_storage_account.function_elt_internal_storage.id}/queueServices/default"