Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[#IOPID-2346] Add alert rule for profile deletion poison queue + move alerts to error action group #1257

Merged
merged 3 commits into from
Oct 25, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions src/domains/elt/_modules/function_apps/data.tf
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,11 @@ data "azurerm_monitor_action_group" "error_action_group" {
resource_group_name = local.resource_group_name_common
}

# Looks up the existing "quarantine error" Azure Monitor action group.
# The group lives in the common resource group; its name is the project
# identifier with every "-" removed, followed by the "quarantineerror" suffix.
data "azurerm_monitor_action_group" "quarantine_error_action_group" {
  resource_group_name = local.resource_group_name_common
  name                = "${replace(var.project, "-", "")}quarantineerror"
}

data "azurerm_monitor_action_group" "io_com_action_group" {
name = "io-p-com-error-ag-01"
resource_group_name = "io-p-itn-msgs-rg-01"
Expand Down
43 changes: 36 additions & 7 deletions src/domains/elt/_modules/function_apps/monitor.tf
Original file line number Diff line number Diff line change
@@ -1,8 +1,3 @@
# Data source for the "quarantine error" Azure Monitor action group.
# Name: var.project with hyphens stripped, suffixed with "quarantineerror".
# Resource group: the shared/common one taken from locals.
data "azurerm_monitor_action_group" "quarantine_error_action_group" {
  name                = "${replace(var.project, "-", "")}quarantineerror"
  resource_group_name = local.resource_group_name_common
}

resource "azurerm_monitor_diagnostic_setting" "queue_diagnostic_setting" {
name = "${var.project}-fnelt-internal-st-queue-ds-01"
target_resource_id = "${data.azurerm_storage_account.function_elt_internal_storage.id}/queueServices/default"
Expand Down Expand Up @@ -50,7 +45,7 @@ resource "azurerm_monitor_scheduled_query_rules_alert_v2" "service_preferences_f

action {
action_groups = [
data.azurerm_monitor_action_group.quarantine_error_action_group.id,
data.azurerm_monitor_action_group.error_action_group.id,
]
}

Expand Down Expand Up @@ -84,7 +79,41 @@ resource "azurerm_monitor_scheduled_query_rules_alert_v2" "profiles_failure_aler

action {
action_groups = [
data.azurerm_monitor_action_group.quarantine_error_action_group.id,
data.azurerm_monitor_action_group.error_action_group.id,
]
}

tags = var.tags
}

resource "azurerm_monitor_scheduled_query_rules_alert_v2" "profile_deletion_failure_alert_rule" {
enabled = true
name = "[CITIZEN-AUTH | iopfneltsdt] Failures on ${local.profile_deletion_failure_queue_name}-poison"
resource_group_name = var.resource_group_name
location = var.location

scopes = [data.azurerm_storage_account.function_elt_internal_storage.id]
description = "Permanent failures processing Profiles export to PDND. REQUIRED MANUAL ACTION"
severity = 1
auto_mitigation_enabled = false

window_duration = "PT15M" # Select the interval that's used to group the data points by using the aggregation type function. Choose an Aggregation granularity (period) that's greater than the Frequency of evaluation to reduce the likelihood of missing the first evaluation period of an added time series.
evaluation_frequency = "PT15M" # Select how often the alert rule is to be run. Select a frequency that's smaller than the aggregation granularity to generate a sliding window for the evaluation.

criteria {
query = <<-QUERY
StorageQueueLogs
| where OperationName contains "PutMessage"
| where Uri contains "${local.profile_deletion_failure_queue_name}-poison"
QUERY
operator = "GreaterThan"
threshold = 0
time_aggregation_method = "Count"
}

action {
action_groups = [
data.azurerm_monitor_action_group.error_action_group.id,
]
}

Expand Down