Skip to content

Commit

Permalink
[#IOPID-1927] Alert on service preference poison queue for fn-elt (#1132)
Browse files Browse the repository at this point in the history
Co-authored-by: Greta Quadrati <[email protected]>
  • Loading branch information
BurnedMarshal and gquadrati authored Aug 29, 2024
1 parent 11091f7 commit c8c901e
Show file tree
Hide file tree
Showing 4 changed files with 68 additions and 3 deletions.
5 changes: 5 additions & 0 deletions src/domains/elt/_modules/function_apps/data.tf
Original file line number Diff line number Diff line change
Expand Up @@ -127,3 +127,8 @@ data "azurerm_storage_account" "storage_assets_cdn" {
name = replace(format("%s-stcdnassets", var.project), "-", "")
resource_group_name = local.resource_group_name_common
}

# Read-only lookup of the storage account whose name is published by the
# function_elt module (storage_account_internal_function_name), resolved in
# the resource group passed to this module.
data "azurerm_storage_account" "function_elt_internal_storage" {
  resource_group_name = var.resource_group_name
  name                = module.function_elt.storage_account_internal_function_name
}
6 changes: 3 additions & 3 deletions src/domains/elt/_modules/function_apps/function_app_elt.tf
Original file line number Diff line number Diff line change
Expand Up @@ -94,7 +94,7 @@ locals {
MESSAGES_FAILURE_QUEUE_NAME = "pdnd-io-cosmosdb-messages-failure"
MESSAGE_STATUS_FAILURE_QUEUE_NAME = "pdnd-io-cosmosdb-message-status-failure"
SERVICES_FAILURE_QUEUE_NAME = "pdnd-io-cosmosdb-services-failure"
SERVICE_PREFERENCES_FAILURE_QUEUE_NAME = "pdnd-io-cosmosdb-service-preferences-failure"
SERVICE_PREFERENCES_FAILURE_QUEUE_NAME = local.service_preferences_failure_queue_name
PROFILES_FAILURE_QUEUE_NAME = "pdnd-io-cosmosdb-profiles-failure"

INTERNAL_TEST_FISCAL_CODES = module.tests.test_users.all
Expand Down Expand Up @@ -166,8 +166,8 @@ module "function_elt" {
"${local.function_elt.app_settings.MESSAGE_STATUS_FAILURE_QUEUE_NAME}-poison",
local.function_elt.app_settings.SERVICES_FAILURE_QUEUE_NAME,
"${local.function_elt.app_settings.SERVICES_FAILURE_QUEUE_NAME}-poison",
local.function_elt.app_settings.SERVICE_PREFERENCES_FAILURE_QUEUE_NAME,
"${local.function_elt.app_settings.SERVICE_PREFERENCES_FAILURE_QUEUE_NAME}-poison",
local.service_preferences_failure_queue_name,
"${local.service_preferences_failure_queue_name}-poison",
local.function_elt.app_settings.PROFILES_FAILURE_QUEUE_NAME,
"${local.function_elt.app_settings.PROFILES_FAILURE_QUEUE_NAME}-poison"
],
Expand Down
2 changes: 2 additions & 0 deletions src/domains/elt/_modules/function_apps/locals.tf
Original file line number Diff line number Diff line change
Expand Up @@ -15,4 +15,6 @@ locals {
event_hub_connection = "${format("%s-evh-ns", var.project)}.servicebus.windows.net:9093"

pn_service_id = "01G40DWQGKY5GRWSNM4303VNRP"

service_preferences_failure_queue_name = "pdnd-io-cosmosdb-service-preferences-failure"
}
58 changes: 58 additions & 0 deletions src/domains/elt/_modules/function_apps/monitor.tf
Original file line number Diff line number Diff line change
@@ -0,0 +1,58 @@
# Looks up an existing monitor action group in the common resource group.
# Its name is the project prefix with every "-" removed, followed by the
# literal suffix "quarantineerror".
data "azurerm_monitor_action_group" "quarantine_error_action_group" {
  name                = format("%squarantineerror", replace(var.project, "-", ""))
  resource_group_name = local.resource_group_name_common
}

# Diagnostic setting attached to the queue service ("/queueServices/default")
# of the fn-elt internal storage account. It sends the "StorageWrite" log
# category to the Log Analytics workspace referenced by the Application
# Insights data source; both metric categories are explicitly disabled.
resource "azurerm_monitor_diagnostic_setting" "queue_diagnostic_setting" {
  name                       = format("%s-fnelt-internal-st-queue-ds-01", var.project)
  log_analytics_workspace_id = data.azurerm_application_insights.application_insights.workspace_id
  target_resource_id         = format("%s/queueServices/default", data.azurerm_storage_account.function_elt_internal_storage.id)

  enabled_log {
    category = "StorageWrite"
  }

  # One disabled `metric` block per category.
  dynamic "metric" {
    for_each = toset(["Capacity", "Transaction"])

    content {
      category = metric.value
      enabled  = false
    }
  }
}


# Log-based alert on the poison queue of the Service Preferences failure queue.
# The query selects StorageQueueLogs rows whose operation contains "PutMessage"
# and whose URI contains the poison queue name; the alert fires when the row
# count in the evaluation window is greater than zero.
#
# The queue name comes from local.service_preferences_failure_queue_name in
# both the alert name and the query, so the two cannot drift apart.
resource "azurerm_monitor_scheduled_query_rules_alert_v2" "service_preferences_failure_alert_rule" {
  enabled             = true
  name                = "[CITIZEN-AUTH | iopfneltsdt] Failures on ${local.service_preferences_failure_queue_name}-poison"
  resource_group_name = var.resource_group_name
  location            = var.location

  scopes      = [data.azurerm_storage_account.function_elt_internal_storage.id]
  description = "Permanent failures processing Service Preferences export to PDND. REQUIRED MANUAL ACTION"
  severity    = 1

  # Fired alerts are not resolved automatically.
  auto_mitigation_enabled = false

  # window_duration: interval used to group data points for the aggregation.
  # evaluation_frequency: how often the rule is run.
  # NOTE(review): Azure guidance suggests a window larger than the frequency to
  # get a sliding window; both are PT15M here, so windows do not overlap.
  window_duration      = "PT15M"
  evaluation_frequency = "PT15M"

  criteria {
    query                   = <<-QUERY
      StorageQueueLogs
      | where OperationName contains "PutMessage"
      | where Uri contains "${local.service_preferences_failure_queue_name}-poison"
    QUERY
    operator                = "GreaterThan"
    threshold               = 0
    time_aggregation_method = "Count"
  }

  action {
    action_groups = [
      data.azurerm_monitor_action_group.quarantine_error_action_group.id,
    ]
  }

  tags = var.tags
}

0 comments on commit c8c901e

Please sign in to comment.