From f0a2f1d322331a5053786c696f018c1d328f6a82 Mon Sep 17 00:00:00 2001 From: Pat Heard Date: Wed, 13 Nov 2024 14:01:34 +0000 Subject: [PATCH 1/2] feat: add Athena workgroup Add an Athena workgroup and query result bucket. This will allow queries to be run against the data lake buckets. --- .../workflows/terragrunt-apply-production.yml | 4 +++ .../workflows/terragrunt-plan-production.yml | 9 +++++++ terragrunt/aws/athena/athena.tf | 16 ++++++++++++ terragrunt/aws/athena/variables.tf | 9 +++++++ terragrunt/aws/buckets/athena.tf | 22 ++++++++++++++++ terragrunt/aws/buckets/outputs.tf | 10 +++++++ terragrunt/aws/glue/iam.tf | 7 ++--- .../env/production/athena/.terraform.lock.hcl | 25 ++++++++++++++++++ .../env/production/athena/terragrunt.hcl | 26 +++++++++++++++++++ 9 files changed, 125 insertions(+), 3 deletions(-) create mode 100644 terragrunt/aws/athena/athena.tf create mode 100644 terragrunt/aws/athena/variables.tf create mode 100644 terragrunt/aws/buckets/athena.tf create mode 100644 terragrunt/env/production/athena/.terraform.lock.hcl create mode 100644 terragrunt/env/production/athena/terragrunt.hcl diff --git a/.github/workflows/terragrunt-apply-production.yml b/.github/workflows/terragrunt-apply-production.yml index 7ea7864..2e055df 100644 --- a/.github/workflows/terragrunt-apply-production.yml +++ b/.github/workflows/terragrunt-apply-production.yml @@ -48,6 +48,10 @@ jobs: working-directory: terragrunt/env/production/glue run: terragrunt apply --terragrunt-non-interactive -auto-approve + - name: Terragrunt apply athena + working-directory: terragrunt/env/production/athena + run: terragrunt apply --terragrunt-non-interactive -auto-approve + - name: Terragrunt apply alarms working-directory: terragrunt/env/production/alarms run: terragrunt apply --terragrunt-non-interactive -auto-approve diff --git a/.github/workflows/terragrunt-plan-production.yml b/.github/workflows/terragrunt-plan-production.yml index a7169c0..0c16640 100644 --- 
a/.github/workflows/terragrunt-plan-production.yml +++ b/.github/workflows/terragrunt-plan-production.yml @@ -59,6 +59,15 @@ jobs: github-token: "${{ secrets.GITHUB_TOKEN }}" terragrunt: "true" + - name: Terragrunt plan athena + uses: cds-snc/terraform-plan@25afd759b2ada46a94b011fab7a81963c4f3a61a # v3.3.0 + with: + directory: "terragrunt/env/production/athena" + comment-delete: "true" + comment-title: "Production: athena 🦉" + github-token: "${{ secrets.GITHUB_TOKEN }}" + terragrunt: "true" + - name: Terragrunt plan alarms uses: cds-snc/terraform-plan@25afd759b2ada46a94b011fab7a81963c4f3a61a # v3.3.0 with: diff --git a/terragrunt/aws/athena/athena.tf b/terragrunt/aws/athena/athena.tf new file mode 100644 index 0000000..d4b4fdc --- /dev/null +++ b/terragrunt/aws/athena/athena.tf @@ -0,0 +1,16 @@ +resource "aws_athena_workgroup" "data_lake" { + name = "data-lake-${var.env}" + + configuration { + enforce_workgroup_configuration = true + publish_cloudwatch_metrics_enabled = true + + result_configuration { + output_location = "s3://${var.athena_bucket_name}/data-lake/" + + encryption_configuration { + encryption_option = "SSE_S3" + } + } + } +} \ No newline at end of file diff --git a/terragrunt/aws/athena/variables.tf b/terragrunt/aws/athena/variables.tf new file mode 100644 index 0000000..8ede7c0 --- /dev/null +++ b/terragrunt/aws/athena/variables.tf @@ -0,0 +1,9 @@ +variable "athena_bucket_arn" { + description = "The ARN of the Athena bucket" + type = string +} + +variable "athena_bucket_name" { + description = "The name of the Athena bucket" + type = string +} \ No newline at end of file diff --git a/terragrunt/aws/buckets/athena.tf b/terragrunt/aws/buckets/athena.tf new file mode 100644 index 0000000..3a1bc61 --- /dev/null +++ b/terragrunt/aws/buckets/athena.tf @@ -0,0 +1,22 @@ +# +# Holds Athena query results +# +module "athena_bucket" { + source = "github.com/cds-snc/terraform-modules//S3?ref=v10.0.0" + bucket_name = "cds-data-lake-athena-${var.env}" + 
billing_tag_value = var.billing_tag_value + + logging = { + target_bucket = module.log_bucket.s3_bucket_id + target_prefix = "athena/" + } + + lifecycle_rule = [ + local.lifecycle_expire_all, + local.lifecycle_remove_noncurrent_versions + ] + + versioning = { + enabled = true + } +} \ No newline at end of file diff --git a/terragrunt/aws/buckets/outputs.tf b/terragrunt/aws/buckets/outputs.tf index 1709fa0..d3a7482 100644 --- a/terragrunt/aws/buckets/outputs.tf +++ b/terragrunt/aws/buckets/outputs.tf @@ -1,3 +1,13 @@ +output "athena_bucket_arn" { + description = "ARN of the S3 Athena query result bucket." + value = module.athena_bucket.s3_bucket_arn +} + +output "athena_bucket_name" { + description = "Name of the S3 Athena query result bucket." + value = module.athena_bucket.s3_bucket_id +} + output "curated_bucket_arn" { description = "ARN of the S3 Curated data bucket." value = module.curated_bucket.s3_bucket_arn diff --git a/terragrunt/aws/glue/iam.tf b/terragrunt/aws/glue/iam.tf index 4acaa71..14a82f8 100644 --- a/terragrunt/aws/glue/iam.tf +++ b/terragrunt/aws/glue/iam.tf @@ -121,9 +121,9 @@ data "aws_iam_policy_document" "glue_kms" { "logs:AssociateKmsKey" ] resources = [ - "arn:aws:logs:${var.region}:${var.account_id}:log-group:${local.glue_crawler_log_group_name}*", - "arn:aws:logs:${var.region}:${var.account_id}:log-group:${local.glue_etl_log_group_name}*", - "arn:aws:logs:${var.region}:${var.account_id}:log-group:/aws-glue/sessions/*", + "arn:aws:logs:${var.region}:${var.account_id}:log-group:/aws-glue/crawlers*", + "arn:aws:logs:${var.region}:${var.account_id}:log-group:/aws-glue/jobs*", + "arn:aws:logs:${var.region}:${var.account_id}:log-group:/aws-glue/sessions*" ] } } @@ -133,6 +133,7 @@ data "aws_iam_policy_document" "s3_write_data_lake" { sid = "WriteDataLakeS3TransformedBuckets" actions = [ "s3:PutObject", + "s3:DeleteObject" ] resources = [ "${var.curated_bucket_arn}/*", diff --git a/terragrunt/env/production/athena/.terraform.lock.hcl 
b/terragrunt/env/production/athena/.terraform.lock.hcl new file mode 100644 index 0000000..584839c --- /dev/null +++ b/terragrunt/env/production/athena/.terraform.lock.hcl @@ -0,0 +1,25 @@ +# This file is maintained automatically by "terraform init". +# Manual edits may be lost in future updates. + +provider "registry.terraform.io/hashicorp/aws" { + version = "5.75.1" + constraints = "~> 5.0" + hashes = [ + "h1:ijX5mwbQZOnPVQGxxVsJs6Yh6h2w+V3mQmKznB6pIkw=", + "zh:1075825e7311a8d2d233fd453a173910e891b0320e8a7698af44d1f90b02621d", + "zh:203c5d09a03fcaa946defb8459f01227f2fcda07df768f74777beb328d6751ae", + "zh:21bc79ccb09bfdeb711a3a5226c6c4a457ac7c4bb781dbda6ade7be38461739f", + "zh:2bac969855b62a0ff6716954be29387a1f9793626059122cda4681206396e309", + "zh:4b65ea5b51058f05b9ec8797f76184e19e5b38a609029fe2226af3fa4ad289b3", + "zh:5065d7df357fb3ee2b0a2520bbcff6335c0c47bfb9e8e9932bad088c3ab7efd3", + "zh:678a4015a4cd26af5c2b30dfd9290b8a01e900668fa0fec6585dfd1838f1cebd", + "zh:6ddc5dfdd4a0dddca027db99a7bfa9a0978933119d63af81acb6020728405119", + "zh:98c0d48b09842c444dbcbddd279e5b5b1e44113951817a8ecc28896bb4ad1dd7", + "zh:9b12af85486a96aedd8d7984b0ff811a4b42e3d88dad1a3fb4c0b580d04fa425", + "zh:aad169fea072842c0b54f1ff95f1ec6558d6c5af3ea4c159308583db59003b09", + "zh:bd2625ed8e1ff29ac6ed3a810d7b68a090add5fcb2fce4122669bd37e1eb9f1d", + "zh:c6f57625e26a6ef1ffb49bfa0e6148496ad12d80c857f6bb222e21f293a2a78a", + "zh:c7cd085326c5eb88804b11a4bc0fbc8376f06138f4b9624fb25cd06ea8687cdd", + "zh:f60c98139f983817d4d08f4138b1e53f31f91176ff638631e8dd38b6de36fce0", + ] +} diff --git a/terragrunt/env/production/athena/terragrunt.hcl b/terragrunt/env/production/athena/terragrunt.hcl new file mode 100644 index 0000000..a0f5efa --- /dev/null +++ b/terragrunt/env/production/athena/terragrunt.hcl @@ -0,0 +1,26 @@ +terraform { + source = "../../../aws//athena" +} + +dependencies { + paths = ["../buckets"] +} + +dependency "buckets" { + config_path = "../buckets" + mock_outputs_merge_strategy_with_state = 
"shallow" + mock_outputs_allowed_terraform_commands = ["init", "fmt", "validate", "plan", "show"] + mock_outputs = { + athena_bucket_arn = "arn:aws:s3:::mock-athena-bucket" + athena_bucket_name = "mock-athena-bucket" + } +} + +inputs = { + athena_bucket_arn = dependency.buckets.outputs.athena_bucket_arn + athena_bucket_name = dependency.buckets.outputs.athena_bucket_name +} + +include { + path = find_in_parent_folders() +} From efe27a06327250943abc34f65b40639d4b3421fc Mon Sep 17 00:00:00 2001 From: Pat Heard Date: Wed, 13 Nov 2024 14:25:10 +0000 Subject: [PATCH 2/2] fix: remove ARN variable --- terragrunt/aws/athena/variables.tf | 5 ----- terragrunt/env/production/athena/terragrunt.hcl | 2 -- 2 files changed, 7 deletions(-) diff --git a/terragrunt/aws/athena/variables.tf b/terragrunt/aws/athena/variables.tf index 8ede7c0..383682a 100644 --- a/terragrunt/aws/athena/variables.tf +++ b/terragrunt/aws/athena/variables.tf @@ -1,8 +1,3 @@ -variable "athena_bucket_arn" { - description = "The ARN of the Athena bucket" - type = string -} - variable "athena_bucket_name" { description = "The name of the Athena bucket" type = string diff --git a/terragrunt/env/production/athena/terragrunt.hcl b/terragrunt/env/production/athena/terragrunt.hcl index a0f5efa..a6dca79 100644 --- a/terragrunt/env/production/athena/terragrunt.hcl +++ b/terragrunt/env/production/athena/terragrunt.hcl @@ -11,13 +11,11 @@ dependency "buckets" { mock_outputs_merge_strategy_with_state = "shallow" mock_outputs_allowed_terraform_commands = ["init", "fmt", "validate", "plan", "show"] mock_outputs = { - athena_bucket_arn = "arn:aws:s3:::mock-athena-bucket" athena_bucket_name = "mock-athena-bucket" } } inputs = { - athena_bucket_arn = dependency.buckets.outputs.athena_bucket_arn athena_bucket_name = dependency.buckets.outputs.athena_bucket_name }