diff --git a/.devcontainer/devcontainer.json b/.devcontainer/devcontainer.json
new file mode 100644
index 0000000..92685e6
--- /dev/null
+++ b/.devcontainer/devcontainer.json
@@ -0,0 +1,28 @@
+{
+  "name": "data-lake",
+  "image": "mcr.microsoft.com/devcontainers/base:bullseye@sha256:c8955c35eff4aaf6078713a66cb83121fea893bd8fd908703733e4a38d54448c",
+  "containerEnv": {
+    "SHELL": "/bin/zsh"
+  },
+  "features": {
+    "ghcr.io/devcontainers/features/aws-cli:1": {
+      "version": "latest"
+    },
+    "ghcr.io/devcontainers/features/terraform:1": {
+      "version": "1.9.8",
+      "terragrunt": "0.68.6"
+    }
+  },
+  "customizations": {
+    "vscode": {
+      "extensions": [
+        "hashicorp.terraform",
+        "github.copilot",
+        "github.vscode-github-actions",
+        "hashicorp.hcl",
+        "redhat.vscode-yaml"
+      ]
+    }
+  },
+  "remoteUser": "vscode"
+}
diff --git a/.github/workflows/terraform-security-scan.yml b/.github/workflows/terraform-security-scan.yml
new file mode 100644
index 0000000..5b09176
--- /dev/null
+++ b/.github/workflows/terraform-security-scan.yml
@@ -0,0 +1,33 @@
+
+name: "Terraform security scan"
+
+on:
+  push:
+    branches:
+      - main
+    paths:
+      - "terragrunt/**"
+      - ".github/workflows/terraform-security-scan.yml"
+  pull_request:
+    paths:
+      - "terragrunt/**"
+      - ".github/workflows/terraform-security-scan.yml"
+
+# Least-privilege token: the scan only needs to read the checked-out code.
+permissions:
+  contents: read
+
+jobs:
+  terraform-security-scan:
+    runs-on: ubuntu-latest
+    steps:
+      - name: Checkout
+        uses: actions/checkout@eef61447b9ff4aafe5dcd4e0bbf5d482be7e7871 # v4.2.1
+
+      - name: Checkov security scan
+        uses: bridgecrewio/checkov-action@99bb2caf247dfd9f03cf984373bc6043d4e32ebf # v12.1347.0
+        with:
+          directory: terragrunt/aws
+          framework: terraform
+          output_format: cli
+          soft_fail: false
diff --git a/.github/workflows/terragrunt-apply-production.yml b/.github/workflows/terragrunt-apply-production.yml
new file mode 100644
index 0000000..7411717
--- /dev/null
+++ b/.github/workflows/terragrunt-apply-production.yml
@@ -0,0 +1,53 @@
+name: "Terragrunt apply PRODUCTION"
+
+on:
+  push:
+    branches:
+      - main
+    paths:
+      - "terragrunt/aws/**"
+      - "terragrunt/env/production/**"
+      - "terragrunt/env/common/**"
+      - "terragrunt/env/terragrunt.hcl"
+      - ".github/workflows/terragrunt-apply-production.yml"
+
+env:
+  AWS_REGION: ca-central-1
+  CONFTEST_VERSION: 0.27.0
+  TERRAFORM_VERSION: 1.9.8
+  TERRAGRUNT_VERSION: 0.68.6
+  TF_INPUT: false
+
+permissions:
+  id-token: write
+  contents: read
+
+jobs:
+  terragrunt-apply-production:
+    runs-on: ubuntu-latest
+    steps:
+      - name: Checkout
+        uses: actions/checkout@eef61447b9ff4aafe5dcd4e0bbf5d482be7e7871 # v4.2.1
+
+      - name: Setup terraform tools
+        uses: cds-snc/terraform-tools-setup@v1
+
+      - name: Configure AWS credentials using OIDC
+        uses: aws-actions/configure-aws-credentials@e3dd6a429d7300a6a4c196c26e071d42e0343502 # v4.0.2
+        with:
+          role-to-assume: arn:aws:iam::739275439843:role/data-lake-apply
+          role-session-name: TFApply
+          aws-region: ${{ env.AWS_REGION }}
+
+      - name: Terragrunt apply buckets
+        working-directory: terragrunt/env/production/buckets
+        run: terragrunt apply --terragrunt-non-interactive -auto-approve
+
+      - name: Report deployment to Sentinel
+        if: always()
+        uses: cds-snc/sentinel-forward-data-action@main
+        with:
+          input_data: '{"product": "data-lake", "sha": "${{ github.sha }}", "version": "Infrastructure ${{ github.sha }}", "repository": "${{ github.repository }}", "environment": "production", "status": "${{ job.status }}"}'
+          log_type: CDS_Product_Deployment_Data
+          log_analytics_workspace_id: ${{ secrets.LOG_ANALYTICS_WORKSPACE_ID }}
+          log_analytics_workspace_key: ${{ secrets.LOG_ANALYTICS_WORKSPACE_KEY }}
diff --git a/.github/workflows/terragrunt-plan-production.yml b/.github/workflows/terragrunt-plan-production.yml
new file mode 100644
index 0000000..d31c917
--- /dev/null
+++ b/.github/workflows/terragrunt-plan-production.yml
@@ -0,0 +1,50 @@
+name: "Terragrunt plan PRODUCTION"
+
+on:
+  pull_request:
+    branches:
+      - main
+    paths:
+      - "terragrunt/aws/**"
+      - "terragrunt/env/production/**"
+      - "terragrunt/env/common/**"
+      - "terragrunt/env/terragrunt.hcl"
+      - ".github/workflows/terragrunt-plan-production.yml"
+
+env:
+  AWS_REGION: ca-central-1
+  CONFTEST_VERSION: 0.27.0
+  TERRAFORM_VERSION: 1.9.8
+  TERRAGRUNT_VERSION: 0.68.6
+  TF_INPUT: false
+
+permissions:
+  id-token: write
+  contents: read
+  pull-requests: write
+
+jobs:
+  terragrunt-plan-production:
+    runs-on: ubuntu-latest
+    steps:
+      - name: Checkout
+        uses: actions/checkout@eef61447b9ff4aafe5dcd4e0bbf5d482be7e7871 # v4.2.1
+
+      - name: Setup terraform tools
+        uses: cds-snc/terraform-tools-setup@v1
+
+      - name: Configure AWS credentials using OIDC
+        uses: aws-actions/configure-aws-credentials@e3dd6a429d7300a6a4c196c26e071d42e0343502 # v4.0.2
+        with:
+          role-to-assume: arn:aws:iam::739275439843:role/data-lake-plan
+          role-session-name: TFPlan
+          aws-region: ${{ env.AWS_REGION }}
+
+      - name: Terragrunt plan buckets
+        uses: cds-snc/terraform-plan@25afd759b2ada46a94b011fab7a81963c4f3a61a # v3.3.0
+        with:
+          directory: "terragrunt/env/production/buckets"
+          comment-delete: "true"
+          comment-title: "Production: buckets"
+          github-token: "${{ secrets.GITHUB_TOKEN }}"
+          terragrunt: "true"
diff --git a/.github/workflows/workflow-failure.yml b/.github/workflows/workflow-failure.yml
new file mode 100644
index 0000000..79dd6d6
--- /dev/null
+++ b/.github/workflows/workflow-failure.yml
@@ -0,0 +1,23 @@
+name: Workflow failure
+
+on:
+  workflow_run:
+    workflows:
+      - "Terragrunt apply PRODUCTION"
+    types:
+      - completed
+
+jobs:
+  on-failure:
+    runs-on: ubuntu-latest
+    if: github.event.workflow_run.conclusion == 'failure'
+    steps:
+      - name: Notify Slack
+        # Event data and the webhook are passed via env vars so they are never interpolated into the script text.
+        env:
+          SLACK_WEBHOOK: ${{ secrets.PRODUCTION_SLACK_WEBHOOK_OPS }}
+          WORKFLOW_NAME: ${{ github.event.workflow.name }}
+          WORKFLOW_URL: ${{ github.event.workflow_run.html_url }}
+        run: |
+          json=$(jq -cn --arg url "$WORKFLOW_URL" --arg name "$WORKFLOW_NAME" '{blocks:[{type:"section",text:{type:"mrkdwn",text:(":red: Data Lake workflow failed: <" + $url + "|" + $name + ">")}}]}')
+          curl -X POST -H 'Content-type: application/json' --data "$json" "$SLACK_WEBHOOK"
diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..4c3b952
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1,5 @@
+# Terraform
+.terragrunt-cache
+.terraform
+*.tfstate
+*.tfvars
diff --git a/README.md b/README.md
index 44263e8..2d76d02 100644
--- a/README.md
+++ b/README.md
@@ -1,10 +1,2 @@
-# Generic Project Template
-
-This repository provides some base files for setting up a repository at
-CDS. Plan is to create more project template for specific technologies:
-
-- project-template-terraform
-- project-template-python
-- project-template-nodejs
-
-Note that default community health files are maintained at https://github.com/cds-snc/.github
+# Data Lake
+This repository holds the Terraform and AWS Glue jobs that manage the Platform Data Lake.
diff --git a/terragrunt/aws/buckets/s3.tf b/terragrunt/aws/buckets/s3.tf
new file mode 100644
index 0000000..89005db
--- /dev/null
+++ b/terragrunt/aws/buckets/s3.tf
@@ -0,0 +1,71 @@
+#
+# Holds exported data before transformation
+#
+module "raw_bucket" {
+  source            = "github.com/cds-snc/terraform-modules//S3?ref=v9.6.7"
+  bucket_name       = "cds-data-lake-raw-${var.env}"
+  billing_tag_value = var.billing_tag_value
+
+  logging = {
+    target_bucket = module.log_bucket.s3_bucket_id
+    target_prefix = "raw/"
+  }
+
+  versioning = {
+    enabled = true
+  }
+}
+
+#
+# ETL jobs process the `Raw` bucket and store the transformed data here
+#
+module "transformed_bucket" {
+  source            = "github.com/cds-snc/terraform-modules//S3?ref=v9.6.7"
+  bucket_name       = "cds-data-lake-transformed-${var.env}"
+  billing_tag_value = var.billing_tag_value
+
+  logging = {
+    target_bucket = module.log_bucket.s3_bucket_id
+    target_prefix = "transformed/"
+  }
+
+  versioning = {
+    enabled = true
+  }
+}
+
+#
+# Holds enriched data that has been created by combining multiple transformed datasets
+#
+module "curated_bucket" {
+  source            = "github.com/cds-snc/terraform-modules//S3?ref=v9.6.7"
+  bucket_name       = "cds-data-lake-curated-${var.env}"
+  billing_tag_value = var.billing_tag_value
+
+  logging = {
+    target_bucket = module.log_bucket.s3_bucket_id
+    target_prefix = "curated/"
+  }
+
+  versioning = {
+    enabled = true
+  }
+}
+
+#
+# Bucket access logs, stored for 30 days
+#
+module "log_bucket" {
+  source            = "github.com/cds-snc/terraform-modules//S3_log_bucket?ref=v9.6.7"
+  bucket_name       = "cds-data-lake-bucket-logs-${var.env}"
+  versioning_status = "Enabled"
+
+  lifecycle_rule = {
+    "lifecycle_rule" : {
+      "enabled" : "true",
+      "expiration" : { "days" : "30" }
+    }
+  }
+
+  billing_tag_value = var.billing_tag_value
+}
diff --git a/terragrunt/env/common/common_variables.tf b/terragrunt/env/common/common_variables.tf
new file mode 100644
index 0000000..2474350
--- /dev/null
+++ b/terragrunt/env/common/common_variables.tf
@@ -0,0 +1,19 @@
+variable "account_id" {
+  description = "(Required) The account ID to perform actions on."
+  type        = string
+}
+
+variable "billing_tag_value" {
+  description = "(Required) the value we use to track billing"
+  type        = string
+}
+
+variable "env" {
+  description = "(Required) The current running environment"
+  type        = string
+}
+
+variable "region" {
+  description = "(Required) The region to build infra in"
+  type        = string
+}
diff --git a/terragrunt/env/common/provider.tf b/terragrunt/env/common/provider.tf
new file mode 100644
index 0000000..7e88e51
--- /dev/null
+++ b/terragrunt/env/common/provider.tf
@@ -0,0 +1,15 @@
+
+terraform {
+  required_version = "1.9.8"
+  required_providers {
+    aws = {
+      source  = "hashicorp/aws"
+      version = "~> 5.0"
+    }
+  }
+}
+
+provider "aws" {
+  region              = var.region
+  allowed_account_ids = [var.account_id]
+}
diff --git a/terragrunt/env/production/buckets/.terraform.lock.hcl b/terragrunt/env/production/buckets/.terraform.lock.hcl
new file mode 100644
index 0000000..be90c5a
--- /dev/null
+++ b/terragrunt/env/production/buckets/.terraform.lock.hcl
@@ -0,0 +1,25 @@
+# This file is maintained automatically by "terraform init".
+# Manual edits may be lost in future updates.
+
+provider "registry.terraform.io/hashicorp/aws" {
+  version     = "5.73.0"
+  constraints = ">= 4.9.0, ~> 5.0"
+  hashes = [
+    "h1:86a3PyP74xHVVcdffuvKIPSA6PCeQBJddxxnxtCIVBI=",
+    "zh:0d24edc51ab6600f56d759831658a9d7a8f69b53900546b75038fc8e3f312406",
+    "zh:1f8b8414f710a8c5a8777cb1ef1cad1cb4293bc035deb804734a8ec698b0850d",
+    "zh:2cf76b03564051ee86ef5fbdaea1949e3af549f8836e56371fe94335cf795e1c",
+    "zh:2ffe05c62b4ae6292dda66cd3a3cbe3e290a1a04369f3e6f74812e885cf3f2f0",
+    "zh:3564069d9bc918e5bded252d65b6a8758d08b309e1ac54bf7c8e5947a94cdadc",
+    "zh:4eb5395d52cfcb3c78e86c4ca3759bf9736e0e8dfa6955b0e1a59d9a7f41d805",
+    "zh:6cd14cbabbcf8b1c15fa73f9ebba4d4df41215ef92bf8d14a3780a7cb571e5c4",
+    "zh:6f7dc212dee1be2edb4620d352d9b0ea759744b5be08b84012a7621efa262052",
+    "zh:7468a490d6df04a401f49422c86b46ef91eba00878cc9a5ec3ee4a12fe9447d0",
+    "zh:9b12af85486a96aedd8d7984b0ff811a4b42e3d88dad1a3fb4c0b580d04fa425",
+    "zh:b440ff1be9fc62235b2dcb522dd922cefe751065ba4a601415130462e79fb68e",
+    "zh:d53dfd7311d8f130f0ce3184ed50461c34086d3490913a0d80d63574dac104a6",
+    "zh:de9a130dd684aed5b89edc7ce44aef37fa38eca06549035cf387cde9d3937432",
+    "zh:e0922d81fbed02062a74ea126d3cc6830fa0c8eac92108825d1120a262980831",
+    "zh:fdd6cdabcf5e9bedb3a419ac18bd12b5b02d8371ba0fb2a6123420937354c8e1",
+  ]
+}
diff --git a/terragrunt/env/production/buckets/terragrunt.hcl b/terragrunt/env/production/buckets/terragrunt.hcl
new file mode 100644
index 0000000..d79dcc5
--- /dev/null
+++ b/terragrunt/env/production/buckets/terragrunt.hcl
@@ -0,0 +1,7 @@
+include {
+  path = find_in_parent_folders()
+}
+
+terraform {
+  source = "../../../aws//buckets"
+}
diff --git a/terragrunt/env/production/env_vars.hcl b/terragrunt/env/production/env_vars.hcl
new file mode 100644
index 0000000..622f40e
--- /dev/null
+++ b/terragrunt/env/production/env_vars.hcl
@@ -0,0 +1,5 @@
+inputs = {
+  account_id        = "739275439843"
+  env               = "production"
+  billing_tag_value = "PlatformDataLake"
+}
diff --git a/terragrunt/env/terragrunt.hcl b/terragrunt/env/terragrunt.hcl
new file mode 100644
index 0000000..6bbfd0e
--- /dev/null
+++ b/terragrunt/env/terragrunt.hcl
@@ -0,0 +1,37 @@
+locals {
+  vars = read_terragrunt_config("../env_vars.hcl")
+}
+
+inputs = {
+  account_id        = local.vars.inputs.account_id
+  billing_tag_value = local.vars.inputs.billing_tag_value
+  env               = local.vars.inputs.env
+  region            = "ca-central-1"
+}
+
+remote_state {
+  backend = "s3"
+  generate = {
+    path      = "backend.tf"
+    if_exists = "overwrite_terragrunt"
+  }
+  config = {
+    encrypt        = true
+    bucket         = "cds-data-lake-tfstate-${local.vars.inputs.env}"
+    dynamodb_table = "terraform-state-lock-dynamo"
+    region         = "ca-central-1"
+    key            = "${path_relative_to_include()}/terraform.tfstate"
+  }
+}
+
+generate "provider" {
+  path      = "provider.tf"
+  if_exists = "overwrite"
+  contents  = file("./common/provider.tf")
+}
+
+generate "common_variables" {
+  path      = "common_variables.tf"
+  if_exists = "overwrite"
+  contents  = file("./common/common_variables.tf")
+}