From dd44d2264b6c24e8b64e1086f7d0e8702ec05ac3 Mon Sep 17 00:00:00 2001
From: jbeemster
Date: Thu, 21 Sep 2023 22:41:14 +1000
Subject: [PATCH] Add Azure Lake Loader as a destination (closes #78)

---
Reviewer notes (free-form text placed between the "---" marker and the first
"diff --git" header; it is not part of the commit message or of any hunk):

- NOTE(review): the line breaks, blank context lines and "=" alignment below
  were reconstructed from a copy of this patch that had lost its line
  structure. The hunk line counts are consistent with this layout, but the
  exact whitespace is inferred - TODO confirm against the original commit
  before applying.
- NOTE(review): the "From:" header carries no e-mail address; presumably it
  was dropped by the same copy step - confirm.
- The patch adds two module calls in target_lake_loader.tf,
  "lake_storage_container" and "lake_loader". Both use the same guard,
  `count = var.lake_enabled ? 1 : 0`, so they are created or skipped together
  and `module.lake_storage_container[0]` is referenced only from a module
  gated by the same flag.
- "lake_loader" is tied to the container in two ways: through the reference
  in `storage_container_name` and through the explicit `depends_on` entry.
- The loader is given the read-only connection string for the enriched topic
  and the read-write connection string for the bad topic.
- The container name is the fixed string "lake-container", while the loader
  name is derived from `var.prefix`.
- `lake_enabled` is declared with `default = false` in variables.tf and is
  also set to `false` in terraform.tfvars, so the lake target stays off until
  it is enabled explicitly.

 terraform/azure/pipeline/README.md            |  3 ++
 .../azure/pipeline/target_lake_loader.tf      | 42 +++++++++++++++++++
 terraform/azure/pipeline/terraform.tfvars     |  5 +++
 terraform/azure/pipeline/variables.tf         |  9 +++-
 4 files changed, 58 insertions(+), 1 deletion(-)
 create mode 100644 terraform/azure/pipeline/target_lake_loader.tf

diff --git a/terraform/azure/pipeline/README.md b/terraform/azure/pipeline/README.md
index dd0bb1e..8df9577 100644
--- a/terraform/azure/pipeline/README.md
+++ b/terraform/azure/pipeline/README.md
@@ -19,6 +19,8 @@ No providers.
 | [eh\_namespace](#module\_eh\_namespace) | snowplow-devops/event-hub-namespace/azurerm | 0.1.1 |
 | [enrich\_eh](#module\_enrich\_eh) | snowplow-devops/enrich-event-hub-vmss/azurerm | 0.1.2 |
 | [enriched\_eh\_topic](#module\_enriched\_eh\_topic) | snowplow-devops/event-hub/azurerm | 0.1.1 |
+| [lake\_loader](#module\_lake\_loader) | snowplow-devops/lake-loader-vmss/azurerm | 0.1.1 |
+| [lake\_storage\_container](#module\_lake\_storage\_container) | snowplow-devops/storage-container/azurerm | 0.1.1 |
 | [raw\_eh\_topic](#module\_raw\_eh\_topic) | snowplow-devops/event-hub/azurerm | 0.1.1 |
 | [sf\_loader](#module\_sf\_loader) | snowplow-devops/snowflake-loader-vmss/azurerm | 0.1.1 |
 | [sf\_message\_queue\_eh\_topic](#module\_sf\_message\_queue\_eh\_topic) | snowplow-devops/event-hub/azurerm | 0.1.1 |
@@ -43,6 +45,7 @@ No resources.
 | [storage\_account\_name](#input\_storage\_account\_name) | The name of the Storage Account the data will be loaded into | `string` | n/a | yes |
 | [subnet\_id\_lb](#input\_subnet\_id\_lb) | The ID of the subnet to deploy the load balancer into (e.g. collector-agw1) | `string` | n/a | yes |
 | [subnet\_id\_servers](#input\_subnet\_id\_servers) | The ID of the subnet to deploy the servers into (e.g. pipeline1) | `string` | n/a | yes |
+| [lake\_enabled](#input\_lake\_enabled) | Whether to load all data into a Storage Container to build a data-lake based on Delta format | `bool` | `false` | no |
 | [snowflake\_account](#input\_snowflake\_account) | Snowflake account to use | `string` | `""` | no |
 | [snowflake\_database](#input\_snowflake\_database) | Snowflake database name | `string` | `""` | no |
 | [snowflake\_enabled](#input\_snowflake\_enabled) | Whether to enable loading into a Snowflake Database | `bool` | `false` | no |
diff --git a/terraform/azure/pipeline/target_lake_loader.tf b/terraform/azure/pipeline/target_lake_loader.tf
new file mode 100644
index 0000000..e8f5618
--- /dev/null
+++ b/terraform/azure/pipeline/target_lake_loader.tf
@@ -0,0 +1,42 @@
+module "lake_storage_container" {
+  source  = "snowplow-devops/storage-container/azurerm"
+  version = "0.1.1"
+
+  count = var.lake_enabled ? 1 : 0
+
+  name                 = "lake-container"
+  storage_account_name = local.storage_account_name
+}
+
+module "lake_loader" {
+  source  = "snowplow-devops/lake-loader-vmss/azurerm"
+  version = "0.1.1"
+
+  count = var.lake_enabled ? 1 : 0
+
+  name                = "${var.prefix}-lake-loader"
+  resource_group_name = var.resource_group_name
+  subnet_id           = var.subnet_id_servers
+
+  enriched_topic_name              = module.enriched_eh_topic.name
+  enriched_topic_connection_string = module.enriched_eh_topic.read_only_primary_connection_string
+  bad_topic_name                   = module.bad_1_eh_topic.name
+  bad_topic_connection_string      = module.bad_1_eh_topic.read_write_primary_connection_string
+  eh_namespace_name                = module.eh_namespace.name
+  eh_namespace_broker              = module.eh_namespace.broker
+
+  storage_account_name   = local.storage_account_name
+  storage_container_name = module.lake_storage_container[0].name
+
+  ssh_public_key   = var.ssh_public_key
+  ssh_ip_allowlist = var.ssh_ip_allowlist
+
+  telemetry_enabled = var.telemetry_enabled
+  user_provided_id  = var.user_provided_id
+
+  custom_iglu_resolvers = local.custom_iglu_resolvers
+
+  tags = var.tags
+
+  depends_on = [module.lake_storage_container]
+}
diff --git a/terraform/azure/pipeline/terraform.tfvars b/terraform/azure/pipeline/terraform.tfvars
index e8636ed..0a68241 100644
--- a/terraform/azure/pipeline/terraform.tfvars
+++ b/terraform/azure/pipeline/terraform.tfvars
@@ -51,6 +51,11 @@ snowflake_warehouse = ""
 # This controls how often data will be loading into Snowflake
 snowflake_transformer_window_period_min = 1
 
+# --- Target: Lake
+# Follow the guide to get input values for the loader:
+# https://docs.snowplow.io/docs/getting-started-on-snowplow-open-source/quick-start/
+lake_enabled = false
+
 # --- ADVANCED CONFIGURATION ZONE --- #
 
 # Telemetry principles: https://docs.snowplowanalytics.com/docs/open-source-quick-start/what-is-the-quick-start-for-open-source/telemetry-principles/
diff --git a/terraform/azure/pipeline/variables.tf b/terraform/azure/pipeline/variables.tf
index fbd0525..525e90b 100644
--- a/terraform/azure/pipeline/variables.tf
+++ b/terraform/azure/pipeline/variables.tf
@@ -96,7 +96,6 @@ variable "snowflake_enabled" {
   type        = bool
 }
 
-
 variable "snowflake_account" {
   description = "Snowflake account to use"
   type        = string
@@ -145,3 +144,11 @@ variable "snowflake_transformer_window_period_min" {
   type        = number
   default     = 5
 }
+
+# --- Target: Lake
+
+variable "lake_enabled" {
+  description = "Whether to load all data into a Storage Container to build a data-lake based on Delta format"
+  default     = false
+  type        = bool
+}