Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 8 additions & 0 deletions src/nebari_mlflow_plugin/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,8 @@ class MlflowConfigGCP(Base):

class MlflowProvidersInputSchema(Base):
enabled: bool = True
force_destroy_storage: bool = False # defaults to False to prevent data loss
force_destroy_db_creds: bool = False # defaults to False to prevent credential loss

# provider-specific config
aws: Optional[MlflowConfigAWS] = None
Expand Down Expand Up @@ -218,6 +220,8 @@ def input_vars(self, stage_outputs: Dict[str, Dict[str, Any]]):
"ingress_host": domain,
"cluster_oidc_issuer_url": cluster_oidc_issuer_url,
"overrides": self.config.mlflow.values,
"force_destroy_storage": self.config.mlflow.force_destroy_storage,
"force_destroy_db_creds": self.config.mlflow.force_destroy_db_creds,
}
elif self.config.provider == ProviderEnum.azure:
cluster_oidc_issuer_url = stage_outputs["stages/02-infrastructure"]["cluster_oidc_issuer_url"]["value"]
Expand All @@ -237,6 +241,8 @@ def input_vars(self, stage_outputs: Dict[str, Dict[str, Any]]):
"storage_resource_group_name": resource_group_name,
"region": self.config.azure.region,
"storage_account_name": self.config.project_name[:15] + 'mlfsa' + self.config.azure.storage_account_postfix,
"force_destroy_storage": self.config.mlflow.force_destroy_storage,
"force_destroy_db_creds": self.config.mlflow.force_destroy_db_creds,
}
elif self.config.provider == ProviderEnum.gcp:
cluster_oidc_issuer_url = stage_outputs["stages/02-infrastructure"]["cluster_oidc_issuer_url"]["value"]
Expand All @@ -256,6 +262,8 @@ def input_vars(self, stage_outputs: Dict[str, Dict[str, Any]]):
"project_id": project_id,
"region": self.config.google_cloud_platform.region,
"bucket_name": f"{self.config.project_name}-mlflow-artifacts",
"force_destroy_storage": self.config.mlflow.force_destroy_storage,
"force_destroy_db_creds": self.config.mlflow.force_destroy_db_creds,
}
else:
raise NotImplementedError(f"Provider {self.config.provider} not implemented")
Expand Down
9 changes: 9 additions & 0 deletions src/nebari_mlflow_plugin/template/aws/main.tf
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,14 @@ resource "aws_s3_bucket" "artifact_storage" {
versioning {
enabled = true
}

# Reject any plan that would destroy the bucket. NOTE: prevent_destroy is a literal and applies regardless of force_destroy_storage.
lifecycle {
prevent_destroy = true
}

# Allow Terraform to destroy bucket and all objects when force_destroy_storage is true
force_destroy = var.force_destroy_storage
}

# If enable_s3_encryption is true, create a key and apply Server Side Encryption to S3 bucket
Expand Down Expand Up @@ -116,4 +124,5 @@ module "mlflow" {
s3_bucket_name = aws_s3_bucket.artifact_storage.id
keycloak_config = module.keycloak.config
overrides = var.overrides
force_destroy_db_creds = var.force_destroy_db_creds
}
11 changes: 11 additions & 0 deletions src/nebari_mlflow_plugin/template/aws/modules/mlflow/main.tf
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,17 @@ resource "kubernetes_namespace" "this" {
# Generated password for the MLflow PostgreSQL database.
#
# A random_password keeps its value across applies as long as its arguments
# (and keepers) are unchanged, so no extra machinery is needed to preserve the
# credential between deployments.
resource "random_password" "mlflow_postgres" {
  length  = 32
  special = false

  # Reject any plan that would destroy (or replace) the stored password.
  #
  # Terraform only accepts literal values inside `lifecycle`, so this guard
  # cannot be switched by var.force_destroy_db_creds. The previous
  # `ignore_changes = var.force_destroy_db_creds ? [] : [result]` expression was
  # therefore invalid, and `result` is a computed attribute that cannot be
  # ignored in any case.
  #
  # The previous `keepers = { force_regenerate = timestamp() }` was removed as
  # well: timestamp() changes on every plan, which would rotate the database
  # password on every apply and immediately collide with prevent_destroy.
  #
  # NOTE(review): with this guard in place the credential must be released
  # deliberately (e.g. by removing it from state or lifting this block) before
  # the stage can be torn down — confirm this is the intended operator workflow.
  lifecycle {
    prevent_destroy = true
  }
}

resource "helm_release" "mlflow" {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -39,4 +39,10 @@ variable "mlflow_sa_iam_role_arn" {
# Free-form override values (any type); defaults to an empty object when unset.
# NOTE(review): presumably populated by the root module's
# `overrides = var.overrides` assignment — confirm against the caller.
variable "overrides" {
type = any
default = {}
}

# Opt-in switch for tearing down the database credentials; off by default.
# The AWS root module forwards its own force_destroy_db_creds input under this
# same name.
variable "force_destroy_db_creds" {
  type        = bool
  description = "Whether to destroy database credentials when MLflow is disabled"
  default     = false
}
15 changes: 15 additions & 0 deletions src/nebari_mlflow_plugin/template/aws/variables.tf
Original file line number Diff line number Diff line change
Expand Up @@ -81,4 +81,19 @@ variable "overrides" {
# Required input (no default): OIDC issuer URL of the EKS cluster.
variable "cluster_oidc_issuer_url" {
  type        = string
  description = "The URL on the EKS cluster for the OpenID Connect identity provider"
}

# RESOURCE MANAGEMENT SETTINGS
# -----------------

# Opt-in switch wired to `force_destroy` on the artifact S3 bucket in main.tf.
# Off by default so stored artifacts are not removed unintentionally.
variable "force_destroy_storage" {
  type        = bool
  description = "Whether to destroy storage bucket when MLflow is disabled"
  default     = false
}

# Opt-in switch for tearing down the database credentials; off by default.
# main.tf passes this value straight through to the "mlflow" module.
variable "force_destroy_db_creds" {
  type        = bool
  description = "Whether to destroy database credentials when MLflow is disabled"
  default     = false
}
13 changes: 13 additions & 0 deletions src/nebari_mlflow_plugin/template/azure/main.tf
Original file line number Diff line number Diff line change
Expand Up @@ -50,6 +50,14 @@ resource "helm_release" "mlflow" {
},
"minio" = {
"enabled" = false
},
postgresql = {
# Preserve database credentials unless force_destroy_db_creds is true
# When force_destroy_db_creds is false, the secret will be preserved
# and reused on subsequent deployments
auth = {
existingSecret = var.force_destroy_db_creds ? "" : "${var.helm-release-name}-postgresql"
}
}
})
],
Expand All @@ -72,6 +80,11 @@ resource "azurerm_storage_account" "mlflow" {
location = var.region
account_tier = "Standard"
account_replication_type = "LRS"

# Reject any plan that would destroy the storage account. NOTE: prevent_destroy is a literal and applies regardless of force_destroy_storage.
lifecycle {
prevent_destroy = true
}
}

resource "azurerm_storage_container" "mlflow" {
Expand Down
12 changes: 12 additions & 0 deletions src/nebari_mlflow_plugin/template/azure/variables.tf
Original file line number Diff line number Diff line change
Expand Up @@ -40,3 +40,15 @@ variable "storage_account_name" {
# Required input (no default). main.tf uses it as the `location` of the MLflow
# storage account.
variable "region" {
type = string
}

# Opt-in switch for storage teardown; off by default.
# NOTE(review): no reference to this variable is visible in the reviewed part
# of azure/main.tf — confirm that it is actually consumed.
variable "force_destroy_storage" {
  type        = bool
  description = "Whether to destroy storage account when MLflow is disabled"
  default     = false
}

# Opt-in switch for database-credential teardown; off by default.
# main.tf uses it to choose the chart's postgresql.auth.existingSecret value:
# an empty string when true, "<helm-release-name>-postgresql" otherwise.
variable "force_destroy_db_creds" {
  type        = bool
  description = "Whether to destroy database credentials when MLflow is disabled"
  default     = false
}
13 changes: 12 additions & 1 deletion src/nebari_mlflow_plugin/template/gcp/main.tf
Original file line number Diff line number Diff line change
Expand Up @@ -62,6 +62,12 @@ resource "helm_release" "mlflow" {
repository = "bitnamilegacy/postgresql"
tag = "16.6.0-debian-12-r2"
}
# Preserve database credentials unless force_destroy_db_creds is true
# When force_destroy_db_creds is false, the secret will be preserved
# and reused on subsequent deployments
auth = {
existingSecret = var.force_destroy_db_creds ? "" : "${var.helm-release-name}-postgresql"
}
}
waitContainer = {
# TODO: Remove hardcoded image values after Helm chart update
Expand Down Expand Up @@ -92,13 +98,18 @@ resource "google_storage_bucket" "mlflow" {

name = var.bucket_name
location = var.region
force_destroy = false
force_destroy = var.force_destroy_storage

uniform_bucket_level_access = true

versioning {
enabled = true
}

# Reject any plan that would destroy the bucket. NOTE: prevent_destroy is a literal and applies regardless of force_destroy_storage.
lifecycle {
prevent_destroy = true
}
}

# Service Account for Workload Identity
Expand Down
12 changes: 12 additions & 0 deletions src/nebari_mlflow_plugin/template/gcp/variables.tf
Original file line number Diff line number Diff line change
Expand Up @@ -42,4 +42,16 @@ variable "bucket_name" {
# Required input (no default). main.tf uses it as the `location` of the MLflow
# artifact bucket.
variable "region" {
  type        = string
  description = "GCP region"
}

# Opt-in switch wired to `force_destroy` on the MLflow storage bucket in
# main.tf. Off by default so stored artifacts are not removed unintentionally.
variable "force_destroy_storage" {
  type        = bool
  description = "Whether to destroy storage bucket when MLflow is disabled"
  default     = false
}

# Opt-in switch for database-credential teardown; off by default.
# main.tf uses it to choose the chart's postgresql auth.existingSecret value:
# an empty string when true, "<helm-release-name>-postgresql" otherwise.
variable "force_destroy_db_creds" {
  type        = bool
  description = "Whether to destroy database credentials when MLflow is disabled"
  default     = false
}