diff --git a/README.md b/README.md index 2a6cc17..cfba5ef 100644 --- a/README.md +++ b/README.md @@ -40,6 +40,12 @@ module "plane_infra" { enable_aws_lb_controller = true # to use aws ALB cluster_version = "1.34" + # Optional: create the OpenSearch ↔ Bedrock connector/model integration (enable only on the second apply; see the two-pass steps below) + create_opensearch_bedrock_connector = true + # bedrock_model_region = "us-east-1" # defaults to `region` if it is a supported Bedrock region, otherwise "us-east-1" + # bedrock_model = "amazon.titan-embed-text-v1" # default + # bedrock_model_name = "plane-eks-cluster-bedrock" # defaults to "${cluster_name}-bedrock" + tags = { Environment = "plane" } @@ -48,6 +54,80 @@ module "plane_infra" { Override defaults by passing `eks`, `cache`, `mq`, `opensearch`, `object_store`, or `db` objects. See [terraform/README.md](terraform/README.md) for all options. +### OpenSearch ↔ Bedrock connector integration (optional) + +The connector is deployed via a CloudFormation stack and registers a Bedrock embedding model in OpenSearch ML Commons. Because the IAM role used by the connector Lambda must be mapped inside OpenSearch **before** the connector is registered, the setup requires two `terraform apply` passes. + +#### Step 1 — Deploy infrastructure without the connector + +Set `create_opensearch_bedrock_connector = false` (or omit the flag) on the first apply. This provisions the OpenSearch domain and all other infrastructure, but skips the connector CloudFormation stack. + +```hcl +module "plane_infra" { + # ... + create_opensearch_bedrock_connector = false +} +``` + +```bash +terraform apply +``` + +#### Step 2 — Map the IAM role inside OpenSearch + +The connector Lambda runs under an IAM role (`LambdaInvokeOpenSearchMLCommonsRole` by default). You must map this role to the OpenSearch `ml_full_access` and `all_access` built-in roles before registering the connector; otherwise the registration will fail. 
+ +Verify the cluster is reachable: + +```bash +curl -u 'admin:<master-password>' https://<opensearch-endpoint> +``` + +Map to `ml_full_access`: + +```bash +curl -X PUT \ + 'https://<opensearch-endpoint>/_plugins/_security/api/rolesmapping/ml_full_access' \ + -H 'Content-Type: application/json' \ + -u 'admin:<master-password>' \ + -d '{ + "backend_roles": [ + "arn:aws:iam::<account-id>:role/LambdaInvokeOpenSearchMLCommonsRole" + ] + }' +# {"status":"CREATED","message":"'ml_full_access' created."} +``` + +Map to `all_access` (required for model registration). Note that `PUT` replaces the existing mapping for the role, so include any users or backend roles that are already mapped to `all_access`: + +```bash +curl -X PUT \ + 'https://<opensearch-endpoint>/_plugins/_security/api/rolesmapping/all_access' \ + -H 'Content-Type: application/json' \ + -u 'admin:<master-password>' \ + -d '{ + "backend_roles": [ + "arn:aws:iam::<account-id>:role/LambdaInvokeOpenSearchMLCommonsRole" + ] + }' +# {"status":"OK","message":"'all_access' updated."} +``` + +#### Step 3 — Deploy the connector + +Set `create_opensearch_bedrock_connector = true` and apply again. Terraform will deploy the CloudFormation stack, which invokes the Lambda to register the Bedrock connector and model in OpenSearch ML Commons. + +```hcl +module "plane_infra" { + # ... + create_opensearch_bedrock_connector = true +} +``` + +```bash +terraform apply +``` + ### Outputs Add these output blocks to your configuration to expose module outputs (e.g. in `main.tf` or `outputs.tf`): diff --git a/terraform/main.tf b/terraform/main.tf index f3fb4f7..114e0d3 100644 --- a/terraform/main.tf +++ b/terraform/main.tf @@ -16,6 +16,23 @@ locals { private_subnet_cidrs = [ for i in range(local.subnet_count) : cidrsubnet(var.vpc_cidr, 6, local.subnet_count + i) ] + + bedrock_allowed_regions = toset(["us-east-1", "us-west-2", "eu-central-1", "ap-northeast-1", "ap-southeast-1"]) + + effective_bedrock_model_region = coalesce( + var.bedrock_model_region, + contains(local.bedrock_allowed_regions, var.region) ? var.region : "us-east-1" + ) + + opensearch_bedrock_effective = var.opensearch_bedrock != null ? 
var.opensearch_bedrock : { + create_connector = var.create_opensearch_bedrock_connector + bedrock_model_region = local.effective_bedrock_model_region + bedrock_model = coalesce(var.bedrock_model, "amazon.titan-embed-text-v1") + bedrock_model_name = coalesce(var.bedrock_model_name, "${var.cluster_name}-bedrock") + add_process_function = var.add_process_function + add_offline_batch_inference = var.add_offline_batch_inference + lambda_invoke_mlcommons_role_name = var.lambda_invoke_mlcommons_role_name + } } module "vpc" { @@ -144,7 +161,21 @@ module "opensearch" { instance_type = var.opensearch.instance_type instance_count = var.opensearch.instance_count ebs_volume_size = var.opensearch.ebs_volume_size - tags = var.tags + + enable_vpc = true + vpc_id = module.vpc.vpc_id + subnet_ids = [module.vpc.private_subnet_ids[0]] + + allowed_ingress_security_group_ids = [module.eks.node_security_group_id] + + create_connector = local.opensearch_bedrock_effective.create_connector + bedrock_model_region = local.opensearch_bedrock_effective.bedrock_model_region + bedrock_model = local.opensearch_bedrock_effective.bedrock_model + bedrock_model_name = local.opensearch_bedrock_effective.bedrock_model_name + add_process_function = local.opensearch_bedrock_effective.add_process_function + add_offline_batch_inference = local.opensearch_bedrock_effective.add_offline_batch_inference + lambda_invoke_opensearch_mlcommons_role_name = local.opensearch_bedrock_effective.lambda_invoke_mlcommons_role_name + tags = var.tags depends_on = [aws_secretsmanager_secret_version.plane_password] } diff --git a/terraform/modules/opensearch/bedrock-connector.cloudformation.yml b/terraform/modules/opensearch/bedrock-connector.cloudformation.yml new file mode 100644 index 0000000..de2d73e --- /dev/null +++ b/terraform/modules/opensearch/bedrock-connector.cloudformation.yml @@ -0,0 +1,489 @@ +AWSTemplateFormatVersion: '2010-09-09' +Description: CloudFormation Template to connect Amazon OpenSearch to Bedrock 
model +Metadata: + AWS::CloudFormation::Interface: + ParameterGroups: + - + Label: + default: "Amazon OpenSearch Configuration" + Parameters: + - AmazonOpenSearchEndpoint + - LambdaInvokeOpenSearchMLCommonsRoleName + - + Label: + default: "Model Configuration" + Parameters: + - Model + - ModelName + - BedrockModelRegion + - AddProcessFunction + - AddOfflineBatchInference + - + Label: + default: "Lambda Configuration" + Parameters: + - SecurityGroupIds + - SubnetIds + ParameterLabels: + AmazonOpenSearchEndpoint: + default: "Amazon OpenSearch Endpoint" + LambdaInvokeOpenSearchMLCommonsRoleName: + default: "Lambda Invoke OpenSearch ML Commons Role Name" + ModelName: + default: "Model Name" + BedrockModelRegion: + default: "Model Region" + Model: + default: "Model" + AddProcessFunction: + default: "Add Process Function" + AddOfflineBatchInference: + default: "Enable Offline Batch Inference" + SecurityGroupIds: + default: "Security Group Ids" + SubnetIds: + default: "Subnet Ids" + +Parameters: + AmazonOpenSearchEndpoint: + Type: String + Description: "The endpoint of the Amazon OpenSearch Service. Example: https://example.us-east-1.es.amazonaws.com" + AllowedPattern: "https://.*" + ConstraintDescription: "Must be a valid Amazon OpenSearch Service endpoint." + ModelName: + Type: String + Description: The name of the model deployed, it will be used in the name of created AWS resources. + LambdaInvokeOpenSearchMLCommonsRoleName: + Type: String + Default: "LambdaInvokeOpenSearchMLCommonsRole" + ConstraintDescription: Must use alphanumeric and '+=,.@-_' characters. + Description: The name of the IAM role that is used by Lambda to invoke Amazon OpenSearch domain. Before deploying this template, it must be mapped to the OpenSearch domain's ml_full_access role . The IAM role will be created by this template if it does not exist. You can use the default value or specify a custom name. If you specify a custom name, it must be unique within your account. 
+ AllowedPattern: "[a-zA-Z0-9+=,.@_-]+" + BedrockModelRegion: + Type: String + Description: Select the Bedrock service region from available options. Please make sure the model is access granted in the selected region Bedrock service. If your AOS service is in a region that is different than the bedrock service region, you may expect cross-region latency. + AllowedValues: ["us-east-1", "us-west-2", "eu-central-1", "ap-northeast-1", "ap-southeast-1"] + Model: + Type: String + Description: Select the model from Bedrock service to connect to Amazon OpenSearch Service. Make sure you have requested and obtained access to the model in Bedrock before using it. More details in https://docs.aws.amazon.com/bedrock/latest/userguide/model-access.html + AllowedValues: ["amazon.titan-embed-text-v1", "amazon.titan-embed-image-v1", "amazon.titan-embed-text-v2:0", "cohere.embed-english-v3", "cohere.embed-multilingual-v3", "anthropic.claude-3-sonnet-20240229-v1:0"] + AddProcessFunction: + Description: "Enable or disable the default pre and post processing functions in the connector" + Type: String + Default: "true" + AllowedValues: + - "true" + - "false" + AddOfflineBatchInference: + Description: This is compatible with AOS version 2.17 or greater. Enable or disable the bath_predict action in the connector. More details in https://docs.opensearch.org/docs/latest/ml-commons-plugin/api/model-apis/batch-predict/ + Type: String + Default: "false" + AllowedValues: + - "true" + - "false" + SecurityGroupIds: + Type: List<AWS::EC2::SecurityGroup::Id> # element type is required; a bare "List" is not a valid CloudFormation parameter type + Description: The security Group where Lambda will be deployed that Amazon Opensearch also is deployed in. + SubnetIds: + Type: List<AWS::EC2::Subnet::Id> # element type is required; the value is supplied as a comma-separated list of subnet IDs + Description: The one or more Subnets where Lambda will be deployed that Amazon Opensearch also is deployed in. The subnetds should be accessible to public NAT Gateway. 
+ +Resources: + OpenSearchInvokeModelRole: + Type: AWS::IAM::Role + Properties: + RoleName: !Join ['-', ["OpenSearchInvokeModelRole", !Ref ModelName]] + AssumeRolePolicyDocument: + Version: '2012-10-17' + Statement: + - Effect: Allow + Principal: + Service: es.amazonaws.com + Action: sts:AssumeRole + - Effect: Allow + Principal: + Service: es.aws.internal + Action: sts:AssumeRole + Policies: + - PolicyName: OpenSearchAccessAIModelPolicy + PolicyDocument: + Version: '2012-10-17' + Statement: + - Effect: Allow + Action: + - bedrock:InvokeModel + - bedrock:CreateModelInvocationJob + - bedrock:GetModelInvocationJob + - bedrock:ListModelInvocationJobs + - bedrock:StopModelInvocationJob + Resource: "*" + - Effect: Allow + Action: + - iam:PassRole + Resource: !GetAtt BedrockBatchJobRole.Arn + + BedrockBatchJobRole: + Type: 'AWS::IAM::Role' + Properties: + RoleName: !Join ['-', ['bedrock-batch-job-role', !Ref ModelName]] + AssumeRolePolicyDocument: + Version: '2012-10-17' + Statement: + - Effect: Allow + Principal: + Service: bedrock.amazonaws.com + Action: sts:AssumeRole + ManagedPolicyArns: + - 'arn:aws:iam::aws:policy/AmazonS3FullAccess' + + HelperLambdaInvokeRole: + Type: AWS::IAM::Role + Properties: + RoleName: !Join ['-', ["HelperLambdaInvokeRole", !Ref ModelName]] + AssumeRolePolicyDocument: + Version: '2012-10-17' + Statement: + - Effect: Allow + Principal: + Service: + - lambda.amazonaws.com + Action: + - sts:AssumeRole + - Effect: Allow + Principal: + Service: + - s3.amazonaws.com + Action: + - sts:AssumeRole + Path: "/" + Policies: + - PolicyName: root + PolicyDocument: + Version: '2012-10-17' + Statement: + - Effect: Allow + Action: + - logs:CreateLogGroup + - logs:CreateLogStream + - logs:PutLogEvents + Resource: arn:aws:logs:*:*:* + - Effect: Allow + Action: + - iam:ListRoles + - iam:PassRole + - iam:GetRole + - iam:CreateRole + - iam:GetRolePolicy + - iam:CreatePolicy + - iam:AttachRolePolicy + - iam:PutRolePolicy + Resource: '*' + - Effect: Allow + Action: 
+ - s3:CreateBucket + - s3:PutObject + - s3:GetObject + - s3:ListBucket + - s3:DeleteObject + - s3:DeleteBucket + Resource: "*" + + HelperLambdaFunction: + Type: AWS::Lambda::Function + DependsOn: [HelperLambdaInvokeRole] + Properties: + FunctionName: !Join ['-', ["HelperLambdaFunction", !Ref ModelName]] + Code: + ZipFile: | + import boto3 + import cfnresponse + import urllib.request + import json + import botocore + import time + + def lambda_handler(event, context): + print("event: " + str(event)) + request_type = event['RequestType'] + if request_type in ['Update']: + return respond_success(event, context) + + if request_type in ['Delete'] or event['ResourceProperties']['CleanupS3Bucket'] == 'true': + clean_s3_bucket(event['ResourceProperties']['S3Bucket']) + return respond_success(event, context) + + try: + iam = boto3.client('iam') + os_invoke_model_role_arn = event['ResourceProperties']['OpenSearchInvokeModelRole'] + lambda_invoke_aos_role_name = event['ResourceProperties']['LambdaInvokeAOSRole'] + bedrock_batch_job_role_arn = event['ResourceProperties']['BedrockBatchJobRole'] + role_arn = manage_iam_role(iam, lambda_invoke_aos_role_name, os_invoke_model_role_arn, bedrock_batch_job_role_arn) + + return download_and_upload_lambda(event, role_arn, context) + except Exception as e: + print(f"Unexpected error in lambda_handler: {e}") + return respond_failure(event, context, str(e)) + + def iam_role_exists(iam, role_name): + try: + iam.get_role(RoleName=role_name) + print("LambdaInvokeOpenSearchMLCommonsRole exists") + return True + except iam.exceptions.NoSuchEntityException: + print("LambdaInvokeOpenSearchMLCommonsRole does not exist") + return False + + def vpc_policy_exists(iam, role_name): + try: + iam.get_role_policy(RoleName=role_name, PolicyName='VpcOpenSearchAccess') + print("VpcOpenSearchAccess policy exists") + return True + except iam.exceptions.NoSuchEntityException: + print("VpcOpenSearchAccess policy does not exist") + return False + + def 
model_passrole_policy_exists(iam, role_name, os_invoke_model_role_arn, bedrock_batch_job_role_arn): + try: + existing_inline_policy = iam.get_role_policy(RoleName=role_name, PolicyName='ModelPassRole') + print("PassRole policy exists") + return True + except iam.exceptions.NoSuchEntityException: + print("PassRole policy does not exist") + return False + + def manage_iam_role(iam, role_name, os_invoke_model_role_arn, bedrock_batch_job_role_arn): + if not iam_role_exists(iam, role_name): + create_iam_role_with_basic_policy(iam, role_name, os_invoke_model_role_arn) + print("Created LambdaInvokeOpenSearchMLCommonsRole") + + if not vpc_policy_exists(iam, role_name): + vpc_policy = create_vpc_policy() + put_role_policy(iam, role_name, 'VpcOpenSearchAccess', vpc_policy) + print("Created VpcOpenSearchAccess policy") + + if not model_passrole_policy_exists(iam, role_name, os_invoke_model_role_arn, bedrock_batch_job_role_arn): + model_passrole_policy = create_empty_passrole_policy_statement(os_invoke_model_role_arn) + passrole_statement_line = create_passrole_statement_line(os_invoke_model_role_arn, bedrock_batch_job_role_arn) + model_passrole_policy['Statement'].append(passrole_statement_line) + put_role_policy(iam, role_name, 'ModelPassRole', model_passrole_policy) + print("Created PassRole policy") + else: + existing_inline_policy = iam.get_role_policy(RoleName=role_name, PolicyName='ModelPassRole') + new_policy = create_passrole_statement(os_invoke_model_role_arn, bedrock_batch_job_role_arn) + inline_policy = existing_inline_policy['PolicyDocument'] + inline_policy['Statement'].extend(new_policy['Statement']) + put_role_policy(iam, role_name, 'ModelPassRole', inline_policy) + print("Updated PassRole policy") + + print("LambdaInvokeOpenSearchMLCommonsRole is ready") + return iam.get_role(RoleName=role_name)['Role']['Arn'] + + def create_iam_role_with_basic_policy(iam, role_name, os_invoke_model_role_arn): + trust_relationship = create_trust_relationship() + basic_policy = 
create_basic_policy() + role = iam.create_role(RoleName=role_name, AssumeRolePolicyDocument=json.dumps(trust_relationship), Description='Role for Lambda to invoke OpenSearch') + time.sleep(5) + policy_name = 'OpenSearchAccess' + put_role_policy(iam, role_name, policy_name, basic_policy) + return role['Role']['Arn'] + + def put_role_policy(iam, role_name, policy_name, policy_document): + try: + iam.put_role_policy(RoleName=role_name, PolicyName=policy_name, PolicyDocument=json.dumps(policy_document)) + time.sleep(5) + except botocore.exceptions.ClientError as e: + print(e) + raise e + + def create_trust_relationship(): + return { + "Version": "2012-10-17", + "Statement": [ + { + "Effect": "Allow", + "Principal": {"Service": "lambda.amazonaws.com"}, + "Action": "sts:AssumeRole" + } + ] + } + + def create_basic_policy(): + return { + "Version": "2012-10-17", + "Statement": [ + {"Effect": "Allow", "Action": [ + "logs:CreateLogGroup", + "logs:CreateLogStream", + "logs:PutLogEvents" + ], + "Resource": "*" + }, + {"Effect": "Allow", "Action": ["es:*"], "Resource": "*"}, + {"Effect": "Allow", "Action": ["es:ESHttpPost"], "Resource": "arn:aws:es:*:*:*/*/*"} + ] + } + + def create_vpc_policy(): + return { + "Version": "2012-10-17", + "Statement": [ + {"Effect": "Allow", "Action": [ + "ec2:DescribeVpcEndpoints", + "ec2:DescribeDhcpOptions", + "ec2:DescribeVpcs", + "ec2:DescribeSubnets", + "ec2:DescribeSecurityGroups", + "ec2:DescribeNetworkInterfaces", + "ec2:DeleteNetworkInterfacePermission", + "ec2:DeleteNetworkInterface", + "ec2:CreateNetworkInterfacePermission", + "ec2:CreateNetworkInterface"], + "Resource": "*" + } + ] + } + + def create_passrole_statement_line(os_invoke_model_role_arn, bedrock_batch_job_role_arn): + return {"Effect": "Allow", "Action": ["iam:PassRole"], "Resource": [os_invoke_model_role_arn, bedrock_batch_job_role_arn]} + + def create_passrole_statement(os_invoke_model_role_arn, bedrock_batch_job_role_arn): + return { + "Version": "2012-10-17", + 
"Statement": [ + create_passrole_statement_line(os_invoke_model_role_arn, bedrock_batch_job_role_arn) + ] + } + + def create_empty_passrole_policy_statement(os_invoke_model_role_arn): + return { + "Version": "2012-10-17", + "Statement": [] + } + + def download_and_upload_lambda(event, role_arn, context): + response_data = {'ContinuedInvoke': True, 'LambdaInvokeAOSRoleArn': role_arn} + try: + + url = event['ResourceProperties']['LambdaZipUrl'] + s3_key = event['ResourceProperties']['S3Key'] + tmp_file_path = f"/tmp/{s3_key}" + + download_file(url, tmp_file_path) + bucket_name = event['ResourceProperties']['S3Bucket'] + create_s3_bucket(bucket_name) + upload_file_to_s3(bucket_name, s3_key, tmp_file_path) + + cfnresponse.send(event, context, cfnresponse.SUCCESS, response_data, context.log_stream_name) + return {"statusCode": 200} + except Exception as e: + print(e) + response_data['ContinuedInvoke'] = False + cfnresponse.send(event, context, cfnresponse.FAILED, response_data, context.log_stream_name) + return {"statusCode": 500} + + def download_file(url, file_path): + urllib.request.urlretrieve(url, file_path) + print(f"Downloaded file from {url} to {file_path}") + + def create_s3_bucket(bucket_name): + s3 = boto3.client('s3') + region = boto3.session.Session().region_name + if region == 'us-east-1': + s3.create_bucket(Bucket=bucket_name) + else: + s3.create_bucket(Bucket=bucket_name, CreateBucketConfiguration={'LocationConstraint': region}) + print(f"S3 bucket {bucket_name} created successfully") + + def upload_file_to_s3(bucket_name, s3_key, file_path): + s3 = boto3.resource('s3') + s3.Bucket(bucket_name).upload_file(file_path, s3_key) + print(f"Uploaded file to s3://{bucket_name}/{s3_key}") + + def respond_success(event, context): + cfnresponse.send(event, context, cfnresponse.SUCCESS, {}, context.log_stream_name) + return {"statusCode": 200} + + def respond_failure(event, context, reason): + cfnresponse.send(event, context, cfnresponse.FAILED, {}, 
context.log_stream_name, reason=reason) # pass reason by keyword: the sixth positional parameter of cfnresponse.send is noEcho, not reason + return {"statusCode": 500, "body": reason} + + def clean_s3_bucket(bucket_name): + try: + s3 = boto3.resource('s3') + bucket = s3.Bucket(bucket_name) + bucket.objects.all().delete() + bucket.delete() + print(f"S3 bucket {bucket_name} deleted successfully") + except Exception as e: + print(f"An error occurred in the S3 bucket cleanup: {e}") + pass + + Handler: index.lambda_handler + Role: !GetAtt HelperLambdaInvokeRole.Arn + Runtime: python3.12 + Timeout: 120 + + InvokeHelperLambda: + Type: Custom::InvokeLambda + DependsOn: [HelperLambdaFunction, OpenSearchInvokeModelRole] + Properties: + ServiceToken: !GetAtt HelperLambdaFunction.Arn + LambdaZipUrl: "https://d2wvpdb2jzk602.cloudfront.net/opensearch-bedrock-connector-select-model-lambda.zip" + LambdaInvokeAOSRole: !Ref LambdaInvokeOpenSearchMLCommonsRoleName + OpenSearchInvokeModelRole: !GetAtt OpenSearchInvokeModelRole.Arn + BedrockBatchJobRole: !GetAtt BedrockBatchJobRole.Arn + S3Bucket: !Join ['-', ["opensearch-ml-models-lambda-function", "stack", !Select [4, !Split ['-', !Select [2, !Split ['/', !Ref AWS::StackId]]]]]] + S3Key: opensearch-connector-lambda-function.zip + CleanupS3Bucket: false + + ConnectorLambdaFunction: + Type: AWS::Lambda::Function + DependsOn: [InvokeHelperLambda] + Properties: + FunctionName: !Join ['-', ["ConnectorLambdaFunction", !Ref ModelName]] + Code: + S3Bucket: !Join ['-', ["opensearch-ml-models-lambda-function", "stack", !Select [4, !Split ['-', !Select [2, !Split ['/', !Ref AWS::StackId]]]]]] + S3Key: opensearch-connector-lambda-function.zip + PackageType: Zip + Handler: lambda_function.lambda_handler + Role: + Fn::Sub: arn:aws:iam::${AWS::AccountId}:role/${LambdaInvokeOpenSearchMLCommonsRoleName} + Runtime: python3.12 + Timeout: 120 + VpcConfig: + SecurityGroupIds: !Ref SecurityGroupIds + SubnetIds: !Ref SubnetIds + + InvokeConnectorLambda: + Type: Custom::InvokeLambda + DependsOn: [ConnectorLambdaFunction] + Properties: + ServiceToken: !GetAtt 
ConnectorLambdaFunction.Arn + AOSEndpoint: !Ref AmazonOpenSearchEndpoint + AOSRoleArn: !GetAtt OpenSearchInvokeModelRole.Arn + ModelName: !Ref ModelName + ModelRegion: !Ref BedrockModelRegion + Model: !Ref Model + BedrockBatchJobRole: !GetAtt BedrockBatchJobRole.Arn + DefaultProcessFunction: !Ref AddProcessFunction + EnableOfflineBatchInference: !Ref AddOfflineBatchInference + + InvokeHelperLambdaToCleanS3: + Type: Custom::InvokeLambda + DependsOn: [InvokeConnectorLambda] + Properties: + ServiceToken: !GetAtt HelperLambdaFunction.Arn + CleanupS3Bucket: true + S3Bucket: !Join ['-', ["opensearch-ml-models-lambda-function", "stack", !Select [4, !Split ['-', !Select [2, !Split ['/', !Ref AWS::StackId]]]]]] + +Outputs: + ConnectorId: + Value: !GetAtt InvokeConnectorLambda.connector_id + Description: The connector id that is created in Amazon OpenSearch Service ML-Commons. + ModelId: + Value: !GetAtt InvokeConnectorLambda.model_id + Description: The model id in ML-Commons that is associated with the connector id. + BedrockEndpoint: + Value: !GetAtt InvokeConnectorLambda.model_endpoint + Description: The endpoint of the Bedrock model that is connected to Amazon OpenSearch Service. + diff --git a/terraform/modules/opensearch/main.tf b/terraform/modules/opensearch/main.tf index 831a314..7cee730 100644 --- a/terraform/modules/opensearch/main.tf +++ b/terraform/modules/opensearch/main.tf @@ -1,6 +1,72 @@ data "aws_caller_identity" "current" {} data "aws_region" "current" {} +locals { + opensearch_sg_id = var.enable_vpc ? aws_security_group.opensearch[0].id : null + + # coalesce() raises an error when every argument is null (the default, non-VPC configuration), so fall back to an empty list via try(). + effective_connector_subnet_ids = try(coalesce(var.connector_subnet_ids, var.subnet_ids), []) + effective_connector_sg_ids = try(coalesce(var.connector_security_group_ids, var.enable_vpc ? [aws_security_group.opensearch[0].id] : null), []) +} + +resource "aws_security_group" "opensearch" { + count = var.enable_vpc ? 
1 : 0 + + name_prefix = "${var.domain_name}-opensearch-" + description = "OpenSearch domain security group" + vpc_id = var.vpc_id + + tags = merge(var.tags, { + Name = "${var.domain_name}-opensearch-sg" + }) +} + +resource "aws_security_group_rule" "opensearch_https_self" { + count = var.enable_vpc ? 1 : 0 + + type = "ingress" + security_group_id = aws_security_group.opensearch[0].id + protocol = "tcp" + from_port = 443 + to_port = 443 + self = true + description = "Allow HTTPS within the OpenSearch/connector security group" +} + +resource "aws_security_group_rule" "opensearch_https_egress" { + count = var.enable_vpc ? 1 : 0 + + type = "egress" + security_group_id = aws_security_group.opensearch[0].id + protocol = "tcp" + from_port = 443 + to_port = 443 + cidr_blocks = ["0.0.0.0/0"] + description = "Allow HTTPS egress for Lambda to reach OpenSearch and Bedrock endpoints" +} + +resource "aws_security_group_rule" "opensearch_https_from_allowed_sgs" { + # Use stable keys (list indices) so values may be unknown at plan time (e.g., with -target). + for_each = var.enable_vpc ? 
{ for idx, sg_id in var.allowed_ingress_security_group_ids : tostring(idx) => sg_id } : {} + + type = "ingress" + security_group_id = aws_security_group.opensearch[0].id + protocol = "tcp" + from_port = 443 + to_port = 443 + source_security_group_id = each.value + description = "Allow HTTPS to OpenSearch from approved security group" +} + +# Look up the VPC so the rule below can use its CIDR; a VPC ID is not a valid source security group ID. +data "aws_vpc" "selected" { + count = var.enable_vpc ? 1 : 0 + id = var.vpc_id +} + +resource "aws_security_group_rule" "opensearch_https_from_vpc" { + # Only created in VPC mode: the security group it attaches to exists only when enable_vpc is true. + count = var.enable_vpc ? 1 : 0 + + type = "ingress" + security_group_id = aws_security_group.opensearch[0].id + protocol = "tcp" + from_port = 443 + to_port = 443 + cidr_blocks = [data.aws_vpc.selected[0].cidr_block] + description = "Allow HTTPS to OpenSearch from VPC" +} + resource "aws_opensearch_domain" "main" { domain_name = var.domain_name engine_version = var.engine_version @@ -52,5 +118,35 @@ resource "aws_opensearch_domain" "main" { tls_security_policy = "Policy-Min-TLS-1-2-2019-07" } + dynamic "vpc_options" { + for_each = var.enable_vpc ? [1] : [] + content { + subnet_ids = var.subnet_ids + security_group_ids = [aws_security_group.opensearch[0].id] + } + } + tags = var.tags } + +resource "aws_cloudformation_stack" "bedrock_connector" { + count = var.create_connector ? 1 : 0 + + name = "${var.domain_name}-bedrock-connector" + template_body = file("${path.module}/bedrock-connector.cloudformation.yml") + capabilities = ["CAPABILITY_NAMED_IAM"] + + parameters = { + AddOfflineBatchInference = var.add_offline_batch_inference ? "true" : "false" + AddProcessFunction = var.add_process_function ? 
"true" : "false" + AmazonOpenSearchEndpoint = "https://${aws_opensearch_domain.main.endpoint}" + BedrockModelRegion = var.bedrock_model_region + LambdaInvokeOpenSearchMLCommonsRoleName = var.lambda_invoke_opensearch_mlcommons_role_name + Model = var.bedrock_model + ModelName = var.bedrock_model_name + SecurityGroupIds = join(",", local.effective_connector_sg_ids) + SubnetIds = join(",", local.effective_connector_subnet_ids) + } + + depends_on = [aws_opensearch_domain.main] +} diff --git a/terraform/modules/opensearch/outputs.tf b/terraform/modules/opensearch/outputs.tf index 393e3fa..550114f 100644 --- a/terraform/modules/opensearch/outputs.tf +++ b/terraform/modules/opensearch/outputs.tf @@ -12,3 +12,18 @@ output "kibana_endpoint" { description = "OpenSearch Dashboards endpoint" value = aws_opensearch_domain.main.dashboard_endpoint } + +output "bedrock_connector_id" { + description = "Connector ID created in OpenSearch ML Commons (null when create_connector = false)." + value = try(aws_cloudformation_stack.bedrock_connector[0].outputs["ConnectorId"], null) +} + +output "bedrock_model_id" { + description = "Model ID created in OpenSearch ML Commons (null when create_connector = false)." + value = try(aws_cloudformation_stack.bedrock_connector[0].outputs["ModelId"], null) +} + +output "bedrock_endpoint" { + description = "Bedrock model endpoint connected to OpenSearch (null when create_connector = false)." + value = try(aws_cloudformation_stack.bedrock_connector[0].outputs["BedrockEndpoint"], null) +} diff --git a/terraform/modules/opensearch/variables.tf b/terraform/modules/opensearch/variables.tf index 9167c8e..fcd98d2 100644 --- a/terraform/modules/opensearch/variables.tf +++ b/terraform/modules/opensearch/variables.tf @@ -3,6 +3,32 @@ variable "domain_name" { type = string } +variable "enable_vpc" { + description = "Whether to place the OpenSearch domain in a VPC (required for Bedrock connector integration)." 
+ type = bool + default = false + + validation { + condition = ( + var.enable_vpc == false || + (var.vpc_id != null && var.subnet_ids != null && length(var.subnet_ids) > 0) + ) + error_message = "When enable_vpc is true, you must provide vpc_id and a non-empty subnet_ids list." + } +} + +variable "vpc_id" { + description = "VPC ID for the OpenSearch domain security group (required when enable_vpc = true)." + type = string + default = null +} + +variable "subnet_ids" { + description = "Subnet IDs for the OpenSearch domain VPC options (required when enable_vpc = true)." + type = list(string) + default = null +} + variable "master_username" { description = "OpenSearch master username for fine-grained access control" type = string @@ -49,3 +75,78 @@ variable "tags" { type = map(string) default = {} } + +variable "create_connector" { + description = "When true, deploy the Bedrock connector/model integration CloudFormation stack and output connector/model IDs." + type = bool + default = false + + validation { + condition = ( + var.create_connector == false || + (var.enable_vpc && + var.vpc_id != null && + var.subnet_ids != null && + length(var.subnet_ids) > 0 && + var.bedrock_model_region != null && + var.bedrock_model != null && + var.bedrock_model_name != null + ) + ) + error_message = "When create_connector is true, enable_vpc must be true and you must set vpc_id, a non-empty subnet_ids list, bedrock_model_region, bedrock_model, and bedrock_model_name." + } +} + +variable "bedrock_model_region" { + description = "Bedrock model region (CloudFormation parameter: BedrockModelRegion). Required when create_connector = true." + type = string + default = null +} + +variable "bedrock_model" { + description = "Bedrock model ID (CloudFormation parameter: Model). Required when create_connector = true." 
+ type = string + default = null +} + +variable "bedrock_model_name" { + description = "A name used to tag/namescope created integration resources (CloudFormation parameter: ModelName). Required when create_connector = true." + type = string + default = null +} + +variable "add_process_function" { + description = "Enable the default pre/post processing functions in the connector (CloudFormation parameter: AddProcessFunction)." + type = bool + default = true +} + +variable "add_offline_batch_inference" { + description = "Enable the bath_predict action in the connector (CloudFormation parameter: AddOfflineBatchInference)." + type = bool + default = false +} + +variable "lambda_invoke_opensearch_mlcommons_role_name" { + description = "IAM role name used by Lambda to invoke OpenSearch (CloudFormation parameter: LambdaInvokeOpenSearchMLCommonsRoleName)." + type = string + default = "LambdaInvokeOpenSearchMLCommonsRole" +} + +variable "connector_subnet_ids" { + description = "Subnet IDs for the connector Lambda VPC config. Defaults to subnet_ids when null." + type = list(string) + default = null +} + +variable "connector_security_group_ids" { + description = "Security group IDs for the connector Lambda VPC config. Defaults to the module-created OpenSearch SG when null." + type = list(string) + default = null +} + +variable "allowed_ingress_security_group_ids" { + description = "Optional additional security group IDs allowed to reach OpenSearch over HTTPS (443) when VPC-enabled (e.g., EKS node SG)." + type = list(string) + default = [] +} diff --git a/terraform/variables.tf b/terraform/variables.tf index 0e2de7b..0eaf3f1 100644 --- a/terraform/variables.tf +++ b/terraform/variables.tf @@ -120,6 +120,74 @@ variable "opensearch" { } } +variable "create_opensearch_bedrock_connector" { + description = "When true, create the OpenSearch ↔ Bedrock connector/model integration (via CloudFormation stack)." 
+ type = bool + default = false + + validation { + condition = ( + var.create_opensearch_bedrock_connector == false || + (length(trimspace(coalesce(var.bedrock_model_region, contains(["us-east-1", "us-west-2", "eu-central-1", "ap-northeast-1", "ap-southeast-1"], var.region) ? var.region : "us-east-1"))) > 0 && + length(trimspace(coalesce(var.bedrock_model, "amazon.titan-embed-text-v1"))) > 0 && + length(trimspace(coalesce(var.bedrock_model_name, "${var.cluster_name}-bedrock"))) > 0 + ) + ) + error_message = "When create_opensearch_bedrock_connector is true, effective bedrock_model_region, bedrock_model, and bedrock_model_name must be non-empty." + } +} + +variable "bedrock_model_region" { + description = "Bedrock model region. When null, defaults to `region` if it is one of us-east-1, us-west-2, eu-central-1, ap-northeast-1 or ap-southeast-1; otherwise defaults to us-east-1." + type = string + default = null +} + +variable "bedrock_model" { + description = "Bedrock model ID. Defaults to amazon.titan-embed-text-v1 when null." + type = string + default = null +} + +variable "bedrock_model_name" { + description = "A name used to tag/namescope created connector resources. Defaults to \"<cluster_name>-bedrock\" when null." + type = string + default = null +} + +variable "add_process_function" { + description = "Enable the default pre/post processing functions in the connector." + type = bool + default = true +} + +variable "add_offline_batch_inference" { + description = "Enable offline batch inference in the connector." + type = bool + default = false +} + +variable "lambda_invoke_mlcommons_role_name" { + description = "IAM role name used by the connector Lambda to invoke OpenSearch." + type = string + default = "LambdaInvokeOpenSearchMLCommonsRole" +} + +variable "opensearch_bedrock" { + description = "OpenSearch ↔ Bedrock connector integration (deployed via CloudFormation stack)." 
+ type = object({ + create_connector = bool + bedrock_model_region = string + bedrock_model = string + bedrock_model_name = string + add_process_function = bool + add_offline_batch_inference = bool + lambda_invoke_mlcommons_role_name = string + }) + default = null + nullable = true +} + variable "object_store" { description = "S3 object store configuration" type = object({