diff --git a/.github/workflows/deploy_to_ecs.yml b/.github/workflows/deploy_to_ecs.yml
new file mode 100644
index 0000000..c740401
--- /dev/null
+++ b/.github/workflows/deploy_to_ecs.yml
@@ -0,0 +1,126 @@
+# Build the app image, push it to ECR as `latest`, and roll it out to ECS with
+# Terraform. A green run promotes the image to `stable`; a failed apply falls
+# back to the previous `stable` image (or tears the stack down if none exists).
+name: Deploy to ECS
+
+on:
+  push:
+    branches:
+      - main
+
+jobs:
+  deploy:
+    runs-on: ubuntu-latest
+    env:
+      AWS_ACCESS_KEY_ID: ${{ secrets.AWS_ACCESS_KEY_ID }}
+      AWS_SECRET_ACCESS_KEY: ${{ secrets.AWS_SECRET_ACCESS_KEY }}
+      AWS_DEFAULT_REGION: ${{ secrets.AWS_REGION }}
+      APP_NAME: fastapi-demo-app
+
+    steps:
+      - name: Checkout code
+        # v2 targets the retired Node.js 12 action runtime; v4 is the maintained major.
+        uses: actions/checkout@v4
+
+      - name: Create ECR Repository if Not Exists
+        run: |
+          aws ecr describe-repositories --repository-names $APP_NAME || \
+          aws ecr create-repository --repository-name $APP_NAME
+
+      - name: Retrieve ECR Repository URI
+        run: |
+          ECR_URI=$(aws ecr describe-repositories --repository-names $APP_NAME --query 'repositories[0].repositoryUri' --output text)
+          # Written to GITHUB_ENV so the later steps of this job can read $ECR_URI.
+          echo "ECR_URI=${ECR_URI}" >> $GITHUB_ENV
+
+      - name: Log in to ECR
+        run: |
+          aws ecr get-login-password --region $AWS_DEFAULT_REGION | docker login --username AWS --password-stdin $ECR_URI
+
+      - name: Build Docker image
+        run: docker build -t $APP_NAME .
+
+      - name: Tag with latest
+        run: docker tag $APP_NAME:latest $ECR_URI:latest
+
+      - name: Push latest to ECR
+        run: docker push $ECR_URI:latest
+
+      # NOTE(review): assumes the terraform CLI is preinstalled on the runner image — confirm.
+      - name: Set up Terraform
+        working-directory: infrastructure
+        run: terraform init
+
+      - name: Terraform Plan
+        working-directory: infrastructure
+        run: |
+          terraform plan -var-file=arguments.tfvars \
+            -var="app_name=$APP_NAME" \
+            -var="image_tag=latest" \
+            -var="ecr_repository_url=$ECR_URI" \
+            -var="aws_region=$AWS_DEFAULT_REGION"
+
+      - name: Terraform Apply
+        # The id lets the rollback step check whether this step itself failed.
+        id: terraform_apply
+        working-directory: infrastructure
+        run: |
+          terraform apply -auto-approve -var-file=arguments.tfvars \
+            -var="app_name=$APP_NAME" \
+            -var="image_tag=latest" \
+            -var="ecr_repository_url=$ECR_URI" \
+            -var="aws_region=$AWS_DEFAULT_REGION"
+
+      # Roll back only when `terraform apply` itself failed. A bare `failure()`
+      # also fires when an earlier step (e.g. the Docker build) breaks, which
+      # would re-apply or even destroy infrastructure this run never touched.
+      - name: Rollback to Stable on Failure
+        if: failure() && steps.terraform_apply.outcome == 'failure'
+        working-directory: infrastructure
+        run: |
+          echo "Tagging latest failed image..."
+          # Tagging is diagnostic only, so a failure here must not abort the rollback.
+          if MANIFEST=$(aws ecr batch-get-image --repository-name $APP_NAME --image-ids imageTag=latest --output text --query 'images[].imageManifest'); then
+            aws ecr put-image --repository-name $APP_NAME --image-tag latest_failed --image-manifest "$MANIFEST" || \
+              echo "Could not tag the image as latest_failed; continuing with rollback."
+          else
+            echo "Could not read the latest image manifest; continuing with rollback."
+          fi
+          if aws ecr describe-images --repository-name $APP_NAME --image-ids imageTag=stable > /dev/null 2>&1; then
+            echo "Rolling back to stable image..."
+            terraform apply -auto-approve -var-file=arguments.tfvars \
+              -var="app_name=$APP_NAME" \
+              -var="image_tag=stable" \
+              -var="ecr_repository_url=$ECR_URI" \
+              -var="aws_region=$AWS_DEFAULT_REGION"
+          else
+            echo "No stable image found. Destroying resources."
+            terraform destroy -auto-approve -var-file=arguments.tfvars \
+              -var="app_name=$APP_NAME" \
+              -var="image_tag=latest" \
+              -var="ecr_repository_url=$ECR_URI" \
+              -var="aws_region=$AWS_DEFAULT_REGION"
+          fi
+
+      - name: Re-tag latest to stable on Success
+        if: success()
+        run: |
+          MANIFEST=$(aws ecr batch-get-image --repository-name $APP_NAME --image-ids imageTag=latest --output text --query 'images[].imageManifest')
+          aws ecr put-image --repository-name $APP_NAME --image-tag stable --image-manifest "$MANIFEST"
+
+      - name: Remove Untagged Images from ECR
+        run: |
+          UNTAGGED_IMAGES=$(aws ecr list-images --repository-name $APP_NAME --filter "tagStatus=UNTAGGED" --query 'imageIds[*]' --output json)
+          if [[ "$UNTAGGED_IMAGES" != "[]" ]]; then
+            echo "Deleting untagged images..."
+            aws ecr batch-delete-image --repository-name $APP_NAME --image-ids "$UNTAGGED_IMAGES"
+          else
+            echo "No untagged images to delete."
+          fi
+
+      - name: Echo task public IP
+        run: |
+          TASK_ARN=$(aws ecs list-tasks --cluster ${APP_NAME}-cluster --service-name ${APP_NAME}-service --query 'taskArns[0]' --output text)
+          ENI_ID=$(aws ecs describe-tasks --cluster ${APP_NAME}-cluster --tasks $TASK_ARN --query 'tasks[0].attachments[0].details[?name==`networkInterfaceId`].value' --output text)
+          PUBLIC_IP=$(aws ec2 describe-network-interfaces --network-interface-ids $ENI_ID --query 'NetworkInterfaces[0].Association.PublicIp' --output text)
+          echo "The application is accessible at: http://$PUBLIC_IP"
\ No newline at end of file
diff --git a/README.md b/README.md
index ffa41be..8dafe35 100644
--- a/README.md
+++ b/README.md
@@ -1,29 +1,30 @@
-# Machine Learning microservice with FastAPI
+# Machine Learning Microservice with FastAPI
 
-App to interact with machine learning models trained on the Titanic dataset.
+This application serves machine learning models trained on the Titanic dataset, allowing API-based predictions and model evaluation.
 
-There are 2 versions (`/v1` and `/v2`) each with two API endpoints: `/predict` and `/score`.
+## API Overview
 
-`/v1` features a regularized logistic model. `/v2` features a random forest model. `/predict` returns the predicted probability of survival. `/score` returns some test set metrics of the model (ie ROC AUC, accuracy and recall).
+There are two versions of the API (`/v1` and `/v2`), each offering two endpoints: `/predict` and `/score`.
 
-## Live demo
+- **`/v1`**: Utilizes a regularized logistic regression model.
+- **`/v2`**: Utilizes a random forest model.
+- **`/predict`**: Returns the predicted probability of passenger survival.
+- **`/score`**: Provides model metrics on a test set (ROC AUC, accuracy, and recall).
 
-The app is currently (and hopefully) live on AWS ECS (Amazon Elastic Container Service)
+## Deployment
 
-http://fastapi-demo-nlb-ec2-cc72dbfaa6e8866d.elb.eu-west-1.amazonaws.com
+The app is deployed with continuous integration to AWS ECS using a Terraform template. GitHub Actions handles automated deployments on changes to the main branch.
 
-## Usage
+## Local Usage
 
-Build and spin up the app locally using Docker
+To run the app locally using Docker:
 
-```
-$ git clone https://github.com/fedassembly/fastapi-demo.git
+```bash
+$ git clone https://github.com/ftrifoglio/fastapi-demo.git
 $ cd fastapi-demo
-$ make venv # works on MacOS only
 $ docker-compose up
 ```
 
 ## Roadmap
 
-- Add unit tests to `titanic_model` and `api_utils`
-- Add CI/CD
+- Add unit tests to `titanic_model` and `api_utils`
\ No newline at end of file
diff --git a/infrastructure/arguments.tfvars b/infrastructure/arguments.tfvars
new file mode 100644
index 0000000..bbf7fbc
--- /dev/null
+++ b/infrastructure/arguments.tfvars
@@ -0,0 +1,2 @@
+subnets = ["subnet-03ce986bc9f2b0f44","subnet-00b942351453b58cd","subnet-09458c9ff57d91262"]
+vpc = "vpc-0d60ff2a4274d5146"
\ No newline at end of file
diff --git a/infrastructure/backend.tf b/infrastructure/backend.tf
new file mode 100644
index 0000000..3188acf
--- /dev/null
+++ b/infrastructure/backend.tf
@@ -0,0 +1,8 @@
+terraform {
+  backend "s3" {
+    bucket  = "fastapi-demo-app-terraform"
+    key     = "terraform.tfstate"
+    region  = "eu-west-1"
+    encrypt = true
+  }
+}
\ No newline at end of file
diff --git a/infrastructure/main.tf b/infrastructure/main.tf
new file mode 100644
index 0000000..238a7eb
--- /dev/null
+++ b/infrastructure/main.tf
@@ -0,0 +1,107 @@
+provider "aws" {
+  region = var.aws_region
+}
+
+resource "aws_ecs_cluster" "app_cluster" {
+  name = "${var.app_name}-cluster"
+}
+
+# Create an IAM role with trust policy
+resource "aws_iam_role" "ecs_task_execution_role" {
+  name = "${var.app_name}-task-execution-role"
+  assume_role_policy = jsonencode({
+    Version = "2012-10-17"
+    Statement = [
+      {
+        Effect = "Allow"
+        Principal = {
+          Service = "ecs-tasks.amazonaws.com"
+        }
+        Action = "sts:AssumeRole"
+      }
+    ]
+  })
+}
+
+# Attach role policy to role.
+# aws_iam_role_policy_attachment manages only this role's attachment;
+# aws_iam_policy_attachment is exclusive and would detach the managed policy
+# from every other role, user and group in the account.
+resource "aws_iam_role_policy_attachment" "ecs_task_execution_role_policy" {
+  role       = aws_iam_role.ecs_task_execution_role.name
+  policy_arn = "arn:aws:iam::aws:policy/service-role/AmazonECSTaskExecutionRolePolicy"
+}
+
+# Create task definition
+resource "aws_ecs_task_definition" "app_task" {
+  family                   = "${var.app_name}-task"
+  requires_compatibilities = ["FARGATE"]
+  network_mode             = "awsvpc"
+  cpu                      = "1024"
+  memory                   = "3072"
+  execution_role_arn       = aws_iam_role.ecs_task_execution_role.arn
+  container_definitions = jsonencode([
+    {
+      name      = var.app_name
+      image     = "${var.ecr_repository_url}:${var.image_tag}"
+      cpu       = 1024
+      memory    = 3072
+      essential = true
+      portMappings = [
+        {
+          containerPort = 80
+          hostPort      = 80
+        }
+      ]
+    }
+  ])
+  runtime_platform {
+    operating_system_family = "LINUX"
+    cpu_architecture        = "X86_64"
+  }
+}
+
+# Create security group
+resource "aws_security_group" "app_sg" {
+  name   = "${var.app_name}-sg"
+  vpc_id = var.vpc
+  ingress {
+    from_port   = 80
+    to_port     = 80
+    protocol    = "tcp"
+    cidr_blocks = ["0.0.0.0/0"]
+  }
+  egress {
+    from_port   = 0
+    to_port     = 0
+    protocol    = "-1"
+    cidr_blocks = ["0.0.0.0/0"]
+  }
+}
+
+# Create ECS service
+resource "aws_ecs_service" "app_service" {
+  name            = "${var.app_name}-service"
+  cluster         = aws_ecs_cluster.app_cluster.id
+  task_definition = aws_ecs_task_definition.app_task.arn
+  desired_count   = 1
+  launch_type     = "FARGATE"
+
+  # The pipeline always deploys the mutable `latest` tag, so the task definition
+  # does not change between runs; force a fresh deployment on every apply so the
+  # newly pushed image is actually pulled.
+  force_new_deployment = true
+  triggers = {
+    redeployment = timestamp()
+  }
+
+  # Wait for the service to settle so `terraform apply` errors out when the new
+  # tasks never become steady, instead of reporting success immediately.
+  wait_for_steady_state = true
+
+  network_configuration {
+    subnets          = var.subnets
+    security_groups  = [aws_security_group.app_sg.id]
+    assign_public_ip = true
+  }
+}
diff --git a/infrastructure/variables.tf b/infrastructure/variables.tf
new file mode 100644
index 0000000..17185ab
--- /dev/null
+++ b/infrastructure/variables.tf
@@ -0,0 +1,24 @@
+variable "aws_region" {
+  description = "AWS region the provider operates in."
+  type        = string
+}
+variable "app_name" {
+  description = "Prefix used to name the cluster, role, task, security group and service."
+  type        = string
+}
+variable "ecr_repository_url" {
+  description = "ECR repository URL; combined with image_tag to form the container image."
+  type        = string
+}
+variable "subnets" {
+  description = "Subnet IDs the ECS service's tasks are placed in."
+  type        = list(string)
+}
+variable "vpc" {
+  description = "ID of the VPC the security group is created in."
+  type        = string
+}
+variable "image_tag" {
+  description = "Tag of the image in ecr_repository_url to run."
+  type        = string
+}
\ No newline at end of file