Skip to content

Commit

Permalink
add deployment pipeline
Browse files Browse the repository at this point in the history
  • Loading branch information
ftrifoglio committed Oct 28, 2024
1 parent 6fe1d02 commit 8dec9c9
Show file tree
Hide file tree
Showing 6 changed files with 245 additions and 14 deletions.
110 changes: 110 additions & 0 deletions .github/workflows/deploy_to_ecs.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,110 @@
---
# Continuous deployment: build the application image, push it to ECR and roll
# it out to ECS with Terraform on every push to main.
name: Deploy to ECS

on:
  push:
    branches:
      - main

# Runs are serialized so that two pushes in quick succession cannot run
# Terraform against the same remote state at the same time.
concurrency:
  group: deploy-to-ecs
  cancel-in-progress: false

# The job only reads the repository; AWS access comes from the secrets below.
permissions:
  contents: read

defaults:
  run:
    # Naming the shell explicitly makes GitHub invoke bash with
    # `-eo pipefail`, so a failure anywhere in a pipeline fails the step.
    shell: bash

jobs:
  deploy:
    runs-on: ubuntu-latest
    env:
      AWS_ACCESS_KEY_ID: ${{ secrets.AWS_ACCESS_KEY_ID }}
      AWS_SECRET_ACCESS_KEY: ${{ secrets.AWS_SECRET_ACCESS_KEY }}
      AWS_DEFAULT_REGION: ${{ secrets.AWS_REGION }}
      APP_NAME: fastapi-demo-app

    steps:
      - name: Checkout code
        uses: actions/checkout@v4

      - name: Create ECR Repository if Not Exists
        run: |
          aws ecr describe-repositories --repository-names "$APP_NAME" || \
            aws ecr create-repository --repository-name "$APP_NAME"

      # Exported through GITHUB_ENV so every later step can use $ECR_URI.
      - name: Retrieve ECR Repository URI
        run: |
          ECR_URI=$(aws ecr describe-repositories \
            --repository-names "$APP_NAME" \
            --query 'repositories[0].repositoryUri' \
            --output text)
          echo "ECR_URI=${ECR_URI}" >> "$GITHUB_ENV"

      - name: Log in to ECR
        run: |
          # docker login expects the registry host, i.e. the part of the
          # repository URI before the first slash.
          aws ecr get-login-password --region "$AWS_DEFAULT_REGION" | \
            docker login --username AWS --password-stdin "${ECR_URI%%/*}"

      - name: Build Docker image
        run: docker build -t "$APP_NAME" .

      - name: Tag with latest
        run: docker tag "$APP_NAME:latest" "$ECR_URI:latest"

      - name: Push latest to ECR
        id: push_image
        run: docker push "$ECR_URI:latest"

      # Do not rely on the runner image shipping a Terraform binary.
      - name: Install Terraform
        uses: hashicorp/setup-terraform@v3

      - name: Set up Terraform
        working-directory: infrastructure
        run: terraform init -input=false

      - name: Terraform Plan
        working-directory: infrastructure
        run: |
          terraform plan -input=false -var-file=arguments.tfvars \
            -var="app_name=$APP_NAME" \
            -var="image_tag=latest" \
            -var="ecr_repository_url=$ECR_URI" \
            -var="aws_region=$AWS_DEFAULT_REGION"

      - name: Terraform Apply
        working-directory: infrastructure
        run: |
          terraform apply -input=false -auto-approve -var-file=arguments.tfvars \
            -var="app_name=$APP_NAME" \
            -var="image_tag=latest" \
            -var="ecr_repository_url=$ECR_URI" \
            -var="aws_region=$AWS_DEFAULT_REGION"

      # terraform apply returns as soon as the service is updated, not when
      # its tasks are healthy. Waiting here lets a broken rollout fail the job
      # (triggering the rollback below) before the image is promoted to stable.
      - name: Wait for ECS service to stabilize
        run: |
          aws ecs wait services-stable \
            --cluster "${APP_NAME}-cluster" \
            --services "${APP_NAME}-service"

      # Only roll back once the new image has actually been pushed. For an
      # earlier failure (e.g. the Docker build) `latest` still points at the
      # previous image and the infrastructure has not been touched.
      - name: Rollback to Stable on Failure
        if: failure() && steps.push_image.outcome == 'success'
        working-directory: infrastructure
        run: |
          echo "Tagging latest failed image..."
          MANIFEST=$(aws ecr batch-get-image \
            --repository-name "$APP_NAME" \
            --image-ids imageTag=latest \
            --output text \
            --query 'images[].imageManifest')
          # Best effort: put-image rejects a tag that already points at this
          # exact image, and that must not abort the rollback itself.
          aws ecr put-image \
            --repository-name "$APP_NAME" \
            --image-tag latest_failed \
            --image-manifest "$MANIFEST" \
            || echo "Could not tag latest_failed (it may already point at this image); continuing."
          if aws ecr describe-images --repository-name "$APP_NAME" --image-ids imageTag=stable > /dev/null 2>&1; then
            echo "Rolling back to stable image..."
            terraform apply -input=false -auto-approve -var-file=arguments.tfvars \
              -var="app_name=$APP_NAME" \
              -var="image_tag=stable" \
              -var="ecr_repository_url=$ECR_URI" \
              -var="aws_region=$AWS_DEFAULT_REGION"
          else
            echo "No stable image found. Destroying resources."
            terraform destroy -input=false -auto-approve -var-file=arguments.tfvars \
              -var="app_name=$APP_NAME" \
              -var="image_tag=latest" \
              -var="ecr_repository_url=$ECR_URI" \
              -var="aws_region=$AWS_DEFAULT_REGION"
          fi

      - name: Re-tag latest to stable on Success
        if: success()
        run: |
          LATEST_DIGEST=$(aws ecr describe-images \
            --repository-name "$APP_NAME" \
            --image-ids imageTag=latest \
            --query 'imageDetails[0].imageDigest' \
            --output text)
          # Empty when no stable tag exists yet (first successful deployment).
          STABLE_DIGEST=$(aws ecr describe-images \
            --repository-name "$APP_NAME" \
            --image-ids imageTag=stable \
            --query 'imageDetails[0].imageDigest' \
            --output text 2> /dev/null || true)
          # put-image fails if the tag already points at this image, so skip
          # the re-tag when there is nothing to move.
          if [[ "$LATEST_DIGEST" == "$STABLE_DIGEST" ]]; then
            echo "stable already points at the deployed image."
          else
            MANIFEST=$(aws ecr batch-get-image \
              --repository-name "$APP_NAME" \
              --image-ids imageTag=latest \
              --output text \
              --query 'images[].imageManifest')
            aws ecr put-image \
              --repository-name "$APP_NAME" \
              --image-tag stable \
              --image-manifest "$MANIFEST"
          fi

      # Images lose their tag when `latest`/`stable` move on; clean them up.
      - name: Remove Untagged Images from ECR
        run: |
          UNTAGGED_IMAGES=$(aws ecr list-images \
            --repository-name "$APP_NAME" \
            --filter "tagStatus=UNTAGGED" \
            --query 'imageIds[*]' \
            --output json)
          if [[ "$UNTAGGED_IMAGES" != "[]" ]]; then
            echo "Deleting untagged images..."
            aws ecr batch-delete-image --repository-name "$APP_NAME" --image-ids "$UNTAGGED_IMAGES"
          else
            echo "No untagged images to delete."
          fi

      - name: Echo task public IP
        run: |
          TASK_ARN=$(aws ecs list-tasks \
            --cluster "${APP_NAME}-cluster" \
            --service-name "${APP_NAME}-service" \
            --query 'taskArns[0]' \
            --output text)
          # The CLI prints the literal string "None" when the list is empty.
          if [[ -z "$TASK_ARN" || "$TASK_ARN" == "None" ]]; then
            echo "No running task found for ${APP_NAME}-service; cannot determine the public IP."
            exit 0
          fi
          ENI_ID=$(aws ecs describe-tasks \
            --cluster "${APP_NAME}-cluster" \
            --tasks "$TASK_ARN" \
            --query 'tasks[0].attachments[0].details[?name==`networkInterfaceId`].value' \
            --output text)
          PUBLIC_IP=$(aws ec2 describe-network-interfaces \
            --network-interface-ids "$ENI_ID" \
            --query 'NetworkInterfaces[0].Association.PublicIp' \
            --output text)
          echo "The application is accessible at: http://$PUBLIC_IP"
29 changes: 15 additions & 14 deletions README.md
Original file line number Diff line number Diff line change
@@ -1,29 +1,30 @@
# Machine Learning microservice with FastAPI
# Machine Learning Microservice with FastAPI

App to interact with machine learning models trained on the Titanic dataset.
This application serves machine learning models trained on the Titanic dataset, allowing API-based predictions and model evaluation.

There are 2 versions (`/v1` and `/v2`) each with two API endpoints: `/predict` and `/score`.
## API Overview

`/v1` features a regularized logistic model. `/v2` features a random forest model. `/predict` returns the predicted probability of survival. `/score` returns some test set metrics of the model (ie ROC AUC, accuracy and recall).
There are two versions of the API (`/v1` and `/v2`), each offering two endpoints: `/predict` and `/score`.

## Live demo
- **`/v1`**: Utilizes a regularized logistic regression model.
- **`/v2`**: Utilizes a random forest model.
- **`/predict`**: Returns the predicted probability of passenger survival.
- **`/score`**: Provides model metrics on a test set (ROC AUC, accuracy, and recall).

The app is currently (and hopefully) live on AWS ECS (Amazon Elastic Container Service)
## Deployment

http://fastapi-demo-nlb-ec2-cc72dbfaa6e8866d.elb.eu-west-1.amazonaws.com
The app is continuously deployed to AWS ECS (Fargate): on every push to the `main` branch, a GitHub Actions workflow builds the Docker image, pushes it to ECR, and applies the Terraform configuration in `infrastructure/`.

## Usage
## Local Usage

Build and spin up the app locally using Docker
To run the app locally using Docker:

```
$ git clone https://github.com/fedassembly/fastapi-demo.git
```bash
$ git clone https://github.com/ftrifoglio/fastapi-demo.git
$ cd fastapi-demo
$ make venv # works on macOS only
$ docker-compose up
```

## Roadmap

- Add unit tests to `titanic_model` and `api_utils`
- Add CI/CD
- Add unit tests to `titanic_model` and `api_utils`
2 changes: 2 additions & 0 deletions infrastructure/arguments.tfvars
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
# Existing network the application is deployed into.
# var.vpc is the VPC of the security group; var.subnets are the subnets the
# ECS service places its tasks in.
vpc = "vpc-0d60ff2a4274d5146"

subnets = [
  "subnet-03ce986bc9f2b0f44",
  "subnet-00b942351453b58cd",
  "subnet-09458c9ff57d91262",
]
8 changes: 8 additions & 0 deletions infrastructure/backend.tf
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
# Remote state: kept in an S3 bucket with server-side encryption enabled.
# NOTE(review): no state locking is configured here, so concurrent runs are
# not protected against each other by the backend — confirm this is intended.
terraform {
  backend "s3" {
    region  = "eu-west-1"
    bucket  = "fastapi-demo-app-terraform"
    key     = "terraform.tfstate"
    encrypt = true
  }
}
92 changes: 92 additions & 0 deletions infrastructure/main.tf
Original file line number Diff line number Diff line change
@@ -0,0 +1,92 @@
# AWS provider; the target region is supplied by the caller.
provider "aws" {
  region = var.aws_region
}

# ECS cluster the application service runs in, named "<app_name>-cluster".
resource "aws_ecs_cluster" "app_cluster" {
  name = format("%s-cluster", var.app_name)
}

# Task execution role. Its trust policy lets only the ECS tasks service
# principal assume it.
resource "aws_iam_role" "ecs_task_execution_role" {
  name = format("%s-task-execution-role", var.app_name)

  # jsonencode() emits object keys in lexicographic order, so the order in
  # which they are written here does not affect the resulting document.
  assume_role_policy = jsonencode({
    Statement = [
      {
        Action    = "sts:AssumeRole"
        Effect    = "Allow"
        Principal = { Service = "ecs-tasks.amazonaws.com" }
      },
    ]
    Version = "2012-10-17"
  })
}

# Attach the AWS-managed ECS task execution policy to the execution role.
#
# aws_iam_role_policy_attachment is used rather than aws_iam_policy_attachment:
# the latter manages a policy's attachments exclusively, so applying it
# detaches the policy from every other role, user and group in the account
# that is not listed in the resource.
#
# NOTE(review): the resource type changed, so an existing state will plan a
# destroy of the old attachment and a create of this one. Re-run plan after
# the first apply to confirm the policy is still attached.
resource "aws_iam_role_policy_attachment" "ecs_task_execution_role_policy" {
  role       = aws_iam_role.ecs_task_execution_role.name
  policy_arn = "arn:aws:iam::aws:policy/service-role/AmazonECSTaskExecutionRolePolicy"
}

# Fargate task definition with a single essential container that runs
# "<ecr_repository_url>:<image_tag>" and exposes port 80.
resource "aws_ecs_task_definition" "app_task" {
  family                   = format("%s-task", var.app_name)
  execution_role_arn       = aws_iam_role.ecs_task_execution_role.arn
  network_mode             = "awsvpc"
  requires_compatibilities = ["FARGATE"]

  # Task-level size; the single container below is given the same amounts.
  cpu    = "1024"
  memory = "3072"

  runtime_platform {
    cpu_architecture        = "X86_64"
    operating_system_family = "LINUX"
  }

  container_definitions = jsonencode([
    {
      name      = var.app_name
      image     = join(":", [var.ecr_repository_url, var.image_tag])
      essential = true
      cpu       = 1024
      memory    = 3072
      portMappings = [
        { containerPort = 80, hostPort = 80 },
      ]
    },
  ])
}

# Security group for the application tasks: inbound TCP 80 from any IPv4
# address, and unrestricted outbound traffic.
resource "aws_security_group" "app_sg" {
  name   = format("%s-sg", var.app_name)
  vpc_id = var.vpc

  # protocol "-1" with ports 0-0 means all protocols and all ports.
  egress {
    protocol    = "-1"
    from_port   = 0
    to_port     = 0
    cidr_blocks = ["0.0.0.0/0"]
  }

  ingress {
    protocol    = "tcp"
    from_port   = 80
    to_port     = 80
    cidr_blocks = ["0.0.0.0/0"]
  }
}

# ECS service: keeps one Fargate task of the application running in the given
# subnets, with a public IP so the container is reachable directly.
resource "aws_ecs_service" "app_service" {
  name            = "${var.app_name}-service"
  cluster         = aws_ecs_cluster.app_cluster.id
  task_definition = aws_ecs_task_definition.app_task.arn
  desired_count   = 1
  launch_type     = "FARGATE"

  # The pipeline deploys mutable tags ("latest" / "stable"), so the task
  # definition is textually identical from one run to the next and Terraform
  # would otherwise see nothing to change, leaving the old image running.
  # Force a redeployment on every apply so the tag is resolved again.
  # plantimestamp() requires Terraform >= 1.5.
  force_new_deployment = true
  triggers = {
    redeployment = plantimestamp()
  }

  network_configuration {
    subnets          = var.subnets
    security_groups  = [aws_security_group.app_sg.id]
    assign_public_ip = true
  }
}
18 changes: 18 additions & 0 deletions infrastructure/variables.tf
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
# Input variables of the deployment. None has a default, so every one must be
# supplied through -var or a -var-file.

variable "aws_region" {
  description = "AWS region the provider operates in."
  type        = string
}

variable "app_name" {
  description = "Application name; used as the container name and as the prefix of the cluster, service, task family, IAM role and security group names."
  type        = string
}

variable "ecr_repository_url" {
  description = "URL of the ECR repository that holds the application image, without a tag."
  type        = string
}

variable "subnets" {
  description = "IDs of the subnets the ECS service places its tasks in."
  type        = list(string)
}

variable "vpc" {
  description = "ID of the VPC in which the security group is created."
  type        = string
}

variable "image_tag" {
  description = "Tag of the image in the ECR repository to run."
  type        = string
}

0 comments on commit 8dec9c9

Please sign in to comment.