Commit

feat: Push upload/download S3 code

iusztinpaul committed Jan 16, 2025
1 parent 91025b4 commit 7490e24
Showing 4 changed files with 135 additions and 10 deletions.
31 changes: 27 additions & 4 deletions Makefile
@@ -1,5 +1,18 @@
ifeq (,$(wildcard .env))
$(error .env file is missing. Please create one based on .env.example)
endif

include .env

export PYTHONPATH = .
check_dirs := src

# --- Default Values ---

CHECK_DIRS := src
LOCAL_DATA_PATH := data


# --- Utilities ---

help:
	@grep -E '^[a-zA-Z0-9 -]+:.*#' Makefile | sort | while read -r l; do printf "\033[1;32m$$(echo $$l | cut -f 1 -d':')\033[00m:$$(echo $$l | cut -f 2- -d'#')\n"; done
@@ -26,6 +39,16 @@ local-infrastructure-up: local-docker-infrastructure-up local-zenml-server-down

local-infrastructure-down: local-docker-infrastructure-down local-zenml-server-down

# --- AWS ---

s3-upload: # Upload a local folder to S3
	@echo "Uploading to S3 bucket: $(AWS_S3_BUCKET_NAME)"
	uv run python -m tools.use_s3 upload $(LOCAL_DATA_PATH) $(AWS_S3_BUCKET_NAME) --s3-prefix "second_brain_course/notion"

s3-download: # Download data from S3 to a local folder
	@echo "Downloading from S3 bucket: $(AWS_S3_BUCKET_NAME)"
	uv run python -m tools.use_s3 download $(AWS_S3_BUCKET_NAME) "second_brain_course/notion/data.zip" $(LOCAL_DATA_PATH)

# --- Pipelines ---

collect-notion-pipeline:
@@ -39,16 +62,16 @@ test:
# --- QA ---

format-fix:
	uv run ruff format $(check_dirs)
	uv run ruff format $(CHECK_DIRS)
	uv run ruff check --select I --fix

lint-fix:
	uv run ruff check --fix

format-check:
	uv run ruff format --check $(check_dirs)
	uv run ruff format --check $(CHECK_DIRS)
	uv run ruff check -e
	uv run ruff check --select I -e

lint-check:
	uv run ruff check $(check_dirs)
	uv run ruff check $(CHECK_DIRS)
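
For reference, these targets read their configuration from the .env file included at the top of the Makefile. A minimal .env sketch, with variable names taken from this Makefile and src/second_brain/config.py and placeholder values:

AWS_S3_BUCKET_NAME=decodingml-public-data
AWS_DEFAULT_REGION=eu-central-1
# Declared optional (str | None) in config.py:
AWS_ACCESS_KEY=<your-access-key>
AWS_SECRET_KEY=<your-secret-key>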
5 changes: 2 additions & 3 deletions src/second_brain/config.py
@@ -9,8 +9,8 @@ class Settings(BaseSettings):
    NOTION_SECRET_KEY: str | None = None

    # --- Required settings even when working locally. ---

    AWS_S3_BUCKET_NAME: str = "notion-second-brain-data"
    AWS_DEFAULT_REGION: str = "eu-central-1"
    AWS_S3_BUCKET_NAME: str = "decodingml-public-data"

    # OpenAI API
    OPENAI_MODEL_ID: str = "gpt-4o-mini"
@@ -34,7 +34,6 @@ class Settings(BaseSettings):
    DATABASE_NAME: str = "twin"

    # AWS Authentication
    AWS_REGION: str = "eu-central-1"
    AWS_ACCESS_KEY: str | None = None
    AWS_SECRET_KEY: str | None = None
    AWS_ARN_ROLE: str | None = None
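For context on how these values are resolved, here is a minimal sketch of the settings pattern, assuming Settings builds on pydantic-settings (the pydantic_settings import and the model_config line are assumptions, not shown in this diff):

from pydantic_settings import BaseSettings, SettingsConfigDict


class Settings(BaseSettings):
    # Assumed: values are read from the environment and the .env file.
    model_config = SettingsConfigDict(env_file=".env")

    # Defaults mirror the ones added in this commit.
    AWS_DEFAULT_REGION: str = "eu-central-1"
    AWS_S3_BUCKET_NAME: str = "decodingml-public-data"


settings = Settings()
print(settings.AWS_S3_BUCKET_NAME)  # "decodingml-public-data" unless overridden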
43 changes: 40 additions & 3 deletions src/second_brain/infrastructure/aws/s3.py
@@ -6,11 +6,21 @@

import boto3

from second_brain.config import settings


class S3Client:
    def __init__(self, bucket_name: str) -> None:
        """Initialize S3 client and bucket name."""
        self.s3_client = boto3.client("s3")
    def __init__(
        self, bucket_name: str, region: str = settings.AWS_DEFAULT_REGION
    ) -> None:
        """Initialize the S3 client and bucket name.

        Args:
            bucket_name: Name of the S3 bucket
            region: AWS region (defaults to settings.AWS_DEFAULT_REGION)
        """
        self.region = region
        self.s3_client = boto3.client("s3", region_name=self.region)
        self.bucket_name = bucket_name

    def upload_folder(self, local_path: Union[str, Path], s3_prefix: str = "") -> None:
@@ -21,6 +31,9 @@ def upload_folder(self, local_path: Union[str, Path], s3_prefix: str = "") -> None
            local_path: Path to the local folder
            s3_prefix: Optional prefix (folder path) in S3 bucket
        """
        # Ensure bucket exists before proceeding
        self.__create_bucket_if_doesnt_exist()

        local_path = Path(local_path)

        if not local_path.exists():
@@ -49,6 +62,30 @@ def upload_folder(self, local_path: Union[str, Path], s3_prefix: str = "") -> None
        # Clean up temporary zip file
        os.unlink(temp_zip.name)

    def __create_bucket_if_doesnt_exist(self) -> None:
        """
        Check if the bucket exists and create it if it doesn't.

        Raises permission-related exceptions if the user lacks the necessary permissions.
        """
        try:
            self.s3_client.head_bucket(Bucket=self.bucket_name)
        except self.s3_client.exceptions.ClientError as e:
            error_code = e.response["Error"]["Code"]
            if error_code == "404":
                try:
                    if self.region == "us-east-1":
                        # us-east-1 is the default region and must not be passed
                        # as a LocationConstraint; S3 rejects it.
                        self.s3_client.create_bucket(Bucket=self.bucket_name)
                    else:
                        self.s3_client.create_bucket(
                            Bucket=self.bucket_name,
                            CreateBucketConfiguration={
                                "LocationConstraint": self.region
                            },
                        )
                except self.s3_client.exceptions.ClientError as create_error:
                    raise Exception(
                        f"Failed to create bucket {self.bucket_name}: {str(create_error)}"
                    ) from create_error
            elif error_code == "403":
                raise Exception(f"No permission to access bucket {self.bucket_name}")
            else:
                raise

    def download_folder(self, s3_prefix: str, local_path: Union[str, Path]) -> None:
        """
        Download a zipped folder from S3 and extract it to local storage.
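
For a programmatic route that skips the CLI, the client can also be used directly. A minimal sketch; the bucket name and paths are placeholders, and the calls follow the method signatures shown in this diff:

from second_brain.infrastructure.aws.s3 import S3Client

# Placeholder bucket and paths.
client = S3Client("my-example-bucket", region="eu-central-1")

# Zip the local folder and upload it under the given S3 prefix.
client.upload_folder("data", s3_prefix="second_brain_course/notion")

# Download a zipped folder from S3 and extract it locally.
client.download_folder("second_brain_course/notion/data.zip", "data")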
66 changes: 66 additions & 0 deletions tools/use_s3.py
@@ -0,0 +1,66 @@
import click

from second_brain.infrastructure.aws.s3 import S3Client


@click.group()
def cli() -> None:
    """CLI tool for uploading and downloading folders to/from S3."""
    pass


@cli.command()
@click.argument("local_path")
@click.argument("bucket_name")
@click.option("--s3-prefix", default="", help="Optional S3 prefix (folder path)")
def upload(local_path: str, bucket_name: str, s3_prefix: str) -> None:
    """Upload a local folder to an S3 bucket.

    Args:
        local_path: Path to the local folder to upload
        bucket_name: Name of the S3 bucket
        s3_prefix: Optional S3 prefix (folder path)

    Raises:
        click.Abort: If the upload fails or the path is invalid
    """
    try:
        s3_client = S3Client(bucket_name)
        s3_client.upload_folder(local_path, s3_prefix)
        click.echo(
            f"Successfully uploaded {local_path} to s3://{bucket_name}/{s3_prefix}"
        )
    except Exception as e:
        click.echo(f"Error: {str(e)}", err=True)
        raise click.Abort()


@cli.command()
@click.argument("bucket_name")
@click.argument("s3_path")
@click.argument("local_path")
def download(bucket_name: str, s3_path: str, local_path: str) -> None:
    """Download a zipped folder from S3 and extract it to local storage.

    Args:
        bucket_name: Name of the S3 bucket
        s3_path: Path to the zip file in the S3 bucket
        local_path: Local path where files should be extracted

    Raises:
        click.Abort: If the download fails or the path is invalid
    """
    try:
        s3_client = S3Client(bucket_name)
        s3_client.download_folder(s3_path, local_path)
        click.echo(
            f"Successfully downloaded s3://{bucket_name}/{s3_path} to {local_path}"
        )
    except Exception as e:
        click.echo(f"Error: {str(e)}", err=True)
        raise click.Abort()


if __name__ == "__main__":
    cli()
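
To exercise the CLI in-process, for example in a test, click's CliRunner can invoke it without a shell. A small sketch; the bucket name is a placeholder and a real run still needs valid AWS credentials:

from click.testing import CliRunner

from tools.use_s3 import cli

runner = CliRunner()
result = runner.invoke(
    cli,
    ["upload", "data", "my-example-bucket", "--s3-prefix", "second_brain_course/notion"],
)
print(result.exit_code)  # 0 on success; click.Abort yields a non-zero exit code
print(result.output)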
