From 7490e2485c0df7bd7fd3e36ae9eddc39d50f1278 Mon Sep 17 00:00:00 2001
From: iusztinpaul
Date: Thu, 16 Jan 2025 13:00:30 +0200
Subject: [PATCH] feat: Add S3 upload/download code
---
Makefile | 31 +++++++++--
src/second_brain/config.py | 5 +-
src/second_brain/infrastructure/aws/s3.py | 43 +++++++++++++--
tools/use_s3.py | 66 +++++++++++++++++++++++
4 files changed, 135 insertions(+), 10 deletions(-)
create mode 100644 tools/use_s3.py
diff --git a/Makefile b/Makefile
index e025fa6..b955e2a 100644
--- a/Makefile
+++ b/Makefile
@@ -1,5 +1,18 @@
+ifeq (,$(wildcard .env))
+$(error .env file is missing. Please create one based on .env.example)
+endif
+
+include .env
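+
+# Example .env entry consumed by the S3 targets below (the value is illustrative;
+# it mirrors the default in src/second_brain/config.py):
+#   AWS_S3_BUCKET_NAME=decodingml-public-data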
+
export PYTHONPATH = .
-check_dirs := src
+
+# --- Default Values ---
+
+CHECK_DIRS := src
+LOCAL_DATA_PATH := data
+
+
+# --- Utilities ---
 help:
 	@grep -E '^[a-zA-Z0-9 -]+:.*#' Makefile | sort | while read -r l; do printf "\033[1;32m$$(echo $$l | cut -f 1 -d':')\033[00m:$$(echo $$l | cut -f 2- -d'#')\n"; done
@@ -26,6 +39,16 @@ local-infrastructure-up: local-docker-infrastructure-up local-zenml-server-down
local-infrastructure-down: local-docker-infrastructure-down local-zenml-server-down
+# --- AWS ---
+
+s3-upload: # Upload a local folder to S3
+	@echo "Uploading to S3 bucket: $(AWS_S3_BUCKET_NAME)"
+	uv run python -m tools.use_s3 upload $(LOCAL_DATA_PATH) $(AWS_S3_BUCKET_NAME) --s3-prefix "second_brain_course/notion"
+
+s3-download: # Download a zipped folder from S3 to a local folder
+	@echo "Downloading from S3 bucket: $(AWS_S3_BUCKET_NAME)"
+	uv run python -m tools.use_s3 download $(AWS_S3_BUCKET_NAME) "second_brain_course/notion/data.zip" $(LOCAL_DATA_PATH)
+
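+# Example: the defaults above can be overridden per invocation (the path below
+# is illustrative):
+#   make s3-upload LOCAL_DATA_PATH=./my_data
+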
# --- Pipelines ---
collect-notion-pipeline:
@@ -39,16 +62,16 @@ test:
 # --- QA ---
 
 format-fix:
-	uv run ruff format $(check_dirs)
+	uv run ruff format $(CHECK_DIRS)
 	uv run ruff check --select I --fix
 
 lint-fix:
 	uv run ruff check --fix
 
 format-check:
-	uv run ruff format --check $(check_dirs)
+	uv run ruff format --check $(CHECK_DIRS)
 	uv run ruff check -e
 	uv run ruff check --select I -e
 
 lint-check:
-	uv run ruff check $(check_dirs)
+	uv run ruff check $(CHECK_DIRS)
diff --git a/src/second_brain/config.py b/src/second_brain/config.py
index 0171129..15eed47 100644
--- a/src/second_brain/config.py
+++ b/src/second_brain/config.py
@@ -9,8 +9,8 @@ class Settings(BaseSettings):
     NOTION_SECRET_KEY: str | None = None
 
     # --- Required settings even when working locally. ---
-
-    AWS_S3_BUCKET_NAME: str = "notion-second-brain-data"
+    AWS_DEFAULT_REGION: str = "eu-central-1"
+    AWS_S3_BUCKET_NAME: str = "decodingml-public-data"
 
     # OpenAI API
     OPENAI_MODEL_ID: str = "gpt-4o-mini"
@@ -34,7 +34,6 @@ class Settings(BaseSettings):
     DATABASE_NAME: str = "twin"
 
     # AWS Authentication
-    AWS_REGION: str = "eu-central-1"
     AWS_ACCESS_KEY: str | None = None
     AWS_SECRET_KEY: str | None = None
     AWS_ARN_ROLE: str | None = None
diff --git a/src/second_brain/infrastructure/aws/s3.py b/src/second_brain/infrastructure/aws/s3.py
index 1622ad8..6fce0a3 100644
--- a/src/second_brain/infrastructure/aws/s3.py
+++ b/src/second_brain/infrastructure/aws/s3.py
@@ -6,11 +6,21 @@
 import boto3
 
+from second_brain.config import settings
+
 
 class S3Client:
-    def __init__(self, bucket_name: str) -> None:
-        """Initialize S3 client and bucket name."""
-        self.s3_client = boto3.client("s3")
+    def __init__(
+        self, bucket_name: str, region: str = settings.AWS_DEFAULT_REGION
+    ) -> None:
+        """Initialize S3 client and bucket name.
+
+        Args:
+            bucket_name: Name of the S3 bucket
+            region: AWS region for the client (defaults to settings.AWS_DEFAULT_REGION)
+        """
+        self.region = region
+        self.s3_client = boto3.client("s3", region_name=self.region)
         self.bucket_name = bucket_name
 
     def upload_folder(self, local_path: Union[str, Path], s3_prefix: str = "") -> None:
@@ -21,6 +31,9 @@ def upload_folder(self, local_path: Union[str, Path], s3_prefix: str = "") -> No
             local_path: Path to the local folder
             s3_prefix: Optional prefix (folder path) in S3 bucket
         """
+        # Ensure bucket exists before proceeding
+        self.__create_bucket_if_doesnt_exist()
+
         local_path = Path(local_path)
 
         if not local_path.exists():
@@ -49,6 +62,30 @@ def upload_folder(self, local_path: Union[str, Path], s3_prefix: str = "") -> No
         # Clean up temporary zip file
         os.unlink(temp_zip.name)
 
+    def __create_bucket_if_doesnt_exist(self) -> None:
+        """
+        Check if the bucket exists and create it if it doesn't.
+        Raises permission-related exceptions if the caller lacks the necessary permissions.
+        """
+        try:
+            self.s3_client.head_bucket(Bucket=self.bucket_name)
+        except self.s3_client.exceptions.ClientError as e:
+            error_code = e.response["Error"]["Code"]
+            if error_code == "404":
+                try:
+                    # us-east-1 is the default location and rejects an explicit
+                    # LocationConstraint; every other region requires one.
+                    if self.region == "us-east-1":
+                        self.s3_client.create_bucket(Bucket=self.bucket_name)
+                    else:
+                        self.s3_client.create_bucket(
+                            Bucket=self.bucket_name,
+                            CreateBucketConfiguration={
+                                "LocationConstraint": self.region
+                            },
+                        )
+                except self.s3_client.exceptions.ClientError as create_error:
+                    raise Exception(
+                        f"Failed to create bucket {self.bucket_name}: {str(create_error)}"
+                    ) from create_error
+            elif error_code == "403":
+                raise Exception(
+                    f"No permission to access bucket {self.bucket_name}"
+                ) from e
+            else:
+                raise
+
     def download_folder(self, s3_prefix: str, local_path: Union[str, Path]) -> None:
         """
         Download a zipped folder from S3 and extract it to local storage.
diff --git a/tools/use_s3.py b/tools/use_s3.py
new file mode 100644
index 0000000..5ef138e
--- /dev/null
+++ b/tools/use_s3.py
@@ -0,0 +1,66 @@
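+"""CLI for uploading local folders to S3 and downloading them back.
+
+Example usage (values are illustrative; the Makefile targets wire in the real ones):
+
+    uv run python -m tools.use_s3 upload data my-bucket --s3-prefix "second_brain_course/notion"
+    uv run python -m tools.use_s3 download my-bucket "second_brain_course/notion/data.zip" data
+"""
+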
+import click
+
+from second_brain.infrastructure.aws.s3 import S3Client
+
+
+@click.group()
+def cli() -> None:
+ """CLI tool for uploading and downloading folders to/from S3."""
+ pass
+
+
+@cli.command()
+@click.argument("local_path")
+@click.argument("bucket_name")
+@click.option("--s3-prefix", default="", help="Optional S3 prefix (folder path)")
+def upload(local_path: str, bucket_name: str, s3_prefix: str) -> None:
+ """Upload a local folder to S3 bucket.
+
+ Args:
+ local_path: Path to the local folder to upload
+ bucket_name: Name of the S3 bucket
+ s3_prefix: Optional S3 prefix (folder path)
+
+ Raises:
+ click.Abort: If upload fails or path is invalid
+ """
+ try:
+ s3_client = S3Client(bucket_name)
+ s3_client.upload_folder(local_path, s3_prefix)
+ click.echo(
+ f"Successfully uploaded {local_path} to s3://{bucket_name}/{s3_prefix}"
+ )
+ except Exception as e:
+ click.echo(f"Error: {str(e)}", err=True)
+ raise click.Abort()
+
+
+@cli.command()
+@click.argument("bucket_name")
+@click.argument("s3_path")
+@click.argument("local_path")
+def download(bucket_name: str, s3_path: str, local_path: str) -> None:
+ """Download a zipped folder from S3 and extract it to local storage.
+
+ Args:
+ bucket_name: Name of the S3 bucket
+ s3_path: Path to the zip file in S3 bucket
+ local_path: Local path where files should be extracted
+
+ Raises:
+ click.Abort: If download fails or path is invalid
+ """
+
+ try:
+ s3_client = S3Client(bucket_name)
+ s3_client.download_folder(s3_path, local_path)
+ click.echo(
+ f"Successfully downloaded s3://{bucket_name}/{s3_path} to {local_path}"
+ )
+ except Exception as e:
+ click.echo(f"Error: {str(e)}", err=True)
+ raise click.Abort()
+
+
+if __name__ == "__main__":
+    cli()