From b04e30022ce13f4c8bb6f9142599f676c4846a6d Mon Sep 17 00:00:00 2001 From: Dhruv Baldawa Date: Thu, 5 Dec 2024 22:41:12 +0530 Subject: [PATCH 01/10] Formatting and linting with ruff --- gyandex/cli/podgen.py | 24 +- gyandex/llms/factory.py | 9 +- gyandex/llms/factory_test.py | 18 +- gyandex/loaders/factory.py | 14 +- gyandex/loaders/factory_test.py | 19 +- gyandex/podgen/config/loader.py | 5 +- gyandex/podgen/config/loader_test.py | 19 +- gyandex/podgen/config/schema.py | 4 +- gyandex/podgen/engine/publisher.py | 23 +- gyandex/podgen/engine/publisher_test.py | 10 +- gyandex/podgen/feed/generator.py | 8 +- gyandex/podgen/feed/generator_test.py | 9 +- gyandex/podgen/feed/models.py | 17 +- gyandex/podgen/speech/google_cloud.py | 17 +- gyandex/podgen/speech/google_cloud_test.py | 14 +- gyandex/podgen/storage/factory.py | 1 + gyandex/podgen/storage/factory_test.py | 8 +- gyandex/podgen/storage/s3.py | 8 +- gyandex/podgen/storage/s3_test.py | 12 +- gyandex/podgen/workflows/alexandria.py | 59 +-- gyandex/podgen/workflows/types.py | 8 +- main.ipynb | 412 --------------------- poetry.lock | 29 +- publish.ipynb | 119 ------ pyproject.toml | 13 +- 25 files changed, 178 insertions(+), 701 deletions(-) delete mode 100644 main.ipynb delete mode 100644 publish.ipynb diff --git a/gyandex/cli/podgen.py b/gyandex/cli/podgen.py index aa207f7..a20b13e 100644 --- a/gyandex/cli/podgen.py +++ b/gyandex/cli/podgen.py @@ -6,7 +6,6 @@ from dotenv import load_dotenv from rich.console import Console -from gyandex.llms.factory import get_model from gyandex.loaders.factory import load_content from gyandex.podgen.engine.publisher import PodcastPublisher, PodcastMetadata from gyandex.podgen.feed.models import PodcastDB @@ -31,18 +30,18 @@ def main(): config = load_config(args.config_path) # Load the content - with console.status('[bold green] Loading content...[/bold green]'): + with console.status("[bold green] Loading content...[/bold green]"): document = load_content(config.content) - console.log('Content loaded...') + console.log("Content loaded...") # Analyze the content - with console.status('[bold green] Crafting the script...[/bold green]'): + with console.status("[bold green] Crafting the script...[/bold green]"): workflow = get_workflow(config) script = asyncio.run(workflow.generate_script(document)) console.log(f'Script completed for "{script.title}". 
Script contains {len(script.dialogues)} segments...') # Generate the podcast audio - with console.status('[bold green] Generating audio...[/bold green]'): + with console.status("[bold green] Generating audio...[/bold green]"): tts_engine = get_text_to_speech_engine(config.tts) audio_segments = [tts_engine.process_segment(dialogue) for dialogue in script.dialogues] @@ -52,17 +51,18 @@ def main(): podcast_path = f"{output_dir}/podcast_{hashlib.md5(config.content.source.encode()).hexdigest()}.mp3" tts_engine.generate_audio_file(audio_segments, podcast_path) - console.log(f'Podcast file {podcast_path} generated...') + console.log(f"Podcast file {podcast_path} generated...") - with console.status('[bold green] Publishing podcast...[/bold green]'): + with console.status("[bold green] Publishing podcast...[/bold green]"): storage = get_storage(config.storage) - db = PodcastDB(db_path='assets/podcasts.db') + db = PodcastDB(db_path="assets/podcasts.db") publisher = PodcastPublisher( storage=storage, db=db, - base_url=f"https://{storage.custom_domain}", # @FIXME: we need to fallback when custom domain is not available + # @FIXME: we need to fallback when custom domain is not available + base_url=f"https://{storage.custom_domain}", ) - feed_url = publisher.create_feed( + publisher.create_feed( slug=config.feed.slug, title=config.feed.title, email=config.feed.email, @@ -73,14 +73,14 @@ def main(): language=config.feed.language, categories=",".join(config.feed.categories), ) - console.log('Uploading episode...') + console.log("Uploading episode...") urls = publisher.add_episode( feed_slug=config.feed.slug, audio_file_path=podcast_path, metadata=PodcastMetadata( title=script.title, description=script.description, - ) + ), ) console.print(f"Feed published at {urls['feed_url']}") console.print(f"Episode published at {urls['episode_url']}") diff --git a/gyandex/llms/factory.py b/gyandex/llms/factory.py index 40bf559..f51004f 100644 --- a/gyandex/llms/factory.py +++ b/gyandex/llms/factory.py @@ -10,16 +10,16 @@ class LLMLoggingCallback(BaseCallbackHandler): def __init__(self, log_dir="assets"): - logger = logging.getLogger('llm_logger') + logger = logging.getLogger("llm_logger") logger.setLevel(logging.INFO) # Create file handler with timestamp in filename - timestamp = datetime.now().strftime('%Y%m%d_%H%M%S') - fh = logging.FileHandler(f'{log_dir}/llm_logs_{timestamp}.log') + timestamp = datetime.now().strftime("%Y%m%d_%H%M%S") + fh = logging.FileHandler(f"{log_dir}/llm_logs_{timestamp}.log") fh.setLevel(logging.INFO) # Create formatter - formatter = logging.Formatter('%(asctime)s - %(message)s') + formatter = logging.Formatter("%(asctime)s - %(message)s") fh.setFormatter(formatter) logger.addHandler(fh) @@ -38,6 +38,7 @@ def on_llm_end(self, response, **kwargs): def on_llm_error(self, error, **kwargs): self.logger.error(f"\n=== ERROR ===\n{str(error)}\n") + # @TODO: Centralize this argument type in a single place def get_model(config: Union[GoogleGenerativeAILLMConfig], log_dir="assets"): if config.provider == "google-generative-ai": diff --git a/gyandex/llms/factory_test.py b/gyandex/llms/factory_test.py index 2521a38..4e5ba2d 100644 --- a/gyandex/llms/factory_test.py +++ b/gyandex/llms/factory_test.py @@ -1,20 +1,16 @@ import pytest -from unittest.mock import Mock, patch -from datetime import datetime from langchain_google_genai import GoogleGenerativeAI from pydantic import ValidationError -from gyandex.llms.factory import get_model, LLMLoggingCallback +from gyandex.llms.factory import get_model from 
gyandex.podgen.config.schema import GoogleGenerativeAILLMConfig + def test_get_model_returns_google_generative_ai(): """Tests that get_model creates a GoogleGenerativeAI instance with correct config""" # Given config = GoogleGenerativeAILLMConfig( - provider="google-generative-ai", - model="gemini-pro", - temperature=0.7, - google_api_key="test-key" + provider="google-generative-ai", model="gemini-pro", temperature=0.7, google_api_key="test-key" ) # When @@ -25,13 +21,11 @@ def test_get_model_returns_google_generative_ai(): assert model.model == "gemini-pro" assert model.temperature == 0.7 + def test_get_model_raises_for_unsupported_provider(): """Tests that get_model raises NotImplementedError for unsupported providers""" # When/Then with pytest.raises(ValidationError): - config = GoogleGenerativeAILLMConfig( - provider="unsupported", - model="test", - temperature=0.5, - google_api_key="test-key" + _ = GoogleGenerativeAILLMConfig( + provider="unsupported", model="test", temperature=0.5, google_api_key="test-key" ) diff --git a/gyandex/loaders/factory.py b/gyandex/loaders/factory.py index 157292e..ccb32e3 100644 --- a/gyandex/loaders/factory.py +++ b/gyandex/loaders/factory.py @@ -20,11 +20,15 @@ def load_content(content_config: ContentConfig) -> Document: def fetch_url(url) -> Document: - headers = { "Accept": "application/json" } + headers = {"Accept": "application/json"} response = requests.get(f"https://r.jina.ai/{url}", headers=headers) # @TODO: Add error handling content = response.json() - return Document(title=content['data']['title'], content=content['data']['content'], metadata={ - 'url': content['data']['url'], - 'description': content['data']['description'], - }) + return Document( + title=content["data"]["title"], + content=content["data"]["content"], + metadata={ + "url": content["data"]["url"], + "description": content["data"]["description"], + }, + ) diff --git a/gyandex/loaders/factory_test.py b/gyandex/loaders/factory_test.py index 19d7010..0bea739 100644 --- a/gyandex/loaders/factory_test.py +++ b/gyandex/loaders/factory_test.py @@ -1,6 +1,3 @@ -import json - -import pytest import responses from gyandex.loaders.factory import fetch_url @@ -11,12 +8,7 @@ def test_fetch_url_returns_json_response(): # Given test_url = "test123" actual = {"data": {"title": "title", "content": "test content", "url": "url", "description": "description"}} - responses.add( - responses.GET, - f"https://r.jina.ai/{test_url}", - json=actual, - status=200 - ) + responses.add(responses.GET, f"https://r.jina.ai/{test_url}", json=actual, status=200) # When result = fetch_url(test_url) @@ -24,7 +16,7 @@ def test_fetch_url_returns_json_response(): # Then assert result.content == "test content" assert result.title == "title" - assert result.metadata == { "url": "url", "description": "description" } + assert result.metadata == {"url": "url", "description": "description"} @responses.activate @@ -32,12 +24,12 @@ def test_fetch_url_sends_correct_headers(): """Tests that fetch_url sends the correct Accept header""" # Given test_url = "test123" - expected_headers = {"Accept": "application/json"} + _ = {"Accept": "application/json"} responses.add( responses.GET, f"https://r.jina.ai/{test_url}", json={"data": {"title": "title", "content": "test content", "url": "url", "description": "description"}}, - status=200 + status=200, ) # When @@ -46,6 +38,7 @@ def test_fetch_url_sends_correct_headers(): # Then assert responses.calls[0].request.headers["Accept"] == "application/json" + @responses.activate def 
test_fetch_url_constructs_correct_url(): """Tests that fetch_url constructs the correct URL with the base and provided path""" @@ -56,7 +49,7 @@ def test_fetch_url_constructs_correct_url(): responses.GET, expected_url, json={"data": {"title": "title", "content": "test content", "url": "url", "description": "description"}}, - status=200 + status=200, ) # When diff --git a/gyandex/podgen/config/loader.py b/gyandex/podgen/config/loader.py index 8875818..720a947 100644 --- a/gyandex/podgen/config/loader.py +++ b/gyandex/podgen/config/loader.py @@ -3,12 +3,13 @@ import yaml from .schema import PodcastConfig + def resolve_env_vars(value: str) -> str: """Resolve ${ENV_VAR} patterns in string values""" if not isinstance(value, str): return value - pattern = r'\${([^}^{]+)}' + pattern = r"\${([^}^{]+)}" matches = re.finditer(pattern, value) for match in matches: @@ -20,6 +21,7 @@ def resolve_env_vars(value: str) -> str: return value + def resolve_nested_env_vars(data): """Recursively resolve environment variables in nested structures""" if isinstance(data, dict): @@ -29,6 +31,7 @@ def resolve_nested_env_vars(data): else: return resolve_env_vars(data) + def load_config(config_path: str) -> PodcastConfig: """Load and parse YAML config with environment variable support""" with open(config_path) as f: diff --git a/gyandex/podgen/config/loader_test.py b/gyandex/podgen/config/loader_test.py index 0c06b96..0f9fa39 100644 --- a/gyandex/podgen/config/loader_test.py +++ b/gyandex/podgen/config/loader_test.py @@ -3,6 +3,7 @@ from .loader import resolve_env_vars, resolve_nested_env_vars, load_config from .schema import PodcastConfig + def test_resolve_env_vars_replaces_single_variable(): """Test that resolve_env_vars replaces a single environment variable in a string""" # Given @@ -15,6 +16,7 @@ def test_resolve_env_vars_replaces_single_variable(): # Then assert result == "prefix_test_value_suffix" + def test_resolve_env_vars_handles_multiple_variables(): """Test that resolve_env_vars replaces multiple environment variables in a string""" # Given @@ -28,6 +30,7 @@ def test_resolve_env_vars_handles_multiple_variables(): # Then assert result == "first_middle_second" + def test_resolve_env_vars_raises_on_missing_variable(): """Test that resolve_env_vars raises ValueError when environment variable is not found""" # Given @@ -37,27 +40,19 @@ def test_resolve_env_vars_raises_on_missing_variable(): with pytest.raises(ValueError, match="Environment variable NONEXISTENT_VAR not found"): resolve_env_vars(input_string) + def test_resolve_nested_env_vars_handles_dict(): """Test that resolve_nested_env_vars resolves variables in nested dictionary""" # Given os.environ["NESTED_VAR"] = "value" - input_dict = { - "key1": "${NESTED_VAR}", - "key2": { - "nested_key": "${NESTED_VAR}" - } - } + input_dict = {"key1": "${NESTED_VAR}", "key2": {"nested_key": "${NESTED_VAR}"}} # When result = resolve_nested_env_vars(input_dict) # Then - assert result == { - "key1": "value", - "key2": { - "nested_key": "value" - } - } + assert result == {"key1": "value", "key2": {"nested_key": "value"}} + def test_load_config_parses_yaml_with_env_vars(tmp_path): """Test that load_config properly loads YAML and resolves environment variables""" diff --git a/gyandex/podgen/config/schema.py b/gyandex/podgen/config/schema.py index dc99d89..908f58e 100644 --- a/gyandex/podgen/config/schema.py +++ b/gyandex/podgen/config/schema.py @@ -1,5 +1,5 @@ from enum import Enum -from typing import Dict, List, Optional, Union, Literal +from typing import List, 
Optional, Union, Literal from pydantic import BaseModel, HttpUrl, Field @@ -46,7 +46,7 @@ class Participant(BaseModel): name: str voice: str gender: Gender - personality: Optional[str] = '' + personality: Optional[str] = "" language_code: Optional[str] = "en-US" diff --git a/gyandex/podgen/engine/publisher.py b/gyandex/podgen/engine/publisher.py index e90e710..e460b92 100644 --- a/gyandex/podgen/engine/publisher.py +++ b/gyandex/podgen/engine/publisher.py @@ -10,6 +10,7 @@ from ..storage.s3 import S3CompatibleStorage from ..feed.models import PodcastDB, Episode + # @TODO: Look at URL manipulation and how URLs are used between storage # and feeds. There is possibly some duplication here. @dataclass @@ -55,12 +56,8 @@ def _get_audio_metadata(self, file_path: str) -> Dict[str, Any]: metadata = {} if audio is not None: - metadata["duration"] = ( - int(audio.info.length) if hasattr(audio.info, "length") else None - ) - metadata["mime_type"] = ( - audio.mime[0] if hasattr(audio, "mime") and audio.mime else None - ) + metadata["duration"] = int(audio.info.length) if hasattr(audio.info, "length") else None + metadata["mime_type"] = audio.mime[0] if hasattr(audio, "mime") and audio.mime else None metadata["file_size"] = os.path.getsize(file_path) return metadata @@ -71,9 +68,7 @@ def _generate_guid(self, feed_slug: str, file_path: str) -> str: file_hash = hashlib.md5(f.read()).hexdigest() return f"{feed_slug}-{file_hash}" - def add_episode( - self, feed_slug: str, audio_file_path: str, metadata: PodcastMetadata - ) -> Dict[str, str]: + def add_episode(self, feed_slug: str, audio_file_path: str, metadata: PodcastMetadata) -> Dict[str, str]: """ Add a new episode to a feed. @@ -110,7 +105,7 @@ def add_episode( ) # Add episode to database - episode = self.db.add_episode( + _ = self.db.add_episode( feed_slug=feed_slug, title=metadata.title, description=metadata.description, @@ -143,9 +138,7 @@ def _save_temp_feed(self, feed_content: str) -> str: f.write(feed_content) return temp_path - def create_feed( - self, slug: str, title: str, description: str, author: str, email: str, **kwargs - ) -> str: + def create_feed(self, slug: str, title: str, description: str, author: str, email: str, **kwargs) -> str: """ Create a new podcast feed. 
@@ -189,8 +182,6 @@ def get_feed_url(self, feed_slug: str) -> str: """Get the URL for a feed.""" return urljoin(self.base_url, f"{self.feed_prefix}/{feed_slug}.xml") - def list_episodes( - self, feed_slug: str, limit: Optional[int] = None - ) -> list[Type[Episode]]: + def list_episodes(self, feed_slug: str, limit: Optional[int] = None) -> list[Type[Episode]]: """List episodes in a feed.""" return self.db.get_episodes(feed_slug, limit) diff --git a/gyandex/podgen/engine/publisher_test.py b/gyandex/podgen/engine/publisher_test.py index 3b54311..a9906c1 100644 --- a/gyandex/podgen/engine/publisher_test.py +++ b/gyandex/podgen/engine/publisher_test.py @@ -1,9 +1,6 @@ import pytest -from datetime import datetime from unittest.mock import Mock, patch -import os from .publisher import PodcastPublisher, PodcastMetadata -from ..feed.models_test import test_db # @todo: move to common fixtures from ..storage.s3 import S3CompatibleStorage @@ -85,7 +82,8 @@ def test_add_episode(orchestrator, mock_storage, sample_audio, mock_mutagen): mock_storage.upload_file.reset_mock() metadata = PodcastMetadata( - title="Test Episode", description="Test Episode Description", + title="Test Episode", + description="Test Episode Description", ) mock_storage.upload_file.side_effect = [ @@ -109,9 +107,7 @@ def test_add_episode_to_nonexistent_feed(orchestrator, sample_audio): Then: ValueError should be raised """ # Given - metadata = PodcastMetadata( - title="Test Episode", description="Test Episode Description" - ) + metadata = PodcastMetadata(title="Test Episode", description="Test Episode Description") # When/Then with pytest.raises(ValueError): diff --git a/gyandex/podgen/feed/generator.py b/gyandex/podgen/feed/generator.py index 75e8837..fc7cc85 100644 --- a/gyandex/podgen/feed/generator.py +++ b/gyandex/podgen/feed/generator.py @@ -38,9 +38,7 @@ def generate_feed(self, slug: str) -> str: fg.image(feed_data.image_url) # iTunes specific tags - fg.podcast.itunes_category( - feed_data.categories.split(",")[0] if feed_data.categories else "Technology" - ) + fg.podcast.itunes_category(feed_data.categories.split(",")[0] if feed_data.categories else "Technology") fg.podcast.itunes_explicit(feed_data.explicit) fg.podcast.itunes_author(feed_data.author) fg.podcast.itunes_owner(name=feed_data.author, email=feed_data.email) @@ -61,9 +59,7 @@ def generate_feed(self, slug: str) -> str: fe.enclosure(episode.audio_url, str(episode.file_size), episode.mime_type) # iTunes specific episode tags - fe.podcast.itunes_duration( - str(episode.duration) if episode.duration else "0" - ) + fe.podcast.itunes_duration(str(episode.duration) if episode.duration else "0") fe.podcast.itunes_explicit(episode.explicit) if episode.image_url: fe.podcast.itunes_image(episode.image_url) diff --git a/gyandex/podgen/feed/generator_test.py b/gyandex/podgen/feed/generator_test.py index a4adb97..7878195 100644 --- a/gyandex/podgen/feed/generator_test.py +++ b/gyandex/podgen/feed/generator_test.py @@ -2,11 +2,6 @@ import xml.etree.ElementTree as ET from .generator import PodcastFeedGenerator -from .models_test import ( - test_db, - sample_feed_data, - sample_episode_data, -) # @todo: move to common fixtures # Feed Generator Tests @@ -19,7 +14,7 @@ def test_generate_feed_xml(test_db, sample_feed_data, sample_episode_data): """ # Given feed = test_db.create_feed(**sample_feed_data) - episode = test_db.add_episode(feed.slug, **sample_episode_data) + _ = test_db.add_episode(feed.slug, **sample_episode_data) # When generator = PodcastFeedGenerator(test_db) @@ -59,7 
+54,7 @@ def test_feed_episode_enclosure(test_db, sample_feed_data, sample_episode_data): """ # Given feed = test_db.create_feed(**sample_feed_data) - episode = test_db.add_episode(feed.slug, **sample_episode_data) + _ = test_db.add_episode(feed.slug, **sample_episode_data) # When generator = PodcastFeedGenerator(test_db) diff --git a/gyandex/podgen/feed/models.py b/gyandex/podgen/feed/models.py index 3c8a13c..72afdf9 100644 --- a/gyandex/podgen/feed/models.py +++ b/gyandex/podgen/feed/models.py @@ -33,9 +33,7 @@ class Feed(Base): updated_at = Column(DateTime, onupdate=func.now()) # Relationship - episodes = relationship( - "Episode", back_populates="feed", cascade="all, delete-orphan" - ) + episodes = relationship("Episode", back_populates="feed", cascade="all, delete-orphan") def __repr__(self): return f"" @@ -46,18 +44,15 @@ def get_latest_episode(self, session) -> Tuple[int, int]: """ # Query the database to get the maximum episode number for the feed max_episode_number = ( - session.query(func.max(Episode.episode_number)) - .filter(Episode.feed_id == self.id) - .scalar() + session.query(func.max(Episode.episode_number)).filter(Episode.feed_id == self.id).scalar() ) or 0 max_season_number = ( - session.query(func.max(Episode.season_number)) - .filter(Episode.feed_id == self.id) - .scalar() + session.query(func.max(Episode.season_number)).filter(Episode.feed_id == self.id).scalar() ) or 1 return max_season_number, max_episode_number + class Episode(Base): __tablename__ = "episodes" @@ -103,9 +98,7 @@ def get_feed(self, slug: str) -> Optional[Feed]: with self.session() as session: return session.query(Feed).filter(Feed.slug == slug).first() - def add_episode( - self, feed_slug: str, title: str, audio_url: str, guid: str, **kwargs - ) -> Episode: + def add_episode(self, feed_slug: str, title: str, audio_url: str, guid: str, **kwargs) -> Episode: with self.session() as session: feed = session.query(Feed).filter(Feed.slug == feed_slug).first() if not feed: diff --git a/gyandex/podgen/speech/google_cloud.py b/gyandex/podgen/speech/google_cloud.py index 03e5ac3..c35930f 100644 --- a/gyandex/podgen/speech/google_cloud.py +++ b/gyandex/podgen/speech/google_cloud.py @@ -1,5 +1,5 @@ from io import BytesIO -from typing import List, Optional, Dict, Any, Union +from typing import List, Optional, Dict, Any from google.cloud import texttospeech from pydub import AudioSegment @@ -13,8 +13,7 @@ def __init__(self, participants: List[Participant]): self.client = texttospeech.TextToSpeechClient() self.voices = self.generate_voice_profile(participants) self.audio_config = texttospeech.AudioConfig( - audio_encoding=texttospeech.AudioEncoding.MP3, - effects_profile_id=['headphone-class-device'] + audio_encoding=texttospeech.AudioEncoding.MP3, effects_profile_id=["headphone-class-device"] ) def generate_voice_profile(self, participants: List[Participant]) -> Dict[str, Any]: @@ -40,13 +39,13 @@ def process_segment(self, segment: ScriptSegment) -> bytes: def synthesize_speech(self, text: str, speaker: str) -> bytes: synthesis_input = texttospeech.SynthesisInput(text=text) response = self.client.synthesize_speech( - input=synthesis_input, - voice=self.voices[speaker], - audio_config=self.audio_config + input=synthesis_input, voice=self.voices[speaker], audio_config=self.audio_config ) return response.audio_content - def generate_audio_file(self, audio_segments: List[bytes], podcast_path: str, options: Optional[Dict[str, Any]] = None): + def generate_audio_file( + self, audio_segments: List[bytes], podcast_path: 
str, options: Optional[Dict[str, Any]] = None + ): if options is None: # @TODO: Fix this code-smell options = { @@ -58,10 +57,10 @@ def generate_audio_file(self, audio_segments: List[bytes], podcast_path: str, op for segment in audio_segments: segment_audio = AudioSegment.from_mp3(BytesIO(segment)) if previous_segment: - combined = combined.append(segment_audio, crossfade=options['crossfade']) + combined = combined.append(segment_audio, crossfade=options["crossfade"]) else: combined += segment_audio previous_segment = segment # Save final podcast - combined.export(podcast_path, format="mp3") \ No newline at end of file + combined.export(podcast_path, format="mp3") diff --git a/gyandex/podgen/speech/google_cloud_test.py b/gyandex/podgen/speech/google_cloud_test.py index be422b3..fd229a9 100644 --- a/gyandex/podgen/speech/google_cloud_test.py +++ b/gyandex/podgen/speech/google_cloud_test.py @@ -1,22 +1,21 @@ -import pytest from unittest.mock import Mock, patch -from io import BytesIO -from pydub import AudioSegment from google.cloud import texttospeech from gyandex.podgen.processors.tts import GoogleTTSEngine from gyandex.podgen.engine.workflows import ScriptSegment + def test_tts_engine_initialization(): """Tests that TTSEngine initializes with correct voice configurations""" # Given/When engine = GoogleTTSEngine() # Then - assert 'HOST1' in engine.voices - assert 'HOST2' in engine.voices + assert "HOST1" in engine.voices + assert "HOST2" in engine.voices assert isinstance(engine.client, texttospeech.TextToSpeechClient) -@patch('google.cloud.texttospeech.TextToSpeechClient') + +@patch("google.cloud.texttospeech.TextToSpeechClient") def test_synthesize_speech_for_host1(mock_client): """Tests speech synthesis for HOST1 voice""" # Given @@ -32,7 +31,8 @@ def test_synthesize_speech_for_host1(mock_client): assert result == b"test_audio_content" mock_client.return_value.synthesize_speech.assert_called_once() -@patch('google.cloud.texttospeech.TextToSpeechClient') + +@patch("google.cloud.texttospeech.TextToSpeechClient") def test_process_segment(mock_client): """Tests processing of a complete podcast segment""" # Given diff --git a/gyandex/podgen/storage/factory.py b/gyandex/podgen/storage/factory.py index a7e3af9..1bf9408 100644 --- a/gyandex/podgen/storage/factory.py +++ b/gyandex/podgen/storage/factory.py @@ -3,6 +3,7 @@ from gyandex.podgen.config.schema import S3StorageConfig from gyandex.podgen.storage.s3 import S3CompatibleStorage + # @TODO: Centralize this type and move this to a common place def get_storage(config: Union[S3StorageConfig]) -> S3CompatibleStorage: if config.provider != "s3": # @TODO: Move this to a enum diff --git a/gyandex/podgen/storage/factory_test.py b/gyandex/podgen/storage/factory_test.py index e1985b8..b9074c5 100644 --- a/gyandex/podgen/storage/factory_test.py +++ b/gyandex/podgen/storage/factory_test.py @@ -5,6 +5,7 @@ from gyandex.podgen.storage.factory import get_storage from gyandex.podgen.storage.s3 import S3CompatibleStorage + def test_get_storage_returns_s3_storage(): """Tests that get_storage creates an S3CompatibleStorage instance with correct config""" # Given @@ -15,7 +16,7 @@ def test_get_storage_returns_s3_storage(): secret_key="test-secret-key", region="us-east-1", endpoint="https://test-endpoint", - custom_domain="cdn.example.com" + custom_domain="cdn.example.com", ) # When @@ -26,16 +27,17 @@ def test_get_storage_returns_s3_storage(): assert storage.bucket == "test-bucket" assert storage.custom_domain == "cdn.example.com" + def 
test_get_storage_raises_for_unsupported_provider(): """Tests that get_storage raises NotImplementedError for unsupported providers""" # When/Then with pytest.raises(ValidationError): - config = S3StorageConfig( + _ = S3StorageConfig( provider="unsupported", bucket="test-bucket", access_key="test-access-key", secret_key="test-secret-key", region="us-east-1", endpoint="https://test-endpoint", - custom_domain="cdn.example.com" + custom_domain="cdn.example.com", ) diff --git a/gyandex/podgen/storage/s3.py b/gyandex/podgen/storage/s3.py index eab43b2..df4d2b1 100644 --- a/gyandex/podgen/storage/s3.py +++ b/gyandex/podgen/storage/s3.py @@ -38,9 +38,7 @@ def __init__( self.acl = acl # Configure the S3 client with a generous timeout - config = Config( - connect_timeout=10, read_timeout=30, retries={"max_attempts": 3} - ) + config = Config(connect_timeout=10, read_timeout=30, retries={"max_attempts": 3}) self.client = boto3.client( "s3", @@ -80,9 +78,7 @@ def upload_file( if metadata: extra_args["Metadata"] = metadata - self.client.upload_file( - file_path, self.bucket, destination_path, ExtraArgs=extra_args - ) + self.client.upload_file(file_path, self.bucket, destination_path, ExtraArgs=extra_args) return self.get_public_url(destination_path) diff --git a/gyandex/podgen/storage/s3_test.py b/gyandex/podgen/storage/s3_test.py index ba5cc38..37b07ba 100644 --- a/gyandex/podgen/storage/s3_test.py +++ b/gyandex/podgen/storage/s3_test.py @@ -56,9 +56,7 @@ def r2_storage(mock_s3_client): def test_initialization(mock_s3_storage): """Test storage initialization with different configurations""" # Test AWS S3 initialization - _ = S3CompatibleStorage( - bucket="test-bucket", access_key_id="test-key", secret_access_key="test-secret" - ) + _ = S3CompatibleStorage(bucket="test-bucket", access_key_id="test-key", secret_access_key="test-secret") mock_s3_storage.assert_called_once_with( "s3", @@ -115,9 +113,7 @@ def test_download_file(storage, mock_s3_client, tmp_path): storage.download_file("episodes/test.mp3", str(download_path)) - mock_s3_client.download_file.assert_called_with( - "test-bucket", "episodes/test.mp3", str(download_path) - ) + mock_s3_client.download_file.assert_called_with("test-bucket", "episodes/test.mp3", str(download_path)) def test_get_public_url_aws(storage): @@ -178,9 +174,7 @@ def test_delete_file(storage, mock_s3_client): """Test file deletion functionality""" storage.delete_file("episodes/test.mp3") - mock_s3_client.delete_object.assert_called_with( - Bucket="test-bucket", Key="episodes/test.mp3" - ) + mock_s3_client.delete_object.assert_called_with(Bucket="test-bucket", Key="episodes/test.mp3") def test_upload_file_content_type_guessing(storage, mock_s3_client, tmp_path): diff --git a/gyandex/podgen/workflows/alexandria.py b/gyandex/podgen/workflows/alexandria.py index 36ee5a4..5720089 100644 --- a/gyandex/podgen/workflows/alexandria.py +++ b/gyandex/podgen/workflows/alexandria.py @@ -9,6 +9,7 @@ from ..config.schema import PodcastConfig, GoogleGenerativeAILLMConfig, Participant from ...loaders.factory import Document + class OutlineGenerator: def __init__(self, config: Union[GoogleGenerativeAILLMConfig]): self.model = get_model(config) @@ -20,7 +21,8 @@ def __init__(self, config: Union[GoogleGenerativeAILLMConfig]): Create a focused podcast outline based on the content Rules: - 1. Target podcast duration and number of segments should be proportional to the content length; it should not be more than reading the content directly + 1. 
Target podcast duration and number of segments should be proportional to the content length;
+        it should not be more than reading the content directly
         2. Each segment must focus on a UNIQUE aspect with NO overlap
         3. Keep segments concise and focused on actual content from the source
         4. Don't add speculative content or expand beyond the source material
@@ -37,15 +39,16 @@ def __init__(self, config: Union[GoogleGenerativeAILLMConfig]):
         Make sure each segment has a clear transition to the next topic.
         """,
             input_variables=["content"],
-            partial_variables={"format_instructions": self.parser.get_format_instructions()}
+            partial_variables={"format_instructions": self.parser.get_format_instructions()},
         )
 
     def generate_outline(self, document: Document) -> PodcastOutline:
         """Generate structured podcast outline from content summary"""
         chain = self.outline_prompt | self.model | self.parser
-        response = chain.invoke({ "content": document.content, "title": document.title })
+        response = chain.invoke({"content": document.content, "title": document.title})
         return response
 
+
 class ScriptGenerator:
     def __init__(self, config: Union[GoogleGenerativeAILLMConfig], participants: List[Participant]):
         self.model = get_model(config)
@@ -59,8 +62,10 @@ def __init__(self, config: Union[GoogleGenerativeAILLMConfig], participants: Lis
                 "host_profiles": "\n".join([self.create_host_profile(participant) for participant in participants]),
             },
             template="""
-            You are the a world-class podcast writer, you have worked as a ghost writer for Joe Rogan, Lex Fridman, Ben Shapiro, Tim Ferris.
-            We are in an alternate universe where actually you have been writing every line they say and they just stream it into their brains.
+            You are a world-class podcast writer, you have worked as a ghost writer for Joe Rogan,
+            Lex Fridman, Ben Shapiro, Tim Ferris.
+            We are in an alternate universe where actually you have been writing every line they say and
+            they just stream it into their brains.
             You have won multiple podcast awards for your writing.
 
             IMPORTANT: You are generating dialogue for the {position}
@@ -80,24 +85,29 @@ def __init__(self, config: Union[GoogleGenerativeAILLMConfig], participants: Lis
 
             DIALOGUE GENERATION RULES:
             1. Create natural dialogue with occasional fillers (um, uh, you know)
-            2. Keep the dialogue flowing as one continuous conversation. Keep it extremely engaging, the speakers can get derailed now and then but should discuss the topic.
-            3. If this is middle segment: let the conversation flow naturally into the next topic without announcing transitions or welcoming statements
-            4. End segment dialogues by building on the current point and naturally introducing elements of the next topic, except if it is the closing segment
+            2. Keep the dialogue flowing as one continuous conversation.
+            Keep it extremely engaging, the speakers can get derailed now and then but should discuss the topic.
+            3. If this is a middle segment: let the conversation flow naturally into the next topic without
+            announcing transitions or welcoming statements
+            4. End segment dialogues by building on the current point and naturally introducing elements of the
+            next topic, except if it is the closing segment
 
             REQUIREMENTS:
             1. WELCOME/INTRO PHRASES ONLY IN THE OPENING SEGMENT.
            2. NO CLOSING/GOODBYE PHRASES UNLESS THIS IS THE CLOSING SEGMENT.
             3. ONLY transition to the next segment at the end of the opening and middle segments
-            4. Generate text without special formatting, so that a TTS can vocalize it. That means no asterisks or hyphens.
+            4. 
Generate text without special formatting, so that a TTS can vocalize it. + That means no asterisks or hyphens. TRANSITION STYLE GUIDE: - Avoid phrases like "segues into" or "next topic" - Connect topics through shared themes or related ideas - - Use natural conversational bridges like "That reminds me of..." or "You know what's interesting about that..." + - Use natural conversational bridges like "That reminds me of..." or + "You know what's interesting about that..." - Let one host's insight naturally lead to the next area of discussion {format_instructions} - """ + """, ) self.chain = self.segment_prompt | self.model | self.parser @@ -105,22 +115,22 @@ def __init__(self, config: Union[GoogleGenerativeAILLMConfig], participants: Lis def create_host_profile(self, participant: Participant): return f"HOST ({participant.name})[{participant.gender}]: {participant.personality}" - async def generate_segment_script(self, - segment: OutlineSegment, - source_content: str, is_first=False, - is_last=False, - transition="") -> ScriptSegment: + async def generate_segment_script( + self, segment: OutlineSegment, source_content: str, is_first=False, is_last=False, transition="" + ) -> ScriptSegment: """Generate script for a single segment""" position = "opening segment" if is_first else "closing segment" if is_last else "middle segment" transition = transition if not is_last else "" - result = await self.chain.ainvoke({ - "segment_name": segment.name, - "talking_points": segment.talking_points, - "duration": segment.duration, - "source_content": source_content, - "position": position, - "transition": transition, - }) + result = await self.chain.ainvoke( + { + "segment_name": segment.name, + "talking_points": segment.talking_points, + "duration": segment.duration, + "source_content": source_content, + "position": position, + "transition": transition, + } + ) return result async def generate_full_script(self, outline: PodcastOutline, document_content: str) -> List[ScriptSegment]: @@ -145,6 +155,7 @@ async def generate_full_script(self, outline: PodcastOutline, document_content: return await asyncio.gather(*tasks) + class AlexandriaWorkflow: config: PodcastConfig diff --git a/gyandex/podgen/workflows/types.py b/gyandex/podgen/workflows/types.py index 83a52f8..4430198 100644 --- a/gyandex/podgen/workflows/types.py +++ b/gyandex/podgen/workflows/types.py @@ -7,7 +7,10 @@ class OutlineSegment(BaseModel): name: str = Field(description="Name of the podcast segment") duration: int = Field(description="Duration of segment in minutes") talking_points: List[str] = Field(description="Key points to cover in this segment") - transition: str = Field(description="Transition text to the next segment. Use 'Closing remarks' if there is no transition", default="") + transition: str = Field( + description="Transition text to the next segment. 
Use 'Closing remarks' if there is no transition", default="" + ) + class PodcastOutline(BaseModel): title: str = Field(description="Title of the podcast episode") @@ -15,15 +18,18 @@ class PodcastOutline(BaseModel): total_duration: int = Field(description="Total podcast duration in minutes") segments: List[OutlineSegment] = Field(description="List of podcast segments") + class DialogueLine(BaseModel): speaker: str text: str + class ScriptSegment(BaseModel): name: str duration: int = Field(description="Duration of the script in minutes") dialogue: List[DialogueLine] + class PodcastEpisode(BaseModel): title: str description: str diff --git a/main.ipynb b/main.ipynb deleted file mode 100644 index 6681623..0000000 --- a/main.ipynb +++ /dev/null @@ -1,412 +0,0 @@ -{ - "cells": [ - { - "cell_type": "code", - "execution_count": null, - "id": "0", - "metadata": {}, - "outputs": [], - "source": [ - "from datetime import datetime\n", - "from io import BytesIO\n", - "\n", - "from langchain.prompts import ChatPromptTemplate\n", - "from langchain.schema import HumanMessage, SystemMessage\n", - "from langchain.output_parsers import PydanticOutputParser\n", - "from langchain_core.output_parsers import StrOutputParser\n", - "from langchain_google_genai import GoogleGenerativeAI\n", - "from pydantic import BaseModel, Field\n", - "from typing import List, Dict\n", - "import requests\n", - "import dotenv\n", - "import os\n", - "\n", - "from pydub import AudioSegment\n", - "\n", - "dotenv.load_dotenv()" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "1", - "metadata": {}, - "outputs": [], - "source": [ - "api_key = os.getenv('GOOGLE_API_KEY')\n", - "urls = [\n", - " # \"https://kellblog.com/2024/10/12/design-your-organization-for-the-conflicts-you-want-to-hear-about/\",\n", - " # \"https://peterszasz.com/engineering-managers-guide-to-effective-annual-feedback/\",\n", - " \"https://dennisnerush.medium.com/my-top-10-favorite-leadership-and-management-books-87178902826e\",\n", - "]" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "2", - "metadata": {}, - "outputs": [], - "source": [ - "# Implement a memoizing decorator that can be applied to a function\n", - "# to cache the results of the function calls.\n", - "def memoize(func):\n", - " cache = {}\n", - " def wrapper(*args, **kwargs):\n", - " key = str(args) + str(kwargs)\n", - " if key not in cache:\n", - " cache[key] = func(*args, **kwargs)\n", - " return cache[key]\n", - " return wrapper" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "3", - "metadata": {}, - "outputs": [], - "source": [ - "@memoize\n", - "def fetch_url(url):\n", - " headers = { \"Accept\": \"application/json\" }\n", - " response = requests.get(f\"https://r.jina.ai/{url}\", headers=headers)\n", - " return response.json()\n" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "4", - "metadata": {}, - "outputs": [], - "source": [ - "content = [fetch_url(url) for url in urls]" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "5", - "metadata": {}, - "outputs": [], - "source": [ - "content_analysis_prompt = ChatPromptTemplate.from_messages([\n", - " (\n", - " \"system\", \n", - " \"\"\"You are an expert content strategist specializing in creating engaging educational content.\n", - " Your strength lies in breaking down complex topics into clear, relatable concepts while maintaining intellectual depth.\n", - " \n", - " Approach the analysis with:\n", - " 1. 
Systems thinking - identify interconnections and patterns\n", - " 2. Multi-level abstraction - from high-level principles to practical implementation\n", - " 3. Engaging storytelling - find hooks and analogies that make concepts stick\n", - " 4. Dialectical thinking - explore tensions and competing viewpoints\n", - " \n", - " Structure your analysis in this exact format:\n", - " \n", - " === CONCEPTS ===\n", - " [Each concept includes 3 depth levels marked with -]\n", - " ### [Concept Name]\n", - " - Strategic: [High level insight]\n", - " - Tactical: [Mid level approach] \n", - " - Practice: [Concrete examples]\n", - " \n", - " === HOOKS ===\n", - " [Each hook includes story + debate]\n", - " ### [Topic]\n", - " Story: [Engaging narrative]\n", - " Debate: [Key discussion points]\n", - " \n", - " === SEGMENTS ===\n", - " [List of main segments, one per line]\"\"\",\n", - " ),\n", - " (\n", - " \"human\", \n", - " \"\"\"Analyze these articles through multiple lenses to create rich podcast material:\n", - " \n", - " {article_contents}\n", - " \n", - " Create a layered analysis that:\n", - " 1. Breaks down complex ideas through progressive levels of detail\n", - " 2. Identifies natural conversation flows and engaging discussion points\n", - " 3. Maps out competing viewpoints and their nuances\n", - " 4. Groups related concepts into potential podcast segments\n", - " \"\"\",\n", - " ),\n", - "])\n", - "\n", - "model = GoogleGenerativeAI(model=\"gemini-1.5-pro\", google_api_key=api_key)\n", - "\n", - "content_analysis_chain = content_analysis_prompt | model | StrOutputParser()\n", - "\n", - "# Usage\n", - "article_contents = \"\\n\\n\".join([x['data']['content'] for x in content])\n", - "result = content_analysis_chain.invoke({\n", - " \"article_contents\": article_contents,\n", - "})" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "6", - "metadata": {}, - "outputs": [], - "source": [ - "print(result)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "7", - "metadata": {}, - "outputs": [], - "source": [ - "class PodcastSegment(BaseModel):\n", - " speaker: str = Field(description=\"HOST1 or HOST2\")\n", - " tone: str = Field(description=\"EXCITED|CALM|SERIOUS|THOUGHTFUL\")\n", - " text: str = Field(description=\"Raw text content\")\n", - " pace: str = Field(description=\"FAST|MEDIUM|SLOW\")\n", - " emphasis_words: List[str] = Field(description=\"Words to emphasize\")\n", - " pause_after: int = Field(description=\"Pause duration in ms\")\n", - "\n", - "class PodcastScript(BaseModel):\n", - " title: str\n", - " segments: List[PodcastSegment]\n", - "\n", - "podcast_script_parser = PydanticOutputParser(pydantic_object=PodcastScript)\n", - "podcast_script_prompt = ChatPromptTemplate.from_messages([\n", - " (\n", - " \"system\", \n", - " \"\"\"\n", - " You are an expert podcast host duo creating deep-dive episodes. Structure your conversation to:\n", - " \n", - " 1. Start with a hook that captures attention\n", - " 2. Layer concepts from surface to core insights\n", - " 3. Use the Feynman technique to break down complex ideas\n", - " 4. Challenge assumptions and explore counterpoints\n", - " 5. Share concrete examples and case studies\n", - " 6. Connect ideas across different contexts\n", - " 7. End with actionable takeaways\n", - " \n", - " You are an expert podcast host duo creating full-length episodes. Generate a complete 5-30 minute episode with:\n", - "\n", - " 1. 
Opening [2-3 segments]\n", - " - Hook and episode preview\n", - " - Quick host banter\n", - " - Topic introduction\n", - " \n", - " 2. Main Discussion [5-20 segments]\n", - " - Layer 1: Surface overview\n", - " - Layer 2: Core concepts unpacked\n", - " - Layer 3: Deep analysis\n", - " - Layer 4: Implementation details\n", - " - Regular transitions between hosts\n", - " - Examples and case studies\n", - " - Counterpoints and debates\n", - " \n", - " 3. Closing [3-4 segments]\n", - " - Key takeaways\n", - " - Action items\n", - " \n", - " Each segment should be 1-2 minutes of spoken content.\n", - " Create a natural flow between segments:\n", - " - Build on previous points\n", - " - Ask probing questions\n", - " - Share relevant examples\n", - " - Challenge and debate ideas\n", - " - Synthesize insights\n", - " \n", - " Format each segment as:\n", - " {format_instructions}\n", - " \"\"\",\n", - " ),\n", - " (\n", - " \"human\", \n", - " \"\"\"Generate a podcast script using:\n", - " # Analysis result\n", - " {analysis_result}\n", - " \n", - " # Original content\n", - " {article_contents}\n", - " \"\"\",\n", - " ),\n", - "])\n", - "\n", - "script_chain = (\n", - " podcast_script_prompt.partial(format_instructions=podcast_script_parser.get_format_instructions())\n", - " | model\n", - " | podcast_script_parser\n", - ")\n", - "script = script_chain.invoke({ \"analysis_result\": result, \"article_contents\": article_contents })" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "8", - "metadata": {}, - "outputs": [], - "source": [ - "print(len(script.segments))\n", - "print(script)" - ] - }, - { - "cell_type": "markdown", - "id": "9", - "metadata": {}, - "source": [ - "# TTS" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "10", - "metadata": {}, - "outputs": [], - "source": [ - "from IPython.display import Audio\n", - "from google.cloud import texttospeech" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "11", - "metadata": {}, - "outputs": [], - "source": [ - "class TTSEngine:\n", - " def __init__(self):\n", - " self.client = texttospeech.TextToSpeechClient()\n", - " self.voices = {\n", - " 'HOST1': texttospeech.VoiceSelectionParams(\n", - " language_code='en-US',\n", - " name='en-US-Neural2-I',\n", - " ssml_gender=texttospeech.SsmlVoiceGender.MALE\n", - " ),\n", - " 'HOST2': texttospeech.VoiceSelectionParams(\n", - " language_code='en-US',\n", - " name='en-US-Neural2-F',\n", - " ssml_gender=texttospeech.SsmlVoiceGender.FEMALE\n", - " )\n", - " }\n", - " self.audio_config = texttospeech.AudioConfig(\n", - " audio_encoding=texttospeech.AudioEncoding.MP3,\n", - " effects_profile_id=['headphone-class-device']\n", - " )\n", - "\n", - " def process_segment(self, segment: PodcastSegment) -> bytes:\n", - " ssml = self.generate_ssml(segment)\n", - " return self.synthesize_speech(ssml, segment.speaker)\n", - "\n", - " def get_pace(self, pace: str) -> str:\n", - " pace_values = {\n", - " \"FAST\": \"120%\",\n", - " \"MEDIUM\": \"100%\",\n", - " \"SLOW\": \"85%\",\n", - " \"VERY_SLOW\": \"75%\",\n", - " \"VERY_FAST\": \"140%\"\n", - " }\n", - " return pace_values.get(pace, \"100%\")\n", - "\n", - " def get_tone(self, tone: str) -> str:\n", - " tone_values = {\n", - " \"EXCITED\": \"+4st\",\n", - " \"CALM\": \"-1st\",\n", - " \"SERIOUS\": \"-2st\",\n", - " \"THOUGHTFUL\": \"+0st\",\n", - " \"WORRIED\": \"-3st\",\n", - " \"INTENSE\": \"+2st\",\n", - " \"ENTHUSIASTIC\": \"+3st\",\n", - " \"SKEPTICAL\": \"-1.5st\",\n", - " \"CURIOUS\": 
\"+1st\",\n", - " \"AMUSED\": \"+2.5st\"\n", - " }\n", - " return tone_values.get(tone, \"+0st\")\n", - "\n", - " def generate_ssml(self, segment: PodcastSegment) -> str:\n", - " text = segment.text\n", - " for word in segment.emphasis_words:\n", - " text = text.replace(word, f'{word}')\n", - "\n", - " ssml = f'{text}'\n", - " ssml += f''\n", - " return ssml\n", - "\n", - " def synthesize_speech(self, ssml: str, speaker: str) -> bytes:\n", - " synthesis_input = texttospeech.SynthesisInput(ssml=ssml)\n", - " response = self.client.synthesize_speech(\n", - " input=synthesis_input,\n", - " voice=self.voices[speaker],\n", - " audio_config=self.audio_config\n", - " )\n", - " return response.audio_content\n" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "12", - "metadata": {}, - "outputs": [], - "source": [ - "tts_engine = TTSEngine()\n", - "audio_segments = [tts_engine.process_segment(segment) for segment in script.segments]" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "13", - "metadata": {}, - "outputs": [], - "source": [ - "# Create output directory\n", - "output_dir = \"generated_podcasts\"\n", - "os.makedirs(output_dir, exist_ok=True)\n", - "\n", - "# Generate timestamp for unique filename\n", - "timestamp = datetime.now().strftime(\"%Y%m%d_%H%M%S\")\n", - "podcast_path = f\"{output_dir}/podcast_{timestamp}.mp3\"\n", - "\n", - "# Combine segments directly\n", - "combined = AudioSegment.empty()\n", - "for segment in audio_segments:\n", - " segment_audio = AudioSegment.from_mp3(BytesIO(segment))\n", - " combined += segment_audio\n", - "\n", - "# Save final podcast\n", - "combined.export(podcast_path, format=\"mp3\")\n", - "\n", - "# Play in notebook\n", - "display(Audio(podcast_path, autoplay=False))" - ] - } - ], - "metadata": { - "kernelspec": { - "display_name": "Python 3", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 2 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython2", - "version": "2.7.6" - } - }, - "nbformat": 4, - "nbformat_minor": 5 -} diff --git a/poetry.lock b/poetry.lock index 2ff1928..5ac5676 100644 --- a/poetry.lock +++ b/poetry.lock @@ -4142,6 +4142,33 @@ files = [ [package.dependencies] pyasn1 = ">=0.1.3" +[[package]] +name = "ruff" +version = "0.8.2" +description = "An extremely fast Python linter and code formatter, written in Rust." 
+optional = false +python-versions = ">=3.7" +files = [ + {file = "ruff-0.8.2-py3-none-linux_armv6l.whl", hash = "sha256:c49ab4da37e7c457105aadfd2725e24305ff9bc908487a9bf8d548c6dad8bb3d"}, + {file = "ruff-0.8.2-py3-none-macosx_10_12_x86_64.whl", hash = "sha256:ec016beb69ac16be416c435828be702ee694c0d722505f9c1f35e1b9c0cc1bf5"}, + {file = "ruff-0.8.2-py3-none-macosx_11_0_arm64.whl", hash = "sha256:f05cdf8d050b30e2ba55c9b09330b51f9f97d36d4673213679b965d25a785f3c"}, + {file = "ruff-0.8.2-py3-none-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:60f578c11feb1d3d257b2fb043ddb47501ab4816e7e221fbb0077f0d5d4e7b6f"}, + {file = "ruff-0.8.2-py3-none-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:cbd5cf9b0ae8f30eebc7b360171bd50f59ab29d39f06a670b3e4501a36ba5897"}, + {file = "ruff-0.8.2-py3-none-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:b402ddee3d777683de60ff76da801fa7e5e8a71038f57ee53e903afbcefdaa58"}, + {file = "ruff-0.8.2-py3-none-manylinux_2_17_ppc64.manylinux2014_ppc64.whl", hash = "sha256:705832cd7d85605cb7858d8a13d75993c8f3ef1397b0831289109e953d833d29"}, + {file = "ruff-0.8.2-py3-none-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:32096b41aaf7a5cc095fa45b4167b890e4c8d3fd217603f3634c92a541de7248"}, + {file = "ruff-0.8.2-py3-none-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:e769083da9439508833cfc7c23e351e1809e67f47c50248250ce1ac52c21fb93"}, + {file = "ruff-0.8.2-py3-none-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:5fe716592ae8a376c2673fdfc1f5c0c193a6d0411f90a496863c99cd9e2ae25d"}, + {file = "ruff-0.8.2-py3-none-musllinux_1_2_aarch64.whl", hash = "sha256:81c148825277e737493242b44c5388a300584d73d5774defa9245aaef55448b0"}, + {file = "ruff-0.8.2-py3-none-musllinux_1_2_armv7l.whl", hash = "sha256:d261d7850c8367704874847d95febc698a950bf061c9475d4a8b7689adc4f7fa"}, + {file = "ruff-0.8.2-py3-none-musllinux_1_2_i686.whl", hash = "sha256:1ca4e3a87496dc07d2427b7dd7ffa88a1e597c28dad65ae6433ecb9f2e4f022f"}, + {file = "ruff-0.8.2-py3-none-musllinux_1_2_x86_64.whl", hash = "sha256:729850feed82ef2440aa27946ab39c18cb4a8889c1128a6d589ffa028ddcfc22"}, + {file = "ruff-0.8.2-py3-none-win32.whl", hash = "sha256:ac42caaa0411d6a7d9594363294416e0e48fc1279e1b0e948391695db2b3d5b1"}, + {file = "ruff-0.8.2-py3-none-win_amd64.whl", hash = "sha256:2aae99ec70abf43372612a838d97bfe77d45146254568d94926e8ed5bbb409ea"}, + {file = "ruff-0.8.2-py3-none-win_arm64.whl", hash = "sha256:fb88e2a506b70cfbc2de6fae6681c4f944f7dd5f2fe87233a7233d888bad73e8"}, + {file = "ruff-0.8.2.tar.gz", hash = "sha256:b84f4f414dda8ac7f75075c1fa0b905ac0ff25361f42e6d5da681a465e0f78e5"}, +] + [[package]] name = "rust-just" version = "1.36.0" @@ -4830,4 +4857,4 @@ propcache = ">=0.2.0" [metadata] lock-version = "2.0" python-versions = "^3.11" -content-hash = "acbdd61fb0fdf168df5a26b036929d325a43a54c54f8332b09e0cd6f554e4116" +content-hash = "f1281860f40b39bc21c0b5a6c0aa632f44f87401e3173fe034cf5a3f9ada5aed" diff --git a/publish.ipynb b/publish.ipynb deleted file mode 100644 index 952e48c..0000000 --- a/publish.ipynb +++ /dev/null @@ -1,119 +0,0 @@ -{ - "cells": [ - { - "cell_type": "code", - "execution_count": null, - "id": "0", - "metadata": {}, - "outputs": [], - "source": [ - "%load_ext autoreload\n", - "%autoreload 2\n", - "from dotenv import load_dotenv\n", - "load_dotenv()\n", - "\n", - "import os\n", - "\n", - "from gyandex.podgen.engine.publisher import PodcastPublisher, PodcastMetadata\n", - "from gyandex.podgen.storage.s3 import 
S3CompatibleStorage\n", - "from gyandex.podgen.feed.models import PodcastDB\n" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "1", - "metadata": {}, - "outputs": [], - "source": [ - "storage = S3CompatibleStorage(\n", - " bucket=\"gyandex\",\n", - " access_key_id=os.getenv('ACCESS_KEY_ID'),\n", - " secret_access_key=os.getenv('SECRET_ACCESS_KEY'),\n", - " endpoint_url=\"https://675f4b8193843a14b144c70d7a440064.r2.cloudflarestorage.com\",\n", - " custom_domain=\"pub-347a2b64a84a441c97338968c27696c5.r2.dev\",\n", - ")\n", - "\n", - "db = PodcastDB(\n", - " db_path='assets/podcastdb.sqlite',\n", - ")\n", - "\n", - "publisher = PodcastPublisher(\n", - " storage=storage,\n", - " db=db,\n", - " base_url='https://pub-347a2b64a84a441c97338968c27696c5.r2.dev',\n", - ")" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "2", - "metadata": {}, - "outputs": [], - "source": [ - "# Create a new feed\n", - "feed_url = publisher.create_feed(\n", - " name=\"tech-talk\",\n", - " title=\"Tech Talk Podcast\",\n", - " description=\"A podcast about technology\",\n", - " author=\"Dhruv Baldawa\",\n", - " email=\"me@example.com\",\n", - " language=\"en\",\n", - " categories=\"Technology,News\"\n", - ")\n", - "\n", - "# Add an episode\n", - "urls = publisher.add_episode(\n", - " feed_name=\"tech-talk\",\n", - " audio_file_path=\"./generated_podcasts/podcast_20241025_021450.mp3\",\n", - " metadata=PodcastMetadata(\n", - " title=\"Prioritizing and Balancing Energy\",\n", - " description=\"Prioritizing and Balancing Energy\",\n", - " episode_number=1,\n", - " season_number=1\n", - " )\n", - ")" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "3", - "metadata": {}, - "outputs": [], - "source": [ - "print(urls)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "4", - "metadata": {}, - "outputs": [], - "source": [] - } - ], - "metadata": { - "kernelspec": { - "display_name": "Python 3", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 2 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython2", - "version": "2.7.6" - } - }, - "nbformat": 4, - "nbformat_minor": 5 -} diff --git a/pyproject.toml b/pyproject.toml index 724a5a4..947f303 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -7,7 +7,7 @@ license = "AGPL-3.0-or-later" readme = "README.md" [tool.poetry.scripts] -podgen = "gyandex.cli.podgen:main" +podgen = "gyandex.cli.genpod:main" [tool.poetry.dependencies] python = "^3.11" @@ -33,6 +33,7 @@ pytest-mock = "^3.14.0" pytest-cov = "^5.0.0" rust-just = "^1.36.0" responses = "^0.25.3" +ruff = "^0.8.2" [project.classifiers] license = "OSI Approved :: GNU Affero General Public License v3 (AGPLv3)" @@ -40,3 +41,13 @@ license = "OSI Approved :: GNU Affero General Public License v3 (AGPLv3)" [build-system] requires = ["poetry-core"] build-backend = "poetry.core.masonry.api" + +[tool.ruff] +line-length = 120 +fix = true + +[tool.ruff.lint] +# Add the `line-too-long` rule to the enforced rule set. By default, Ruff omits rules that +# overlap with the use of a formatter, like Black, but we can override this behavior by +# explicitly adding the rule. 
+extend-select = ["E501"] \ No newline at end of file From 34f8c2acbd6b1182b0aed8a48d48204c494fb520 Mon Sep 17 00:00:00 2001 From: Dhruv Baldawa Date: Thu, 5 Dec 2024 23:01:16 +0530 Subject: [PATCH 02/10] lint and format using ruff --- gyandex/cli/podgen.py | 4 +- gyandex/llms/factory_test.py | 4 +- gyandex/loaders/factory.py | 2 +- gyandex/loaders/factory_test.py | 3 +- gyandex/podgen/config/loader.py | 2 + gyandex/podgen/config/loader_test.py | 4 +- gyandex/podgen/config/schema.py | 4 +- gyandex/podgen/engine/publisher.py | 11 ++-- gyandex/podgen/engine/publisher_test.py | 6 ++- gyandex/podgen/feed/generator.py | 4 +- gyandex/podgen/feed/generator_test.py | 3 +- gyandex/podgen/feed/models.py | 9 ++-- gyandex/podgen/feed/models_test.py | 3 +- gyandex/podgen/speech/factory.py | 2 +- gyandex/podgen/speech/google_cloud.py | 4 +- gyandex/podgen/speech/google_cloud_test.py | 6 ++- gyandex/podgen/storage/factory.py | 4 +- gyandex/podgen/storage/factory_test.py | 6 +-- gyandex/podgen/storage/s3.py | 7 +-- gyandex/podgen/storage/s3_test.py | 4 +- gyandex/podgen/workflows/alexandria.py | 9 ++-- gyandex/podgen/workflows/factory.py | 2 +- justfile | 5 ++ poetry.lock | 33 +++++++++++- pyproject.toml | 3 +- reading-list.yaml | 61 ++++++++++++++++++++++ 26 files changed, 161 insertions(+), 44 deletions(-) create mode 100644 reading-list.yaml diff --git a/gyandex/cli/podgen.py b/gyandex/cli/podgen.py index a20b13e..83e21d2 100644 --- a/gyandex/cli/podgen.py +++ b/gyandex/cli/podgen.py @@ -7,9 +7,9 @@ from rich.console import Console from gyandex.loaders.factory import load_content -from gyandex.podgen.engine.publisher import PodcastPublisher, PodcastMetadata -from gyandex.podgen.feed.models import PodcastDB from gyandex.podgen.config.loader import load_config +from gyandex.podgen.engine.publisher import PodcastMetadata, PodcastPublisher +from gyandex.podgen.feed.models import PodcastDB from gyandex.podgen.speech.factory import get_text_to_speech_engine from gyandex.podgen.storage.factory import get_storage from gyandex.podgen.workflows.factory import get_workflow diff --git a/gyandex/llms/factory_test.py b/gyandex/llms/factory_test.py index 4e5ba2d..5ba2661 100644 --- a/gyandex/llms/factory_test.py +++ b/gyandex/llms/factory_test.py @@ -2,8 +2,8 @@ from langchain_google_genai import GoogleGenerativeAI from pydantic import ValidationError -from gyandex.llms.factory import get_model -from gyandex.podgen.config.schema import GoogleGenerativeAILLMConfig +from ..podgen.config.schema import GoogleGenerativeAILLMConfig +from .factory import get_model def test_get_model_returns_google_generative_ai(): diff --git a/gyandex/loaders/factory.py b/gyandex/loaders/factory.py index ccb32e3..eaabbd9 100644 --- a/gyandex/loaders/factory.py +++ b/gyandex/loaders/factory.py @@ -1,4 +1,4 @@ -from typing import Optional, Dict, Any +from typing import Any, Dict, Optional import requests from pydantic import BaseModel diff --git a/gyandex/loaders/factory_test.py b/gyandex/loaders/factory_test.py index 0bea739..6a9a58a 100644 --- a/gyandex/loaders/factory_test.py +++ b/gyandex/loaders/factory_test.py @@ -1,5 +1,6 @@ import responses -from gyandex.loaders.factory import fetch_url + +from .factory import fetch_url @responses.activate diff --git a/gyandex/podgen/config/loader.py b/gyandex/podgen/config/loader.py index 720a947..724cff8 100644 --- a/gyandex/podgen/config/loader.py +++ b/gyandex/podgen/config/loader.py @@ -1,6 +1,8 @@ import os import re + import yaml + from .schema import PodcastConfig diff --git 
a/gyandex/podgen/config/loader_test.py b/gyandex/podgen/config/loader_test.py index 0f9fa39..550c8f3 100644 --- a/gyandex/podgen/config/loader_test.py +++ b/gyandex/podgen/config/loader_test.py @@ -1,6 +1,8 @@ import os + import pytest -from .loader import resolve_env_vars, resolve_nested_env_vars, load_config + +from .loader import load_config, resolve_env_vars, resolve_nested_env_vars from .schema import PodcastConfig diff --git a/gyandex/podgen/config/schema.py b/gyandex/podgen/config/schema.py index 908f58e..ac335ff 100644 --- a/gyandex/podgen/config/schema.py +++ b/gyandex/podgen/config/schema.py @@ -1,7 +1,7 @@ from enum import Enum -from typing import List, Optional, Union, Literal +from typing import List, Literal, Optional, Union -from pydantic import BaseModel, HttpUrl, Field +from pydantic import BaseModel, Field, HttpUrl # @TODO: Redo this, the content format can be better structured diff --git a/gyandex/podgen/engine/publisher.py b/gyandex/podgen/engine/publisher.py index e460b92..a665c1c 100644 --- a/gyandex/podgen/engine/publisher.py +++ b/gyandex/podgen/engine/publisher.py @@ -1,14 +1,15 @@ -from typing import Optional, Dict, Any, Type -import os import hashlib -from datetime import datetime -import mutagen +import os from dataclasses import dataclass +from datetime import datetime +from typing import Any, Dict, Optional, Type from urllib.parse import urljoin +import mutagen + from ..feed.generator import PodcastFeedGenerator +from ..feed.models import Episode, PodcastDB from ..storage.s3 import S3CompatibleStorage -from ..feed.models import PodcastDB, Episode # @TODO: Look at URL manipulation and how URLs are used between storage diff --git a/gyandex/podgen/engine/publisher_test.py b/gyandex/podgen/engine/publisher_test.py index a9906c1..76cafa8 100644 --- a/gyandex/podgen/engine/publisher_test.py +++ b/gyandex/podgen/engine/publisher_test.py @@ -1,7 +1,9 @@ -import pytest from unittest.mock import Mock, patch -from .publisher import PodcastPublisher, PodcastMetadata + +import pytest + from ..storage.s3 import S3CompatibleStorage +from .publisher import PodcastMetadata, PodcastPublisher @pytest.fixture diff --git a/gyandex/podgen/feed/generator.py b/gyandex/podgen/feed/generator.py index fc7cc85..eae4e85 100644 --- a/gyandex/podgen/feed/generator.py +++ b/gyandex/podgen/feed/generator.py @@ -1,6 +1,8 @@ -from feedgen.feed import FeedGenerator from email.utils import formatdate + import pytz +from feedgen.feed import FeedGenerator + from .models import PodcastDB diff --git a/gyandex/podgen/feed/generator_test.py b/gyandex/podgen/feed/generator_test.py index 7878195..61499e4 100644 --- a/gyandex/podgen/feed/generator_test.py +++ b/gyandex/podgen/feed/generator_test.py @@ -1,6 +1,7 @@ -import pytest import xml.etree.ElementTree as ET +import pytest + from .generator import PodcastFeedGenerator # Feed Generator Tests diff --git a/gyandex/podgen/feed/models.py b/gyandex/podgen/feed/models.py index 72afdf9..5a1ea91 100644 --- a/gyandex/podgen/feed/models.py +++ b/gyandex/podgen/feed/models.py @@ -1,12 +1,13 @@ -from typing import Optional, Type, Tuple +from typing import Optional, Tuple, Type + from sqlalchemy import ( - create_engine, Column, - Integer, - String, DateTime, ForeignKey, + Integer, + String, Text, + create_engine, ) from sqlalchemy.orm import declarative_base, relationship, sessionmaker from sqlalchemy.sql import func diff --git a/gyandex/podgen/feed/models_test.py b/gyandex/podgen/feed/models_test.py index ebfe1fb..2213715 100644 --- 
a/gyandex/podgen/feed/models_test.py +++ b/gyandex/podgen/feed/models_test.py @@ -1,7 +1,8 @@ import os + import pytest -from .models import PodcastDB, Feed +from .models import Feed, PodcastDB @pytest.fixture diff --git a/gyandex/podgen/speech/factory.py b/gyandex/podgen/speech/factory.py index ddf1d2e..9fbf992 100644 --- a/gyandex/podgen/speech/factory.py +++ b/gyandex/podgen/speech/factory.py @@ -1,7 +1,7 @@ from typing import Union -from .google_cloud import GoogleTTSEngine from ..config.schema import GoogleCloudTTSConfig +from .google_cloud import GoogleTTSEngine # @TODO: Centralize this type and move this to a common place diff --git a/gyandex/podgen/speech/google_cloud.py b/gyandex/podgen/speech/google_cloud.py index c35930f..354f865 100644 --- a/gyandex/podgen/speech/google_cloud.py +++ b/gyandex/podgen/speech/google_cloud.py @@ -1,10 +1,10 @@ from io import BytesIO -from typing import List, Optional, Dict, Any +from typing import Any, Dict, List, Optional from google.cloud import texttospeech from pydub import AudioSegment -from ..config.schema import Participant, Gender +from ..config.schema import Gender, Participant from ..workflows.types import ScriptSegment # @TODO: Pull this out of workflows diff --git a/gyandex/podgen/speech/google_cloud_test.py b/gyandex/podgen/speech/google_cloud_test.py index fd229a9..cf248e8 100644 --- a/gyandex/podgen/speech/google_cloud_test.py +++ b/gyandex/podgen/speech/google_cloud_test.py @@ -1,7 +1,9 @@ from unittest.mock import Mock, patch + from google.cloud import texttospeech -from gyandex.podgen.processors.tts import GoogleTTSEngine -from gyandex.podgen.engine.workflows import ScriptSegment + +from ..speech.google_cloud import GoogleTTSEngine +from ..workflows.types import ScriptSegment def test_tts_engine_initialization(): diff --git a/gyandex/podgen/storage/factory.py b/gyandex/podgen/storage/factory.py index 1bf9408..d1d59ff 100644 --- a/gyandex/podgen/storage/factory.py +++ b/gyandex/podgen/storage/factory.py @@ -1,7 +1,7 @@ from typing import Union -from gyandex.podgen.config.schema import S3StorageConfig -from gyandex.podgen.storage.s3 import S3CompatibleStorage +from ..config.schema import S3StorageConfig +from ..storage.s3 import S3CompatibleStorage # @TODO: Centralize this type and move this to a common place diff --git a/gyandex/podgen/storage/factory_test.py b/gyandex/podgen/storage/factory_test.py index b9074c5..05d62ce 100644 --- a/gyandex/podgen/storage/factory_test.py +++ b/gyandex/podgen/storage/factory_test.py @@ -1,9 +1,9 @@ import pytest from pydantic import ValidationError -from gyandex.podgen.config.schema import S3StorageConfig -from gyandex.podgen.storage.factory import get_storage -from gyandex.podgen.storage.s3 import S3CompatibleStorage +from ..config.schema import S3StorageConfig +from .factory import get_storage +from .s3 import S3CompatibleStorage def test_get_storage_returns_s3_storage(): diff --git a/gyandex/podgen/storage/s3.py b/gyandex/podgen/storage/s3.py index df4d2b1..55a32db 100644 --- a/gyandex/podgen/storage/s3.py +++ b/gyandex/podgen/storage/s3.py @@ -1,8 +1,9 @@ -from typing import Optional, Dict, Any -import boto3 -from botocore.client import Config import mimetypes import os +from typing import Any, Dict, Optional + +import boto3 +from botocore.client import Config class S3CompatibleStorage: diff --git a/gyandex/podgen/storage/s3_test.py b/gyandex/podgen/storage/s3_test.py index 37b07ba..e9afca0 100644 --- a/gyandex/podgen/storage/s3_test.py +++ b/gyandex/podgen/storage/s3_test.py @@ -1,6 +1,8 @@ 
+from unittest.mock import ANY, Mock, patch + import pytest -from unittest.mock import Mock, patch, ANY from botocore.exceptions import ClientError + from .s3 import S3CompatibleStorage diff --git a/gyandex/podgen/workflows/alexandria.py b/gyandex/podgen/workflows/alexandria.py index 5720089..3eebb67 100644 --- a/gyandex/podgen/workflows/alexandria.py +++ b/gyandex/podgen/workflows/alexandria.py @@ -1,13 +1,14 @@ +import asyncio +from typing import List, Union + from langchain.output_parsers import PydanticOutputParser from langchain.prompts import PromptTemplate -from typing import List, Union -import asyncio from rich import print as rprint -from .types import PodcastOutline, ScriptSegment, PodcastEpisode, OutlineSegment from ...llms.factory import get_model -from ..config.schema import PodcastConfig, GoogleGenerativeAILLMConfig, Participant from ...loaders.factory import Document +from ..config.schema import GoogleGenerativeAILLMConfig, Participant, PodcastConfig +from .types import OutlineSegment, PodcastEpisode, PodcastOutline, ScriptSegment class OutlineGenerator: diff --git a/gyandex/podgen/workflows/factory.py b/gyandex/podgen/workflows/factory.py index d7d7071..16467e0 100644 --- a/gyandex/podgen/workflows/factory.py +++ b/gyandex/podgen/workflows/factory.py @@ -1,7 +1,7 @@ from typing import Union -from .alexandria import AlexandriaWorkflow from ..config.schema import PodcastConfig +from .alexandria import AlexandriaWorkflow def get_workflow(config: PodcastConfig) -> Union[AlexandriaWorkflow]: diff --git a/justfile b/justfile index 9316b4e..c1a5751 100644 --- a/justfile +++ b/justfile @@ -1,2 +1,7 @@ test: pytest --cov=gyandex --cov-report html --cov-report term:skip-covered gyandex/ + +lint: + ruff check + ruff check --select I --fix + ruff format diff --git a/poetry.lock b/poetry.lock index 5ac5676..5cb7900 100644 --- a/poetry.lock +++ b/poetry.lock @@ -2625,6 +2625,17 @@ example = ["cairocffi (>=1.7)", "contextily (>=1.6)", "igraph (>=0.11)", "momepy extra = ["lxml (>=4.6)", "pydot (>=3.0.1)", "pygraphviz (>=1.14)", "sympy (>=1.10)"] test = ["pytest (>=7.2)", "pytest-cov (>=4.0)"] +[[package]] +name = "nodeenv" +version = "1.9.1" +description = "Node.js virtual environment builder" +optional = false +python-versions = "!=3.0.*,!=3.1.*,!=3.2.*,!=3.3.*,!=3.4.*,!=3.5.*,!=3.6.*,>=2.7" +files = [ + {file = "nodeenv-1.9.1-py2.py3-none-any.whl", hash = "sha256:ba11c9782d29c27c70ffbdda2d7415098754709be8a7056d79a737cd901155c9"}, + {file = "nodeenv-1.9.1.tar.gz", hash = "sha256:6ec12890a2dab7946721edbfbcd91f3319c6ccc9aec47be7c7e6b7011ee6645f"}, +] + [[package]] name = "notebook" version = "7.2.2" @@ -3556,6 +3567,26 @@ files = [ [package.extras] diagrams = ["jinja2", "railroad-diagrams"] +[[package]] +name = "pyright" +version = "1.1.390" +description = "Command line wrapper for pyright" +optional = false +python-versions = ">=3.7" +files = [ + {file = "pyright-1.1.390-py3-none-any.whl", hash = "sha256:ecebfba5b6b50af7c1a44c2ba144ba2ab542c227eb49bc1f16984ff714e0e110"}, + {file = "pyright-1.1.390.tar.gz", hash = "sha256:aad7f160c49e0fbf8209507a15e17b781f63a86a1facb69ca877c71ef2e9538d"}, +] + +[package.dependencies] +nodeenv = ">=1.6.0" +typing-extensions = ">=4.1" + +[package.extras] +all = ["nodejs-wheel-binaries", "twine (>=3.4.1)"] +dev = ["twine (>=3.4.1)"] +nodejs = ["nodejs-wheel-binaries"] + [[package]] name = "pytest" version = "8.3.3" @@ -4857,4 +4888,4 @@ propcache = ">=0.2.0" [metadata] lock-version = "2.0" python-versions = "^3.11" -content-hash = 
"f1281860f40b39bc21c0b5a6c0aa632f44f87401e3173fe034cf5a3f9ada5aed" +content-hash = "592f5a9a27d922b3d5940c94210b79988cd92b6a3b1a6422bd03959c953fbf24" diff --git a/pyproject.toml b/pyproject.toml index 947f303..78769cd 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -34,6 +34,7 @@ pytest-cov = "^5.0.0" rust-just = "^1.36.0" responses = "^0.25.3" ruff = "^0.8.2" +pyright = "^1.1.390" [project.classifiers] license = "OSI Approved :: GNU Affero General Public License v3 (AGPLv3)" @@ -50,4 +51,4 @@ fix = true # Add the `line-too-long` rule to the enforced rule set. By default, Ruff omits rules that # overlap with the use of a formatter, like Black, but we can override this behavior by # explicitly adding the rule. -extend-select = ["E501"] \ No newline at end of file +extend-select = ["E501", "I"] \ No newline at end of file diff --git a/reading-list.yaml b/reading-list.yaml new file mode 100644 index 0000000..c725e96 --- /dev/null +++ b/reading-list.yaml @@ -0,0 +1,61 @@ +version: "1.0" +content: +# source: "https://notes.mtb.xyz/p/invisible-asymptotes-vertical-software" + source: "https://learnings.aleixmorgadas.dev/p/dealing-with-teams-with-competing" + format: "html" + +workflow: + name: alexandria + verbose: true + outline: + provider: "google-generative-ai" + model: "gemini-1.5-pro" + temperature: 0.4 + google_api_key: "${GOOGLE_API_KEY}" + script: + provider: "google-generative-ai" + model: "gemini-1.5-flash" + temperature: 0.8 + google_api_key: "${GOOGLE_API_KEY}" + +tts: + provider: "google-cloud" + participants: + - name: Sarah + personality: | + An enthusiastic and knowledgeable tech journalist with 10 years of experience. + Style: Articulate, engaging, asks insightful questions, and guides the conversation smoothly. + voice: en-US-Journey-F + language_code: en-US +# voice: en-GB-Neural2-N +# language_code: en-GB + gender: female + - name: Mike + personality: | + A practical industry expert with hands-on experience. + Style: Down-to-earth, provides real-world examples, occasionally humorous, and good at breaking down complex topics. 
+ voice: en-US-Journey-D + language_code: en-US +# voice: en-GB-Neural2-O +# language_code: en-GB + gender: male + +storage: + provider: "s3" + access_key: "${ACCESS_KEY_ID}" + secret_key: "${SECRET_ACCESS_KEY}" + bucket: "gyandex" + region: "us-east-1" + endpoint: "https://675f4b8193843a14b144c70d7a440064.r2.cloudflarestorage.com" + custom_domain: "pub-347a2b64a84a441c97338968c27696c5.r2.dev" + +feed: + title: "Gyandex: Tech Reading" + slug: "reading-list" + description: "Technical reading list curated by Dhruv Baldawa" + author: "Dhruv Baldawa" + email: "me@dhruvb.com" + language: "en" + categories: ["Technology", "Software Development", "Programming"] + image: "https://images.pexels.com/photos/26730962/pexels-photo-26730962.jpeg?cs=srgb&dl=pexels-helloaesthe-26730962.jpg&fm=jpg&w=640&h=960" + website: "https://github.com/dhruvbaldawa/gyandex" From c92a5e4aeb49fed40afe27818d733f192290ec77 Mon Sep 17 00:00:00 2001 From: Dhruv Baldawa Date: Thu, 5 Dec 2024 23:46:45 +0530 Subject: [PATCH 03/10] Add pyright --- gyandex/llms/factory.py | 6 +-- gyandex/podgen/config/loader.py | 2 +- gyandex/podgen/config/schema.py | 10 ++-- gyandex/podgen/engine/publisher.py | 6 +-- gyandex/podgen/feed/generator.py | 28 +++++----- gyandex/podgen/feed/models.py | 4 +- gyandex/podgen/speech/factory.py | 2 +- gyandex/podgen/speech/google_cloud.py | 4 +- gyandex/podgen/speech/google_cloud_test.py | 4 +- gyandex/podgen/storage/factory.py | 2 +- gyandex/podgen/storage/factory_test.py | 2 +- gyandex/podgen/storage/s3.py | 4 +- gyandex/podgen/workflows/alexandria.py | 6 +-- gyandex/podgen/workflows/factory.py | 2 +- justfile | 6 +-- pyproject.toml | 18 ++++++- reading-list.yaml | 61 ---------------------- 17 files changed, 61 insertions(+), 106 deletions(-) delete mode 100644 reading-list.yaml diff --git a/gyandex/llms/factory.py b/gyandex/llms/factory.py index f51004f..a91eda0 100644 --- a/gyandex/llms/factory.py +++ b/gyandex/llms/factory.py @@ -40,13 +40,13 @@ def on_llm_error(self, error, **kwargs): # @TODO: Centralize this argument type in a single place -def get_model(config: Union[GoogleGenerativeAILLMConfig], log_dir="assets"): +def get_model(config: Union[GoogleGenerativeAILLMConfig], log_dir="assets"): # pyright: ignore [reportInvalidTypeArguments] if config.provider == "google-generative-ai": return GoogleGenerativeAI( model=config.model, temperature=config.temperature, - google_api_key=config.google_api_key, - max_output_tokens=8192, # @TODO: Move this to config params + google_api_key=config.google_api_key, # pyright: ignore [reportCallIssue] + max_output_tokens=8192, # @TODO: Move this to config params # pyright: ignore [reportCallIssue] callbacks=[LLMLoggingCallback(log_dir)], ) else: diff --git a/gyandex/podgen/config/loader.py b/gyandex/podgen/config/loader.py index 724cff8..bdb2225 100644 --- a/gyandex/podgen/config/loader.py +++ b/gyandex/podgen/config/loader.py @@ -43,4 +43,4 @@ def load_config(config_path: str) -> PodcastConfig: config_dict = resolve_nested_env_vars(config_dict) # Parse with Pydantic - return PodcastConfig(**config_dict) + return PodcastConfig(**config_dict) # pyright: ignore [reportCallIssue] diff --git a/gyandex/podgen/config/schema.py b/gyandex/podgen/config/schema.py index ac335ff..c12e1ec 100644 --- a/gyandex/podgen/config/schema.py +++ b/gyandex/podgen/config/schema.py @@ -31,8 +31,8 @@ class GoogleGenerativeAILLMConfig(BaseModel): class AlexandriaWorkflowConfig(BaseModel): name: Literal["alexandria"] - outline: Union[GoogleGenerativeAILLMConfig] - script: 
Union[GoogleGenerativeAILLMConfig] + outline: Union[GoogleGenerativeAILLMConfig] # pyright: ignore [reportInvalidTypeArguments] + script: Union[GoogleGenerativeAILLMConfig] # pyright: ignore [reportInvalidTypeArguments] verbose: Optional[bool] = False @@ -90,7 +90,7 @@ class ContentStructure(BaseModel): class PodcastConfig(BaseModel): version: str content: ContentConfig - workflow: Union[AlexandriaWorkflowConfig] = Field(discriminator="name") - tts: Union[GoogleCloudTTSConfig] = Field(discriminator="provider") - storage: Union[S3StorageConfig] = Field(discriminator="provider") + workflow: Union[AlexandriaWorkflowConfig] = Field(discriminator="name") # pyright: ignore [reportInvalidTypeArguments] + tts: Union[GoogleCloudTTSConfig] = Field(discriminator="provider") # pyright: ignore [reportInvalidTypeArguments] + storage: Union[S3StorageConfig] = Field(discriminator="provider") # pyright: ignore [reportInvalidTypeArguments] feed: FeedConfig diff --git a/gyandex/podgen/engine/publisher.py b/gyandex/podgen/engine/publisher.py index a665c1c..6910f3b 100644 --- a/gyandex/podgen/engine/publisher.py +++ b/gyandex/podgen/engine/publisher.py @@ -2,7 +2,7 @@ import os from dataclasses import dataclass from datetime import datetime -from typing import Any, Dict, Optional, Type +from typing import Any, Dict, Optional, Sequence from urllib.parse import urljoin import mutagen @@ -53,7 +53,7 @@ def __init__( def _get_audio_metadata(self, file_path: str) -> Dict[str, Any]: """Extract metadata from audio file.""" - audio = mutagen.File(file_path) + audio = mutagen.File(file_path) # pyright: ignore [reportPrivateImportUsage] metadata = {} if audio is not None: @@ -183,6 +183,6 @@ def get_feed_url(self, feed_slug: str) -> str: """Get the URL for a feed.""" return urljoin(self.base_url, f"{self.feed_prefix}/{feed_slug}.xml") - def list_episodes(self, feed_slug: str, limit: Optional[int] = None) -> list[Type[Episode]]: + def list_episodes(self, feed_slug: str, limit: Optional[int] = None) -> Sequence[Episode]: """List episodes in a feed.""" return self.db.get_episodes(feed_slug, limit) diff --git a/gyandex/podgen/feed/generator.py b/gyandex/podgen/feed/generator.py index eae4e85..60e67a2 100644 --- a/gyandex/podgen/feed/generator.py +++ b/gyandex/podgen/feed/generator.py @@ -35,15 +35,15 @@ def generate_feed(self, slug: str) -> str: fg.language(feed_data.language) fg.copyright(feed_data.copyright) - if feed_data.image_url: + if feed_data.image_url is not None: fg.logo(feed_data.image_url) fg.image(feed_data.image_url) # iTunes specific tags - fg.podcast.itunes_category(feed_data.categories.split(",")[0] if feed_data.categories else "Technology") - fg.podcast.itunes_explicit(feed_data.explicit) - fg.podcast.itunes_author(feed_data.author) - fg.podcast.itunes_owner(name=feed_data.author, email=feed_data.email) + fg.podcast.itunes_category(feed_data.categories.split(",")[0] if feed_data.categories else "Technology") # pyright: ignore [reportAttributeAccessIssue, reportGeneralTypeIssues] + fg.podcast.itunes_explicit(feed_data.explicit) # pyright: ignore [reportAttributeAccessIssue, reportGeneralTypeIssues] + fg.podcast.itunes_author(feed_data.author) # pyright: ignore [reportAttributeAccessIssue, reportGeneralTypeIssues] + fg.podcast.itunes_owner(name=feed_data.author, email=feed_data.email) # pyright: ignore [reportAttributeAccessIssue, reportGeneralTypeIssues] # Add episodes episodes = self.db.get_episodes(slug) @@ -61,14 +61,14 @@ def generate_feed(self, slug: str) -> str: fe.enclosure(episode.audio_url, 
str(episode.file_size), episode.mime_type) # iTunes specific episode tags - fe.podcast.itunes_duration(str(episode.duration) if episode.duration else "0") - fe.podcast.itunes_explicit(episode.explicit) - if episode.image_url: - fe.podcast.itunes_image(episode.image_url) - if episode.episode_number: - fe.podcast.itunes_episode(str(episode.episode_number)) - if episode.season_number: - fe.podcast.itunes_season(str(episode.season_number)) - fe.podcast.itunes_episode_type(episode.episode_type) + fe.podcast.itunes_duration(str(episode.duration) if episode.duration is not None else "0") # pyright: ignore [reportAttributeAccessIssue] + fe.podcast.itunes_explicit(episode.explicit) # pyright: ignore [reportAttributeAccessIssue] + if episode.image_url is not None: + fe.podcast.itunes_image(episode.image_url) # pyright: ignore [reportAttributeAccessIssue] + if episode.episode_number is not None: + fe.podcast.itunes_episode(str(episode.episode_number)) # pyright: ignore [reportAttributeAccessIssue] + if episode.season_number is not None: + fe.podcast.itunes_season(str(episode.season_number)) # pyright: ignore [reportAttributeAccessIssue] + fe.podcast.itunes_episode_type(episode.episode_type) # pyright: ignore [reportAttributeAccessIssue] return fg.rss_str(pretty=True).decode("utf-8") diff --git a/gyandex/podgen/feed/models.py b/gyandex/podgen/feed/models.py index 5a1ea91..71f4111 100644 --- a/gyandex/podgen/feed/models.py +++ b/gyandex/podgen/feed/models.py @@ -1,4 +1,4 @@ -from typing import Optional, Tuple, Type +from typing import Optional, Sequence, Tuple from sqlalchemy import ( Column, @@ -120,7 +120,7 @@ def add_episode(self, feed_slug: str, title: str, audio_url: str, guid: str, **k return episode # @TODO: Update using the feed id, instead of name - def get_episodes(self, feed_slug: str, limit: int = None) -> list[Type[Episode]]: + def get_episodes(self, feed_slug: str, limit: Optional[int] = None) -> Sequence[Episode]: with self.session() as session: query = ( session.query(Episode) diff --git a/gyandex/podgen/speech/factory.py b/gyandex/podgen/speech/factory.py index 9fbf992..3a32f78 100644 --- a/gyandex/podgen/speech/factory.py +++ b/gyandex/podgen/speech/factory.py @@ -5,7 +5,7 @@ # @TODO: Centralize this type and move this to a common place -def get_text_to_speech_engine(tts_config: Union[GoogleCloudTTSConfig]): +def get_text_to_speech_engine(tts_config: Union[GoogleCloudTTSConfig]): # pyright: ignore [reportInvalidTypeArguments] if tts_config.provider == "google-cloud": return GoogleTTSEngine(tts_config.participants) else: diff --git a/gyandex/podgen/speech/google_cloud.py b/gyandex/podgen/speech/google_cloud.py index 354f865..a5cf6b1 100644 --- a/gyandex/podgen/speech/google_cloud.py +++ b/gyandex/podgen/speech/google_cloud.py @@ -5,7 +5,7 @@ from pydub import AudioSegment from ..config.schema import Gender, Participant -from ..workflows.types import ScriptSegment # @TODO: Pull this out of workflows +from ..workflows.types import DialogueLine # @TODO: Pull this out of workflows class GoogleTTSEngine: @@ -33,7 +33,7 @@ def resolve_gender(gender: Gender): for participant in participants } - def process_segment(self, segment: ScriptSegment) -> bytes: + def process_segment(self, segment: DialogueLine) -> bytes: return self.synthesize_speech(segment.text, segment.speaker) def synthesize_speech(self, text: str, speaker: str) -> bytes: diff --git a/gyandex/podgen/speech/google_cloud_test.py b/gyandex/podgen/speech/google_cloud_test.py index cf248e8..71b7d89 100644 --- 
a/gyandex/podgen/speech/google_cloud_test.py +++ b/gyandex/podgen/speech/google_cloud_test.py @@ -3,7 +3,7 @@ from google.cloud import texttospeech from ..speech.google_cloud import GoogleTTSEngine -from ..workflows.types import ScriptSegment +from ..workflows.types import DialogueLine, ScriptSegment def test_tts_engine_initialization(): @@ -39,7 +39,7 @@ def test_process_segment(mock_client): """Tests processing of a complete podcast segment""" # Given engine = GoogleTTSEngine() - segment = ScriptSegment(dialogue="Test segment", speaker="HOST1") + segment = ScriptSegment(dialogue=[DialogueLine(text="Test segment", speaker="HOST1")]) mock_response = Mock() mock_response.audio_content = b"test_audio_content" mock_client.return_value.synthesize_speech.return_value = mock_response diff --git a/gyandex/podgen/storage/factory.py b/gyandex/podgen/storage/factory.py index d1d59ff..0e58cc5 100644 --- a/gyandex/podgen/storage/factory.py +++ b/gyandex/podgen/storage/factory.py @@ -5,7 +5,7 @@ # @TODO: Centralize this type and move this to a common place -def get_storage(config: Union[S3StorageConfig]) -> S3CompatibleStorage: +def get_storage(config: Union[S3StorageConfig]) -> S3CompatibleStorage: # pyright: ignore [reportInvalidTypeArguments] if config.provider != "s3": # @TODO: Move this to a enum raise NotImplementedError(f"Unsupported storage provider: {config.provider}") diff --git a/gyandex/podgen/storage/factory_test.py b/gyandex/podgen/storage/factory_test.py index 05d62ce..1d2626b 100644 --- a/gyandex/podgen/storage/factory_test.py +++ b/gyandex/podgen/storage/factory_test.py @@ -33,7 +33,7 @@ def test_get_storage_raises_for_unsupported_provider(): # When/Then with pytest.raises(ValidationError): _ = S3StorageConfig( - provider="unsupported", + provider="s3", bucket="test-bucket", access_key="test-access-key", secret_key="test-secret-key", diff --git a/gyandex/podgen/storage/s3.py b/gyandex/podgen/storage/s3.py index 55a32db..5e96cca 100644 --- a/gyandex/podgen/storage/s3.py +++ b/gyandex/podgen/storage/s3.py @@ -17,7 +17,7 @@ def __init__( access_key_id: str, secret_access_key: str, endpoint_url: Optional[str] = None, - region_name: str = "auto", + region_name: Optional[str] = "auto", custom_domain: Optional[str] = None, acl: str = "public-read", ): @@ -74,7 +74,7 @@ def upload_file( if not content_type: content_type = "application/octet-stream" - extra_args = {"ACL": self.acl, "ContentType": content_type} + extra_args: Dict[str, Any] = {"ACL": self.acl, "ContentType": content_type} if metadata: extra_args["Metadata"] = metadata diff --git a/gyandex/podgen/workflows/alexandria.py b/gyandex/podgen/workflows/alexandria.py index 3eebb67..110ebf4 100644 --- a/gyandex/podgen/workflows/alexandria.py +++ b/gyandex/podgen/workflows/alexandria.py @@ -12,7 +12,7 @@ class OutlineGenerator: - def __init__(self, config: Union[GoogleGenerativeAILLMConfig]): + def __init__(self, config: Union[GoogleGenerativeAILLMConfig]): # pyright: ignore [reportInvalidTypeArguments] self.model = get_model(config) self.parser = PydanticOutputParser(pydantic_object=PodcastOutline) @@ -51,7 +51,7 @@ def generate_outline(self, document: Document) -> PodcastOutline: class ScriptGenerator: - def __init__(self, config: Union[GoogleGenerativeAILLMConfig], participants: List[Participant]): + def __init__(self, config: Union[GoogleGenerativeAILLMConfig], participants: List[Participant]): # pyright: ignore [reportInvalidTypeArguments] self.model = get_model(config) self.parser = PydanticOutputParser(pydantic_object=ScriptSegment) 
@@ -191,7 +191,7 @@ async def generate_script(self, document: Document) -> PodcastEpisode: rprint(f"Transition: {segment.transition}\n") # Generate script segments - script_segments = await script_gen.generate_full_script(outline, document) + script_segments = await script_gen.generate_full_script(outline, document.content) if self.config.workflow.verbose: # Print results in dialogue format diff --git a/gyandex/podgen/workflows/factory.py b/gyandex/podgen/workflows/factory.py index 16467e0..3dc883a 100644 --- a/gyandex/podgen/workflows/factory.py +++ b/gyandex/podgen/workflows/factory.py @@ -4,7 +4,7 @@ from .alexandria import AlexandriaWorkflow -def get_workflow(config: PodcastConfig) -> Union[AlexandriaWorkflow]: +def get_workflow(config: PodcastConfig) -> Union[AlexandriaWorkflow]: # pyright: ignore [reportInvalidTypeArguments] """Get workflow based on config""" if config.workflow.name == "alexandria": return AlexandriaWorkflow(config) diff --git a/justfile b/justfile index c1a5751..660641c 100644 --- a/justfile +++ b/justfile @@ -1,7 +1,7 @@ test: pytest --cov=gyandex --cov-report html --cov-report term:skip-covered gyandex/ -lint: - ruff check - ruff check --select I --fix +check: + pyright && \ + ruff check && \ ruff format diff --git a/pyproject.toml b/pyproject.toml index 78769cd..6b70856 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -51,4 +51,20 @@ fix = true # Add the `line-too-long` rule to the enforced rule set. By default, Ruff omits rules that # overlap with the use of a formatter, like Black, but we can override this behavior by # explicitly adding the rule. -extend-select = ["E501", "I"] \ No newline at end of file +extend-select = ["E501", "I"] + +[tool.pyright] +include = ["gyandex"] +exclude = [ + "**/__pycache__", + "**/*_test.py", # @FIXME: remove this once code issues are resolved +] + +defineConstant = { DEBUG = true } + +reportMissingImports = "error" +reportMissingTypeStubs = false + +executionEnvironments = [ + { root = "gyandex" } +] \ No newline at end of file diff --git a/reading-list.yaml b/reading-list.yaml deleted file mode 100644 index c725e96..0000000 --- a/reading-list.yaml +++ /dev/null @@ -1,61 +0,0 @@ -version: "1.0" -content: -# source: "https://notes.mtb.xyz/p/invisible-asymptotes-vertical-software" - source: "https://learnings.aleixmorgadas.dev/p/dealing-with-teams-with-competing" - format: "html" - -workflow: - name: alexandria - verbose: true - outline: - provider: "google-generative-ai" - model: "gemini-1.5-pro" - temperature: 0.4 - google_api_key: "${GOOGLE_API_KEY}" - script: - provider: "google-generative-ai" - model: "gemini-1.5-flash" - temperature: 0.8 - google_api_key: "${GOOGLE_API_KEY}" - -tts: - provider: "google-cloud" - participants: - - name: Sarah - personality: | - An enthusiastic and knowledgeable tech journalist with 10 years of experience. - Style: Articulate, engaging, asks insightful questions, and guides the conversation smoothly. - voice: en-US-Journey-F - language_code: en-US -# voice: en-GB-Neural2-N -# language_code: en-GB - gender: female - - name: Mike - personality: | - A practical industry expert with hands-on experience. - Style: Down-to-earth, provides real-world examples, occasionally humorous, and good at breaking down complex topics. 
- voice: en-US-Journey-D - language_code: en-US -# voice: en-GB-Neural2-O -# language_code: en-GB - gender: male - -storage: - provider: "s3" - access_key: "${ACCESS_KEY_ID}" - secret_key: "${SECRET_ACCESS_KEY}" - bucket: "gyandex" - region: "us-east-1" - endpoint: "https://675f4b8193843a14b144c70d7a440064.r2.cloudflarestorage.com" - custom_domain: "pub-347a2b64a84a441c97338968c27696c5.r2.dev" - -feed: - title: "Gyandex: Tech Reading" - slug: "reading-list" - description: "Technical reading list curated by Dhruv Baldawa" - author: "Dhruv Baldawa" - email: "me@dhruvb.com" - language: "en" - categories: ["Technology", "Software Development", "Programming"] - image: "https://images.pexels.com/photos/26730962/pexels-photo-26730962.jpeg?cs=srgb&dl=pexels-helloaesthe-26730962.jpg&fm=jpg&w=640&h=960" - website: "https://github.com/dhruvbaldawa/gyandex" From a40ee452af55391dc347735ce02617c872c763de Mon Sep 17 00:00:00 2001 From: Dhruv Baldawa Date: Thu, 5 Dec 2024 23:58:28 +0530 Subject: [PATCH 04/10] fix broken tests --- gyandex/podgen/config/loader_test.py | 44 ++++++++++++++-------- gyandex/podgen/engine/publisher_test.py | 2 +- gyandex/podgen/feed/generator_test.py | 4 +- gyandex/podgen/speech/google_cloud_test.py | 24 +++++++++--- gyandex/podgen/storage/factory_test.py | 2 +- 5 files changed, 51 insertions(+), 25 deletions(-) diff --git a/gyandex/podgen/config/loader_test.py b/gyandex/podgen/config/loader_test.py index 550c8f3..1432389 100644 --- a/gyandex/podgen/config/loader_test.py +++ b/gyandex/podgen/config/loader_test.py @@ -69,22 +69,36 @@ def test_load_config_parses_yaml_with_env_vars(tmp_path): content: format: html source: https://example.com/feed - llm: - provider: google-generative-ai - google_api_key: test_api_key - model: gpt-3.5-turbo - temperature: 1 - max_tokens: 1000 - script_template: test_template - system_prompt: test_prompt + workflow: + name: alexandria + verbose: true + outline: + provider: "google-generative-ai" + model: "gemini-1.5-pro" + temperature: 0.4 + google_api_key: "xxx" + script: + provider: "google-generative-ai" + model: "gemini-1.5-flash" + temperature: 0.8 + google_api_key: "xxx" tts: - provider: test - default_voice: test_voice - voices: - test_voice: - voice_id: test_voice_id - speaking_rate: 1.0 - pitch: 100 + provider: "google-cloud" + participants: + - name: Sarah + personality: | + An enthusiastic and knowledgeable tech journalist with 10 years of experience. + Style: Articulate, engaging, asks insightful questions, and guides the conversation smoothly. + voice: en-US-Journey-F + language_code: en-US + gender: female + - name: Mike + personality: | + A practical industry expert with hands-on experience. + Style: Down-to-earth, provides real-world examples, occasionally humorous, and good at breaking down complex topics. 
+ voice: en-US-Journey-D + language_code: en-US + gender: male storage: provider: s3 bucket: test_bucket diff --git a/gyandex/podgen/engine/publisher_test.py b/gyandex/podgen/engine/publisher_test.py index 76cafa8..167cf4b 100644 --- a/gyandex/podgen/engine/publisher_test.py +++ b/gyandex/podgen/engine/publisher_test.py @@ -4,7 +4,7 @@ from ..storage.s3 import S3CompatibleStorage from .publisher import PodcastMetadata, PodcastPublisher - +from ..feed.models_test import test_db @pytest.fixture def mock_storage(): diff --git a/gyandex/podgen/feed/generator_test.py b/gyandex/podgen/feed/generator_test.py index 61499e4..bf9f063 100644 --- a/gyandex/podgen/feed/generator_test.py +++ b/gyandex/podgen/feed/generator_test.py @@ -3,9 +3,7 @@ import pytest from .generator import PodcastFeedGenerator - -# Feed Generator Tests - +from ..feed.models_test import test_db, sample_feed_data, sample_episode_data def test_generate_feed_xml(test_db, sample_feed_data, sample_episode_data): """ diff --git a/gyandex/podgen/speech/google_cloud_test.py b/gyandex/podgen/speech/google_cloud_test.py index 71b7d89..7e5d610 100644 --- a/gyandex/podgen/speech/google_cloud_test.py +++ b/gyandex/podgen/speech/google_cloud_test.py @@ -2,14 +2,28 @@ from google.cloud import texttospeech +from ..config.schema import Participant, Gender from ..speech.google_cloud import GoogleTTSEngine from ..workflows.types import DialogueLine, ScriptSegment - +dummy_participants = [ + Participant( + name="HOST1", + language_code="en-US", + voice="en-US-Neural2-F", + gender=Gender.FEMALE + ), + Participant( + name="HOST2", + language_code="en-US", + voice="en-US-Neural2-F", + gender=Gender.FEMALE + ) +] def test_tts_engine_initialization(): """Tests that TTSEngine initializes with correct voice configurations""" # Given/When - engine = GoogleTTSEngine() + engine = GoogleTTSEngine(participants=dummy_participants) # Then assert "HOST1" in engine.voices @@ -21,7 +35,7 @@ def test_tts_engine_initialization(): def test_synthesize_speech_for_host1(mock_client): """Tests speech synthesis for HOST1 voice""" # Given - engine = GoogleTTSEngine() + engine = GoogleTTSEngine(participants=dummy_participants) mock_response = Mock() mock_response.audio_content = b"test_audio_content" mock_client.return_value.synthesize_speech.return_value = mock_response @@ -38,8 +52,8 @@ def test_synthesize_speech_for_host1(mock_client): def test_process_segment(mock_client): """Tests processing of a complete podcast segment""" # Given - engine = GoogleTTSEngine() - segment = ScriptSegment(dialogue=[DialogueLine(text="Test segment", speaker="HOST1")]) + engine = GoogleTTSEngine(participants=dummy_participants) + segment = DialogueLine(text="Test segment", speaker="HOST1") mock_response = Mock() mock_response.audio_content = b"test_audio_content" mock_client.return_value.synthesize_speech.return_value = mock_response diff --git a/gyandex/podgen/storage/factory_test.py b/gyandex/podgen/storage/factory_test.py index 1d2626b..05d62ce 100644 --- a/gyandex/podgen/storage/factory_test.py +++ b/gyandex/podgen/storage/factory_test.py @@ -33,7 +33,7 @@ def test_get_storage_raises_for_unsupported_provider(): # When/Then with pytest.raises(ValidationError): _ = S3StorageConfig( - provider="s3", + provider="unsupported", bucket="test-bucket", access_key="test-access-key", secret_key="test-secret-key", From f55b6371cd02c4c74d879d41647bbea49be2978b Mon Sep 17 00:00:00 2001 From: Dhruv Baldawa Date: Sat, 14 Dec 2024 12:20:33 +0530 Subject: [PATCH 05/10] Fix broken tests --- 
gyandex/podgen/config/loader_test.py | 3 ++- gyandex/podgen/engine/publisher_test.py | 2 +- gyandex/podgen/feed/generator_test.py | 2 +- gyandex/podgen/speech/factory_test.py | 29 ++++++++++++++++++++++ gyandex/podgen/speech/google_cloud_test.py | 20 +++++---------- gyandex/podgen/workflows/factory_test.py | 29 ++++++++++++++++++++++ 6 files changed, 68 insertions(+), 17 deletions(-) create mode 100644 gyandex/podgen/speech/factory_test.py create mode 100644 gyandex/podgen/workflows/factory_test.py diff --git a/gyandex/podgen/config/loader_test.py b/gyandex/podgen/config/loader_test.py index 1432389..8c49baf 100644 --- a/gyandex/podgen/config/loader_test.py +++ b/gyandex/podgen/config/loader_test.py @@ -95,7 +95,8 @@ def test_load_config_parses_yaml_with_env_vars(tmp_path): - name: Mike personality: | A practical industry expert with hands-on experience. - Style: Down-to-earth, provides real-world examples, occasionally humorous, and good at breaking down complex topics. + Style: Down-to-earth, provides real-world examples, occasionally humorous, + and good at breaking down complex topics. voice: en-US-Journey-D language_code: en-US gender: male diff --git a/gyandex/podgen/engine/publisher_test.py b/gyandex/podgen/engine/publisher_test.py index 167cf4b..76cafa8 100644 --- a/gyandex/podgen/engine/publisher_test.py +++ b/gyandex/podgen/engine/publisher_test.py @@ -4,7 +4,7 @@ from ..storage.s3 import S3CompatibleStorage from .publisher import PodcastMetadata, PodcastPublisher -from ..feed.models_test import test_db + @pytest.fixture def mock_storage(): diff --git a/gyandex/podgen/feed/generator_test.py b/gyandex/podgen/feed/generator_test.py index bf9f063..12e85d8 100644 --- a/gyandex/podgen/feed/generator_test.py +++ b/gyandex/podgen/feed/generator_test.py @@ -3,7 +3,7 @@ import pytest from .generator import PodcastFeedGenerator -from ..feed.models_test import test_db, sample_feed_data, sample_episode_data + def test_generate_feed_xml(test_db, sample_feed_data, sample_episode_data): """ diff --git a/gyandex/podgen/speech/factory_test.py b/gyandex/podgen/speech/factory_test.py new file mode 100644 index 0000000..3682139 --- /dev/null +++ b/gyandex/podgen/speech/factory_test.py @@ -0,0 +1,29 @@ +import pytest + +from ..config.schema import Gender, GoogleCloudTTSConfig, Participant +from .factory import get_text_to_speech_engine +from .google_cloud import GoogleTTSEngine + + +def test_get_text_to_speech_engine_returns_google_cloud(): + """Tests that get_text_to_speech_engine creates a GoogleTTSEngine instance with correct config""" + # Given + participants = [Participant(name="HOST1", language_code="en-US", voice="en-US-Neural2-F", gender=Gender.FEMALE)] + config = GoogleCloudTTSConfig(provider="google-cloud", participants=participants) + + # When + engine = get_text_to_speech_engine(config) + + # Then + assert isinstance(engine, GoogleTTSEngine) + assert engine.voices["HOST1"].name == "en-US-Neural2-F" + + +def test_get_text_to_speech_engine_raises_for_unsupported_provider(): + """Tests that get_text_to_speech_engine raises NotImplementedError for unsupported providers""" + # Given + config = GoogleCloudTTSConfig.model_construct(provider="unsupported", participants=[]) + + # When/Then + with pytest.raises(NotImplementedError, match="Unsupported TTS provider: unsupported"): + get_text_to_speech_engine(config) diff --git a/gyandex/podgen/speech/google_cloud_test.py b/gyandex/podgen/speech/google_cloud_test.py index 7e5d610..e0492fa 100644 --- a/gyandex/podgen/speech/google_cloud_test.py +++ 
b/gyandex/podgen/speech/google_cloud_test.py @@ -2,24 +2,16 @@ from google.cloud import texttospeech -from ..config.schema import Participant, Gender +from ..config.schema import Gender, Participant from ..speech.google_cloud import GoogleTTSEngine -from ..workflows.types import DialogueLine, ScriptSegment +from ..workflows.types import DialogueLine dummy_participants = [ - Participant( - name="HOST1", - language_code="en-US", - voice="en-US-Neural2-F", - gender=Gender.FEMALE - ), - Participant( - name="HOST2", - language_code="en-US", - voice="en-US-Neural2-F", - gender=Gender.FEMALE - ) + Participant(name="HOST1", language_code="en-US", voice="en-US-Neural2-F", gender=Gender.FEMALE), + Participant(name="HOST2", language_code="en-US", voice="en-US-Neural2-F", gender=Gender.FEMALE), ] + + def test_tts_engine_initialization(): """Tests that TTSEngine initializes with correct voice configurations""" # Given/When diff --git a/gyandex/podgen/workflows/factory_test.py b/gyandex/podgen/workflows/factory_test.py new file mode 100644 index 0000000..82dc81c --- /dev/null +++ b/gyandex/podgen/workflows/factory_test.py @@ -0,0 +1,29 @@ +import pytest + +from ..config.schema import AlexandriaWorkflowConfig, PodcastConfig +from .alexandria import AlexandriaWorkflow +from .factory import get_workflow + + +def test_get_workflow_returns_alexandria(): + """Tests that get_workflow creates an AlexandriaWorkflow instance with correct config""" + # Given + workflow_config = AlexandriaWorkflowConfig.model_construct(name="alexandria") + config = PodcastConfig.model_construct(workflow=workflow_config) + + # When + workflow = get_workflow(config) + + # Then + assert isinstance(workflow, AlexandriaWorkflow) + + +def test_get_workflow_raises_for_unsupported_workflow(): + """Tests that get_workflow raises NotImplementedError for unsupported workflows""" + # Given + workflow_config = AlexandriaWorkflowConfig.model_construct(name="unsupported") + config = PodcastConfig.model_construct(workflow=workflow_config) + + # When/Then + with pytest.raises(NotImplementedError, match="Unsupported workflow: unsupported"): + get_workflow(config) From fd3835e3bb391b300a2426e6a53139e78251e1cb Mon Sep 17 00:00:00 2001 From: Dhruv Baldawa Date: Sat, 14 Dec 2024 12:38:34 +0530 Subject: [PATCH 06/10] separate pytest fixtures --- conftest.py | 5 +++ gyandex/cli/podgen_test.py | 31 ++++++++++++++ gyandex/podgen/engine/publisher_test.py | 38 +---------------- gyandex/podgen/engine/test_fixtures.py | 39 ++++++++++++++++++ gyandex/podgen/feed/models_test.py | 53 +----------------------- gyandex/podgen/feed/test_fixtures.py | 54 +++++++++++++++++++++++++ gyandex/podgen/storage/s3_test.py | 51 +---------------------- gyandex/podgen/storage/test_fixtures.py | 54 +++++++++++++++++++++++++ 8 files changed, 186 insertions(+), 139 deletions(-) create mode 100644 conftest.py create mode 100644 gyandex/cli/podgen_test.py create mode 100644 gyandex/podgen/engine/test_fixtures.py create mode 100644 gyandex/podgen/feed/test_fixtures.py create mode 100644 gyandex/podgen/storage/test_fixtures.py diff --git a/conftest.py b/conftest.py new file mode 100644 index 0000000..4278925 --- /dev/null +++ b/conftest.py @@ -0,0 +1,5 @@ +pytest_plugins = [ + "gyandex.podgen.storage.test_fixtures", + "gyandex.podgen.feed.test_fixtures", + "gyandex.podgen.engine.test_fixtures", +] diff --git a/gyandex/cli/podgen_test.py b/gyandex/cli/podgen_test.py new file mode 100644 index 0000000..ceb7cc6 --- /dev/null +++ b/gyandex/cli/podgen_test.py @@ -0,0 +1,31 @@ +from 
unittest.mock import Mock, patch + +import pytest + +from gyandex.cli.podgen import main + + +def test_cli_help_command(): + """Tests that help command prints help message and exits""" + # When + with ( + patch("argparse.ArgumentParser.parse_args", return_value=Mock(config_path="--help")), + patch("argparse.ArgumentParser.print_help") as mock_help, + ): + main() + + # Then + mock_help.assert_called_once() + + +def test_invalid_config_path(): + """Tests handling of invalid configuration file path""" + # Given + invalid_path = "nonexistent.yaml" + + # When/Then + with ( + pytest.raises(FileNotFoundError), + patch("argparse.ArgumentParser.parse_args", return_value=Mock(config_path=invalid_path)), + ): + main() diff --git a/gyandex/podgen/engine/publisher_test.py b/gyandex/podgen/engine/publisher_test.py index 76cafa8..3e53f64 100644 --- a/gyandex/podgen/engine/publisher_test.py +++ b/gyandex/podgen/engine/publisher_test.py @@ -1,42 +1,6 @@ -from unittest.mock import Mock, patch - import pytest -from ..storage.s3 import S3CompatibleStorage -from .publisher import PodcastMetadata, PodcastPublisher - - -@pytest.fixture -def mock_storage(): - return Mock(spec=S3CompatibleStorage) - - -@pytest.fixture -def orchestrator(mock_storage, test_db): - return PodcastPublisher( - storage=mock_storage, - db=test_db, - base_url="https://example.com", - audio_prefix="episodes", - feed_prefix="feeds", - ) - - -@pytest.fixture -def sample_audio(tmp_path): - audio_path = tmp_path / "test.mp3" - audio_path.write_bytes(b"fake mp3 content") - return str(audio_path) - - -@pytest.fixture -def mock_mutagen(): - with patch("mutagen.File") as mock_file: - mock_audio = Mock() - mock_audio.info.length = 300 - mock_audio.mime = ["audio/mpeg"] - mock_file.return_value = mock_audio - yield mock_file +from .publisher import PodcastMetadata def test_create_feed(orchestrator, mock_storage): diff --git a/gyandex/podgen/engine/test_fixtures.py b/gyandex/podgen/engine/test_fixtures.py new file mode 100644 index 0000000..f672fd6 --- /dev/null +++ b/gyandex/podgen/engine/test_fixtures.py @@ -0,0 +1,39 @@ +from unittest.mock import Mock, patch + +import pytest + +from ..storage.s3 import S3CompatibleStorage +from .publisher import PodcastPublisher + + +@pytest.fixture +def mock_storage(): + return Mock(spec=S3CompatibleStorage) + + +@pytest.fixture +def orchestrator(mock_storage, test_db): + return PodcastPublisher( + storage=mock_storage, + db=test_db, + base_url="https://example.com", + audio_prefix="episodes", + feed_prefix="feeds", + ) + + +@pytest.fixture +def sample_audio(tmp_path): + audio_path = tmp_path / "test.mp3" + audio_path.write_bytes(b"fake mp3 content") + return str(audio_path) + + +@pytest.fixture +def mock_mutagen(): + with patch("mutagen.File") as mock_file: + mock_audio = Mock() + mock_audio.info.length = 300 + mock_audio.mime = ["audio/mpeg"] + mock_file.return_value = mock_audio + yield mock_file diff --git a/gyandex/podgen/feed/models_test.py b/gyandex/podgen/feed/models_test.py index 2213715..a080ea7 100644 --- a/gyandex/podgen/feed/models_test.py +++ b/gyandex/podgen/feed/models_test.py @@ -1,57 +1,6 @@ -import os - import pytest -from .models import Feed, PodcastDB - - -@pytest.fixture -def test_db(): - """Create a temporary test database""" - db_path = "test_podcast.db" - db = PodcastDB(db_path) - yield db - os.remove(db_path) - - -@pytest.fixture -def db_session(test_db): - """Create a database session for testing""" - Session = test_db.session - with Session() as session: - yield session - - 
-@pytest.fixture -def sample_feed_data(): - """Sample feed data for testing""" - return { - "slug": "test-podcast", - "title": "Test Podcast", - "description": "A test podcast", - "author": "Test Author", - "email": "test@example.com", - "website": "https://example.com", - "language": "en", - "copyright": "2024 Test Author", - "categories": "Technology,Education", - "explicit": "no", - } - - -@pytest.fixture -def sample_episode_data(): - """Sample episode data for testing""" - return { - "title": "Test Episode", - "description": "A test episode", - "audio_url": "https://example.com/episode1.mp3", - "guid": "episode-1", - "duration": 1800, - "file_size": 15000000, - "mime_type": "audio/mpeg", - "episode_type": "full", - } +from .models import Feed # Database Tests diff --git a/gyandex/podgen/feed/test_fixtures.py b/gyandex/podgen/feed/test_fixtures.py new file mode 100644 index 0000000..e117bbf --- /dev/null +++ b/gyandex/podgen/feed/test_fixtures.py @@ -0,0 +1,54 @@ +import os + +import pytest + +from .models import PodcastDB + + +@pytest.fixture +def test_db(): + """Create a temporary test database""" + db_path = "test_podcast.db" + db = PodcastDB(db_path) + yield db + os.remove(db_path) + + +@pytest.fixture +def db_session(test_db): + """Create a database session for testing""" + Session = test_db.session + with Session() as session: + yield session + + +@pytest.fixture +def sample_feed_data(): + """Sample feed data for testing""" + return { + "slug": "test-podcast", + "title": "Test Podcast", + "description": "A test podcast", + "author": "Test Author", + "email": "test@example.com", + "website": "https://example.com", + "language": "en", + "copyright": "2024 Test Author", + "categories": "Technology,Education", + "explicit": "no", + } + + +@pytest.fixture +def sample_episode_data(): + """Sample episode data for testing""" + return { + "title": "Test Episode", + "description": "A test episode", + "audio_url": "https://example.com/episode1.mp3", + "guid": "episode-1", + "duration": 1800, + "file_size": 15000000, + "mime_type": "audio/mpeg", + "episode_type": "full", + } diff --git a/gyandex/podgen/storage/s3_test.py b/gyandex/podgen/storage/s3_test.py index e9afca0..0ba8105 100644 --- a/gyandex/podgen/storage/s3_test.py +++ b/gyandex/podgen/storage/s3_test.py @@ -1,4 +1,4 @@ -from unittest.mock import ANY, Mock, patch +from unittest.mock import ANY, Mock import pytest from botocore.exceptions import ClientError @@ -6,55 +6,6 @@ from .s3 import S3CompatibleStorage -@pytest.fixture -def mock_s3_factory(): - with patch("boto3.client") as mock_client: - # Create a mock client instance - client = Mock() - mock_client.return_value = client - - # Mock the meta attributes - client.meta.endpoint_url = None - client.meta.region_name = "us-east-1" - - yield mock_client, client - - -@pytest.fixture -def mock_s3_storage(mock_s3_factory): - mock_client, _ = mock_s3_factory - return mock_client - - -@pytest.fixture -def mock_s3_client(mock_s3_factory): - _, client = mock_s3_factory - return client - - -@pytest.fixture -def storage(mock_s3_client): - return S3CompatibleStorage( - bucket="test-bucket", - access_key_id="test-key", - secret_access_key="test-secret", - region_name="us-east-1", - ) - - -@pytest.fixture -def r2_storage(mock_s3_client): - # Mock R2 endpoint - mock_s3_client.meta.endpoint_url = "https://test.r2.cloudflarestorage.com" - return S3CompatibleStorage( - bucket="test-bucket", - access_key_id="test-key", - secret_access_key="test-secret", - 
endpoint_url="https://test.r2.cloudflarestorage.com", - region_name="auto", - ) - - def test_initialization(mock_s3_storage): """Test storage initialization with different configurations""" # Test AWS S3 initialization diff --git a/gyandex/podgen/storage/test_fixtures.py b/gyandex/podgen/storage/test_fixtures.py new file mode 100644 index 0000000..7910be7 --- /dev/null +++ b/gyandex/podgen/storage/test_fixtures.py @@ -0,0 +1,54 @@ +from unittest.mock import Mock, patch + +import pytest + +from .s3 import S3CompatibleStorage + + +@pytest.fixture +def mock_s3_factory(): + with patch("boto3.client") as mock_client: + # Create a mock client instance + client = Mock() + mock_client.return_value = client + + # Mock the meta attributes + client.meta.endpoint_url = None + client.meta.region_name = "us-east-1" + + yield mock_client, client + + +@pytest.fixture +def mock_s3_storage(mock_s3_factory): + mock_client, _ = mock_s3_factory + return mock_client + + +@pytest.fixture +def mock_s3_client(mock_s3_factory): + _, client = mock_s3_factory + return client + + +@pytest.fixture +def storage(mock_s3_client): + return S3CompatibleStorage( + bucket="test-bucket", + access_key_id="test-key", + secret_access_key="test-secret", + region_name="us-east-1", + ) + + +@pytest.fixture +def r2_storage(mock_s3_client): + # Mock R2 endpoint + mock_s3_client.meta.endpoint_url = "https://test.r2.cloudflarestorage.com" + return S3CompatibleStorage( + bucket="test-bucket", + access_key_id="test-key", + secret_access_key="test-secret", + endpoint_url="https://test.r2.cloudflarestorage.com", + region_name="auto", + ) From 04794d9165979d954a4f31b882d68c11070ab01b Mon Sep 17 00:00:00 2001 From: Dhruv Baldawa Date: Sat, 14 Dec 2024 12:56:52 +0530 Subject: [PATCH 07/10] Add checks in CI --- .github/workflows/ci.yml | 30 ++++++++++++++++++++++++++++++ 1 file changed, 30 insertions(+) create mode 100644 .github/workflows/ci.yml diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml new file mode 100644 index 0000000..574d03b --- /dev/null +++ b/.github/workflows/ci.yml @@ -0,0 +1,30 @@ +name: CI Quality checks + +on: + pull_request: + +jobs: + test: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v4 + + - name: Set up Python + uses: actions/setup-python@v4 + with: + python-version: '3.11' + + - name: Install Poetry + uses: snok/install-poetry@v1 + + - name: Install just + uses: extractions/setup-just@v1 + + - name: Install dependencies + run: poetry install + + - name: Run tests + run: poetry run just test + + - name: Run checks + run: poetry run just check From 33869e66e4bc7dcf45a45597ea27aa94a7974cf7 Mon Sep 17 00:00:00 2001 From: Dhruv Baldawa Date: Sat, 14 Dec 2024 13:18:55 +0530 Subject: [PATCH 08/10] add google cloud authentication in CI --- .github/workflows/ci.yml | 31 ++++++++++++++++++++++++++----- README.md | 6 ------ 2 files changed, 26 insertions(+), 11 deletions(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 574d03b..3a5d4ed 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -1,4 +1,4 @@ -name: CI Quality checks +name: Quality Checks on: pull_request: @@ -9,22 +9,43 @@ jobs: steps: - uses: actions/checkout@v4 + - uses: 'google-github-actions/auth@v2' + with: + credentials_json: '${{ secrets.GOOGLE_CREDENTIALS }}' + + - name: Set up Cloud SDK + uses: google-github-actions/setup-gcloud@v2 + - name: Set up Python uses: actions/setup-python@v4 with: python-version: '3.11' + cache: 'pip' - name: Install Poetry uses: snok/install-poetry@v1 + with: 
+ version: latest + + - name: Setup Poetry cache + uses: actions/cache@v3 + with: + path: ./.venv + key: venv-${{ runner.os }}-${{ hashFiles('**/poetry.lock') }} - name: Install just uses: extractions/setup-just@v1 - - name: Install dependencies - run: poetry install + - name: Configure Poetry + run: | + poetry config virtualenvs.in-project true + poetry config virtualenvs.create true - - name: Run tests - run: poetry run just test + - name: Install dependencies + run: poetry install --no-interaction - name: Run checks run: poetry run just check + + - name: Run tests + run: poetry run just test diff --git a/README.md b/README.md index 2072f9f..ca77596 100644 --- a/README.md +++ b/README.md @@ -27,12 +27,6 @@ poetry install - Add `GOOGLE_API_KEY` in `.env` by generating the [API key from Google AI Studio](https://aistudio.google.com/app/apikey) - Login to Google Cloud using [the following instructions](https://cloud.google.com/text-to-speech/docs/create-audio-text-client-libraries) -## Running the Application -```shell -jupyter notebook -``` -Run the `main.ipynb` notebook - ## License This project is licensed under the **AGPL v3** for open-source use. For those wishing to use the software in proprietary applications without disclosing source code, a **commercial license** is available. From ceea573aa119dd9dcfbabbbb29bbaacb42c42588 Mon Sep 17 00:00:00 2001 From: Dhruv Baldawa Date: Sat, 14 Dec 2024 13:42:50 +0530 Subject: [PATCH 09/10] Setup precommit hooks --- .pre-commit-config.yaml | 16 +++++++ poetry.lock | 94 ++++++++++++++++++++++++++++++++++++++++- pyproject.toml | 2 + 3 files changed, 111 insertions(+), 1 deletion(-) create mode 100644 .pre-commit-config.yaml diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml new file mode 100644 index 0000000..68d2a26 --- /dev/null +++ b/.pre-commit-config.yaml @@ -0,0 +1,16 @@ +repos: + - repo: local + hooks: + - id: ruff-check + name: Run ruff check + entry: poetry run just check + language: system + pass_filenames: false + stages: [pre-commit] + + - id: pytest + name: Run pytest + entry: poetry run just test + language: system + pass_filenames: false + stages: [pre-commit] \ No newline at end of file diff --git a/poetry.lock b/poetry.lock index 5cb7900..7e61698 100644 --- a/poetry.lock +++ b/poetry.lock @@ -493,6 +493,17 @@ files = [ [package.dependencies] pycparser = "*" +[[package]] +name = "cfgv" +version = "3.4.0" +description = "Validate configuration and produce human readable error messages." 
+optional = false +python-versions = ">=3.8" +files = [ + {file = "cfgv-3.4.0-py2.py3-none-any.whl", hash = "sha256:b7265b1f29fd3316bfcd2b330d63d024f2bfd8bcb8b0272f8e19a504856c48f9"}, + {file = "cfgv-3.4.0.tar.gz", hash = "sha256:e52591d4c5f5dead8e0f673fb16db7949d2cfb3f7da4582893288f0ded8fe560"}, +] + [[package]] name = "charset-normalizer" version = "3.4.0" @@ -781,6 +792,17 @@ files = [ {file = "defusedxml-0.7.1.tar.gz", hash = "sha256:1bb3032db185915b62d7c6209c5a8792be6a32ab2fedacc84e01b52c51aa3e69"}, ] +[[package]] +name = "distlib" +version = "0.3.9" +description = "Distribution utilities" +optional = false +python-versions = "*" +files = [ + {file = "distlib-0.3.9-py2.py3-none-any.whl", hash = "sha256:47f8c22fd27c27e25a65601af709b38e4f0a45ea4fc2e710f65755fa8caaaf87"}, + {file = "distlib-0.3.9.tar.gz", hash = "sha256:a60f20dea646b8a33f3e7772f74dc0b2d0772d2837ee1342a00645c81edf9403"}, +] + [[package]] name = "executing" version = "2.1.0" @@ -1421,6 +1443,20 @@ files = [ {file = "httpx_sse-0.4.0-py3-none-any.whl", hash = "sha256:f329af6eae57eaa2bdfd962b42524764af68075ea87370a2de920af5341e318f"}, ] +[[package]] +name = "identify" +version = "2.6.3" +description = "File identification library for Python" +optional = false +python-versions = ">=3.9" +files = [ + {file = "identify-2.6.3-py2.py3-none-any.whl", hash = "sha256:9edba65473324c2ea9684b1f944fe3191db3345e50b6d04571d10ed164f8d7bd"}, + {file = "identify-2.6.3.tar.gz", hash = "sha256:62f5dae9b5fef52c84cc188514e9ea4f3f636b1d8799ab5ebc475471f9e47a02"}, +] + +[package.extras] +license = ["ukkonen"] + [[package]] name = "idna" version = "3.10" @@ -3121,6 +3157,24 @@ files = [ dev = ["pre-commit", "tox"] testing = ["pytest", "pytest-benchmark"] +[[package]] +name = "pre-commit" +version = "4.0.1" +description = "A framework for managing and maintaining multi-language pre-commit hooks." 
+optional = false +python-versions = ">=3.9" +files = [ + {file = "pre_commit-4.0.1-py2.py3-none-any.whl", hash = "sha256:efde913840816312445dc98787724647c65473daefe420785f885e8ed9a06878"}, + {file = "pre_commit-4.0.1.tar.gz", hash = "sha256:80905ac375958c0444c65e9cebebd948b3cdb518f335a091a670a89d652139d2"}, +] + +[package.dependencies] +cfgv = ">=2.0.0" +identify = ">=1.0.0" +nodeenv = ">=0.11.1" +pyyaml = ">=5.1" +virtualenv = ">=20.10.0" + [[package]] name = "prometheus-client" version = "0.21.0" @@ -3607,6 +3661,24 @@ pluggy = ">=1.5,<2" [package.extras] dev = ["argcomplete", "attrs (>=19.2)", "hypothesis (>=3.56)", "mock", "pygments (>=2.7.2)", "requests", "setuptools", "xmlschema"] +[[package]] +name = "pytest-asyncio" +version = "0.25.0" +description = "Pytest support for asyncio" +optional = false +python-versions = ">=3.9" +files = [ + {file = "pytest_asyncio-0.25.0-py3-none-any.whl", hash = "sha256:db5432d18eac6b7e28b46dcd9b69921b55c3b1086e85febfe04e70b18d9e81b3"}, + {file = "pytest_asyncio-0.25.0.tar.gz", hash = "sha256:8c0610303c9e0442a5db8604505fc0f545456ba1528824842b37b4a626cbf609"}, +] + +[package.dependencies] +pytest = ">=8.2,<9" + +[package.extras] +docs = ["sphinx (>=5.3)", "sphinx-rtd-theme (>=1)"] +testing = ["coverage (>=6.2)", "hypothesis (>=5.7.1)"] + [[package]] name = "pytest-cov" version = "5.0.0" @@ -4725,6 +4797,26 @@ h2 = ["h2 (>=4,<5)"] socks = ["pysocks (>=1.5.6,!=1.5.7,<2.0)"] zstd = ["zstandard (>=0.18.0)"] +[[package]] +name = "virtualenv" +version = "20.28.0" +description = "Virtual Python Environment builder" +optional = false +python-versions = ">=3.8" +files = [ + {file = "virtualenv-20.28.0-py3-none-any.whl", hash = "sha256:23eae1b4516ecd610481eda647f3a7c09aea295055337331bb4e6892ecce47b0"}, + {file = "virtualenv-20.28.0.tar.gz", hash = "sha256:2c9c3262bb8e7b87ea801d715fae4495e6032450c71d2309be9550e7364049aa"}, +] + +[package.dependencies] +distlib = ">=0.3.7,<1" +filelock = ">=3.12.2,<4" +platformdirs = ">=3.9.1,<5" + +[package.extras] +docs = ["furo (>=2023.7.26)", "proselint (>=0.13)", "sphinx (>=7.1.2,!=7.3)", "sphinx-argparse (>=0.4)", "sphinxcontrib-towncrier (>=0.2.1a0)", "towncrier (>=23.6)"] +test = ["covdefaults (>=2.3)", "coverage (>=7.2.7)", "coverage-enable-subprocess (>=1)", "flaky (>=3.7)", "packaging (>=23.1)", "pytest (>=7.4)", "pytest-env (>=0.8.2)", "pytest-freezer (>=0.4.8)", "pytest-mock (>=3.11.1)", "pytest-randomly (>=3.12)", "pytest-timeout (>=2.1)", "setuptools (>=68)", "time-machine (>=2.10)"] + [[package]] name = "wcwidth" version = "0.2.13" @@ -4888,4 +4980,4 @@ propcache = ">=0.2.0" [metadata] lock-version = "2.0" python-versions = "^3.11" -content-hash = "592f5a9a27d922b3d5940c94210b79988cd92b6a3b1a6422bd03959c953fbf24" +content-hash = "82c90397bf4c785f1ba7239299aca8f24327b4bd62047b736b909e847ffda535" diff --git a/pyproject.toml b/pyproject.toml index 6b70856..65097c7 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -35,6 +35,8 @@ rust-just = "^1.36.0" responses = "^0.25.3" ruff = "^0.8.2" pyright = "^1.1.390" +pytest-asyncio = "^0.25.0" +pre-commit = "^4.0.1" [project.classifiers] license = "OSI Approved :: GNU Affero General Public License v3 (AGPLv3)" From 9af0aa38eb8cb445f9549318da431f546f8d184b Mon Sep 17 00:00:00 2001 From: Dhruv Baldawa Date: Sat, 14 Dec 2024 13:46:19 +0530 Subject: [PATCH 10/10] Fix flaky test --- .github/workflows/ci.yml | 2 +- gyandex/podgen/storage/s3_test.py | 1 - 2 files changed, 1 insertion(+), 2 deletions(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 
3a5d4ed..1b9edc4 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -28,7 +28,7 @@ jobs: version: latest - name: Setup Poetry cache - uses: actions/cache@v3 + uses: actions/cache@v4 with: path: ./.venv key: venv-${{ runner.os }}-${{ hashFiles('**/poetry.lock') }} diff --git a/gyandex/podgen/storage/s3_test.py b/gyandex/podgen/storage/s3_test.py index 0ba8105..262ce28 100644 --- a/gyandex/podgen/storage/s3_test.py +++ b/gyandex/podgen/storage/s3_test.py @@ -134,7 +134,6 @@ def test_upload_file_content_type_guessing(storage, mock_s3_client, tmp_path): """Test content type guessing for different file types""" test_cases = [ ("test.mp3", "audio/mpeg"), - ("test.m4a", "audio/mp4a-latm"), ("test.wav", "audio/x-wav"), ("test.txt", "text/plain"), ("test.unknown", "application/octet-stream"),