Skip to content

Commit

Permalink
Merge pull request #70 from dplocki/69-new-method-of-checking-the-las…
Browse files Browse the repository at this point in the history
…t-time-run

69 new method of checking the last time run
  • Loading branch information
dplocki authored May 21, 2024
2 parents e3c5b6a + 879e621 commit 2487602
Show file tree
Hide file tree
Showing 5 changed files with 210 additions and 7 deletions.
26 changes: 26 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -259,6 +259,7 @@ If a directory for podcast is empty, the script needs to know what to do. Due to
* [download last n episodes](#download-last-n-episodes)
* [download all new episode from last n days](#download-all-from-n-days)
* [download all new episode since day after, the last episode should appear](#download-all-episode-since-last-excepted)
* [download all new episodes since the last run](#download-all-new-since-the-last-run)

Default behavior is: `download_last`

Expand Down Expand Up @@ -315,6 +316,31 @@ Examples:
| `download_from_Fri` | New episodes appear on Fridays. The script will download all episodes since the last Saturday (inclusive) |
| `download_from_12` | New episodes appear on the 12th of each month. The script will download all episodes since the 13th of the previous month |

### Download all new, since the last run

Once you set up the last run mark file, the script uses it to store the date of its last run. Based on this date, the script downloads all new episodes that have appeared since then.

Set by `download_since_last_run` (as the value of `if_directory_empty`). Requires the last run mark file to be configured with `last_run_mark_file_path`.

#### Example:

```json
{
"if_directory_empty": "download_since_last_run",
"last_run_mark_file_path": "~/.totem.json",
"podcasts": [
{
"name": "The Skeptic Guide",
"rss_link": "https://feed.theskepticsguide.org/feed/rss.aspx",
"path": "~/podcasts/SGTTU"
}
]
}
```

#### The last run mark file

The script reads the file's last modification date to determine when it was last run. The script updates the file's modification date on each run.

## Download files from gaps

The script recognizes the stream of downloaded files (based on the feed data). By default, the last downloaded file (according to the feed) marks the start of downloading. In case of gaps, i.e. situations where files are missing before the last downloaded one, the script will ignore them by default. However, it is possible to change this behavior so that all missing files between the already downloaded ones are downloaded. To enable this, set the `fill_up_gaps` value to **true**. It's important to note that the script will not download files before the first one (according to the feed), i.e. the earliest episode.
Expand Down
31 changes: 29 additions & 2 deletions e2e/fixures.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
import datetime
import json
import os
import pytest
import random
import subprocess
Expand Down Expand Up @@ -126,9 +127,9 @@ class PodcastDirectory:
def __init__(self, download_destination_directory: Path) -> None:
self.download_destination_directory = download_destination_directory

def add_file(self, file_name: str, content: str = None) -> None:
    """Create a file in the destination directory under a lower-cased name.

    Args:
        file_name: Name of the file; it is lower-cased before being joined
            with the destination directory.
        content: Text to store in the file. When ``None`` (the default) the
            file is filled with ``"<file_name> content"``; any other value,
            including an empty string, is written as given.
    """
    # Identity check: only a missing argument triggers the default text.
    if content is None:
        content = file_name + " content"

    file_path = self.download_destination_directory / file_name.lower()
    file_path.write_text(content)

def is_containing_only(self, expected_files_list: List[str]) -> None:
files_in_destination_directory = self.get_files_list()
Expand Down Expand Up @@ -223,6 +224,27 @@ def is_highlighted_in_outcome(self, word: str) -> bool:
def is_containing(self, word: str) -> bool:
    """Tell whether *word* occurs anywhere in the captured standard output."""
    captured_stdout = self.output.stdout
    return word in captured_stdout

def get_output(self):
    """Return the captured standard output as a list of lines."""
    captured_stdout = self.output.stdout
    return captured_stdout.splitlines()


class MarkerFileManager:
    """Test helper around the ``.marker`` file kept inside a given directory."""

    def __init__(self, directory) -> None:
        # The marker always lives directly inside the supplied directory.
        self.path_of_marker_file = directory / ".marker"

    def get_path(self) -> str:
        """Return the marker file location as a plain string."""
        marker_path = self.path_of_marker_file
        return str(marker_path)

    def is_exists(self) -> bool:
        """Tell whether the marker is present as a regular file."""
        marker_path = self.path_of_marker_file
        return marker_path.is_file()

    def set_date(self, new_modification_time: datetime.datetime) -> None:
        """Set the marker's access and modification times to the given moment.

        The marker file is created with random text first when it is missing.
        """
        marker_path = self.path_of_marker_file

        if not self.is_exists():
            marker_path.write_text(generate_random_string())

        timestamp = new_modification_time.timestamp()
        # The same value is used for both the access and the modification time.
        os.utime(marker_path, (timestamp, timestamp))


@pytest.fixture()
def download_destination_directory(tmp_path) -> Path:
Expand Down Expand Up @@ -270,3 +292,8 @@ def internal(config_object: Dict, skip_default: bool = False):
def podcast_downloader(tmp_path) -> Generator[PodcastDownloaderRunner, None, None]:
    """Yield a ``PodcastDownloaderRunner`` built for the temporary directory."""
    yield PodcastDownloaderRunner(tmp_path)


@pytest.fixture()
def marker_file_manager(tmp_path):
    """Yield a ``MarkerFileManager`` rooted in the temporary directory."""
    manager = MarkerFileManager(tmp_path)
    yield manager
111 changes: 111 additions & 0 deletions e2e/test_simple_configuration.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
from itertools import chain
from e2e.fixures import (
FeedBuilder,
MarkerFileManager,
PodcastDirectory,
PodcastDownloaderRunner,
# fixures:
Expand All @@ -10,6 +11,7 @@
use_config,
podcast_directory,
podcast_downloader,
marker_file_manager,
)
from e2e.random import (
call_n_times,
Expand Down Expand Up @@ -276,3 +278,112 @@ def test_download_last_n_episodes_behavior(

# Assert
podcast_directory.is_containing_only(expected_downloaded_files)


def test_download_since_last_run(
    feed: FeedBuilder,
    use_config: Callable[[Dict], None],
    podcast_downloader: Callable[[List[str]], PodcastDownloaderRunner],
    podcast_directory: PodcastDirectory,
    marker_file_manager: MarkerFileManager,
):
    """Only episodes published after the marker file's date get downloaded."""
    # Arrange
    first_expected_index = generate_random_int(2, 5)
    one_day = datetime.timedelta(days=1)
    newest_publish_date = datetime.datetime.now(datetime.timezone.utc) - one_day
    episodes_count = generate_random_int(7, 13)

    # One episode per day: generated newest first, then flipped to oldest first.
    newest_first = [
        (generate_random_mp3_file(), newest_publish_date - age * one_day)
        for age in range(episodes_count)
    ]
    metadata = newest_first[::-1]

    episodes_after_last_run = metadata[first_expected_index:]
    expected_episode = [name.lower() for name, _ in episodes_after_last_run]

    # The marker is dated one hour before the first expected episode.
    first_expected_publish_date = episodes_after_last_run[0][1]
    last_run_date = first_expected_publish_date - datetime.timedelta(hours=1)

    marker_file_manager.set_date(last_run_date)

    for file_name, file_publish_data in metadata:
        feed.add_entry(file_name=file_name, published_date=file_publish_data)

    config = {
        "last_run_mark_file_path": marker_file_manager.get_path(),
        "podcasts": [
            {
                "if_directory_empty": "download_since_last_run",
                "path": podcast_directory.path(),
                "rss_link": feed.get_feed_url(),
            }
        ],
    }
    use_config(config)

    # Act
    podcast_downloader.run()

    # Assert
    podcast_directory.is_containing_only(expected_episode)


def test_download_since_last_run_with_no_marker_file_setup(
    feed: FeedBuilder,
    use_config: Callable[[Dict], None],
    podcast_downloader: PodcastDownloaderRunner,
    podcast_directory: PodcastDirectory,
    marker_file_manager: MarkerFileManager,
):
    """A configured but absent marker file is created and reported in the output."""
    # Arrange
    feed.add_random_entries()

    config = {
        "last_run_mark_file_path": marker_file_manager.get_path(),
        "podcasts": [
            {
                "if_directory_empty": "download_since_last_run",
                "path": podcast_directory.path(),
                "rss_link": feed.get_feed_url(),
            }
        ],
    }
    use_config(config)

    # Act
    podcast_downloader.run()

    # Assert
    marker_was_created = marker_file_manager.is_exists()
    assert marker_was_created

    marker_was_reported = podcast_downloader.is_containing("Marker file")
    assert marker_was_reported


def test_download_since_last_run_with_missing_marker_file_setup(
    feed: FeedBuilder,
    use_config: Callable[[Dict], None],
    podcast_downloader: PodcastDownloaderRunner,
    podcast_directory: PodcastDirectory,
):
    """Using ``download_since_last_run`` without a marker file path must fail.

    The run is expected to raise, and the raised error is expected to carry
    the script's standard error output in ``stderr``.
    NOTE(review): presumably ``run()`` raises ``subprocess.CalledProcessError``
    on a non-zero exit code; confirm against ``PodcastDownloaderRunner.run``.
    """
    # Arrange
    feed.add_random_entries()

    use_config(
        {
            "podcasts": [
                {
                    "if_directory_empty": "download_since_last_run",
                    "path": podcast_directory.path(),
                    "rss_link": feed.get_feed_url(),
                }
            ],
        }
    )

    # Act
    raised_error = None
    try:
        podcast_downloader.run()
    except Exception as error:
        raised_error = error

    # Assert
    # Without this check the test would pass silently when nothing is raised.
    assert (
        raised_error is not None
    ), "The run was expected to fail when the last run mark file is not configured"

    # Fall back to an empty string so a missing attribute fails the assertion
    # below instead of raising AttributeError.
    error_output = getattr(raised_error, "stderr", None) or ""
    assert "Missing the last run mark file" in error_output
48 changes: 43 additions & 5 deletions podcast_downloader/__main__.py
Original file line number Diff line number Diff line change
Expand Up @@ -95,14 +95,23 @@ def build_parser() -> argparse.ArgumentParser:


def configuration_to_function_on_empty_directory(
configuration_value: str,
configuration_value: str, last_run_date: time.struct_time
) -> Callable[[Iterable[RSSEntity]], Iterable[RSSEntity]]:
if configuration_value == "download_last":
return partial(only_last_n_entities, 1)

if configuration_value == "download_all_from_feed":
return lambda source: source

if configuration_value == "download_since_last_run":
if last_run_date:
return only_entities_from_date(last_run_date)

logger.error(
'The "download_since_last_run" require setup the "last_run_mark_file_path"'
)
raise Exception("Missing the last run mark file")

local_time = time.localtime()

from_n_day_match = re.match(r"^download_from_(\d+)_days$", configuration_value)
Expand Down Expand Up @@ -157,6 +166,31 @@ def configuration_to_function_rss_to_name(
return partial(file_template_to_file_name, configuration_value)


def load_the_last_run_date_store_now(marker_file_path, now):
    """Return the time of the previous run and stamp the marker file with *now*.

    The time of the last run is kept as the modification time of the marker
    file. The modification time is used rather than the access time because
    the access time can change on reads or stay frozen depending on how the
    file system is mounted.

    Args:
        marker_file_path: Path of the marker file (a leading ``~`` is
            expanded), or ``None`` when no marker file is configured.
        now: The current local time as a ``time.struct_time``.

    Returns:
        ``None`` when *marker_file_path* is ``None``; *now* when the marker
        file did not exist and had to be created; otherwise the previous
        modification time of the marker file as a local ``time.struct_time``.
    """
    if marker_file_path is None:
        return None

    # Configuration values such as "~/.totem.json" must be expanded by hand;
    # neither os.path.exists nor open does it.
    marker_file_path = os.path.expanduser(marker_file_path)

    if not os.path.exists(marker_file_path):
        logger.warning("Marker file doesn't exist, creating (set last time run as now)")

        with open(marker_file_path, "w") as file:
            file.write(
                "This is a marker file for podcast_download. Its last modification date is used to determine the last run time"
            )

        return now

    last_run_time = time.localtime(os.path.getmtime(marker_file_path))

    logger.info(
        "Last time the script has been run: %s",
        time.strftime("%Y-%m-%d %H:%M:%S", last_run_time),
    )

    # Stamp both the access and the modification time with the current run.
    now_timestamp = time.mktime(now)
    os.utime(marker_file_path, times=(now_timestamp, now_timestamp))
    return last_run_time


if __name__ == "__main__":
import sys
from logging import getLogger, StreamHandler, INFO
Expand All @@ -175,6 +209,7 @@ def configuration_to_function_rss_to_name(
configuration.CONFIG_HTTP_HEADER: {"User-Agent": "podcast-downloader"},
configuration.CONFIG_FILL_UP_GAPS: False,
configuration.CONFIG_DOWNLOAD_DELAY: 0,
configuration.CONFIG_LAST_RUN_MARK_PATH: None,
configuration.CONFIG_PODCASTS: [],
}

Expand All @@ -201,6 +236,9 @@ def configuration_to_function_rss_to_name(

RSS_SOURCES = CONFIGURATION[configuration.CONFIG_PODCASTS]
DOWNLOADS_LIMITS = CONFIGURATION[configuration.CONFIG_DOWNLOADS_LIMIT]
LAST_RUN_DATETIME = load_the_last_run_date_store_now(
CONFIGURATION[configuration.CONFIG_LAST_RUN_MARK_PATH], time.localtime()
)

for rss_source in RSS_SOURCES:
file_length_limit = get_system_file_name_limit(rss_source)
Expand All @@ -214,7 +252,7 @@ def configuration_to_function_rss_to_name(
configuration.CONFIG_FILE_NAME_TEMPLATE,
CONFIGURATION[configuration.CONFIG_FILE_NAME_TEMPLATE],
)
rss_if_directory_empty = rss_source.get(
rss_on_empty_directory = rss_source.get(
configuration.CONFIG_IF_DIRECTORY_EMPTY,
CONFIGURATION[configuration.CONFIG_IF_DIRECTORY_EMPTY],
)
Expand Down Expand Up @@ -255,8 +293,8 @@ def configuration_to_function_rss_to_name(
rss_file_name_template_value, rss_source
)

on_directory_empty = configuration_to_function_on_empty_directory(
rss_if_directory_empty
on_empty_directory = configuration_to_function_on_empty_directory(
rss_on_empty_directory, LAST_RUN_DATETIME
)

downloaded_files = list(
Expand Down Expand Up @@ -294,7 +332,7 @@ def configuration_to_function_rss_to_name(
build_only_new_entities(to_name_function), last_downloaded_file
)
else:
download_limiter_function = on_directory_empty
download_limiter_function = on_empty_directory

missing_files_links = compose(list, download_limiter_function)(all_feed_entries)

Expand Down
1 change: 1 addition & 0 deletions podcast_downloader/configuration.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@
CONFIG_HTTP_HEADER = "http_headers"
CONFIG_FILL_UP_GAPS = "fill_up_gaps"
CONFIG_DOWNLOAD_DELAY = "download_delay"
CONFIG_LAST_RUN_MARK_PATH = "last_run_mark_file_path"

CONFIG_PODCASTS = "podcasts"
CONFIG_PODCASTS_NAME = "name"
Expand Down

0 comments on commit 2487602

Please sign in to comment.