Skip to content

Commit 013b2cc

Browse files
authored
Media downloading indexing options (round 2) (#14)
* Adds options + option builder + metadata parsing for media thumbnails * Added release-type options to media profile; built option parser for indexing operations * Added new media_profile options to creation form; made show helper for rendering database items * Added options for downloading/embedding metadata * Adds option on sources to not download media (index only) * reformatted docs
1 parent ca01f17 commit 013b2cc

31 files changed

+2317
-1731
lines changed

.gitignore

+1
Original file line numberDiff line numberDiff line change
@@ -39,3 +39,4 @@ npm-debug.log
3939
/.elixir_ls
4040
.env
4141
.DS_Store
42+
scratchpad.md

lib/pinchflat/media/media_item.ex

+11-1
Original file line numberDiff line numberDiff line change
@@ -10,13 +10,23 @@ defmodule Pinchflat.Media.MediaItem do
1010
alias Pinchflat.MediaSource.Source
1111
alias Pinchflat.Media.MediaMetadata
1212

13+
@allowed_fields ~w(
14+
title
15+
media_id
16+
media_filepath
17+
source_id
18+
subtitle_filepaths
19+
thumbnail_filepath
20+
metadata_filepath
21+
)a
1322
@required_fields ~w(media_id source_id)a
14-
@allowed_fields ~w(title media_id media_filepath source_id subtitle_filepaths)a
1523

1624
schema "media_items" do
1725
field :title, :string
1826
field :media_id, :string
1927
field :media_filepath, :string
28+
field :thumbnail_filepath, :string
29+
field :metadata_filepath, :string
2030
# This is an array of [iso-2 language, filepath] pairs. Probably could
2131
# be an associated record, but I don't see the benefit right now.
2232
# Will very likely revisit because I can't leave well-enough alone.

lib/pinchflat/media_client/backends/yt_dlp/metadata_parser.ex

+23-2
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,8 @@ defmodule Pinchflat.MediaClient.Backends.YtDlp.MetadataParser do
2525
metadata_attrs
2626
|> Map.merge(parse_media_metadata(metadata))
2727
|> Map.merge(parse_subtitle_metadata(metadata))
28+
|> Map.merge(parse_thumbnail_metadata(metadata))
29+
|> Map.merge(parse_infojson_metadata(metadata))
2830
end
2931

3032
defp parse_media_metadata(metadata) do
@@ -35,15 +37,34 @@ defmodule Pinchflat.MediaClient.Backends.YtDlp.MetadataParser do
3537
end
3638

3739
defp parse_subtitle_metadata(metadata) do
38-
subtitle_map = metadata["requested_subtitles"] || %{}
3940
# IDEA: if needed, consider filtering out subtitles that don't exist on-disk
4041
subtitle_filepaths =
41-
subtitle_map
42+
(metadata["requested_subtitles"] || %{})
4243
|> Enum.map(fn {lang, attrs} -> [lang, attrs["filepath"]] end)
4344
|> Enum.sort(fn [lang_a, _], [lang_b, _] -> lang_a < lang_b end)
4445

4546
%{
4647
subtitle_filepaths: subtitle_filepaths
4748
}
4849
end
50+
51+
defp parse_thumbnail_metadata(metadata) do
52+
thumbnail_filepath =
53+
(metadata["thumbnails"] || %{})
54+
# Reverse so that higher resolution thumbnails come first.
55+
# This _shouldn't_ matter yet, but I'd rather default to the best
56+
# in case I'm wrong.
57+
|> Enum.reverse()
58+
|> Enum.find_value(fn attrs -> attrs["filepath"] end)
59+
60+
%{
61+
thumbnail_filepath: thumbnail_filepath
62+
}
63+
end
64+
65+
defp parse_infojson_metadata(metadata) do
66+
%{
67+
metadata_filepath: metadata["infojson_filename"]
68+
}
69+
end
4970
end

lib/pinchflat/media_client/source_details.ex

+25-3
Original file line numberDiff line numberDiff line change
@@ -6,10 +6,14 @@ defmodule Pinchflat.MediaClient.SourceDetails do
66
it open-ish for future expansion (just in case).
77
"""
88

9+
alias Pinchflat.Repo
10+
alias Pinchflat.MediaSource.Source
11+
912
alias Pinchflat.MediaClient.Backends.YtDlp.VideoCollection, as: YtDlpSource
13+
alias Pinchflat.Profiles.Options.YtDlp.IndexOptionBuilder, as: YtDlpIndexOptionBuilder
1014

1115
@doc """
12-
Gets a source's ID and name from its URL, using the given backend.
16+
Gets a source's ID and name from its URL using the given backend.
1317
1418
Returns {:ok, map()} | {:error, any, ...}.
1519
"""
@@ -18,11 +22,23 @@ defmodule Pinchflat.MediaClient.SourceDetails do
1822
end
1923

2024
@doc """
21-
Returns a list of video IDs for the given source URL, using the given backend.
25+
Returns a list of video IDs for the given source URL OR source record using the given backend.
26+
27+
If passing a source record, the call to the backend may have custom options applied based on
28+
the `option_builder`.
2229
2330
Returns {:ok, list(binary())} | {:error, any, ...}.
2431
"""
25-
def get_video_ids(source_url, backend \\ :yt_dlp) do
32+
def get_video_ids(sourceable, backend \\ :yt_dlp)
33+
34+
def get_video_ids(%Source{} = source, backend) do
35+
media_profile = Repo.preload(source, :media_profile).media_profile
36+
{:ok, options} = option_builder(backend).build(media_profile)
37+
38+
source_module(backend).get_video_ids(source.collection_id, options)
39+
end
40+
41+
def get_video_ids(source_url, backend) when is_binary(source_url) do
2642
source_module(backend).get_video_ids(source_url)
2743
end
2844

@@ -31,4 +47,10 @@ defmodule Pinchflat.MediaClient.SourceDetails do
3147
:yt_dlp -> YtDlpSource
3248
end
3349
end
50+
51+
defp option_builder(backend) do
52+
case backend do
53+
:yt_dlp -> YtDlpIndexOptionBuilder
54+
end
55+
end
3456
end

lib/pinchflat/media_client/video_downloader.ex

+6-2
Original file line numberDiff line numberDiff line change
@@ -14,14 +14,18 @@ defmodule Pinchflat.MediaClient.VideoDownloader do
1414
alias Pinchflat.Profiles.MediaProfile
1515

1616
alias Pinchflat.MediaClient.Backends.YtDlp.Video, as: YtDlpVideo
17-
alias Pinchflat.Profiles.Options.YtDlp.OptionBuilder, as: YtDlpOptionBuilder
17+
alias Pinchflat.Profiles.Options.YtDlp.DownloadOptionBuilder, as: YtDlpDownloadOptionBuilder
1818
alias Pinchflat.MediaClient.Backends.YtDlp.MetadataParser, as: YtDlpMetadataParser
1919

2020
@doc """
2121
Downloads a video for a media item, updating the media item based on the metadata
2222
returned by the backend. Also saves the entire metadata response to the associated
2323
media_metadata record.
2424
25+
NOTE: related methods (like the download worker) won't download if the media item's source
26+
is set to not download media. However, I'm not enforcing that here since I need this for testing.
27+
This may change in the future but I'm not stressed.
28+
2529
Returns {:ok, %MediaItem{}} | {:error, any, ...any}
2630
"""
2731
def download_for_media_item(%MediaItem{} = media_item, backend \\ :yt_dlp) do
@@ -52,7 +56,7 @@ defmodule Pinchflat.MediaClient.VideoDownloader do
5256

5357
defp option_builder(backend) do
5458
case backend do
55-
:yt_dlp -> YtDlpOptionBuilder
59+
:yt_dlp -> YtDlpDownloadOptionBuilder
5660
end
5761
end
5862

lib/pinchflat/media_source/source.ex

+2
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,7 @@ defmodule Pinchflat.MediaSource.Source do
1515
collection_type
1616
friendly_name
1717
index_frequency_minutes
18+
download_media
1819
original_url
1920
media_profile_id
2021
)a
@@ -27,6 +28,7 @@ defmodule Pinchflat.MediaSource.Source do
2728
field :collection_id, :string
2829
field :collection_type, Ecto.Enum, values: [:channel, :playlist]
2930
field :index_frequency_minutes, :integer, default: 60 * 24
31+
field :download_media, :boolean, default: true
3032
# This should only be used for user reference going forward
3133
# as the collection_id should be used for all API calls
3234
field :original_url, :string

lib/pinchflat/profiles/media_profile.ex

+21
Original file line numberDiff line numberDiff line change
@@ -15,18 +15,39 @@ defmodule Pinchflat.Profiles.MediaProfile do
1515
download_auto_subs
1616
embed_subs
1717
sub_langs
18+
download_thumbnail
19+
embed_thumbnail
20+
download_metadata
21+
embed_metadata
22+
shorts_behaviour
23+
livestream_behaviour
1824
)a
1925

2026
@required_fields ~w(name output_path_template)a
2127

2228
schema "media_profiles" do
2329
field :name, :string
2430
field :output_path_template, :string
31+
2532
field :download_subs, :boolean, default: true
2633
field :download_auto_subs, :boolean, default: true
2734
field :embed_subs, :boolean, default: true
2835
field :sub_langs, :string, default: "en"
2936

37+
field :download_thumbnail, :boolean, default: true
38+
field :embed_thumbnail, :boolean, default: true
39+
40+
field :download_metadata, :boolean, default: true
41+
field :embed_metadata, :boolean, default: true
42+
43+
# NOTE: these do NOT speed up indexing - the indexer still has to go
44+
# through the entire collection to determine if a video is a short or
45+
# a livestream.
46+
# NOTE: these can BOTH be set to :only which will download shorts and
47+
# livestreams _only_ and ignore regular videos.
48+
field :shorts_behaviour, Ecto.Enum, values: [:include, :exclude, :only], default: :include
49+
field :livestream_behaviour, Ecto.Enum, values: [:include, :exclude, :only], default: :include
50+
3051
has_many :sources, Source
3152

3253
timestamps(type: :utc_datetime)

lib/pinchflat/profiles/options/yt_dlp/option_builder.ex lib/pinchflat/profiles/options/yt_dlp/download_option_builder.ex

+30-8
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
1-
defmodule Pinchflat.Profiles.Options.YtDlp.OptionBuilder do
1+
defmodule Pinchflat.Profiles.Options.YtDlp.DownloadOptionBuilder do
22
@moduledoc """
3-
Builds the options for yt-dlp based on the given media profile.
3+
Builds the options for yt-dlp to download media based on the given media profile.
44
55
IDEA: consider making this a behaviour so I can add other backends later
66
"""
@@ -9,7 +9,7 @@ defmodule Pinchflat.Profiles.Options.YtDlp.OptionBuilder do
99
alias Pinchflat.Profiles.Options.YtDlp.OutputPathBuilder
1010

1111
@doc """
12-
Builds the options for yt-dlp based on the given media profile.
12+
Builds the options for yt-dlp to download media based on the given media profile.
1313
1414
IDEA: consider adding the ability to pass in a second argument to override
1515
these options
@@ -24,18 +24,16 @@ defmodule Pinchflat.Profiles.Options.YtDlp.OptionBuilder do
2424
built_options =
2525
default_options() ++
2626
subtitle_options(media_profile) ++
27+
thumbnail_options(media_profile) ++
28+
metadata_options(media_profile) ++
2729
output_options(media_profile)
2830

2931
{:ok, built_options}
3032
end
3133

3234
# This will be updated a lot as I add new options to profiles
3335
defp default_options do
34-
[
35-
:embed_metadata,
36-
:embed_thumbnail,
37-
:no_progress
38-
]
36+
[:no_progress]
3937
end
4038

4139
defp subtitle_options(media_profile) do
@@ -65,6 +63,30 @@ defmodule Pinchflat.Profiles.Options.YtDlp.OptionBuilder do
6563
end)
6664
end
6765

66+
defp thumbnail_options(media_profile) do
67+
mapped_struct = Map.from_struct(media_profile)
68+
69+
Enum.reduce(mapped_struct, [], fn attr, acc ->
70+
case attr do
71+
{:download_thumbnail, true} -> acc ++ [:write_thumbnail]
72+
{:embed_thumbnail, true} -> acc ++ [:embed_thumbnail]
73+
_ -> acc
74+
end
75+
end)
76+
end
77+
78+
defp metadata_options(media_profile) do
79+
mapped_struct = Map.from_struct(media_profile)
80+
81+
Enum.reduce(mapped_struct, [], fn attr, acc ->
82+
case attr do
83+
{:download_metadata, true} -> acc ++ [:write_info_json, :clean_info_json]
84+
{:embed_metadata, true} -> acc ++ [:embed_metadata]
85+
_ -> acc
86+
end
87+
end)
88+
end
89+
6890
defp output_options(media_profile) do
6991
{:ok, output_path} = OutputPathBuilder.build(media_profile.output_path_template)
7092

Original file line numberDiff line numberDiff line change
@@ -0,0 +1,52 @@
1+
defmodule Pinchflat.Profiles.Options.YtDlp.IndexOptionBuilder do
  @moduledoc """
  Builds the options for yt-dlp to index a media source based on the given media profile.
  """

  alias Pinchflat.Profiles.MediaProfile

  @doc """
  Builds the options for yt-dlp to index a media source based on the given media profile.

  The options are `match_filter` entries derived from the profile's
  `shorts_behaviour` and `livestream_behaviour` settings. A profile that
  includes both release types produces an empty list.

  Returns {:ok, keyword()}.
  """
  @spec build(%MediaProfile{}) :: {:ok, keyword()}
  def build(%MediaProfile{} = media_profile) do
    {:ok, release_type_options(media_profile)}
  end

  # Translates the shorts/livestream settings into `match_filter` options.
  #
  # Appending multiple match filters treats them as an OR condition, so we
  # have to be careful around combining `only` and `exclude` options.
  # eg: only shorts + exclude livestreams = "any video that is a short OR is
  # not a livestream", which would return all shorts AND normal videos.
  #
  # The settings are matched as an explicit pair so that every combination is
  # spelled out in one place and the order of the emitted filters is fixed
  # here instead of depending on the iteration order of the struct's fields.
  defp release_type_options(%MediaProfile{} = media_profile) do
    case {media_profile.shorts_behaviour, media_profile.livestream_behaviour} do
      # Both `only`: two separate filters, which yt-dlp ORs together, giving
      # shorts and livestreams but no regular videos.
      {:only, :only} ->
        [match_filter: "was_live", match_filter: "original_url*=/shorts/"]

      # Since match_filter is an OR (see above), an `exclude` on the other
      # type must be ignored entirely when one type is set to `only`.
      {:only, _} ->
        [match_filter: "original_url*=/shorts/"]

      {_, :only} ->
        [match_filter: "was_live"]

      # Again, since it's an OR, excluding both needs a single filter that
      # joins the two conditions with `&` to make it an AND.
      {:exclude, :exclude} ->
        [match_filter: "!was_live & original_url!*=/shorts/"]

      {:exclude, :include} ->
        [match_filter: "original_url!*=/shorts/"]

      {:include, :exclude} ->
        [match_filter: "!was_live"]

      # Everything included (or a setting left unset): no filtering.
      _ ->
        []
    end
  end
end

lib/pinchflat/profiles/options/yt_dlp/output_path_builder.ex

+5
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,11 @@ defmodule Pinchflat.Profiles.Options.YtDlp.OutputPathBuilder do
1212
1313
Translates liquid-style templates into yt-dlp-style templates,
1414
leaving yt-dlp syntax intact.
15+
16+
IDEA: apart from any custom options I've defined, I can support any yt-dlp
17+
option by assuming `{{ identifier }}` should transform to `%(identifier)S`.
18+
It's not doing anything huge, but it's nicer to type and more approachable IMO.
19+
IDEA: set a default for `MediaProfile`'s `output_path_template` field
1520
"""
1621
def build(template_string) do
1722
TemplateParser.parse(template_string, full_yt_dlp_options_map())

lib/pinchflat/tasks/source_tasks.ex

+6-1
Original file line numberDiff line numberDiff line change
@@ -35,6 +35,7 @@ defmodule Pinchflat.Tasks.SourceTasks do
3535

3636
@doc """
3737
Starts tasks for downloading videos for any of a source's _pending_ media items.
38+
Jobs are not enqueued if the source is set to not download media. This will return :ok.
3839
3940
NOTE: this starts a download for each media item that is pending,
4041
not just the ones that were indexed in this job run. This should ensure
@@ -45,7 +46,7 @@ defmodule Pinchflat.Tasks.SourceTasks do
4546
4647
Returns :ok
4748
"""
48-
def enqueue_pending_media_downloads(%Source{} = source) do
49+
def enqueue_pending_media_downloads(%Source{download_media: true} = source) do
4950
source
5051
|> Media.list_pending_media_items_for()
5152
|> Enum.each(fn media_item ->
@@ -55,4 +56,8 @@ defmodule Pinchflat.Tasks.SourceTasks do
5556
|> Tasks.create_job_with_task(media_item)
5657
end)
5758
end
59+
60+
def enqueue_pending_media_downloads(%Source{download_media: false} = _source) do
61+
:ok
62+
end
5863
end

0 commit comments

Comments
 (0)