Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[Enhancement] Track the predicted final filepath for indexed media items #461

Merged
merged 7 commits into from
Nov 8, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
23 changes: 20 additions & 3 deletions lib/pinchflat/downloading/download_option_builder.ex
Original file line number Diff line number Diff line change
Expand Up @@ -34,21 +34,38 @@ defmodule Pinchflat.Downloading.DownloadOptionBuilder do

@doc """
Builds the output path for yt-dlp to download media based on the given source's
media profile. Uses the source's override output path template if it exists.
or media_item's media profile. Uses the source's override output path template if it exists.

Accepts a %MediaItem{} or %Source{} struct. If a %Source{} struct is passed, it
will use a default %MediaItem{} struct with the given source.

Returns binary()
"""
# Source clause: wrap the source in an otherwise-default %MediaItem{} and defer to
# the media item clause below.
def build_output_path_for(%Source{} = source_with_preloads),
  do: build_output_path_for(%MediaItem{source: source_with_preloads})

# MediaItem clause: fetch the output path template for the item's source, then hand
# the template and the media item to build_output_path/2.
def build_output_path_for(%MediaItem{} = media_item_with_preloads) do
  media_item_with_preloads.source
  |> Sources.output_path_template()
  |> build_output_path(media_item_with_preloads)
end

def build_output_path_for(%Source{} = source_with_preloads) do
build_output_path_for(%MediaItem{source: source_with_preloads})
@doc """
Builds the yt-dlp quality options from the media profile that belongs to the given
source (or to the given media item's source). Useful for helping predict the final
filepath of downloaded media.

Accepts a %MediaItem{} or %Source{} struct. If a %Source{} struct is passed, it
is wrapped in a default %MediaItem{} struct before the options are built.

Returns [Keyword.t()]
"""
def build_quality_options_for(%Source{} = source_with_preloads),
  do: build_quality_options_for(%MediaItem{source: source_with_preloads})

def build_quality_options_for(%MediaItem{} = media_item_with_preloads),
  do: quality_options(media_item_with_preloads.source.media_profile)

defp default_options(override_opts) do
Expand Down
11 changes: 10 additions & 1 deletion lib/pinchflat/fast_indexing/fast_indexing_helpers.ex
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@ defmodule Pinchflat.FastIndexing.FastIndexingHelpers do
alias Pinchflat.FastIndexing.YoutubeRss
alias Pinchflat.FastIndexing.YoutubeApi
alias Pinchflat.Downloading.DownloadingHelpers
alias Pinchflat.Downloading.DownloadOptionBuilder

alias Pinchflat.YtDlp.Media, as: YtDlpMedia

Expand All @@ -27,6 +28,10 @@ defmodule Pinchflat.FastIndexing.FastIndexingHelpers do
downloaded_.
"""
def kickoff_download_tasks_from_youtube_rss_feed(%Source{} = source) do
# The media_profile is needed to determine the quality options to _then_ determine a more
# accurate predicted filepath
source = Repo.preload(source, [:media_profile])

{:ok, media_ids} = get_recent_media_ids(source)
existing_media_items = list_media_items_by_media_id_for(source, media_ids)
new_media_ids = media_ids -- Enum.map(existing_media_items, & &1.media_id)
Expand Down Expand Up @@ -68,7 +73,11 @@ defmodule Pinchflat.FastIndexing.FastIndexingHelpers do
defp create_media_item_from_media_id(source, media_id) do
url = "https://www.youtube.com/watch?v=#{media_id}"

case YtDlpMedia.get_media_attributes(url, use_cookies: source.use_cookies) do
command_opts =
[output: DownloadOptionBuilder.build_output_path_for(source)] ++
DownloadOptionBuilder.build_quality_options_for(source)

case YtDlpMedia.get_media_attributes(url, command_opts, use_cookies: source.use_cookies) do
{:ok, media_attrs} ->
Media.create_media_item_from_backend_attrs(source, media_attrs)

Expand Down
2 changes: 2 additions & 0 deletions lib/pinchflat/media/media_item.ex
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,7 @@ defmodule Pinchflat.Media.MediaItem do
:uploaded_at,
:upload_date_index,
:duration_seconds,
:predicted_media_filepath,
# these fields are captured only on download
:media_downloaded_at,
:media_filepath,
Expand Down Expand Up @@ -76,6 +77,7 @@ defmodule Pinchflat.Media.MediaItem do
field :duration_seconds, :integer
field :playlist_index, :integer, default: 0

field :predicted_media_filepath, :string
field :media_filepath, :string
field :media_size_bytes, :integer
field :thumbnail_filepath, :string
Expand Down
11 changes: 10 additions & 1 deletion lib/pinchflat/slow_indexing/slow_indexing_helpers.ex
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@ defmodule Pinchflat.SlowIndexing.SlowIndexingHelpers do
alias Pinchflat.YtDlp.MediaCollection
alias Pinchflat.Downloading.DownloadingHelpers
alias Pinchflat.SlowIndexing.FileFollowerServer
alias Pinchflat.Downloading.DownloadOptionBuilder
alias Pinchflat.SlowIndexing.MediaCollectionIndexingWorker

alias Pinchflat.YtDlp.Media, as: YtDlpMedia
Expand Down Expand Up @@ -56,6 +57,9 @@ defmodule Pinchflat.SlowIndexing.SlowIndexingHelpers do
Returns [%MediaItem{} | %Ecto.Changeset{}]
"""
def index_and_enqueue_download_for_media_items(%Source{} = source) do
# The media_profile is needed to determine the quality options to _then_ determine a more
# accurate predicted filepath
source = Repo.preload(source, [:media_profile])
# See the method definition below for more info on how file watchers work
# (important reading if you're not familiar with it)
{:ok, media_attributes} = setup_file_watcher_and_kickoff_indexing(source)
Expand Down Expand Up @@ -94,8 +98,13 @@ defmodule Pinchflat.SlowIndexing.SlowIndexingHelpers do
{:ok, pid} = FileFollowerServer.start_link()

handler = fn filepath -> setup_file_follower_watcher(pid, filepath, source) end

command_opts =
[output: DownloadOptionBuilder.build_output_path_for(source)] ++
DownloadOptionBuilder.build_quality_options_for(source)

runner_opts = [file_listener_handler: handler, use_cookies: source.use_cookies]
result = MediaCollection.get_media_attributes_for_collection(source.original_url, runner_opts)
result = MediaCollection.get_media_attributes_for_collection(source.original_url, command_opts, runner_opts)

FileFollowerServer.stop(pid)

Expand Down
19 changes: 12 additions & 7 deletions lib/pinchflat/yt_dlp/media.ex
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,8 @@ defmodule Pinchflat.YtDlp.Media do
:livestream,
:short_form_content,
:uploaded_at,
:duration_seconds
:duration_seconds,
:predicted_media_filepath
]

defstruct [
Expand All @@ -23,7 +24,8 @@ defmodule Pinchflat.YtDlp.Media do
:short_form_content,
:uploaded_at,
:duration_seconds,
:playlist_index
:playlist_index,
:predicted_media_filepath
]

alias __MODULE__
Expand Down Expand Up @@ -63,15 +65,17 @@ defmodule Pinchflat.YtDlp.Media do

@doc """
Returns a map representing the media at the given URL.
Optionally takes a list of additional command options to pass to yt-dlp
or configuration-related options to pass to the runner.

Returns {:ok, %Media{}} | {:error, any, ...}.
"""
def get_media_attributes(url, addl_opts \\ []) do
def get_media_attributes(url, command_opts \\ [], addl_opts \\ []) do
runner = Application.get_env(:pinchflat, :yt_dlp_runner)
command_opts = [:simulate, :skip_download]
all_command_opts = [:simulate, :skip_download] ++ command_opts
output_template = indexing_output_template()

case runner.run(url, command_opts, output_template, addl_opts) do
case runner.run(url, all_command_opts, output_template, addl_opts) do
{:ok, output} ->
output
|> Phoenix.json_library().decode!()
Expand All @@ -91,7 +95,7 @@ defmodule Pinchflat.YtDlp.Media do
if something is a short via the URL again
"""
def indexing_output_template do
"%(.{id,title,live_status,original_url,description,aspect_ratio,duration,upload_date,timestamp,playlist_index})j"
"%(.{id,title,live_status,original_url,description,aspect_ratio,duration,upload_date,timestamp,playlist_index,filename})j"
end

@doc """
Expand All @@ -110,7 +114,8 @@ defmodule Pinchflat.YtDlp.Media do
duration_seconds: response["duration"] && round(response["duration"]),
short_form_content: response["original_url"] && short_form_content?(response),
uploaded_at: response["upload_date"] && parse_uploaded_at(response),
playlist_index: response["playlist_index"] || 0
playlist_index: response["playlist_index"] || 0,
predicted_media_filepath: response["filename"]
}
end

Expand Down
11 changes: 7 additions & 4 deletions lib/pinchflat/yt_dlp/media_collection.ex
Original file line number Diff line number Diff line change
Expand Up @@ -11,20 +11,23 @@ defmodule Pinchflat.YtDlp.MediaCollection do

@doc """
Returns a list of maps representing the media in the collection.
Optionally takes a list of additional command options to pass to yt-dlp
or configuration-related options to pass to the runner.

Options:
Runner Options:
- :file_listener_handler - a function that will be called with the path to the
file that will be written to when yt-dlp is done. This is useful for
setting up a file watcher to know when the file is ready to be read.
- :use_cookies - whether or not to use user-provided cookies when fetching the media details

Returns {:ok, [map()]} | {:error, any, ...}.
"""
def get_media_attributes_for_collection(url, addl_opts \\ []) do
def get_media_attributes_for_collection(url, command_opts \\ [], addl_opts \\ []) do
runner = Application.get_env(:pinchflat, :yt_dlp_runner)
# `ignore_no_formats_error` is necessary because yt-dlp will error out if
# the first video has not released yet (ie: is a premier). We don't care about
# available formats since we're just getting the media details
command_opts = [:simulate, :skip_download, :ignore_no_formats_error, :no_warnings]
all_command_opts = [:simulate, :skip_download, :ignore_no_formats_error, :no_warnings] ++ command_opts
use_cookies = Keyword.get(addl_opts, :use_cookies, false)
output_template = YtDlpMedia.indexing_output_template()
output_filepath = FilesystemUtils.generate_metadata_tmpfile(:json)
Expand All @@ -35,7 +38,7 @@ defmodule Pinchflat.YtDlp.MediaCollection do
file_listener_handler.(output_filepath)
end

case runner.run(url, command_opts, output_template, runner_opts) do
case runner.run(url, all_command_opts, output_template, runner_opts) do
{:ok, output} ->
parsed_lines =
output
Expand Down
Binary file modified priv/repo/erd.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
defmodule Pinchflat.Repo.Migrations.AddPredictedMediaFilepathToMediaItems do
  use Ecto.Migration

  # Adds the `predicted_media_filepath` string column to the `media_items` table.
  def change do
    alter(table(:media_items), do: add(:predicted_media_filepath, :string))
  end
end
16 changes: 16 additions & 0 deletions test/pinchflat/downloading/download_option_builder_test.exs
Original file line number Diff line number Diff line change
Expand Up @@ -461,6 +461,22 @@ defmodule Pinchflat.Downloading.DownloadOptionBuilderTest do
end
end

describe "build_quality_options_for/1" do
  # Both entry points (media item and source) are expected to yield the same
  # quality-related options.
  test "builds quality options for a media item", %{media_item: media_item} do
    quality_opts = DownloadOptionBuilder.build_quality_options_for(media_item)

    assert {:format_sort, "res:1080,+codec:avc:m4a"} in quality_opts
    assert {:remux_video, "mp4"} in quality_opts
  end

  test "builds quality options for a source", %{media_item: media_item} do
    source = media_item.source
    quality_opts = DownloadOptionBuilder.build_quality_options_for(source)

    assert {:format_sort, "res:1080,+codec:avc:m4a"} in quality_opts
    assert {:remux_video, "mp4"} in quality_opts
  end
end

defp update_media_profile_attribute(media_item_with_preloads, attrs) do
media_item_with_preloads.source.media_profile
|> Profiles.change_media_profile(attrs)
Expand Down
12 changes: 12 additions & 0 deletions test/pinchflat/fast_indexing/fast_indexing_helpers_test.exs
Original file line number Diff line number Diff line change
Expand Up @@ -61,6 +61,18 @@ defmodule Pinchflat.FastIndexing.FastIndexingHelpersTest do
assert [_] = Tasks.list_tasks_for(media_item, "MediaDownloadWorker")
end

test "passes the source's download options to the yt-dlp runner", %{source: source} do
  # The RSS feed reports a single video ID, so the runner is invoked for that one item.
  expect(HTTPClientMock, :get, fn _url -> {:ok, "<yt:videoId>test_1</yt:videoId>"} end)

  # The command options handed to the runner must carry both the output path
  # template and the quality options.
  expect(YtDlpRunnerMock, :run, fn _url, command_opts, _output_template, _runner_opts ->
    assert {:output, "/tmp/test/media/%(title)S.%(ext)S"} in command_opts
    assert {:remux_video, "mp4"} in command_opts

    {:ok, media_attributes_return_fixture()}
  end)

  FastIndexingHelpers.kickoff_download_tasks_from_youtube_rss_feed(source)
end

test "sets use_cookies if the source uses cookies" do
expect(HTTPClientMock, :get, fn _url -> {:ok, "<yt:videoId>test_1</yt:videoId>"} end)

Expand Down
10 changes: 10 additions & 0 deletions test/pinchflat/slow_indexing/slow_indexing_helpers_test.exs
Original file line number Diff line number Diff line change
Expand Up @@ -202,6 +202,16 @@ defmodule Pinchflat.SlowIndexing.SlowIndexingHelpersTest do
assert %Ecto.Changeset{} = changeset
end

test "passes the source's download options to the yt-dlp runner", %{source: source} do
  # The command options handed to the runner must carry both the output path
  # template and the quality options.
  expect(YtDlpRunnerMock, :run, fn _url, command_opts, _output_template, _runner_opts ->
    assert {:output, "/tmp/test/media/%(title)S.%(ext)S"} in command_opts
    assert {:remux_video, "mp4"} in command_opts

    {:ok, source_attributes_return_fixture()}
  end)

  SlowIndexingHelpers.index_and_enqueue_download_for_media_items(source)
end

test "sets use_cookies if the source uses cookies" do
expect(YtDlpRunnerMock, :run, fn _url, _opts, _ot, addl_opts ->
assert {:use_cookies, true} in addl_opts
Expand Down
12 changes: 11 additions & 1 deletion test/pinchflat/yt_dlp/media_collection_test.exs
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,16 @@ defmodule Pinchflat.YtDlp.MediaCollectionTest do
assert {:error, "Big issue", 1} = MediaCollection.get_media_attributes_for_collection(@channel_url)
end

# Verifies that caller-supplied command options (second argument) reach the runner's
# command option list. Test name fixed: "passes long" -> "passes along".
test "passes along additional command options" do
  expect(YtDlpRunnerMock, :run, fn _url, opts, _ot, _addl_opts ->
    assert :foo in opts

    {:ok, ""}
  end)

  assert {:ok, _} = MediaCollection.get_media_attributes_for_collection(@channel_url, [:foo])
end

test "passes additional args to runner" do
expect(YtDlpRunnerMock, :run, fn _url, _opts, _ot, addl_opts ->
assert [{:output_filepath, filepath} | _] = addl_opts
Expand All @@ -56,7 +66,7 @@ defmodule Pinchflat.YtDlp.MediaCollectionTest do
end

assert {:ok, _} =
MediaCollection.get_media_attributes_for_collection(@channel_url, file_listener_handler: handler)
MediaCollection.get_media_attributes_for_collection(@channel_url, [], file_listener_handler: handler)

assert_receive {:handler, filename}
assert String.ends_with?(filename, ".json")
Expand Down
19 changes: 15 additions & 4 deletions test/pinchflat/yt_dlp/media_test.exs
Original file line number Diff line number Diff line change
Expand Up @@ -120,13 +120,22 @@ defmodule Pinchflat.YtDlp.MediaTest do
assert {:ok, _} = Media.get_media_attributes(@media_url)
end

test "passes along additional command options" do
  # Caller-supplied command options are expected to follow the two options the
  # function always sends.
  expect(YtDlpRunnerMock, :run, fn _url, command_opts, _output_template, _runner_opts ->
    assert command_opts == [:simulate, :skip_download, :custom_arg]

    {:ok, media_attributes_return_fixture()}
  end)

  assert {:ok, _} = Media.get_media_attributes(@media_url, [:custom_arg])
end

test "passes along additional options" do
expect(YtDlpRunnerMock, :run, fn _url, _opts, _ot, addl ->
assert [addl_arg: true] = addl
{:ok, media_attributes_return_fixture()}
end)

assert {:ok, _} = Media.get_media_attributes(@media_url, addl_arg: true)
assert {:ok, _} = Media.get_media_attributes(@media_url, [], addl_arg: true)
end

test "returns the error straight through when the command fails" do
Expand All @@ -139,7 +148,7 @@ defmodule Pinchflat.YtDlp.MediaTest do
describe "indexing_output_template/0" do
test "contains all the greatest hits" do
attrs =
~w(id title live_status original_url description aspect_ratio duration upload_date timestamp playlist_index)a
~w(id title live_status original_url description aspect_ratio duration upload_date timestamp playlist_index filename)a

formatted_attrs = "%(.{#{Enum.join(attrs, ",")}})j"

Expand All @@ -159,7 +168,8 @@ defmodule Pinchflat.YtDlp.MediaTest do
"duration" => 60,
"upload_date" => "20210101",
"timestamp" => 1_600_000_000,
"playlist_index" => 1
"playlist_index" => 1,
"filename" => "TiZPUDkDYbk.mp4"
}

assert %Media{
Expand All @@ -171,7 +181,8 @@ defmodule Pinchflat.YtDlp.MediaTest do
short_form_content: false,
uploaded_at: ~U[2020-09-13 12:26:40Z],
duration_seconds: 60,
playlist_index: 1
playlist_index: 1,
predicted_media_filepath: "TiZPUDkDYbk.mp4"
} == Media.response_to_struct(response)
end

Expand Down