Skip to content

Commit

Permalink
Fix lrclib lyrics (#5406)
Browse files Browse the repository at this point in the history
Fixes #5102

### LRCLib lyrics backend fixes

#### Bug Fixes
- Fixed fetching lyrics from `lrclib` source. If lyrics for a specific
album, artist, and title combination are not found, the plugin now
searches for the artist and title and picks the most relevant result,
scoring them by
  1. Duration similarity to the target item 
  1. Availability of synced lyrics
- Updated the default `sources` configuration to prioritize `lrclib`
over other sources for faster and more reliable results.

#### Code Improvements
  - Added type annotations to `fetch` method in all backends.
- Introduced `LRCLyrics` and `LRCLibItem` classes to encapsulate lyrics
data and improve code structure.
- Enhanced error handling and logging enchancements to the `LRCLib`
backend. These will be added to the rest of the backends in a separate
PR.
  • Loading branch information
snejus authored Jan 20, 2025
2 parents 38c8209 + bb5f3e0 commit f709ac1
Show file tree
Hide file tree
Showing 4 changed files with 254 additions and 54 deletions.
180 changes: 153 additions & 27 deletions beetsplug/lyrics.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,11 +25,15 @@
import struct
import unicodedata
import warnings
from functools import partial
from typing import TYPE_CHECKING, ClassVar
from contextlib import suppress
from dataclasses import dataclass
from functools import cached_property, partial, total_ordering
from http import HTTPStatus
from typing import TYPE_CHECKING, ClassVar, Iterable, Iterator
from urllib.parse import quote, urlencode

import requests
from typing_extensions import TypedDict
from unidecode import unidecode

import beets
Expand Down Expand Up @@ -59,6 +63,7 @@
TAG_RE = re.compile(r"<[^>]*>")
BREAK_RE = re.compile(r"\n?\s*<br([\s|/][^>]*)*>\s*\n?", re.I)
USER_AGENT = f"beets/{beets.__version__}"
INSTRUMENTAL_LYRICS = "[Instrumental]"

# The content for the base index.rst generated in ReST mode.
REST_INDEX_TEMPLATE = """Lyrics
Expand Down Expand Up @@ -96,6 +101,10 @@
"""


class NotFoundError(requests.exceptions.HTTPError):
pass


# Utilities.


Expand Down Expand Up @@ -266,37 +275,154 @@ def fetch(
raise NotImplementedError


class LRCLibItem(TypedDict):
"""Lyrics data item returned by the LRCLib API."""

id: int
name: str
trackName: str
artistName: str
albumName: str
duration: float | None
instrumental: bool
plainLyrics: str
syncedLyrics: str | None


@dataclass
@total_ordering
class LRCLyrics:
#: Percentage tolerance for max duration difference between lyrics and item.
DURATION_DIFF_TOLERANCE = 0.05

target_duration: float
duration: float
instrumental: bool
plain: str
synced: str | None

def __le__(self, other: LRCLyrics) -> bool:
"""Compare two lyrics items by their score."""
return self.dist < other.dist

@classmethod
def make(cls, candidate: LRCLibItem, target_duration: float) -> LRCLyrics:
return cls(
target_duration,
candidate["duration"] or 0.0,
candidate["instrumental"],
candidate["plainLyrics"],
candidate["syncedLyrics"],
)

@cached_property
def duration_dist(self) -> float:
"""Return the absolute difference between lyrics and target duration."""
return abs(self.duration - self.target_duration)

@cached_property
def is_valid(self) -> bool:
"""Return whether the lyrics item is valid.
Lyrics duration must be within the tolerance defined by
:attr:`DURATION_DIFF_TOLERANCE`.
"""
return (
self.duration_dist
<= self.target_duration * self.DURATION_DIFF_TOLERANCE
)

@cached_property
def dist(self) -> tuple[bool, float]:
"""Distance/score of the given lyrics item.
Return a tuple with the following values:
1. Absolute difference between lyrics and target duration
2. Boolean telling whether synced lyrics are available.
Best lyrics match is the one that has the closest duration to
``target_duration`` and has synced lyrics available.
"""
return not self.synced, self.duration_dist

def get_text(self, want_synced: bool) -> str:
if self.instrumental:
return INSTRUMENTAL_LYRICS

if want_synced and self.synced:
return "\n".join(map(str.strip, self.synced.splitlines()))

return self.plain


class LRCLib(Backend):
base_url = "https://lrclib.net/api/get"
"""Fetch lyrics from the LRCLib API."""

def fetch(
BASE_URL = "https://lrclib.net/api"
GET_URL = f"{BASE_URL}/get"
SEARCH_URL = f"{BASE_URL}/search"

def warn(self, message: str, *args) -> None:
"""Log a warning message with the class name."""
self._log.warning(f"{self.__class__.__name__}: {message}", *args)

def fetch_json(self, *args, **kwargs):
"""Wrap the request method to raise an exception on HTTP errors."""
kwargs.setdefault("timeout", 10)
kwargs.setdefault("headers", {"User-Agent": USER_AGENT})
r = requests.get(*args, **kwargs)
if r.status_code == HTTPStatus.NOT_FOUND:
raise NotFoundError("HTTP Error: Not Found", response=r)
r.raise_for_status()

return r.json()

def fetch_candidates(
self, artist: str, title: str, album: str, length: int
) -> str | None:
params: dict[str, str | int] = {
"artist_name": artist,
"track_name": title,
}
) -> Iterator[list[LRCLibItem]]:
"""Yield lyrics candidates for the given song data.
I found that the ``/get`` endpoint sometimes returns inaccurate or
unsynced lyrics, while ``search`` yields more suitable candidates.
Therefore, we prioritize the latter and rank the results using our own
algorithm. If the search does not give suitable lyrics, we fall back to
the ``/get`` endpoint.
Return an iterator over lists of candidates.
"""
base_params = {"artist_name": artist, "track_name": title}
get_params = {**base_params, "duration": length}
if album:
params["album_name"] = album
get_params["album_name"] = album

if length:
params["duration"] = length
yield self.fetch_json(self.SEARCH_URL, params=base_params)

try:
response = requests.get(
self.base_url,
params=params,
timeout=10,
)
data = response.json()
except (requests.RequestException, json.decoder.JSONDecodeError) as exc:
self._log.debug("LRCLib API request failed: {0}", exc)
return None
with suppress(NotFoundError):
yield [self.fetch_json(self.GET_URL, params=get_params)]

@classmethod
def pick_best_match(cls, lyrics: Iterable[LRCLyrics]) -> LRCLyrics | None:
"""Return best matching lyrics item from the given list."""
return min((li for li in lyrics if li.is_valid), default=None)

def fetch(
self, artist: str, title: str, album: str, length: int
) -> str | None:
"""Fetch lyrics text for the given song data."""
evaluate_item = partial(LRCLyrics.make, target_duration=length)

if self.config["synced"]:
return data.get("syncedLyrics") or data.get("plainLyrics")
try:
for group in self.fetch_candidates(artist, title, album, length):
candidates = [evaluate_item(item) for item in group]
if item := self.pick_best_match(candidates):
return item.get_text(self.config["synced"])
except StopIteration:
pass
except requests.JSONDecodeError:
self.warn("Could not decode response JSON data")
except requests.RequestException as exc:
self.warn("Request error: {}", exc)

return data.get("plainLyrics")
return None


class DirectBackend(Backend):
Expand Down Expand Up @@ -478,7 +604,7 @@ def _try_extracting_lyrics_from_non_data_lyrics_container(self, soup):
string="This song is an instrumental",
):
self._log.debug("Detected instrumental")
return "[Instrumental]"
return INSTRUMENTAL_LYRICS
else:
self._log.debug("Couldn't scrape page using known layouts")
return None
Expand Down Expand Up @@ -725,7 +851,7 @@ def fetch(self, artist: str, title: str, *_) -> str | None:


class LyricsPlugin(plugins.BeetsPlugin):
SOURCES = ["google", "musixmatch", "genius", "tekstowo", "lrclib"]
SOURCES = ["lrclib", "google", "musixmatch", "genius", "tekstowo"]
SOURCE_BACKENDS = {
"google": Google,
"musixmatch": MusiXmatch,
Expand Down
7 changes: 7 additions & 0 deletions docs/changelog.rst
Original file line number Diff line number Diff line change
Expand Up @@ -48,6 +48,13 @@ Bug fixes:
* :doc:`plugins/lyrics`: Do not attempt to search for lyrics if either the
artist or title is missing and ignore ``artist_sort`` value if it is empty.
:bug:`2635`
* :doc:`plugins/lyrics`: Fix fetching lyrics from ``lrclib`` source. If we
cannot find lyrics for a specific album, artist, title combination, the
plugin now tries to search for the artist and title and picks the most
relevant result. Update the default ``sources`` configuration to prioritize
``lrclib`` over other sources since it returns reliable results quicker than
others.
:bug:`5102`

For packagers:

Expand Down
2 changes: 1 addition & 1 deletion docs/plugins/lyrics.rst
Original file line number Diff line number Diff line change
Expand Up @@ -56,7 +56,7 @@ configuration file. The available options are:
sources known to be scrapeable.
- **sources**: List of sources to search for lyrics. An asterisk ``*`` expands
to all available sources.
Default: ``google genius tekstowo lrclib``, i.e., all the available sources. The
Default: ``lrclib google genius tekstowo``, i.e., all the available sources. The
``google`` source will be automatically deactivated if no ``google_API_key``
is setup.
The ``google``, ``genius``, and ``tekstowo`` sources will only be enabled if
Expand Down
Loading

0 comments on commit f709ac1

Please sign in to comment.