def get_episodes(self, file_id, season_id=None):
    """
    @description:- Helps to search a list of tv episodes by the tv show's imdb ID.
    @parameter-1:- <str:file_id>, imdb ID of the tv show.
    @parameter-2:- <int|str:season_id>, optional season number (fetches all seasons if None).
    @returns:- A JSON string:
                - {
                    'season_count': <int>,
                    'seasons': <list:season>
                  }
                where <season>:
                  {
                    'id': <str:season number>,
                    'episode_count': <int>,
                    'episodes': <list:episode>
                  }
                where <episode>:
                  {
                    'id': <str:episode number>,
                    'sid': <str:season number>,
                    'fqid': <str:zero-padded id, e.g. S01E02>,
                    'name': <str:episode name>
                  }
    """
    assert isinstance(file_id, str)
    assert (season_id is None or isinstance(season_id, int) or
            (isinstance(season_id, str) and season_id.isdigit()))

    # Episode titles are read from <div class='ipc-title__text'> nodes whose
    # text looks like "S{#}.E{#} {separator} {EpisodeName}".
    episode_matcher = re.compile(
        r'^S(?P<sid>[0-9]+).E(?P<eid>[0-9]+)\s+.+?\s+(?P<name>.+)$')

    # Season ids are always handled as strings so that the 'id' field of the
    # result has one type, whether it came from the caller or from the page.
    requested_season = None if season_id is None else str(season_id)
    initial_season_id = '1' if requested_season is None else requested_season

    def do_request(s_id):
        # Fetch the episode-list page of a single season.
        url = f"{self.baseURL}/title/{file_id}/episodes?season={s_id}"
        try:
            return self.session.get(url)
        except requests.exceptions.ConnectionError:
            # NOTE(review): best-effort retry with TLS certificate
            # verification disabled; kept from the original behaviour.
            return self.session.get(url, verify=False)

    def extract_episodes(r) -> list:
        # Collect every title on the page that parses as an episode heading;
        # other titles sharing the same CSS class are ignored.
        episodes = []
        for episode_text in r.html.xpath("//div[@class='ipc-title__text']/text()"):
            match = episode_matcher.search(episode_text.strip())
            if match is None:
                continue
            sid, eid = match.group('sid'), match.group('eid')
            episodes.append(
                {
                    'id': eid,
                    'sid': sid,
                    'fqid': f"S{sid:0>2}E{eid:0>2}",
                    'name': match.group('name'),
                }
            )
        return episodes

    # Load the initial page and grab its episodes
    response = do_request(initial_season_id)
    episodes_by_season = {initial_season_id: extract_episodes(response)}

    # Grab the remaining seasons if a season_id is not explicitly specified.
    # Season numbers are read from the text of the
    # <a data-testid='tab-season-entry'> tabs inside a <ul>.
    if requested_season is None:
        season_labels = (
            label.strip() for label in
            response.html.xpath("//ul/a[@data-testid='tab-season-entry']/text()")
        )
        # dict.fromkeys() drops repeated tabs while keeping page order.
        for s in dict.fromkeys(season_labels):
            if s and s != initial_season_id:
                episodes_by_season[s] = extract_episodes(do_request(s))

    return json.dumps(
        {
            'season_count': len(episodes_by_season),
            'seasons': [
                {'id': sid, 'episode_count': len(eps), 'episodes': eps}
                for (sid, eps)
                in episodes_by_season.items()
            ],
        },
        indent=2)