[erooups] rewrite for GalleryExtractor inheritance

mikf · Nov 30, 2023 · f7671e5
1 parent 16ef895
commit f7671e5
Showing 1 changed file with 38 additions and 40 deletions.
diff --git a/gallery_dl/extractor/erooups.py b/gallery_dl/extractor/erooups.py
@@ -6,48 +6,46 @@
 
 """Extractors for http://erooups.com/"""
 
-from .common import Extractor, Message
+from .common import GalleryExtractor
 from .. import text
 
 
-class ErooupsGalleryExtractor(Extractor):
-    category = 'erooups'
-    directory_fmt = ('{category}', '{title}')
-    filename_fmt = '{filename}.{extension}'
-    archive_fmt = '{date}_{filename}'
-    subcategory = 'gallery'
-    pattern = r'(?:http?://)?(?:www\.)?erooups\.com'
-    root = 'http://erooups.com'
-    example = 'http://erooups.com/2023/10/25/page-title-11-pics.html'
-
-    def items(self):
-        page = self.request(
-            text.ensure_http_scheme(self.url, scheme="http://")).text
-
-        data = self.metadata(page)
-        images = text.extract_iter(page, '</div><img src="', '?')
-
-        yield Message.Directory, data
-        for path in images:
-            if 'erooups' not in path:
-                path = self.root + path
-            image = text.nameext_from_url(path, {
-                'num': text.parse_int(path.split('_')[-1].split('.')[0]),
-                'date': data['date']
-            })
-            yield Message.Url, path, image
+class ErooupsGalleryExtractor(GalleryExtractor):
+    category = "erooups"
+    directory_fmt = ("{category}", "{title}")
+    archive_fmt = "{date}_{filename}"
+    subcategory = "gallery"
+    pattern = (r"(?:http?://)?(?:www\.)?erooups\.com"
+               r"/(\d+)/(\d+)/(\d+)/([^/?#]+)")
+    root = "http://erooups.com"
+    example = "http://erooups.com/2023/10/25/page-title-11-pics.html"
+
+    def __init__(self, match):
+        self.year = match.group(1)
+        self.month = match.group(2)
+        self.day = match.group(3)
+        self.slug = match.group(4)
+        url = "{}/{}/{}/{}/{}".format(
+            self.root, self.year, self.month, self.day, self.slug)
+        GalleryExtractor.__init__(self, match, url)
+
+    def images(self, page):
+        fmt = "http://content.erooups.com/{}".format
+        extr = text.extr(page, 'class="imgs"', "</section>")
+        return [
+            (fmt(i), None) for i in text.extract_iter(
+                extr, 'src="http://content.erooups.com', '"')
+        ]
 
     def metadata(self, page):
-        data = {}
-        data['pageurl'] = self.url
-        data['date'] = '-'.join(self.url.split('/')[3:6])
-        data['title'] = text.extr(
-            page, '<h1 class="title">', '</h1>')
-        data['tag'] = text.extr(
-            page, '"><strong>', '</strong></a>')
-        data['imagecount'] = text.extr(
-            page, '<div class="pics">', '</div>')
-
-        data = {k: text.unescape(data[k]) for k in data if data[k] != ""}
-
-        return data
+        return {
+            "pageurl": self.url,
+            "date": text.parse_datetime(
+                "{}-{}-{}".format(self.year, self.month, self.day)),
+            "title": text.extr(
+                page, '<h1 class="title">', "</h1>"),
+            "tag": text.extr(
+                page, '"><strong>', "</strong></a>"),
+            "count": text.parse_int(text.extr(
+                page, '<div class="pics">', "</div>")),
+        }