Skip to content

Commit

Permalink
[erooups] rewrite for GalleryExtractor inheritance
Browse files (browse the repository at this point in the history)
  • Loading branch information
JSouthGB committed Nov 30, 2023
1 parent 16ef895 commit f7671e5
Showing 1 changed file with 38 additions and 40 deletions.
78 changes: 38 additions & 40 deletions gallery_dl/extractor/erooups.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,48 +6,46 @@

"""Extractors for http://erooups.com/"""

from .common import Extractor, Message
from .common import GalleryExtractor
from .. import text


class ErooupsGalleryExtractor(Extractor):
    """Extractor for image galleries on erooups.com"""
    category = 'erooups'
    directory_fmt = ('{category}', '{title}')
    filename_fmt = '{filename}.{extension}'
    archive_fmt = '{date}_{filename}'
    subcategory = 'gallery'
    # 'https?' accepts both schemes. The earlier 'http?' only made the
    # trailing "p" optional, so https:// URLs could never match.
    pattern = r'(?:https?://)?(?:www\.)?erooups\.com'
    root = 'http://erooups.com'
    example = 'http://erooups.com/2023/10/25/page-title-11-pics.html'

    def items(self):
        """Yield a Directory message, then one Url message per image."""
        # Add "http://" when the input URL was given without a scheme.
        page = self.request(
            text.ensure_http_scheme(self.url, scheme="http://")).text

        data = self.metadata(page)
        images = text.extract_iter(page, '</div><img src="', '?')

        yield Message.Directory, data
        for path in images:
            # Sources that do not mention the site are prefixed with root.
            if 'erooups' not in path:
                path = self.root + path
            image = text.nameext_from_url(path, {
                # Text between the last "_" and the following "." as int.
                'num': text.parse_int(path.split('_')[-1].split('.')[0]),
                'date': data['date']
            })
            yield Message.Url, path, image
class ErooupsGalleryExtractor(GalleryExtractor):
    """Extractor for image galleries on erooups.com"""
    category = "erooups"
    directory_fmt = ("{category}", "{title}")
    archive_fmt = "{date}_{filename}"
    subcategory = "gallery"
    # Capture groups: year, month, day, slug.
    # 'https?' accepts both schemes. The earlier 'http?' only made the
    # trailing "p" optional, so https:// URLs could never match.
    pattern = (r"(?:https?://)?(?:www\.)?erooups\.com"
               r"/(\d+)/(\d+)/(\d+)/([^/?#]+)")
    root = "http://erooups.com"
    example = "http://erooups.com/2023/10/25/page-title-11-pics.html"

    def __init__(self, match):
        """Store the URL components and build the gallery page URL."""
        self.year = match.group(1)
        self.month = match.group(2)
        self.day = match.group(3)
        self.slug = match.group(4)
        # The page URL is rebuilt from 'root', so it always uses the
        # scheme and host of 'root' regardless of the input URL.
        url = "{}/{}/{}/{}/{}".format(
            self.root, self.year, self.month, self.day, self.slug)
        GalleryExtractor.__init__(self, match, url)

    def images(self, page):
        """Return a list of (image URL, None) tuples found in 'page'."""
        content_root = "http://content.erooups.com"
        # Only look at the markup between 'class="imgs"' and '</section>'.
        gallery = text.extr(page, 'class="imgs"', "</section>")
        return [
            # The extracted remainder normally starts with "/"; strip it
            # so the joined URL does not contain "com//".
            ("{}/{}".format(content_root, path.lstrip("/")), None)
            for path in text.extract_iter(
                gallery, 'src="' + content_root, '"')
        ]

def metadata(self, page):
    """Return gallery metadata taken from the URL parts and 'page'."""
    extr = text.extr
    return {
        "pageurl": self.url,
        # The joined string is date-only, so the format is passed
        # explicitly.
        # NOTE(review): parse_datetime's default format is presumably a
        # full ISO timestamp, which would not parse this value - confirm.
        "date": text.parse_datetime(
            "{}-{}-{}".format(self.year, self.month, self.day),
            "%Y-%m-%d"),
        # Decode HTML entities in the text fields.
        "title": text.unescape(extr(page, '<h1 class="title">', "</h1>")),
        "tag": text.unescape(extr(page, '"><strong>', "</strong></a>")),
        "count": text.parse_int(extr(page, '<div class="pics">', "</div>")),
    }

0 comments on commit f7671e5

Please sign in to comment.