This repository has been archived by the owner on Apr 21, 2021. It is now read-only.
-
Notifications
You must be signed in to change notification settings - Fork 6
/
Copy pathmain.py
152 lines (133 loc) · 5.78 KB
/
main.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
import base64
import os
import time

from selenium import webdriver
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.common.by import By
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.support.ui import WebDriverWait
# --------------Settings--------------
# URL of the manga to download. On bookwalker.jp, open the purchased-books list
# ('購入済み書籍一覧'), find your manga, right-click its 'Read this book' button
# ('この本を読む') and choose 'Copy link address'.
# On the Chinese site (bookwalker.com.tw), right-click the 'Read' button and copy
# its link instead, e.g. https://www.bookwalker.com.tw/browserViewer/56994/read
MANGA_URL = 'https://member.bookwalker.jp/app/03/webstore/cooperation?r=BROWSER_VIEWER/640c0ddd-896c-4881-945f-ad5ce9a070a6/https%3A%2F%2Fbookwalker.jp%2FholdBooks%2F'
# Your session cookies as a single 'name=value; name=value' string.
# For bookwalker.jp, open https://member.bookwalker.jp/app/03/my/profile and copy the cookies.
# For the Chinese site, copy them from the member page: https://www.bookwalker.com.tw/member
COOKIES = 'YOUR_COOKIES_HERE'
# Folder the page images are written to (created at start-up if it does not exist).
IMGDIR = "./TEST"
# Browser window size as (width, height). This manga is 784*1200; change it to
# suit your manga, but check the original image resolution first.
RES = (784, 1200)
# Seconds to wait after moving to the next page; 1 second is enough on a fast network.
SLEEP_TIME = 2
# Keep False: headless Chrome can only log in with cookies.
MANUAL_LOGIN = False
# Seconds to wait for the first page to load.
LOADING_WAIT_TIME = 20
# Keep False to run Chrome headless; any true value leaves the 'headless' flag off.
DEBUG = False
# Pixels to trim from each edge as (left, upper, right, lower). For example, to
# cut 3px off the bottom, set:
# CUT_IMAGE = (0, 0, 0, 3)
# Leave it as None (the default) when no cropping is needed.
# Cropping requires Pillow: pip install Pillow
CUT_IMAGE = None
# --------------Settings--------------
# Pillow is only needed when cropping is requested, so it is imported lazily;
# the crop margins are unpacked once here for use while saving pages.
if CUT_IMAGE is not None:
    import PIL.Image as pil_image
    from io import BytesIO
    left, upper, right, lower = CUT_IMAGE

# Pick the login page that belongs to the store hosting the manga.
if 'bookwalker.com.tw' in MANGA_URL:
    LOGIN_URL = 'https://www.bookwalker.com.tw/user/login'
    print('Manga is on site: bookwalker.com.tw')
else:
    LOGIN_URL = 'https://member.bookwalker.jp/app/03/login'
    print('Manga is on site: bookwalker.jp')

# makedirs (unlike mkdir) also creates missing parent folders, and
# exist_ok=True makes an already-existing folder a no-op without a separate
# check-then-create step.
os.makedirs(IMGDIR, exist_ok=True)
def get_driver():
    """Create the Chrome WebDriver used to render the manga viewer.

    The browser window is sized to ``RES`` and the device scale factor is
    forced to 1. Chrome runs headless unless ``DEBUG`` is true.

    Returns:
        The started ``webdriver.Chrome`` instance.
    """
    option = webdriver.ChromeOptions()
    option.add_argument('high-dpi-support=1')
    option.add_argument('device-scale-factor=1')
    option.add_argument('force-device-scale-factor=1')
    option.add_argument(
        "user-agent=Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/80.0.3987.163 Safari/537.36")
    option.add_argument('window-size=%d,%d' % RES)
    if not DEBUG:
        option.add_argument('headless')
    # ``chrome_options=`` was deprecated in Selenium 3 and is no longer
    # accepted by current Selenium 4 releases; ``options=`` is the supported
    # keyword.
    driver = webdriver.Chrome(options=option)
    return driver
def get_cookie_dict(cookies):
    """Parse a cookie header string into a ``{name: value}`` dict.

    Args:
        cookies: Cookie string such as ``'a=1; b=2'``. Pairs are separated
            by ``;`` with optional surrounding whitespace.

    Returns:
        A dict mapping each cookie name to its value. Only the first ``=``
        of a pair separates name from value, so values that themselves
        contain ``=`` (for example base64 padding) are kept intact.
        Segments that are empty, have no ``=`` or have an empty name are
        skipped instead of raising.
    """
    cookies_dict = {}
    for pair in cookies.split(';'):
        name, separator, value = pair.strip().partition('=')
        if not separator or not name:
            # Not a usable 'name=value' pair (e.g. trailing ';' or blank input).
            continue
        cookies_dict[name] = value
    return cookies_dict
def add_cookies(driver, cookies):
    """Install every ``name -> value`` entry of *cookies* into *driver*.

    Each entry is passed to ``driver.add_cookie`` as a dict with the keys
    ``'name'`` and ``'value'``.
    """
    for cookie_name, cookie_value in cookies.items():
        entry = {'name': cookie_name, 'value': cookie_value}
        driver.add_cookie(entry)
def login(driver, email, password):
    """Log in through the store's login form with an e-mail and password.

    Opens ``LOGIN_URL``, types *email* into the element with id
    ``mailAddress`` and *password* into the element with id ``password``,
    then clicks the element named ``loginBtn``.

    Args:
        driver: WebDriver to drive.
        email: Account e-mail address.
        password: Account password.
    """
    driver.get(LOGIN_URL)
    # find_element(By.…) replaces the find_element_by_* helpers, which were
    # removed in Selenium 4.3; this form works on older releases as well.
    driver.find_element(By.ID, 'mailAddress').send_keys(email)
    driver.find_element(By.ID, 'password').send_keys(password)
    driver.find_element(By.NAME, 'loginBtn').click()
def check_is_loading(list_ele):
    """Report whether any element in *list_ele* is currently displayed.

    Args:
        list_ele: Iterable of elements exposing ``is_displayed()``.

    Returns:
        ``True`` as soon as one element's ``is_displayed()`` returns exactly
        ``True``; ``False`` otherwise (including for an empty iterable).
    """
    return any(element.is_displayed() is True for element in list_ele)
def _sign_in(driver):
    """Authenticate the session, either by injecting COOKIES or by waiting for a manual login."""
    driver.get(LOGIN_URL)
    if not MANUAL_LOGIN:
        # Drop whatever cookies the login page set, then install the user's own.
        driver.delete_all_cookies()
        add_cookies(driver, get_cookie_dict(COOKIES))
    else:
        print('Please login...')
        # The login is treated as done once the '#password' field is gone.
        WebDriverWait(driver, 120).until_not(
            lambda x: x.find_elements(By.CSS_SELECTOR, '#password'))
        print('Login successfully, please wait...')


def _page_counter_parts(driver):
    """Return the text of the 'pageSliderCounter' element split on '/'."""
    return str(driver.find_element(By.ID, 'pageSliderCounter').text).split('/')


def _move_to_page(driver, index):
    """Ask the viewer's own script API to jump to page *index*."""
    driver.execute_script(
        'NFBR.a6G.Initializer.B0U.menu.a6l.moveToPage(%d)' % index)


def _capture_page(driver):
    """Return the decoded JPEG bytes of the canvas inside '.currentScreen'."""
    canvas = driver.find_element(By.CSS_SELECTOR, '.currentScreen canvas')
    data_url = driver.execute_script(
        "return arguments[0].toDataURL('image/jpeg');", canvas)
    # A data URL has the form 'data:image/jpeg;base64,<payload>'. Split at
    # the first comma rather than cutting a fixed number of characters: that
    # prefix is 23 characters long, so the former substring(22) left a stray
    # ',' in front of the payload and only decoded because b64decode ignores
    # characters outside the base64 alphabet.
    return base64.b64decode(data_url.split(',', 1)[1])


def _save_page(image_data, index):
    """Write one page to IMGDIR as '<index>.jpg', cropping first when CUT_IMAGE is set."""
    with open(os.path.join(IMGDIR, '%d.jpg' % index), 'wb') as f:
        if CUT_IMAGE is None:
            f.write(image_data)
        else:
            org_img = pil_image.open(BytesIO(image_data))
            width, height = org_img.size
            # CUT_IMAGE holds margins, so convert them to a crop box.
            org_img.crop(
                (left, upper, width - right, height - lower)).save(f)


def _download_pages(driver):
    """Walk the viewer from the first page to the last, saving every page."""
    page_count = int(_page_counter_parts(driver)[1])
    print('Has %d pages.' % page_count)
    _move_to_page(driver, 0)
    time.sleep(SLEEP_TIME)
    for i in range(page_count):
        # Do not grab the canvas while a '.loading' indicator is visible.
        WebDriverWait(driver, 30).until_not(lambda x: check_is_loading(
            x.find_elements(By.CSS_SELECTOR, '.loading')))
        _save_page(_capture_page(driver), i)
        print('Page %s Downloaded' % str(i + 1))
        if i == page_count - 1:
            print('Finished.')
            break
        _move_to_page(driver, i + 1)
        # The counter still shows i + 1 for the page just saved; wait until
        # it changes, which signals that the viewer actually moved on.
        shown_before = i + 1
        WebDriverWait(driver, 30).until_not(
            lambda x: int(_page_counter_parts(x)[0]) == shown_before)
        time.sleep(SLEEP_TIME)


def main():
    """Download every page of MANGA_URL into IMGDIR.

    Starts Chrome, signs in (cookies or manual login), opens the manga and
    saves each page as '<index>.jpg'. If anything fails while downloading, a
    screenshot is written to './error.png' and a hint is printed instead of
    raising. The browser is always shut down before returning.
    """
    driver = get_driver()
    try:
        _sign_in(driver)
        driver.set_window_size(RES[0], RES[1])
        driver.get(MANGA_URL)
        print('Preparing for downloading...')
        time.sleep(LOADING_WAIT_TIME)
        try:
            _download_pages(driver)
        except Exception as exc:
            # Best effort by design: record what the page looked like and
            # explain the usual cause. Catching Exception (not a bare
            # except) lets Ctrl-C and SystemExit still stop the script.
            driver.save_screenshot('./error.png')
            print('Error: %s: %s' % (type(exc).__name__, exc))
            print('Something wrong or download finished, Please check the error.png to see the web page.')
            print('Normally, you should logout and login, then renew the cookies to solve this problem.')
    finally:
        # Without quit() the Chrome and chromedriver processes outlive the script.
        driver.quit()


if __name__ == '__main__':
    main()