-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathutils.py
222 lines (194 loc) · 7.17 KB
/
utils.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
import base64
import shutil
from os.path import split as path_split, join as path_join, isfile
from os import listdir
from selenium import webdriver
from selenium.webdriver.common.by import By
from math import floor
import logging
import re
def get_xpath(elm):
    """
    Builds an absolute XPath for an element by walking up its ancestors
    until the <html> element is reached.
    https://stackoverflow.com/questions/71699032/find-the-xpath-with-get-attribute-in-python-selenium

    Every step (including the element itself) gets a 1-based positional
    index when it has same-tag siblings, so the resulting path does not
    also match the element's siblings.

    Arguments
    --------------------
    elm: element exposing tag_name, find_element and find_elements
         (presumably a Selenium WebElement)

    Returns
    --------------------
    str: The XPath, eg. /html/body/div[2]/a[3]
    """
    def _step(node):
        # One path step: the tag name, plus [n] when the parent holds
        # several children with that tag
        tag = node.tag_name
        siblings = node.find_elements(By.XPATH, "../" + tag)
        if len(siblings) > 1:
            # XPath positions are 1-based
            return tag + "[" + str(siblings.index(node) + 1) + "]"
        return tag

    node = elm
    steps = [_step(node)]
    while node.tag_name != "html":
        node = node.find_element(By.XPATH, "..")
        steps.append(_step(node))
    # Steps were collected leaf-first, the path reads root-first
    return "/" + "/".join(reversed(steps))
def get_blob_contents(driver, uri):
    """
    Downloads a blob image and returns the bytes
    https://stackoverflow.com/questions/47424245/how-to-download-an-image-with-python-3-selenium-if-the-url-begins-with-blob

    The download happens inside the page: the injected script fetches the
    uri with an XMLHttpRequest as an arraybuffer, base64 encodes it and hands
    the text back through the async-script callback. On an XHR error the
    callback receives xhr.status (a number) instead.

    Arguments
    --------------------
    driver: object exposing execute_async_script (presumably a Selenium WebDriver)
    uri: blob:https://mangadex.org/499f94d4-dfae-4354-8875-6fbf22f67eef

    Returns
    --------------------
    bytes: The image data

    Raises
    --------------------
    Exception: When the script reports a numeric status instead of data
    """
    # NOTE: the script text below is sent to the browser as-is
    result = driver.execute_async_script("""
var uri = arguments[0];
var callback = arguments[1];
var toBase64 = function(buffer){for(var r,n=new Uint8Array(buffer),t=n.length,a=new Uint8Array(4*Math.ceil(t/3)),i=new Uint8Array(64),o=0,c=0;64>c;++c)i[c]="ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/".charCodeAt(c);for(c=0;t-t%3>c;c+=3,o+=4)r=n[c]<<16|n[c+1]<<8|n[c+2],a[o]=i[r>>18],a[o+1]=i[r>>12&63],a[o+2]=i[r>>6&63],a[o+3]=i[63&r];return t%3===1?(r=n[t-1],a[o]=i[r>>2],a[o+1]=i[r<<4&63],a[o+2]=61,a[o+3]=61):t%3===2&&(r=(n[t-2]<<8)+n[t-1],a[o]=i[r>>10],a[o+1]=i[r>>4&63],a[o+2]=i[r<<2&63],a[o+3]=61),new TextDecoder("ascii").decode(a)};
var xhr = new XMLHttpRequest();
xhr.responseType = 'arraybuffer';
xhr.onload = function(){ callback(toBase64(xhr.response)) };
xhr.onerror = function(){ callback(xhr.status) };
xhr.open('GET', uri);
xhr.send();
""", uri)
    # A number means the onerror handler fired; success is always a string
    if isinstance(result, int):
        raise Exception("Request failed with status %s" % result)
    return base64.b64decode(result)
def is_char_digit(c):
    """
    Returns True only when c equals one of the ASCII digits "0" to "9".
    Anything else (empty string, longer strings, other digits) is False.
    """
    ascii_digits = "0123456789"
    return any(digit == c for digit in ascii_digits)
def is_char_period(c):
    """
    Returns True only when c equals the single character ".".
    """
    period = "."
    return c == period
def extract_chapter_num_string(chapter_title):
    """
    Extracts the first number found in a chapter title.
    Ch. 227 - Tried Various Things (3) -> 227
    Ch. 46.5 - Extra -> 46.5
    Ch. 12. The Title -> 12

    The number starts at the first ASCII digit and continues through any
    directly following digits and periods. Periods left dangling at the end
    (sentence punctuation such as "12.") are dropped.

    Arguments
    --------------------
    chapter_title: str

    Returns
    ------------
    ch_num: str
    String representation of the chapter number

    Raises
    ------------
    Exception: When the title does not contain any digit
    """
    # Leading non-digits (including the "." of "Ch.") are skipped by search
    match = re.search("[0-9][0-9.]*", chapter_title)
    if match is None:
        raise Exception("Unable to find chapter number for '" + chapter_title + "'")
    # The match always begins with a digit, so this can never become empty
    return match.group(0).rstrip(".")
def _floor_chapter_str(value, label):
    """
    Rounds a decimal chapter string down to a whole-number string and logs
    the conversion. label names the value in the log message.
    """
    floored = str(int(floor(float(value))))
    logging.warning("[extract_chapter_num_range]: " + label + " got converted from " + value + " to " + floored)
    return floored


def extract_chapter_num_range(num_string):
    """
    Parses a chapter designation such as "55", "46.5", "15a-c" or "55-57".

    Returns a tuple of strings
    Low, High
    If there is no hyphen High will be None
    If there is no valid number both Low, High will be None

    Raises
    ------------
    AssertionError: When the cleaned string has more than one hyphen
    Exception: When a range is missing its low or high number

    TODO: This does not handle any chapters with a period
    and gets rounded down. eg. 46.5 -> 46
    TODO: 46.a-c -> 46
    """
    orig_num_str = num_string
    # If the first character of the num_string is not a number simply return
    # eg. Twitter Extras 6
    if re.search("^[0-9]", num_string) is None:
        logging.warning("[extract_chapter_num_range]: num_string does not contain a number as its first character '" + num_string + "'")
        return None, None
    # https://www.mangaupdates.com/releases.html?search=73433348200&stype=series
    # This has chapters with alpha characters in it delete all of them
    # Handle 15a-c (must run first so the hyphen is removed with the letters)
    num_string = re.sub("[a-zA-Z]-[a-zA-Z]", "", num_string)
    # Handle 15c-19
    num_string = re.sub("[a-zA-Z]", "", num_string)
    # '35.5 (end)' is currently '35.5 ()' here
    # Fix by deleting parentheses
    num_string = num_string.replace("(", "").replace(")", "")
    # Defensive: nothing left after the clean up
    if num_string == "":
        return None, None

    if "-" not in num_string:
        # Single chapter
        if "." in num_string:
            return _floor_chapter_str(num_string, "num_string"), None
        try:
            return extract_chapter_num_string(num_string), None
        except Exception:
            logging.warning("[extract_chapter_num_range]: Failed to extract single chapter num! For: " + orig_num_str)
            return None, None

    # Range of chapters: exactly one hyphen is supported.
    # Raised explicitly so the check also runs under python -O
    splitted = num_string.split("-")
    if len(splitted) != 2:
        raise AssertionError("Expected exactly one hyphen in chapter range! For: " + orig_num_str)
    try:
        low = extract_chapter_num_string(splitted[0])
    except Exception as err:
        raise Exception("Range cannot have no low value! For: " + orig_num_str) from err
    try:
        high = extract_chapter_num_string(splitted[1])
    except Exception as err:
        raise Exception("Failed to extract high for range! For: " + orig_num_str) from err
    if "." in low:
        low = _floor_chapter_str(low, "Low")
    if "." in high:
        high = _floor_chapter_str(high, "High")
    return low, high
def generate_ch_range(low_str, high_str):
    """
    Expands a Low, High chapter pair into the list of chapter strings.
    55 None -> [55]
    55 57 -> [55, 56, 57]

    When High is None, Low is returned as-is (only converted with str).
    Otherwise both bounds are parsed as ints and the range is inclusive;
    a High below Low gives an empty list.

    Raises
    ------------
    Exception: When Low is None, or a bound cannot be converted to int
    """
    if low_str is None:
        raise Exception("Low should not be None!")
    if high_str is None:
        return [str(low_str)]
    try:
        low_num = int(low_str)
        high_num = int(high_str)
    except (TypeError, ValueError, OverflowError) as err:
        raise Exception("Unable to convert chapter string to int!") from err
    return [str(i) for i in range(low_num, high_num + 1)]
def count_chapter_existence(ch_strs_list, title_base_path):
    """
    Takes a list of strings representing the chapters that should exist
    Takes a path to the title folder
    Counts the number of .cbz files for a specific chapter prefix.
    The prefix of a file is its name up to the first period, so both
    "43.cbz" and "43.1.cbz" count towards "43".
    Only regular files whose name ends with ".cbz" are considered
    (eg. "44.cbz.part" is ignored).

    eg. ch_strs_list = ["45", "44", "43"]
    eg. path cbzs = ["45.cbz", "43.1.cbz", "43.2.cbz"]
    Returns a dict
    {
    "45" : 1,
    "44" : 0,
    "43" : 2
    }
    """
    suffix = ".cbz"
    # Every requested chapter shows up in the result, even when missing
    ret = {ch_str: 0 for ch_str in ch_strs_list}
    for fname in listdir(title_base_path):
        if not fname.endswith(suffix):
            continue
        if not isfile(path_join(title_base_path, fname)):
            continue
        stem = fname[:-len(suffix)]
        # "43.1" -> "43"; names without a period are kept whole
        prefix = stem.partition(".")[0]
        if prefix in ret:
            ret[prefix] += 1
    return ret
def zip_folder_into_cbz(abs_folder_path):
    """
    Zips the contents of a folder into a .cbz file placed next to it.
    eg. test/Title/216 -> test/Title/216.cbz

    The archive is first written as <folder>.zip and then renamed.
    A trailing path separator on abs_folder_path is ignored; without that
    the archive would be created inside the folder that is being zipped.

    Arguments
    --------------------
    abs_folder_path: Path of the folder to archive
    """
    head, tail = path_split(abs_folder_path)
    # An empty tail means the path ended with a separator ("a/b/")
    folder = abs_folder_path if tail else head
    shutil.make_archive(folder, 'zip', folder)
    shutil.move(folder + ".zip", folder + ".cbz")
if __name__ == "__main__":
    # Manual smoke run: print the number parsed from a sample title, then
    # pack a sample chapter folder into a .cbz
    sample_title = "Ch. 227 - Tried Various Things (3)"
    print(extract_chapter_num_string(sample_title))
    zip_folder_into_cbz("test/Isekai Nonbiri Nouka/216")