-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathLibScraper.py
102 lines (95 loc) · 4.84 KB
/
LibScraper.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
from urllib.request import urlopen
from urllib.request import urlretrieve
from urllib.error import HTTPError
import time
import os
# A listing-page URL is built as BaseLibUrl + <1-based page number> + EndLibUrl.
BaseLibUrl = "https://maplelegends.com/lib/monster?page="
EndLibUrl = "&search=&filter=1&order=2&sort=1"
# Prefix of a single monster's page URL; presumably the monster id is appended.
BaseMobUrl = "https://maplelegends.com/lib/monster?id="
# Prefix of the monster image URLs; presumably the monster id is appended.
BaseMobImage = "https://maplelegends.com/static/images/lib/monster/"
# Local directory name meant to receive downloaded monster images.
ImagesPath = "images"
# Placeholder; the scrape loop rebinds this to the current listing-page response.
page = ""
# Per-monster fields. These initial values are placeholders that the scrape
# loop overwrites with the values parsed for the monster being processed.
MonsterID = 0
MonsterName = ""
MonsterLevel = 0
# NOTE(review): MonsterHP is never assigned anywhere else in the visible code.
MonsterHP = 0
MonsterAvoid = 0
# Elemental flags, five slots each, in the order element_checker produces:
# Ice, Lightning, Fire, Poison, Holy.
MonsterWeak = [False] * 5
MonsterStrong = [False] * 5
MonsterImmune = [False] * 5
# Number of listing pages the scrape loop visits. The trailing 127 is
# presumably the full page count of the library -- TODO confirm.
MAX_PAGES = 1 # 127
# One flag per region the monster can be found in.
# NOTE(review): the comment below names 16 entries but the list has only 14
# slots, and the scrape loop uses index 13 as the "review manually" flag.
# Confirm the intended size and index mapping.
MonsterLocations = [False] * 14 # Maple Island, Victoria Island, Orbis/ElNath,Aqua, Ludi/Omega/KFT/Ellin, Mu Lung/Herb Town, Ariant/Magatia, Leafre/ToT, Zipangu, NT, China, Thailand, Taiwan, Singapore,PQ, Manual
def extract_tag(source, StartTag, EndTag):
    """Return the text between the first StartTag and the EndTag that follows it.

    Args:
        source: Text to search.
        StartTag: Marker that precedes the wanted text.
        EndTag: Marker that terminates the wanted text; it is searched for
            only after the end of StartTag.

    Returns:
        The substring strictly between the two markers, or an empty string
        when either marker cannot be found.
    """
    start = source.find(StartTag)
    if start == -1:
        # str.find signals "missing" with -1; using that value as an offset
        # would silently return unrelated text from the start of the source.
        return ""
    start += len(StartTag)
    # Searching with a start offset avoids copying the tail of the source.
    end = source.find(EndTag, start)
    if end == -1:
        return ""
    return source[start:end]
def element_checker(element):
    """Translate element names into a five-slot list of boolean flags.

    Args:
        element: Iterable of strings; each is stripped of surrounding
            whitespace before being compared, and the match is case-sensitive.

    Returns:
        A list of five booleans, in the order Ice, Lightning, Fire, Poison,
        Holy, where a slot is True when that name occurs in ``element``.
        Names outside those five are ignored.
    """
    present = {entry.strip() for entry in element}
    ordered_names = ("Ice", "Lightning", "Fire", "Poison", "Holy")
    return [name in present for name in ordered_names]
# Walk the monster library page by page and scrape the monsters listed on it.
for i in range(MAX_PAGES):
    # Fetch the listing page; the context manager closes the HTTP response.
    try:
        with urlopen(BaseLibUrl + str(i + 1) + EndLibUrl) as page:
            html = page.read().decode("utf-8")
    except HTTPError:
        print("Error on page " + str(i+1))
        # There is no fresh page to parse, so skip it instead of falling
        # through with an undefined (or previous page's) html. Still pause so
        # a failing server is not hit again immediately.
        time.sleep(10)
        continue

    # Drop everything up to and including the opening tag of the monster table.
    table_marker = "class=\"table text-center table-bordered\">"
    table_index = html.find(table_marker)
    html = html[table_index + len(table_marker):]
    # Each monster row carries two links (image and name), hence the halving.
    MOB_PER_PAGE = html.count("<a href=") // 2

    # NOTE(review): the loop is still limited to the first monster of the page.
    # To scrape the whole page, iterate range(MOB_PER_PAGE) and add a delay
    # (the original suggested time.sleep(10)) between monster requests.
    for j in range(1):
        link_prefix = "<a href=\"/lib/monster?id="
        MonsterID = extract_tag(html, link_prefix, "\">")  # gets the mob id
        # Move past the image cell so the next matching link is the name link.
        html = html[html.find("</center>"):]
        MonsterName = extract_tag(html, link_prefix + str(MonsterID) + "\">", "</a>")
        print(MonsterName)
        print(MonsterID)

        # Clear location flags left over from a previously scraped monster.
        MonsterLocations[:] = [False] * len(MonsterLocations)

        # Open this monster's own page; &tab=2 refers to drops or locations.
        try:
            with urlopen(BaseMobUrl + str(MonsterID) + "&tab=2") as MobPage:
                Mobhtml = MobPage.read().decode("utf-8")
        except HTTPError:
            print("Error opening monster id: " + str(MonsterID))
        else:
            # TODO: download the monster picture, e.g.
            # FullPath = os.path.join(ImagesPath, MonsterID + ".png")
            # urlretrieve("https://maplelegends.com/static/images/lib/monster/" + MonsterID, FullPath)

            # Other stats can be pulled the same way as the ones below.
            MonsterLevel = extract_tag(Mobhtml, "Level: ", "</td>")
            MonsterAvoid = extract_tag(Mobhtml, "Avoidability: ", "</td>")
            # Elemental properties are comma-separated lists of element names.
            Weak = extract_tag(Mobhtml, "Weak:", "</p>").split(',')
            MonsterWeak = element_checker(Weak)
            Strong = extract_tag(Mobhtml, "Strong:", "</p>").split(',')
            MonsterStrong = element_checker(Strong)
            Immune = extract_tag(Mobhtml, "Immune:", "</p>").split(',')
            MonsterImmune = element_checker(Immune)

            # Count the maps this monster is listed in.
            MapCount = Mobhtml.count("/lib/map?id=")
            if MapCount == 0:
                # No maps listed: flag the monster for manual review.
                MonsterLocations[13] = True
            else:
                Mobhtml = Mobhtml[Mobhtml.find("/lib/map?id="):]
                for k in range(MapCount):
                    WorldID = extract_tag(Mobhtml, "/lib/map?id=", "\">")  # the map id
                    # Leave the current link so the same id is not found again,
                    # then jump to the next map link.
                    Mobhtml = Mobhtml[Mobhtml.find("</a>"):]
                    Mobhtml = Mobhtml[Mobhtml.find("/lib/map?id="):]
            # TODO: all mob info is collected at this point; write it to json.

        # Advance to the next monster row. html already starts at the current
        # row's "</center>", so the next "<center>" opens the following row;
        # an offset computed before that truncation must not be reused here.
        NextIndex = html.find("<center>")
        if NextIndex == -1:
            break  # no further monsters on this page
        html = html[NextIndex + len("<center>"):]

    # Be kind to the server: wait 10 seconds before requesting the next page.
    time.sleep(10)