-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathparser_code.py
163 lines (144 loc) · 6.46 KB
/
parser_code.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
import requests
import json
import base64
import re
# INFO: the GitHub API enforces hourly request rate limits.
# Module-level GitHub credentials, passed as HTTP basic auth on API requests.
# Both start empty; Parser.__init__ overwrites them with the values it reads
# from token.txt.
username = ""
token = ""
class Parser:
    """Entry point that reports the license and dependency tree of a GitHub repo.

    Constructing a Parser reads the GitHub credentials from ``token.txt`` in
    the current working directory and publishes them through the module-level
    ``username`` / ``token`` globals, which the module's request helpers use
    for authentication.
    """

    def __init__(self, URL, dev_dep=False):
        """Load credentials and derive the API URL for *URL*.

        Args:
            URL: Repository URL of the form ``https://github.com/<owner>/<repo>``.
            dev_dep: Passed through to ``get_dependencies`` by ``parse``.

        Raises:
            OSError: ``token.txt`` cannot be opened.
            IndexError: ``token.txt`` or *URL* does not have the expected shape.
        """
        global username, token
        username, token = self.__get_token()
        self.URL = URL
        self.dev_dep = dev_dep
        self.api_url = self.__get_api_url()

    def __get_api_url(self):
        """Return the ``api.github.com/repos/<owner>/<repo>`` URL for ``self.URL``."""
        # "https://github.com/owner/repo".split("/") ->
        # ["https:", "", "github.com", "owner", "repo"]
        parts = self.URL.split("/")
        return "https://api.github.com/repos/" + parts[3] + "/" + parts[4]

    def parse(self):
        """Return ``{<repo>: {"license": ..., "dependencies": ...}}`` for the repo."""
        # Dependencies are resolved before the license, as in the original
        # call order.
        dependencies = get_dependencies(self.api_url, self.dev_dep)
        repo_name = self.URL.split("/")[4]
        return {
            repo_name: {
                "license": get_license(self.api_url),
                "dependencies": dependencies,
            }
        }

    def __get_token(self):
        """Read ``(username, token)`` from ``token.txt``.

        The file is expected to hold two ``key=value`` lines: the user name on
        the first line and the token on the second.
        """
        # The context manager closes the file even if a read fails.
        with open("token.txt", "r") as file:
            user_line = file.readline()
            token_line = file.readline()
        # Split on the first "=" only so a value may itself contain "=", and
        # strip so a trailing newline never ends up inside a credential.
        user_ = user_line.split("=", 1)[1].strip()
        token_ = token_line.split("=", 1)[1].strip()
        return user_, token_
def get_api_url(URL):
    """Translate a GitHub repository URL into its REST API URL.

    The owner and repository name are taken from the fourth and fifth
    "/"-separated pieces of *URL*, so any path after the repository name is
    ignored. Raises IndexError when *URL* has fewer pieces than that.
    """
    pieces = URL.split("/")
    return "https://api.github.com/repos/{}/{}".format(pieces[3], pieces[4])
def get_license(api_url):
    """Fetch the license key of the repository behind *api_url*.

    Sends an authenticated GET to ``<api_url>/license`` using the module-level
    ``username`` / ``token`` credentials. On a 200 response the ``key`` of the
    returned ``license`` object is returned as a string; on any other status a
    message is printed and the string ``"ERROR"`` is returned.
    """
    reply = requests.get(api_url + "/license", auth=(username, token))
    if reply.status_code != 200:
        print("License information cannot found.")
        return "ERROR"
    license_info = reply.json()["license"]
    return str(license_info["key"])
def get_languages(api_url):
    """Return the language names reported for the repository behind *api_url*.

    Sends an authenticated GET to ``<api_url>/languages`` using the
    module-level ``username`` / ``token`` credentials and returns the keys of
    the JSON object in the response.

    Returns:
        A list of language names, or an empty list when the request does not
        succeed. Without the status check, the keys of an error body would be
        returned as if they were languages.
    """
    response = requests.get(api_url + "/languages", auth=(username, token))
    if response.status_code != 200:
        return []
    return list(response.json().keys())
def get_repo_from_npm(URL):
    """Scrape the repository link from an npm package page.

    Args:
        URL: Address of the package page to download.

    Returns:
        The ``href`` value of the first link that follows the
        ``aria-labelledby="repository"`` marker in the page HTML.

    Raises:
        IndexError: No such link is present. This is the exception type the
            original ``findall(...)[0]`` lookup raised, kept for callers.
    """
    response = requests.get(URL)
    # Non-greedy matching stops at the nearest closing attributes. With a
    # greedy ".*", several links carrying the same target/rel attributes on
    # one line would all be swallowed into a single match.
    anchor = re.search(
        r'aria-labelledby="repository".*?'
        r'target="_blank" rel="noopener noreferrer nofollow">',
        response.text,
    )
    href = re.search(r'href="(.*?)" target', anchor.group(0)) if anchor else None
    if href is None:
        raise IndexError("repository link not found on npm page: " + str(URL))
    # The capture group replaces the original [6:-8] slice, which trimmed the
    # leading 'href="' and the trailing '" target'.
    return href.group(1)
def get_repo_from_pip(URL):
    """Find the GitHub repository URL in a package's JSON metadata.

    Downloads *URL*, reads ``info.project_urls`` from the JSON body and
    returns ``https://github.com/<owner>/<repo>`` built from the first entry
    that contains a GitHub URL with both an owner and a repository segment.

    Assumes every GitHub URL listed there points at the repository or one of
    its sub-pages (issues, wiki, ...).

    Raises:
        Exception: No usable GitHub URL is listed.
    """
    response = requests.get(URL).json()
    # project_urls may be null in the metadata; treat that as "no links".
    project_urls = response["info"]["project_urls"] or {}
    for url in project_urls.values():
        # Require both path segments so a bare profile link is skipped rather
        # than failing with IndexError; fragments and query strings are cut.
        found = re.search(r"https://github\.com/([^/\s#?]+)/([^/\s#?]+)", url or "")
        if found:
            return "https://github.com/" + found.group(1) + "/" + found.group(2)
    raise Exception("ERROR: GITHUB REPO CANNOT FOUND")
def _parse_package_json(manifest_text, dev_dep):
    """Return the dependency mapping declared in a package.json document."""
    try:
        manifest = json.loads(manifest_text)
    except ValueError:
        # Unparseable manifest: best effort, report no dependencies.
        return {}
    if not isinstance(manifest, dict):
        return {}
    sections = ["dependencies"]
    if dev_dep:
        sections.append("devDependencies")
    packages = {}
    for section in sections:
        declared = manifest.get(section)
        # A missing section is normal and must not raise.
        if isinstance(declared, dict):
            packages.update(declared)
    return packages


def _parse_requirements(requirements_text):
    """Return {package name: version specifier} for a requirements.txt document."""
    packages = {}
    for raw_line in requirements_text.splitlines():
        # Drop comments (a "#" at line start or after whitespace) and padding.
        line = re.sub(r"(^|\s)#.*$", "", raw_line).strip()
        found = re.match(r"([A-Za-z0-9][A-Za-z0-9._-]*)\s*(.*)$", line)
        if not found:
            # Blank lines, option lines ("-r", "-e", ...) and paths.
            continue
        name, rest = found.group(1), found.group(2)
        # Whatever follows the name must start a specifier, extras, a marker
        # or a direct reference; otherwise the line is not a requirement
        # (for example "git+https://...").
        if rest and rest[0] not in "=<>!~;@[(":
            continue
        packages[name] = rest
    return packages


def get_dependencies(api_url, dev_dep=False):
    """Build the dependency tree of the repository behind *api_url*.

    For every language reported by ``get_languages`` that is supported
    (JavaScript via ``package.json``, Python via ``requirements.txt``) the
    manifest is downloaded through the GitHub contents API with the
    module-level ``username`` / ``token`` credentials. Each declared package
    is then resolved to its GitHub repository and described recursively.

    Args:
        api_url: ``https://api.github.com/repos/<owner>/<repo>`` URL.
        dev_dep: Also include ``devDependencies`` of a JavaScript manifest.
            Nested packages are always resolved with ``dev_dep=False``.

    Returns:
        A dict mapping each package name to
        ``{"license": ..., "dependencies": {...}}``. A package that cannot be
        resolved maps to
        ``{"license": "ERROR", "dependencies": {"ERROR": "ERROR"}}``.
        ``{"ERROR": "ERROR"}`` is included when the manifest of the last
        supported language could not be downloaded. ``{}`` is returned when no
        reported language is supported.

    NOTE(review): the recursion has no cycle or depth guard, so packages that
    depend on each other are re-resolved until a nested call fails.
    """
    # language -> (manifest file in the repository, registry base URL)
    manifests = {
        "JavaScript": ("package.json", "https://www.npmjs.com/package/"),
        "Python": ("requirements.txt", "https://pypi.org/pypi/"),
    }
    languages = get_languages(api_url)
    stored_dep = {}
    # Outcome of the most recent supported language; None means none was seen.
    dependency_dict = None
    for language in languages:
        if language not in manifests:
            continue
        manifest_name, registry_url = manifests[language]
        response = requests.get(
            api_url + "/contents/" + manifest_name, auth=(username, token)
        )
        if response.status_code != 200:
            dependency_dict = {"ERROR": "ERROR"}
            continue

        # The contents API delivers the file body base64-encoded.
        encoded = response.json()["content"].encode("utf-8")
        manifest_text = base64.decodebytes(encoded).decode("utf-8")
        if language == "JavaScript":
            dependency_dict = _parse_package_json(manifest_text, dev_dep)
        else:
            dependency_dict = _parse_requirements(manifest_text)

        # Replace each version value with the resolved license/dependency
        # record. Iterate over a snapshot because values are reassigned.
        for package in list(dependency_dict):
            try:
                if language == "JavaScript":
                    repo_url = get_repo_from_npm(registry_url + package)
                else:
                    repo_url = get_repo_from_pip(registry_url + package + "/json")
                package_api_url = get_api_url(repo_url)
                dependency_dict[package] = {
                    "license": get_license(package_api_url),  # can be 'other'
                    "dependencies": get_dependencies(package_api_url, False),
                }
            except Exception:
                # Deliberate best effort: one unresolvable package must not
                # abort the whole tree.
                dependency_dict[package] = {
                    "license": "ERROR",
                    "dependencies": {"ERROR": "ERROR"},
                }

        # Results of every language except the last reported one are set
        # aside and merged over the final result below.
        if language != languages[-1]:
            stored_dep.update(dependency_dict)

    if dependency_dict is None:
        return {}
    dependency_dict.update(stored_dep)
    return dependency_dict
#URL = "https://github.com/aws/aws-sdk-js" # example, normally it is given from web-app
#URL = "https://github.com/Haytaf17/haytaf17database"
#URL = "https://github.com/aydinbugra/Nutbarn"
#URL = "https://github.com/aydinbugra/test"
#URL = "https://github.com/marnusw/cloudinary-tiny-js"
#parser = Parser(URL,False)
#print(parser.parse()) # License Checker can use this function to get all dependencies and their license info
# For the not-found case
#URL = "https://github.com/aydinbugra/Nutbarn"
#URL = "https://github.com/Haytaf17/haytaf17database"
#print(parse(URL))