-
Notifications
You must be signed in to change notification settings - Fork 5
/
Copy pathsimplify_dataset.py
58 lines (49 loc) · 1.57 KB
/
simplify_dataset.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
#
# This script simplifies the CSV file of CVE entries for the Linux kernel.
#
# The original file has the following fields:
# - #,
# - CVE ID,
# - CWE ID,
# - # of Exploits,
# - Vulnerability Type(s),
# - Publish Date,
# - Update Date,
# - Score,
# - Gained Access Level,
# - Access,
# - Complexity,
# - Authentication,
# - Conf.,
# - Integ.,
# - Avail.
#
import os
import sys
import csv
# Columns of the original dataset that are kept in the simplified file.
# The list is also passed as the DictWriter ``fieldnames``, so its order is
# the column order of the output.
INTERESTING_FIELDS_NAMES = [
    'CVE ID',
    'CWE ID',
    'Vulnerability Type(s)',
    'Avail.',
]
def usage():
    """Print the command-line usage string to standard output."""
    # The call form of print behaves the same on Python 2 (single string
    # argument) and is the only form accepted by Python 3.
    print('usage: python simplify_dataset.py <path_to_dataset_to_simplify> <output_file>')
def _clean_description(description):
    """Return ``description`` with the known marker substrings removed.

    Every occurrence of 'Partial ', 'Complete ', 'None ' and
    '** DISPUTED ** ' (each including its trailing space) is deleted, one
    marker after the other in that order.
    """
    # NOTE(review): these look like impact-level labels and a dispute tag
    # that ended up inside the 'Avail.' column -- confirm against the dataset.
    markers = ('Partial ', 'Complete ', 'None ', '** DISPUTED ** ')
    cleaned = description
    for marker in markers:
        cleaned = cleaned.replace(marker, '')
    return cleaned
def _open_csv(path, mode):
    """Open ``path`` with mode 'r' or 'w' the way the csv module requires.

    Python 2's csv module expects binary-mode files, while Python 3's expects
    text-mode files opened with ``newline=''``. Using plain text mode lets
    the platform translate line endings, which produces stray carriage
    returns in the output on Windows.
    """
    if sys.version_info[0] < 3:
        return open(path, mode + 'b')
    return open(path, mode, newline='')


def simplyfy(dataset_path, output_path):
    """Write a reduced copy of the ';'-delimited CSV at ``dataset_path``.

    The output file at ``output_path`` contains a header row followed by one
    row per input row, restricted to the columns listed in
    INTERESTING_FIELDS_NAMES. The 'Avail.' value of every row is passed
    through _clean_description before being written.

    If ``dataset_path`` does not exist, a message is printed and the process
    exits with status -1 (raises SystemExit). A row lacking one of the
    expected columns raises KeyError.
    """
    if not os.path.exists(dataset_path):
        # Call form of print: identical on Python 2, required on Python 3.
        print("the specified file does not exist")
        sys.exit(-1)

    with _open_csv(dataset_path, 'r') as dataset_f:
        cve_reader = csv.DictReader(dataset_f, delimiter=';')
        with _open_csv(output_path, 'w') as output_f:
            output_writer = csv.DictWriter(
                output_f,
                fieldnames=INTERESTING_FIELDS_NAMES,
                delimiter=';',
            )
            output_writer.writeheader()
            for cve in cve_reader:
                cve['Avail.'] = _clean_description(cve['Avail.'])
                # Keep only the interesting columns of the current row.
                output_writer.writerow(
                    {field: cve[field] for field in INTERESTING_FIELDS_NAMES}
                )
if __name__ == '__main__':
    # Two positional arguments are required: the dataset to read and the
    # file to write. Anything beyond those two is ignored.
    if len(sys.argv) < 3:
        usage()
        sys.exit(0)
    simplyfy(*sys.argv[1:3])