-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathmain.py
83 lines (67 loc) · 2.37 KB
/
main.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
#!/usr/bin/env python
# -*- coding: utf-8 -*-
import os
import json
import argparse
from lib.helpers import fetch, parse, extract
parser = argparse.ArgumentParser(
description="Fetches information about libraries from the German Library Association",
usage='Use "%(prog)s --help" for more information',
)
parser.add_argument(
'-s', '--states', nargs='+',
help="Fetches information for given federal states",
choices={
'baden-wuerttemberg',
'bayern',
'berlin',
'brandenburg',
'bremen',
'hamburg',
'hessen',
'mecklenburg-vorpommern',
'niedersachsen',
'nordrhein-westfalen',
'rheinland-pfalz',
'saarland',
'sachsen-anhalt',
'sachsen',
'schleswig-holstein',
'thueringen'
},
)
if __name__ == "__main__":
# Parsing arguments (if any)
args = parser.parse_args()
# Creating list for holding states & respective URLs
state_urls = []
with open('states.json', 'r') as file:
# Load URLs from JSON
data = json.load(file)
# Store them along with state names
for state, url in data.items():
state_urls.append([state, url])
# Fetching data, storing as JSON
for state, library_url in state_urls:
# (1) Check if any states are provided via CLI
# (2) Check if current state isn't among them
# (3) Move on to next state if both are true
if len(args.states) is not False and state not in args.states:
continue
# Build output path
file = 'data/' + state + '.json'
# Only (over)write files for states whose data ..
# (1) .. doesn't exist or
# (2) .. was aborted during execution
if os.path.isfile(file) is False or os.path.getsize(file) == 0:
with open(file, 'w') as file:
# Fetch library URLs for all states
library_urls = fetch(library_url)
# Holds information of all libraries
libraries = []
# Parse each library URL, extract its data & save it
for library_url in library_urls:
library = parse(library_url)
libraries.append(library)
# Store data as (UTF-8 encoded) JSON file
json.dump(libraries, file, ensure_ascii=False, indent=4)