This repository has been archived by the owner on Jan 8, 2025. It is now read-only.
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathfetch.py
71 lines (51 loc) · 1.65 KB
/
fetch.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
from collections import defaultdict
from io import BytesIO
from urllib.request import urlopen
import yaml
import os
from platforms import Detector as PlatformDetector
from storage import save_repos, save_orgs, save_libraries
from utils import fetch_libraries
def fetch_orgs(detector):
    """Download the organization list and convert each entry to organizations.

    The YAML document is fetched from the URL given by the ``ORGAS_LIST_URL``
    environment variable; when that variable is unset or empty, a pinned
    revision of ``comptes-organismes-publics.yml`` is used instead.

    Args:
        detector: Object exposing ``to_orgs(entry)``; its result for each
            entry of the document is appended to the returned list.

    Returns:
        list: Everything produced by ``detector.to_orgs`` for the entries
        that could be converted. Entries whose conversion raises are
        logged and skipped.

    Raises:
        yaml.YAMLError: If the downloaded document cannot be parsed.
    """
    url = os.getenv("ORGAS_LIST_URL") or "https://git.sr.ht/~codegouvfr/codegouvfr-data/blob/e97995ed834cf99b765bf3faed12e3798c885c2c/comptes-organismes-publics.yml"

    # Use the response as a context manager so the connection is released
    # as soon as the body has been read.
    with urlopen(url) as resp:
        raw = resp.read()

    try:
        entries = yaml.safe_load(raw)
    except yaml.YAMLError as exc:
        print('YAMLError parsing error', exc)
        # Re-raise instead of returning the string 'error': callers iterate
        # over the result, so a string sentinel was walked character by
        # character and failed later with an unrelated AttributeError.
        raise

    organizations = []
    for entry in entries:
        try:
            organizations.extend(detector.to_orgs(entry))
        except Exception as e:
            # Best effort: one entry that cannot be converted must not
            # abort the whole run, so log it and move on.
            print(e)
    return organizations
# Build the platform detector and resolve the organizations to process
detector = PlatformDetector()
organizations = fetch_orgs(detector)

# Save details about each repo for an org: the per-organization results are
# merged key by key into a single mapping before being saved
all_repos = defaultdict(list)
for org in organizations:
    print("Fetching repos for: ", org)
    for key, entries in org.repos_for_org().items():
        all_repos[key].extend(entries)
save_repos(all_repos)
# Save details about each org: every value returned by to_dict() is appended
# under its key, so each key accumulates one value per recorded organization
all_orgs = defaultdict(list)
for org in organizations:
    org_details = org.get_org()
    # A get_org() result equal to {} is skipped entirely
    if not (org_details == {}):
        print("Fetching details for: ", org)
        for field, value in org_details.to_dict().items():
            all_orgs[field].append(value)
save_orgs(all_orgs)

# Save libraries created by each org
# NOTE(review): all_libs is assigned but never read in this section
all_libs = []
print("Fetching libraries from librairies.io")
libraries = fetch_libraries(all_orgs)
save_libraries(libraries)

# Last step of the run; presumably persists data the detector gathered while
# resolving organizations (confirm in platforms.Detector)
detector.save_swh_data()