forked from NAAleks/GoPo-Project
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathmain.py
executable file
·121 lines (103 loc) · 3.28 KB
/
main.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
#!/usr/bin/env python3
import requests
from bs4 import BeautifulSoup
import csv
from time import sleep
# Base address of the OpenSecrets non-profit outside-spending candidate table.
URL = "https://www.opensecrets.org/outsidespending/nonprof_cands.php"
# One query URL per election cycle, listed in the order main() visits them.
open_secret_urls = [URL + "?cycle=" + cycle for cycle in ("2014", "2016", "2012")]
# US Chamber of Commerce "how they voted" page (2016); its table rows are
# what lookup() searches for a senator's CoC index.
CoC_url = "https://www.uschamber.com/how-they-voted/2016"
# When True, lookup() and openSecrets() print progress messages.
DEBUG = True
def clean_up(line):
    """Normalise a scraped value into an underscore-separated token.

    The value is converted to ``str`` and stripped of surrounding
    whitespace; remaining spaces become underscores, and commas plus the
    party tags ``(D)``, ``(R)`` and ``(I)`` are removed.

    Args:
        line: Any value; it is passed through ``str()`` first.

    Returns:
        The cleaned string, e.g. ``"Cruz, Ted (R)"`` -> ``"Cruz_Ted_"``.
    """
    text = str(line).strip().replace(' ', '_')
    # Removal order matches the original chain: comma first, then party tags.
    for unwanted in (',', '(D)', '(R)', '(I)'):
        text = text.replace(unwanted, '')
    return text
def numericify(data):
    """Strip formatting from a scraped amount so only its raw text remains.

    The value is first normalised by ``clean_up`` (which trims it and
    removes commas), then every underscore and dollar sign is dropped,
    so ``"$1,234"`` becomes ``"1234"``.

    Args:
        data: The scraped value; any type accepted by ``clean_up``.

    Returns:
        The resulting string. It is not converted to a number.
    """
    amount = str(clean_up(data))
    for symbol in ('_', '$'):
        amount = amount.replace(symbol, '')
    return amount
def unpack(stri):
    """Turn an underscore-separated token back into readable text.

    Every underscore is replaced by a space and the result is stripped of
    leading and trailing whitespace.

    Args:
        stri: A string, typically one produced by ``clean_up``.

    Returns:
        The space-separated, trimmed string.
    """
    return stri.replace('_', ' ').strip()
def lookup(name,rows):
    """Find a senator's Chamber of Commerce index among scraped table rows.

    Args:
        name: Space-separated candidate name. The first token is used as
            the last name and the second token as the first name.
        rows: Iterable of parsed table rows exposing ``.contents``; item 1
            is read as the member label and item 5 as the score cell.

    Returns:
        The integer score of the matching row, or -1 when no Senate row
        contains both name parts or when ``name`` has fewer than two
        tokens. If several rows match, the last one wins.

    Raises:
        ValueError: If a matching row's score (minus its final character)
            is not an integer.
    """
    parts = name.split(" ")
    if len(parts) < 2:
        # Without both a last and a first name nothing can be matched;
        # report "not found" instead of failing with IndexError.
        return -1
    last, first = parts[0], parts[1]
    if first == "Steven":
        first = "Steve"  # The only special case that doesn't work with the web scraper.
    index = -1
    if DEBUG:
        print ("Looking up the CoC Index for " + first + " " + last + ".")
    for row in rows:
        label = str(row.contents[1].text)
        if "Senate" not in label:
            continue
        # Only the text before the word "Senate" is compared with the name.
        match = label.split('Senate')[0].strip()
        if first in match and last in match:
            index_str = str(row.contents[5].text).strip()
            # Drop the final character (presumably a trailing '%') before parsing.
            index = int(index_str[:-1])
    return index
def openSecrets(url, timeout=30):
    """Scrape one OpenSecrets page and pair Senate winners with CoC indexes.

    The Chamber of Commerce page at ``CoC_url`` and the OpenSecrets page at
    ``url`` are both downloaded and parsed. Every table row after the first
    is inspected; rows whose region code has ``'S'`` as its third character
    (presumably marking a Senate race) and whose status cell reads
    ``"Winner"`` are looked up in the Chamber of Commerce rows.

    Args:
        url: Address of the OpenSecrets table to scrape.
        timeout: Seconds to wait for each HTTP request before giving up.

    Returns:
        A tuple ``(names, contributions, votes)`` of parallel lists: the
        ``clean_up``-normalised names, the ``numericify``-ed contribution
        strings, and the integer CoC indexes. Candidates for whom
        ``lookup`` returns -1 are reported on stdout and left out.

    Raises:
        requests.RequestException: If a request times out, fails, or
            returns an HTTP error status.
    """
    # Without a timeout a stalled server would block the script forever, and
    # without the status check an error page would be parsed as if it were data.
    CoC_r = requests.get(CoC_url, timeout=timeout)
    CoC_r.raise_for_status()
    CoC_soup = BeautifulSoup(CoC_r.text, 'html.parser')
    CoC_rows = CoC_soup.find_all('tr', class_="views-row")

    r = requests.get(url, timeout=timeout)
    r.raise_for_status()
    soup = BeautifulSoup(r.text, 'html.parser')
    rows = soup.find_all('tr')

    names = []
    contributions = []
    votes = []
    # The first <tr> is always skipped (presumably the table header).
    for row in rows[1:]:
        cells = row.contents
        if len(cells) < 15:
            # Cells 0, 2, 4 and 14 are read below; shorter rows cannot be
            # candidate rows, so skip them instead of raising IndexError.
            continue
        region = str(cells[2].text).strip()
        name = clean_up(str(cells[0].text))
        cont = numericify(cells[4].text)
        if len(region) < 3 or region[2] != 'S' or cells[14].text != "Winner":
            continue
        vote = lookup(unpack(name), CoC_rows)
        if vote == -1:  # THE API IS NOT PERFECT DUE TO TIME LIMITATIONS
            print("[-] " + unpack(name) + " Was not FOUND!")
            continue
        if DEBUG:
            print("[+] " + unpack(name) + " has recived $" + cont + " dark money contributions and has a CoC index of " + str(vote))
        contributions.append(cont)
        votes.append(vote)
        names.append(name)
    print("Checking to make sure the amount of data collected is equal....")
    print(len(names) == len(contributions))
    print(len(votes) == len(names))
    return (names,contributions,votes)
def write(names,contributions,votes):
    """Write the collected data to ``data.csv`` in the working directory.

    A header row ``Name, Cont, Votes`` is written first, followed by one
    fully quoted row per entry in ``names``. Each name is passed through
    ``unpack`` before being written.

    Args:
        names: Underscore-encoded candidate names.
        contributions: Contribution values, parallel to ``names``.
        votes: CoC index values, parallel to ``names``.

    Raises:
        IndexError: If ``contributions`` or ``votes`` is shorter than
            ``names``.
    """
    # The csv module requires newline='' on the file object; otherwise an
    # extra blank line appears after every row on platforms that translate
    # '\n' to '\r\n'.
    with open('data.csv', 'w', newline='') as myfile:
        wr = csv.writer(myfile, quoting=csv.QUOTE_ALL)
        wr.writerow(["Name","Cont","Votes"])
        # Indexing (rather than zip) keeps a length mismatch loud.
        for i, name in enumerate(names):
            wr.writerow([unpack(name), contributions[i], votes[i]])
def main():
    """Scrape every configured cycle page and save the combined results.

    Each URL in ``open_secret_urls`` is processed by ``openSecrets``; the
    returned lists are concatenated across cycles, handed to ``write``,
    and the total number of collected names is printed.
    """
    all_names, all_contributions, all_votes = [], [], []
    for cycle_url in open_secret_urls:
        cycle_names, cycle_cont, cycle_votes = openSecrets(cycle_url)
        all_names.extend(cycle_names)
        all_contributions.extend(cycle_cont)
        all_votes.extend(cycle_votes)
        # Wait a second before moving on to the next page.
        sleep(1)
    write(all_names, all_contributions, all_votes)
    print(len(all_names))
# Run the scrape only when executed as a script, so that importing this
# module does not trigger network requests or overwrite data.csv.
if __name__ == "__main__":
    main()