-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathclarityparser.py
118 lines (104 loc) · 5.25 KB
/
clarityparser.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
from slugify import slugify # External dependency. See requirements.txt
import configuration # Local configuration file configuration.py
import io
import os
import zipfile
import csv
import json
from decimal import *
from collections import OrderedDict
import datetime
import titlecase
def _split_choice_name(choice):
    """Split a ballot choice into ``(first, last)`` at its final space.

    Everything before the last space is the first name and everything after
    it is the last name. A choice containing no space at all (for example
    "Yes" or "No") is returned whole as the last name with an empty first
    name, rather than being cut one character from the end.
    """
    first, _, last = choice.rpartition(" ")
    return first.strip(), last.strip()


def bring_clarity(rawtime, countyname):
    """Convert one county's zipped ``summary.csv`` snapshot to an Elex-style CSV.

    Reads ``summary.csv`` from the zip archive at
    ``<snapshotsdir><countyname>/<timestamp>/<configuration.filename>`` and
    writes ``<countyname>.csv`` into that same snapshot directory, one output
    row per input row, using the column order in ``lineheaders``.

    Args:
        rawtime: ``datetime.datetime`` of the snapshot. It is formatted into
            both the snapshot directory name and the ``lastupdated`` column.
        countyname: County identifier. Used in the snapshot path and output
            file name, and as a prefix for ``raceid`` / ``candidateid``.

    Returns:
        None. The result is the CSV written to disk plus progress messages
        printed to stdout.

    Raises:
        KeyError: if an input row lacks one of the ``crosswalk`` source
            columns or the ``choice name`` column.
        OSError / zipfile.BadZipFile: if the snapshot archive cannot be opened.

    Side effects:
        Sets the precision of the current ``decimal`` context to 10.
    """
    # If your server is not in Florida's time zone, these timestamps are going
    # to be wrong. You need to do some surgery.
    # Except Florida has more than one time zone.
    snapshotsdir = configuration.snapshotsdir
    filename = configuration.filename
    electiondate = configuration.electiondate
    timestamp = datetime.datetime.strftime(rawtime, "%Y%m%d-%H%M%S")
    lastupdated = datetime.datetime.strftime(rawtime, "%Y-%m-%dT%H:%M:%S")
    statename = 'Florida'
    # NOTE(review): the reporting unit name is fixed to Miami-Dade no matter
    # which countyname is passed in -- confirm this is intended.
    reportingunitname = 'Miami-Dade'
    filepath = snapshotsdir + (countyname) + "/" + timestamp + "/"
    targetfilename = filepath + (countyname) + ".csv"
    os.makedirs(snapshotsdir, exist_ok=True)
    getcontext().prec = 10  # Precision
    lineheaders = [
        "id", "raceid", "racetype", "racetypeid", "ballotorder", "candidateid", "description",
        "delegatecount", "electiondate", "electtotal", "electwon", "fipscode", "first", "incumbent",
        "initialization_data", "is_ballot_measure", "last", "lastupdated", "level", "national",
        "officeid", "officename", "party", "polid", "polnum", "precinctsreporting", "precinctsreportingpct",
        "precinctstotal", "reportingunitid", "reportingunitname", "runoff", "seatname",
        "seatnum", "statename", "statepostal", "test", "uncontested", "votecount", "votepct", "winner",
    ]
    zipfilename = filepath + filename
    # Read every row while the archive is still open, then let the context
    # managers close both the member stream and the zip file.
    with zipfile.ZipFile(zipfilename) as archive:
        with archive.open('summary.csv') as rawsummary:
            rows = list(csv.DictReader(io.TextIOWrapper(rawsummary)))

    masterlist = []
    racevotes = {}  # raceid -> running sum of votecount across that race's rows
    # Source column -> output column for fields that are copied verbatim.
    crosswalk = {
        "line number": "ballotorder",
        "contest name": "officename",
        "party name": "party",
        "total votes": "votecount",
        "percent of votes": "votepct",
        "ballots cast": "electtotal",
    }
    for row in rows:
        line = OrderedDict((header, "") for header in lineheaders)
        for source, target in crosswalk.items():
            line[target] = row[source]

        # The precinct columns come under one of two header spellings.
        if "num County total" in row:
            line['precinctstotal'] = row['num County total']
            line['precinctsreporting'] = row['num County rptg']
        elif 'num Area total' in row:
            line['precinctstotal'] = row['num Area total']
            line['precinctsreporting'] = row['num Area rptg']
        else:
            print("Problem with " + countyname + " headers. Cannot parse! Don't know what format this is.")

        # Specific cleanups:
        peep = row['choice name'].replace('\'\'', '\'').strip()  # Replace double single quotes
        line['first'], line['last'] = _split_choice_name(peep)

        precinctstotal = line["precinctstotal"]
        if precinctstotal == "0" or precinctstotal == "":
            line['precinctsreportingpct'] = 0
        else:
            try:
                line['precinctsreportingpct'] = Decimal(line['precinctsreporting']) / Decimal(precinctstotal)
            except (ArithmeticError, TypeError, ValueError):
                # Unparseable or zero-valued counts: report 0 instead of failing.
                line['precinctsreportingpct'] = 0
        # For Elex-CSV, the "pct" is kept as a decimal, not a percentage.
        # That is, the number ranges from 0 to 1.

        line["raceid"] = (countyname + " " + line['officename'])
        line["candidateid"] = ("-".join([(countyname), line["first"], line["last"]]))
        racevotes.setdefault(line["raceid"], 0)
        if line["votepct"] != "0":
            line["votepct"] = Decimal(line["votepct"]) / 100  # Number isn't a percentage; ranges from 0 to 1.
        racevotes[line["raceid"]] += int(line["votecount"])

        line['reportingunitid'] = countyname
        line['id'] = (line['raceid'] + " " + line['reportingunitid'])
        line['electiondate'] = electiondate
        line['lastupdated'] = lastupdated
        line['level'] = "subunit"
        line['statename'] = statename
        line['reportingunitname'] = reportingunitname
        masterlist.append(line)

    # electtotal is overwritten with the summed votecount for the row's race,
    # replacing the "ballots cast" value copied in through the crosswalk.
    for line in masterlist:
        line["electtotal"] = racevotes[line["raceid"]]

    # TODO: an earlier attempt to title-case every field (to make the output
    # look like normal prose) was left disabled and is not applied here.

    with open(targetfilename, "w", newline="") as f:
        writer = csv.writer(f)
        writer.writerow(lineheaders)
        for row in masterlist:
            writer.writerow(list(row.values()))
    print("Done parsing out " + countyname)