diff --git a/scripts/legislation_info_generator.py b/scripts/legislation_info_generator.py
index fc4acf0..ce23c2f 100644
--- a/scripts/legislation_info_generator.py
+++ b/scripts/legislation_info_generator.py
@@ -1,10 +1,29 @@
 import os
 import pyairtable
 import json
-LEGISLATION_INFO = os.environ['LEGISLATION_INFO']
-api = pyairtable.Api(LEGISLATION_INFO)
-table = api.table('apps7I6q0g9Hyb6j9','tblydWhHOZeqjzycO')
-legislation = table.all()
-legislation_list = [x['fields'] for x in legislation]
-with open("public/legislations_info.json", "w") as outfile:
-    json.dump(legislation_list, fp = outfile,indent = 4)
\ No newline at end of file
+
+def generate_legislation_info(API_KEY, APP_KEY, TBL_KEY):
+    """
+    Generates legislation information and saves it to a JSON file.
+
+    This function retrieves legislation records from an Airtable base,
+    converts them to a list of dictionaries, and saves them to a JSON file for the front-end to access.
+
+    Args:
+        API_KEY (str): The API key for accessing the Airtable database.
+        APP_KEY (str): The ID of the Airtable base (app) to read from.
+        TBL_KEY (str): The ID of the legislation table within that base.
+
+    Returns:
+        None
+    """
+    api = pyairtable.Api(API_KEY)
+    table = api.table(APP_KEY, TBL_KEY)
+    legislation = table.all()
+    legislation_list = [x['fields'] for x in legislation]
+    with open("public/legislations_info.json", "w") as outfile:
+        json.dump(legislation_list, fp=outfile, indent=4)
+
+if __name__ == '__main__':
+    API_KEY = os.environ['LEGISLATION_INFO']
+    generate_legislation_info(API_KEY, 'apps7I6q0g9Hyb6j9', 'tblydWhHOZeqjzycO')
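# Editor's note -- illustrative only, not part of the patch: pyairtable's table.all() returns
# records shaped like the sketch below (dicts with 'id', 'createdTime', and 'fields' keys), and
# only the 'fields' payload is written to public/legislations_info.json. Field names are made up.
records = [
    {"id": "rec123", "createdTime": "2024-01-01T00:00:00.000Z",
     "fields": {"Bill": "S1234", "Status": "In Committee"}},
]
legislation_list = [record["fields"] for record in records]
# -> [{"Bill": "S1234", "Status": "In Committee"}]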
diff --git a/scripts/legislation_votes_generator.py b/scripts/legislation_votes_generator.py
index 9dc808a..45f1e8a 100644
--- a/scripts/legislation_votes_generator.py
+++ b/scripts/legislation_votes_generator.py
@@ -2,28 +2,45 @@
 import pandas as pd
 import json
-senate_data = []
-with open("public/senate.geo.json", 'r') as f:
-    senate_data = json.load(f)
-assembly_data = []
-with open("public/assembly.geo.json", 'r') as f:
-    assembly_data = json.load(f)
-
-# Function to extract unique bill names from "HCMC support"
-def extract_unique_bills(features):
-    unique_bills = set()
-    for feature in features:
-        hcmc_support = feature['properties'].get('HCMC support', [])
-        unique_bills.update(hcmc_support)
-    return unique_bills
+def create_summary_data(counts, house):
+    """
+    Creates a summary of vote counts for each party in the given house.
+
+    Args:
+        counts (dict): A dictionary containing the vote counts for each party and bill.
+        house (str): The name of the house.
-
-# Extracting unique bills from Senate and Assembly
-unique_bills_senate = extract_unique_bills(senate_data['features'])
-unique_bills_assembly = extract_unique_bills(assembly_data['features'])
+
+    Returns:
+        list: A list of dictionaries, where each dictionary represents a party and its vote counts.
+              Each dictionary contains the following keys:
+              - "House": The name of the house.
+              - "Party": The name of the party.
+              - The bill names as keys, and the corresponding vote counts as string values.
+    """
+    summary = []
+    for party, bills in counts.items():
+        party_data = {"House": house, "Party": party}
+        party_data.update({bill: str(count) for bill, count in bills.items()})
+        summary.append(party_data)
+    return summary
-
-# Combining and sorting the unique bills
-all_unique_bills = sorted(unique_bills_senate.union(unique_bills_assembly))
 
 def count_bills_support(features, unique_bills):
+    """
+    Counts the number of bills supported by Democrats and Republicans.
+
+    Args:
+        features (list): A list of features containing information about each legislator.
+        unique_bills (list): A list of unique bill names.
+
+    Returns:
+        dict: A dictionary containing the counts of bills supported by Democrats and Republicans.
+              The dictionary has the following structure:
+              {
+                  "Democrat": {bill: count},
+                  "Republican": {bill: count}
+              }
+              where 'bill' is the name of the bill and 'count' is the number of occurrences.
+    """
     counts = {
         "Democrat": {bill: 0 for bill in unique_bills},
         "Republican": {bill: 0 for bill in unique_bills}
@@ -44,27 +61,71 @@ def count_bills_support(features, unique_bills):
     return counts
-
-# Counting bills for Senate and Assembly
-senate_bills_counts = count_bills_support(senate_data['features'], all_unique_bills)
-assembly_bills_counts = count_bills_support(assembly_data['features'], all_unique_bills)
-def create_summary_data(counts, house):
-    summary = []
-    for party, bills in counts.items():
-        party_data = {"House": house, "Party": party}
-        party_data.update({bill: str(count) for bill, count in bills.items()})
-        summary.append(party_data)
-    return summary
+def extract_unique_bills(features):
+    """
+    Extracts unique bills from a list of features.
+
+    Args:
+        features (list): A list of features.
+
+    Returns:
+        set: A set of unique bills.
+    """
+    unique_bills = set()
+    for feature in features:
+        hcmc_support = feature['properties'].get('HCMC support', [])
+        unique_bills.update(hcmc_support)
+    return unique_bills
+
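# Editor's note -- an illustrative walk-through of the helpers above, not part of the patch.
# The feature properties and bill names are made up, and it assumes each legislator's party is
# read from a 'Party' property inside the unchanged body of count_bills_support().
features = [
    {"properties": {"Party": "Democrat", "HCMC support": ["S1234", "A5678"]}},
    {"properties": {"Party": "Republican", "HCMC support": ["S1234"]}},
]
bills = sorted(extract_unique_bills(features))   # ['A5678', 'S1234']
counts = count_bills_support(features, bills)    # e.g. {'Democrat': {'A5678': 1, 'S1234': 1}, 'Republican': {'A5678': 0, 'S1234': 1}}
summary = create_summary_data(counts, "Senate")
# -> [{'House': 'Senate', 'Party': 'Democrat', 'A5678': '1', 'S1234': '1'},
#     {'House': 'Senate', 'Party': 'Republican', 'A5678': '0', 'S1234': '1'}]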
+def generate_legislation_votes():
+    """
+    Generates a JSON file containing the summary data of legislation votes for the Senate and Assembly.
+
+    This function reads the Senate and Assembly data from the 'public' directory, extracts unique bills,
+    counts the support for each bill in the Senate and Assembly, creates summary data for each chamber,
+    combines the summaries, and writes the final summary data to a new JSON file.
+
+    Args:
+        None
+
+    Returns:
+        None
+    """
+
+    senate_data = []
+    with open("public/senate.geo.json", 'r') as f:
+        senate_data = json.load(f)
+    assembly_data = []
+    with open("public/assembly.geo.json", 'r') as f:
+        assembly_data = json.load(f)
+
+    # Extracting unique bills from Senate and Assembly
+    unique_bills_senate = extract_unique_bills(senate_data['features'])
+    unique_bills_assembly = extract_unique_bills(assembly_data['features'])
+
+    # Combining and sorting the unique bills
+    all_unique_bills = sorted(unique_bills_senate.union(unique_bills_assembly))
+
+    # Counting bills for Senate and Assembly
+    senate_bills_counts = count_bills_support(senate_data['features'], all_unique_bills)
+    assembly_bills_counts = count_bills_support(assembly_data['features'], all_unique_bills)
+
+    # Creating the summary data for Senate and Assembly
+    summary_data_senate = create_summary_data(senate_bills_counts, "Senate")
+    summary_data_assembly = create_summary_data(assembly_bills_counts, "Assembly")
+
+    # Combining both summaries
+    final_summary_data = summary_data_senate + summary_data_assembly
-# Creating the summary data for Senate and Assembly
-summary_data_senate = create_summary_data(senate_bills_counts, "Senate")
-summary_data_assembly = create_summary_data(assembly_bills_counts, "Assembly")
+    # Path for the new JSON file
+    output_file_path_dynamic = "public/legislations_votes.json"
-# Combining both summaries
-final_summary_data = summary_data_senate + summary_data_assembly
+    # Writing the data to a new JSON file
+    with open(output_file_path_dynamic, 'w') as file:
+        json.dump(final_summary_data, file, indent=4)
-# Path for the new JSON file
-output_file_path_dynamic = "public/legislations_votes.json"
-# Writing the data to a new JSON file
-with open(output_file_path_dynamic, 'w') as file:
-    json.dump(final_summary_data, file, indent=4)
\ No newline at end of file
+
+if __name__ == '__main__':
+    generate_legislation_votes()
\ No newline at end of file
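# Editor's note -- illustrative only, not part of the patch: public/legislations_votes.json ends
# up as a flat list with one entry per chamber/party, bill names as keys, and counts serialized as
# strings. Bill names and numbers below are made up.
final_summary_data = [
    {"House": "Senate",   "Party": "Democrat",   "S1234": "12", "A5678": "9"},
    {"House": "Senate",   "Party": "Republican", "S1234": "3",  "A5678": "1"},
    {"House": "Assembly", "Party": "Democrat",   "S1234": "30", "A5678": "25"},
    {"House": "Assembly", "Party": "Republican", "S1234": "5",  "A5678": "2"},
]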
diff --git a/scripts/legislative_support_generator.py b/scripts/legislative_support_generator.py
index 68f6b8e..8c018ed 100644
--- a/scripts/legislative_support_generator.py
+++ b/scripts/legislative_support_generator.py
@@ -3,40 +3,67 @@
 import pandas as pd
 import geopandas as gpd
-LEGISLATIVE_SUPPORT = os.environ['LEGISLATIVE_SUPPORT']
+def generate_legislative_support_geojson(API_KEY, APP_KEY, TBL_KEY):
+    """
+    Generates GeoJSON files for New York State Assembly and Senate districts
+    with legislative support data.
-api = pyairtable.Api(LEGISLATIVE_SUPPORT)
+    The function retrieves data from an Airtable database using the pyairtable library,
+    merges it with GeoJSON files for district boundaries, and exports the merged data
+    as separate GeoJSON files for Assembly and Senate districts.
-table = api.table('appD3YhFHjmqJKtZ6','tblgyOlrTfYRaodyb')
+    Args:
+        API_KEY (str): The API key for accessing the Airtable database.
+        APP_KEY (str): The ID of the Airtable base (app) to read from.
+        TBL_KEY (str): The ID of the table within that base.
-table_dict = table.all()
+
+    Returns:
+        None
+    """
-rows = []
+    # Initialize pyairtable API
+    api = pyairtable.Api(API_KEY)
-for row in table_dict:
-    rows.append(row['fields'])
-df = pd.DataFrame(rows)
-# Load GeoJSONs
-gdf_assembly = gpd.read_file("public/NYS_Assembly_Districts.geojson").to_crs("EPSG:4326")
-gdf_senate = gpd.read_file("public/NYS_Senate_Districts.geojson").rename(columns={'DISTRICT':'District'}).to_crs("EPSG:4326")
+    # Get table from Airtable
+    table = api.table(APP_KEY, TBL_KEY)
-# Split your dataframe by house
-df_assembly = df[df['House'] == 'Assembly']
-df_senate = df[df['House'] == 'Senate']
+    # Retrieve all records from the table
+    table_dict = table.all()
-# Merge DataFrames with GeoDataFrames
-gdf_assembly = gdf_assembly.merge(df_assembly, on='District')
-gdf_senate = gdf_senate.merge(df_senate, on='District')
-gdf_assembly['Which HCMC legislation do they support?'] = gdf_assembly['Which HCMC legislation do they support?'].fillna('[]')
-gdf_senate['Which HCMC legislation do they support?'] = gdf_senate['Which HCMC legislation do they support?'].fillna('[]')
+    # Extract fields from the records and create a DataFrame
+    rows = []
+    for row in table_dict:
+        rows.append(row['fields'])
+    df = pd.DataFrame(rows)
+    # Load GeoJSONs for Assembly and Senate districts
+    gdf_assembly = gpd.read_file("public/NYS_Assembly_Districts.geojson").to_crs("EPSG:4326")
+    gdf_senate = gpd.read_file("public/NYS_Senate_Districts.geojson").rename(columns={'DISTRICT':'District'}).to_crs("EPSG:4326")
-gdf_assembly['Which HCMC legislation do they support?'] = gdf_assembly['Which HCMC legislation do they support?'].apply(lambda x: str(x))
-gdf_senate['Which HCMC legislation do they support?'] = gdf_senate['Which HCMC legislation do they support?'].apply(lambda x: str(x))
+    # Split the DataFrame by house (Assembly and Senate)
+    df_assembly = df[df['House'] == 'Assembly']
+    df_senate = df[df['House'] == 'Senate']
-gdf_senate = gdf_senate.rename(columns = {'Which HCMC legislation do they support?':'HCMC support'})
-gdf_assembly = gdf_assembly.rename(columns = {'Which HCMC legislation do they support?':'HCMC support'})
+    # Merge DataFrames with GeoDataFrames based on district
+    gdf_assembly = gdf_assembly.merge(df_assembly, on='District')
+    gdf_senate = gdf_senate.merge(df_senate, on='District')
-# Export the new GeoJSONs
-gdf_assembly.to_file('public/assembly.geo.json', driver='GeoJSON')
-gdf_senate.to_file('public/senate.geo.json', driver='GeoJSON')
\ No newline at end of file
+    # Fill missing values in the 'Which HCMC legislation do they support?' column with empty lists
+    gdf_assembly['Which HCMC legislation do they support?'] = gdf_assembly['Which HCMC legislation do they support?'].fillna('[]')
+    gdf_senate['Which HCMC legislation do they support?'] = gdf_senate['Which HCMC legislation do they support?'].fillna('[]')
+
+    # Convert the 'Which HCMC legislation do they support?' column to string type
+    gdf_assembly['Which HCMC legislation do they support?'] = gdf_assembly['Which HCMC legislation do they support?'].apply(lambda x: str(x))
+    gdf_senate['Which HCMC legislation do they support?'] = gdf_senate['Which HCMC legislation do they support?'].apply(lambda x: str(x))
+
+    # Rename columns for clarity
+    gdf_senate = gdf_senate.rename(columns = {'Which HCMC legislation do they support?':'HCMC support'})
+    gdf_assembly = gdf_assembly.rename(columns = {'Which HCMC legislation do they support?':'HCMC support'})
+
+    # Export the new GeoJSONs for Assembly and Senate districts
+    gdf_assembly.to_file('public/assembly.geo.json', driver='GeoJSON')
+    gdf_senate.to_file('public/senate.geo.json', driver='GeoJSON')
+
+if __name__ == '__main__':
+    API_KEY = os.environ['LEGISLATIVE_SUPPORT']
+    generate_legislative_support_geojson(API_KEY, 'appD3YhFHjmqJKtZ6', 'tblgyOlrTfYRaodyb')
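# Editor's note -- a minimal sketch of the merge-and-stringify step above, not part of the patch;
# district numbers and bill names are made up. The list-valued Airtable column is converted to a
# string because the GeoJSON driver behind GeoDataFrame.to_file() does not accept Python lists as
# property values.
import geopandas as gpd
import pandas as pd
from shapely.geometry import Point

gdf = gpd.GeoDataFrame({"District": [1, 2], "geometry": [Point(0, 0), Point(1, 1)]}, crs="EPSG:4326")
df = pd.DataFrame({"District": [1, 2], "Which HCMC legislation do they support?": [["S1234"], None]})
merged = gdf.merge(df, on="District")
merged["Which HCMC legislation do they support?"] = (
    merged["Which HCMC legislation do they support?"].fillna("[]").apply(str)
)
merged = merged.rename(columns={"Which HCMC legislation do they support?": "HCMC support"})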
diff --git a/scripts/members_geocoder.py b/scripts/members_geocoder.py
index f4b21a9..e550698 100644
--- a/scripts/members_geocoder.py
+++ b/scripts/members_geocoder.py
@@ -1,10 +1,7 @@
 import os
 import pyairtable
 import requests
-import PyPDF2
-import requests
 from requests.structures import CaseInsensitiveDict
-import re
 import json
 from time import sleep
 import pandas as pd
@@ -14,197 +11,248 @@
 from shapely.geometry import Point
 from types import NoneType
 import numpy as np
+import urllib.parse
+
+def find_district(point, geojson_file, key):
+    """
+    Finds the district value for a given point within a GeoJSON file.
+
+    Parameters:
+    - point: A GeoDataFrame representing the point to find the district for.
+    - geojson_file: The path to the GeoJSON file containing the district polygons.
+    - key: The column name in the GeoDataFrame that contains the district values.
+
+    Returns:
+    - The district value if the point is within a polygon, otherwise None.
+    """
+    # Load the GeoJSON file into a GeoDataFrame
+    gdf = gpd.read_file(geojson_file)
+    # Ensure the GeoDataFrame and the point have the same CRS
+    gdf = gdf.to_crs(point.crs)
+    # Perform spatial join with the point to find the district
+    joined = gpd.sjoin(point, gdf, how='left', predicate='within')
+    # Extract the district value if the point is within a polygon
+    if not joined.empty and key in joined.columns:
+        return joined.iloc[0][key]
+    return None
+
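# Editor's note -- illustrative usage of find_district(), not part of the patch; the coordinate is
# made up, while the file path and 'DISTRICT' key come from the constants used later in this script.
import geopandas as gpd
from shapely.geometry import Point

point = gpd.GeoDataFrame([{"geometry": Point(-73.95, 40.70)}], crs="EPSG:4326")
district = find_district(point, "public/NYS_Senate_Districts.geojson", "DISTRICT")
print(district)  # a senate district value, or None if the point falls outside every polygon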
+def find_geographical_info(lat, lon, senate_geojson, senate_key, assembly_geojson, assembly_key, counties_geojson, county_key, zipcode_geojson, zipcode_key):
+    """
+    Finds geographical information (such as zip code, assembly district, senate district, and county) based on latitude and longitude coordinates.
+
+    Parameters:
+    - lat (float): Latitude coordinate of the location.
+    - lon (float): Longitude coordinate of the location.
+    - senate_geojson (str): Filepath to the GeoJSON file containing senate district boundaries.
+    - senate_key (str): Key to identify the senate district in the GeoJSON file.
+    - assembly_geojson (str): Filepath to the GeoJSON file containing assembly district boundaries.
+    - assembly_key (str): Key to identify the assembly district in the GeoJSON file.
+    - counties_geojson (str): Filepath to the GeoJSON file containing county boundaries.
+    - county_key (str): Key to identify the county in the GeoJSON file.
+    - zipcode_geojson (str): Filepath to the GeoJSON file containing zip code boundaries.
+    - zipcode_key (str): Key to identify the zip code in the GeoJSON file.
+
+    Returns:
+    - result (dict): A dictionary containing the following geographical information:
+        - 'Zip_Code': The zip code of the location.
+        - 'Assembly_District': The assembly district of the location.
+        - 'Senate_District': The senate district of the location.
+        - 'County': The county of the location.
+    """
+
+    # Initialize the result dictionary
+    result = {
+        'Zip_Code': '',
+        'Assembly_District': '',
+        'Senate_District': '',
+        'County': ''
+    }
+
+    # Return the empty result if either coordinate is missing
+    if ((isinstance(lat, NoneType)) or (isinstance(lon, NoneType))):
+        return result
-MEMBERS_INFO = os.environ['MEMBERS_INFO']
-GEOCODER_KEY = os.environ['GEOCODER_KEY']
+
+    # Create a GeoDataFrame for the point
+    point = gpd.GeoDataFrame([{'geometry': Point(lon, lat)}], crs="EPSG:4326")
-api = pyairtable.Api(MEMBERS_INFO)
+    # Find the Senate District
+    result['Senate_District'] = find_district(point, senate_geojson, senate_key)
-table = api.table('appsZsPVQ4n7ujxSJ','tblvVPeXE15ZbA68T')
+    # Find the Assembly District
+    result['Assembly_District'] = find_district(point, assembly_geojson, assembly_key)
-members = table.all()
+    # Find the County
+    result['County'] = find_district(point, counties_geojson, county_key)
-members_list = hash([x['fields'] for x in members])
+    # Find the Zip Code
+    result['Zip_Code'] = find_district(point, zipcode_geojson, zipcode_key)
-path = Path("public")
+
+    return result
-if os.path.isfile(path / "airtable.pkl"):
-    with open(path / "airtable.pkl", 'rb') as f:
-        old_members_list = pickle.load(f)
+
+class NpEncoder(json.JSONEncoder):
+    """
+    Custom JSON encoder that handles encoding of NumPy types.
-else:
-    old_members_list = []
+    This encoder is used to handle encoding of NumPy types (such as np.integer, np.floating, and np.ndarray)
+    when converting Python objects to JSON format.
-if (old_members_list != members_list):
+    It extends the json.JSONEncoder class and overrides the default() method to provide custom encoding logic.
+
+    Usage:
+        encoder = NpEncoder()
+        encoded_data = encoder.encode(data)
-    #Then run all cells below also within the if statement
-    replace_dict = {'\s':'%20', ',':'%2C','#':'%23'}
+    """
-    if os.path.isfile(path / "address_cache.csv"):
-        address_cache = pd.read_csv(path / "address_cache.csv")
+    def default(self, obj):
+        if isinstance(obj, np.integer):
+            return int(obj)
+        if isinstance(obj, np.floating):
+            return float(obj)
+        if isinstance(obj, np.ndarray):
+            return obj.tolist()
+        return super(NpEncoder, self).default(obj)
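# Editor's note -- illustrative only, not part of the patch: NpEncoder lets json.dump()/dumps()
# serialize NumPy scalars and arrays that can end up in the member records (for example district
# values read out of a GeoDataFrame). Values below are made up.
import json
import numpy as np

record = {"Senate_District": np.int64(25), "lat": np.float64(40.7), "flags": np.array([1, 2])}
print(json.dumps(record, cls=NpEncoder))  # {"Senate_District": 25, "lat": 40.7, "flags": [1, 2]}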
-    else:
-        address_cache = pd.DataFrame(columns=['address_code', 'lat', 'lon', 'Senate_District', 'Assembly_District', 'County', 'Zip_Code'])
+
+def generate_members_info(AIRTABLE_API_KEY, AIRTABLE_APP_KEY, AIRTABLE_TBL_KEY, GEOAPIFY_API_KEY):
+    """
+    Generate members' information by geocoding their addresses.
-    if os.path.isfile(path / "nongeocoded_members.csv"):
-        nongeocoded_members = pd.read_csv(path / "nongeocoded_members.csv")
+
+    Args:
+        AIRTABLE_API_KEY (str): The API key for accessing the Airtable API.
+        AIRTABLE_APP_KEY (str): The ID of the Airtable base (app) to read from.
+        AIRTABLE_TBL_KEY (str): The ID of the members table within that base.
+        GEOAPIFY_API_KEY (str): The API key for accessing the Geoapify API.
-    else:
-        nongeocoded_members = pd.DataFrame(columns=['Website', 'Name', 'Legislation', 'Address', 'Membership Status', 'lat', 'lon', 'Phone'])
-
-    #count = 0
+
+    Returns:
+        None
+    """
-    senate_geojson = "public/NYS_Senate_Districts.geojson"
-    assembly_geojson = "public/NYS_Assembly_Districts.geojson"
-    counties_geojson = "public/nys_counties.geo.json"
-    zipcode_geojson = "public/nys_zipcodes.geo.json"
+    MEMBERS_INFO = AIRTABLE_API_KEY
+    GEOCODER_KEY = GEOAPIFY_API_KEY
-    senate_key = 'DISTRICT'
-    assembly_key = 'District'
-    county_key = 'name'
-    zipcode_key = 'ZCTA5CE10'
+    api = pyairtable.Api(MEMBERS_INFO)
-    def find_geographical_info(lat, lon, senate_geojson, senate_key, assembly_geojson, assembly_key, counties_geojson, county_key, zipcode_geojson, zipcode_key):
-        result = {
-            'Zip_Code': '',
-            'Assembly_District': '',
-            'Senate_District': '',
-            'County': ''
-        }
+    table = api.table(AIRTABLE_APP_KEY, AIRTABLE_TBL_KEY)
-        # Create a GeoDataFrame for the point
-        if ((isinstance(lat, NoneType)) or (isinstance(lon, NoneType))):
-            return result
+    members = table.all()
-        point = gpd.GeoDataFrame([{'geometry': Point(lon, lat)}], crs="EPSG:4326")
+    members_list = [x['fields'] for x in members]
-        # Initialize the result dictionary
+    path = Path("public")
+    if os.path.isfile(path / "airtable.pkl"):
+        with open(path / "airtable.pkl", 'rb') as f:
+            old_members_list = pickle.load(f)
-        # Function to find the district or area from a geojson file
-        def find_district(geojson_file, key):
-            # Load the GeoJSON file into a GeoDataFrame
-            gdf = gpd.read_file(geojson_file)
-            # Ensure the GeoDataFrame and the point have the same CRS
-            gdf = gdf.to_crs(point.crs)
-            # Perform spatial join with the point to find the district
-            joined = gpd.sjoin(point, gdf, how='left', predicate='within')
-            # Extract the district value if the point is within a polygon
-            if not joined.empty and key in joined.columns:
-                return joined.iloc[0][key]
-            return None
+    else:
+        old_members_list = []
-        # Find the Senate District
-        result['Senate_District'] = find_district(senate_geojson, senate_key)
+    if (old_members_list != members_list):
+        if os.path.isfile(path / "address_cache.csv"):
+            address_cache = pd.read_csv(path / "address_cache.csv")
-        # Find the Assembly District
-        result['Assembly_District'] = find_district(assembly_geojson, assembly_key)
+        else:
+            address_cache = pd.DataFrame(columns=['address_code', 'lat', 'lon', 'Senate_District', 'Assembly_District', 'County', 'Zip_Code'])
-        # Find the County
-        result['County'] = find_district(counties_geojson, county_key)
+        if os.path.isfile(path / "nongeocoded_members.csv"):
+            nongeocoded_members = pd.read_csv(path / "nongeocoded_members.csv")
-        # Find the Zip Code
-        result['Zip_Code'] = find_district(zipcode_geojson, zipcode_key)
+        else:
+            nongeocoded_members = pd.DataFrame(columns=['Website', 'Name', 'Legislation', 'Address', 'Membership Status', 'lat', 'lon', 'Phone'])
+
+        #count = 0
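# Editor's note -- an illustrative sketch of the caching scheme used below, not part of the patch;
# the address is made up. Addresses are lower-cased and URL-encoded into an 'address_code', which
# serves both as the cache key in address_cache.csv and as the text= parameter sent to the geocoder.
import urllib.parse

address_code = urllib.parse.quote("250 broadway, new york, ny 10007")
cached_data = address_cache[address_cache['address_code'] == address_code]
if not cached_data.empty:
    lat, lon = cached_data.iloc[0]['lat'], cached_data.iloc[0]['lon']  # reuse the cached geocode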
-        return result
-
-    airtable_members = members_list.copy()
-
-    for member in members_list:
-        print(f"Address:{member['Address']}")
-
-        # if (count%5==0):
-        #     sleep(1)
-
-        address_code = member['Address'].lower()
-
-        if re.search(r'\b\d\S*\s+floor\b', address_code, re.IGNORECASE):
-            # If the pattern is found, replace it with an empty string
-            address_code = re.sub(r'\b\d\S*\s+floor\b', '', address_code, flags=re.IGNORECASE)
-
-        for replace_value in replace_dict:
-            address_code = re.sub(replace_value, replace_dict[replace_value], address_code)
-
-        cached_data = address_cache[address_cache['address_code'] == address_code]
-
-        if not cached_data.empty:
-            # If the address is in the cache, use the cached latitude and longitude
-            if cached_data.iloc[0]['lat'] != None and cached_data.iloc[0]['lon'] != None:
-                member['lat'] = cached_data.iloc[0]['lat']
-                member['lon'] = cached_data.iloc[0]['lon'] # Assuming 'long' is the column name
-                member['Senate_District'] = cached_data.iloc[0]['Senate_District']
-                member['Assembly_District'] = cached_data.iloc[0]['Assembly_District']
-                member['County'] = cached_data.iloc[0]['County']
-                member['Zip_Code'] = cached_data.iloc[0]['Zip_Code']
-                print(f"Lat: {member['lat']} | Lon: {member['lon']} | Senate_District: {member['Senate_District']} | Assembly_District: {member['Assembly_District']} | County: {member['County']} | Zip_Code: {member['Zip_Code']} (cached)")
-
-            else:
-                member['lat'] = None
-                member['lon'] = None
-                member['Senate_District'] = None
-                member['Assembly_District'] = None
-                member['County'] = None
-                member['Zip_Code'] = None
-                nongeocoded_members = pd.concat([nongeocoded_members,pd.Dataframe([member])], ignore_index=True)
-                print(f"Could not geocode address (cached)")
+        senate_geojson = "public/NYS_Senate_Districts.geojson"
+        assembly_geojson = "public/NYS_Assembly_Districts.geojson"
+        counties_geojson = "public/nys_counties.geo.json"
+        zipcode_geojson = "public/nys_zipcodes.geo.json"
+
+        senate_key = 'DISTRICT'
+        assembly_key = 'District'
+        county_key = 'name'
+        zipcode_key = 'ZCTA5CE10'
-        else:
-            #member['address_code'] = address_code
-            url = f'https://api.geoapify.com/v1/geocode/search?text={address_code}&lang=en&limit=1&type=amenity&format=json&apiKey={GEOCODER_KEY}'
-            headers = CaseInsensitiveDict()
-            headers["Accept"] = "application/json"
+        airtable_members = members_list.copy()
+
+        for member in members_list:
+            print(f"Address:{member['Address']}")
+
+            address_code = member['Address'].lower()
-            resp = requests.get(url, headers=headers)
+            address_code = urllib.parse.quote(address_code)
-            couldGeocode = False
+            cached_data = address_cache[address_cache['address_code'] == address_code]
-            if (resp.status_code==200):
-                res = json.loads(resp.text)
-                try:
-                    member['lat'] = res['results'][0]['lat']
-                    member['lon'] = res['results'][0]['lon']
-                    couldGeocode = True
+            if not cached_data.empty:
+                # If the address is in the cache, use the cached latitude and longitude
+                if cached_data.iloc[0]['lat'] != None and cached_data.iloc[0]['lon'] != None:
+                    member['lat'] = cached_data.iloc[0]['lat']
+                    member['lon'] = cached_data.iloc[0]['lon']
+                    member['Senate_District'] = cached_data.iloc[0]['Senate_District']
+                    member['Assembly_District'] = cached_data.iloc[0]['Assembly_District']
+                    member['County'] = cached_data.iloc[0]['County']
+                    member['Zip_Code'] = cached_data.iloc[0]['Zip_Code']
+                    print(f"Lat: {member['lat']} | Lon: {member['lon']} | Senate_District: {member['Senate_District']} | Assembly_District: {member['Assembly_District']} | County: {member['County']} | Zip_Code: {member['Zip_Code']} (cached)")
-                except:
+                else:
+                    member['lat'] = None
+                    member['lon'] = None
+                    member['Senate_District'] = None
+                    member['Assembly_District'] = None
+                    member['County'] = None
+                    member['Zip_Code'] = None
+                    nongeocoded_members = pd.concat([nongeocoded_members, pd.DataFrame([member])], ignore_index=True)
+                    print(f"Could not geocode address (cached)")
+
+            else:
+                #member['address_code'] = address_code
+                url = f'https://api.geoapify.com/v1/geocode/search?text={address_code}&lang=en&limit=1&type=amenity&format=json&apiKey={GEOCODER_KEY}'
+                headers = CaseInsensitiveDict()
+                headers["Accept"] = "application/json"
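# Editor's note -- illustrative only, not part of the patch: with format=json the Geoapify
# geocoding endpoint returns a top-level 'results' array, which is why the code below reads
# res['results'][0]['lat'] and ['lon']. Abridged, made-up example of a response body:
res = {
    "results": [
        {"lat": 40.7128, "lon": -74.0060, "formatted": "250 Broadway, New York, NY 10007"}
    ]
}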
-            member.update(find_geographical_info(member['lat'], member['lon'], senate_geojson, senate_key, assembly_geojson, assembly_key, counties_geojson, county_key, zipcode_geojson, zipcode_key))
-
-        result = member
-        result['address_code'] = address_code
+                resp = requests.get(url, headers=headers)
-        if couldGeocode:
-            print(f"Lat: {result['lat']} | Lon: {result['lon']} | Senate_District: {result['Senate_District']} | Assembly_District: {result['Assembly_District']} | County: {result['County']} | Zip_Code: {result['Zip_Code']}")
+                couldGeocode = False
-            address_cache = pd.concat([address_cache, pd.DataFrame([result])], ignore_index=True)
+                if (resp.status_code==200):
+                    res = json.loads(resp.text)
+                    try:
+                        member['lat'] = res['results'][0]['lat']
+                        member['lon'] = res['results'][0]['lon']
+                        couldGeocode = True
-        else:
-            print(f"Error loading page")
-        # count = count + 1
+                    except:
+                        member['lat'] = None
+                        member['lon'] = None
+                        nongeocoded_members = pd.concat([nongeocoded_members, pd.DataFrame([member])], ignore_index=True)
+                        print(f"Could not geocode address")
+
+                member.update(find_geographical_info(member['lat'], member['lon'], senate_geojson, senate_key, assembly_geojson, assembly_key, counties_geojson, county_key, zipcode_geojson, zipcode_key))
+
+                result = member
+                result['address_code'] = address_code
-    class NpEncoder(json.JSONEncoder):
-        def default(self, obj):
-            if isinstance(obj, np.integer):
-                return int(obj)
-            if isinstance(obj, np.floating):
-                return float(obj)
-            if isinstance(obj, np.ndarray):
-                return obj.tolist()
-            return super(NpEncoder, self).default(obj)
+                if couldGeocode:
+                    print(f"Lat: {result['lat']} | Lon: {result['lon']} | Senate_District: {result['Senate_District']} | Assembly_District: {result['Assembly_District']} | County: {result['County']} | Zip_Code: {result['Zip_Code']}")
-    with open(path / "rtc_members_info.json", 'w') as fout:
-        json.dump(members_list, fout, cls=NpEncoder)
+                    address_cache = pd.concat([address_cache, pd.DataFrame([result])], ignore_index=True)
-    address_cache.dropna(axis=1, how='all').to_csv(path / "address_cache.csv")
+                else:
+                    print(f"Error loading page")
+                # count = count + 1
-    nongeocoded_members.to_csv(path / "nongeocoded_members.csv")
-    with open(path / "airtable.pkl", 'wb') as f:
-        pickle.dump(airtable_members, f)
+        with open(path / "rtc_members_info.json", 'w') as fout:
+            json.dump(members_list, fout, cls=NpEncoder)
-
\ No newline at end of file
+        address_cache.dropna(axis=1, how='all').to_csv(path / "address_cache.csv")
+
+        nongeocoded_members.to_csv(path / "nongeocoded_members.csv")
+
+        with open(path / "airtable.pkl", 'wb') as f:
+            pickle.dump(airtable_members, f)
+
+
+if __name__ == '__main__':
+    MEMBERS_INFO = os.environ['MEMBERS_INFO']
+    GEOCODER_KEY = os.environ['GEOCODER_KEY']
+    generate_members_info(MEMBERS_INFO, 'appsZsPVQ4n7ujxSJ', 'tblvVPeXE15ZbA68T', GEOCODER_KEY)
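# Editor's note -- illustrative only, not part of the patch: legislations_votes.json is derived
# from assembly.geo.json and senate.geo.json, so the legislative-support generator has to run
# before the votes generator. A possible driver, assuming the scripts are importable as a
# 'scripts' package:
import os
from scripts.legislative_support_generator import generate_legislative_support_geojson
from scripts.legislation_votes_generator import generate_legislation_votes
from scripts.legislation_info_generator import generate_legislation_info

generate_legislative_support_geojson(os.environ['LEGISLATIVE_SUPPORT'], 'appD3YhFHjmqJKtZ6', 'tblgyOlrTfYRaodyb')
generate_legislation_votes()
generate_legislation_info(os.environ['LEGISLATION_INFO'], 'apps7I6q0g9Hyb6j9', 'tblydWhHOZeqjzycO')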