From e6cf2f34f1919ea185040366c38c3d0f349641b7 Mon Sep 17 00:00:00 2001 From: dt-woods Date: Wed, 21 Aug 2024 23:10:42 -0400 Subject: [PATCH] add EIA API for EBA.zip bulk data; addresses #253 --- electricitylci/bulk_eia_data.py | 39 +-- electricitylci/eia_io_trading.py | 432 ++++++++++++++++++++++++++----- electricitylci/utils.py | 94 ++++++- 3 files changed, 486 insertions(+), 79 deletions(-) diff --git a/electricitylci/bulk_eia_data.py b/electricitylci/bulk_eia_data.py index 59f8d5c..f86206e 100644 --- a/electricitylci/bulk_eia_data.py +++ b/electricitylci/bulk_eia_data.py @@ -29,7 +29,7 @@ consumption mix for a given region. Last updated: - 2024-03-20 + 2024-08-21 """ __all__ = [ "ba_exchange_to_df", @@ -178,8 +178,8 @@ def read_remote_manifest_last_update(): def row_to_df(rows, data_type): - """Turn rows of a single type from the bulk data text file into a dataframe - with the region, datetime, and data as columns. + """Turn rows of a single type from the bulk data text file into a data + frame with the region, datetime, and data as columns. Parameters ---------- @@ -191,34 +191,38 @@ def row_to_df(rows, data_type): Returns ------- pandas.DataFrame - Data for all regions in a single df with datatimes converted and UTC. + Data for all regions in a single df with datetimes converted to UTC. """ tuple_list = [] for row in rows: try: - datetime = pd.to_datetime( + date_time = pd.to_datetime( [x[0] for x in row['data']], utc=True, format='%Y%m%dT%HZ' ) except ValueError: try: - datetime = pd.to_datetime( + date_time = pd.to_datetime( [x[0]+":00" for x in row['data']], format='%Y%m%dT%H%z' ) except ValueError: try: - datetime = pd.to_datetime( - [x[0] for x in row['data']], - format='%Y%m%dT%H' - ) + # Last ditch, try to infer the format. + # Also, necessary for daily data from API. + date_time = pd.to_datetime([x[0] for x in row['data']]) except ValueError: + logging.warning( + "Failed to convert timestamps for %s" % ( + row['series_id'] + ) + ) continue data = [x[1] for x in row['data']] region = row['series_id'].split('-')[0][4:] tuple_data = [ - x for x in zip([region]*len(datetime), list(datetime), data)] + x for x in zip([region]*len(date_time), list(date_time), data)] tuple_list.extend(tuple_data) df = pd.DataFrame(tuple_list, columns=["region", "datetime", data_type]) @@ -240,7 +244,7 @@ def ba_exchange_to_df(rows, data_type='ba_to_ba'): Returns ------- pandas.DataFrame - Data for all regions in a single df with datatimes converted and UTC + Data for all regions in a single df with datetimes converted and UTC """ tuple_list = [] for row in rows: @@ -258,11 +262,14 @@ def ba_exchange_to_df(rows, data_type='ba_to_ba'): ) except ValueError: try: - datetime = pd.to_datetime( - [x[0] for x in row['data']], - format='%Y%m%dT%H' - ) + # For daily data from API. 
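+ # For reference: hourly UTC stamps match "%Y%m%dT%HZ" (e.g.,
+ # "20160101T05Z"), local-time stamps match "%Y%m%dT%H%z" once ":00"
+ # is appended (e.g., "20160101T05-06:00"), and daily API stamps
+ # (e.g., "2016-01-01") are left for pandas to infer.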
+ datetime = pd.to_datetime([x[0] for x in row['data']])
 except ValueError:
+ logging.warning(
+ "Failed to convert timestamps for %s" % (
+ row['series_id']
+ )
+ )
 continue
 data = [x[1] for x in row['data']]
 from_region = row['series_id'].split('-')[0][4:]
diff --git a/electricitylci/eia_io_trading.py b/electricitylci/eia_io_trading.py
index c665ab0..6158210 100644
--- a/electricitylci/eia_io_trading.py
+++ b/electricitylci/eia_io_trading.py
@@ -6,10 +6,10 @@
 ##############################################################################
 # REQUIRED MODULES
 ##############################################################################
-from datetime import datetime
 import json
 import logging
 import os
+import time
 import zipfile

 import numpy as np
@@ -18,6 +18,7 @@

 from electricitylci.globals import data_dir
 from electricitylci.globals import paths
+from electricitylci.globals import API_SLEEP
 from electricitylci.bulk_eia_data import download_EBA
 from electricitylci.bulk_eia_data import row_to_df
 from electricitylci.bulk_eia_data import ba_exchange_to_df
@@ -28,6 +29,7 @@
 from electricitylci.utils import read_ba_codes
 from electricitylci.utils import check_output_dir
 from electricitylci.utils import download
+from electricitylci.utils import read_eia_api
 from electricitylci.utils import write_csv_to_output
 from electricitylci.process_dictionary_writer import (
 exchange,
@@ -60,7 +62,7 @@
 52(11), 6666-6675. https://doi.org/10.1021/acs.est.7b05191

 Last updated:
- 2024-08-12
+ 2024-08-21
 """
 __all__ = [
 "ba_io_trading_model",
@@ -69,6 +71,16 @@
 ]


+##############################################################################
+# GLOBALS
+##############################################################################
+REGION_ACRONYMS = [
+ 'TVA', 'MIDA', 'CAL', 'CAR', 'CENT', 'ERCO', 'FLA',
+ 'MIDW', 'ISNE', 'NYIS', 'NW', 'SE', 'SW',
+]
+'''list : Region acronyms for BA-to-BA trade.'''
+
+
 ##############################################################################
 # FUNCTIONS
 ##############################################################################
@@ -250,12 +262,130 @@ def _read_ba():
 return df_BA_NA, US_BA_acronyms, ferc_list


+def _check_json(d):
+ """Check that an EBA.zip JSON entry has data.
+
+ If a JSON entry is missing data, log a critical message.
+ The consequence of using such an entry is that consumption mix
+ processes will not be created in the JSON-LD.
+ See https://github.com/USEPA/ElectricityLCI/discussions/254.
+
+ Parameters
+ ----------
+ d : dict
+ JSON line read from EBA.zip
+ """
+ name = d.get('name', 'n/a')
+ series = d.get('series_id', 'n/a')
+ start = d.get('start', None)
+ end = d.get('end', None)
+ data = d.get('data', [])
+ if start is None or end is None or len(data) == 0:
+ logging.critical("No JSON data for %s, '%s'" % (series, name))
+
+
+def _read_bulk(ba_cols, use_api=True):
+ """Handle both ZIP and API data sources for bulk U.S. Electric System
+ Operating Data.
+
+ Parameters
+ ----------
+ ba_cols : list
+ A list of balancing authority short codes.
+ These are used for querying API demand and net generation data.
+ use_api : bool, optional
+ Whether to use the EIA API, by default True.
+
+ Returns
+ -------
+ tuple
+ A tuple of length three.
+ Each item is a list.
+ See :func:`_read_bulk_api` and :func:`_read_bulk_zip` for details.
+ """
+ if use_api:
+ return _read_bulk_api(ba_cols)
+ else:
+ return _read_bulk_zip()
+
+
+def _read_bulk_api(ba_cols):
+ """Read demand, net generation, and interchange data from EIA's API.
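+
+ Responses are reshaped to mimic the row dictionaries parsed from
+ EBA.zip, so downstream consumers (e.g., row_to_df and
+ ba_exchange_to_df) work unchanged.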
+
+ Parameters
+ ----------
+ ba_cols : list
+ A list of balancing authority short codes.
+ Used for querying regions for demand and net generation.
+
+ Returns
+ -------
+ tuple
+ A tuple of length three.
+
+ - list : rows associated with net generation.
+ - list : rows associated with BA-to-BA interchange.
+ - list : rows associated with demand.
+
+ Notes
+ -----
+ For API registration, go to: https://www.eia.gov/opendata/.
+
+ See https://github.com/USEPA/ElectricityLCI/discussions/254 for details.
+
+ If you don't want to pass ba_cols, you can find all the respondents
+ on the API by calling (adding ?api_key=YOUR-KEY at the end):
+ https://api.eia.gov/v2/electricity/rto/daily-region-data/facet/respondent
+ The response dictionary should have a key, 'facets', with 'id' and
+ 'name' fields for each BA/region.
+ """
+ api_key = None # optionally hard-code your API key here
+ new_api = "https://www.eia.gov/opendata/"
+ if api_key is None:
+ api_key = input("Enter EIA API key: ")
+ api_key = api_key.strip()
+ if api_key == "":
+ logging.warning(
+ "No API key given! "
+ f"Sign up here: {new_api}"
+ )
+
+ # Define the URLs for the two sub-domains and for hourly and daily data.
+ _baseurl = "https://api.eia.gov/v2/"
+ _sub_domain_h = "electricity/rto/region-data/data/"
+ _sub_domain_d = "electricity/rto/daily-region-data/data/"
+ _sub_domain2_h = "electricity/rto/interchange-data/data/"
+ _sub_domain2_d = "electricity/rto/daily-interchange-data/data/"
+ _freq = "daily" # or 'hourly' or 'local-hourly'
+ # NOTE: if using 'local-hourly', these times must be in timezone format!
+ # NOTE: the API time filter is based on day (not hour)!
+ _yr = model_specs.NETL_IO_trading_year
+ _start = "%d-01-01" % _yr
+ _end = "%d-12-31" % _yr
+
+ # Choose the sub-domains based on frequency (daily vs hourly).
+ _sub_domain = _sub_domain_h
+ _sub_domain2 = _sub_domain2_h
+ if _freq == 'daily':
+ _sub_domain = _sub_domain_d
+ _sub_domain2 = _sub_domain2_d
+
+ DEMAND_ROWS = _read_dng_api(
+ _baseurl, _sub_domain, api_key, _freq, _start, _end, ba_cols, 'D')
+ NET_GEN_ROWS = _read_dng_api(
+ _baseurl, _sub_domain, api_key, _freq, _start, _end, ba_cols, 'NG')
+ BA_TO_BA_ROWS = _read_id_api(
+ _baseurl, _sub_domain2, api_key, _freq, _start, _end)
+
+ return (NET_GEN_ROWS, BA_TO_BA_ROWS, DEMAND_ROWS)
+
+
+def _read_bulk_zip():
 """Read and parse EIA's U.S. Electric System Operating Data.

 Creates three lists of JSON-based dictionaries.
- Each dictionary contains metadata and a timeseries of data.
- Time series data appear to go back to 2015.
+ Each dictionary contains metadata and a time series of data.
+ Time series data appear to go back to around 2015.

 Returns
 -------
@@ -266,18 +396,19 @@
 - list : rows associated with BA-to-BA trade.
 - list : rows associated with demand.
 """
- REGION_ACRONYMS = [
- 'TVA', 'MIDA', 'CAL', 'CAR', 'CENT', 'ERCO', 'FLA',
- 'MIDW', 'ISNE', 'NYIS', 'NW', 'SE', 'SW',
- ]
-
- # Read in the bulk data
- path = os.path.join(paths.local_path, 'bulk_data', 'EBA.zip')
+ # Initialize return lists
 NET_GEN_ROWS = []
 BA_TO_BA_ROWS = []
 DEMAND_ROWS = []

+ # Changing to regex matches to allow compatibility with past and present
+ # bulk data. [2024-08-16; MJ]
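+ # These patterns target series IDs such as "EBA.CISO-ALL.NG.H" (net
+ # generation), "EBA.CISO-PACW.ID.H" (interchange), and
+ # "EBA.CISO-ALL.D.H" (demand), where CISO and PACW are illustrative
+ # BA codes.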
+ ngh_matches = r"^EBA[\S\w\d]+[^NG]\.NG\.H$"
+ idh_matches = r"^EBA.+\.ID\.H$"
+ dh_matches = r"^EBA.+\.D\.H$"
+
+ # HOTFIX: Check file vintage [2024-03-12; TWD]
+ path = os.path.join(paths.local_path, 'bulk_data', 'EBA.zip')
 check_EBA_vintage()

 try:
@@ -289,31 +420,41 @@
 else:
 logging.info("Using existing bulk data download")

- # Changing to regex matches to allow compatibility with past and present
- # bulk data. [2024-08-16; MJ]
- ngh_matches = rb"\"EBA[\S\w\d]+[^NG]\.NG\.H\""
- idh_matches = rb"\"EBA.+\.ID\.H\""
- dh_matches = rb"\"EBA.+\.D\.H\""
-
 logging.info("Loading bulk data to json")
 with z.open('EBA.txt') as f:
 for line in f:
- # All but one BA is currently reporting net generation in UTC
- # and local time. For that one BA (GRMA) only UTC time is
- # reported - so only pulling that for now.
- if re.search(ngh_matches,line) is not None:
- NET_GEN_ROWS.append(json.loads(line))
-
- # Similarly there are 5 interchanges that report interchange
- # in UTC but not in local time.
- elif re.search(idh_matches,line) is not None:
- exchange_line = json.loads(line)
- s_txt = exchange_line['series_id'].split('-')[0][4:]
- if s_txt not in REGION_ACRONYMS:
- BA_TO_BA_ROWS.append(exchange_line)
- # Keeping these here just in case
- elif re.search(dh_matches,line) is not None:
- DEMAND_ROWS.append(json.loads(line))
+ # Parse every line to improve compatibility with old/new EBA.zip
+ f_json = json.loads(line)
+
+ # All the entries should have a 'series_id' and an 'f' key.
+ # 'H' for UTC hourly; 'HL' for local hourly; hard-coded to UTC.
+ # See https://github.com/USEPA/ElectricityLCI/discussions/254.
+ if 'series_id' in f_json.keys() and f_json.get('f', '') == 'H':
+ series_id = f_json['series_id']
+
+ # LEGACY NOTES --- The 2016 Baseline
+ # All but one BA is reporting net generation in UTC
+ # and local time. For that one BA (GRMA) only UTC time is
+ # reported - so only pulling that for now.
+
+ if re.search(ngh_matches, series_id) is not None:
+ # HOTFIX: add single instance of JSON line checker;
+ # this will throw about 82 warnings that data are not
+ # available (e.g., August 19, 2024 EBA.zip).
+ _check_json(f_json)
+ NET_GEN_ROWS.append(f_json)
+
+ # Similarly there are 5 interchanges that report interchange
+ # in UTC but not in local time.
+ elif re.search(idh_matches, series_id) is not None:
+ # Split the BA pair on '-' and strip the "EBA." prefix.
+ s_txt = series_id.split('-')[0][4:]
+ if s_txt not in REGION_ACRONYMS:
+ BA_TO_BA_ROWS.append(f_json)
+
+ # Keeping these here just in case
+ elif re.search(dh_matches, series_id) is not None:
+ DEMAND_ROWS.append(f_json)

 logging.debug(f"Net gen rows: {len(NET_GEN_ROWS)}")
 logging.debug(f"BA to BA rows:{len(BA_TO_BA_ROWS)}")
@@ -322,6 +463,173 @@
 return (NET_GEN_ROWS, BA_TO_BA_ROWS, DEMAND_ROWS)


+def _read_id_api(baseurl, sub_domain, api_key, freq, start, end):
+ """Return a list of interchanges for the given frequency and period."""
+ r_list = []
+ d_dict = {}
+
+ if freq not in ['daily', 'hourly', 'local-hourly']:
+ raise ValueError(
+ "Frequency must be 'daily', 'hourly', or 'local-hourly', "
+ "not '%s'!" % freq)
+ if api_key is None or api_key == '':
+ raise ValueError(
+ "Missing EIA API key! Register online at "
+ "https://www.eia.gov/opendata/"
+ )
+
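+ # The API caps each response (length=4999 below), so page through
+ # the records with the 'offset' parameter until the reported 'total'
+ # is reached.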
+ # Provide starting values to get into the while loop.
+ recs_captured = 0
+ total_recs = 2
+ offset = 0
+ while recs_captured < (total_recs - 1):
+ url_id = (
+ f"{baseurl}{sub_domain}?api_key={api_key}&out=json"
+ f"&frequency={freq}"
+ f"&start={start}"
+ f"&end={end}"
+ "&sort[0][column]=period"
+ "&sort[0][direction]=asc"
+ "&data[]=value"
+ f"&offset={offset}"
+ "&length=4999"
+ )
+
+ # Variable idx for series ID, and add a timezone for daily downloads.
+ _idx = 'H'
+ if freq == 'daily':
+ _idx = 'D'
+ url_id += "&facets[timezone][]=Central"
+ elif freq == 'local-hourly':
+ _idx = 'HL'
+
+ # Make request and sleep, so as to not be a hater.
+ d_json, _ = read_eia_api(url_id)
+ time.sleep(API_SLEEP)
+
+ # Check response
+ d_resp = d_json.get('response', {})
+ try:
+ # The total number of records available, not necessarily how
+ # many you get this call.
+ total_recs = d_resp.get("total", 0)
+ total_recs = int(total_recs)
+ except (TypeError, ValueError):
+ total_recs = 0
+
+ # See how many records are in this response and update the counters.
+ # NOTE: this is different from 'total', which is all records.
+ d_rec = len(d_resp.get('data', []))
+ recs_captured += d_rec
+ offset = recs_captured + 1
+ logging.info("Retrieved %d entries out of %d ID records" % (
+ recs_captured, total_recs))

+ # Proceed if you have data.
+ if d_rec > 0:
+ for d in d_resp.get('data', []):
+ # Recreate the data format of EBA.zip
+ f_ba = d['fromba']
+ t_ba = d['toba']
+
+ # Employ the same filter used in _read_bulk_zip
+ if f_ba not in REGION_ACRONYMS:
+ series_id = "EBA.%s-%s.ID.%s" % (f_ba, t_ba, _idx)
+
+ # Use d_dict to store each unique BA-BA pairing and
+ # build out the data list. It's done this way because
+ # we know the trade regions of interest, REGION_ACRONYMS,
+ # but we don't know who they're trading with.
+ # HOTFIX: for some reason, I cannot stop duplicate
+ # entries, so use dictionary for uniqueness!
+ if series_id in d_dict.keys():
+ d_dict[series_id]['data'][d['period']] = d['value']
+ else:
+ d_dict[series_id] = {
+ 'series_id': series_id,
+ 'data': {}
+ }
+ d_dict[series_id]['data'][d['period']] = d['value']
+
+ # Take the data lists and series ids and make them a list of dicts.
+ for k in d_dict.keys():
+ d = d_dict[k]
+ # Convert dictionary to list (will likely lose sorting)
+ d['data'] = [[x, y] for x, y in d['data'].items()]
+ r_list.append(d)
+
+ return r_list
+
+
+def _read_dng_api(baseurl, sub_domain, api_key, freq, start, end, ba_cols, m):
+ """Return a list of net gen or demand rows for the given frequency
+ and time period."""
+ r_list = []
+ if m not in ['D', 'NG']:
+ raise ValueError("Metric must be either 'D' or 'NG', not '%s'!" % m)
+ if freq not in ['daily', 'hourly', 'local-hourly']:
+ raise ValueError(
+ "Frequency must be 'daily', 'hourly', or 'local-hourly', "
+ "not '%s'!" % freq)
+
+ # For logging
+ _metric = 'demand'
+ if m == 'NG':
+ _metric = 'net gen'
+
+ # For demand and net gen, we only need U.S. BA areas:
+ # Due to API response limits, request each BA individually.
+ for ba in ba_cols:
+ _url = (
+ f"{baseurl}{sub_domain}?api_key={api_key}&out=json"
+ f"&frequency={freq}"
+ f"&start={start}"
+ f"&end={end}"
+ f"&facets[respondent][]={ba}"
+ f"&facets[type][]={m}"
+ "&data[]=value"
+ )
+
+ # Variable idx for series ID, and add a timezone for daily downloads.
+ _idx = 'H'
+ if freq == 'daily':
+ _idx = 'D'
+ _url += "&facets[timezone][]=Central"
+ elif freq == 'local-hourly':
+ _idx = 'HL'
+
+ # Make request and sleep, so as to not be a hater.
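+ # read_eia_api retries failed requests up to its max_tries limit
+ # (five by default) and returns the parsed JSON plus the attempt
+ # count; API_SLEEP throttles successive calls (its value is defined
+ # in globals.py and is not shown in this diff).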
+ d_json, url_tries = read_eia_api(_url) + time.sleep(API_SLEEP) + + # Check response + d_resp = d_json.get('response', {}) + if 'warnings' in d_resp.keys(): + logging.warning(d_resp['warnings']) + try: + d_tot = d_resp.get("total", 0) + d_tot = int(d_tot) + except: + d_tot = 0 + logging.info("Retrieved %d %s %s entries in %d request(s)" % ( + d_tot, ba, _metric, url_tries)) + + # Proceed if there is data: + if d_tot > 0: + # Recreate the data format of EBA.zip + d_dict = {} + d_dict['series_id'] = "EBA.%s-ALL.%s.%s" % (ba, m, _idx) + # HOTFIX: Can't get rid of duplicate entries in the daily API, + # even with timezone setting! Use dictionary for uniqueness. + d_dict['data'] = {} + for d in d_resp.get('data', []): + d_dict['data'][d['period']] = d['value'] + # Convert dictionary back to list of lists + d_dict['data'] = [[x,y] for x,y in d_dict['data'].items()] + r_list.append(d_dict) + + return r_list + + def _read_ca_imports(year): """Return the Canadian import data frames based on Canadian electricity sales data (either from eLCI data hub or from source). @@ -542,13 +850,6 @@ def _make_net_gen(year, ba_cols, ng_json_list): 2016-01-01 03:00:00+00:00 575.0 ... 5551.0 1081.0 165.0 170.0 2016-01-01 04:00:00+00:00 586.0 ... 5394.0 1055.0 160.0 171.0 """ - # Subset for specified eia_gen_year - start_datetime = '{}-01-01 00:00:00+00:00'.format(year) - end_datetime = '{}-12-31 23:00:00+00:00'.format(year) - - start_datetime = datetime.strptime(start_datetime, '%Y-%m-%d %H:%M:%S%z') - end_datetime = datetime.strptime(end_datetime, '%Y-%m-%d %H:%M:%S%z') - # Net Generation Data Import logging.info("Creating net generation data frame with datetime") df_net_gen = row_to_df(ng_json_list, 'net_gen') @@ -565,22 +866,28 @@ def _make_net_gen(year, ba_cols, ng_json_list): ba_ref_set = set(ba_cols) col_diff = list(ba_ref_set - gen_cols_set) - col_diff.sort(key = str.upper) + col_diff.sort(key=str.upper) # Add in missing columns, then sort in alphabetical order - logging.info("Cleaning net_gen dataframe") + logging.info("Cleaning net_gen data frame") for i in col_diff: df_net_gen[i] = 0 - # Keep only the columns that match the balancing authority names, + # Keep only the columns that match the balancing authority names; # there are several other columns included in the dataset # that represent states (e.g., TEX, NY, FL) and other areas (US48) df_net_gen = df_net_gen[ba_cols] - cols_to_change=df_net_gen.columns[df_net_gen.dtypes.eq('object')] - df_net_gen[cols_to_change]=df_net_gen[cols_to_change].apply(pd.to_numeric, errors="coerce") + + # Convert columns made of strings to numeric. + cols_to_change = df_net_gen.columns[df_net_gen.dtypes.eq('object')] + df_net_gen[cols_to_change] = df_net_gen[cols_to_change].apply( + pd.to_numeric, errors="coerce") + # Re-sort columns so the headers are in alpha order df_net_gen = df_net_gen.sort_index(axis=1) df_net_gen = df_net_gen.fillna(value=0) + + # Filter for the year of interest (NOTE: UTC dates) df_net_gen = df_net_gen.loc[df_net_gen.index.year==year] return df_net_gen @@ -755,13 +1062,7 @@ def _make_trade_pivot(year, ba_cols, trade_df): columns ('Importing_BAA') representing importing BAs, and values for the traded amount. 
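
 For example, hourly rows with from_region 'CISO' and to_region 'PACW'
 (illustrative BA codes) are summed over the given year into a single
 value at row 'CISO', column 'PACW' of the returned pivot.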
""" - # Subset for specified eia_gen_year - start_datetime = '{}-01-01 00:00:00+00:00'.format(year) - end_datetime = '{}-12-31 23:00:00+00:00'.format(year) - - start_datetime = datetime.strptime(start_datetime, '%Y-%m-%d %H:%M:%S%z') - end_datetime = datetime.strptime(end_datetime, '%Y-%m-%d %H:%M:%S%z') - + logging.info("Creating trading data frame") ba_trade = trade_df.set_index('datetime') ba_trade['transacting regions'] = ( ba_trade['from_region'] + '-' + ba_trade['to_region']) @@ -780,9 +1081,15 @@ def _make_trade_pivot(year, ba_cols, trade_df): df_ba_trade_pivot = ba_trade.pivot( columns='transacting regions', values='ba_to_ba' ) - df_ba_trade_pivot = df_ba_trade_pivot.loc[df_ba_trade_pivot.index.year==year] - cols_to_change=df_ba_trade_pivot.columns[df_ba_trade_pivot.dtypes.eq('object')] - df_ba_trade_pivot[cols_to_change]=df_ba_trade_pivot[cols_to_change].apply(pd.to_numeric, errors="coerce") + # Filter for year of interest (NOTE: UTC timestamps) + df_ba_trade_pivot = df_ba_trade_pivot.loc[ + df_ba_trade_pivot.index.year==year] + + cols_to_change = df_ba_trade_pivot.columns[ + df_ba_trade_pivot.dtypes.eq('object')] + df_ba_trade_pivot[cols_to_change] = df_ba_trade_pivot[ + cols_to_change].apply(pd.to_numeric, errors="coerce") + # Sum columns - represents the net transacted amount between the two BAs df_ba_trade_sum = df_ba_trade_pivot.sum(axis=0).to_frame() df_ba_trade_sum = df_ba_trade_sum.reset_index() @@ -1205,7 +1512,12 @@ def ba_io_trading_model(year=None, subregion=None, regions_to_keep=None): # Read necessary data from EIA's bulk data download. # WARNING: this is a lot of data in memory! - NET_GEN_ROWS, BA_TO_BA_ROWS, DEMAND_ROWS = _read_bulk() + # UPDATE: now send ba_cols and whether to use API + NET_GEN_ROWS, BA_TO_BA_ROWS, DEMAND_ROWS = _read_bulk(ba_cols, True) + + # Net Generation Data Import + df_net_gen = _make_net_gen(year, ba_cols, NET_GEN_ROWS) + del(NET_GEN_ROWS) # Create EIA generation dataset and Form 860 balancing authority list. eia_gen_ba, eia860_ba_list = _read_eia_gen(year) @@ -1220,10 +1532,6 @@ def ba_io_trading_model(year=None, subregion=None, regions_to_keep=None): logging.info("Reading canadian import data") df_CA_Imports_Gen, df_CA_Imports_Rows = _read_ca_imports(year) - # Net Generation Data Import - df_net_gen = _make_net_gen(year, ba_cols, NET_GEN_ROWS) - del(NET_GEN_ROWS) - # Combine and correct net generation data frame with Canada. df_net_gen_sum = _make_net_gen_sum( df_net_gen, eia_gen_ba, df_CA_Imports_Gen) @@ -1324,7 +1632,7 @@ def qio_model(net_gen_df, trade_pivot, ba_map, ba_list, roi=None, thresh=1e-5): - Input-output approach developed by Qu et al. - Transfer is enabled through infinite electricity supply chains - - Virtual flows of emissions should follow the pattern of intergrid + - Virtual flows of emissions should follow the pattern of inter-grid electricity transfers. Parameters diff --git a/electricitylci/utils.py b/electricitylci/utils.py index 9b457c5..47d5d34 100644 --- a/electricitylci/utils.py +++ b/electricitylci/utils.py @@ -6,6 +6,7 @@ ############################################################################## # REQUIRED MODULES ############################################################################## +import datetime import io import json import logging @@ -26,7 +27,7 @@ __doc__ = """Small utility functions for use throughout the repository. Last updated: - 2024-08-09 + 2024-08-21 Changelog: - [24.08.05]: Create new BA code getter w/ FERC mapping. 
@@ -38,6 +39,7 @@
 __all__ = [
 "check_output_dir",
 "create_ba_region_map",
+ "decode_str",
 "download",
 "download_unzip",
 "fill_default_provider_uuids",
@@ -45,6 +47,7 @@
 "join_with_underscore",
 "make_valid_version_num",
 "read_ba_codes",
+ "read_eia_api",
 "read_json",
 "set_dir",
 ]
@@ -148,6 +151,33 @@
 return map_series


+def decode_str(bstring):
+ """Return a Python string.
+
+ Decodes a UTF-8 byte string; returns an empty string for input
+ that fails to decode or that is neither bytes nor str.
+
+ Parameters
+ ----------
+ bstring : bytes
+ An encoded byte string.
+
+ Returns
+ -------
+ str
+ A Python string.
+ """
+ if isinstance(bstring, bytes):
+ try:
+ bstring = bstring.decode("utf-8")
+ except UnicodeDecodeError:
+ bstring = ""
+ elif isinstance(bstring, str):
+ pass
+ else:
+ bstring = ""
+ return bstring
+
+
 def download(url, file_path):
 """Helper method to download a file from a URL.

@@ -470,6 +500,68 @@
 return df


+def read_eia_api(url, url_try=0, max_tries=5):
+ """Return a JSON data response from EIA's API.
+
+ Parameters
+ ----------
+ url : str
+ The URL in proper syntax.
+ url_try : int, optional
+ Internal counter for URL retries; default is 0.
+ max_tries : int, optional
+ When to stop retrying; default is 5.
+
+ Returns
+ -------
+ tuple
+ The JSON response (dict) and the URL try count (int).
+ The JSON dictionary includes keys:
+
+ - 'response' (dict): with keys:
+
+ - 'total' (int): count of records in 'data'
+ - 'dateFormat' (str): For example, 'YYYY-MM-DD"T"HH24'
+ - 'frequency' (str): For example, 'hourly'
+ - 'description' (str): Data description
+ - 'data' (list): Dictionaries with keys:
+
+ - 'period'
+ - 'fromba': for ID only
+ - 'fromba-name': for ID only
+ - 'toba': for ID only
+ - 'toba-name': for ID only
+ - 'respondent': for D and NG only
+ - 'respondent-name': for D and NG only
+ - 'type': for D and NG only
+ - 'type-name': for D and NG only
+ - 'value'
+ - 'value-units'
+
+ - 'request' (dict): Parameters sent to the API
+ - 'apiVersion' (str): API version string (e.g., '2.1.7')
+ - 'ExcelAddInVersion' (str): AddIn version string (e.g., '2.1.0')
+ """
+ r_dict = {}
+ url_try += 1
+ r = requests.get(url)
+ r_status = r.status_code
+ if r_status == 200:
+ r_content = r.content
+ try:
+ r_dict = r.json()
+ except ValueError:
+ # If at first you fail, try decoding the raw content.
+ r_content = decode_str(r_content)
+ r_dict = json.loads(r_content)
+ else:
+ if url_try < max_tries:
+ r_dict, url_try = read_eia_api(url, url_try)
+ else:
+ logging.error("Request failed after %d tries!" % url_try)
+
+ return (r_dict, url_try)
+
+
def read_json(json_path):
 """Read a JSON-formatted file into a Python dictionary.
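To smoke-test the new API path without running the full model, a query like the sketch below mirrors the daily-demand URL assembled in _read_dng_api. It is illustrative only: the API key is a placeholder, and the year and BA code (CISO) are arbitrary choices.

import requests

api_key = "YOUR-EIA-KEY"  # placeholder; register at https://www.eia.gov/opendata/
url = (
    "https://api.eia.gov/v2/electricity/rto/daily-region-data/data/"
    f"?api_key={api_key}&out=json"
    "&frequency=daily"
    "&start=2020-01-01"
    "&end=2020-12-31"
    "&facets[respondent][]=CISO"
    "&facets[type][]=D"
    "&facets[timezone][]=Central"
    "&data[]=value"
)
# A well-formed response reports the total record count and the data
# rows under the 'response' key, as documented in read_eia_api above.
d = requests.get(url).json()
print(d["response"]["total"])
print(d["response"]["data"][:2])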