Changes to resolve various items in "Who's putting NaNs in my exchange tables?" (USEPA#246)

New source strings were added to differentiate between data sources. This allows better alignment with the calculated total electricity values when the data are aggregated, and avoids NaNs, particularly when some of the internal data haven't been updated to the latest year.
KeyLogicLCA · Aug 14, 2024 · 8851f54 · 8851f54
1 parent 43fb567
commit 8851f54
Show file tree

Hide file tree

Showing 13 changed files with 33 additions and 23 deletions.
diff --git a/electricitylci/coal_upstream.py b/electricitylci/coal_upstream.py
@@ -821,7 +821,7 @@ def wtd_mean(pdser, total_db):
     )
     merged_coal_upstream.reset_index(drop=True, inplace=True)
     merged_coal_upstream["Year"] = year
-    merged_coal_upstream["Source"] = "netl"
+    merged_coal_upstream["Source"] = "netlcoaleiafuel"
 
     return merged_coal_upstream
 

diff --git a/electricitylci/combinator.py b/electricitylci/combinator.py
@@ -101,6 +101,7 @@ def add_fuel_inputs(gen_df, upstream_df, upstream_dict):
     fuel_df["FacilityID"] = upstream_reduced["plant_id"]
     fuel_df["FuelCategory"] = upstream_reduced["FuelCategory"]
     fuel_df["Year"] = upstream_reduced["Year"]
+    fuel_df["Source"] = upstream_reduced["Source"]
     merge_cols = [
         "Age",
         "Balancing Authority Code",
@@ -122,7 +123,7 @@ def add_fuel_inputs(gen_df, upstream_df, upstream_dict):
         how="left",
     )
     fuel_df.dropna(subset=["Electricity"], inplace=True)
-    fuel_df["Source"] = "eia"
+    #fuel_df["Source"] = "eia"
     fuel_df = add_temporal_correlation_score(
         fuel_df, model_specs.electricity_lci_target_year)
     fuel_df["DataCollection"] = 5
@@ -268,6 +269,7 @@ def concat_map_upstream_databases(eia_gen_year, *arg, **kwargs):
         "FlowName_orig",
         "Compartment_path_orig",
         "Unit_orig",
+        "Source"
     ]
     upstream_df["FlowAmount"] = upstream_df["FlowAmount"].astype(float)
     if "Electricity" in upstream_df.columns:
@@ -317,7 +319,7 @@ def concat_map_upstream_databases(eia_gen_year, *arg, **kwargs):
         upstream_mapped_df["Compartment"].str.contains("resource"),
         "ElementaryFlowPrimeContext",
     ] = "resource"
-    upstream_mapped_df["Source"] = "netl"
+    #upstream_mapped_df["Source"] = "netl"
     # WARNING: don't use with HYDRO, which has its own data year
     upstream_mapped_df["Year"] = eia_gen_year
     final_columns = [
@@ -362,6 +364,7 @@ def concat_map_upstream_databases(eia_gen_year, *arg, **kwargs):
                     "FlowName",
                     "Compartment",
                     "Unit",
+                    "Source"
                 ]
             ).groups
             unique_mapped_set = set(unique_mapped.keys())

diff --git a/electricitylci/generation.py b/electricitylci/generation.py
@@ -522,8 +522,14 @@ def calculate_electricity_by_source(db, subregion="BA"):
     """
     all_sources = '_'.join(sorted(list(db["Source"].unique())))
     power_plant_criteria = db["stage_code"]=="Power plant"
-    db_powerplant = db.loc[power_plant_criteria, :].copy()
-    db_nonpower = db.loc[~power_plant_criteria, :].copy()
+
+    # HOTFIX: not separating the dataframe, in the hope of generating
+    # electricity amounts for fuel inputs that don't make the plants
+    # "too efficient" [2024-08-14 MBJ]
+    #db_powerplant = db.loc[power_plant_criteria, :].copy()
+    #db_nonpower = db.loc[~power_plant_criteria, :].copy()
+    db_powerplant = db.copy()
+
     region_agg = subregion_col(subregion)
 
     fuel_agg = ["FuelCategory"]
@@ -586,8 +592,9 @@ def calculate_electricity_by_source(db, subregion="BA"):
             # HOTFIX: it doesn't make sense to groupby a different group;
             # it gives different results from the first-pass filter;
             # changed to match criteria above. [2023-12-19; TWD]
+            # HOTFIX undone [2024-08-13; MBJ]
             source_df = pd.DataFrame(
-                db_multiple_sources.groupby(["FlowName", "Compartment"])[
+                db_multiple_sources.groupby(groupby_cols)[
                     ["Source"]].apply(combine_source_lambda),
                 columns=["source_list"],
             )
@@ -602,8 +609,8 @@ def calculate_electricity_by_source(db, subregion="BA"):
             old_index = db_multiple_sources.index
             db_multiple_sources = db_multiple_sources.merge(
                 right=source_df,
-                left_on=["FlowName", "Compartment"],
-                right_on=["FlowName", "Compartment"],
+                left_on=groupby_cols,
+                right_on=groupby_cols,
                 how="left",
             )
             db_multiple_sources.index = old_index
@@ -628,7 +635,7 @@ def calculate_electricity_by_source(db, subregion="BA"):
             )
         ]
         sub_db = db.loc[src_filter, :].copy()
-        sub_db.drop_duplicates(subset=fuel_agg + ["eGRID_ID"], inplace=True)
+        sub_db.drop_duplicates(subset=fuel_agg + ["eGRID_ID","Year"], inplace=True)
         # HOTFIX: fix pandas futurewarning syntax [2024-03-08; TWD]
         sub_db_group = sub_db.groupby(elec_groupby_cols, as_index=False).agg(
             {"Electricity": ["sum", "mean"], "eGRID_ID": "count"}
@@ -640,12 +647,12 @@ def calculate_electricity_by_source(db, subregion="BA"):
         ]
         sub_db_group["source_string"] = src
         elec_sum_lists.append(sub_db_group)
-    db_nonpower["source_string"] = all_sources
-    db_nonpower["source_list"] = [all_sources]*len(db_nonpower)
+    #db_nonpower["source_string"] = all_sources
+    #db_nonpower["source_list"] = [all_sources]*len(db_nonpower)
     elec_sums = pd.concat(elec_sum_lists, ignore_index=True)
     elec_sums.sort_values(by=elec_groupby_cols, inplace=True)
-    db = pd.concat([db_powerplant, db_nonpower])
-
+    #db = pd.concat([db_powerplant, db_nonpower])
+    db = db_powerplant
     return db, elec_sums
 
 

diff --git a/electricitylci/hydro_upstream.py b/electricitylci/hydro_upstream.py
@@ -114,7 +114,7 @@ def generate_hydro_emissions():
 
     # Add other necessary metadata
     hydro_df["Year"] = 2016
-    hydro_df["Source"] = "netl"
+    hydro_df["Source"] = "netlhydro"
 
     # Read in 2016 power plant location data (i.e., state, NERC, BA).
     eia860_df = eia860_balancing_authority(2016)

diff --git a/electricitylci/import_impacts.py b/electricitylci/import_impacts.py
@@ -218,7 +218,7 @@ def generate_canadian_mixes(us_inventory, gen_year):
     ca_mix_inventory.rename(
         columns={"Code": "Balancing Authority Code"}, inplace=True
     )
-    ca_mix_inventory["Source"] = "netl"
+    ca_mix_inventory["Source"] = "netlca"
     ca_mix_inventory["Year"] = us_inventory["Year"].mode().to_numpy()[0]
     ca_mix_inventory["FuelCategory"] = "ALL"
     ca_mix_inventory["eGRID_ID"] = ca_mix_inventory[

diff --git a/electricitylci/natural_gas_upstream.py b/electricitylci/natural_gas_upstream.py
@@ -150,7 +150,7 @@ def generate_upstream_ng(year):
         inplace=True
     )
     ng_lci_basin["Year"] = year
-    ng_lci_basin["Source"] = "netl"
+    ng_lci_basin["Source"] = "netlgaseiafuel"
     ng_lci_basin["ElementaryFlowPrimeContext"] = "emission"
     ng_lci_basin.loc[
         ng_lci_basin["Compartment"].str.contains("resource/"),

diff --git a/electricitylci/petroleum_upstream.py b/electricitylci/petroleum_upstream.py
@@ -218,7 +218,7 @@ def generate_petroleum_upstream(year):
     merged_inventory['Compartment'] = merged_inventory[
         'Compartment'].map(compartment_dict)
     merged_inventory.dropna(inplace=True)
-
+    merged_inventory["Source"] = "netlpetro"
     return merged_inventory
 
 

diff --git a/electricitylci/plant_water_use.py b/electricitylci/plant_water_use.py
@@ -237,7 +237,7 @@ def generate_plant_water_use(year):
     final_water["plant_id"] = final_water["FacilityID"]
     final_water["eGRID_ID"] = final_water["FacilityID"]
     final_water["Year"] = year
-    final_water["Source"] = "netl"
+    final_water["Source"] = "netlwater"
     final_water["Unit"] = "kg"
     final_water["stage_code"] = "Power plant"
     final_water["TechnologicalCorrelation"] = 1

diff --git a/electricitylci/power_plant_construction.py b/electricitylci/power_plant_construction.py
@@ -227,7 +227,7 @@ def generate_power_plant_construction(year):
     construction_df["fuel_type"] = "Construction"
     construction_df["Unit"] = construction_df["Unit"].str.replace(
         "mj","MJ", regex=False)
-
+    construction_df["Source"]="netlconst"
     return construction_df
 
 

diff --git a/electricitylci/solar_thermal_upstream.py b/electricitylci/solar_thermal_upstream.py
@@ -127,7 +127,7 @@ def generate_upstream_solarthermal(year):
         'Compartment'].map(compartment_map)
     solarthermal_upstream["Unit"] = "kg"
     solarthermal_upstream["input"] = False
-
+    solarthermal_upstream["Source"] = "netlsolarthermal"
     return solarthermal_upstream
 
 

diff --git a/electricitylci/solar_upstream.py b/electricitylci/solar_upstream.py
@@ -125,7 +125,7 @@ def generate_upstream_solar(year):
         compartment_map)
     solar_upstream["Unit"] = "kg"
     solar_upstream["input"] = False
-
+    solar_upstream["Source"] = "netlnrelsolarpv"
     return solar_upstream
 
 

diff --git a/electricitylci/upstream_dict.py b/electricitylci/upstream_dict.py
@@ -332,7 +332,7 @@ def olcaschema_genupstream_processes(merged):
             "Compartment",
             "plant_id",
             "Unit",
-            "input"
+            "input",
         ],
         as_index=False,
     ).agg({"FlowAmount": "sum", "quantity": "mean"})

diff --git a/electricitylci/wind_upstream.py b/electricitylci/wind_upstream.py
@@ -123,7 +123,7 @@ def generate_upstream_wind(year):
     wind_upstream["input"] = False
     wind_upstream.loc[wind_upstream["Compartment"]=="input", "input"] = True
     wind_upstream["Unit"] = "kg"
-
+    wind_upstream["Source"] = "netlnrelwind"
     return wind_upstream