Skip to content

Commit

Permalink
Changes to resolve various items in Who's putting NaNs in my exchange…
Browse files Browse the repository at this point in the history
… tables? USEPA#246

New, dataset-specific source strings (e.g., "netlhydro", "netlwater", "netlcoaleiafuel") were added to differentiate between data sources. This allows better alignment with the calculated total electricity values when the data are aggregated, and it avoids NaNs, particularly when some of the internal data have not been updated to the latest year.
  • Loading branch information
m-jamieson committed Aug 14, 2024
1 parent 43fb567 commit 8851f54
Show file tree
Hide file tree
Showing 13 changed files with 33 additions and 23 deletions.
2 changes: 1 addition & 1 deletion electricitylci/coal_upstream.py
Original file line number Diff line number Diff line change
Expand Up @@ -821,7 +821,7 @@ def wtd_mean(pdser, total_db):
)
merged_coal_upstream.reset_index(drop=True, inplace=True)
merged_coal_upstream["Year"] = year
merged_coal_upstream["Source"] = "netl"
merged_coal_upstream["Source"] = "netlcoaleiafuel"

return merged_coal_upstream

Expand Down
7 changes: 5 additions & 2 deletions electricitylci/combinator.py
Original file line number Diff line number Diff line change
Expand Up @@ -101,6 +101,7 @@ def add_fuel_inputs(gen_df, upstream_df, upstream_dict):
fuel_df["FacilityID"] = upstream_reduced["plant_id"]
fuel_df["FuelCategory"] = upstream_reduced["FuelCategory"]
fuel_df["Year"] = upstream_reduced["Year"]
fuel_df["Source"] = upstream_reduced["Source"]
merge_cols = [
"Age",
"Balancing Authority Code",
Expand All @@ -122,7 +123,7 @@ def add_fuel_inputs(gen_df, upstream_df, upstream_dict):
how="left",
)
fuel_df.dropna(subset=["Electricity"], inplace=True)
fuel_df["Source"] = "eia"
#fuel_df["Source"] = "eia"
fuel_df = add_temporal_correlation_score(
fuel_df, model_specs.electricity_lci_target_year)
fuel_df["DataCollection"] = 5
Expand Down Expand Up @@ -268,6 +269,7 @@ def concat_map_upstream_databases(eia_gen_year, *arg, **kwargs):
"FlowName_orig",
"Compartment_path_orig",
"Unit_orig",
"Source"
]
upstream_df["FlowAmount"] = upstream_df["FlowAmount"].astype(float)
if "Electricity" in upstream_df.columns:
Expand Down Expand Up @@ -317,7 +319,7 @@ def concat_map_upstream_databases(eia_gen_year, *arg, **kwargs):
upstream_mapped_df["Compartment"].str.contains("resource"),
"ElementaryFlowPrimeContext",
] = "resource"
upstream_mapped_df["Source"] = "netl"
#upstream_mapped_df["Source"] = "netl"
# WARNING: don't use with HYDRO, which has its own data year
upstream_mapped_df["Year"] = eia_gen_year
final_columns = [
Expand Down Expand Up @@ -362,6 +364,7 @@ def concat_map_upstream_databases(eia_gen_year, *arg, **kwargs):
"FlowName",
"Compartment",
"Unit",
"Source"
]
).groups
unique_mapped_set = set(unique_mapped.keys())
Expand Down
27 changes: 17 additions & 10 deletions electricitylci/generation.py
Original file line number Diff line number Diff line change
Expand Up @@ -522,8 +522,14 @@ def calculate_electricity_by_source(db, subregion="BA"):
"""
all_sources = '_'.join(sorted(list(db["Source"].unique())))
power_plant_criteria = db["stage_code"]=="Power plant"
db_powerplant = db.loc[power_plant_criteria, :].copy()
db_nonpower = db.loc[~power_plant_criteria, :].copy()

# HOTFIX: the dataframe is no longer separated, in hopes of generating
# electricity amounts for fuel inputs that don't make the plants
# "too efficient" [2024-08-14 MBJ]
#db_powerplant = db.loc[power_plant_criteria, :].copy()
#db_nonpower = db.loc[~power_plant_criteria, :].copy()
db_powerplant = db.copy()

region_agg = subregion_col(subregion)

fuel_agg = ["FuelCategory"]
Expand Down Expand Up @@ -586,8 +592,9 @@ def calculate_electricity_by_source(db, subregion="BA"):
# HOTFIX: it doesn't make sense to groupby a different group;
# it gives different results from the first-pass filter;
# changed to match criteria above. [2023-12-19; TWD]
# HOTFIX undone [2024-08-13; MBJ]
source_df = pd.DataFrame(
db_multiple_sources.groupby(["FlowName", "Compartment"])[
db_multiple_sources.groupby(groupby_cols)[
["Source"]].apply(combine_source_lambda),
columns=["source_list"],
)
Expand All @@ -602,8 +609,8 @@ def calculate_electricity_by_source(db, subregion="BA"):
old_index = db_multiple_sources.index
db_multiple_sources = db_multiple_sources.merge(
right=source_df,
left_on=["FlowName", "Compartment"],
right_on=["FlowName", "Compartment"],
left_on=groupby_cols,
right_on=groupby_cols,
how="left",
)
db_multiple_sources.index = old_index
Expand All @@ -628,7 +635,7 @@ def calculate_electricity_by_source(db, subregion="BA"):
)
]
sub_db = db.loc[src_filter, :].copy()
sub_db.drop_duplicates(subset=fuel_agg + ["eGRID_ID"], inplace=True)
sub_db.drop_duplicates(subset=fuel_agg + ["eGRID_ID","Year"], inplace=True)
# HOTFIX: fix pandas futurewarning syntax [2024-03-08; TWD]
sub_db_group = sub_db.groupby(elec_groupby_cols, as_index=False).agg(
{"Electricity": ["sum", "mean"], "eGRID_ID": "count"}
Expand All @@ -640,12 +647,12 @@ def calculate_electricity_by_source(db, subregion="BA"):
]
sub_db_group["source_string"] = src
elec_sum_lists.append(sub_db_group)
db_nonpower["source_string"] = all_sources
db_nonpower["source_list"] = [all_sources]*len(db_nonpower)
#db_nonpower["source_string"] = all_sources
#db_nonpower["source_list"] = [all_sources]*len(db_nonpower)
elec_sums = pd.concat(elec_sum_lists, ignore_index=True)
elec_sums.sort_values(by=elec_groupby_cols, inplace=True)
db = pd.concat([db_powerplant, db_nonpower])

#db = pd.concat([db_powerplant, db_nonpower])
db = db_powerplant
return db, elec_sums


Expand Down
2 changes: 1 addition & 1 deletion electricitylci/hydro_upstream.py
Original file line number Diff line number Diff line change
Expand Up @@ -114,7 +114,7 @@ def generate_hydro_emissions():

# Add other necessary metadata
hydro_df["Year"] = 2016
hydro_df["Source"] = "netl"
hydro_df["Source"] = "netlhydro"

# Read in 2016 power plant location data (i.e., state, NERC, BA).
eia860_df = eia860_balancing_authority(2016)
Expand Down
2 changes: 1 addition & 1 deletion electricitylci/import_impacts.py
Original file line number Diff line number Diff line change
Expand Up @@ -218,7 +218,7 @@ def generate_canadian_mixes(us_inventory, gen_year):
ca_mix_inventory.rename(
columns={"Code": "Balancing Authority Code"}, inplace=True
)
ca_mix_inventory["Source"] = "netl"
ca_mix_inventory["Source"] = "netlca"
ca_mix_inventory["Year"] = us_inventory["Year"].mode().to_numpy()[0]
ca_mix_inventory["FuelCategory"] = "ALL"
ca_mix_inventory["eGRID_ID"] = ca_mix_inventory[
Expand Down
2 changes: 1 addition & 1 deletion electricitylci/natural_gas_upstream.py
Original file line number Diff line number Diff line change
Expand Up @@ -150,7 +150,7 @@ def generate_upstream_ng(year):
inplace=True
)
ng_lci_basin["Year"] = year
ng_lci_basin["Source"] = "netl"
ng_lci_basin["Source"] = "netlgaseiafuel"
ng_lci_basin["ElementaryFlowPrimeContext"] = "emission"
ng_lci_basin.loc[
ng_lci_basin["Compartment"].str.contains("resource/"),
Expand Down
2 changes: 1 addition & 1 deletion electricitylci/petroleum_upstream.py
Original file line number Diff line number Diff line change
Expand Up @@ -218,7 +218,7 @@ def generate_petroleum_upstream(year):
merged_inventory['Compartment'] = merged_inventory[
'Compartment'].map(compartment_dict)
merged_inventory.dropna(inplace=True)

merged_inventory["Source"] = "netlpetro"
return merged_inventory


Expand Down
2 changes: 1 addition & 1 deletion electricitylci/plant_water_use.py
Original file line number Diff line number Diff line change
Expand Up @@ -237,7 +237,7 @@ def generate_plant_water_use(year):
final_water["plant_id"] = final_water["FacilityID"]
final_water["eGRID_ID"] = final_water["FacilityID"]
final_water["Year"] = year
final_water["Source"] = "netl"
final_water["Source"] = "netlwater"
final_water["Unit"] = "kg"
final_water["stage_code"] = "Power plant"
final_water["TechnologicalCorrelation"] = 1
Expand Down
2 changes: 1 addition & 1 deletion electricitylci/power_plant_construction.py
Original file line number Diff line number Diff line change
Expand Up @@ -227,7 +227,7 @@ def generate_power_plant_construction(year):
construction_df["fuel_type"] = "Construction"
construction_df["Unit"] = construction_df["Unit"].str.replace(
"mj","MJ", regex=False)

construction_df["Source"]="netlconst"
return construction_df


Expand Down
2 changes: 1 addition & 1 deletion electricitylci/solar_thermal_upstream.py
Original file line number Diff line number Diff line change
Expand Up @@ -127,7 +127,7 @@ def generate_upstream_solarthermal(year):
'Compartment'].map(compartment_map)
solarthermal_upstream["Unit"] = "kg"
solarthermal_upstream["input"] = False

solarthermal_upstream["Source"] = "netlsolarthermal"
return solarthermal_upstream


Expand Down
2 changes: 1 addition & 1 deletion electricitylci/solar_upstream.py
Original file line number Diff line number Diff line change
Expand Up @@ -125,7 +125,7 @@ def generate_upstream_solar(year):
compartment_map)
solar_upstream["Unit"] = "kg"
solar_upstream["input"] = False

solar_upstream["Source"] = "netlnrelsolarpv"
return solar_upstream


Expand Down
2 changes: 1 addition & 1 deletion electricitylci/upstream_dict.py
Original file line number Diff line number Diff line change
Expand Up @@ -332,7 +332,7 @@ def olcaschema_genupstream_processes(merged):
"Compartment",
"plant_id",
"Unit",
"input"
"input",
],
as_index=False,
).agg({"FlowAmount": "sum", "quantity": "mean"})
Expand Down
2 changes: 1 addition & 1 deletion electricitylci/wind_upstream.py
Original file line number Diff line number Diff line change
Expand Up @@ -123,7 +123,7 @@ def generate_upstream_wind(year):
wind_upstream["input"] = False
wind_upstream.loc[wind_upstream["Compartment"]=="input", "input"] = True
wind_upstream["Unit"] = "kg"

wind_upstream["Source"] = "netlnrelwind"
return wind_upstream


Expand Down

0 comments on commit 8851f54

Please sign in to comment.