Add numeric_only=True to relevant pandas operations #259

Merged · 6 commits · Jul 15, 2024
Changes from 1 commit
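Background: pandas 1.5 deprecated the silent dropping of non-numeric ("nuisance") columns during aggregation, and pandas 2.0 turned it into an error, so grouped reductions such as `.mean()` now raise a `TypeError` whenever the frame carries string columns alongside the values. Passing `numeric_only=True` opts back in to numeric-only aggregation explicitly, which is what every hunk below does. A minimal sketch of the failure and the fix, using hypothetical data:

```python
import pandas as pd

# Hypothetical paired-observation frame: numeric values plus string metadata.
df = pd.DataFrame({
    "siteid": ["A", "A", "B", "B"],
    "obs": [40.0, 44.0, 55.0, 57.0],
    "units": ["ppbv", "ppbv", "ppbv", "ppbv"],  # non-numeric column
})

# pandas >= 2.0 raises TypeError here because 'units' cannot be averaged:
# df.groupby(["siteid"], as_index=False).mean()

# Restricting the aggregation to numeric columns works on old and new pandas:
df_mean = df.groupby(["siteid"], as_index=False).mean(numeric_only=True)
print(df_mean)
#   siteid   obs
# 0      A  42.0
# 1      B  56.0
```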
40 changes: 20 additions & 20 deletions melodies_monet/plots/aircraftplots.py
```diff
@@ -117,9 +117,9 @@ def make_spatial_bias(df, df_reg=None, column_o=None, label_o=None, column_m=Non
     if df_reg is not None:
         # JianHe: include options for percentile calculation (set in yaml file)
         if ptile is None:
-            df_mean=df_reg.groupby(['siteid'],as_index=False).mean()
+            df_mean=df_reg.groupby(['siteid'],as_index=False).mean(numeric_only=True)
         else:
-            df_mean=df_reg.groupby(['siteid'],as_index=False).quantile(ptile/100.)
+            df_mean=df_reg.groupby(['siteid'],as_index=False).quantile(ptile/100., numeric_only=True)

         #Specify val_max = vdiff. the sp_scatter_bias plot in MONET only uses the val_max value
         #and then uses -1*val_max value for the minimum.
@@ -129,9 +129,9 @@ def make_spatial_bias(df, df_reg=None, column_o=None, label_o=None, column_m=Non
     else:
         # JianHe: include options for percentile calculation (set in yaml file)
         if ptile is None:
-            df_mean=df.groupby(['siteid'],as_index=False).mean()
+            df_mean=df.groupby(['siteid'],as_index=False).mean(numeric_only=True)
         else:
-            df_mean=df.groupby(['siteid'],as_index=False).quantile(ptile/100.)
+            df_mean=df.groupby(['siteid'],as_index=False).quantile(ptile/100., numeric_only=True)

         #Specify val_max = vdiff. the sp_scatter_bias plot in MONET only uses the val_max value
         #and then uses -1*val_max value for the minimum.
@@ -315,19 +315,19 @@ def make_vertprofile(df, column=None, label=None, ax=None, bins=None, altitude_v
     bin_midpoints = altitude_bins.apply(lambda x: x.mid)
     # Convert bin_midpoints to a column in the DataFrame
     df['bin_midpoints'] = bin_midpoints
-    median = df.groupby(altitude_bins, observed=True)[column].median()
-    q1 = df.groupby(altitude_bins, observed=True)[column].quantile(0.25)
-    q3 = df.groupby(altitude_bins, observed=True)[column].quantile(0.75)
+    median = df.groupby(altitude_bins, observed=True)[column].median(numeric_only=True)
+    q1 = df.groupby(altitude_bins, observed=True)[column].quantile(0.25, numeric_only=True)
+    q3 = df.groupby(altitude_bins, observed=True)[column].quantile(0.75, numeric_only=True)
     # Convert bin_midpoints to a numerical data type
     df['bin_midpoints'] = df['bin_midpoints'].astype(float)

-    p5 = df.groupby(altitude_bins, observed=True)[column].quantile(0.05)
-    p10 = df.groupby(altitude_bins, observed=True)[column].quantile(0.10)
-    p90 = df.groupby(altitude_bins, observed=True)[column].quantile(0.90)
-    p95 = df.groupby(altitude_bins, observed=True)[column].quantile(0.95)
+    p5 = df.groupby(altitude_bins, observed=True)[column].quantile(0.05, numeric_only=True)
+    p10 = df.groupby(altitude_bins, observed=True)[column].quantile(0.10, numeric_only=True)
+    p90 = df.groupby(altitude_bins, observed=True)[column].quantile(0.90, numeric_only=True)
+    p95 = df.groupby(altitude_bins, observed=True)[column].quantile(0.95, numeric_only=True)

     # Calculate the mean of bin_midpoints grouped by altitude bins
-    binmidpoint = df.groupby(altitude_bins, observed=True)['bin_midpoints'].mean()
+    binmidpoint = df.groupby(altitude_bins, observed=True)['bin_midpoints'].mean(numeric_only=True)

     ##Plotting vertprofile starts
     plot_kwargs_fillbetween = plot_kwargs.copy()
@@ -420,20 +420,20 @@ def make_vertprofile(df, column=None, label=None, ax=None, bins=None, altitude_v
     # Convert bin_midpoints to a column in the DataFrame
     df['bin_midpoints'] = bin_midpoints
     # can be .groupby(bin_midpoints) as well (qzr)
-    median = df.groupby(altitude_bins, observed=True)[column].median()
-    q1 = df.groupby(altitude_bins, observed=True)[column].quantile(0.25)
-    q3 = df.groupby(altitude_bins, observed=True)[column].quantile(0.75)
+    median = df.groupby(altitude_bins, observed=True)[column].median(numeric_only=True)
+    q1 = df.groupby(altitude_bins, observed=True)[column].quantile(0.25, numeric_only=True)
+    q3 = df.groupby(altitude_bins, observed=True)[column].quantile(0.75, numeric_only=True)
     # Convert bin_midpoints to a numerical data type
     df['bin_midpoints'] = df['bin_midpoints'].astype(float)

     # Calculate the 10th, 90th, 5th, and 95th percentiles
-    p10 = df.groupby(altitude_bins, observed=True)[column].quantile(0.10)
-    p90 = df.groupby(altitude_bins, observed=True)[column].quantile(0.90)
-    p5 = df.groupby(altitude_bins, observed=True)[column].quantile(0.05)
-    p95 = df.groupby(altitude_bins, observed=True)[column].quantile(0.95)
+    p10 = df.groupby(altitude_bins, observed=True)[column].quantile(0.10, numeric_only=True)
+    p90 = df.groupby(altitude_bins, observed=True)[column].quantile(0.90, numeric_only=True)
+    p5 = df.groupby(altitude_bins, observed=True)[column].quantile(0.05, numeric_only=True)
+    p95 = df.groupby(altitude_bins, observed=True)[column].quantile(0.95, numeric_only=True)

     # Calculate the mean of bin_midpoints grouped by altitude bins
-    binmidpoint = df.groupby(altitude_bins, observed=True)['bin_midpoints'].mean()
+    binmidpoint = df.groupby(altitude_bins, observed=True)['bin_midpoints'].mean(numeric_only=True)

     plot_kwargs_fillbetween = plot_dict.copy()
     del plot_kwargs_fillbetween['marker']
```
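The vertical-profile hunks above group on `pd.cut` altitude bins and take per-bin medians and percentiles. A self-contained sketch of that pattern on synthetic data (the column names here are illustrative, not the repo's):

```python
import numpy as np
import pandas as pd

rng = np.random.default_rng(0)
df = pd.DataFrame({
    "altitude": rng.uniform(0.0, 10_000.0, 500),  # hypothetical altitudes (m)
    "o3": rng.normal(60.0, 10.0, 500),            # hypothetical ozone (ppbv)
})

# Bin altitudes into intervals; observed=True keeps only non-empty bins.
altitude_bins = pd.cut(df["altitude"], bins=10)
grouped = df.groupby(altitude_bins, observed=True)["o3"]

median = grouped.median()
q1, q3 = grouped.quantile(0.25), grouped.quantile(0.75)
p5, p95 = grouped.quantile(0.05), grouped.quantile(0.95)

# Numeric bin centers to plot the profile against.
bin_mid = df.groupby(altitude_bins, observed=True)["altitude"].mean()
```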
2 changes: 1 addition & 1 deletion melodies_monet/plots/satplots.py
```diff
@@ -438,7 +438,7 @@ def make_spatial_overlay(df, vmodel, column_o=None, label_o=None, column_m=None,
         ylabel = column_o

     #Take the mean for each siteid
-    df_mean=df.groupby(['siteid'],as_index=False).mean()
+    df_mean=df.groupby(['siteid'],as_index=False).mean(numeric_only=True)

     #Take the mean over time for the model output
     vmodel_mean = vmodel[column_m].mean(dim='time').squeeze()
```
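In `make_spatial_overlay`, only the observation side is a pandas groupby; the model side is an xarray reduction over the time dimension. A small sketch of that step on a synthetic dataset (variable and dimension names assumed, not taken from the repo):

```python
import numpy as np
import pandas as pd
import xarray as xr

# Hypothetical model output: a (time, y, x) ozone field.
vmodel = xr.Dataset(
    {"o3": (("time", "y", "x"), np.random.rand(4, 2, 3))},
    coords={"time": pd.date_range("2024-07-01", periods=4, freq="h")},
)

# Average over time; squeeze() drops any remaining length-1 dimensions.
vmodel_mean = vmodel["o3"].mean(dim="time").squeeze()
print(vmodel_mean.dims)  # ('y', 'x')
```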
20 changes: 10 additions & 10 deletions melodies_monet/plots/surfplots.py
```diff
@@ -43,7 +43,7 @@ def make_24hr_regulatory(df, col=None):
 def calc_24hr_ave_v1(df, col=None):
     df.index = df.time_local
     # select sites with nobs >=18, 75% completeness
-    df_24hr_ave = (df.groupby("siteid")[col].resample("D").sum(min_count=18)/df.groupby("siteid")[col].resample("D").count()).reset_index().dropna()
+    df_24hr_ave = (df.groupby("siteid")[col].resample("D").sum(min_count=18, numeric_only=True)/df.groupby("siteid")[col].resample("D").count()).reset_index().dropna()
     df = df.reset_index(drop=True)
     return df.merge(df_24hr_ave, on=["siteid", "time_local"])

@@ -67,10 +67,10 @@ def make_8hr_regulatory(df, col=None):

 def calc_8hr_rolling_max_v1(df, col=None, window=None):
     df.index = df.time_local
-    df_rolling = df.groupby("siteid")[col].rolling(window,min_periods=6,center=True, win_type="boxcar").mean().reset_index().dropna()
+    df_rolling = df.groupby("siteid")[col].rolling(window,min_periods=6,center=True, win_type="boxcar").mean(numeric_only=True).reset_index().dropna()
     # JianHe: select sites with nobs >=18, 75% completeness based on EPA
     df_rolling.index = df_rolling.time_local
-    df_rolling_max = df_rolling.groupby("siteid").resample("D").max(min_count=18).reset_index(drop=True).dropna()
+    df_rolling_max = df_rolling.groupby("siteid").resample("D").max(min_count=18, numeric_only=True).reset_index(drop=True).dropna()
     df = df.reset_index(drop=True)
     return df.merge(df_rolling_max, on=["siteid", "time_local"])

@@ -325,9 +325,9 @@ def make_spatial_bias(df, df_reg=None, column_o=None, label_o=None, column_m=Non
     if df_reg is not None:
         # JianHe: include options for percentile calculation (set in yaml file)
         if ptile is None:
-            df_mean=df_reg.groupby(['siteid'],as_index=False).mean()
+            df_mean=df_reg.groupby(['siteid'],as_index=False).mean(numeric_only=True)
         else:
-            df_mean=df_reg.groupby(['siteid'],as_index=False).quantile(ptile/100.)
+            df_mean=df_reg.groupby(['siteid'],as_index=False).quantile(ptile/100., numeric_only=True)

         #Specify val_max = vdiff. the sp_scatter_bias plot in MONET only uses the val_max value
         #and then uses -1*val_max value for the minimum.
@@ -337,9 +337,9 @@ def make_spatial_bias(df, df_reg=None, column_o=None, label_o=None, column_m=Non
     else:
         # JianHe: include options for percentile calculation (set in yaml file)
         if ptile is None:
-            df_mean=df.groupby(['siteid'],as_index=False).mean()
+            df_mean=df.groupby(['siteid'],as_index=False).mean(numeric_only=True)
         else:
-            df_mean=df.groupby(['siteid'],as_index=False).quantile(ptile/100.)
+            df_mean=df.groupby(['siteid'],as_index=False).quantile(ptile/100., numeric_only=True)

         #Specify val_max = vdiff. the sp_scatter_bias plot in MONET only uses the val_max value
         #and then uses -1*val_max value for the minimum.
@@ -697,7 +697,7 @@ def make_spatial_overlay(df, vmodel, column_o=None, label_o=None, column_m=None,
         ylabel = column_o

     #Take the mean for each siteid
-    df_mean=df.groupby(['siteid'],as_index=False).mean()
+    df_mean=df.groupby(['siteid'],as_index=False).mean(numeric_only=True)

     #Take the mean over time for the model output
     vmodel_mean = vmodel[column_m].mean(dim='time').squeeze()
@@ -1266,14 +1266,14 @@ def make_spatial_bias_exceedance(df, column_o=None, label_o=None, column_m=None,

     # calculate exceedance
     if column_o == 'OZONE_reg':
-        df_mean=df.groupby(['siteid'],as_index=False).quantile(0.95) #concentrations not used in plotting, get the correct format for plotting
+        df_mean=df.groupby(['siteid'],as_index=False).quantile(0.95, numeric_only=True) #concentrations not used in plotting, get the correct format for plotting
         # get the exceedance days for each site
         df_counto = df[df[column_o]> 70.].groupby(['siteid'],as_index=False)[column_o].count()
         df_countm = df[df[column_m]> 70.].groupby(['siteid'],as_index=False)[column_m].count()
         ylabel2 = 'O3'

     elif column_o == 'PM2.5_reg':
-        df_mean=df.groupby(['siteid'],as_index=False).mean() #concentrations not used in plotting, get the correct format for plotting
+        df_mean=df.groupby(['siteid'],as_index=False).mean(numeric_only=True) #concentrations not used in plotting, get the correct format for plotting
         # get the exceedance days for each site
         df_counto = df[df[column_o]> 35.].groupby(['siteid'],as_index=False)[column_o].count()
         df_countm = df[df[column_m]> 35.].groupby(['siteid'],as_index=False)[column_m].count()
```
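One idiom above is worth unpacking: in `calc_24hr_ave_v1`, `sum(min_count=18)` returns `NaN` for any day with fewer than 18 valid hourly values, so dividing by `count()` yields a daily mean only for days meeting EPA's 75% completeness criterion, and `dropna()` removes the rest. A standalone sketch with synthetic hourly data:

```python
import numpy as np
import pandas as pd

idx = pd.date_range("2024-07-01", periods=48, freq="h")
obs = pd.Series(np.arange(48.0), index=idx)
obs.iloc[:30] = np.nan  # day 1 empty; day 2 keeps 18 of 24 hours (exactly 75%)

# sum(min_count=18) is NaN when fewer than 18 valid hours exist, so the
# ratio is NaN for under-sampled days and dropna() screens them out.
daily_mean = (obs.resample("D").sum(min_count=18)
              / obs.resample("D").count()).dropna()
print(daily_mean)  # 2024-07-02    38.5
```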
2 changes: 1 addition & 1 deletion melodies_monet/util/satellite_utilities.py
```diff
@@ -140,7 +140,7 @@ def omps_l3_daily_o3_pairing(model_data,obs_data,ozone_ppbv_varname):
     grid_adjust = xe.Regridder(model_data[['latitude','longitude']],obs_data[['latitude','longitude']],'bilinear')
     mod_col_obsgrid = grid_adjust(column)
     # Aggregate time-step to daily means
-    daily_mean = mod_col_obsgrid.groupby('time.date').mean().compute()
+    daily_mean = mod_col_obsgrid.groupby('time.date').mean(numeric_only=True).compute()

     # change dimension name for date to time
     daily_mean = daily_mean.rename({'date':'time'})
```
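`groupby('time.date')` is xarray's virtual-coordinate syntax for binning time steps by calendar day; the `.compute()` in the source evaluates the dask-backed result and is omitted in this sketch for an in-memory array. A minimal illustration of the daily-mean-plus-rename step (synthetic DataArray):

```python
import numpy as np
import pandas as pd
import xarray as xr

da = xr.DataArray(
    np.arange(48.0),
    coords={"time": pd.date_range("2024-07-01", periods=48, freq="h")},
    dims="time",
)

daily = da.groupby("time.date").mean()  # one value per calendar day
daily = daily.rename({"date": "time"})  # restore the conventional dim name
print(daily.values)  # [11.5 35.5]
```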
15 changes: 9 additions & 6 deletions melodies_monet/util/tools.py
```diff
@@ -80,7 +80,7 @@ def kolmogorov_zurbenko_filter(df, col, window, iterations):
     for i in range(iterations):
         z.index = z.time_local
         z = z.groupby('siteid')[col].rolling(
-            window, center=True, min_periods=1).mean().reset_index().dropna()
+            window, center=True, min_periods=1).mean(numeric_only=True).reset_index().dropna()
     df = df.reset_index(drop=True)
     return df.merge(z, on=['siteid', 'time_local'])

@@ -119,31 +119,34 @@ def long_to_wide(df):
 def calc_8hr_rolling_max(df, col=None, window=None):
     df.index = df.time_local
     df_rolling = df.groupby('siteid')[col].rolling(
-        window, center=True, win_type='boxcar').mean().reset_index().dropna()
+        window, center=True, win_type='boxcar').mean(
+        numeric_only=True).reset_index().dropna()
     df_rolling_max = df_rolling.groupby('siteid').resample(
-        'D', on='time_local').max().reset_index(drop=True)
+        'D', on='time_local').max(numeric_only=True).reset_index(drop=True)
     df = df.reset_index(drop=True)
     return df.merge(df_rolling_max, on=['siteid', 'time_local'])


 def calc_24hr_ave(df, col=None):
     df.index = df.time_local
-    df_24hr_ave = df.groupby('siteid')[col].resample('D').mean().reset_index()
+    df_24hr_ave = df.groupby('siteid')[col].resample('D').mean(
+        numeric_only=True).reset_index()
     df = df.reset_index(drop=True)
     return df.merge(df_24hr_ave, on=['siteid', 'time_local'])


 def calc_3hr_ave(df, col=None):
     df.index = df.time_local
-    df_3hr_ave = df.groupby('siteid')[col].resample('3H').mean().reset_index()
+    df_3hr_ave = df.groupby('siteid')[col].resample('3H').mean(
+        numeric_only=True).reset_index()
     df = df.reset_index(drop=True)
     return df.merge(df_3hr_ave, on=['siteid', 'time_local'])


 def calc_annual_ave(df, col=None):
     df.index = df.time_local
     df_annual_ave = df.groupby('siteid')[col].resample(
-        'A').mean().reset_index()
+        'A').mean(numeric_only=True).reset_index()
     df = df.reset_index(drop=True)
     return df.merge(df_annual_ave, on=['siteid', 'time_local'])
```
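The Kolmogorov–Zurbenko filter touched in the first hunk of this file is simply an iterated centered moving average. A minimal single-series sketch (standalone, unlike the repo's per-site version):

```python
import pandas as pd

def kz_filter(series: pd.Series, window: int, iterations: int) -> pd.Series:
    """Kolmogorov–Zurbenko filter: repeatedly apply a centered moving average."""
    for _ in range(iterations):
        series = series.rolling(window, center=True, min_periods=1).mean()
    return series

# Usage: smooth a short series with a 3-point window applied 3 times.
smoothed = kz_filter(pd.Series([1.0, 5.0, 2.0, 8.0, 3.0]), window=3, iterations=3)
```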