Add numeric_only=True to relevant pandas operations #259

Merged · 6 commits · Jul 15, 2024
Changes from 1 commit
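Background: pandas 1.5 deprecated the silent dropping of non-numeric ("nuisance") columns during aggregation, and pandas 2.0 turned it into an error, so grouped reductions such as `.mean()` now raise a `TypeError` whenever the frame carries string columns alongside the values. Passing `numeric_only=True` opts back in to numeric-only aggregation explicitly, which is what every hunk below does. A minimal sketch of the failure and the fix, using hypothetical data:

```python
import pandas as pd

# Hypothetical paired-observation frame: numeric values plus string metadata.
df = pd.DataFrame({
    "siteid": ["A", "A", "B", "B"],
    "obs": [40.0, 44.0, 55.0, 57.0],
    "units": ["ppbv", "ppbv", "ppbv", "ppbv"],  # non-numeric column
})

# pandas >= 2.0 raises TypeError here because 'units' cannot be averaged:
# df.groupby(["siteid"], as_index=False).mean()

# Restricting the aggregation to numeric columns works on old and new pandas:
df_mean = df.groupby(["siteid"], as_index=False).mean(numeric_only=True)
print(df_mean)
#   siteid   obs
# 0      A  42.0
# 1      B  56.0
```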
40 changes: 20 additions & 20 deletions melodies_monet/plots/aircraftplots.py
```diff
@@ -117,9 +117,9 @@ def make_spatial_bias(df, df_reg=None, column_o=None, label_o=None, column_m=Non
     if df_reg is not None:
         # JianHe: include options for percentile calculation (set in yaml file)
         if ptile is None:
-            df_mean=df_reg.groupby(['siteid'],as_index=False).mean()
+            df_mean=df_reg.groupby(['siteid'],as_index=False).mean(numeric_only=True)
         else:
-            df_mean=df_reg.groupby(['siteid'],as_index=False).quantile(ptile/100.)
+            df_mean=df_reg.groupby(['siteid'],as_index=False).quantile(ptile/100., numeric_only=True)

         #Specify val_max = vdiff. the sp_scatter_bias plot in MONET only uses the val_max value
         #and then uses -1*val_max value for the minimum.
@@ -129,9 +129,9 @@ def make_spatial_bias(df, df_reg=None, column_o=None, label_o=None, column_m=Non
     else:
         # JianHe: include options for percentile calculation (set in yaml file)
         if ptile is None:
-            df_mean=df.groupby(['siteid'],as_index=False).mean()
+            df_mean=df.groupby(['siteid'],as_index=False).mean(numeric_only=True)
         else:
-            df_mean=df.groupby(['siteid'],as_index=False).quantile(ptile/100.)
+            df_mean=df.groupby(['siteid'],as_index=False).quantile(ptile/100., numeric_only=True)

         #Specify val_max = vdiff. the sp_scatter_bias plot in MONET only uses the val_max value
         #and then uses -1*val_max value for the minimum.
@@ -315,19 +315,19 @@ def make_vertprofile(df, column=None, label=None, ax=None, bins=None, altitude_v
     bin_midpoints = altitude_bins.apply(lambda x: x.mid)
     # Convert bin_midpoints to a column in the DataFrame
     df['bin_midpoints'] = bin_midpoints
-    median = df.groupby(altitude_bins, observed=True)[column].median()
-    q1 = df.groupby(altitude_bins, observed=True)[column].quantile(0.25)
-    q3 = df.groupby(altitude_bins, observed=True)[column].quantile(0.75)
+    median = df.groupby(altitude_bins, observed=True)[column].median(numeric_only=True)
+    q1 = df.groupby(altitude_bins, observed=True)[column].quantile(0.25, numeric_only=True)
+    q3 = df.groupby(altitude_bins, observed=True)[column].quantile(0.75, numeric_only=True)
     # Convert bin_midpoints to a numerical data type
     df['bin_midpoints'] = df['bin_midpoints'].astype(float)

-    p5 = df.groupby(altitude_bins, observed=True)[column].quantile(0.05)
-    p10 = df.groupby(altitude_bins, observed=True)[column].quantile(0.10)
-    p90 = df.groupby(altitude_bins, observed=True)[column].quantile(0.90)
-    p95 = df.groupby(altitude_bins, observed=True)[column].quantile(0.95)
+    p5 = df.groupby(altitude_bins, observed=True)[column].quantile(0.05, numeric_only=True)
+    p10 = df.groupby(altitude_bins, observed=True)[column].quantile(0.10, numeric_only=True)
+    p90 = df.groupby(altitude_bins, observed=True)[column].quantile(0.90, numeric_only=True)
+    p95 = df.groupby(altitude_bins, observed=True)[column].quantile(0.95, numeric_only=True)

     # Calculate the mean of bin_midpoints grouped by altitude bins
-    binmidpoint = df.groupby(altitude_bins, observed=True)['bin_midpoints'].mean()
+    binmidpoint = df.groupby(altitude_bins, observed=True)['bin_midpoints'].mean(numeric_only=True)

     ##Plotting vertprofile starts
     plot_kwargs_fillbetween = plot_kwargs.copy()
@@ -420,20 +420,20 @@ def make_vertprofile(df, column=None, label=None, ax=None, bins=None, altitude_v
     # Convert bin_midpoints to a column in the DataFrame
     df['bin_midpoints'] = bin_midpoints
     # can be .groupby(bin_midpoints) as well (qzr)
-    median = df.groupby(altitude_bins, observed=True)[column].median()
-    q1 = df.groupby(altitude_bins, observed=True)[column].quantile(0.25)
-    q3 = df.groupby(altitude_bins, observed=True)[column].quantile(0.75)
+    median = df.groupby(altitude_bins, observed=True)[column].median(numeric_only=True)
+    q1 = df.groupby(altitude_bins, observed=True)[column].quantile(0.25, numeric_only=True)
+    q3 = df.groupby(altitude_bins, observed=True)[column].quantile(0.75, numeric_only=True)
     # Convert bin_midpoints to a numerical data type
     df['bin_midpoints'] = df['bin_midpoints'].astype(float)

     # Calculate the 10th, 90th, 5th, and 95th percentiles
-    p10 = df.groupby(altitude_bins, observed=True)[column].quantile(0.10)
-    p90 = df.groupby(altitude_bins, observed=True)[column].quantile(0.90)
-    p5 = df.groupby(altitude_bins, observed=True)[column].quantile(0.05)
-    p95 = df.groupby(altitude_bins, observed=True)[column].quantile(0.95)
+    p10 = df.groupby(altitude_bins, observed=True)[column].quantile(0.10, numeric_only=True)
+    p90 = df.groupby(altitude_bins, observed=True)[column].quantile(0.90, numeric_only=True)
+    p5 = df.groupby(altitude_bins, observed=True)[column].quantile(0.05, numeric_only=True)
+    p95 = df.groupby(altitude_bins, observed=True)[column].quantile(0.95, numeric_only=True)

     # Calculate the mean of bin_midpoints grouped by altitude bins
-    binmidpoint = df.groupby(altitude_bins, observed=True)['bin_midpoints'].mean()
+    binmidpoint = df.groupby(altitude_bins, observed=True)['bin_midpoints'].mean(numeric_only=True)

     plot_kwargs_fillbetween = plot_dict.copy()
     del plot_kwargs_fillbetween['marker']
```
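The vertical-profile hunks above group on `pd.cut` altitude bins and take per-bin medians and percentiles. A self-contained sketch of that pattern on synthetic data (the column names here are illustrative, not the repo's):

```python
import numpy as np
import pandas as pd

rng = np.random.default_rng(0)
df = pd.DataFrame({
    "altitude": rng.uniform(0.0, 10_000.0, 500),  # hypothetical altitudes (m)
    "o3": rng.normal(60.0, 10.0, 500),            # hypothetical ozone (ppbv)
})

# Bin altitudes into intervals; observed=True keeps only non-empty bins.
altitude_bins = pd.cut(df["altitude"], bins=10)
grouped = df.groupby(altitude_bins, observed=True)["o3"]

median = grouped.median()
q1, q3 = grouped.quantile(0.25), grouped.quantile(0.75)
p5, p95 = grouped.quantile(0.05), grouped.quantile(0.95)

# Numeric bin centers to plot the profile against.
bin_mid = df.groupby(altitude_bins, observed=True)["altitude"].mean()
```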
2 changes: 1 addition & 1 deletion melodies_monet/plots/satplots.py
```diff
@@ -438,7 +438,7 @@ def make_spatial_overlay(df, vmodel, column_o=None, label_o=None, column_m=None,
         ylabel = column_o

     #Take the mean for each siteid
-    df_mean=df.groupby(['siteid'],as_index=False).mean()
+    df_mean=df.groupby(['siteid'],as_index=False).mean(numeric_only=True)

     #Take the mean over time for the model output
     vmodel_mean = vmodel[column_m].mean(dim='time').squeeze()
```
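In `make_spatial_overlay`, only the observation side is a pandas groupby; the model side is an xarray reduction over the time dimension. A small sketch of that step on a synthetic dataset (variable and dimension names assumed, not taken from the repo):

```python
import numpy as np
import pandas as pd
import xarray as xr

# Hypothetical model output: a (time, y, x) ozone field.
vmodel = xr.Dataset(
    {"o3": (("time", "y", "x"), np.random.rand(4, 2, 3))},
    coords={"time": pd.date_range("2024-07-01", periods=4, freq="h")},
)

# Average over time; squeeze() drops any remaining length-1 dimensions.
vmodel_mean = vmodel["o3"].mean(dim="time").squeeze()
print(vmodel_mean.dims)  # ('y', 'x')
```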
20 changes: 10 additions & 10 deletions melodies_monet/plots/surfplots.py
```diff
@@ -43,7 +43,7 @@ def make_24hr_regulatory(df, col=None):
 def calc_24hr_ave_v1(df, col=None):
     df.index = df.time_local
     # select sites with nobs >=18, 75% completeness
-    df_24hr_ave = (df.groupby("siteid")[col].resample("D").sum(min_count=18)/df.groupby("siteid")[col].resample("D").count()).reset_index().dropna()
+    df_24hr_ave = (df.groupby("siteid")[col].resample("D").sum(min_count=18, numeric_only=True)/df.groupby("siteid")[col].resample("D").count()).reset_index().dropna()
     df = df.reset_index(drop=True)
     return df.merge(df_24hr_ave, on=["siteid", "time_local"])

@@ -67,10 +67,10 @@ def make_8hr_regulatory(df, col=None):

 def calc_8hr_rolling_max_v1(df, col=None, window=None):
     df.index = df.time_local
-    df_rolling = df.groupby("siteid")[col].rolling(window,min_periods=6,center=True, win_type="boxcar").mean().reset_index().dropna()
+    df_rolling = df.groupby("siteid")[col].rolling(window,min_periods=6,center=True, win_type="boxcar").mean(numeric_only=True).reset_index().dropna()
     # JianHe: select sites with nobs >=18, 75% completeness based on EPA
     df_rolling.index = df_rolling.time_local
-    df_rolling_max = df_rolling.groupby("siteid").resample("D").max(min_count=18).reset_index(drop=True).dropna()
+    df_rolling_max = df_rolling.groupby("siteid").resample("D").max(min_count=18, numeric_only=True).reset_index(drop=True).dropna()
     df = df.reset_index(drop=True)
     return df.merge(df_rolling_max, on=["siteid", "time_local"])

@@ -325,9 +325,9 @@ def make_spatial_bias(df, df_reg=None, column_o=None, label_o=None, column_m=Non
     if df_reg is not None:
         # JianHe: include options for percentile calculation (set in yaml file)
         if ptile is None:
-            df_mean=df_reg.groupby(['siteid'],as_index=False).mean()
+            df_mean=df_reg.groupby(['siteid'],as_index=False).mean(numeric_only=True)
         else:
-            df_mean=df_reg.groupby(['siteid'],as_index=False).quantile(ptile/100.)
+            df_mean=df_reg.groupby(['siteid'],as_index=False).quantile(ptile/100., numeric_only=True)

         #Specify val_max = vdiff. the sp_scatter_bias plot in MONET only uses the val_max value
         #and then uses -1*val_max value for the minimum.
@@ -337,9 +337,9 @@ def make_spatial_bias(df, df_reg=None, column_o=None, label_o=None, column_m=Non
     else:
         # JianHe: include options for percentile calculation (set in yaml file)
         if ptile is None:
-            df_mean=df.groupby(['siteid'],as_index=False).mean()
+            df_mean=df.groupby(['siteid'],as_index=False).mean(numeric_only=True)
         else:
-            df_mean=df.groupby(['siteid'],as_index=False).quantile(ptile/100.)
+            df_mean=df.groupby(['siteid'],as_index=False).quantile(ptile/100., numeric_only=True)

         #Specify val_max = vdiff. the sp_scatter_bias plot in MONET only uses the val_max value
         #and then uses -1*val_max value for the minimum.
@@ -697,7 +697,7 @@ def make_spatial_overlay(df, vmodel, column_o=None, label_o=None, column_m=None,
         ylabel = column_o

     #Take the mean for each siteid
-    df_mean=df.groupby(['siteid'],as_index=False).mean()
+    df_mean=df.groupby(['siteid'],as_index=False).mean(numeric_only=True)

     #Take the mean over time for the model output
     vmodel_mean = vmodel[column_m].mean(dim='time').squeeze()
@@ -1266,14 +1266,14 @@ def make_spatial_bias_exceedance(df, column_o=None, label_o=None, column_m=None,

     # calculate exceedance
     if column_o == 'OZONE_reg':
-        df_mean=df.groupby(['siteid'],as_index=False).quantile(0.95) #concentrations not used in plotting, get the correct format for plotting
+        df_mean=df.groupby(['siteid'],as_index=False).quantile(0.95, numeric_only=True) #concentrations not used in plotting, get the correct format for plotting
         # get the exceedance days for each site
         df_counto = df[df[column_o]> 70.].groupby(['siteid'],as_index=False)[column_o].count()
         df_countm = df[df[column_m]> 70.].groupby(['siteid'],as_index=False)[column_m].count()
         ylabel2 = 'O3'

     elif column_o == 'PM2.5_reg':
-        df_mean=df.groupby(['siteid'],as_index=False).mean() #concentrations not used in plotting, get the correct format for plotting
+        df_mean=df.groupby(['siteid'],as_index=False).mean(numeric_only=True) #concentrations not used in plotting, get the correct format for plotting
         # get the exceedance days for each site
         df_counto = df[df[column_o]> 35.].groupby(['siteid'],as_index=False)[column_o].count()
         df_countm = df[df[column_m]> 35.].groupby(['siteid'],as_index=False)[column_m].count()
```
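One idiom above is worth unpacking: in `calc_24hr_ave_v1`, `sum(min_count=18)` returns `NaN` for any day with fewer than 18 valid hourly values, so dividing by `count()` yields a daily mean only for days meeting EPA's 75% completeness criterion, and `dropna()` removes the rest. A standalone sketch with synthetic hourly data:

```python
import numpy as np
import pandas as pd

idx = pd.date_range("2024-07-01", periods=48, freq="h")
obs = pd.Series(np.arange(48.0), index=idx)
obs.iloc[:30] = np.nan  # day 1 empty; day 2 keeps 18 of 24 hours (exactly 75%)

# sum(min_count=18) is NaN when fewer than 18 valid hours exist, so the
# ratio is NaN for under-sampled days and dropna() screens them out.
daily_mean = (obs.resample("D").sum(min_count=18)
              / obs.resample("D").count()).dropna()
print(daily_mean)  # 2024-07-02    38.5
```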
2 changes: 1 addition & 1 deletion melodies_monet/util/satellite_utilities.py
```diff
@@ -140,7 +140,7 @@ def omps_l3_daily_o3_pairing(model_data,obs_data,ozone_ppbv_varname):
     grid_adjust = xe.Regridder(model_data[['latitude','longitude']],obs_data[['latitude','longitude']],'bilinear')
     mod_col_obsgrid = grid_adjust(column)
     # Aggregate time-step to daily means
-    daily_mean = mod_col_obsgrid.groupby('time.date').mean().compute()
+    daily_mean = mod_col_obsgrid.groupby('time.date').mean(numeric_only=True).compute()

     # change dimension name for date to time
     daily_mean = daily_mean.rename({'date':'time'})
```
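`groupby('time.date')` is xarray's virtual-coordinate syntax for binning time steps by calendar day; the `.compute()` in the source evaluates the dask-backed result and is omitted in this sketch for an in-memory array. A minimal illustration of the daily-mean-plus-rename step (synthetic DataArray):

```python
import numpy as np
import pandas as pd
import xarray as xr

da = xr.DataArray(
    np.arange(48.0),
    coords={"time": pd.date_range("2024-07-01", periods=48, freq="h")},
    dims="time",
)

daily = da.groupby("time.date").mean()  # one value per calendar day
daily = daily.rename({"date": "time"})  # restore the conventional dim name
print(daily.values)  # [11.5 35.5]
```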
15 changes: 9 additions & 6 deletions melodies_monet/util/tools.py
```diff
@@ -80,7 +80,7 @@ def kolmogorov_zurbenko_filter(df, col, window, iterations):
     for i in range(iterations):
         z.index = z.time_local
         z = z.groupby('siteid')[col].rolling(
-            window, center=True, min_periods=1).mean().reset_index().dropna()
+            window, center=True, min_periods=1).mean(numeric_only=True).reset_index().dropna()
     df = df.reset_index(drop=True)
     return df.merge(z, on=['siteid', 'time_local'])

@@ -119,31 +119,34 @@ def long_to_wide(df):
 def calc_8hr_rolling_max(df, col=None, window=None):
     df.index = df.time_local
     df_rolling = df.groupby('siteid')[col].rolling(
-        window, center=True, win_type='boxcar').mean().reset_index().dropna()
+        window, center=True, win_type='boxcar').mean(
+        numeric_only=True).reset_index().dropna()
     df_rolling_max = df_rolling.groupby('siteid').resample(
-        'D', on='time_local').max().reset_index(drop=True)
+        'D', on='time_local').max(numeric_only=True).reset_index(drop=True)
     df = df.reset_index(drop=True)
     return df.merge(df_rolling_max, on=['siteid', 'time_local'])


 def calc_24hr_ave(df, col=None):
     df.index = df.time_local
-    df_24hr_ave = df.groupby('siteid')[col].resample('D').mean().reset_index()
+    df_24hr_ave = df.groupby('siteid')[col].resample('D').mean(
+        numeric_only=True).reset_index()
     df = df.reset_index(drop=True)
     return df.merge(df_24hr_ave, on=['siteid', 'time_local'])


 def calc_3hr_ave(df, col=None):
     df.index = df.time_local
-    df_3hr_ave = df.groupby('siteid')[col].resample('3H').mean().reset_index()
+    df_3hr_ave = df.groupby('siteid')[col].resample('3H').mean(
+        numeric_only=True).reset_index()
     df = df.reset_index(drop=True)
     return df.merge(df_3hr_ave, on=['siteid', 'time_local'])


 def calc_annual_ave(df, col=None):
     df.index = df.time_local
     df_annual_ave = df.groupby('siteid')[col].resample(
-        'A').mean().reset_index()
+        'A').mean(numeric_only=True).reset_index()
     df = df.reset_index(drop=True)
     return df.merge(df_annual_ave, on=['siteid', 'time_local'])
```
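The Kolmogorov–Zurbenko filter touched in the first hunk of this file is simply an iterated centered moving average. A minimal single-series sketch (standalone, unlike the repo's per-site version):

```python
import pandas as pd

def kz_filter(series: pd.Series, window: int, iterations: int) -> pd.Series:
    """Kolmogorov–Zurbenko filter: repeatedly apply a centered moving average."""
    for _ in range(iterations):
        series = series.rolling(window, center=True, min_periods=1).mean()
    return series

# Usage: smooth a short series with a 3-point window applied 3 times.
smoothed = kz_filter(pd.Series([1.0, 5.0, 2.0, 8.0, 3.0]), window=3, iterations=3)
```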