Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Finish renaming #79

Merged
merged 3 commits into from
Jan 13, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
File renamed without changes.
2 changes: 1 addition & 1 deletion docs/source/index.rst
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,6 @@
api.definitions
api.data_readers
api.dimensions
api.named_dim_arrays
api.flodym_arrays
api.processes
api.export_and_plotting
4 changes: 2 additions & 2 deletions examples/example5.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -692,7 +692,7 @@
" * vehicle_mfa_2.parameters[\"vehicle material content\"]\n",
" * 1e-9\n",
")\n",
"global_stock_by_material_type = stock_by_material_type.sum_nda_over(sum_over_dims=(\"r\"))\n",
"global_stock_by_material_type = stock_by_material_type.sum_over(sum_over_dims=(\"r\"))\n",
"global_stock_by_material_type_in_2017 = global_stock_by_material_type[{\"t\": 2017}]\n",
"\n",
"stock_df = global_stock_by_material_type_in_2017.to_df(index=False)\n",
Expand Down Expand Up @@ -833,7 +833,7 @@
],
"source": [
"np.nan_to_num(vehicle_mfa_2.flows[\"scrap => sysenv\"].values, copy=False)\n",
"scrap_outflow = vehicle_mfa_2.flows[\"scrap => sysenv\"].sum_nda_over(sum_over_dims=(\"m\"))\n",
"scrap_outflow = vehicle_mfa_2.flows[\"scrap => sysenv\"].sum_over(sum_over_dims=(\"m\"))\n",
"outflow_df = scrap_outflow.to_df(dim_to_columns=\"waste\")\n",
"outflow_df = outflow_df[outflow_df.index > 2017]\n",
"fig = px.line(outflow_df, title=\"Scrap outflow\")\n",
Expand Down
4 changes: 2 additions & 2 deletions examples/example5.py
Original file line number Diff line number Diff line change
Expand Up @@ -330,7 +330,7 @@ def vehicle_new_registration(self, data, dims):
* vehicle_mfa_2.parameters["vehicle material content"]
* 1e-9
)
global_stock_by_material_type = stock_by_material_type.sum_nda_over(sum_over_dims=("r"))
global_stock_by_material_type = stock_by_material_type.sum_over(sum_over_dims=("r"))
global_stock_by_material_type_in_2017 = global_stock_by_material_type[{"t": 2017}]

stock_df = global_stock_by_material_type_in_2017.to_df(index=False)
Expand All @@ -339,7 +339,7 @@ def vehicle_new_registration(self, data, dims):

# %%
np.nan_to_num(vehicle_mfa_2.flows["scrap => sysenv"].values, copy=False)
scrap_outflow = vehicle_mfa_2.flows["scrap => sysenv"].sum_nda_over(sum_over_dims=("m"))
scrap_outflow = vehicle_mfa_2.flows["scrap => sysenv"].sum_over(sum_over_dims=("m"))
outflow_df = scrap_outflow.to_df(dim_to_columns="waste")
outflow_df = outflow_df[outflow_df.index > 2017]
fig = px.line(outflow_df, title="Scrap outflow")
Expand Down
52 changes: 27 additions & 25 deletions flodym/_df_to_nda.py → flodym/_df_to_flodym_array.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,14 +11,14 @@
from .flodym_arrays import FlodymArray


class NDADataFormat(PydanticBaseModel):
class FlodymDataFormat(PydanticBaseModel):

type: Literal["long", "wide"]
value_column: str = "value"
columns_dim: Optional[str] = None


class DataFrameToNDADataConverter:
class DataFrameToFlodymDataConverter:
    """Converts a pandas DataFrame with various possible formats to a numpy array that can be used
as values of a FlodymArray.

Expand All @@ -28,14 +28,14 @@ class DataFrameToNDADataConverter:
In case of errors, turning on debug logging might help to understand the process.
"""

def __init__(self, df: pd.DataFrame, nda: "FlodymArray"):
def __init__(self, df: pd.DataFrame, flodym_array: "FlodymArray"):
self.df = df.copy()
self.nda = nda
self.nda_values = self.get_nda_values()
self.flodym_array = flodym_array
self.target_values = self.get_target_values()

def get_nda_values(self) -> np.ndarray:
def get_target_values(self) -> np.ndarray:
logging.debug(
f"Start setting values for FlodymArray {self.nda.name} with dimensions {self.nda.dims.names} from dataframe."
f"Start setting values for FlodymArray {self.flodym_array.name} with dimensions {self.flodym_array.dims.names} from dataframe."
)
self._reset_non_default_index()
self._determine_format()
Expand All @@ -44,7 +44,7 @@ def get_nda_values(self) -> np.ndarray:
self._convert_type()
self._sort_df()
self._check_data_complete()
return self.df[self.format.value_column].values.reshape(self.nda.shape)
return self.df[self.format.value_column].values.reshape(self.flodym_array.shape)

def _reset_non_default_index(self):
if isinstance(self.df.index, pd.MultiIndex):
Expand All @@ -63,7 +63,7 @@ def _determine_format(self):
self._check_value_columns()

def _get_dim_columns_by_name(self):
self.dim_columns = [c for c in self.df.columns if c in self.nda.dims.names]
self.dim_columns = [c for c in self.df.columns if c in self.flodym_array.dims.names]
logging.debug(f"Recognized index columns by name: {self.dim_columns}")

def _check_if_first_row_are_items(self):
Expand All @@ -74,7 +74,7 @@ def _check_if_first_row_are_items(self):
column_name = self.df.columns[0]
col_items = self.df[column_name].unique()
extended_col_items = [column_name] + col_items.tolist()
for dim in self.nda.dims:
for dim in self.flodym_array.dims:
if self.same_items(extended_col_items, dim):
self._add_column_names_as_row(column_name, dim)

Expand Down Expand Up @@ -108,7 +108,7 @@ def _check_for_dim_columns_by_items(self):
def _check_if_dim_column_by_items(self, column_name: str) -> bool:
logging.debug(f"Checking if {column_name} is a dimension by comparing items with dim items")
col_items = self.df[column_name].unique()
for dim in self.nda.dims:
for dim in self.flodym_array.dims:
if self.same_items(col_items, dim):
logging.debug(f"{column_name} is dimension {dim.name}.")
self.df.rename(columns={column_name: dim.name}, inplace=True)
Expand All @@ -125,10 +125,10 @@ def _check_value_columns(self):

def _check_if_value_columns_match_dim_items(self, value_cols: list[str]) -> bool:
logging.debug("Trying to match set of value column names with items of dimension.")
for dim in self.nda.dims:
for dim in self.flodym_array.dims:
if self.same_items(value_cols, dim):
logging.debug(f"Value columns match dimension items of {dim.name}.")
self.format = NDADataFormat(type="wide", columns_dim=dim.name)
self.format = FlodymDataFormat(type="wide", columns_dim=dim.name)
if dim.dtype is not None:
for c in value_cols:
self.df.rename(columns={c: dim.dtype(c)}, inplace=True)
Expand All @@ -140,7 +140,7 @@ def _check_if_valid_long_format(self, value_cols: list[str]):
"Could not find dimension with same item set as value column names. Assuming long format, i.e. one value column."
)
if len(value_cols) == 1:
self.format = NDADataFormat(type="long", value_column=value_cols[0])
self.format = FlodymDataFormat(type="long", value_column=value_cols[0])
logging.debug(f"Value column name is {value_cols[0]}.")
else:
raise ValueError(
Expand All @@ -152,29 +152,29 @@ def _df_to_long_format(self):
if self.format.type != "wide":
return
logging.debug("Converting wide format to long format.")
value_cols = self.nda.dims[self.format.columns_dim].items
value_cols = self.flodym_array.dims[self.format.columns_dim].items
self.df = self.df.melt(
id_vars=[c for c in self.df.columns if c not in value_cols],
value_vars=value_cols,
var_name=self.format.columns_dim,
value_name=self.format.value_column,
)
self.dim_columns.append(self.format.columns_dim)
self.format = NDADataFormat(type="long", value_column=self.format.value_column)
self.format = FlodymDataFormat(type="long", value_column=self.format.value_column)

def _check_missing_dim_columns(self):
missing_dim_columns = np.setdiff1d(list(self.nda.dims.names), self.dim_columns)
missing_dim_columns = np.setdiff1d(list(self.flodym_array.dims.names), self.dim_columns)
for c in missing_dim_columns:
if len(self.nda.dims[c].items) == 1:
self.df[c] = self.nda.dims[c].items[0]
if len(self.flodym_array.dims[c].items) == 1:
self.df[c] = self.flodym_array.dims[c].items[0]
self.dim_columns.append(c)
else:
raise ValueError(
f"Dimension {c} from array has more than one item, but is not found in df. Please specify column in dataframe."
)

def _convert_type(self):
for dim in self.nda.dims:
for dim in self.flodym_array.dims:
if dim.dtype is not None:
self.df[dim.name] = self.df[dim.name].map(dim.dtype)
self.df[self.format.value_column] = self.df[self.format.value_column].astype(np.float64)
Expand All @@ -186,19 +186,21 @@ def _sort_df(self):
FlodymArray.
"""
# sort columns
self.df = self.df[list(self.nda.dims.names) + [self.format.value_column]]
self.df = self.df[list(self.flodym_array.dims.names) + [self.format.value_column]]
# sort rows
self.df = self.df.sort_values(
by=list(self.nda.dims.names),
key=lambda x: x.map(lambda y: self.nda.dims[x.name].items.index(y)),
by=list(self.flodym_array.dims.names),
key=lambda x: x.map(lambda y: self.flodym_array.dims[x.name].items.index(y)),
)

def _check_data_complete(self):
# Generate expected index tuples from FlodymArray dimensions
if self.nda.dims.ndim == 0:
if self.flodym_array.dims.ndim == 0:
expected_index_tuples = set()
else:
expected_index_tuples = set(itertools.product(*(dim.items for dim in self.nda.dims)))
expected_index_tuples = set(
itertools.product(*(dim.items for dim in self.flodym_array.dims))
)

# Generate actual index tuples from DataFrame columns
actual_index_tuples = set(
Expand Down
10 changes: 5 additions & 5 deletions flodym/flodym_array_helper.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,14 +4,14 @@
from .dimensions import Dimension


def named_dim_array_stack(named_dim_arrays: list[FlodymArray], dimension: Dimension) -> FlodymArray:
def flodym_array_stack(flodym_arrays: list[FlodymArray], dimension: Dimension) -> FlodymArray:
"""Stack a list of FlodymArray objects using a new dimension.
Like numpy.stack with axis=-1, but for `FlodymArray`s.
Method can be applied to `FlodymArray`s, `StockArray`s, `Parameter`s and `Flow`s.
"""
named_dim_array0 = named_dim_arrays[0]
extended_dimensions = named_dim_array0.dims.expand_by([dimension])
flodym_array0 = flodym_arrays[0]
extended_dimensions = flodym_array0.dims.expand_by([dimension])
extended = FlodymArray(dims=extended_dimensions)
for item, nda in zip(dimension.items, named_dim_arrays):
extended[{dimension.letter: item}] = nda
for item, flodym_array in zip(dimension.items, flodym_arrays):
extended[{dimension.letter: item}] = flodym_array
return extended
Loading