Replace constants.py with data_config.yaml #31

Merged (27 commits) on May 22, 2024.
1 change: 1 addition & 0 deletions .gitignore
@@ -7,6 +7,7 @@ graphs
*.sif
sweeps
test_*.sh
.vscode

### Python ###
# Byte-compiled / optimized / DLL files
16 changes: 13 additions & 3 deletions CHANGELOG.md
@@ -5,11 +5,14 @@ All notable changes to this project will be documented in this file.
The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.1.0/),
and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).


## [unreleased](https://github.com/joeloskarsson/neural-lam/compare/v0.1.0...HEAD)

### Added

- Replaced `constants.py` with `data_config.yaml` for data configuration management
[\#31](https://github.com/joeloskarsson/neural-lam/pull/31)
@sadamov

- new metrics (`nll` and `crps_gauss`) and `metrics` submodule, stddiv output option
[c14b6b4](https://github.com/joeloskarsson/neural-lam/commit/c14b6b4323e6b56f1f18632b6ca8b0d65c3ce36a)
@joeloskarsson
@@ -24,6 +27,14 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0

### Changed

- Updated scripts and modules to use `data_config.yaml` instead of `constants.py`
[\#31](https://github.com/joeloskarsson/neural-lam/pull/31)
@sadamov

- Added new flags in `train_model.py` for configuration previously in `constants.py`
[\#31](https://github.com/joeloskarsson/neural-lam/pull/31)
@sadamov

- moved batch-static features ("water cover") into forcing component return by `WeatherDataset`
[\#13](https://github.com/joeloskarsson/neural-lam/pull/13)
@joeloskarsson
@@ -44,8 +55,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
[\#13](https://github.com/joeloskarsson/neural-lam/pull/13)
@joeloskarsson


## [v0.1.0](https://github.com/joeloskarsson/neural-lam/releases/tag/v0.1.0)

First tagged release of `neural-lam`, matching the Oskarsson et al. 2023 publication
(https://arxiv.org/abs/2309.17370)
(<https://arxiv.org/abs/2309.17370>)
5 changes: 2 additions & 3 deletions README.md
@@ -45,7 +45,7 @@ Still, some restrictions are inevitable:
## A note on the limited area setting
Currently we are using these models on a limited area covering the Nordic region, the so called MEPS area (see [paper](https://arxiv.org/abs/2309.17370)).
There are still some parts of the code that are quite specific to the MEPS area use case.
This is particularly true for the mesh graph creation (`create_mesh.py`) and some of the constants used (`neural_lam/constants.py`).
This is particularly true for the mesh graph creation (`create_mesh.py`) and some of the constants set in a `data_config.yaml` file (path specified via `train_model.py --data_config`).
If there is interest in using Neural-LAM for other areas, refactoring the code to be fully area-agnostic would not be a substantial undertaking.
We would be happy to support such enhancements.
See the issues https://github.com/joeloskarsson/neural-lam/issues/2, https://github.com/joeloskarsson/neural-lam/issues/3 and https://github.com/joeloskarsson/neural-lam/issues/4 for some initial ideas on how this could be done.
@@ -104,13 +104,12 @@ The graph-related files are stored in a directory called `graphs`.

### Create remaining static features
To create the remaining static files run the scripts `create_grid_features.py` and `create_parameter_weights.py`.
The main option to set for these is just which dataset to use.

## Weights & Biases Integration
The project is fully integrated with [Weights & Biases](https://www.wandb.ai/) (W&B) for logging and visualization, but can just as easily be used without it.
When W&B is used, training configuration, training/test statistics and plots are sent to the W&B servers and made available in an interactive web interface.
If W&B is turned off, logging instead saves everything locally to a directory like `wandb/dryrun...`.
The W&B project name is set to `neural-lam`, but this can be changed in `neural_lam/constants.py`.
The W&B project name is set to `neural-lam`, but this can be changed through the flags of `train_model.py` (using argparse).
See the [W&B documentation](https://docs.wandb.ai/) for details.

If you would like to login and use W&B, run:
12 changes: 8 additions & 4 deletions create_grid_features.py
@@ -6,21 +6,25 @@
import numpy as np
import torch

# First-party
from neural_lam import config


def main():
"""
Pre-compute all static features related to the grid nodes
"""
parser = ArgumentParser(description="Training arguments")
parser.add_argument(
"--dataset",
"--data_config",
type=str,
default="meps_example",
help="Dataset to compute weights for (default: meps_example)",
default="neural_lam/data_config.yaml",
help="Path to data config file (default: neural_lam/data_config.yaml)",
)
args = parser.parse_args()
config_loader = config.Config.from_file(args.data_config)

static_dir_path = os.path.join("data", args.dataset, "static")
static_dir_path = os.path.join("data", config_loader.dataset.name, "static")

# -- Static grid node features --
grid_xy = torch.tensor(
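The scripts changed in this PR replace the `--dataset` flag with `--data_config`, a path to a YAML file. Based on the fields accessed in the diffs (`dataset.name`, `dataset.var_names`, `dataset.var_longnames`, and the `projection` block read by `neural_lam/config.py`), a minimal `data_config.yaml` might look like the sketch below. All values shown are hypothetical placeholders for illustration, not the real MEPS configuration:

```yaml
dataset:
  name: meps_example            # used to build paths such as data/<name>/static
  var_names:                    # short names, one per data variable
    - t2m
    - z500
  var_longnames:                # long names; the vertical level is parsed from these
    - temperature_2_heightAboveGround
    - geopotential_500_isobaricInhPa
projection:
  class: LambertConformal       # name of a cartopy.crs projection class
  kwargs:                       # keyword arguments passed to the projection constructor
    central_longitude: 15.0
    central_latitude: 63.3
```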
13 changes: 8 additions & 5 deletions create_mesh.py
@@ -12,6 +12,9 @@
import torch_geometric as pyg
from torch_geometric.utils.convert import from_networkx

# First-party
from neural_lam import config


def plot_graph(graph, title=None):
fig, axis = plt.subplots(figsize=(8, 8), dpi=200) # W,H
@@ -153,11 +156,10 @@ def prepend_node_index(graph, new_index):
def main():
parser = ArgumentParser(description="Graph generation arguments")
parser.add_argument(
"--dataset",
"--data_config",
type=str,
default="meps_example",
help="Dataset to load grid point coordinates from "
"(default: meps_example)",
default="neural_lam/data_config.yaml",
help="Path to data config file (default: neural_lam/data_config.yaml)",
)
parser.add_argument(
"--graph",
@@ -187,7 +189,8 @@ def main():
args = parser.parse_args()

# Load grid positions
static_dir_path = os.path.join("data", args.dataset, "static")
config_loader = config.Config.from_file(args.data_config)
static_dir_path = os.path.join("data", config_loader.dataset.name, "static")
graph_dir_path = os.path.join("graphs", args.graph)
os.makedirs(graph_dir_path, exist_ok=True)

20 changes: 12 additions & 8 deletions create_parameter_weights.py
@@ -8,7 +8,7 @@
from tqdm import tqdm

# First-party
from neural_lam import constants
from neural_lam import config
from neural_lam.weather_dataset import WeatherDataset


@@ -18,10 +18,10 @@ def main():
"""
parser = ArgumentParser(description="Training arguments")
parser.add_argument(
"--dataset",
"--data_config",
type=str,
default="meps_example",
help="Dataset to compute weights for (default: meps_example)",
default="neural_lam/data_config.yaml",
help="Path to data config file (default: neural_lam/data_config.yaml)",
)
parser.add_argument(
"--batch_size",
@@ -43,7 +43,8 @@
)
args = parser.parse_args()

static_dir_path = os.path.join("data", args.dataset, "static")
config_loader = config.Config.from_file(args.data_config)
static_dir_path = os.path.join("data", config_loader.dataset.name, "static")

# Create parameter weights based on height
# based on fig A.1 in graph cast paper
@@ -56,7 +57,10 @@
"500": 0.03,
}
w_list = np.array(
[w_dict[par.split("_")[-2]] for par in constants.PARAM_NAMES]
[
w_dict[par.split("_")[-2]]
for par in config_loader.dataset.var_longnames
]
)
print("Saving parameter weights...")
np.save(
@@ -66,7 +70,7 @@

# Load dataset without any subsampling
ds = WeatherDataset(
args.dataset,
config_loader.dataset.name,
split="train",
subsample_step=1,
pred_length=63,
@@ -113,7 +117,7 @@
# Compute mean and std.-dev. of one-step differences across the dataset
print("Computing mean and std.-dev. for one-step differences...")
ds_standard = WeatherDataset(
args.dataset,
config_loader.dataset.name,
split="train",
subsample_step=1,
pred_length=63,
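The new weight computation in `create_parameter_weights.py` indexes `w_dict` by the second-to-last underscore-separated token of each variable long name (`par.split("_")[-2]`). A small sketch of that lookup follows; the `"500": 0.03` entry appears in the diff above, while the other dictionary entries and the variable names are hypothetical placeholders chosen to match the assumed `<variable>_<level>_<suffix>` naming pattern:

```python
import numpy as np

# Level weights: "500" comes from the diff above; the other entries
# are hypothetical placeholders for illustration.
w_dict = {
    "500": 0.03,
    "850": 0.05,
    "surface": 1.0,
}

# Hypothetical long names following the "<variable>_<level>_<suffix>"
# pattern that par.split("_")[-2] assumes.
var_longnames = [
    "temperature_500_isobaricInhPa",
    "temperature_850_isobaricInhPa",
    "precipitation_surface_accum",
]

# One loss weight per variable, aligned with var_longnames
w_list = np.array([w_dict[par.split("_")[-2]] for par in var_longnames])
print(w_list)
```

Note that this silently assumes every long name contains at least two underscores and that its second-to-last token is a key of `w_dict`; a name outside that pattern raises `KeyError`.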
62 changes: 62 additions & 0 deletions neural_lam/config.py
@@ -0,0 +1,62 @@
# Standard library
import functools
from pathlib import Path

# Third-party
import cartopy.crs as ccrs
import yaml


class Config:
"""
Class for loading configuration files.

This class loads a configuration file and provides a way to access its
values as attributes.
"""

def __init__(self, values):
self.values = values

@classmethod
def from_file(cls, filepath):
"""Load a configuration file."""
if filepath.endswith(".yaml"):
with open(filepath, encoding="utf-8", mode="r") as file:
return cls(values=yaml.safe_load(file))
else:
raise NotImplementedError(Path(filepath).suffix)

def __getattr__(self, name):
keys = name.split(".")
value = self.values
for key in keys:
if key in value:
value = value[key]
else:
return None
if isinstance(value, dict):
return Config(values=value)
return value

def __getitem__(self, key):
value = self.values[key]
if isinstance(value, dict):
return Config(values=value)
return value

def __contains__(self, key):
return key in self.values

def num_data_vars(self):
"""Return the number of data variables."""
return len(self.dataset.var_names)

@functools.cached_property
def coords_projection(self):
"""Return the projection."""
proj_config = self.values["projection"]
proj_class_name = proj_config["class"]
proj_class = getattr(ccrs, proj_class_name)
proj_params = proj_config.get("kwargs", {})
return proj_class(**proj_params)
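The new `Config` class can be exercised as in the sketch below. It trims the class to its loading and attribute-access logic (the cartopy-backed `coords_projection` property is omitted so the example runs without cartopy installed), and the YAML content written to the temporary file is a hypothetical placeholder:

```python
import tempfile
from pathlib import Path

import yaml


class Config:
    """Trimmed copy of neural_lam.config.Config (projection property omitted)."""

    def __init__(self, values):
        self.values = values

    @classmethod
    def from_file(cls, filepath):
        if filepath.endswith(".yaml"):
            with open(filepath, encoding="utf-8", mode="r") as file:
                return cls(values=yaml.safe_load(file))
        raise NotImplementedError(Path(filepath).suffix)

    def __getattr__(self, name):
        value = self.values
        for key in name.split("."):
            if key in value:
                value = value[key]
            else:
                return None
        # Nested dicts come back wrapped in Config, so access can chain
        return Config(values=value) if isinstance(value, dict) else value


# Hypothetical config content, written to a temporary YAML file
with tempfile.NamedTemporaryFile("w", suffix=".yaml", delete=False) as f:
    yaml.safe_dump({"dataset": {"name": "meps_example"}}, f)
    path = f.name

cfg = Config.from_file(path)
print(cfg.dataset.name)  # chained attribute access into nested mappings
print(cfg.missing_key)   # absent keys return None instead of raising
```

A design note on `__getattr__`: returning `None` for missing keys keeps call sites terse, but it also hides typos (`cfg.datset.name` fails with an `AttributeError` on `None` rather than naming the bad key), which is worth keeping in mind when debugging configs.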
120 changes: 0 additions & 120 deletions neural_lam/constants.py

This file was deleted.
