From fb3c7e599e57e17b1c5aafc173ebb44d5b8816bd Mon Sep 17 00:00:00 2001 From: Margherita Capitani Date: Thu, 12 Dec 2024 11:08:48 +0100 Subject: [PATCH 01/12] First_fix_for_multimigrid --- Snakefile | 5 +- scripts/build_demand.py | 439 ++++++++++++++++------------- scripts/clean_earth_osm_data.py | 27 +- scripts/cluster_buildings.py | 107 ++++--- scripts/create_network.py | 118 ++++---- scripts/download_osm_data.py | 94 +++--- test/config.distribution.test.yaml | 6 + 7 files changed, 450 insertions(+), 346 deletions(-) diff --git a/Snakefile b/Snakefile index 9d4cddc..0a70286 100644 --- a/Snakefile +++ b/Snakefile @@ -104,6 +104,7 @@ rule ramp_build_demand_profile: rule build_demand: params: tier=config["tier"], + snapshots=config["snapshots"], build_demand_model=config["build_demand_type"], input: **{ @@ -163,7 +164,7 @@ if config["enable"].get("download_osm_buildings", True): rule download_osm_data: output: - building_resources="resources/" + RDIR + "osm/raw/all_raw_building.geojson", + building_resources="resources/" + RDIR + "osm/raw/all_raw_buildings.geojson", log: "logs/" + RDIR + "download_osm_data.log", benchmark: @@ -177,7 +178,7 @@ if config["enable"].get("download_osm_buildings", True): rule clean_earth_osm_data: input: - all_buildings="resources/" + RDIR + "osm/raw/all_raw_building.geojson", + all_buildings="resources/" + RDIR + "osm/raw/all_raw_buildings.geojson", microgrid_shapes="resources/shapes/microgrid_shapes.geojson", output: microgrid_building="resources/buildings/microgrid_building.geojson", diff --git a/scripts/build_demand.py b/scripts/build_demand.py index 1443dd9..7d50ad8 100644 --- a/scripts/build_demand.py +++ b/scripts/build_demand.py @@ -132,23 +132,19 @@ def get_WorldPop_data( return WorldPop_inputfile, WorldPop_filename -# Estimate the total population of tghe microgrid def estimate_microgrid_population( n, p, raster_path, shapes_path, sample_profile, output_file ): - # Read the sample profile of electricity demand and extract the column corresponding to the electric load - per_unit_load = pd.read_csv(sample_profile)["0"] / p + population_data = {} - # Dataframe of the load - microgrid_load = pd.DataFrame() - - # Load the GeoJSON file with the shapes to mask the raster shapes = gpd.read_file(shapes_path) - # Mask the raster with each shape and save each masked raster as a new file for i, shape in shapes.iterrows(): + + name = shape["name"] + with rasterio.open(raster_path) as src: - # Mask the raster with the current shape + masked, out_transform = rasterio.mask.mask(src, [shape.geometry], crop=True) out_meta = src.meta.copy() out_meta.update( @@ -160,12 +156,19 @@ def estimate_microgrid_population( } ) + pop_microgrid = masked[masked >= 0].sum() - col_name = "microgrid_1_bus_572666767" - microgrid_load[col_name] = per_unit_load * pop_microgrid + + population_data[name] = pop_microgrid + + + population_df = pd.DataFrame( + list(population_data.items()), columns=["Microgrid_Name", "Population"] + ) + population_df.to_csv(output_file, index=False) - return pop_microgrid, microgrid_load + return population_df def calculate_load( @@ -177,27 +180,72 @@ def calculate_load( geojson_file, output_file, input_path, + microgrids_list, + start_date, + end_date, + inclusive, ): # Estimate the microgrid population and load using the existing function - pop_microgrid, microgrid_load = estimate_microgrid_population( + pop_microgrid= estimate_microgrid_population( n, p, raster_path, shapes_path, sample_profile, output_file ) building_class = pd.read_csv(input_path) - 
total_buildings = building_class["count"].sum() - building_for_cluster = pd.DataFrame( - building_class.groupby("cluster_id").sum()["count"] - ) - population_per_building = pop_microgrid / total_buildings - population_per_cluster = building_for_cluster * population_per_building - per_unit_load = pd.read_csv(sample_profile)["0"] / p - load_per_cluster = population_per_cluster["count"].apply( - lambda x: x * per_unit_load - ) - load_per_cluster = load_per_cluster.T - load_per_cluster.insert(0, "snapshots", n.snapshots) - load_per_cluster.to_csv(output_file, index=True) + # DataFrame per accumulare i risultati di tutte le microgrid + microgrid_dataframes = {} + + # Carica il profilo di carico e crea l'indice temporale + df = pd.read_csv(sample_profile) + per_unit_load = df["0"] / p + df["per_unit_load"] = per_unit_load + time_index = pd.date_range(start="2013-01-01", end="2013-12-31 23:00:00", freq="h") + df = df.set_index(time_index) + + # Gestione del filtro temporale + if inclusive == "left": + end_date = (pd.to_datetime(end_date) - pd.Timedelta(days=1)).strftime( + "%Y-%m-%d" + ) + df_filtered = df.loc[start_date:end_date] + per_unit_load = df_filtered["per_unit_load"].values + + # Ciclo su ciascuna microgrid + for grid_name, grid_data in microgrids_list.items(): + total_buildings = building_class[building_class["name_microgrid"] == grid_name] + total_buildings = total_buildings["count"].sum() + building_for_cluster = pd.DataFrame( + building_class[building_class["name_microgrid"] == grid_name] + .groupby("cluster_id") + .sum()["count"] + ) + pop_for_microgrid = pop_microgrid.loc[ + pop_microgrid["Microgrid_Name"] == grid_name, "Population" + ].values[0] + population_per_building = pop_for_microgrid / total_buildings + population_per_cluster = building_for_cluster * population_per_building + # Calcolo del carico per cluster + load_per_cluster = pd.DataFrame( + np.outer(population_per_cluster["count"].values, per_unit_load) + ) + load_per_cluster = load_per_cluster.T + # Rinomina le colonne con il nome della microgrid + new_column_names = { + i: f"{grid_name}_bus_{i}" for i in range(load_per_cluster.shape[1]) + } + load_per_cluster.rename(columns=new_column_names, inplace=True) + # Aggiungi il DataFrame della microgrid al dizionario + microgrid_dataframes[grid_name] = load_per_cluster + # Concatenazione orizzontale dei DataFrame di tutte le microgrid + all_load_per_cluster = pd.concat(microgrid_dataframes.values(), axis=1) + # Aggiungi l'indicizzazione temporale basata su `n.snapshots` + if hasattr(n, "snapshots") and len(n.snapshots) == len(all_load_per_cluster): + all_load_per_cluster.insert(0, "timestamp", n.snapshots) + else: + raise ValueError("Mismatch between the length of snapshots and load data rows.") + # Salva i risultati cumulativi su un file CSV + all_load_per_cluster.to_csv(output_file, index=False) + + return all_load_per_cluster - return load_per_cluster def calculate_load_ramp( @@ -215,107 +263,95 @@ def calculate_load_ramp( input_file_profile_tier5, output_path_csv, tier_percent, + date_start, + date_end, + inclusive, ): + # Caricamento dei dati e calcolo della densità di popolazione cleaned_buildings = gpd.read_file(input_file_buildings) house = cleaned_buildings[cleaned_buildings["tags_building"] == "house"] - area_tot = house["area_m2"].sum() - pop_microgrid, microgrid_load = estimate_microgrid_population( n, p, raster_path, shapes_path, sample_profile, output_file ) - density = pop_microgrid / area_tot + density = pop_microgrid / house["area_m2"].sum() + # Calcolo 
superficie e popolazione per cluster grouped_buildings = cleaned_buildings.groupby("cluster_id") clusters = np.sort(cleaned_buildings["cluster_id"].unique()) - house_area_for_cluster = [] - for cluster in clusters: - cluster_buildings = pd.DataFrame(grouped_buildings.get_group(cluster)) - house = cluster_buildings[cluster_buildings["tags_building"] == "house"] - area_house = house["area_m2"].sum() - house_area_for_cluster.append(area_house) - - population_df = pd.DataFrame() - population_df["cluster"] = clusters - population_df.set_index("cluster", inplace=True) - population_df["house_area_for_cluster"] = house_area_for_cluster - people_for_cluster = (population_df["house_area_for_cluster"] * density).round() - population_df["people_for_cluster"] = people_for_cluster - - # tier_percent = [0.2, 0.2, 0.3, 0.2, 0.05, 0.05] - people_for_cluster = population_df["people_for_cluster"] - tier_pop_df = population_df["people_for_cluster"].apply( - lambda x: pd.Series([x * y for y in tier_percent]) - ) - demand_tier_1 = pd.read_excel(input_file_profile_tier1) - demand_tier_2 = pd.read_excel(input_file_profile_tier2) - demand_tier_3 = pd.read_excel(input_file_profile_tier3) - demand_tier_4 = pd.read_excel(input_file_profile_tier4) - demand_tier_5 = pd.read_excel(input_file_profile_tier5) - - # Creazione di un DataFrame con tutti i tier e la domanda media oraria per ognuno - mean_demand_tier_df = pd.DataFrame() - demand_tiers = [ - demand_tier_1, - demand_tier_2, - demand_tier_3, - demand_tier_4, - demand_tier_5, + house_area_for_cluster = [ + grouped_buildings.get_group(cluster)[ + grouped_buildings.get_group(cluster)["tags_building"] == "house" + ]["area_m2"].sum() + for cluster in clusters ] - - for i, demand_tier in enumerate(demand_tiers, start=1): - mean_column_name = f"tier_{i}" - mean_demand_tier_df[mean_column_name] = demand_tier["mean"] - mean_demand_tier_df.insert(0, "tier_0", np.zeros(len(mean_demand_tier_df))) - - hours_index = pd.date_range( - start="00:00:00", periods=len(mean_demand_tier_df), freq="H", normalize=True + population_df = pd.DataFrame( + {"cluster": clusters, "house_area_for_cluster": house_area_for_cluster} + ).set_index("cluster") + population_df["people_for_cluster"] = ( + population_df["house_area_for_cluster"] * density + ).round() + tier_pop_df = pd.DataFrame( + np.outer(population_df["people_for_cluster"], tier_percent), + index=population_df.index, ) - mean_demand_tier_df.index = hours_index.time - # Creazione di un DataFrame con tutti i tier e la std media oraria per ognuno - std_demand_tier_df = pd.DataFrame() + # Caricamento e creazione di DataFrames di domanda media e deviazione standard per ogni tier + demand_files = [ + input_file_profile_tier1, + input_file_profile_tier2, + input_file_profile_tier3, + input_file_profile_tier4, + input_file_profile_tier5, + ] + mean_demand_tier_df = pd.DataFrame( + { + f"tier_{i+1}": pd.read_excel(file)["mean"] + for i, file in enumerate(demand_files) + } + ) + mean_demand_tier_df.insert(0, "tier_0", np.zeros(len(mean_demand_tier_df))) + mean_demand_tier_df.index = pd.date_range( + "00:00:00", periods=len(mean_demand_tier_df), freq="H" + ).time - for i, demand_tier in enumerate(demand_tiers, start=1): - mean_column_name = f"tier_{i}" - std_demand_tier_df[mean_column_name] = demand_tier["std"] - std_demand_tier_df.insert(0, "tier_0", np.zeros(len(mean_demand_tier_df))) + if inclusive == "left": + date_range = pd.date_range(start=date_start, end=date_end, freq="D")[:-1] + else: + date_range = pd.date_range(start=date_start, 
end=date_end, freq="D") - std_demand_tier_df.index = hours_index.time + mean_demand_tier_df_extended = pd.concat( + [mean_demand_tier_df] * len(date_range), ignore_index=True + ) + # Calcolo del carico totale per ogni cluster e tier result_dict = {} - for k in range(len(tier_pop_df)): # Itero sui cluster - pop_cluster = tier_pop_df.iloc[k, :] # Seleziono tutto i tier per quel cluster - nome_dataframe = f"bus_{k}" + for k, pop_cluster in tier_pop_df.iterrows(): load_df = pd.DataFrame() - for j in range(len(pop_cluster)): # Itero su tutti i tier per quel cluster - n_person = int(pop_cluster[j]) - mean_load_person = mean_demand_tier_df.iloc[:, j].values - total_load = pd.Series(n_person * mean_load_person) + for j, n_person in enumerate( + pop_cluster / 7 + ): # Scala la popolazione per famiglia + mean_load = mean_demand_tier_df_extended.iloc[:, j] * n_person + total_load = (mean_load) / 1e6 load_df[f"tier_{j}"] = total_load + result_dict[f"bus_{k}"] = load_df + + # Aggregazione del carico totale per cluster + tot_result_dict = { + f"{k}": df.sum(axis=1).rename(f"{k}") for k, df in result_dict.items() + } + tot_loads_df = pd.concat(tot_result_dict.values(), axis=1) + if inclusive == "left": + date_range_tot = pd.date_range(start=date_start, end=date_end, freq="H")[:-1] + else: + date_range_tot = pd.date_range(start=date_start, end=date_end, freq="H") + tot_loads_df.index = date_range_tot - result_dict[nome_dataframe] = load_df - - tot_result_dict = {} - for key in result_dict: - nome_dataframe = f"{key}" - load = result_dict[key] - load_tot = pd.DataFrame(load.sum(axis=1)) - load_tot.rename(columns={0: key}, inplace=True) - tot_result_dict[nome_dataframe] = load_tot - - tot_loads_df = pd.DataFrame() - for key, cluster_load in tot_result_dict.items(): - tot_loads_df = pd.concat([tot_loads_df, cluster_load], axis=1) + # Sostituzione dei valori zero con un valore minimo per evitare problemi di plotting + small_value = 1e-26 + tot_loads_df.loc[:, (tot_loads_df == 0).all()] = small_value - date_range = pd.date_range(start="2013-01-01", end="2013-12-31", freq="D") - yearly_mean_demand_tier_df = pd.concat( - [tot_loads_df] * len(date_range), ignore_index=True - ) - date_time_index = pd.date_range( - start="2013-01-01", end="2013-12-31 23:00:00", freq="H" - ) - yearly_mean_demand_tier_df.index = date_time_index - yearly_mean_demand_tier_df.to_csv(output_path_csv) + # Esportazione del DataFrame finale + tot_loads_df.to_csv(output_path_csv) def calculate_load_ramp_std( @@ -333,112 +369,112 @@ def calculate_load_ramp_std( input_file_profile_tier5, output_path_csv, tier_percent, + date_start, + date_end, + inclusive, ): + # Caricamento dei dati e calcolo della densità di popolazione cleaned_buildings = gpd.read_file(input_file_buildings) house = cleaned_buildings[cleaned_buildings["tags_building"] == "house"] - area_tot = house["area_m2"].sum() - pop_microgrid, microgrid_load = estimate_microgrid_population( n, p, raster_path, shapes_path, sample_profile, output_file ) - density = pop_microgrid / area_tot + density = pop_microgrid / house["area_m2"].sum() + # Calcolo superficie e popolazione per cluster grouped_buildings = cleaned_buildings.groupby("cluster_id") clusters = np.sort(cleaned_buildings["cluster_id"].unique()) - house_area_for_cluster = [] - for cluster in clusters: - cluster_buildings = pd.DataFrame(grouped_buildings.get_group(cluster)) - house = cluster_buildings[cluster_buildings["tags_building"] == "house"] - area_house = house["area_m2"].sum() - house_area_for_cluster.append(area_house) 
- - population_df = pd.DataFrame() - population_df["cluster"] = clusters - population_df.set_index("cluster", inplace=True) - population_df["house_area_for_cluster"] = house_area_for_cluster - people_for_cluster = (population_df["house_area_for_cluster"] * density).round() - population_df["people_for_cluster"] = people_for_cluster - - people_for_cluster = population_df["people_for_cluster"] - tier_pop_df = population_df["people_for_cluster"].apply( - lambda x: pd.Series([x * y for y in tier_percent]) - ) - demand_tier_1 = pd.read_excel(input_file_profile_tier1) - demand_tier_2 = pd.read_excel(input_file_profile_tier2) - demand_tier_3 = pd.read_excel(input_file_profile_tier3) - demand_tier_4 = pd.read_excel(input_file_profile_tier4) - demand_tier_5 = pd.read_excel(input_file_profile_tier5) - mean_demand_tier_df = pd.DataFrame() - - demand_tiers = [ - demand_tier_1, - demand_tier_2, - demand_tier_3, - demand_tier_4, - demand_tier_5, + house_area_for_cluster = [ + grouped_buildings.get_group(cluster)[ + grouped_buildings.get_group(cluster)["tags_building"] == "house" + ]["area_m2"].sum() + for cluster in clusters ] - for i, demand_tier in enumerate(demand_tiers, start=1): - mean_column_name = f"tier_{i}" - mean_demand_tier_df[mean_column_name] = demand_tier["mean"] - mean_demand_tier_df.insert(0, "tier_0", np.zeros(len(mean_demand_tier_df))) - - hours_index = pd.date_range( - start="00:00:00", periods=len(mean_demand_tier_df), freq="H", normalize=True + population_df = pd.DataFrame( + {"cluster": clusters, "house_area_for_cluster": house_area_for_cluster} + ).set_index("cluster") + population_df["people_for_cluster"] = ( + population_df["house_area_for_cluster"] * density + ).round() + tier_pop_df = pd.DataFrame( + np.outer(population_df["people_for_cluster"], tier_percent), + index=population_df.index, ) - mean_demand_tier_df.index = hours_index.time - # Creazione di un DataFrame con tutti i tier e la std media oraria per ognuno - std_demand_tier_df = pd.DataFrame() - - for i, demand_tier in enumerate(demand_tiers, start=1): - mean_column_name = f"tier_{i}" - std_demand_tier_df[mean_column_name] = demand_tier["std"] + # Caricamento e creazione di DataFrames di domanda media e deviazione standard per ogni tier + demand_files = [ + input_file_profile_tier1, + input_file_profile_tier2, + input_file_profile_tier3, + input_file_profile_tier4, + input_file_profile_tier5, + ] + mean_demand_tier_df = pd.DataFrame( + { + f"tier_{i+1}": pd.read_excel(file)["mean"] + for i, file in enumerate(demand_files) + } + ) + std_demand_tier_df = pd.DataFrame( + { + f"tier_{i+1}": pd.read_excel(file)["std"] + for i, file in enumerate(demand_files) + } + ) + mean_demand_tier_df.insert(0, "tier_0", np.zeros(len(mean_demand_tier_df))) std_demand_tier_df.insert(0, "tier_0", np.zeros(len(mean_demand_tier_df))) + mean_demand_tier_df.index = pd.date_range( + "00:00:00", periods=len(mean_demand_tier_df), freq="H" + ).time + std_demand_tier_df.index = pd.date_range( + "00:00:00", periods=len(mean_demand_tier_df), freq="H" + ).time + + if inclusive == "left": + date_range = pd.date_range(start=date_start, end=date_end, freq="D")[:-1] + else: + date_range = pd.date_range(start=date_start, end=date_end, freq="D") - std_demand_tier_df.index = hours_index.time + mean_demand_tier_df_extended = pd.concat( + [mean_demand_tier_df] * len(date_range), ignore_index=True + ) + std_demand_tier_df_extended = pd.concat( + [std_demand_tier_df] * len(date_range), ignore_index=True + ) + # Calcolo del carico totale per ogni cluster e tier 
result_dict = {} - for k in range(len(tier_pop_df)): # Itero sui cluster - pop_cluster = tier_pop_df.iloc[k, :] # Seleziono tutto i tier per quel cluster - nome_dataframe = f"bus_{k}" + for k, pop_cluster in tier_pop_df.iterrows(): load_df = pd.DataFrame() - std_df = pd.DataFrame() - for j in range(len(pop_cluster)): # Itero su tutti i tier per quel cluster - n_person = int(pop_cluster[j]) - mean_load_person = mean_demand_tier_df.iloc[:, j].values - mean_load = pd.Series(n_person * mean_load_person) - - sqrt_n_person = np.sqrt(n_person) - std_load_person = std_demand_tier_df.iloc[:, j].values - std_load = np.random.normal(0, std_load_person) * sqrt_n_person - std_total = pd.Series(std_load) - - total_load = pd.Series(mean_load.values + std_total.values) + for j, n_person in enumerate( + pop_cluster / 7 + ): # Scala la popolazione per famiglia + mean_load = mean_demand_tier_df_extended.iloc[:, j] * n_person + std_load = np.random.normal( + mean_demand_tier_df_extended.iloc[:, j], + std_demand_tier_df_extended.iloc[:, j], + ) * np.sqrt(n_person) + total_load = (mean_load + std_load) / 1e6 load_df[f"tier_{j}"] = total_load + result_dict[f"bus_{k}"] = load_df + + # Aggregazione del carico totale per cluster + tot_result_dict = { + f"{k}": df.sum(axis=1).rename(f"{k}") for k, df in result_dict.items() + } + tot_loads_df = pd.concat(tot_result_dict.values(), axis=1) + if inclusive == "left": + date_range_tot = pd.date_range(start=date_start, end=date_end, freq="H")[:-1] + else: + date_range_tot = pd.date_range(start=date_start, end=date_end, freq="H") + tot_loads_df.index = date_range_tot - result_dict[nome_dataframe] = load_df - - tot_result_dict = {} - for key in result_dict: - nome_dataframe = f"{key}" - load = result_dict[key] - load_tot = pd.DataFrame(load.sum(axis=1)) - load_tot.rename(columns={0: key}, inplace=True) - tot_result_dict[nome_dataframe] = load_tot + # Sostituzione dei valori zero con un valore minimo per evitare problemi di plotting + small_value = 1e-26 + tot_loads_df.loc[:, (tot_loads_df == 0).all()] = small_value - tot_loads_df = pd.DataFrame() - for key, cluster_load in tot_result_dict.items(): - tot_loads_df = pd.concat([tot_loads_df, cluster_load], axis=1) - - date_range = pd.date_range(start="2013-01-01", end="2013-12-31", freq="D") - yearly_mean_demand_tier_df = pd.concat( - [tot_loads_df] * len(date_range), ignore_index=True - ) - date_time_index = pd.date_range( - start="2013-01-01", end="2013-12-31 23:00:00", freq="H" - ) - yearly_mean_demand_tier_df.index = date_time_index - yearly_mean_demand_tier_df.to_csv(output_path_csv) + # Esportazione del DataFrame finale + tot_loads_df.to_csv(output_path_csv) if __name__ == "__main__": @@ -454,6 +490,11 @@ def calculate_load_ramp_std( n = pypsa.Network(snakemake.input.create_network) sample_profile = snakemake.input["sample_profile"] tier_percent = snakemake.params.tier["tier_percent"] + date_start = snakemake.params.snapshots["start"] + date_end = snakemake.params.snapshots["end"] + inclusive = snakemake.params.snapshots["inclusive"] + microgrids_list = snakemake.config["microgrids_list"] + build_demand_model = snakemake.params.build_demand_model["type"] assert ( @@ -486,6 +527,10 @@ def calculate_load_ramp_std( snakemake.input["clusters_with_buildings"], snakemake.output["electric_load"], snakemake.input["building_csv"], + microgrids_list, + date_start, + date_end, + inclusive, ) elif build_demand_model == 1: @@ -504,6 +549,9 @@ def calculate_load_ramp_std( snakemake.input["profile_Tier5"], 
snakemake.output["electric_load"], tier_percent, + date_start, + date_end, + inclusive, ) elif build_demand_model == 2: @@ -522,4 +570,7 @@ def calculate_load_ramp_std( snakemake.input["profile_Tier5"], snakemake.output["electric_load"], tier_percent, + date_start, + date_end, + inclusive, ) diff --git a/scripts/clean_earth_osm_data.py b/scripts/clean_earth_osm_data.py index b186bbb..caf8a7d 100644 --- a/scripts/clean_earth_osm_data.py +++ b/scripts/clean_earth_osm_data.py @@ -14,15 +14,32 @@ def extract_points(microgrid_shape_path, buildings_path, output_path): + # Carica i file GeoJSON microgrid = gpd.read_file(microgrid_shape_path) - xmin, ymin, xmax, ymax = microgrid.total_bounds - buildings = gpd.read_file(buildings_path) - buildings_in_microgrid = buildings.cx[xmin:xmax, ymin:ymax] - buildings_in_microgrid.to_file(output_path) + # Crea un GeoDataFrame per accumulare i risultati + result = gpd.GeoDataFrame(columns=buildings.columns) + + # Itera su ogni geometria della microrete + for idx, microgrid_shape in microgrid.iterrows(): + # Estrai il nome della microrete + microgrid_name = microgrid_shape["name"] + + # Filtra gli edifici che si trovano nella geometria della microrete + buildings_in_microgrid = buildings[buildings.geometry.within(microgrid_shape.geometry)] + + # Aggiungi o sostituisci il campo "name_microgrid" con il nome calcolato + buildings_in_microgrid = buildings_in_microgrid.copy() + buildings_in_microgrid["name_microgrid"] = microgrid_name + + # Aggiungi gli edifici filtrati al risultato finale + result = gpd.GeoDataFrame(pd.concat([result, buildings_in_microgrid], ignore_index=True)) + + # Salva il risultato come GeoJSON + result.to_file(output_path, driver="GeoJSON") - return buildings_in_microgrid + return result if __name__ == "__main__": diff --git a/scripts/cluster_buildings.py b/scripts/cluster_buildings.py index f94d80f..c540d53 100644 --- a/scripts/cluster_buildings.py +++ b/scripts/cluster_buildings.py @@ -38,7 +38,6 @@ def buildings_classification(input_file, crs): microgrid_buildings.loc[idxs_house, "tags_building"] = "house" return microgrid_buildings - def get_central_points_geojson_with_buildings( input_filepath, output_filepath_centroids, @@ -47,56 +46,77 @@ def get_central_points_geojson_with_buildings( house_area_limit, output_filepath_buildings, output_path_csv, + microgrids_list ): """ Divides the buildings into the desired number of clusters by using the kmeans function - and returns three different output: a geodataframe with the coordinates of the centroids of each cluster, - a dataframe with all of the buildings divided into clusters, - a csv file where for each cluster the building types are counted + and generates three outputs: + - GeoJSON with the coordinates of the centroids of each cluster, + - GeoJSON with all the buildings divided into clusters, + - CSV file where the building types are counted for each cluster. 
""" + microgrid_buildings = buildings_classification(input_filepath, crs) - centroids_building = [ - (row.geometry.centroid.x, row.geometry.centroid.y) - for row in microgrid_buildings.itertuples() - ] - centroids_building = np.array(centroids_building) - kmeans = KMeans(n_clusters=n_clusters, random_state=0).fit(centroids_building) - centroids = kmeans.cluster_centers_ - central_points = [] - - for i in range(kmeans.n_clusters): - cluster_points = centroids_building[kmeans.labels_ == i] - distances = np.linalg.norm(cluster_points - centroids[i], axis=1) - central_point_idx = np.argmin(distances) - central_points.append(cluster_points[central_point_idx]) - central_features = [] - for i, central_point in enumerate(central_points): - central_features.append( - { - "geometry": Point(central_point), - "cluster": i, - } - ) - central_features = gpd.GeoDataFrame( - central_features, crs=microgrid_buildings.crs - ).to_crs("EPSG:4326") - central_features.to_file(output_filepath_centroids, driver="GeoJSON") - clusters = [] - for i, row in enumerate(microgrid_buildings.itertuples()): - cluster_id = kmeans.labels_[i] - clusters.append(cluster_id) + + all_central_features = gpd.GeoDataFrame(columns=["geometry", "cluster", "name_microgrid"]) + all_microgrid_buildings = gpd.GeoDataFrame(columns=microgrid_buildings.columns) + all_buildings_class = pd.DataFrame() + + + for grid_name, grid_data in microgrids_list.items(): + + filtered_buildings = microgrid_buildings[microgrid_buildings["name_microgrid"] == grid_name] + + + centroids_building = [ + (row.geometry.centroid.x, row.geometry.centroid.y) + for row in filtered_buildings.itertuples() + ] + centroids_building = np.array(centroids_building) + kmeans = KMeans(n_clusters=n_clusters, random_state=0).fit(centroids_building) + centroids = kmeans.cluster_centers_ + + + central_points = [] + for i in range(kmeans.n_clusters): + cluster_points = centroids_building[kmeans.labels_ == i] + distances = np.linalg.norm(cluster_points - centroids[i], axis=1) + central_point_idx = np.argmin(distances) + central_points.append(cluster_points[central_point_idx]) + + + central_features = [] + for i, central_point in enumerate(central_points): + central_features.append( + { + "geometry": Point(central_point), + "cluster": i, + "name_microgrid": grid_name, + } + ) + central_features_gdf = gpd.GeoDataFrame( + central_features, crs=filtered_buildings.crs + ).to_crs("EPSG:4326") + all_central_features = pd.concat([all_central_features, central_features_gdf], ignore_index=True) + + + clusters = kmeans.labels_ + filtered_buildings["cluster_id"] = clusters.astype(int) + all_microgrid_buildings = pd.concat([all_microgrid_buildings, filtered_buildings], ignore_index=True) + + + buildings_class = ( + filtered_buildings.groupby("cluster_id").tags_building.value_counts().reset_index(name="count") + ) + buildings_class["name_microgrid"] = grid_name + all_buildings_class = pd.concat([all_buildings_class, buildings_class], ignore_index=True) - microgrid_buildings["cluster_id"] = clusters + + all_central_features.to_file(output_filepath_centroids, driver="GeoJSON") + all_microgrid_buildings.to_file(output_filepath_buildings, driver="GeoJSON") + all_buildings_class.to_csv(output_path_csv, index=False) - microgrid_buildings_gdf = gpd.GeoDataFrame( - microgrid_buildings, crs=microgrid_buildings.crs - ) - microgrid_buildings_gdf.to_file(output_filepath_buildings) - buildings_class = pd.DataFrame( - microgrid_buildings_gdf.groupby("cluster_id").tags_building.value_counts() - ) - 
buildings_class.to_csv(output_path_csv) if __name__ == "__main__": @@ -120,4 +140,5 @@ def get_central_points_geojson_with_buildings( house_area_limit, snakemake.output["clusters_with_buildings"], snakemake.output["buildings_type"], + snakemake.config["microgrids_list"], ) diff --git a/scripts/create_network.py b/scripts/create_network.py index a9f5a42..d6f8aa8 100644 --- a/scripts/create_network.py +++ b/scripts/create_network.py @@ -35,82 +35,67 @@ def create_network(): # Return the created network return n - - -def create_microgrid_network( - n, input_file, number_microgrids, voltage_level, line_type -): + +def create_microgrid_network(n, input_file, voltage_level, line_type, microgrid_list): """ Creates local microgrid networks within the PyPSA network. The local microgrid networks are distribution networks created based on the buildings data, stored in "resources/buildings/microgrids_buildings.geojson". Then the buses are connected together through lines according to the output of a Delaunay Triangulation. """ # Load the GeoJSON file - - with open(input_file) as f: - data = json.load(f) - - # Keep track of the bus coordinates and microgrid IDs + data = gpd.read_file(input_file) bus_coords = set() - number_microgrids = len(number_microgrids.keys()) - microgrid_ids = [f"microgrid_{i+1}" for i in range(number_microgrids)] - # microgrid_ids = set() - - # Iterate over each feature in the GeoDataFrame - for feature in data["features"]: - # Get the point geometry - point_geom = feature["geometry"] - - # Create a bus at the point location with microgrid ID included in bus name - bus_name = f"bus_{feature['properties']['cluster']}" - - x, y = point_geom["coordinates"][0], point_geom["coordinates"][1] - - # Check for overlapping microgrids and raise an error if happening - if (x, y) in bus_coords: - raise ValueError( - "Overlapping microgrids detected, adjust the coordinates in the config.yaml file" - ) - - # Add the buses to the network and update the set of bus coordinates and microgrid IDs - n.add("Bus", bus_name, x=x, y=y, v_nom=voltage_level) - bus_coords.add((x, y)) - - # Iterate over each microgrid - for microgrid_id in microgrid_ids: - coords = np.column_stack((n.buses.x.values, n.buses.y.values)) - - # Create a Delaunay triangulation of the bus coordinates + for grid_name, grid_data in microgrid_list.items(): + # Filter data for the current microgrid + grid_data = data[data["name_microgrid"] == grid_name] + # Create a SubNetwork for the current microgrid + if grid_name not in n.sub_networks.index: + n.add("SubNetwork", grid_name, carrier="electricity") + # List to store bus names for this microgrid + microgrid_buses = [] + for _, feature in grid_data.iterrows(): + point_geom = feature.geometry + bus_name = f"{grid_name}_bus_{feature['cluster']}" + x, y = point_geom.x, point_geom.y + # Avoid adding duplicate buses + if bus_name in n.buses.index: + continue + if (x, y) in bus_coords: + raise ValueError( + f"Overlapping microgrids detected at {x}, {y}. Adjust the configuration." + ) + # Add the bus and assign it to the SubNetwork + n.add("Bus", bus_name, x=x, y=y, v_nom=voltage_level, sub_network=grid_name) + bus_coords.add((x, y)) + microgrid_buses.append(bus_name) + # Filter coordinates for the current microgrid + coords = np.column_stack(( + n.buses.loc[microgrid_buses].x.values, + n.buses.loc[microgrid_buses].y.values + )) + # Check if there are enough points for triangulation + if len(coords) < 3: + print(f"Not enough points for triangulation in {grid_name}. 
Skipping.") + continue + # Create a Delaunay triangulation of the filtered bus coordinates tri = Delaunay(coords) - - # Remove edges that connect the same pair of buses - edges = [] + edges = set() for simplex in tri.simplices: for i in range(3): - if i < 2: - edge = sorted([simplex[i], simplex[i + 1]]) - else: - edge = sorted([simplex[i], simplex[0]]) - if edge not in edges: - edges.append(edge) - - # # Create a matrix of bus coordinates - - # # Create a Delaunay triangulation of the bus coordinates - # tri = Delaunay(coords) - # edges = tri.simplices[(tri.simplices < len(coords)).all(axis=1)] - - line_type = line_type + edge = tuple(sorted([simplex[i], simplex[(i + 1) % 3]])) + edges.add(edge) + # Add lines for the current microgrid + for i, j in edges: + bus0 = microgrid_buses[i] + bus1 = microgrid_buses[j] + line_name = f"{grid_name}_line_{i}_{j}" + if line_name in n.lines.index: + continue # Skip if the line already exists + x1, y1 = n.buses.loc[bus0].x, n.buses.loc[bus0].y + x2, y2 = n.buses.loc[bus1].x, n.buses.loc[bus1].y + length = ((x2 - x1) ** 2 + (y2 - y1) ** 2) ** 0.5 + n.add("Line", line_name, bus0=bus0, bus1=bus1, type=line_type, length=length) - # Add lines to the network between connected buses in the Delaunay triangulation - for i, j in edges: - bus0 = n.buses.index[i] - bus1 = n.buses.index[j] - line_name = f"{microgrid_id}_line_{i}_{j}" - x1, y1 = n.buses.x[i], n.buses.y[i] - x2, y2 = n.buses.x[j], n.buses.y[j] - length = ((x2 - x1) ** 2 + (y2 - y1) ** 2) ** 0.5 - n.add("Line", line_name, bus0=bus0, bus1=bus1, type=line_type, length=length) def add_bus_at_center(n, number_microgrids, voltage_level, line_type): @@ -207,13 +192,14 @@ def plot_microgrid_network(n): configure_logging(snakemake) n = create_network() + microgrids_list = snakemake.config["microgrids_list"] create_microgrid_network( n, snakemake.input["clusters"], - snakemake.config["microgrids_list"], snakemake.config["electricity"]["voltage"], snakemake.config["electricity"]["line_type"], + microgrids_list, ) # add_bus_at_center(n, diff --git a/scripts/download_osm_data.py b/scripts/download_osm_data.py index d6697a3..2e0d18d 100644 --- a/scripts/download_osm_data.py +++ b/scripts/download_osm_data.py @@ -67,7 +67,7 @@ def convert_iso_to_geofk( return iso_code -def retrieve_osm_data_geojson(coordinates, features, url, path): +def retrieve_osm_data_geojson( microgrids_list, feature_name, url, path): """ The buildings inside the specified coordinates are retrieved by using overpass API. The region coordinates should be defined in the config.yaml file. @@ -82,59 +82,81 @@ def retrieve_osm_data_geojson(coordinates, features, url, path): path : str Directory where the GeoJSON file will be saved. 
""" + geojson_features = [] # Collect all features from all microgrids - for item in coordinates.keys(): + for grid_name, grid_data in microgrids_list.items(): + lat_min = grid_data["lat_min"] + lon_min = grid_data["lon_min"] + lat_max = grid_data["lat_max"] + lon_max = grid_data["lon_max"] overpass_query = f""" [out:json]; - way["{features}"]({coordinates[item]["lat_min"]}, {coordinates[item]["lon_min"]}, {coordinates[item]["lat_max"]}, {coordinates[item]["lon_max"]}); + way["{feature_name}"]({lat_min},{lon_min},{lat_max},{lon_max}); (._;>;); out body; """ try: - # Send request to API Overpass + logger.info(f"Querying Overpass API for microgrid: {grid_name}") response = requests.get(url, params={"data": overpass_query}) response.raise_for_status() data = response.json() - # Create a dictionary to map nodes with their coordinates + + if "elements" not in data: + logger.error(f"No elements found for microgrid: {grid_name}") + continue + node_coordinates = { node["id"]: [node["lon"], node["lat"]] for node in data["elements"] if node["type"] == "node" } - # Choose the output path to save the file. - outpath = Path(path) / f"all_raw_building.geojson" - # outpath = Path(path) / f"all_raw_building_{item}.geojson" #ATTENTION: Currently the other parts of the code ( clean earth osm data,cluster building, and others) have not been updated to run on multiple microgrids simultaneously. For now we do not exploit this to run the code. As soon as we update the other parts of the code as well, we will exploit it. - outpath.parent.mkdir(parents=True, exist_ok=True) - # Write the geojson file - with open(outpath, "w") as f: - f.write('{"type":"FeatureCollection","features":[\n') - features = [] - for element in data["elements"]: - if element["type"] == "way" and "nodes" in element: - coordinates = [ - node_coordinates[node_id] - for node_id in element["nodes"] - if node_id in node_coordinates - ] - properties = {"id": element["id"]} - if "tags" in element: - properties.update(element["tags"]) - feature = { - "type": "Feature", - "properties": properties, - "geometry": { - "type": "Polygon", - "coordinates": [coordinates], - }, - } - features.append(json.dumps(feature, separators=(",", ":"))) - f.write(",\n".join(features)) - f.write("\n]}\n") - except (json.JSONDecodeError, requests.exceptions.RequestException) as e: - logger.error(f"Error downloading osm data for the specified coordinates") + for element in data["elements"]: + if element["type"] == "way" and "nodes" in element: + coordinates = [ + node_coordinates[node_id] + for node_id in element["nodes"] + if node_id in node_coordinates + ] + if not coordinates: + continue + + properties = {"name_microgrid": grid_name, "id": element["id"]} + if "tags" in element: + properties.update(element["tags"]) + + feature = { + "type": "Feature", + "properties": properties, + "geometry": { + "type": "Polygon", + "coordinates": [coordinates], + }, + } + # Serialize each feature as a compact JSON string + geojson_features.append(json.dumps(feature, separators=(",", ":"))) + + except json.JSONDecodeError: + logger.error(f"JSON decoding error for microgrid: {grid_name}") + except requests.exceptions.RequestException as e: + logger.error(f"Request error for microgrid: {grid_name}: {e}") + + # Save all features to a single GeoJSON file + try: + outpath = Path(path) / "all_raw_buildings.geojson" + outpath.parent.mkdir(parents=True, exist_ok=True) + + with open(outpath, "w") as f: + f.write('{"type":"FeatureCollection","features":[\n') + 
f.write(",\n".join(geojson_features)) # Write features in one-line format + f.write("\n]}\n") + + logger.info(f"Combined GeoJSON saved to {outpath}") + + except IOError as e: + logger.error(f"Error saving GeoJSON file: {e}") if __name__ == "__main__": if "snakemake" not in globals(): diff --git a/test/config.distribution.test.yaml b/test/config.distribution.test.yaml index db0b3f4..bea555c 100644 --- a/test/config.distribution.test.yaml +++ b/test/config.distribution.test.yaml @@ -124,6 +124,12 @@ microgrids_list: lat_min: 4.6151 lat_max: 4.7208 + microgrid_2: # WORKING + lon_max: 7.18935 + lon_min: 7.01048 + lat_min: 6.17059 + lat_max: 6.29329 + load: scaling_factor: 15000 From ddd969ba69348bc3ea6273dfdd96d9050cb856e9 Mon Sep 17 00:00:00 2001 From: Margherita Capitani Date: Sat, 14 Dec 2024 17:09:06 +0100 Subject: [PATCH 02/12] Fix_for_multimicrogrid_and_add_docstring --- scripts/build_demand.py | 293 ++++++++++++++++----------- scripts/build_shapes.py | 67 ++++-- scripts/clean_earth_osm_data.py | 42 ++-- scripts/cluster_buildings.py | 94 ++++++--- scripts/create_network.py | 187 +++++++++-------- scripts/download_osm_data.py | 56 ++--- scripts/ramp_build_demand_profile.py | 31 +++ 7 files changed, 477 insertions(+), 293 deletions(-) diff --git a/scripts/build_demand.py b/scripts/build_demand.py index 7d50ad8..877b6ab 100644 --- a/scripts/build_demand.py +++ b/scripts/build_demand.py @@ -133,19 +133,39 @@ def get_WorldPop_data( def estimate_microgrid_population( - n, p, raster_path, shapes_path, sample_profile, output_file + raster_path, shapes_path, output_file ): - population_data = {} + """ + Estimates the population within each microgrid by using raster data and shape geometries. + The function processes population density raster data and calculates the total population + for each microgrid by masking the raster data using the corresponding geometries from a + GeoJSON file. The population estimates are saved as a CSV file. + Parameters + ---------- + raster_path : str + Path to the population density raster file (GeoTIFF format). + shapes_path : str + Path to the GeoJSON file containing the microgrid geometries. + output_file : str + Path to the CSV file where the population estimates will be saved. + Returns + ------- + pd.DataFrame + A DataFrame containing the names of microgrids and their corresponding population estimates. 
+ """ + # Dictionary to store the population data for each microgrid + population_data = {} + # Load the GeoJSON file containing microgrid geometries shapes = gpd.read_file(shapes_path) - + # Iterate through each microgrid geometry for i, shape in shapes.iterrows(): - - name = shape["name"] - + name = shape["name"] # Extract the name of the microgrid + # Open the raster file and mask it using the microgrid geometry with rasterio.open(raster_path) as src: - + # Mask the raster data to only include the area within the microgrid masked, out_transform = rasterio.mask.mask(src, [shape.geometry], crop=True) + # Update the raster metadata for the masked area out_meta = src.meta.copy() out_meta.update( { @@ -155,18 +175,15 @@ def estimate_microgrid_population( "transform": out_transform, } ) - - + # Calculate the total population within the microgrid by summing non-negative raster values pop_microgrid = masked[masked >= 0].sum() - - population_data[name] = pop_microgrid - - + # Convert the population data dictionary to a DataFrame population_df = pd.DataFrame( list(population_data.items()), columns=["Microgrid_Name", "Population"] ) - population_df.to_csv(output_file, index=False) + # Save the population estimates to a CSV file + #population_df.to_csv(output_file, index=False) return population_df @@ -177,7 +194,6 @@ def calculate_load( raster_path, shapes_path, sample_profile, - geojson_file, output_file, input_path, microgrids_list, @@ -185,65 +201,103 @@ def calculate_load( end_date, inclusive, ): - # Estimate the microgrid population and load using the existing function - pop_microgrid= estimate_microgrid_population( - n, p, raster_path, shapes_path, sample_profile, output_file + """ + Calculate the microgrid demand based on a load profile provided as input, + appropriately scaled according to the population calculated for each cluster + The output includes a time-indexed DataFrame containing the load for each bus in the microgrid + and is saved as a CSV file. + + Parameters + ---------- + n : object + PyPSA network object containing snapshots. + p : int or float + Scaling factor for the per-unit load. + raster_path : str + Path to the raster file containing population density data. + shapes_path : str + Path to the GeoJSON file containing the geometries of the microgrids. + sample_profile : str + Path to the CSV file containing the sample load profile. + output_file : str + Path where the resulting load profile CSV file will be saved. + input_path : str + Path to the CSV file containing building classifications. + microgrids_list : dict + Dictionary with microgrid names as keys and their cluster information as values. + start_date : str + Start date for filtering the time series data + end_date : str + End date for filtering the time series data + inclusive : str + Specifies whether the filtering is inclusive of the start or end date. Possible values: "left" or "right". + Returns + ------- + pd.DataFrame + DataFrame containing the calculated load profile for all microgrids. 
+ + """ + # Estimate the population for the two microgrid + pop_microgrid = estimate_microgrid_population( + raster_path, shapes_path, output_file ) + # Load the building classification data building_class = pd.read_csv(input_path) - # DataFrame per accumulare i risultati di tutte le microgrid + # Dictionary to store the load profiles for each microgrid microgrid_dataframes = {} - - # Carica il profilo di carico e crea l'indice temporale + # Load the sample load profile and create the time index df = pd.read_csv(sample_profile) - per_unit_load = df["0"] / p + per_unit_load = df["0"] / p # Scale the load using the provided factor `p` df["per_unit_load"] = per_unit_load time_index = pd.date_range(start="2013-01-01", end="2013-12-31 23:00:00", freq="h") df = df.set_index(time_index) - # Gestione del filtro temporale + # Apply time filtering based on the specified start and end dates if inclusive == "left": end_date = (pd.to_datetime(end_date) - pd.Timedelta(days=1)).strftime( "%Y-%m-%d" ) - df_filtered = df.loc[start_date:end_date] + df_filtered = df.loc[start_date:end_date] # Filter the time series data per_unit_load = df_filtered["per_unit_load"].values - - # Ciclo su ciascuna microgrid + # Loop over each microgrid for grid_name, grid_data in microgrids_list.items(): + # Filter buildings belonging to the current microgrid total_buildings = building_class[building_class["name_microgrid"] == grid_name] total_buildings = total_buildings["count"].sum() + # Group buildings by cluster and count the number of buildings per cluster building_for_cluster = pd.DataFrame( building_class[building_class["name_microgrid"] == grid_name] .groupby("cluster_id") .sum()["count"] ) + # Retrieve the population for the current microgrid pop_for_microgrid = pop_microgrid.loc[ pop_microgrid["Microgrid_Name"] == grid_name, "Population" ].values[0] + # Calculate the population per building and per cluster population_per_building = pop_for_microgrid / total_buildings population_per_cluster = building_for_cluster * population_per_building - # Calcolo del carico per cluster + # Calculate the load for each cluster load_per_cluster = pd.DataFrame( np.outer(population_per_cluster["count"].values, per_unit_load) ) - load_per_cluster = load_per_cluster.T - # Rinomina le colonne con il nome della microgrid + load_per_cluster = load_per_cluster.T # Transpose for time indexing + # Rename columns to represent the buses of the microgrid new_column_names = { i: f"{grid_name}_bus_{i}" for i in range(load_per_cluster.shape[1]) } load_per_cluster.rename(columns=new_column_names, inplace=True) - # Aggiungi il DataFrame della microgrid al dizionario + # Add the DataFrame for the microgrid to the dictionary microgrid_dataframes[grid_name] = load_per_cluster - # Concatenazione orizzontale dei DataFrame di tutte le microgrid + # Concatenate all microgrid DataFrames horizontally all_load_per_cluster = pd.concat(microgrid_dataframes.values(), axis=1) - # Aggiungi l'indicizzazione temporale basata su `n.snapshots` + # Add time indexing based on the PyPSA network snapshots if hasattr(n, "snapshots") and len(n.snapshots) == len(all_load_per_cluster): all_load_per_cluster.insert(0, "timestamp", n.snapshots) else: raise ValueError("Mismatch between the length of snapshots and load data rows.") - # Salva i risultati cumulativi su un file CSV + # Save the cumulative results to a CSV file all_load_per_cluster.to_csv(output_file, index=False) - return all_load_per_cluster @@ -268,16 +322,16 @@ def calculate_load_ramp( inclusive, ): # Caricamento dei 
dati e calcolo della densità di popolazione - cleaned_buildings = gpd.read_file(input_file_buildings) - house = cleaned_buildings[cleaned_buildings["tags_building"] == "house"] - pop_microgrid, microgrid_load = estimate_microgrid_population( - n, p, raster_path, shapes_path, sample_profile, output_file + microgrid_buildings = gpd.read_file(input_file_buildings) + house = microgrid_buildings[microgrid_buildings["tags_building"] == "house"] + pop_microgrid = estimate_microgrid_population( + raster_path, shapes_path, output_file ) density = pop_microgrid / house["area_m2"].sum() # Calcolo superficie e popolazione per cluster - grouped_buildings = cleaned_buildings.groupby("cluster_id") - clusters = np.sort(cleaned_buildings["cluster_id"].unique()) + grouped_buildings = microgrid_buildings.groupby("cluster_id") + clusters = np.sort(microgrid_buildings["cluster_id"].unique()) house_area_for_cluster = [ grouped_buildings.get_group(cluster)[ grouped_buildings.get_group(cluster)["tags_building"] == "house" @@ -372,36 +426,10 @@ def calculate_load_ramp_std( date_start, date_end, inclusive, + microgrid_list, ): - # Caricamento dei dati e calcolo della densità di popolazione + # Upload of buildings and data demand for each tier cleaned_buildings = gpd.read_file(input_file_buildings) - house = cleaned_buildings[cleaned_buildings["tags_building"] == "house"] - pop_microgrid, microgrid_load = estimate_microgrid_population( - n, p, raster_path, shapes_path, sample_profile, output_file - ) - density = pop_microgrid / house["area_m2"].sum() - - # Calcolo superficie e popolazione per cluster - grouped_buildings = cleaned_buildings.groupby("cluster_id") - clusters = np.sort(cleaned_buildings["cluster_id"].unique()) - house_area_for_cluster = [ - grouped_buildings.get_group(cluster)[ - grouped_buildings.get_group(cluster)["tags_building"] == "house" - ]["area_m2"].sum() - for cluster in clusters - ] - population_df = pd.DataFrame( - {"cluster": clusters, "house_area_for_cluster": house_area_for_cluster} - ).set_index("cluster") - population_df["people_for_cluster"] = ( - population_df["house_area_for_cluster"] * density - ).round() - tier_pop_df = pd.DataFrame( - np.outer(population_df["people_for_cluster"], tier_percent), - index=population_df.index, - ) - - # Caricamento e creazione di DataFrames di domanda media e deviazione standard per ogni tier demand_files = [ input_file_profile_tier1, input_file_profile_tier2, @@ -409,18 +437,11 @@ def calculate_load_ramp_std( input_file_profile_tier4, input_file_profile_tier5, ] + mean_demand_tier_df = pd.DataFrame( - { - f"tier_{i+1}": pd.read_excel(file)["mean"] - for i, file in enumerate(demand_files) - } - ) + {f"tier_{i+1}": pd.read_excel(file)["mean"] for i, file in enumerate(demand_files)}) std_demand_tier_df = pd.DataFrame( - { - f"tier_{i+1}": pd.read_excel(file)["std"] - for i, file in enumerate(demand_files) - } - ) + {f"tier_{i+1}": pd.read_excel(file)["std"] for i, file in enumerate(demand_files)}) mean_demand_tier_df.insert(0, "tier_0", np.zeros(len(mean_demand_tier_df))) std_demand_tier_df.insert(0, "tier_0", np.zeros(len(mean_demand_tier_df))) mean_demand_tier_df.index = pd.date_range( @@ -430,51 +451,84 @@ def calculate_load_ramp_std( "00:00:00", periods=len(mean_demand_tier_df), freq="H" ).time - if inclusive == "left": - date_range = pd.date_range(start=date_start, end=date_end, freq="D")[:-1] - else: - date_range = pd.date_range(start=date_start, end=date_end, freq="D") + pop= estimate_microgrid_population(raster_path, shapes_path,output_file) + + 
all_microgrid_loads = pd.DataFrame() + + for grid_name, grid_data in microgrid_list.items(): + microgrid_buildings=cleaned_buildings[cleaned_buildings["name_microgrid"]==grid_name] + # Calculate the population density for the current microgrid based only on house buildings + house = microgrid_buildings[microgrid_buildings["tags_building"] == "house"] + pop_microgrid = pop.loc[pop["Microgrid_Name"] == grid_name, "Population"].values[0] + density = pop_microgrid / house["area_m2"].sum() + + # Calculate population per cluster + grouped_buildings = microgrid_buildings.groupby("cluster_id") + clusters = np.sort(microgrid_buildings["cluster_id"].unique()) + house_area_for_cluster = [ + grouped_buildings.get_group(cluster)[ + grouped_buildings.get_group(cluster)["tags_building"] == "house" + ]["area_m2"].sum() + for cluster in clusters + ] + population_df = pd.DataFrame( + {"cluster": clusters, "house_area_for_cluster": house_area_for_cluster} + ).set_index("cluster") + population_df["people_for_cluster"] = ( + population_df["house_area_for_cluster"] * density + ).round() + tier_pop_df = pd.DataFrame( + np.outer(population_df["people_for_cluster"], tier_percent), + index=population_df.index.astype(int), + ) - mean_demand_tier_df_extended = pd.concat( - [mean_demand_tier_df] * len(date_range), ignore_index=True - ) - std_demand_tier_df_extended = pd.concat( - [std_demand_tier_df] * len(date_range), ignore_index=True - ) + if inclusive == "left": + date_range = pd.date_range(start=date_start, end=date_end, freq="D")[:-1] + else: + date_range = pd.date_range(start=date_start, end=date_end, freq="D") - # Calcolo del carico totale per ogni cluster e tier - result_dict = {} - for k, pop_cluster in tier_pop_df.iterrows(): - load_df = pd.DataFrame() - for j, n_person in enumerate( - pop_cluster / 7 - ): # Scala la popolazione per famiglia - mean_load = mean_demand_tier_df_extended.iloc[:, j] * n_person - std_load = np.random.normal( - mean_demand_tier_df_extended.iloc[:, j], - std_demand_tier_df_extended.iloc[:, j], - ) * np.sqrt(n_person) - total_load = (mean_load + std_load) / 1e6 - load_df[f"tier_{j}"] = total_load - result_dict[f"bus_{k}"] = load_df + mean_demand_tier_df_extended = pd.concat( + [mean_demand_tier_df] * len(date_range), ignore_index=True + ) + std_demand_tier_df_extended = pd.concat( + [std_demand_tier_df] * len(date_range), ignore_index=True + ) - # Aggregazione del carico totale per cluster - tot_result_dict = { - f"{k}": df.sum(axis=1).rename(f"{k}") for k, df in result_dict.items() - } - tot_loads_df = pd.concat(tot_result_dict.values(), axis=1) - if inclusive == "left": - date_range_tot = pd.date_range(start=date_start, end=date_end, freq="H")[:-1] - else: - date_range_tot = pd.date_range(start=date_start, end=date_end, freq="H") - tot_loads_df.index = date_range_tot + # Calculate load for each cluster and tier + result_dict = {} + for k, pop_cluster in tier_pop_df.iterrows(): + load_df = pd.DataFrame() + for j, n_person in enumerate( + pop_cluster / 7 # Scale by family size + ): + mean_load = mean_demand_tier_df_extended.iloc[:, j] * n_person + std_load = np.random.normal( + mean_demand_tier_df_extended.iloc[:, j], + std_demand_tier_df_extended.iloc[:, j], + ) * np.sqrt(n_person) + total_load = (mean_load + std_load) / 1e6 + load_df[f"tier_{j}"] = total_load + result_dict[f"{grid_name}_bus_{k}"] = load_df + + # Aggregate total load per cluster + tot_result_dict = { + f"{k}": df.sum(axis=1).rename(f"{k}") for k, df in result_dict.items() + } + tot_loads_df = 
pd.concat(tot_result_dict.values(), axis=1) + if inclusive == "left": + date_range_tot = pd.date_range(start=date_start, end=date_end, freq="H")[:-1] + else: + date_range_tot = pd.date_range(start=date_start, end=date_end, freq="H") + tot_loads_df.index = date_range_tot - # Sostituzione dei valori zero con un valore minimo per evitare problemi di plotting - small_value = 1e-26 - tot_loads_df.loc[:, (tot_loads_df == 0).all()] = small_value + # Replace zero values with a small value just for avoid problem with plotting + small_value = 1e-26 + tot_loads_df.loc[:, (tot_loads_df == 0).all()] = small_value + + all_microgrid_loads = pd.concat([all_microgrid_loads, tot_loads_df], axis=1) # Esportazione del DataFrame finale - tot_loads_df.to_csv(output_path_csv) + all_microgrid_loads.to_csv(output_path_csv) if __name__ == "__main__": @@ -510,11 +564,8 @@ def calculate_load_ramp_std( ) estimate_microgrid_population( - n, - snakemake.config["load"]["scaling_factor"], worldpop_path, snakemake.input["microgrid_shapes"], - sample_profile, snakemake.output["electric_load"], ) if build_demand_model == 0: @@ -524,7 +575,6 @@ def calculate_load_ramp_std( worldpop_path, snakemake.input["microgrid_shapes"], sample_profile, - snakemake.input["clusters_with_buildings"], snakemake.output["electric_load"], snakemake.input["building_csv"], microgrids_list, @@ -573,4 +623,5 @@ def calculate_load_ramp_std( date_start, date_end, inclusive, + microgrids_list, ) diff --git a/scripts/build_shapes.py b/scripts/build_shapes.py index 6918679..b092fe7 100644 --- a/scripts/build_shapes.py +++ b/scripts/build_shapes.py @@ -15,84 +15,109 @@ def create_microgrid_shapes(microgrids_list, output_path): """ Creates rectangular shapes for each microgrid in the list of microgrids in the config.yaml file and saves them as a GeoJSON file. - """ + Parameters + ---------- + microgrids_list : dict + Dictionary containing the microgrid names and their bounding box coordinates (lat_min, lon_min, lat_max, lon_max). - microgrids_list = microgrids_list + output_path : str + Path where the GeoJSON file will be saved. 
+ """ + + # Open the input dictionary into a pandas DataFrame for easier processing microgrids_list_df = pd.DataFrame(microgrids_list) + # Initialize lists to store shapes and names oc each microgrids microgrid_shapes = [] microgrid_names = [] + # Iterate over each column (representing a microgrid) in the DataFrame for col in range(len(microgrids_list_df.columns)): + # Extract the bounds of the rectangle for the current microgrid values = microgrids_list_df.iloc[:, col] - - # Definition of the vertices of the rectangle + # Define the vertices of the rectangle Top_left = (values[0], values[3]) Top_right = (values[1], values[3]) Bottom_right = (values[1], values[2]) Bottom_left = (values[0], values[2]) - + # Create a Polygon shape from the rectangle's vertices microgrid_shape = Polygon( [Top_left, Top_right, Bottom_right, Bottom_left, Top_left] ) - + # Assign a unique name to the microgrid based on its name in the config microgrid_name = f"microgrid_{col+1}" + # Append the shape and name to the respective lists microgrid_shapes.append(microgrid_shape) microgrid_names.append(microgrid_name) + # Create a GeoDataFrame with the collected names and shapes microgrid_gdf = gpd.GeoDataFrame( {"name": microgrid_names, "geometry": microgrid_shapes} ) - + # Save the GeoDataFrame to a GeoJSON file save_to_geojson(microgrid_gdf, output_path) def create_bus_regions(microgrids_list, output_path): """ Creates bus regions for each microgrid in the list of microgrids and saves them as a GeoJSON file. + The generated shape will be used for the calculation of renewable energy producibility, + which will be associated with the bus generated at the center of the geometry. + Parameters + ---------- + microgrids_list : dict + Dictionary containing the microgrid names and their bounding box coordinates (lat_min, lon_min, lat_max, lon_max). + + output_path : str + Path where the GeoJSON file will be saved. 
""" - microgrids_list = microgrids_list + # Open the input dictionary as pandas DataFrame for easier processing microgrids_list_df = pd.DataFrame(microgrids_list) + # Initialize lists to store shapes, names, and coordinates microgrid_shapes = [] microgrid_names = [] - microgrid_x = [] - microgrid_y = [] + microgrid_x = [] # Stores the x-coordinates of the centers of the rectangles + microgrid_y = [] # Stores the y-coordinates of the centers of the rectangles + # Iterate over each column in the DataFrame for col in range(len(microgrids_list_df.columns)): - values = microgrids_list_df.iloc[:, col] + values = microgrids_list_df.iloc[:, col] + microgrid_name = microgrids_list_df.columns[col] + "_bus_renewable" - # Definition of the vertices of the rectangle + # Define the vertices of the rectangle Top_left = (values[0], values[3]) Top_right = (values[1], values[3]) Bottom_right = (values[1], values[2]) Bottom_left = (values[0], values[2]) + # Create a Polygon shape from the rectangle's vertices microgrid_shape = Polygon( [Top_left, Top_right, Bottom_right, Bottom_left, Top_left] ) - # The bus is the central bus of each microgrid - microgrid_name = f"bus_9" + # Append the shape and name to the respective lists microgrid_shapes.append(microgrid_shape) microgrid_names.append(microgrid_name) - # Centre of the rectangle of the microgrid + # Calculate the center of the rectangle x = (values[0] + values[1]) / 2 y = (values[2] + values[3]) / 2 - microgrid_x.append(x) - microgrid_y.append(y) + microgrid_x.append(x) # Append the x-coordinate of the center + microgrid_y.append(y) # Append the y-coordinate of the center + # Create a GeoDataFrame from the collected names, shapes, and coordinates microgrid_gdf = gpd.GeoDataFrame( { - "name": microgrid_names, - "x": microgrid_x, - "y": microgrid_y, - "geometry": microgrid_shapes, + "name": microgrid_names, # Names of the bus regions + "x": microgrid_x, # x-coordinates of the centers + "y": microgrid_y, # y-coordinates of the centers + "geometry": microgrid_shapes, # Polygon shapes of the regions } ) + # Save the GeoDataFrame to a GeoJSON file save_to_geojson(microgrid_gdf, output_path) diff --git a/scripts/clean_earth_osm_data.py b/scripts/clean_earth_osm_data.py index caf8a7d..47fef4c 100644 --- a/scripts/clean_earth_osm_data.py +++ b/scripts/clean_earth_osm_data.py @@ -14,29 +14,43 @@ def extract_points(microgrid_shape_path, buildings_path, output_path): - # Carica i file GeoJSON + """ + From the downloaded data, extracts buildings located within the boundaries of each microgrid geometry + and associates them with the respective microgrid name. + + Parameters + ---------- + microgrid_shape_path : str + Path to the GeoJSON file containing microgrid geometries. + buildings_path : str + Path to the GeoJSON file containing building geometries. + output_path : str + Path where the resulting GeoJSON file will be saved. + + Returns + ------- + GeoDataFrame + A GeoDataFrame containing the filtered buildings with an added field "name_microgrid" + that associates each building to its corresponding microgrid. 
+ """ + + # Load the GeoJSON files microgrid = gpd.read_file(microgrid_shape_path) buildings = gpd.read_file(buildings_path) - - # Crea un GeoDataFrame per accumulare i risultati + # Create a GeoDataFrame to accumulate the results result = gpd.GeoDataFrame(columns=buildings.columns) - - # Itera su ogni geometria della microrete + # Iterate over each microgrid geometry for idx, microgrid_shape in microgrid.iterrows(): - # Estrai il nome della microrete + # Extract the name of the microgrid microgrid_name = microgrid_shape["name"] - - # Filtra gli edifici che si trovano nella geometria della microrete + # Filter buildings located within the microgrid geometry buildings_in_microgrid = buildings[buildings.geometry.within(microgrid_shape.geometry)] - - # Aggiungi o sostituisci il campo "name_microgrid" con il nome calcolato + # Add or replace the "name_microgrid" field with the microgrid name buildings_in_microgrid = buildings_in_microgrid.copy() buildings_in_microgrid["name_microgrid"] = microgrid_name - - # Aggiungi gli edifici filtrati al risultato finale + # Append the filtered buildings to the final result result = gpd.GeoDataFrame(pd.concat([result, buildings_in_microgrid], ignore_index=True)) - - # Salva il risultato come GeoJSON + # Save the final result as a GeoJSON file result.to_file(output_path, driver="GeoJSON") return result diff --git a/scripts/cluster_buildings.py b/scripts/cluster_buildings.py index c540d53..d115580 100644 --- a/scripts/cluster_buildings.py +++ b/scripts/cluster_buildings.py @@ -21,23 +21,42 @@ def buildings_classification(input_file, crs): """ - Filters the data contained in all_raw_building, selecting only Polygon elements, - after which the plan area is calculated for each building with the specified coordinate system - and adds the information to the geodataframe. + Filters the data contained in the input GeoJSON file, selecting only Polygon elements. + Calculates the plan area for each building based on the specified coordinate system (CRS) + and adds this information as a new column to the GeoDataFrame. + Buildings classified as "yes" with an area below a predefined limit are reclassified as "house". + + Parameters + ---------- + input_file : str + Path to the input GeoJSON file containing building data. + crs : str + The coordinate reference system (CRS) to be used for area calculation. + Returns + ------- + GeoDataFrame + A GeoDataFrame containing filtered and classified building data with the added "area_m2" column. 
""" + # Load the GeoJSON file microgrid_buildings = gpd.read_file(input_file) microgrid_buildings.rename(columns={"building": "tags_building"}, inplace=True) + # Filter out elements that are Points, keeping only Polygons and MultiPolygons microgrid_buildings = microgrid_buildings.loc[ microgrid_buildings.geometry.type != "Point" ] + # Convert the GeoDataFrame to the specified CRS microgrid_buildings = microgrid_buildings.to_crs(crs) + # Calculate the area of each building and store it in a new column "area_m2" microgrid_buildings["area_m2"] = microgrid_buildings.geometry.area + # Identify buildings with "tags_building" = "yes" and area below the house_area_limit and reclassify these buildings as "house" idxs_house = microgrid_buildings.query( "(tags_building == 'yes') and (area_m2 < @house_area_limit)" ).index microgrid_buildings.loc[idxs_house, "tags_building"] = "house" + return microgrid_buildings + def get_central_points_geojson_with_buildings( input_filepath, output_filepath_centroids, @@ -46,51 +65,67 @@ def get_central_points_geojson_with_buildings( house_area_limit, output_filepath_buildings, output_path_csv, - microgrids_list + microgrids_list, ): """ - Divides the buildings into the desired number of clusters by using the kmeans function - and generates three outputs: - - GeoJSON with the coordinates of the centroids of each cluster, - - GeoJSON with all the buildings divided into clusters, - - CSV file where the building types are counted for each cluster. + Divides buildings into a specified number of clusters using the KMeans algorithm and generates: + - GeoJSON file containing the centroids of each cluster, + - GeoJSON file containing all buildings with cluster assignments, + - CSV file summarizing the count of building types within each cluster. + + Parameters + ---------- + input_filepath : str + Path to the input GeoJSON file containing building data. + output_filepath_centroids : str + Path to the output GeoJSON file for cluster centroids. + n_clusters : int + Number of clusters to divide the buildings into. + crs : str + The coordinate reference system (CRS) for spatial operations. + house_area_limit : float + The maximum area (in square meters) to classify a building as a "house". + output_filepath_buildings : str + Path to the output GeoJSON file containing clustered buildings. + output_path_csv : str + Path to the output CSV file summarizing building types per cluster. + microgrids_list : dict + Dictionary of microgrids with their names and bounding coordinates. 
+ """ - + # Classify and process the buildings microgrid_buildings = buildings_classification(input_filepath, crs) - - + # Prepare GeoDataFrames and DataFrames to accumulate results all_central_features = gpd.GeoDataFrame(columns=["geometry", "cluster", "name_microgrid"]) all_microgrid_buildings = gpd.GeoDataFrame(columns=microgrid_buildings.columns) all_buildings_class = pd.DataFrame() - - + # Process each microgrid individually for grid_name, grid_data in microgrids_list.items(): - + # Filter buildings belonging to the current microgrid filtered_buildings = microgrid_buildings[microgrid_buildings["name_microgrid"] == grid_name] - - + # Extract centroids of each building as coordinates centroids_building = [ (row.geometry.centroid.x, row.geometry.centroid.y) for row in filtered_buildings.itertuples() ] centroids_building = np.array(centroids_building) + # Apply KMeans clustering to group the buildings kmeans = KMeans(n_clusters=n_clusters, random_state=0).fit(centroids_building) - centroids = kmeans.cluster_centers_ - - + # Get the coordinates of cluster centroids + centroids = kmeans.cluster_centers_ + # Identify the central point for each cluster central_points = [] for i in range(kmeans.n_clusters): cluster_points = centroids_building[kmeans.labels_ == i] distances = np.linalg.norm(cluster_points - centroids[i], axis=1) central_point_idx = np.argmin(distances) central_points.append(cluster_points[central_point_idx]) - - + # Create GeoDataFrame for cluster centroids central_features = [] for i, central_point in enumerate(central_points): central_features.append( { - "geometry": Point(central_point), + "geometry": Point(central_point), "cluster": i, "name_microgrid": grid_name, } @@ -100,23 +135,22 @@ def get_central_points_geojson_with_buildings( ).to_crs("EPSG:4326") all_central_features = pd.concat([all_central_features, central_features_gdf], ignore_index=True) - + # Assign cluster IDs to buildings and append to the results clusters = kmeans.labels_ filtered_buildings["cluster_id"] = clusters.astype(int) all_microgrid_buildings = pd.concat([all_microgrid_buildings, filtered_buildings], ignore_index=True) - + # Count building types within each cluster and append to the summary buildings_class = ( filtered_buildings.groupby("cluster_id").tags_building.value_counts().reset_index(name="count") ) buildings_class["name_microgrid"] = grid_name all_buildings_class = pd.concat([all_buildings_class, buildings_class], ignore_index=True) - - all_central_features.to_file(output_filepath_centroids, driver="GeoJSON") - all_microgrid_buildings.to_file(output_filepath_buildings, driver="GeoJSON") - all_buildings_class.to_csv(output_path_csv, index=False) - + # Save all the results to their respective output files + all_central_features.to_file(output_filepath_centroids, driver="GeoJSON") # Save cluster centroids as GeoJSON + all_microgrid_buildings.to_file(output_filepath_buildings, driver="GeoJSON") # Save clustered buildings as GeoJSON + all_buildings_class.to_csv(output_path_csv, index=False) # Save building type counts as CSV if __name__ == "__main__": diff --git a/scripts/create_network.py b/scripts/create_network.py index d6f8aa8..adf045a 100644 --- a/scripts/create_network.py +++ b/scripts/create_network.py @@ -39,9 +39,30 @@ def create_network(): def create_microgrid_network(n, input_file, voltage_level, line_type, microgrid_list): """ Creates local microgrid networks within the PyPSA network. 
The local microgrid networks are distribution networks created based on - the buildings data, stored in "resources/buildings/microgrids_buildings.geojson". Then the buses are connected together through lines - according to the output of a Delaunay Triangulation. + the buildings data, stored in "resources/buildings/microgrids_buildings.geojson". + Each bus corresponds to a cluster of buildings within a microgrid, with its coordinates defined in the input GeoJSON file. + The lines connecting buses are determined using Delaunay triangulation,ensuring minimal total line length. + The function avoids duplicate buses and ensures buses are assigned to the correct SubNetwork. + Parameters + ---------- + n : pypsa.Network + The PyPSA network object to which microgrid buses and lines will be added. + input_file : str + Path to the GeoJSON file containing building and microgrid data. + voltage_level : float + The nominal voltage level to be assigned to the buses. + line_type : str + The type of lines to be used for connecting the buses (e.g., "AC"). + microgrid_list : dict + A dictionary containing the list of microgrids. Keys are microgrid names, + and values are metadata about each microgrid. + Output + ------ + The PyPSA network (`n`) is updated with: + - Buses for each microgrid, identified by cluster ID and associated with a SubNetwork. + - Lines connecting buses within each microgrid based on Delaunay triangulation. """ + # Load the GeoJSON file data = gpd.read_file(input_file) bus_coords = set() @@ -98,87 +119,87 @@ def create_microgrid_network(n, input_file, voltage_level, line_type, microgrid_ -def add_bus_at_center(n, number_microgrids, voltage_level, line_type): - """ - Adds a new bus to each network at the center of the existing buses. - This is the bus to which the generation, the storage and the load will be attached. 
- """ - number_microgrids = len(number_microgrids.keys()) - microgrid_ids = [f"microgrid_{i+1}" for i in range(number_microgrids)] - - # Iterate over each microgrid - for microgrid_id in microgrid_ids: - # Select the buses belonging to this microgrid - microgrid_buses = n.buses.loc[ - n.buses.index.str.startswith(f"{microgrid_id}_bus_") - ] - - # Create a matrix of bus coordinates - coords = np.column_stack((microgrid_buses.x.values, microgrid_buses.y.values)) - polygon = Polygon(coords) - s = gpd.GeoSeries(polygon) - s = s.centroid - - # Create a new bus at the centroid - center_bus_name = f"new_bus_{microgrid_id}" - n.add( - "Bus", - center_bus_name, - x=float(s.x.iloc[0]), - y=float(s.y.iloc[0]), - v_nom=voltage_level, - ) - - # Find the two closest buses to the new bus - closest_buses = microgrid_buses.iloc[ - distance.cdist([(float(s.x.iloc[0]), float(s.y.iloc[0]))], coords).argmin() - ] - closest_buses = closest_buses.iloc[[0, 1]] - line_type = line_type - - # Add lines to connect the new bus to the closest buses) - - # Add lines to connect the new bus to the closest buses - for _, bus in closest_buses.to_frame().iterrows(): - line_name = f"{microgrid_id}_line_{center_bus_name}_{bus.name}" - x1, y1 = n.buses.loc[bus.index].x, n.buses.loc[bus.index].y - x2, y2 = n.buses.loc[center_bus_name].x, n.buses.loc[center_bus_name].y - length = ((x2 - x1) ** 2 + (y2 - y1) ** 2) ** 0.5 - n.add( - "Line", - line_name, - bus0=center_bus_name, - bus1=bus.index, - type=line_type, - length=length, - ) - - -def plot_microgrid_network(n): - # Create a new figure and axis - fig, ax = plt.subplots() - - # Plot each bus in the network - for bus_name, bus in n.buses.iterrows(): - ax.plot(bus.x, bus.y, "o", color="blue") - - # Plot each line in the network - for line_name, line in n.lines.iterrows(): - bus0 = n.buses.loc[line.bus0] - bus1 = n.buses.loc[line.bus1] - ax.plot([bus0.x, bus1.x], [bus0.y, bus1.y], "-", color="black") - - # Set the axis limits to include all buses in the network - ax.set_xlim(n.buses.x.min() - 0.1, n.buses.x.max() + 0.1) - ax.set_ylim(n.buses.y.min() - 0.1, n.buses.y.max() + 0.1) - - # Set the title and labels for the plot - ax.set_title("Networks of the microgrids") - ax.set_xlabel("X Coordinate") - ax.set_ylabel("Y Coordinate") - - # Show the plot - plt.show() +# def add_bus_at_center(n, number_microgrids, voltage_level, line_type): +# """ +# Adds a new bus to each network at the center of the existing buses. +# This is the bus to which the generation, the storage and the load will be attached. 
+# """ +# number_microgrids = len(number_microgrids.keys()) +# microgrid_ids = [f"microgrid_{i+1}" for i in range(number_microgrids)] + +# # Iterate over each microgrid +# for microgrid_id in microgrid_ids: +# # Select the buses belonging to this microgrid +# microgrid_buses = n.buses.loc[ +# n.buses.index.str.startswith(f"{microgrid_id}_bus_") +# ] + +# # Create a matrix of bus coordinates +# coords = np.column_stack((microgrid_buses.x.values, microgrid_buses.y.values)) +# polygon = Polygon(coords) +# s = gpd.GeoSeries(polygon) +# s = s.centroid + +# # Create a new bus at the centroid +# center_bus_name = f"new_bus_{microgrid_id}" +# n.add( +# "Bus", +# center_bus_name, +# x=float(s.x.iloc[0]), +# y=float(s.y.iloc[0]), +# v_nom=voltage_level, +# ) + +# # Find the two closest buses to the new bus +# closest_buses = microgrid_buses.iloc[ +# distance.cdist([(float(s.x.iloc[0]), float(s.y.iloc[0]))], coords).argmin() +# ] +# closest_buses = closest_buses.iloc[[0, 1]] +# line_type = line_type + +# # Add lines to connect the new bus to the closest buses) + +# # Add lines to connect the new bus to the closest buses +# for _, bus in closest_buses.to_frame().iterrows(): +# line_name = f"{microgrid_id}_line_{center_bus_name}_{bus.name}" +# x1, y1 = n.buses.loc[bus.index].x, n.buses.loc[bus.index].y +# x2, y2 = n.buses.loc[center_bus_name].x, n.buses.loc[center_bus_name].y +# length = ((x2 - x1) ** 2 + (y2 - y1) ** 2) ** 0.5 +# n.add( +# "Line", +# line_name, +# bus0=center_bus_name, +# bus1=bus.index, +# type=line_type, +# length=length, +# ) + + +# def plot_microgrid_network(n): +# # Create a new figure and axis +# fig, ax = plt.subplots() + +# # Plot each bus in the network +# for bus_name, bus in n.buses.iterrows(): +# ax.plot(bus.x, bus.y, "o", color="blue") + +# # Plot each line in the network +# for line_name, line in n.lines.iterrows(): +# bus0 = n.buses.loc[line.bus0] +# bus1 = n.buses.loc[line.bus1] +# ax.plot([bus0.x, bus1.x], [bus0.y, bus1.y], "-", color="black") + +# # Set the axis limits to include all buses in the network +# ax.set_xlim(n.buses.x.min() - 0.1, n.buses.x.max() + 0.1) +# ax.set_ylim(n.buses.y.min() - 0.1, n.buses.y.max() + 0.1) + +# # Set the title and labels for the plot +# ax.set_title("Networks of the microgrids") +# ax.set_xlabel("X Coordinate") +# ax.set_ylabel("Y Coordinate") + +# # Show the plot +# plt.show() if __name__ == "__main__": diff --git a/scripts/download_osm_data.py b/scripts/download_osm_data.py index 2e0d18d..bba6c4d 100644 --- a/scripts/download_osm_data.py +++ b/scripts/download_osm_data.py @@ -73,8 +73,8 @@ def retrieve_osm_data_geojson( microgrids_list, feature_name, url, path): The region coordinates should be defined in the config.yaml file. Parameters ---------- - coordinates : dict - Coordinates of the rectangular region where buildings to be downloaded from osm resides. + microgrids_list : dict + Dictionary containing the microgrid names and their bounding box coordinates (lat_min, lon_min, lat_max, lon_max). features : str The feature that is searched in the osm database url : str @@ -82,39 +82,43 @@ def retrieve_osm_data_geojson( microgrids_list, feature_name, url, path): path : str Directory where the GeoJSON file will be saved. 
""" - geojson_features = [] # Collect all features from all microgrids + # Collect all features from all microgrids + geojson_features = [] for grid_name, grid_data in microgrids_list.items(): + # Extract the bounding box coordinates for the current microgrid to construct the query lat_min = grid_data["lat_min"] lon_min = grid_data["lon_min"] lat_max = grid_data["lat_max"] lon_max = grid_data["lon_max"] + # Construct the Overpass API query for the specified feature overpass_query = f""" [out:json]; way["{feature_name}"]({lat_min},{lon_min},{lat_max},{lon_max}); (._;>;); out body; """ - try: - logger.info(f"Querying Overpass API for microgrid: {grid_name}") - response = requests.get(url, params={"data": overpass_query}) - response.raise_for_status() - data = response.json() + logger.info(f"Querying Overpass API for microgrid: {grid_name}") # Log the current query + response = requests.get(url, params={"data": overpass_query}) # Send the query to Overpass API + response.raise_for_status() # Raise an error if the request fails + data = response.json() # Parse the JSON response + # Check if the response contains any elements if "elements" not in data: logger.error(f"No elements found for microgrid: {grid_name}") continue - + # Extract node coordinates from the response node_coordinates = { node["id"]: [node["lon"], node["lat"]] for node in data["elements"] if node["type"] == "node" } - + # Process "way" elements to construct polygon geometries for element in data["elements"]: if element["type"] == "way" and "nodes" in element: + # Get the coordinates of the nodes that form the way coordinates = [ node_coordinates[node_id] for node_id in element["nodes"] @@ -123,10 +127,12 @@ def retrieve_osm_data_geojson( microgrids_list, feature_name, url, path): if not coordinates: continue + # Add properties for the feature, including the microgrid name and element ID properties = {"name_microgrid": grid_name, "id": element["id"]} - if "tags" in element: + if "tags" in element: # Include additional tags if available properties.update(element["tags"]) + # Create a GeoJSON feature for the way feature = { "type": "Feature", "properties": properties, @@ -135,28 +141,30 @@ def retrieve_osm_data_geojson( microgrids_list, feature_name, url, path): "coordinates": [coordinates], }, } - # Serialize each feature as a compact JSON string + # Serialize each feature as a compact JSON string and add it to the list geojson_features.append(json.dumps(feature, separators=(",", ":"))) except json.JSONDecodeError: + # Handle JSON parsing errors logger.error(f"JSON decoding error for microgrid: {grid_name}") except requests.exceptions.RequestException as e: + # Handle request-related errors logger.error(f"Request error for microgrid: {grid_name}: {e}") + + # Save all features to a single GeoJSON file + try: + outpath = Path(path) / "all_raw_buildings.geojson" + outpath.parent.mkdir(parents=True, exist_ok=True) - # Save all features to a single GeoJSON file - try: - outpath = Path(path) / "all_raw_buildings.geojson" - outpath.parent.mkdir(parents=True, exist_ok=True) - - with open(outpath, "w") as f: - f.write('{"type":"FeatureCollection","features":[\n') - f.write(",\n".join(geojson_features)) # Write features in one-line format - f.write("\n]}\n") + with open(outpath, "w") as f: + f.write('{"type":"FeatureCollection","features":[\n') + f.write(",\n".join(geojson_features)) # Write features in one-line format + f.write("\n]}\n") - logger.info(f"Combined GeoJSON saved to {outpath}") + logger.info(f"Combined GeoJSON saved to 
{outpath}") - except IOError as e: - logger.error(f"Error saving GeoJSON file: {e}") + except IOError as e: + logger.error(f"Error saving GeoJSON file: {e}") if __name__ == "__main__": if "snakemake" not in globals(): diff --git a/scripts/ramp_build_demand_profile.py b/scripts/ramp_build_demand_profile.py index 65c6581..d4f8b93 100644 --- a/scripts/ramp_build_demand_profile.py +++ b/scripts/ramp_build_demand_profile.py @@ -16,6 +16,37 @@ def create_demand_profile( excel_profiles_output_path, excel_daily_profile_output_path, ): + """ + Generates daily and hourly demand profiles for a specified number of days, + based on user data from an input Excel file, and saves the results to Excel files. + The function: + Load user-specific data from the provided input Excel file. + Generate daily load profiles, normalized by the number of users. + Compute hourly averages of demand from minute-level data. + Reshape the hourly data into daily profiles and calculate statistical measures + (mean and standard deviation) to represent a "typical day." + Save the processed profiles and statistics to Excel files. + + Parameters + ---------- + days : int + The number of days for which the demand profile should be generated. + start : str + The starting date for the profiles + xlsx_input_path : str + Path to the input Excel file containing user-specific data + excel_profiles_output_path : str + Path to the output Excel file where the daily profiles (hourly data for each day) will be saved. + excel_daily_profile_output_path : str + Path to the output Excel file where the typical daily profile (mean and standard deviation) will be saved. + + Output Files + ------------ + - `excel_profiles_output_path`: Contains a DataFrame where each column represents the hourly profile of a specific day. + - `excel_daily_profile_output_path`: Contains a DataFrame with two columns, `mean` and `std`, representing + the mean hourly demand and its standard deviation over the specified days. + + """ use_case = UseCase() use_case.load(xlsx_input_path) From d2c3f45e3a6daa0cb9ed929654ac5462d7c66320 Mon Sep 17 00:00:00 2001 From: Margherita Capitani Date: Sat, 14 Dec 2024 18:49:42 +0100 Subject: [PATCH 03/12] Fix_conflict_create_network --- scripts/create_network.py | 30 +++++++++++++++++++++++++----- 1 file changed, 25 insertions(+), 5 deletions(-) diff --git a/scripts/create_network.py b/scripts/create_network.py index adf045a..9009133 100644 --- a/scripts/create_network.py +++ b/scripts/create_network.py @@ -11,8 +11,12 @@ import pandas as pd import pypsa from _helpers_dist import configure_logging, read_geojson, sets_path_to_root +from pyproj import Transformer from scipy.spatial import Delaunay, distance -from shapely.geometry import Polygon +from shapely.geometry import Point, Polygon + +_logger = logging.getLogger(__name__) +_logger.setLevel(logging.INFO) _logger = logging.getLogger(__name__) _logger.setLevel(logging.INFO) @@ -96,7 +100,7 @@ def create_microgrid_network(n, input_file, voltage_level, line_type, microgrid_ )) # Check if there are enough points for triangulation if len(coords) < 3: - print(f"Not enough points for triangulation in {grid_name}. 
Skipping.") + print(f"Not enough points for triangulation in {grid_name}.") continue # Create a Delaunay triangulation of the filtered bus coordinates tri = Delaunay(coords) @@ -114,9 +118,25 @@ def create_microgrid_network(n, input_file, voltage_level, line_type, microgrid_ continue # Skip if the line already exists x1, y1 = n.buses.loc[bus0].x, n.buses.loc[bus0].y x2, y2 = n.buses.loc[bus1].x, n.buses.loc[bus1].y - length = ((x2 - x1) ** 2 + (y2 - y1) ** 2) ** 0.5 - n.add("Line", line_name, bus0=bus0, bus1=bus1, type=line_type, length=length) - + transformer = Transformer.from_crs("EPSG:4326", "EPSG:3857", always_xy=True) + x1, y1 = transformer.transform(x1, y1) + x2, y2 = transformer.transform(x2, y2) + + coords_0 = Point(x1, y1) + coords_1 = Point(x2, y2) + + length = (coords_0.distance(coords_1)) / 1000 + n.add( + "Line", + line_name, + bus0=bus0, + bus1=bus1, + type="24-AL1/4-ST1A 0.4", + length=length, + s_nom=0.1, + s_nom_extendable=True, + ) + # def add_bus_at_center(n, number_microgrids, voltage_level, line_type): From 36ed4f66177ac857e5fa68530804c62c828c1786 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Sat, 14 Dec 2024 18:00:55 +0000 Subject: [PATCH 04/12] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- scripts/build_demand.py | 58 +++++++++++++++------------- scripts/build_shapes.py | 14 +++---- scripts/clean_earth_osm_data.py | 14 ++++--- scripts/cluster_buildings.py | 42 ++++++++++++++------ scripts/create_network.py | 19 +++++---- scripts/download_osm_data.py | 19 ++++++--- scripts/ramp_build_demand_profile.py | 8 ++-- 7 files changed, 105 insertions(+), 69 deletions(-) diff --git a/scripts/build_demand.py b/scripts/build_demand.py index d0d1049..e02c088 100644 --- a/scripts/build_demand.py +++ b/scripts/build_demand.py @@ -132,13 +132,11 @@ def get_WorldPop_data( return WorldPop_inputfile, WorldPop_filename -def estimate_microgrid_population( - raster_path, shapes_path, output_file -): +def estimate_microgrid_population(raster_path, shapes_path, output_file): """ Estimates the population within each microgrid by using raster data and shape geometries. - The function processes population density raster data and calculates the total population - for each microgrid by masking the raster data using the corresponding geometries from a + The function processes population density raster data and calculates the total population + for each microgrid by masking the raster data using the corresponding geometries from a GeoJSON file. The population estimates are saved as a CSV file. Parameters @@ -183,7 +181,7 @@ def estimate_microgrid_population( list(population_data.items()), columns=["Microgrid_Name", "Population"] ) # Save the population estimates to a CSV file - #population_df.to_csv(output_file, index=False) + # population_df.to_csv(output_file, index=False) return population_df @@ -202,7 +200,7 @@ def calculate_load( inclusive, ): """ - Calculate the microgrid demand based on a load profile provided as input, + Calculate the microgrid demand based on a load profile provided as input, appropriately scaled according to the population calculated for each cluster The output includes a time-indexed DataFrame containing the load for each bus in the microgrid and is saved as a CSV file. @@ -226,7 +224,7 @@ def calculate_load( microgrids_list : dict Dictionary with microgrid names as keys and their cluster information as values. 
start_date : str - Start date for filtering the time series data + Start date for filtering the time series data end_date : str End date for filtering the time series data inclusive : str @@ -237,10 +235,8 @@ def calculate_load( DataFrame containing the calculated load profile for all microgrids. """ - # Estimate the population for the two microgrid - pop_microgrid = estimate_microgrid_population( - raster_path, shapes_path, output_file - ) + # Estimate the population for the two microgrid + pop_microgrid = estimate_microgrid_population(raster_path, shapes_path, output_file) # Load the building classification data building_class = pd.read_csv(input_path) # Dictionary to store the load profiles for each microgrid @@ -252,7 +248,6 @@ def calculate_load( time_index = pd.date_range(start="2013-01-01", end="2013-12-31 23:00:00", freq="h") df = df.set_index(time_index) - # Apply time filtering based on the specified start and end dates if inclusive == "left": end_date = (pd.to_datetime(end_date) - pd.Timedelta(days=1)).strftime( @@ -303,7 +298,6 @@ def calculate_load( return all_load_per_cluster - def calculate_load_ramp( input_file_buildings, n, @@ -323,7 +317,7 @@ def calculate_load_ramp( date_end, inclusive, ): - + cleaned_buildings = gpd.read_file(input_file_buildings) house = cleaned_buildings[cleaned_buildings["tags_building"] == "house"] pop_microgrid, microgrid_load = estimate_microgrid_population( @@ -331,7 +325,6 @@ def calculate_load_ramp( ) density = pop_microgrid / house["area_m2"].sum() - grouped_buildings = cleaned_buildings.groupby("cluster_id") clusters = np.sort(cleaned_buildings["cluster_id"].unique()) house_area_for_cluster = [ @@ -441,9 +434,17 @@ def calculate_load_ramp_std( ] mean_demand_tier_df = pd.DataFrame( - {f"tier_{i+1}": pd.read_excel(file)["mean"] for i, file in enumerate(demand_files)}) + { + f"tier_{i+1}": pd.read_excel(file)["mean"] + for i, file in enumerate(demand_files) + } + ) std_demand_tier_df = pd.DataFrame( - {f"tier_{i+1}": pd.read_excel(file)["std"] for i, file in enumerate(demand_files)}) + { + f"tier_{i+1}": pd.read_excel(file)["std"] + for i, file in enumerate(demand_files) + } + ) mean_demand_tier_df.insert(0, "tier_0", np.zeros(len(mean_demand_tier_df))) std_demand_tier_df.insert(0, "tier_0", np.zeros(len(mean_demand_tier_df))) mean_demand_tier_df.index = pd.date_range( @@ -453,15 +454,19 @@ def calculate_load_ramp_std( "00:00:00", periods=len(mean_demand_tier_df), freq="H" ).time - pop= estimate_microgrid_population(raster_path, shapes_path,output_file) + pop = estimate_microgrid_population(raster_path, shapes_path, output_file) all_microgrid_loads = pd.DataFrame() for grid_name, grid_data in microgrid_list.items(): - microgrid_buildings=cleaned_buildings[cleaned_buildings["name_microgrid"]==grid_name] + microgrid_buildings = cleaned_buildings[ + cleaned_buildings["name_microgrid"] == grid_name + ] # Calculate the population density for the current microgrid based only on house buildings house = microgrid_buildings[microgrid_buildings["tags_building"] == "house"] - pop_microgrid = pop.loc[pop["Microgrid_Name"] == grid_name, "Population"].values[0] + pop_microgrid = pop.loc[ + pop["Microgrid_Name"] == grid_name, "Population" + ].values[0] density = pop_microgrid / house["area_m2"].sum() # Calculate population per cluster @@ -496,13 +501,11 @@ def calculate_load_ramp_std( [std_demand_tier_df] * len(date_range), ignore_index=True ) - # Calculate load for each cluster and tier + # Calculate load for each cluster and tier result_dict = {} for k, 
pop_cluster in tier_pop_df.iterrows(): load_df = pd.DataFrame() - for j, n_person in enumerate( - pop_cluster / 7 # Scale by family size - ): + for j, n_person in enumerate(pop_cluster / 7): # Scale by family size mean_load = mean_demand_tier_df_extended.iloc[:, j] * n_person std_load = np.random.normal( mean_demand_tier_df_extended.iloc[:, j], @@ -518,7 +521,9 @@ def calculate_load_ramp_std( } tot_loads_df = pd.concat(tot_result_dict.values(), axis=1) if inclusive == "left": - date_range_tot = pd.date_range(start=date_start, end=date_end, freq="H")[:-1] + date_range_tot = pd.date_range(start=date_start, end=date_end, freq="H")[ + :-1 + ] else: date_range_tot = pd.date_range(start=date_start, end=date_end, freq="H") tot_loads_df.index = date_range_tot @@ -533,7 +538,6 @@ def calculate_load_ramp_std( all_microgrid_loads.to_csv(output_path_csv) - if __name__ == "__main__": if "snakemake" not in globals(): from _helpers_dist import mock_snakemake diff --git a/scripts/build_shapes.py b/scripts/build_shapes.py index b092fe7..481ed40 100644 --- a/scripts/build_shapes.py +++ b/scripts/build_shapes.py @@ -23,7 +23,7 @@ def create_microgrid_shapes(microgrids_list, output_path): output_path : str Path where the GeoJSON file will be saved. """ - + # Open the input dictionary into a pandas DataFrame for easier processing microgrids_list_df = pd.DataFrame(microgrids_list) @@ -61,13 +61,13 @@ def create_microgrid_shapes(microgrids_list, output_path): def create_bus_regions(microgrids_list, output_path): """ Creates bus regions for each microgrid in the list of microgrids and saves them as a GeoJSON file. - The generated shape will be used for the calculation of renewable energy producibility, + The generated shape will be used for the calculation of renewable energy producibility, which will be associated with the bus generated at the center of the geometry. Parameters ---------- microgrids_list : dict Dictionary containing the microgrid names and their bounding box coordinates (lat_min, lon_min, lat_max, lon_max). - + output_path : str Path where the GeoJSON file will be saved. """ @@ -83,8 +83,8 @@ def create_bus_regions(microgrids_list, output_path): # Iterate over each column in the DataFrame for col in range(len(microgrids_list_df.columns)): - values = microgrids_list_df.iloc[:, col] - microgrid_name = microgrids_list_df.columns[col] + "_bus_renewable" + values = microgrids_list_df.iloc[:, col] + microgrid_name = microgrids_list_df.columns[col] + "_bus_renewable" # Define the vertices of the rectangle Top_left = (values[0], values[3]) @@ -111,8 +111,8 @@ def create_bus_regions(microgrids_list, output_path): microgrid_gdf = gpd.GeoDataFrame( { "name": microgrid_names, # Names of the bus regions - "x": microgrid_x, # x-coordinates of the centers - "y": microgrid_y, # y-coordinates of the centers + "x": microgrid_x, # x-coordinates of the centers + "y": microgrid_y, # y-coordinates of the centers "geometry": microgrid_shapes, # Polygon shapes of the regions } ) diff --git a/scripts/clean_earth_osm_data.py b/scripts/clean_earth_osm_data.py index 47fef4c..2e86609 100644 --- a/scripts/clean_earth_osm_data.py +++ b/scripts/clean_earth_osm_data.py @@ -15,7 +15,7 @@ def extract_points(microgrid_shape_path, buildings_path, output_path): """ - From the downloaded data, extracts buildings located within the boundaries of each microgrid geometry + From the downloaded data, extracts buildings located within the boundaries of each microgrid geometry and associates them with the respective microgrid name. 
Parameters @@ -30,10 +30,10 @@ def extract_points(microgrid_shape_path, buildings_path, output_path): Returns ------- GeoDataFrame - A GeoDataFrame containing the filtered buildings with an added field "name_microgrid" + A GeoDataFrame containing the filtered buildings with an added field "name_microgrid" that associates each building to its corresponding microgrid. """ - + # Load the GeoJSON files microgrid = gpd.read_file(microgrid_shape_path) buildings = gpd.read_file(buildings_path) @@ -44,12 +44,16 @@ def extract_points(microgrid_shape_path, buildings_path, output_path): # Extract the name of the microgrid microgrid_name = microgrid_shape["name"] # Filter buildings located within the microgrid geometry - buildings_in_microgrid = buildings[buildings.geometry.within(microgrid_shape.geometry)] + buildings_in_microgrid = buildings[ + buildings.geometry.within(microgrid_shape.geometry) + ] # Add or replace the "name_microgrid" field with the microgrid name buildings_in_microgrid = buildings_in_microgrid.copy() buildings_in_microgrid["name_microgrid"] = microgrid_name # Append the filtered buildings to the final result - result = gpd.GeoDataFrame(pd.concat([result, buildings_in_microgrid], ignore_index=True)) + result = gpd.GeoDataFrame( + pd.concat([result, buildings_in_microgrid], ignore_index=True) + ) # Save the final result as a GeoJSON file result.to_file(output_path, driver="GeoJSON") diff --git a/scripts/cluster_buildings.py b/scripts/cluster_buildings.py index d115580..fba1de0 100644 --- a/scripts/cluster_buildings.py +++ b/scripts/cluster_buildings.py @@ -22,7 +22,7 @@ def buildings_classification(input_file, crs): """ Filters the data contained in the input GeoJSON file, selecting only Polygon elements. - Calculates the plan area for each building based on the specified coordinate system (CRS) + Calculates the plan area for each building based on the specified coordinate system (CRS) and adds this information as a new column to the GeoDataFrame. Buildings classified as "yes" with an area below a predefined limit are reclassified as "house". 
@@ -96,13 +96,17 @@ def get_central_points_geojson_with_buildings( # Classify and process the buildings microgrid_buildings = buildings_classification(input_filepath, crs) # Prepare GeoDataFrames and DataFrames to accumulate results - all_central_features = gpd.GeoDataFrame(columns=["geometry", "cluster", "name_microgrid"]) + all_central_features = gpd.GeoDataFrame( + columns=["geometry", "cluster", "name_microgrid"] + ) all_microgrid_buildings = gpd.GeoDataFrame(columns=microgrid_buildings.columns) all_buildings_class = pd.DataFrame() # Process each microgrid individually for grid_name, grid_data in microgrids_list.items(): # Filter buildings belonging to the current microgrid - filtered_buildings = microgrid_buildings[microgrid_buildings["name_microgrid"] == grid_name] + filtered_buildings = microgrid_buildings[ + microgrid_buildings["name_microgrid"] == grid_name + ] # Extract centroids of each building as coordinates centroids_building = [ (row.geometry.centroid.x, row.geometry.centroid.y) @@ -112,7 +116,7 @@ def get_central_points_geojson_with_buildings( # Apply KMeans clustering to group the buildings kmeans = KMeans(n_clusters=n_clusters, random_state=0).fit(centroids_building) # Get the coordinates of cluster centroids - centroids = kmeans.cluster_centers_ + centroids = kmeans.cluster_centers_ # Identify the central point for each cluster central_points = [] for i in range(kmeans.n_clusters): @@ -125,7 +129,7 @@ def get_central_points_geojson_with_buildings( for i, central_point in enumerate(central_points): central_features.append( { - "geometry": Point(central_point), + "geometry": Point(central_point), "cluster": i, "name_microgrid": grid_name, } @@ -133,24 +137,38 @@ def get_central_points_geojson_with_buildings( central_features_gdf = gpd.GeoDataFrame( central_features, crs=filtered_buildings.crs ).to_crs("EPSG:4326") - all_central_features = pd.concat([all_central_features, central_features_gdf], ignore_index=True) + all_central_features = pd.concat( + [all_central_features, central_features_gdf], ignore_index=True + ) # Assign cluster IDs to buildings and append to the results clusters = kmeans.labels_ filtered_buildings["cluster_id"] = clusters.astype(int) - all_microgrid_buildings = pd.concat([all_microgrid_buildings, filtered_buildings], ignore_index=True) + all_microgrid_buildings = pd.concat( + [all_microgrid_buildings, filtered_buildings], ignore_index=True + ) # Count building types within each cluster and append to the summary buildings_class = ( - filtered_buildings.groupby("cluster_id").tags_building.value_counts().reset_index(name="count") + filtered_buildings.groupby("cluster_id") + .tags_building.value_counts() + .reset_index(name="count") ) buildings_class["name_microgrid"] = grid_name - all_buildings_class = pd.concat([all_buildings_class, buildings_class], ignore_index=True) + all_buildings_class = pd.concat( + [all_buildings_class, buildings_class], ignore_index=True + ) # Save all the results to their respective output files - all_central_features.to_file(output_filepath_centroids, driver="GeoJSON") # Save cluster centroids as GeoJSON - all_microgrid_buildings.to_file(output_filepath_buildings, driver="GeoJSON") # Save clustered buildings as GeoJSON - all_buildings_class.to_csv(output_path_csv, index=False) # Save building type counts as CSV + all_central_features.to_file( + output_filepath_centroids, driver="GeoJSON" + ) # Save cluster centroids as GeoJSON + all_microgrid_buildings.to_file( + output_filepath_buildings, driver="GeoJSON" + ) # Save clustered 
buildings as GeoJSON + all_buildings_class.to_csv( + output_path_csv, index=False + ) # Save building type counts as CSV if __name__ == "__main__": diff --git a/scripts/create_network.py b/scripts/create_network.py index 7956fdc..2e2376b 100644 --- a/scripts/create_network.py +++ b/scripts/create_network.py @@ -36,7 +36,8 @@ def create_network(): # Return the created network return n - + + def create_microgrid_network(n, input_file, voltage_level, line_type, microgrid_list): """ Creates local microgrid networks within the PyPSA network. The local microgrid networks are distribution networks created based on @@ -55,7 +56,7 @@ def create_microgrid_network(n, input_file, voltage_level, line_type, microgrid_ line_type : str The type of lines to be used for connecting the buses (e.g., "AC"). microgrid_list : dict - A dictionary containing the list of microgrids. Keys are microgrid names, + A dictionary containing the list of microgrids. Keys are microgrid names, and values are metadata about each microgrid. Output ------ @@ -63,7 +64,7 @@ def create_microgrid_network(n, input_file, voltage_level, line_type, microgrid_ - Buses for each microgrid, identified by cluster ID and associated with a SubNetwork. - Lines connecting buses within each microgrid based on Delaunay triangulation. """ - + # Load the GeoJSON file data = gpd.read_file(input_file) bus_coords = set() @@ -91,10 +92,12 @@ def create_microgrid_network(n, input_file, voltage_level, line_type, microgrid_ bus_coords.add((x, y)) microgrid_buses.append(bus_name) # Filter coordinates for the current microgrid - coords = np.column_stack(( - n.buses.loc[microgrid_buses].x.values, - n.buses.loc[microgrid_buses].y.values - )) + coords = np.column_stack( + ( + n.buses.loc[microgrid_buses].x.values, + n.buses.loc[microgrid_buses].y.values, + ) + ) # Check if there are enough points for triangulation if len(coords) < 3: print(f"Not enough points for triangulation in {grid_name}.") @@ -133,7 +136,7 @@ def create_microgrid_network(n, input_file, voltage_level, line_type, microgrid_ s_nom=0.1, s_nom_extendable=True, ) - + # def add_bus_at_center(n, number_microgrids, voltage_level, line_type): # """ diff --git a/scripts/download_osm_data.py b/scripts/download_osm_data.py index bba6c4d..aea2215 100644 --- a/scripts/download_osm_data.py +++ b/scripts/download_osm_data.py @@ -67,7 +67,7 @@ def convert_iso_to_geofk( return iso_code -def retrieve_osm_data_geojson( microgrids_list, feature_name, url, path): +def retrieve_osm_data_geojson(microgrids_list, feature_name, url, path): """ The buildings inside the specified coordinates are retrieved by using overpass API. The region coordinates should be defined in the config.yaml file. @@ -83,7 +83,7 @@ def retrieve_osm_data_geojson( microgrids_list, feature_name, url, path): Directory where the GeoJSON file will be saved. 
""" # Collect all features from all microgrids - geojson_features = [] + geojson_features = [] for grid_name, grid_data in microgrids_list.items(): # Extract the bounding box coordinates for the current microgrid to construct the query @@ -100,8 +100,12 @@ def retrieve_osm_data_geojson( microgrids_list, feature_name, url, path): out body; """ try: - logger.info(f"Querying Overpass API for microgrid: {grid_name}") # Log the current query - response = requests.get(url, params={"data": overpass_query}) # Send the query to Overpass API + logger.info( + f"Querying Overpass API for microgrid: {grid_name}" + ) # Log the current query + response = requests.get( + url, params={"data": overpass_query} + ) # Send the query to Overpass API response.raise_for_status() # Raise an error if the request fails data = response.json() # Parse the JSON response @@ -150,7 +154,7 @@ def retrieve_osm_data_geojson( microgrids_list, feature_name, url, path): except requests.exceptions.RequestException as e: # Handle request-related errors logger.error(f"Request error for microgrid: {grid_name}: {e}") - + # Save all features to a single GeoJSON file try: outpath = Path(path) / "all_raw_buildings.geojson" @@ -158,7 +162,9 @@ def retrieve_osm_data_geojson( microgrids_list, feature_name, url, path): with open(outpath, "w") as f: f.write('{"type":"FeatureCollection","features":[\n') - f.write(",\n".join(geojson_features)) # Write features in one-line format + f.write( + ",\n".join(geojson_features) + ) # Write features in one-line format f.write("\n]}\n") logger.info(f"Combined GeoJSON saved to {outpath}") @@ -166,6 +172,7 @@ def retrieve_osm_data_geojson( microgrids_list, feature_name, url, path): except IOError as e: logger.error(f"Error saving GeoJSON file: {e}") + if __name__ == "__main__": if "snakemake" not in globals(): from _helpers_dist import mock_snakemake, sets_path_to_root diff --git a/scripts/ramp_build_demand_profile.py b/scripts/ramp_build_demand_profile.py index d4f8b93..32a977a 100644 --- a/scripts/ramp_build_demand_profile.py +++ b/scripts/ramp_build_demand_profile.py @@ -17,13 +17,13 @@ def create_demand_profile( excel_daily_profile_output_path, ): """ - Generates daily and hourly demand profiles for a specified number of days, + Generates daily and hourly demand profiles for a specified number of days, based on user data from an input Excel file, and saves the results to Excel files. The function: Load user-specific data from the provided input Excel file. Generate daily load profiles, normalized by the number of users. Compute hourly averages of demand from minute-level data. - Reshape the hourly data into daily profiles and calculate statistical measures + Reshape the hourly data into daily profiles and calculate statistical measures (mean and standard deviation) to represent a "typical day." Save the processed profiles and statistics to Excel files. @@ -32,7 +32,7 @@ def create_demand_profile( days : int The number of days for which the demand profile should be generated. start : str - The starting date for the profiles + The starting date for the profiles xlsx_input_path : str Path to the input Excel file containing user-specific data excel_profiles_output_path : str @@ -43,7 +43,7 @@ def create_demand_profile( Output Files ------------ - `excel_profiles_output_path`: Contains a DataFrame where each column represents the hourly profile of a specific day. 
- - `excel_daily_profile_output_path`: Contains a DataFrame with two columns, `mean` and `std`, representing + - `excel_daily_profile_output_path`: Contains a DataFrame with two columns, `mean` and `std`, representing the mean hourly demand and its standard deviation over the specified days. """ From 2fe24bc66154bfeb87d8b19cd98fd9e8e70a11fa Mon Sep 17 00:00:00 2001 From: Margherita Capitani Date: Mon, 16 Dec 2024 15:35:47 +0100 Subject: [PATCH 05/12] code_build_demand_lightening --- scripts/build_demand.py | 172 +++++++--------------------------------- 1 file changed, 29 insertions(+), 143 deletions(-) diff --git a/scripts/build_demand.py b/scripts/build_demand.py index 877b6ab..6d93796 100644 --- a/scripts/build_demand.py +++ b/scripts/build_demand.py @@ -301,7 +301,6 @@ def calculate_load( return all_load_per_cluster - def calculate_load_ramp( input_file_buildings, n, @@ -320,113 +319,8 @@ def calculate_load_ramp( date_start, date_end, inclusive, -): - # Caricamento dei dati e calcolo della densità di popolazione - microgrid_buildings = gpd.read_file(input_file_buildings) - house = microgrid_buildings[microgrid_buildings["tags_building"] == "house"] - pop_microgrid = estimate_microgrid_population( - raster_path, shapes_path, output_file - ) - density = pop_microgrid / house["area_m2"].sum() - - # Calcolo superficie e popolazione per cluster - grouped_buildings = microgrid_buildings.groupby("cluster_id") - clusters = np.sort(microgrid_buildings["cluster_id"].unique()) - house_area_for_cluster = [ - grouped_buildings.get_group(cluster)[ - grouped_buildings.get_group(cluster)["tags_building"] == "house" - ]["area_m2"].sum() - for cluster in clusters - ] - population_df = pd.DataFrame( - {"cluster": clusters, "house_area_for_cluster": house_area_for_cluster} - ).set_index("cluster") - population_df["people_for_cluster"] = ( - population_df["house_area_for_cluster"] * density - ).round() - tier_pop_df = pd.DataFrame( - np.outer(population_df["people_for_cluster"], tier_percent), - index=population_df.index, - ) - - # Caricamento e creazione di DataFrames di domanda media e deviazione standard per ogni tier - demand_files = [ - input_file_profile_tier1, - input_file_profile_tier2, - input_file_profile_tier3, - input_file_profile_tier4, - input_file_profile_tier5, - ] - mean_demand_tier_df = pd.DataFrame( - { - f"tier_{i+1}": pd.read_excel(file)["mean"] - for i, file in enumerate(demand_files) - } - ) - mean_demand_tier_df.insert(0, "tier_0", np.zeros(len(mean_demand_tier_df))) - mean_demand_tier_df.index = pd.date_range( - "00:00:00", periods=len(mean_demand_tier_df), freq="H" - ).time - - if inclusive == "left": - date_range = pd.date_range(start=date_start, end=date_end, freq="D")[:-1] - else: - date_range = pd.date_range(start=date_start, end=date_end, freq="D") - - mean_demand_tier_df_extended = pd.concat( - [mean_demand_tier_df] * len(date_range), ignore_index=True - ) - - # Calcolo del carico totale per ogni cluster e tier - result_dict = {} - for k, pop_cluster in tier_pop_df.iterrows(): - load_df = pd.DataFrame() - for j, n_person in enumerate( - pop_cluster / 7 - ): # Scala la popolazione per famiglia - mean_load = mean_demand_tier_df_extended.iloc[:, j] * n_person - total_load = (mean_load) / 1e6 - load_df[f"tier_{j}"] = total_load - result_dict[f"bus_{k}"] = load_df - - # Aggregazione del carico totale per cluster - tot_result_dict = { - f"{k}": df.sum(axis=1).rename(f"{k}") for k, df in result_dict.items() - } - tot_loads_df = pd.concat(tot_result_dict.values(), axis=1) - if 
inclusive == "left": - date_range_tot = pd.date_range(start=date_start, end=date_end, freq="H")[:-1] - else: - date_range_tot = pd.date_range(start=date_start, end=date_end, freq="H") - tot_loads_df.index = date_range_tot - - # Sostituzione dei valori zero con un valore minimo per evitare problemi di plotting - small_value = 1e-26 - tot_loads_df.loc[:, (tot_loads_df == 0).all()] = small_value - - # Esportazione del DataFrame finale - tot_loads_df.to_csv(output_path_csv) - - -def calculate_load_ramp_std( - input_file_buildings, - n, - p, - raster_path, - shapes_path, - sample_profile, - output_file, - input_file_profile_tier1, - input_file_profile_tier2, - input_file_profile_tier3, - input_file_profile_tier4, - input_file_profile_tier5, - output_path_csv, - tier_percent, - date_start, - date_end, - inclusive, microgrid_list, + std, ): # Upload of buildings and data demand for each tier cleaned_buildings = gpd.read_file(input_file_buildings) @@ -495,20 +389,32 @@ def calculate_load_ramp_std( ) # Calculate load for each cluster and tier - result_dict = {} - for k, pop_cluster in tier_pop_df.iterrows(): - load_df = pd.DataFrame() - for j, n_person in enumerate( - pop_cluster / 7 # Scale by family size - ): - mean_load = mean_demand_tier_df_extended.iloc[:, j] * n_person - std_load = np.random.normal( - mean_demand_tier_df_extended.iloc[:, j], - std_demand_tier_df_extended.iloc[:, j], - ) * np.sqrt(n_person) - total_load = (mean_load + std_load) / 1e6 - load_df[f"tier_{j}"] = total_load - result_dict[f"{grid_name}_bus_{k}"] = load_df + if std == "on": + result_dict = {} + for k, pop_cluster in tier_pop_df.iterrows(): + load_df = pd.DataFrame() + for j, n_person in enumerate( + pop_cluster / 7 # Scale by family size + ): + mean_load = mean_demand_tier_df_extended.iloc[:, j] * n_person + std_load = np.random.normal( + mean_demand_tier_df_extended.iloc[:, j], + std_demand_tier_df_extended.iloc[:, j], + ) * np.sqrt(n_person) + total_load = (mean_load + std_load) / 1e6 + load_df[f"tier_{j}"] = total_load + result_dict[f"{grid_name}_bus_{k}"] = load_df + elif std == "off": + result_dict = {} + for k, pop_cluster in tier_pop_df.iterrows(): + load_df = pd.DataFrame() + for j, n_person in enumerate( + pop_cluster / 7 # Scale by family size + ): + mean_load = mean_demand_tier_df_extended.iloc[:, j] * n_person + total_load = (mean_load) / 1e6 + load_df[f"tier_{j}"] = total_load + result_dict[f"{grid_name}_bus_{k}"] = load_df # Aggregate total load per cluster tot_result_dict = { @@ -548,8 +454,8 @@ def calculate_load_ramp_std( date_end = snakemake.params.snapshots["end"] inclusive = snakemake.params.snapshots["inclusive"] microgrids_list = snakemake.config["microgrids_list"] - build_demand_model = snakemake.params.build_demand_model["type"] + std = snakemake.params.build_demand_model["std"] assert ( len(snakemake.config["countries"]) == 1 @@ -602,26 +508,6 @@ def calculate_load_ramp_std( date_start, date_end, inclusive, - ) - elif build_demand_model == 2: - - calculate_load_ramp_std( - snakemake.input["clusters_with_buildings"], - n, - snakemake.config["load"]["scaling_factor"], - worldpop_path, - snakemake.input["microgrid_shapes"], - sample_profile, - snakemake.output["electric_load"], - snakemake.input["profile_Tier1"], - snakemake.input["profile_Tier2"], - snakemake.input["profile_Tier3"], - snakemake.input["profile_Tier4"], - snakemake.input["profile_Tier5"], - snakemake.output["electric_load"], - tier_percent, - date_start, - date_end, - inclusive, microgrids_list, + std, ) From 
c6928e202d8fe222f8903222b2e5132d635862b6 Mon Sep 17 00:00:00 2001 From: Margherita Capitani Date: Mon, 16 Dec 2024 15:36:27 +0100 Subject: [PATCH 06/12] code_build_demand_lightening --- config.distribution.yaml | 1 + 1 file changed, 1 insertion(+) diff --git a/config.distribution.yaml b/config.distribution.yaml index 6111a72..d61ef9f 100644 --- a/config.distribution.yaml +++ b/config.distribution.yaml @@ -58,6 +58,7 @@ house_area_limit: build_demand_type: type: 0 + std : "on" # type allows to select the mode by which the microgrid demand profile is generated. # 1 = a predetermined hourly profile is used # 2 = an average hourly profile is calculated by exploiting the ramp tool From 3293b94de498fe0953ca09d08d9082aace3a9601 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Mon, 16 Dec 2024 14:45:12 +0000 Subject: [PATCH 07/12] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- config.distribution.yaml | 2 +- scripts/build_demand.py | 11 +++-------- 2 files changed, 4 insertions(+), 9 deletions(-) diff --git a/config.distribution.yaml b/config.distribution.yaml index 89b62ba..5278275 100644 --- a/config.distribution.yaml +++ b/config.distribution.yaml @@ -58,7 +58,7 @@ house_area_limit: build_demand_type: type: 0 - std : "on" + std: "on" # type allows to select the mode by which the microgrid demand profile is generated. # 0 = a predetermined hourly profile is used # 1 = an average hourly profile is calculated by exploiting the ramp tool diff --git a/scripts/build_demand.py b/scripts/build_demand.py index bdd6a5c..37f1f27 100644 --- a/scripts/build_demand.py +++ b/scripts/build_demand.py @@ -397,15 +397,12 @@ def calculate_load_ramp( [std_demand_tier_df] * len(date_range), ignore_index=True ) - - # Calculate load for each cluster and tier + # Calculate load for each cluster and tier if std == "on": result_dict = {} for k, pop_cluster in tier_pop_df.iterrows(): load_df = pd.DataFrame() - for j, n_person in enumerate( - pop_cluster / 7 # Scale by family size - ): + for j, n_person in enumerate(pop_cluster / 7): # Scale by family size mean_load = mean_demand_tier_df_extended.iloc[:, j] * n_person std_load = np.random.normal( mean_demand_tier_df_extended.iloc[:, j], @@ -418,9 +415,7 @@ def calculate_load_ramp( result_dict = {} for k, pop_cluster in tier_pop_df.iterrows(): load_df = pd.DataFrame() - for j, n_person in enumerate( - pop_cluster / 7 # Scale by family size - ): + for j, n_person in enumerate(pop_cluster / 7): # Scale by family size mean_load = mean_demand_tier_df_extended.iloc[:, j] * n_person total_load = (mean_load) / 1e6 load_df[f"tier_{j}"] = total_load From 6e22c9478cec31bd318f11e02c8894cde2935c9c Mon Sep 17 00:00:00 2001 From: Margherita Capitani Date: Mon, 16 Dec 2024 15:51:46 +0100 Subject: [PATCH 08/12] fix_test_config --- test/config.distribution.test.yaml | 1 + 1 file changed, 1 insertion(+) diff --git a/test/config.distribution.test.yaml b/test/config.distribution.test.yaml index 798c52d..4c46967 100644 --- a/test/config.distribution.test.yaml +++ b/test/config.distribution.test.yaml @@ -56,6 +56,7 @@ house_area_limit: build_demand_type: type: 0 + std: "on" # definition of the Coordinate Reference Systems crs: From 2deca465287251f70e0c53ebbc0f7f1e8433725e Mon Sep 17 00:00:00 2001 From: Margherita Capitani Date: Mon, 16 Dec 2024 16:24:36 +0100 Subject: [PATCH 09/12] Fix_create_network_multimicrogrid --- scripts/create_network.py | 79 
+++++++++++++++++++++------------------ 1 file changed, 43 insertions(+), 36 deletions(-) diff --git a/scripts/create_network.py b/scripts/create_network.py index 2e2376b..f59596f 100644 --- a/scripts/create_network.py +++ b/scripts/create_network.py @@ -65,77 +65,84 @@ def create_microgrid_network(n, input_file, voltage_level, line_type, microgrid_ - Lines connecting buses within each microgrid based on Delaunay triangulation. """ - # Load the GeoJSON file data = gpd.read_file(input_file) - bus_coords = set() + bus_coords = set() # Keep track of bus coordinates to avoid duplicates + for grid_name, grid_data in microgrid_list.items(): # Filter data for the current microgrid grid_data = data[data["name_microgrid"] == grid_name] - # Create a SubNetwork for the current microgrid + + # Create a SubNetwork for the current microgrid if it does not exist if grid_name not in n.sub_networks.index: n.add("SubNetwork", grid_name, carrier="electricity") - # List to store bus names for this microgrid + + # List to store bus names and their positions for triangulation microgrid_buses = [] + bus_positions = [] + for _, feature in grid_data.iterrows(): point_geom = feature.geometry bus_name = f"{grid_name}_bus_{feature['cluster']}" x, y = point_geom.x, point_geom.y - # Avoid adding duplicate buses + + # Skip duplicate buses or overlapping coordinates if bus_name in n.buses.index: continue if (x, y) in bus_coords: raise ValueError( f"Overlapping microgrids detected at {x}, {y}. Adjust the configuration." ) - # Add the bus and assign it to the SubNetwork + + # Add the bus to the network and assign it to the SubNetwork n.add("Bus", bus_name, x=x, y=y, v_nom=voltage_level, sub_network=grid_name) bus_coords.add((x, y)) microgrid_buses.append(bus_name) - # Filter coordinates for the current microgrid - coords = np.column_stack( - ( - n.buses.loc[microgrid_buses].x.values, - n.buses.loc[microgrid_buses].y.values, - ) - ) + bus_positions.append((x, y)) + # Check if there are enough points for triangulation - if len(coords) < 3: + if len(bus_positions) < 3: print(f"Not enough points for triangulation in {grid_name}.") continue - # Create a Delaunay triangulation of the filtered bus coordinates + + # Perform Delaunay triangulation to determine bus connections + coords = np.array(bus_positions) tri = Delaunay(coords) + + # Collect unique edges from the Delaunay triangulation edges = set() for simplex in tri.simplices: for i in range(3): edge = tuple(sorted([simplex[i], simplex[(i + 1) % 3]])) edges.add(edge) - # Add lines for the current microgrid + + # Add lines to the network based on the triangulation edges for i, j in edges: bus0 = microgrid_buses[i] bus1 = microgrid_buses[j] - line_name = f"{grid_name}_line_{i}_{j}" + line_name = f"{grid_name}_line_{bus0}_{bus1}" + + # Skip if the line already exists if line_name in n.lines.index: - continue # Skip if the line already exists + continue + + # Retrieve the coordinates of the buses x1, y1 = n.buses.loc[bus0].x, n.buses.loc[bus0].y x2, y2 = n.buses.loc[bus1].x, n.buses.loc[bus1].y - transformer = Transformer.from_crs("EPSG:4326", "EPSG:3857", always_xy=True) - x1, y1 = transformer.transform(x1, y1) - x2, y2 = transformer.transform(x2, y2) - - coords_0 = Point(x1, y1) - coords_1 = Point(x2, y2) - - length = (coords_0.distance(coords_1)) / 1000 - n.add( - "Line", - line_name, - bus0=bus0, - bus1=bus1, - type="24-AL1/4-ST1A 0.4", - length=length, - s_nom=0.1, - s_nom_extendable=True, - ) + + # Calculate the distance between buses (in km) + length = np.sqrt((x2 - x1) 
** 2 + (y2 - y1) ** 2) / 1000 + + # Add the line to the network + n.add( + "Line", + line_name, + bus0=bus0, + bus1=bus1, + type=line_type, + length=length, + s_nom=0.1, + s_nom_extendable=True, + ) # def add_bus_at_center(n, number_microgrids, voltage_level, line_type): From 0033261a7f094bf14cf43f52c9dac5cd6f642d38 Mon Sep 17 00:00:00 2001 From: Davide Fioriti Date: Tue, 31 Dec 2024 15:52:35 +0100 Subject: [PATCH 10/12] Update environment --- doc/requirements.txt | 1 - envs/environment.yaml | 9 +++------ 2 files changed, 3 insertions(+), 7 deletions(-) diff --git a/doc/requirements.txt b/doc/requirements.txt index 015821b..001d2e1 100644 --- a/doc/requirements.txt +++ b/doc/requirements.txt @@ -9,7 +9,6 @@ sphinx_rtd_theme setuptools<58.3.0 pypsa -vresutils>=0.3.1 powerplantmatching>=0.4.8 atlite>=0.2.2 dask<=2021.3.1 diff --git a/envs/environment.yaml b/envs/environment.yaml index b3b933e..50dce4a 100644 --- a/envs/environment.yaml +++ b/envs/environment.yaml @@ -15,8 +15,8 @@ dependencies: - pypsa>=0.24, <0.25 # - atlite>=0.2.4 # until https://github.com/PyPSA/atlite/issues/244 is not merged - dask -- powerplantmatching -- earth-osm>=2.1 +- powerplantmatching>=0.5.19 +- earth-osm>=2.3.post1 - atlite # Dependencies of the workflow itself @@ -28,7 +28,7 @@ dependencies: - ruamel.yaml<=0.17.26 - pytables - lxml -- numpy +- numpy<2 - pandas - geopandas>=0.11.0, <=0.14.3 - fiona<1.10.0 @@ -81,11 +81,8 @@ dependencies: - gurobi - pip: - - earth-osm>=2.2 # until conda release it out - - powerplantmatching>=0.5.19 # until conda release it out - rampdemand - git+https://github.com/davide-f/google-drive-downloader@master # google drive with fix for virus scan - - git+https://github.com/FRESNA/vresutils@master # until new pip release > 0.3.1 (strictly) - tsam>=1.1.0 - chaospy # lastest version only available on pip - fake_useragent From 11a5662d839127423366460ad5993bd3df09584c Mon Sep 17 00:00:00 2001 From: Davide Fioriti Date: Tue, 31 Dec 2024 16:11:59 +0100 Subject: [PATCH 11/12] Update environment --- .github/workflows/ci-linux.yaml | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/.github/workflows/ci-linux.yaml b/.github/workflows/ci-linux.yaml index 3ba84fc..d6095ce 100644 --- a/.github/workflows/ci-linux.yaml +++ b/.github/workflows/ci-linux.yaml @@ -38,13 +38,15 @@ jobs: steps: - uses: actions/checkout@v2 - - name: Setup Mambaforge - uses: conda-incubator/setup-miniconda@v2 + - name: Setup micromamba + uses: mamba-org/setup-micromamba@v2 with: - miniforge-variant: Mambaforge - miniforge-version: latest - activate-environment: pypsa-earth - use-mamba: true + micromamba-version: '1.5.9-1' + environment-file: "envs/environment.yaml" + log-level: debug + init-shell: bash + cache-environment: true + cache-downloads: true - name: Fill submodule run: | @@ -62,8 +64,6 @@ jobs: if: steps.cache.outputs.cache-hit != 'true' run: | mamba env update -n pypsa-earth -f envs/environment.yaml - conda activate pypsa-earth - pip install rampdemand - name: Conda list run: | From a0b6377c41882f30cd7c38e6d907118bacadef21 Mon Sep 17 00:00:00 2001 From: Davide Fioriti Date: Tue, 31 Dec 2024 16:26:56 +0100 Subject: [PATCH 12/12] Update environment --- envs/environment.yaml | 1 + 1 file changed, 1 insertion(+) diff --git a/envs/environment.yaml b/envs/environment.yaml index 50dce4a..ae095a3 100644 --- a/envs/environment.yaml +++ b/envs/environment.yaml @@ -86,3 +86,4 @@ dependencies: - tsam>=1.1.0 - chaospy # lastest version only available on pip - fake_useragent + - numpy<=1.26.4
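
Note on the `std` switch (config key added in [PATCH 06/12], code branches in the build_demand.py hunks above): `std: "on"` adds a sampled component to the RAMP-based tier demand, `"off"` keeps only the mean profile. The two added branches differ only in that sampled term, so they can be written as a single loop. A minimal sketch, assuming the surrounding variables of calculate_load_ramp (tier_pop_df, mean_demand_tier_df_extended, std_demand_tier_df_extended, grid_name); the helper name cluster_tier_loads is illustrative and not part of the patch:

    # Sketch only -- an equivalent single-loop form of the std == "on"/"off" branches
    # in calculate_load_ramp; mean_tier/std_tier stand in for the extended tier
    # demand DataFrames used in the patch.
    import numpy as np
    import pandas as pd

    def cluster_tier_loads(tier_pop_df, mean_tier, std_tier, grid_name, std="on"):
        result_dict = {}
        for k, pop_cluster in tier_pop_df.iterrows():
            load_df = pd.DataFrame()
            for j, n_person in enumerate(pop_cluster / 7):  # scale by family size
                total_load = mean_tier.iloc[:, j] * n_person
                if std == "on":
                    # sampled component scaled by sqrt(n_person), as in the patch
                    total_load = total_load + np.random.normal(
                        mean_tier.iloc[:, j], std_tier.iloc[:, j]
                    ) * np.sqrt(n_person)
                load_df[f"tier_{j}"] = total_load / 1e6  # same 1e6 scaling as the patch
            result_dict[f"{grid_name}_bus_{k}"] = load_df
        return result_dict

The sketch reproduces the patch's arithmetic exactly; note that with std == "on" the sampled term is drawn around the mean tier profile and then added to the mean load, so the expected load per cluster grows as n + sqrt(n) times the mean. If only zero-mean fluctuations are intended, the draw could instead be centred on zero.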
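
Note on [PATCH 09/12]: the new length expression in create_microgrid_network takes the Euclidean distance between bus coordinates and divides by 1000, but the bus x/y values are longitude/latitude in degrees (as the removed Transformer.from_crs("EPSG:4326", "EPSG:3857", ...) call indicates), so the result is not in kilometres despite the "(in km)" comment. If kilometres are intended, the projection step from the removed lines can be kept. A minimal sketch, assuming pyproj is importable in the script (the removed code already relied on it); the helper name line_length_km is illustrative:

    # Sketch only -- approximate line length in km between two buses whose x/y are
    # lon/lat in degrees, reusing the EPSG:4326 -> EPSG:3857 projection that the
    # patch removes.
    from pyproj import Transformer

    _TO_METRIC = Transformer.from_crs("EPSG:4326", "EPSG:3857", always_xy=True)

    def line_length_km(x1, y1, x2, y2):
        x1_m, y1_m = _TO_METRIC.transform(x1, y1)
        x2_m, y2_m = _TO_METRIC.transform(x2, y2)
        return ((x2_m - x1_m) ** 2 + (y2_m - y1_m) ** 2) ** 0.5 / 1000

Used as length = line_length_km(x1, y1, x2, y2) in place of the Euclidean expression. A geodesic calculation (e.g. pyproj.Geod(ellps="WGS84").inv) would additionally avoid the Web Mercator distance distortion away from the equator, if that matters for the target regions.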