From 3f9a5b72dafb964e2f8dd59010db40d1275b81ee Mon Sep 17 00:00:00 2001
From: Fabrizio Finozzi
Date: Wed, 5 Jun 2024 00:05:46 +0200
Subject: [PATCH 01/40] _helpers.py os to pathlib

---
 Snakefile           |   2 +-
 doc/conf.py         |   6 +-
 scripts/_helpers.py | 172 +++++++++++++++++++++++++++++++++++---------
 3 files changed, 144 insertions(+), 36 deletions(-)

diff --git a/Snakefile b/Snakefile
index ac8724ef3..32c92ba5b 100644
--- a/Snakefile
+++ b/Snakefile
@@ -6,7 +6,7 @@
 import sys

 sys.path.append("./scripts")

-from os.path import normpath, exists, isdir
+from os.path import normpath, exists
 from shutil import copyfile, move

 from snakemake.remote.HTTP import RemoteProvider as HTTPRemoteProvider

diff --git a/doc/conf.py b/doc/conf.py
index 8bd5c798a..fdf42ba93 100644
--- a/doc/conf.py
+++ b/doc/conf.py
@@ -12,16 +12,16 @@
 # -- Path setup --------------------------------------------------------------

 # If extensions (or modules to document with autodoc) are in another directory,
 # add these directories to sys.path here. If the directory is relative to the
-# documentation root, use os.path.abspath to make it absolute, like shown here.
+# documentation root, use pathlib.Path.absolute to make it absolute, like shown here.
 #
 import datetime
-import os
+import pathlib
 import shutil
 import sys

 from git import Repo

-sys.path.insert(0, os.path.abspath("../scripts"))
+sys.path.insert(0, str(pathlib.Path("../scripts").absolute()))

 for p in sys.path:
     print(p)

diff --git a/scripts/_helpers.py b/scripts/_helpers.py
index 4fdf000b9..9d88c26b7 100644
--- a/scripts/_helpers.py
+++ b/scripts/_helpers.py
@@ -7,9 +7,9 @@

 import logging
 import os
+import pathlib
 import subprocess
 import sys
-from pathlib import Path

 import country_converter as coco
 import geopandas as gpd
@@ -96,12 +96,12 @@ def read_osm_config(*args):
         {"Africa": {"DZ": "algeria", ...}, ...}
     """
     if "__file__" in globals():
-        base_folder = os.path.dirname(__file__)
-        if not os.path.exists(os.path.join(base_folder, "configs")):
-            base_folder = os.path.dirname(base_folder)
+        base_folder = get_dirname_path(__file__)
+        if not path_exists(get_path(base_folder, "configs")):
+            base_folder = get_dirname_path(base_folder)
     else:
-        base_folder = os.getcwd()
-    osm_config_path = os.path.join(base_folder, "configs", REGIONS_CONFIG)
+        base_folder = get_current_directory_path()
+    osm_config_path = get_path(base_folder, "configs", REGIONS_CONFIG)
     with open(osm_config_path, "r") as f:
         osm_config = yaml.safe_load(f)
     if len(args) == 0:
@@ -132,8 +132,8 @@ def sets_path_to_root(root_directory_name):
     while n >= 0:
         n -= 1
         # if repo_name is current folder name, stop and set path
-        if repo_name == os.path.basename(os.path.abspath(".")):
-            repo_path = os.getcwd()  # os.getcwd() = current_path
+        if repo_name == get_basename_abs_path("."):
+            repo_path = get_current_directory_path()  # current_path
             os.chdir(repo_path)  # change dir_path to repo_path
             print("This is the repository path: ", repo_path)
             print("Had to go %d folder(s) up." % (n0 - 1 - n))
@@ -143,8 +143,7 @@ def sets_path_to_root(root_directory_name):
             print("Can't find the repo path.")
         # if repo_name NOT current folder name, go one directory higher
         else:
-            upper_path = os.path.dirname(os.path.abspath("."))  # name of upper folder
-            os.chdir(upper_path)
+            change_to_script_dir(".")  # change to the upper folder


 def configure_logging(snakemake, skip_handlers=False):
@@ -171,8 +170,8 @@ def configure_logging(snakemake, skip_handlers=False):
     kwargs.setdefault("level", "INFO")

     if skip_handlers is False:
-        fallback_path = Path(__file__).parent.joinpath(
-            "..", "logs", f"{snakemake.rule}.log"
+        fallback_path = get_path(
+            get_dirname_path(__file__), "..", "logs", f"{snakemake.rule}.log"
         )
         logfile = snakemake.log.get(
             "python", snakemake.log[0] if snakemake.log else fallback_path
@@ -249,7 +248,7 @@ def pdbcast(v, h):


 def load_network_for_plots(
-    fn, tech_costs, cost_config, elec_config, combine_hydro_ps=True
+    fn, tech_costs, cost_config, elec_config, combine_hydro_ps=True
 ):
     import pypsa
     from add_electricity import load_costs, update_transmission_costs
@@ -260,7 +259,7 @@
     n.stores["carrier"] = n.stores.bus.map(n.buses.carrier)
     n.links["carrier"] = (
-        n.links.bus0.map(n.buses.carrier) + "-" + n.links.bus1.map(n.buses.carrier)
+        n.links.bus0.map(n.buses.carrier) + "-" + n.links.bus1.map(n.buses.carrier)
     )
     n.lines["carrier"] = "AC line"
     n.transformers["carrier"] = "AC transformer"
@@ -331,8 +330,8 @@ def aggregate_p_curtailed(n):
         [
             (
                 (
-                    n.generators_t.p_max_pu.sum().multiply(n.generators.p_nom_opt)
-                    - n.generators_t.p.sum()
+                    n.generators_t.p_max_pu.sum().multiply(n.generators.p_nom_opt)
+                    - n.generators_t.p.sum()
                 )
                 .groupby(n.generators.carrier)
                 .sum()
@@ -358,7 +357,7 @@ def aggregate_costs(n, flatten=False, opts=None, existing_only=False):

     costs = {}
     for c, (p_nom, p_attr) in zip(
-        n.iterate_components(components.keys(), skip_empty=False), components.values()
+        n.iterate_components(components.keys(), skip_empty=False), components.values()
     ):
         if c.df.empty:
             continue
@@ -390,7 +389,7 @@


 def progress_retrieve(
-    url, file, data=None, headers=None, disable_progress=False, roundto=1.0
+    url, file, data=None, headers=None, disable_progress=False, roundto=1.0
 ):
     """
     Function to download data from a url with a progress bar progress in
@@ -472,19 +471,18 @@ def mock_snakemake(rulename, **wildcards):
         keyword arguments fixing the wildcards. Only necessary if wildcards are
         needed.
     """
-    import os

     import snakemake as sm
     from pypsa.descriptors import Dict
     from snakemake.script import Snakemake

-    script_dir = Path(__file__).parent.resolve()
+    script_dir = pathlib.Path(__file__).parent.resolve()
     assert (
-        Path.cwd().resolve() == script_dir
+        pathlib.Path.cwd().resolve() == script_dir
     ), f"mock_snakemake has to be run from the repository scripts directory {script_dir}"
     os.chdir(script_dir.parent)
     for p in sm.SNAKEFILE_CHOICES:
-        if os.path.exists(p):
+        if path_exists(p):
             snakefile = p
             break
     workflow = sm.Workflow(
@@ -508,7 +506,7 @@ def mock_snakemake(rulename, **wildcards):
     def make_accessable(*ios):
         for io in ios:
             for i in range(len(io)):
-                io[i] = os.path.abspath(io[i])
+                io[i] = get_abs_path(io[i])

     make_accessable(job.input, job.output, job.log)
     snakemake = Snakemake(
@@ -527,7 +525,7 @@ def make_accessable(*ios):

     # create log and output dir if not existent
     for path in list(snakemake.log) + list(snakemake.output):
-        Path(path).parent.mkdir(parents=True, exist_ok=True)
+        build_directory(path)

     os.chdir(script_dir)
     return snakemake
@@ -636,8 +634,8 @@ def country_name_2_two_digits(country_name):
         2-digit country name
     """
     if (
-        country_name
-        == f"{two_digits_2_name_country('SN')}-{two_digits_2_name_country('GM')}"
+        country_name
+        == f"{two_digits_2_name_country('SN')}-{two_digits_2_name_country('GM')}"
     ):
         return "SN-GM"
@@ -652,7 +650,7 @@ def read_csv_nafix(file, **kwargs):
     if "na_values" not in kwargs:
         kwargs["na_values"] = NA_VALUES

-    if os.stat(file).st_size > 0:
+    if get_path_size(file) > 0:
         return pd.read_csv(file, **kwargs)
     else:
         return pd.DataFrame()
@@ -670,8 +668,7 @@ def to_csv_nafix(df, path, **kwargs):


 def save_to_geojson(df, fn):
-    if os.path.exists(fn):
-        os.unlink(fn)  # remove file if it exists
+    pathlib.Path(fn).unlink(missing_ok=True)  # remove file if it exists

     # save file if the (Geo)DataFrame is non-empty
     if df.empty:
@@ -701,7 +698,7 @@ def read_geojson(fn, cols=[], dtype=None, crs="EPSG:4326"):
         CRS of the GeoDataFrame
     """
     # if the file is non-zero, read the geodataframe and return it
-    if os.path.getsize(fn) > 0:
+    if get_path_size(fn) > 0:
         return gpd.read_file(fn)
     else:
         # else return an empty GeoDataFrame
@@ -747,7 +744,7 @@ def filter_codes(c_list, iso_coding=True):
     selected(iso_coding=False), ignore iso-specific names.
     """
     if (
-        iso_coding
+        iso_coding
     ):  # if country lists are in iso coding, then check if they are 2-string
         # 2-code countries
         ret_list = [c for c in c_list if len(c) == 2]
@@ -805,7 +802,7 @@ def get_last_commit_message(path):
     """
     _logger = logging.getLogger(__name__)
     last_commit_message = None
-    backup_cwd = os.getcwd()
+    backup_cwd = get_current_directory_path()
     try:
         os.chdir(path)
         last_commit_message = (
@@ -821,3 +818,114 @@
         os.chdir(backup_cwd)

     return last_commit_message
+
+
+def get_dirname_path(path):
+    """
+    It returns the directory name of the path.
+    """
+    return pathlib.Path(path).parent
+
+
+def get_abs_path(path):
+    """
+    It returns the absolutized version of the path.
+    """
+    return pathlib.Path(path).absolute()
+
+
+def get_basename_abs_path(path):
+    """
+    It returns the base name of a normalized and absolutized version of the
+    path.
+    """
+    return pathlib.Path(path).absolute().name
+
+
+def get_basename_path(path):
+    """
+    It returns the base name of the path.
+    """
+    return pathlib.Path(path).name
+
+
+def get_path(*args):
+    """
+    It returns a new path string.
+    """
+    return pathlib.Path(*args)
+
+
+def get_path_size(path):
+    """
+    It returns the size of a path (in bytes)
+    """
+    return pathlib.Path(path).stat().st_size
+
+
+def build_directory(path):
+    """
+    It creates recursively the directory and its leaf directories.
+
+    Parameters:
+        path (str): The path to the file
+    """
+
+    # Check if the provided path points to a directory
+    if is_directory_path(path):
+        pathlib.Path(path).mkdir(parents=True, exist_ok=True)
+    else:
+        pathlib.Path(path).parent.mkdir(parents=True, exist_ok=True)
+
+
+def change_to_script_dir(path):
+    """
+    Change the current working directory to the directory containing the given
+    script.
+
+    Parameters:
+        path (str): The path to the file.
+    """
+
+    # Get the absolutized and normalized path of directory containing the file
+    directory_path = pathlib.Path(path).absolute().parent
+
+    # Change the current working directory to the script directory
+    os.chdir(directory_path)
+
+
+def get_current_directory_path():
+    """
+    It returns the current directory path.
+    """
+    return pathlib.Path.cwd()
+
+
+def is_directory_path(path):
+    """
+    It returns True if the path points to a directory.
+    False otherwise.
+    """
+    return pathlib.Path(path).is_dir()
+
+
+def is_file_path(path):
+    """
+    It returns True if the path points to a file.
+    False otherwise.
+    """
+    return pathlib.Path(path).is_file()
+
+
+def get_relative_path(path, start_path="."):
+    """
+    It returns a relative path to path from start_path.
+
+    Default for start_path is the current directory
+    """
+    return pathlib.Path(path).relative_to(start_path)
+
+
+def path_exists(path):
+    """
+    It returns True if the path exists.
+    False otherwise.
+    """
+    return pathlib.Path(path).exists()

From a3b8022d8f144e1e6d68e66715487aa373bacb89 Mon Sep 17 00:00:00 2001
From: Fabrizio Finozzi
Date: Wed, 5 Jun 2024 11:22:12 +0200
Subject: [PATCH 02/40] os to pathlib for add_electricity, add_extra_components,
 augmented_line_connectetions, base_network, build_bus_regions

---
 scripts/add_electricity.py            | 12 ++++++++----
 scripts/add_extra_components.py       |  6 +++---
 scripts/augmented_line_connections.py |  5 ++---
 scripts/base_network.py               | 15 ++++++++++-----
 scripts/build_bus_regions.py          |  5 ++---
 5 files changed, 25 insertions(+), 18 deletions(-)

diff --git a/scripts/add_electricity.py b/scripts/add_electricity.py
index 0911588f7..19360c9c6 100755
--- a/scripts/add_electricity.py
+++ b/scripts/add_electricity.py
@@ -84,14 +84,18 @@
 - additional open- and combined-cycle gas turbines (if ``OCGT`` and/or ``CCGT`` is listed in the config setting ``electricity: extendable_carriers``)
 """
-import os
-
 import numpy as np
 import pandas as pd
 import powerplantmatching as pm
 import pypsa
 import xarray as xr
-from _helpers import configure_logging, create_logger, read_csv_nafix, update_p_nom_max
+from _helpers import (
+    change_to_script_dir,
+    configure_logging,
+    create_logger,
+    read_csv_nafix,
+    update_p_nom_max,
+)
 from powerplantmatching.export import map_country_bus

 idx = pd.IndexSlice
@@ -809,7 +813,7 @@ def add_nice_carrier_names(n, config):
     if "snakemake" not in globals():
         from _helpers import mock_snakemake, sets_path_to_root

-        os.chdir(os.path.dirname(os.path.abspath(__file__)))
+        change_to_script_dir(__file__)
         snakemake = mock_snakemake("add_electricity")
         sets_path_to_root("pypsa-earth")
     configure_logging(snakemake)

diff --git a/scripts/add_extra_components.py b/scripts/add_extra_components.py
index 29c57e60c..94023ad89 100644
--- a/scripts/add_extra_components.py
+++ b/scripts/add_extra_components.py
@@ -52,12 +52,12 @@
 - ``Stores`` of carrier 'H2' and/or 'battery' in combination with ``Links``. If this option is chosen, the script adds extra buses with corresponding carrier where energy ``Stores`` are attached and which are connected to the corresponding power buses via two links, one each for charging and discharging. This leads to three investment variables for the energy capacity, charging and discharging capacity of the storage unit.
 """
-import os
+
 import numpy as np
 import pandas as pd
 import pypsa
-from _helpers import configure_logging, create_logger
+from _helpers import change_to_script_dir, configure_logging, create_logger
 from add_electricity import (
     _add_missing_carriers_from_costs,
     add_nice_carrier_names,
@@ -267,7 +267,7 @@ def attach_hydrogen_pipelines(n, costs, config):
     if "snakemake" not in globals():
         from _helpers import mock_snakemake

-        os.chdir(os.path.dirname(os.path.abspath(__file__)))
+        change_to_script_dir(__file__)
         snakemake = mock_snakemake("add_extra_components", simpl="", clusters=10)
     configure_logging(snakemake)

diff --git a/scripts/augmented_line_connections.py b/scripts/augmented_line_connections.py
index 3b0072457..c6f9520be 100644
--- a/scripts/augmented_line_connections.py
+++ b/scripts/augmented_line_connections.py
@@ -28,13 +28,12 @@
 Description
 -----------
 """
-import os

 import networkx as nx
 import numpy as np
 import pandas as pd
 import pypsa
-from _helpers import configure_logging, create_logger
+from _helpers import change_to_script_dir, configure_logging, create_logger
 from add_electricity import load_costs
 from networkx.algorithms import complement
 from networkx.algorithms.connectivity.edge_augmentation import k_edge_augmentation
@@ -54,7 +53,7 @@ def haversine(p):
     if "snakemake" not in globals():
         from _helpers import mock_snakemake

-        os.chdir(os.path.dirname(os.path.abspath(__file__)))
+        change_to_script_dir(__file__)
         snakemake = mock_snakemake(
             "augmented_line_connections", network="elec", simpl="", clusters="54"
         )

diff --git a/scripts/base_network.py b/scripts/base_network.py
index 04e0c388d..0f997078b 100644
--- a/scripts/base_network.py
+++ b/scripts/base_network.py
@@ -55,7 +55,6 @@
 Description
 -----------
 """
-import os

 import geopandas as gpd
 import networkx as nx
@@ -65,7 +64,13 @@
 import scipy as sp
 import shapely.prepared
 import shapely.wkt
-from _helpers import configure_logging, create_logger, read_csv_nafix
+from _helpers import (
+    change_to_script_dir,
+    configure_logging,
+    create_logger,
+    get_path_size,
+    read_csv_nafix,
+)
 from shapely.ops import unary_union

 logger = create_logger(__name__)
@@ -202,7 +207,7 @@ def _load_lines_from_osm(fp_osm_lines):
 # TODO Seems to be not needed anymore
 def _load_links_from_osm(fp_osm_converters, base_network_config, voltages_config):
     # the links file can be empty
-    if os.path.getsize(fp_osm_converters) == 0:
+    if get_path_size(fp_osm_converters) == 0:
         links = pd.DataFrame()
         return links
@@ -231,7 +236,7 @@ def _load_links_from_osm(fp_osm_converters, base_network_config, voltages_config):
 def _load_converters_from_osm(fp_osm_converters, buses):
     # the links file can be empty
-    if os.path.getsize(fp_osm_converters) == 0:
+    if get_path_size(fp_osm_converters) == 0:
         converters = pd.DataFrame()
         return converters
@@ -556,7 +561,7 @@ def base_network(
     if "snakemake" not in globals():
         from _helpers import mock_snakemake

-        os.chdir(os.path.dirname(os.path.abspath(__file__)))
+        change_to_script_dir(__file__)
         snakemake = mock_snakemake("base_network")
     configure_logging(snakemake)

diff --git a/scripts/build_bus_regions.py b/scripts/build_bus_regions.py
index d1e4f5e3c..42f6b6b61 100644
--- a/scripts/build_bus_regions.py
+++ b/scripts/build_bus_regions.py
@@ -42,12 +42,11 @@
 Description
 -----------
 """
-import os

 import geopandas as gpd
 import pandas as pd
 import pypsa
-from _helpers import REGION_COLS, configure_logging, create_logger
+from _helpers import REGION_COLS, change_to_script_dir, configure_logging, create_logger

 logger = create_logger(__name__)
@@ -150,7 +149,7 @@ def get_gadm_shape(
     if "snakemake" not in globals():
         from _helpers import mock_snakemake

-        os.chdir(os.path.dirname(os.path.abspath(__file__)))
+        change_to_script_dir(__file__)
         snakemake = mock_snakemake("build_bus_regions")
     configure_logging(snakemake)

From 8c118956b60a089c12f72d2937a83ae18c70c915 Mon Sep 17 00:00:00 2001
From: Fabrizio Finozzi
Date: Wed, 5 Jun 2024 12:07:26 +0200
Subject: [PATCH 03/40] re-formatting

---
 scripts/build_cutout.py             |  5 ++---
 scripts/build_demand_profiles.py    | 14 ++++++++++----
 scripts/build_natura_raster.py      | 16 ++++++++++++----
 scripts/build_osm_network.py        | 12 +++++-------
 scripts/build_powerplants.py        | 14 ++++++++------
 scripts/build_renewable_profiles.py | 10 +++++++---
 6 files changed, 44 insertions(+), 27 deletions(-)

diff --git a/scripts/build_cutout.py b/scripts/build_cutout.py
index 06e5a24cd..83f5e1509 100644
--- a/scripts/build_cutout.py
+++ b/scripts/build_cutout.py
@@ -93,12 +93,11 @@
 -----------
 """
-import os

 import atlite
 import geopandas as gpd
 import pandas as pd
-from _helpers import configure_logging, create_logger
+from _helpers import change_to_script_dir, configure_logging, create_logger

 logger = create_logger(__name__)
@@ -107,7 +106,7 @@
     if "snakemake" not in globals():
         from _helpers import mock_snakemake

-        os.chdir(os.path.dirname(os.path.abspath(__file__)))
+        change_to_script_dir(__file__)
         snakemake = mock_snakemake("build_cutout", cutout="africa-2013-era5")
     configure_logging(snakemake)

diff --git a/scripts/build_demand_profiles.py b/scripts/build_demand_profiles.py
index c5dad677b..ebb230903 100644
--- a/scripts/build_demand_profiles.py
+++ b/scripts/build_demand_profiles.py
@@ -40,7 +40,7 @@
 Then with a function that takes in the PyPSA network "base.nc", region and gadm shape data, the countries of interest, a scale factor, and the snapshots, it returns a csv file called "demand_profiles.csv", that allocates the load to the buses of the network according to GDP and population.
 """
-import os
+
 from itertools import product

 import geopandas as gpd
@@ -49,7 +49,13 @@
 import pypsa
 import scipy.sparse as sparse
 import xarray as xr
-from _helpers import configure_logging, create_logger, read_osm_config
+from _helpers import (
+    change_to_script_dir,
+    configure_logging,
+    create_logger,
+    get_path,
+    read_osm_config,
+)
 from shapely.prepared import prep
 from shapely.validation import make_valid
@@ -108,7 +114,7 @@ def get_load_paths_gegis(ssp_parentfolder, config):

     load_paths = []
     for continent in region_load:
-        load_path = os.path.join(
+        load_path = get_path(
             ssp_parentfolder,
             str(ssp),
             str(prediction_year),
@@ -246,7 +252,7 @@ def upsample(cntry, group):
     if "snakemake" not in globals():
         from _helpers import mock_snakemake, sets_path_to_root

-        os.chdir(os.path.dirname(os.path.abspath(__file__)))
+        change_to_script_dir(__file__)
         snakemake = mock_snakemake("build_demand_profiles")
         sets_path_to_root("pypsa-earth")
     configure_logging(snakemake)

diff --git a/scripts/build_natura_raster.py b/scripts/build_natura_raster.py
index 9593f7767..ae9fd478a 100644
--- a/scripts/build_natura_raster.py
+++ b/scripts/build_natura_raster.py
@@ -50,7 +50,13 @@
 import geopandas as gpd
 import numpy as np
 import rasterio as rio
-from _helpers import configure_logging, create_logger
+from _helpers import (
+    change_to_script_dir,
+    configure_logging,
+    create_logger,
+    get_path,
+    is_directory_path,
+)
 from rasterio.features import geometry_mask
 from rasterio.warp import transform_bounds
@@ -65,14 +71,16 @@ def get_fileshapes(list_paths, accepted_formats=(".shp",)):

     list_fileshapes = []
     for lf in list_paths:
-        if os.path.isdir(lf):  # if it is a folder, then list all shapes files contained
+        if is_directory_path(
+            lf
+        ):  # if it is a folder, then list all shapes files contained
             # loop over all dirs and subdirs
             for path, subdirs, files in os.walk(lf):
                 # loop over all files
                 for subfile in files:
                     # add the subfile if it is a shape file
                     if subfile.endswith(accepted_formats):
-                        list_fileshapes.append(os.path.join(path, subfile))
+                        list_fileshapes.append(str(get_path(path, subfile)))

         elif lf.endswith(accepted_formats):
             list_fileshapes.append(lf)
@@ -178,7 +186,7 @@ def unify_protected_shape_areas(inputs, natura_crs, out_logging):
     if "snakemake" not in globals():
         from _helpers import mock_snakemake

-        os.chdir(os.path.dirname(os.path.abspath(__file__)))
+        change_to_script_dir(__file__)
         snakemake = mock_snakemake(
             "build_natura_raster", cutouts=["cutouts/africa-2013-era5.nc"]
         )

diff --git a/scripts/build_osm_network.py b/scripts/build_osm_network.py
index 1ebef49e9..25f8d7a9c 100644
--- a/scripts/build_osm_network.py
+++ b/scripts/build_osm_network.py
@@ -5,12 +5,12 @@

 # -*- coding: utf-8 -*-

-import os
-
 import geopandas as gpd
 import numpy as np
 import pandas as pd
 from _helpers import (
+    build_directory,
+    change_to_script_dir,
     configure_logging,
     create_logger,
     read_geojson,
@@ -875,16 +875,14 @@ def built_network(
     logger.info("Save outputs")

     # create clean directory if not already exist
-    if not os.path.exists(outputs["lines"]):
-        os.makedirs(os.path.dirname(outputs["lines"]), exist_ok=True)
+    build_directory(outputs["lines"])

     to_csv_nafix(lines, outputs["lines"])  # Generate CSV
     to_csv_nafix(converters, outputs["converters"])  # Generate CSV
     to_csv_nafix(transformers, outputs["transformers"])  # Generate CSV

     # create clean directory if not already exist
-    if not os.path.exists(outputs["substations"]):
-        os.makedirs(os.path.dirname(outputs["substations"]), exist_ok=True)
+    build_directory(outputs["substations"])

     # Generate CSV
     to_csv_nafix(buses, outputs["substations"])
@@ -895,7 +893,7 @@ def built_network(
     if "snakemake" not in globals():
         from _helpers import mock_snakemake

-        os.chdir(os.path.dirname(os.path.abspath(__file__)))
+        change_to_script_dir(__file__)
         snakemake = mock_snakemake("build_osm_network")
     configure_logging(snakemake)

diff --git a/scripts/build_powerplants.py b/scripts/build_powerplants.py
index 71b701c1b..b61331241 100644
--- a/scripts/build_powerplants.py
+++ b/scripts/build_powerplants.py
@@ -100,8 +100,6 @@
 4. OSM extraction was supposed to be ignoring non-generation features like CHP and Natural Gas storage (in contrast to PPM).
 """
-import os
-
 import geopandas as gpd
 import numpy as np
 import pandas as pd
@@ -109,8 +107,12 @@
 import pypsa
 import yaml
 from _helpers import (
+    change_to_script_dir,
     configure_logging,
     create_logger,
+    get_current_directory_path,
+    get_path,
+    get_path_size,
     read_csv_nafix,
     to_csv_nafix,
     two_digits_2_name_country,
@@ -122,7 +124,7 @@


 def convert_osm_to_pm(filepath_ppl_osm, filepath_ppl_pm):
-    if os.stat(filepath_ppl_osm).st_size == 0:
+    if get_path_size(filepath_ppl_osm) == 0:
         return to_csv_nafix(pd.DataFrame(), filepath_ppl_pm, index=False)

     add_ppls = read_csv_nafix(filepath_ppl_osm, index_col=0, dtype={"bus": "str"})
@@ -298,7 +300,7 @@ def replace_natural_gas_technology(df: pd.DataFrame):
     if "snakemake" not in globals():
         from _helpers import mock_snakemake

-        os.chdir(os.path.dirname(os.path.abspath(__file__)))
+        change_to_script_dir(__file__)
         snakemake = mock_snakemake("build_powerplants")
     configure_logging(snakemake)
@@ -325,8 +327,8 @@ def replace_natural_gas_technology(df: pd.DataFrame):
             "Please check file configs/powerplantmatching_config.yaml"
         )
         logger.info("Parsing OSM generator data to powerplantmatching format")
-        config["EXTERNAL_DATABASE"]["fn"] = os.path.join(
-            os.getcwd(), filepath_osm2pm_ppl
+        config["EXTERNAL_DATABASE"]["fn"] = get_path(
+            get_current_directory_path(), filepath_osm2pm_ppl
         )
     else:
         # create an empty file

diff --git a/scripts/build_renewable_profiles.py b/scripts/build_renewable_profiles.py
index eb79f5752..d2d211aca 100644
--- a/scripts/build_renewable_profiles.py
+++ b/scripts/build_renewable_profiles.py
@@ -191,7 +191,6 @@
 reached.
 """
 import functools
-import os
 import time
 from math import isnan
@@ -202,7 +201,12 @@
 import pandas as pd
 import progressbar as pgb
 import xarray as xr
-from _helpers import configure_logging, create_logger, sets_path_to_root
+from _helpers import (
+    change_to_script_dir,
+    configure_logging,
+    create_logger,
+    sets_path_to_root,
+)
 from add_electricity import load_powerplants
 from dask.distributed import Client, LocalCluster
 from pypsa.geo import haversine
@@ -488,7 +492,7 @@ def create_scaling_factor(
     if "snakemake" not in globals():
         from _helpers import mock_snakemake

-        os.chdir(os.path.dirname(os.path.abspath(__file__)))
+        change_to_script_dir(__file__)
         snakemake = mock_snakemake("build_renewable_profiles", technology="solar")
         sets_path_to_root("pypsa-earth")
     configure_logging(snakemake)

From 04378db63060de2f195f2ccd2736c7ea066f2dc4 Mon Sep 17 00:00:00 2001
From: Fabrizio Finozzi
Date: Wed, 5 Jun 2024 12:32:22 +0200
Subject: [PATCH 04/40] os to pathlib build_shapes.py, build_test_configs.py,
 clean_osm_data.py, cluster_network.py, download_osm_data.py,
 make_statistics.py

---
 scripts/build_shapes.py       | 57 ++++++++++++++++++++---------------
 scripts/build_test_configs.py |  9 +++---
 scripts/clean_osm_data.py     | 14 ++++-----
 scripts/cluster_network.py    |  8 ++---
 scripts/download_osm_data.py  | 29 +++++++++++-------
 scripts/make_statistics.py    | 32 ++++++++++++--------
 6 files changed, 84 insertions(+), 65 deletions(-)

diff --git a/scripts/build_shapes.py b/scripts/build_shapes.py
index cc883ef9e..77a97b669 100644
--- a/scripts/build_shapes.py
+++ b/scripts/build_shapes.py
@@ -6,7 +6,7 @@
 # -*- coding: utf-8 -*-

 import multiprocessing as mp
-import os
+import pathlib
 import shutil
 from itertools import takewhile
 from operator import attrgetter
@@ -19,8 +19,14 @@
 import requests
 import xarray as xr
 from _helpers import (
+    build_directory,
+    change_to_script_dir,
     configure_logging,
     create_logger,
+    get_current_directory_path,
+    get_dirname_path,
+    get_path,
+    path_exists,
     sets_path_to_root,
     three_2_two_digits_country,
     two_2_three_digits_country,
@@ -88,21 +94,21 @@ def download_GADM(country_code, update=False, out_logging=False):
     GADM_filename = get_GADM_filename(country_code)

     GADM_url = f"https://geodata.ucdavis.edu/gadm/gadm4.1/gpkg/{GADM_filename}.gpkg"

-    GADM_inputfile_gpkg = os.path.join(
-        os.getcwd(),
+    GADM_inputfile_gpkg = get_path(
+        get_current_directory_path(),
         "data",
         "gadm",
         GADM_filename,
         GADM_filename + ".gpkg",
     )  # Input filepath gpkg

-    if not os.path.exists(GADM_inputfile_gpkg) or update is True:
+    if not path_exists(GADM_inputfile_gpkg) or update is True:
         if out_logging:
             logger.warning(
                 f"Stage 5 of 5: {GADM_filename} of country {two_digits_2_name_country(country_code)} does not exist, downloading to {GADM_inputfile_gpkg}"
             )
         # create data/osm directory
-        os.makedirs(os.path.dirname(GADM_inputfile_gpkg), exist_ok=True)
+        build_directory(GADM_inputfile_gpkg)

         try:
             r = requests.get(GADM_url, stream=True, timeout=300)
@@ -296,8 +302,7 @@ def country_cover(country_shapes, eez_shapes=None, out_logging=False, distance=0


 def save_to_geojson(df, fn):
-    if os.path.exists(fn):
-        os.unlink(fn)  # remove file if it exists
+    pathlib.Path(fn).unlink(missing_ok=True)  # remove file if it exists

     if not isinstance(df, gpd.GeoDataFrame):
         df = gpd.GeoDataFrame(dict(geometry=df))
@@ -319,9 +324,9 @@ def load_EEZ(countries_codes, geo_crs, EEZ_gpkg="./data/eez/eez_v11.gpkg"):
     The dataset shall be downloaded independently by the user (see
     guide) or together with pypsa-earth package.
     """
-    if not os.path.exists(EEZ_gpkg):
+    if not path_exists(EEZ_gpkg):
         raise Exception(
-            f"File EEZ {EEZ_gpkg} not found, please download it from https://www.marineregions.org/download_file.php?name=World_EEZ_v11_20191118_gpkg.zip and copy it in {os.path.dirname(EEZ_gpkg)}"
+            f"File EEZ {EEZ_gpkg} not found, please download it from https://www.marineregions.org/download_file.php?name=World_EEZ_v11_20191118_gpkg.zip and copy it in {get_dirname_path(EEZ_gpkg)}"
         )

     geodf_EEZ = gpd.read_file(EEZ_gpkg, engine="pyogrio").to_crs(geo_crs)
@@ -479,17 +484,17 @@ def download_WorldPop_standard(
         f"https://data.worldpop.org/GIS/Population/Global_2000_2020_Constrained/2020/maxar_v1/{two_2_three_digits_country(country_code).upper()}/{WorldPop_filename}",
     ]

-    WorldPop_inputfile = os.path.join(
-        os.getcwd(), "data", "WorldPop", WorldPop_filename
+    WorldPop_inputfile = get_path(
+        get_current_directory_path(), "data", "WorldPop", WorldPop_filename
     )  # Input filepath tif

-    if not os.path.exists(WorldPop_inputfile) or update is True:
+    if not path_exists(WorldPop_inputfile) or update is True:
         if out_logging:
             logger.warning(
                 f"Stage 3 of 5: {WorldPop_filename} does not exist, downloading to {WorldPop_inputfile}"
             )
         #  create data/osm directory
-        os.makedirs(os.path.dirname(WorldPop_inputfile), exist_ok=True)
+        build_directory(WorldPop_inputfile)

         loaded = False
         for WorldPop_url in WorldPop_urls:
@@ -533,10 +538,10 @@ def download_WorldPop_API(
     WorldPop_filename = f"{two_2_three_digits_country(country_code).lower()}_ppp_{year}_UNadj_constrained.tif"
     # Request to get the file
-    WorldPop_inputfile = os.path.join(
-        os.getcwd(), "data", "WorldPop", WorldPop_filename
+    WorldPop_inputfile = get_path(
+        get_current_directory_path(), "data", "WorldPop", WorldPop_filename
     )  # Input filepath tif
-    os.makedirs(os.path.dirname(WorldPop_inputfile), exist_ok=True)
+    build_directory(WorldPop_inputfile)
     year_api = int(str(year)[2:])
     loaded = False
     WorldPop_api_urls = [
@@ -571,17 +576,19 @@ def convert_GDP(name_file_nc, year=2015, out_logging=False):
     name_file_tif = name_file_nc[:-2] + "tif"

     # path of the nc file
-    GDP_nc = os.path.join(os.getcwd(), "data", "GDP", name_file_nc)  # Input filepath nc
+    GDP_nc = get_path(
+        get_current_directory_path(), "data", "GDP", name_file_nc
+    )  # Input filepath nc

     # path of the tif file
-    GDP_tif = os.path.join(
-        os.getcwd(), "data", "GDP", name_file_tif
+    GDP_tif = get_path(
+        get_current_directory_path(), "data", "GDP", name_file_tif
     )  # Input filepath nc

     # Check if file exists, otherwise throw exception
-    if not os.path.exists(GDP_nc):
+    if not path_exists(GDP_nc):
         raise Exception(
-            f"File {name_file_nc} not found, please download it from https://datadryad.org/stash/dataset/doi:10.5061/dryad.dk1j0 and copy it in {os.path.dirname(GDP_nc)}"
+            f"File {name_file_nc} not found, please download it from https://datadryad.org/stash/dataset/doi:10.5061/dryad.dk1j0 and copy it in {get_dirname_path(GDP_nc)}"
        )

     # open nc dataset
@@ -620,11 +627,11 @@ def load_GDP(
     # path of the nc file
     name_file_tif = name_file_nc[:-2] + "tif"
-    GDP_tif = os.path.join(
-        os.getcwd(), "data", "GDP", name_file_tif
+    GDP_tif = get_path(
+        get_current_directory_path(), "data", "GDP", name_file_tif
     )  # Input filepath tif

-    if update | (not os.path.exists(GDP_tif)):
+    if update | (not path_exists(GDP_tif)):
         if out_logging:
             logger.warning(
                 f"Stage 5 of 5: File {name_file_tif} not found, the file will be produced by processing {name_file_nc}"
@@ -1310,7 +1317,7 @@ def gadm(
     if "snakemake" not in globals():
         from _helpers import mock_snakemake

-        os.chdir(os.path.dirname(os.path.abspath(__file__)))
+        change_to_script_dir(__file__)
         snakemake = mock_snakemake("build_shapes")
         sets_path_to_root("pypsa-earth")
     configure_logging(snakemake)

diff --git a/scripts/build_test_configs.py b/scripts/build_test_configs.py
index 349a1ef00..d19d86cb9 100644
--- a/scripts/build_test_configs.py
+++ b/scripts/build_test_configs.py
@@ -14,9 +14,8 @@
 """
 import collections.abc
 import copy
-import os
-from pathlib import Path

+from _helpers import change_to_script_dir, get_current_directory_path, get_path
 from ruamel.yaml import YAML
@@ -37,7 +36,7 @@ def _parse_inputconfig(input_config, yaml):
         return input_config

     if isinstance(input_config, str):
-        input_config = Path(Path.cwd(), input_config)
+        input_config = get_path(get_current_directory_path(), input_config)
         with open(input_config) as fp:
             return yaml.load(fp)
@@ -76,7 +75,7 @@ def create_test_config(default_config, diff_config, output_path):

     # Output path
     if isinstance(output_path, str):
-        output_path = Path(Path.cwd(), output_path)
+        output_path = get_path(get_current_directory_path(), output_path)

     # Save file
     yaml.dump(merged_config, output_path)
@@ -88,7 +87,7 @@ def create_test_config(default_config, diff_config, output_path):
     if "snakemake" not in globals():
         from _helpers import mock_snakemake

-        os.chdir(os.path.dirname(os.path.abspath(__file__)))
+        change_to_script_dir(__file__)
         snakemake = mock_snakemake("build_test_configs")

     # Input paths

diff --git a/scripts/clean_osm_data.py b/scripts/clean_osm_data.py
index 01b535454..9f7d3ed9a 100644
--- a/scripts/clean_osm_data.py
+++ b/scripts/clean_osm_data.py
@@ -5,16 +5,16 @@

 # -*- coding: utf-8 -*-

-import os
-
 import geopandas as gpd
 import numpy as np
 import pandas as pd
 import reverse_geocode as rg
 from _helpers import (
     REGION_COLS,
+    change_to_script_dir,
     configure_logging,
     create_logger,
+    get_path_size,
     save_to_geojson,
     to_csv_nafix,
 )
@@ -902,7 +902,7 @@ def clean_data(
 ):
     logger.info("Process OSM lines")

-    if os.path.getsize(input_files["lines"]) > 0:
+    if get_path_size(input_files["lines"]) > 0:
         # Load raw data lines
         df_lines = load_network_data("lines", data_options)
@@ -917,7 +917,7 @@
         df_all_lines = df_lines

         # load cables only if data are stored
-        if os.path.getsize(input_files["cables"]) > 0:
+        if get_path_size(input_files["cables"]) > 0:
             logger.info("Add OSM cables to data")
             # Load raw data lines
             df_cables = load_network_data("cables", data_options)
@@ -967,7 +967,7 @@

     logger.info("Process OSM substations")

-    if os.path.getsize(input_files["substations"]) > 0:
+    if get_path_size(input_files["substations"]) > 0:
         df_all_substations = load_network_data("substations", data_options)

         # prepare dataset for substations
@@ -1027,7 +1027,7 @@

     logger.info("Process OSM generators")

-    if os.path.getsize(input_files["generators"]) > 0:
+    if get_path_size(input_files["generators"]) > 0:
         df_all_generators = gpd.read_file(input_files["generators"])

         # prepare the generator dataset
@@ -1064,7 +1064,7 @@
     if "snakemake" not in globals():
         from _helpers import mock_snakemake

-        os.chdir(os.path.dirname(os.path.abspath(__file__)))
+        change_to_script_dir(__file__)
         snakemake = mock_snakemake("clean_osm_data")
     configure_logging(snakemake)

diff --git a/scripts/cluster_network.py b/scripts/cluster_network.py
index 34b116a99..aa6fc4fa8 100644
--- a/scripts/cluster_network.py
+++ b/scripts/cluster_network.py
@@ -121,7 +121,7 @@
     :align: center
 """
-import os
+import pathlib
 from functools import reduce

 import geopandas as gpd
@@ -131,6 +131,7 @@
 import pypsa
 from _helpers import (
     REGION_COLS,
+    change_to_script_dir,
     configure_logging,
     create_logger,
     get_aggregation_strategies,
@@ -633,8 +634,7 @@ def clustering_for_n_clusters(


 def save_to_geojson(s, fn):
-    if os.path.exists(fn):
-        os.unlink(fn)
+    pathlib.Path(fn).unlink(missing_ok=True)
     df = s.reset_index()
     schema = {**gpd.io.file.infer_schema(df), "geometry": "Unknown"}
     df.to_file(fn, driver="GeoJSON", schema=schema)
@@ -658,7 +658,7 @@ def cluster_regions(busmaps, inputs, output):
     if "snakemake" not in globals():
         from _helpers import mock_snakemake

-        os.chdir(os.path.dirname(os.path.abspath(__file__)))
+        change_to_script_dir(__file__)
         snakemake = mock_snakemake(
             "cluster_network", network="elec", simpl="", clusters="min"
         )

diff --git a/scripts/download_osm_data.py b/scripts/download_osm_data.py
index ec99baecd..b822cc574 100644
--- a/scripts/download_osm_data.py
+++ b/scripts/download_osm_data.py
@@ -26,11 +26,17 @@
 - ``data/osm/out``: Prepared power data as .geojson and .csv files per country
 - ``resources/osm/raw``: Prepared and per type (e.g. cable/lines) aggregated power data as .geojson and .csv files
 """
-import os
+import pathlib
 import shutil
-from pathlib import Path

-from _helpers import configure_logging, create_logger, read_osm_config
+from _helpers import (
+    change_to_script_dir,
+    configure_logging,
+    create_logger,
+    get_current_directory_path,
+    get_path,
+    read_osm_config,
+)
 from earth_osm import eo

 logger = create_logger(__name__)
@@ -94,15 +100,17 @@ def convert_iso_to_geofk(
     if "snakemake" not in globals():
         from _helpers import mock_snakemake, sets_path_to_root

-        os.chdir(os.path.dirname(os.path.abspath(__file__)))
+        change_to_script_dir(__file__)
         snakemake = mock_snakemake("download_osm_data")
         sets_path_to_root("pypsa-earth")
     configure_logging(snakemake)

     run = snakemake.config.get("run", {})
     RDIR = run["name"] + "/" if run.get("name") else ""
-    store_path_resources = Path.joinpath(Path().cwd(), "resources", RDIR, "osm", "raw")
-    store_path_data = Path.joinpath(Path().cwd(), "data", "osm")
+    store_path_resources = get_path(
+        get_current_directory_path(), "resources", RDIR, "osm", "raw"
+    )
+    store_path_data = get_path(get_current_directory_path(), "data", "osm")
     country_list = country_list_to_geofk(snakemake.params.countries)

     eo.save_osm_data(
@@ -117,10 +125,9 @@ def convert_iso_to_geofk(
         out_aggregate=True,
     )

-    out_path = Path.joinpath(store_path_resources, "out")
+    out_path = get_path(store_path_resources, "out")
     names = ["generator", "cable", "line", "substation"]
     out_formats = ["csv", "geojson"]
-    new_files = os.listdir(out_path)  # list downloaded osm files

     # earth-osm (eo) only outputs files with content
     # If the file is empty, it is not created

     # Rename and move osm files to the resources folder output
     for name in names:
         for f in out_formats:
-            new_file_name = Path.joinpath(store_path_resources, f"all_raw_{name}s.{f}")
-            old_files = list(Path(out_path).glob(f"*{name}.{f}"))
-            # if file is missing, create empty file, otherwise rename it an move it
+            new_file_name = get_path(store_path_resources, f"all_raw_{name}s.{f}")
+            old_files = list(pathlib.Path(out_path).glob(f"*{name}.{f}"))
+            # if file is missing, create empty file, otherwise rename it and move it
             if not old_files:
                 with open(new_file_name, "w") as f:
                     pass

diff --git a/scripts/make_statistics.py b/scripts/make_statistics.py
index 5c544b61a..dd410d33d 100644
--- a/scripts/make_statistics.py
+++ b/scripts/make_statistics.py
@@ -23,15 +23,21 @@
 -------
 This rule creates a dataframe containing in the columns the relevant statistics for the current run.
 """
-import os
-from pathlib import Path

 import geopandas as gpd
 import numpy as np
 import pandas as pd
 import pypsa
 import xarray as xr
-from _helpers import create_logger, mock_snakemake, sets_path_to_root, to_csv_nafix
+from _helpers import (
+    change_to_script_dir,
+    create_logger,
+    get_path_size,
+    is_file_path,
+    mock_snakemake,
+    sets_path_to_root,
+    to_csv_nafix,
+)
 from build_test_configs import create_test_config
 from shapely.validation import make_valid
@@ -43,7 +49,7 @@ def _multi_index_scen(rulename, keys):


 def _mock_snakemake(rule, **kwargs):
-    os.chdir(os.path.dirname(os.path.abspath(__file__)))
+    change_to_script_dir(__file__)
     snakemake = mock_snakemake(rule, **kwargs)
     sets_path_to_root("pypsa-earth")
     return snakemake
@@ -123,7 +129,7 @@ def collect_basic_osm_stats(path, rulename, header):
     """
     Collect basic statistics on OSM data: number of items
     """
-    if Path(path).is_file() and Path(path).stat().st_size > 0:
+    if is_file_path(path) and get_path_size(path) > 0:
         df = gpd.read_file(path)
         n_elem = len(df)
@@ -142,7 +148,7 @@ def collect_network_osm_stats(path, rulename, header, metric_crs="EPSG:3857"):
     - length of the stored shapes
     - length of objects with tag_frequency == 0 (DC elements)
     """
-    if Path(path).is_file() and Path(path).stat().st_size > 0:
+    if is_file_path(path) and get_path_size(path) > 0:
         df = gpd.read_file(path)
         n_elem = len(df)
         obj_length = (
@@ -244,7 +250,7 @@ def collect_bus_regions_stats(bus_region_rule="build_bus_regions"):

     df = pd.DataFrame()

-    if Path(fp_onshore).is_file() and Path(fp_offshore).is_file():
+    if is_file_path(fp_onshore) and is_file_path(fp_offshore):
         gdf_onshore = gpd.read_file(fp_onshore)
         gdf_offshore = gpd.read_file(fp_offshore)
@@ -286,7 +292,7 @@ def capacity_stats(df):
         else:
             return df.groupby("carrier").p_nom.sum().astype(float)

-    if Path(network_path).is_file():
+    if is_file_path(network_path):
         n = pypsa.Network(network_path)

         lines_length = float((n.lines.length * n.lines.num_parallel).sum())
@@ -341,7 +347,7 @@ def collect_shape_stats(rulename="build_shapes", area_crs="ESRI:54009"):
     """
     snakemake = _mock_snakemake(rulename)

-    if not Path(snakemake.output.africa_shape).is_file():
+    if not is_file_path(snakemake.output.africa_shape):
         return pd.DataFrame()

     df_continent = gpd.read_file(snakemake.output.africa_shape)
@@ -352,7 +358,7 @@
         .geometry.area.iloc[0]
     )

-    if not Path(snakemake.output.gadm_shapes).is_file():
+    if not is_file_path(snakemake.output.gadm_shapes):
         return pd.DataFrame()

     df_gadm = gpd.read_file(snakemake.output.gadm_shapes)
@@ -466,7 +472,7 @@ def collect_renewable_stats(rulename, technology):
     """
     snakemake = _mock_snakemake(rulename, technology=technology)

-    if Path(snakemake.output.profile).is_file():
+    if is_file_path(snakemake.output.profile):
         res = xr.open_dataset(snakemake.output.profile)

         if technology == "hydro":
@@ -499,7 +505,7 @@ def add_computational_stats(df, snakemake, column_name=None):
     comp_data = [np.nan] * 3  # total_time, mean_load and max_memory

     if snakemake.benchmark:
-        if not Path(snakemake.benchmark).is_file():
+        if not is_file_path(snakemake.benchmark):
             return df

         bench_data = pd.read_csv(snakemake.benchmark, delimiter="\t")
@@ -581,7 +587,7 @@ def calculate_stats(
     if "snakemake" not in globals():
         from _helpers import mock_snakemake

-        os.chdir(os.path.dirname(os.path.abspath(__file__)))
+        change_to_script_dir(__file__)
         snakemake = mock_snakemake("make_statistics")
         sets_path_to_root("pypsa-earth")

From 16e6c46d6b13f8f44cbd26116f942fd9d0164629 Mon Sep 17 00:00:00 2001
From: Fabrizio Finozzi
Date: Wed, 5 Jun 2024 13:10:24 +0200
Subject: [PATCH 05/40] os to pathlib make_summary.py, monte_carlo.py,
 non_workflow/zip_folder.py, plot_network.py, plot_summary.py,
 prepare_network.py, retrieve_databundle_light.py, simplify_network.py,
 solve_network.py

---
 scripts/make_summary.py              |  23 +++---
 scripts/monte_carlo.py               |   8 +-
 scripts/non_workflow/zip_folder.py   |   7 +-
 scripts/plot_network.py              |   5 +-
 scripts/plot_summary.py              |   7 +-
 scripts/prepare_network.py           |  23 ++++--
 scripts/retrieve_databundle_light.py | 105 ++++++++++++++-------------
 scripts/simplify_network.py          |   4 +-
 scripts/solve_network.py             |  13 ++--
 9 files changed, 103 insertions(+), 92 deletions(-)

diff --git a/scripts/make_summary.py b/scripts/make_summary.py
index 583766ac4..c74938027 100644
--- a/scripts/make_summary.py
+++ b/scripts/make_summary.py
@@ -51,11 +51,16 @@
 Replacing *summaries* with *plots* creates nice colored maps of the results.
 """
-import os

 import pandas as pd
 import pypsa
-from _helpers import configure_logging
+from _helpers import (
+    build_directory,
+    change_to_script_dir,
+    configure_logging,
+    get_path,
+    path_exists,
+)
 from add_electricity import create_logger, load_costs, update_transmission_costs

 idx = pd.IndexSlice
@@ -496,7 +501,7 @@ def make_summaries(networks_dict, inputs, cost_config, elec_config, country="all
     for label, filename in networks_dict.items():
         print(label, filename)
-        if not os.path.exists(filename):
+        if not path_exists(filename):
             print("does not exist!!")
             continue
@@ -527,16 +532,16 @@ def make_summaries(networks_dict, inputs, cost_config, elec_config, country="all

 def to_csv(dfs, dir):
-    os.makedirs(dir, exist_ok=True)
+    build_directory(dir)
     for key, df in dfs.items():
-        df.to_csv(os.path.join(dir, f"{key}.csv"))
+        df.to_csv(get_path(dir, f"{key}.csv"))


 if __name__ == "__main__":
     if "snakemake" not in globals():
         from _helpers import mock_snakemake

-        os.chdir(os.path.dirname(os.path.abspath(__file__)))
+        change_to_script_dir(__file__)
         snakemake = mock_snakemake(
             "make_summary",
             simpl="",
@@ -551,9 +556,9 @@ def to_csv(dfs, dir):
     scenario_name = snakemake.config.get("run", {}).get("name", "")
     if scenario_name:
-        network_dir = os.path.join(network_dir, "results", scenario_name, "networks")
+        network_dir = get_path(network_dir, "results", scenario_name, "networks")
     else:
-        network_dir = os.path.join(network_dir, "results", "networks")
+        network_dir = get_path(network_dir, "results", "networks")

     configure_logging(snakemake)
@@ -569,7 +574,7 @@ def expand_from_wildcard(key):
         ll = [snakemake.wildcards.ll]

     networks_dict = {
-        (simpl, clusters, l, opts): os.path.join(
+        (simpl, clusters, l, opts): get_path(
             network_dir, f"elec_s{simpl}_" f"{clusters}_ec_l{l}_{opts}.nc"
         )
         for simpl in expand_from_wildcard("simpl")

diff --git a/scripts/monte_carlo.py b/scripts/monte_carlo.py
index a448d142b..22f8874f8 100644
--- a/scripts/monte_carlo.py
+++ b/scripts/monte_carlo.py
@@ -17,7 +17,7 @@
         add_to_snakefile: false # When set to true, enables Monte Carlo sampling
         samples: 9 # number of optimizations. Note that number of samples when using scipy has to be the square of a prime number
         sampling_strategy: "chaospy" # "pydoe2", "chaospy", "scipy", packages that are supported
-        seed: 42 # set seedling for reproducibilty
+        seed: 42 # set seedling for reproducibility
         uncertainties:
             loads_t.p_set:
               type: uniform
@@ -67,14 +67,13 @@
 wildcard {unc}, which is described in the config.yaml and created in the
 Snakefile as a range from 0 to (total number of) SAMPLES.
 """
-import os

 import chaospy
 import numpy as np
 import pandas as pd
 import pypsa
 import seaborn as sns
-from _helpers import configure_logging, create_logger
+from _helpers import change_to_script_dir, configure_logging, create_logger
 from pyDOE2 import lhs
 from scipy.stats import beta, gamma, lognorm, norm, qmc, triang
 from sklearn.preprocessing import MinMaxScaler
@@ -135,7 +134,6 @@ def monte_carlo_sampling_chaospy(
     Documentation on Chaospy: https://github.com/clicumu/pyDOE2 (fixes latin_cube errors)
     Documentation on Chaospy latin-hyper cube (quasi-Monte Carlo method): https://chaospy.readthedocs.io/en/master/user_guide/fundamentals/quasi_random_samples.html#Quasi-random-samples
     """
-    import chaospy
     from scipy.stats import qmc

     # generate a Nfeatures-dimensional latin hypercube varying between 0 and 1:
@@ -350,7 +348,7 @@ def validate_parameters(
     if "snakemake" not in globals():
         from _helpers import mock_snakemake

-        os.chdir(os.path.dirname(os.path.abspath(__file__)))
+        change_to_script_dir(__file__)
         snakemake = mock_snakemake(
             "monte_carlo",
             simpl="",

diff --git a/scripts/non_workflow/zip_folder.py b/scripts/non_workflow/zip_folder.py
index 0bac2de21..63dbddf24 100644
--- a/scripts/non_workflow/zip_folder.py
+++ b/scripts/non_workflow/zip_folder.py
@@ -8,9 +8,8 @@
 Module to zip the desired folders to be stored in google drive, or equivalent.
 """
 import os
+import pathlib
 import zipfile
-from os.path import basename
-from xml.etree.ElementInclude import include

 from _helpers import sets_path_to_root
@@ -25,7 +24,7 @@ def zipFilesInDir(dirName, zipFileName, filter, include_parent=True):
             for filename in filenames:
                 if filter(filename):
                     # create complete filepath of file in directory
-                    filePath = os.path.join(folderName, filename)
+                    filePath = str(pathlib.Path(folderName, filename))

                     # path of the zip file
                     if include_parent:
@@ -41,7 +40,7 @@ def zipFilesInDir(dirName, zipFileName, filter, include_parent=True):

 if __name__ == "__main__":
     # Set path to this file
-    os.chdir(os.path.dirname(os.path.abspath(__file__)))
+    os.chdir(pathlib.Path(__file__).parent.absolute())

     # Required to set path to pypsa-earth
     sets_path_to_root("pypsa-earth")

diff --git a/scripts/plot_network.py b/scripts/plot_network.py
index 8f2763509..124c6c891 100644
--- a/scripts/plot_network.py
+++ b/scripts/plot_network.py
@@ -17,8 +17,6 @@
 -----------
 """
-import os
-
 import cartopy.crs as ccrs
 import geopandas as gpd
 import matplotlib as mpl
@@ -28,6 +26,7 @@
 from _helpers import (
     aggregate_costs,
     aggregate_p,
+    change_to_script_dir,
     configure_logging,
     create_logger,
     load_network_for_plots,
@@ -360,7 +359,7 @@ def split_costs(n):
     if "snakemake" not in globals():
         from _helpers import mock_snakemake

-        os.chdir(os.path.dirname(os.path.abspath(__file__)))
+        change_to_script_dir(__file__)
         snakemake = mock_snakemake(
             "plot_network",
             network="elec",

diff --git a/scripts/plot_summary.py b/scripts/plot_summary.py
index 1491b6692..f6f126a47 100644
--- a/scripts/plot_summary.py
+++ b/scripts/plot_summary.py
@@ -16,11 +16,10 @@
 Description
 -----------
 """
-import os

 import matplotlib.pyplot as plt
 import pandas as pd
-from _helpers import configure_logging, create_logger
+from _helpers import change_to_script_dir, configure_logging, create_logger, get_path

 logger = create_logger(__name__)
@@ -219,7 +218,7 @@ def plot_energy(infn, snmk, fn=None):
     if "snakemake" not in globals():
         from _helpers import mock_snakemake

-        os.chdir(os.path.dirname(os.path.abspath(__file__)))
+        change_to_script_dir(__file__)
         snakemake = mock_snakemake(
             "plot_summary",
             summary="energy",
@@ -241,7 +240,7 @@ def plot_energy(infn, snmk, fn=None):
         logger.error(f"plotting function for {summary} has not been defined")

     func(
-        os.path.join(snakemake.input[0], f"{summary}.csv"),
+        get_path(snakemake.input[0], f"{summary}.csv"),
         snakemake,
         snakemake.output[0],
     )

diff --git a/scripts/prepare_network.py b/scripts/prepare_network.py
index 3b92cd31d..299d69280 100755
--- a/scripts/prepare_network.py
+++ b/scripts/prepare_network.py
@@ -56,7 +56,7 @@
 for all ``scenario`` s in the configuration file the rule :mod:`prepare_network`.
 """
-import os
+import pathlib
 import re
 from zipfile import ZipFile

@@ -65,7 +65,13 @@
 import pandas as pd
 import pypsa
 import requests
-from _helpers import configure_logging, create_logger
+from _helpers import (
+    change_to_script_dir,
+    configure_logging,
+    create_logger,
+    get_current_directory_path,
+    get_path,
+)
 from add_electricity import load_costs, update_transmission_costs

 idx = pd.IndexSlice
@@ -87,13 +93,14 @@ def download_emission_data():
         with requests.get(url) as rq:
             with open("data/co2.zip", "wb") as file:
                 file.write(rq.content)
-        rootpath = os.getcwd()
-        file_path = os.path.join(rootpath, "data/co2.zip")
+        root_path = get_current_directory_path()
+        file_path = get_path(root_path, "data/co2.zip")
         with ZipFile(file_path, "r") as zipObj:
             zipObj.extract(
-                "v60_CO2_excl_short-cycle_org_C_1970_2018.xls", rootpath + "/data"
+                "v60_CO2_excl_short-cycle_org_C_1970_2018.xls",
+                get_path(root_path, "data"),
             )
-        os.remove(file_path)
+        pathlib.Path(file_path).unlink(missing_ok=True)
         return "v60_CO2_excl_short-cycle_org_C_1970_2018.xls"
     except:
         logger.error(f"Failed download resource from '{url}'.")
@@ -120,7 +127,7 @@ def emission_extractor(filename, emission_year, country_names):
     """

     # data reading process
-    datapath = os.path.join(os.getcwd(), "data", filename)
+    datapath = get_path(get_current_directory_path(), "data", filename)
     df = pd.read_excel(datapath, sheet_name="v6.0_EM_CO2_fossil_IPCC1996", skiprows=8)
     df.columns = df.iloc[0]
     df = df.set_index("Country_code_A3")
@@ -319,7 +326,7 @@ def set_line_nom_max(n, s_nom_max_set=np.inf, p_nom_max_set=np.inf):
     if "snakemake" not in globals():
         from _helpers import mock_snakemake

-        os.chdir(os.path.dirname(os.path.abspath(__file__)))
+        change_to_script_dir(__file__)
         snakemake = mock_snakemake(
             "prepare_network",
             simpl="",

diff --git a/scripts/retrieve_databundle_light.py b/scripts/retrieve_databundle_light.py
index 1583cc245..e3b0c191b 100644
--- a/scripts/retrieve_databundle_light.py
+++ b/scripts/retrieve_databundle_light.py
@@ -81,7 +81,7 @@
 """
 import datetime as dt
-import os
+import pathlib
 import re
 from zipfile import ZipFile

@@ -89,9 +89,14 @@
 import pandas as pd
 import yaml
 from _helpers import (
+    change_to_script_dir,
     configure_logging,
     create_country_list,
     create_logger,
+    get_basename_path,
+    get_current_directory_path,
+    get_path,
+    get_relative_path,
     progress_retrieve,
     sets_path_to_root,
 )
@@ -119,9 +124,9 @@ def load_databundle_config(config):
     return config


-def download_and_unzip_zenodo(config, rootpath, hot_run=True, disable_progress=False):
+def download_and_unzip_zenodo(config, root_path, hot_run=True, disable_progress=False):
     """
-    download_and_unzip_zenodo(config, rootpath, dest_path, hot_run=True,
+    download_and_unzip_zenodo(config, root_path, dest_path, hot_run=True,
     disable_progress=False)

     Function to download and unzip the data from zenodo
@@ -130,7 +135,7 @@ def download_and_unzip_zenodo(config, rootpath, hot_run=True, disable_progress=F
     ------
     config : Dict
         Configuration data for the category to download
-    rootpath : str
+    root_path : str
         Absolute path of the repository
     hot_run : Bool (default True)
         When true the data are downloaded
@@ -143,8 +148,8 @@ def download_and_unzip_zenodo(config, rootpath, hot_run=True, disable_progress=F
         True when download is successful, False otherwise
     """
     resource = config["category"]
-    file_path = os.path.join(rootpath, "tempfile.zip")
-    destination = os.path.relpath(config["destination"])
+    file_path = get_path(root_path, "tempfile.zip")
+    destination = get_relative_path(config["destination"])
     url = config["urls"]["zenodo"]

     if hot_run:
@@ -155,7 +160,7 @@ def download_and_unzip_zenodo(config, rootpath, hot_run=True, disable_progress=F
             with ZipFile(file_path, "r") as zipObj:
                 # Extract all the contents of zip file in current directory
                 zipObj.extractall(path=destination)
-            os.remove(file_path)
+            pathlib.Path(file_path).unlink(missing_ok=True)
             logger.info(f"Downloaded resource '{resource}' from cloud '{url}'.")
         except:
             logger.warning(f"Failed download resource '{resource}' from cloud '{url}'.")
@@ -164,9 +169,9 @@ def download_and_unzip_zenodo(config, rootpath, hot_run=True, disable_progress=F
     return True


-def download_and_unzip_gdrive(config, rootpath, hot_run=True, disable_progress=False):
+def download_and_unzip_gdrive(config, root_path, hot_run=True, disable_progress=False):
     """
-    download_and_unzip_gdrive(config, rootpath, dest_path, hot_run=True,
+    download_and_unzip_gdrive(config, root_path, dest_path, hot_run=True,
     disable_progress=False)

     Function to download and unzip the data from google drive
@@ -175,7 +180,7 @@ def download_and_unzip_gdrive(config, rootpath, hot_run=True, disable_progress=F
     ------
     config : Dict
         Configuration data for the category to download
-    rootpath : str
+    root_path : str
         Absolute path of the repository
     hot_run : Bool (default True)
         When true the data are downloaded
@@ -188,8 +193,8 @@ def download_and_unzip_gdrive(config, rootpath, hot_run=True, disable_progress=F
         True when download is successful, False otherwise
     """
     resource = config["category"]
-    file_path = os.path.join(rootpath, "tempfile.zip")
-    destination = os.path.relpath(config["destination"])
+    file_path = get_path(root_path, "tempfile.zip")
+    destination = get_relative_path(config["destination"])
     url = config["urls"]["gdrive"]

     # retrieve file_id from path
@@ -216,8 +221,7 @@ def download_and_unzip_gdrive(config, rootpath, hot_run=True, disable_progress=F
     # if hot run enabled
     if hot_run:
         # remove file
-        if os.path.exists(file_path):
-            os.remove(file_path)
+        pathlib.Path(file_path).unlink(missing_ok=True)
         # download file from google drive
         gdd.download_file_from_google_drive(
             file_id=file_id,
@@ -238,10 +242,10 @@


 def download_and_unzip_protectedplanet(
-    config, rootpath, attempts=3, hot_run=True, disable_progress=False
+    config, root_path, attempts=3, hot_run=True, disable_progress=False
 ):
     """
-    download_and_unzip_protectedplanet(config, rootpath, dest_path,
+    download_and_unzip_protectedplanet(config, root_path, dest_path,
     hot_run=True, disable_progress=False)

     Function to download and unzip the data by category from protectedplanet
@@ -250,7 +254,7 @@ def download_and_unzip_protectedplanet(
     ------
     config : Dict
         Configuration data for the category to download
-    rootpath : str
+    root_path : str
         Absolute path of the repository
     attempts : int (default 3)
         Number of attempts to download the data by month.
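
The monthly retry scheme that ``attempts`` controls can be sketched as follows. This is an illustration only, not part of the patch; it assumes the protectedplanet archives are keyed by a "Jun2024"-style month token, and the helper bodies are plausible reconstructions of the ones defined in this script.

import datetime as dt

def get_first_day_of_month(date):
    return date.replace(day=1)

def get_first_day_of_previous_month(date):
    # step back one day from the first of the month, then snap to day 1 again
    return (date.replace(day=1) - dt.timedelta(days=1)).replace(day=1)

current = get_first_day_of_month(dt.date.today())
for attempt in range(3):  # mirrors attempts=3
    month_token = current.strftime("%b%Y")  # e.g. "Jun2024"
    print(f"attempt {attempt + 1}: would request the {month_token} archive")
    current = get_first_day_of_previous_month(current)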
@@ -266,8 +270,8 @@ def download_and_unzip_protectedplanet(
         True when download is successful, False otherwise
     """
     resource = config["category"]
-    file_path = os.path.join(rootpath, "tempfile_wpda.zip")
-    destination = os.path.relpath(config["destination"])
+    file_path = get_path(root_path, "tempfile_wpda.zip")
+    destination = get_relative_path(config["destination"])
     url = config["urls"]["protectedplanet"]

     def get_first_day_of_month(date):
@@ -282,8 +286,7 @@ def get_first_day_of_previous_month(date):
     )

     if hot_run:
-        if os.path.exists(file_path):
-            os.remove(file_path)
+        pathlib.Path(file_path).unlink(missing_ok=True)

     downloaded = False
@@ -320,17 +323,17 @@ def get_first_day_of_previous_month(date):
             for fzip in zip_files:
                 # final path of the file
                 try:
-                    inner_zipname = os.path.join(destination, fzip)
+                    inner_zipname = get_path(destination, fzip)

                     zip_obj.extract(fzip, path=destination)

-                    dest_nested = os.path.join(destination, fzip.split(".")[0])
+                    dest_nested = get_path(destination, fzip.split(".")[0])

                     with ZipFile(inner_zipname, "r") as nested_zip:
                         nested_zip.extractall(path=dest_nested)

                     # remove inner zip file
-                    os.remove(inner_zipname)
+                    pathlib.Path(inner_zipname).unlink(missing_ok=True)

                     logger.info(f"{resource} - Successfully unzipped file '{fzip}'")
                 except:
@@ -340,7 +343,7 @@ def get_first_day_of_previous_month(date):

             # close and remove outer zip file
             zip_obj.close()
-            os.remove(file_path)
+            pathlib.Path(file_path).unlink(missing_ok=True)

             logger.info(
                 f"Downloaded resource '{resource_iter}' from cloud '{url_iter}'."
@@ -391,8 +394,7 @@ def download_and_unpack(
         True when download is successful, False otherwise
     """
     if hot_run:
-        if os.path.exists(file_path):
-            os.remove(file_path)
+        pathlib.Path(file_path).unlink(missing_ok=True)

     try:
         logger.info(f"Downloading resource '{resource}' from cloud '{url}'.")
@@ -404,9 +406,9 @@
         # then unzip it and remove the original file
         if unzip:
             with ZipFile(file_path, "r") as zipfile:
-                zipfile.extractall(destination)
+                zipfile.extractall(path=destination)

-            os.remove(file_path)
+            pathlib.Path(file_path).unlink(missing_ok=True)
         logger.info(f"Downloaded resource '{resource}' from cloud '{url}'.")
         return True
     except:
         logger.warning(f"Failed download resource '{resource}' from cloud '{url}'.")
         return False
@@ -414,9 +416,9 @@

-def download_and_unzip_direct(config, rootpath, hot_run=True, disable_progress=False):
+def download_and_unzip_direct(config, root_path, hot_run=True, disable_progress=False):
     """
-    download_and_unzip_direct(config, rootpath, dest_path, hot_run=True,
+    download_and_unzip_direct(config, root_path, dest_path, hot_run=True,
     disable_progress=False)

     Function to download the data by category from a direct url with no processing.
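
The removal idiom replaced throughout this patch is worth spelling out once; the two variants below are equivalent (a minimal sketch, "tempfile.zip" is a placeholder; `missing_ok` requires Python 3.8 or newer).

import os
import pathlib

file_path = "tempfile.zip"  # placeholder name

# before: guard against a missing file by hand
if os.path.exists(file_path):
    os.remove(file_path)

# after: pathlib swallows the missing-file case itself (Python >= 3.8)
pathlib.Path(file_path).unlink(missing_ok=True)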
@@ -426,7 +428,7 @@ def download_and_unzip_direct(config, rootpath, hot_run=True, disable_progress=F ------ config : Dict Configuration data for the category to download - rootpath : str + root_path : str Absolute path of the repository hot_run : Bool (default True) When true the data are downloaded @@ -439,10 +441,10 @@ def download_and_unzip_direct(config, rootpath, hot_run=True, disable_progress=F True when download is successful, False otherwise """ resource = config["category"] - destination = os.path.relpath(config["destination"]) + destination = get_relative_path(config["destination"]) url = config["urls"]["direct"] - file_path = os.path.join(destination, os.path.basename(url)) + file_path = get_path(destination, get_basename_path(url)) unzip = config.get("unzip", False) @@ -457,10 +459,10 @@ def download_and_unzip_direct(config, rootpath, hot_run=True, disable_progress=F def download_and_unzip_hydrobasins( - config, rootpath, hot_run=True, disable_progress=False + config, root_path, hot_run=True, disable_progress=False ): """ - download_and_unzip_basins(config, rootpath, dest_path, hot_run=True, + download_and_unzip_basins(config, root_path, dest_path, hot_run=True, disable_progress=False) Function to download and unzip the data for hydrobasins from HydroBASINS database @@ -480,7 +482,7 @@ def download_and_unzip_hydrobasins( ------ config : Dict Configuration data for the category to download - rootpath : str + root_path : str Absolute path of the repository hot_run : Bool (default True) When true the data are downloaded @@ -493,7 +495,7 @@ def download_and_unzip_hydrobasins( True when download is successful, False otherwise """ resource = config["category"] - destination = os.path.relpath(config["destination"]) + destination = get_relative_path(config["destination"]) url_templ = config["urls"]["hydrobasins"]["base_url"] suffix_list = config["urls"]["hydrobasins"]["suffixes"] @@ -504,7 +506,7 @@ def download_and_unzip_hydrobasins( for rg in suffix_list: url = url_templ + "hybas_" + rg + "_lev" + level_code + "_v1c.zip" - file_path = os.path.join(destination, os.path.basename(url)) + file_path = get_path(destination, get_basename_path(url)) all_downloaded &= download_and_unpack( url=url, @@ -520,9 +522,9 @@ def download_and_unzip_hydrobasins( return all_downloaded -def download_and_unzip_post(config, rootpath, hot_run=True, disable_progress=False): +def download_and_unzip_post(config, root_path, hot_run=True, disable_progress=False): """ - download_and_unzip_post(config, rootpath, dest_path, hot_run=True, + download_and_unzip_post(config, root_path, dest_path, hot_run=True, disable_progress=False) Function to download the data by category from a post request. 
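To make the hydrobasins hunks above concrete: each per-region URL is assembled from the configured template, and the local file name is then derived from the URL. The values below are illustrative only; the real ``base_url``, suffixes and level come from the config, and ``get_basename_path`` is presumed here to wrap ``pathlib.Path(...).name``:

    import pathlib

    url_templ = "https://example.org/hydrobasins/"  # hypothetical base_url
    rg = "af"  # one entry of config["urls"]["hydrobasins"]["suffixes"]
    level_code = "06"  # basin level as a two-digit string

    url = url_templ + "hybas_" + rg + "_lev" + level_code + "_v1c.zip"
    file_name = pathlib.Path(url).name  # "hybas_af_lev06_v1c.zip"
    file_path = pathlib.Path("data/hydrobasins", file_name)  # get_path(destination, ...)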
@@ -531,7 +533,7 @@ def download_and_unzip_post(config, rootpath, hot_run=True, disable_progress=Fal ------ config : Dict Configuration data for the category to download - rootpath : str + root_path : str Absolute path of the repository hot_run : Bool (default True) When true the data are downloaded @@ -544,18 +546,17 @@ def download_and_unzip_post(config, rootpath, hot_run=True, disable_progress=Fal True when download is successful, False otherwise """ resource = config["category"] - destination = os.path.relpath(config["destination"]) + destination = get_relative_path(config["destination"]) # load data for post method postdata = config["urls"]["post"] # remove url feature url = postdata.pop("url") - file_path = os.path.join(destination, os.path.basename(url)) + file_path = get_path(destination, get_basename_path(url)) if hot_run: - if os.path.exists(file_path): - os.remove(file_path) + pathlib.Path(file_path).unlink(missing_ok=True) # try: logger.info(f"Downloading resource '{resource}' from cloud '{url}'.") @@ -571,9 +572,9 @@ def download_and_unzip_post(config, rootpath, hot_run=True, disable_progress=Fal # then unzip it and remove the original file if config.get("unzip", False): with ZipFile(file_path, "r") as zipfile: - zipfile.extractall(destination) + zipfile.extractall(path=destination) - os.remove(file_path) + pathlib.Path(file_path).unlink(missing_ok=True) logger.info(f"Downloaded resource '{resource}' from cloud '{url}'.") # except: # logger.warning(f"Failed download resource '{resource}' from cloud '{url}'.") @@ -804,7 +805,7 @@ def merge_hydrobasins_shape(config_hydrobasin, hydrobasins_level): gpdf_list = [None] * len(files_to_merge) logger.info("Merging hydrobasins files into: " + output_fl) for i, f_name in tqdm(enumerate(files_to_merge)): - gpdf_list[i] = gpd.read_file(os.path.join(basins_path, f_name)) + gpdf_list[i] = gpd.read_file(get_path(basins_path, f_name)) fl_merged = gpd.GeoDataFrame(pd.concat(gpdf_list)).drop_duplicates( subset="HYBAS_ID", ignore_index=True ) @@ -813,7 +814,7 @@ def merge_hydrobasins_shape(config_hydrobasin, hydrobasins_level): if __name__ == "__main__": if "snakemake" not in globals(): - os.chdir(os.path.dirname(os.path.abspath(__file__))) + change_to_script_dir(__file__) from _helpers import mock_snakemake snakemake = mock_snakemake("retrieve_databundle_light") @@ -822,7 +823,7 @@ def merge_hydrobasins_shape(config_hydrobasin, hydrobasins_level): sets_path_to_root("pypsa-earth") - rootpath = os.getcwd() + root_path = get_current_directory_path() tutorial = snakemake.params.tutorial countries = snakemake.params.countries logger.info(f"Retrieving data for {len(countries)} countries.") @@ -866,7 +867,7 @@ def merge_hydrobasins_shape(config_hydrobasin, hydrobasins_level): try: download_and_unzip = globals()[f"download_and_unzip_{host}"] if download_and_unzip( - config_bundles[b_name], rootpath, disable_progress=disable_progress + config_bundles[b_name], root_path, disable_progress=disable_progress ): downloaded_bundle = True except Exception: diff --git a/scripts/simplify_network.py b/scripts/simplify_network.py index 30d60e32f..48f18c4a9 100644 --- a/scripts/simplify_network.py +++ b/scripts/simplify_network.py @@ -84,7 +84,6 @@ 4. Optionally, if an integer were provided for the wildcard ``{simpl}`` (e.g. ``networks/elec_s500.nc``), the network is clustered to this number of clusters with the routines from the ``cluster_network`` rule with the function ``cluster_network.cluster(...)``. This step is usually skipped! 
""" -import os import sys from functools import reduce @@ -94,6 +93,7 @@ import pypsa import scipy as sp from _helpers import ( + change_to_script_dir, configure_logging, create_logger, get_aggregation_strategies, @@ -963,7 +963,7 @@ def merge_isolated_nodes(n, threshold, aggregation_strategies=dict()): if "snakemake" not in globals(): from _helpers import mock_snakemake - os.chdir(os.path.dirname(os.path.abspath(__file__))) + change_to_script_dir(__file__) snakemake = mock_snakemake("simplify_network", simpl="") configure_logging(snakemake) diff --git a/scripts/solve_network.py b/scripts/solve_network.py index f83b47478..8a84f9499 100755 --- a/scripts/solve_network.py +++ b/scripts/solve_network.py @@ -77,14 +77,17 @@ for all ``scenario`` s in the configuration file the rule :mod:`solve_network`. """ -import os import re -from pathlib import Path import numpy as np import pandas as pd import pypsa -from _helpers import configure_logging, create_logger +from _helpers import ( + build_directory, + change_to_script_dir, + configure_logging, + create_logger, +) from pypsa.descriptors import get_switchable_as_dense as get_as_dense from pypsa.linopf import ( define_constraints, @@ -544,7 +547,7 @@ def solve_network(n, config, opts="", **kwargs): if "snakemake" not in globals(): from _helpers import mock_snakemake - os.chdir(os.path.dirname(os.path.abspath(__file__))) + change_to_script_dir(__file__) snakemake = mock_snakemake( "solve_network", simpl="", @@ -556,7 +559,7 @@ def solve_network(n, config, opts="", **kwargs): tmpdir = snakemake.params.solving.get("tmpdir") if tmpdir is not None: - Path(tmpdir).mkdir(parents=True, exist_ok=True) + build_directory(tmpdir) opts = snakemake.wildcards.opts.split("-") solve_opts = snakemake.params.solving["options"] From 56911ffd60e4d12f7ed0538e12ed5175e05f4f2f Mon Sep 17 00:00:00 2001 From: Fabrizio Finozzi Date: Wed, 5 Jun 2024 13:15:33 +0200 Subject: [PATCH 06/40] add unit test setup --- test/__init__.py | 4 + test/conftest.py | 21 +++++ test/test_helpers.py | 158 +++++++++++++++++++++++++++++++++++ test/test_prepare_network.py | 13 +++ 4 files changed, 196 insertions(+) create mode 100644 test/__init__.py create mode 100644 test/conftest.py create mode 100644 test/test_helpers.py create mode 100644 test/test_prepare_network.py diff --git a/test/__init__.py b/test/__init__.py new file mode 100644 index 000000000..fa7a7644d --- /dev/null +++ b/test/__init__.py @@ -0,0 +1,4 @@ +# -*- coding: utf-8 -*- +# SPDX-FileCopyrightText: PyPSA-Earth and PyPSA-Eur Authors +# +# SPDX-License-Identifier: AGPL-3.0-or-later diff --git a/test/conftest.py b/test/conftest.py new file mode 100644 index 000000000..3ba165e42 --- /dev/null +++ b/test/conftest.py @@ -0,0 +1,21 @@ +# -*- coding: utf-8 -*- +# SPDX-FileCopyrightText: PyPSA-Earth and PyPSA-Eur Authors +# +# SPDX-License-Identifier: AGPL-3.0-or-later + +# -*- coding: utf-8 -*- + +import pathlib + +import pytest + +_content_temp_file = "content" +_name_temp_file = "hello.txt" + + +@pytest.fixture(scope="function") +def get_temp_file(tmpdir): + p = tmpdir.join(_name_temp_file) + p.write(_content_temp_file) + yield p + pathlib.Path(p).unlink(missing_ok=True) diff --git a/test/test_helpers.py b/test/test_helpers.py new file mode 100644 index 000000000..a7981ede8 --- /dev/null +++ b/test/test_helpers.py @@ -0,0 +1,158 @@ +# -*- coding: utf-8 -*- +# SPDX-FileCopyrightText: PyPSA-Earth and PyPSA-Eur Authors +# +# SPDX-License-Identifier: AGPL-3.0-or-later + +# -*- coding: utf-8 -*- + +import os +import pathlib +from 
test.conftest import _content_temp_file, _name_temp_file, get_temp_file + +from scripts._helpers import ( + change_to_script_dir, + get_abs_path, + get_basename_abs_path, + get_current_directory_path, + get_dirname_path, + get_path, + get_path_size, + get_relative_path, + is_directory_path, + is_file_path, + path_exists, +) + +path_cwd = str(pathlib.Path.cwd()) + + +def test_get_abs_path(): + """ + Verify the path returned by get_abs_path() + """ + abs_file = get_abs_path(__file__) + assert str(abs_file) == os.path.abspath(__file__) + assert str(abs_file) == __file__ + + +def test_change_to_script_dir(): + """ + Verify the path returned by change_to_script_dir() + """ + change_to_script_dir(__file__) + assert str(pathlib.Path.cwd()) == path_cwd + "/test" + change_to_script_dir(".") + assert str(pathlib.Path.cwd()) == path_cwd + + +def test_get_dirname_path(): + """ + Verify the path returned by get_dirname_path() + """ + dir_name_file = get_dirname_path(__file__) + dir_name_cwd = get_dirname_path(".") + assert str(dir_name_file) == os.path.dirname(__file__) + assert str(dir_name_file) == path_cwd + "/test" + assert str(dir_name_cwd) == "." + + +def test_get_basename_abs_path(): + """ + Verify the path returned by get_basename_abs_path() + """ + base_name_file = get_basename_abs_path(__file__) + assert str(base_name_file) == os.path.basename(os.path.abspath(__file__)) + assert str(base_name_file) == "test_helpers.py" + + +def test_get_path(): + """ + Verify the path returned by get_path() + """ + file_name_path_one = get_path( + path_cwd, + "sub_path_1", + "sub_path_2", + "sub_path_3", + "sub_path_4", + "sub_path_5", + "file.nc", + ) + path_name_path_two = get_path( + pathlib.Path(__file__).parent, "..", "logs", "rule.log" + ) + assert str(file_name_path_one) == os.path.join( + path_cwd, + "sub_path_1", + "sub_path_2", + "sub_path_3", + "sub_path_4", + "sub_path_5", + "file.nc", + ) + assert ( + str(file_name_path_one) + == path_cwd + "/sub_path_1/sub_path_2/sub_path_3/sub_path_4/sub_path_5/file.nc" + ) + assert str(path_name_path_two) == str( + pathlib.Path(__file__).parent.joinpath("..", "logs", "rule.log") + ) + + +def test_get_path_size(get_temp_file): + """ + Verify the path size (in bytes) returned by get_path_size() + """ + path = get_temp_file + file_size = get_path_size(path) + assert file_size == os.stat(path).st_size + assert file_size == len(_content_temp_file) + + +def test_get_current_directory_path(): + """ + Verify the current directory path returned by get_current_directory_path() + """ + path = get_current_directory_path() + assert str(path) == os.getcwd() + + +def test_is_directory_path(tmpdir): + """ + Verify if is_directory_path() returns True when path points to directory. + """ + assert is_directory_path(tmpdir) + assert is_directory_path(tmpdir) == os.path.isdir(tmpdir) + assert not is_directory_path(__file__) + + +def test_is_file_path(get_temp_file, tmpdir): + """ + Verify if is_file_path() returns True when path points to file. 
+ """ + path = get_temp_file + assert is_file_path(path) + assert is_file_path(path) == os.path.isfile(path) + assert not is_file_path(tmpdir) + + +def test_get_relative_path(get_temp_file): + """ + Verify the relative path returned by get_relative_path() + """ + path = get_temp_file + # path relative to the parent directory of the temp file + relative_path = get_relative_path(path, get_path(path).parent) + assert str(relative_path) == _name_temp_file + assert str(relative_path) == os.path.relpath(path, start=get_path(path).parent) + + +def test_path_exists(get_temp_file): + """ + Verify if path_exists() returns True when path exists. + """ + path = get_temp_file + pathlib_path = get_path(path) + assert path_exists(path) + assert path_exists(pathlib_path) + assert path_exists(path) == os.path.exists(path) diff --git a/test/test_prepare_network.py b/test/test_prepare_network.py new file mode 100644 index 000000000..9bad7c220 --- /dev/null +++ b/test/test_prepare_network.py @@ -0,0 +1,13 @@ +# -*- coding: utf-8 -*- +# SPDX-FileCopyrightText: PyPSA-Earth and PyPSA-Eur Authors +# +# SPDX-License-Identifier: AGPL-3.0-or-later + +# -*- coding: utf-8 -*- + +from scripts.prepare_network import download_emission_data + + +def test_download_emission_data(): + filename = download_emission_data() + assert filename == "v60_CO2_excl_short-cycle_org_C_1970_2018.xls" From 75d27a565560abccef9bb9e788cb67a6772d6e35 Mon Sep 17 00:00:00 2001 From: Fabrizio Finozzi Date: Wed, 5 Jun 2024 15:20:20 +0200 Subject: [PATCH 07/40] reformatting --- scripts/_helpers.py | 546 ++++++++++++++++++++++++++++++++++++++----- test/test_helpers.py | 307 ++++++++++++++++++++++++ 2 files changed, 794 insertions(+), 59 deletions(-) diff --git a/scripts/_helpers.py b/scripts/_helpers.py index 9d88c26b7..9a421aa39 100644 --- a/scripts/_helpers.py +++ b/scripts/_helpers.py @@ -8,13 +8,23 @@ import logging import os import pathlib +import shutil import subprocess import sys +import zipfile import country_converter as coco +import fiona import geopandas as gpd +import numpy as np import pandas as pd +import requests +import snakemake as sm import yaml +from pypsa.components import component_attrs, components +from pypsa.descriptors import Dict +from shapely.geometry import Point +from snakemake.script import Snakemake logger = logging.getLogger(__name__) @@ -34,21 +44,21 @@ def handle_exception(exc_type, exc_value, exc_traceback): tb = exc_traceback while tb.tb_next: tb = tb.tb_next - flname = tb.tb_frame.f_globals.get("__file__") - funcname = tb.tb_frame.f_code.co_name + fl_name = tb.tb_frame.f_globals.get("__file__") + func_name = tb.tb_frame.f_code.co_name if issubclass(exc_type, KeyboardInterrupt): logger.error( "Manual interruption %r, function %r: %s", - flname, - funcname, + fl_name, + func_name, exc_value, ) else: logger.error( "An error happened in module %r, function %r: %s", - flname, - funcname, + fl_name, + func_name, exc_value, exc_info=(exc_type, exc_value, exc_traceback), ) @@ -59,12 +69,12 @@ def create_logger(logger_name, level=logging.INFO): Create a logger for a module and adds a handler needed to capture in logs traceback from exceptions emerging during the workflow. 
""" - logger = logging.getLogger(logger_name) - logger.setLevel(level) + logger_instance = logging.getLogger(logger_name) + logger_instance.setLevel(level) handler = logging.StreamHandler(stream=sys.stdout) - logger.addHandler(handler) + logger_instance.addHandler(handler) sys.excepthook = handle_exception - return logger + return logger_instance def read_osm_config(*args): @@ -112,7 +122,7 @@ def read_osm_config(*args): return tuple([osm_config[a] for a in args]) -def sets_path_to_root(root_directory_name): +def sets_path_to_root(root_directory_name, n=8): """ Search and sets path to the given root directory (root/path/file). @@ -123,10 +133,8 @@ def sets_path_to_root(root_directory_name): n : int Number of folders the function will check upwards/root directed. """ - import os repo_name = root_directory_name - n = 8 # check max 8 levels above. Random default. n0 = n while n >= 0: @@ -219,36 +227,30 @@ def load_network(import_name=None, custom_components=None): from pypsa.descriptors import Dict override_components = None - override_component_attrs = None + override_component_attrs_dict = None if custom_components is not None: override_components = pypsa.components.components.copy() - override_component_attrs = Dict( + override_component_attrs_dict = Dict( {k: v.copy() for k, v in pypsa.components.component_attrs.items()} ) for k, v in custom_components.items(): override_components.loc[k] = v["component"] - override_component_attrs[k] = pd.DataFrame( + override_component_attrs_dict[k] = pd.DataFrame( columns=["type", "unit", "default", "description", "status"] ) for attr, val in v["attributes"].items(): - override_component_attrs[k].loc[attr] = val + override_component_attrs_dict[k].loc[attr] = val return pypsa.Network( import_name=import_name, override_components=override_components, - override_component_attrs=override_component_attrs, - ) - - -def pdbcast(v, h): - return pd.DataFrame( - v.values.reshape((-1, 1)) * h.values, index=v.index, columns=h.index + override_component_attrs=override_component_attrs_dict, ) def load_network_for_plots( - fn, tech_costs, cost_config, elec_config, combine_hydro_ps=True + fn, tech_costs, cost_config, elec_config, combine_hydro_ps=True ): import pypsa from add_electricity import load_costs, update_transmission_costs @@ -259,7 +261,7 @@ def load_network_for_plots( n.stores["carrier"] = n.stores.bus.map(n.buses.carrier) n.links["carrier"] = ( - n.links.bus0.map(n.buses.carrier) + "-" + n.links.bus1.map(n.buses.carrier) + n.links.bus0.map(n.buses.carrier) + "-" + n.links.bus1.map(n.buses.carrier) ) n.lines["carrier"] = "AC line" n.transformers["carrier"] = "AC transformer" @@ -284,11 +286,13 @@ def load_network_for_plots( def update_p_nom_max(n): - # if extendable carriers (solar/onwind/...) have capacity >= 0, - # e.g. existing assets from the OPSD project are included to the network, - # the installed capacity might exceed the expansion limit. - # Hence, we update the assumptions. + """ + If extendable carriers (solar/onwind/...) have capacity >= 0, e.g. existing + assets from the OPSD project are included to the network, the installed + capacity might exceed the expansion limit. + Hence, we update the assumptions. 
+    """
     n.generators.p_nom_max = n.generators[["p_nom_min", "p_nom_max"]].max(1)
 
 
@@ -330,8 +334,8 @@ def aggregate_p_curtailed(n):
         [
             (
                 (
-                    n.generators_t.p_max_pu.sum().multiply(n.generators.p_nom_opt)
-                    - n.generators_t.p.sum()
+                    n.generators_t.p_max_pu.sum().multiply(n.generators.p_nom_opt)
+                    - n.generators_t.p.sum()
                 )
                 .groupby(n.generators.carrier)
                 .sum()
@@ -346,7 +350,7 @@ def aggregate_p_curtailed(n):
 
 
 def aggregate_costs(n, flatten=False, opts=None, existing_only=False):
-    components = dict(
+    components_dict = dict(
         Link=("p_nom", "p0"),
         Generator=("p_nom", "p"),
         StorageUnit=("p_nom", "p"),
@@ -357,7 +361,8 @@ def aggregate_costs(n, flatten=False, opts=None, existing_only=False):
     costs = {}
 
     for c, (p_nom, p_attr) in zip(
-        n.iterate_components(components.keys(), skip_empty=False), components.values()
+        n.iterate_components(components_dict.keys(), skip_empty=False),
+        components_dict.values(),
     ):
         if c.df.empty:
             continue
@@ -389,10 +394,10 @@ def aggregate_costs(n, flatten=False, opts=None, existing_only=False):
 
 
 def progress_retrieve(
-    url, file, data=None, headers=None, disable_progress=False, roundto=1.0
+    url, file, data=None, headers=None, disable_progress=False, round_to_value=1.0
 ):
     """
-    Function to download data from a url with a progress bar progress in
+    Function to download data from a url with a progress bar shown while
     retrieving data.
 
     Parameters
     ----------
@@ -405,7 +410,7 @@ def progress_retrieve(
         Data for the request (default None), when not none Post method is used
     disable_progress : bool
         When true, no progress bar is shown
-    roundto : float
+    round_to_value : float (default 1.0)
         Precision used to report the progress
         e.g. 0.1 stands for 88.1, 10 stands for 90, 80
     """
@@ -415,8 +420,11 @@ def progress_retrieve(
 
     pbar = tqdm(total=100, disable=disable_progress)
 
-    def dlProgress(count, blockSize, totalSize, roundto=roundto):
-        pbar.n = round(count * blockSize * 100 / totalSize / roundto) * roundto
+    def dl_progress(count, block_size, total_size):
+        pbar.n = (
+            round(count * block_size * 100 / total_size / round_to_value)
+            * round_to_value
+        )
         pbar.refresh()
 
     if data is not None:
@@ -427,7 +435,7 @@ def dlProgress(count, blockSize, totalSize, roundto=roundto):
         opener.addheaders = headers
         urllib.request.install_opener(opener)
 
-    urllib.request.urlretrieve(url, file, reporthook=dlProgress, data=data)
+    urllib.request.urlretrieve(url, file, reporthook=dl_progress, data=data)
 
 
 def get_aggregation_strategies(aggregation_strategies):
@@ -455,7 +463,7 @@ def get_aggregation_strategies(aggregation_strategies):
     return bus_strategies, generator_strategies
 
 
-def mock_snakemake(rulename, **wildcards):
+def mock_snakemake(rule_name, **wildcards):
     """
     This function is expected to be executed from the "scripts"-directory of "
     the snakemake project. It returns a snakemake.script.Snakemake object,
@@ -465,20 +473,16 @@ def mock_snakemake(rulename, **wildcards):
 
     Parameters
     ----------
-    rulename: str
+    rule_name: str
        name of the rule for which the snakemake object should be generated
    wildcards:
        keyword arguments fixing the wildcards. Only necessary if
        wildcards are needed. 
""" - import snakemake as sm - from pypsa.descriptors import Dict - from snakemake.script import Snakemake - script_dir = pathlib.Path(__file__).parent.resolve() assert ( - pathlib.Path.cwd().resolve() == script_dir + pathlib.Path.cwd().resolve() == script_dir ), f"mock_snakemake has to be run from the repository scripts directory {script_dir}" os.chdir(script_dir.parent) for p in sm.SNAKEFILE_CHOICES: @@ -491,12 +495,12 @@ def mock_snakemake(rulename, **wildcards): workflow.include(snakefile) workflow.global_resources = {} try: - rule = workflow.get_rule(rulename) + rule = workflow.get_rule(rule_name) except Exception as exception: print( exception, - f"The {rulename} might be a conditional rule in the Snakefile.\n" - f"Did you enable {rulename} in the config?", + f"The {rule_name} might be a conditional rule in the Snakefile.\n" + f"Did you enable {rule_name} in the config?", ) raise dag = sm.dag.DAG(workflow, rules=[rule]) @@ -573,7 +577,9 @@ def three_2_two_digits_country(three_code_country): return two_code_country -def two_digits_2_name_country(two_code_country, nocomma=False, remove_start_words=[]): +def two_digits_2_name_country( + two_code_country, name_string="name_short", no_comma=False, remove_start_words=[] +): """ Convert 2-digit country code to full name country: @@ -581,7 +587,10 @@ def two_digits_2_name_country(two_code_country, nocomma=False, remove_start_word ---------- two_code_country: str 2-digit country name - nocomma: bool (optional, default False) + name_string: str (optional, default name_short) + When name_short CD -> DR Congo + When name_official CD -> Democratic Republic of the Congo + no_comma: bool (optional, default False) When true, country names with comma are extended to remove the comma. Example CD -> Congo, The Democratic Republic of -> The Democratic Republic of Congo remove_start_words: list (optional, default empty) @@ -593,13 +602,15 @@ def two_digits_2_name_country(two_code_country, nocomma=False, remove_start_word full_name: str full country name """ + if remove_start_words is None: + remove_start_words = list() if two_code_country == "SN-GM": return f"{two_digits_2_name_country('SN')}-{two_digits_2_name_country('GM')}" - full_name = coco.convert(two_code_country, to="name_short") + full_name = coco.convert(two_code_country, to=name_string) - if nocomma: - # separate list by delim + if no_comma: + # separate list by delimiter splits = full_name.split(", ") # reverse the order @@ -608,7 +619,7 @@ def two_digits_2_name_country(two_code_country, nocomma=False, remove_start_word # return the merged string full_name = " ".join(splits) - # when list is non empty + # when list is non-empty if remove_start_words: # loop over every provided word for word in remove_start_words: @@ -634,8 +645,8 @@ def country_name_2_two_digits(country_name): 2-digit country name """ if ( - country_name - == f"{two_digits_2_name_country('SN')}-{two_digits_2_name_country('GM')}" + country_name + == f"{two_digits_2_name_country('SN')}-{two_digits_2_name_country('GM')}" ): return "SN-GM" @@ -744,7 +755,7 @@ def filter_codes(c_list, iso_coding=True): selected(iso_coding=False), ignore iso-specific names. """ if ( - iso_coding + iso_coding ): # if country lists are in iso coding, then check if they are 2-string # 2-code countries ret_list = [c for c in c_list if len(c) == 2] @@ -866,6 +877,7 @@ def get_path_size(path): def build_directory(path): """ It creates recursively the directory and its leaf directories. 
+ Parameters: path (str): The path to the file """ @@ -881,6 +893,7 @@ def change_to_script_dir(path): """ Change the current working directory to the directory containing the given script. + Parameters: path (str): The path to the file. """ @@ -902,6 +915,7 @@ def get_current_directory_path(): def is_directory_path(path): """ It returns True if the path points to a directory. + False otherwise. """ return pathlib.Path(path).is_dir() @@ -910,6 +924,7 @@ def is_directory_path(path): def is_file_path(path): """ It returns True if the path points to a file. + False otherwise. """ return pathlib.Path(path).is_file() @@ -918,6 +933,7 @@ def is_file_path(path): def get_relative_path(path, start_path="."): """ It returns a relative path to path from start_path. + Default for start_path is the current directory """ return pathlib.Path(path).relative_to(start_path) @@ -926,6 +942,418 @@ def get_relative_path(path, start_path="."): def path_exists(path): """ It returns True if the path exists. + False otherwise. """ return pathlib.Path(path).exists() + + +def create_network_topology(n, prefix, connector=" <-> ", bidirectional=True): + """ + Create a network topology like the power transmission network. + + Parameters + ---------- + n : pypsa.Network + prefix : str + connector : str + bidirectional : bool, default True + True: one link for each connection + False: one link for each connection and direction (back and forth) + + Returns + ------- + pd.DataFrame with columns bus0, bus1 and length + """ + + ln_attrs = ["bus0", "bus1", "length"] + lk_attrs = ["bus0", "bus1", "length", "underwater_fraction"] + + # TODO: temporary fix for when underwater_fraction is not found + if "underwater_fraction" not in n.links.columns: + if n.links.empty: + n.links["underwater_fraction"] = None + else: + n.links["underwater_fraction"] = 0.0 + + candidates = pd.concat( + [n.lines[ln_attrs], n.links.loc[n.links.carrier == "DC", lk_attrs]] + ).fillna(0) + + positive_order = candidates.bus0 < candidates.bus1 + candidates_p = candidates[positive_order] + swap_buses = {"bus0": "bus1", "bus1": "bus0"} + candidates_n = candidates[~positive_order].rename(columns=swap_buses) + candidates = pd.concat([candidates_p, candidates_n]) + + def make_index(c): + return prefix + c.bus0 + connector + c.bus1 + + topo = candidates.groupby(["bus0", "bus1"], as_index=False).mean() + topo.index = topo.apply(make_index, axis=1) + + if not bidirectional: + topo_reverse = topo.copy() + topo_reverse.rename(columns=swap_buses, inplace=True) + topo_reverse.index = topo_reverse.apply(make_index, axis=1) + topo = pd.concat([topo, topo_reverse]) + + return topo + + +def cycling_shift(df, steps=1): + """ + Cyclic shift on index of pd.Series|pd.DataFrame by number of steps. + """ + df = df.copy() + new_index = np.roll(df.index, steps) + df.values[:] = df.reindex(index=new_index).values + return df + + +def download_gadm(country_code, update=False, out_logging=False): + """ + Download gpkg file from GADM for a given country code. 
+ + Parameters + ---------- + country_code : str + Two letter country codes of the downloaded files + update : bool + Update = true, forces re-download of files + + Returns + ------- + gpkg file per country + """ + + gadm_filename = f"gadm36_{two_2_three_digits_country(country_code)}" + gadm_url = f"https://biogeo.ucdavis.edu/data/gadm3.6/gpkg/{gadm_filename}_gpkg.zip" + _logger = logging.getLogger(__name__) + gadm_input_file_zip = get_path( + get_current_directory_path(), + "data", + "raw", + "gadm", + gadm_filename, + gadm_filename + ".zip", + ) # Input filepath zip + + gadm_input_file_gpkg = get_path( + get_current_directory_path(), + "data", + "raw", + "gadm", + gadm_filename, + gadm_filename + ".gpkg", + ) # Input filepath gpkg + + if not path_exists(gadm_input_file_gpkg) or update is True: + if out_logging: + _logger.warning( + f"Stage 4/4: {gadm_filename} of country {two_digits_2_name_country(country_code)} does not exist, downloading to {gadm_input_file_zip}" + ) + # create data/osm directory + os.makedirs(os.path.dirname(gadm_input_file_zip), exist_ok=True) + + with requests.get(gadm_url, stream=True) as r: + with open(gadm_input_file_zip, "wb") as f: + shutil.copyfileobj(r.raw, f) + + with zipfile.ZipFile(gadm_input_file_zip, "r") as zip_ref: + zip_ref.extractall(os.path.dirname(gadm_input_file_zip)) + + return gadm_input_file_gpkg, gadm_filename + + +def get_gadm_layer(country_list, layer_id, update=False, outlogging=False): + """ + Function to retrieve a specific layer id of a geopackage for a selection of + countries. + + Parameters + ---------- + country_list : str + List of the countries + layer_id : int + Layer to consider in the format GID_{layer_id}. + When the requested layer_id is greater than the last available layer, then the last layer is selected. + When a negative value is requested, then, the last layer is requested + """ + # initialization of the list of geodataframes + geodf_list = [] + + for country_code in country_list: + # download file gpkg + file_gpkg, name_file = download_gadm(country_code, update, outlogging) + + # get layers of a geopackage + list_layers = fiona.listlayers(file_gpkg) + + # get layer name + if layer_id < 0 | layer_id >= len(list_layers): + # when layer id is negative or larger than the number of layers, select the last layer + layer_id = len(list_layers) - 1 + code_layer = np.mod(layer_id, len(list_layers)) + layer_name = ( + f"gadm36_{two_2_three_digits_country(country_code).upper()}_{code_layer}" + ) + + # read gpkg file + geodf_temp = gpd.read_file(file_gpkg, layer=layer_name) + + # convert country name representation of the main country (GID_0 column) + geodf_temp["GID_0"] = [ + three_2_two_digits_country(twoD_c) for twoD_c in geodf_temp["GID_0"] + ] + + # create a subindex column that is useful + # in the GADM processing of sub-national zones + geodf_temp["GADM_ID"] = geodf_temp[f"GID_{code_layer}"] + + # concatenate geodataframes + geodf_list = pd.concat([geodf_list, geodf_temp]) + + geodf_gadm = gpd.GeoDataFrame(pd.concat(geodf_list, ignore_index=True)) + geodf_gadm.set_crs(geodf_list[0].crs, inplace=True) + + return geodf_gadm + + +def locate_bus( + coords, + co, + gadm_level, + path_to_gadm=None, + gadm_clustering=False, +): + """ + Function to locate the right node for a coordinate set input coords of + point. 
+
+    Parameters
+    ----------
+    coords: pandas dataseries
+        dataseries with 2 rows x & y representing the longitude and latitude
+    co: string
+        code of the country where the coordinates are located (e.g. "MA" for Morocco)
+    """
+    col = "name"
+    if not gadm_clustering:
+        gdf = gpd.read_file(path_to_gadm)
+    else:
+        if path_to_gadm:
+            gdf = gpd.read_file(path_to_gadm)
+            if "GADM_ID" in gdf.columns:
+                col = "GADM_ID"
+
+                if gdf[col][0][
+                    :3
+                ].isalpha():  # TODO clean later by changing all codes to 2 letters
+                    gdf[col] = gdf[col].apply(
+                        lambda name: three_2_two_digits_country(name[:3]) + name[3:]
+                    )
+        else:
+            gdf = get_gadm_layer(co, gadm_level)
+            col = "GID_{}".format(gadm_level)
+
+        # gdf.set_index("GADM_ID", inplace=True)
+    gdf_co = gdf[
+        gdf[col].str.contains(co)
+    ]  # subset of the GeoDataFrame containing only the shapes of country co (e.g. "MA")
+    point = Point(coords["x"], coords["y"])  # point object built from the x/y coordinates
+
+    try:
+        return gdf_co[gdf_co.contains(point)][
+            col
+        ].item()  # return the identifier of the shape that contains the point
+
+    except ValueError:
+        return gdf_co[gdf_co.geometry == min(gdf_co.geometry, key=(point.distance))][
+            col
+        ].item()  # no shape contains the point: fall back to the closest shape
+
+
+def override_component_attrs(directory):
+    """Tell PyPSA that links can have multiple outputs by
+    overriding the component_attrs. This can be done for
+    as many buses as you need with format busi for i = 2,3,4,5,....
+    See https://pypsa.org/doc/components.html#link-with-multiple-outputs-or-inputs
+
+    Parameters
+    ----------
+    directory : string
+        Folder where component attributes to override are stored
+        analogous to ``pypsa/component_attrs``, e.g. `links.csv`.
+
+    Returns
+    -------
+    Dictionary of overridden component attributes.
+    """
+
+    attrs = Dict({k: v.copy() for k, v in component_attrs.items()})
+
+    for component, list_name in components.list_name.items():
+        fn = f"{directory}/{list_name}.csv"
+        if os.path.isfile(fn):
+            overrides = pd.read_csv(fn, index_col=0, na_values="n/a")
+            attrs[component] = overrides.combine_first(attrs[component])
+
+    return attrs
+
+
+def get_conv_factors(sector):
+    """
+    Create a dictionary with all the conversion factors for the standard net calorific value
+    from Tera Joule per Kilo Metric-ton to Tera Watt-hour based on
+    https://unstats.un.org/unsd/energy/balance/2014/05.pdf.
+
+    Considering that 1 Watt-hour = 3600 Joule, one obtains the values below dividing
+    the standard net calorific values from the pdf by 3600.
+
+    For example, the value "hard coal": 0.007167 is given by 25.8 / 3600, where 25.8 is the standard
+    net calorific value. 
+ """ + + conversion_factors_dict = { + "additives and oxygenates": 0.008333, + "anthracite": 0.005, + "aviation gasoline": 0.01230, + "bagasse": 0.002144, + "biodiesel": 0.01022, + "biogasoline": 0.007444, + "bio jet kerosene": 0.011111, + "bitumen": 0.01117, + "brown coal": 0.003889, + "brown coal briquettes": 0.00575, + "charcoal": 0.00819, + "coal tar": 0.007778, + "coke-oven coke": 0.0078334, + "coke-oven gas": 0.000277, + "coking coal": 0.007833, + "conventional crude oil": 0.01175, + "crude petroleum": 0.011750, + "ethane": 0.01289, + "fuel oil": 0.01122, + "fuelwood": 0.00254, + "gas coke": 0.007326, + "gas oil/ diesel oil": 0.01194, + "gasoline-type jet fuel": 0.01230, + "hard coal": 0.007167, + "kerosene-type jet fuel": 0.01225, + "lignite": 0.003889, + "liquefied petroleum gas (lpg)": 0.01313, + "lubricants": 0.011166, + "motor gasoline": 0.01230, + "naphtha": 0.01236, + "natural gas": 0.00025, + "natural gas liquids": 0.01228, + "oil shale": 0.00247, + "other bituminous coal": 0.005556, + "paraffin waxes": 0.01117, + "patent fuel": 0.00575, + "peat": 0.00271, + "peat products": 0.00271, + "petroleum coke": 0.009028, + "refinery gas": 0.01375, + "sub-bituminous coal": 0.005555, + } + + if sector == "industry": + return conversion_factors_dict + else: + logger.info(f"No conversion factors available for sector {sector}") + return np.nan + + +def aggregate_fuels(sector): + gas_fuels = [ + "blast furnace gas", + "natural gas (including lng)", + "natural gas liquids", + ] + + oil_fuels = [ + "bitumen", + "conventional crude oil", + "crude petroleum", + "ethane", + "fuel oil", + "gas oil/ diesel oil", + "kerosene-type jet fuel", + "liquefied petroleum gas (lpg)", + "lubricants", + "motor gasoline", + "naphtha", + "patent fuel", + "petroleum coke", + "refinery gas", + ] + + coal_fuels = [ + "anthracite", + "brown coal", + "brown coal briquettes", + "coke-oven coke", + "coke-oven gas", + "coking coal", + "gas coke", + "gasworks gas", + "hard coal", + "lignite", + "other bituminous coal", + "peat", + "peat products", + "sub-bituminous coal", + ] + + biomass_fuels = [ + "bagasse", + "fuelwood", + "biogases", + "biogasoline", + "biodiesel", + "charcoal", + "black liquor", + ] + + electricity = ["electricity"] + + heat = ["heat", "direct use of geothermal heat", "direct use of solar thermal heat"] + + if sector == "industry": + return gas_fuels, oil_fuels, biomass_fuels, coal_fuels, heat, electricity + else: + logger.info(f"No fuels available for sector {sector}") + return np.nan + + +def modify_commodity(commodity): + if commodity.strip() == "Hrad coal": + commodity = "Hard coal" + elif commodity.strip().casefold() == "coke oven gas": + commodity = "Coke-oven gas" + elif commodity.strip().casefold() == "coke oven coke": + commodity = "Coke-oven coke" + elif commodity.strip() == "Liquified Petroleum Gas (LPG)": + commodity = "Liquefied Petroleum Gas (LPG)" + elif commodity.strip() == "Gas Oil/Diesel Oil": + commodity = "Gas Oil/ Diesel Oil" + elif commodity.strip() == "Lignite brown coal- recoverable resources": + commodity = "Lignite brown coal - recoverable resources" + return commodity.strip().casefold() + + +def safe_divide(numerator, denominator): + """ + Safe division function that returns NaN when the denominator is zero. + """ + if denominator != 0.0: + return numerator / denominator + else: + logging.warning( + f"Division by zero: {numerator} / {denominator}, returning NaN." 
+ ) + return np.nan diff --git a/test/test_helpers.py b/test/test_helpers.py index a7981ede8..15bba3a5d 100644 --- a/test/test_helpers.py +++ b/test/test_helpers.py @@ -9,10 +9,16 @@ import pathlib from test.conftest import _content_temp_file, _name_temp_file, get_temp_file +import numpy as np +import pandas as pd + from scripts._helpers import ( + aggregate_fuels, change_to_script_dir, + country_name_2_two_digits, get_abs_path, get_basename_abs_path, + get_conv_factors, get_current_directory_path, get_dirname_path, get_path, @@ -20,12 +26,164 @@ get_relative_path, is_directory_path, is_file_path, + modify_commodity, path_exists, + safe_divide, + sets_path_to_root, + three_2_two_digits_country, + two_2_three_digits_country, + two_digits_2_name_country, ) path_cwd = str(pathlib.Path.cwd()) +original_commodity_data = [ + "Biogases", + "Fuelwood", + "of which: fishing", + "Natural gas liquids", + "Naphtha", + "Motor Gasoline", + "Motor gasoline", + "Gasoline-type jet fuel", + "Peat products", + "Peat Products", + "Direct use of geothermal heat", + "Additives and Oxygenates", + "Electricity", + "Animal waste", + "animal waste", + "Refinery gas", + "Refinery Gas", + "Fuel oil", + "Oil shale", + "Oil Shale", + "Lignite", + "Falling water", + "Petroleum coke", + "Petroleum Coke", + "Aviation gasoline", + "Ethane", + "Natural gas (including LNG)", + "Natural gas", + "Natural Gas (including LNG)", + "Other bituminous coal", + "Paraffin waxes", + "Hard coal", + "Coal", + "Hrad coal", + "Coke Oven Gas", + "Gasworks Gas", + "Brown coal briquettes", + "Brown Coal Briquettes", + "Liquefied petroleum gas (LPG)", + "Liquified Petroleum Gas (LPG)", + "Sub-bituminous coal", + "Kerosene-type Jet Fuel", + "Charcoal", + "Heat", + "Gas coke", + "Gas Coke", + "Patent fuel", + "Peat (for fuel use)", + "Peat", + "Coal Tar", + "Biogasoline", + "Coking coal", + "Electricity generating capacity", + "Anthracite", + "Coke oven coke", + "Coke-oven coke", + "Coke Oven Coke", + "Conventional crude oil", + "Crude petroleum", + "Brown coal", + "Lignite brown coal", + "Lignite brown coal- recoverable resources", + "Biodiesel", + "Lubricants", + "Black Liquor", + "Gas Oil/ Diesel Oil", + "Gas Oil/ Diesel Oil ", + "Gas Oil/Diesel Oil", + "Bagasse", + "Direct use of solar thermal heat", + "Bio jet kerosene", + "Blast Furnace Gas", + "Blast furnace gas", + "Bitumen", +] + +modified_commodity_data = [ + "biogases", + "fuelwood", + "of which: fishing", + "natural gas liquids", + "naphtha", + "motor gasoline", + "gasoline-type jet fuel", + "peat products", + "direct use of geothermal heat", + "additives and oxygenates", + "electricity", + "animal waste", + "refinery gas", + "fuel oil", + "oil shale", + "lignite", + "falling water", + "petroleum coke", + "aviation gasoline", + "ethane", + "natural gas (including lng)", + "natural gas", + "other bituminous coal", + "paraffin waxes", + "hard coal", + "coal", + "coke-oven gas", + "gasworks gas", + "brown coal briquettes", + "liquefied petroleum gas (lpg)", + "sub-bituminous coal", + "kerosene-type jet fuel", + "charcoal", + "heat", + "gas coke", + "patent fuel", + "peat (for fuel use)", + "peat", + "coal tar", + "biogasoline", + "coking coal", + "electricity generating capacity", + "anthracite", + "coke-oven coke", + "conventional crude oil", + "crude petroleum", + "brown coal", + "lignite brown coal", + "lignite brown coal - recoverable resources", + "biodiesel", + "lubricants", + "black liquor", + "gas oil/ diesel oil", + "bagasse", + "direct use of solar thermal heat", + "bio jet 
kerosene", + "blast furnace gas", + "bitumen", +] + +original_commodity_dataframe = pd.DataFrame( + original_commodity_data, columns=["Commodity"] +) +modified_commodity_dataframe = pd.DataFrame( + modified_commodity_data, columns=["Commodity"] +) + + def test_get_abs_path(): """ Verify the path returned by get_abs_path() @@ -156,3 +314,152 @@ def test_path_exists(get_temp_file): assert path_exists(path) assert path_exists(pathlib_path) assert path_exists(path) == os.path.exists(path) + + +def test_two_2_three_digits_country(): + """ + Verify the conversion from two-digit to three-digit country code. + """ + # Afghanistan + assert two_2_three_digits_country("AF") == "AFG" + # American Samoa + assert two_2_three_digits_country("AS") == "ASM" + # Aruba + assert two_2_three_digits_country("AW") == "ABW" + # Germany + assert two_2_three_digits_country("DE") == "DEU" + # Micronesia (Federated States of) + assert two_2_three_digits_country("FM") == "FSM" + + +def test_three_2_two_digits_country(): + """ + Verify the conversion from three-digit to two-digit country code. + """ + # Afghanistan + assert "AF" == three_2_two_digits_country("AFG") + # American Samoa + assert "AS" == three_2_two_digits_country("ASM") + # Aruba + assert "AW" == three_2_two_digits_country("ABW") + # Germany + assert "DE" == three_2_two_digits_country("DEU") + # Micronesia (Federated States of) + assert "FM" == three_2_two_digits_country("FSM") + + +def test_two_digits_2_name_country(): + """ + Verify the conversion from two-digit country code to country name. + """ + # Micronesia (Federated States of) + assert "Micronesia, Fed. Sts." == two_digits_2_name_country("FM") + assert "Federated States of Micronesia" == two_digits_2_name_country( + "FM", name_string="name_official" + ) + assert "States of Micronesia" == two_digits_2_name_country( + "FM", name_string="name_official", remove_start_words=["Federated "] + ) + # Democratic Republic of the Congo + assert "DR Congo" == two_digits_2_name_country("CD") + assert "Democratic Republic of the Congo" == two_digits_2_name_country( + "CD", name_string="name_official" + ) + assert "Republic of the Congo" == two_digits_2_name_country( + "CD", name_string="name_official", remove_start_words=["Democratic "] + ) + + +def test_country_name_2_two_digits(): + """ + Verify the conversion from country name to two-digit country code. + """ + # Afghanistan + assert "AF" == country_name_2_two_digits("Afghanistan") + # American Samoa + assert "AS" == country_name_2_two_digits("American Samoa") + # Aruba + assert "AW" == country_name_2_two_digits("Aruba") + # Germany + assert "DE" == country_name_2_two_digits("Germany") + # Micronesia (Federated States of) + assert "FM" == country_name_2_two_digits("Micronesia") + + +def test_safe_divide(): + """ + Verify that the method safe_divide prevents divisions by vanishing + denominator. + """ + assert safe_divide(3.0, 2.0) == 1.5 + assert np.isnan(safe_divide(3.0, 0.0)) + + +def test_get_conv_factors(): + """ + Verify that the conversion factors returned by get_conv_factors are + correct. 
+ """ + conversion_factors_dict = get_conv_factors("industry") + assert conversion_factors_dict["additives and oxygenates"] == 0.008333 + assert conversion_factors_dict["anthracite"] == 0.005 + assert conversion_factors_dict["aviation gasoline"] == 0.01230 + assert conversion_factors_dict["bagasse"] == 0.002144 + assert conversion_factors_dict["biodiesel"] == 0.01022 + assert conversion_factors_dict["biogasoline"] == 0.007444 + assert conversion_factors_dict["bio jet kerosene"] == 0.011111 + assert conversion_factors_dict["bitumen"] == 0.01117 + assert conversion_factors_dict["brown coal"] == 0.003889 + assert conversion_factors_dict["brown coal briquettes"] == 0.00575 + assert conversion_factors_dict["charcoal"] == 0.00819 + assert conversion_factors_dict["coal tar"] == 0.007778 + assert conversion_factors_dict["coke-oven coke"] == 0.0078334 + assert conversion_factors_dict["coke-oven gas"] == 0.000277 + assert conversion_factors_dict["coking coal"] == 0.007833 + assert conversion_factors_dict["conventional crude oil"] == 0.01175 + assert conversion_factors_dict["crude petroleum"] == 0.011750 + assert conversion_factors_dict["ethane"] == 0.01289 + assert conversion_factors_dict["fuel oil"] == 0.01122 + assert conversion_factors_dict["fuelwood"] == 0.00254 + assert conversion_factors_dict["gas coke"] == 0.007326 + assert conversion_factors_dict["gas oil/ diesel oil"] == 0.01194 + assert conversion_factors_dict["gasoline-type jet fuel"] == 0.01230 + assert conversion_factors_dict["hard coal"] == 0.007167 + assert conversion_factors_dict["kerosene-type jet fuel"] == 0.01225 + assert conversion_factors_dict["lignite"] == 0.003889 + assert conversion_factors_dict["liquefied petroleum gas (lpg)"] == 0.01313 + assert conversion_factors_dict["lubricants"] == 0.011166 + assert conversion_factors_dict["motor gasoline"] == 0.01230 + assert conversion_factors_dict["naphtha"] == 0.01236 + assert conversion_factors_dict["natural gas liquids"] == 0.01228 + assert conversion_factors_dict["oil shale"] == 0.00247 + assert conversion_factors_dict["other bituminous coal"] == 0.005556 + assert conversion_factors_dict["paraffin waxes"] == 0.01117 + assert conversion_factors_dict["patent fuel"] == 0.00575 + assert conversion_factors_dict["peat"] == 0.00271 + assert conversion_factors_dict["peat products"] == 0.00271 + assert conversion_factors_dict["petroleum coke"] == 0.009028 + assert conversion_factors_dict["refinery gas"] == 0.01375 + assert conversion_factors_dict["sub-bituminous coal"] == 0.005555 + assert np.isnan(get_conv_factors("non-industry")) + + +def test_modify_commodity(): + """ + Verify that modify_commodity returns the commodities in wished format. + """ + new_commodity_dataframe = pd.DataFrame() + new_commodity_dataframe["Commodity"] = ( + original_commodity_dataframe["Commodity"].map(modify_commodity).unique() + ) + df = new_commodity_dataframe.compare(modified_commodity_dataframe) + boolean_flag = df.empty + if not boolean_flag: + assert False + + +def test_aggregate_fuels(): + """ + Verify what is returned by aggregate_fuels. 
+ """ + assert np.isnan(aggregate_fuels("non-industry")) From 0c9dd21d019534447288b148ee4bd9fe988d3021 Mon Sep 17 00:00:00 2001 From: Fabrizio Finozzi Date: Wed, 5 Jun 2024 17:19:09 +0200 Subject: [PATCH 08/40] change to _helpers.py methods --- scripts/_helpers.py | 15 ++++++++------- scripts/make_summary.py | 2 +- scripts/solve_network.py | 2 +- test/test_helpers.py | 7 ++++++- 4 files changed, 16 insertions(+), 10 deletions(-) diff --git a/scripts/_helpers.py b/scripts/_helpers.py index 9a421aa39..a9b4afe07 100644 --- a/scripts/_helpers.py +++ b/scripts/_helpers.py @@ -874,19 +874,20 @@ def get_path_size(path): return pathlib.Path(path).stat().st_size -def build_directory(path): +def build_directory(path, just_parent_directory=True): """ It creates recursively the directory and its leaf directories. Parameters: path (str): The path to the file + just_parent_directory (Boolean) : it creates just the parent directory """ # Check if the provided path points to a directory - if is_directory_path(path): - pathlib.Path(path).mkdir(parents=True, exist_ok=True) - else: + if just_parent_directory: pathlib.Path(path).parent.mkdir(parents=True, exist_ok=True) + else: + pathlib.Path(path).mkdir(parents=True, exist_ok=True) def change_to_script_dir(path): @@ -1054,14 +1055,14 @@ def download_gadm(country_code, update=False, out_logging=False): f"Stage 4/4: {gadm_filename} of country {two_digits_2_name_country(country_code)} does not exist, downloading to {gadm_input_file_zip}" ) # create data/osm directory - os.makedirs(os.path.dirname(gadm_input_file_zip), exist_ok=True) + build_directory(gadm_input_file_zip) with requests.get(gadm_url, stream=True) as r: with open(gadm_input_file_zip, "wb") as f: shutil.copyfileobj(r.raw, f) with zipfile.ZipFile(gadm_input_file_zip, "r") as zip_ref: - zip_ref.extractall(os.path.dirname(gadm_input_file_zip)) + zip_ref.extractall(get_dirname_path(gadm_input_file_zip)) return gadm_input_file_gpkg, gadm_filename @@ -1197,7 +1198,7 @@ def override_component_attrs(directory): for component, list_name in components.list_name.items(): fn = f"{directory}/{list_name}.csv" - if os.path.isfile(fn): + if is_file_path(fn): overrides = pd.read_csv(fn, index_col=0, na_values="n/a") attrs[component] = overrides.combine_first(attrs[component]) diff --git a/scripts/make_summary.py b/scripts/make_summary.py index c74938027..7bc3aa86c 100644 --- a/scripts/make_summary.py +++ b/scripts/make_summary.py @@ -532,7 +532,7 @@ def make_summaries(networks_dict, inputs, cost_config, elec_config, country="all def to_csv(dfs, dir): - build_directory(dir) + build_directory(dir, just_parent_directory=False) for key, df in dfs.items(): df.to_csv(get_path(dir, f"{key}.csv")) diff --git a/scripts/solve_network.py b/scripts/solve_network.py index 8a84f9499..6f7dfdcdb 100755 --- a/scripts/solve_network.py +++ b/scripts/solve_network.py @@ -559,7 +559,7 @@ def solve_network(n, config, opts="", **kwargs): tmpdir = snakemake.params.solving.get("tmpdir") if tmpdir is not None: - build_directory(tmpdir) + build_directory(tmpdir, just_parent_directory=False) opts = snakemake.wildcards.opts.split("-") solve_opts = snakemake.params.solving["options"] diff --git a/test/test_helpers.py b/test/test_helpers.py index 15bba3a5d..c9c176292 100644 --- a/test/test_helpers.py +++ b/test/test_helpers.py @@ -14,6 +14,7 @@ from scripts._helpers import ( aggregate_fuels, + build_directory, change_to_script_dir, country_name_2_two_digits, get_abs_path, @@ -29,7 +30,6 @@ modify_commodity, path_exists, safe_divide, - 
sets_path_to_root, three_2_two_digits_country, two_2_three_digits_country, two_digits_2_name_country, @@ -184,6 +184,11 @@ ) +def test_build_directory(tmpdir): + + build_directory(tmpdir) + + def test_get_abs_path(): """ Verify the path returned by get_abs_path() From c64be540e56330c405cf0b0dde4645e7221fd4da Mon Sep 17 00:00:00 2001 From: Fabrizio Finozzi Date: Wed, 5 Jun 2024 19:38:57 +0200 Subject: [PATCH 09/40] add new unit test for build_directory --- scripts/_helpers.py | 4 +++- test/conftest.py | 11 +++++++++++ test/test_helpers.py | 43 ++++++++++++++++++++++++++++++++++++++++--- 3 files changed, 54 insertions(+), 4 deletions(-) diff --git a/scripts/_helpers.py b/scripts/_helpers.py index a9b4afe07..403a359fc 100644 --- a/scripts/_helpers.py +++ b/scripts/_helpers.py @@ -880,7 +880,9 @@ def build_directory(path, just_parent_directory=True): Parameters: path (str): The path to the file - just_parent_directory (Boolean) : it creates just the parent directory + just_parent_directory (Boolean): given a path dir/subdir + True: it creates just the parent directory dir + False: it creates the full directory tree dir/subdir """ # Check if the provided path points to a directory diff --git a/test/conftest.py b/test/conftest.py index 3ba165e42..fa8cbd171 100644 --- a/test/conftest.py +++ b/test/conftest.py @@ -6,11 +6,14 @@ # -*- coding: utf-8 -*- import pathlib +import shutil import pytest _content_temp_file = "content" _name_temp_file = "hello.txt" +_temp_content_dir = "temp_content_dir" +_sub_temp_content_dir = "sub_temp_content_dir" @pytest.fixture(scope="function") @@ -19,3 +22,11 @@ def get_temp_file(tmpdir): p.write(_content_temp_file) yield p pathlib.Path(p).unlink(missing_ok=True) + + +@pytest.fixture(scope="function") +def get_temp_folder(tmpdir): + temp_content_dir = tmpdir.join(_temp_content_dir) + sub_temp_content_dir = temp_content_dir.join(_sub_temp_content_dir) + yield sub_temp_content_dir + shutil.rmtree(str(sub_temp_content_dir)) diff --git a/test/test_helpers.py b/test/test_helpers.py index c9c176292..c72a02434 100644 --- a/test/test_helpers.py +++ b/test/test_helpers.py @@ -7,7 +7,13 @@ import os import pathlib -from test.conftest import _content_temp_file, _name_temp_file, get_temp_file +from test.conftest import ( + _content_temp_file, + _name_temp_file, + _sub_temp_content_dir, + _temp_content_dir, + get_temp_file, +) import numpy as np import pandas as pd @@ -19,6 +25,7 @@ country_name_2_two_digits, get_abs_path, get_basename_abs_path, + get_basename_path, get_conv_factors, get_current_directory_path, get_dirname_path, @@ -184,9 +191,39 @@ ) -def test_build_directory(tmpdir): +def test_build_directory(get_temp_folder, tmpdir): + """ + Verify the directory tree returned by build_directory() + """ + + # build_directory(path, just_parent_directory=True) is + # equivalent to os.makedirs(os.path.dirname(path), exist_ok=True) + # Given in fact a path tmpdir/temp_content_dir/sub_temp_content_dir + # it will create just tmpdir/temp_content_dir/ + build_directory(get_temp_folder, just_parent_directory=True) + just_parent_list = [] + for root, dirs, files in os.walk(tmpdir): + just_parent_list.append(str(get_path(root))) - build_directory(tmpdir) + assert len(just_parent_list) == 2 + assert just_parent_list[0] == str(tmpdir) + assert just_parent_list[1] == str(tmpdir.join(_temp_content_dir)) + + # build_directory(path, just_parent_directory=False) is + # equivalent to os.makedirs(path, exist_ok=True) + # Given in fact a path tmpdir/temp_content_dir/sub_temp_content_dir + # it 
will create the full path tmpdir/temp_content_dir/sub_temp_content_dir + build_directory(get_temp_folder, just_parent_directory=False) + full_tree_list = [] + for root, dirs, files in os.walk(tmpdir): + full_tree_list.append(str(get_path(root))) + + assert len(full_tree_list) == 3 + assert full_tree_list[0] == str(tmpdir) + assert full_tree_list[1] == str(tmpdir.join(_temp_content_dir)) + assert full_tree_list[2] == str( + tmpdir.join(_temp_content_dir, _sub_temp_content_dir) + ) def test_get_abs_path(): From 924a9175469ffa77596282f32a1cc0b2dc8fbb6d Mon Sep 17 00:00:00 2001 From: Fabrizio Finozzi Date: Wed, 5 Jun 2024 20:06:31 +0200 Subject: [PATCH 10/40] remove .github/workflows/main.yml --- .github/workflows/main.yml | 24 ++++++++++++------------ 1 file changed, 12 insertions(+), 12 deletions(-) diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml index 81f8ef1d1..dfc3587a4 100644 --- a/.github/workflows/main.yml +++ b/.github/workflows/main.yml @@ -1,14 +1,14 @@ on: - push: - branches: - - main - +# push: +# branches: +# - main +# jobs: - contrib-readme-job: - runs-on: ubuntu-latest - name: A job to automate contrib in readme - steps: - - name: Contribute List - uses: akhilmhdh/contributors-readme-action@v2.3.6 - env: - GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} +# contrib-readme-job: +# runs-on: ubuntu-latest +# name: A job to automate contrib in readme +# steps: +# - name: Contribute List +# uses: akhilmhdh/contributors-readme-action@v2.3.6 +# env: +# GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} From d6f7d9970285dbad89d032971858f90d5d5170cc Mon Sep 17 00:00:00 2001 From: Fabrizio Finozzi Date: Thu, 6 Jun 2024 10:57:36 +0200 Subject: [PATCH 11/40] modify unit test build_directory --- test/test_helpers.py | 67 +++++++++++++++++++++++++++++++------------- 1 file changed, 48 insertions(+), 19 deletions(-) diff --git a/test/test_helpers.py b/test/test_helpers.py index c72a02434..d704ac492 100644 --- a/test/test_helpers.py +++ b/test/test_helpers.py @@ -7,6 +7,7 @@ import os import pathlib +import shutil from test.conftest import ( _content_temp_file, _name_temp_file, @@ -194,37 +195,65 @@ def test_build_directory(get_temp_folder, tmpdir): """ Verify the directory tree returned by build_directory() + + Please note: + -) build_directory(path, just_parent_directory=True) is equivalent to os.makedirs(os.path.dirname(path)). + Given a path tmpdir/temp_content_dir/sub_temp_content_dir, it will create just tmpdir/temp_content_dir/ + -) build_directory(path, just_parent_directory=False) is equivalent to os.makedirs(path). Given a path + tmpdir/temp_content_dir/sub_temp_content_dir, it will create tmpdir/temp_content_dir/sub_temp_content_dir """ - # build_directory(path, just_parent_directory=True) is - # equivalent to os.makedirs(os.path.dirname(path), exist_ok=True) - # Given in fact a path tmpdir/temp_content_dir/sub_temp_content_dir - # it will create just tmpdir/temp_content_dir/ + # test with pathlib build_directory(get_temp_folder, just_parent_directory=True) - just_parent_list = [] + just_parent_list_pathlib = [] + for root, dirs, files in os.walk(tmpdir): + just_parent_list_pathlib.append(str(get_path(root))) + + assert len(just_parent_list_pathlib) == 2 + assert just_parent_list_pathlib[0] == str(tmpdir) + assert just_parent_list_pathlib[1] == str(tmpdir.join(_temp_content_dir)) + + # remove the temporary folder tmpdir/temp_content_dir/ + shutil.rmtree(pathlib.Path(tmpdir, _temp_content_dir)) + + # test with os.makedirs. 
From 924a9175469ffa77596282f32a1cc0b2dc8fbb6d Mon Sep 17 00:00:00 2001
From: Fabrizio Finozzi
Date: Wed, 5 Jun 2024 20:06:31 +0200
Subject: [PATCH 10/40] disable .github/workflows/main.yml

---
 .github/workflows/main.yml | 24 ++++++++++++------------
 1 file changed, 12 insertions(+), 12 deletions(-)

diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml
index 81f8ef1d1..dfc3587a4 100644
--- a/.github/workflows/main.yml
+++ b/.github/workflows/main.yml
@@ -1,14 +1,14 @@
 on:
-  push:
-    branches:
-      - main
-
+# push:
+#   branches:
+#     - main
+#
 jobs:
-  contrib-readme-job:
-    runs-on: ubuntu-latest
-    name: A job to automate contrib in readme
-    steps:
-      - name: Contribute List
-        uses: akhilmhdh/contributors-readme-action@v2.3.6
-        env:
-          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
+# contrib-readme-job:
+#   runs-on: ubuntu-latest
+#   name: A job to automate contrib in readme
+#   steps:
+#     - name: Contribute List
+#       uses: akhilmhdh/contributors-readme-action@v2.3.6
+#       env:
+#         GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}

From d6f7d9970285dbad89d032971858f90d5d5170cc Mon Sep 17 00:00:00 2001
From: Fabrizio Finozzi
Date: Thu, 6 Jun 2024 10:57:36 +0200
Subject: [PATCH 11/40] modify unit test build_directory

---
 test/test_helpers.py | 67 +++++++++++++++++++++++++++++++-------------
 1 file changed, 48 insertions(+), 19 deletions(-)

diff --git a/test/test_helpers.py b/test/test_helpers.py
index c72a02434..d704ac492 100644
--- a/test/test_helpers.py
+++ b/test/test_helpers.py
@@ -7,6 +7,7 @@
 
 import os
 import pathlib
+import shutil
 from test.conftest import (
     _content_temp_file,
     _name_temp_file,
@@ -194,37 +195,65 @@
 def test_build_directory(get_temp_folder, tmpdir):
     """
     Verify the directory tree created by build_directory()
+
+    Please note:
+    -) build_directory(path, just_parent_directory=True) is equivalent to os.makedirs(os.path.dirname(path)).
+       Given a path tmpdir/temp_content_dir/sub_temp_content_dir, it will create just tmpdir/temp_content_dir/
+    -) build_directory(path, just_parent_directory=False) is equivalent to os.makedirs(path). Given a path
+       tmpdir/temp_content_dir/sub_temp_content_dir, it will create tmpdir/temp_content_dir/sub_temp_content_dir
     """
 
-    # build_directory(path, just_parent_directory=True) is
-    # equivalent to os.makedirs(os.path.dirname(path), exist_ok=True)
-    # Given a path tmpdir/temp_content_dir/sub_temp_content_dir
-    # it will create just tmpdir/temp_content_dir/
+    # test with pathlib
     build_directory(get_temp_folder, just_parent_directory=True)
-    just_parent_list = []
+    just_parent_list_pathlib = []
     for root, dirs, files in os.walk(tmpdir):
-        just_parent_list.append(str(get_path(root)))
+        just_parent_list_pathlib.append(str(get_path(root)))
 
-    assert len(just_parent_list) == 2
-    assert just_parent_list[0] == str(tmpdir)
-    assert just_parent_list[1] == str(tmpdir.join(_temp_content_dir))
+    assert len(just_parent_list_pathlib) == 2
+    assert just_parent_list_pathlib[0] == str(tmpdir)
+    assert just_parent_list_pathlib[1] == str(tmpdir.join(_temp_content_dir))
+
+    # remove the temporary folder tmpdir/temp_content_dir/
+    shutil.rmtree(pathlib.Path(tmpdir, _temp_content_dir))
+
+    # test with os.makedirs. Please note that with exist_ok=False,
+    # a FileExistsError is raised if the target directory
+    # already exists. Hence, setting exist_ok=False ensures
+    # that the removal with shutil.rmtree was successful
+    os.makedirs(os.path.dirname(get_temp_folder), exist_ok=False)
+    just_parent_list_os = []
+    for root, dirs, files in os.walk(tmpdir):
+        just_parent_list_os.append(str(get_path(root)))
+
+    assert just_parent_list_pathlib == just_parent_list_os
 
-    # build_directory(path, just_parent_directory=False) is
-    # equivalent to os.makedirs(path, exist_ok=True)
-    # Given a path tmpdir/temp_content_dir/sub_temp_content_dir
-    # it will create the full path tmpdir/temp_content_dir/sub_temp_content_dir
+    # test with pathlib
     build_directory(get_temp_folder, just_parent_directory=False)
-    full_tree_list = []
+    full_tree_list_pathlib = []
     for root, dirs, files in os.walk(tmpdir):
-        full_tree_list.append(str(get_path(root)))
+        full_tree_list_pathlib.append(str(get_path(root)))
 
-    assert len(full_tree_list) == 3
-    assert full_tree_list[0] == str(tmpdir)
-    assert full_tree_list[1] == str(tmpdir.join(_temp_content_dir))
-    assert full_tree_list[2] == str(
+    assert len(full_tree_list_pathlib) == 3
+    assert full_tree_list_pathlib[0] == str(tmpdir)
+    assert full_tree_list_pathlib[1] == str(tmpdir.join(_temp_content_dir))
+    assert full_tree_list_pathlib[2] == str(
         tmpdir.join(_temp_content_dir, _sub_temp_content_dir)
     )
 
+    # remove the temporary folder tmpdir/temp_content_dir/*
+    shutil.rmtree(pathlib.Path(tmpdir, _temp_content_dir))
+
+    # test with os.makedirs. Please note that with exist_ok=False,
+    # a FileExistsError is raised if the target directory
+    # already exists. Hence, setting exist_ok=False ensures
+    # that the removal with shutil.rmtree was successful
+    os.makedirs(get_temp_folder, exist_ok=False)
+    full_tree_list_os = []
+    for root, dirs, files in os.walk(tmpdir):
+        full_tree_list_os.append(str(get_path(root)))
+
+    assert full_tree_list_os == full_tree_list_pathlib
+
 
 def test_get_abs_path():
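The exist_ok contract that patch 11 leans on can be checked in isolation; the snippet below is a short, self-contained sketch with illustrative paths:

import os
import pathlib
import tempfile

with tempfile.TemporaryDirectory() as tmp:
    target = os.path.join(tmp, "dir", "subdir")

    os.makedirs(target)  # creates dir/ and dir/subdir
    os.makedirs(target, exist_ok=True)  # silently succeeds a second time
    try:
        os.makedirs(target, exist_ok=False)
    except FileExistsError:
        print("exist_ok=False raises once the tree already exists")

    # the pathlib equivalent of os.makedirs(target, exist_ok=True)
    pathlib.Path(target).mkdir(parents=True, exist_ok=True)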
From 2fdd90ecd78778137ff7f289d1abc9a2a022c808 Mon Sep 17 00:00:00 2001
From: Fabrizio Finozzi
Date: Thu, 6 Jun 2024 11:27:58 +0200
Subject: [PATCH 12/40] comment out test_prepare_network, add empty ci-unit-test.yaml

---
 .github/workflows/ci-unit-test.yaml |  0
 test/test_helpers.py                |  1 -
 test/test_prepare_network.py        | 10 ++++------
 3 files changed, 4 insertions(+), 7 deletions(-)
 create mode 100644 .github/workflows/ci-unit-test.yaml

diff --git a/.github/workflows/ci-unit-test.yaml b/.github/workflows/ci-unit-test.yaml
new file mode 100644
index 000000000..e69de29bb
diff --git a/test/test_helpers.py b/test/test_helpers.py
index d704ac492..6b68b0906 100644
--- a/test/test_helpers.py
+++ b/test/test_helpers.py
@@ -26,7 +26,6 @@
     country_name_2_two_digits,
     get_abs_path,
     get_basename_abs_path,
-    get_basename_path,
     get_conv_factors,
     get_current_directory_path,
     get_dirname_path,
diff --git a/test/test_prepare_network.py b/test/test_prepare_network.py
index 9bad7c220..3181b444e 100644
--- a/test/test_prepare_network.py
+++ b/test/test_prepare_network.py
@@ -5,9 +5,7 @@
 
 # -*- coding: utf-8 -*-
 
-from scripts.prepare_network import download_emission_data
-
-
-def test_download_emission_data():
-    filename = download_emission_data()
-    assert filename == "v60_CO2_excl_short-cycle_org_C_1970_2018.xls"
+# from scripts.prepare_network import download_emission_data
+# def test_download_emission_data():
+#     filename = download_emission_data()
+#     assert filename == "v60_CO2_excl_short-cycle_org_C_1970_2018.xls"

From 1498ce4514c8c327b971801be49bd4050ed6b664 Mon Sep 17 00:00:00 2001
From: Fabrizio Finozzi
Date: Thu, 6 Jun 2024 11:47:57 +0200
Subject: [PATCH 13/40] add ci-unit-test.yaml

---
 .github/workflows/ci-unit-test.yaml | 55 +++++++++++++++++++++++++++++
 1 file changed, 55 insertions(+)

diff --git a/.github/workflows/ci-unit-test.yaml b/.github/workflows/ci-unit-test.yaml
index e69de29bb..71796061d 100644
--- a/.github/workflows/ci-unit-test.yaml
+++ b/.github/workflows/ci-unit-test.yaml
@@ -0,0 +1,55 @@
+name: CI-unit-test
+
+on:
+  push:
+    branches:
+    - main
+  pull_request:
+    branches:
+    - main
+  schedule:
+  - cron: "0 5 * * TUE"
+
+jobs:
+
+  test-with-pypi:
+
+    runs-on: ${{ matrix.os }}
+    strategy:
+      fail-fast: false
+      matrix:
+        python-version:
+        - 3.9
+        - "3.10"
+        - "3.11"
+        - "3.12"
+        os:
+        - ubuntu-latest
+        - macos-latest
+        - windows-latest
+    env:
+      MPLBACKEND: Agg  # https://github.com/orgs/community/discussions/26434
+
+    steps:
+
+    - uses: actions/checkout@v4
+
+    - name: Set up Python ${{ matrix.python-version }}
+      uses: actions/setup-python@v5
+      with:
+        python-version: ${{ matrix.python-version }}
+
+    - name: Install dependencies
+      run: |
+        python -m pip install --upgrade pip
+        python -m pip install flake8 pytest
+        pip install .[dev]
+
+    - name: Lint with flake8
+      run: |
+        # stop the build if there are Python syntax errors or undefined names
+        flake8 . 
--count --select=E9,F63,F7,F82 --show-source --statistics + + - name: Test with pytest + run: | + pytest test/ From b5158f8d42b19d85080c8a55c8b6286db3fac147 Mon Sep 17 00:00:00 2001 From: Fabrizio Finozzi Date: Thu, 6 Jun 2024 13:44:24 +0200 Subject: [PATCH 14/40] modify ci-unit-test.yaml --- .github/workflows/ci-unit-test.yaml | 1 - 1 file changed, 1 deletion(-) diff --git a/.github/workflows/ci-unit-test.yaml b/.github/workflows/ci-unit-test.yaml index 71796061d..abb291580 100644 --- a/.github/workflows/ci-unit-test.yaml +++ b/.github/workflows/ci-unit-test.yaml @@ -43,7 +43,6 @@ jobs: run: | python -m pip install --upgrade pip python -m pip install flake8 pytest - pip install .[dev] - name: Lint with flake8 run: | From a3bf6cc565f9dc6f9301dec59b9540befb40f910 Mon Sep 17 00:00:00 2001 From: Fabrizio Finozzi Date: Thu, 6 Jun 2024 13:50:07 +0200 Subject: [PATCH 15/40] remove match statement from monte_carlo.py --- scripts/monte_carlo.py | 49 ++++++++++++++++++++++-------------------- 1 file changed, 26 insertions(+), 23 deletions(-) diff --git a/scripts/monte_carlo.py b/scripts/monte_carlo.py index 22f8874f8..3eadb00ca 100644 --- a/scripts/monte_carlo.py +++ b/scripts/monte_carlo.py @@ -237,29 +237,32 @@ def rescale_distribution( dist = value.get("type") params = value.get("args") - match dist: - case "uniform": - l_bounds, u_bounds = params - latin_hypercube[:, idx] = minmax_scale( - latin_hypercube[:, idx], feature_range=(l_bounds, u_bounds) - ) - case "normal": - mean, std = params - latin_hypercube[:, idx] = norm.ppf(latin_hypercube[:, idx], mean, std) - case "lognormal": - shape = params[0] - latin_hypercube[:, idx] = lognorm.ppf(latin_hypercube[:, idx], s=shape) - case "triangle": - mid_point = params[0] - latin_hypercube[:, idx] = triang.ppf(latin_hypercube[:, idx], mid_point) - case "beta": - a, b = params - latin_hypercube[:, idx] = beta.ppf(latin_hypercube[:, idx], a, b) - case "gamma": - shape, scale = params - latin_hypercube[:, idx] = gamma.ppf( - latin_hypercube[:, idx], shape, scale - ) + if dist == "uniform": + l_bounds, u_bounds = params + latin_hypercube[:, idx] = minmax_scale( + latin_hypercube[:, idx], feature_range=(l_bounds, u_bounds) + ) + elif dist == "normal": + mean, std = params + latin_hypercube[:, idx] = norm.ppf(latin_hypercube[:, idx], mean, std) + elif dist == "lognormal": + shape = params[0] + latin_hypercube[:, idx] = lognorm.ppf(latin_hypercube[:, idx], s=shape) + elif dist == "triangle": + mid_point = params[0] + latin_hypercube[:, idx] = triang.ppf(latin_hypercube[:, idx], mid_point) + elif dist == "beta": + a, b = params + latin_hypercube[:, idx] = beta.ppf(latin_hypercube[:, idx], a, b) + elif dist == "gamma": + shape, scale = params + latin_hypercube[:, idx] = gamma.ppf(latin_hypercube[:, idx], shape, scale) + else: + exception_message = ( + f"The value {dist} is not among the allowed ones: uniform, normal, lognormal, " + f"triangle, beta, gamma" + ) + raise NotImplementedError(exception_message) # samples space needs to be from 0 to 1 mm = MinMaxScaler(feature_range=(0, 1), clip=True) From 3971b02c9cf85f294ace6ab43b7f711b9f68a51f Mon Sep 17 00:00:00 2001 From: Fabrizio Finozzi Date: Thu, 6 Jun 2024 14:44:35 +0200 Subject: [PATCH 16/40] add unit tests at the end of current workflows --- .github/workflows/ci-linux.yaml | 5 +++ .github/workflows/ci-mac.yaml | 5 +++ .github/workflows/ci-unit-test.yaml | 54 ----------------------------- .github/workflows/ci-windows.yaml | 5 +++ 4 files changed, 15 insertions(+), 54 deletions(-) delete mode 100644 
.github/workflows/ci-unit-test.yaml diff --git a/.github/workflows/ci-linux.yaml b/.github/workflows/ci-linux.yaml index 2f09e347b..8c99cf581 100644 --- a/.github/workflows/ci-linux.yaml +++ b/.github/workflows/ci-linux.yaml @@ -82,6 +82,11 @@ jobs: cp test/tmp/config.landlock_tmp.yaml config.yaml snakemake --cores all solve_all_networks --forceall + - name: Unit tests + run: | + python -m pip install pytest + pytest test/ + # - name: Test plotting and summaries # run: | # snakemake --cores all plot_all_p_nom diff --git a/.github/workflows/ci-mac.yaml b/.github/workflows/ci-mac.yaml index e766539f7..b042f8b3d 100644 --- a/.github/workflows/ci-mac.yaml +++ b/.github/workflows/ci-mac.yaml @@ -68,6 +68,11 @@ jobs: cp test/tmp/config.tutorial_noprogress_tmp.yaml config.yaml snakemake --cores all solve_all_networks + - name: Unit tests + run: | + python -m pip install pytest + pytest test/ + # - name: Test plotting and summaries # run: | # snakemake --cores all plot_all_p_nom diff --git a/.github/workflows/ci-unit-test.yaml b/.github/workflows/ci-unit-test.yaml deleted file mode 100644 index abb291580..000000000 --- a/.github/workflows/ci-unit-test.yaml +++ /dev/null @@ -1,54 +0,0 @@ -name: CI-unit-test - -on: - push: - branches: - - main - pull_request: - branches: - - main - schedule: - - cron: "0 5 * * TUE" - -jobs: - - test-with-pypi: - - runs-on: ${{ matrix.os }} - strategy: - fail-fast: false - matrix: - python-version: - - 3.9 - - "3.10" - - "3.11" - - "3.12" - os: - - ubuntu-latest - - macos-latest - - windows-latest - env: - MPLBACKEND: Agg # https://github.com/orgs/community/discussions/26434 - - steps: - - - uses: actions/checkout@v4 - - - name: Set up Python ${{ matrix.python-version }} - uses: actions/setup-python@v5 - with: - python-version: ${{ matrix.python-version }} - - - name: Install dependencies - run: | - python -m pip install --upgrade pip - python -m pip install flake8 pytest - - - name: Lint with flake8 - run: | - # stop the build if there are Python syntax errors or undefined names - flake8 . 
--count --select=E9,F63,F7,F82 --show-source --statistics - - - name: Test with pytest - run: | - pytest test/ diff --git a/.github/workflows/ci-windows.yaml b/.github/workflows/ci-windows.yaml index 5943cb9cb..a288b65e1 100644 --- a/.github/workflows/ci-windows.yaml +++ b/.github/workflows/ci-windows.yaml @@ -68,6 +68,11 @@ jobs: cp test/tmp/config.tutorial_noprogress_tmp.yaml config.yaml snakemake --cores all solve_all_networks + - name: Unit tests + run: | + python -m pip install pytest + pytest test/ + # - name: Test plotting and summaries # run: | # snakemake --cores all plot_all_p_nom From c059a3ab61851a9599eab7c63c1d92ad990701d1 Mon Sep 17 00:00:00 2001 From: Fabrizio Finozzi Date: Thu, 6 Jun 2024 15:15:57 +0200 Subject: [PATCH 17/40] add os.sep in test_helpers.py --- test/test_helpers.py | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/test/test_helpers.py b/test/test_helpers.py index 6b68b0906..8153ee622 100644 --- a/test/test_helpers.py +++ b/test/test_helpers.py @@ -268,7 +268,7 @@ def test_change_to_script_dir(): Verify the path returned by change_to_script_dir() """ change_to_script_dir(__file__) - assert str(pathlib.Path.cwd()) == path_cwd + "/test" + assert str(pathlib.Path.cwd()) == path_cwd + os.sep + "test" change_to_script_dir(".") assert str(pathlib.Path.cwd()) == path_cwd @@ -280,7 +280,7 @@ def test_get_dirname_path(): dir_name_file = get_dirname_path(__file__) dir_name_cwd = get_dirname_path(".") assert str(dir_name_file) == os.path.dirname(__file__) - assert str(dir_name_file) == path_cwd + "/test" + assert str(dir_name_file) == path_cwd + os.sep + "test" assert str(dir_name_cwd) == "." @@ -318,10 +318,6 @@ def test_get_path(): "sub_path_5", "file.nc", ) - assert ( - str(file_name_path_one) - == path_cwd + "/sub_path_1/sub_path_2/sub_path_3/sub_path_4/sub_path_5/file.nc" - ) assert str(path_name_path_two) == str( pathlib.Path(__file__).parent.joinpath("..", "logs", "rule.log") ) From 52b4d4c6e637cdaba2d50a4fdaf687d53212f19c Mon Sep 17 00:00:00 2001 From: Fabrizio Finozzi Date: Fri, 7 Jun 2024 12:51:54 +0200 Subject: [PATCH 18/40] from scripts --- scripts/_helpers.py | 23 ++++++----------------- scripts/add_electricity.py | 11 ++++++----- scripts/add_extra_components.py | 13 +++++++++---- scripts/augmented_line_connections.py | 13 +++++++++---- scripts/base_network.py | 10 +++++----- scripts/build_bus_regions.py | 21 ++++++++++++--------- scripts/build_cutout.py | 11 ++++++++--- scripts/build_demand_profiles.py | 12 +++++++----- scripts/build_natura_raster.py | 16 ++++++++-------- scripts/build_osm_network.py | 14 +++++++------- scripts/build_powerplants.py | 10 +++++----- scripts/build_renewable_profiles.py | 15 ++++++++------- scripts/build_shapes.py | 25 +++++++++++++------------ scripts/build_test_configs.py | 10 +++++++--- scripts/clean_osm_data.py | 7 ++++--- scripts/cluster_network.py | 27 ++++++++++++++------------- scripts/download_osm_data.py | 10 ++++++---- scripts/make_statistics.py | 12 ++++++------ scripts/make_summary.py | 9 +++++---- scripts/monte_carlo.py | 15 ++++----------- scripts/plot_network.py | 10 +++++----- scripts/plot_summary.py | 10 ++++++++-- scripts/prepare_network.py | 8 ++++---- scripts/retrieve_databundle_light.py | 10 +++++----- scripts/simplify_network.py | 22 +++++++++++----------- scripts/solve_network.py | 16 ++++++++-------- test/test_prepare_network.py | 10 ++++++---- 27 files changed, 196 insertions(+), 174 deletions(-) diff --git a/scripts/_helpers.py b/scripts/_helpers.py index 403a359fc..6a1ba4337 
100644 --- a/scripts/_helpers.py +++ b/scripts/_helpers.py @@ -11,6 +11,7 @@ import shutil import subprocess import sys +import urllib import zipfile import country_converter as coco @@ -18,13 +19,18 @@ import geopandas as gpd import numpy as np import pandas as pd +import pypsa import requests import snakemake as sm import yaml +from pypsa.clustering.spatial import _make_consense from pypsa.components import component_attrs, components from pypsa.descriptors import Dict from shapely.geometry import Point from snakemake.script import Snakemake +from tqdm import tqdm + +from scripts.add_electricity import load_costs, update_transmission_costs logger = logging.getLogger(__name__) @@ -172,7 +178,6 @@ def configure_logging(snakemake, skip_handlers=False): skip_handlers : True | False (default) Do (not) skip the default handlers created for redirecting output to STDERR and file. """ - import logging kwargs = snakemake.config.get("logging", dict()).copy() kwargs.setdefault("level", "INFO") @@ -223,8 +228,6 @@ def load_network(import_name=None, custom_components=None): ------- pypsa.Network """ - import pypsa - from pypsa.descriptors import Dict override_components = None override_component_attrs_dict = None @@ -252,8 +255,6 @@ def load_network(import_name=None, custom_components=None): def load_network_for_plots( fn, tech_costs, cost_config, elec_config, combine_hydro_ps=True ): - import pypsa - from add_electricity import load_costs, update_transmission_costs n = pypsa.Network(fn) @@ -414,9 +415,6 @@ def progress_retrieve( (default 0) Precision used to report the progress e.g. 0.1 stands for 88.1, 10 stands for 90, 80 """ - import urllib - - from tqdm import tqdm pbar = tqdm(total=100, disable=disable_progress) @@ -445,14 +443,6 @@ def get_aggregation_strategies(aggregation_strategies): the function's definition) they get lost when custom values are specified in the config. """ - import numpy as np - - # to handle the new version of PyPSA. 
- try: - from pypsa.clustering.spatial import _make_consense - except Exception: - # TODO: remove after new release and update minimum pypsa version - from pypsa.clustering.spatial import _make_consense bus_strategies = dict(country=_make_consense("Bus", "country")) bus_strategies.update(aggregation_strategies.get("buses", {})) @@ -741,7 +731,6 @@ def create_country_list(input, iso_coding=True): full_codes_list : list Example ["NG","ZA"] """ - import logging _logger = logging.getLogger(__name__) _logger.setLevel(logging.INFO) diff --git a/scripts/add_electricity.py b/scripts/add_electricity.py index 19360c9c6..99738ca1c 100755 --- a/scripts/add_electricity.py +++ b/scripts/add_electricity.py @@ -89,14 +89,16 @@ import powerplantmatching as pm import pypsa import xarray as xr -from _helpers import ( + +from scripts._helpers import ( change_to_script_dir, configure_logging, create_logger, + mock_snakemake, read_csv_nafix, + sets_path_to_root, update_p_nom_max, ) -from powerplantmatching.export import map_country_bus idx = pd.IndexSlice @@ -369,7 +371,7 @@ def attach_wind_and_solar( if not df.query("carrier == @tech").empty: buses = n.buses.loc[ds.indexes["bus"]] - caps = map_country_bus(df.query("carrier == @tech"), buses) + caps = pm.export.map_country_bus(df.query("carrier == @tech"), buses) caps = caps.groupby(["bus"]).p_nom.sum() caps = pd.Series(data=caps, index=ds.indexes["bus"]).fillna(0) else: @@ -811,11 +813,10 @@ def add_nice_carrier_names(n, config): if __name__ == "__main__": if "snakemake" not in globals(): - from _helpers import mock_snakemake, sets_path_to_root - change_to_script_dir(__file__) snakemake = mock_snakemake("add_electricity") sets_path_to_root("pypsa-earth") + configure_logging(snakemake) n = pypsa.Network(snakemake.input.base_network) diff --git a/scripts/add_extra_components.py b/scripts/add_extra_components.py index 94023ad89..46e865093 100644 --- a/scripts/add_extra_components.py +++ b/scripts/add_extra_components.py @@ -57,8 +57,14 @@ import numpy as np import pandas as pd import pypsa -from _helpers import change_to_script_dir, configure_logging, create_logger -from add_electricity import ( + +from scripts._helpers import ( + change_to_script_dir, + configure_logging, + create_logger, + mock_snakemake, +) +from scripts.add_electricity import ( _add_missing_carriers_from_costs, add_nice_carrier_names, load_costs, @@ -265,10 +271,9 @@ def attach_hydrogen_pipelines(n, costs, config): if __name__ == "__main__": if "snakemake" not in globals(): - from _helpers import mock_snakemake - change_to_script_dir(__file__) snakemake = mock_snakemake("add_extra_components", simpl="", clusters=10) + configure_logging(snakemake) n = pypsa.Network(snakemake.input.network) diff --git a/scripts/augmented_line_connections.py b/scripts/augmented_line_connections.py index c6f9520be..acd4af727 100644 --- a/scripts/augmented_line_connections.py +++ b/scripts/augmented_line_connections.py @@ -33,12 +33,18 @@ import numpy as np import pandas as pd import pypsa -from _helpers import change_to_script_dir, configure_logging, create_logger -from add_electricity import load_costs from networkx.algorithms import complement from networkx.algorithms.connectivity.edge_augmentation import k_edge_augmentation from pypsa.geo import haversine_pts +from scripts._helpers import ( + change_to_script_dir, + configure_logging, + create_logger, + mock_snakemake, +) +from scripts.add_electricity import load_costs + logger = create_logger(__name__) @@ -51,12 +57,11 @@ def haversine(p): if __name__ == 
"__main__": if "snakemake" not in globals(): - from _helpers import mock_snakemake - change_to_script_dir(__file__) snakemake = mock_snakemake( "augmented_line_connections", network="elec", simpl="", clusters="54" ) + configure_logging(snakemake) n = pypsa.Network(snakemake.input.network) diff --git a/scripts/base_network.py b/scripts/base_network.py index 0f997078b..c52514cba 100644 --- a/scripts/base_network.py +++ b/scripts/base_network.py @@ -64,14 +64,16 @@ import scipy as sp import shapely.prepared import shapely.wkt -from _helpers import ( +from shapely.ops import unary_union + +from scripts._helpers import ( change_to_script_dir, configure_logging, create_logger, get_path_size, + mock_snakemake, read_csv_nafix, ) -from shapely.ops import unary_union logger = create_logger(__name__) @@ -559,11 +561,9 @@ def base_network( if __name__ == "__main__": if "snakemake" not in globals(): - from _helpers import mock_snakemake - change_to_script_dir(__file__) - snakemake = mock_snakemake("base_network") + configure_logging(snakemake) inputs = snakemake.input diff --git a/scripts/build_bus_regions.py b/scripts/build_bus_regions.py index 42f6b6b61..7326abd96 100644 --- a/scripts/build_bus_regions.py +++ b/scripts/build_bus_regions.py @@ -44,9 +44,19 @@ """ import geopandas as gpd +import numpy as np import pandas as pd import pypsa -from _helpers import REGION_COLS, change_to_script_dir, configure_logging, create_logger +from scipy.spatial import Voronoi +from shapely.geometry import Polygon + +from scripts._helpers import ( + REGION_COLS, + change_to_script_dir, + configure_logging, + create_logger, + mock_snakemake, +) logger = create_logger(__name__) @@ -66,14 +76,8 @@ def custom_voronoi_partition_pts(points, outline, add_bounds_shape=True, multipl polygons : N - ndarray[dtype=Polygon|MultiPolygon] """ - import numpy as np - from scipy.spatial import Voronoi - from shapely.geometry import Polygon - points = np.asarray(points) - polygons_arr = [] - if len(points) == 1: polygons_arr = [outline] else: @@ -147,10 +151,9 @@ def get_gadm_shape( if __name__ == "__main__": if "snakemake" not in globals(): - from _helpers import mock_snakemake - change_to_script_dir(__file__) snakemake = mock_snakemake("build_bus_regions") + configure_logging(snakemake) countries = snakemake.params.countries diff --git a/scripts/build_cutout.py b/scripts/build_cutout.py index 83f5e1509..297f449a9 100644 --- a/scripts/build_cutout.py +++ b/scripts/build_cutout.py @@ -97,17 +97,22 @@ import atlite import geopandas as gpd import pandas as pd -from _helpers import change_to_script_dir, configure_logging, create_logger + +from scripts._helpers import ( + change_to_script_dir, + configure_logging, + create_logger, + mock_snakemake, +) logger = create_logger(__name__) if __name__ == "__main__": if "snakemake" not in globals(): - from _helpers import mock_snakemake - change_to_script_dir(__file__) snakemake = mock_snakemake("build_cutout", cutout="africa-2013-era5") + configure_logging(snakemake) cutout_params = snakemake.params.cutouts[snakemake.wildcards.cutout] diff --git a/scripts/build_demand_profiles.py b/scripts/build_demand_profiles.py index ebb230903..5cfdca1db 100644 --- a/scripts/build_demand_profiles.py +++ b/scripts/build_demand_profiles.py @@ -49,15 +49,18 @@ import pypsa import scipy.sparse as sparse import xarray as xr -from _helpers import ( +from shapely.prepared import prep +from shapely.validation import make_valid + +from scripts._helpers import ( change_to_script_dir, configure_logging, create_logger, 
get_path, + mock_snakemake, read_osm_config, + sets_path_to_root, ) -from shapely.prepared import prep -from shapely.validation import make_valid logger = create_logger(__name__) @@ -250,11 +253,10 @@ def upsample(cntry, group): if __name__ == "__main__": if "snakemake" not in globals(): - from _helpers import mock_snakemake, sets_path_to_root - change_to_script_dir(__file__) snakemake = mock_snakemake("build_demand_profiles") sets_path_to_root("pypsa-earth") + configure_logging(snakemake) n = pypsa.Network(snakemake.input.base_network) diff --git a/scripts/build_natura_raster.py b/scripts/build_natura_raster.py index ae9fd478a..e71c3df2c 100644 --- a/scripts/build_natura_raster.py +++ b/scripts/build_natura_raster.py @@ -49,16 +49,20 @@ import atlite import geopandas as gpd import numpy as np +import pandas as pd import rasterio as rio -from _helpers import ( +from rasterio.features import geometry_mask +from rasterio.warp import transform_bounds +from shapely.ops import unary_union + +from scripts._helpers import ( change_to_script_dir, configure_logging, create_logger, get_path, is_directory_path, + mock_snakemake, ) -from rasterio.features import geometry_mask -from rasterio.warp import transform_bounds logger = create_logger(__name__) @@ -127,9 +131,6 @@ def unify_protected_shape_areas(inputs, natura_crs, out_logging): ------- unified_shape : GeoDataFrame with a unified "multishape" """ - import pandas as pd - from shapely.ops import unary_union - from shapely.validation import make_valid if out_logging: logger.info("Stage 3/5: Unify protected shape area.") @@ -184,12 +185,11 @@ def unify_protected_shape_areas(inputs, natura_crs, out_logging): if __name__ == "__main__": if "snakemake" not in globals(): - from _helpers import mock_snakemake - change_to_script_dir(__file__) snakemake = mock_snakemake( "build_natura_raster", cutouts=["cutouts/africa-2013-era5.nc"] ) + configure_logging(snakemake) # get crs diff --git a/scripts/build_osm_network.py b/scripts/build_osm_network.py index 25f8d7a9c..9706483f7 100644 --- a/scripts/build_osm_network.py +++ b/scripts/build_osm_network.py @@ -8,19 +8,21 @@ import geopandas as gpd import numpy as np import pandas as pd -from _helpers import ( +from shapely.geometry import LineString, Point +from shapely.ops import linemerge, split +from tqdm import tqdm + +from scripts._helpers import ( build_directory, change_to_script_dir, configure_logging, create_logger, + mock_snakemake, read_geojson, read_osm_config, sets_path_to_root, to_csv_nafix, ) -from shapely.geometry import LineString, Point -from shapely.ops import linemerge, split -from tqdm import tqdm logger = create_logger(__name__) @@ -408,7 +410,6 @@ def connect_stations_same_station_id(lines, buses): station_id_list = buses.station_id.unique() add_lines = [] - from shapely.geometry import LineString for s_id in station_id_list: buses_station_id = buses[buses.station_id == s_id] @@ -891,10 +892,9 @@ def built_network( if __name__ == "__main__": if "snakemake" not in globals(): - from _helpers import mock_snakemake - change_to_script_dir(__file__) snakemake = mock_snakemake("build_osm_network") + configure_logging(snakemake) # load default crs diff --git a/scripts/build_powerplants.py b/scripts/build_powerplants.py index b61331241..8696de4a1 100644 --- a/scripts/build_powerplants.py +++ b/scripts/build_powerplants.py @@ -106,19 +106,21 @@ import powerplantmatching as pm import pypsa import yaml -from _helpers import ( +from scipy.spatial import cKDTree as KDTree +from shapely.geometry import 
Point + +from scripts._helpers import ( change_to_script_dir, configure_logging, create_logger, get_current_directory_path, get_path, get_path_size, + mock_snakemake, read_csv_nafix, to_csv_nafix, two_digits_2_name_country, ) -from scipy.spatial import cKDTree as KDTree -from shapely.geometry import Point logger = create_logger(__name__) @@ -298,8 +300,6 @@ def replace_natural_gas_technology(df: pd.DataFrame): if __name__ == "__main__": if "snakemake" not in globals(): - from _helpers import mock_snakemake - change_to_script_dir(__file__) snakemake = mock_snakemake("build_powerplants") diff --git a/scripts/build_renewable_profiles.py b/scripts/build_renewable_profiles.py index d2d211aca..cb5740303 100644 --- a/scripts/build_renewable_profiles.py +++ b/scripts/build_renewable_profiles.py @@ -201,16 +201,18 @@ import pandas as pd import progressbar as pgb import xarray as xr -from _helpers import ( +from add_electricity import load_powerplants +from dask.distributed import Client, LocalCluster +from pypsa.geo import haversine +from shapely.geometry import LineString, Point, box + +from scripts._helpers import ( change_to_script_dir, configure_logging, create_logger, + mock_snakemake, sets_path_to_root, ) -from add_electricity import load_powerplants -from dask.distributed import Client, LocalCluster -from pypsa.geo import haversine -from shapely.geometry import LineString, Point, box cc = coco.CountryConverter() @@ -490,11 +492,10 @@ def create_scaling_factor( if __name__ == "__main__": if "snakemake" not in globals(): - from _helpers import mock_snakemake - change_to_script_dir(__file__) snakemake = mock_snakemake("build_renewable_profiles", technology="solar") sets_path_to_root("pypsa-earth") + configure_logging(snakemake) pgb.streams.wrap_stderr() diff --git a/scripts/build_shapes.py b/scripts/build_shapes.py index 77a97b669..5ba734438 100644 --- a/scripts/build_shapes.py +++ b/scripts/build_shapes.py @@ -18,7 +18,17 @@ import rasterio import requests import xarray as xr -from _helpers import ( +from numba import njit +from numba.core import types +from numba.typed import Dict +from rasterio.mask import mask +from rasterio.windows import Window +from shapely.geometry import MultiPolygon +from shapely.ops import unary_union +from shapely.validation import make_valid +from tqdm import tqdm + +from scripts._helpers import ( build_directory, change_to_script_dir, configure_logging, @@ -26,21 +36,13 @@ get_current_directory_path, get_dirname_path, get_path, + mock_snakemake, path_exists, sets_path_to_root, three_2_two_digits_country, two_2_three_digits_country, two_digits_2_name_country, ) -from numba import njit -from numba.core import types -from numba.typed import Dict -from rasterio.mask import mask -from rasterio.windows import Window -from shapely.geometry import MultiPolygon -from shapely.ops import unary_union -from shapely.validation import make_valid -from tqdm import tqdm sets_path_to_root("pypsa-earth") @@ -1315,11 +1317,10 @@ def gadm( if __name__ == "__main__": if "snakemake" not in globals(): - from _helpers import mock_snakemake - change_to_script_dir(__file__) snakemake = mock_snakemake("build_shapes") sets_path_to_root("pypsa-earth") + configure_logging(snakemake) out = snakemake.output diff --git a/scripts/build_test_configs.py b/scripts/build_test_configs.py index d19d86cb9..7e066e0db 100644 --- a/scripts/build_test_configs.py +++ b/scripts/build_test_configs.py @@ -15,9 +15,15 @@ import collections.abc import copy -from _helpers import change_to_script_dir, 
get_current_directory_path, get_path from ruamel.yaml import YAML +from scripts._helpers import ( + change_to_script_dir, + get_current_directory_path, + get_path, + mock_snakemake, +) + def update(d, u): for k, v in u.items(): @@ -85,8 +91,6 @@ def create_test_config(default_config, diff_config, output_path): if __name__ == "__main__": if "snakemake" not in globals(): - from _helpers import mock_snakemake - change_to_script_dir(__file__) snakemake = mock_snakemake("build_test_configs") diff --git a/scripts/clean_osm_data.py b/scripts/clean_osm_data.py index 9f7d3ed9a..e762e2019 100644 --- a/scripts/clean_osm_data.py +++ b/scripts/clean_osm_data.py @@ -9,12 +9,14 @@ import numpy as np import pandas as pd import reverse_geocode as rg -from _helpers import ( + +from scripts._helpers import ( REGION_COLS, change_to_script_dir, configure_logging, create_logger, get_path_size, + mock_snakemake, save_to_geojson, to_csv_nafix, ) @@ -1062,10 +1064,9 @@ def clean_data( if __name__ == "__main__": if "snakemake" not in globals(): - from _helpers import mock_snakemake - change_to_script_dir(__file__) snakemake = mock_snakemake("clean_osm_data") + configure_logging(snakemake) tag_substation = snakemake.params.clean_osm_data_options["tag_substation"] diff --git a/scripts/cluster_network.py b/scripts/cluster_network.py index aa6fc4fa8..b0ac236c1 100644 --- a/scripts/cluster_network.py +++ b/scripts/cluster_network.py @@ -129,24 +129,27 @@ import pandas as pd import pyomo.environ as po import pypsa -from _helpers import ( +from pypsa.clustering.spatial import ( + busmap_by_greedy_modularity, + busmap_by_hac, + busmap_by_kmeans, + get_clustering_from_busmap, +) +from scipy.sparse import csgraph +from shapely.geometry import Point + +from scripts._helpers import ( REGION_COLS, change_to_script_dir, configure_logging, create_logger, get_aggregation_strategies, + mock_snakemake, sets_path_to_root, update_p_nom_max, ) -from add_electricity import load_costs -from build_shapes import add_gdp_data, add_population_data -from pypsa.clustering.spatial import ( - busmap_by_greedy_modularity, - busmap_by_hac, - busmap_by_kmeans, - get_clustering_from_busmap, -) -from shapely.geometry import Point +from scripts.add_electricity import load_costs +from scripts.build_shapes import add_gdp_data, add_population_data idx = pd.IndexSlice @@ -429,7 +432,6 @@ def busmap_for_n_clusters( algorithm_kwds.setdefault("random_state", 0) def fix_country_assignment_for_hac(n): - from scipy.sparse import csgraph # overwrite country of nodes that are disconnected from their country-topology for country in n.buses.country.unique(): @@ -656,13 +658,12 @@ def cluster_regions(busmaps, inputs, output): if __name__ == "__main__": if "snakemake" not in globals(): - from _helpers import mock_snakemake - change_to_script_dir(__file__) snakemake = mock_snakemake( "cluster_network", network="elec", simpl="", clusters="min" ) sets_path_to_root("pypsa-earth") + configure_logging(snakemake) inputs, outputs, config = snakemake.input, snakemake.output, snakemake.config diff --git a/scripts/download_osm_data.py b/scripts/download_osm_data.py index b822cc574..4ab6593ec 100644 --- a/scripts/download_osm_data.py +++ b/scripts/download_osm_data.py @@ -29,15 +29,18 @@ import pathlib import shutil -from _helpers import ( +from earth_osm import eo + +from scripts._helpers import ( change_to_script_dir, configure_logging, create_logger, get_current_directory_path, get_path, + mock_snakemake, read_osm_config, + sets_path_to_root, ) -from earth_osm import eo 
logger = create_logger(__name__) @@ -98,11 +101,10 @@ def convert_iso_to_geofk( if __name__ == "__main__": if "snakemake" not in globals(): - from _helpers import mock_snakemake, sets_path_to_root - change_to_script_dir(__file__) snakemake = mock_snakemake("download_osm_data") sets_path_to_root("pypsa-earth") + configure_logging(snakemake) run = snakemake.config.get("run", {}) diff --git a/scripts/make_statistics.py b/scripts/make_statistics.py index dd410d33d..bc54c1b0f 100644 --- a/scripts/make_statistics.py +++ b/scripts/make_statistics.py @@ -29,17 +29,20 @@ import pandas as pd import pypsa import xarray as xr -from _helpers import ( +from shapely.validation import make_valid + +from scripts._helpers import ( change_to_script_dir, + create_country_list, create_logger, get_path_size, is_file_path, mock_snakemake, sets_path_to_root, + three_2_two_digits_country, to_csv_nafix, ) -from build_test_configs import create_test_config -from shapely.validation import make_valid +from scripts.build_test_configs import create_test_config logger = create_logger(__name__) @@ -77,7 +80,6 @@ def generate_scenario_by_country( out_dir : str (optional) Output directory where output configuration files are executed """ - from _helpers import create_country_list, three_2_two_digits_country clean_country_list = create_country_list(country_list) @@ -585,8 +587,6 @@ def calculate_stats( if __name__ == "__main__": if "snakemake" not in globals(): - from _helpers import mock_snakemake - change_to_script_dir(__file__) snakemake = mock_snakemake("make_statistics") diff --git a/scripts/make_summary.py b/scripts/make_summary.py index 7bc3aa86c..fb2d4cf45 100644 --- a/scripts/make_summary.py +++ b/scripts/make_summary.py @@ -54,14 +54,17 @@ import pandas as pd import pypsa -from _helpers import ( + +from scripts._helpers import ( build_directory, change_to_script_dir, configure_logging, + create_logger, get_path, + mock_snakemake, path_exists, ) -from add_electricity import create_logger, load_costs, update_transmission_costs +from scripts.add_electricity import load_costs, update_transmission_costs idx = pd.IndexSlice @@ -539,8 +542,6 @@ def to_csv(dfs, dir): if __name__ == "__main__": if "snakemake" not in globals(): - from _helpers import mock_snakemake - change_to_script_dir(__file__) snakemake = mock_snakemake( "make_summary", diff --git a/scripts/monte_carlo.py b/scripts/monte_carlo.py index 3eadb00ca..fcbc9c21d 100644 --- a/scripts/monte_carlo.py +++ b/scripts/monte_carlo.py @@ -73,11 +73,12 @@ import pandas as pd import pypsa import seaborn as sns -from _helpers import change_to_script_dir, configure_logging, create_logger from pyDOE2 import lhs from scipy.stats import beta, gamma, lognorm, norm, qmc, triang -from sklearn.preprocessing import MinMaxScaler -from solve_network import * +from sklearn.preprocessing import MinMaxScaler, minmax_scale + +from scripts._helpers import change_to_script_dir, configure_logging, create_logger +from scripts.solve_network import * logger = create_logger(__name__) sns.set(style="whitegrid") @@ -99,8 +100,6 @@ def monte_carlo_sampling_pydoe2( Adapted from Disspaset: https://github.com/energy-modelling-toolkit/Dispa-SET/blob/master/scripts/build_and_run_hypercube.py Documentation on PyDOE2: https://github.com/clicumu/pyDOE2 (fixes latin_cube errors) """ - from pyDOE2 import lhs - from scipy.stats import qmc # Generate a Nfeatures-dimensional latin hypercube varying between 0 and 1: lh = lhs( @@ -134,7 +133,6 @@ def monte_carlo_sampling_chaospy( Documentation on Chaospy: 
https://github.com/clicumu/pyDOE2 (fixes latin_cube errors) Documentation on Chaospy latin-hyper cube (quasi-Monte Carlo method): https://chaospy.readthedocs.io/en/master/user_guide/fundamentals/quasi_random_samples.html#Quasi-random-samples """ - from scipy.stats import qmc # generate a Nfeatures-dimensional latin hypercube varying between 0 and 1: N_FEATURES = "chaospy.Uniform(0, 1), " * N_FEATURES @@ -176,7 +174,6 @@ def monte_carlo_sampling_scipy( Documentation for Latin Hypercube: https://docs.scipy.org/doc/scipy/reference/generated/scipy.stats.qmc.LatinHypercube.html#scipy.stats.qmc.LatinHypercube Orthogonal LHS is better than basic LHS: https://github.com/scipy/scipy/pull/14546/files, https://en.wikipedia.org/wiki/Latin_hypercube_sampling """ - from scipy.stats import qmc sampler = qmc.LatinHypercube( d=N_FEATURES, @@ -230,8 +227,6 @@ def rescale_distribution( - The function supports rescaling for uniform, normal, lognormal, triangle, beta, and gamma distributions. - The rescaled samples will have values in the range [0, 1]. """ - from scipy.stats import beta, gamma, lognorm, norm, qmc, triang - from sklearn.preprocessing import MinMaxScaler, minmax_scale for idx, value in enumerate(uncertainties_values): dist = value.get("type") @@ -349,8 +344,6 @@ def validate_parameters( if __name__ == "__main__": if "snakemake" not in globals(): - from _helpers import mock_snakemake - change_to_script_dir(__file__) snakemake = mock_snakemake( "monte_carlo", diff --git a/scripts/plot_network.py b/scripts/plot_network.py index 124c6c891..354dd50fd 100644 --- a/scripts/plot_network.py +++ b/scripts/plot_network.py @@ -23,16 +23,18 @@ import matplotlib.pyplot as plt import numpy as np import pandas as pd -from _helpers import ( +from matplotlib.legend_handler import HandlerPatch +from matplotlib.patches import Circle, Ellipse + +from scripts._helpers import ( aggregate_costs, aggregate_p, change_to_script_dir, configure_logging, create_logger, load_network_for_plots, + mock_snakemake, ) -from matplotlib.legend_handler import HandlerPatch -from matplotlib.patches import Circle, Ellipse to_rgba = mpl.colors.colorConverter.to_rgba @@ -357,8 +359,6 @@ def split_costs(n): if __name__ == "__main__": if "snakemake" not in globals(): - from _helpers import mock_snakemake - change_to_script_dir(__file__) snakemake = mock_snakemake( "plot_network", diff --git a/scripts/plot_summary.py b/scripts/plot_summary.py index f6f126a47..96ee48372 100644 --- a/scripts/plot_summary.py +++ b/scripts/plot_summary.py @@ -19,7 +19,14 @@ import matplotlib.pyplot as plt import pandas as pd -from _helpers import change_to_script_dir, configure_logging, create_logger, get_path + +from scripts._helpers import ( + change_to_script_dir, + configure_logging, + create_logger, + get_path, + mock_snakemake, +) logger = create_logger(__name__) @@ -216,7 +223,6 @@ def plot_energy(infn, snmk, fn=None): if __name__ == "__main__": if "snakemake" not in globals(): - from _helpers import mock_snakemake change_to_script_dir(__file__) snakemake = mock_snakemake( diff --git a/scripts/prepare_network.py b/scripts/prepare_network.py index 299d69280..cbf785804 100755 --- a/scripts/prepare_network.py +++ b/scripts/prepare_network.py @@ -65,14 +65,16 @@ import pandas as pd import pypsa import requests -from _helpers import ( + +from scripts._helpers import ( change_to_script_dir, configure_logging, create_logger, get_current_directory_path, get_path, + mock_snakemake, ) -from add_electricity import load_costs, update_transmission_costs +from 
scripts.add_electricity import load_costs, update_transmission_costs idx = pd.IndexSlice @@ -324,8 +326,6 @@ def set_line_nom_max(n, s_nom_max_set=np.inf, p_nom_max_set=np.inf): if __name__ == "__main__": if "snakemake" not in globals(): - from _helpers import mock_snakemake - change_to_script_dir(__file__) snakemake = mock_snakemake( "prepare_network", diff --git a/scripts/retrieve_databundle_light.py b/scripts/retrieve_databundle_light.py index e3b0c191b..fa7741162 100644 --- a/scripts/retrieve_databundle_light.py +++ b/scripts/retrieve_databundle_light.py @@ -88,7 +88,10 @@ import geopandas as gpd import pandas as pd import yaml -from _helpers import ( +from google_drive_downloader import GoogleDriveDownloader as gdd +from tqdm import tqdm + +from scripts._helpers import ( change_to_script_dir, configure_logging, create_country_list, @@ -97,11 +100,10 @@ get_current_directory_path, get_path, get_relative_path, + mock_snakemake, progress_retrieve, sets_path_to_root, ) -from google_drive_downloader import GoogleDriveDownloader as gdd -from tqdm import tqdm logger = create_logger(__name__) @@ -815,8 +817,6 @@ def merge_hydrobasins_shape(config_hydrobasin, hydrobasins_level): if __name__ == "__main__": if "snakemake" not in globals(): change_to_script_dir(__file__) - from _helpers import mock_snakemake - snakemake = mock_snakemake("retrieve_databundle_light") # TODO Make logging compatible with progressbar (see PR #102, PyPSA-Eur) configure_logging(snakemake) diff --git a/scripts/simplify_network.py b/scripts/simplify_network.py index 48f18c4a9..263b98099 100644 --- a/scripts/simplify_network.py +++ b/scripts/simplify_network.py @@ -92,15 +92,6 @@ import pandas as pd import pypsa import scipy as sp -from _helpers import ( - change_to_script_dir, - configure_logging, - create_logger, - get_aggregation_strategies, - update_p_nom_max, -) -from add_electricity import load_costs -from cluster_network import cluster_regions, clustering_for_n_clusters from pypsa.clustering.spatial import ( aggregategenerators, aggregateoneport, @@ -110,6 +101,17 @@ from pypsa.io import import_components_from_dataframe, import_series_from_dataframe from scipy.sparse.csgraph import connected_components, dijkstra +from scripts._helpers import ( + change_to_script_dir, + configure_logging, + create_logger, + get_aggregation_strategies, + mock_snakemake, + update_p_nom_max, +) +from scripts.add_electricity import load_costs +from scripts.cluster_network import cluster_regions, clustering_for_n_clusters + sys.settrace logger = create_logger(__name__) @@ -961,8 +963,6 @@ def merge_isolated_nodes(n, threshold, aggregation_strategies=dict()): if __name__ == "__main__": if "snakemake" not in globals(): - from _helpers import mock_snakemake - change_to_script_dir(__file__) snakemake = mock_snakemake("simplify_network", simpl="") configure_logging(snakemake) diff --git a/scripts/solve_network.py b/scripts/solve_network.py index 6f7dfdcdb..555bee935 100755 --- a/scripts/solve_network.py +++ b/scripts/solve_network.py @@ -82,12 +82,6 @@ import numpy as np import pandas as pd import pypsa -from _helpers import ( - build_directory, - change_to_script_dir, - configure_logging, - create_logger, -) from pypsa.descriptors import get_switchable_as_dense as get_as_dense from pypsa.linopf import ( define_constraints, @@ -99,6 +93,14 @@ network_lopf, ) +from scripts._helpers import ( + build_directory, + change_to_script_dir, + configure_logging, + create_logger, + mock_snakemake, +) + logger = create_logger(__name__) @@ -545,8 +547,6 @@ 
def solve_network(n, config, opts="", **kwargs): if __name__ == "__main__": if "snakemake" not in globals(): - from _helpers import mock_snakemake - change_to_script_dir(__file__) snakemake = mock_snakemake( "solve_network", diff --git a/test/test_prepare_network.py b/test/test_prepare_network.py index 3181b444e..9bad7c220 100644 --- a/test/test_prepare_network.py +++ b/test/test_prepare_network.py @@ -5,7 +5,9 @@ # -*- coding: utf-8 -*- -# from scripts.prepare_network import download_emission_data -# def test_download_emission_data(): -# filename = download_emission_data() -# assert filename == "v60_CO2_excl_short-cycle_org_C_1970_2018.xls" +from scripts.prepare_network import download_emission_data + + +def test_download_emission_data(): + filename = download_emission_data() + assert filename == "v60_CO2_excl_short-cycle_org_C_1970_2018.xls" From d8430b05e5ed7e1d90b767fad65c8ea98313f630 Mon Sep 17 00:00:00 2001 From: Fabrizio Finozzi Date: Fri, 7 Jun 2024 13:35:56 +0200 Subject: [PATCH 19/40] move load_network_for_plots to plot_network --- scripts/_helpers.py | 36 ------------------------------------ scripts/plot_network.py | 37 ++++++++++++++++++++++++++++++++++++- 2 files changed, 36 insertions(+), 37 deletions(-) diff --git a/scripts/_helpers.py b/scripts/_helpers.py index 6a1ba4337..5544be8a3 100644 --- a/scripts/_helpers.py +++ b/scripts/_helpers.py @@ -30,8 +30,6 @@ from snakemake.script import Snakemake from tqdm import tqdm -from scripts.add_electricity import load_costs, update_transmission_costs - logger = logging.getLogger(__name__) # list of recognised nan values (NA and na excluded as may be confused with Namibia 2-letter country code) @@ -252,40 +250,6 @@ def load_network(import_name=None, custom_components=None): ) -def load_network_for_plots( - fn, tech_costs, cost_config, elec_config, combine_hydro_ps=True -): - - n = pypsa.Network(fn) - - n.loads["carrier"] = n.loads.bus.map(n.buses.carrier) + " load" - n.stores["carrier"] = n.stores.bus.map(n.buses.carrier) - - n.links["carrier"] = ( - n.links.bus0.map(n.buses.carrier) + "-" + n.links.bus1.map(n.buses.carrier) - ) - n.lines["carrier"] = "AC line" - n.transformers["carrier"] = "AC transformer" - - n.lines["s_nom"] = n.lines["s_nom_min"] - n.links["p_nom"] = n.links["p_nom_min"] - - if combine_hydro_ps: - n.storage_units.loc[ - n.storage_units.carrier.isin({"PHS", "hydro"}), "carrier" - ] = "hydro+PHS" - - # if the carrier was not set on the heat storage units - # bus_carrier = n.storage_units.bus.map(n.buses.carrier) - # n.storage_units.loc[bus_carrier == "heat","carrier"] = "water tanks" - - Nyears = n.snapshot_weightings.objective.sum() / 8760.0 - costs = load_costs(tech_costs, cost_config, elec_config, Nyears) - update_transmission_costs(n, costs) - - return n - - def update_p_nom_max(n): """ If extendable carriers (solar/onwind/...) have capacity >= 0, e.g. 
existing diff --git a/scripts/plot_network.py b/scripts/plot_network.py index 354dd50fd..d4f635ede 100644 --- a/scripts/plot_network.py +++ b/scripts/plot_network.py @@ -23,6 +23,7 @@ import matplotlib.pyplot as plt import numpy as np import pandas as pd +import pypsa from matplotlib.legend_handler import HandlerPatch from matplotlib.patches import Circle, Ellipse @@ -32,9 +33,9 @@ change_to_script_dir, configure_logging, create_logger, - load_network_for_plots, mock_snakemake, ) +from scripts.add_electricity import load_costs, update_transmission_costs to_rgba = mpl.colors.colorConverter.to_rgba @@ -357,6 +358,40 @@ def split_costs(n): ax.grid(True, axis="y", color="k", linestyle="dotted") +def load_network_for_plots( + fn, tech_costs, cost_config, elec_config, combine_hydro_ps=True +): + + n = pypsa.Network(fn) + + n.loads["carrier"] = n.loads.bus.map(n.buses.carrier) + " load" + n.stores["carrier"] = n.stores.bus.map(n.buses.carrier) + + n.links["carrier"] = ( + n.links.bus0.map(n.buses.carrier) + "-" + n.links.bus1.map(n.buses.carrier) + ) + n.lines["carrier"] = "AC line" + n.transformers["carrier"] = "AC transformer" + + n.lines["s_nom"] = n.lines["s_nom_min"] + n.links["p_nom"] = n.links["p_nom_min"] + + if combine_hydro_ps: + n.storage_units.loc[ + n.storage_units.carrier.isin({"PHS", "hydro"}), "carrier" + ] = "hydro+PHS" + + # if the carrier was not set on the heat storage units + # bus_carrier = n.storage_units.bus.map(n.buses.carrier) + # n.storage_units.loc[bus_carrier == "heat","carrier"] = "water tanks" + + Nyears = n.snapshot_weightings.objective.sum() / 8760.0 + costs = load_costs(tech_costs, cost_config, elec_config, Nyears) + update_transmission_costs(n, costs) + + return n + + if __name__ == "__main__": if "snakemake" not in globals(): change_to_script_dir(__file__) From 451f5697aab2099e42c631319ef00a04b0684d9f Mon Sep 17 00:00:00 2001 From: Fabrizio Finozzi Date: Fri, 7 Jun 2024 13:57:27 +0200 Subject: [PATCH 20/40] main to oet_main in workflow files --- .github/workflows/ci-linux.yaml | 4 ++-- .github/workflows/ci-mac.yaml | 4 ++-- .github/workflows/ci-windows.yaml | 4 ++-- 3 files changed, 6 insertions(+), 6 deletions(-) diff --git a/.github/workflows/ci-linux.yaml b/.github/workflows/ci-linux.yaml index 8c99cf581..a0e0299c4 100644 --- a/.github/workflows/ci-linux.yaml +++ b/.github/workflows/ci-linux.yaml @@ -3,10 +3,10 @@ name: CI-linux on: push: branches: - - main + - oet_main pull_request: branches: - - main + - oet_main schedule: - cron: "0 5 * * TUE" diff --git a/.github/workflows/ci-mac.yaml b/.github/workflows/ci-mac.yaml index b042f8b3d..4cb98fa97 100644 --- a/.github/workflows/ci-mac.yaml +++ b/.github/workflows/ci-mac.yaml @@ -3,10 +3,10 @@ name: CI-mac on: push: branches: - - main + - oet_main pull_request: branches: - - main + - oet_main schedule: - cron: "0 5 * * TUE" diff --git a/.github/workflows/ci-windows.yaml b/.github/workflows/ci-windows.yaml index a288b65e1..14d56e903 100644 --- a/.github/workflows/ci-windows.yaml +++ b/.github/workflows/ci-windows.yaml @@ -3,10 +3,10 @@ name: CI-windows on: push: branches: - - main + - oet_main pull_request: branches: - - main + - oet_main schedule: - cron: "0 5 * * TUE" From 7a7153784df1da7b4b9e42ce6d8510fe11d8208a Mon Sep 17 00:00:00 2001 From: Fabrizio Finozzi Date: Fri, 7 Jun 2024 14:04:14 +0200 Subject: [PATCH 21/40] modify Snakefile --- Snakefile | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/Snakefile b/Snakefile index 32c92ba5b..12484ff62 100644 --- a/Snakefile +++ b/Snakefile 
@@ -4,16 +4,16 @@ import sys -sys.path.append("./scripts") +# sys.path.append("./scripts") from os.path import normpath, exists from shutil import copyfile, move from snakemake.remote.HTTP import RemoteProvider as HTTPRemoteProvider -from _helpers import create_country_list, get_last_commit_message -from build_demand_profiles import get_load_paths_gegis -from retrieve_databundle_light import datafiles_retrivedatabundle +from scripts._helpers import create_country_list, get_last_commit_message +from scripts.build_demand_profiles import get_load_paths_gegis +from scripts.retrieve_databundle_light import datafiles_retrivedatabundle from pathlib import Path HTTP = HTTPRemoteProvider() From b5228793e0a7e358d548147d763f90be3486ceca Mon Sep 17 00:00:00 2001 From: Fabrizio Finozzi Date: Fri, 7 Jun 2024 15:10:41 +0200 Subject: [PATCH 22/40] remove .scripts/ --- Snakefile | 8 ++++---- scripts/add_electricity.py | 3 +-- scripts/add_extra_components.py | 5 ++--- scripts/augmented_line_connections.py | 11 +++++------ scripts/base_network.py | 5 ++--- scripts/build_bus_regions.py | 7 +++---- scripts/build_cutout.py | 3 +-- scripts/build_demand_profiles.py | 7 +++---- scripts/build_natura_raster.py | 9 ++++----- scripts/build_osm_network.py | 9 ++++----- scripts/build_powerplants.py | 7 +++---- scripts/build_renewable_profiles.py | 11 +++++------ scripts/build_shapes.py | 21 ++++++++++----------- scripts/build_test_configs.py | 5 ++--- scripts/clean_osm_data.py | 3 +-- scripts/cluster_network.py | 23 +++++++++++------------ scripts/download_osm_data.py | 10 +++++++--- scripts/make_statistics.py | 7 +++---- scripts/make_summary.py | 5 ++--- scripts/monte_carlo.py | 5 ++--- scripts/plot_network.py | 9 ++++----- scripts/plot_summary.py | 3 +-- scripts/prepare_network.py | 5 ++--- scripts/retrieve_databundle_light.py | 7 +++---- scripts/simplify_network.py | 21 ++++++++++----------- scripts/solve_network.py | 15 +++++++-------- test/test_helpers.py | 5 ++++- test/test_prepare_network.py | 6 +++++- 28 files changed, 111 insertions(+), 124 deletions(-) diff --git a/Snakefile b/Snakefile index 12484ff62..32c92ba5b 100644 --- a/Snakefile +++ b/Snakefile @@ -4,16 +4,16 @@ import sys -# sys.path.append("./scripts") +sys.path.append("./scripts") from os.path import normpath, exists from shutil import copyfile, move from snakemake.remote.HTTP import RemoteProvider as HTTPRemoteProvider -from scripts._helpers import create_country_list, get_last_commit_message -from scripts.build_demand_profiles import get_load_paths_gegis -from scripts.retrieve_databundle_light import datafiles_retrivedatabundle +from _helpers import create_country_list, get_last_commit_message +from build_demand_profiles import get_load_paths_gegis +from retrieve_databundle_light import datafiles_retrivedatabundle from pathlib import Path HTTP = HTTPRemoteProvider() diff --git a/scripts/add_electricity.py b/scripts/add_electricity.py index 99738ca1c..f953d853b 100755 --- a/scripts/add_electricity.py +++ b/scripts/add_electricity.py @@ -89,8 +89,7 @@ import powerplantmatching as pm import pypsa import xarray as xr - -from scripts._helpers import ( +from _helpers import ( change_to_script_dir, configure_logging, create_logger, diff --git a/scripts/add_extra_components.py b/scripts/add_extra_components.py index 46e865093..69f139a79 100644 --- a/scripts/add_extra_components.py +++ b/scripts/add_extra_components.py @@ -57,14 +57,13 @@ import numpy as np import pandas as pd import pypsa - -from scripts._helpers import ( +from _helpers import ( 
change_to_script_dir, configure_logging, create_logger, mock_snakemake, ) -from scripts.add_electricity import ( +from add_electricity import ( _add_missing_carriers_from_costs, add_nice_carrier_names, load_costs, diff --git a/scripts/augmented_line_connections.py b/scripts/augmented_line_connections.py index acd4af727..0bddebb30 100644 --- a/scripts/augmented_line_connections.py +++ b/scripts/augmented_line_connections.py @@ -33,17 +33,16 @@ import numpy as np import pandas as pd import pypsa -from networkx.algorithms import complement -from networkx.algorithms.connectivity.edge_augmentation import k_edge_augmentation -from pypsa.geo import haversine_pts - -from scripts._helpers import ( +from _helpers import ( change_to_script_dir, configure_logging, create_logger, mock_snakemake, ) -from scripts.add_electricity import load_costs +from add_electricity import load_costs +from networkx.algorithms import complement +from networkx.algorithms.connectivity.edge_augmentation import k_edge_augmentation +from pypsa.geo import haversine_pts logger = create_logger(__name__) diff --git a/scripts/base_network.py b/scripts/base_network.py index c52514cba..8c2131512 100644 --- a/scripts/base_network.py +++ b/scripts/base_network.py @@ -64,9 +64,7 @@ import scipy as sp import shapely.prepared import shapely.wkt -from shapely.ops import unary_union - -from scripts._helpers import ( +from _helpers import ( change_to_script_dir, configure_logging, create_logger, @@ -74,6 +72,7 @@ mock_snakemake, read_csv_nafix, ) +from shapely.ops import unary_union logger = create_logger(__name__) diff --git a/scripts/build_bus_regions.py b/scripts/build_bus_regions.py index 7326abd96..9af7f1be9 100644 --- a/scripts/build_bus_regions.py +++ b/scripts/build_bus_regions.py @@ -47,16 +47,15 @@ import numpy as np import pandas as pd import pypsa -from scipy.spatial import Voronoi -from shapely.geometry import Polygon - -from scripts._helpers import ( +from _helpers import ( REGION_COLS, change_to_script_dir, configure_logging, create_logger, mock_snakemake, ) +from scipy.spatial import Voronoi +from shapely.geometry import Polygon logger = create_logger(__name__) diff --git a/scripts/build_cutout.py b/scripts/build_cutout.py index 297f449a9..186e52ab6 100644 --- a/scripts/build_cutout.py +++ b/scripts/build_cutout.py @@ -97,8 +97,7 @@ import atlite import geopandas as gpd import pandas as pd - -from scripts._helpers import ( +from _helpers import ( change_to_script_dir, configure_logging, create_logger, diff --git a/scripts/build_demand_profiles.py b/scripts/build_demand_profiles.py index 5cfdca1db..747c90f84 100644 --- a/scripts/build_demand_profiles.py +++ b/scripts/build_demand_profiles.py @@ -49,10 +49,7 @@ import pypsa import scipy.sparse as sparse import xarray as xr -from shapely.prepared import prep -from shapely.validation import make_valid - -from scripts._helpers import ( +from _helpers import ( change_to_script_dir, configure_logging, create_logger, @@ -61,6 +58,8 @@ read_osm_config, sets_path_to_root, ) +from shapely.prepared import prep +from shapely.validation import make_valid logger = create_logger(__name__) diff --git a/scripts/build_natura_raster.py b/scripts/build_natura_raster.py index e71c3df2c..af066fd1b 100644 --- a/scripts/build_natura_raster.py +++ b/scripts/build_natura_raster.py @@ -51,11 +51,7 @@ import numpy as np import pandas as pd import rasterio as rio -from rasterio.features import geometry_mask -from rasterio.warp import transform_bounds -from shapely.ops import unary_union - -from 
scripts._helpers import ( +from _helpers import ( change_to_script_dir, configure_logging, create_logger, @@ -63,6 +59,9 @@ is_directory_path, mock_snakemake, ) +from rasterio.features import geometry_mask +from rasterio.warp import transform_bounds +from shapely.ops import unary_union logger = create_logger(__name__) diff --git a/scripts/build_osm_network.py b/scripts/build_osm_network.py index 9706483f7..1ab20b70a 100644 --- a/scripts/build_osm_network.py +++ b/scripts/build_osm_network.py @@ -8,11 +8,7 @@ import geopandas as gpd import numpy as np import pandas as pd -from shapely.geometry import LineString, Point -from shapely.ops import linemerge, split -from tqdm import tqdm - -from scripts._helpers import ( +from _helpers import ( build_directory, change_to_script_dir, configure_logging, @@ -23,6 +19,9 @@ sets_path_to_root, to_csv_nafix, ) +from shapely.geometry import LineString, Point +from shapely.ops import linemerge, split +from tqdm import tqdm logger = create_logger(__name__) diff --git a/scripts/build_powerplants.py b/scripts/build_powerplants.py index 8696de4a1..e1f8e91b7 100644 --- a/scripts/build_powerplants.py +++ b/scripts/build_powerplants.py @@ -106,10 +106,7 @@ import powerplantmatching as pm import pypsa import yaml -from scipy.spatial import cKDTree as KDTree -from shapely.geometry import Point - -from scripts._helpers import ( +from _helpers import ( change_to_script_dir, configure_logging, create_logger, @@ -121,6 +118,8 @@ to_csv_nafix, two_digits_2_name_country, ) +from scipy.spatial import cKDTree as KDTree +from shapely.geometry import Point logger = create_logger(__name__) diff --git a/scripts/build_renewable_profiles.py b/scripts/build_renewable_profiles.py index cb5740303..0ec3f3e3a 100644 --- a/scripts/build_renewable_profiles.py +++ b/scripts/build_renewable_profiles.py @@ -201,18 +201,17 @@ import pandas as pd import progressbar as pgb import xarray as xr -from add_electricity import load_powerplants -from dask.distributed import Client, LocalCluster -from pypsa.geo import haversine -from shapely.geometry import LineString, Point, box - -from scripts._helpers import ( +from _helpers import ( change_to_script_dir, configure_logging, create_logger, mock_snakemake, sets_path_to_root, ) +from add_electricity import load_powerplants +from dask.distributed import Client, LocalCluster +from pypsa.geo import haversine +from shapely.geometry import LineString, Point, box cc = coco.CountryConverter() diff --git a/scripts/build_shapes.py b/scripts/build_shapes.py index 5ba734438..faea62a13 100644 --- a/scripts/build_shapes.py +++ b/scripts/build_shapes.py @@ -18,17 +18,7 @@ import rasterio import requests import xarray as xr -from numba import njit -from numba.core import types -from numba.typed import Dict -from rasterio.mask import mask -from rasterio.windows import Window -from shapely.geometry import MultiPolygon -from shapely.ops import unary_union -from shapely.validation import make_valid -from tqdm import tqdm - -from scripts._helpers import ( +from _helpers import ( build_directory, change_to_script_dir, configure_logging, @@ -43,6 +33,15 @@ two_2_three_digits_country, two_digits_2_name_country, ) +from numba import njit +from numba.core import types +from numba.typed import Dict +from rasterio.mask import mask +from rasterio.windows import Window +from shapely.geometry import MultiPolygon +from shapely.ops import unary_union +from shapely.validation import make_valid +from tqdm import tqdm sets_path_to_root("pypsa-earth") diff --git 
a/scripts/build_test_configs.py b/scripts/build_test_configs.py index 7e066e0db..0dec51ae0 100644 --- a/scripts/build_test_configs.py +++ b/scripts/build_test_configs.py @@ -15,14 +15,13 @@ import collections.abc import copy -from ruamel.yaml import YAML - -from scripts._helpers import ( +from _helpers import ( change_to_script_dir, get_current_directory_path, get_path, mock_snakemake, ) +from ruamel.yaml import YAML def update(d, u): diff --git a/scripts/clean_osm_data.py b/scripts/clean_osm_data.py index e762e2019..fc5edbb69 100644 --- a/scripts/clean_osm_data.py +++ b/scripts/clean_osm_data.py @@ -9,8 +9,7 @@ import numpy as np import pandas as pd import reverse_geocode as rg - -from scripts._helpers import ( +from _helpers import ( REGION_COLS, change_to_script_dir, configure_logging, diff --git a/scripts/cluster_network.py b/scripts/cluster_network.py index b0ac236c1..de7d538fc 100644 --- a/scripts/cluster_network.py +++ b/scripts/cluster_network.py @@ -129,16 +129,7 @@ import pandas as pd import pyomo.environ as po import pypsa -from pypsa.clustering.spatial import ( - busmap_by_greedy_modularity, - busmap_by_hac, - busmap_by_kmeans, - get_clustering_from_busmap, -) -from scipy.sparse import csgraph -from shapely.geometry import Point - -from scripts._helpers import ( +from _helpers import ( REGION_COLS, change_to_script_dir, configure_logging, @@ -148,8 +139,16 @@ sets_path_to_root, update_p_nom_max, ) -from scripts.add_electricity import load_costs -from scripts.build_shapes import add_gdp_data, add_population_data +from add_electricity import load_costs +from build_shapes import add_gdp_data, add_population_data +from pypsa.clustering.spatial import ( + busmap_by_greedy_modularity, + busmap_by_hac, + busmap_by_kmeans, + get_clustering_from_busmap, +) +from scipy.sparse import csgraph +from shapely.geometry import Point idx = pd.IndexSlice diff --git a/scripts/download_osm_data.py b/scripts/download_osm_data.py index 4ab6593ec..2a1e366d2 100644 --- a/scripts/download_osm_data.py +++ b/scripts/download_osm_data.py @@ -26,12 +26,15 @@ - ``data/osm/out``: Prepared power data as .geojson and .csv files per country - ``resources/osm/raw``: Prepared and per type (e.g. 
cable/lines) aggregated power data as .geojson and .csv files """ + +import sys + +print("sys path download_osm_data", sys.path) + import pathlib import shutil -from earth_osm import eo - -from scripts._helpers import ( +from _helpers import ( change_to_script_dir, configure_logging, create_logger, @@ -41,6 +44,7 @@ read_osm_config, sets_path_to_root, ) +from earth_osm import eo logger = create_logger(__name__) diff --git a/scripts/make_statistics.py b/scripts/make_statistics.py index bc54c1b0f..a62499b6e 100644 --- a/scripts/make_statistics.py +++ b/scripts/make_statistics.py @@ -29,9 +29,7 @@ import pandas as pd import pypsa import xarray as xr -from shapely.validation import make_valid - -from scripts._helpers import ( +from _helpers import ( change_to_script_dir, create_country_list, create_logger, @@ -42,7 +40,8 @@ three_2_two_digits_country, to_csv_nafix, ) -from scripts.build_test_configs import create_test_config +from build_test_configs import create_test_config +from shapely.validation import make_valid logger = create_logger(__name__) diff --git a/scripts/make_summary.py b/scripts/make_summary.py index fb2d4cf45..390bcd66f 100644 --- a/scripts/make_summary.py +++ b/scripts/make_summary.py @@ -54,8 +54,7 @@ import pandas as pd import pypsa - -from scripts._helpers import ( +from _helpers import ( build_directory, change_to_script_dir, configure_logging, @@ -64,7 +63,7 @@ mock_snakemake, path_exists, ) -from scripts.add_electricity import load_costs, update_transmission_costs +from add_electricity import load_costs, update_transmission_costs idx = pd.IndexSlice diff --git a/scripts/monte_carlo.py b/scripts/monte_carlo.py index fcbc9c21d..9ba7d4948 100644 --- a/scripts/monte_carlo.py +++ b/scripts/monte_carlo.py @@ -73,12 +73,11 @@ import pandas as pd import pypsa import seaborn as sns +from _helpers import change_to_script_dir, configure_logging, create_logger from pyDOE2 import lhs from scipy.stats import beta, gamma, lognorm, norm, qmc, triang from sklearn.preprocessing import MinMaxScaler, minmax_scale - -from scripts._helpers import change_to_script_dir, configure_logging, create_logger -from scripts.solve_network import * +from solve_network import * logger = create_logger(__name__) sns.set(style="whitegrid") diff --git a/scripts/plot_network.py b/scripts/plot_network.py index d4f635ede..07c0115e4 100644 --- a/scripts/plot_network.py +++ b/scripts/plot_network.py @@ -24,10 +24,7 @@ import numpy as np import pandas as pd import pypsa -from matplotlib.legend_handler import HandlerPatch -from matplotlib.patches import Circle, Ellipse - -from scripts._helpers import ( +from _helpers import ( aggregate_costs, aggregate_p, change_to_script_dir, @@ -35,7 +32,9 @@ create_logger, mock_snakemake, ) -from scripts.add_electricity import load_costs, update_transmission_costs +from add_electricity import load_costs, update_transmission_costs +from matplotlib.legend_handler import HandlerPatch +from matplotlib.patches import Circle, Ellipse to_rgba = mpl.colors.colorConverter.to_rgba diff --git a/scripts/plot_summary.py b/scripts/plot_summary.py index 96ee48372..d89ef53ec 100644 --- a/scripts/plot_summary.py +++ b/scripts/plot_summary.py @@ -19,8 +19,7 @@ import matplotlib.pyplot as plt import pandas as pd - -from scripts._helpers import ( +from _helpers import ( change_to_script_dir, configure_logging, create_logger, diff --git a/scripts/prepare_network.py b/scripts/prepare_network.py index cbf785804..59c34ea3a 100755 --- a/scripts/prepare_network.py +++ b/scripts/prepare_network.py @@ 
-65,8 +65,7 @@ import pandas as pd import pypsa import requests - -from scripts._helpers import ( +from _helpers import ( change_to_script_dir, configure_logging, create_logger, @@ -74,7 +73,7 @@ get_path, mock_snakemake, ) -from scripts.add_electricity import load_costs, update_transmission_costs +from add_electricity import load_costs, update_transmission_costs idx = pd.IndexSlice diff --git a/scripts/retrieve_databundle_light.py b/scripts/retrieve_databundle_light.py index fa7741162..0f369ed27 100644 --- a/scripts/retrieve_databundle_light.py +++ b/scripts/retrieve_databundle_light.py @@ -88,10 +88,7 @@ import geopandas as gpd import pandas as pd import yaml -from google_drive_downloader import GoogleDriveDownloader as gdd -from tqdm import tqdm - -from scripts._helpers import ( +from _helpers import ( change_to_script_dir, configure_logging, create_country_list, @@ -104,6 +101,8 @@ progress_retrieve, sets_path_to_root, ) +from google_drive_downloader import GoogleDriveDownloader as gdd +from tqdm import tqdm logger = create_logger(__name__) diff --git a/scripts/simplify_network.py b/scripts/simplify_network.py index 263b98099..eae4ed1f1 100644 --- a/scripts/simplify_network.py +++ b/scripts/simplify_network.py @@ -92,6 +92,16 @@ import pandas as pd import pypsa import scipy as sp +from _helpers import ( + change_to_script_dir, + configure_logging, + create_logger, + get_aggregation_strategies, + mock_snakemake, + update_p_nom_max, +) +from add_electricity import load_costs +from cluster_network import cluster_regions, clustering_for_n_clusters from pypsa.clustering.spatial import ( aggregategenerators, aggregateoneport, @@ -101,17 +111,6 @@ from pypsa.io import import_components_from_dataframe, import_series_from_dataframe from scipy.sparse.csgraph import connected_components, dijkstra -from scripts._helpers import ( - change_to_script_dir, - configure_logging, - create_logger, - get_aggregation_strategies, - mock_snakemake, - update_p_nom_max, -) -from scripts.add_electricity import load_costs -from scripts.cluster_network import cluster_regions, clustering_for_n_clusters - sys.settrace logger = create_logger(__name__) diff --git a/scripts/solve_network.py b/scripts/solve_network.py index 555bee935..f057b8823 100755 --- a/scripts/solve_network.py +++ b/scripts/solve_network.py @@ -82,6 +82,13 @@ import numpy as np import pandas as pd import pypsa +from _helpers import ( + build_directory, + change_to_script_dir, + configure_logging, + create_logger, + mock_snakemake, +) from pypsa.descriptors import get_switchable_as_dense as get_as_dense from pypsa.linopf import ( define_constraints, @@ -93,14 +100,6 @@ network_lopf, ) -from scripts._helpers import ( - build_directory, - change_to_script_dir, - configure_logging, - create_logger, - mock_snakemake, -) - logger = create_logger(__name__) diff --git a/test/test_helpers.py b/test/test_helpers.py index 8153ee622..4f41c5740 100644 --- a/test/test_helpers.py +++ b/test/test_helpers.py @@ -8,6 +8,7 @@ import os import pathlib import shutil +import sys from test.conftest import ( _content_temp_file, _name_temp_file, @@ -19,7 +20,9 @@ import numpy as np import pandas as pd -from scripts._helpers import ( +sys.path.append("./scripts") + +from _helpers import ( aggregate_fuels, build_directory, change_to_script_dir, diff --git a/test/test_prepare_network.py b/test/test_prepare_network.py index 9bad7c220..914089614 100644 --- a/test/test_prepare_network.py +++ b/test/test_prepare_network.py @@ -5,7 +5,11 @@ # -*- coding: utf-8 -*- -from 
scripts.prepare_network import download_emission_data +import sys + +sys.path.append("./scripts") + +from prepare_network import download_emission_data def test_download_emission_data(): From e2abe3074c3eb638f5b3ed0d3fb18ecaac2fafd9 Mon Sep 17 00:00:00 2001 From: Fabrizio Finozzi Date: Fri, 7 Jun 2024 19:50:40 +0200 Subject: [PATCH 23/40] remove some abstractions --- scripts/_helpers.py | 76 ++++------------------------ scripts/build_natura_raster.py | 6 +-- scripts/build_shapes.py | 16 +++--- scripts/make_statistics.py | 19 +++---- scripts/make_summary.py | 5 +- scripts/retrieve_databundle_light.py | 7 ++- test/test_helpers.py | 65 ------------------------ 7 files changed, 36 insertions(+), 158 deletions(-) diff --git a/scripts/_helpers.py b/scripts/_helpers.py index 5544be8a3..d07951f13 100644 --- a/scripts/_helpers.py +++ b/scripts/_helpers.py @@ -110,9 +110,9 @@ def read_osm_config(*args): {"Africa": {"DZ": "algeria", ...}, ...} """ if "__file__" in globals(): - base_folder = get_dirname_path(__file__) - if not path_exists(get_path(base_folder, "configs")): - base_folder = get_dirname_path(base_folder) + base_folder = pathlib.Path(__file__).parent + if not pathlib.Path(get_path(base_folder, "configs")).exists(): + base_folder = pathlib.Path(base_folder).parent else: base_folder = get_current_directory_path() osm_config_path = get_path(base_folder, "configs", REGIONS_CONFIG) @@ -144,7 +144,7 @@ def sets_path_to_root(root_directory_name, n=8): while n >= 0: n -= 1 # if repo_name is current folder name, stop and set path - if repo_name == get_basename_abs_path("."): + if repo_name == pathlib.Path(".").absolute().name: repo_path = get_current_directory_path() # current_path os.chdir(repo_path) # change dir_path to repo_path print("This is the repository path: ", repo_path) @@ -182,7 +182,7 @@ def configure_logging(snakemake, skip_handlers=False): if skip_handlers is False: fallback_path = get_path( - get_dirname_path(__file__), "..", "logs", f"{snakemake.rule}.log" + pathlib.Path(__file__).parent, "..", "logs", f"{snakemake.rule}.log" ) logfile = snakemake.log.get( "python", snakemake.log[0] if snakemake.log else fallback_path @@ -440,7 +440,7 @@ def mock_snakemake(rule_name, **wildcards): ), f"mock_snakemake has to be run from the repository scripts directory {script_dir}" os.chdir(script_dir.parent) for p in sm.SNAKEFILE_CHOICES: - if path_exists(p): + if pathlib.Path(p).exists(): snakefile = p break workflow = sm.Workflow( @@ -464,7 +464,7 @@ def mock_snakemake(rule_name, **wildcards): def make_accessable(*ios): for io in ios: for i in range(len(io)): - io[i] = get_abs_path(io[i]) + io[i] = pathlib.Path(io[i]).absolute() make_accessable(job.input, job.output, job.log) snakemake = Snakemake( @@ -784,35 +784,6 @@ def get_last_commit_message(path): return last_commit_message -def get_dirname_path(path): - """ - It returns the directory name of the path. - """ - return pathlib.Path(path).parent - - -def get_abs_path(path): - """ - It returns the absolutized version of the path. - """ - return pathlib.Path(path).absolute() - - -def get_basename_abs_path(path): - """ - It returns the base name of a normalized and absolutized version of the - path. - """ - return pathlib.Path(path).absolute().name - - -def get_basename_path(path): - """ - It returns the base name of the path. - """ - return pathlib.Path(path).name - - def get_path(*args): """ It returns a new path string. 
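
For review convenience: the wrappers removed in this and the following hunk map one-to-one onto direct pathlib calls. A minimal illustrative sketch (not part of the patch; `p` is an arbitrary example path):

import pathlib

p = "scripts/_helpers.py"
pathlib.Path(p).parent           # was get_dirname_path(p)
pathlib.Path(p).absolute()       # was get_abs_path(p)
pathlib.Path(p).absolute().name  # was get_basename_abs_path(p)
pathlib.Path(p).name             # was get_basename_path(p)
pathlib.Path(p).exists()         # was path_exists(p)
pathlib.Path(p).is_dir()         # was is_directory_path(p)
pathlib.Path(p).is_file()        # was is_file_path(p)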
@@ -868,24 +839,6 @@ def get_current_directory_path(): return pathlib.Path.cwd() -def is_directory_path(path): - """ - It returns True if the path points to a directory. - - False otherwise. - """ - return pathlib.Path(path).is_dir() - - -def is_file_path(path): - """ - It returns True if the path points to a file. - - False otherwise. - """ - return pathlib.Path(path).is_file() - - def get_relative_path(path, start_path="."): """ It returns a relative path to path from start_path. @@ -895,15 +848,6 @@ def get_relative_path(path, start_path="."): return pathlib.Path(path).relative_to(start_path) -def path_exists(path): - """ - It returns True if the path exists. - - False otherwise. - """ - return pathlib.Path(path).exists() - - def create_network_topology(n, prefix, connector=" <-> ", bidirectional=True): """ Create a network topology like the power transmission network. @@ -1004,7 +948,7 @@ def download_gadm(country_code, update=False, out_logging=False): gadm_filename + ".gpkg", ) # Input filepath gpkg - if not path_exists(gadm_input_file_gpkg) or update is True: + if not pathlib.Path(gadm_input_file_gpkg).exists() or update is True: if out_logging: _logger.warning( f"Stage 4/4: {gadm_filename} of country {two_digits_2_name_country(country_code)} does not exist, downloading to {gadm_input_file_zip}" @@ -1017,7 +961,7 @@ def download_gadm(country_code, update=False, out_logging=False): shutil.copyfileobj(r.raw, f) with zipfile.ZipFile(gadm_input_file_zip, "r") as zip_ref: - zip_ref.extractall(get_dirname_path(gadm_input_file_zip)) + zip_ref.extractall(pathlib.Path(gadm_input_file_zip).parent) return gadm_input_file_gpkg, gadm_filename @@ -1153,7 +1097,7 @@ def override_component_attrs(directory): for component, list_name in components.list_name.items(): fn = f"{directory}/{list_name}.csv" - if is_file_path(fn): + if pathlib.Path(fn).is_file(): overrides = pd.read_csv(fn, index_col=0, na_values="n/a") attrs[component] = overrides.combine_first(attrs[component]) diff --git a/scripts/build_natura_raster.py b/scripts/build_natura_raster.py index af066fd1b..2cea6f681 100644 --- a/scripts/build_natura_raster.py +++ b/scripts/build_natura_raster.py @@ -45,6 +45,7 @@ The output is a raster file with the name `natura.tiff` in the folder `resources/natura/`. 
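
    A quick sanity check of that output (illustrative only; the path is the
    default location named above, and the band semantics are an assumption
    based on how this script rasterizes the shapes):

        import rasterio as rio

        with rio.open("resources/natura/natura.tiff") as src:
            print(src.crs, src.res, src.bounds)  # projection, cell size, extent
            band = src.read(1)  # first band: the rasterized protected-area mask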
""" import os +import pathlib import atlite import geopandas as gpd @@ -56,7 +57,6 @@ configure_logging, create_logger, get_path, - is_directory_path, mock_snakemake, ) from rasterio.features import geometry_mask @@ -74,9 +74,9 @@ def get_fileshapes(list_paths, accepted_formats=(".shp",)): list_fileshapes = [] for lf in list_paths: - if is_directory_path( + if pathlib.Path( lf - ): # if it is a folder, then list all shapes files contained + ).is_dir(): # if it is a folder, then list all shapes files contained # loop over all dirs and subdirs for path, subdirs, files in os.walk(lf): # loop over all files diff --git a/scripts/build_shapes.py b/scripts/build_shapes.py index faea62a13..f6b4d5874 100644 --- a/scripts/build_shapes.py +++ b/scripts/build_shapes.py @@ -24,10 +24,8 @@ configure_logging, create_logger, get_current_directory_path, - get_dirname_path, get_path, mock_snakemake, - path_exists, sets_path_to_root, three_2_two_digits_country, two_2_three_digits_country, @@ -103,7 +101,7 @@ def download_GADM(country_code, update=False, out_logging=False): GADM_filename + ".gpkg", ) # Input filepath gpkg - if not path_exists(GADM_inputfile_gpkg) or update is True: + if not pathlib.Path(GADM_inputfile_gpkg).exists() or update is True: if out_logging: logger.warning( f"Stage 5 of 5: {GADM_filename} of country {two_digits_2_name_country(country_code)} does not exist, downloading to {GADM_inputfile_gpkg}" @@ -325,9 +323,9 @@ def load_EEZ(countries_codes, geo_crs, EEZ_gpkg="./data/eez/eez_v11.gpkg"): The dataset shall be downloaded independently by the user (see guide) or together with pypsa-earth package. """ - if not path_exists(EEZ_gpkg): + if not pathlib.Path(EEZ_gpkg).exists(): raise Exception( - f"File EEZ {EEZ_gpkg} not found, please download it from https://www.marineregions.org/download_file.php?name=World_EEZ_v11_20191118_gpkg.zip and copy it in {get_dirname_path(EEZ_gpkg)}" + f"File EEZ {EEZ_gpkg} not found, please download it from https://www.marineregions.org/download_file.php?name=World_EEZ_v11_20191118_gpkg.zip and copy it in {pathlib.Path(EEZ_gpkg).parent}" ) geodf_EEZ = gpd.read_file(EEZ_gpkg, engine="pyogrio").to_crs(geo_crs) @@ -489,7 +487,7 @@ def download_WorldPop_standard( get_current_directory_path(), "data", "WorldPop", WorldPop_filename ) # Input filepath tif - if not path_exists(WorldPop_inputfile) or update is True: + if not pathlib.Path(WorldPop_inputfile).exists() or update is True: if out_logging: logger.warning( f"Stage 3 of 5: {WorldPop_filename} does not exist, downloading to {WorldPop_inputfile}" @@ -587,9 +585,9 @@ def convert_GDP(name_file_nc, year=2015, out_logging=False): ) # Input filepath nc # Check if file exists, otherwise throw exception - if not path_exists(GDP_nc): + if not pathlib.Path(GDP_nc).exists(): raise Exception( - f"File {name_file_nc} not found, please download it from https://datadryad.org/stash/dataset/doi:10.5061/dryad.dk1j0 and copy it in {get_dirname_path(GDP_nc)}" + f"File {name_file_nc} not found, please download it from https://datadryad.org/stash/dataset/doi:10.5061/dryad.dk1j0 and copy it in {pathlib.Path(GDP_nc).parent}" ) # open nc dataset @@ -632,7 +630,7 @@ def load_GDP( get_current_directory_path(), "data", "GDP", name_file_tif ) # Input filepath tif - if update | (not path_exists(GDP_tif)): + if update | (not pathlib.Path(GDP_tif).exists()): if out_logging: logger.warning( f"Stage 5 of 5: File {name_file_tif} not found, the file will be produced by processing {name_file_nc}" diff --git a/scripts/make_statistics.py 
b/scripts/make_statistics.py index a62499b6e..18a2e3a23 100644 --- a/scripts/make_statistics.py +++ b/scripts/make_statistics.py @@ -24,6 +24,8 @@ This rule creates a dataframe containing in the columns the relevant statistics for the current run. """ +import pathlib + import geopandas as gpd import numpy as np import pandas as pd @@ -34,7 +36,6 @@ create_country_list, create_logger, get_path_size, - is_file_path, mock_snakemake, sets_path_to_root, three_2_two_digits_country, @@ -130,7 +131,7 @@ def collect_basic_osm_stats(path, rulename, header): """ Collect basic statistics on OSM data: number of items """ - if is_file_path(path) and get_path_size(path) > 0: + if pathlib.Path(path).is_file() and get_path_size(path) > 0: df = gpd.read_file(path) n_elem = len(df) @@ -149,7 +150,7 @@ def collect_network_osm_stats(path, rulename, header, metric_crs="EPSG:3857"): - length of the stored shapes - length of objects with tag_frequency == 0 (DC elements) """ - if is_file_path(path) and get_path_size(path) > 0: + if pathlib.Path(path).is_file() and get_path_size(path) > 0: df = gpd.read_file(path) n_elem = len(df) obj_length = ( @@ -251,7 +252,7 @@ def collect_bus_regions_stats(bus_region_rule="build_bus_regions"): df = pd.DataFrame() - if is_file_path(fp_onshore) and is_file_path(fp_offshore): + if pathlib.Path(fp_onshore).is_file() and pathlib.Path(fp_offshore).is_file(): gdf_onshore = gpd.read_file(fp_onshore) gdf_offshore = gpd.read_file(fp_offshore) @@ -293,7 +294,7 @@ def capacity_stats(df): else: return df.groupby("carrier").p_nom.sum().astype(float) - if is_file_path(network_path): + if pathlib.Path(network_path).is_file(): n = pypsa.Network(network_path) lines_length = float((n.lines.length * n.lines.num_parallel).sum()) @@ -348,7 +349,7 @@ def collect_shape_stats(rulename="build_shapes", area_crs="ESRI:54009"): """ snakemake = _mock_snakemake(rulename) - if not is_file_path(snakemake.output.africa_shape): + if not pathlib.Path(snakemake.output.africa_shape).is_file(): return pd.DataFrame() df_continent = gpd.read_file(snakemake.output.africa_shape) @@ -359,7 +360,7 @@ def collect_shape_stats(rulename="build_shapes", area_crs="ESRI:54009"): .geometry.area.iloc[0] ) - if not is_file_path(snakemake.output.gadm_shapes): + if not pathlib.Path(snakemake.output.gadm_shapes).is_file(): return pd.DataFrame() df_gadm = gpd.read_file(snakemake.output.gadm_shapes) @@ -473,7 +474,7 @@ def collect_renewable_stats(rulename, technology): """ snakemake = _mock_snakemake(rulename, technology=technology) - if is_file_path(snakemake.output.profile): + if pathlib.Path(snakemake.output.profile).is_file(): res = xr.open_dataset(snakemake.output.profile) if technology == "hydro": @@ -506,7 +507,7 @@ def add_computational_stats(df, snakemake, column_name=None): comp_data = [np.nan] * 3 # total_time, mean_load and max_memory if snakemake.benchmark: - if not is_file_path(snakemake.benchmark): + if not pathlib.Path(snakemake.benchmark).is_file(): return df bench_data = pd.read_csv(snakemake.benchmark, delimiter="\t") diff --git a/scripts/make_summary.py b/scripts/make_summary.py index 390bcd66f..5ad6c5bfc 100644 --- a/scripts/make_summary.py +++ b/scripts/make_summary.py @@ -52,6 +52,8 @@ Replacing *summaries* with *plots* creates nice colored maps of the results. 
""" +import pathlib + import pandas as pd import pypsa from _helpers import ( @@ -61,7 +63,6 @@ create_logger, get_path, mock_snakemake, - path_exists, ) from add_electricity import load_costs, update_transmission_costs @@ -503,7 +504,7 @@ def make_summaries(networks_dict, inputs, cost_config, elec_config, country="all for label, filename in networks_dict.items(): print(label, filename) - if not path_exists(filename): + if not pathlib.Path(filename).exists(): print("does not exist!!") continue diff --git a/scripts/retrieve_databundle_light.py b/scripts/retrieve_databundle_light.py index 0f369ed27..d425a781d 100644 --- a/scripts/retrieve_databundle_light.py +++ b/scripts/retrieve_databundle_light.py @@ -93,7 +93,6 @@ configure_logging, create_country_list, create_logger, - get_basename_path, get_current_directory_path, get_path, get_relative_path, @@ -445,7 +444,7 @@ def download_and_unzip_direct(config, root_path, hot_run=True, disable_progress= destination = get_relative_path(config["destination"]) url = config["urls"]["direct"] - file_path = get_path(destination, get_basename_path(url)) + file_path = get_path(destination, pathlib.Path(url).name) unzip = config.get("unzip", False) @@ -507,7 +506,7 @@ def download_and_unzip_hydrobasins( for rg in suffix_list: url = url_templ + "hybas_" + rg + "_lev" + level_code + "_v1c.zip" - file_path = get_path(destination, get_basename_path(url)) + file_path = get_path(destination, pathlib.Path(url).name) all_downloaded &= download_and_unpack( url=url, @@ -554,7 +553,7 @@ def download_and_unzip_post(config, root_path, hot_run=True, disable_progress=Fa # remove url feature url = postdata.pop("url") - file_path = get_path(destination, get_basename_path(url)) + file_path = get_path(destination, pathlib.Path(url).name) if hot_run: pathlib.Path(file_path).unlink(missing_ok=True) diff --git a/test/test_helpers.py b/test/test_helpers.py index 4f41c5740..4d65adea2 100644 --- a/test/test_helpers.py +++ b/test/test_helpers.py @@ -27,18 +27,12 @@ build_directory, change_to_script_dir, country_name_2_two_digits, - get_abs_path, - get_basename_abs_path, get_conv_factors, get_current_directory_path, - get_dirname_path, get_path, get_path_size, get_relative_path, - is_directory_path, - is_file_path, modify_commodity, - path_exists, safe_divide, three_2_two_digits_country, two_2_three_digits_country, @@ -257,15 +251,6 @@ def test_build_directory(get_temp_folder, tmpdir): assert full_tree_list_os == full_tree_list_pathlib -def test_get_abs_path(): - """ - Verify the path returned by get_abs_path() - """ - abs_file = get_abs_path(__file__) - assert str(abs_file) == os.path.abspath(__file__) - assert str(abs_file) == __file__ - - def test_change_to_script_dir(): """ Verify the path returned by change_to_script_dir() @@ -276,26 +261,6 @@ def test_change_to_script_dir(): assert str(pathlib.Path.cwd()) == path_cwd -def test_get_dirname_path(): - """ - Verify the path returned by get_dirname_path() - """ - dir_name_file = get_dirname_path(__file__) - dir_name_cwd = get_dirname_path(".") - assert str(dir_name_file) == os.path.dirname(__file__) - assert str(dir_name_file) == path_cwd + os.sep + "test" - assert str(dir_name_cwd) == "." 
- - -def test_get_basename_abs_path(): - """ - Verify the path returned by get_basename_abs_path() - """ - base_name_file = get_basename_abs_path(__file__) - assert str(base_name_file) == os.path.basename(os.path.abspath(__file__)) - assert str(base_name_file) == "test_helpers.py" - - def test_get_path(): """ Verify the path returned by get_path() @@ -344,25 +309,6 @@ def test_get_current_directory_path(): assert str(path) == os.getcwd() -def test_is_directory_path(tmpdir): - """ - Verify if is_directory_path() returns True when path points to directory. - """ - assert is_directory_path(tmpdir) - assert is_directory_path(tmpdir) == os.path.isdir(tmpdir) - assert not is_directory_path(__file__) - - -def test_is_file_path(get_temp_file, tmpdir): - """ - Verify if is_file_path() returns True when path points to file. - """ - path = get_temp_file - assert is_file_path(path) - assert is_file_path(path) == os.path.isfile(path) - assert not is_file_path(tmpdir) - - def test_get_relative_path(get_temp_file): """ Verify the relative path returned by get_relative_path() @@ -374,17 +320,6 @@ def test_get_relative_path(get_temp_file): assert str(relative_path) == os.path.relpath(path, start=get_path(path).parent) -def test_path_exists(get_temp_file): - """ - Verify if path_exists() returns True when path exists. - """ - path = get_temp_file - pathlib_path = get_path(path) - assert path_exists(path) - assert path_exists(pathlib_path) - assert path_exists(path) == os.path.exists(path) - - def test_two_2_three_digits_country(): """ Verify the conversion from two-digit to three-digit country code. From de1ad66b8c21ff7103bbc548c0422a229919b73c Mon Sep 17 00:00:00 2001 From: Fabrizio Finozzi <167071962+finozzifa@users.noreply.github.com> Date: Fri, 5 Jul 2024 17:47:53 +0200 Subject: [PATCH 24/40] Unify gadm download and layer (#3) * initial changes * initial unified draft of download_gadm * add unit tests for get_gadm_url * changes to _helpers.py and test_helpers.py * new unit test * comment out biogeo test * finalize test * enhance download_gadm unit test * initial changes to get_gadm_layer * work on get_gadm_layer * changes to _helpers.py locate_bus * add exception message * use in build_shapes, methods from the _helpers.py * add new configs * update unit tests * keep only version gadm 4.1 * upgrade actions from v2 to v3 * upgrade setup-miniconda@v2 to setup-miniconda@v3 * changes to workflow files * remove get_gadm_country_code * remove ipopt version requirement * change to unit test * remove use_zip_file * str filename in prepare_network * modify download_emission_data * re-add limitation to ipopt version * add mac specific environment file * generalize configs --- .github/workflows/ci-linux.yaml | 7 +- .github/workflows/ci-mac.yaml | 11 +- .github/workflows/ci-windows.yaml | 7 +- config.default.yaml | 2 + config.tutorial.yaml | 4 +- envs/environment.mac.yaml | 87 ++++++++ scripts/_helpers.py | 285 ++++++++++++++++++++++----- scripts/build_demand_profiles.py | 13 +- scripts/build_shapes.py | 239 ++++------------------ scripts/prepare_network.py | 18 +- scripts/retrieve_databundle_light.py | 26 ++- test/test_helpers.py | 111 +++++++++++ 12 files changed, 528 insertions(+), 282 deletions(-) create mode 100644 envs/environment.mac.yaml diff --git a/.github/workflows/ci-linux.yaml b/.github/workflows/ci-linux.yaml index a0e0299c4..08a7da822 100644 --- a/.github/workflows/ci-linux.yaml +++ b/.github/workflows/ci-linux.yaml @@ -32,10 +32,11 @@ jobs: shell: bash -l {0} steps: - - uses: actions/checkout@v2 + - 
name: Checkout + uses: actions/checkout@v3 - name: Setup Mambaforge - uses: conda-incubator/setup-miniconda@v2 + uses: conda-incubator/setup-miniconda@v3 with: miniforge-variant: Mambaforge miniforge-version: latest @@ -43,7 +44,7 @@ jobs: use-mamba: true - name: Create environment cache - uses: actions/cache@v2 + uses: actions/cache@v3 id: cache with: path: ${{ matrix.prefix }} diff --git a/.github/workflows/ci-mac.yaml b/.github/workflows/ci-mac.yaml index 4cb98fa97..bbdf4e957 100644 --- a/.github/workflows/ci-mac.yaml +++ b/.github/workflows/ci-mac.yaml @@ -30,14 +30,15 @@ jobs: shell: bash -l {0} steps: - - uses: actions/checkout@v2 + - name: Checkout + uses: actions/checkout@v3 # - name: Add solver to environment # run: | # echo -e "- glpk\n- ipopt<3.13.3" >> envs/environment.yaml - name: Setup Mambaforge - uses: conda-incubator/setup-miniconda@v2 + uses: conda-incubator/setup-miniconda@v3 with: miniforge-variant: Mambaforge miniforge-version: latest @@ -45,15 +46,15 @@ jobs: use-mamba: true - name: Create environment cache - uses: actions/cache@v2 + uses: actions/cache@v3 id: cache with: path: ${{ matrix.prefix }} - key: ${{ matrix.label }}-conda-${{ hashFiles('envs/environment.yaml') }}-${{ env.DATE }}-${{ env.CACHE_NUMBER }} + key: ${{ matrix.label }}-conda-${{ hashFiles('envs/environment.mac.yaml') }}-${{ env.DATE }}-${{ env.CACHE_NUMBER }} - name: Update environment due to outdated or unavailable cache if: steps.cache.outputs.cache-hit != 'true' - run: mamba env update -n pypsa-earth -f envs/environment.yaml + run: mamba env update -n pypsa-earth -f envs/environment.mac.yaml - name: Conda list run: | diff --git a/.github/workflows/ci-windows.yaml b/.github/workflows/ci-windows.yaml index 14d56e903..7697306e3 100644 --- a/.github/workflows/ci-windows.yaml +++ b/.github/workflows/ci-windows.yaml @@ -30,14 +30,15 @@ jobs: shell: bash -l {0} steps: - - uses: actions/checkout@v2 + - name: Checkout + uses: actions/checkout@v3 # - name: Add solver to environment # run: | # echo -e "- glpk\n- ipopt<3.13.3" >> envs/environment.yaml - name: Setup Mambaforge - uses: conda-incubator/setup-miniconda@v2 + uses: conda-incubator/setup-miniconda@v3 with: miniforge-variant: Mambaforge miniforge-version: latest @@ -45,7 +46,7 @@ jobs: use-mamba: true - name: Create environment cache - uses: actions/cache@v2 + uses: actions/cache@v3 id: cache with: path: ${{ matrix.prefix }} diff --git a/config.default.yaml b/config.default.yaml index 91ae2c53f..80505f563 100644 --- a/config.default.yaml +++ b/config.default.yaml @@ -94,6 +94,8 @@ build_shape_options: worldpop_method: "standard" # "standard" pulls from web 1kmx1km raster, "api" pulls from API 100mx100m raster, false (not "false") no pop addition to shape which is useful when generating only cutout gdp_method: "standard" # "standard" pulls from web 1x1km raster, false (not "false") no gdp addition to shape which useful when generating only cutout contended_flag: "set_by_country" # "set_by_country" assigns the contended areas to the countries according to the GADM database, "drop" drops these contended areas from the model + gadm_file_prefix: "gadm41_" + gadm_url_prefix: "https://geodata.ucdavis.edu/gadm/gadm4.1/gpkg/" clean_osm_data_options: # osm = OpenStreetMap names_by_shapes: true # Set the country name based on the extended country shapes diff --git a/config.tutorial.yaml b/config.tutorial.yaml index e6be7cf5b..09c71c068 100644 --- a/config.tutorial.yaml +++ b/config.tutorial.yaml @@ -19,7 +19,7 @@ countries: ["NG", "BJ"] #["NG"] # Nigeria 
#["NE"] # Niger #["SL"] # Sierra Leone - #["MA"] # Morroco + #["MA"] # Morocco #["ZA"] # South Africa enable: @@ -108,6 +108,8 @@ build_shape_options: worldpop_method: "standard" # "standard" pulls from web 1kmx1km raster, "api" pulls from API 100mx100m raster, false (not "false") no pop addition to shape which is useful when generating only cutout gdp_method: "standard" # "standard" pulls from web 1x1km raster, false (not "false") no gdp addition to shape which useful when generating only cutout contended_flag: "set_by_country" # "set_by_country" assigns the contended areas to the countries according to the GADM database, "drop" drops these contended areas from the model + gadm_file_prefix: "gadm41_" + gadm_url_prefix: "https://geodata.ucdavis.edu/gadm/gadm4.1/gpkg/" clean_osm_data_options: names_by_shapes: true # Set the country name based on the extended country shapes diff --git a/envs/environment.mac.yaml b/envs/environment.mac.yaml new file mode 100644 index 000000000..608fcb236 --- /dev/null +++ b/envs/environment.mac.yaml @@ -0,0 +1,87 @@ +# SPDX-FileCopyrightText: PyPSA-Earth and PyPSA-Eur Authors +# +# SPDX-License-Identifier: AGPL-3.0-or-later + +name: pypsa-earth +channels: +- conda-forge +- bioconda +- gurobi +dependencies: +- python>=3.8 +- pip +- mamba # esp for windows build + +- pypsa>=0.24, <0.25 +# - atlite>=0.2.4 # until https://github.com/PyPSA/atlite/issues/244 is not merged +- dask +- powerplantmatching>=0.5.7 +- earth-osm>=2.1 +- atlite + + # Dependencies of the workflow itself +- xlrd +- openpyxl +- seaborn +- snakemake-minimal<8 +- memory_profiler +- ruamel.yaml<=0.17.26 +- pytables +- lxml +- numpy +- pandas +- geopandas>=0.11.0, <=0.14.3 +- fiona!=1.8.22 +- xarray>=2023.11.0, <2023.12.0 +- netcdf4 +- networkx +- scipy +- pydoe2 +- shapely!=2.0.4 +- pre-commit +- pyomo +- matplotlib<=3.5.2 +- reverse-geocode +- country_converter +- pyogrio +- numba +- py7zr + + # Keep in conda environment when calling ipython +- ipython + # Jupyter notebook requirement +- ipykernel +- jupyterlab + + # GIS dependencies: +- cartopy +- descartes +- rasterio!=1.2.10 +- rioxarray + + # Plotting +- geoviews +- hvplot +- graphviz +- contextily +- graphviz + + # PyPSA-Eur-Sec Dependencies +- geopy +- tqdm +- pytz +- country_converter + + # Cloud download +# - googledrivedownloader # Commented until https://github.com/ndrplz/google-drive-downloader/pull/28 is merged: PR installed using pip + +# Default solver for tests (required for CI) +- glpk +- ipopt +- gurobi + +- pip: + - git+https://github.com/davide-f/google-drive-downloader@master # google drive with fix for virus scan + - git+https://github.com/FRESNA/vresutils@master # until new pip release > 0.3.1 (strictly) + - tsam>=1.1.0 + - chaospy # lastest version only available on pip diff --git a/scripts/_helpers.py b/scripts/_helpers.py index d07951f13..0140ccb28 100644 --- a/scripts/_helpers.py +++ b/scripts/_helpers.py @@ -12,7 +12,6 @@ import subprocess import sys import urllib -import zipfile import country_converter as coco import fiona @@ -911,62 +910,181 @@ def cycling_shift(df, steps=1): return df -def download_gadm(country_code, update=False, out_logging=False): +def get_gadm_filename(country_code, file_prefix="gadm41_"): + """ + Function to get three digits country code for GADM. 
+ """ + special_codes_gadm = { + "XK": "XKO", # kosovo + "CP": "XCL", # clipperton island + "SX": "MAF", # saint-martin + "TF": "ATF", # french southern territories + "AX": "ALA", # aland + "IO": "IOT", # british indian ocean territory + "CC": "CCK", # cocos island + "NF": "NFK", # norfolk + "PN": "PCN", # pitcairn islands + "JE": "JEY", # jersey + "XS": "XSP", # spratly islands + "GG": "GGY", # guernsey + "UM": "UMI", # United States minor outlying islands + "SJ": "SJM", # svalbard + "CX": "CXR", # Christmas island + } + + if country_code in special_codes_gadm: + return file_prefix + special_codes_gadm[country_code] + else: + return file_prefix + two_2_three_digits_country(country_code) + + +def get_gadm_url(gadm_url_prefix, gadm_filename): + """ + Function to get the gadm url given a gadm filename. + """ + return gadm_url_prefix + gadm_filename + ".gpkg" + + +def download_gadm( + country_code, + file_prefix, + gadm_url_prefix, + gadm_input_file_args, + update=False, + out_logging=False, +): """ Download gpkg file from GADM for a given country code. Parameters ---------- country_code : str - Two letter country codes of the downloaded files + 2-digit country name of the downloaded files + file_prefix : str + file prefix string + gadm_url_prefix: str + gadm url prefix + gadm_input_file_args: list[str] + gadm input file arguments list update : bool Update = true, forces re-download of files + out_logging : bool + out_logging = true, enables output logging Returns ------- gpkg file per country """ - gadm_filename = f"gadm36_{two_2_three_digits_country(country_code)}" - gadm_url = f"https://biogeo.ucdavis.edu/data/gadm3.6/gpkg/{gadm_filename}_gpkg.zip" _logger = logging.getLogger(__name__) - gadm_input_file_zip = get_path( + + gadm_filename = get_gadm_filename(country_code, file_prefix) + gadm_url = get_gadm_url(gadm_url_prefix, gadm_filename) + gadm_input_file = get_path( get_current_directory_path(), - "data", - "raw", - "gadm", + *gadm_input_file_args, + gadm_filename, gadm_filename, - gadm_filename + ".zip", - ) # Input filepath zip + ) gadm_input_file_gpkg = get_path( - get_current_directory_path(), - "data", - "raw", - "gadm", - gadm_filename, - gadm_filename + ".gpkg", + str(gadm_input_file) + ".gpkg" ) # Input filepath gpkg if not pathlib.Path(gadm_input_file_gpkg).exists() or update is True: if out_logging: _logger.warning( - f"Stage 4/4: {gadm_filename} of country {two_digits_2_name_country(country_code)} does not exist, downloading to {gadm_input_file_zip}" + f"{gadm_filename} of country {two_digits_2_name_country(country_code)} does not exist, downloading to {gadm_input_file_gpkg}" ) + # create data/osm directory - build_directory(gadm_input_file_zip) + build_directory(str(gadm_input_file_gpkg)) - with requests.get(gadm_url, stream=True) as r: - with open(gadm_input_file_zip, "wb") as f: + try: + r = requests.get(gadm_url, stream=True, timeout=300) + except (requests.exceptions.ConnectionError, requests.exceptions.Timeout): + raise Exception( + f"GADM server is down at {gadm_url}. 
Data needed for building shapes can't be extracted.\n\r"
+            )
+        except Exception as exception:
+            raise Exception(
+                f"An error happened when trying to load GADM data by {gadm_url}.\n\r"
+                + str(exception)
+                + "\n\r"
+            )
+        else:
+            with open(gadm_input_file_gpkg, "wb") as f:
                 shutil.copyfileobj(r.raw, f)

-        with zipfile.ZipFile(gadm_input_file_zip, "r") as zip_ref:
-            zip_ref.extractall(pathlib.Path(gadm_input_file_zip).parent)
-
     return gadm_input_file_gpkg, gadm_filename


-def get_gadm_layer(country_list, layer_id, update=False, outlogging=False):
+def get_gadm_layer_name(country_code, file_prefix, layer_id, code_layer):
+    """
+    Function to get the geopackage layer name for the given GADM version.
+    """
+    if file_prefix == "gadm41_":
+        return "ADM_ADM_" + str(layer_id)
+    else:
+        raise Exception(
+            f"The requested GADM data version {file_prefix} does not exist."
+        )
+
+
+def filter_gadm(
+    geo_df,
+    layer,
+    cc,
+    contended_flag,
+    output_nonstd_to_csv=False,
+):
+    # identify non-standard geo_df rows
+    geo_df_non_std = geo_df[geo_df["GID_0"] != two_2_three_digits_country(cc)].copy()
+
+    if not geo_df_non_std.empty:
+        logger.info(
+            f"Contended areas have been found for gadm layer {layer}. They will be treated according to {contended_flag} option"
+        )
+
+        # NOTE: in these options GID_0 is not changed because it is modified below
+        if contended_flag == "drop":
+            geo_df.drop(geo_df_non_std.index, inplace=True)
+        elif contended_flag != "set_by_country":
+            # "set_by_country" option is the default; if this elif applies, the desired option falls back to the default
+            logger.warning(
+                f"Value '{contended_flag}' for option contended_flag is not recognized.\n"
+                + "Fallback to 'set_by_country'"
+            )
+
+    # force GID_0 to be the country code for the relevant countries
+    geo_df["GID_0"] = cc
+
+    # country shape should have a single geometry
+    if (layer == 0) and (geo_df.shape[0] > 1):
+        logger.warning(
+            f"Country shape is composed of multiple shapes that are being merged in agreement with the contended_flag option '{contended_flag}'"
+        )
+        # take the first row only to re-define geometry keeping other columns
+        geo_df = geo_df.iloc[[0]].set_geometry([geo_df.unary_union])
+
+    # debug output to file
+    if output_nonstd_to_csv and not geo_df_non_std.empty:
+        geo_df_non_std.to_csv(
+            f"resources/non_standard_gadm{layer}_{cc}_raw.csv", index=False
+        )
+
+    return geo_df
+
+
+def get_gadm_layer(
+    country_list,
+    layer_id,
+    geo_crs,
+    file_prefix,
+    gadm_url_prefix,
+    gadm_input_file_args,
+    contended_flag,
+    update=False,
+    out_logging=False,
+):
     """
     Function to retrieve a specific layer id of a geopackage for a selection of
     countries.

     Parameters
     ----------
     country_list : str
         List of the countries
     layer_id : int
         Layer to consider in the format GID_{layer_id}.
         When the requested layer_id is greater than the last available layer, then the last layer is selected.
When a negative value is requested, then, the last layer is requested + geo_crs: str + General geographic projection + file_prefix : str + file prefix string + gadm_url_prefix : str + gadm url prefix + gadm_input_file_args: list[str] + gadm input file arguments list + contended_flag : str + contended areas + update : bool + Update = true, forces re-download of files + out_logging : bool + out_logging = true, enables output logging """ - # initialization of the list of geodataframes - geodf_list = [] + # initialization of the list of geo dataframes + geo_df_list = [] for country_code in country_list: # download file gpkg - file_gpkg, name_file = download_gadm(country_code, update, outlogging) + file_gpkg, name_file = download_gadm( + country_code, + file_prefix, + gadm_url_prefix, + gadm_input_file_args, + update, + out_logging, + ) # get layers of a geopackage list_layers = fiona.listlayers(file_gpkg) # get layer name - if layer_id < 0 | layer_id >= len(list_layers): + if layer_id < 0 or layer_id >= len(list_layers): # when layer id is negative or larger than the number of layers, select the last layer layer_id = len(list_layers) - 1 code_layer = np.mod(layer_id, len(list_layers)) - layer_name = ( - f"gadm36_{two_2_three_digits_country(country_code).upper()}_{code_layer}" + layer_name = get_gadm_layer_name( + country_code, file_prefix, layer_id, code_layer ) # read gpkg file - geodf_temp = gpd.read_file(file_gpkg, layer=layer_name) - - # convert country name representation of the main country (GID_0 column) - geodf_temp["GID_0"] = [ - three_2_two_digits_country(twoD_c) for twoD_c in geodf_temp["GID_0"] - ] + geo_df_temp = gpd.read_file( + file_gpkg, layer=layer_name, engine="pyogrio" + ).to_crs(geo_crs) + + country_sub_index = "" + if file_prefix == "gadm41_": + country_sub_index = f"GID_{layer_id}" + geo_df_temp = filter_gadm( + geo_df=geo_df_temp, + layer=layer_id, + cc=country_code, + contended_flag=contended_flag, + output_nonstd_to_csv=False, + ) + elif file_prefix == "gadm36_": + country_sub_index = f"GID_{code_layer}" + geo_df_temp["GID_0"] = [ + three_2_two_digits_country(twoD_c) for twoD_c in geo_df_temp["GID_0"] + ] + else: + raise Exception( + f"The requested GADM data version {file_prefix} does not exist." + ) - # create a subindex column that is useful - # in the GADM processing of sub-national zones - geodf_temp["GADM_ID"] = geodf_temp[f"GID_{code_layer}"] + geo_df_temp["GADM_ID"] = geo_df_temp[country_sub_index] - # concatenate geodataframes - geodf_list = pd.concat([geodf_list, geodf_temp]) + # append geo data frames + geo_df_list.append(geo_df_temp) - geodf_gadm = gpd.GeoDataFrame(pd.concat(geodf_list, ignore_index=True)) - geodf_gadm.set_crs(geodf_list[0].crs, inplace=True) + geo_df_gadm = gpd.GeoDataFrame(pd.concat(geo_df_list, ignore_index=True)) + geo_df_gadm.set_crs(geo_crs, inplace=True) - return geodf_gadm + return geo_df_gadm def locate_bus( coords, co, gadm_level, + geo_crs, + file_prefix, + gadm_url_prefix, + gadm_input_file_args, + contended_flag, path_to_gadm=None, + update=False, + out_logging=False, gadm_clustering=False, ): """ @@ -1037,6 +1198,28 @@ def locate_bus( dataseries with 2 rows x & y representing the longitude and latitude co: string (code for country where coords are MA Morocco) code of the countries where the coordinates are + gadm_level : int + Layer to consider in the format GID_{layer_id}. + When the requested layer_id is greater than the last available layer, then the last layer is selected. 
+        When a negative value is requested, then, the last layer is requested
+    geo_crs : str
+        General geographic projection
+    file_prefix : str
+        file prefix string
+    gadm_url_prefix: str
+        gadm url prefix
+    gadm_input_file_args: list[str]
+        gadm input file arguments list
+    contended_flag : str
+        contended areas
+    path_to_gadm : str
+        path to gadm
+    update : bool
+        Update = true, forces re-download of files
+    out_logging : bool
+        out_logging = true, enables output logging
+    gadm_clustering : bool
+        gadm_clustering = true, to enable clustering
     """
     col = "name"
     if not gadm_clustering:
@@ -1054,7 +1237,17 @@
             lambda name: three_2_two_digits_country(name[:3]) + name[3:]
         )
     else:
-        gdf = get_gadm_layer(co, gadm_level)
+        gdf = get_gadm_layer(
+            co,
+            gadm_level,
+            geo_crs,
+            file_prefix,
+            gadm_url_prefix,
+            gadm_input_file_args,
+            contended_flag,
+            update,
+            out_logging,
+        )
         col = "GID_{}".format(gadm_level)

         # gdf.set_index("GADM_ID", inplace=True)
diff --git a/scripts/build_demand_profiles.py b/scripts/build_demand_profiles.py
index faa1965c0..76fd324b9 100644
--- a/scripts/build_demand_profiles.py
+++ b/scripts/build_demand_profiles.py
@@ -41,8 +41,7 @@
 it returns a csv file called "demand_profiles.csv", that allocates the load to
 the buses of the network according to GDP and population.
 """
-import os
-import os.path
+import pathlib
 from itertools import product

 import geopandas as gpd
@@ -117,10 +116,8 @@ def get_load_paths_gegis(ssp_parentfolder, config):
     prediction_year = config.get("load_options")["prediction_year"]
     ssp = config.get("load_options")["ssp"]

-    scenario_path = os.path.join(ssp_parentfolder, ssp)
-
     load_paths = []
-    load_dir = os.path.join(
+    load_dir = get_path(
         ssp_parentfolder,
         str(ssp),
         str(prediction_year),
@@ -131,12 +128,12 @@ def get_load_paths_gegis(ssp_parentfolder, config):
     for continent in region_load:
         sel_ext = ".nc"
         for ext in [".nc", ".csv"]:
-            load_path = os.path.join(str(load_dir), str(continent) + str(ext))
-            if os.path.exists(load_path):
+            load_path = get_path(load_dir, str(continent) + str(ext))
+            if pathlib.Path(load_path).exists():
                 sel_ext = ext
                 break
         file_name = str(continent) + str(sel_ext)
-        load_path = os.path.join(str(load_dir), file_name)
+        load_path = get_path(load_dir, file_name)
         load_paths.append(load_path)
         file_names.append(file_name)

diff --git a/scripts/build_shapes.py b/scripts/build_shapes.py
index f6b4d5874..1057633bb 100644
--- a/scripts/build_shapes.py
+++ b/scripts/build_shapes.py
@@ -11,7 +11,6 @@
 from itertools import takewhile
 from operator import attrgetter

-import fiona
 import geopandas as gpd
 import numpy as np
 import pandas as pd
@@ -24,12 +23,12 @@
     configure_logging,
     create_logger,
     get_current_directory_path,
+    get_gadm_layer,
     get_path,
     mock_snakemake,
     sets_path_to_root,
     three_2_two_digits_country,
     two_2_three_digits_country,
-    two_digits_2_name_country,
 )
 from numba import njit
 from numba.core import types
@@ -47,197 +46,6 @@
 logger = create_logger(__name__)


-def get_GADM_filename(country_code):
-    """
-    Function to get the GADM filename given the country code.
- """ - special_codes_GADM = { - "XK": "XKO", # kosovo - "CP": "XCL", # clipperton island - "SX": "MAF", # sint maartin - "TF": "ATF", # french southern territories - "AX": "ALA", # aland - "IO": "IOT", # british indian ocean territory - "CC": "CCK", # cocos island - "NF": "NFK", # norfolk - "PN": "PCN", # pitcairn islands - "JE": "JEY", # jersey - "XS": "XSP", # spratly - "GG": "GGY", # guernsey - "UM": "UMI", # united states minor outlying islands - "SJ": "SJM", # svalbard - "CX": "CXR", # Christmas island - } - - if country_code in special_codes_GADM: - return f"gadm41_{special_codes_GADM[country_code]}" - else: - return f"gadm41_{two_2_three_digits_country(country_code)}" - - -def download_GADM(country_code, update=False, out_logging=False): - """ - Download gpkg file from GADM for a given country code. - - Parameters - ---------- - country_code : str - Two letter country codes of the downloaded files - update : bool - Update = true, forces re-download of files - - Returns - ------- - gpkg file per country - """ - GADM_filename = get_GADM_filename(country_code) - GADM_url = f"https://geodata.ucdavis.edu/gadm/gadm4.1/gpkg/{GADM_filename}.gpkg" - - GADM_inputfile_gpkg = get_path( - get_current_directory_path(), - "data", - "gadm", - GADM_filename, - GADM_filename + ".gpkg", - ) # Input filepath gpkg - - if not pathlib.Path(GADM_inputfile_gpkg).exists() or update is True: - if out_logging: - logger.warning( - f"Stage 5 of 5: {GADM_filename} of country {two_digits_2_name_country(country_code)} does not exist, downloading to {GADM_inputfile_gpkg}" - ) - # create data/osm directory - build_directory(GADM_inputfile_gpkg) - - try: - r = requests.get(GADM_url, stream=True, timeout=300) - except (requests.exceptions.ConnectionError, requests.exceptions.Timeout): - raise Exception( - f"GADM server is down at {GADM_url}. Data needed for building shapes can't be extracted.\n\r" - ) - except Exception as exception: - raise Exception( - f"An error happened when trying to load GADM data by {GADM_url}.\n\r" - + str(exception) - + "\n\r" - ) - else: - with open(GADM_inputfile_gpkg, "wb") as f: - shutil.copyfileobj(r.raw, f) - - return GADM_inputfile_gpkg, GADM_filename - - -def filter_gadm( - geodf, - layer, - cc, - contended_flag, - output_nonstd_to_csv=False, -): - # identify non standard geodf rows - geodf_non_std = geodf[geodf["GID_0"] != two_2_three_digits_country(cc)].copy() - - if not geodf_non_std.empty: - logger.info( - f"Contended areas have been found for gadm layer {layer}. 
They will be treated according to {contended_flag} option" - ) - - # NOTE: in these options GID_0 is not changed because it is modified below - if contended_flag == "drop": - geodf.drop(geodf_non_std.index, inplace=True) - elif contended_flag != "set_by_country": - # "set_by_country" option is the default; if this elif applies, the desired option falls back to the default - logger.warning( - f"Value '{contended_flag}' for option contented_flag is not recognized.\n" - + "Fallback to 'set_by_country'" - ) - - # force GID_0 to be the country code for the relevant countries - geodf["GID_0"] = cc - - # country shape should have a single geometry - if (layer == 0) and (geodf.shape[0] > 1): - logger.warning( - f"Country shape is composed by multiple shapes that are being merged in agreement to contented_flag option '{contended_flag}'" - ) - # take the first row only to re-define geometry keeping other columns - geodf = geodf.iloc[[0]].set_geometry([geodf.unary_union]) - - # debug output to file - if output_nonstd_to_csv and not geodf_non_std.empty: - geodf_non_std.to_csv( - f"resources/non_standard_gadm{layer}_{cc}_raw.csv", index=False - ) - - return geodf - - -def get_GADM_layer( - country_list, - layer_id, - geo_crs, - contended_flag, - update=False, - outlogging=False, -): - """ - Function to retrieve a specific layer id of a geopackage for a selection of - countries. - - Parameters - ---------- - country_list : str - List of the countries - layer_id : int - Layer to consider in the format GID_{layer_id}. - When the requested layer_id is greater than the last available layer, then the last layer is selected. - When a negative value is requested, then, the last layer is requested - """ - # initialization of the geoDataFrame - geodf_list = [] - - for country_code in country_list: - # Set the current layer id (cur_layer_id) to global layer_id - cur_layer_id = layer_id - - # download file gpkg - file_gpkg, name_file = download_GADM(country_code, update, outlogging) - - # get layers of a geopackage - list_layers = fiona.listlayers(file_gpkg) - - # get layer name - if (cur_layer_id < 0) or (cur_layer_id >= len(list_layers)): - # when layer id is negative or larger than the number of layers, select the last layer - cur_layer_id = len(list_layers) - 1 - - # read gpkg file - geodf_temp = gpd.read_file( - file_gpkg, layer="ADM_ADM_" + str(cur_layer_id), engine="pyogrio" - ).to_crs(geo_crs) - - geodf_temp = filter_gadm( - geodf=geodf_temp, - layer=cur_layer_id, - cc=country_code, - contended_flag=contended_flag, - output_nonstd_to_csv=False, - ) - - # create a subindex column that is useful - # in the GADM processing of sub-national zones - geodf_temp["GADM_ID"] = geodf_temp[f"GID_{cur_layer_id}"] - - # append geodataframes - geodf_list.append(geodf_temp) - - geodf_GADM = gpd.GeoDataFrame(pd.concat(geodf_list, ignore_index=True)) - geodf_GADM.set_crs(geo_crs) - - return geodf_GADM - - def _simplify_polys(polys, minarea=0.01, tolerance=0.01, filterremote=False): "Function to simplify the shape polygons" if isinstance(polys, MultiPolygon): @@ -257,17 +65,30 @@ def _simplify_polys(polys, minarea=0.01, tolerance=0.01, filterremote=False): return polys.simplify(tolerance=tolerance) -def countries(countries, geo_crs, contended_flag, update=False, out_logging=False): +def countries( + countries, + layer_id, + geo_crs, + file_prefix, + gadm_url_prefix, + gadm_input_file_args, + contended_flag, + update, + out_logging, +): "Create country shapes" if out_logging: logger.info("Stage 1 of 5: Create country shapes") 
# download data if needed and get the layer id 0, corresponding to the countries - df_countries = get_GADM_layer( + df_countries = get_gadm_layer( countries, 0, geo_crs, + file_prefix, + gadm_url_prefix, + gadm_input_file_args, contended_flag, update, out_logging, @@ -1247,6 +1068,9 @@ def gadm( gdp_method, countries, geo_crs, + file_prefix, + gadm_url_prefix, + gadm_input_file_args, contended_flag, mem_mb, layer_id=2, @@ -1259,7 +1083,17 @@ def gadm( logger.info("Stage 3 of 5: Creation GADM GeoDataFrame") # download data if needed and get the desired layer_id - df_gadm = get_GADM_layer(countries, layer_id, geo_crs, contended_flag, update) + df_gadm = get_gadm_layer( + countries, + layer_id, + geo_crs, + file_prefix, + gadm_url_prefix, + gadm_input_file_args, + contended_flag, + update, + out_logging, + ) # select and rename columns df_gadm.rename(columns={"GID_0": "country"}, inplace=True) @@ -1296,7 +1130,7 @@ def gadm( name_file_nc="GDP_PPP_1990_2015_5arcmin_v2.nc", ) - # renaming 3 letter to 2 letter ISO code before saving GADM file + # renaming three-letter to two-letter ISO code before saving GADM file # In the case of a contested territory in the form 'Z00.00_0', save 'AA.00_0' # Include bugfix for the case of 'XXX00_0' where the "." is missing, such as for Ghana df_gadm["GADM_ID"] = df_gadm["country"] + df_gadm["GADM_ID"].str[3:].apply( @@ -1317,7 +1151,6 @@ def gadm( change_to_script_dir(__file__) snakemake = mock_snakemake("build_shapes") sets_path_to_root("pypsa-earth") - configure_logging(snakemake) out = snakemake.output @@ -1337,10 +1170,17 @@ def gadm( contended_flag = snakemake.params.build_shape_options["contended_flag"] worldpop_method = snakemake.params.build_shape_options["worldpop_method"] gdp_method = snakemake.params.build_shape_options["gdp_method"] + file_prefix = snakemake.params.build_shape_options["gadm_file_prefix"] + gadm_url_prefix = snakemake.params.build_shape_options["gadm_url_prefix"] + gadm_input_file_args = ["data", "gadm"] country_shapes = countries( countries_list, + layer_id, geo_crs, + file_prefix, + gadm_url_prefix, + gadm_input_file_args, contended_flag, update, out_logging, @@ -1363,6 +1203,9 @@ def gadm( gdp_method, countries_list, geo_crs, + file_prefix, + gadm_url_prefix, + gadm_input_file_args, contended_flag, mem_mb, layer_id, diff --git a/scripts/prepare_network.py b/scripts/prepare_network.py index 59c34ea3a..7488b748d 100755 --- a/scripts/prepare_network.py +++ b/scripts/prepare_network.py @@ -103,9 +103,11 @@ def download_emission_data(): ) pathlib.Path(file_path).unlink(missing_ok=True) return "v60_CO2_excl_short-cycle_org_C_1970_2018.xls" - except: - logger.error(f"Failed download resource from '{url}'.") - return False + except requests.exceptions.RequestException as e: + logger.error( + f"Failed download resource from '{url}' with exception message '{e}'." + ) + raise SystemExit(e) def emission_extractor(filename, emission_year, country_names): @@ -120,7 +122,7 @@ def emission_extractor(filename, emission_year, country_names): emission_year : int Year of CO2 emissions country_names : numpy.ndarray - Two letter country codes of analysed countries. + Two-letter country codes of analysed countries. 
Returns ------- @@ -128,8 +130,8 @@ def emission_extractor(filename, emission_year, country_names): """ # data reading process - datapath = get_path(get_current_directory_path(), "data", filename) - df = pd.read_excel(datapath, sheet_name="v6.0_EM_CO2_fossil_IPCC1996", skiprows=8) + data_path = get_path(get_current_directory_path(), "data", filename) + df = pd.read_excel(data_path, sheet_name="v6.0_EM_CO2_fossil_IPCC1996", skiprows=8) df.columns = df.iloc[0] df = df.set_index("Country_code_A3") df = df.loc[ @@ -192,7 +194,7 @@ def set_line_s_max_pu(n, s_max_pu): logger.info(f"N-1 security margin of lines set to {s_max_pu}") -def set_transmission_limit(n, ll_type, factor, costs, Nyears=1): +def set_transmission_limit(n, ll_type, factor, costs): links_dc_b = n.links.carrier == "DC" if not n.links.empty else pd.Series() _lines_s_nom = ( @@ -430,7 +432,7 @@ def set_line_nom_max(n, s_nom_max_set=np.inf, p_nom_max_set=np.inf): break ll_type, factor = snakemake.wildcards.ll[0], snakemake.wildcards.ll[1:] - set_transmission_limit(n, ll_type, factor, costs, Nyears) + set_transmission_limit(n, ll_type, factor, costs) set_line_nom_max( n, diff --git a/scripts/retrieve_databundle_light.py b/scripts/retrieve_databundle_light.py index d425a781d..84b1c89b4 100644 --- a/scripts/retrieve_databundle_light.py +++ b/scripts/retrieve_databundle_light.py @@ -162,8 +162,10 @@ def download_and_unzip_zenodo(config, root_path, hot_run=True, disable_progress= zipObj.extractall(path=destination) pathlib.Path(file_path).unlink(missing_ok=True) logger.info(f"Downloaded resource '{resource}' from cloud '{url}'.") - except: - logger.warning(f"Failed download resource '{resource}' from cloud '{url}'.") + except Exception as e: + logger.warning( + f"Failed download resource '{resource}' from cloud '{url}' with exception message '{e}'." + ) return False return True @@ -336,9 +338,9 @@ def get_first_day_of_previous_month(date): pathlib.Path(inner_zipname).unlink(missing_ok=True) logger.info(f"{resource} - Successfully unzipped file '{fzip}'") - except: + except Exception as e: logger.warning( - f"Exception while unzipping file '{fzip}' for {resource_iter}: skipped file" + f"Exception while unzipping file '{fzip}' for {resource_iter} with exception message '{e}': skipped file" ) # close and remove outer zip file @@ -351,9 +353,9 @@ def get_first_day_of_previous_month(date): downloaded = True break - except: + except Exception as e: logger.warning( - f"Failed download resource '{resource_iter}' from cloud '{url_iter}'." + f"Failed download resource '{resource_iter}' from cloud '{url_iter}' with exception message '{e}'." ) current_first_day = get_first_day_of_previous_month(current_first_day) @@ -411,8 +413,10 @@ def download_and_unpack( pathlib.Path(file_path).unlink(missing_ok=True) logger.info(f"Downloaded resource '{resource}' from cloud '{url}'.") return True - except: - logger.warning(f"Failed download resource '{resource}' from cloud '{url}'.") + except Exception as e: + logger.warning( + f"Failed download resource '{resource}' from cloud '{url}' with exception message '{e}'." 
+ ) return False @@ -868,8 +872,10 @@ def merge_hydrobasins_shape(config_hydrobasin, hydrobasins_level): config_bundles[b_name], root_path, disable_progress=disable_progress ): downloaded_bundle = True - except Exception: - logger.warning(f"Error in downloading bundle {b_name} - host {host}") + except Exception as e: + logger.warning( + f"Error in downloading bundle {b_name} - host {host} - with exception message '{e}'" + ) if downloaded_bundle: downloaded_bundles.append(b_name) diff --git a/test/test_helpers.py b/test/test_helpers.py index 4d65adea2..7cee35c8f 100644 --- a/test/test_helpers.py +++ b/test/test_helpers.py @@ -17,6 +17,7 @@ get_temp_file, ) +import fiona import numpy as np import pandas as pd @@ -27,8 +28,11 @@ build_directory, change_to_script_dir, country_name_2_two_digits, + download_gadm, get_conv_factors, get_current_directory_path, + get_gadm_filename, + get_gadm_url, get_path, get_path_size, get_relative_path, @@ -274,6 +278,17 @@ def test_get_path(): "sub_path_5", "file.nc", ) + file_name_path_one_list_unpacked = get_path( + path_cwd, + *[ + "sub_path_1", + "sub_path_2", + "sub_path_3", + "sub_path_4", + "sub_path_5", + "file.nc", + ], + ) path_name_path_two = get_path( pathlib.Path(__file__).parent, "..", "logs", "rule.log" ) @@ -286,6 +301,15 @@ def test_get_path(): "sub_path_5", "file.nc", ) + assert str(file_name_path_one_list_unpacked) == os.path.join( + path_cwd, + "sub_path_1", + "sub_path_2", + "sub_path_3", + "sub_path_4", + "sub_path_5", + "file.nc", + ) assert str(path_name_path_two) == str( pathlib.Path(__file__).parent.joinpath("..", "logs", "rule.log") ) @@ -467,3 +491,90 @@ def test_aggregate_fuels(): Verify what is returned by aggregate_fuels. """ assert np.isnan(aggregate_fuels("non-industry")) + + +def test_get_gadm_filename(): + """ + Verify what is returned by get_gadm_filename. + """ + # Kosovo + assert get_gadm_filename("XK") == "gadm41_XKO" + # Clipperton island + assert get_gadm_filename("CP") == "gadm41_XCL" + # Saint-Martin + assert get_gadm_filename("SX") == "gadm41_MAF" + # French Southern Territories + assert get_gadm_filename("TF") == "gadm41_ATF" + # Aland + assert get_gadm_filename("AX") == "gadm41_ALA" + # British Indian Ocean Territory + assert get_gadm_filename("IO") == "gadm41_IOT" + # Cocos Islands + assert get_gadm_filename("CC") == "gadm41_CCK" + # Norfolk + assert get_gadm_filename("NF") == "gadm41_NFK" + # Pitcairn Islands + assert get_gadm_filename("PN") == "gadm41_PCN" + # Jersey + assert get_gadm_filename("JE") == "gadm41_JEY" + # Spratly Islands + assert get_gadm_filename("XS") == "gadm41_XSP" + # Guernsey + assert get_gadm_filename("GG") == "gadm41_GGY" + # United States Minor Outlying Islands + assert get_gadm_filename("UM") == "gadm41_UMI" + # Svalbard islands + assert get_gadm_filename("SJ") == "gadm41_SJM" + # Christmas island + assert get_gadm_filename("CX") == "gadm41_CXR" + # Afghanistan + assert get_gadm_filename("AF") == "gadm41_AFG" + # American Samoa + assert get_gadm_filename("AS") == "gadm41_ASM" + # Aruba + assert get_gadm_filename("AW") == "gadm41_ABW" + # Germany + assert get_gadm_filename("DE") == "gadm41_DEU" + # Micronesia (Federated States of) + assert get_gadm_filename("FM") == "gadm41_FSM" + # Micronesia (Federated States of) with different file_prefix + assert get_gadm_filename("FM", file_prefix="gadm456_") == "gadm456_FSM" + + +def test_get_gadm_url(): + """ + Verify what is returned by get_gadm_url. 
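+
+    The URL is expected to be the plain concatenation of the URL prefix,
+    the GADM filename and the ".gpkg" suffix, mirroring the assertion
+    below:
+
+    >>> get_gadm_url("https://geodata.ucdavis.edu/gadm/gadm4.1/gpkg/", "gadm41_FSM")
+    'https://geodata.ucdavis.edu/gadm/gadm4.1/gpkg/gadm41_FSM.gpkg'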
+ """ + gadm_filename = get_gadm_filename("FM") + url_gadm41 = get_gadm_url( + "https://geodata.ucdavis.edu/gadm/gadm4.1/gpkg/", + gadm_filename, + ) + assert ( + url_gadm41 + == f"https://geodata.ucdavis.edu/gadm/gadm4.1/gpkg/{gadm_filename}.gpkg" + ) + + +def test_download_gadm(): + """ + Verify what is returned by download_gadm. + """ + file_prefix_41 = "gadm41_" + gadm_url_prefix_41 = "https://geodata.ucdavis.edu/gadm/gadm4.1/gpkg/" + gadm_input_file_args_41 = ["data", "gadm"] + gadm_input_file_gpkg_41, gadm_filename_41 = download_gadm( + "XK", + file_prefix_41, + gadm_url_prefix_41, + gadm_input_file_args_41, + update=True, + ) + assert gadm_input_file_gpkg_41 == get_path( + path_cwd, "data/gadm/gadm41_XKO/gadm41_XKO.gpkg" + ) + assert gadm_filename_41 == "gadm41_XKO" + list_layers_41 = fiona.listlayers(gadm_input_file_gpkg_41) + assert list_layers_41[0] == "ADM_ADM_0" + assert list_layers_41[1] == "ADM_ADM_1" + assert list_layers_41[2] == "ADM_ADM_2" From d4697ac99b86974a92fa1327a62e4409aeff44ef Mon Sep 17 00:00:00 2001 From: Fabrizio Finozzi Date: Thu, 11 Jul 2024 13:55:36 +0200 Subject: [PATCH 25/40] modify workflow files --- .github/workflows/ci-linux.yaml | 4 ++-- .github/workflows/ci-mac.yaml | 4 ++-- .github/workflows/ci-windows.yaml | 4 ++-- 3 files changed, 6 insertions(+), 6 deletions(-) diff --git a/.github/workflows/ci-linux.yaml b/.github/workflows/ci-linux.yaml index 08a7da822..1b554109c 100644 --- a/.github/workflows/ci-linux.yaml +++ b/.github/workflows/ci-linux.yaml @@ -3,10 +3,10 @@ name: CI-linux on: push: branches: - - oet_main + - main pull_request: branches: - - oet_main + - main schedule: - cron: "0 5 * * TUE" diff --git a/.github/workflows/ci-mac.yaml b/.github/workflows/ci-mac.yaml index bbdf4e957..347c344bc 100644 --- a/.github/workflows/ci-mac.yaml +++ b/.github/workflows/ci-mac.yaml @@ -3,10 +3,10 @@ name: CI-mac on: push: branches: - - oet_main + - main pull_request: branches: - - oet_main + - main schedule: - cron: "0 5 * * TUE" diff --git a/.github/workflows/ci-windows.yaml b/.github/workflows/ci-windows.yaml index 7697306e3..5b7fd7d37 100644 --- a/.github/workflows/ci-windows.yaml +++ b/.github/workflows/ci-windows.yaml @@ -3,10 +3,10 @@ name: CI-windows on: push: branches: - - oet_main + - main pull_request: branches: - - oet_main + - main schedule: - cron: "0 5 * * TUE" From feb3b63b56553637a83b235a7fba7a7be4313358 Mon Sep 17 00:00:00 2001 From: Fabrizio Finozzi <167071962+finozzifa@users.noreply.github.com> Date: Thu, 1 Aug 2024 08:56:45 +0200 Subject: [PATCH 26/40] Unit test base network (#5) * unit test for get_country * add unit test for _load_buses_from_osm * add unit test for _load_lines_from_osm * unit test for load_transformers_from_osm * add unit test for _load_converters_from_osm * replace path_cwd with tmpdir * initial changes for _get_linetypes_config * finalize unit test for _get_linetypes_config * finalize unit test for _set_electrical_parameters_lines and _set_electrical_parameters_dc_lines * add unit test for _get_linetype_by_voltage * add unit test for _set_electrical_parameters_links * new unit tests * add documentation in test_base_network.py * update how reference and input dataframes are built * update how reference and input dataframes are built - 2 --- Snakefile | 2 +- scripts/base_network.py | 88 +------ test/test_base_network.py | 518 ++++++++++++++++++++++++++++++++++++++ 3 files changed, 523 insertions(+), 85 deletions(-) create mode 100644 test/test_base_network.py diff --git a/Snakefile b/Snakefile index 
8f831b436..e016aef11 100644 --- a/Snakefile +++ b/Snakefile @@ -309,7 +309,7 @@ rule build_bus_regions: base_network="networks/" + RDIR + "base.nc", #gadm_shapes="resources/" + RDIR + "shapes/MAR2.geojson", #using this line instead of the following will test updated gadm shapes for MA. - #To use: downlaod file from the google drive and place it in resources/" + RDIR + "shapes/ + #To use: download file from the google drive and place it in resources/" + RDIR + "shapes/ #Link: https://drive.google.com/drive/u/1/folders/1dkW1wKBWvSY4i-XEuQFFBj242p0VdUlM gadm_shapes="resources/" + RDIR + "shapes/gadm_shapes.geojson", output: diff --git a/scripts/base_network.py b/scripts/base_network.py index 8c2131512..a38527c9c 100644 --- a/scripts/base_network.py +++ b/scripts/base_network.py @@ -61,7 +61,6 @@ import numpy as np import pandas as pd import pypsa -import scipy as sp import shapely.prepared import shapely.wkt from _helpers import ( @@ -77,13 +76,6 @@ logger = create_logger(__name__) -def _get_oid(df): - if "tags" in df.columns: - return df.tags.str.extract('"oid"=>"(\\d+)"', expand=False) - else: - return pd.Series(np.nan, df.index) - - def get_country(df): if "tags" in df.columns: return df.tags.str.extract('"country"=>"([A-Z]{2})"', expand=False) @@ -91,28 +83,6 @@ def get_country(df): return pd.Series(np.nan, df.index) -def _find_closest_links(links, new_links, distance_upper_bound=1.5): - treecoords = np.asarray( - [np.asarray(shapely.wkt.loads(s))[[0, -1]].flatten() for s in links.geometry] - ) - querycoords = np.vstack( - [new_links[["x1", "y1", "x2", "y2"]], new_links[["x2", "y2", "x1", "y1"]]] - ) - tree = sp.spatial.KDTree(treecoords) - dist, ind = tree.query(querycoords, distance_upper_bound=distance_upper_bound) - found_b = ind < len(links) - found_i = np.arange(len(new_links) * 2)[found_b] % len(new_links) - - return ( - pd.DataFrame( - dict(D=dist[found_b], i=links.index[ind[found_b] % len(links)]), - index=new_links.index[found_i], - ) - .sort_values(by="D")[lambda ds: ~ds.index.duplicated(keep="first")] - .sort_index()["i"] - ) - - def _load_buses_from_osm(fp_buses): buses = ( read_csv_nafix(fp_buses, dtype=dict(bus_id="str", voltage="float")) @@ -133,20 +103,6 @@ def _load_buses_from_osm(fp_buses): return buses -def add_underwater_links(n, fp_offshore_shapes): - if not hasattr(n.links, "geometry"): - n.links["underwater_fraction"] = 0.0 - else: - offshore_shape = gpd.read_file(fp_offshore_shapes).unary_union - if offshore_shape is None or offshore_shape.is_empty: - n.links["underwater_fraction"] = 0.0 - else: - links = gpd.GeoSeries(n.links.geometry.dropna().map(shapely.wkt.loads)) - n.links["underwater_fraction"] = ( - links.intersection(offshore_shape).length / links.length - ) - - def _set_dc_underwater_fraction(lines_or_links, fp_offshore_shapes): # HVDC part always has some links as converters # excluding probably purely DC networks which are currently somewhat exotic @@ -205,37 +161,7 @@ def _load_lines_from_osm(fp_osm_lines): return lines -# TODO Seems to be not needed anymore -def _load_links_from_osm(fp_osm_converters, base_network_config, voltages_config): - # the links file can be empty - if get_path_size(fp_osm_converters) == 0: - links = pd.DataFrame() - return links - - links = ( - read_csv_nafix( - fp_osm_converters, - dtype=dict( - line_id="str", - bus0="str", - bus1="str", - underground="bool", - under_construction="bool", - ), - ) - .set_index("line_id") - .rename(columns=dict(voltage="v_nom", circuits="num_parallel")) - ) - - links["length"] /= 1e3 # m to km 
conversion - links["v_nom"] /= 1e3 # V to kV conversion - links = links.loc[:, ~links.columns.str.contains("^Unnamed")] # remove unnamed col - # links = _remove_dangling_branches(links, buses) # TODO: add dangling branch removal? - - return links - - -def _load_converters_from_osm(fp_osm_converters, buses): +def _load_converters_from_osm(fp_osm_converters): # the links file can be empty if get_path_size(fp_osm_converters) == 0: converters = pd.DataFrame() @@ -254,7 +180,7 @@ def _load_converters_from_osm(fp_osm_converters, buses): return converters -def _load_transformers_from_osm(fp_osm_transformers, buses): +def _load_transformers_from_osm(fp_osm_transformers): transformers = ( read_csv_nafix( fp_osm_transformers, @@ -405,12 +331,6 @@ def _set_lines_s_nom_from_linetypes(n): ) * n.lines.eval("v_nom * num_parallel") -def _remove_dangling_branches(branches, buses): - return pd.DataFrame( - branches.loc[branches.bus0.isin(buses.index) & branches.bus1.isin(buses.index)] - ) - - def _set_countries_and_substations(inputs, base_network_config, countries_config, n): countries = countries_config country_shapes = gpd.read_file(inputs.country_shapes).set_index("name")["geometry"] @@ -492,8 +412,8 @@ def base_network( ): buses = _load_buses_from_osm(inputs.osm_buses).reset_index(drop=True) lines = _load_lines_from_osm(inputs.osm_lines).reset_index(drop=True) - transformers = _load_transformers_from_osm(inputs.osm_transformers, buses) - converters = _load_converters_from_osm(inputs.osm_converters, buses) + transformers = _load_transformers_from_osm(inputs.osm_transformers) + converters = _load_converters_from_osm(inputs.osm_converters) lines_ac = lines[lines.tag_frequency.astype(float) != 0].copy() lines_dc = lines[lines.tag_frequency.astype(float) == 0].copy() diff --git a/test/test_base_network.py b/test/test_base_network.py new file mode 100644 index 000000000..a665ea7cd --- /dev/null +++ b/test/test_base_network.py @@ -0,0 +1,518 @@ +# -*- coding: utf-8 -*- +# SPDX-FileCopyrightText: PyPSA-Earth and PyPSA-Eur Authors +# +# SPDX-License-Identifier: AGPL-3.0-or-later + +# -*- coding: utf-8 -*- + +import pathlib +import sys + +import numpy as np +import pandas as pd + +sys.path.append("./scripts") + +from _helpers import get_path +from base_network import ( + _get_linetype_by_voltage, + _get_linetypes_config, + _load_buses_from_osm, + _load_converters_from_osm, + _load_lines_from_osm, + _load_transformers_from_osm, + _set_electrical_parameters_converters, + _set_electrical_parameters_dc_lines, + _set_electrical_parameters_lines, + _set_electrical_parameters_links, + _set_electrical_parameters_transformers, + get_country, +) + +path_cwd = pathlib.Path.cwd() + +# Common references + +# ---> buses + +df_buses_input = pd.DataFrame( + { + "bus_id": 0, + "station_id": 0, + "voltage": 161000, + "dc": False, + "symbol": "substation", + "under_construction": False, + "tag_substation": "transmission", + "tag_area": 0.0, + "lon": 2.5914, + "lat": 9.3321, + "country": "BJ", + "geometry": "POINT (2.5914 9.3321)", + "substation_lv": True, + }, + index=[0], +) + +df_buses_reference = pd.DataFrame( + { + "bus_id": "0", + "v_nom": 161.0, + "symbol": "substation", + "under_construction": False, + "tag_substation": "transmission", + "tag_area": 0.0, + "lon": 2.5914, + "lat": 9.3321, + "country": "BJ", + "geometry": "POINT (2.5914 9.3321)", + "substation_lv": True, + "carrier": "AC", + "x": 2.5914, + "y": 9.3321, + }, + index=[0], +).set_index("bus_id") + +# ---> converters + +df_converters_input = pd.DataFrame( + { + 
"index": 0, + "converter_id": "convert_20_41", + "bus0": "41", + "bus1": "42", + "underground": False, + "under_construction": False, + "country": "US", + "geometry": "LINESTRING(-122.3787 37.6821, -122.3777 37.6831)", + }, + index=[0], +) + +df_converters_reference = pd.DataFrame( + { + "converter_id": "convert_20_41", + "Unnamed: 0": 0, + "index": 0, + "bus0": "41", + "bus1": "42", + "underground": False, + "under_construction": False, + "country": "US", + "geometry": "LINESTRING(-122.3787 37.6821, -122.3777 37.6831)", + "carrier": "B2B", + "dc": True, + }, + index=[0], +).set_index("converter_id") + +# ---> lines + +df_lines_input = pd.DataFrame( + { + "line_id": ["204361221-1_0", "204361287-1_1"], + "tag_frequency": [50.0, 0.0], + "tag_type": ["line", "line"], + "voltage": [161000, 178658], + "bus0": ["111", "111"], + "bus1": ["0", "0"], + "circuits": (3.0, 3.0), + "length": [110071.89434240988, 118723.89434240988], + "underground": [False, False], + "under_construction": [False, False], + "dc": [False, False], + "country": ["BJ", "BJ"], + "geometry": [ + "LINESTRING (2.6594 10.2042, 2.6594451 10.2042341)", + "LINESTRING (2.6594 10.2042, 2.6594451 10.2042341)", + ], + "bounds": [ + "MULTIPOINT ((2.6594 10.2042), (2.5914 9.3321))", + "MULTIPOINT ((2.6594 10.2042), (2.5914 9.3321))", + ], + "bus_0_coors": ["POINT (2.6594 10.2042)", "POINT (2.6594 10.2042)"], + "bus_1_coors": ["POINT (2.5914 9.3321)", "POINT (2.5914 9.3321)"], + "bus0_lon": [2.6594, 2.6594], + "bus0_lat": [10.2042, 10.2042], + "bus1_lon": [2.5914, 2.5914], + "bus1_lat": [9.3321, 9.3321], + } +) + +df_lines_reference = pd.DataFrame( + { + "line_id": ["204361221-1_0", "204361287-1_1"], + "tag_frequency": [50.0, 0.0], + "tag_type": ["line", "line"], + "v_nom": [161.0, 178.658], + "bus0": ["111", "111"], + "bus1": ["0", "0"], + "num_parallel": [3.0, 3.0], + "length": [110.07189434240988, 118.72389434240988], + "underground": [False, False], + "under_construction": [False, False], + "dc": [False, False], + "country": ["BJ", "BJ"], + "geometry": [ + "LINESTRING (2.6594 10.2042, 2.6594451 10.2042341)", + "LINESTRING (2.6594 10.2042, 2.6594451 10.2042341)", + ], + "bounds": [ + "MULTIPOINT ((2.6594 10.2042), (2.5914 9.3321))", + "MULTIPOINT ((2.6594 10.2042), (2.5914 9.3321))", + ], + "bus_0_coors": ["POINT (2.6594 10.2042)", "POINT (2.6594 10.2042)"], + "bus_1_coors": ["POINT (2.5914 9.3321)", "POINT (2.5914 9.3321)"], + "bus0_lon": [2.6594, 2.6594], + "bus0_lat": [10.2042, 10.2042], + "bus1_lon": [2.5914, 2.5914], + "bus1_lat": [9.3321, 9.3321], + } +).set_index("line_id") + +lines_ac_reference = pd.DataFrame( + { + "tag_frequency": 50.0, + "tag_type": "line", + "v_nom": 161.0, + "bus0": "111", + "bus1": "0", + "num_parallel": 3.0, + "length": 110.07189434240988, + "underground": False, + "under_construction": False, + "dc": False, + "country": "BJ", + "geometry": "LINESTRING (2.6594 10.2042, 2.6594451 10.2042341)", + "bounds": "MULTIPOINT ((2.6594 10.2042), (2.5914 9.3321))", + "bus_0_coors": "POINT (2.6594 10.2042)", + "bus_1_coors": "POINT (2.5914 9.3321)", + "bus0_lon": 2.6594, + "bus0_lat": 10.2042, + "bus1_lon": 2.5914, + "bus1_lat": 9.3321, + "carrier": "AC", + "type": "243-AL1/39-ST1A 20.0", + "s_max_pu": 0.7, + }, + index=[0], +).set_index("tag_frequency") + +lines_dc_reference = pd.DataFrame( + { + "tag_frequency": 0.0, + "tag_type": "line", + "v_nom": 178.658, + "bus0": "111", + "bus1": "0", + "num_parallel": 3.0, + "length": 118.72389434240988, + "underground": False, + "under_construction": False, + "dc": True, + 
"country": "BJ", + "geometry": "LINESTRING (2.6594 10.2042, 2.6594451 10.2042341)", + "bounds": "MULTIPOINT ((2.6594 10.2042), (2.5914 9.3321))", + "bus_0_coors": "POINT (2.6594 10.2042)", + "bus_1_coors": "POINT (2.5914 9.3321)", + "bus0_lon": 2.6594, + "bus0_lat": 10.2042, + "bus1_lon": 2.5914, + "bus1_lat": 9.3321, + "carrier": "DC", + "type": "HVDC XLPE 1000", + "s_max_pu": 0.7, + }, + index=[0], +).set_index("tag_frequency") + +lines_dict = { + "ac_types": { + 132.0: "243-AL1/39-ST1A 20.0", + 220.0: "Al/St 240/40 2-bundle 220.0", + 300.0: "Al/St 240/40 3-bundle 300.0", + 380.0: "Al/St 240/40 4-bundle 380.0", + 500.0: "Al/St 240/40 4-bundle 380.0", + 750.0: "Al/St 560/50 4-bundle 750.0", + }, + "dc_types": { + 500.0: "HVDC XLPE 1000", + }, + "s_max_pu": 0.7, + "s_nom_max": np.inf, + "length_factor": 1.25, + "under_construction": "zero", +} + +# ---> links + +links_dict = { + "p_max_pu": 2.1, + "p_nom_max": np.inf, + "under_construction": "zero", +} + +# ---> transformers + +transformers_dict = { + "x": 0.1, + "s_nom": 2000.0, + "type": "", +} + +df_transformers_input = pd.DataFrame( + { + "line_id": "transf_1_0", + "bus0": "1", + "bus1": "2", + "voltage_bus0": 161000, + "voltage_bus1": 330000, + "country": "BJ", + "geometry": "LINESTRING(2.648 6.7394, 2.649 6.7404)", + "bounds": "MULTIPOINT((2.648 6.7394), (2.649 6.7404))", + "bus_0_coors": "POINT(2.648 6.7394)", + "bus_1_coors": "POINT(2.649 6.7404)", + "bus0_lon": 2.648, + "bus0_lat": 6.7394, + "bus1_lon": 2.649, + "bus1_lat": 6.7404, + }, + index=[0], +) + +df_transformers_reference = pd.DataFrame( + { + "transformer_id": "transf_1_0", + "Unnamed: 0": 0, + "bus0": "1", + "bus1": "2", + "voltage_bus0": 161000, + "voltage_bus1": 330000, + "country": "BJ", + "geometry": "LINESTRING(2.648 6.7394, 2.649 6.7404)", + "bounds": "MULTIPOINT((2.648 6.7394), (2.649 6.7404))", + "bus_0_coors": "POINT(2.648 6.7394)", + "bus_1_coors": "POINT(2.649 6.7404)", + "bus0_lon": 2.648, + "bus0_lat": 6.7394, + "bus1_lon": 2.649, + "bus1_lat": 6.7404, + }, + index=[0], +).set_index("transformer_id") + +# ---> voltages + +voltages_list = [132.0, 220.0, 300.0, 380.0, 500.0, 750.0] + + +def test_get_country(): + """ + Verify what returned by get_country() + """ + data_list = [['"country"=>"NG"'], ['"country"=>"CH"'], ['"country"=>"AU"']] + df_exercise_with_tags = pd.DataFrame(data_list, columns=["tags"]) + df_exercise_no_tags = pd.DataFrame(data_list, columns=["other"]) + series_with_tags = get_country(df_exercise_with_tags) + reference_series_with_tags = pd.Series(["NG", "CH", "AU"]) + comparison_series_with_tags = series_with_tags.compare(reference_series_with_tags) + series_no_tags = get_country(df_exercise_no_tags) + reference_series_no_tags = pd.Series([np.nan, np.nan, np.nan]) + comparison_series_no_tags = series_no_tags.compare(reference_series_no_tags) + assert comparison_series_with_tags.size == 0 + assert comparison_series_no_tags.size == 0 + + +def test_load_buses_from_osm(tmpdir): + """ + Verify what returned by _load_buses_from_osm. + """ + file_path = get_path(tmpdir, "buses_exercise.csv") + df_buses_input.to_csv(file_path) + df_buses_output = _load_buses_from_osm(file_path) + df_buses_comparison = df_buses_output.compare(df_buses_reference) + pathlib.Path.unlink(file_path) + assert df_buses_comparison.empty + + +def test_load_lines_from_osm(tmpdir): + """ + Verify what returned by _load_lines_from_osm. 
+ """ + file_path = get_path(tmpdir, "lines_exercise.csv") + df_lines_input.to_csv(file_path) + df_lines_output = _load_lines_from_osm(file_path) + df_lines_comparison = df_lines_output.compare(df_lines_reference) + pathlib.Path.unlink(file_path) + assert df_lines_comparison.empty + + +def test_load_transformers_from_osm(tmpdir): + """ + Verify what returned by _load_transformers_from_osm. + """ + file_path = get_path(tmpdir, "transformers_exercise.csv") + df_transformers_input.to_csv(file_path) + df_transformers_output = _load_transformers_from_osm(file_path) + df_transformers_comparison = df_transformers_output.compare( + df_transformers_reference + ) + pathlib.Path.unlink(file_path) + assert df_transformers_comparison.empty + + +def test_load_converters_from_osm(tmpdir): + """ + Verify what returned by _load_converters_from_osm. + """ + file_path = get_path(tmpdir, "converters_exercise.csv") + df_converters_input.to_csv(file_path) + df_converters_output = _load_converters_from_osm(file_path) + df_converters_comparison = df_converters_output.compare(df_converters_reference) + pathlib.Path.unlink(file_path) + assert df_converters_comparison.empty + + +def test_get_linetypes_config(): + """ + Verify what returned by _get_linetypes_config. + """ + output_dict_ac = _get_linetypes_config(lines_dict["ac_types"], voltages_list) + output_dict_dc = _get_linetypes_config(lines_dict["dc_types"], voltages_list) + assert output_dict_ac == lines_dict["ac_types"] + assert output_dict_dc == lines_dict["dc_types"] + + +def test_get_linetype_by_voltage(): + """ + Verify what returned by _get_linetype_by_voltage. + """ + v_nom_list = [ + 50.0, + 101.0, + 180.0, + 210.0, + 220.0, + 225.0, + 285.0, + 300.0, + 333.0, + 390.0, + 600.0, + 750.0, + 800.0, + ] + + line_type_list = [] + + for v_nom in v_nom_list: + line_type_list.append(_get_linetype_by_voltage(v_nom, lines_dict["ac_types"])) + + assert line_type_list == [ + "243-AL1/39-ST1A 20.0", + "243-AL1/39-ST1A 20.0", + "Al/St 240/40 2-bundle 220.0", + "Al/St 240/40 2-bundle 220.0", + "Al/St 240/40 2-bundle 220.0", + "Al/St 240/40 2-bundle 220.0", + "Al/St 240/40 3-bundle 300.0", + "Al/St 240/40 3-bundle 300.0", + "Al/St 240/40 3-bundle 300.0", + "Al/St 240/40 4-bundle 380.0", + "Al/St 240/40 4-bundle 380.0", + "Al/St 560/50 4-bundle 750.0", + "Al/St 560/50 4-bundle 750.0", + ] + + +def test_set_electrical_parameters_lines(tmpdir): + """ + Verify what returned by _set_electrical_parameters_lines. + """ + file_path = get_path(tmpdir, "lines_exercise.csv") + df_lines_input.to_csv(file_path) + df_lines_output = _load_lines_from_osm(file_path).reset_index(drop=True) + df_lines_output_ac = df_lines_output[ + df_lines_output.tag_frequency.astype(float) != 0 + ].copy() + df_lines_output_dc = df_lines_output[ + df_lines_output.tag_frequency.astype(float) == 0 + ].copy() + lines_ac = _set_electrical_parameters_lines( + lines_dict, voltages_list, df_lines_output_ac + ).set_index("tag_frequency") + lines_dc = _set_electrical_parameters_dc_lines( + lines_dict, voltages_list, df_lines_output_dc + ).set_index("tag_frequency") + df_lines_ac_comparison = lines_ac.compare(lines_ac_reference) + df_lines_dc_comparison = lines_dc.compare(lines_dc_reference) + pathlib.Path.unlink(file_path) + assert df_lines_ac_comparison.empty + assert df_lines_dc_comparison.empty + + +def test_set_electrical_parameters_links(tmpdir): + """ + Verify what returned by _set_electrical_parameters_links. 
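+
+    For DC lines the reference below expects p_max_pu to be copied from the
+    links configuration and p_min_pu to be its negative (symmetric
+    operation), i.e. p_max_pu = 2.1 implies p_min_pu = -2.1.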
+ """ + file_path = get_path(tmpdir, "lines_exercise.csv") + df_lines_input.to_csv(file_path) + df_lines_output = _load_lines_from_osm(file_path).reset_index(drop=True) + df_lines_output_dc = df_lines_output[ + df_lines_output.tag_frequency.astype(float) == 0 + ].copy() + lines_dc = _set_electrical_parameters_dc_lines( + lines_dict, voltages_list, df_lines_output_dc + ) + new_lines_dc = _set_electrical_parameters_links(links_dict, lines_dc).set_index( + "tag_frequency" + ) + new_lines_dc_reference = lines_dc_reference.copy(deep=True) + new_lines_dc_reference["p_max_pu"] = links_dict["p_max_pu"] + new_lines_dc_reference["p_min_pu"] = -links_dict["p_max_pu"] + pathlib.Path.unlink(file_path) + df_comparison = new_lines_dc.compare(new_lines_dc_reference) + assert df_comparison.empty + + +def test_set_electrical_parameters_transformers(tmpdir): + """ + Verify what returned by _set_electrical_parameters_transformers. + """ + file_path = get_path(tmpdir, "transformers_exercise.csv") + df_transformers_input.to_csv(file_path) + df_transformers_output = _load_transformers_from_osm(file_path) + df_transformers_parameters = _set_electrical_parameters_transformers( + transformers_dict, df_transformers_output + ) + df_transformers_parameters_reference = df_transformers_reference.copy(deep=True) + df_transformers_parameters_reference["x"] = transformers_dict["x"] + df_transformers_parameters_reference["s_nom"] = transformers_dict["s_nom"] + df_transformers_parameters_reference["type"] = transformers_dict["type"] + pathlib.Path.unlink(file_path) + df_comparison = df_transformers_parameters.compare( + df_transformers_parameters_reference + ) + assert df_comparison.empty + + +def test_set_electrical_parameters_converters(tmpdir): + """ + Verify what returned by _set_electrical_parameters_converters. + """ + file_path = get_path(tmpdir, "converters_exercise.csv") + df_converters_input.to_csv(file_path) + df_converters_output = _load_converters_from_osm(file_path) + df_converters_parameters = _set_electrical_parameters_converters( + links_dict, df_converters_output + ) + df_converters_parameters_reference = df_converters_reference.copy(deep=True) + df_converters_parameters_reference["p_max_pu"] = links_dict["p_max_pu"] + df_converters_parameters_reference["p_min_pu"] = -links_dict["p_max_pu"] + df_converters_parameters_reference["p_nom"] = 2000 + df_converters_parameters_reference["under_construction"] = False + df_converters_parameters_reference["underground"] = False + pathlib.Path.unlink(file_path) + df_comparison = df_converters_parameters.compare(df_converters_parameters_reference) + assert df_comparison.empty From 2d7ef6dfa25a0430587bd745acb39692a0de55c0 Mon Sep 17 00:00:00 2001 From: Fabrizio Finozzi Date: Thu, 1 Aug 2024 15:12:48 +0200 Subject: [PATCH 27/40] normed in _helpers.py with dedicated unit test --- scripts/_helpers.py | 4 ++++ scripts/add_electricity.py | 5 +---- scripts/build_demand_profiles.py | 5 +---- scripts/cluster_network.py | 5 +---- test/test_helpers.py | 16 ++++++++++++++++ 5 files changed, 23 insertions(+), 12 deletions(-) diff --git a/scripts/_helpers.py b/scripts/_helpers.py index 3c7fe60de..93a6cbc38 100644 --- a/scripts/_helpers.py +++ b/scripts/_helpers.py @@ -1472,3 +1472,7 @@ def safe_divide(numerator, denominator): f"Division by zero: {numerator} / {denominator}, returning NaN." 
) return np.nan + + +def normed(x): + return (x / x.sum()).fillna(0.0) diff --git a/scripts/add_electricity.py b/scripts/add_electricity.py index f953d853b..2c13bfd35 100755 --- a/scripts/add_electricity.py +++ b/scripts/add_electricity.py @@ -94,6 +94,7 @@ configure_logging, create_logger, mock_snakemake, + normed, read_csv_nafix, sets_path_to_root, update_p_nom_max, @@ -104,10 +105,6 @@ logger = create_logger(__name__) -def normed(s): - return s / s.sum() - - def calculate_annuity(n, r): """ Calculate the annuity factor for an asset with lifetime n years and diff --git a/scripts/build_demand_profiles.py b/scripts/build_demand_profiles.py index 76fd324b9..39414109f 100644 --- a/scripts/build_demand_profiles.py +++ b/scripts/build_demand_profiles.py @@ -56,6 +56,7 @@ create_logger, get_path, mock_snakemake, + normed, read_csv_nafix, read_osm_config, sets_path_to_root, @@ -66,10 +67,6 @@ logger = create_logger(__name__) -def normed(s): - return s / s.sum() - - def get_gegis_regions(countries): """ Get the GEGIS region from the config file. diff --git a/scripts/cluster_network.py b/scripts/cluster_network.py index de7d538fc..92fe8d5da 100644 --- a/scripts/cluster_network.py +++ b/scripts/cluster_network.py @@ -136,6 +136,7 @@ create_logger, get_aggregation_strategies, mock_snakemake, + normed, sets_path_to_root, update_p_nom_max, ) @@ -155,10 +156,6 @@ logger = create_logger(__name__) -def normed(x): - return (x / x.sum()).fillna(0.0) - - def weighting_for_country(n, x): conv_carriers = {"OCGT", "CCGT", "PHS", "hydro"} gen = n.generators.loc[n.generators.carrier.isin(conv_carriers)].groupby( diff --git a/test/test_helpers.py b/test/test_helpers.py index 7cee35c8f..81ac8453f 100644 --- a/test/test_helpers.py +++ b/test/test_helpers.py @@ -37,6 +37,7 @@ get_path_size, get_relative_path, modify_commodity, + normed, safe_divide, three_2_two_digits_country, two_2_three_digits_country, @@ -578,3 +579,18 @@ def test_download_gadm(): assert list_layers_41[0] == "ADM_ADM_0" assert list_layers_41[1] == "ADM_ADM_1" assert list_layers_41[2] == "ADM_ADM_2" + + +def test_normed(): + df_input = pd.DataFrame( + {"A": [1.0, 2.0, 3.0, 4.0, 5.0], "B": [6.0, 7.0, 8.0, 9.0, 10.0]} + ) + df_output = normed(df_input) + df_reference = pd.DataFrame( + { + "A": [x / 15.0 for x in [1.0, 2.0, 3.0, 4.0, 5.0]], + "B": [x / 40.0 for x in [6.0, 7.0, 8.0, 9.0, 10.0]], + } + ) + df_comparison = df_output.compare(df_reference) + assert df_comparison.empty From 9c4268d29e08c44700ea147f2baedf91f173fa40 Mon Sep 17 00:00:00 2001 From: Fabrizio Finozzi Date: Thu, 1 Aug 2024 17:59:45 +0200 Subject: [PATCH 28/40] initial addition of unit tests for build_demand_profile.py --- test/test_build_demand_profile.py | 42 +++++++++++++++++++++++++++++++ 1 file changed, 42 insertions(+) create mode 100644 test/test_build_demand_profile.py diff --git a/test/test_build_demand_profile.py b/test/test_build_demand_profile.py new file mode 100644 index 000000000..0ed53f757 --- /dev/null +++ b/test/test_build_demand_profile.py @@ -0,0 +1,42 @@ +# -*- coding: utf-8 -*- +# SPDX-FileCopyrightText: PyPSA-Earth and PyPSA-Eur Authors +# +# SPDX-License-Identifier: AGPL-3.0-or-later + +# -*- coding: utf-8 -*- + +import pathlib +import sys + +import numpy as np +import pandas as pd + +sys.path.append("./scripts") + +from _helpers import create_country_list, get_path +from build_demand_profiles import get_gegis_regions, get_load_paths_gegis + +path_cwd = pathlib.Path.cwd() + + +def test_get_gegis_regions(): + output_regions = get_gegis_regions(["NG", "IT"]) 
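+    # NG and IT are expected to collapse to the "Africa" and "Europe" GEGIS
+    # macro-regions; a sketch of an equivalent lookup, assuming (for
+    # illustration only, not implied by this test) a country_converter
+    # based mapping:
+    #
+    #   import country_converter as coco
+    #   coco.convert(names=["NG", "IT"], to="continent")  # ["Africa", "Europe"]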
+ assert output_regions == ["Africa", "Europe"] + + +def test_get_load_paths_gegis(): + config = { + "countries": ["NG", "IT"], + "load_options": { + "ssp": "ssp2-2.6", + "weather_year": 2013, + "prediction_year": 2030, + "scale": 1, + }, + } + load_data_paths = get_load_paths_gegis("data", config) + reference_list = [ + get_path("data", "ssp2-2.6", "2030", "era5_2013", "Africa.nc"), + get_path("data", "ssp2-2.6", "2030", "era5_2013", "Europe.nc"), + ] + assert load_data_paths == reference_list From 6ca2cbab4760d5aa084a28e50bcd5779b22448ce Mon Sep 17 00:00:00 2001 From: Fabrizio Finozzi Date: Sat, 3 Aug 2024 11:43:53 +0200 Subject: [PATCH 29/40] add test_build_demand_profiles.py --- ...t_build_demand_profile.py => test_build_demand_profiles.py} | 3 --- 1 file changed, 3 deletions(-) rename test/{test_build_demand_profile.py => test_build_demand_profiles.py} (96%) diff --git a/test/test_build_demand_profile.py b/test/test_build_demand_profiles.py similarity index 96% rename from test/test_build_demand_profile.py rename to test/test_build_demand_profiles.py index 0ed53f757..fe5c919b7 100644 --- a/test/test_build_demand_profile.py +++ b/test/test_build_demand_profiles.py @@ -8,9 +8,6 @@ import pathlib import sys -import numpy as np -import pandas as pd - sys.path.append("./scripts") from _helpers import create_country_list, get_path From 067110dd1e62915b5f58ad221b37c5c6838c7cfa Mon Sep 17 00:00:00 2001 From: Fabrizio Finozzi Date: Sat, 3 Aug 2024 11:49:29 +0200 Subject: [PATCH 30/40] add documentation --- test/test_build_demand_profiles.py | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/test/test_build_demand_profiles.py b/test/test_build_demand_profiles.py index fe5c919b7..0fac5b635 100644 --- a/test/test_build_demand_profiles.py +++ b/test/test_build_demand_profiles.py @@ -17,11 +17,17 @@ def test_get_gegis_regions(): + """ + Verify what returned by get_gegis_regions. + """ output_regions = get_gegis_regions(["NG", "IT"]) assert output_regions == ["Africa", "Europe"] def test_get_load_paths_gegis(): + """ + Verify what returned by get_load_paths_gegis. 
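+
+    The expected layout is data/{ssp}/{prediction_year}/era5_{weather_year}/
+    {region}.nc, so the load_options used here should resolve to
+    data/ssp2-2.6/2030/era5_2013/Africa.nc and the Europe counterpart.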
+ """ config = { "countries": ["NG", "IT"], "load_options": { From 1473be69a30766aaa68ecdf819a9f4bdd1c01e4f Mon Sep 17 00:00:00 2001 From: Fabrizio Finozzi Date: Thu, 8 Aug 2024 03:19:21 +0200 Subject: [PATCH 31/40] add unit tests for add_extra_components --- test/test_add_extra_components.py | 245 ++++++++++++++++++++++++++++++ 1 file changed, 245 insertions(+) create mode 100644 test/test_add_extra_components.py diff --git a/test/test_add_extra_components.py b/test/test_add_extra_components.py new file mode 100644 index 000000000..474af11d7 --- /dev/null +++ b/test/test_add_extra_components.py @@ -0,0 +1,245 @@ +# -*- coding: utf-8 -*- +# SPDX-FileCopyrightText: PyPSA-Earth and PyPSA-Eur Authors +# +# SPDX-License-Identifier: AGPL-3.0-or-later + +# -*- coding: utf-8 -*- + +import pathlib +import sys + +import pandas as pd +import pypsa + +sys.path.append("./scripts") + +from add_electricity import load_costs +from add_extra_components import ( + attach_hydrogen_pipelines, + attach_storageunits, + attach_stores, +) + +path_cwd = pathlib.Path.cwd() +path_costs = pathlib.Path(path_cwd, "data", "costs.csv") + +costs_config_dict = { + "year": 2030, + "version": "v0.5.0", + "rooftop_share": 0.14, + "USD2013_to_EUR2013": 0.7532, + "fill_values": { + "FOM": 0, + "VOM": 0, + "efficiency": 1, + "fuel": 0, + "investment": 0, + "lifetime": 25, + "CO2 intensity": 0, + "discount rate": 0.07, + }, + "marginal_cost": { + "solar": 0.01, + "onwind": 0.015, + "offwind": 0.015, + "hydro": 0.0, + "H2": 0.0, + "electrolysis": 0.0, + "fuel cell": 0.0, + "battery": 0.0, + "battery inverter": 0.0, + }, + "emission_prices": { + "co2": 0.0, + }, +} + +costs_config_df = pd.DataFrame.from_dict(costs_config_dict) + +config_dict = { + "electricity": { + "base_voltage": 380.0, + "voltages": [132.0, 220.0, 300.0, 380.0, 500.0, 750.0], + "co2limit": 1.487e9, + "co2base": 1.487e9, + "agg_p_nom_limits": "data/agg_p_nom_minmax.csv", + "hvdc_as_lines": True, + "automatic_emission": True, + "automatic_emission_base_year": 1990, + "operational_reserve": { + "activate": True, + "epsilon_load": 0.02, + "epsilon_vres": 0.02, + "contingency": 0, + }, + "max_hours": { + "battery": 6, + "H2": 168, + }, + "extendable_carriers": { + "Generator": ["solar", "onwind", "offwind-ac", "offwind-dc", "OCGT"], + "StorageUnit": ["H2"], + "Store": ["battery", "H2"], + "Link": ["H2 pipeline"], + }, + "powerplants_filter": "(DateOut >= 2022 or DateOut != DateOut)", + "custom_powerplants": False, + "conventional_carriers": [ + "nuclear", + "oil", + "OCGT", + "CCGT", + "coal", + "lignite", + "geothermal", + "biomass", + ], + "renewable_carriers": [ + "solar", + "csp", + "onwind", + "offwind-ac", + "offwind-dc", + "hydro", + ], + "estimate_renewable_capacities": { + "stats": "irena", + "year": 2020, + "p_nom_min": 1, + "p_nom_max": False, + "technology_mapping": { + "Offshore": ["offwind-ac", "offwind-dc"], + "Onshore": ["onwind"], + "PV": ["solar"], + }, + }, + }, + "renewable": { + "csp": { + "cutout": "cutout-2013-era5-tutorial", + "resource": {"method": "csp", "installation": "SAM_solar_tower"}, + "capacity_per_sqkm": 2.392, + "copernicus": { + "grid_codes": [20, 30, 40, 60, 90], + "distancing_codes": [50], + "distance_to_codes": 3000, + }, + "natura": True, + "potential": "simple", + "clip_p_max_pu": 1.0e-2, + "extendable": True, + "csp_model": "simple", + }, + }, +} + + +def test_attach_storageunits(): + """ + Verify what is returned by attach_storageunits() + """ + test_network_de = pypsa.examples.scigrid_de(from_master=True) + number_years = 
test_network_de.snapshot_weightings.objective.sum() / 8760.0 + test_costs = load_costs( + path_costs, + costs_config_dict, + config_dict["electricity"], + number_years, + ) + + reference_component_dict = { + "Bus": 585, + "Carrier": 1, + "Line": 852, + "LineType": 34, + "Transformer": 96, + "TransformerType": 14, + "Load": 489, + "Generator": 1423, + "StorageUnit": 623, + } + attach_storageunits(test_network_de, test_costs, config_dict) + + output_component_dict = {} + for c in test_network_de.iterate_components( + list(test_network_de.components.keys())[2:] + ): + output_component_dict[c.name] = len(c.df) + + assert output_component_dict == reference_component_dict + + +def test_attach_stores(): + """ + Verify what is returned by attach_stores() + """ + test_network_de = pypsa.examples.scigrid_de(from_master=True) + number_years = test_network_de.snapshot_weightings.objective.sum() / 8760.0 + test_costs = load_costs( + path_costs, + costs_config_dict, + config_dict["electricity"], + number_years, + ) + + reference_component_dict = { + "Bus": 1755, + "Carrier": 2, + "Line": 852, + "LineType": 34, + "Transformer": 96, + "TransformerType": 14, + "Link": 2340, + "Load": 489, + "Generator": 1423, + "StorageUnit": 38, + "Store": 1170, + } + test_network_de.buses["country"] = "DE" + attach_stores(test_network_de, test_costs, config_dict) + + output_component_dict = {} + for c in test_network_de.iterate_components( + list(test_network_de.components.keys())[2:] + ): + output_component_dict[c.name] = len(c.df) + + assert output_component_dict == reference_component_dict + + +def test_attach_hydrogen_pipelines(): + """ + Verify what is returned by attach_hydrogen_pipelines() + """ + test_network_de = pypsa.examples.scigrid_de(from_master=True) + number_years = test_network_de.snapshot_weightings.objective.sum() / 8760.0 + test_costs = load_costs( + path_costs, + costs_config_dict, + config_dict["electricity"], + number_years, + ) + + reference_component_dict = { + "Bus": 1755, + "Carrier": 2, + "Line": 852, + "LineType": 34, + "Transformer": 96, + "TransformerType": 14, + "Link": 2340, + "Load": 489, + "Generator": 1423, + "StorageUnit": 38, + "Store": 1170, + } + test_network_de.buses["country"] = "DE" + attach_stores(test_network_de, test_costs, config_dict) + + output_component_dict = {} + for c in test_network_de.iterate_components( + list(test_network_de.components.keys())[2:] + ): + output_component_dict[c.name] = len(c.df) + + assert output_component_dict == reference_component_dict From 2852bc6107733c47eeb28c07b3a0532ec5d59612 Mon Sep 17 00:00:00 2001 From: Fabrizio Finozzi Date: Thu, 8 Aug 2024 03:22:47 +0200 Subject: [PATCH 32/40] add unit tests for add_extra_components - 2 --- test/test_add_extra_components.py | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) diff --git a/test/test_add_extra_components.py b/test/test_add_extra_components.py index 474af11d7..b1dfe78d2 100644 --- a/test/test_add_extra_components.py +++ b/test/test_add_extra_components.py @@ -221,20 +221,17 @@ def test_attach_hydrogen_pipelines(): ) reference_component_dict = { - "Bus": 1755, - "Carrier": 2, + "Bus": 585, "Line": 852, "LineType": 34, "Transformer": 96, "TransformerType": 14, - "Link": 2340, + "Link": 705, "Load": 489, "Generator": 1423, "StorageUnit": 38, - "Store": 1170, } - test_network_de.buses["country"] = "DE" - attach_stores(test_network_de, test_costs, config_dict) + attach_hydrogen_pipelines(test_network_de, test_costs, config_dict) output_component_dict = {} for c in 
test_network_de.iterate_components( From 4e51132a7cf931fd6d211580529801a097205b81 Mon Sep 17 00:00:00 2001 From: Fabrizio Finozzi <167071962+finozzifa@users.noreply.github.com> Date: Wed, 28 Aug 2024 11:28:31 +0200 Subject: [PATCH 33/40] configs from yaml file (#9) * configs from yaml file * add get_config_dict fixture * new unit test fixture --- test/conftest.py | 20 ++++ test/test_add_extra_components.py | 141 ++++------------------------- test/test_base_network.py | 105 ++++++++------------- test/test_build_demand_profiles.py | 18 ++-- 4 files changed, 84 insertions(+), 200 deletions(-) diff --git a/test/conftest.py b/test/conftest.py index fa8cbd171..f3a645d83 100644 --- a/test/conftest.py +++ b/test/conftest.py @@ -8,7 +8,9 @@ import pathlib import shutil +import pypsa import pytest +import yaml _content_temp_file = "content" _name_temp_file = "hello.txt" @@ -30,3 +32,21 @@ def get_temp_folder(tmpdir): sub_temp_content_dir = temp_content_dir.join(_sub_temp_content_dir) yield sub_temp_content_dir shutil.rmtree(str(sub_temp_content_dir)) + + +@pytest.fixture(scope="function") +def get_power_network_scigrid_de(): + return pypsa.examples.scigrid_de(from_master=True) + + +@pytest.fixture(scope="function") +def get_power_network_ac_dc_meshed(): + return pypsa.examples.ac_dc_meshed(from_master=True) + + +@pytest.fixture(scope="function") +def get_config_dict(): + path_config = pathlib.Path(pathlib.Path.cwd(), "config.default.yaml") + with open(path_config, "r") as file: + config_dict = yaml.safe_load(file) + return config_dict diff --git a/test/test_add_extra_components.py b/test/test_add_extra_components.py index b1dfe78d2..ee7afe9ed 100644 --- a/test/test_add_extra_components.py +++ b/test/test_add_extra_components.py @@ -8,11 +8,10 @@ import pathlib import sys -import pandas as pd -import pypsa - sys.path.append("./scripts") +from test.conftest import get_config_dict, get_power_network_scigrid_de + from add_electricity import load_costs from add_extra_components import ( attach_hydrogen_pipelines, @@ -23,126 +22,18 @@ path_cwd = pathlib.Path.cwd() path_costs = pathlib.Path(path_cwd, "data", "costs.csv") -costs_config_dict = { - "year": 2030, - "version": "v0.5.0", - "rooftop_share": 0.14, - "USD2013_to_EUR2013": 0.7532, - "fill_values": { - "FOM": 0, - "VOM": 0, - "efficiency": 1, - "fuel": 0, - "investment": 0, - "lifetime": 25, - "CO2 intensity": 0, - "discount rate": 0.07, - }, - "marginal_cost": { - "solar": 0.01, - "onwind": 0.015, - "offwind": 0.015, - "hydro": 0.0, - "H2": 0.0, - "electrolysis": 0.0, - "fuel cell": 0.0, - "battery": 0.0, - "battery inverter": 0.0, - }, - "emission_prices": { - "co2": 0.0, - }, -} - -costs_config_df = pd.DataFrame.from_dict(costs_config_dict) - -config_dict = { - "electricity": { - "base_voltage": 380.0, - "voltages": [132.0, 220.0, 300.0, 380.0, 500.0, 750.0], - "co2limit": 1.487e9, - "co2base": 1.487e9, - "agg_p_nom_limits": "data/agg_p_nom_minmax.csv", - "hvdc_as_lines": True, - "automatic_emission": True, - "automatic_emission_base_year": 1990, - "operational_reserve": { - "activate": True, - "epsilon_load": 0.02, - "epsilon_vres": 0.02, - "contingency": 0, - }, - "max_hours": { - "battery": 6, - "H2": 168, - }, - "extendable_carriers": { - "Generator": ["solar", "onwind", "offwind-ac", "offwind-dc", "OCGT"], - "StorageUnit": ["H2"], - "Store": ["battery", "H2"], - "Link": ["H2 pipeline"], - }, - "powerplants_filter": "(DateOut >= 2022 or DateOut != DateOut)", - "custom_powerplants": False, - "conventional_carriers": [ - "nuclear", - "oil", - 
"OCGT", - "CCGT", - "coal", - "lignite", - "geothermal", - "biomass", - ], - "renewable_carriers": [ - "solar", - "csp", - "onwind", - "offwind-ac", - "offwind-dc", - "hydro", - ], - "estimate_renewable_capacities": { - "stats": "irena", - "year": 2020, - "p_nom_min": 1, - "p_nom_max": False, - "technology_mapping": { - "Offshore": ["offwind-ac", "offwind-dc"], - "Onshore": ["onwind"], - "PV": ["solar"], - }, - }, - }, - "renewable": { - "csp": { - "cutout": "cutout-2013-era5-tutorial", - "resource": {"method": "csp", "installation": "SAM_solar_tower"}, - "capacity_per_sqkm": 2.392, - "copernicus": { - "grid_codes": [20, 30, 40, 60, 90], - "distancing_codes": [50], - "distance_to_codes": 3000, - }, - "natura": True, - "potential": "simple", - "clip_p_max_pu": 1.0e-2, - "extendable": True, - "csp_model": "simple", - }, - }, -} - - -def test_attach_storageunits(): + +def test_attach_storageunits(get_config_dict, get_power_network_scigrid_de): """ Verify what is returned by attach_storageunits() """ - test_network_de = pypsa.examples.scigrid_de(from_master=True) + config_dict = get_config_dict + config_dict["electricity"]["extendable_carriers"]["StorageUnit"] = ["H2"] + test_network_de = get_power_network_scigrid_de number_years = test_network_de.snapshot_weightings.objective.sum() / 8760.0 test_costs = load_costs( path_costs, - costs_config_dict, + config_dict["costs"], config_dict["electricity"], number_years, ) @@ -169,15 +60,17 @@ def test_attach_storageunits(): assert output_component_dict == reference_component_dict -def test_attach_stores(): +def test_attach_stores(get_config_dict, get_power_network_scigrid_de): """ Verify what is returned by attach_stores() """ - test_network_de = pypsa.examples.scigrid_de(from_master=True) + config_dict = get_config_dict + config_dict["renewable"]["csp"]["csp_model"] = "simple" + test_network_de = get_power_network_scigrid_de number_years = test_network_de.snapshot_weightings.objective.sum() / 8760.0 test_costs = load_costs( path_costs, - costs_config_dict, + config_dict["costs"], config_dict["electricity"], number_years, ) @@ -207,15 +100,17 @@ def test_attach_stores(): assert output_component_dict == reference_component_dict -def test_attach_hydrogen_pipelines(): +def test_attach_hydrogen_pipelines(get_config_dict, get_power_network_scigrid_de): """ Verify what is returned by attach_hydrogen_pipelines() """ - test_network_de = pypsa.examples.scigrid_de(from_master=True) + config_dict = get_config_dict + config_dict["electricity"]["extendable_carriers"]["Link"] = ["H2 pipeline"] + test_network_de = get_power_network_scigrid_de number_years = test_network_de.snapshot_weightings.objective.sum() / 8760.0 test_costs = load_costs( path_costs, - costs_config_dict, + config_dict["costs"], config_dict["electricity"], number_years, ) diff --git a/test/test_base_network.py b/test/test_base_network.py index a665ea7cd..be5890acf 100644 --- a/test/test_base_network.py +++ b/test/test_base_network.py @@ -13,6 +13,8 @@ sys.path.append("./scripts") +from test.conftest import get_config_dict + from _helpers import get_path from base_network import ( _get_linetype_by_voltage, @@ -29,8 +31,6 @@ get_country, ) -path_cwd = pathlib.Path.cwd() - # Common references # ---> buses @@ -227,39 +227,6 @@ index=[0], ).set_index("tag_frequency") -lines_dict = { - "ac_types": { - 132.0: "243-AL1/39-ST1A 20.0", - 220.0: "Al/St 240/40 2-bundle 220.0", - 300.0: "Al/St 240/40 3-bundle 300.0", - 380.0: "Al/St 240/40 4-bundle 380.0", - 500.0: "Al/St 240/40 4-bundle 380.0", - 750.0: 
"Al/St 560/50 4-bundle 750.0", - }, - "dc_types": { - 500.0: "HVDC XLPE 1000", - }, - "s_max_pu": 0.7, - "s_nom_max": np.inf, - "length_factor": 1.25, - "under_construction": "zero", -} - -# ---> links - -links_dict = { - "p_max_pu": 2.1, - "p_nom_max": np.inf, - "under_construction": "zero", -} - -# ---> transformers - -transformers_dict = { - "x": 0.1, - "s_nom": 2000.0, - "type": "", -} df_transformers_input = pd.DataFrame( { @@ -302,10 +269,6 @@ index=[0], ).set_index("transformer_id") -# ---> voltages - -voltages_list = [132.0, 220.0, 300.0, 380.0, 500.0, 750.0] - def test_get_country(): """ @@ -374,20 +337,26 @@ def test_load_converters_from_osm(tmpdir): assert df_converters_comparison.empty -def test_get_linetypes_config(): +def test_get_linetypes_config(get_config_dict): """ Verify what returned by _get_linetypes_config. """ - output_dict_ac = _get_linetypes_config(lines_dict["ac_types"], voltages_list) - output_dict_dc = _get_linetypes_config(lines_dict["dc_types"], voltages_list) - assert output_dict_ac == lines_dict["ac_types"] - assert output_dict_dc == lines_dict["dc_types"] + config_dict = get_config_dict + output_dict_ac = _get_linetypes_config( + config_dict["lines"]["ac_types"], config_dict["electricity"]["voltages"] + ) + output_dict_dc = _get_linetypes_config( + config_dict["lines"]["dc_types"], config_dict["electricity"]["voltages"] + ) + assert output_dict_ac == config_dict["lines"]["ac_types"] + assert output_dict_dc == config_dict["lines"]["dc_types"] -def test_get_linetype_by_voltage(): +def test_get_linetype_by_voltage(get_config_dict): """ Verify what returned by _get_linetype_by_voltage. """ + config_dict = get_config_dict v_nom_list = [ 50.0, 101.0, @@ -407,7 +376,9 @@ def test_get_linetype_by_voltage(): line_type_list = [] for v_nom in v_nom_list: - line_type_list.append(_get_linetype_by_voltage(v_nom, lines_dict["ac_types"])) + line_type_list.append( + _get_linetype_by_voltage(v_nom, config_dict["lines"]["ac_types"]) + ) assert line_type_list == [ "243-AL1/39-ST1A 20.0", @@ -426,10 +397,11 @@ def test_get_linetype_by_voltage(): ] -def test_set_electrical_parameters_lines(tmpdir): +def test_set_electrical_parameters_lines(get_config_dict, tmpdir): """ Verify what returned by _set_electrical_parameters_lines. """ + config_dict = get_config_dict file_path = get_path(tmpdir, "lines_exercise.csv") df_lines_input.to_csv(file_path) df_lines_output = _load_lines_from_osm(file_path).reset_index(drop=True) @@ -440,10 +412,10 @@ def test_set_electrical_parameters_lines(tmpdir): df_lines_output.tag_frequency.astype(float) == 0 ].copy() lines_ac = _set_electrical_parameters_lines( - lines_dict, voltages_list, df_lines_output_ac + config_dict["lines"], config_dict["electricity"]["voltages"], df_lines_output_ac ).set_index("tag_frequency") lines_dc = _set_electrical_parameters_dc_lines( - lines_dict, voltages_list, df_lines_output_dc + config_dict["lines"], config_dict["electricity"]["voltages"], df_lines_output_dc ).set_index("tag_frequency") df_lines_ac_comparison = lines_ac.compare(lines_ac_reference) df_lines_dc_comparison = lines_dc.compare(lines_dc_reference) @@ -452,10 +424,11 @@ def test_set_electrical_parameters_lines(tmpdir): assert df_lines_dc_comparison.empty -def test_set_electrical_parameters_links(tmpdir): +def test_set_electrical_parameters_links(get_config_dict, tmpdir): """ Verify what returned by _set_electrical_parameters_links. 
""" + config_dict = get_config_dict file_path = get_path(tmpdir, "lines_exercise.csv") df_lines_input.to_csv(file_path) df_lines_output = _load_lines_from_osm(file_path).reset_index(drop=True) @@ -463,33 +436,34 @@ def test_set_electrical_parameters_links(tmpdir): df_lines_output.tag_frequency.astype(float) == 0 ].copy() lines_dc = _set_electrical_parameters_dc_lines( - lines_dict, voltages_list, df_lines_output_dc - ) - new_lines_dc = _set_electrical_parameters_links(links_dict, lines_dc).set_index( - "tag_frequency" + config_dict["lines"], config_dict["electricity"]["voltages"], df_lines_output_dc ) + new_lines_dc = _set_electrical_parameters_links( + config_dict["links"], lines_dc + ).set_index("tag_frequency") new_lines_dc_reference = lines_dc_reference.copy(deep=True) - new_lines_dc_reference["p_max_pu"] = links_dict["p_max_pu"] - new_lines_dc_reference["p_min_pu"] = -links_dict["p_max_pu"] + new_lines_dc_reference["p_max_pu"] = config_dict["links"]["p_max_pu"] + new_lines_dc_reference["p_min_pu"] = -config_dict["links"]["p_max_pu"] pathlib.Path.unlink(file_path) df_comparison = new_lines_dc.compare(new_lines_dc_reference) assert df_comparison.empty -def test_set_electrical_parameters_transformers(tmpdir): +def test_set_electrical_parameters_transformers(get_config_dict, tmpdir): """ Verify what returned by _set_electrical_parameters_transformers. """ + config_dict = get_config_dict file_path = get_path(tmpdir, "transformers_exercise.csv") df_transformers_input.to_csv(file_path) df_transformers_output = _load_transformers_from_osm(file_path) df_transformers_parameters = _set_electrical_parameters_transformers( - transformers_dict, df_transformers_output + config_dict["transformers"], df_transformers_output ) df_transformers_parameters_reference = df_transformers_reference.copy(deep=True) - df_transformers_parameters_reference["x"] = transformers_dict["x"] - df_transformers_parameters_reference["s_nom"] = transformers_dict["s_nom"] - df_transformers_parameters_reference["type"] = transformers_dict["type"] + df_transformers_parameters_reference["x"] = config_dict["transformers"]["x"] + df_transformers_parameters_reference["s_nom"] = config_dict["transformers"]["s_nom"] + df_transformers_parameters_reference["type"] = config_dict["transformers"]["type"] pathlib.Path.unlink(file_path) df_comparison = df_transformers_parameters.compare( df_transformers_parameters_reference @@ -497,19 +471,20 @@ def test_set_electrical_parameters_transformers(tmpdir): assert df_comparison.empty -def test_set_electrical_parameters_converters(tmpdir): +def test_set_electrical_parameters_converters(get_config_dict, tmpdir): """ Verify what returned by _set_electrical_parameters_converters. 
""" + config_dict = get_config_dict file_path = get_path(tmpdir, "converters_exercise.csv") df_converters_input.to_csv(file_path) df_converters_output = _load_converters_from_osm(file_path) df_converters_parameters = _set_electrical_parameters_converters( - links_dict, df_converters_output + config_dict["links"], df_converters_output ) df_converters_parameters_reference = df_converters_reference.copy(deep=True) - df_converters_parameters_reference["p_max_pu"] = links_dict["p_max_pu"] - df_converters_parameters_reference["p_min_pu"] = -links_dict["p_max_pu"] + df_converters_parameters_reference["p_max_pu"] = config_dict["links"]["p_max_pu"] + df_converters_parameters_reference["p_min_pu"] = -config_dict["links"]["p_max_pu"] df_converters_parameters_reference["p_nom"] = 2000 df_converters_parameters_reference["under_construction"] = False df_converters_parameters_reference["underground"] = False diff --git a/test/test_build_demand_profiles.py b/test/test_build_demand_profiles.py index 0fac5b635..d37ee0d59 100644 --- a/test/test_build_demand_profiles.py +++ b/test/test_build_demand_profiles.py @@ -10,6 +10,8 @@ sys.path.append("./scripts") +from test.conftest import get_config_dict + from _helpers import create_country_list, get_path from build_demand_profiles import get_gegis_regions, get_load_paths_gegis @@ -24,22 +26,14 @@ def test_get_gegis_regions(): assert output_regions == ["Africa", "Europe"] -def test_get_load_paths_gegis(): +def test_get_load_paths_gegis(get_config_dict): """ Verify what returned by get_load_paths_gegis. """ - config = { - "countries": ["NG", "IT"], - "load_options": { - "ssp": "ssp2-2.6", - "weather_year": 2013, - "prediction_year": 2030, - "scale": 1, - }, - } - load_data_paths = get_load_paths_gegis("data", config) + config_dict = get_config_dict + load_data_paths = get_load_paths_gegis("data", config_dict) reference_list = [ get_path("data", "ssp2-2.6", "2030", "era5_2013", "Africa.nc"), - get_path("data", "ssp2-2.6", "2030", "era5_2013", "Europe.nc"), ] + print(load_data_paths) assert load_data_paths == reference_list From 3298a6a584bf4e3adb225230b7606315bc8a945b Mon Sep 17 00:00:00 2001 From: Fabrizio Finozzi <167071962+finozzifa@users.noreply.github.com> Date: Wed, 4 Sep 2024 16:41:15 +0200 Subject: [PATCH 34/40] apply index=False (#11) --- test/test_base_network.py | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/test/test_base_network.py b/test/test_base_network.py index be5890acf..0fc035683 100644 --- a/test/test_base_network.py +++ b/test/test_base_network.py @@ -93,7 +93,6 @@ df_converters_reference = pd.DataFrame( { "converter_id": "convert_20_41", - "Unnamed: 0": 0, "index": 0, "bus0": "41", "bus1": "42", @@ -251,7 +250,6 @@ df_transformers_reference = pd.DataFrame( { "transformer_id": "transf_1_0", - "Unnamed: 0": 0, "bus0": "1", "bus1": "2", "voltage_bus0": 161000, @@ -316,7 +314,7 @@ def test_load_transformers_from_osm(tmpdir): Verify what returned by _load_transformers_from_osm. """ file_path = get_path(tmpdir, "transformers_exercise.csv") - df_transformers_input.to_csv(file_path) + df_transformers_input.to_csv(file_path, index=False) df_transformers_output = _load_transformers_from_osm(file_path) df_transformers_comparison = df_transformers_output.compare( df_transformers_reference @@ -330,7 +328,7 @@ def test_load_converters_from_osm(tmpdir): Verify what returned by _load_converters_from_osm. 
""" file_path = get_path(tmpdir, "converters_exercise.csv") - df_converters_input.to_csv(file_path) + df_converters_input.to_csv(file_path, index=False) df_converters_output = _load_converters_from_osm(file_path) df_converters_comparison = df_converters_output.compare(df_converters_reference) pathlib.Path.unlink(file_path) @@ -455,7 +453,7 @@ def test_set_electrical_parameters_transformers(get_config_dict, tmpdir): """ config_dict = get_config_dict file_path = get_path(tmpdir, "transformers_exercise.csv") - df_transformers_input.to_csv(file_path) + df_transformers_input.to_csv(file_path, index=False) df_transformers_output = _load_transformers_from_osm(file_path) df_transformers_parameters = _set_electrical_parameters_transformers( config_dict["transformers"], df_transformers_output @@ -477,7 +475,7 @@ def test_set_electrical_parameters_converters(get_config_dict, tmpdir): """ config_dict = get_config_dict file_path = get_path(tmpdir, "converters_exercise.csv") - df_converters_input.to_csv(file_path) + df_converters_input.to_csv(file_path, index=False) df_converters_output = _load_converters_from_osm(file_path) df_converters_parameters = _set_electrical_parameters_converters( config_dict["links"], df_converters_output From ba82543bbb9c6f1b9559c8e76eaf3a2b6ab16780 Mon Sep 17 00:00:00 2001 From: Fabrizio Finozzi <167071962+finozzifa@users.noreply.github.com> Date: Wed, 11 Sep 2024 18:48:08 +0200 Subject: [PATCH 35/40] Unit test build shapes (#12) * add unit test for gadm * finalize unit test for gadm * finalize unit test for gadm * rename methods and remove unnecessary variables * re-factor and test for country_cover * add unit test for download_WorldPop_standard * add unit test download_WorldPop_API * update unit tests * add unit test for add_population_data * scripts/build_shapes.py * add GDP data * unit test load_gdp * update environment packages * remove one assert statement in test_get_gadm_shapes --- envs/environment.mac.yaml | 4 +- envs/environment.yaml | 4 +- scripts/build_shapes.py | 59 +++---- test/test_build_shapes.py | 312 ++++++++++++++++++++++++++++++++++++++ 4 files changed, 340 insertions(+), 39 deletions(-) create mode 100644 test/test_build_shapes.py diff --git a/envs/environment.mac.yaml b/envs/environment.mac.yaml index 608fcb236..879f857d0 100644 --- a/envs/environment.mac.yaml +++ b/envs/environment.mac.yaml @@ -15,7 +15,7 @@ dependencies: - pypsa>=0.24, <0.25 # - atlite>=0.2.4 # until https://github.com/PyPSA/atlite/issues/244 is not merged - dask -- powerplantmatching>=0.5.7 +- powerplantmatching>=0.5.7, <=0.5.15 - earth-osm>=2.1 - atlite @@ -31,7 +31,7 @@ dependencies: - numpy - pandas - geopandas>=0.11.0, <=0.14.3 -- fiona!=1.8.22 +- fiona!=1.10.0 - xarray>=2023.11.0, <2023.12.0 - netcdf4 - networkx diff --git a/envs/environment.yaml b/envs/environment.yaml index 207a8858c..68e54fd08 100644 --- a/envs/environment.yaml +++ b/envs/environment.yaml @@ -15,7 +15,7 @@ dependencies: - pypsa>=0.24, <0.25 # - atlite>=0.2.4 # until https://github.com/PyPSA/atlite/issues/244 is not merged - dask -- powerplantmatching>=0.5.7 +- powerplantmatching>=0.5.7, <=0.5.15 - earth-osm>=2.1 - atlite @@ -31,7 +31,7 @@ dependencies: - numpy - pandas - geopandas>=0.11.0, <=0.14.3 -- fiona!=1.8.22 +- fiona!=1.10.0 - xarray>=2023.11.0, <2023.12.0 - netcdf4 - networkx diff --git a/scripts/build_shapes.py b/scripts/build_shapes.py index a6a58f1b6..a642efee8 100644 --- a/scripts/build_shapes.py +++ b/scripts/build_shapes.py @@ -65,9 +65,8 @@ def _simplify_polys(polys, minarea=0.01, 
tolerance=0.01, filterremote=False):
     return polys.simplify(tolerance=tolerance)


-def countries(
+def get_countries_shapes(
     countries,
-    layer_id,
     geo_crs,
     file_prefix,
     gadm_url_prefix,
@@ -137,19 +136,19 @@ def save_to_geojson(df, fn):
         pass


-def load_EEZ(countries_codes, geo_crs, EEZ_gpkg="./data/eez/eez_v11.gpkg"):
+def load_eez(countries_codes, geo_crs, eez_gpkg_file="./data/eez/eez_v11.gpkg"):
     """
     Function to load the database of the Exclusive Economic Zones.

     The dataset shall be downloaded independently by the user (see guide) or together with pypsa-earth package.
     """
-    if not pathlib.Path(EEZ_gpkg).exists():
+    if not pathlib.Path(eez_gpkg_file).exists():
         raise Exception(
-            f"File EEZ {EEZ_gpkg} not found, please download it from https://www.marineregions.org/download_file.php?name=World_EEZ_v11_20191118_gpkg.zip and copy it in {pathlib.Path(EEZ_gpkg).parent}"
+            f"File EEZ {eez_gpkg_file} not found, please download it from https://www.marineregions.org/download_file.php?name=World_EEZ_v11_20191118_gpkg.zip and copy it in {pathlib.Path(eez_gpkg_file).parent}"
         )
-    geodf_EEZ = gpd.read_file(EEZ_gpkg, engine="pyogrio").to_crs(geo_crs)
+    geodf_EEZ = gpd.read_file(eez_gpkg_file, engine="pyogrio").to_crs(geo_crs)
     geodf_EEZ.dropna(axis=0, how="any", subset=["ISO_TER1"], inplace=True)
     # [["ISO_TER1", "TERRITORY1", "ISO_SOV1", "ISO_SOV2", "ISO_SOV3", "geometry"]]
     geodf_EEZ = geodf_EEZ[["ISO_TER1", "geometry"]]
@@ -169,11 +168,11 @@ def load_EEZ(countries_codes, geo_crs, EEZ_gpkg="./data/eez/eez_v11.gpkg"):
     return geodf_EEZ


-def eez(
+def get_eez(
     countries,
     geo_crs,
     country_shapes,
-    EEZ_gpkg,
+    eez_gpkg_file,
     out_logging=False,
     distance=0.01,
     minarea=0.01,
@@ -189,7 +188,7 @@ def eez(
         logger.info("Stage 2 of 5: Create offshore shapes")

     # load data
-    df_eez = load_EEZ(countries, geo_crs, EEZ_gpkg)
+    df_eez = load_eez(countries, geo_crs, eez_gpkg_file)

     eez_countries = [cc for cc in countries if df_eez.name.str.contains(cc).any()]
     ret_df = gpd.GeoDataFrame(
@@ -242,7 +241,7 @@ def download_WorldPop(
     worldpop_method: str
         worldpop_method = "api" will use the API method to access the WorldPop 100mx100m dataset. worldpop_method = "standard" will use the standard method to access the WorldPop 1KMx1KM dataset.
     country_code : str
-        Two letter country codes of the downloaded files.
+        Two-letter country codes of the downloaded files.
         Files downloaded from https://data.worldpop.org/ datasets WorldPop UN adjusted
     year : int
         Year of the data to download
@@ -252,7 +251,7 @@ def download_WorldPop(
         Minimum size of each file to download
     """
     if worldpop_method == "api":
-        return download_WorldPop_API(country_code, year, update, out_logging, size_min)
+        return download_WorldPop_API(country_code, year, size_min)

     elif worldpop_method == "standard":
         return download_WorldPop_standard(
@@ -274,7 +273,7 @@ def download_WorldPop_standard(
     Parameters
     ----------
     country_code : str
-        Two letter country codes of the downloaded files.
+        Two-letter country codes of the downloaded files.
         Files downloaded from https://data.worldpop.org/ datasets WorldPop UN adjusted
     year : int
         Year of the data to download
@@ -330,9 +329,7 @@ def download_WorldPop_standard(
     return WorldPop_inputfile, WorldPop_filename


-def download_WorldPop_API(
-    country_code, year=2020, update=False, out_logging=False, size_min=300
-):
+def download_WorldPop_API(country_code, year=2020, size_min=300):
     """
     Download tiff file for each country code using the api method from worldpop API with 100mx100m resolution.
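Note: a hedged usage sketch of the download_WorldPop dispatcher above, with
keyword arguments taken from its documented parameter list (country code and
values illustrative):

    from build_shapes import download_WorldPop

    # "api" fetches the 100m x 100m UN-adjusted raster,
    # "standard" the 1 km x 1 km one
    tif_path, tif_name = download_WorldPop(
        "api", country_code="NG", year=2020, size_min=300
    )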
@@ -340,14 +337,10 @@ def download_WorldPop_API( Parameters ---------- country_code : str - Two letter country codes of the downloaded files. + Two-letter country codes of the downloaded files. Files downloaded from https://data.worldpop.org/ datasets WorldPop UN adjusted year : int Year of the data to download - update : bool - Update = true, forces re-download of files - size_min : int - Minimum size of each file to download Returns ------- WorldPop_inputfile : str @@ -383,7 +376,7 @@ def download_WorldPop_API( return WorldPop_inputfile, WorldPop_filename -def convert_GDP(name_file_nc, year=2015, out_logging=False): +def convert_gdp(name_file_nc, year=2015, out_logging=False): """ Function to convert the nc database of the GDP to tif, based on the work at https://doi.org/10.1038/sdata.2018.4. The dataset shall be downloaded independently by the user (see guide) or together with pypsa-earth package. @@ -430,7 +423,7 @@ def convert_GDP(name_file_nc, year=2015, out_logging=False): return GDP_tif, name_file_tif -def load_GDP( +def load_gdp( year=2015, update=False, out_logging=False, @@ -455,7 +448,7 @@ def load_GDP( logger.warning( f"Stage 5 of 5: File {name_file_tif} not found, the file will be produced by processing {name_file_nc}" ) - convert_GDP(name_file_nc, year, out_logging) + convert_gdp(name_file_nc, year, out_logging) return GDP_tif, name_file_tif @@ -496,8 +489,6 @@ def add_gdp_data( update=False, out_logging=False, name_file_nc="GDP_PPP_1990_2015_5arcmin_v2.nc", - nprocesses=2, - disable_progressbar=False, ): """ Function to add gdp data to arbitrary number of shapes in a country. @@ -520,7 +511,7 @@ def add_gdp_data( # initialize new gdp column df_gadm["gdp"] = 0.0 - GDP_tif, name_tif = load_GDP(year, update, out_logging, name_file_nc) + GDP_tif, name_tif = load_gdp(year, update, out_logging, name_file_nc) with rasterio.open(GDP_tif) as src: # resample data to target shape @@ -946,7 +937,6 @@ def add_population_data( out_logging=False, mem_read_limit_per_process=1024, nprocesses=2, - disable_progressbar=False, ): """ Function to add population data to arbitrary number of shapes in a country. 
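Note: the renamed GDP helpers chain together; a sketch assuming the raw
NetCDF has been fetched as documented for pypsa-earth:

    from build_shapes import load_gdp

    # load_gdp() returns the GeoTIFF path, calling convert_gdp() to
    # rasterize GDP_PPP_1990_2015_5arcmin_v2.nc the first time around
    GDP_tif, name_tif = load_gdp(
        year=2015,
        update=False,
        out_logging=True,
        name_file_nc="GDP_PPP_1990_2015_5arcmin_v2.nc",
    )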
@@ -1062,7 +1052,7 @@ def add_population_data( pbar.update(1) -def gadm( +def get_gadm_shapes( worldpop_method, gdp_method, countries, @@ -1173,9 +1163,8 @@ def gadm( gadm_url_prefix = snakemake.params.build_shape_options["gadm_url_prefix"] gadm_input_file_args = ["data", "gadm"] - country_shapes = countries( + country_shapes_df = get_countries_shapes( countries_list, - layer_id, geo_crs, file_prefix, gadm_url_prefix, @@ -1184,20 +1173,20 @@ def gadm( update, out_logging, ) - country_shapes.to_file(snakemake.output.country_shapes) + country_shapes_df.to_file(snakemake.output.country_shapes) - offshore_shapes = eez( - countries_list, geo_crs, country_shapes, EEZ_gpkg, out_logging + offshore_shapes = get_eez( + countries_list, geo_crs, country_shapes_df, EEZ_gpkg, out_logging ) offshore_shapes.reset_index().to_file(snakemake.output.offshore_shapes) africa_shape = gpd.GeoDataFrame( - geometry=[country_cover(country_shapes, offshore_shapes.geometry)] + geometry=[country_cover(country_shapes_df, offshore_shapes.geometry)] ) africa_shape.reset_index().to_file(snakemake.output.africa_shape) - gadm_shapes = gadm( + gadm_shapes = get_gadm_shapes( worldpop_method, gdp_method, countries_list, diff --git a/test/test_build_shapes.py b/test/test_build_shapes.py new file mode 100644 index 000000000..9e53b004f --- /dev/null +++ b/test/test_build_shapes.py @@ -0,0 +1,312 @@ +# -*- coding: utf-8 -*- +# SPDX-FileCopyrightText: PyPSA-Earth and PyPSA-Eur Authors +# +# SPDX-License-Identifier: AGPL-3.0-or-later + +# -*- coding: utf-8 -*- + +import pathlib +import sys + +import geopandas as gpd +import numpy as np + +sys.path.append("./scripts") + +from _helpers import get_gadm_layer +from build_shapes import ( + _simplify_polys, + add_population_data, + country_cover, + download_WorldPop_API, + download_WorldPop_standard, + get_countries_shapes, + get_gadm_shapes, + load_gdp, + save_to_geojson, +) + +path_cwd = str(pathlib.Path.cwd()) + + +def test_simplify_polys(get_config_dict): + """ + Verify what is returned by _simplify_polys. + """ + + config_dict = get_config_dict + + countries_list = ["NG"] + geo_crs = config_dict["crs"]["geo_crs"] + + update = config_dict["build_shape_options"]["update_file"] + out_logging = config_dict["build_shape_options"]["out_logging"] + contended_flag = config_dict["build_shape_options"]["contended_flag"] + file_prefix = config_dict["build_shape_options"]["gadm_file_prefix"] + gadm_url_prefix = config_dict["build_shape_options"]["gadm_url_prefix"] + gadm_input_file_args = ["data", "gadm"] + + country_shapes_df = get_countries_shapes( + countries_list, + geo_crs, + file_prefix, + gadm_url_prefix, + gadm_input_file_args, + contended_flag, + update, + out_logging, + ) + + simplified_poly = _simplify_polys(country_shapes_df) + + simplified_poly_df = gpd.GeoDataFrame( + geometry=[ + country_cover( + simplified_poly, eez_shapes=None, out_logging=False, distance=0.02 + ) + ] + ) + simplified_poly_df["area"] = simplified_poly_df.area + simplified_poly_df["centroid"] = simplified_poly_df.centroid + simplified_poly_df["perimeter"] = simplified_poly_df.length + print(simplified_poly_df["perimeter"][0]) + assert np.round(simplified_poly_df.area[0], 6) == 75.750018 + assert ( + str(simplified_poly_df.centroid[0]) + == "POINT (8.100522482086877 9.591585359563023)" + ) + assert np.round(simplified_poly_df["perimeter"][0], 6) == 47.060882 + + +def test_get_countries_shapes(get_config_dict): + """ + Verify what is returned by get_countries_shapes. 
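+
+    "XK" (Kosovo) is likely chosen here, and in test_get_gadm_shapes below,
+    because it is a small download and carries a contested code, so the
+    contended_flag option passed to these helpers is exercised as well.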
+ """ + + config_dict = get_config_dict + + countries_list = ["XK"] + geo_crs = config_dict["crs"]["geo_crs"] + + update = config_dict["build_shape_options"]["update_file"] + out_logging = config_dict["build_shape_options"]["out_logging"] + contended_flag = config_dict["build_shape_options"]["contended_flag"] + file_prefix = config_dict["build_shape_options"]["gadm_file_prefix"] + gadm_url_prefix = config_dict["build_shape_options"]["gadm_url_prefix"] + gadm_input_file_args = ["data", "gadm"] + + country_shapes_df = get_countries_shapes( + countries_list, + geo_crs, + file_prefix, + gadm_url_prefix, + gadm_input_file_args, + contended_flag, + update, + out_logging, + ) + + assert country_shapes_df.shape == (1,) + assert country_shapes_df.index.unique().tolist() == ["XK"] + + +def test_country_cover(get_config_dict): + """ + Verify what is returned by country_cover. + """ + + config_dict = get_config_dict + + countries_list = ["NG"] + geo_crs = config_dict["crs"]["geo_crs"] + + update = config_dict["build_shape_options"]["update_file"] + out_logging = config_dict["build_shape_options"]["out_logging"] + contended_flag = config_dict["build_shape_options"]["contended_flag"] + file_prefix = config_dict["build_shape_options"]["gadm_file_prefix"] + gadm_url_prefix = config_dict["build_shape_options"]["gadm_url_prefix"] + gadm_input_file_args = ["data", "gadm"] + + country_shapes_df = get_countries_shapes( + countries_list, + geo_crs, + file_prefix, + gadm_url_prefix, + gadm_input_file_args, + contended_flag, + update, + out_logging, + ) + + africa_shapes_df = gpd.GeoDataFrame( + geometry=[ + country_cover( + country_shapes_df, eez_shapes=None, out_logging=False, distance=0.02 + ) + ] + ) + africa_shapes_df["area"] = africa_shapes_df.area + africa_shapes_df["centroid"] = africa_shapes_df.centroid + africa_shapes_df["perimeter"] = africa_shapes_df.length + print(africa_shapes_df["perimeter"]) + assert np.round(africa_shapes_df.area[0], 6) == 75.750104 + assert ( + str(africa_shapes_df.centroid[0]) + == "POINT (8.100519548407405 9.59158035236806)" + ) + assert np.round(africa_shapes_df["perimeter"][0], 6) == 47.080743 + + +def test_download_world_pop_standard(get_config_dict): + """ + Verify what is returned by download_WorldPop_standard. + """ + + config_dict = get_config_dict + update_val = config_dict["build_shape_options"]["update_file"] + out_logging_val = config_dict["build_shape_options"]["out_logging"] + + world_pop_input_file, world_pop_file_name = download_WorldPop_standard( + "NG", + year=2020, + update=update_val, + out_logging=out_logging_val, + size_min=300, + ) + assert world_pop_file_name == "nga_ppp_2020_UNadj_constrained.tif" + + +def test_download_world_pop_api(): + """ + Verify what is returned by download_WorldPop_API. + """ + world_pop_input_file, world_pop_file_name = download_WorldPop_API( + "NG", year=2020, size_min=300 + ) + assert world_pop_file_name == "nga_ppp_2020_UNadj_constrained.tif" + + +def test_get_gadm_shapes(get_config_dict): + """ + Verify what is returned by get_gadm_shapes. 
+ """ + config_dict = get_config_dict + + mem_mb = 3096 + + countries_list = ["XK"] + geo_crs = config_dict["crs"]["geo_crs"] + + layer_id = config_dict["build_shape_options"]["gadm_layer_id"] + update = config_dict["build_shape_options"]["update_file"] + out_logging = config_dict["build_shape_options"]["out_logging"] + year = config_dict["build_shape_options"]["year"] + nprocesses = config_dict["build_shape_options"]["nprocesses"] + contended_flag = config_dict["build_shape_options"]["contended_flag"] + worldpop_method = config_dict["build_shape_options"]["worldpop_method"] + gdp_method = config_dict["build_shape_options"]["gdp_method"] + file_prefix = config_dict["build_shape_options"]["gadm_file_prefix"] + gadm_url_prefix = config_dict["build_shape_options"]["gadm_url_prefix"] + gadm_input_file_args = ["data", "gadm"] + + gadm_shapes_df = get_gadm_shapes( + worldpop_method, + gdp_method, + countries_list, + geo_crs, + file_prefix, + gadm_url_prefix, + gadm_input_file_args, + contended_flag, + mem_mb, + layer_id, + update, + out_logging, + year, + nprocesses=nprocesses, + ) + + assert gadm_shapes_df.shape == (7, 4) + assert gadm_shapes_df.index.unique().tolist() == [f"XK.{x}_1" for x in range(1, 8)] + assert gadm_shapes_df.loc["XK.1_1"]["pop"] == 207473.70381259918 + + +def test_add_population_data(get_config_dict): + """ + Verify what is returned by add_population_data. + """ + config_dict = get_config_dict + + mem_mb = 3096 + + countries_list = ["XK"] + geo_crs = config_dict["crs"]["geo_crs"] + + layer_id = config_dict["build_shape_options"]["gadm_layer_id"] + update = config_dict["build_shape_options"]["update_file"] + out_logging = config_dict["build_shape_options"]["out_logging"] + year = config_dict["build_shape_options"]["year"] + nprocesses = config_dict["build_shape_options"]["nprocesses"] + contended_flag = config_dict["build_shape_options"]["contended_flag"] + worldpop_method = config_dict["build_shape_options"]["worldpop_method"] + file_prefix = config_dict["build_shape_options"]["gadm_file_prefix"] + gadm_url_prefix = config_dict["build_shape_options"]["gadm_url_prefix"] + gadm_input_file_args = ["data", "gadm"] + + mem_read_limit_per_process = mem_mb / nprocesses + + df_gadm = get_gadm_layer( + countries_list, + layer_id, + geo_crs, + file_prefix, + gadm_url_prefix, + gadm_input_file_args, + contended_flag, + update, + out_logging, + ) + + # select and rename columns + df_gadm.rename(columns={"GID_0": "country"}, inplace=True) + + # drop useless columns + df_gadm.drop( + df_gadm.columns.difference(["country", "GADM_ID", "geometry"]), + axis=1, + inplace=True, + errors="ignore", + ) + + add_population_data( + df_gadm, + countries_list, + worldpop_method, + year, + update, + out_logging, + mem_read_limit_per_process, + nprocesses=nprocesses, + ) + + assert np.round(df_gadm["pop"].values[0], 0) == 207474.0 + assert np.round(df_gadm["pop"].values[1], 0) == 208332.0 + assert np.round(df_gadm["pop"].values[2], 0) == 257191.0 + assert np.round(df_gadm["pop"].values[3], 0) == 215703.0 + assert np.round(df_gadm["pop"].values[4], 0) == 610695.0 + assert np.round(df_gadm["pop"].values[5], 0) == 420344.0 + assert np.round(df_gadm["pop"].values[6], 0) == 215316.0 + + +def test_load_gdp(get_config_dict): + """ + Verify what is returned by load_gdp. 
+ """ + config_dict = get_config_dict + + update = config_dict["build_shape_options"]["update_file"] + out_logging = config_dict["build_shape_options"]["out_logging"] + year = config_dict["build_shape_options"]["year"] + name_file_nc = "GDP_PPP_1990_2015_5arcmin_v2.nc" + GDP_tif, name_tif = load_gdp(year, update, out_logging, name_file_nc) + assert name_tif == "GDP_PPP_1990_2015_5arcmin_v2.tif" From 2b91e910f9a2b86f9e31189fc8142dc60a062b84 Mon Sep 17 00:00:00 2001 From: Fabrizio Finozzi <167071962+finozzifa@users.noreply.github.com> Date: Wed, 11 Sep 2024 19:29:49 +0200 Subject: [PATCH 36/40] Unit test ad add_power_plants in build_powerplants.py (#10) * fix typo in main * modify the way powerplant.csv is built * unit test for replace_natural_gas_technology * add unit test for add_power_plants * reduce size of custom_powerplants.csv for Nigeria * add documentation to add_power_plants unit test * modify order of executions in test_add_power_plants * remove unit tests for merge and false * update changes * update environments dependencies * re-instate one part of the unit test * PR requests * remove bug --- .reuse/dep5 | 4 + Snakefile | 7 +- scripts/build_powerplants.py | 223 +++++++---------------- test/test_build_powerplants.py | 165 +++++++++++++++++ test/test_data/custom_NG_powerplants.csv | 5 + 5 files changed, 246 insertions(+), 158 deletions(-) create mode 100644 test/test_build_powerplants.py create mode 100644 test/test_data/custom_NG_powerplants.csv diff --git a/.reuse/dep5 b/.reuse/dep5 index 4a1632b46..592a0440f 100644 --- a/.reuse/dep5 +++ b/.reuse/dep5 @@ -11,6 +11,10 @@ Files: doc/configtables/* Copyright: The PyPSA-Earth and PyPSA-Eur Authors License: CC-BY-4.0 +Files: test/test_data/* +Copyright: The PyPSA-Earth and PyPSA-Eur Authors +License: CC-BY-4.0 + Files: data/* Copyright: The PyPSA-Earth and PyPSA-Eur Authors License: CC-BY-4.0 diff --git a/Snakefile b/Snakefile index e016aef11..98dc8203d 100644 --- a/Snakefile +++ b/Snakefile @@ -488,15 +488,12 @@ rule build_powerplants: gadm_layer_id=config["build_shape_options"]["gadm_layer_id"], alternative_clustering=config["cluster_options"]["alternative_clustering"], powerplants_filter=config["electricity"]["powerplants_filter"], + custom_powerplants=config["electricity"]["custom_powerplants"], input: base_network="networks/" + RDIR + "base.nc", pm_config="configs/powerplantmatching_config.yaml", - custom_powerplants="data/custom_powerplants.csv", + custom_powerplants_file="data/custom_powerplants.csv", osm_powerplants="resources/" + RDIR + "osm/clean/all_clean_generators.csv", - #gadm_shapes="resources/" + RDIR + "shapes/MAR2.geojson", - #using this line instead of the following will test updated gadm shapes for MA. 
- #To use: downlaod file from the google drive and place it in resources/" + RDIR + "shapes/ - #Link: https://drive.google.com/drive/u/1/folders/1dkW1wKBWvSY4i-XEuQFFBj242p0VdUlM gadm_shapes="resources/" + RDIR + "shapes/gadm_shapes.geojson", output: powerplants="resources/" + RDIR + "powerplants.csv", diff --git a/scripts/build_powerplants.py b/scripts/build_powerplants.py index e1f8e91b7..3609f11c0 100644 --- a/scripts/build_powerplants.py +++ b/scripts/build_powerplants.py @@ -112,10 +112,8 @@ create_logger, get_current_directory_path, get_path, - get_path_size, mock_snakemake, read_csv_nafix, - to_csv_nafix, two_digits_2_name_country, ) from scipy.spatial import cKDTree as KDTree @@ -124,142 +122,62 @@ logger = create_logger(__name__) -def convert_osm_to_pm(filepath_ppl_osm, filepath_ppl_pm): - if get_path_size(filepath_ppl_osm) == 0: - return to_csv_nafix(pd.DataFrame(), filepath_ppl_pm, index=False) +def add_power_plants( + custom_power_plants_file_path, + power_plants_config_dict, + ppl_assignment_strategy, + countries_names_list, +): - add_ppls = read_csv_nafix(filepath_ppl_osm, index_col=0, dtype={"bus": "str"}) - - custom_ppls_coords = gpd.GeoSeries.from_wkt(add_ppls["geometry"]) - add_ppls = ( - add_ppls.rename( - columns={ - "name": "Name", - "tags.generator:source": "Fueltype", - "tags.generator:type": "Technology", - "tags.power": "Set", - "power_output_MW": "Capacity", - } + if ppl_assignment_strategy == "replace": + # use only the data from custom_powerplants.csv + custom_power_plants = read_csv_nafix( + custom_power_plants_file_path, index_col=0, dtype={"bus": "str"} ) - .replace( - dict( - Fueltype={ - "nuclear": "Nuclear", - "wind": "Wind", - "hydro": "Hydro", - "tidal": "Other", - "wave": "Other", - "geothermal": "Geothermal", - "solar": "Solar", - # "Hard Coal" follows defaults of PPM - "coal": "Hard Coal", - "gas": "Natural Gas", - "biomass": "Bioenergy", - "biofuel": "Bioenergy", - "biogas": "Bioenergy", - "oil": "Oil", - "diesel": "Oil", - "gasoline": "Oil", - "waste": "Waste", - "osmotic": "Other", - "wave": "Other", - # approximation - # TODO: this shall be improved, one entry shall be Oil and the otherone gas - "gas;oil": "Oil", - "steam": "Natural Gas", - "waste_heat": "Other", - }, - Technology={ - "combined_cycle": "CCGT", - "gas_turbine": "OCGT", - "steam_turbine": "Steam Turbine", - "reciprocating_engine": "Combustion Engine", - # a very strong assumption - "wind_turbine": "Onshore", - "horizontal_axis": "Onshore", - "vertical_axis": "Offhore", - "solar_photovoltaic_panel": "Pv", - }, - Set={"generator": "PP", "plant": "PP"}, + return custom_power_plants + elif ppl_assignment_strategy == "merge": + # merge the data from powerplantmatching and custom_powerplants.csv + ppl_ppm = ( + pm.powerplants( + from_url=False, update=True, config_update=power_plants_config_dict + ) + .powerplant.fill_missing_decommissioning_years() + .query( + 'Fueltype not in ["Solar", "Wind"] and Country in @countries_names_list' ) + .powerplant.convert_country_to_alpha2() + .pipe(replace_natural_gas_technology) ) - .assign( - Country=lambda df: df.Country.map(two_digits_2_name_country), - # Name=lambda df: "OSM_" - # + df.Country.astype(str) - # + "_" - # + df.id.astype(str) - # + "-" - # + df.Name.astype(str), - Efficiency="", - Duration="", - Volume_Mm3="", - DamHeight_m="", - StorageCapacity_MWh="", - DateIn="", - DateRetrofit="", - DateMothball="", - DateOut="", - lat=custom_ppls_coords.y, - lon=custom_ppls_coords.x, - EIC=lambda df: df.id, - projectID=lambda df: "OSM" + 
df.id.astype(str), + ppl_cpp = read_csv_nafix( + custom_power_plants_file_path, index_col=0, dtype={"bus": "str"} ) - .dropna(subset=["Fueltype"]) - ) - - # All Hydro objects can be interpreted by PPM as Storages, too - # However, everything extracted from OSM seems to belong - # to power plants with "tags.power" == "generator" only - osm_ppm_df = pd.DataFrame( - data={ - "osm_method": ["run-of-the-river", "water-pumped-storage", "water-storage"], - "ppm_technology": ["Run-Of-River", "Pumped Storage", "Reservoir"], - } - ) - for i in osm_ppm_df.index: - add_ppls.loc[ - add_ppls["tags.generator:method"] == osm_ppm_df.loc[i, "osm_method"], - "Technology", - ] = osm_ppm_df.loc[i, "ppm_technology"] - - # originates from osm::"tags.generator:source" - add_ppls.loc[add_ppls["Fueltype"] == "Nuclear", "Technology"] = "Steam Turbine" - - # PMM contains data on NG, batteries and hydro storages - # trying to catch some of them... - # originates from osm::"tags.generator:source" - add_ppls.loc[add_ppls["Fueltype"] == "battery", "Set"] = "Store" - # originates from osm::tags.generator:type - add_ppls.loc[add_ppls["Technology"] == "battery storage", "Set"] = "Store" - - add_ppls = add_ppls.replace(dict(Fueltype={"battery": "Other"})).drop( - columns=["tags.generator:method", "geometry", "Area", "id"], - errors="ignore", - ) - - to_csv_nafix(add_ppls, filepath_ppl_pm, index=False) - - return add_ppls - - -def add_custom_powerplants(ppl, inputs, config): - if "custom_powerplants" not in config["electricity"]: - return ppl - - custom_ppl_query = config["electricity"]["custom_powerplants"] - if not custom_ppl_query: - return ppl - add_ppls = read_csv_nafix( - inputs.custom_powerplants, index_col=0, dtype={"bus": "str"} - ) - - if custom_ppl_query == "merge": - return pd.concat( - [ppl, add_ppls], sort=False, ignore_index=True, verify_integrity=True + power_plants = pd.concat( + [ppl_ppm, ppl_cpp], sort=False, ignore_index=True, verify_integrity=True + ) + return power_plants + elif ( + ppl_assignment_strategy not in ["merge", "replace"] + or ppl_assignment_strategy is None + ): + # use only the data from powerplantsmatching + power_plants = ( + pm.powerplants( + from_url=False, update=True, config_update=power_plants_config_dict + ) + .powerplant.fill_missing_decommissioning_years() + .query( + 'Fueltype not in ["Solar", "Wind"] and Country in @countries_names_list' + ) + .powerplant.convert_country_to_alpha2() + .pipe(replace_natural_gas_technology) + ) + return power_plants + else: + raise Exception( + "No power plants were built for custom_powerplants {}".format( + ppl_assignment_strategy + ) ) - elif custom_ppl_query == "replace": - return add_ppls def replace_natural_gas_technology(df: pd.DataFrame): @@ -305,28 +223,30 @@ def replace_natural_gas_technology(df: pd.DataFrame): configure_logging(snakemake) with open(snakemake.input.pm_config, "r") as f: - config = yaml.safe_load(f) + powerplants_config = yaml.safe_load(f) filepath_osm_ppl = snakemake.input.osm_powerplants filepath_osm2pm_ppl = snakemake.output.powerplants_osm2pm + powerplants_assignment_strategy = snakemake.params.custom_powerplants n = pypsa.Network(snakemake.input.base_network) countries_codes = n.buses.country.unique() countries_names = list(map(two_digits_2_name_country, countries_codes)) - config["target_countries"] = countries_names + powerplants_config["target_countries"] = countries_names if ( "EXTERNAL_DATABASE" - in config["matching_sources"] + config["fully_included_sources"] + in powerplants_config["matching_sources"] + + 
powerplants_config["fully_included_sources"] ): - if "EXTERNAL_DATABASE" not in config: + if "EXTERNAL_DATABASE" not in powerplants_config: logger.error( "Missing configuration EXTERNAL_DATABASE in powerplantmatching config yaml\n\t" "Please check file configs/powerplantmatching_config.yaml" ) logger.info("Parsing OSM generator data to powerplantmatching format") - config["EXTERNAL_DATABASE"]["fn"] = get_path( + powerplants_config["EXTERNAL_DATABASE"]["fn"] = get_path( get_current_directory_path(), filepath_osm2pm_ppl ) else: @@ -337,23 +257,20 @@ def replace_natural_gas_technology(df: pd.DataFrame): # specify the main query for filtering powerplants ppl_query = snakemake.params.powerplants_filter if isinstance(ppl_query, str): - config["main_query"] = ppl_query + powerplants_config["main_query"] = ppl_query else: - config["main_query"] = "" - - ppl = ( - pm.powerplants(from_url=False, update=True, config_update=config) - .powerplant.fill_missing_decommissioning_years() - .query('Fueltype not in ["Solar", "Wind"] and Country in @countries_names') - .powerplant.convert_country_to_alpha2() - .pipe(replace_natural_gas_technology) - ) + powerplants_config["main_query"] = "" - ppl = add_custom_powerplants( - ppl, snakemake.input, snakemake.config - ) # add carriers from own powerplant files + ppl = add_power_plants( + snakemake.input.custom_powerplants_file, + powerplants_config, + powerplants_assignment_strategy, + countries_names, + ) - cntries_without_ppl = [c for c in countries_codes if c not in ppl.Country.unique()] + countries_without_ppl = [ + c for c in countries_codes if c not in ppl.Country.unique() + ] for c in countries_codes: substation_i = n.buses.query("substation_lv and country == @c").index @@ -363,8 +280,8 @@ def replace_natural_gas_technology(df: pd.DataFrame): tree_i = kdtree.query(ppl.loc[ppl_i, ["lon", "lat"]].values)[1] ppl.loc[ppl_i, "bus"] = substation_i.append(pd.Index([np.nan]))[tree_i] - if cntries_without_ppl: - logger.warning(f"No powerplants known in: {', '.join(cntries_without_ppl)}") + if countries_without_ppl: + logger.warning(f"No powerplants known in: {', '.join(countries_without_ppl)}") bus_null_b = ppl["bus"].isnull() if bus_null_b.any(): diff --git a/test/test_build_powerplants.py b/test/test_build_powerplants.py new file mode 100644 index 000000000..68f406f04 --- /dev/null +++ b/test/test_build_powerplants.py @@ -0,0 +1,165 @@ +# -*- coding: utf-8 -*- +# SPDX-FileCopyrightText: PyPSA-Earth and PyPSA-Eur Authors +# +# SPDX-License-Identifier: AGPL-3.0-or-later + +# -*- coding: utf-8 -*- + +import pathlib +import sys + +import pandas as pd +import yaml + +sys.path.append("./scripts") + +from test.conftest import get_config_dict + +from build_powerplants import add_power_plants, replace_natural_gas_technology + +path_cwd = pathlib.Path.cwd() + + +def test_replace_natural_gas_technology(): + """ + Verify what returned by replace_natural_gas_technology. 
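+
+    Together with test_add_power_plants below, this exercises the
+    config-driven dispatch of add_power_plants(). A hedged call sketch
+    (paths as in the build_powerplants rule; pm_config stands in for the
+    loaded powerplantmatching configuration):
+
+        import yaml
+
+        with open("configs/powerplantmatching_config.yaml") as f:
+            pm_config = yaml.safe_load(f)
+        ppl = add_power_plants(
+            "data/custom_powerplants.csv",  # custom_power_plants_file_path
+            pm_config,                      # power_plants_config_dict
+            "merge",                        # "replace" -> custom csv only; other values -> PM only
+            ["Nigeria"],                    # countries_names_list
+        )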
+ """ + input_df = pd.DataFrame( + { + "Fueltype": [ + "Natural Gas", + "Oil", + "Natural Gas", + "Natural Gas", + "Natural Gas", + "Natural Gas", + "Natural Gas", + "Natural Gas", + "Natural Gas", + "Natural Gas", + "Natural Gas", + "Natural Gas", + "Hydro", + ], + "Technology": [ + "Steam Turbine", + "Combustion Engine", + "NG", + "Ng", + "NG/FO", + "Ng/Fo", + "NG/D", + "LNG", + "CCGT/D", + "CCGT/FO", + "LCCGT", + "CCGT/Fo", + "Reservoir", + ], + } + ) + + reference_df = pd.DataFrame( + { + "Fueltype": [ + "CCGT", + "Oil", + "CCGT", + "CCGT", + "OCGT", + "OCGT", + "OCGT", + "OCGT", + "CCGT", + "CCGT", + "CCGT", + "CCGT", + "Hydro", + ], + "Technology": [ + "CCGT", + "Combustion Engine", + "CCGT", + "CCGT", + "OCGT", + "OCGT", + "OCGT", + "OCGT", + "CCGT", + "CCGT", + "CCGT", + "CCGT", + "Reservoir", + ], + } + ) + modified_df = replace_natural_gas_technology(input_df) + comparison_df = modified_df.compare(reference_df) + assert comparison_df.empty + + +def test_add_power_plants(get_config_dict): + """ + Verify what returned by add_power_plants. + """ + config_dict = get_config_dict + custom_powerplants_file_path = pathlib.Path( + path_cwd, "test", "test_data", "custom_NG_powerplants.csv" + ) + pm_config_path = pathlib.Path(path_cwd, "configs", "powerplantmatching_config.yaml") + with open(pm_config_path, "r") as f: + power_plants_config = yaml.safe_load(f) + ppl_query = config_dict["electricity"]["powerplants_filter"] + + config_dict["countries"] = ["NG"] + + # replace + config_dict["electricity"]["custom_powerplants"] = "replace" + powerplants_assignment_strategy = config_dict["electricity"]["custom_powerplants"] + if isinstance(ppl_query, str): + power_plants_config["main_query"] = ppl_query + countries_names = ["Nigeria"] + power_plants_config["target_countries"] = countries_names + ppl_replace = add_power_plants( + custom_powerplants_file_path, + power_plants_config, + powerplants_assignment_strategy, + countries_names, + ) + assert ppl_replace.shape == (4, 19) + + # false + config_dict["electricity"]["custom_powerplants"] = "false" + powerplants_assignment_strategy = config_dict["electricity"]["custom_powerplants"] + if isinstance(ppl_query, str): + power_plants_config["main_query"] = ppl_query + countries_names = ["Nigeria"] + power_plants_config["target_countries"] = countries_names + ppl_false = add_power_plants( + custom_powerplants_file_path, + power_plants_config, + powerplants_assignment_strategy, + countries_names, + ) + # The number of powerplants returned by powerplantmatching + # may vary depending on the version of powerplantmatching + # The numbers below refer to version 0.15.5 + assert ppl_false.shape == (31, 18) + + # merge + config_dict["electricity"]["custom_powerplants"] = "merge" + powerplants_assignment_strategy = config_dict["electricity"]["custom_powerplants"] + if isinstance(ppl_query, str): + power_plants_config["main_query"] = ppl_query + countries_names = ["Nigeria"] + power_plants_config["target_countries"] = countries_names + ppl_merge = add_power_plants( + custom_powerplants_file_path, + power_plants_config, + powerplants_assignment_strategy, + countries_names, + ) + # The number of powerplants returned by powerplantmatching + # may vary depending on the version of powerplantmatching + # The numbers below refer to version 0.15.5 + assert ppl_merge.shape == (35, 20) diff --git a/test/test_data/custom_NG_powerplants.csv b/test/test_data/custom_NG_powerplants.csv new file mode 100644 index 000000000..cacc22d5a --- /dev/null +++ 
b/test/test_data/custom_NG_powerplants.csv @@ -0,0 +1,5 @@ +Name,Fueltype,Technology,Set,Country,Capacity,Efficiency,Duration,Volume_Mm3,DamHeight_m,StorageCapacity_MWh,DateIn,DateRetrofit,DateMothball,DateOut,lat,lon,EIC,projectID,bus +Jebba,hydro,Reservoir,PP,NG,578,,,0,0,0,0,1985,1985,2085,9.1409,4.7896,{nan},"{'GHPT': {'G602885'}, 'GEO': {'GEO-42544'}, 'GPD': {'WRI1000037'}}", +Olorunsogo,CCGT,CCGT,PP,NG,754,,,0,0,0,0,2008,2008,2048,6.885316,3.316268,"{nan, nan}","{'GGPT': {'L406801'}, 'GEO': {'GEO-42564'}, 'GPD': {'WRI1000031'}}", +Gbarain Gas,CCGT,CCGT,PP,NG,252,,,0,0,0,0,2016,2016,2056,5.031067,6.301568,"{nan, nan}","{'GGPT': {'L406786'}, 'GEO': {'GEO-42563'}, 'GPD': {'WRI1000027'}}", +Sapele,CCGT,CCGT,PP,NG,1472,,,0,0,0,0,1978,1978,2018,5.926139,5.644983,"{nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan}","{'GGPT': {'L406809', 'L406808'}, 'GEO': {'GEO-42560'}, 'GPD': {'WRI1000028'}}", From c78cf82f1b67ff1a56fef09742b9e0b007b6d899 Mon Sep 17 00:00:00 2001 From: Fabrizio Finozzi <167071962+finozzifa@users.noreply.github.com> Date: Wed, 11 Sep 2024 20:06:06 +0200 Subject: [PATCH 37/40] Unit test prepare network (#8) * add config options for prepare_network * add new config * initial tests * unit test for emission_extractor * add unit test for add_co2limit * unit test for add_gaslimit * add test for enforce_autarky * add unit test for set_line_nom_max * re-work test_add_emission_prices * add unit tests for average_every_nhours and add_emission_prices * modify prepare_network_options * modify prepare_network_options * replace cc.convert with two_2_three_digits_country * remove import country_convert * revert change on cc.convert * change away from cc.convert * fixtures for grid * resolve conflicts * pull request comments * remove unnecessary params * update geopandas version * update environment files * update environment files * set upper limiti on powerplantmatching version --- scripts/prepare_network.py | 11 +- test/test_prepare_network.py | 222 ++++++++++++++++++++++++++++++++++- 2 files changed, 225 insertions(+), 8 deletions(-) diff --git a/scripts/prepare_network.py b/scripts/prepare_network.py index 7488b748d..e0f061537 100755 --- a/scripts/prepare_network.py +++ b/scripts/prepare_network.py @@ -60,7 +60,6 @@ import re from zipfile import ZipFile -import country_converter as cc import numpy as np import pandas as pd import pypsa @@ -72,6 +71,7 @@ get_current_directory_path, get_path, mock_snakemake, + two_2_three_digits_country, ) from add_electricity import load_costs, update_transmission_costs @@ -139,9 +139,10 @@ def emission_extractor(filename, emission_year, country_names): ] df = df.loc[:, "Y_1970":"Y_2018"].astype(float).ffill(axis=1) df = df.loc[:, "Y_1970":"Y_2018"].astype(float).bfill(axis=1) - cc_iso3 = cc.convert(names=country_names, to="ISO3") - if len(country_names) == 1: - cc_iso3 = [cc_iso3] + cc_iso3 = [ + two_2_three_digits_country(two_code_country) + for two_code_country in country_names + ] emission_by_country = df.loc[ df.index.intersection(cc_iso3), "Y_" + str(emission_year) ] @@ -368,7 +369,7 @@ def set_line_nom_max(n, s_nom_max_set=np.inf, p_nom_max_set=np.inf): if "Co2L" in o: m = re.findall("[0-9]*\.?[0-9]+$", o) if snakemake.params.electricity["automatic_emission"]: - country_names = n.buses.country.unique() + country_names = n.buses.country.unique().tolist() emission_year = snakemake.params.electricity[ "automatic_emission_base_year" ] diff --git a/test/test_prepare_network.py b/test/test_prepare_network.py index 
914089614..a5b4779d8 100644 --- a/test/test_prepare_network.py +++ b/test/test_prepare_network.py @@ -5,13 +5,229 @@ # -*- coding: utf-8 -*- + import sys +from pandas import Timestamp + sys.path.append("./scripts") -from prepare_network import download_emission_data +from test.conftest import get_power_network_ac_dc_meshed, get_power_network_scigrid_de + +from prepare_network import ( + add_co2limit, + add_emission_prices, + add_gaslimit, + average_every_nhours, + download_emission_data, + emission_extractor, + enforce_autarky, + set_line_nom_max, + set_line_s_max_pu, +) + +automatic_emission_base_year = 1990 +country_names = ["DE", "IT", "NG"] +co2limit = 7.75e7 def test_download_emission_data(): - filename = download_emission_data() - assert filename == "v60_CO2_excl_short-cycle_org_C_1970_2018.xls" + """ + Verify what returned by download_emission_data. + """ + file_name = download_emission_data() + assert file_name == "v60_CO2_excl_short-cycle_org_C_1970_2018.xls" + + +def test_emission_extractor(): + """ + Verify what returned by emission_extractor. + """ + output_series = emission_extractor( + "v60_CO2_excl_short-cycle_org_C_1970_2018.xls", + automatic_emission_base_year, + country_names, + ) + assert output_series.index.tolist() == ["NGA", "DEU", "ITA"] + assert output_series.values.tolist() == [ + 5698.76187, + 381475.887377666, + 123981.6458729, + ] + + +def test_add_co2limit(get_power_network_scigrid_de): + """ + Verify what returned by add_co2limit. + """ + test_network_de = get_power_network_scigrid_de + number_years = test_network_de.snapshot_weightings.objective.sum() / 8760.0 + add_co2limit(test_network_de, co2limit, number_years) + assert ( + test_network_de.global_constraints.carrier_attribute.values[0] + == "co2_emissions" + ) + assert test_network_de.global_constraints.sense.values[0] == "<=" + assert ( + test_network_de.global_constraints.constant.values[0] == co2limit * number_years + ) + + +def test_add_gaslimit(get_power_network_scigrid_de): + """ + Verify what returned by add_gaslimit. + """ + test_network_de = get_power_network_scigrid_de + test_network_de.add("Carrier", "OCGT") + number_years = test_network_de.snapshot_weightings.objective.sum() / 8760.0 + add_gaslimit(test_network_de, number_years, number_years) + assert test_network_de.global_constraints.carrier_attribute.values[0] == "gas_usage" + assert test_network_de.global_constraints.sense.values[0] == "<=" + assert ( + test_network_de.global_constraints.constant.values[0] + == number_years * number_years + ) + + +def test_add_emission_prices(get_power_network_ac_dc_meshed): + """ + Verify what returned by add_emission_prices. + """ + test_network_ac_dc_meshed = get_power_network_ac_dc_meshed + add_emission_prices( + test_network_ac_dc_meshed, emission_prices={"co2": 1.0}, exclude_co2=False + ) + assert test_network_ac_dc_meshed.generators["marginal_cost"].index.tolist() == [ + "Manchester Wind", + "Manchester Gas", + "Norway Wind", + "Norway Gas", + "Frankfurt Wind", + "Frankfurt Gas", + ] + assert test_network_ac_dc_meshed.generators["marginal_cost"].values.tolist() == [ + 0.11, + 5.218030132047726, + 0.09, + 6.565421697244602, + 0.1, + 4.768788024122113, + ] + + +def test_set_line_s_max_pu(get_power_network_scigrid_de): + """ + Verify what returned by set_line_s_max_pu. 
+ """ + test_network_de = get_power_network_scigrid_de + s_max_pu_new_value = 3.0 + set_line_s_max_pu(test_network_de, s_max_pu_new_value) + assert test_network_de.lines["s_max_pu"].unique()[0] == s_max_pu_new_value + + +def test_average_every_nhours(get_power_network_scigrid_de): + """ + Verify what returned by average_every_nhours. + """ + test_network_de = get_power_network_scigrid_de + + # The input network is already sampled in 1H snapshots. + # Hence, average_every_nhours should not change anything + new_network_1h = average_every_nhours(test_network_de, "1H") + assert test_network_de.snapshots.tolist() == new_network_1h.snapshots.tolist() + + # Re-sample to 4H intervals + new_network_4h = average_every_nhours(test_network_de, "4H") + assert new_network_4h.snapshots.tolist() == [ + Timestamp("2011-01-01 00:00:00"), + Timestamp("2011-01-01 04:00:00"), + Timestamp("2011-01-01 08:00:00"), + Timestamp("2011-01-01 12:00:00"), + Timestamp("2011-01-01 16:00:00"), + Timestamp("2011-01-01 20:00:00"), + ] + + +def test_enforce_autarky_only_crossborder_false(get_power_network_ac_dc_meshed): + """ + Verify what returned by enforce_autarky when only_crossborder is False. + """ + # --> it removes all lines and all DC links + test_network_no_cross_border = get_power_network_ac_dc_meshed + + bus_country_list = ["UK", "UK", "UK", "UK", "DE", "DE", "DE", "NO", "NO"] + test_network_no_cross_border.buses["country"] = bus_country_list + test_network_no_cross_border.links["carrier"] = "DC" + + enforce_autarky(test_network_no_cross_border, only_crossborder=False) + + output_component_dict_no_cross_border = {} + for c in test_network_no_cross_border.iterate_components( + list(test_network_no_cross_border.components.keys())[2:] + ): + output_component_dict_no_cross_border[c.name] = len(c.df) + + reference_component_dict_no_cross_border = { + "Bus": 9, + "Carrier": 3, + "GlobalConstraint": 1, + "LineType": 34, + "TransformerType": 14, + "Load": 6, + "Generator": 6, + } + assert ( + output_component_dict_no_cross_border + == reference_component_dict_no_cross_border + ) + + +def test_enforce_autarky_only_crossborder_true(get_power_network_ac_dc_meshed): + """ + Verify what returned by enforce_autarky when only_crossborder is True. + """ + # --> it removes links and lines that cross borders + test_network_with_cross_border = get_power_network_ac_dc_meshed + bus_country_list = ["UK", "UK", "UK", "UK", "DE", "DE", "DE", "NO", "NO"] + test_network_with_cross_border.buses["country"] = bus_country_list + test_network_with_cross_border.links["carrier"] = "DC" + + enforce_autarky(test_network_with_cross_border, only_crossborder=True) + + output_component_dict_with_cross_border = {} + for c in test_network_with_cross_border.iterate_components( + list(test_network_with_cross_border.components.keys())[2:] + ): + output_component_dict_with_cross_border[c.name] = len(c.df) + + reference_component_dict_with_cross_border = { + "Bus": 9, + "Carrier": 3, + "GlobalConstraint": 1, + "Line": 4, + "LineType": 34, + "TransformerType": 14, + "Link": 3, + "Load": 6, + "Generator": 6, + } + print(output_component_dict_with_cross_border) + + assert ( + output_component_dict_with_cross_border + == reference_component_dict_with_cross_border + ) + + +def test_set_line_nom_max(get_power_network_ac_dc_meshed): + """ + Verify what returned by set_line_nom_max. 
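+
+    On the 4H resampling asserted above: by PyPSA-Eur convention,
+    average_every_nhours() sums the snapshot weightings and averages the
+    time series. A sketch of that convention (not the verbatim
+    implementation in prepare_network.py):
+
+        m = n.copy(with_time=False)
+        weights_4h = n.snapshot_weightings.resample("4H").sum()
+        m.set_snapshots(weights_4h.index)
+        m.snapshot_weightings = weights_4h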
+ """ + test_network = get_power_network_ac_dc_meshed + s_nom_max_value = 5.0 + p_nom_max_value = 10.0 + set_line_nom_max( + test_network, s_nom_max_set=s_nom_max_value, p_nom_max_set=p_nom_max_value + ) + assert test_network.lines.s_nom_max.unique()[0] == s_nom_max_value + assert test_network.links.p_nom_max.unique()[0] == p_nom_max_value From 8bd4b862cae6e926257cab401da09e8ddc939abe Mon Sep 17 00:00:00 2001 From: Fabrizio Finozzi <167071962+finozzifa@users.noreply.github.com> Date: Thu, 19 Sep 2024 14:47:43 +0200 Subject: [PATCH 38/40] config: restricting fiona version to <=1.9.6 (#13) --- envs/environment.mac.yaml | 2 +- envs/environment.yaml | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/envs/environment.mac.yaml b/envs/environment.mac.yaml index 879f857d0..96c7cf837 100644 --- a/envs/environment.mac.yaml +++ b/envs/environment.mac.yaml @@ -31,7 +31,7 @@ dependencies: - numpy - pandas - geopandas>=0.11.0, <=0.14.3 -- fiona!=1.10.0 +- fiona<=1.9.6 - xarray>=2023.11.0, <2023.12.0 - netcdf4 - networkx diff --git a/envs/environment.yaml b/envs/environment.yaml index 68e54fd08..3d9915a49 100644 --- a/envs/environment.yaml +++ b/envs/environment.yaml @@ -31,7 +31,7 @@ dependencies: - numpy - pandas - geopandas>=0.11.0, <=0.14.3 -- fiona!=1.10.0 +- fiona<=1.9.6 - xarray>=2023.11.0, <2023.12.0 - netcdf4 - networkx From 73ca8d28283abda2e467cf5a2bca2ae4e0cae7c8 Mon Sep 17 00:00:00 2001 From: Fabrizio Finozzi <167071962+finozzifa@users.noreply.github.com> Date: Tue, 24 Sep 2024 21:24:30 +0200 Subject: [PATCH 39/40] Merge the merge (#14) * Update release_notes.rst (#1104) * Merge pull request #1086 from merge-pyspa-earth-sec * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * Comment add_brownfield to bypass linter * Add myopic test * Revise irena and minor fixes * Revise test name * Add existing_heating * implement review suggesstions * bug fix in solve_network with reference case * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * Update submodule * Add zeros for missing entries - aluminium * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * adding missing templates * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * fix MissingOutputException * fix TypeError (#321) * fix TypeError * Update scripts/prepare_gas_network.py Co-authored-by: Davide Fioriti <67809479+davide-f@users.noreply.github.com> --------- Co-authored-by: Davide Fioriti <67809479+davide-f@users.noreply.github.com> * solve pandas deprecations * Add Params for Rule add_export.py * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * Add Params to Rule build_base_energy_totals * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * Add Params to Rule build_base_industry_totals * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * Add Params to Rule build_cop_profiles * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * Add Params to Rule build_heat_demand * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * Add Params to 
build_industrial_distribution_key * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * Add Params to build_industry_demand * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * Add Params and urban_percentage effect all Rules * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * Add Params to build_ship_profile * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * Add Params build_solar_thermal_profiles * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * Add Params to build_temperature_profiles * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * Add Params to copy_config * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * Add Params make_summary * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * Add to override_respot + panning_horizons wildcard * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * Update Snakefile * Update Snakefile * Add Params to prepare_airports * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * Add Params to prepare_gas_network * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * Add Params to prepare_db and energy_totals * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * Update Snakefile * Update add_export.py * Add Params to build_population_layout * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * enhance the industry scripts and adapt the fuel aggregation * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * remove legacy EUR code * remove legacy params form config * remove legacy params form config * Update build_base_industry_totals.py * Update build_base_energy_totals.py * omit double transpose * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * remove plotting from workflow * remove plotting from prepare_gas_network * Test routine: add Makefile CI: consolidate ci.yaml files * CI: bump cache number, remove os prefixes and labels env: remove ipopt restriction * Makefile: add tutorial yaml as additional config * snakefile: use config.default as basis ci: use tutorial config as secondary config * snakefile: fix databundle config path * debug: print out downloaded files * snakefile: use renewables as bases for used cutouts * Include scenario management * Add scenario in tutorial * Update submodule * Add shared_cutouts * Fix missing RDIR_PE * fix IndexError (#326) * fix IndexError * '.' 
gets only added when len(id) > 3 * fix with layer_id * Update scripts/prepare_gas_network.py Co-authored-by: Davide Fioriti <67809479+davide-f@users.noreply.github.com> * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * fix index error when layer_id == 0 --------- Co-authored-by: Davide Fioriti <67809479+davide-f@users.noreply.github.com> Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> * solve_network: modularize solver_options to allow for incremental config changes config: harmonize and apply yaml linting * refactor configs: boil down to effective diff in tutorial yaml and scenarios yaml * test: remove config no_progress * ci: disable for windows * helpers: use copy_default_files * config: make config.default + config.tutorial match old config.tutorial * solve_network: fix options assignment in prepare * add build_heating_distribution * Update licensing * Set location to Earth * Update cluster pop * Fix typos * Bugfix to skip h2 pipelines with missing buses * Bugfix h2_network loc * reintroduce config.tutorial.yaml as basis (possibly revert this commit later to reenable config.default) * doc: update testing documentation [skip ci] * add comments to makefile * Update myopic test * Bugfix none location in build_industrial_database * Revise run_test myopic name * revert transpose * delete plot_network_eur.py * minor bug in build_base_energy_totals * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * Update build_base_industry_totals.py * adapt build_base_energy and build_base_industry to params * add missing param in snakefile * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * Revise myopic test file * fix param in snakefile * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * reset config.pypsa-earth.yaml * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * Update Snakefile * handle exception to avoid reference before assignment error * Add different scenario name for subworkflow * Fix myopic test * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * Update config.test_myopic.yaml to include new parameter * replace snakemake subworkflow by submodule; fix path relations * fill NaN without international bunkers * remove factor for gas emissions from residential and services sector * account for multi-country-cases * snakefile: replace rdir by resdir for compatibility with pypsa-earth * Update build_base_industry_totals.py * Update Snakefile * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * Update build_base_industry_totals.py * modified config to fix nan objective value * print objective value to terminal * fix for urls not working * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * added license for datasets * Update README.md * Update README.md * Final README adaptations before v0.2 release (#364) * Adding the updated network representation figure * Delete docs/SCPE_v0.2.pdf * Include new network representaion figure as png * Update README.md * Embed the new network figure in the readme * Delete outdated incomplete network configurations * updated pypsa-earth submodule to v0.4.0 * adjust modular solver options * resolve global paths 
* workflow: use global root directory to avoid recursive upwards chdir * update pypsa-earth commit * revert changes to config.default and config.tutorial priority * udpate pypsa-earth * consolidate CI yaml * yamllint: align formatting and config * Snakefile: fix SDIR and RESDIR path ending * ci.yaml include git submodule * fix duplicated "/" * remove config.pypsa-earth.yaml in favour of actual pypsa-earth default config * remove pypsa-earth.config from copy_config * ci: use one core to better track log * test: use more core * make yamllint compatible * build_renewable_profiles: use local client in order to suppress verbose dask output * update submodule * config: consolidate clustering key snakefile: fix cost retrieval * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * solve_networks: fix network reference for sector coupled case * address review comments * config.myopic.yaml: revert country order again * Update README.md Co-authored-by: Davide Fioriti <67809479+davide-f@users.noreply.github.com> * address davides comments (first round) * consolidate current working directory; considate helpers scripts * fix reference to helpers script; fix fiona version * env: update ppm version * build_industrial_database: make retrieval robust against restrictive permissions * follow up: consolidate restrictive url retrievals * helpers: reinsert gadm functions from pypsa-earth-sec due to discrepancies (solve those later) * env: temporarily install new earth osm version from pypi * ci: restrict ipopt for windows * env: follow up * ci: roll back and disable windows * _helpers: try make content_retrieval more robust * Update README.md Co-authored-by: Ekaterina * config: bump version; add `allow_scenario_failure` flag; remove `base` cutout comment * Snakefile: make cutout path consistent for sector-coupled version config.default: make hydro extedable again * helpers: fix numpy random usage * properly remove submodule * add version tag to all configs * env: update powerplantmatching * add missing config version tag * update powerplantmatching * follow up: move ppm installation to pip while conda is not out * readme: add description on running previous models * helpers: add HTTPError as allowed exception * config: remove lifetime from config.default * harmonize cost calculation in sector coupled model * bump version tag in readme [skip ci] * update README to account for Hazem's comments --------- Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> Co-authored-by: davide-f Co-authored-by: Hazem-IEG Co-authored-by: Davide Fioriti <67809479+davide-f@users.noreply.github.com> Co-authored-by: Hazem <87850910+hazemakhalek@users.noreply.github.com> Co-authored-by: Eddy Jalbout <75539255+Eddy-JV@users.noreply.github.com> Co-authored-by: finozzifa <167071962+finozzifa@users.noreply.github.com> Co-authored-by: Anton Achhammer Co-authored-by: Anton Achhammer <132910766+doneachh@users.noreply.github.com> Co-authored-by: Fabrizio Finozzi Co-authored-by: Eddy-JV Co-authored-by: energyLS <89515385+energyLS@users.noreply.github.com> Co-authored-by: cpschau Co-authored-by: Emmanuel Bolarinwa Co-authored-by: Ekaterina * code: re-add copy_default_files * code:fix imports * code:fix arguments in build_osm_network * code:geo_crs value as explicit argument * code:remove unnecessary arguments * code: modify the demand in scripts/build_base_energy_totals.py * code: modify the demand in scripts/build_base_energy_totals.py - 2 * code: modify 
the demand in scripts/build_base_energy_totals.py - 3 * code: modify the demand in scripts/build_base_industry_totals.py * code: modify the demand in scripts/build_base_industry_totals.py and scripts/build_base_energy_totals.py * code:provide missing arguments in locate_bus * code: add missing arguments in prepare_sector_network * code: remove tuple * code: remove tuple * code:remove environment.mac and update test_build_powerplants --------- Co-authored-by: Davide Fioriti <67809479+davide-f@users.noreply.github.com> Co-authored-by: Fabian Hofmann Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> Co-authored-by: davide-f Co-authored-by: Hazem-IEG Co-authored-by: Hazem <87850910+hazemakhalek@users.noreply.github.com> Co-authored-by: Eddy Jalbout <75539255+Eddy-JV@users.noreply.github.com> Co-authored-by: Anton Achhammer Co-authored-by: Anton Achhammer <132910766+doneachh@users.noreply.github.com> Co-authored-by: Eddy-JV Co-authored-by: energyLS <89515385+energyLS@users.noreply.github.com> Co-authored-by: cpschau Co-authored-by: Emmanuel Bolarinwa Co-authored-by: Ekaterina --- .github/workflows/ci-linux.yaml | 96 - .github/workflows/ci-mac.yaml | 82 - .github/workflows/ci-windows.yaml | 82 - .github/workflows/ci.yml | 72 + .gitignore | 71 +- .pre-commit-config.yaml | 9 + .reuse/dep5 | 24 - .yamllint | 9 +- Makefile | 31 + README.md | 31 +- REUSE.toml | 38 + Snakefile | 1005 +++++- config.default.yaml | 787 ++++- config.tutorial.yaml | 459 +-- configs/powerplantmatching_config.yaml | 4 +- configs/scenarios/config.NG.yaml | 9 +- data/AL_production.csv | 258 ++ .../custom/TEMPLATE_energy_totals_AB_2030.csv | 2 + .../TEMPLATE_h2_underground_AB_2030 copy.csv | 19 + data/custom/TEMPLATE_industrial_database.csv | 2 + .../TEMPLATE_industry_demand_AB_2030.csv | 9 + data/custom_res_installable.csv | 4 + data/custom_res_potential.csv | 4 + data/demand/district_heating.csv | 3 + data/demand/efficiency_gains_cagr.csv | 3 + data/demand/fuel_shares.csv | 3 + data/demand/growth_factors_cagr.csv | 3 + data/demand/industry_growth_cagr.csv | 3 + .../paths/Energy_Statistics_Database.xlsx | Bin 0 -> 9202 bytes data/emobility/Bus__count | 171 + .../European_countries_car_ownership.csv | 31 + data/emobility/KFZ__count | 171 + data/emobility/Lfw__count | 171 + data/emobility/Lkw__count | 171 + data/emobility/LoA__count | 171 + data/emobility/Lzg__count | 171 + data/emobility/Pkw__count | 171 + data/emobility/PmA__count | 171 + data/emobility/Sat__count | 171 + data/emobility/traffic.tex | 14 + data/energy_totals_DF_2030.csv | 2 + .../existing_heating_raw.csv | 32 + data/export_ports.csv | 14 + data/heat_load_profile_BDEW.csv | 25 + data/hydrogen_salt_cavern_potentials.csv | 6 + data/override_component_attrs/buses.csv | 3 + data/override_component_attrs/generators.csv | 3 + data/override_component_attrs/links.csv | 13 + data/override_component_attrs/loads.csv | 2 + data/override_component_attrs/stores.csv | 4 + .../biomass_transport_costs.csv | 48 + data/temp_hard_coded/energy_totals.csv | 37 + data/temp_hard_coded/transport_data.csv | 123 + data/unsd_transactions.csv | 39 + doc/configtables/licenses.csv | 5 + doc/how_to_contribute.rst | 4 +- doc/release_notes.rst | 7 +- envs/environment.mac.yaml | 87 - envs/environment.yaml | 9 +- scripts/_helpers.py | 402 ++- scripts/add_brownfield.py | 259 ++ scripts/add_electricity.py | 25 +- scripts/add_existing_baseyear.py | 651 ++++ scripts/add_export.py | 261 ++ scripts/add_extra_components.py | 39 +- 
scripts/augmented_line_connections.py | 8 +- scripts/base_network.py | 6 - scripts/build_base_energy_totals.py | 473 +++ scripts/build_base_industry_totals.py | 205 ++ scripts/build_bus_regions.py | 9 +- scripts/build_clustered_population_layouts.py | 55 + scripts/build_cop_profiles.py | 44 + scripts/build_cutout.py | 8 +- scripts/build_demand_profiles.py | 5 - .../build_existing_heating_distribution.py | 176 ++ scripts/build_heat_demand.py | 41 + scripts/build_industrial_database.py | 524 +++ scripts/build_industrial_distribution_key.py | 208 ++ scripts/build_industry_demand.py | 318 ++ scripts/build_natura_raster.py | 10 +- scripts/build_osm_network.py | 63 +- scripts/build_population_layouts.py | 131 + scripts/build_powerplants.py | 2 - scripts/build_renewable_profiles.py | 32 +- scripts/build_shapes.py | 13 +- scripts/build_ship_profile.py | 88 + scripts/build_solar_thermal_profiles.py | 54 + scripts/build_temperature_profiles.py | 58 + scripts/build_test_configs.py | 8 +- scripts/clean_osm_data.py | 2 - scripts/cluster_network.py | 15 +- scripts/copy_config.py | 23 + scripts/download_osm_data.py | 11 +- scripts/make_statistics.py | 7 - scripts/make_summary.py | 2 - scripts/monte_carlo.py | 4 +- scripts/non_workflow/zenodo_handler.py | 6 +- scripts/non_workflow/zip_folder.py | 15 +- scripts/override_respot.py | 109 + scripts/plot_network.py | 807 ++++- scripts/plot_summary.py | 9 +- scripts/prepare_airports.py | 115 + scripts/prepare_db.py | 496 +++ scripts/prepare_energy_totals.py | 293 ++ scripts/prepare_gas_network.py | 554 ++++ scripts/prepare_heat_data.py | 173 + scripts/prepare_network.py | 10 +- scripts/prepare_ports.py | 87 + scripts/prepare_sector_network.py | 2816 +++++++++++++++++ scripts/prepare_transport_data.py | 251 ++ scripts/prepare_transport_data_input.py | 152 + scripts/prepare_urban_percent.py | 99 + scripts/retrieve_databundle_light.py | 21 +- scripts/simplify_network.py | 3 +- scripts/solve_network.py | 1616 ++++++---- test/config.custom.yaml | 3 +- test/config.landlock.yaml | 3 +- test/config.monte_carlo.yaml | 3 +- test/config.test1.yaml | 54 + test/config.test_myopic.yaml | 540 ++++ test/config.tutorial_noprogress.yaml | 7 - test/test_build_powerplants.py | 50 +- 122 files changed, 15539 insertions(+), 2162 deletions(-) delete mode 100644 .github/workflows/ci-linux.yaml delete mode 100644 .github/workflows/ci-mac.yaml delete mode 100644 .github/workflows/ci-windows.yaml create mode 100644 .github/workflows/ci.yml delete mode 100644 .reuse/dep5 create mode 100644 Makefile create mode 100644 REUSE.toml create mode 100644 data/AL_production.csv create mode 100644 data/custom/TEMPLATE_energy_totals_AB_2030.csv create mode 100644 data/custom/TEMPLATE_h2_underground_AB_2030 copy.csv create mode 100644 data/custom/TEMPLATE_industrial_database.csv create mode 100644 data/custom/TEMPLATE_industry_demand_AB_2030.csv create mode 100644 data/custom_res_installable.csv create mode 100644 data/custom_res_potential.csv create mode 100644 data/demand/district_heating.csv create mode 100644 data/demand/efficiency_gains_cagr.csv create mode 100644 data/demand/fuel_shares.csv create mode 100644 data/demand/growth_factors_cagr.csv create mode 100644 data/demand/industry_growth_cagr.csv create mode 100644 data/demand/unsd/paths/Energy_Statistics_Database.xlsx create mode 100644 data/emobility/Bus__count create mode 100644 data/emobility/European_countries_car_ownership.csv create mode 100644 data/emobility/KFZ__count create mode 100644 data/emobility/Lfw__count create mode 100644 
data/emobility/Lkw__count create mode 100644 data/emobility/LoA__count create mode 100644 data/emobility/Lzg__count create mode 100644 data/emobility/Pkw__count create mode 100644 data/emobility/PmA__count create mode 100644 data/emobility/Sat__count create mode 100644 data/emobility/traffic.tex create mode 100644 data/energy_totals_DF_2030.csv create mode 100644 data/existing_infrastructure/existing_heating_raw.csv create mode 100644 data/export_ports.csv create mode 100644 data/heat_load_profile_BDEW.csv create mode 100644 data/hydrogen_salt_cavern_potentials.csv create mode 100644 data/override_component_attrs/buses.csv create mode 100644 data/override_component_attrs/generators.csv create mode 100644 data/override_component_attrs/links.csv create mode 100644 data/override_component_attrs/loads.csv create mode 100644 data/override_component_attrs/stores.csv create mode 100644 data/temp_hard_coded/biomass_transport_costs.csv create mode 100644 data/temp_hard_coded/energy_totals.csv create mode 100644 data/temp_hard_coded/transport_data.csv create mode 100644 data/unsd_transactions.csv delete mode 100644 envs/environment.mac.yaml create mode 100644 scripts/add_brownfield.py create mode 100644 scripts/add_existing_baseyear.py create mode 100644 scripts/add_export.py create mode 100644 scripts/build_base_energy_totals.py create mode 100644 scripts/build_base_industry_totals.py create mode 100644 scripts/build_clustered_population_layouts.py create mode 100644 scripts/build_cop_profiles.py create mode 100644 scripts/build_existing_heating_distribution.py create mode 100644 scripts/build_heat_demand.py create mode 100644 scripts/build_industrial_database.py create mode 100644 scripts/build_industrial_distribution_key.py create mode 100644 scripts/build_industry_demand.py create mode 100644 scripts/build_population_layouts.py create mode 100644 scripts/build_ship_profile.py create mode 100644 scripts/build_solar_thermal_profiles.py create mode 100644 scripts/build_temperature_profiles.py create mode 100644 scripts/copy_config.py create mode 100644 scripts/override_respot.py create mode 100644 scripts/prepare_airports.py create mode 100644 scripts/prepare_db.py create mode 100644 scripts/prepare_energy_totals.py create mode 100644 scripts/prepare_gas_network.py create mode 100644 scripts/prepare_heat_data.py create mode 100644 scripts/prepare_ports.py create mode 100644 scripts/prepare_sector_network.py create mode 100644 scripts/prepare_transport_data.py create mode 100644 scripts/prepare_transport_data_input.py create mode 100644 scripts/prepare_urban_percent.py create mode 100644 test/config.test1.yaml create mode 100644 test/config.test_myopic.yaml delete mode 100644 test/config.tutorial_noprogress.yaml diff --git a/.github/workflows/ci-linux.yaml b/.github/workflows/ci-linux.yaml deleted file mode 100644 index 1b554109c..000000000 --- a/.github/workflows/ci-linux.yaml +++ /dev/null @@ -1,96 +0,0 @@ -name: CI-linux - -on: - push: - branches: - - main - pull_request: - branches: - - main - schedule: - - cron: "0 5 * * TUE" - -env: - CACHE_NUMBER: 1 # Change this value to manually reset the environment cache - -jobs: - build: - strategy: - fail-fast: false # don't break CI for ubuntu if windows fails before - matrix: - include: - # Matrix required to handle environment caching with Mambaforge - - os: ubuntu-latest - label: ubuntu-latest - prefix: /usr/share/miniconda3/envs/pypsa-earth - - name: ${{ matrix.label }} - runs-on: ${{ matrix.os }} - - defaults: - run: - shell: bash -l {0} - - 
steps: - - name: Checkout - uses: actions/checkout@v3 - - - name: Setup Mambaforge - uses: conda-incubator/setup-miniconda@v3 - with: - miniforge-variant: Mambaforge - miniforge-version: latest - activate-environment: pypsa-earth - use-mamba: true - - - name: Create environment cache - uses: actions/cache@v3 - id: cache - with: - path: ${{ matrix.prefix }} - key: ${{ matrix.label }}-conda-${{ hashFiles('envs/environment.yaml') }}-${{ env.DATE }}-${{ env.CACHE_NUMBER }} - - - name: Update environment due to outdated or unavailable cache - if: steps.cache.outputs.cache-hit != 'true' - run: mamba env update -n pypsa-earth -f envs/environment.yaml - - - name: Conda list - run: | - conda list - - - name: Create test configs - run: | - snakemake --cores all build_test_configs - - - name: Test tutorial workflow - run: | - cp test/tmp/config.tutorial_noprogress_tmp.yaml config.yaml - snakemake --cores all solve_all_networks --forceall - - - name: Test custom workflow - run: | - mkdir -p configs/scenarios - cp test/config.custom.yaml configs/scenarios/config.custom.yaml - snakemake --cores 1 run_all_scenarios --forceall - - - name: Test monte-carlo workflow - run: | - cp test/tmp/config.monte_carlo_tmp.yaml config.yaml - snakemake --cores all solve_all_networks_monte --forceall - - - name: Test landlock workflow - run: | - cp test/tmp/config.landlock_tmp.yaml config.yaml - snakemake --cores all solve_all_networks --forceall - - - name: Unit tests - run: | - python -m pip install pytest - pytest test/ - - # - name: Test plotting and summaries - # run: | - # snakemake --cores all plot_all_p_nom - # snakemake --cores all plot_all_summaries - # snakemake --cores all make_all_summaries - # rm -rf resources/*.nc resources/*.geojson resources/*.h5 networks results diff --git a/.github/workflows/ci-mac.yaml b/.github/workflows/ci-mac.yaml deleted file mode 100644 index 347c344bc..000000000 --- a/.github/workflows/ci-mac.yaml +++ /dev/null @@ -1,82 +0,0 @@ -name: CI-mac - -on: - push: - branches: - - main - pull_request: - branches: - - main - schedule: - - cron: "0 5 * * TUE" - -env: - CACHE_NUMBER: 1 # Change this value to manually reset the environment cache - -jobs: - build: - strategy: - matrix: - include: - - os: macos-latest - label: macos-latest - prefix: /Users/runner/miniconda3/envs/pypsa-earth - - name: ${{ matrix.label }} - runs-on: ${{ matrix.os }} - - defaults: - run: - shell: bash -l {0} - - steps: - - name: Checkout - uses: actions/checkout@v3 - - # - name: Add solver to environment - # run: | - # echo -e "- glpk\n- ipopt<3.13.3" >> envs/environment.yaml - - - name: Setup Mambaforge - uses: conda-incubator/setup-miniconda@v3 - with: - miniforge-variant: Mambaforge - miniforge-version: latest - activate-environment: pypsa-earth - use-mamba: true - - - name: Create environment cache - uses: actions/cache@v3 - id: cache - with: - path: ${{ matrix.prefix }} - key: ${{ matrix.label }}-conda-${{ hashFiles('envs/environment.mac.yaml') }}-${{ env.DATE }}-${{ env.CACHE_NUMBER }} - - - name: Update environment due to outdated or unavailable cache - if: steps.cache.outputs.cache-hit != 'true' - run: mamba env update -n pypsa-earth -f envs/environment.mac.yaml - - - name: Conda list - run: | - conda list - - - name: Create test configs - run: | - snakemake --cores all build_test_configs - - - name: Test tutorial workflow - run: | - cp test/tmp/config.tutorial_noprogress_tmp.yaml config.yaml - snakemake --cores all solve_all_networks - - - name: Unit tests - run: | - python -m pip install pytest - pytest 
test/ - - # - name: Test plotting and summaries - # run: | - # snakemake --cores all plot_all_p_nom - # snakemake --cores all plot_all_summaries - # snakemake --cores all make_all_summaries - # rm -rf resources/*.nc resources/*.geojson resources/*.h5 networks results diff --git a/.github/workflows/ci-windows.yaml b/.github/workflows/ci-windows.yaml deleted file mode 100644 index 5b7fd7d37..000000000 --- a/.github/workflows/ci-windows.yaml +++ /dev/null @@ -1,82 +0,0 @@ -name: CI-windows - -on: - push: - branches: - - main - pull_request: - branches: - - main - schedule: - - cron: "0 5 * * TUE" - -env: - CACHE_NUMBER: 1 # Change this value to manually reset the environment cache - -jobs: - build: - strategy: - matrix: - include: - - os: windows-latest - label: windows-latest - prefix: C:\Miniconda3\envs\pypsa-earth - - name: ${{ matrix.label }} - runs-on: ${{ matrix.os }} - - defaults: - run: - shell: bash -l {0} - - steps: - - name: Checkout - uses: actions/checkout@v3 - - # - name: Add solver to environment - # run: | - # echo -e "- glpk\n- ipopt<3.13.3" >> envs/environment.yaml - - - name: Setup Mambaforge - uses: conda-incubator/setup-miniconda@v3 - with: - miniforge-variant: Mambaforge - miniforge-version: latest - activate-environment: pypsa-earth - use-mamba: true - - - name: Create environment cache - uses: actions/cache@v3 - id: cache - with: - path: ${{ matrix.prefix }} - key: ${{ matrix.label }}-conda-${{ hashFiles('envs/environment.yaml') }}-${{ env.DATE }}-${{ env.CACHE_NUMBER }} - - - name: Update environment due to outdated or unavailable cache - if: steps.cache.outputs.cache-hit != 'true' - run: mamba env update -n pypsa-earth -f envs/environment.yaml - - - name: Conda list - run: | - conda list - - - name: Create test configs - run: | - snakemake --cores all build_test_configs - - - name: Test tutorial workflow - run: | - cp test/tmp/config.tutorial_noprogress_tmp.yaml config.yaml - snakemake --cores all solve_all_networks - - - name: Unit tests - run: | - python -m pip install pytest - pytest test/ - - # - name: Test plotting and summaries - # run: | - # snakemake --cores all plot_all_p_nom - # snakemake --cores all plot_all_summaries - # snakemake --cores all make_all_summaries - # rm -rf resources/*.nc resources/*.geojson resources/*.h5 networks results diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml new file mode 100644 index 000000000..1c44686dd --- /dev/null +++ b/.github/workflows/ci.yml @@ -0,0 +1,72 @@ +name: CI + +on: + push: + branches: + - main + pull_request: + branches: + - main + schedule: + - cron: "0 5 * * TUE" + +env: + CACHE_NUMBER: 2 # Change this value to manually reset the environment cache + +jobs: + build: + strategy: + fail-fast: false + max-parallel: 3 + matrix: + os: + - ubuntu-latest + - macos-latest + # - windows-latest + + runs-on: ${{ matrix.os }} + + defaults: + run: + shell: bash -l {0} + + steps: + - uses: actions/checkout@v3 + + + - name: Setup micromamba + uses: mamba-org/setup-micromamba@v1 + with: + micromamba-version: latest + environment-file: envs/environment.yaml + log-level: debug + init-shell: bash + cache-environment: true + cache-downloads: true + + + - name: Set cache dates + run: | + echo "WEEK=$(date +'%Y%U')" >> $GITHUB_ENV + + - name: Cache data and cutouts folders + uses: actions/cache@v3 + with: + path: | + data + cutouts + key: data-cutouts-${{ env.WEEK }}-${{ env.CACHE_NUMBER }} + + + - name: Conda list + run: conda list + + - name: Run Test + run: make checks + + # - name: Test plotting and summaries + 
# run: | + # snakemake --cores all plot_all_p_nom + # snakemake --cores all plot_all_summaries + # snakemake --cores all make_all_summaries + # rm -rf resources/*.nc resources/*.geojson resources/*.h5 networks results diff --git a/.gitignore b/.gitignore index 3e2a7d45e..7eeacaad3 100644 --- a/.gitignore +++ b/.gitignore @@ -6,6 +6,8 @@ **/__pycache__ *.py[cod] *$py.class + +# Jupyter-related files .ipynb_checkpoints # General untracked file formats @@ -15,55 +17,58 @@ *.zip *.png *.done +*.tif +*.csv +*.geojson +*.nc +*.xls +*.xlsx +*.org +*~ -# Untracked files +# Specific untracked files config.yaml dag.svg - -# Files appear for tests -configs/scenarios/config.custom.yaml -data/*.csv -data/*.geojson +gurobi.log +gadm_shapes.geojson +doc/*.vscode/settings.json # Untracked folders (and files within) -img/ -.snakemake/ +bak benchmarks/ +backup* cutouts/ data/ -data/osm/ -data/raw/ -data/base_network/ -results/ +dask-worker-space/ +img/ +logs/ networks/ +notebooks +notebooks/ +pypsa/ resources/ -scripts/temp -scripts/test.py +results/ +.snakemake/ +slurm +scripts/old +.tmp +doc/_build +sample_pypsa_eur test/tmp/ +playground/ + +# Specific configuration files configs/scenarios/config.*.yaml !configs/scenarios/config.NG.yaml -dask-worker-space/ -# Untrack some Jupyter changes -.ipynb_checkpoints -notebooks/*.nc -notebooks/*.csv -notebooks/*.zip -notebooks/*.tif -notebooks/old_notebooks/*.nc -notebooks/old_notebooks/*.csv -notebooks/old_notebooks/*.zip -notebooks/old_notebooks/*.tif - -# Untrack test doc builds -doc/_build - -# VS code related -*.vscode -doc/*.vscode/settings.json -*.vscode/settings.json +# VS Code-related files +.vscode *.code-workspace +*.vscode/settings.json # Additional debugging folders backup* screenlog* + +# Exclude specific file paths +!data/demand/unsd/paths/Energy_Statistics_Database.xlsx diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 88f3a318f..941864551 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -56,6 +56,15 @@ repos: # Format Jupyter Python notebooks - id: black-jupyter +# Find common spelling mistakes in comments and docstrings +- repo: https://github.com/codespell-project/codespell + rev: v2.2.1 + hooks: + - id: codespell + args: ['--ignore-regex="\b[A-Z]+\b"', '--ignore-words-list=fom,appartment,bage,ore,setis,tabacco,berfore,vor'] # Ignore capital case words, e.g. 
country codes + types_or: [python, rst, markdown] + files: ^(actions|doc)/ + # Do YAML formatting (before the linter checks it for misses) - repo: https://github.com/macisamuele/language-formatters-pre-commit-hooks rev: v2.14.0 diff --git a/.reuse/dep5 b/.reuse/dep5 deleted file mode 100644 index 592a0440f..000000000 --- a/.reuse/dep5 +++ /dev/null @@ -1,24 +0,0 @@ -Format: https://www.debian.org/doc/packaging-manuals/copyright-format/1.0/ -Upstream-Name: pypsa-earth -Upstream-Contact: The PyPSA-Earth and PyPSA-Eur Authors -Source: https://github.com/pypsa-meets-earth/pypsa-earth - -Files: doc/data.csv -Copyright: The PyPSA-Earth and PyPSA-Eur Authors -License: CC-BY-4.0 - -Files: doc/configtables/* -Copyright: The PyPSA-Earth and PyPSA-Eur Authors -License: CC-BY-4.0 - -Files: test/test_data/* -Copyright: The PyPSA-Earth and PyPSA-Eur Authors -License: CC-BY-4.0 - -Files: data/* -Copyright: The PyPSA-Earth and PyPSA-Eur Authors -License: CC-BY-4.0 - -Files: .github/* -Copyright: The PyPSA-Earth and PyPSA-Eur Authors -License: CC0-1.0 diff --git a/.yamllint b/.yamllint index 66d402399..2fb93d7dc 100644 --- a/.yamllint +++ b/.yamllint @@ -9,14 +9,13 @@ extends: default rules: braces: - # Do not allow flow mappings using curly braces "{" and "}" - forbid: true + forbid: false brackets: max-spaces-inside: 0 max-spaces-inside-empty: 0 comments: - require-starting-space: true - min-spaces-from-content: 2 + require-starting-space: false + min-spaces-from-content: 0 # Force correct indentation of comments # yamllint disable-line rule:braces comments-indentation: {} @@ -35,7 +34,7 @@ rules: key-duplicates: {} line-length: level: warning - max: 88 + max: 350 new-line-at-end-of-file: enable truthy: check-keys: false # Disable truthy check hits on keys like "on": ... diff --git a/Makefile b/Makefile new file mode 100644 index 000000000..07db5cb3a --- /dev/null +++ b/Makefile @@ -0,0 +1,31 @@ +# SPDX-FileCopyrightText: PyPSA-Earth and PyPSA-Eur Authors +# +# SPDX-License-Identifier: AGPL-3.0-or-later + +.PHONY: checks tests setup clean + +tests: + set -e + snakemake solve_all_networks -call --configfile config.tutorial.yaml # this runs the tutorial config + snakemake solve_all_networks -call --configfile config.tutorial.yaml test/config.custom.yaml # add custom config to tutorial config + snakemake solve_all_networks -call --configfile config.tutorial.yaml configs/scenarios/config.NG.yaml + snakemake solve_all_networks_monte -call --configfile config.tutorial.yaml test/config.monte_carlo.yaml + snakemake solve_all_networks -call --configfile config.tutorial.yaml test/config.landlock.yaml + snakemake -c4 solve_sector_networks --configfile config.tutorial.yaml test/config.test1.yaml + echo "All tests completed successfully." + +checks: tests + pytest test + +setup: + # Add setup commands here + echo "Setup complete." + +clean: + # Add clean-up commands here + snakemake -j1 solve_all_networks --delete-all-output --configfile config.tutorial.yaml test/config.custom.yaml + snakemake -j1 solve_all_networks --delete-all-output --configfile config.tutorial.yaml configs/scenarios/config.NG.yaml + snakemake -j1 solve_all_networks_monte --delete-all-output --configfile test/config.monte_carlo.yaml + snakemake -j1 run_all_scenarios --delete-all-output --configfile test/config.landlock.yaml + snakemake -j1 solve_sector_networks --delete-all-output --configfile test/config.test1.yaml + echo "Clean-up complete." 
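The consolidated `Makefile` above drives every test through stacked `--configfile` arguments, relying on Snakemake's rule that later config files override earlier ones key by key — the same layering the reworked Snakefile uses when it loads `config.default.yaml`, the bundle and powerplantmatching configs, and finally `config.yaml`. Below is a minimal sketch of that override semantics; it is an illustration only, not Snakemake's actual implementation, and the file names are taken from the Makefile targets above.

```python
# Minimal sketch of layered --configfile semantics: later files override
# earlier ones key by key, with nested mappings merged recursively.
# Illustration only -- not Snakemake's actual implementation.
import yaml


def merge_configs(base: dict, override: dict) -> dict:
    """Return a copy of `base` updated with `override`, recursing into nested dicts."""
    merged = dict(base)
    for key, value in override.items():
        if isinstance(value, dict) and isinstance(merged.get(key), dict):
            merged[key] = merge_configs(merged[key], value)
        else:
            merged[key] = value
    return merged


# Mirrors e.g. `snakemake ... --configfile config.tutorial.yaml test/config.custom.yaml`
config: dict = {}
for path in ["config.tutorial.yaml", "test/config.custom.yaml"]:
    with open(path) as f:
        config = merge_configs(config, yaml.safe_load(f) or {})
```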
diff --git a/README.md b/README.md index f80424831..df964e28c 100644 --- a/README.md +++ b/README.md @@ -27,9 +27,16 @@ by [![Discord](https://img.shields.io/discord/911692131440148490?logo=discord)](https://discord.gg/AnuJBk23FU) [![Google Drive](https://img.shields.io/badge/Google%20Drive-4285F4?style=flat&logo=googledrive&logoColor=white)](https://drive.google.com/drive/folders/1U7fgktbxlaGzWxT2C0-Xv-_ffWCxAKZz) -**PyPSA-Earth is the first open-source global energy system model with data in high spatial and temporal resolution.** It enables large-scale collaboration by providing a tool that can model the world energy system or any subset of it. This work is derived from the European [PyPSA-Eur](https://pypsa-eur.readthedocs.io/en/latest/) model using new data and functions. It is suitable for operational as well as combined generation, storage and transmission expansion studies. The model provides two main features: (1) customizable data extraction and preparation scripts with global coverage and (2) a [PyPSA](https://pypsa.readthedocs.io/en/latest/) energy modelling framework integration. The data includes electricity demand, generation and medium to high-voltage networks from open sources, yet additional data can be further integrated. A broad range of clustering and grid meshing strategies help adapt the model to computational and practical needs. +**PyPSA-Earth: A Global Sector-Coupled Open-Source Multi-Energy System Model** + +PyPSA-Earth is the first open-source global cross-sectoral energy system model with high spatial and temporal resolution. Originally it was derived from the European [PyPSA-Eur](https://pypsa-eur.readthedocs.io/en/latest/) model using new data and functions, which provide capabilities for modelling the world energy system or any subset of it, enabling large-scale collaboration and transparent analysis for a sustainable energy future. It is suitable for operational studies, as well as for expansion studies of combined generation, storage and transmission that account for cross-sectoral interactions. The model provides two main features: (1) customizable data extraction and preparation scripts with global coverage for power and cross-sectoral modelling and (2) a [PyPSA](https://pypsa.readthedocs.io/en/latest/) energy modelling framework integration. In particular, the data includes energy demand, generation and medium to high-voltage networks from open sources, yet additional data can be further integrated. A broad range of clustering and grid meshing strategies help adapt the model to computational and practical needs. + +With the recent integration of PyPSA-Earth and the sector-coupled PyPSA-Earth model, all functionality is now combined into a single, comprehensive tool. This unified model allows for detailed optimization of multi-energy systems, covering electricity, heating, transport, and more. It is designed to adapt to the specific needs of any country or region, offering customizable data extraction, preparation scripts with global coverage, and a broad range of clustering and grid meshing strategies to meet computational and practical requirements. + +PyPSA-Earth can provide the modelling evidence needed to translate the implications of energy scenarios into regional actions. By making this tool openly available, we aim to foster collaboration, innovation, and informed decision-making that leads to sustainable and efficient energy solutions worldwide.
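Since the second feature named above is the PyPSA framework integration, every solved network produced by the workflow lands as a standard PyPSA NetCDF file that can be inspected directly with the PyPSA API; a minimal sketch (the result path below is hypothetical) could look like this:

```python
import pypsa

# Hypothetical path to a solved network produced by the workflow.
n = pypsa.Network("results/networks/elec_s_10_ec_lcopt_Co2L.nc")

# Optimised generation capacity per carrier and the total system cost.
print(n.generators.p_nom_opt.groupby(n.generators.carrier).sum())
print(n.objective)
```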
+ +For more details, the model is described in the Applied Energy article "PyPSA-Earth: A new global open energy system optimization model demonstrated in Africa," 2023. The preprint describing the sector-coupled functionalities is also available [here](https://papers.ssrn.com/sol3/papers.cfm?abstract_id=4743242). Additional information can be found in the [documentation](https://pypsa-earth.readthedocs.io/en/latest/index.html). -The model is described in the Applied Energy article "PyPSA-Earth. A new global open energy system optimization model demonstrated in Africa", 2023, https://doi.org/10.1016/j.apenergy.2023.121096 [(BibTeX)](https://pypsa-earth.readthedocs.io/en/latest/talks_and_papers.html#publications). The [documentation](https://pypsa-earth.readthedocs.io/en/latest/index.html) provides additional information. **PyPSA meets Earth is a free and open source software initiative aiming to develop a powerful energy system model for Earth.** We work on open data, open source modelling, open source solver support and open communities. Stay tuned and join our mission - We look for users, co-developers and leaders! Check out our [website for results and our projects](https://pypsa-meets-earth.github.io/projects.html). Happy coding! @@ -41,6 +48,11 @@ The model is described in the Applied Energy article "PyPSA-Earth. A new global

Figure: Example power systems built with PyPSA-Earth. See images of ~193 more countries at https://zenodo.org/records/10080766

+ +The diagram below depicts one representative clustered node for the sector-coupled model with its generation, storage and conversion technologies. + +![One representative clustered node of the sector-coupled model](doc/SCPE_v0.2.png) + ## Livetracker. Most popular global models:

@@ -128,6 +140,17 @@ There are multiple ways to get involved and learn more about our work. That's ho Java HotSpot(TM) 64-Bit Server VM (build 25.341-b10, mixed mode) ``` +## Running the model in previous versions + +The model can be run in previous versions by checking out the respective tag. For instance, to run the model in version 0.4.1, which is the last version before the repo `pypsa-earth-sec` was merged, the following command can be used: + +```bash +git checkout v0.4.1 +``` +After checking out the tag, the model can be run as usual. Please make sure to install the required packages for the respective version. + + + ## Test run on tutorial - In the folder open a terminal/command window to be located at this path `~/pypsa-earth/` @@ -143,6 +166,10 @@ There are multiple ways to get involved and learn more about our work. That's ho Remove the -n to do a real run. Follow the tutorial of PyPSA-Eur 1 and 2 on [YouTube](https://www.youtube.com/watch?v=ty47YU1_eeQ) to continue with an analysis. + + + + ## Training - We recently updated some [hackathon material](https://github.com/pypsa-meets-earth/documentation) for PyPSA-Earth. The hackathon contains jupyter notebooks with exercises. After going through the 1 day theoretical and practical material you should have a suitable coding setup and feel confident about contributing. diff --git a/REUSE.toml b/REUSE.toml new file mode 100644 index 000000000..06bce5d3b --- /dev/null +++ b/REUSE.toml @@ -0,0 +1,38 @@ +# SPDX-FileCopyrightText: PyPSA-Earth and PyPSA-Eur Authors +# +# SPDX-License-Identifier: AGPL-3.0-or-later + +version = 1 +SPDX-PackageName = "pypsa-earth" +SPDX-PackageSupplier = "The PyPSA-Earth and PyPSA-Eur Authors " +SPDX-PackageDownloadLocation = "https://github.com/pypsa-meets-earth/pypsa-earth" + +[[annotations]] +path = "doc/data.csv" +precedence = "aggregate" +SPDX-FileCopyrightText = "The PyPSA-Earth and PyPSA-Eur Authors" +SPDX-License-Identifier = "CC-BY-4.0" + +[[annotations]] +path = "doc/configtables/**" +precedence = "aggregate" +SPDX-FileCopyrightText = "The PyPSA-Earth and PyPSA-Eur Authors" +SPDX-License-Identifier = "CC-BY-4.0" + +[[annotations]] +path = "test/test_data/**" +precedence = "aggregate" +SPDX-FileCopyrightText = "The PyPSA-Earth and PyPSA-Eur Authors" +SPDX-License-Identifier = "CC-BY-4.0" + +[[annotations]] +path = "data/**" +precedence = "aggregate" +SPDX-FileCopyrightText = "The PyPSA-Earth and PyPSA-Eur Authors" +SPDX-License-Identifier = "CC-BY-4.0" + +[[annotations]] +path = ".github/**" +precedence = "aggregate" +SPDX-FileCopyrightText = "The PyPSA-Earth and PyPSA-Eur Authors" +SPDX-License-Identifier = "CC0-1.0" diff --git a/Snakefile b/Snakefile index 98dc8203d..6dfc8179a 100644 --- a/Snakefile +++ b/Snakefile @@ -3,35 +3,38 @@ # SPDX-License-Identifier: AGPL-3.0-or-later import sys +import pathlib sys.path.append("./scripts") -from os.path import normpath, exists +from os.path import normpath from shutil import copyfile, move from snakemake.remote.HTTP import RemoteProvider as HTTPRemoteProvider -from _helpers import create_country_list, get_last_commit_message, check_config_version +from _helpers import ( + create_country_list, + get_last_commit_message, + check_config_version, + copy_default_files, +) from build_demand_profiles import get_load_paths_gegis from retrieve_databundle_light import datafiles_retrivedatabundle -from pathlib import Path HTTP = HTTPRemoteProvider() -if "config" not in globals() or not config: # skip when used as sub-workflow - if not exists("config.yaml"): - 
copyfile("config.tutorial.yaml", "config.yaml") - - configfile: "config.yaml" - - -check_config_version(config=config) +copy_default_files() +configfile: "config.default.yaml" configfile: "configs/bundle_config.yaml" +configfile: "configs/powerplantmatching_config.yaml" +configfile: "config.yaml" +check_config_version(config=config) + config.update({"git_commit": get_last_commit_message(".")}) # convert country list according to the desired region @@ -43,9 +46,13 @@ config["scenario"]["unc"] = [ f"m{i}" for i in range(config["monte_carlo"]["options"]["samples"]) ] + run = config.get("run", {}) RDIR = run["name"] + "/" if run.get("name") else "" CDIR = RDIR if not run.get("shared_cutouts") else "" +SDIR = config["summary_dir"].strip("/") + f"/{RDIR}/" +RESDIR = config["results_dir"].strip("/") + f"/{RDIR}/" +COSTDIR = config["costs_dir"] load_data_paths = get_load_paths_gegis("data", config) @@ -62,6 +69,11 @@ wildcard_constraints: ll="(v|c)([0-9\.]+|opt|all)|all", opts="[-+a-zA-Z0-9\.]*", unc="[-+a-zA-Z0-9\.]*", + sopts="[-+a-zA-Z0-9\.\s]*", + discountrate="[-+a-zA-Z0-9\.\s]*", + demand="[-+a-zA-Z0-9\.\s]*", + h2export="[0-9]+m?|all", + planning_horizons="20[2-9][0-9]|2100", if config["custom_rules"] is not []: @@ -80,32 +92,6 @@ rule clean: shell("snakemake -j 1 run_all_scenarios --delete-all-output") -rule run_tests: - output: - touch("tests.done"), - run: - import os - - shell("snakemake --cores all build_test_configs") - directory = "test/tmp" # assign directory - for filename in os.scandir(directory): # iterate over files in that directory - if filename.is_file(): - print( - f"Running test: config name '{filename.name}'' and path name '{filename.path}'" - ) - if "custom" in filename.name: - shell("mkdir -p configs/scenarios") - shell("cp {filename.path} configs/scenarios/config.custom.yaml") - shell("snakemake --cores 1 run_all_scenarios --forceall") - if "monte" in filename.name: - shell("cp {filename.path} config.yaml") - shell("snakemake --cores all solve_all_networks_monte --forceall") - else: - shell("cp {filename.path} config.yaml") - shell("snakemake --cores all solve_all_networks --forceall") - print("Tests are successful.") - - rule solve_all_networks: input: expand( @@ -337,7 +323,7 @@ def terminate_if_cutout_exists(config=config): for ct in set(config_cutouts): cutout_fl = "cutouts/" + CDIR + ct + ".nc" - if os.path.exists(cutout_fl): + if pathlib.Path(cutout_fl).exists(): raise Exception( "An option `build_cutout` is enabled, while a cutout file '" + cutout_fl @@ -375,7 +361,10 @@ if config["enable"].get("build_natura_raster", False): area_crs=config["crs"]["area_crs"], input: shapefiles_land="data/landcover", - cutouts=expand("cutouts/" + CDIR + "{cutouts}.nc", **config["atlite"]), + cutouts=expand( + "cutouts/" + CDIR + "{cutout}.nc", + cutout=[c["cutout"] for _, c in config["renewable"].items()], + ), output: "resources/" + RDIR + "natura.tiff", log: @@ -416,6 +405,20 @@ if config["enable"].get("retrieve_cost_data", True): run: move(input[0], output[0]) + rule retrieve_cost_data_flexible: + input: + HTTP.remote( + f"raw.githubusercontent.com/PyPSA/technology-data/{config['costs']['version']}/outputs/costs" + + "_{planning_horizons}.csv", + keep_local=True, + ), + output: + costs=COSTDIR + "costs_{planning_horizons}.csv", + resources: + mem_mb=5000, + run: + move(input[0], output[0]) + rule build_demand_profiles: params: @@ -811,7 +814,7 @@ if config["monte_carlo"]["options"].get("add_to_snakefile", False) == False: solving=config["solving"], 
augmented_line_connection=config["augmented_line_connection"], input: - "networks/" + RDIR + "elec_s{simpl}_{clusters}_ec_l{ll}_{opts}.nc", + network="networks/" + RDIR + "elec_s{simpl}_{clusters}_ec_l{ll}_{opts}.nc", output: "results/" + RDIR + "networks/elec_s{simpl}_{clusters}_ec_l{ll}_{opts}.nc", log: @@ -877,7 +880,9 @@ if config["monte_carlo"]["options"].get("add_to_snakefile", False) == True: solving=config["solving"], augmented_line_connection=config["augmented_line_connection"], input: - "networks/" + RDIR + "elec_s{simpl}_{clusters}_ec_l{ll}_{opts}_{unc}.nc", + network="networks/" + + RDIR + + "elec_s{simpl}_{clusters}_ec_l{ll}_{opts}_{unc}.nc", output: "results/" + RDIR @@ -959,6 +964,566 @@ rule make_summary: "scripts/make_summary.py" +rule prepare_sector_networks: + input: + expand( + RESDIR + + "prenetworks/elec_s{simpl}_{clusters}_ec_l{ll}_{opts}_{sopts}_{planning_horizons}_{discountrate}_{demand}.nc", + **config["scenario"], + **config["costs"], + ), + + +rule override_res_all_nets: + input: + expand( + RESDIR + + "prenetworks/elec_s{simpl}_{clusters}_ec_l{ll}_{opts}_{sopts}_{planning_horizons}_{discountrate}_{demand}_presec.nc", + **config["scenario"], + **config["costs"], + **config["export"], + ), + + +rule solve_sector_networks: + input: + expand( + RESDIR + + "postnetworks/elec_s{simpl}_{clusters}_ec_l{ll}_{opts}_{sopts}_{planning_horizons}_{discountrate}_{demand}_{h2export}export.nc", + **config["scenario"], + **config["costs"], + **config["export"], + ), + + +rule prepare_ports: + output: + ports="data/ports.csv", # TODO move from data to resources + script: + "scripts/prepare_ports.py" + + +rule prepare_airports: + params: + airport_sizing_factor=config["sector"]["airport_sizing_factor"], + output: + ports="data/airports.csv", # TODO move from data to resources + script: + "scripts/prepare_airports.py" + + +rule prepare_urban_percent: + output: + urban_percent="data/urban_percent.csv", # TODO move from data to resources + script: + "scripts/prepare_urban_percent.py" + + +rule prepare_transport_data_input: + output: + transport_data_input="resources/transport_data.csv", + script: + "scripts/prepare_transport_data_input.py" + + +if not config["custom_data"]["gas_network"]: + + rule prepare_gas_network: + params: + gas_config=config["sector"]["gas"], + alternative_clustering=config["cluster_options"]["alternative_clustering"], + countries_list=config["countries"], + layer_id=config["build_shape_options"]["gadm_layer_id"], + update=config["build_shape_options"]["update_file"], + out_logging=config["build_shape_options"]["out_logging"], + year=config["build_shape_options"]["year"], + nprocesses=config["build_shape_options"]["nprocesses"], + contended_flag=config["build_shape_options"]["contended_flag"], + gadm_file_prefix=config["build_shape_options"]["gadm_file_prefix"], + gadm_url_prefix=config["build_shape_options"]["gadm_url_prefix"], + geo_crs=config["crs"]["geo_crs"], + custom_gas_network=config["custom_data"]["gas_network"], + input: + regions_onshore="resources/" + + RDIR + + "bus_regions/regions_onshore_elec_s{simpl}_{clusters}.geojson", + output: + clustered_gas_network="resources/gas_networks/gas_network_elec_s{simpl}_{clusters}.csv", + # TODO: Should be a own snakemake rule + # gas_network_fig_1="resources/gas_networks/existing_gas_pipelines_{simpl}_{clusters}.png", + # gas_network_fig_2="resources/gas_networks/clustered_gas_pipelines_{simpl}_{clusters}.png", + script: + "scripts/prepare_gas_network.py" + + +rule prepare_sector_network: + params: + 
costs=config["costs"], + contended_flag=config["build_shape_options"]["contended_flag"], + gadm_file_prefix=config["build_shape_options"]["gadm_file_prefix"], + gadm_url_prefix=config["build_shape_options"]["gadm_url_prefix"], + geo_crs=config["crs"]["geo_crs"], + input: + network=RESDIR + + "prenetworks/elec_s{simpl}_{clusters}_ec_l{ll}_{opts}_{sopts}_{planning_horizons}_{discountrate}_{demand}_presec.nc", + costs=COSTDIR + "costs_{planning_horizons}.csv", + h2_cavern="data/hydrogen_salt_cavern_potentials.csv", + nodal_energy_totals="resources/demand/heat/nodal_energy_heat_totals_{demand}_s{simpl}_{clusters}_{planning_horizons}.csv", + transport="resources/demand/transport_{demand}_s{simpl}_{clusters}_{planning_horizons}.csv", + avail_profile="resources/pattern_profiles/avail_profile_{demand}_s{simpl}_{clusters}_{planning_horizons}.csv", + dsm_profile="resources/pattern_profiles/dsm_profile_{demand}_s{simpl}_{clusters}_{planning_horizons}.csv", + nodal_transport_data="resources/demand/nodal_transport_data_{demand}_s{simpl}_{clusters}_{planning_horizons}.csv", + overrides="data/override_component_attrs", + clustered_pop_layout="resources/population_shares/pop_layout_elec_s{simpl}_{clusters}_{planning_horizons}.csv", + industrial_demand="resources/demand/industrial_energy_demand_per_node_elec_s{simpl}_{clusters}_{planning_horizons}_{demand}.csv", + energy_totals="data/energy_totals_{demand}_{planning_horizons}.csv", + airports="data/airports.csv", + ports="data/ports.csv", + heat_demand="resources/demand/heat/heat_demand_{demand}_s{simpl}_{clusters}_{planning_horizons}.csv", + ashp_cop="resources/demand/heat/ashp_cop_{demand}_s{simpl}_{clusters}_{planning_horizons}.csv", + gshp_cop="resources/demand/heat/gshp_cop_{demand}_s{simpl}_{clusters}_{planning_horizons}.csv", + solar_thermal="resources/demand/heat/solar_thermal_{demand}_s{simpl}_{clusters}_{planning_horizons}.csv", + district_heat_share="resources/demand/heat/district_heat_share_{demand}_s{simpl}_{clusters}_{planning_horizons}.csv", + biomass_transport_costs="data/temp_hard_coded/biomass_transport_costs.csv", + shapes_path="resources/" + + RDIR + + "bus_regions/regions_onshore_elec_s{simpl}_{clusters}.geojson", + pipelines=( + "data/custom/pipelines.csv" + if config["custom_data"]["gas_network"] + else "resources/gas_networks/gas_network_elec_s{simpl}_{clusters}.csv" + ), + output: + RESDIR + + "prenetworks/elec_s{simpl}_{clusters}_ec_l{ll}_{opts}_{sopts}_{planning_horizons}_{discountrate}_{demand}.nc", + threads: 1 + resources: + mem_mb=2000, + benchmark: + ( + RESDIR + + "benchmarks/prepare_network/elec_s{simpl}_{clusters}_ec_l{ll}_{opts}_{sopts}_{planning_horizons}_{discountrate}_{demand}" + ) + script: + "scripts/prepare_sector_network.py" + + +rule build_ship_profile: + params: + snapshots=config["snapshots"], + ship_opts=config["export"]["ship"], + output: + ship_profile="resources/ship_profile_{h2export}TWh.csv", + script: + "scripts/build_ship_profile.py" + + +rule add_export: + params: + gadm_level=config["sector"]["gadm_level"], + alternative_clustering=config["cluster_options"]["alternative_clustering"], + store=config["export"]["store"], + store_capital_costs=config["export"]["store_capital_costs"], + export_profile=config["export"]["export_profile"], + snapshots=config["snapshots"], + costs=config["costs"], + contended_flag=config["build_shape_options"]["contended_flag"], + gadm_file_prefix=config["build_shape_options"]["gadm_file_prefix"], + gadm_url_prefix=config["build_shape_options"]["gadm_url_prefix"], + 
geo_crs=config["crs"]["geo_crs"], + input: + overrides="data/override_component_attrs", + export_ports="data/export_ports.csv", + costs=COSTDIR + "costs_{planning_horizons}.csv", + ship_profile="resources/ship_profile_{h2export}TWh.csv", + network=RESDIR + + "prenetworks/elec_s{simpl}_{clusters}_ec_l{ll}_{opts}_{sopts}_{planning_horizons}_{discountrate}_{demand}.nc", + shapes_path="resources/" + + RDIR + + "bus_regions/regions_onshore_elec_s{simpl}_{clusters}.geojson", + output: + RESDIR + + "prenetworks/elec_s{simpl}_{clusters}_ec_l{ll}_{opts}_{sopts}_{planning_horizons}_{discountrate}_{demand}_{h2export}export.nc", + script: + "scripts/add_export.py" + + +rule override_respot: + params: + run=run["name"], + custom_data=config["custom_data"], + countries=config["countries"], + input: + **{ + f"custom_res_pot_{tech}_{planning_horizons}_{discountrate}": f"resources/custom_renewables/{tech}_{planning_horizons}_{discountrate}_potential.csv" + for tech in config["custom_data"]["renewables"] + for discountrate in config["costs"]["discountrate"] + for planning_horizons in config["scenario"]["planning_horizons"] + }, + **{ + f"custom_res_ins_{tech}_{planning_horizons}_{discountrate}": f"resources/custom_renewables/{tech}_{planning_horizons}_{discountrate}_installable.csv" + for tech in config["custom_data"]["renewables"] + for discountrate in config["costs"]["discountrate"] + for planning_horizons in config["scenario"]["planning_horizons"] + }, + overrides="data/override_component_attrs", + network="networks/" + RDIR + "elec_s{simpl}_{clusters}_ec_l{ll}_{opts}.nc", + energy_totals="data/energy_totals_{demand}_{planning_horizons}.csv", + output: + RESDIR + + "prenetworks/elec_s{simpl}_{clusters}_ec_l{ll}_{opts}_{sopts}_{planning_horizons}_{discountrate}_{demand}_presec.nc", + script: + "scripts/override_respot.py" + + +rule prepare_transport_data: + input: + network="networks/" + RDIR + "elec_s{simpl}_{clusters}.nc", + energy_totals_name="data/energy_totals_{demand}_{planning_horizons}.csv", + traffic_data_KFZ="data/emobility/KFZ__count", + traffic_data_Pkw="data/emobility/Pkw__count", + transport_name="resources/transport_data.csv", + clustered_pop_layout="resources/population_shares/pop_layout_elec_s{simpl}_{clusters}_{planning_horizons}.csv", + temp_air_total="resources/temperatures/temp_air_total_elec_s{simpl}_{clusters}_{planning_horizons}.nc", + output: + # nodal_energy_totals="resources/nodal_energy_totals_s{simpl}_{clusters}.csv", + transport="resources/demand/transport_{demand}_s{simpl}_{clusters}_{planning_horizons}.csv", + avail_profile="resources/pattern_profiles/avail_profile_{demand}_s{simpl}_{clusters}_{planning_horizons}.csv", + dsm_profile="resources/pattern_profiles/dsm_profile_{demand}_s{simpl}_{clusters}_{planning_horizons}.csv", + nodal_transport_data="resources/demand/nodal_transport_data_{demand}_s{simpl}_{clusters}_{planning_horizons}.csv", + script: + "scripts/prepare_transport_data.py" + + +rule build_cop_profiles: + params: + heat_pump_sink_T=config["sector"]["heat_pump_sink_T"], + input: + temp_soil_total="resources/temperatures/temp_soil_total_elec_s{simpl}_{clusters}_{planning_horizons}.nc", + temp_soil_rural="resources/temperatures/temp_soil_rural_elec_s{simpl}_{clusters}_{planning_horizons}.nc", + temp_soil_urban="resources/temperatures/temp_soil_urban_elec_s{simpl}_{clusters}_{planning_horizons}.nc", + temp_air_total="resources/temperatures/temp_air_total_elec_s{simpl}_{clusters}_{planning_horizons}.nc", + 
temp_air_rural="resources/temperatures/temp_air_rural_elec_s{simpl}_{clusters}_{planning_horizons}.nc", + temp_air_urban="resources/temperatures/temp_air_urban_elec_s{simpl}_{clusters}_{planning_horizons}.nc", + output: + cop_soil_total="resources/cops/cop_soil_total_elec_s{simpl}_{clusters}_{planning_horizons}.nc", + cop_soil_rural="resources/cops/cop_soil_rural_elec_s{simpl}_{clusters}_{planning_horizons}.nc", + cop_soil_urban="resources/cops/cop_soil_urban_elec_s{simpl}_{clusters}_{planning_horizons}.nc", + cop_air_total="resources/cops/cop_air_total_elec_s{simpl}_{clusters}_{planning_horizons}.nc", + cop_air_rural="resources/cops/cop_air_rural_elec_s{simpl}_{clusters}_{planning_horizons}.nc", + cop_air_urban="resources/cops/cop_air_urban_elec_s{simpl}_{clusters}_{planning_horizons}.nc", + resources: + mem_mb=20000, + benchmark: + "benchmarks/build_cop_profiles/s{simpl}_{clusters}_{planning_horizons}" + script: + "scripts/build_cop_profiles.py" + + +rule prepare_heat_data: + input: + network="networks/" + RDIR + "elec_s{simpl}_{clusters}.nc", + energy_totals_name="data/energy_totals_{demand}_{planning_horizons}.csv", + clustered_pop_layout="resources/population_shares/pop_layout_elec_s{simpl}_{clusters}_{planning_horizons}.csv", + temp_air_total="resources/temperatures/temp_air_total_elec_s{simpl}_{clusters}_{planning_horizons}.nc", + cop_soil_total="resources/cops/cop_soil_total_elec_s{simpl}_{clusters}_{planning_horizons}.nc", + cop_air_total="resources/cops/cop_air_total_elec_s{simpl}_{clusters}_{planning_horizons}.nc", + solar_thermal_total="resources/demand/heat/solar_thermal_total_elec_s{simpl}_{clusters}_{planning_horizons}.nc", + heat_demand_total="resources/demand/heat/heat_demand_total_elec_s{simpl}_{clusters}_{planning_horizons}.nc", + heat_profile="data/heat_load_profile_BDEW.csv", + output: + nodal_energy_totals="resources/demand/heat/nodal_energy_heat_totals_{demand}_s{simpl}_{clusters}_{planning_horizons}.csv", + heat_demand="resources/demand/heat/heat_demand_{demand}_s{simpl}_{clusters}_{planning_horizons}.csv", + ashp_cop="resources/demand/heat/ashp_cop_{demand}_s{simpl}_{clusters}_{planning_horizons}.csv", + gshp_cop="resources/demand/heat/gshp_cop_{demand}_s{simpl}_{clusters}_{planning_horizons}.csv", + solar_thermal="resources/demand/heat/solar_thermal_{demand}_s{simpl}_{clusters}_{planning_horizons}.csv", + district_heat_share="resources/demand/heat/district_heat_share_{demand}_s{simpl}_{clusters}_{planning_horizons}.csv", + script: + "scripts/prepare_heat_data.py" + + +rule build_base_energy_totals: + params: + space_heat_share=config["sector"]["space_heat_share"], + update_data=config["demand_data"]["update_data"], + base_year=config["demand_data"]["base_year"], + countries=config["countries"], + shift_coal_to_elec=config["sector"]["coal"]["shift_to_elec"], + input: + unsd_paths="data/demand/unsd/paths/Energy_Statistics_Database.xlsx", + output: + energy_totals_base="data/energy_totals_base.csv", + script: + "scripts/build_base_energy_totals.py" + + +rule prepare_energy_totals: + params: + countries=config["countries"], + base_year=config["demand_data"]["base_year"], + sector_options=config["sector"], + input: + unsd_paths="data/energy_totals_base.csv", + efficiency_gains_cagr="data/demand/efficiency_gains_cagr.csv", + growth_factors_cagr="data/demand/growth_factors_cagr.csv", + district_heating="data/demand/district_heating.csv", + fuel_shares="data/demand/fuel_shares.csv", + output: + energy_totals="data/energy_totals_{demand}_{planning_horizons}.csv", + script: 
+ "scripts/prepare_energy_totals.py" + + +rule build_solar_thermal_profiles: + params: + solar_thermal_config=config["solar_thermal"], + snapshots=config["snapshots"], + input: + pop_layout_total="resources/population_shares/pop_layout_total_{planning_horizons}.nc", + pop_layout_urban="resources/population_shares/pop_layout_urban_{planning_horizons}.nc", + pop_layout_rural="resources/population_shares/pop_layout_rural_{planning_horizons}.nc", + regions_onshore="resources/" + + RDIR + + "bus_regions/regions_onshore_elec_s{simpl}_{clusters}.geojson", + cutout="cutouts/" + + CDIR + + [c["cutout"] for _, c in config["renewable"].items()][0] + + ".nc", + # default to first cutout found + output: + solar_thermal_total="resources/demand/heat/solar_thermal_total_elec_s{simpl}_{clusters}_{planning_horizons}.nc", + solar_thermal_urban="resources/demand/heat/solar_thermal_urban_elec_s{simpl}_{clusters}_{planning_horizons}.nc", + solar_thermal_rural="resources/demand/heat/solar_thermal_rural_elec_s{simpl}_{clusters}_{planning_horizons}.nc", + resources: + mem_mb=20000, + benchmark: + "benchmarks/build_solar_thermal_profiles/s{simpl}_{clusters}_{planning_horizons}" + script: + "scripts/build_solar_thermal_profiles.py" + + +rule build_population_layouts: + params: + planning_horizons=config["scenario"]["planning_horizons"][0], + input: + nuts3_shapes="resources/" + RDIR + "shapes/gadm_shapes.geojson", + urban_percent="data/urban_percent.csv", + cutout="cutouts/" + + CDIR + + [c["cutout"] for _, c in config["renewable"].items()][0] + + ".nc", + # default to first cutout found + output: + pop_layout_total="resources/population_shares/pop_layout_total_{planning_horizons}.nc", + pop_layout_urban="resources/population_shares/pop_layout_urban_{planning_horizons}.nc", + pop_layout_rural="resources/population_shares/pop_layout_rural_{planning_horizons}.nc", + gdp_layout="resources/gdp_shares/gdp_layout_{planning_horizons}.nc", + resources: + mem_mb=20000, + benchmark: + "benchmarks/build_population_layouts_{planning_horizons}" + threads: 8 + script: + "scripts/build_population_layouts.py" + + +rule move_hardcoded_files_temp: + input: + "data/temp_hard_coded/energy_totals.csv", + output: + "resources/energy_totals.csv", + shell: + "cp -a data/temp_hard_coded/. 
resources" + + +rule build_clustered_population_layouts: + input: + pop_layout_total="resources/population_shares/pop_layout_total_{planning_horizons}.nc", + pop_layout_urban="resources/population_shares/pop_layout_urban_{planning_horizons}.nc", + pop_layout_rural="resources/population_shares/pop_layout_rural_{planning_horizons}.nc", + gdp_layout="resources/gdp_shares/gdp_layout_{planning_horizons}.nc", + regions_onshore="resources/" + + RDIR + + "bus_regions/regions_onshore_elec_s{simpl}_{clusters}.geojson", + cutout="cutouts/" + + CDIR + + [c["cutout"] for _, c in config["renewable"].items()][0] + + ".nc", + # default to first cutout found + output: + clustered_pop_layout="resources/population_shares/pop_layout_elec_s{simpl}_{clusters}_{planning_horizons}.csv", + clustered_gdp_layout="resources/gdp_shares/gdp_layout_elec_s{simpl}_{clusters}_{planning_horizons}.csv", + resources: + mem_mb=10000, + benchmark: + "benchmarks/build_clustered_population_layouts/s{simpl}_{clusters}_{planning_horizons}" + script: + "scripts/build_clustered_population_layouts.py" + + +rule build_heat_demand: + params: + snapshots=config["snapshots"], + input: + pop_layout_total="resources/population_shares/pop_layout_total_{planning_horizons}.nc", + pop_layout_urban="resources/population_shares/pop_layout_urban_{planning_horizons}.nc", + pop_layout_rural="resources/population_shares/pop_layout_rural_{planning_horizons}.nc", + regions_onshore="resources/" + + RDIR + + "bus_regions/regions_onshore_elec_s{simpl}_{clusters}.geojson", + cutout="cutouts/" + + CDIR + + [c["cutout"] for _, c in config["renewable"].items()][0] + + ".nc", + # default to first cutout found + output: + heat_demand_urban="resources/demand/heat/heat_demand_urban_elec_s{simpl}_{clusters}_{planning_horizons}.nc", + heat_demand_rural="resources/demand/heat/heat_demand_rural_elec_s{simpl}_{clusters}_{planning_horizons}.nc", + heat_demand_total="resources/demand/heat/heat_demand_total_elec_s{simpl}_{clusters}_{planning_horizons}.nc", + resources: + mem_mb=20000, + benchmark: + "benchmarks/build_heat_demand/s{simpl}_{clusters}_{planning_horizons}" + script: + "scripts/build_heat_demand.py" + + +rule build_temperature_profiles: + params: + snapshots=config["snapshots"], + input: + pop_layout_total="resources/population_shares/pop_layout_total_{planning_horizons}.nc", + pop_layout_urban="resources/population_shares/pop_layout_urban_{planning_horizons}.nc", + pop_layout_rural="resources/population_shares/pop_layout_rural_{planning_horizons}.nc", + regions_onshore="resources/" + + RDIR + + "bus_regions/regions_onshore_elec_s{simpl}_{clusters}.geojson", + cutout="cutouts/" + + CDIR + + [c["cutout"] for _, c in config["renewable"].items()][0] + + ".nc", + # default to first cutout found + output: + temp_soil_total="resources/temperatures/temp_soil_total_elec_s{simpl}_{clusters}_{planning_horizons}.nc", + temp_soil_rural="resources/temperatures/temp_soil_rural_elec_s{simpl}_{clusters}_{planning_horizons}.nc", + temp_soil_urban="resources/temperatures/temp_soil_urban_elec_s{simpl}_{clusters}_{planning_horizons}.nc", + temp_air_total="resources/temperatures/temp_air_total_elec_s{simpl}_{clusters}_{planning_horizons}.nc", + temp_air_rural="resources/temperatures/temp_air_rural_elec_s{simpl}_{clusters}_{planning_horizons}.nc", + temp_air_urban="resources/temperatures/temp_air_urban_elec_s{simpl}_{clusters}_{planning_horizons}.nc", + resources: + mem_mb=20000, + benchmark: + "benchmarks/build_temperature_profiles/s{simpl}_{clusters}_{planning_horizons}" + 
script: + "scripts/build_temperature_profiles.py" + + +rule copy_config: + params: + summary_dir=config["summary_dir"], + run=run, + output: + folder=directory(SDIR + "configs"), + config=SDIR + "configs/config.yaml", + threads: 1 + resources: + mem_mb=1000, + benchmark: + SDIR + "benchmarks/copy_config" + script: + "scripts/copy_config.py" + + +if config["foresight"] == "overnight": + + rule solve_sector_network: + params: + solving=config["solving"], + augmented_line_connection=config["augmented_line_connection"], + input: + overrides="data/override_component_attrs", + # network=RESDIR + # + "prenetworks/elec_s{simpl}_{clusters}_ec_l{ll}_{opts}_{sopts}_{planning_horizons}_{discountrate}.nc", + network=RESDIR + + "prenetworks/elec_s{simpl}_{clusters}_ec_l{ll}_{opts}_{sopts}_{planning_horizons}_{discountrate}_{demand}_{h2export}export.nc", + costs=COSTDIR + "costs_{planning_horizons}.csv", + configs=SDIR + "configs/config.yaml", # included to trigger copy_config rule + output: + RESDIR + + "postnetworks/elec_s{simpl}_{clusters}_ec_l{ll}_{opts}_{sopts}_{planning_horizons}_{discountrate}_{demand}_{h2export}export.nc", + shadow: + "shallow" + log: + solver=RESDIR + + "logs/elec_s{simpl}_{clusters}_ec_l{ll}_{opts}_{sopts}_{planning_horizons}_{discountrate}_{demand}_{h2export}export_solver.log", + python=RESDIR + + "logs/elec_s{simpl}_{clusters}_ec_l{ll}_{opts}_{sopts}_{planning_horizons}_{discountrate}_{demand}_{h2export}export_python.log", + memory=RESDIR + + "logs/elec_s{simpl}_{clusters}_ec_l{ll}_{opts}_{sopts}_{planning_horizons}_{discountrate}_{demand}_{h2export}export_memory.log", + threads: 25 + resources: + mem_mb=config["solving"]["mem"], + benchmark: + ( + RESDIR + + "benchmarks/solve_network/elec_s{simpl}_{clusters}_ec_l{ll}_{opts}_{sopts}_{planning_horizons}_{discountrate}_{demand}_{h2export}export" + ) + script: + "scripts/solve_network.py" + + +rule make_sector_summary: + params: + planning_horizons=config["scenario"]["planning_horizons"], + results_dir=config["results_dir"], + summary_dir=config["summary_dir"], + run=run["name"], + scenario_config=config["scenario"], + costs_config=config["costs"], + h2export_qty=config["export"]["h2export"], + foresight=config["foresight"], + input: + overrides="data/override_component_attrs", + networks=expand( + RESDIR + + "postnetworks/elec_s{simpl}_{clusters}_ec_l{ll}_{opts}_{sopts}_{planning_horizons}_{discountrate}_{demand}_{h2export}export.nc", + **config["scenario"], + **config["costs"], + **config["export"], + ), + costs=COSTDIR + "costs_{planning_horizons}.csv", + plots=expand( + RESDIR + + "maps/elec_s{simpl}_{clusters}_ec_l{ll}_{opts}_{sopts}-costs-all_{planning_horizons}_{discountrate}_{demand}_{h2export}export.pdf", + **config["scenario"], + **config["costs"], + **config["export"], + ), + output: + nodal_costs=SDIR + "csvs/nodal_costs.csv", + nodal_capacities=SDIR + "csvs/nodal_capacities.csv", + nodal_cfs=SDIR + "csvs/nodal_cfs.csv", + cfs=SDIR + "csvs/cfs.csv", + costs=SDIR + "csvs/costs.csv", + capacities=SDIR + "csvs/capacities.csv", + curtailment=SDIR + "csvs/curtailment.csv", + energy=SDIR + "csvs/energy.csv", + supply=SDIR + "csvs/supply.csv", + supply_energy=SDIR + "csvs/supply_energy.csv", + prices=SDIR + "csvs/prices.csv", + weighted_prices=SDIR + "csvs/weighted_prices.csv", + market_values=SDIR + "csvs/market_values.csv", + price_statistics=SDIR + "csvs/price_statistics.csv", + metrics=SDIR + "csvs/metrics.csv", + threads: 2 + resources: + mem_mb=10000, + benchmark: + SDIR + "benchmarks/make_summary" + script: + 
"scripts/make_summary.py" + + rule plot_summary: input: "results/" @@ -1002,26 +1567,6 @@ rule plot_network: "scripts/plot_network.py" -rule build_test_configs: - input: - base_config="config.tutorial.yaml", - update_file_list=[ - "test/config.tutorial_noprogress.yaml", - "test/config.custom.yaml", - "test/config.monte_carlo.yaml", - "test/config.landlock.yaml", - ], - output: - tmp_test_configs=[ - "test/tmp/config.tutorial_noprogress_tmp.yaml", - "test/tmp/config.custom_tmp.yaml", - "test/tmp/config.monte_carlo_tmp.yaml", - "test/tmp/config.landlock_tmp.yaml", - ], - script: - "scripts/build_test_configs.py" - - rule make_statistics: params: countries=config["countries"], @@ -1036,6 +1581,329 @@ rule make_statistics: "scripts/make_statistics.py" +rule plot_sector_network: + input: + overrides="data/override_component_attrs", + network=RESDIR + + "postnetworks/elec_s{simpl}_{clusters}_ec_l{ll}_{opts}_{sopts}_{planning_horizons}_{discountrate}_{demand}_{h2export}export.nc", + output: + map=RESDIR + + "maps/elec_s{simpl}_{clusters}_ec_l{ll}_{opts}_{sopts}-costs-all_{planning_horizons}_{discountrate}_{demand}_{h2export}export.pdf", + threads: 2 + resources: + mem_mb=10000, + benchmark: + ( + RESDIR + + "benchmarks/plot_network/elec_s{simpl}_{clusters}_ec_l{ll}_{opts}_{sopts}_{planning_horizons}_{discountrate}_{demand}_{h2export}export" + ) + script: + "scripts/plot_network.py" + + +rule plot_sector_summary: + input: + costs=SDIR + "csvs/costs.csv", + energy=SDIR + "csvs/energy.csv", + balances=SDIR + "csvs/supply_energy.csv", + output: + costs=SDIR + "graphs/costs.pdf", + energy=SDIR + "graphs/energy.pdf", + balances=SDIR + "graphs/balances-energy.pdf", + threads: 2 + resources: + mem_mb=10000, + benchmark: + SDIR + "benchmarks/plot_summary" + script: + "scripts/plot_summary.py" + + +rule build_industrial_database: + output: + industrial_database="data/industrial_database.csv", + script: + "scripts/build_industrial_database.py" + + +rule prepare_db: + params: + tech_colors=config["plotting"]["tech_colors"], + input: + network=RESDIR + + "postnetworks/elec_s{simpl}_{clusters}_ec_l{ll}_{opts}_{sopts}_{planning_horizons}_{discountrate}_{demand}_{h2export}export.nc", + output: + db=RESDIR + + "summaries/elec_s{simpl}_{clusters}_ec_l{ll}_{opts}_{sopts}-costs-all_{planning_horizons}_{discountrate}_{demand}_{h2export}export.csv", + threads: 2 + resources: + mem_mb=10000, + benchmark: + ( + RESDIR + + "benchmarks/prepare_db/elec_s{simpl}_{clusters}_ec_l{ll}_{opts}_{sopts}_{planning_horizons}_{discountrate}_{demand}_{h2export}export" + ) + script: + "scripts/prepare_db.py" + + +rule build_industrial_distribution_key: #default data + params: + countries=config["countries"], + gadm_level=config["sector"]["gadm_level"], + alternative_clustering=config["cluster_options"]["alternative_clustering"], + industry_database=config["custom_data"]["industry_database"], + contended_flag=config["build_shape_options"]["contended_flag"], + gadm_file_prefix=config["build_shape_options"]["gadm_file_prefix"], + gadm_url_prefix=config["build_shape_options"]["gadm_url_prefix"], + geo_crs=config["crs"]["geo_crs"], + input: + regions_onshore="resources/" + + RDIR + + "bus_regions/regions_onshore_elec_s{simpl}_{clusters}.geojson", + clustered_pop_layout="resources/population_shares/pop_layout_elec_s{simpl}_{clusters}_{planning_horizons}.csv", + clustered_gdp_layout="resources/gdp_shares/gdp_layout_elec_s{simpl}_{clusters}_{planning_horizons}.csv", + industrial_database="data/industrial_database.csv", + 
shapes_path="resources/" + + RDIR + + "bus_regions/regions_onshore_elec_s{simpl}_{clusters}.geojson", + output: + industrial_distribution_key="resources/demand/industrial_distribution_key_elec_s{simpl}_{clusters}_{planning_horizons}.csv", + threads: 1 + resources: + mem_mb=1000, + benchmark: + "benchmarks/build_industrial_distribution_key_elec_s{simpl}_{clusters}_{planning_horizons}" + script: + "scripts/build_industrial_distribution_key.py" + + +rule build_base_industry_totals: #default data + params: + base_year=config["demand_data"]["base_year"], + countries=config["countries"], + other_industries=config["demand_data"]["other_industries"], + input: + #industrial_production_per_country="data/industrial_production_per_country.csv", + #unsd_path="data/demand/unsd/data/", + energy_totals_base="data/energy_totals_base.csv", + transactions_path="data/unsd_transactions.csv", + output: + base_industry_totals="resources/demand/base_industry_totals_{planning_horizons}_{demand}.csv", + threads: 1 + resources: + mem_mb=1000, + benchmark: + "benchmarks/build_base_industry_totals_{planning_horizons}_{demand}" + script: + "scripts/build_base_industry_totals.py" + + +rule build_industry_demand: #default data + params: + countries=config["countries"], + industry_demand=config["custom_data"]["industry_demand"], + base_year=config["demand_data"]["base_year"], + industry_util_factor=config["sector"]["industry_util_factor"], + aluminium_year=config["demand_data"]["aluminium_year"], + input: + industrial_distribution_key="resources/demand/industrial_distribution_key_elec_s{simpl}_{clusters}_{planning_horizons}.csv", + #industrial_production_per_country_tomorrow="resources/demand/industrial_production_per_country_tomorrow_{planning_horizons}_{demand}.csv", + #industrial_production_per_country="data/industrial_production_per_country.csv", + base_industry_totals="resources/demand/base_industry_totals_{planning_horizons}_{demand}.csv", + industrial_database="data/industrial_database.csv", + costs=COSTDIR + "costs_{planning_horizons}.csv", + industry_growth_cagr="data/demand/industry_growth_cagr.csv", + output: + industrial_energy_demand_per_node="resources/demand/industrial_energy_demand_per_node_elec_s{simpl}_{clusters}_{planning_horizons}_{demand}.csv", + threads: 1 + resources: + mem_mb=1000, + benchmark: + "benchmarks/industrial_energy_demand_per_node_elec_s{simpl}_{clusters}_{planning_horizons}_{demand}.csv" + script: + "scripts/build_industry_demand.py" + + +rule build_existing_heating_distribution: + params: + baseyear=config["scenario"]["planning_horizons"][0], + sector=config["sector"], + existing_capacities=config["existing_capacities"], + input: + existing_heating="data/existing_infrastructure/existing_heating_raw.csv", + clustered_pop_layout="resources/population_shares/pop_layout_elec_s{simpl}_{clusters}_{planning_horizons}.csv", + clustered_pop_energy_layout="resources/demand/heat/nodal_energy_heat_totals_{demand}_s{simpl}_{clusters}_{planning_horizons}.csv", #"resources/population_shares/pop_weighted_energy_totals_s{simpl}_{clusters}.csv", + district_heat_share="resources/demand/heat/district_heat_share_{demand}_s{simpl}_{clusters}_{planning_horizons}.csv", + output: + existing_heating_distribution="resources/heating/existing_heating_distribution_{demand}_s{simpl}_{clusters}_{planning_horizons}.csv", + threads: 1 + resources: + mem_mb=2000, + log: + RESDIR + + "logs/build_existing_heating_distribution_{demand}_s{simpl}_{clusters}_{planning_horizons}.log", + benchmark: + RESDIR + 
+"benchmarks/build_existing_heating_distribution/{demand}_s{simpl}_{clusters}_{planning_horizons}" + script: + "scripts/build_existing_heating_distribution.py" + + +if config["foresight"] == "myopic": + + rule add_existing_baseyear: + params: + baseyear=config["scenario"]["planning_horizons"][0], + sector=config["sector"], + existing_capacities=config["existing_capacities"], + costs=config["costs"], + input: + network=RESDIR + + "prenetworks/elec_s{simpl}_{clusters}_ec_l{ll}_{opts}_{sopts}_{planning_horizons}_{discountrate}_{demand}_{h2export}export.nc", + powerplants="resources/" + RDIR + "powerplants.csv", + busmap_s="resources/" + RDIR + "bus_regions/busmap_elec_s{simpl}.csv", + busmap=pypsaearth( + "resources/" + RDIR + "bus_regions/busmap_elec_s{simpl}_{clusters}.csv" + ), + clustered_pop_layout="resources/population_shares/pop_layout_elec_s{simpl}_{clusters}_{planning_horizons}.csv", + costs=CDIR + + "costs_{}.csv".format(config["scenario"]["planning_horizons"][0]), + cop_soil_total="resources/cops/cop_soil_total_elec_s{simpl}_{clusters}_{planning_horizons}.nc", + cop_air_total="resources/cops/cop_air_total_elec_s{simpl}_{clusters}_{planning_horizons}.nc", + existing_heating_distribution="resources/heating/existing_heating_distribution_{demand}_s{simpl}_{clusters}_{planning_horizons}.csv", + output: + RESDIR + + "prenetworks-brownfield/elec_s{simpl}_{clusters}_l{ll}_{opts}_{sopts}_{planning_horizons}_{discountrate}_{demand}_{h2export}export.nc", + wildcard_constraints: + # TODO: The first planning_horizon needs to be aligned across scenarios + # snakemake does not support passing functions to wildcard_constraints + # reference: https://github.com/snakemake/snakemake/issues/2703 + planning_horizons=config["scenario"]["planning_horizons"][0], #only applies to baseyear + threads: 1 + resources: + mem_mb=2000, + log: + RESDIR + + "logs/add_existing_baseyear_elec_s{simpl}_{clusters}_ec_l{ll}_{opts}_{sopts}_{planning_horizons}_{discountrate}_{demand}_{h2export}export.log", + benchmark: + RESDIR + +"benchmarks/add_existing_baseyear/elec_s{simpl}_{clusters}_ec_l{ll}_{opts}_{sopts}_{planning_horizons}_{discountrate}_{demand}_{h2export}export" + script: + "scripts/add_existing_baseyear.py" + + def input_profile_tech_brownfield(w): + return { + f"profile_{tech}": f"resources/" + + RDIR + + "renewable_profiles/profile_{tech}.nc" + for tech in config["electricity"]["renewable_carriers"] + if tech != "hydro" + } + + def solved_previous_horizon(w): + planning_horizons = config["scenario"]["planning_horizons"] + i = planning_horizons.index(int(w.planning_horizons)) + planning_horizon_p = str(planning_horizons[i - 1]) + + return ( + RDIR + + "postnetworks/elec_s{simpl}_{clusters}_ec_l{ll}_{opts}_{sopts}_" + + planning_horizon_p + + "_{discountrate}_{demand}_{h2export}export.nc" + ) + + rule add_brownfield: + params: + H2_retrofit=config["sector"]["hydrogen"], + H2_retrofit_capacity_per_CH4=config["sector"]["hydrogen"][ + "H2_retrofit_capacity_per_CH4" + ], + threshold_capacity=config["existing_capacities"]["threshold_capacity"], + snapshots=config["snapshots"], + # drop_leap_day=config["enable"]["drop_leap_day"], + carriers=config["electricity"]["renewable_carriers"], + input: + # unpack(input_profile_tech_brownfield), + simplify_busmap="resources/" + RDIR + "bus_regions/busmap_elec_s{simpl}.csv", + cluster_busmap="resources/" + + RDIR + + "bus_regions/busmap_elec_s{simpl}_{clusters}.csv", + network=RESDIR + + 
"prenetworks/elec_s{simpl}_{clusters}_ec_l{ll}_{opts}_{sopts}_{planning_horizons}_{discountrate}_{demand}_{h2export}export.nc", + network_p=solved_previous_horizon, #solved network at previous time step + costs=CDIR + "costs_{planning_horizons}.csv", + cop_soil_total="resources/cops/cop_soil_total_elec_s{simpl}_{clusters}_{planning_horizons}.nc", + cop_air_total="resources/cops/cop_air_total_elec_s{simpl}_{clusters}_{planning_horizons}.nc", + output: + RESDIR + + "prenetworks-brownfield/elec_s{simpl}_{clusters}_l{ll}_{opts}_{sopts}_{planning_horizons}_{discountrate}_{demand}_{h2export}export.nc", + threads: 4 + resources: + mem_mb=10000, + log: + RESDIR + + "logs/add_brownfield_elec_s{simpl}_{clusters}_ec_l{ll}_{opts}_{sopts}_{planning_horizons}_{discountrate}_{demand}_{h2export}export.log", + benchmark: + ( + RESDIR + + "benchmarks/add_brownfield/elec_s{simpl}_ec_{clusters}_l{ll}_{opts}_{sopts}_{planning_horizons}_{discountrate}_{demand}_{h2export}export" + ) + script: + "./scripts/add_brownfield.py" + + ruleorder: add_existing_baseyear > add_brownfield + + rule solve_network_myopic: + params: + solving=config["solving"], + foresight=config["foresight"], + planning_horizons=config["scenario"]["planning_horizons"], + co2_sequestration_potential=config["scenario"].get( + "co2_sequestration_potential", 200 + ), + input: + overrides="data/override_component_attrs", + network=RESDIR + + "prenetworks-brownfield/elec_s{simpl}_{clusters}_l{ll}_{opts}_{sopts}_{planning_horizons}_{discountrate}_{demand}_{h2export}export.nc", + costs=CDIR + "costs_{planning_horizons}.csv", + configs=SDIR + "configs/config.yaml", # included to trigger copy_config rule + output: + network=RESDIR + + "postnetworks/elec_s{simpl}_{clusters}_ec_l{ll}_{opts}_{sopts}_{planning_horizons}_{discountrate}_{demand}_{h2export}export.nc", + # config=RESDIR + # + "configs/config.elec_s{simpl}_{clusters}_ec_l{ll}_{opts}_{sopts}_{planning_horizons}_{discountrate}_{demand}_{h2export}export.yaml", + shadow: + "shallow" + log: + solver=RESDIR + + "logs/elec_s{simpl}_{clusters}_ec_l{ll}_{opts}_{sopts}_{planning_horizons}_{discountrate}_{demand}_{h2export}export_solver.log", + python=RESDIR + + "logs/elec_s{simpl}_{clusters}_ec_l{ll}_{opts}_{sopts}_{planning_horizons}_{discountrate}_{demand}_{h2export}export_python.log", + memory=RESDIR + + "logs/elec_s{simpl}_{clusters}_ec_l{ll}_{opts}_{sopts}_{planning_horizons}_{discountrate}_{demand}_{h2export}export_memory.log", + threads: 25 + resources: + mem_mb=config["solving"]["mem"], + benchmark: + ( + RESDIR + + "benchmarks/solve_network/elec_s{simpl}_{clusters}_ec_l{ll}_{opts}_{sopts}_{planning_horizons}_{discountrate}_{demand}_{h2export}export" + ) + script: + "./scripts/solve_network.py" + + rule solve_all_networks_myopic: + input: + networks=expand( + RESDIR + + "postnetworks/elec_s{simpl}_{clusters}_ec_l{ll}_{opts}_{sopts}_{planning_horizons}_{discountrate}_{demand}_{h2export}export.nc", + **config["scenario"], + **config["costs"], + **config["export"], + ), + + rule run_scenario: input: diff_config="configs/scenarios/config.{scenario_name}.yaml", @@ -1048,6 +1916,7 @@ rule run_scenario: run: from build_test_configs import create_test_config import yaml + from subprocess import run # get base configuration file from diff config with open(input.diff_config) as f: @@ -1065,8 +1934,16 @@ rule run_scenario: ) # merge the default config file with the difference create_test_config(base_config_path, input.diff_config, "config.yaml") - os.system("snakemake -j all solve_all_networks 
--rerun-incomplete") - os.system("snakemake -j1 make_statistics --force") + run( + "snakemake -j all solve_all_networks --rerun-incomplete", + shell=True, + check=not config["run"]["allow_scenario_failure"], + ) + run( + "snakemake -j1 make_statistics --force", + shell=True, + check=not config["run"]["allow_scenario_failure"], + ) copyfile("config.yaml", output.copyconfig) @@ -1077,6 +1954,6 @@ rule run_all_scenarios: "results/{scenario_name}/scenario.done", scenario_name=[ c.stem.replace("config.", "") - for c in Path("configs/scenarios").glob("config.*.yaml") + for c in pathlib.Path("configs/scenarios").glob("config.*.yaml") ], ), diff --git a/config.default.yaml b/config.default.yaml index d6956d21a..8edce653c 100644 --- a/config.default.yaml +++ b/config.default.yaml @@ -9,31 +9,49 @@ logging: level: INFO format: "%(levelname)s:%(name)s:%(message)s" +results_dir: results/ +summary_dir: results/ +costs_dir: data/ # TODO change to the equivalent of technology data + +foresight: overnight + + countries: ["NG", "BJ"] # Can be replaced by country ["NG", "BJ"], continent ["Africa"] or user-specific region, see more at https://pypsa-earth.readthedocs.io/en/latest/configuration.html#top-level-configuration enable: - retrieve_databundle: true # Recommended 'true', for the first run. Otherwise data might be missing. - retrieve_cost_data: true # true: retrieves cost data from technology data and saves in resources/costs.csv, false: uses cost data in data/costs.csv - download_osm_data: true # If 'true', OpenStreetMap data will be downloaded for the above given countries - build_natura_raster: false # If True, then an exclusion raster will be build + retrieve_databundle: true # Recommended 'true', for the first run. Otherwise data might be missing. + retrieve_databundle_sector: true + retrieve_cost_data: true # true: retrieves cost data from technology data and saves in resources/costs.csv, false: uses cost data in data/costs.csv + download_osm_data: true # If 'true', OpenStreetMap data will be downloaded for the above given countries + build_natura_raster: false # If True, then an exclusion raster will be build build_cutout: false # If "build_cutout" : true, then environmental data is extracted according to `snapshots` date range and `countries` # requires cds API key https://cds.climate.copernicus.eu/api-how-to # More information https://atlite.readthedocs.io/en/latest/introduction.html#datasets + progress_bar: true # show progress bar during downloading routines and other long-running tasks + + -custom_rules: [] # Default empty [] or link to custom rule file e.g. ["my_folder/my_rules.smk"] that add rules to Snakefile +custom_rules: [] # Default empty [] or link to custom rule file e.g. 
["my_folder/my_rules.smk"] that add rules to Snakefile run: name: "" # use this to keep track of runs with different settings - shared_cutouts: true # set to true to share the default cutout(s) across runs - # Note: value false requires build_cutout to be enabled + shared_cutouts: true # set to true to share the default cutout(s) across runs + # Note: value false requires build_cutout to be enabled + allow_scenario_failure: false # If True, the workflow will continue even if a scenario in run_scnenario fails scenario: - simpl: [''] - ll: ['copt'] + simpl: [""] + ll: ["copt"] clusters: [10] opts: [Co2L-3H] + planning_horizons: # investment years for myopic and perfect; or costs year for overnight + - 2030 + sopts: + - "144H" + demand: + - "AB" snapshots: start: "2013-01-01" @@ -42,18 +60,19 @@ snapshots: # definition of the Coordinate Reference Systems crs: - geo_crs: EPSG:4326 # general geographic projection, not used for metric measures. "EPSG:4326" is the standard used by OSM and google maps - distance_crs: EPSG:3857 # projection for distance measurements only. Possible recommended values are "EPSG:3857" (used by OSM and Google Maps) - area_crs: ESRI:54009 # projection for area measurements only. Possible recommended values are Global Mollweide "ESRI:54009" + geo_crs: EPSG:4326 # general geographic projection, not used for metric measures. "EPSG:4326" is the standard used by OSM and google maps + distance_crs: EPSG:3857 # projection for distance measurements only. Possible recommended values are "EPSG:3857" (used by OSM and Google Maps) + area_crs: ESRI:54009 # projection for area measurements only. Possible recommended values are Global Mollweide "ESRI:54009" # download_osm_data_nprocesses: 10 # (optional) number of threads used to download osm data augmented_line_connection: - add_to_snakefile: false # If True, includes this rule to the workflow - connectivity_upgrade: 2 # Min. lines connection per node, https://networkx.org/documentation/stable/reference/algorithms/generated/networkx.algorithms.connectivity.edge_augmentation.k_edge_augmentation.html#networkx.algorithms.connectivity.edge_augmentation.k_edge_augmentation - new_line_type: ["HVAC"] # Expanded lines can be either ["HVAC"] or ["HVDC"] or both ["HVAC", "HVDC"] - min_expansion: 1 # [MW] New created line expands by float/int input - min_DC_length: 600 # [km] Minimum line length of DC line + add_to_snakefile: false # If True, includes this rule to the workflow + connectivity_upgrade: 2 # Min. 
 
 augmented_line_connection:
-  add_to_snakefile: false # If True, includes this rule to the workflow
-  connectivity_upgrade: 2 # Min. lines connection per node, https://networkx.org/documentation/stable/reference/algorithms/generated/networkx.algorithms.connectivity.edge_augmentation.k_edge_augmentation.html#networkx.algorithms.connectivity.edge_augmentation.k_edge_augmentation
-  new_line_type: ["HVAC"] # Expanded lines can be either ["HVAC"] or ["HVDC"] or both ["HVAC", "HVDC"]
-  min_expansion: 1 # [MW] New created line expands by float/int input
-  min_DC_length: 600 # [km] Minimum line length of DC line
+  add_to_snakefile: false # If True, includes this rule in the workflow
+  connectivity_upgrade: 2 # Minimum number of line connections per node,
+  # https://networkx.org/documentation/stable/reference/algorithms/generated/networkx.algorithms.connectivity.edge_augmentation.k_edge_augmentation.html#networkx.algorithms.connectivity.edge_augmentation.k_edge_augmentation
+  new_line_type: ["HVAC"] # Expanded lines can be either ["HVAC"] or ["HVDC"] or both ["HVAC", "HVDC"]
+  min_expansion: 1 # [MW] Each newly created line is expanded by this float/int input
+  min_DC_length: 600 # [km] Minimum line length of DC line
 
 cluster_options:
   simplify_network:
@@ -70,11 +89,11 @@ cluster_options:
     algorithm: kmeans
     feature: solar+onwind-time
     exclude_carriers: []
-  alternative_clustering: false # "False" use Voronoi shapes, "True" use GADM shapes
-  distribute_cluster: ['load'] # Distributes cluster nodes per country according to ['load'],['pop'] or ['gdp']
-  out_logging: true # When "True", logging is printed to console
+  alternative_clustering: false # "False" use Voronoi shapes, "True" use GADM shapes
+  distribute_cluster: ["load"] # Distributes cluster nodes per country according to ['load'], ['pop'] or ['gdp']
+  out_logging: true # When "True", logging is printed to console
   aggregation_strategies:
-    generators: # use "min" for more conservative assumptions
+    generators: # use "min" for more conservative assumptions
       p_nom: sum
      p_nom_max: sum
      p_nom_min: sum
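Each entry under `aggregation_strategies` above is ultimately a pandas aggregation applied per cluster, so the intended behaviour can be checked with a plain `groupby`. A minimal sketch with made-up generator data:

import pandas as pd

generators = pd.DataFrame(
    {
        "cluster": ["NG0", "NG0", "NG1"],
        "p_nom": [100.0, 50.0, 80.0],
        "efficiency": [0.35, 0.45, 0.40],
    }
)

# Mirrors the config above: capacities are summed, efficiencies averaged.
print(generators.groupby("cluster").agg({"p_nom": "sum", "efficiency": "mean"}))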
@@ -86,54 +105,55 @@ cluster_options:
       efficiency: mean
 
 build_shape_options:
-  gadm_layer_id: 1 # GADM level area used for the gadm_shapes. Codes are country-dependent but roughly: 0: country, 1: region/county-like, 2: municipality-like
-  update_file: false # When true, all the input files are downloaded again and replace the existing files
-  out_logging: true # When true, logging is printed to console
-  year: 2020 # reference year used to derive shapes, info on population and info on GDP
-  nprocesses: 3 # number of processes to be used in build_shapes
-  worldpop_method: "standard" # "standard" pulls from web 1kmx1km raster, "api" pulls from API 100mx100m raster, false (not "false") no pop addition to shape which is useful when generating only cutout
-  gdp_method: "standard" # "standard" pulls from web 1x1km raster, false (not "false") no gdp addition to shape which useful when generating only cutout
+  gadm_layer_id: 1 # GADM level area used for the gadm_shapes. Codes are country-dependent but roughly: 0: country, 1: region/county-like, 2: municipality-like
+  update_file: false # When true, all the input files are downloaded again and replace the existing files
+  out_logging: true # When true, logging is printed to console
+  year: 2020 # reference year used to derive shapes, info on population and info on GDP
+  nprocesses: 3 # number of processes to be used in build_shapes
+  worldpop_method: "standard" # "standard" pulls from web 1kmx1km raster, "api" pulls from API 100mx100m raster,
+  # false (not "false") no pop addition to shape, which is useful when generating only the cutout
+  gdp_method: "standard" # "standard" pulls from web 1x1km raster, false (not "false") no gdp addition to shape, which is useful when generating only the cutout
   contended_flag: "set_by_country" # "set_by_country" assigns the contended areas to the countries according to the GADM database, "drop" drops these contended areas from the model
   gadm_file_prefix: "gadm41_"
   gadm_url_prefix: "https://geodata.ucdavis.edu/gadm/gadm4.1/gpkg/"
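`gadm_url_prefix` and `gadm_file_prefix` above are combined with a three-letter ISO code to locate the GADM 4.1 geopackage for each country; roughly as follows (an illustrative helper, not the workflow's actual retrieval code):

import country_converter as coco

def gadm_url(two_letter_code,
             url_prefix="https://geodata.ucdavis.edu/gadm/gadm4.1/gpkg/",
             file_prefix="gadm41_"):
    iso3 = coco.convert(names=two_letter_code, to="ISO3")  # e.g. "NG" -> "NGA"
    return f"{url_prefix}{file_prefix}{iso3}.gpkg"

print(gadm_url("NG"))  # https://geodata.ucdavis.edu/gadm/gadm4.1/gpkg/gadm41_NGA.gpkg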
-clean_osm_data_options: # osm = OpenStreetMap
-  names_by_shapes: true # Set the country name based on the extended country shapes
-  threshold_voltage: 51000 # [V] assets below that voltage threshold will not be used (cable, line, generator, etc.)
-  tag_substation: "transmission" # Filters only substations with 'transmission' tag, ('distribution' also available)
-  add_line_endings: true # When "True", then line endings are added to the dataset of the substations
-  generator_name_method: OSM # Methodology to specify the name to the generator. Options: OSM (name as by OSM dataset), closest_city (name by the closest city)
-  use_custom_lines: "OSM_only" # Use OSM (OSM_only), customized (custom_only), or both data sets (add_custom)
-  path_custom_lines: false # If exists, provide the specific absolute path of the custom file e.g. (...\data\custom_lines.geojson)
-  use_custom_substations: "OSM_only" # Use OSM (OSM_only), customized (custom_only), or both data sets (add_custom)
-  path_custom_substations: false # If exists, provide the specific absolute path of the custom file e.g. (...\data\custom_substations.geojson)
-  use_custom_cables: "OSM_only" # Use OSM (OSM_only), customized (custom_only), or both data sets (add_custom)
-  path_custom_cables: false # If exists, provide the specific absolute path of the custom file e.g. (...\data\custom_cables.geojson)
-
-build_osm_network: # Options of the build_osm_network script; osm = OpenStreetMap
-  group_close_buses: true # When "True", close buses are merged and guarantee the voltage matching among line endings
-  group_tolerance_buses: 5000 # [m] (default 5000) Tolerance in meters of the close buses to merge
-  split_overpassing_lines: true # When True, lines overpassing buses are splitted and connected to the bueses
-  overpassing_lines_tolerance: 1 # [m] (default 1) Tolerance to identify lines overpassing buses
-  force_ac: false # When true, it forces all components (lines and substation) to be AC-only. To be used if DC assets create problem.
+clean_osm_data_options: # osm = OpenStreetMap
+  names_by_shapes: true # Set the country name based on the extended country shapes
+  threshold_voltage: 51000 # [V] minimum voltage threshold to keep the asset (cable, line, generator, etc.)
+  tag_substation: "transmission" # Filters only substations with 'transmission' tag, ('distribution' also available)
+  add_line_endings: true # When "True", then line endings are added to the dataset of the substations
+  generator_name_method: OSM # Methodology to specify the name of the generator. Options: OSM (name as by OSM dataset), closest_city (name by the closest city)
+  use_custom_lines: "OSM_only" # Use OSM (OSM_only), customized (custom_only), or both data sets (add_custom)
+  path_custom_lines: false # If exists, provide the specific absolute path of the custom file e.g. (...\data\custom_lines.geojson)
+  use_custom_substations: "OSM_only" # Use OSM (OSM_only), customized (custom_only), or both data sets (add_custom)
+  path_custom_substations: false # If exists, provide the specific absolute path of the custom file e.g. (...\data\custom_substations.geojson)
+  use_custom_cables: "OSM_only" # Use OSM (OSM_only), customized (custom_only), or both data sets (add_custom)
+  path_custom_cables: false # If exists, provide the specific absolute path of the custom file e.g. (...\data\custom_cables.geojson)
+
+build_osm_network: # Options of the build_osm_network script; osm = OpenStreetMap
+  group_close_buses: true # When "True", close buses are merged, which guarantees the voltage matching among line endings
+  group_tolerance_buses: 5000 # [m] (default 5000) Tolerance in meters of the close buses to merge
+  split_overpassing_lines: true # When True, lines overpassing buses are split and connected to the buses
+  overpassing_lines_tolerance: 1 # [m] (default 1) Tolerance to identify lines overpassing buses
+  force_ac: false # When true, it forces all components (lines and substations) to be AC-only. To be used if DC assets create problems.
 
 base_network:
-  min_voltage_substation_offshore: 51000 # [V] minimum voltage of the offshore substations
-  min_voltage_rebase_voltage: 51000 # [V] minimum voltage in base network
+  min_voltage_substation_offshore: 51000 # [V] minimum voltage of the offshore substations
+  min_voltage_rebase_voltage: 51000 # [V] minimum voltage in base network
 
 load_options:
   ssp: "ssp2-2.6" # shared socio-economic pathway (GDP and population growth) scenario to consider
-  weather_year: 2013 # Load scenarios available with different weather year (different renewable potentials)
-  prediction_year: 2030 # Load scenarios available with different prediction year (GDP, population)
-  scale: 1 # scales all load time-series, i.e. 2 = doubles load
+  weather_year: 2013 # Load scenarios available with different weather year (different renewable potentials)
+  prediction_year: 2030 # Load scenarios available with different prediction year (GDP, population)
+  scale: 1 # scales all load time-series, i.e. 2 = doubles load
 
 electricity:
   base_voltage: 380.
   voltages: [132., 220., 300., 380., 500., 750.]
-  co2limit: 7.75e+7 # European default, 0.05 * 3.1e9*0.5, needs to be adjusted for Africa
-  co2base: 1.487e+9 # European default, adjustment to Africa necessary
+  co2limit: 7.75e+7 # European default, 0.05 * 3.1e9*0.5, needs to be adjusted for Africa
+  co2base: 1.487e+9 # European default, adjustment to Africa necessary
   agg_p_nom_limits: data/agg_p_nom_minmax.csv
-  hvdc_as_lines: false # should HVDC lines be modeled as `Line` or as `Link` component?
+  hvdc_as_lines: false # should HVDC lines be modeled as `Line` or as `Link` component?
   automatic_emission: false
   automatic_emission_base_year: 1990 # 1990 is taken as default. Any year from 1970 to 2018 can be selected.
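The `threshold_voltage` filter in `clean_osm_data_options` above boils down to a single comparison once the OSM voltage tags have been parsed to numbers; a hedged sketch (the column names are illustrative):

import pandas as pd

lines = pd.DataFrame({"line_id": [1, 2, 3], "voltage": [330000, 33000, 132000]})

# Keep only assets at or above the 51 kV threshold; the 33 kV
# distribution line is dropped from the model.
threshold_voltage = 51000
lines = lines[lines["voltage"] >= threshold_voltage]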
@@ -149,21 +169,21 @@ electricity:
 
   extendable_carriers:
     Generator: [solar, onwind, offwind-ac, offwind-dc, OCGT]
-    StorageUnit: [] # battery, H2
+    StorageUnit: [] # battery, H2
     Store: [battery, H2]
-    Link: [] # H2 pipeline
+    Link: [] # H2 pipeline
 
   powerplants_filter: (DateOut >= 2022 or DateOut != DateOut)
-  custom_powerplants: false # "false" use only powerplantmatching (ppm) data, "merge" combines ppm and custom powerplants, "replace" use only custom powerplants
+  custom_powerplants: false # "false" use only powerplantmatching (ppm) data, "merge" combines ppm and custom powerplants, "replace" use only custom powerplants
 
   conventional_carriers: [nuclear, oil, OCGT, CCGT, coal, lignite, geothermal, biomass]
   renewable_carriers: [solar, onwind, offwind-ac, offwind-dc, hydro]
 
   estimate_renewable_capacities:
-    stats: "irena" # False, = greenfield expansion, 'irena' uses IRENA stats to add expansion limits
-    year: 2020 # Reference year, available years for IRENA stats are 2000 to 2020
-    p_nom_min: 1 # any float, scales the minimum expansion acquired from stats, i.e. 110% of the reference year's capacities => p_nom_min: 1.1
-    p_nom_max: false # sets the expansion constraint, False to deactivate this option and use estimated renewable potentials determine by the workflow, float scales the p_nom_min factor accordingly
+    stats: "irena" # False = greenfield expansion, 'irena' uses IRENA stats to add expansion limits
+    year: 2020 # Reference year, available years for IRENA stats are 2000 to 2020
+    p_nom_min: 1 # any float, scales the minimum expansion acquired from stats, i.e. 110% of the reference year's capacities => p_nom_min: 1.1
+    p_nom_max: false # sets the expansion constraint, False to deactivate this option and use estimated renewable potentials determined by the workflow, float scales the p_nom_min factor accordingly
   technology_mapping:
     # Wind is the Fueltype in ppm.data.Capacity_stats, onwind, offwind-{ac,dc} the carrier in PyPSA-Earth
     Offshore: [offwind-ac, offwind-dc]
@@ -198,18 +218,16 @@ transformers:
 
 atlite:
   nprocesses: 4
   cutouts:
-    # geographical bounds automatically determined from countries input
     cutout-2013-era5:
       module: era5
-      dx: 0.3 # cutout resolution
-      dy: 0.3 # cutout resolution
+      dx: 0.3 # cutout resolution
+      dy: 0.3 # cutout resolution
       # The cutout time is automatically set by the snapshot range. See `snapshot:` option above and 'build_cutout.py'.
       # time: ["2013-01-01", "2014-01-01"] # to manually specify a different weather year (~70 years available)
       # The cutout spatial extent [x,y] is automatically set by country selection. See `countries:` option above and 'build_cutout.py'.
      # x: [-12., 35.] # set cutout range manually, instead of automatically by boundaries of country
      # y: [33., 72] # manually set cutout range
-
 renewable:
   onwind:
     cutout: cutout-2013-era5
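The commented `time`/`x`/`y` options above map directly onto atlite's `Cutout` arguments when a cutout is built manually; a minimal sketch (bounds and year taken from the comments above, a CDS API key is assumed to be configured):

import atlite

cutout = atlite.Cutout(
    path="cutouts/cutout-2013-era5.nc",
    module="era5",
    x=slice(-12.0, 35.0),  # manual spatial extent instead of country boundaries
    y=slice(33.0, 72.0),
    time="2013",           # normally derived from the `snapshots` range
)
cutout.prepare()  # downloads and caches the ERA5 weather data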
@@ -244,7 +262,7 @@ renewable:
       method: wind
       turbine: NREL_ReferenceTurbine_5MW_offshore
     capacity_per_sqkm: 2
-    correction_factor: 0.8855
+    # correction_factor: 0.8855
     # proxy for wake losses
     # from 10.1016/j.energy.2018.08.153
     # until done more rigorously in #153
@@ -263,7 +281,7 @@ renewable:
       turbine: NREL_ReferenceTurbine_5MW_offshore
     # ScholzPhd Tab 4.3.1: 10MW/km^2
     capacity_per_sqkm: 3
-    correction_factor: 0.8855
+    # correction_factor: 0.8855
     # proxy for wake losses
     # from 10.1016/j.energy.2018.08.153
     # until done more rigorously in #153
@@ -281,8 +299,8 @@ renewable:
       method: pv
       panel: CSi
       orientation: latitude_optimal # will lead into optimal design
-      # slope: 0. # slope: 0 represent a flat panel
-      # azimuth: 180. # azimuth: 180 south orientation
+      # slope: 0. # slope: 0 represents a flat panel
+      # azimuth: 180. # azimuth: 180 south orientation
     capacity_per_sqkm: 4.6 # From 1.7 to 4.6 addresses issue #361
     # Determined by comparing uncorrected area-weighted full-load hours to those
     # published in Supplementary Data to
@@ -302,19 +320,19 @@ renewable:
     resource:
       method: hydro
       hydrobasins: data/hydrobasins/hybas_world.shp
-      flowspeed: 1.0 # m/s
+      flowspeed: 1.0 # m/s
       # weight_with_height: false
       # show_progress: true
     carriers: [ror, PHS, hydro]
     PHS_max_hours: 6
-    hydro_max_hours: "energy_capacity_totals_by_country" # not active
-    hydro_max_hours_default: 6.0 # (optional, default 6) Default value of max_hours for hydro when NaN values are found
+    hydro_max_hours: "energy_capacity_totals_by_country" # not active
+    hydro_max_hours_default: 6.0 # (optional, default 6) Default value of max_hours for hydro when NaN values are found
     clip_min_inflow: 1.0
     extendable: true
     normalization:
-      method: hydro_capacities # 'hydro_capacities' to rescale country hydro production by using hydro_capacities, 'eia' to rescale by eia data, false for no rescaling
-      year: 2013 # (optional) year of statistics used to rescale the runoff time series. When not provided, the weather year of the snapshots is used
-      multiplier: 1.1 # multiplier applied after the normalization of the hydro production; default 1.0
+      method: hydro_capacities # 'hydro_capacities' to rescale country hydro production by using hydro_capacities, 'eia' to rescale by eia data, false for no rescaling
+      year: 2013 # (optional) year of statistics used to rescale the runoff time series. When not provided, the cutout weather year is used
+      multiplier: 1.1 # multiplier applied after the normalization of the hydro production; default 1.0
   csp:
     cutout: cutout-2013-era5
     resource:
@@ -337,12 +355,13 @@ renewable:
     csp_model: advanced # simple or advanced
 
 # TODO: Needs to be adjusted for Africa.
-# Costs Configuration (Do not remove, needed for Sphynx documentation).
 costs:
   year: 2030
-  version: v0.5.0
-  rooftop_share: 0.14 # based on the potentials, assuming (0.1 kW/m2 and 10 m2/person)
+  version: v0.6.2
+  discountrate: [0.071] #, 0.086, 0.111]
+  # [EUR/USD] ECB: https://www.ecb.europa.eu/stats/exchange/eurofxref/html/eurofxref-graph-usd.en.html # noqa: E501
   USD2013_to_EUR2013: 0.7532 # [EUR/USD] ECB: https://www.ecb.europa.eu/stats/exchange/eurofxref/html/eurofxref-graph-usd.en.html
+  rooftop_share: 0.14 # based on the potentials, assuming (0.1 kW/m2 and 10 m2/person)
   fill_values:
     FOM: 0
     VOM: 0
@@ -376,6 +395,8 @@ costs:
   #   CCGT: 25.0
   # efficiency: # per unit
   #   CCGT: 0.58
+  lines:
+    length_factor: 1.25 # to estimate offwind connection costs
 
 
 monte_carlo:
@@ -385,7 +406,7 @@ monte_carlo:
   options:
     add_to_snakefile: false # When set to true, enables Monte Carlo sampling
     samples: 9 # number of optimizations. Note that number of samples when using scipy has to be the square of a prime number
-    sampling_strategy: "chaospy" # "pydoe2", "chaospy", "scipy", packages that are supported
+    sampling_strategy: "chaospy" # "pydoe2", "chaospy", "scipy" packages are supported
     seed: 42 # set seed for reproducibility
 # Uncertainties on any PyPSA object are specified by declaring the specific PyPSA object under the key 'uncertainties'.
 # For each PyPSA object, the 'type' and 'args' keys represent the type of distribution and its argument, respectively.
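With `sampling_strategy: "chaospy"`, a declaration like `type: beta, args: [0.5, 2]` corresponds to a chaospy distribution sampled once per optimization run; a sketch of the idea (not the workflow's exact wiring):

import chaospy

dist = chaospy.Beta(0.5, 2)  # `type: beta`, `args: [0.5, 2]`
samples = dist.sample(9, rule="latin_hypercube", seed=42)  # `samples: 9`, `seed: 42`
print(samples)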
@@ -410,6 +431,251 @@ monte_carlo:
       type: beta
       args: [0.5, 2]
 
+# ------------------- SECTOR OPTIONS -------------------
+
+policy_config:
+  hydrogen:
+    temporal_matching: "no_res_matching" # either "h2_yearly_matching", "h2_monthly_matching", "no_res_matching"
+    spatial_matching: false
+    additionality: false # RE electricity is equal to the amount required for additional hydrogen export compared to the 0 export case ("reference_case")
+    allowed_excess: 1.0
+    is_reference: false # Whether or not this network is a reference case network, relevant only if additionality is _true_
+    remove_h2_load: false # Whether or not to remove the h2 load from the network, relevant only if is_reference is _true_
+    path_to_ref: "" # Path to the reference case network for additionality calculation, relevant only if additionality is _true_ and is_reference is _false_
+    re_country_load: false # Set to "True" to force the RE electricity to be equal to the electricity required for hydrogen export and the country electricity load. "False" excludes the country electricity load from the constraint.
+
+
+demand_data:
+  update_data: true # if true, the workflow downloads the energy balances data saved in data/demand/unsd/data again. Turn on for the first run.
+  base_year: 2019
+
+  other_industries: false # Whether or not to include industries that are not specified. Some countries have exaggerated numbers; check carefully.
+  aluminium_year: 2019 # Year of the aluminium demand data specified in `data/AL_production.csv`
+
+
+fossil_reserves:
+  oil: 100 # TWh; maybe redundant
+
+export:
+  h2export: [10] # Yearly export demand in TWh
+  store: true # [True, False] # specifies whether an export store to balance demand is implemented
+  store_capital_costs: "no_costs" # ["standard_costs", "no_costs"] # specifies the costs of the export store. "standard_costs" takes CAPEX of "hydrogen storage tank type 1 including compressor"
+  export_profile: "ship" # use "ship" or "constant"
+  ship:
+    ship_capacity: 0.4 # TWh # 0.05 TWh for new ones, 0.003 TWh for Suiso Frontier, 0.4 TWh according to Hampp2021: "Corresponds to 11360 t H2 (l) with LHV of 33.3333 Mwh/t_H2. Cihlar et al 2020 based on IEA 2019, Table 3-B"
+    travel_time: 288 # hours # From Agadir to Rotterdam and back (12*24)
+    fill_time: 24 # hours, for 48h see Hampp2021
+    unload_time: 24 # hours, for 48h see Hampp2021
+
+custom_data:
+  renewables: [] # ['csp', 'rooftop-solar', 'solar']
+  elec_demand: false
+  heat_demand: false
+  industry_demand: false
+  industry_database: false
+  transport_demand: false
+  water_costs: false
+  h2_underground: false
+  add_existing: false
+  custom_sectors: false
+  gas_network: false # If "True" then a custom .csv file must be placed in "resources/custom_data/pipelines.csv", if "False" the user can choose between "greenfield" or the model's built-in datasets. Please refer to ["sector"] below.
+
+industry:
+  reference_year: 2015
+
+solar_thermal:
+  clearsky_model: simple
+  orientation:
+    slope: 45.
+    azimuth: 180.
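Under `export_profile: "ship"`, the parameters above describe a loading cycle: a ship of `ship_capacity` is filled over `fill_time`, sails for `travel_time` and unloads for `unload_time`, so hydrogen is drawn from the port only during fill windows. A rough sketch of such a profile (illustrative only, not the workflow's exact implementation):

import numpy as np

h2export, ship_capacity = 10.0, 0.4                # TWh/a, TWh per shipment
fill_time, travel_time, unload_time = 24, 288, 24  # hours
cycle = fill_time + travel_time + unload_time      # one full round trip

profile = np.zeros(8760)
starts = np.linspace(0, 8760 - cycle, int(h2export / ship_capacity)).astype(int)
for start in starts:
    # demand occurs only while the ship is being filled at the port
    profile[start : start + fill_time] += ship_capacity / fill_time

print(profile.sum())  # ~10 TWh/a spread over 25 fill windows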
+
+existing_capacities:
+  grouping_years_power: [1960, 1965, 1970, 1975, 1980, 1985, 1990, 1995, 2000, 2005, 2010, 2015, 2020, 2025, 2030]
+  grouping_years_heat: [1980, 1985, 1990, 1995, 2000, 2005, 2010, 2015, 2019] # these should not extend beyond 2020
+  threshold_capacity: 10
+  default_heating_lifetime: 20
+  conventional_carriers:
+  - lignite
+  - coal
+  - oil
+  - uranium
+
+sector:
+  gas:
+    spatial_gas: true # ALWAYS TRUE
+    network: false # ALWAYS FALSE for now (NOT USED)
+    network_data: GGIT # Global dataset -> 'GGIT', European dataset -> 'IGGIELGN'
+    network_data_GGIT_status: ["Construction", "Operating", "Idle", "Shelved", "Mothballed", "Proposed"]
+  hydrogen:
+    network: true
+    H2_retrofit_capacity_per_CH4: 0.6
+    network_limit: 2000 # GWkm
+    network_routes: gas # "gas" or "greenfield". If "gas" -> the network data are fetched from ["sector"]["gas"]["network_data"]. If "greenfield" -> the network follows the topology of electrical transmission lines
+    gas_network_repurposing: true # If true -> ["sector"]["gas"]["network"] is automatically false
+    underground_storage: false
+    hydrogen_colors: false
+    set_color_shares: false
+    blue_share: 0.40
+    pink_share: 0.05
+  coal:
+    shift_to_elec: true # If true, residential and services demand of coal is shifted to electricity. If false, the final energy demand of coal is disregarded
+
+  international_bunkers: false # Whether or not to count the emissions of international aviation and navigation
+
+  oil:
+    spatial_oil: true
+
+  district_heating:
+    potential: 0.3 # maximum fraction of urban demand which can be supplied by district heating
+    # increase of today's district heating demand to potential maximum district heating share
+    # progress = 0 means today's district heating share, progress = 1 means the maximum fraction of urban demand is supplied by district heating
+    progress: 1
+    # 2020: 0.0
+    # 2030: 0.3
+    # 2040: 0.6
+    # 2050: 1.0
+    district_heating_loss: 0.15
+  reduce_space_heat_exogenously: true # reduces space heat demand by a given factor (applied before losses in DH)
+  # this can represent e.g. building renovation, building demolition, or if
+  # the factor is negative: increasing floor area, increased thermal comfort, population growth
+  reduce_space_heat_exogenously_factor: 0.29 # per unit reduction in space heat demand
+  # the default factors are determined by the LTS scenario from http://tool.european-calculator.eu/app/buildings/building-types-area/?levers=1ddd4444421213bdbbbddd44444ffffff11f411111221111211l212221
+  # 2020: 0.10 # this results in a space heat demand reduction of 10%
+  # 2025: 0.09 # first heat demand increases compared to 2020 because of larger floor area per capita
+  # 2030: 0.09
+  # 2035: 0.11
+  # 2040: 0.16
+  # 2045: 0.21
+  # 2050: 0.29
+
+  tes: true
+  tes_tau: # 180 day time constant for centralised, 3 day for decentralised
+    decentral: 3
+    central: 180
+  boilers: true
+  oil_boilers: false
+  chp: true
+  micro_chp: false
+  solar_thermal: true
+  heat_pump_sink_T: 55 # Celsius, based on DTU / large area radiators; used in build_cop_profiles.py
+  time_dep_hp_cop: true # time-dependent heat pump coefficient of performance
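With `time_dep_hp_cop: true`, hourly COPs are derived from the temperature profiles and the 55 degC sink. A common choice for this (used in PyPSA-Eur-style `build_cop_profiles` scripts, stated here as an assumption) is the quadratic regression of Staffell et al. (2012) in the sink-source temperature difference:

def cop(delta_t, source="air"):
    # Staffell et al. (2012): COP as a function of delta_t = T_sink - T_source
    if source == "air":  # air-sourced heat pump (ASHP)
        return 6.81 - 0.121 * delta_t + 0.000630 * delta_t**2
    return 8.77 - 0.150 * delta_t + 0.000734 * delta_t**2  # ground-sourced (GSHP)

print(cop(55.0 - 5.0))  # ASHP COP at 5 degC air temperature: ~2.3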
+  solar_cf_correction: 0.788457 # = 1/1.2683
+  bev_plug_to_wheel_efficiency: 0.2 # kWh/km from EPA https://www.fueleconomy.gov/feg/ for Tesla Model S
+  bev_charge_efficiency: 0.9 # BEV (dis-)charging efficiency
+  transport_heating_deadband_upper: 20.
+  transport_heating_deadband_lower: 15.
+  ICE_lower_degree_factor: 0.375 # percent increase in fuel consumption per degree below the deadband
+  ICE_upper_degree_factor: 1.6
+  EV_lower_degree_factor: 0.98
+  EV_upper_degree_factor: 0.63
+  bev_avail_max: 0.95
+  bev_avail_mean: 0.8
+  bev_dsm_restriction_value: 0.75 # Set to 0 for no restriction on BEV DSM
+  bev_dsm_restriction_time: 7 # Time at which the SOC of the BEV has to be dsm_restriction_value
+  v2g: true # allows feed-in to grid from EV battery
+  bev_dsm: true # turns on EV battery DSM
+  bev_energy: 0.05 # average battery size in MWh
+  bev_availability: 0.5 # share of cars that do smart charging
+  transport_fuel_cell_efficiency: 0.5
+  transport_internal_combustion_efficiency: 0.3
+  industry_util_factor: 0.7
+
+  biomass_transport: true # biomass transport between nodes
+  biomass_transport_default_cost: 0.1 # EUR/km/MWh
+  solid_biomass_potential: 40 # TWh/a, Potential of whole modelled area
+  biogas_potential: 0.5 # TWh/a, Potential of whole modelled area
+
+  efficiency_heat_oil_to_elec: 0.9
+  efficiency_heat_biomass_to_elec: 0.9
+  efficiency_heat_gas_to_elec: 0.9
+
+  dynamic_transport:
+    enable: false # If "True", then the BEV and FCEV shares are obtained depending on the "Co2L"-wildcard (e.g. "Co2L0.70: 0.10"). If "False", then the shares are obtained depending on the "demand" wildcard and "planning_horizons" wildcard as listed below (e.g. "DF_2050: 0.08")
+    land_transport_electric_share:
+      Co2L2.0: 0.00
+      Co2L1.0: 0.01
+      Co2L0.90: 0.03
+      Co2L0.80: 0.06
+      Co2L0.70: 0.10
+      Co2L0.60: 0.17
+      Co2L0.50: 0.27
+      Co2L0.40: 0.40
+      Co2L0.30: 0.55
+      Co2L0.20: 0.69
+      Co2L0.10: 0.80
+      Co2L0.00: 0.88
+    land_transport_fuel_cell_share:
+      Co2L2.0: 0.01
+      Co2L1.0: 0.01
+      Co2L0.90: 0.01
+      Co2L0.80: 0.01
+      Co2L0.70: 0.01
+      Co2L0.60: 0.01
+      Co2L0.50: 0.01
+      Co2L0.40: 0.01
+      Co2L0.30: 0.01
+      Co2L0.20: 0.01
+      Co2L0.10: 0.01
+      Co2L0.00: 0.01
+
+  land_transport_fuel_cell_share: # 1 means all FCEVs
+    BU_2030: 0.00
+    AP_2030: 0.004
+    NZ_2030: 0.02
+    DF_2030: 0.01
+    AB_2030: 0.01
+    BU_2050: 0.00
+    AP_2050: 0.06
+    NZ_2050: 0.28
+    DF_2050: 0.08
+
+  land_transport_electric_share: # 1 means all EVs # This leads to problems when non-zero
+    BU_2030: 0.00
+    AP_2030: 0.075
+    NZ_2030: 0.13
+    DF_2030: 0.01
+    AB_2030: 0.01
+    BU_2050: 0.00
+    AP_2050: 0.42
+    NZ_2050: 0.68
+    DF_2050: 0.011
+
+  co2_network: true
+  co2_sequestration_potential: 200 # MtCO2/a sequestration potential for Europe
+  co2_sequestration_cost: 10 # EUR/tCO2 for sequestration of CO2
+  hydrogen_underground_storage: true
+  shipping_hydrogen_liquefaction: false
+  shipping_average_efficiency: 0.4 # For conversion of fuel oil to propulsion in 2011
+
+  shipping_hydrogen_share: # 1.0
+    BU_2030: 0.00
+    AP_2030: 0.00
+    NZ_2030: 0.10
+    DF_2030: 0.05
+    AB_2030: 0.05
+    BU_2050: 0.00
+    AP_2050: 0.25
+    NZ_2050: 0.36
+    DF_2050: 0.12
+
+  gadm_level: 1
+  h2_cavern: true
+  marginal_cost_storage: 0
+  methanation: true
+  helmeth: true
+  dac: true
+  SMR: true
+  SMR CC: true
+  cc_fraction: 0.9
+  cc: true
+  space_heat_share: 0.6 # the share of space heating from all heating. The remainder goes to water heating.
+ airport_sizing_factor: 3 + + min_part_load_fischer_tropsch: 0.9 + + conventional_generation: # generator : carrier + OCGT: gas + #Gen_Test: oil # Just for testing purposes solving: @@ -422,17 +688,68 @@ solving: clip_p_max_pu: 0.01 skip_iterations: true track_iterations: false - #nhours: 10 + # nhours: 10 + solver: name: gurobi - threads: 4 - method: 2 # barrier (=ipm) - crossover: 0 - BarConvTol: 1.e-5 - FeasibilityTol: 1.e-6 - AggFill: 0 - PreDual: 0 - GURO_PAR_BARDENSETHRESH: 200 + options: gurobi-default + + solver_options: + highs-default: + # refer to https://ergo-code.github.io/HiGHS/dev/options/definitions/ + threads: 4 + solver: "ipm" + run_crossover: "off" + small_matrix_value: 1e-6 + large_matrix_value: 1e9 + primal_feasibility_tolerance: 1e-5 + dual_feasibility_tolerance: 1e-5 + ipm_optimality_tolerance: 1e-4 + parallel: "on" + random_seed: 123 + gurobi-default: + threads: 4 + method: 2 # barrier + crossover: 0 + BarConvTol: 1.e-6 + Seed: 123 + AggFill: 0 + PreDual: 0 + GURO_PAR_BARDENSETHRESH: 200 + gurobi-numeric-focus: + NumericFocus: 3 # Favour numeric stability over speed + method: 2 # barrier + crossover: 0 # do not use crossover + BarHomogeneous: 1 # Use homogeneous barrier if standard does not converge + BarConvTol: 1.e-5 + FeasibilityTol: 1.e-4 + OptimalityTol: 1.e-4 + ObjScale: -0.5 + threads: 8 + Seed: 123 + gurobi-fallback: # Use gurobi defaults + crossover: 0 + method: 2 # barrier + BarHomogeneous: 1 # Use homogeneous barrier if standard does not converge + BarConvTol: 1.e-5 + FeasibilityTol: 1.e-5 + OptimalityTol: 1.e-5 + Seed: 123 + threads: 8 + cplex-default: + threads: 4 + lpmethod: 4 # barrier + solutiontype: 2 # non basic solution, ie no crossover + barrier.convergetol: 1.e-5 + feasopt.tolerance: 1.e-6 + copt-default: + Threads: 8 + LpMethod: 2 + Crossover: 0 + cbc-default: {} # Used in CI + glpk-default: {} # Used in CI + + mem: 30000 #memory in MB; 20 GB enough for 50+B+I+H2; 100 GB for 181+B+I+H2 plotting: @@ -442,84 +759,220 @@ plotting: p_nom: bus_size_factor: 5.e+4 linewidth_factor: 3.e+3 + color_geomap: + ocean: white + land: whitesmoke + + costs_max: 10 + costs_threshold: 0.2 + + energy_max: 20000 + energy_min: -20000 + energy_threshold: 15 + + vre_techs: + - onwind + - offwind-ac + - offwind-dc + - solar + - ror + conv_techs: + - OCGT + - CCGT + - nuclear + - Nuclear + - coal + - oil + storage_techs: + - hydro+PHS + - battery + - H2 + renewable_storage_techs: + - PHS + - hydro + load_carriers: + - AC load + AC_carriers: + - AC line + - AC transformer + link_carriers: + - DC line + - Converter AC-DC + heat_links: + - heat pump + - resistive heater + - CHP heat + - CHP electric + - gas boiler + - central heat pump + - central resistive heater + - central CHP heat + - central CHP electric + - central gas boiler + heat_generators: + - gas boiler + - central gas boiler + - solar thermal collector + - central solar thermal collector - costs_max: 800 - costs_threshold: 1 - - energy_max: 15000. - energy_min: -10000. - energy_threshold: 50. 
-  vre_techs: ["onwind", "offwind-ac", "offwind-dc", "solar", "ror"]
-  conv_techs: ["OCGT", "CCGT", "nuclear", "coal", "oil"]
-  storage_techs: ["hydro+PHS", "battery", "H2"]
-  load_carriers: ["AC load"]
-  AC_carriers: ["AC line", "AC transformer"]
-  link_carriers: ["DC line", "Converter AC-DC"]
   tech_colors:
-    "onwind": "#235ebc"
-    "onshore wind": "#235ebc"
-    "offwind": "#6895dd"
-    "offwind-ac": "#6895dd"
-    "offshore wind": "#6895dd"
-    "offshore wind ac": "#6895dd"
-    "offwind-dc": "#74c6f2"
-    "offshore wind dc": "#74c6f2"
-    "hydro": "#08ad97"
-    "hydro+PHS": "#08ad97"
-    "PHS": "#08ad97"
-    "hydro reservoir": "#08ad97"
-    "hydroelectricity": "#08ad97"
-    "ror": "#4adbc8"
-    "run of river": "#4adbc8"
-    "solar": "#f9d002"
-    "solar PV": "#f9d002"
-    "solar thermal": "#ffef60"
-    "biomass": "#0c6013"
-    "solid biomass": "#06540d"
-    "biogas": "#23932d"
-    "waste": "#68896b"
-    "geothermal": "#ba91b1"
-    "OCGT": "#d35050"
-    "gas": "#d35050"
-    "natural gas": "#d35050"
-    "CCGT": "#b20101"
-    "nuclear": "#ff9000"
-    "coal": "#707070"
-    "lignite": "#9e5a01"
-    "oil": "#262626"
-    "H2": "#ea048a"
-    "hydrogen storage": "#ea048a"
-    "battery": "#b8ea04"
-    "Electric load": "#f9d002"
-    "electricity": "#f9d002"
-    "lines": "#70af1d"
-    "transmission lines": "#70af1d"
-    "AC": "#70af1d"
-    "AC-AC": "#70af1d"
-    "AC line": "#70af1d"
-    "links": "#8a1caf"
-    "HVDC links": "#8a1caf"
-    "DC": "#8a1caf"
-    "DC-DC": "#8a1caf"
-    "DC link": "#8a1caf"
-    "load": "#ff0000"
-    "load shedding": "#ff0000"
-    "battery discharger": slategray
-    "battery charger": slategray
-    "h2 fuel cell": '#c251ae'
-    "h2 electrolysis": '#ff29d9'
-    "csp": "#fdd404"
+    onwind: "#235ebc" # hex colours must stay quoted, otherwise YAML parses them as comments and the value becomes null
+    onshore wind: "#235ebc"
+    offwind: "#6895dd"
+    offwind-ac: "#6895dd"
+    offshore wind: "#6895dd"
+    offshore wind ac: "#6895dd"
+    offshore wind (AC): "#6895dd"
+    offwind-dc: "#74c6f2"
+    offshore wind dc: "#74c6f2"
+    offshore wind (DC): "#74c6f2"
+    wave: "#004444"
+    hydro: "#08ad97"
+    hydro+PHS: "#08ad97"
+    PHS: "#08ad97"
+    hydro reservoir: "#08ad97"
+    hydroelectricity: "#08ad97"
+    ror: "#4adbc8"
+    run of river: "#4adbc8"
+    solar: "#f9d002"
+    solar PV: "#f9d002"
+    solar thermal: "#ffef60"
+    solar rooftop: "#ffef60"
+    biomass: "#0c6013"
+    solid biomass: "#06540d"
+    solid biomass for industry co2 from atmosphere: "#654321"
+    solid biomass for industry co2 to stored: "#654321"
+    solid biomass for industry CC: "#654321"
+    biogas: "#23932d"
+    waste: "#68896b"
+    geothermal: "#ba91b1"
+    OCGT: "#d35050"
+    OCGT marginal: sandybrown
+    OCGT-heat: "#ee8340"
+    gas: "#d35050"
+    natural gas: "#d35050"
+    gas boiler: "#ee8340"
+    gas boilers: "#ee8340"
+    gas boiler marginal: "#ee8340"
+    gas-to-power/heat: brown
+    SMR: "#4F4F2F"
+    SMR CC: darkblue
+    oil: "#262626"
+    oil boiler: "#B5A642"
+    oil emissions: "#666666"
+    gas for industry: "#333333"
+    gas for industry CC: brown
+    gas for industry co2 to atmosphere: "#654321"
+    gas for industry co2 to stored: "#654321"
+    nuclear: "#ff9000"
+    Nuclear: r
+    Nuclear marginal: r
+    uranium: r
+    coal: "#707070"
+    Coal: k
+    Coal marginal: k
+    lignite: "#9e5a01"
+    Lignite: grey
+    Lignite marginal: grey
+    H2: "#ea048a"
+    H2 for industry: "#222222"
+    H2 for shipping: "#6495ED"
+    H2 liquefaction: m
+    hydrogen storage: "#ea048a"
+    battery: slategray
+    battery discharger: slategray
+    battery charger: slategray
+    battery storage: slategray
+    home battery: "#614700"
+    home battery storage: "#614700"
+    lines: "#70af1d"
+    transmission lines: "#70af1d"
+    AC: "#70af1d"
+    AC-AC: "#70af1d"
+    AC line: "#70af1d"
+    links: "#8a1caf"
+    HVDC links: "#8a1caf"
+    DC: "#8a1caf"
+    DC-DC: "#8a1caf"
+    DC link: "#8a1caf"
+    load: "#ff0000"
+    load shedding: "#ff0000"
+    Electric load: b
+    electricity: k
+    electric demand: k
+    electricity distribution grid: y
+    heat: darkred
+    Heat load: r
+    heat pumps: "#76EE00"
+    heat pump: "#76EE00"
+    air heat pump: "#76EE00"
+    ground heat pump: "#40AA00"
+    CHP: r
+    CHP heat: r
+    CHP electric: r
+    heat demand: darkred
+    rural heat: "#880000"
+    central heat: "#b22222"
+    decentral heat: "#800000"
+    low-temperature heat for industry: "#991111"
+    process heat: "#FF3333"
+    power-to-heat: red
+    resistive heater: pink
+    Sabatier: "#FF1493"
+    methanation: "#FF1493"
+    power-to-gas: purple
+    power-to-liquid: darkgreen
+    helmeth: "#7D0552"
+    DAC: deeppink
+    co2 stored: "#123456"
+    CO2 pipeline: gray
+    CO2 sequestration: "#123456"
+    co2: "#123456"
+    co2 vent: "#654321"
+    process emissions: "#222222"
+    process emissions CC: gray
+    process emissions to stored: "#444444"
+    process emissions to atmosphere: "#888888"
+    agriculture heat: "#D07A7A"
+    agriculture machinery oil: "#1e1e1e"
+    agriculture machinery oil emissions: "#111111"
+    agriculture electricity: "#222222"
+    Fischer-Tropsch: "#44DD33"
+    kerosene for aviation: "#44BB11"
+    naphtha for industry: "#44FF55"
+    land transport oil: "#44DD33"
+    land transport oil emissions: "#666666"
+    land transport fuel cell: "#AAAAAA"
+    land transport EV: grey
+    V2G: grey
+    BEV charger: grey
+    shipping: "#6495ED"
+    shipping oil: "#6495ED"
+    shipping oil emissions: "#6495ED"
+    water tanks: "#BBBBBB"
+    hot water storage: "#BBBBBB"
+    hot water charging: "#BBBBBB"
+    hot water discharging: "#999999"
+    Li ion: grey
+    district heating: "#CC4E5C"
+    retrofitting: purple
+    building retrofitting: purple
+    solid biomass transport: green
+    biomass EOP: green
+    high-temp electrolysis: magenta
+    today: "#D2691E"
+    Ambient: k
+
   nice_names:
-    OCGT: "Open-Cycle Gas"
-    CCGT: "Combined-Cycle Gas"
-    offwind-ac: "Offshore Wind (AC)"
-    offwind-dc: "Offshore Wind (DC)"
-    onwind: "Onshore Wind"
-    solar: "Solar"
-    PHS: "Pumped Hydro Storage"
-    hydro: "Reservoir & Dam"
-    battery: "Battery Storage"
-    H2: "Hydrogen Storage"
-    lines: "Transmission Lines"
-    ror: "Run of River"
+    OCGT: Open-Cycle Gas
+    CCGT: Combined-Cycle Gas
+    offwind-ac: Offshore Wind (AC)
+    offwind-dc: Offshore Wind (DC)
+    onwind: Onshore Wind
+    solar: Solar
+    PHS: Pumped Hydro Storage
+    hydro: Reservoir & Dam
+    battery: Battery Storage
+    H2: Hydrogen Storage
+    lines: Transmission Lines
+    ror: Run of River
diff --git a/config.tutorial.yaml b/config.tutorial.yaml
index 0584d12bd..0c98bb152 100644
--- a/config.tutorial.yaml
+++ b/config.tutorial.yaml
@@ -5,208 +5,35 @@ version: 0.4.1
 tutorial: true
-logging:
-  level: INFO
-  format: "%(levelname)s:%(name)s:%(message)s"
 countries: ["NG", "BJ"]
-  # ['DZ', 'AO', 'BJ', 'BW', 'BF', 'BI', 'CM', 'CF', 'TD', 'CG', 'CD',
-  # 'DJ', 'EG', 'GQ', 'ER', 'ET', 'GA', 'GH', 'GN', 'CI', 'KE', 'LS', 'LR', 'LY',
-  # 'MG', 'MW', 'ML', 'MR', 'MU', 'MA', 'MZ', 'NA', 'NE', 'NG', 'RW',
-  # 'SL', 'ZA', 'SS', 'SD', 'SZ', 'TZ', 'TG', 'TN', 'UG', 'ZM', 'ZW'] # list(AFRICA_CC)
-
-  #["NG"] # Nigeria
-  #["NE"] # Niger
-  #["SL"] # Sierra Leone
-  #["MA"] # Morocco
-  #["ZA"] # South Africa
-
 enable:
-  # prepare_links_p_nom: false
-  retrieve_databundle: true
-  retrieve_cost_data: true
-  download_osm_data: true
-  # If "build_cutout": true # requires cds API key https://cds.climate.copernicus.eu/api-how-to
-  # More information https://atlite.readthedocs.io/en/latest/introduction.html#datasets
-  build_cutout: false
-  build_natura_raster: true # If True, then build_natura_raster can be run
-
-custom_rules: [] # Default empty [] or link to custom rule file e.g.
["my_folder/my_rules.smk"] that add rules to Snakefile - -run: - name: "" + build_natura_raster: true + progress_bar: false scenario: - simpl: [''] - ll: ['copt'] clusters: [6] opts: [Co2L-4H] -summary_dir: results - snapshots: start: "2013-03-1" end: "2013-03-7" - inclusive: "left" # end is not inclusive - -# definition of the Coordinate Reference Systems -crs: - geo_crs: EPSG:4326 # general geographic projection, not used for metric measures. "EPSG:4326" is the standard used by OSM and google maps - distance_crs: EPSG:3857 # projection for distance measurements only. Possible recommended values are "EPSG:3857" (used by OSM and Google Maps) - area_crs: ESRI:54009 # projection for area measurements only. Possible recommended values are Global Mollweide "ESRI:54009" -# CI relevant -retrieve_databundle: # required to be "false" for nice CI test output - show_progress: true # show (true) or do not show (false) the progress bar in retrieve_databundle while downloading data - -augmented_line_connection: - add_to_snakefile: false # If True, includes this rule to the workflow - connectivity_upgrade: 2 # Min. lines connection per node, https://networkx.org/documentation/stable/reference/algorithms/generated/networkx.algorithms.connectivity.edge_augmentation.k_edge_augmentation.html#networkx.algorithms.connectivity.edge_augmentation.k_edge_augmentation - new_line_type: ["HVAC"] # Expanded lines can be either ["HVAC"] or ["HVDC"] or both ["HVAC", "HVDC"] - min_expansion: 1 # [MW] New created line expands by float/int input - min_DC_length: 600 # [km] Minimum line length of DC line - -# if True clusters to GADM shapes, if False Voronoi cells will be clustered -cluster_options: - simplify_network: - to_substations: false # network is simplified to nodes with positive or negative power injection (i.e. substations or offwind connections) - algorithm: kmeans # choose from: [hac, kmeans] - feature: solar+onwind-time # only for hac. choose from: [solar+onwind-time, solar+onwind-cap, solar-time, solar-cap, solar+offwind-cap] etc. - exclude_carriers: [] - remove_stubs: true - remove_stubs_across_borders: true - p_threshold_drop_isolated: 20 # [MW] isolated buses are being discarded if bus mean power is below the specified threshold - p_threshold_merge_isolated: 300 # [MW] isolated buses are being merged into a single isolated bus if a bus mean power is below the specified threshold - s_threshold_fetch_isolated: 0.05 # [-] a share of the national load for merging an isolated network into a backbone network - cluster_network: - algorithm: kmeans - feature: solar+onwind-time - exclude_carriers: [] - alternative_clustering: false # "False" use Voronoi shapes, "True" use GADM shapes - distribute_cluster: ['load'] # ['load'],['pop'] or ['gdp'] - out_logging: true # When true, logging is printed to console - aggregation_strategies: - generators: # use "min" for more conservative assumptions - p_nom: sum - p_nom_max: sum - p_nom_min: sum - p_min_pu: mean - marginal_cost: mean - committable: any - ramp_limit_up: max - ramp_limit_down: max - efficiency: mean - -# options for build_shapes -build_shape_options: - gadm_layer_id: 1 # GADM level area used for the gadm_shapes. 
Codes are country-dependent but roughly: 0: country, 1: region/county-like, 2: municipality-like
-  update_file: false # When true, all the input files are downloaded again and replace the existing files
-  out_logging: true # When true, logging is printed to console
-  year: 2020 # reference year used to derive shapes, info on population and info on GDP
-  nprocesses: 2 # number of processes to be used in build_shapes
-  worldpop_method: "standard" # "standard" pulls from web 1kmx1km raster, "api" pulls from API 100mx100m raster, false (not "false") no pop addition to shape, which is useful when generating only the cutout
-  gdp_method: "standard" # "standard" pulls from web 1x1km raster, false (not "false") no gdp addition to shape, which is useful when generating only the cutout
-  contended_flag: "set_by_country" # "set_by_country" assigns the contended areas to the countries according to the GADM database, "drop" drops these contended areas from the model
-  gadm_file_prefix: "gadm41_"
-  gadm_url_prefix: "https://geodata.ucdavis.edu/gadm/gadm4.1/gpkg/"
 clean_osm_data_options:
-  names_by_shapes: true # Set the country name based on the extended country shapes
-  threshold_voltage: 35000 # [V] minimum voltage threshold to keep the asset (cable, line, generator, etc.)
-  tag_substation: "transmission" # needed feature tag to be considered for the analysis. If empty, no filtering on the tag_substation is performed
-  add_line_endings: true # When true, the line endings are added to the dataset of the substations
-  generator_name_method: OSM # Methodology to specify the name of the generator. Options: OSM (name as in the OSM dataset), closest_city (name of the closest city)
-  use_custom_lines: "OSM_only" # Use OSM (OSM_only), customized (custom_only), or both data sets (add_custom)
-  path_custom_lines: false # If it exists, provide the absolute path of the custom file, e.g. (...\data\custom_lines.geojson)
-  use_custom_substations: "OSM_only" # Use OSM (OSM_only), customized (custom_only), or both data sets (add_custom)
-  path_custom_substations: false # If it exists, provide the absolute path of the custom file, e.g. (...\data\custom_substations.geojson)
-  use_custom_cables: "OSM_only" # Use OSM (OSM_only), customized (custom_only), or both data sets (add_custom)
-  path_custom_cables: false # If it exists, provide the absolute path of the custom file, e.g. (...\data\custom_cables.geojson)
-
-build_osm_network: # Options of the build_osm_network script; osm = OpenStreetMap
-  group_close_buses: true # When true, close buses are merged, guaranteeing voltage matching among line endings
-  group_tolerance_buses: 5000 # [m] (default 5000) tolerance in meters for merging close buses
-  split_overpassing_lines: true # When true, lines overpassing buses are split and connected to the buses
-  overpassing_lines_tolerance: 1 # [m] (default 1) tolerance to identify lines overpassing buses
-  force_ac: false # When true, it forces all components (lines and substations) to be AC-only. To be used if DC assets create problems.
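For orientation, the `gadm_file_prefix`/`gadm_url_prefix` pair above encodes how GADM 4.1 geopackages are fetched; the files themselves are keyed by ISO3 country codes. A hypothetical sketch of the URL assembly (not the actual build_shapes code):

import country_converter as coco

def gadm_url(url_prefix, file_prefix, iso2_country):
    # GADM 4.1 geopackages are named by ISO3 code, e.g. gadm41_NGA.gpkg.
    iso3 = coco.convert(names=iso2_country, to="ISO3")
    return f"{url_prefix}{file_prefix}{iso3}.gpkg"

# gadm_url("https://geodata.ucdavis.edu/gadm/gadm4.1/gpkg/", "gadm41_", "NG")
# -> "https://geodata.ucdavis.edu/gadm/gadm4.1/gpkg/gadm41_NGA.gpkg"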
+  threshold_voltage: 35000
 base_network:
-  min_voltage_substation_offshore: 35000 # [V] minimum voltage of the offshore substations
+  min_voltage_substation_offshore: 35000
   min_voltage_rebase_voltage: 35000
-load_options:
-  ssp: "ssp2-2.6" # shared socio-economic pathway (GDP and population growth) scenario to consider
-  weather_year: 2013 # Load scenarios available with different weather year (different renewable potentials)
-  prediction_year: 2030 # Load scenarios available with different prediction year (GDP, population)
-  scale: 1 # scales all load time-series, i.e. 2 = doubles load
 electricity:
-  base_voltage: 380.
-  voltages: [132., 220., 300., 380., 500., 750.]
   co2limit: 1.487e+9
   co2base: 1.487e+9
-  agg_p_nom_limits: data/agg_p_nom_minmax.csv
-  hvdc_as_lines: false # should HVDC lines be modeled as `Line` or as `Link` component?
   automatic_emission: true
-  automatic_emission_base_year: 1990 # 1990 is taken as default. Any year from 1970 to 2018 can be selected.
-
-  operational_reserve: # like https://genxproject.github.io/GenX/dev/core/#Reserves
-    activate: false
-    epsilon_load: 0.02 # share of total load
-    epsilon_vres: 0.02 # share of total renewable supply
-    contingency: 0 # fixed capacity in MW
-
-  max_hours:
-    battery: 6
-    H2: 168
-  extendable_carriers:
-    Generator: [solar, onwind, offwind-ac, offwind-dc, OCGT]
-    StorageUnit: [] # battery, H2
-    Store: [battery, H2]
-    Link: [] # H2 pipeline
-
-  powerplants_filter: (DateOut >= 2022 or DateOut != DateOut)
-  custom_powerplants: false # "false" use only powerplantmatching (ppm) data, "merge" combines ppm and custom powerplants, "replace" use only custom powerplants
-
-  conventional_carriers: [nuclear, oil, OCGT, CCGT, coal, lignite, geothermal, biomass]
-  renewable_carriers: [solar, onwind, offwind-ac, offwind-dc, hydro]
-
-  estimate_renewable_capacities:
-    stats: "irena" # false = greenfield expansion, 'irena' uses IRENA stats to add expansion limits
-    year: 2020 # reference year; available years for IRENA stats are 2000 to 2020
-    p_nom_min: 1 # any float, scales the minimum expansion acquired from stats, i.e. 110% of the reference year's capacities => p_nom_min: 1.1
-    p_nom_max: false # sets the expansion constraint; false deactivates this option and uses the estimated renewable potentials determined by the workflow, a float scales the p_nom_min factor accordingly
-    technology_mapping:
-      # Wind is the Fueltype in ppm.data.Capacity_stats; onwind, offwind-{ac,dc} are the carriers in PyPSA-Earth
-      Offshore: [offwind-ac, offwind-dc]
-      Onshore: [onwind]
-      PV: [solar]
-lines:
-  ac_types:
-    132.: "243-AL1/39-ST1A 20.0"
-    220.: "Al/St 240/40 2-bundle 220.0"
-    300.: "Al/St 240/40 3-bundle 300.0"
-    380.: "Al/St 240/40 4-bundle 380.0"
-    500.: "Al/St 240/40 4-bundle 380.0"
-    750.: "Al/St 560/50 4-bundle 750.0"
-  dc_types:
-    500.: "HVDC XLPE 1000"
-  s_max_pu: 0.7
-  s_nom_max: .inf
-  length_factor: 1.25
-  under_construction: "zero" # 'zero': set capacity to zero, 'remove': remove, 'keep': with full capacity
-
-links:
-  p_max_pu: 1.0
-  p_nom_max: .inf
-  under_construction: "zero" # 'zero': set capacity to zero, 'remove': remove, 'keep': with full capacity
-
-transformers:
-  x: 0.1
-  s_nom: 2000.
-  type: ""
 atlite:
   nprocesses: 4
@@ -214,298 +41,26 @@ atlite:
   # use 'base' to determine geographical bounds and time span from config
   # base:
   # module: era5
-  cutout-2013-era5-tutorial:
-    module: era5
-    dx: 0.3 # cutout resolution
-    dy: 0.3 # cutout resolution
-    # The cutout time is automatically set by the snapshot range. See `snapshots:` option above and 'build_cutout.py'.
-    # time: ["2013-01-01", "2014-01-01"] # to manually specify a different weather year (~70 years available)
-    # The cutout spatial extent [x,y] is automatically set by country selection. See `countries:` option above and 'build_cutout.py'.
-    # x: [-12., 35.] # set the cutout range manually, instead of automatically from the country boundaries
-    # y: [33., 72] # manually set the cutout range
+  cutout-2013-era5-tutorial: {}
 renewable:
   onwind:
     cutout: cutout-2013-era5-tutorial
-    resource:
-      method: wind
-      turbine: Vestas_V112_3MW
-    capacity_per_sqkm: 3 # ScholzPhd Tab 4.3.1: 10MW/km^2
-    # correction_factor: 0.93
-    copernicus:
-      # Scholz, Y. (2012). Renewable energy based electricity supply at low costs:
-      # development of the REMix model and application for Europe. (p.42 / p.28)
-      grid_codes: [20, 30, 40, 60, 100, 111, 112, 113, 114, 115, 116, 121, 122, 123, 124, 125, 126]
-      distance: 1000
-      distance_grid_codes: [50]
-    natura: true
-    potential: simple # or conservative
-    clip_p_max_pu: 1.e-2
-    extendable: true
   offwind-ac:
     cutout: cutout-2013-era5-tutorial
-    resource:
-      method: wind
-      turbine: NREL_ReferenceTurbine_5MW_offshore
-    capacity_per_sqkm: 3
-    # correction_factor: 0.93
-    copernicus:
-      grid_codes: [80, 200]
-    natura: true
-    max_depth: 50
-    max_shore_distance: 30000
-    potential: simple # or conservative
-    clip_p_max_pu: 1.e-2
-    extendable: true
   offwind-dc:
     cutout: cutout-2013-era5-tutorial
-    resource:
-      method: wind
-      turbine: NREL_ReferenceTurbine_5MW_offshore
-    # ScholzPhd Tab 4.3.1: 10MW/km^2
-    capacity_per_sqkm: 3
-    # correction_factor: 0.93
-    copernicus:
-      grid_codes: [80, 200]
-    natura: true
-    max_depth: 50
-    min_shore_distance: 30000
-    potential: simple # or conservative
-    clip_p_max_pu: 1.e-2
-    extendable: true
   solar:
     cutout: cutout-2013-era5-tutorial
-    resource:
-      method: pv
-      panel: CSi
-      orientation: latitude_optimal # leads to an optimal design
-      # slope: 0. # slope: 0 represents a flat panel
-      # azimuth: 180. # azimuth: 180 south orientation
-    capacity_per_sqkm: 4.6 # From 1.7 to 4.6 addresses issue #361
-    # Determined by comparing uncorrected area-weighted full-load hours to those
-    # published in Supplementary Data to
-    # Pietzcker, Robert Carl, et al. "Using the sun to decarbonize the power
-    # sector: The economic potential of photovoltaics and concentrating solar
-    # power." Applied Energy 135 (2014): 704-720.
-    correction_factor: 0.854337
-    copernicus:
-      grid_codes: [20, 30, 40, 50, 60, 90, 100]
-    natura: true
-    potential: simple # or conservative
-    clip_p_max_pu: 1.e-2
-    extendable: true
   hydro:
     cutout: cutout-2013-era5-tutorial
     hydrobasins_level: 4
-    resource:
-      method: hydro
-      hydrobasins: data/hydrobasins/hybas_world.shp
-      flowspeed: 1.0 # m/s
-      # weight_with_height: false
-      # show_progress: true
-    carriers: [ror, PHS, hydro]
-    PHS_max_hours: 6
-    hydro_max_hours: "energy_capacity_totals_by_country" # one of energy_capacity_totals_by_country, estimate_by_large_installations or a float
-    hydro_max_hours_default: 6.0 # (optional, default 6) default value of max_hours for hydro when NaN values are found
-    clip_min_inflow: 1.0
-    normalization:
-      method: hydro_capacities # 'hydro_capacities' to rescale country hydro production by using hydro_capacities, 'eia' to rescale by EIA data, false for no rescaling
-      year: 2013 # (optional) year of statistics used to rescale the runoff time series.
When not provided, the cutout weather year is used
-      multiplier: 1.1 # multiplier applied after the normalization of the hydro production; default 1.0
+    hydro_max_hours: "energy_capacity_totals_by_country"
   csp:
     cutout: cutout-2013-era5-tutorial
-    resource:
-      method: csp
-      installation: SAM_solar_tower
-    capacity_per_sqkm: 2.392 # From 1.7 to 4.6 addresses issue #361
-    # Determined by comparing uncorrected area-weighted full-load hours to those
-    # published in Supplementary Data to
-    # Pietzcker, Robert Carl, et al. "Using the sun to decarbonize the power
-    # sector: The economic potential of photovoltaics and concentrating solar
-    # power." Applied Energy 135 (2014): 704-720.
-    copernicus:
-      grid_codes: [20, 30, 40, 60, 90]
-      distancing_codes: [50]
-      distance_to_codes: 3000
-    natura: true
-    potential: simple # or conservative
-    clip_p_max_pu: 1.e-2
-    extendable: true
-    csp_model: advanced # simple or advanced
-
-# TODO: Needs to be adjusted for Africa
-costs:
-  year: 2030
-  version: v0.5.0
-  rooftop_share: 0.14 # based on the potentials, assuming (0.1 kW/m2 and 10 m2/person)
-  USD2013_to_EUR2013: 0.7532 # [EUR/USD] ECB: https://www.ecb.europa.eu/stats/exchange/eurofxref/html/eurofxref-graph-usd.en.html
-  fill_values:
-    FOM: 0
-    VOM: 0
-    efficiency: 1
-    fuel: 0
-    investment: 0
-    lifetime: 25
-    CO2 intensity: 0
-    discount rate: 0.07
-  marginal_cost: # EUR/MWh
-    solar: 0.01
-    onwind: 0.015
-    offwind: 0.015
-    hydro: 0.
-    H2: 0.
-    electrolysis: 0.
-    fuel cell: 0.
-    battery: 0.
-    battery inverter: 0.
-  emission_prices: # in currency per tonne emission, only used with the option Ep
-    co2: 0.
-  # investment: # EUR/MW
-  #   CCGT: 830000
-  # FOM: # %/year
-  #   CCGT: 3.35
-  # VOM: # EUR/MWh
-  #   CCGT: 4.2
-  # fuel: # EUR/MWh
-  #   gas: 10.1
-  # lifetime: # years
-  #   CCGT: 25.0
-  # efficiency: # per unit
-  #   CCGT: 0.58
-monte_carlo:
-  # Description: Specify Monte Carlo sampling options for uncertainty analysis.
-  # Define the option list for Monte Carlo sampling.
-  # Make sure add_to_snakefile is set to true to enable Monte Carlo.
-  options:
-    add_to_snakefile: false # When set to true, enables Monte Carlo sampling
-    samples: 9 # number of optimizations. Note that the number of samples when using scipy has to be the square of a prime number
-    sampling_strategy: "chaospy" # "pydoe2", "chaospy", "scipy" are the supported packages
-    seed: 42 # set seed for reproducibility
-  # Uncertainties on any PyPSA object are specified by declaring the specific PyPSA object under the key 'uncertainties'.
-  # For each PyPSA object, the 'type' and 'args' keys represent the type of distribution and its arguments, respectively.
-  # Supported distribution types are uniform, normal, lognormal, triangle, beta and gamma.
-  # The arguments of the distribution are passed using the key 'args', tailored by distribution type:
-  # normal: [mean, std], lognormal: [mean, std], uniform: [lower_bound, upper_bound],
-  # triangle: [mid_point (between 0 - 1)], beta: [alpha, beta], gamma: [shape, scale]
-  # More info on the distributions is documented in the Chaospy reference guide:
-  # https://chaospy.readthedocs.io/en/master/reference/distribution/index.html
-  # An abstract example is as follows:
-  # {pypsa network object, e.g.
"loads_t.p_set"}: - # type: {any supported distribution among the previous: "uniform", "normal", ...} - # args: {arguments passed as a list depending on the distribution, see the above and more at https://pypsa.readthedocs.io/} - uncertainties: - loads_t.p_set: - type: uniform - args: [0, 1] - generators_t.p_max_pu.loc[:, n.generators.carrier == "onwind"]: - type: lognormal - args: [1.5] - generators_t.p_max_pu.loc[:, n.generators.carrier == "solar"]: - type: beta - args: [0.5, 2] - solving: - options: - formulation: kirchhoff - load_shedding: true - noisy_costs: true - min_iterations: 4 - max_iterations: 6 - clip_p_max_pu: 0.01 - skip_iterations: true - track_iterations: false - #nhours: 10 solver: name: glpk - - -plotting: - map: - figsize: [7, 7] - boundaries: [-10.2, 29, 35, 72] - p_nom: - bus_size_factor: 5.e+4 - linewidth_factor: 3.e+3 - - costs_max: 800 - costs_threshold: 1 - - energy_max: 15000. - energy_min: -10000. - energy_threshold: 50. - - vre_techs: ["onwind", "offwind-ac", "offwind-dc", "solar", "ror"] - conv_techs: ["OCGT", "CCGT", "nuclear", "coal", "oil"] - storage_techs: ["hydro+PHS", "battery", "H2"] - load_carriers: ["AC load"] - AC_carriers: ["AC line", "AC transformer"] - link_carriers: ["DC line", "Converter AC-DC"] - tech_colors: - "onwind": "#235ebc" - "onshore wind": "#235ebc" - "offwind": "#6895dd" - "offwind-ac": "#6895dd" - "offshore wind": "#6895dd" - "offshore wind ac": "#6895dd" - "offwind-dc": "#74c6f2" - "offshore wind dc": "#74c6f2" - "hydro": "#08ad97" - "hydro+PHS": "#08ad97" - "PHS": "#08ad97" - "hydro reservoir": "#08ad97" - "hydroelectricity": "#08ad97" - "ror": "#4adbc8" - "run of river": "#4adbc8" - "solar": "#f9d002" - "solar PV": "#f9d002" - "solar thermal": "#ffef60" - "biomass": "#0c6013" - "solid biomass": "#06540d" - "biogas": "#23932d" - "waste": "#68896b" - "geothermal": "#ba91b1" - "OCGT": "#d35050" - "gas": "#d35050" - "natural gas": "#d35050" - "CCGT": "#b20101" - "nuclear": "#ff9000" - "coal": "#707070" - "lignite": "#9e5a01" - "oil": "#262626" - "H2": "#ea048a" - "hydrogen storage": "#ea048a" - "battery": "#b8ea04" - "Electric load": "#f9d002" - "electricity": "#f9d002" - "lines": "#70af1d" - "transmission lines": "#70af1d" - "AC": "#70af1d" - "AC-AC": "#70af1d" - "AC line": "#70af1d" - "links": "#8a1caf" - "HVDC links": "#8a1caf" - "DC": "#8a1caf" - "DC-DC": "#8a1caf" - "DC link": "#8a1caf" - "load": "#ff0000" - "load shedding": "#ff0000" - "battery discharger": slategray - "battery charger": slategray - "h2 fuel cell": '#c251ae' - "h2 electrolysis": '#ff29d9' - "csp": "#fdd404" - nice_names: - OCGT: "Open-Cycle Gas" - CCGT: "Combined-Cycle Gas" - offwind-ac: "Offshore Wind (AC)" - offwind-dc: "Offshore Wind (DC)" - onwind: "Onshore Wind" - solar: "Solar" - PHS: "Pumped Hydro Storage" - hydro: "Reservoir & Dam" - battery: "Battery Storage" - H2: "Hydrogen Storage" - lines: "Transmission Lines" - ror: "Run of River" + options: glpk-default diff --git a/configs/powerplantmatching_config.yaml b/configs/powerplantmatching_config.yaml index 86ac59489..ca2dbcb9e 100644 --- a/configs/powerplantmatching_config.yaml +++ b/configs/powerplantmatching_config.yaml @@ -14,7 +14,7 @@ google_api_key: #matching config matching_sources: - # - CARMA # deprecated as no more public +# - CARMA # deprecated as no more public - GEO - GPD - GBPT @@ -30,7 +30,7 @@ matching_sources: # - EXTERNAL_DATABASE fully_included_sources: - # - CARMA # deprecated as no more public +# - CARMA # deprecated as no more public - GEO - GPD - GBPT diff --git 
a/configs/scenarios/config.NG.yaml b/configs/scenarios/config.NG.yaml index 76e237b12..e99574f3a 100644 --- a/configs/scenarios/config.NG.yaml +++ b/configs/scenarios/config.NG.yaml @@ -1,22 +1,15 @@ # SPDX-FileCopyrightText: PyPSA-Earth and PyPSA-Eur Authors # # SPDX-License-Identifier: CC0-1.0 +version: 0.5.0 -# Changes with respect to the base configuration file specified in run->base_config -# default value is config.tutorial.yaml run: name: NG # Name of the configuration (arbitrary string value) shared_cutouts: true # set to true to share the default cutout(s) across runs base_config: config.tutorial.yaml # base configuration file -retrieve_databundle: # required to be "false" for nice CI test output - show_progress: false - countries: - NG scenario: clusters: [5] - -enable: - retrieve_databundle: false diff --git a/data/AL_production.csv b/data/AL_production.csv new file mode 100644 index 000000000..5f0f6734d --- /dev/null +++ b/data/AL_production.csv @@ -0,0 +1,258 @@ +country,production[ktons/a],Year,source +CN,36000,2019,https://en.wikipedia.org/wiki/List_of_countries_by_aluminium_production +IN,3700,2019,https://en.wikipedia.org/wiki/List_of_countries_by_aluminium_production +RU,3600,2019,https://en.wikipedia.org/wiki/List_of_countries_by_aluminium_production +CA,2900,2019,https://en.wikipedia.org/wiki/List_of_countries_by_aluminium_production +AE,2700,2019,https://en.wikipedia.org/wiki/List_of_countries_by_aluminium_production +AU,1580,2018,https://en.wikipedia.org/wiki/List_of_countries_by_aluminium_production +BH,1400,2019,https://en.wikipedia.org/wiki/List_of_countries_by_aluminium_production +NO,1300,2019,https://en.wikipedia.org/wiki/List_of_countries_by_aluminium_production +US,1100,2019,https://en.wikipedia.org/wiki/List_of_countries_by_aluminium_production +SA,932,2018,https://en.wikipedia.org/wiki/List_of_countries_by_aluminium_production +IS,850,2019,https://en.wikipedia.org/wiki/List_of_countries_by_aluminium_production +MY,760,2018,https://en.wikipedia.org/wiki/List_of_countries_by_aluminium_production +ZA,714,2018,https://en.wikipedia.org/wiki/List_of_countries_by_aluminium_production +BR,659,2018,https://en.wikipedia.org/wiki/List_of_countries_by_aluminium_production +QA,616,2018,https://en.wikipedia.org/wiki/List_of_countries_by_aluminium_production +MZ,571,2018,https://en.wikipedia.org/wiki/List_of_countries_by_aluminium_production +DE,529,2018,https://en.wikipedia.org/wiki/List_of_countries_by_aluminium_production +AR,438,2018,https://en.wikipedia.org/wiki/List_of_countries_by_aluminium_production +FR,380,2018,https://en.wikipedia.org/wiki/List_of_countries_by_aluminium_production +OM,380,2018,https://en.wikipedia.org/wiki/List_of_countries_by_aluminium_production +IR,370,2018,https://en.wikipedia.org/wiki/List_of_countries_by_aluminium_production +ES,350,2018,https://en.wikipedia.org/wiki/List_of_countries_by_aluminium_production +NZ,340,2018,https://en.wikipedia.org/wiki/List_of_countries_by_aluminium_production +EG,317,2018,https://en.wikipedia.org/wiki/List_of_countries_by_aluminium_production +RO,283,2018,https://en.wikipedia.org/wiki/List_of_countries_by_aluminium_production +KZ,258,2018,https://en.wikipedia.org/wiki/List_of_countries_by_aluminium_production +ID,242,2018,https://en.wikipedia.org/wiki/List_of_countries_by_aluminium_production +SK,212,2019,https://en.wikipedia.org/wiki/List_of_countries_by_aluminium_production +GR,184,2019,https://en.wikipedia.org/wiki/List_of_countries_by_aluminium_production 
+SE,126,2019,https://en.wikipedia.org/wiki/List_of_countries_by_aluminium_production +VE,108,2019,https://en.wikipedia.org/wiki/List_of_countries_by_aluminium_production +NG,0,2019,no available information online assumed value of 0 +BJ,0,2019,no available information online assumed value of 0 +AF,0,2019,no available information online assumed value of 0 +AL,0,2019,no available information online assumed value of 0 +DZ,0,2019,no available information online assumed value of 0 +AS,0,2019,no available information online assumed value of 0 +AD,0,2019,no available information online assumed value of 0 +AO,0,2019,no available information online assumed value of 0 +AI,0,2019,no available information online assumed value of 0 +AQ,0,2019,no available information online assumed value of 0 +AG,0,2019,no available information online assumed value of 0 +AM,0,2019,no available information online assumed value of 0 +AW,0,2019,no available information online assumed value of 0 +AT,0,2019,no available information online assumed value of 0 +AZ,0,2019,no available information online assumed value of 0 +BS,0,2019,no available information online assumed value of 0 +BD,0,2019,no available information online assumed value of 0 +BB,0,2019,no available information online assumed value of 0 +BY,0,2019,no available information online assumed value of 0 +BE,0,2019,no available information online assumed value of 0 +BZ,0,2019,no available information online assumed value of 0 +BM,0,2019,no available information online assumed value of 0 +BT,0,2019,no available information online assumed value of 0 +BO,0,2019,no available information online assumed value of 0 +BA,0,2019,no available information online assumed value of 0 +BW,0,2019,no available information online assumed value of 0 +BV,0,2019,no available information online assumed value of 0 +IO,0,2019,no available information online assumed value of 0 +BN,0,2019,no available information online assumed value of 0 +BG,0,2019,no available information online assumed value of 0 +BF,0,2019,no available information online assumed value of 0 +BI,0,2019,no available information online assumed value of 0 +CV,0,2019,no available information online assumed value of 0 +KH,0,2019,no available information online assumed value of 0 +CM,0,2019,no available information online assumed value of 0 +KY,0,2019,no available information online assumed value of 0 +CF,0,2019,no available information online assumed value of 0 +TD,0,2019,no available information online assumed value of 0 +CL,0,2019,no available information online assumed value of 0 +CX,0,2019,no available information online assumed value of 0 +CC,0,2019,no available information online assumed value of 0 +CO,0,2019,no available information online assumed value of 0 +KM,0,2019,no available information online assumed value of 0 +CG,0,2019,no available information online assumed value of 0 +CD,0,2019,no available information online assumed value of 0 +CK,0,2019,no available information online assumed value of 0 +CR,0,2019,no available information online assumed value of 0 +CI,0,2019,no available information online assumed value of 0 +HR,0,2019,no available information online assumed value of 0 +CU,0,2019,no available information online assumed value of 0 +CW,0,2019,no available information online assumed value of 0 +CY,0,2019,no available information online assumed value of 0 +CZ,0,2019,no available information online assumed value of 0 +DK,0,2019,no available information online assumed value of 0 +DJ,0,2019,no available information 
online assumed value of 0 +DM,0,2019,no available information online assumed value of 0 +DO,0,2019,no available information online assumed value of 0 +EC,0,2019,no available information online assumed value of 0 +SV,0,2019,no available information online assumed value of 0 +GQ,0,2019,no available information online assumed value of 0 +ER,0,2019,no available information online assumed value of 0 +EE,0,2019,no available information online assumed value of 0 +SZ,0,2019,no available information online assumed value of 0 +ET,0,2019,no available information online assumed value of 0 +FK,0,2019,no available information online assumed value of 0 +FO,0,2019,no available information online assumed value of 0 +FJ,0,2019,no available information online assumed value of 0 +FI,0,2019,no available information online assumed value of 0 +GF,0,2019,no available information online assumed value of 0 +PF,0,2019,no available information online assumed value of 0 +TF,0,2019,no available information online assumed value of 0 +GA,0,2019,no available information online assumed value of 0 +GM,0,2019,no available information online assumed value of 0 +GE,0,2019,no available information online assumed value of 0 +GH,0,2019,no available information online assumed value of 0 +GI,0,2019,no available information online assumed value of 0 +GL,0,2019,no available information online assumed value of 0 +GD,0,2019,no available information online assumed value of 0 +GP,0,2019,no available information online assumed value of 0 +GU,0,2019,no available information online assumed value of 0 +GT,0,2019,no available information online assumed value of 0 +GG,0,2019,no available information online assumed value of 0 +GN,0,2019,no available information online assumed value of 0 +GW,0,2019,no available information online assumed value of 0 +GY,0,2019,no available information online assumed value of 0 +HT,0,2019,no available information online assumed value of 0 +HM,0,2019,no available information online assumed value of 0 +VA,0,2019,no available information online assumed value of 0 +HN,0,2019,no available information online assumed value of 0 +HK,0,2019,no available information online assumed value of 0 +HU,0,2019,no available information online assumed value of 0 +IM,0,2019,no available information online assumed value of 0 +IL,0,2019,no available information online assumed value of 0 +IT,0,2019,no available information online assumed value of 0 +JM,0,2019,no available information online assumed value of 0 +JP,0,2019,no available information online assumed value of 0 +JE,0,2019,no available information online assumed value of 0 +JO,0,2019,no available information online assumed value of 0 +KE,0,2019,no available information online assumed value of 0 +KI,0,2019,no available information online assumed value of 0 +KP,0,2019,no available information online assumed value of 0 +KW,0,2019,no available information online assumed value of 0 +KG,0,2019,no available information online assumed value of 0 +LA,0,2019,no available information online assumed value of 0 +LV,0,2019,no available information online assumed value of 0 +LB,0,2019,no available information online assumed value of 0 +LS,0,2019,no available information online assumed value of 0 +LR,0,2019,no available information online assumed value of 0 +LY,0,2019,no available information online assumed value of 0 +LI,0,2019,no available information online assumed value of 0 +LT,0,2019,no available information online assumed value of 0 +LU,0,2019,no available information online assumed value 
of 0 +MO,0,2019,no available information online assumed value of 0 +MG,0,2019,no available information online assumed value of 0 +MW,0,2019,no available information online assumed value of 0 +MV,0,2019,no available information online assumed value of 0 +ML,0,2019,no available information online assumed value of 0 +MT,0,2019,no available information online assumed value of 0 +MH,0,2019,no available information online assumed value of 0 +MQ,0,2019,no available information online assumed value of 0 +MR,0,2019,no available information online assumed value of 0 +MU,0,2019,no available information online assumed value of 0 +YT,0,2019,no available information online assumed value of 0 +MX,0,2019,no available information online assumed value of 0 +FM,0,2019,no available information online assumed value of 0 +MD,0,2019,no available information online assumed value of 0 +MC,0,2019,no available information online assumed value of 0 +MN,0,2019,no available information online assumed value of 0 +ME,0,2019,no available information online assumed value of 0 +MS,0,2019,no available information online assumed value of 0 +MA,0,2019,no available information online assumed value of 0 +MM,0,2019,no available information online assumed value of 0 +PW,0,2019,no available information online assumed value of 0 +NA,0,2019,no available information online assumed value of 0 +NR,0,2019,no available information online assumed value of 0 +NP,0,2019,no available information online assumed value of 0 +NL,0,2019,no available information online assumed value of 0 +NC,0,2019,no available information online assumed value of 0 +NZ,0,2019,no available information online assumed value of 0 +NI,0,2019,no available information online assumed value of 0 +NE,0,2019,no available information online assumed value of 0 +NU,0,2019,no available information online assumed value of 0 +NF,0,2019,no available information online assumed value of 0 +MP,0,2019,no available information online assumed value of 0 +NO,0,2019,no available information online assumed value of 0 +OM,0,2019,no available information online assumed value of 0 +PK,0,2019,no available information online assumed value of 0 +PS,0,2019,no available information online assumed value of 0 +PA,0,2019,no available information online assumed value of 0 +PG,0,2019,no available information online assumed value of 0 +PY,0,2019,no available information online assumed value of 0 +PE,0,2019,no available information online assumed value of 0 +PH,0,2019,no available information online assumed value of 0 +PN,0,2019,no available information online assumed value of 0 +PL,0,2019,no available information online assumed value of 0 +PT,0,2019,no available information online assumed value of 0 +PR,0,2019,no available information online assumed value of 0 +QA,0,2019,no available information online assumed value of 0 +MK,0,2019,no available information online assumed value of 0 +RO,0,2019,no available information online assumed value of 0 +RU,0,2019,no available information online assumed value of 0 +RW,0,2019,no available information online assumed value of 0 +RE,0,2019,no available information online assumed value of 0 +BL,0,2019,no available information online assumed value of 0 +SH,0,2019,no available information online assumed value of 0 +KN,0,2019,no available information online assumed value of 0 +LC,0,2019,no available information online assumed value of 0 +MF,0,2019,no available information online assumed value of 0 +PM,0,2019,no available information online assumed value of 0 +VC,0,2019,no 
available information online assumed value of 0 +WS,0,2019,no available information online assumed value of 0 +SM,0,2019,no available information online assumed value of 0 +ST,0,2019,no available information online assumed value of 0 +SA,0,2019,no available information online assumed value of 0 +SN,0,2019,no available information online assumed value of 0 +RS,0,2019,no available information online assumed value of 0 +SC,0,2019,no available information online assumed value of 0 +SL,0,2019,no available information online assumed value of 0 +SG,0,2019,no available information online assumed value of 0 +SX,0,2019,no available information online assumed value of 0 +SK,0,2019,no available information online assumed value of 0 +SI,0,2019,no available information online assumed value of 0 +SB,0,2019,no available information online assumed value of 0 +SO,0,2019,no available information online assumed value of 0 +ZA,0,2019,no available information online assumed value of 0 +GS,0,2019,no available information online assumed value of 0 +SS,0,2019,no available information online assumed value of 0 +ES,0,2019,no available information online assumed value of 0 +LK,0,2019,no available information online assumed value of 0 +SD,0,2019,no available information online assumed value of 0 +SR,0,2019,no available information online assumed value of 0 +SJ,0,2019,no available information online assumed value of 0 +SE,0,2019,no available information online assumed value of 0 +CH,0,2019,no available information online assumed value of 0 +SY,0,2019,no available information online assumed value of 0 +TW,0,2019,no available information online assumed value of 0 +TJ,0,2019,no available information online assumed value of 0 +TZ,0,2019,no available information online assumed value of 0 +TH,0,2019,no available information online assumed value of 0 +TL,0,2019,no available information online assumed value of 0 +TG,0,2019,no available information online assumed value of 0 +TK,0,2019,no available information online assumed value of 0 +TO,0,2019,no available information online assumed value of 0 +TT,0,2019,no available information online assumed value of 0 +TN,0,2019,no available information online assumed value of 0 +TR,0,2019,no available information online assumed value of 0 +TM,0,2019,no available information online assumed value of 0 +TC,0,2019,no available information online assumed value of 0 +TV,0,2019,no available information online assumed value of 0 +UG,0,2019,no available information online assumed value of 0 +UA,0,2019,no available information online assumed value of 0 +GB,0,2019,no available information online assumed value of 0 +US,0,2019,no available information online assumed value of 0 +UM,0,2019,no available information online assumed value of 0 +UY,0,2019,no available information online assumed value of 0 +UZ,0,2019,no available information online assumed value of 0 +VU,0,2019,no available information online assumed value of 0 +VE,0,2019,no available information online assumed value of 0 +VN,0,2019,no available information online assumed value of 0 +VG,0,2019,no available information online assumed value of 0 +VI,0,2019,no available information online assumed value of 0 +WF,0,2019,no available information online assumed value of 0 +EH,0,2019,no available information online assumed value of 0 +YE,0,2019,no available information online assumed value of 0 +ZM,0,2019,no available information online assumed value of 0 +ZW,0,2019,no available information online assumed value of 0 diff --git 
a/data/custom/TEMPLATE_energy_totals_AB_2030.csv b/data/custom/TEMPLATE_energy_totals_AB_2030.csv new file mode 100644 index 000000000..5a69a2a4e --- /dev/null +++ b/data/custom/TEMPLATE_energy_totals_AB_2030.csv @@ -0,0 +1,2 @@ +,Unnamed: 0,agriculture biomass,agriculture electricity,agriculture oil,district heat share,electricity rail,electricity residential,electricity residential space,electricity residential water,electricity services space,electricity services water,residential biomass,residential gas,residential heat biomass,residential heat gas,residential heat oil,residential oil,services biomass,services electricity,services gas,services oil,total domestic aviation,total domestic navigation,total international aviation,total international navigation,total navigation hydrogen,total navigation oil,total rail,total residential space,total residential water,total road,total road ev,total road fcev,total road ice,total services space,total services water +AE,0.0,0.0,0.0,0.0,0,0.0,64.38404562665289,0.4505805278999999,0.30038701859999994,0,0,0.0,0.0,0.0,0.0,0.8099939957379862,0.417141615,0.0,69.53247242631582,0.0,0.0,6.6599243305365174,0.0,111.1180823122369,242.89285120432652,0.0,0.0,0.0,0.9365769253427916,0.6243846168951945,155.84141405284205,0.0,0.0,0.0,0.0,0.0 diff --git a/data/custom/TEMPLATE_h2_underground_AB_2030 copy.csv b/data/custom/TEMPLATE_h2_underground_AB_2030 copy.csv new file mode 100644 index 000000000..7ec53a349 --- /dev/null +++ b/data/custom/TEMPLATE_h2_underground_AB_2030 copy.csv @@ -0,0 +1,19 @@ +country,id_region,storage_cap_MWh +AE,AE.8_AC,0 +AE,AE.9_AC,0 +AE,AE.10_AC,0 +AE,AE.3.9_AC,0 +AE,AE.3.5_AC,0 +AE,AE.1.3_AC,0 +AE,AE.1.2_AC,0 +AE,AE.11_AC,266458967703478 +AE,AE.12_AC,101020158085387 +AE,AE.5.58_1_AC,0 +AE,AE.13_AC,0 +AE,AE.14_AC,0 +AE,AE_2_43_AC,0 +AE,AE.7.1_1_AC,0 +AE,AE.15_AC,0 +AE,AE.16_AC,0 +AE,AE.17_AC,0 +AE,AE.18_AC,0 diff --git a/data/custom/TEMPLATE_industrial_database.csv b/data/custom/TEMPLATE_industrial_database.csv new file mode 100644 index 000000000..902fef6f6 --- /dev/null +++ b/data/custom/TEMPLATE_industrial_database.csv @@ -0,0 +1,2 @@ +country,y,x,technology,capacity,unit,quality,location +MA,32.992424,-7.622267,Industry NMM Cement,790, kt/yr,actual,Settat diff --git a/data/custom/TEMPLATE_industry_demand_AB_2030.csv b/data/custom/TEMPLATE_industry_demand_AB_2030.csv new file mode 100644 index 000000000..a97d197d8 --- /dev/null +++ b/data/custom/TEMPLATE_industry_demand_AB_2030.csv @@ -0,0 +1,9 @@ +country,carrier,Industry Steel Primary Blast Furnace Open Hearth Furnace,Industry Steel Primary Blast Furnace Basic Oxygen Furnace,Industry Steel Primary DRI,Industry Steel Secondary EAF,Industry Steel Other,Industry Chemical Ammonia SMR,Industry Chemical Ammonia Other conventional,Industry Chemical Ammonia Renewable,Industry Chemical HVC Naphtha,Industry Chemical HVC LPG,Industry Chemical HVC Methanol,Industry Chemical Other,Industry NMM Cement,Industry NMM Other,Industry Food and tobacco,Industry Construction,Industry Mining,Industry Machinery,Industry Non ferrous metals Aluminium Primary,Industry Non ferrous metals Aluminium Secondary,Industry Non ferrous metals Other,Industry Paper and pulp Pulp Primary,Industry Paper and pulp Pulp Secondary,Industry Paper and pulp Paper,Industry Paper and pulp Other,Industry Transport equipment,Industry Textiles and leather,Industry Wood,Industry Miscellaneous 
+MA,oil,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,10726146.18,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,397010.79,0.0,0.0,0.0,0.0,13258151.395154 +MA,gas,0.0,0.0,3601545.78,236922.08000000002,0.0,1788930.0,0.0,0.0,0.0,0.0,0.0,0.0,9281486.520000001,0.0,0.0,0.0,0.0,0.0,26202513.16,0.0,0.0,2514199.8,0.0,41960.49,0.0,0.0,0.0,0.0,207158615.483001 +MA,electricity,0.0,0.0,1031598.5399999999,962495.9500000001,0.0,93825.0,0.0,132050.0,1884697.5200000003,0.0,55432.72,0.0,3265708.22,0.0,0.0,0.0,0.0,0.0,11232826.1,0.0,0.0,838066.6,0.0,187208.34,0.0,0.0,0.0,0.0,55242297.464439005 +MA,coal,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,6875175.2,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0 +MA,heat,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0 +MA,biomass,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,171879.38,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,276211.484037 +MA,hydrogen,0.0,0.0,1158286.08,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,276211.484037 +MA,process emission,0.0,0.0,50065.41247790604,40962.59763502796,0.0,0.0,0.0,0.0,67875.68113871076,0.0,1385.228977747275,8.72479576360429e-05,9323308.507746331,0.03203319690797162,0.0,0.0,0.0,0.0,1658616.7007389446,0.0,0.18963180469598598,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0 diff --git a/data/custom_res_installable.csv b/data/custom_res_installable.csv new file mode 100644 index 000000000..5b2fa6140 --- /dev/null +++ b/data/custom_res_installable.csv @@ -0,0 +1,4 @@ +Generator,p_nom_max,investmentEuroPKW,fixedomEuroPKW,installedcapacity,lifetime +MAR001001,,,,, +MAR001002,,,,, +MAR001003,,,,, diff --git a/data/custom_res_potential.csv b/data/custom_res_potential.csv new file mode 100644 index 000000000..525cd9282 --- /dev/null +++ b/data/custom_res_potential.csv @@ -0,0 +1,4 @@ +,MAR001001,MAR001002,MAR001003 +2013-01-01 00:00:00,,, +2013-01-01 03:00:00,,, +2013-01-01 06:00:00,,, diff --git a/data/demand/district_heating.csv b/data/demand/district_heating.csv new file mode 100644 index 000000000..545e39afc --- /dev/null +++ b/data/demand/district_heating.csv @@ -0,0 +1,3 @@ +country,current,potential +DEFAULT, 0, 0 +MA, 0, 0 diff --git a/data/demand/efficiency_gains_cagr.csv b/data/demand/efficiency_gains_cagr.csv new file mode 100644 index 000000000..74ce17494 --- /dev/null +++ b/data/demand/efficiency_gains_cagr.csv @@ -0,0 +1,3 @@ +,total residential space,total residential water,electricity residential,total services space,total services water,total rail,electricity rail,total domestic aviation,total international aviation,total domestic navigation,total international navigation,services electricity,agriculture electricity,agriculture oil,residential oil,residential biomass,residential gas,agriculture biomass,services oil,services biomass,services gas,total road ev,total road fcev,total road ice,total navigation oil,total navigation hydrogen,residential heat biomass,residential heat oil,residential heat gas 
+DEFAULT,-0.004951421072036277,-0.010390428883694458,-0.0026960063028712566,-0.006705489381695462,-0.01786431270106026,-0.004951421072036277,-0.0021462195974406573,0.0,0.0,-0.01786431270106026,0.2742749857031337,-0.0026960063028712566,-0.0026960063028712566,-0.006705489381695462,0.0,-0.004951421072036277,-0.010390428883694458,-0.012329796028156648,0.0,-0.0055299435560203225,0.0,-0.06140104567447868,-0.026527359809364603,0.0,-0.0010627352815874014,-0.00851714967450834,-0.0026960063028712566,-0.0026960063028712566,0.0 +MA,-0.004951421072036277,-0.010390428883694458,-0.0026960063028712566,-0.006705489381695462,-0.01786431270106026,-0.004951421072036277,-0.0021462195974406573,0.0,0.0,-0.01786431270106026,0.2742749857031337,-0.0026960063028712566,-0.0026960063028712566,-0.006705489381695462,0.0,-0.004951421072036277,-0.010390428883694458,-0.012329796028156648,0.0,-0.0055299435560203225,0.0,-0.06140104567447868,-0.026527359809364603,0.0,-0.0010627352815874014,-0.00851714967450834,-0.0026960063028712566,-0.0026960063028712566,0.0 diff --git a/data/demand/fuel_shares.csv b/data/demand/fuel_shares.csv new file mode 100644 index 000000000..fa97310ac --- /dev/null +++ b/data/demand/fuel_shares.csv @@ -0,0 +1,3 @@ +country,oil residential heat share,biomass residential heat share,biomass to elec heat share,oil to elec heat share,biomass to elec share,oil to elec share,space to water heat share,gas to elec share,gas to elec heat share,gas residential heat share +DEFAULT, 0.6667, 0.75,0.0, 0.5, 0.0, 0.5,0.6,0,0,0.75 +MA, 0.6667, 0.75,0.0, 0.5, 0.0, 0.5,0.6,0,0,0.75 diff --git a/data/demand/growth_factors_cagr.csv b/data/demand/growth_factors_cagr.csv new file mode 100644 index 000000000..7d11e947c --- /dev/null +++ b/data/demand/growth_factors_cagr.csv @@ -0,0 +1,3 @@ +,total residential space,total residential water,electricity residential,total services space,total services water,total road,total rail,electricity rail,total domestic aviation,total international aviation,total domestic navigation,total international navigation,services electricity,agriculture electricity,agriculture oil,residential oil,residential biomass,residential gas,agriculture biomass,services oil,services biomass,services gas,residential heat oil,residential heat biomass,residential heat gas +DEFAULT,0.021569600815111167,0.025045532052292208,0.055685686523292866,0.01786680031720378,0.021569600815111167,0.01390442519840307,0.04940761418323669,0.03982178088300037,0.03715504444619189,0.021569600815111167,0.006453239989770987,0.025045532052292208,0.03715504444619189,0.021569600815111167,-0.018597262031625594,0.0,0.0,0.01095505452809764,0.0147194767041825,-0.03582400205750491,0.0,-0.006114588100848528,0.0,0.0,0.0 +MA,0.021569600815111167,0.025045532052292208,0.055685686523292866,0.01786680031720378,0.021569600815111167,0.01390442519840307,0.04940761418323669,0.03982178088300037,0.03715504444619189,0.021569600815111167,0.006453239989770987,0.025045532052292208,0.03715504444619189,0.021569600815111167,-0.018597262031625594,0.0,0.0,0.01095505452809764,0.0147194767041825,-0.03582400205750491,0.0,-0.006114588100848528,0.0,0.0,0.0 diff --git a/data/demand/industry_growth_cagr.csv b/data/demand/industry_growth_cagr.csv new file mode 100644 index 000000000..4e823274d --- /dev/null +++ b/data/demand/industry_growth_cagr.csv @@ -0,0 +1,3 @@ +country,chemical and petrochemical,construction,food and tobacco,iron and steel,machinery,mining and quarrying,non-ferrous metals,non-metallic minerals,paper pulp and print,textile and leather,transport 
equipment,wood and wood products,other +DEFAULT,0.03,0.015,0.02,0.017,0.03,0.03,0.03,0.03,0.03,0.03,0.03,0.03,0.03 +MA,0.03,0.015,0.02,0.017,0.03,0.03,0.03,0.03,0.03,0.03,0.03,0.03,0.03 diff --git a/data/demand/unsd/paths/Energy_Statistics_Database.xlsx b/data/demand/unsd/paths/Energy_Statistics_Database.xlsx new file mode 100644 index 0000000000000000000000000000000000000000..631ceabb698ebbe4e3f087edd45aa591e27e34cb GIT binary patch literal 9202 zcma)C1yo!~v&IH@cZc9^L4vym_u%d>L4&)yyF0-l2_D?t2_!gy;PQsB+3cVF-#f3* znL9PpRdu_+n%iZgAPo)y3-UA~6CzZ95`SGFf$wh|jIA6Q75?IZ{>fv})!w%QoRb0r z0fGCQhvA8{GVY^P4-<0G8;W5so)rucqk2yieOuBWpnXP*^wz8-IE}X#Vn_!1SQxxP zMxBqh-By=pY-wt8Ey;TNIx~7)T&G@N>3G>M%kh{rsNHZWY68(r=a1h<^Ts8`zCAGH z2t=t1%(rM49X=XB`yxde>@X#jCNae9j6H}j&1|!GgFEF4NtarbJ6)@JvPl@%6M=51 zVy2g-w7q5|xpWbA+|wb0e=bA*y#k;2R)r2Gqk(Z{_@kBtln>x4J~qC`N2yht=!NR{ zf^$i}7Y!{`Y|&Sg=5Clc54XWdjGfxEL=4UIek(k`;P^LWFhDqrYz-A0Z0&xd_HQ>gZP0dxsGra)%aivM$RN z90W#ZI&zFC-eEV9*4Ye+{hP*eHzfc$In^f;WLtU4X$rH17&-`4vX;;`)g0w^^QB1u zcT%#u3~5Tb9f|=!cw;H+Lw^pnkxRa+e=+!`0$J& zLbMS5()Gngi*Nlx;wjGb3Y(`}DDF9Qw~H#CgFOkct|{J9?JwVquFK+4XP@-L7=~Fk zW~h2vQg_7;zUfEcfIJlRq-F%@%>m^1Aye@pb!q!8at>yR+qx!fXQka_y&Vy{zL2k> zc519^Szc|_Btxk5H^H{o2f4nlwz?epo(Z<5+ig4j{-!b#^%VdRAvVSC6sISl6r?Le znQ@4{LE!6(*@s1?rKN-8Q&2Xm8FK8%3?Y!FU11h5%}a@P09F;;U7N%8+Lse%)`TUF zc1|LcA@b^`7peLUuQ2e%=YD>-Ju&tonkbw11_pKtT))X(D^9 z)9c6f5LnkcvbB)M!+v?6`!C_B3b=lZT^={LXT^+N4+qi5*$L~Pp@}A9B%H$W^T7%I zE2X38{rY#e=1C0ocRqj;!>v<&n1*g=@0aODP}b{P!BE)1wM$nPIb=d+6@^=mT=1$7 z^~|WZW^i(Kc~|o(Q)Aomvn!gCOfz^yv{Y;Fa0uS8fqI)V+CsV1(W`g72H7Gaxw--n z*{FH7puGnK&Vb~%dEnZuK8B9y92<}d?v7S2z=;|>jBi&;JoQQ+AAy&&u+NbTt18wMOZa$V%3EuA zk|JGC1OwAL((4L*I2ybNG*DcOlwPB2GHUa#PU zFyNL*O}f3~@?Fx_f!T%2SS9;7^FkRA3>qaU%Qv$=q}{;ydYB@Ep+aim8%m=8*aAs$ zQg$dv+dGu~X`Vnan07mn19n#i|CHhL>1zq@EoI>dJ&9 z!*?Z?EwGFJczwhwLT!Q!QEjsj9GaNikoB2ChMARSQ=uT92&gRc>a8zm;v*)n|w!%T|3?N#T?Qz_zM>{S4T*gKAIi$b{I+YPs0gJR5o ze$$Z%hX+lVc*17JHnqT<9tsY>`aTR--K#Bria8uS<1#-g=@vd~mC;&4Jk8xD@bK7| zcJy{%s?I@QZRvax4U_i}AU245sHt7zD~uKfzJ~}nKvLAvV?*7hYD}iEEsLjB9b}(M z8yxV+ASwahGqn-^0S&7o2k?)%$I$=LewHfA0P)4k#Jnn^em?~tg+nT=KJ&^pS8syu zyFfwnOu<6Y9`0?otJ}M&vibFlPIfAEbLnsJpBu!>F36j|yHI8v3obG7EZdFV_}Hx zPv5>DWY*3n$MoPqf=BIEtHon4y|l*Omq~X{xSKl(hta)&v9Jl4-VbVL*!@N?`BqP} zX4K4T0Ff`Xk-aGaFL-QJ-AOP{K2O~9v$Ze>i8}21H6+HF2DuZwS|2s$=0xR3mWRPv zXyYt-0ewAx0HmkBg*e$u8URt^#gOwm0lN=~7<=ov1F&56pqby*7~uyQ6$fkT-7xq9 z$vS>GK-_kY-B<{0JpkGnvvh-sixiL3HzgJ|?LHIK>EXj0I}ZnoS~*j^UOv`Cg1^P5 zLYK@CK)TI{YuekefBC{XHCZKXtw6MejI9d5_CitL7 zsRW)Oj?Uuu&}<}J#3T^Q_0!h*6!wYD5%-?U5pu)~%XfXJhFx-D>GH=tP(4P>2@cHZ z(x4yptcA5cH_hF~(^8{;Q?LvMm1+qlw=FXWwom5vbAaLY8A7hacn zH(_ajqm_6A-8w3|TKeU!cW(Q8K|<*ADbxx(*m)L z_^J9M>3zFrHr7WkOSAL!uK-shy``utyo~y}i7#cV)g(Bp?1m;oXNFUalzf6n_SIT$ zleLW`Fi^4#%?YH0AulNp9DB|YP;;}D`=$N4%9N;aMbdh>u1?EU-u$xUvvoN5pfvD- z@n{O||7x9|kxyotmTkTSa^NHErWcr3@W6#xo>{|L&7@iN7(|mHx*;8L{9xA_B#(wI zk*X3qV-$?+Os>~n{_^2W|M4No_0DbXx2S#P6cyY}LqJZn?FN)a9<;zQ?s`jpq+yZL6{^XU4yXUa;Hq zYRqJ`KuR5UOm9?ilo>;@@sx_NX|lBNS(_QF%3^|90m7wYB)$`jAUlQp8O6@x%K5M6lth?fJeX|5!X)gVlYN$E%1g zXU49p9ee&DTAAj@Z=<$N)tBBS-FLw?oRc!Kdq+&e)sjmx9I$6c`s@zYyuUc>zY1Z;TODne^t6;m6HSh#y=*~oc zGBF)FvtCm(Y}EL%GW58tKRE_E3cB`!Ty}^5OC(ITDPQC@+sesX$_(sJK3&$|RW7Wh zTyX4N2}!EOLM~A>6i&!+;z)%@C8t$TaR#}f$JNNtrGxSY6o`ezSIe{-%oJD4Plr2a3T$ZXEPfLa^wl zw+#X$y!lD!BowfrnVNaXSt_D(o9a=D22Hwscj9nk)&70Ter(`nu;ZleNf_icVSu{6 zB8&mmLB~qs2^C^p;{z=}-$dRmSX6^RG5T6(fTAi(hxBHJz_`>2L*5hZUn#IibHcEx z$_)IzQeYiOqpAUEp(=qLAT9Jn=T!+D0BHxc({@_P<-`FU3{N#uSB$Z?xE`I*<(XPn z3I{dTTMbR&@a38DNm5t#pEE7vvwf>T=8>~1a(f`Z^T89d%yC>ZZEPEEJCOG;8WKpe 
z|9@!SV;WRf42QX%Rh0pq?z1XVdo>bQ>jRy@<;GuAeNR(E+A=wnl*F#|6)%97v9_?D zCzjeL(Wl*42_0zyck@hhv(D&N7x=oa&1ayn##5EY)XDXvju|d*e#e)Dg{dq1AnBV& zof0n~R3=owjbSQQz&(-%kQFKe5@_ZVACk-(%WhkXp5sk!( za9K+1Fr~P$nmwZVD5Zhh=DC%V7A0w<7ge+EWfSDSdx41yS0D3GasZB{X%GsLk(Qw4 zGxN=u?NH&ABr0Tz;lRmSdwMq{Oj(06v7)5YccQGRWs)*86edBq)b6PWp*fWO!>5it z+rz`;y10I+TbgYCsi`93@0{p>q(pz5T6oYuKDD@t#kQ=6hLhY|I{J49%8>y-Wc&sv^Nf zG}y%;LA{?7xpynT`*SnR3K%~{H4Q;%wj8~Th<2hJ^9z3n>0p9Z|Ns%QP7iBvLX$xR1qCsU6bU3A!^SIs=lXs zYtITNx0lpuEO-3{h>I-fS;W~PSSA@`N`(F(!i)pxyPQ3KD+h^~03Afl=0d%X+Gki( zGV!uaoLu9r3KSio$T zMY$CaD-niq-?+IwaeYljJmul4?>r(@ zpyCLXMvv)N=bX~rJ(uUq*DZTx4TePYnhaI~zm&BS_94-ElPkLS>)j*M9zl)H`@wgoJe=putQW=X zymtc~=^N?kw|0oZRu@Vc%-V?ys`P86+gj$`HX1B8DHScKp5-@I)roJL!oW@Hg_N)JOqiW=vo?>Zuf^C2VWsWNhQ~M%mrY z*iq-{Ng7ceviZb>e3v*dN5gDaQWo|hH`*{<`3AJAg9J0m+8qnlXH8bI0JacSMR0R- z?qGCSQ&)bstl%d^=*_jSBT@^4N9eKuiMXX8=;+Jb2i(RqL|&hX_Z$ zq&&*7OU1mZ+}G*>&{?rG1!5?+Q&M7ua~1#x7Tgsn0$#u33{6Dl^jAWK98q<_5AB5v zfngIGl_A!Hm7L-Q*PQyXADo%hmqIx4iR+39>d(J0g$ZTl*k#Ngy*y52q|^ZSslzP$ zY{4XGiz`RXM5zF231Xo=F(R5@aXiAHoQE~TRKEQ38fWr`EWp{G`Cc_w>ydKJkE+f5 zEyXL!-E+Y^34Mu1@S9WrK(a%E^%(DHZG{$9m|;8$n?rqYe(;AcTjQDBu%>v{s@R~jk~f#fY5UTk1%8)^nWLc`gV3txMwQq z$Zr90r;W{yZdHcP{mjgbK|t(Rr6}#Ln<2Z^T}$r?b9jH_h)PjFtb1hb>cWsqU<{x@lP343F{&)HlPs$?bM!D&cL16E9A6k9~Si1kB** zM(@mGBu}S2^Dz|mUU%;EH>MNF@It@p6-8al@qkN5;-dfQIk{ zxM*6AlSoaB*9nR;7%?K(T2q@MKp-Sd68~6BU{IIepJ3p4r5H2D(B20do3~CjAE-ywYByHbV%E@G84Uz ziru&R12GuY5 zNR|Mop&Jr@#)xBziq`hy3D>?1D{QY2)25i;vwXeD(@HST;{SY=4k}$#;$#(z@%F}z zBz%CpxN-m$RfHX~UG!^uQTgfi{=RS258$rwc|f=45SD-Bxt8Q1BH&aI54a7EF0rIeA3hi0`Url63sY!UGR zKWizf#X)}BoO79P5H}c;q_!;wuK14thu0)BCZFc3JFQeDD6f*?J<^kQBzwIBn#N>0 zyj1&%u7y2&dN(U{1XG&fwm9>Ykxg>V;0!}$hs7%MlE*c(-9Bc^+Pon*zMy#yAXOBY=B z5(?!-f{$pp@fb6Z5~p0p_aDtkMR8pHzaOoMFC5XmWOpf#C7EH_0SgQMQ3|21!(FV2 zg;eCZKO}YFAn6SyjG0paxuM7BvgR4UzI{RB0#bL3BGG1YnERTyPr8d~$Ggw(5&s%| z$_NG;Dv53*WFDNk$V$*aI<|m@aCh4f$5hN}0S?DVO1;935Yx6ZmvcwSrM=)qDR?sX zo<~SsdCUmjR!^4)cr(`P-VE$U*9`=*9#?T-EkzdxqWuMyK%evn=T@q$nkfE+u)#nq z0?#c`dO3!jo{6aaYj7oXErSFzdzQpN0diNrt3IqAuPR@hIP-8Pu^nPr2v{_H6a^*T zlTJ?B8_)HmQZ4>{>^4r6AMRGUw4==$YN0i`>{yHATtP8RX$F&~k?~;ZHsp!43gnTc zOo$S_Eu2e_%OcaF?Jld-8)mrb@AAevi(T*ogpL>nQ_`qCOO|Vxe$LGlvHTU z3*Dp$Q=`(&=Mp-F>}3v%x2T^m%Yjy4d)+Q_3fpUmE>5+Y!zJd23u*yTf|4WV`=7*w zNTkVnJU~@3s8>r*L5REEIyD0?h?~wY_vk&gP=i*H+)ebL*o&oN{za?ozDGx`x0S5z zO#A5H;VZT`S$ii^z4rDBWB`6&5SS=`H%!s+s zvi&~&vNJU2Ba=k zYu9jfdzu+J@!gGp9NFpvoX}P%XRdveZC!sly50Z_SbSGL{fm~pP|lV;yqYS==`?r{A!~9iUiOXm z(cxOyoU7a4Y|dzY)Kjlmb9kMC``N`np9KjnTbEspWfk)f3e6B&NDm;14*G^!quiY| zNt~TUW5v9DsH*$ER+_>qx8lMY%^Ghak65%)MudT}%-#KtdrdqgVTxL9dAn$qnrT}| zX=sS)K$Zf_jn*Lj%i9A_V*b{qFi%VY9m}bQKVruBTV`iMz`%tKSZ76gKDqzt)goU% z$BP(4mvK-4K!D($v}l=Tfg)GBn=leC5yxV9xBU5^&9C6_iT(K~x)Oa_gnEsu#W3r*SZdOvbpeuuW~G-N~UoQPCw zs(B!Hp|MwNLx*{gQb>|O*|*s{&LPk3&fYsO;_fJyNdK)R8TgX{BK?mQ{M7QZtp2B#KZ_^7wR{6^2B_tCDdkVmKXb<4 zqV&K(?!RP^e=7Jh^ZBiy1mhnj_$v?kE&Xo`{O1o;7whNB|N8^+XGH${rh)$y`{zma z$Mf+F|DSQ|Z#~7prqdH}e@3ytH}I_C&-3%Q27SW64frcZP>=>Z{fP(IN<;yHhX4T? 
ICwltyKSdX)dH?_b literal 0 HcmV?d00001 diff --git a/data/emobility/Bus__count b/data/emobility/Bus__count new file mode 100644 index 000000000..82089d566 --- /dev/null +++ b/data/emobility/Bus__count @@ -0,0 +1,171 @@ +File generated for type: Bus_ +Time of generation:2016-08-28 20:17:00 +day,hour,count,n_counts +0,0,688044.0,362525 +0,1,608947.0,362760 +0,2,572404.0,362373 +0,3,597034.0,362616 +0,4,805533.0,362414 +0,5,1200112.0,362489 +0,6,1860173.0,361440 +0,7,2098221.0,359719 +0,8,2404650.0,359120 +0,9,2578585.0,359720 +0,10,2350794.0,359929 +0,11,2321246.0,360102 +0,12,2260093.0,359865 +0,13,2386226.0,360122 +0,14,2339446.0,360016 +0,15,2255673.0,360028 +0,16,2216057.0,359829 +0,17,1983239.0,360283 +0,18,1683708.0,361261 +0,19,1315877.0,362196 +0,20,1028140.0,362436 +0,21,805207.0,362814 +0,22,663540.0,362753 +0,23,568841.0,362998 +1,0,509277.0,362249 +1,1,419731.0,362513 +1,2,390290.0,362108 +1,3,429170.0,362376 +1,4,641839.0,362289 +1,5,1054962.0,362523 +1,6,1750259.0,361621 +1,7,2023537.0,360128 +1,8,2324676.0,359251 +1,9,2362721.0,359477 +1,10,1980177.0,359155 +1,11,1887266.0,359195 +1,12,1951051.0,359236 +1,13,2135594.0,359521 +1,14,2120963.0,359223 +1,15,2094027.0,359244 +1,16,2143592.0,358847 +1,17,2041272.0,359291 +1,18,1771129.0,360558 +1,19,1352459.0,361620 +1,20,1005893.0,361930 +1,21,769552.0,362451 +1,22,639518.0,362455 +1,23,564931.0,362597 +2,0,508541.0,362648 +2,1,423035.0,363005 +2,2,393755.0,362492 +2,3,431433.0,362864 +2,4,654627.0,362817 +2,5,1089912.0,362885 +2,6,1816050.0,362102 +2,7,2129602.0,360763 +2,8,2496897.0,359752 +2,9,2554351.0,360151 +2,10,2138169.0,359936 +2,11,2021726.0,360236 +2,12,2064415.0,359958 +2,13,2275888.0,360461 +2,14,2253623.0,360077 +2,15,2211607.0,359758 +2,16,2262266.0,359447 +2,17,2160683.0,359537 +2,18,1900672.0,360899 +2,19,1458371.0,362159 +2,20,1093595.0,362416 +2,21,848917.0,362918 +2,22,712905.0,363133 +2,23,641304.0,363091 +3,0,582469.0,363273 +3,1,498079.0,363561 +3,2,469810.0,362951 +3,3,507598.0,363187 +3,4,721334.0,363299 +3,5,1144325.0,363376 +3,6,1884245.0,362695 +3,7,2243750.0,361484 +3,8,2590665.0,360527 +3,9,2641185.0,360609 +3,10,2270328.0,360376 +3,11,2152761.0,360513 +3,12,2165532.0,360273 +3,13,2356492.0,360522 +3,14,2345217.0,360337 +3,15,2299540.0,360099 +3,16,2340537.0,359603 +3,17,2206811.0,359633 +3,18,1924940.0,360965 +3,19,1515144.0,362334 +3,20,1165743.0,362820 +3,21,915349.0,363384 +3,22,774193.0,363427 +3,23,689359.0,363587 +4,0,621126.0,363388 +4,1,539901.0,363701 +4,2,520039.0,363287 +4,3,565110.0,363519 +4,4,798465.0,363425 +4,5,1245096.0,363643 +4,6,1980815.0,363050 +4,7,2325866.0,362354 +4,8,2660579.0,361873 +4,9,2745608.0,362056 +4,10,2620872.0,361697 +4,11,2600677.0,361778 +4,12,2543784.0,361465 +4,13,2666359.0,360650 +4,14,2634750.0,359843 +4,15,2525754.0,359410 +4,16,2516413.0,359387 +4,17,2400374.0,360167 +4,18,2139578.0,361294 +4,19,1741363.0,362428 +4,20,1430780.0,362897 +4,21,1229394.0,363349 +4,22,1130635.0,363493 +4,23,1088862.0,363690 +5,0,1018343.0,363206 +5,1,925525.0,363875 +5,2,863707.0,363663 +5,3,871107.0,363694 +5,4,955376.0,363668 +5,5,1144235.0,363985 +5,6,1463072.0,363621 +5,7,1950959.0,363818 +5,8,2587571.0,363438 +5,9,2839520.0,363453 +5,10,2529976.0,362857 +5,11,2201205.0,362676 +5,12,1956437.0,362363 +5,13,1979229.0,362549 +5,14,2006627.0,362495 +5,15,1997953.0,362534 +5,16,2128904.0,362430 +5,17,2365955.0,363048 +5,18,2373743.0,363122 +5,19,1927225.0,363419 +5,20,1559949.0,363495 +5,21,1295733.0,363705 +5,22,1109561.0,363689 +5,23,1007277.0,363785 +6,0,958544.0,363171 
+6,1,881912.0,363459 +6,2,777244.0,349342 +6,3,770629.0,362382 +6,4,756589.0,362881 +6,5,809296.0,363310 +6,6,958540.0,363228 +6,7,1296002.0,363457 +6,8,1834878.0,363348 +6,9,2197519.0,363505 +6,10,2192665.0,363126 +6,11,2121109.0,362880 +6,12,1913820.0,362339 +6,13,1930082.0,362362 +6,14,2147226.0,362103 +6,15,2219053.0,362175 +6,16,2319372.0,361915 +6,17,2440426.0,362370 +6,18,2247233.0,362367 +6,19,1846874.0,362862 +6,20,1491772.0,362936 +6,21,1160206.0,363190 +6,22,934218.0,363207 +6,23,777840.0,363130 diff --git a/data/emobility/European_countries_car_ownership.csv b/data/emobility/European_countries_car_ownership.csv new file mode 100644 index 000000000..e10ce4e08 --- /dev/null +++ b/data/emobility/European_countries_car_ownership.csv @@ -0,0 +1,31 @@ +Passenger car ownership (passenger cars per 1 000 inhabitants),,,,Passenger cars stock,,,,Population,,,, +ctr,1995,2005,2007,2009,1995,2005,2007,2009,1995,2005,2007,2009 +Luxembourg,711,688,696,700,288348,317211,331223,345236,406,461,476,494 +Italy,529,593,599,603,30095699,34654962,35417303,36179643,56844,58462,59131,60045 +Malta,490,528,545,561,180851,212642,222119,232013,369,403,408,414 +Germany,417,531,541,551,33995614,43844313,44533683,45223052,81539,82501,82315,82002 +Cyprus,399,556,548,548,257314,416758,426876,436948,645,749,779,797 +Austria,423,507,517,528,3362604,4156738,4283532,4410326,7943,8201,8283,8355 +Switzerland,460,519,521,516,3229166,3846085,3908308,3970530,7019,7415,7509,7702 +France,421,479,484,490,24999145,30074344,30817127,31559910,59315,62773,63623,64367 +Slovenia,350,469,478,485,696460,936267,961223,986179,1989,1998,2010,2032 +Finland,372,461,473,484,1898191,2414070,2496651,2579233,5099,5237,5277,5326 +Sweden,412,458,471,481,3630886,4130366,4293138,4455910,8816,9011,9113,9256 +Spain,361,471,472,474,14211916,20250464,20993460,21736456,39343,43038,44475,45828 +Lithuania,197,425,449,473,718470,1455165,1519002,1582838,3643,3425,3385,3350 +Belgium,422,465,469,471,4273072,4860971,4961562,5062153,10131,10446,10585,10750 +United Kingdom,354,448,456,463,20526377,26890772,27694004,28497236,57943,60060,60781,61596 +Norway,384,435,447,454,1669164,2003057,2090795,2178533,4348,4606,4681,4799 +Netherlands,367,439,445,449,5664005,7152178,7275957,7399737,15424,16306,16358,16486 +Estonia,316,387,412,436,457042,520843,552474,584104,1448,1348,1342,1340 +Ireland,275,410,417,430,990375,1684031,1797710,1911389,3598,4109,4313,4450 +Greece,235,409,414,419,2492172,4530963,4624830,4718696,10595,11083,11172,11260 +Denmark,332,372,380,386,1729338,2013037,2070319,2127601,5216,5411,5447,5506 +Bulgaria,187,311,348,386,1574416,2417251,2675701,2934152,8427,7761,7679,7607 +Czech Republic,294,373,382,386,3034461,3817019,3927115,4037211,10333,10221,10287,10468 +Latvia,133,320,339,357,332153,738699,772850,807001,2501,2306,2281,2261 +Portugal,252,350,352,355,2520535,3680074,3728382,3776690,10018,10529,10599,10627 +Poland,195,313,328,342,7517260,11963408,12494111,13024815,38581,38174,38125,38136 +Hungary,160,270,282,294,1658229,2723837,2834001,2944165,10337,10098,10066,10031 +Slovakia,179,233,239,244,958900,1252049,1286700,1321351,5356,5385,5394,5412 +Romania,92,147,163,180,2091454,3182940,3506192,3860190,22712,21659,21565,21499 diff --git a/data/emobility/KFZ__count b/data/emobility/KFZ__count new file mode 100644 index 000000000..5ce0726f7 --- /dev/null +++ b/data/emobility/KFZ__count @@ -0,0 +1,171 @@ +File generated for type: KFZ_ +Time of generation:2016-08-28 14:55:02 +day,hour,count,n_counts +0,0,108473680.0,377319 +0,1,82987931.0,377577 
+0,2,78055747.0,377161 +0,3,99581978.0,377428 +0,4,172795923.0,377238 +0,5,400623334.0,377401 +0,6,727700515.0,376521 +0,7,914767528.0,374979 +0,8,830383041.0,374209 +0,9,734596809.0,374676 +0,10,700350090.0,374810 +0,11,700405030.0,375003 +0,12,710251471.0,374707 +0,13,738780860.0,374996 +0,14,772520157.0,374869 +0,15,837956441.0,375033 +0,16,934010842.0,375027 +0,17,902342954.0,375441 +0,18,741103748.0,376228 +0,19,522132881.0,377135 +0,20,369891627.0,377329 +0,21,277916689.0,377731 +0,22,219250568.0,377645 +0,23,141678428.0,377850 +1,0,95156728.0,377035 +1,1,76026628.0,377326 +1,2,72517990.0,376877 +1,3,85055976.0,377173 +1,4,137277893.0,377098 +1,5,341366378.0,377432 +1,6,683476500.0,376695 +1,7,906037810.0,375351 +1,8,831626263.0,374260 +1,9,723694854.0,374319 +1,10,668452115.0,373960 +1,11,664704156.0,374005 +1,12,690565597.0,373987 +1,13,735536927.0,374315 +1,14,781166346.0,374048 +1,15,860257784.0,374193 +1,16,965402078.0,374066 +1,17,934866325.0,374463 +1,18,779387294.0,375527 +1,19,548212087.0,376486 +1,20,381721707.0,376748 +1,21,287688969.0,377301 +1,22,230826843.0,377264 +1,23,151270009.0,377397 +2,0,99616179.0,377393 +2,1,76961997.0,377709 +2,2,71796059.0,377134 +2,3,82858649.0,377543 +2,4,132930697.0,377537 +2,5,333303738.0,377714 +2,6,673294545.0,377078 +2,7,898898634.0,375922 +2,8,829407732.0,374739 +2,9,728235958.0,375027 +2,10,681952207.0,374726 +2,11,685713984.0,375050 +2,12,718219308.0,374758 +2,13,770418372.0,375283 +2,14,816464298.0,374897 +2,15,890812713.0,374707 +2,16,990725411.0,374557 +2,17,961389484.0,374666 +2,18,807912334.0,375865 +2,19,583263969.0,377031 +2,20,412514255.0,377266 +2,21,310607831.0,377782 +2,22,249885512.0,377948 +2,23,166192733.0,377905 +3,0,107876994.0,377993 +3,1,81437580.0,378342 +3,2,74233593.0,377694 +3,3,84125264.0,377956 +3,4,132580534.0,378079 +3,5,325787216.0,378209 +3,6,652758946.0,377663 +3,7,875031543.0,376605 +3,8,816037220.0,375450 +3,9,728032342.0,375444 +3,10,695708230.0,375202 +3,11,705193400.0,375367 +3,12,735624407.0,375098 +3,13,784820947.0,375397 +3,14,835187522.0,375223 +3,15,907919578.0,375128 +3,16,995789388.0,374778 +3,17,966250089.0,374802 +3,18,836284879.0,375970 +3,19,618832742.0,377227 +3,20,442937304.0,377693 +3,21,334643805.0,378303 +3,22,267652954.0,378318 +3,23,178146869.0,378423 +4,0,115705423.0,378121 +4,1,86908753.0,378462 +4,2,78502260.0,378033 +4,3,87775878.0,378294 +4,4,134134525.0,378202 +4,5,314584395.0,378476 +4,6,612697560.0,377982 +4,7,818107726.0,377431 +4,8,751902744.0,376827 +4,9,704861077.0,377003 +4,10,734119657.0,376643 +4,11,795396373.0,376756 +4,12,898172946.0,376441 +4,13,979133479.0,375687 +4,14,1019161427.0,374886 +4,15,1027411123.0,374474 +4,16,1012542255.0,374375 +4,17,957387669.0,375151 +4,18,847136984.0,376252 +4,19,664024491.0,377365 +4,20,496680891.0,377830 +4,21,375517425.0,378307 +4,22,310908746.0,378432 +4,23,236691370.0,378626 +5,0,170647816.0,378057 +5,1,125760497.0,378795 +5,2,102009453.0,378554 +5,3,98692501.0,378596 +5,4,118464284.0,378578 +5,5,180157441.0,378942 +5,6,241530096.0,378625 +5,7,342439034.0,378889 +5,8,487538788.0,378507 +5,9,656404729.0,378579 +5,10,769195000.0,378019 +5,11,803039041.0,377811 +5,12,794237213.0,377402 +5,13,803664678.0,377667 +5,14,798804166.0,377627 +5,15,733570647.0,377662 +5,16,690321161.0,377546 +5,17,672280656.0,378226 +5,18,615526093.0,378264 +5,19,480484999.0,378515 +5,20,371959421.0,378532 +5,21,285991449.0,378773 +5,22,246363536.0,378697 +5,23,208755872.0,378786 +6,0,160101609.0,378135 +6,1,115163686.0,378434 +6,2,77540144.0,363731 
+6,3,65061949.0,377304 +6,4,66523216.0,377852 +6,5,89201395.0,378337 +6,6,107382896.0,378240 +6,7,147838676.0,378553 +6,8,242570906.0,378407 +6,9,397489009.0,378631 +6,10,561353053.0,378245 +6,11,673692139.0,377974 +6,12,706920410.0,377365 +6,13,745702428.0,377481 +6,14,786527068.0,377250 +6,15,761783218.0,377287 +6,16,764720330.0,377036 +6,17,788390221.0,377547 +6,18,723533261.0,377538 +6,19,600302038.0,378044 +6,20,469053762.0,378093 +6,21,344816931.0,378307 +6,22,246445284.0,378262 +6,23,164301984.0,378136 diff --git a/data/emobility/Lfw__count b/data/emobility/Lfw__count new file mode 100644 index 000000000..74c0d3029 --- /dev/null +++ b/data/emobility/Lfw__count @@ -0,0 +1,171 @@ +File generated for type: Lfw_ +Time of generation:2016-08-28 20:57:59 +day,hour,count,n_counts +0,0,7343543.0,351444 +0,1,6186286.0,351690 +0,2,6453736.0,351299 +0,3,8893046.0,351559 +0,4,15924754.0,351339 +0,5,32769945.0,351428 +0,6,63709284.0,350402 +0,7,69916938.0,348710 +0,8,65113309.0,348123 +0,9,57285903.0,348706 +0,10,54207525.0,348880 +0,11,53141494.0,349054 +0,12,51197929.0,348832 +0,13,51710899.0,349092 +0,14,53501380.0,348991 +0,15,58489423.0,348987 +0,16,63471327.0,348756 +0,17,52378227.0,349222 +0,18,40614283.0,350170 +0,19,29592192.0,351107 +0,20,21751956.0,351355 +0,21,17002836.0,351739 +0,22,14199546.0,351669 +0,23,10815233.0,351919 +1,0,8201665.0,351210 +1,1,7116678.0,351481 +1,2,6883983.0,351036 +1,3,7659611.0,351295 +1,4,10552911.0,351236 +1,5,21939774.0,351463 +1,6,54676510.0,350582 +1,7,63943651.0,349085 +1,8,59121792.0,348225 +1,9,52165803.0,348496 +1,10,49888295.0,348174 +1,11,50189655.0,348234 +1,12,49879872.0,348283 +1,13,51681013.0,348557 +1,14,54333355.0,348283 +1,15,60141748.0,348285 +1,16,65550148.0,347873 +1,17,54228269.0,348296 +1,18,42610001.0,349513 +1,19,31501452.0,350566 +1,20,22914649.0,350886 +1,21,17813566.0,351402 +1,22,14936088.0,351382 +1,23,11422110.0,351557 +2,0,8622049.0,351546 +2,1,7390440.0,351931 +2,2,7027093.0,351393 +2,3,7633967.0,351759 +2,4,10147433.0,351715 +2,5,21085909.0,351782 +2,6,53839783.0,351016 +2,7,63362599.0,349717 +2,8,58850261.0,348716 +2,9,52488247.0,349139 +2,10,50827190.0,348903 +2,11,51718072.0,349204 +2,12,51740494.0,348915 +2,13,53914905.0,349422 +2,14,56707486.0,349036 +2,15,62243365.0,348725 +2,16,67355332.0,348438 +2,17,56209363.0,348522 +2,18,44811104.0,349854 +2,19,33730171.0,351089 +2,20,24939304.0,351357 +2,21,19418040.0,351831 +2,22,16156497.0,352044 +2,23,12335184.0,352010 +3,0,9206370.0,352170 +3,1,7750642.0,352476 +3,2,7276817.0,351834 +3,3,7799405.0,352070 +3,4,10202285.0,352165 +3,5,20697297.0,352263 +3,6,52217544.0,351596 +3,7,61768714.0,350426 +3,8,57796785.0,349492 +3,9,52169034.0,349597 +3,10,51299649.0,349379 +3,11,52715347.0,349480 +3,12,53220555.0,349254 +3,13,55778091.0,349506 +3,14,59217166.0,349304 +3,15,65103019.0,349071 +3,16,69994963.0,348560 +3,17,59516088.0,348580 +3,18,48740267.0,349893 +3,19,37772143.0,351224 +3,20,28328593.0,351701 +3,21,21839031.0,352256 +3,22,17668883.0,352283 +3,23,13229933.0,352469 +4,0,9771339.0,352287 +4,1,8135050.0,352593 +4,2,7606454.0,352190 +4,3,8036314.0,352396 +4,4,10256588.0,352317 +4,5,19853898.0,352527 +4,6,48201520.0,351963 +4,7,58036724.0,351273 +4,8,55214720.0,350789 +4,9,52263225.0,350999 +4,10,55172298.0,350644 +4,11,59935302.0,350733 +4,12,64101642.0,350403 +4,13,66453594.0,349629 +4,14,65764731.0,348831 +4,15,63698805.0,348411 +4,16,59855286.0,348384 +4,17,52721863.0,349172 +4,18,45527306.0,350272 +4,19,36484125.0,351376 +4,20,28343035.0,351805 +4,21,22397328.0,352258 
+4,22,18730897.0,352386 +4,23,15005841.0,352615 +5,0,11702950.0,352169 +5,1,9656560.0,352778 +5,2,8552724.0,352529 +5,3,8294981.0,352565 +5,4,8875452.0,352556 +5,5,12244359.0,352882 +5,6,19355997.0,352531 +5,7,25895465.0,352711 +5,8,30687945.0,352358 +5,9,35814741.0,352382 +5,10,39398035.0,351790 +5,11,40919859.0,351615 +5,12,40791621.0,351283 +5,13,40634233.0,351499 +5,14,39801604.0,351460 +5,15,37491124.0,351498 +5,16,35311486.0,351400 +5,17,33354082.0,352023 +5,18,29986628.0,352078 +5,19,24095462.0,352354 +5,20,18693694.0,352418 +5,21,14468888.0,352624 +5,22,12041509.0,352574 +5,23,10110804.0,352688 +6,0,8097109.0,352092 +6,1,6356221.0,352393 +6,2,4866424.0,338640 +6,3,4506172.0,351331 +6,4,4636227.0,351785 +6,5,5735829.0,352256 +6,6,7274359.0,352176 +6,7,9725198.0,352387 +6,8,14035975.0,352289 +6,9,20364646.0,352486 +6,10,26684604.0,352124 +6,11,31306621.0,351882 +6,12,32772318.0,351350 +6,13,33570124.0,351409 +6,14,34513158.0,351143 +6,15,34598170.0,351190 +6,16,34854652.0,350945 +6,17,35122496.0,351406 +6,18,32696529.0,351397 +6,19,28116797.0,351862 +6,20,22843253.0,351900 +6,21,17833457.0,352165 +6,22,13495084.0,352106 +6,23,9958520.0,352047 diff --git a/data/emobility/Lkw__count b/data/emobility/Lkw__count new file mode 100644 index 000000000..14c8d11b4 --- /dev/null +++ b/data/emobility/Lkw__count @@ -0,0 +1,171 @@ +File generated for type: Lkw_ +Time of generation:2016-08-28 08:24:42 +day,hour,count,n_counts +0,0,28353372.0,367662 +0,1,30476881.0,367896 +0,2,32461409.0,367506 +0,3,41167478.0,367755 +0,4,56354604.0,367553 +0,5,78379797.0,367627 +0,6,96858355.0,366538 +0,7,97698171.0,364712 +0,8,100714262.0,364087 +0,9,103553284.0,364691 +0,10,104780603.0,364896 +0,11,106546748.0,365085 +0,12,106217504.0,364847 +0,13,107379570.0,365118 +0,14,108031410.0,365014 +0,15,104983342.0,365028 +0,16,97429568.0,364811 +0,17,84359088.0,365256 +0,18,71117050.0,366307 +0,19,59440593.0,367294 +0,20,50618843.0,367571 +0,21,44133106.0,367962 +0,22,39663515.0,367900 +0,23,35200717.0,368153 +1,0,33432717.0,367393 +1,1,35014519.0,367650 +1,2,38130314.0,367256 +1,3,45285376.0,367516 +1,4,59514703.0,367440 +1,5,83900075.0,367668 +1,6,108377278.0,366722 +1,7,113636795.0,365103 +1,8,117891629.0,364189 +1,9,120563203.0,364445 +1,10,120196288.0,364117 +1,11,120529625.0,364150 +1,12,119829657.0,364200 +1,13,121018104.0,364476 +1,14,121315697.0,364181 +1,15,117563305.0,364201 +1,16,108775474.0,363807 +1,17,94262522.0,364228 +1,18,79550942.0,365564 +1,19,65672089.0,366692 +1,20,54535420.0,367033 +1,21,46514082.0,367567 +1,22,40691741.0,367575 +1,23,35463902.0,367716 +2,0,33282217.0,367784 +2,1,34375632.0,368087 +2,2,37102850.0,367540 +2,3,43591483.0,367955 +2,4,57479187.0,367929 +2,5,82413324.0,368003 +2,6,108573172.0,367179 +2,7,115328046.0,365766 +2,8,119928418.0,364742 +2,9,122343298.0,365151 +2,10,121518743.0,364916 +2,11,121734247.0,365220 +2,12,120716404.0,364960 +2,13,121759859.0,365440 +2,14,121988372.0,365036 +2,15,118051274.0,364712 +2,16,108917881.0,364364 +2,17,94540532.0,364452 +2,18,80402328.0,365914 +2,19,66767671.0,367249 +2,20,55311559.0,367542 +2,21,46793756.0,368062 +2,22,40629860.0,368277 +2,23,35116868.0,368236 +3,0,32714530.0,368416 +3,1,33639814.0,368707 +3,2,36091617.0,368102 +3,3,42113095.0,368335 +3,4,55215966.0,368457 +3,5,78901110.0,368540 +3,6,104137527.0,367825 +3,7,110953265.0,366515 +3,8,115364986.0,365524 +3,9,117204681.0,365595 +3,10,116491919.0,365378 +3,11,116518736.0,365512 +3,12,115334107.0,365253 +3,13,116073451.0,365511 +3,14,115957748.0,365327 +3,15,111627415.0,365049 
+3,16,102402298.0,364516 +3,17,88380196.0,364528 +3,18,74855496.0,365973 +3,19,62378429.0,367430 +3,20,51913309.0,367949 +3,21,44263124.0,368536 +3,22,39421182.0,368591 +3,23,34464033.0,368743 +4,0,32293898.0,368562 +4,1,33167037.0,368870 +4,2,35550055.0,368460 +4,3,41508334.0,368695 +4,4,53912512.0,368598 +4,5,75485286.0,368806 +4,6,97127436.0,368176 +4,7,103458727.0,367430 +4,8,108770319.0,366931 +4,9,110076237.0,367139 +4,10,109356004.0,366733 +4,11,108685980.0,366811 +4,12,105702502.0,366489 +4,13,102355975.0,365618 +4,14,97423790.0,364740 +4,15,89395443.0,364298 +4,16,79522670.0,364282 +4,17,68778907.0,365124 +4,18,59430681.0,366374 +4,19,51752181.0,367559 +4,20,45586345.0,368063 +4,21,40096771.0,368525 +4,22,35368643.0,368677 +4,23,30307117.0,368889 +5,0,27532044.0,368376 +5,1,27188874.0,369040 +5,2,27097389.0,368840 +5,3,27967866.0,368871 +5,4,29887619.0,368847 +5,5,33447263.0,369162 +5,6,37931699.0,368788 +5,7,41211214.0,368984 +5,8,41151531.0,368588 +5,9,39547103.0,368566 +5,10,36844852.0,367935 +5,11,34210380.0,367702 +5,12,31663051.0,367358 +5,13,29840513.0,367585 +5,14,28142214.0,367521 +5,15,26071165.0,367549 +5,16,24006820.0,367466 +5,17,21909880.0,368118 +5,18,19392282.0,368219 +5,19,16635047.0,368513 +5,20,15095601.0,368611 +5,21,12814034.0,368850 +5,22,10622342.0,368837 +5,23,8261717.0,368929 +6,0,6045965.0,368328 +6,1,5114285.0,368617 +6,2,4390178.0,354319 +6,3,4296006.0,367533 +6,4,4324305.0,368029 +6,5,4783516.0,368459 +6,6,5383051.0,368364 +6,7,6276354.0,368592 +6,8,7662326.0,368442 +6,9,9297621.0,368553 +6,10,10646955.0,368136 +6,11,11569669.0,367830 +6,12,11671736.0,367262 +6,13,11832597.0,367297 +6,14,12269265.0,367023 +6,15,12558604.0,367089 +6,16,12967138.0,366824 +6,17,13320791.0,367320 +6,18,12975986.0,367348 +6,19,12219053.0,367903 +6,20,11532212.0,368041 +6,21,11941422.0,368309 +6,22,20801133.0,368345 +6,23,25971973.0,368290 diff --git a/data/emobility/LoA__count b/data/emobility/LoA__count new file mode 100644 index 000000000..d2609cc9a --- /dev/null +++ b/data/emobility/LoA__count @@ -0,0 +1,171 @@ +File generated for type: LoA_ +Time of generation:2016-08-29 00:30:52 +day,hour,count,n_counts +0,0,3673264.0,362524 +0,1,3955678.0,362757 +0,2,4719874.0,362371 +0,3,6429302.0,362606 +0,4,9990364.0,362412 +0,5,16272893.0,362492 +0,6,25261843.0,361440 +0,7,29390208.0,359715 +0,8,31930978.0,359113 +0,9,30439765.0,359725 +0,10,29741202.0,359925 +0,11,29620653.0,360094 +0,12,28565522.0,359862 +0,13,29253940.0,360115 +0,14,28951985.0,360014 +0,15,26818410.0,360026 +0,16,23091569.0,359826 +0,17,17389134.0,360273 +0,18,12767156.0,361250 +0,19,9588649.0,362182 +0,20,7611007.0,362429 +0,21,6369389.0,362807 +0,22,5725046.0,362737 +0,23,4943828.0,362992 +1,0,4533230.0,362228 +1,1,4748607.0,362491 +1,2,5451832.0,362102 +1,3,7045663.0,362366 +1,4,10286377.0,362288 +1,5,16402061.0,362524 +1,6,26370079.0,361614 +1,7,31227023.0,360135 +1,8,33488232.0,359240 +1,9,31815310.0,359483 +1,10,30775314.0,359157 +1,11,30592415.0,359188 +1,12,29770187.0,359235 +1,13,30915306.0,359522 +1,14,30752780.0,359226 +1,15,28520135.0,359243 +1,16,24517540.0,358843 +1,17,18518082.0,359295 +1,18,13743556.0,360553 +1,19,10285625.0,361606 +1,20,8039093.0,361932 +1,21,6678938.0,362447 +1,22,5956816.0,362441 +1,23,5099091.0,362582 +2,0,4627314.0,362630 +2,1,4791678.0,362977 +2,2,5474137.0,362493 +2,3,6981222.0,362852 +2,4,10189451.0,362808 +2,5,16308367.0,362885 +2,6,26341874.0,362094 +2,7,31206843.0,360765 +2,8,33512224.0,359741 +2,9,32000526.0,360146 +2,10,30958328.0,359934 +2,11,30870467.0,360240 
+2,12,30008283.0,359961 +2,13,31127303.0,360464 +2,14,31016804.0,360076 +2,15,28820447.0,359753 +2,16,24807764.0,359448 +2,17,18822368.0,359530 +2,18,14092342.0,360895 +2,19,10618074.0,362148 +2,20,8298478.0,362408 +2,21,6848298.0,362913 +2,22,6055565.0,363116 +2,23,5142248.0,363083 +3,0,4631366.0,363257 +3,1,4777868.0,363546 +3,2,5441706.0,362941 +3,3,6909181.0,363175 +3,4,9956379.0,363296 +3,5,15875606.0,363380 +3,6,25597195.0,362693 +3,7,30386656.0,361486 +3,8,32685164.0,360515 +3,9,31158273.0,360601 +3,10,30241628.0,360380 +3,11,30139594.0,360511 +3,12,29367426.0,360272 +3,13,30469667.0,360520 +3,14,30389828.0,360339 +3,15,28233086.0,360102 +3,16,24323941.0,359599 +3,17,18597970.0,359635 +3,18,14061013.0,360962 +3,19,10714477.0,362327 +3,20,8415411.0,362815 +3,21,6943322.0,363376 +3,22,6127925.0,363415 +3,23,5205311.0,363577 +4,0,4668108.0,363373 +4,1,4772604.0,363685 +4,2,5440113.0,363280 +4,3,6871076.0,363506 +4,4,9878140.0,363416 +4,5,15467561.0,363640 +4,6,24431626.0,363054 +4,7,29409306.0,362358 +4,8,32101563.0,361872 +4,9,30448060.0,362056 +4,10,29843755.0,361699 +4,11,29795506.0,361777 +4,12,28806040.0,361463 +4,13,28460208.0,360653 +4,14,26923434.0,359839 +4,15,23802612.0,359404 +4,16,19740885.0,359376 +4,17,15538429.0,360160 +4,18,12266489.0,361284 +4,19,9650738.0,362417 +4,20,7758312.0,362887 +4,21,6444632.0,363338 +4,22,5579848.0,363473 +4,23,4584425.0,363682 +5,0,3951848.0,363185 +5,1,3816168.0,363845 +5,2,3954720.0,363643 +5,3,4244829.0,363666 +5,4,4881922.0,363648 +5,5,5931278.0,363968 +5,6,7841506.0,363606 +5,7,9707002.0,363808 +5,8,10157683.0,363423 +5,9,9961205.0,363441 +5,10,9823542.0,362839 +5,11,9620356.0,362662 +5,12,9092735.0,362343 +5,13,8653516.0,362536 +5,14,8113725.0,362482 +5,15,7306442.0,362511 +5,16,6496819.0,362405 +5,17,5721744.0,363038 +5,18,4869249.0,363104 +5,19,3974632.0,363391 +5,20,3265049.0,363466 +5,21,2643487.0,363680 +5,22,2182388.0,363674 +5,23,1797544.0,363768 +6,0,1470547.0,363160 +6,1,1255734.0,363444 +6,2,1063446.0,349329 +6,3,1061634.0,362371 +6,4,1151991.0,362862 +6,5,1373847.0,363301 +6,6,1615799.0,363218 +6,7,1923327.0,363446 +6,8,2426031.0,363332 +6,9,3216728.0,363489 +6,10,4167256.0,363116 +6,11,4850810.0,362872 +6,12,5013570.0,362324 +6,13,5038018.0,362347 +6,14,5098677.0,362078 +6,15,5146378.0,362158 +6,16,5228665.0,361890 +6,17,5250334.0,362355 +6,18,4994483.0,362349 +6,19,4550709.0,362843 +6,20,4042904.0,362914 +6,21,3664340.0,363156 +6,22,3806784.0,363167 +6,23,3742585.0,363114 diff --git a/data/emobility/Lzg__count b/data/emobility/Lzg__count new file mode 100644 index 000000000..4dab594de --- /dev/null +++ b/data/emobility/Lzg__count @@ -0,0 +1,171 @@ +File generated for type: Lzg_ +Time of generation:2016-08-29 00:53:36 +day,hour,count,n_counts +0,0,23300531.0,362534 +0,1,25096969.0,362762 +0,2,26311743.0,362372 +0,3,33064872.0,362617 +0,4,44091097.0,362411 +0,5,58774545.0,362489 +0,6,66859499.0,361444 +0,7,63395056.0,359714 +0,8,63467612.0,359115 +0,9,67471264.0,359722 +0,10,69617030.0,359914 +0,11,71506410.0,360091 +0,12,72332256.0,359857 +0,13,72689015.0,360116 +0,14,73670711.0,360008 +0,15,72915695.0,360019 +0,16,69314268.0,359816 +0,17,62566391.0,360269 +0,18,54594390.0,361246 +0,19,46820202.0,362190 +0,20,40546315.0,362436 +0,21,35707272.0,362820 +0,22,32107467.0,362756 +0,23,28615668.0,362996 +1,0,27366456.0,362242 +1,1,28789768.0,362514 +1,2,31204673.0,362110 +1,3,36548400.0,362366 +1,4,46945586.0,362293 +1,5,64090224.0,362525 +1,6,77060014.0,361627 +1,7,77194043.0,360134 +1,8,78809962.0,359246 +1,9,82955531.0,359479 
+1,10,84050217.0,359150 +1,11,84668390.0,359187 +1,12,84744618.0,359230 +1,13,84621348.0,359522 +1,14,85073662.0,359227 +1,15,83661239.0,359241 +1,16,79022664.0,358847 +1,17,71051132.0,359287 +1,18,61752763.0,360553 +1,19,52167222.0,361619 +1,20,43968474.0,361936 +1,21,37762858.0,362462 +1,22,32898950.0,362462 +1,23,28713796.0,362590 +2,0,27121605.0,362645 +2,1,28125135.0,362997 +2,2,30205194.0,362503 +2,3,34978509.0,362863 +2,4,45059765.0,362813 +2,5,62727111.0,362884 +2,6,77244439.0,362096 +2,7,78755899.0,360765 +2,8,80578144.0,359747 +2,9,84297663.0,360145 +2,10,84983194.0,359927 +2,11,85422694.0,360238 +2,12,85236772.0,359955 +2,13,84969528.0,360455 +2,14,85336203.0,360077 +2,15,83723119.0,359752 +2,16,78789434.0,359447 +2,17,70899043.0,359528 +2,18,62088351.0,360891 +2,19,52773808.0,362157 +2,20,44370711.0,362419 +2,21,37787982.0,362929 +2,22,32661494.0,363135 +2,23,28259553.0,363091 +3,0,26497695.0,363274 +3,1,27342578.0,363565 +3,2,29148968.0,362951 +3,3,33531280.0,363184 +3,4,43020865.0,363301 +3,5,59674525.0,363374 +3,6,73591490.0,362699 +3,7,75200500.0,361485 +3,8,76878661.0,360519 +3,9,80069417.0,360600 +3,10,80645590.0,360370 +3,11,80906128.0,360509 +3,12,80527737.0,360270 +3,13,79986346.0,360522 +3,14,79938810.0,360337 +3,15,77949448.0,360105 +3,16,72826594.0,359596 +3,17,65072570.0,359632 +3,18,56659100.0,360958 +3,19,48320028.0,362331 +3,20,40858867.0,362824 +3,21,35133251.0,363383 +3,22,31344458.0,363430 +3,23,27509529.0,363586 +4,0,25999236.0,363388 +4,1,26831527.0,363704 +4,2,28568070.0,363290 +4,3,32911603.0,363522 +4,4,41737912.0,363421 +4,5,56663378.0,363641 +4,6,67880433.0,363054 +4,7,68794551.0,362357 +4,8,70931127.0,361877 +4,9,73691741.0,362058 +4,10,73738734.0,361693 +4,11,73116971.0,361775 +4,12,71250950.0,361459 +4,13,68302675.0,360647 +4,14,65109477.0,359843 +4,15,60500169.0,359403 +4,16,54962320.0,359378 +4,17,48815324.0,360157 +4,18,43214867.0,361284 +4,19,38809209.0,362430 +4,20,35054642.0,362896 +4,21,31235674.0,363354 +4,22,27558571.0,363497 +4,23,23650845.0,363687 +5,0,21667238.0,363204 +5,1,21580662.0,363862 +5,2,21483679.0,363651 +5,3,22058007.0,363682 +5,4,23199653.0,363662 +5,5,25420231.0,363973 +5,6,27545852.0,363613 +5,7,28356285.0,363812 +5,8,27160083.0,363424 +5,9,25498222.0,363437 +5,10,23270488.0,362843 +5,11,21217851.0,362662 +5,12,19505320.0,362348 +5,13,18135864.0,362527 +5,14,17001837.0,362474 +5,15,15841037.0,362512 +5,16,14548015.0,362409 +5,17,13067099.0,363034 +5,18,11466394.0,363095 +5,19,10152695.0,363405 +5,20,9743176.0,363476 +5,21,8456291.0,363692 +5,22,6974190.0,363669 +5,23,5179330.0,363764 +6,0,3400456.0,363164 +6,1,2801051.0,363445 +6,2,2402973.0,349340 +6,3,2315587.0,362379 +6,4,2259207.0,362876 +6,5,2420104.0,363304 +6,6,2596005.0,363219 +6,7,2793847.0,363441 +6,8,3062006.0,363324 +6,9,3440425.0,363490 +6,10,3758102.0,363105 +6,11,4014898.0,362859 +6,12,4151038.0,362313 +6,13,4268061.0,362336 +6,14,4419022.0,362076 +6,15,4591183.0,362142 +6,16,4810645.0,361877 +6,17,5014566.0,362346 +6,18,5153546.0,362353 +6,19,5300740.0,362842 +6,20,5537405.0,362921 +6,21,6692060.0,363176 +6,22,15501061.0,363189 +6,23,20824760.0,363125 diff --git a/data/emobility/Pkw__count b/data/emobility/Pkw__count new file mode 100644 index 000000000..b2625b55e --- /dev/null +++ b/data/emobility/Pkw__count @@ -0,0 +1,171 @@ +File generated for type: Pkw_ +Time of generation:2016-08-28 00:44:15 +day,hour,count,n_counts +0,0,60505420.0,351450 +0,1,37492700.0,351695 +0,2,31094666.0,351305 +0,3,39475671.0,351569 +0,4,82704875.0,351349 +0,5,247018956.0,351434 
+0,6,487519584.0,350409 +0,7,646461135.0,348716 +0,8,571271067.0,348128 +0,9,490598450.0,348711 +0,10,462098807.0,348885 +0,11,461379423.0,349059 +0,12,473135116.0,348837 +0,13,497279091.0,349097 +0,14,525025205.0,348995 +0,15,581031653.0,348992 +0,16,668935283.0,348761 +0,17,664540270.0,349227 +0,18,546120154.0,350175 +0,19,374381719.0,351112 +0,20,256033806.0,351360 +0,21,186028860.0,351744 +0,22,141426408.0,351671 +0,23,80248929.0,351924 +1,0,43623969.0,351218 +1,1,26428885.0,351487 +1,2,20734849.0,351041 +1,3,24314641.0,351298 +1,4,54268719.0,351241 +1,5,201061338.0,351468 +1,6,447685115.0,350586 +1,7,630859274.0,349088 +1,8,563379406.0,348227 +1,9,471894090.0,348499 +1,10,425217066.0,348177 +1,11,421083012.0,348237 +1,12,445605798.0,348286 +1,13,482772169.0,348560 +1,14,520127287.0,348287 +1,15,588092473.0,348288 +1,16,684434394.0,347877 +1,17,682306419.0,348300 +1,18,570243457.0,349518 +1,19,389809888.0,350571 +1,20,261697774.0,350892 +1,21,191683322.0,351407 +1,22,150080930.0,351386 +1,23,87964977.0,351563 +2,0,47228027.0,351554 +2,1,27564752.0,351936 +2,2,20881971.0,351399 +2,3,23943229.0,351764 +2,4,52710930.0,351720 +2,5,196221733.0,351787 +2,6,439548111.0,351021 +2,7,623812425.0,349722 +2,8,559872079.0,348721 +2,9,473562625.0,349144 +2,10,434527175.0,348907 +2,11,436615050.0,349209 +2,12,466735933.0,348920 +2,13,509973727.0,349427 +2,14,547416443.0,349040 +2,15,611307000.0,348729 +2,16,704108636.0,348443 +2,17,702801162.0,348526 +2,18,591477390.0,349859 +2,19,416819590.0,351094 +2,20,285685420.0,351362 +2,21,209785929.0,351836 +2,22,165540768.0,352046 +2,23,100411550.0,352018 +3,0,54416768.0,352175 +3,1,31727428.0,352479 +3,2,23611858.0,351838 +3,3,26149869.0,352075 +3,4,54280155.0,352169 +3,5,193063963.0,352267 +3,6,426914025.0,351601 +3,7,608397986.0,350430 +3,8,553340587.0,349496 +3,9,477883656.0,349601 +3,10,449755328.0,349382 +3,11,456335269.0,349484 +3,12,484445074.0,349257 +3,13,524934074.0,349509 +3,14,566074498.0,349306 +3,15,628804654.0,349073 +3,16,710971509.0,348564 +3,17,708578615.0,348583 +3,18,617255095.0,349896 +3,19,447641406.0,351227 +3,20,311506961.0,351705 +3,21,230215254.0,352259 +3,22,180349040.0,352283 +3,23,110377276.0,352472 +4,0,60883780.0,352293 +4,1,36444873.0,352596 +4,2,27410102.0,352193 +4,3,29552584.0,352402 +4,4,56640466.0,352321 +4,5,187011031.0,352532 +4,6,402093716.0,351967 +4,7,568722993.0,351276 +4,8,504850959.0,350792 +4,9,463284983.0,351003 +4,10,485698446.0,350644 +4,11,535334118.0,350736 +4,12,625403999.0,350405 +4,13,698152918.0,349632 +4,14,738517808.0,348834 +4,15,755055610.0,348414 +4,16,755413741.0,348388 +4,17,724499920.0,349175 +4,18,643737063.0,350276 +4,19,498656556.0,351381 +4,20,364590450.0,351810 +4,21,269209536.0,352262 +4,22,221017049.0,352388 +4,23,164188001.0,352618 +5,0,111955295.0,352176 +5,1,74639316.0,352782 +5,2,54695061.0,352537 +5,3,50919995.0,352570 +5,4,65448723.0,352561 +5,5,112687133.0,352887 +5,6,153415459.0,352536 +5,7,231348351.0,352715 +5,8,353558756.0,352362 +5,9,498383162.0,352386 +5,10,596449397.0,351794 +5,11,626989618.0,351619 +5,12,622441787.0,351288 +5,13,632983743.0,351503 +5,14,631392190.0,351464 +5,15,578049176.0,351502 +5,16,544733904.0,351404 +5,17,534539031.0,352026 +5,18,492234809.0,352082 +5,19,382429414.0,352358 +5,20,293794785.0,352422 +5,21,224738641.0,352629 +5,22,194983502.0,352579 +5,23,166324089.0,352695 +6,0,127654805.0,352099 +6,1,90433681.0,352404 +6,2,59014779.0,338649 +6,3,48196655.0,351344 +6,4,48941477.0,351795 +6,5,66965746.0,352263 +6,6,79768176.0,352180 
+6,7,110980975.0,352395 +6,8,187757052.0,352295 +6,9,315111159.0,352490 +6,10,450209037.0,352129 +6,11,543034147.0,351887 +6,12,571041370.0,351354 +6,13,605165846.0,351413 +6,14,640649127.0,351147 +6,15,618145297.0,351194 +6,16,620568613.0,350949 +6,17,642882258.0,351411 +6,18,589388151.0,351401 +6,19,486165115.0,351866 +6,20,376850444.0,351904 +6,21,272380036.0,352169 +6,22,182189174.0,352110 +6,23,109012042.0,352051 diff --git a/data/emobility/PmA__count b/data/emobility/PmA__count new file mode 100644 index 000000000..fcb4c70a2 --- /dev/null +++ b/data/emobility/PmA__count @@ -0,0 +1,171 @@ +File generated for type: PmA_ +Time of generation:2016-08-29 00:07:29 +day,hour,count,n_counts +0,0,1370980.0,362540 +0,1,1122100.0,362767 +0,2,1007756.0,362374 +0,3,1096055.0,362617 +0,4,1494098.0,362411 +0,5,2632117.0,362495 +0,6,5040844.0,361441 +0,7,6734633.0,359716 +0,8,7267283.0,359118 +0,9,7727214.0,359728 +0,10,8459109.0,359922 +0,11,8830399.0,360099 +0,12,8529356.0,359867 +0,13,8521282.0,360122 +0,14,8631975.0,360013 +0,15,8671765.0,360027 +0,16,8544168.0,359826 +0,17,7192283.0,360277 +0,18,5656559.0,361253 +0,19,4287554.0,362189 +0,20,3198490.0,362433 +0,21,2352343.0,362817 +0,22,1755082.0,362754 +0,23,1314022.0,362998 +1,0,1005392.0,362248 +1,1,847397.0,362516 +1,2,786540.0,362110 +1,3,851625.0,362375 +1,4,1173075.0,362297 +1,5,2119684.0,362522 +1,6,4470704.0,361613 +1,7,6276030.0,360136 +1,8,6752467.0,359244 +1,9,7189080.0,359483 +1,10,7794481.0,359153 +1,11,8079976.0,359192 +1,12,7862712.0,359234 +1,13,8091831.0,359525 +1,14,8402102.0,359227 +1,15,8557098.0,359248 +1,16,8512622.0,358840 +1,17,7191074.0,359293 +1,18,5729074.0,360558 +1,19,4396866.0,361612 +1,20,3289114.0,361926 +1,21,2428819.0,362451 +1,22,1825280.0,362455 +1,23,1367457.0,362594 +2,0,1049680.0,362645 +2,1,880559.0,363003 +2,2,812058.0,362501 +2,3,872623.0,362866 +2,4,1193607.0,362812 +2,5,2176322.0,362883 +2,6,4540578.0,362096 +2,7,6330227.0,360766 +2,8,6895238.0,359745 +2,9,7460725.0,360145 +2,10,8174174.0,359930 +2,11,8488161.0,360241 +2,12,8312798.0,359961 +2,13,8691772.0,360462 +2,14,9098390.0,360072 +2,15,9243026.0,359755 +2,16,9153770.0,359443 +2,17,7793564.0,359527 +2,18,6293163.0,360891 +2,19,4883114.0,362152 +2,20,3678028.0,362415 +2,21,2727234.0,362921 +2,22,2048624.0,363131 +2,23,1532197.0,363095 +3,0,1160897.0,363278 +3,1,960382.0,363568 +3,2,871783.0,362954 +3,3,942043.0,363193 +3,4,1271954.0,363303 +3,5,2268038.0,363370 +3,6,4625537.0,362690 +3,7,6467140.0,361481 +3,8,7184633.0,360522 +3,9,7968584.0,360603 +3,10,8836589.0,360373 +3,11,9178199.0,360510 +3,12,8951110.0,360273 +3,13,9232805.0,360526 +3,14,9572293.0,360342 +3,15,9661117.0,360107 +3,16,9488316.0,359596 +3,17,8194615.0,359636 +3,18,6753290.0,360962 +3,19,5337257.0,362326 +3,20,4095281.0,362823 +3,21,3089366.0,363383 +3,22,2357248.0,363432 +3,23,1777432.0,363583 +4,0,1357791.0,363387 +4,1,1109570.0,363707 +4,2,994076.0,363294 +4,3,1051080.0,363518 +4,4,1383504.0,363424 +4,5,2431527.0,363642 +4,6,4741219.0,363054 +4,7,6483026.0,362360 +4,8,7400858.0,361879 +4,9,8627703.0,362057 +4,10,9834477.0,361694 +4,11,10421857.0,361781 +4,12,10457946.0,361466 +4,13,10949863.0,360649 +4,14,11420028.0,359844 +4,15,11369683.0,359407 +4,16,10765132.0,359381 +4,17,9546660.0,360162 +4,18,8119567.0,361293 +4,19,6526356.0,362426 +4,20,5095448.0,362893 +4,21,3913660.0,363347 +4,22,3053728.0,363492 +4,23,2370754.0,363689 +5,0,1847091.0,363201 +5,1,1530385.0,363869 +5,2,1370784.0,363664 +5,3,1428099.0,363689 +5,4,1841623.0,363669 +5,5,2841688.0,363981 
+5,6,4385937.0,363617 +5,7,6372232.0,363819 +5,8,8728069.0,363429 +5,9,11188778.0,363447 +5,10,12580268.0,362851 +5,11,12732664.0,362671 +5,12,12021894.0,362362 +5,13,11731520.0,362544 +5,14,11312156.0,362501 +5,15,10269083.0,362528 +5,16,9114799.0,362428 +5,17,8000062.0,363052 +5,18,6642936.0,363116 +5,19,5212069.0,363414 +5,20,3996119.0,363495 +5,21,3003952.0,363702 +5,22,2284437.0,363691 +5,23,1735776.0,363789 +6,0,1314876.0,363174 +6,1,1054444.0,363453 +6,2,892669.0,349344 +6,3,903624.0,362388 +6,4,1089356.0,362880 +6,5,1610545.0,363319 +6,6,2321719.0,363229 +6,7,3058046.0,363457 +6,8,4194907.0,363344 +6,9,5979547.0,363511 +6,10,7804799.0,363126 +6,11,8962863.0,362885 +6,12,9118678.0,362340 +6,13,8993217.0,362365 +6,14,8798804.0,362102 +6,15,8433439.0,362170 +6,16,8105182.0,361914 +6,17,7728014.0,362369 +6,18,6928657.0,362369 +6,19,5658506.0,362862 +6,20,4303733.0,362940 +6,21,3199738.0,363194 +6,22,2418659.0,363201 +6,23,1806868.0,363125 diff --git a/data/emobility/Sat__count b/data/emobility/Sat__count new file mode 100644 index 000000000..a656b46be --- /dev/null +++ b/data/emobility/Sat__count @@ -0,0 +1,171 @@ +File generated for type: Sat_ +Time of generation:2016-08-29 01:16:51 +day,hour,count,n_counts +0,0,17017517.0,351437 +0,1,18439581.0,351686 +0,2,19468406.0,351292 +0,3,24537101.0,351564 +0,4,32642712.0,351336 +0,5,43309706.0,351422 +0,6,48383063.0,350401 +0,7,45353712.0,348704 +0,8,45173926.0,348115 +0,9,48637666.0,348698 +0,10,50743493.0,348867 +0,11,52439727.0,349044 +0,12,53268861.0,348823 +0,13,53488536.0,349086 +0,14,54302613.0,348983 +0,15,53953698.0,348980 +0,16,51548206.0,348748 +0,17,46942974.0,349215 +0,18,40976380.0,350158 +0,19,34358486.0,351102 +0,20,28305820.0,351354 +0,21,23558633.0,351738 +0,22,20789610.0,351665 +0,23,18763808.0,351915 +1,0,17848515.0,351208 +1,1,18553615.0,351479 +1,2,20468167.0,351035 +1,3,25007027.0,351293 +1,4,33535391.0,351234 +1,5,46897621.0,351462 +1,6,56309592.0,350582 +1,7,56258616.0,349083 +1,8,57451495.0,348221 +1,9,60986147.0,348491 +1,10,62218789.0,348171 +1,11,62962418.0,348231 +1,12,63161329.0,348276 +1,13,62899460.0,348553 +1,14,63190916.0,348280 +1,15,62279427.0,348283 +1,16,59062519.0,347871 +1,17,53456894.0,348288 +1,18,46400240.0,349506 +1,19,38337179.0,350566 +1,20,30782786.0,350886 +1,21,24953748.0,351401 +1,22,21238400.0,351381 +1,23,18724911.0,351552 +2,0,17513358.0,351546 +2,1,17898982.0,351928 +2,2,19574226.0,351393 +2,3,23703443.0,351759 +2,4,32028882.0,351712 +2,5,45839412.0,351782 +2,6,56508910.0,351013 +2,7,57582383.0,349717 +2,8,58940357.0,348715 +2,9,62135418.0,349135 +2,10,63063545.0,348898 +2,11,63634971.0,349203 +2,12,63610569.0,348909 +2,13,63214435.0,349420 +2,14,63422083.0,349032 +2,15,62364740.0,348723 +2,16,58909396.0,348437 +2,17,53381766.0,348515 +2,18,46720372.0,349845 +2,19,38854390.0,351086 +2,20,31135999.0,351355 +2,21,25045223.0,351830 +2,22,21133289.0,352042 +2,23,18447782.0,352011 +3,0,17096694.0,352169 +3,1,17372335.0,352476 +3,2,18861394.0,351835 +3,3,22704290.0,352070 +3,4,30567146.0,352163 +3,5,43656838.0,352263 +3,6,53868721.0,351596 +3,7,55031883.0,350426 +3,8,56285139.0,349491 +3,9,59036165.0,349592 +3,10,59835441.0,349376 +3,11,60237052.0,349476 +3,12,60048406.0,349249 +3,13,59420258.0,349500 +3,14,59265301.0,349298 +3,15,57887474.0,349068 +3,16,54251730.0,348554 +3,17,48788325.0,348575 +3,18,42435329.0,349885 +3,19,35383047.0,351219 +3,20,28418010.0,351700 +3,21,23022980.0,352250 +3,22,20121248.0,352278 +3,23,17872306.0,352467 +4,0,16749329.0,352285 +4,1,17049280.0,352589 
+4,2,18457512.0,352186
+4,3,22246902.0,352395
+4,4,29630136.0,352312
+4,5,41375477.0,352524
+4,6,49589013.0,351962
+4,7,50217179.0,351271
+4,8,51775236.0,350788
+4,9,54211553.0,350997
+4,10,54528158.0,350638
+4,11,54238347.0,350729
+4,12,52892940.0,350397
+4,13,50513977.0,349624
+4,14,48137956.0,348829
+4,15,44907879.0,348405
+4,16,41101875.0,348383
+4,17,36731229.0,349165
+4,18,32348588.0,350264
+4,19,28229015.0,351376
+4,20,24150750.0,351805
+4,21,20380335.0,352256
+4,22,17704868.0,352384
+4,23,15405120.0,352609
+5,0,13924290.0,352169
+5,1,13599040.0,352771
+5,2,13746841.0,352529
+5,3,14825664.0,352562
+5,4,16564819.0,352554
+5,5,18982583.0,352880
+5,6,20985673.0,352524
+5,7,21880022.0,352709
+5,8,21196279.0,352356
+5,9,19988910.0,352378
+5,10,18254921.0,351790
+5,11,16655183.0,351612
+5,12,15313830.0,351278
+5,13,14202905.0,351489
+5,14,13265942.0,351447
+5,15,12381772.0,351485
+5,16,11411064.0,351390
+5,17,10270984.0,352014
+5,18,9035394.0,352064
+5,19,7981458.0,352346
+5,20,7671168.0,352401
+5,21,6680304.0,352617
+5,22,5522186.0,352566
+5,23,4105229.0,352678
+6,0,2676969.0,352088
+6,1,2215451.0,352393
+6,2,1924656.0,338640
+6,3,1881690.0,351333
+6,4,1840389.0,351786
+6,5,1959029.0,352247
+6,6,2087353.0,352162
+6,7,2257580.0,352374
+6,8,2476505.0,352274
+6,9,2746232.0,352468
+6,10,2950279.0,352110
+6,11,3114276.0,351867
+6,12,3202088.0,351332
+6,13,3289422.0,351395
+6,14,3403264.0,351120
+6,15,3545969.0,351170
+6,16,3711850.0,350924
+6,17,3858901.0,351390
+6,18,3979807.0,351386
+6,19,4104294.0,351849
+6,20,4290238.0,351886
+6,21,5176706.0,352151
+6,22,11499982.0,352090
+6,23,15224700.0,352045
diff --git a/data/emobility/traffic.tex b/data/emobility/traffic.tex
new file mode 100644
index 000000000..327ad68eb
--- /dev/null
+++ b/data/emobility/traffic.tex
@@ -0,0 +1,15 @@
+\documentclass[a4paper,10pt]{article}
+\usepackage[utf8]{inputenc}
+
+\begin{document}
+
+\section{Traffic data}
+The data are provided by the Bundesanstalt für Straßenwesen (BASt).
+Hourly data for passenger cars from 2010--2015 were used.
+The data give the number of vehicles counted in a given hour.
+Day $0$ refers to Monday, day $1$ to Tuesday, etc.
+Hour $0$ gives the number of vehicles counted between 0 a.m. and 1 a.m., hour $1$ between 1 a.m. and 2 a.m., and so on.
+Only measurements marked as ``on a regular base'' were considered.
+Hence, the total number of measurements for individual hours of the week varies slightly.
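+As an illustrative reading of these columns (an editorial example, not part of the original BASt documentation): the row \texttt{0,7,914767528.0,374979} in \texttt{KFZ\_\_count} means that 914767528 vehicles were counted on Mondays between 7 a.m. and 8 a.m., aggregated over all counting stations and years, based on 374979 individual hourly measurements.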
+\end{document} diff --git a/data/energy_totals_DF_2030.csv b/data/energy_totals_DF_2030.csv new file mode 100644 index 000000000..14fb5124a --- /dev/null +++ b/data/energy_totals_DF_2030.csv @@ -0,0 +1,2 @@ +,agriculture biomass,agriculture electricity,agriculture oil,electricity rail,electricity residential,residential biomass,residential gas,residential heat biomass,residential heat gas,residential heat oil,residential oil,services biomass,services electricity,services gas,services oil,total domestic aviation,total domestic navigation,total international aviation,total international navigation,total rail,total residential space,total residential water,total road,total services space,total services water,electricity residential space,electricity residential water,district heat share,electricity services space,electricity services water +MA,0.0,4.123081554340076,8.521542348983138,0.5789568718163199,25.58975669506309,1.3014631205375813,0.0,4.002848367140494,0.0,10.133548789154782,5.218711410000001,7.2243124635979745,8.529932599841148,0.0,0.4160644494555048,0.5915285488151758,0.0,12.000646158859418,2.019527928700952,0.8072925980221416,14.118892132377166,9.41259475491811,81.7049031093965,0.0,0.0,5.6370538386,3.7580358924,0,0,0 diff --git a/data/existing_infrastructure/existing_heating_raw.csv b/data/existing_infrastructure/existing_heating_raw.csv new file mode 100644 index 000000000..224c2ef7f --- /dev/null +++ b/data/existing_infrastructure/existing_heating_raw.csv @@ -0,0 +1,32 @@ +,gas boiler,coal boiler,oil boiler,resistive heater,air heat pump,ground heat pump +Austria,9.32,0.4,15.42,0,0.72,1.077 +Belgium,28.39,1.19,19.53,3.14,0.17,0.061 +Bulgaria,0.16,3.68,0.04,3.46,1.01,0.045 +Croatia,8.39,0.03,2.88,1.53,0,0 +Czech Republic,9.26,1.02,0.1,2.73,0.35,0.263 +Denmark,4.82,0,3.67,2.19,1.9,0.381 +Estonia,0.22,0.02,0.12,0.27,0.33,0.1 +Finland,0,0.04,3.79,10.3,1.98,0.58 +France,76.85,1.03,46.03,87.24,26.14,1.97 +Germany,131.09,0.44,132.04,0,2.38,3.29 +Greece,2.17,0.03,18.13,5.91,0,0 +Hungary,21.21,1.3,0.04,0.06,0.03,0.035 +Ireland,4.32,0.8,4.85,1.03,0.03,0.03 +Italy,112.68,1.89,3.33,6.61,54.98,0.6 +Latvia,1.53,0.4,0,0.03,0,0 +Lithuania,0,0,0,0,0.01,0.02 +Luxembourg,0.79,0,0.77,0.09,0.01,0.001 +Netherlands,81.41,0,0.1,0.1,1.82,0.849 +Poland,8.25,24.75,9.04,5.96,0.01,0.04 +Portugal,4.79,0,0.2,21.26,1.58,0.064 +Romania,16.56,0.32,0.03,0.72,0,0 +Slovakia,8.05,0.19,0.01,0.55,0.06,0.015 +Slovenia,0.4,0,1.08,0.4,0.03,0.056 +Spain,48.99,0.51,17.95,56.58,1.15,0.016 +Sweden,1.01,0,0.77,3.76,3.42,4.813 +United Kingdom,160.49,1.26,7.39,13.81,0.81,0.21 +Norway,,,,,2.91,0.334 +Switzerland,,,,,1,0.849 +Serbia,,,,,, +Bosnia Herzegovina,,,,,, +DEFAULT,,,,,, diff --git a/data/export_ports.csv b/data/export_ports.csv new file mode 100644 index 000000000..2981ce22b --- /dev/null +++ b/data/export_ports.csv @@ -0,0 +1,14 @@ +name,country,fraction,y,x +Port of Nador,MA,0.2,35.2748795,-2.92229843 +Port of Tanger Med,MA,0.2,35.5324,-5.3036 +Port of Kenitra,MA,0.1,34.26101,-6.5802 +Port of Tan-tan,MA,0.4,28.47384,-11.3453 +Port of Kenitra,MA,0.1,33.1267,-8.62028 +dummy port 1,NG,1,6.455,4.234 +dummy port 1,BJ,1,6.47,2.63 +dummy port 1,BR,1,-22.9068,-43.1729 +dummy port 1,NA,1,-21.05431,13.50664 +dummy port 1,AE,1,25.2048,55.2708 +AS Suways,EG, 1, 29.966667,32.55 +Damietta,EG,1,31.483333,31.75 +Port Said,EG,3,31.266667,32.3 diff --git a/data/heat_load_profile_BDEW.csv b/data/heat_load_profile_BDEW.csv new file mode 100644 index 000000000..62ca10712 --- /dev/null +++ b/data/heat_load_profile_BDEW.csv @@ -0,0 +1,25 @@ 
+,residential space weekday,residential space weekend,services space weekday,services space weekend,residential water weekday,residential water weekend,services water weekday,services water weekend +0,0.5437843306385036,0.5391846410003029,0.740230434593118,0.7918173557545402,1.0,1.0,1.0,1.0 +1,0.5690496225400243,0.5641534370440313,0.7642025524842398,0.7929627291950984,1.0,1.0,1.0,1.0 +2,0.5624023211873742,0.5575494117194042,0.8264420882344785,0.8961602364492307,1.0,1.0,1.0,1.0 +3,0.6120351867307156,0.6074588966300298,0.9338477492552973,1.066547622880321,1.0,1.0,1.0,1.0 +4,0.8210089232467712,0.8188451841881503,1.1288089786462463,1.2779268432155158,1.0,1.0,1.0,1.0 +5,1.2287073985428116,1.2315677844536332,1.3311522394966053,1.2808129834243316,1.0,1.0,1.0,1.0 +6,1.327953505819319,1.3349874311629708,1.3976491755316236,1.3076676145167292,1.0,1.0,1.0,1.0 +7,1.2533048874868005,1.2584095945395426,1.3529869654334066,1.239881414312941,1.0,1.0,1.0,1.0 +8,1.204661538907097,1.206562127967529,1.2631870820835946,1.157513929299677,1.0,1.0,1.0,1.0 +9,1.1511425365003825,1.152931252109671,1.183486516733693,1.1001631309844286,1.0,1.0,1.0,1.0 +10,1.0982914366923946,1.0987739728887453,1.1056637898031139,1.0553379006911972,1.0,1.0,1.0,1.0 +11,1.0602079991199889,1.0598534287519163,1.0536117591812475,0.9953570175561463,1.0,1.0,1.0,1.0 +12,1.0430483470403709,1.042552786631541,1.0075511014823457,0.9238971341830102,1.0,1.0,1.0,1.0 +13,1.023765876994618,1.0234573235486537,0.983633820661761,0.928978159404834,1.0,1.0,1.0,1.0 +14,1.0250355817085612,1.0241187665206792,0.973887563496691,0.9277637088455348,1.0,1.0,1.0,1.0 +15,1.0419068035344277,1.0407369052119213,0.968639109712126,0.940383626933661,1.0,1.0,1.0,1.0 +16,1.0886607269753739,1.0871365340901091,0.9776106671510321,0.9762628252848075,1.0,1.0,1.0,1.0 +17,1.1391891744979068,1.1377875788466947,0.9713068946564802,0.9923707220696051,1.0,1.0,1.0,1.0 +18,1.1813708458227477,1.1815796155786216,0.97710710371407,0.9822063279944322,1.0,1.0,1.0,1.0 +19,1.2048721952031847,1.2066686818939167,0.9620977486617706,0.9872726025741575,1.0,1.0,1.0,1.0 +20,1.1883594612741015,1.1911629803333679,0.9096499832485738,0.9736368622053816,1.0,1.0,1.0,1.0 +21,1.0841006081889941,1.0875548281900813,0.7954827338259405,0.8733383541170725,1.0,1.0,1.0,1.0 +22,0.8887378869444746,0.8893062174837649,0.7007233800713178,0.7753100551108082,1.0,1.0,1.0,1.0 +23,0.6584028044030574,0.6576606192147261,0.6910405618412271,0.756430842996538,1.0,1.0,1.0,1.0 diff --git a/data/hydrogen_salt_cavern_potentials.csv b/data/hydrogen_salt_cavern_potentials.csv new file mode 100644 index 000000000..7d905f7e9 --- /dev/null +++ b/data/hydrogen_salt_cavern_potentials.csv @@ -0,0 +1,6 @@ +ct,TWh +DZ, 1000 +MA, 300 +TN, 100 +NG, 100 +BJ, 20 diff --git a/data/override_component_attrs/buses.csv b/data/override_component_attrs/buses.csv new file mode 100644 index 000000000..890580582 --- /dev/null +++ b/data/override_component_attrs/buses.csv @@ -0,0 +1,3 @@ +attribute,type,unit,default,description,status +location,string,n/a,n/a,Reference to original electricity bus,Input (optional) +unit,string,n/a,MWh,Unit of the bus (descriptive only), Input (optional) diff --git a/data/override_component_attrs/generators.csv b/data/override_component_attrs/generators.csv new file mode 100644 index 000000000..0facfb2f9 --- /dev/null +++ b/data/override_component_attrs/generators.csv @@ -0,0 +1,3 @@ +attribute,type,unit,default,description,status +build_year,integer,year,n/a,build year,Input (optional) +lifetime,float,years,n/a,lifetime,Input 
(optional) diff --git a/data/override_component_attrs/links.csv b/data/override_component_attrs/links.csv new file mode 100644 index 000000000..87f608c3d --- /dev/null +++ b/data/override_component_attrs/links.csv @@ -0,0 +1,13 @@ +attribute,type,unit,default,description,status +bus2,string,n/a,n/a,2nd bus,Input (optional) +bus3,string,n/a,n/a,3rd bus,Input (optional) +bus4,string,n/a,n/a,4th bus,Input (optional) +efficiency2,static or series,per unit,1.,2nd bus efficiency,Input (optional) +efficiency3,static or series,per unit,1.,3rd bus efficiency,Input (optional) +efficiency4,static or series,per unit,1.,4th bus efficiency,Input (optional) +p2,series,MW,0.,2nd bus output,Output +p3,series,MW,0.,3rd bus output,Output +p4,series,MW,0.,4th bus output,Output +build_year,integer,year,n/a,build year,Input (optional) +lifetime,float,years,n/a,lifetime,Input (optional) +carrier,string,n/a,n/a,carrier,Input (optional) diff --git a/data/override_component_attrs/loads.csv b/data/override_component_attrs/loads.csv new file mode 100644 index 000000000..10bb5b4f9 --- /dev/null +++ b/data/override_component_attrs/loads.csv @@ -0,0 +1,2 @@ +attribute,type,unit,default,description,status +carrier,string,n/a,n/a,carrier,Input (optional) diff --git a/data/override_component_attrs/stores.csv b/data/override_component_attrs/stores.csv new file mode 100644 index 000000000..8d521fab1 --- /dev/null +++ b/data/override_component_attrs/stores.csv @@ -0,0 +1,4 @@ +attribute,type,unit,default,description,status +build_year,integer,year,n/a,build year,Input (optional) +lifetime,float,years,n/a,lifetime,Input (optional) +carrier,string,n/a,n/a,carrier,Input (optional) diff --git a/data/temp_hard_coded/biomass_transport_costs.csv b/data/temp_hard_coded/biomass_transport_costs.csv new file mode 100644 index 000000000..bb7c8fa92 --- /dev/null +++ b/data/temp_hard_coded/biomass_transport_costs.csv @@ -0,0 +1,48 @@ +0,EUR/km/MWh +BE,0.140625 +BG,0.063541666666667 +CZ,0.0875 +DK,0.190625 +DE,0.133333333333333 +EE,0.086458333333333 +IE,0.133333333333333 +GR,0.111458333333333 +ES,0.119791666666667 +FR,0.142708333333333 +IT,0.132291666666667 +CY,0.109375 +LV,0.08125 +LT,0.073958333333333 +LU,0.145833333333333 +HU,0.072916666666667 +MT,0.091666666666667 +NL,0.145833333333333 +AT,0.136458333333333 +PL,0.078125 +PT,0.092708333333333 +RO,0.0625 +SI,0.097916666666667 +SK,0.083333333333333 +FI,0.148958333333333 +SE,0.161458333333333 +GB,0.14375 +HR,0.080208333333333 +AL,0.055208333333333 +BA,0.059375 +MK,0.051041666666667 +ME,0.060416666666667 +RS,0.057291666666667 +KO,0.058333333333333 +UA,0.053125 +TR,0.078125 +MD,0.055208333333333 +CH,0.177083333333333 +NO,0.161458333333333 +MA,0.1 +NG,0.1 +BJ,0.1 +NA,0.1 +BR,0.1 +AE,0.1 +QA,0.1 +EG,0.1 diff --git a/data/temp_hard_coded/energy_totals.csv b/data/temp_hard_coded/energy_totals.csv new file mode 100644 index 000000000..cb94680cd --- /dev/null +++ b/data/temp_hard_coded/energy_totals.csv @@ -0,0 +1,37 @@ +country,total residential space,electricity residential space,total residential water,electricity residential water,total residential cooking,electricity residential cooking,total residential,electricity residential,derived heat residential,thermal uses residential,total services space,electricity services space,total services water,electricity services water,total services cooking,electricity services cooking,total services,electricity services,derived heat services,thermal uses services,total agriculture electricity,total agriculture heat,total agriculture machinery,total 
agriculture,total road,electricity road,total two-wheel,total passenger cars,electricity passenger cars,total other road passenger,electricity other road passenger,total light duty road freight,electricity light duty road freight,total heavy duty road freight,total rail,electricity rail,total rail passenger,electricity rail passenger,total rail freight,electricity rail freight,total aviation passenger,total aviation freight,total domestic aviation passenger,total international aviation passenger,total domestic aviation freight,total international aviation freight,total domestic aviation,total international aviation,total domestic navigation,district heat share,total international navigation +AL,2.7488162085994774,0.33565441229798754,0.8186880852468945,0.2935681630661989,0.5876126186307569,0.4133041142766344,5.795,2.7119444444444447,,,0.7015204014345264,0.21339438510562977,0.16085230281569138,0.08980533653391662,0.21510664966169507,0.1666272170319332,1.9011111111111112,1.2980555555555557,,,,,,,8.658611111111112,0.0,0.11293992734115464,5.012148472073846,0.0,0.41883893104610537,0.0,1.027023388363935,0.0,2.087660392286071,0.035555555555555556,0.0,0.02700012495483377,0.0,0.00855543060072178,0.0,0.22298871032990178,0.01590017855898714,0.0,0.22298871032990178,0.0,0.01590017855898714,0.0,0.2388888888888889,0.23777777777777778,0.0,0.0 +AT,46.09595316297465,3.7088411225845146,9.130088223854552,1.9169282473111817,3.3241823708759717,2.1859785517387285,68.12759623448028,17.89608727824309,6.705828826078094,58.611179779652815,19.128966573551367,0.6672257209178536,3.401969524184086,0.10500134121606851,3.4192218216867016,2.036238290254376,35.02192174891562,12.04308062090128,10.588603994087725,26.421270067764656,0.6437819867533202,3.3380974776703938,1.7539298226462325,5.735809287069947,84.44686747629804,0.023014348842666586,0.5770811056953263,48.1396891335453,0.005082302937890749,3.0957840148665956,0.017261519330267264,5.3618371595975685,0.0006705265745085729,27.272476062593228,3.4674938979041348,2.9530470623465117,2.3133298743867328,1.9446718379317867,1.154164023517402,1.008375224414725,8.337414321498516,0.3456671637923202,0.31754085926093834,8.019873462237577,0.08287771118699262,0.26278945260532754,0.40041857044793094,8.282662914842904,0.05805591816196775,0.14,0.23611111111111113 +BA,4.521581968627476,0.5627703651445004,1.353670546906757,0.4922070328004311,0.978915195284958,0.6929606726000438,9.604722222222223,4.546944444444445,,,1.7557982432118706,0.2158498892997825,0.3149789075223063,0.09083871602235619,0.32148832498936736,0.16854457690098215,3.215522170273163,1.3129921287898147,,,,,,,6.0156632317881025,0.0,0.07846622969764248,3.4822440791948663,0.0,0.2909928538426024,0.0,0.7135355493260557,0.0,1.4504245197269359,0.1388888888888889,0.1388888888888889,0.10546923810481942,0.1097829232524015,0.03341965078406946,0.029105965636487405,0.13956511635536614,0.009951670949498134,0.0,0.13956511635536614,0.0,0.009951670949498134,0.0,0.14951678730486426,0.04381257394940196,0.08,0.0 
+BE,60.07759573514164,2.4574590580229816,14.269796248030486,1.7687525943758393,5.3150528341341285,1.869339247170565,92.39298733318694,19.256978502352034,0.15027841087748214,79.74599606385434,30.52387518704442,5.98319045991244,4.406024721977337,1.1459566205952985,4.257282425660194,1.6251176809076644,51.73931110007098,21.649305145340918,1.0377790287661888,40.119189505718644,0.6735296575315048,5.349606793884193,1.9354834277204307,7.958619879136127,97.92152785552094,0.0037235863529514156,0.6958590248778924,57.6774759076758,0.001621646364895789,4.3240869846997185,0.0021019399880556266,11.321734111097756,0.0,23.902371827169755,2.21379669097965,1.5511036045791369,1.767094793484459,1.3713338815623115,0.4467018974951906,0.17976972301682553,13.926425532583533,3.226394430241005,0.0,13.926425532583533,0.6062760498029495,2.620118380438056,0.6062760498029495,16.546543913021587,1.9294409331483928,0.009911613901887219,78.50166666666667 +BG,15.34450879546816,1.1948032511633053,5.010635660353494,2.6674817677458496,2.9381856688027854,2.4998424360384854,27.680373902007464,10.91200314004214,4.1797076855536455,23.40169700086206,4.799509529916545,1.9213248265229492,1.4393963224086341,0.8415780032415145,1.6352650929946666,1.3509269228092395,12.256671707192313,8.514799907362562,1.7258493745019599,8.164882097871613,0.2411544187850093,1.3111580530137128,0.8304660005944632,2.382778472393185,28.905658327361348,0.006186517068539435,0.23691902435465959,18.58584785341711,0.0,3.0395474242667904,0.006186517068539435,1.786688905128767,0.0,5.256655120194014,0.46444007750147454,0.25305296607471406,0.29289081917566845,0.17429504605024307,0.17154925832580614,0.078757920024471,2.195900136902935,0.02604430754150471,0.0789871111111105,2.1169130257918245,0.0151817923295495,0.010862515211955207,0.09416890344065998,2.1277755410037797,0.0,0.16,0.8744444444444445 +CH,41.388888888888886,3.555555555555556,8.777777777777779,2.361111111111111,2.5833333333333335,1.3611111111111112,62.05555555555556,17.694444444444446,,,17.27777777777778,0.888888888888889,2.7777777777777777,0.19444444444444442,0.6666666666666666,0.6666666666666666,35.38888888888889,16.444444444444446,,,,,,,55.666666666666664,0.0,0.7260967386085588,32.223366391954016,0.0,2.6927375375072824,0.0,6.602787431527142,0.0,13.421678567069666,3.083333333333333,3.083333333333333,2.3414170859269907,2.4371808962033126,0.7419162474063419,0.6461524371300204,16.895389759843553,1.2157213512675575,0.8194594802459856,16.075930279597568,0.06942940864290333,1.1462919426246543,0.888888888888889,17.22222222222222,0.4444444444444445,0.04,0.0 +CZ,58.59225852701038,2.4397371460031145,9.920000585931934,2.8104727119514794,5.1398761189566375,2.795398161822098,79.39338451146106,14.199960862984167,12.911903976097026,73.67973401637705,22.226676229531854,4.123665807848028,2.9871449389938958,0.974060500075843,3.4761853439128725,1.7400914785868866,36.09634123035027,14.425308638984422,5.947460541150703,28.9685925795637,0.7502833058403744,3.337334059897441,2.362987517719688,6.450604883457503,65.17743647357334,0.11461286222857717,0.6745219969207359,32.94627378632074,0.0002015808401501622,3.9880971930196965,0.114411281388427,5.405543694495818,0.0,22.16299980281634,2.582197066119628,1.5033118260828273,1.933497057324229,1.0689062063545154,0.6487000087953989,0.4344056197283119,4.06708233878091,0.05853827533020912,0.10902295531872894,3.9580593834621816,0.03013694347020266,0.028401331860006464,0.1391598987889316,3.986460715322188,0.03583321105643493,0.1837279295500325,0.0 
+DE,422.63700591976396,20.13959672172813,86.70647136646228,9.410722608372033,40.90856738368676,15.81902745537792,634.5784032356139,136.5998807002577,45.65193248491252,551.4743466376403,172.28581851293262,6.129155246337327,37.97466554752434,5.9552147050509925,46.22787655969591,21.25591844276544,358.9476566306718,138.0047914499899,23.983059770905506,261.1975741717025,7.172357267965595,5.5214722244297185,1.3577309259142931,14.05156041830961,591.9194716582508,0.05050420054304681,5.207215732491667,383.65594906711345,0.02877930018070664,19.499261748767733,0.01413965576102355,28.80429321088056,0.007585244601316613,154.75275189899736,16.33312001055445,12.084157080207738,11.993787447599404,8.46906597723943,4.339332562955044,3.615091102968311,84.94350522962422,12.524776804663865,8.43224142805425,76.51126380156997,2.1094557231162954,10.415321081547571,10.541697151170544,86.92658488311754,3.57499889114527,0.0856864750371445,31.408611111111114 +DK,34.53565732496317,0.6806012886411269,7.218235887609038,0.45889599724585783,2.9501884468581867,2.258315131597759,51.15544591088954,10.11102640577204,18.745897740078043,44.747036577480664,11.475853825810752,1.2385462978018895,1.904555805876222,0.40816329670928175,2.0812833509419217,1.6222263816288023,22.784158890115183,10.65418025026027,8.70825647431357,15.824042136275821,1.6216608790407236,4.878196128031019,3.1344058779643054,9.63426288503605,46.57926052841746,0.0017109292425222344,0.2646353354471898,25.58627236868954,0.0017109292425222344,3.980646619465156,0.0,6.6343555671294405,0.0,10.113350637686127,1.3338905353516775,0.3969435023050881,1.1985625014455203,0.3567919724863354,0.1353280339061572,0.04015152981875274,10.308948937840139,0.30022033375701207,0.7570890412500971,9.551859896590043,0.11831142223833246,0.18190891151867958,0.8754004634884297,9.733768808108723,2.0501746323762484,0.45325516397244575,8.084166666666667 +EE,7.524087112872855,0.11443500108583639,1.5731350788411405,0.09811231367330156,0.7499330885569813,0.6457207604008528,10.889283298156702,1.9339375314756475,3.8688839112667712,9.847287373882864,2.6579160452456514,0.6000549845808805,0.3640235438222673,0.1694131771993946,0.42707406284673427,0.36387085233399696,4.681279656641867,2.380786253140931,1.1969429044836677,3.458011907571063,0.1582739809227145,0.7096124761044406,0.3995805894739887,1.2674670465011437,8.153867025767806,0.01927244980418805,0.014073657887598809,4.954656881128455,0.00028399729179984755,0.8499484240384143,0.01898006520741134,0.595325092571906,8.387304976863395e-06,1.739862970141434,0.4574841926887977,0.058052614617976964,0.1339973783867172,0.058052614617976964,0.3234868143020805,0.0,0.39677317477543717,0.00934210992501796,0.0,0.39677317477543717,0.003467091041626906,0.005875018883391054,0.003467091041626906,0.40264819365882815,0.058886992038257285,0.38073753236138214,2.1597222222222223 
+ES,80.372310906666,13.819023440188797,39.557576490863816,8.641524301473853,19.760765397495266,9.169181271047236,186.734573950555,80.18486078573926,0.0,141.46698671684436,44.387071919013835,10.384024487841591,13.789666475279082,7.391941124160933,17.800179339394454,13.43821278685975,125.67139313470406,81.29826081736151,0.0,85.33907109028121,3.4201273086846045,15.342477694688231,9.162390411860274,27.92499541523311,331.98656148730413,0.0016936958751923449,5.574506816789147,194.4928579316821,0.0016936958751923449,11.607993191831378,0.0,22.96786501955535,0.0,97.34333852744622,10.592495992600144,4.514991908109179,7.406098768146668,4.000539809592942,3.1863972244534766,0.5144520985162377,65.8381525590742,1.0762918525127656,24.535555543530545,41.30259701554366,0.40102774564942556,0.6752641068633399,24.936583289179968,41.977861122407,9.617504423289883,0.0,99.19194444444445 +FI,41.561921513414624,7.9584965638929654,8.827395229202057,3.1704980297864753,2.758708922235876,2.704926390704652,60.39558967888448,21.42594222815439,17.558888087765556,53.23478163580391,20.72623653865316,6.036854744876441,2.5267080310874555,0.9596191660270464,2.6849040780870688,2.636303872210049,33.48066887362547,17.262076927388012,11.871146662654901,26.225161074281324,1.2345608955841842,6.905567039616236,1.9294036825391927,10.069531617739614,45.924127830010946,0.000842558871110215,0.5352976616427196,26.125552858866715,0.00025549077092280805,2.999465252173668,0.0,5.27797529780554,0.0005638448739778003,10.985836759522305,1.1102757010139601,0.7299999932919197,0.567621888960476,0.5186247006985854,0.5426538120534841,0.2113752925933344,8.110129303054869,0.3456858904863211,1.7485334047369514,6.361595898317917,0.10590759659486869,0.2397782938914524,1.85444100133182,6.60137419220937,1.8316757120344753,0.37037573583205124,2.302777777777778 +FR,254.75525470296535,25.246348097381215,60.50375606907916,13.545313587220909,36.56396031268107,15.168094036351743,436.1891582716937,140.47482029341046,17.83332642573739,352.3467232589124,138.58804714932668,30.673093617965154,24.64445813826855,11.385478350216285,28.110293801347574,15.118554800476256,274.01188458307104,141.0643172515238,10.774452123403918,205.15869496487744,6.929023969173455,34.64087907321249,21.171263925927242,62.74116696831319,481.84605440750386,0.06128998473984608,6.436448227726867,253.32345257335328,0.012216103221788616,21.35284877800194,0.025311063314164304,90.80878811904758,0.023762818203893164,109.9245167093742,12.020960351288943,9.947409144358327,10.075432032135547,8.39339667115828,1.945528319153396,1.5540124732000469,73.17358165884323,3.6872419899176174,20.295352640822717,52.87822901802052,1.5031171267935584,2.1841248631240595,21.798469767616275,55.06235388114458,5.991667785858972,0.05,28.602222222222224 
+GB,262.7982597107485,27.22762361611588,78.78916533927541,11.80007361147335,19.23101964399065,3.7000655297635276,425.2575592755557,111.591013616885,0.6038888888888893,361.0985788068445,107.23246230699095,16.791821595415946,21.008503355645562,8.784748134555482,24.36895905386316,10.776269415673813,210.73377537819806,95.66671802904204,4.475000000000009,156.62294500417684,3.275242232061276,4.588107048726044,2.1472568470367834,10.010606127824103,435.1830447460397,0.02819784255145931,2.2016605164130776,264.8344781523258,0.004131630295166207,14.104094083391427,0.015396732803841864,61.53024838065249,0.008669479452451237,92.51256361325679,11.468498778886454,4.231941269301228,10.204460016654354,3.8707058209832237,1.2640387622320988,0.36123544831800514,134.46842645168536,6.987566516477356,8.741120513246134,125.72730593843922,0.9104037973870717,6.077162719090285,9.651524310633205,131.8044686575295,9.753325660353585,0.009810078691537641,35.50194444444445 +GR,41.49582932608903,0.9719838223593845,8.096865102794412,3.75387436094054,5.931532031540117,4.541044459696281,63.562860347385374,17.628025018575457,0.626666666666667,57.15249509458662,5.792122631217957,2.2988540724338007,3.119339069483572,2.539427172959066,2.943323201142433,2.10606720586135,21.723009033444047,16.83907504415766,0.0,13.325701921801627,2.109571567817103,3.264854227440645,3.5260674790753335,8.900493274333082,69.04050906906491,0.0,3.115376018691967,37.83087174572504,0.0,5.198414931998759,0.0,7.661043521635481,0.0,15.234802851013654,0.3613895056484419,0.18388964136729427,0.31069737449182006,0.1737352543755475,0.05069213115662187,0.010154386991746771,11.091715982329,0.0882314118908981,1.6588918519757754,9.432824130353223,0.062164636806169515,0.026066775084728586,1.7210564887819448,9.458890905437952,5.996654216569651,0.008891638736458434,32.03472222222223 +HR,19.648707827931364,0.14774042559148423,4.576960072542419,1.2188446524343994,2.668168989486114,1.2856787106226006,30.600806066702532,6.541026804485754,1.7041698624888444,27.71345384661747,3.680948384765413,1.138737303317166,0.9492946075437665,0.47883955007363094,1.0083332221656265,0.6566155143730918,8.853004541785873,5.5158931369532125,0.5013786490290209,6.070138665900562,0.05801754322964838,2.055925545342165,0.7831251159601095,2.897068204531923,21.335069523416166,2.3880462609742222e-05,0.09282727591149446,13.953621714779754,0.0,1.8691799152008708,0.0,1.3528829460681484,2.3880462609742222e-05,4.066557671455898,0.5483275780364458,0.23999983390893212,0.3648325477791393,0.1557403927771871,0.18349503025730649,0.08425944113174501,1.337260794798596,0.006333089493564461,0.13449158891160376,1.202769205886992,0.0032182999214838155,0.0031147895720806448,0.1377098888330876,1.205883995459073,0.43749166655939575,0.063,0.2675 
+HU,55.9583209540609,1.1073243682850533,9.122860548664773,2.077602345736839,5.811632986565434,1.9642839255117819,76.40251024297828,11.311950926833159,6.149728453882386,70.93977460767667,22.31329544003069,2.5732016194070657,2.693275283229499,0.5601065079974142,3.211727263200585,0.8049659305800232,35.50316019652054,11.458897256024539,2.3674937914782848,28.648022094092877,0.6388627655962039,3.420467502093073,1.5603916334044279,5.619721901093705,42.10688054521005,0.007178738276153479,0.20250674696253573,21.19465646216518,0.0013664726009006416,4.024701057027359,0.005812265675252838,4.984762332228973,0.0,11.700253946825994,1.6536088977755954,1.1169429746360504,1.3235668045314954,0.8480580037458347,0.3300420932441,0.2688849708902156,2.632879898269461,0.07378320529278866,0.0,2.632879898269461,0.045516872387854186,0.02826633290493448,0.045516872387854186,2.6611462311743956,0.011630000000000001,0.0792875588637399,0.0 +IE,19.578533353834807,1.0546704542071668,6.372000420850374,1.4324282537728528,1.6745663120427838,1.0119350730044179,32.171741542067174,8.28314036614851,0.0,27.640905273752438,6.5644231428164375,0.06100478986135852,1.71859492274644,0.18283256805456885,1.8233024508857372,0.8227124578443913,15.469971776049348,6.4972641275574325,0.0,10.369099364513948,0.47028996203489104,1.5437114175107056,0.9498965067211395,2.9638978862667362,41.2697978978524,0.0037968435736073928,0.050806740341467885,25.078918235351153,0.0033967742036306726,2.1141429655214496,0.00016632633806645697,5.542625500693079,0.0002337430319102632,8.483304455945245,0.5261124832709942,0.045000532825308394,0.5111291878729443,0.04500053282530839,0.014983295398050074,0.0,7.988220673324568,0.1706057550442699,0.03204196595898767,7.95617870736558,0.07719934353124704,0.09340641151302286,0.1092413094902347,8.049585118878603,0.23416560474621778,0.0,1.1216666666666668 +IT,221.398100417684,1.0346614727578969,66.42978048640587,6.420622047981239,34.33876157362275,4.969859647293299,376.55698593881596,70.13994780780855,6.601668090525953,328.6098596902793,78.49822064048436,8.2084958133708,19.55143619356035,6.605911116194935,22.927601142827903,7.315978964021103,184.9224586348693,86.93202722941588,2.0177806660948745,131.15220488631053,5.0362948255804065,15.575365448908837,13.39739348684073,34.00905376132997,414.91328569241944,0.038039603168984575,14.249005544423982,236.25334943317745,0.038039603168984575,26.394588248064593,0.0,73.78026726367446,0.0,64.23607520307891,10.818553788822426,10.286053928838843,9.37358715434626,8.885345509779574,1.4449666344761638,1.4007084190592687,44.83902594112198,1.328215423453257,13.606177443440373,31.23284849768161,0.27544531163978947,1.0527701118134674,13.881622755080162,32.285618609495074,12.505826838855644,0.018747629308126505,29.035555555555558 
+LT,12.355798412102155,0.04002200151930364,2.5987966240019107,0.16765377677229842,1.3279139370277313,0.6917612756155628,17.894235790263227,2.618110926559587,5.647275283535294,16.283963205505646,3.6334169589178176,0.2807864932248237,0.63201857623936,0.2785025392392833,0.6771677818722958,0.491586099790956,6.845297312541881,2.9731425201845836,1.9844131306277386,4.964118713366294,0.1407059377828149,0.9069759779937155,0.29259725758254596,1.3402791733590764,16.20429128032357,0.01781549113040702,0.04896084431820287,10.890041029446927,7.777315492971165e-06,0.5808002071844686,0.01780771381491405,0.6224154520742412,0.0,4.062073747299726,0.7397147480552919,0.009962893723206358,0.13827976971049294,0.009962893723206358,0.6014349783447989,0.0,0.6407499537314395,0.006505588315712973,0.0,0.6407499537314395,0.0048820832433323835,0.0016235050723805904,0.0048820832433323835,0.6423734588038201,0.059724647385381906,0.3591706980094418,1.6391666666666669 +LU,3.9470246220332976,0.0723726259697635,0.5386631918947256,0.018124337142463587,0.20984206427004712,0.04860919801618621,5.2889509761889375,0.8519097113797371,0.0,4.709193401564832,2.3403492340472574,0.37355381415993927,0.3765315419262418,0.13299274281842746,0.4407813413467202,0.19222890271856125,4.317178145573238,1.8782453867581317,0.7283333346869453,3.2991165842992403,0.03927399876653334,0.18474316853999712,0.05769691095940215,0.28171407826593264,26.672537457904586,0.002789033021318427,0.024744706933444497,10.462831003373191,0.0023966216265285598,0.34715896922070705,0.0003447757003380651,0.4637386166834006,4.7635694451801965e-05,15.374064161693838,0.16423153043136135,0.12889441039871682,0.146824447526548,0.11618058898243262,0.01740708290481337,0.012713821416284198,1.4694582279351314,3.2241146519359214,0.0,1.4694582279351314,0.1360354308231823,3.088079221112739,0.1360354308231823,4.55753744904787,0.0,0.09094719559714451,0.0 +LV,11.156600291877822,0.048000954722733226,2.351668606233656,0.02026262729402798,0.5715422900441415,0.25958380837597833,15.441436050886324,1.7750577406080588,4.26998990245345,14.080003666076076,3.677039403497877,0.2722881030737441,0.6109911832283433,0.18535314239622583,0.6324008884340625,0.4364662221104241,6.50336947132678,2.5041925755890975,1.3799967366243002,4.9429649560415445,0.11380010076396964,0.9802403436071967,0.6878248634681197,1.7818653078392857,10.190791718865723,0.047830398479961206,0.02855911044596135,5.761667346504384,0.0,0.8872998502872564,0.04782037284551143,0.5733601396932794,1.0025634449766049e-05,2.939905271934841,0.934176310004225,0.0400104384895729,0.10222042642541893,0.0400104384895729,0.831955883578806,0.0,1.356584470502264,0.011763078965182399,0.0,1.356584470502264,0.008517943758076709,0.0032451352071056917,0.008517943758076709,1.3598296057093697,0.058887729008134916,0.29700867153345484,2.4741666666666666 +ME,1.6559164880111146,0.1476631876287879,0.45735420714444647,0.12914834173607742,0.2907681083346737,0.18182333000288275,3.1200000000000006,1.1930555555555555,,,0.06221323098811586,0.004429543196072349,0.010274580511307242,0.001864138164731417,0.009197700368106911,0.0034587716781719494,0.09833333333333334,0.026944444444444448,,,,,,,2.4411111111111112,0.0,0.03184100867710587,1.413068582098263,0.0,0.11808272195416168,0.0,0.28954738497135996,0.0,0.5885714134102208,0.013055555555555558,0.013055555555555558,0.009914108381853028,0.010319594785725741,0.0031414471737025297,0.0027359607698298166,0.11408724714553115,0.008134975076691095,0.0,0.11408724714553115,0.0,0.008134975076691095,0.0,0.12222222222222223,0.0125,0.0,0.0 
+MK,2.737373705085587,0.4140070094123083,0.8676763843287041,0.3620964682621291,0.6776050509175459,0.5097826635377681,6.3133333333333335,3.345,,,0.9702384218304272,0.24513183377851924,0.20870138793433718,0.10316179039462109,0.2634248772158791,0.19140913781883528,2.3869444444444445,1.491111111111111,,,,,,,5.4775,0.0,0.07144661471368349,3.1707213666813434,0.0,0.2649605364376552,0.0,0.6497024219674836,0.0,1.320669060199834,0.065,0.016944444444444446,0.04935960343305549,0.013393516636792982,0.015640396566944506,0.0035509278076514637,0.04511632046209641,0.0032170128712369333,0.0,0.04511632046209641,0.0,0.0032170128712369333,0.0,0.04833333333333334,0.0,0.237745607009008,0.0 +NL,71.09123319489385,1.294018241969305,21.394448653138546,1.4305885462818118,9.127989493249794,1.245188144736828,119.22882920129575,23.043014665780344,2.9047294193101956,101.7101929282673,38.201768113822226,4.0229819358532755,8.68624134542251,3.1732338062445287,9.984680964764177,4.442823354462051,81.77555913322148,37.000766756874484,3.2247191098404038,59.080746125023275,6.003970234857895,31.554509594709653,6.329362019686628,43.88784184925417,130.9047319562777,0.021262683785678662,2.0023213442489487,68.3613949991795,0.0046700946223730685,4.1856077622538574,0.015676527461294425,19.73020286229721,0.000916061702011168,36.62520498829819,2.107496116107096,1.728051728132293,1.9390964762389407,1.598035731366381,0.16839963986815557,0.13001599676591186,35.61245743892048,5.83363127540512,0.0,35.61245743892048,0.4309475389629081,5.402683736442212,0.4309475389629081,41.01514117536269,4.198888264002294,0.03812060906690227,169.52166666666668 +NO,28.679513127735074,20.936044583246602,6.240851203967479,4.555821378896259,2.071590494496357,1.5122610609823404,45.95194444444444,36.058888888888895,,,17.429205728310578,12.723320181666722,2.536828913587433,1.8518851069188262,1.7310243187828043,1.2636477527114471,30.40777777777778,24.59388888888889,,,,,,,39.86666666666667,0.005,0.5200070056142733,23.077333056752696,0.0009980252952751733,1.9284515538076108,0.0036178277039541117,4.72870285515357,0.00038414700077071514,9.612172195338516,0.8269444444444445,0.683888888888889,0.6279638436760948,0.5405711140948251,0.19898060076834956,0.14331777479406402,8.349134530861472,0.6455876913607508,3.744417662549,4.6047168683124715,0.3172490041176663,0.32833868724308446,4.0616666666666665,4.933055555555556,9.583055555555555,0.04,4.383888888888889 +PL,159.19617990501106,0.6682423589152866,36.510837555801864,0.5184145609817947,19.35353484560614,7.2890716773996855,233.5976674865098,28.258081533828427,48.61112296922581,215.09833664177685,57.18624930106038,10.614023763786333,8.114052730584515,3.212626216217853,8.342251127109828,5.57905254787623,97.93343904655447,44.14369131922328,11.848058445750793,74.43865586430563,1.3911602789240956,34.99746023440522,6.431231030356537,42.81985154368584,195.13435796531672,0.06634874922140112,1.1788131629151593,103.3486891888789,0.0002813561761257034,9.673960866054456,0.06606739304527542,24.545596476439965,0.0,56.38729827102827,4.344083578894413,2.960839404924366,2.2572212348713467,1.8141255755734997,2.086862344023065,1.146713829350866,5.788210706247441,0.065976457330767,0.5100291013423637,5.278181604905077,0.03146512091754419,0.03451133641322282,0.5414942222599078,5.3126929413183,0.03611109255123829,0.20881332257986518,1.9836111111111112 
+PT,5.657092973774139,0.15083838062827584,7.019297614887283,0.6666212321654327,10.398488535566496,3.690849741079436,32.28876877176269,13.752174409307244,0.010555725895792805,23.294026165291868,5.9134817406653735,1.9935259744708376,2.3586126301952732,1.4540160139270473,2.801041292274993,1.978715328630981,21.87204489325834,16.26528322517285,0.19277840706100358,12.677858226860144,0.8410934993614988,1.9882677641517388,2.1164759881330455,4.9458372516462825,66.02894939714282,0.004779400371054035,0.7240194008174893,41.36707492450902,0.0033358164772986517,2.3945987105071067,0.0014377610919926443,10.92193742827972,5.822801762738885e-06,10.621318933029494,0.5563993679617288,0.39084291807050103,0.4595771196833704,0.3339018317357902,0.09682224827835836,0.056941086334710816,11.793837692280889,0.25865988302692683,1.8461247206908706,9.947712971590015,0.13532248320399995,0.12333739982292688,1.9814472038948707,10.071050371412943,1.356110579061021,0.0007075471698113201,6.4944444444444445 +RO,62.219660019368916,0.7958413198010135,12.336939555018922,0.6016271003896877,8.238712379563015,0.5847237335645684,91.7512177481418,11.576943788537283,13.032219866838899,83.00947706509771,12.672155574687446,0.7381893433952651,2.3215779313191995,0.5045634707041677,2.676807019743927,1.0579010707596863,23.152460726701797,7.9152404125680995,2.6183681129300815,17.8213504555741,0.6488875923973445,2.3989088601921225,2.0815759901835698,5.129372442773037,55.13261970914581,0.0018091215888470047,0.19984865730038978,31.100989802996768,0.0017957348060126657,11.265900680500602,0.0,6.507014733097469,1.33867828343392e-05,6.05886583525058,3.6441396020061707,1.3438841899372298,2.638162689279144,0.950197263471695,1.0059769127270266,0.39368692646553477,2.6100696947998747,0.025747050385860972,0.2924988720415489,2.317570822758326,0.02270772365542296,0.0030393267304380136,0.31520659569697185,2.320610149488764,0.6077718590249878,0.15521630006022094,0.10722222222222223 +RS,18.98245934697394,1.8150710890976087,5.323224114316094,1.587487206895104,3.4750083589367504,2.23496644567455,36.59805555555556,14.665000000000001,,,6.690334208806246,0.8224794072624647,1.2002029095290632,0.3461339431440985,1.2250065443018223,0.6422261515005668,12.252500000000001,5.003055555555556,,,,,,,22.922222222222224,0.0,0.29898953527933275,13.268823326666894,0.0,1.1088058961831386,0.0,2.718872349549001,0.0,5.526731114543858,0.6505555555555557,0.528888888888889,0.49401791128297423,0.4180533717451449,0.15653764427258135,0.11083551714374405,0.53180214521701,0.037920077005212355,0.0,0.53180214521701,0.0,0.037920077005212355,0.0,0.5697222222222222,0.16694444444444445,0.25,0.0 
+SE,56.8405529232076,18.94960502352269,13.091883280951613,6.4193447237979075,5.059619467776717,4.988946091260288,86.82448873561987,42.353928923015026,29.128058551067273,75.10186156309533,23.597518901720758,3.91287473733487,4.214802522745754,1.7636125133847242,4.280539109432537,4.109861715206538,46.80198659793274,24.63233380567647,14.457779264606945,32.912157513235876,0.9963078025020058,2.999416768482414,1.52788446551391,5.523609036498329,84.77468080828608,0.0038005922025692853,0.36042411771544564,51.40559182050632,0.0011605051717118228,5.9873283417851235,0.0011388489583268131,7.28111948265953,0.00079187450654651,19.740217045619683,2.651944454010767,2.6399999996863084,1.792882515351252,1.786819299002744,0.8590619386595152,0.8531807006835644,10.367967064646693,0.2395423533519642,2.170007229406291,8.197959835240402,0.09089205514495001,0.1486502982070142,2.260899284551241,8.346610133447417,1.3583350289115894,0.40352019291933794,19.82027777777778 +SI,9.013524638476808,0.3101884291816804,2.9493710498792414,0.46966649265929955,0.8994425859975202,0.5225613236098434,14.707442876350305,3.2109690075577566,1.0394319361311932,12.917508381676768,3.331227385925306,0.6703495155149303,0.5145564766645554,0.1474601925610313,0.6152129573524242,0.5008566027580501,6.372976998442289,3.2609990773297515,0.49778118919238035,4.610057850668179,0.0,0.5294209566488663,0.2556105646307224,0.7850315212795888,21.595300894617896,2.930797080591121e-05,0.11576818031650793,12.601361047221133,2.930797080591121e-05,0.7063829371769783,0.0,0.8568475768010727,0.0,7.314941153102202,0.28222504862054787,0.16389228335959946,0.16668508620881756,0.08771820600448986,0.11553996241173031,0.0761740773551096,0.28667223358914384,0.003606845181239247,0.0,0.28667223358914384,0.002084374742649007,0.0015224704385902405,0.002084374742649007,0.28819470402773406,0.0,0.08770259971899795,0.3666666666666667 +SK,16.274433740544108,0.6066878874227958,3.8647951785664603,0.4601773246977492,1.871127174178766,0.6223313665387125,24.670544706743268,4.502961082471661,5.324976326998166,22.018880845488454,11.329033062302202,2.0929383508132253,1.5155294390305374,0.810020507989601,1.4908393494717878,0.9476569505560773,18.60390600253911,8.194092744813428,2.3353021935940115,14.545653635872348,0.252873044434499,1.1348010240616653,0.44388857861974124,1.8315626471159052,24.51789764096202,0.03780165618552521,0.030128119498509954,10.119168332403872,4.503923111242812e-05,1.8264188273916395,0.03775661695441278,2.4569018659302015,0.0,10.085280495737795,0.5360986253061619,0.5360986253061619,0.3853396282592357,0.3853396282592357,0.15075899704692625,0.15075899704692625,0.4904426886431383,0.014834972559268286,0.0,0.4904426886431383,0.011648016114905079,0.0031869564443632046,0.011648016114905079,0.4936296450875014,0.18956900000000004,0.2095002337441789,0.0 
+MA,80.372310906666,13.819023440188797,39.557576490863816,8.641524301473853,19.760765397495266,9.169181271047236,186.734573950555,80.18486078573926,0.0,141.46698671684436,44.387071919013835,10.384024487841591,13.789666475279082,7.391941124160933,17.800179339394454,13.43821278685975,125.67139313470406,81.29826081736151,0.0,85.33907109028121,3.4201273086846045,15.342477694688231,9.162390411860274,27.92499541523311,331.98656148730413,0.0016936958751923449,5.574506816789147,194.4928579316821,0.0016936958751923449,11.607993191831378,0.0,22.96786501955535,0.0,97.34333852744622,10.592495992600144,4.514991908109179,7.406098768146668,4.000539809592942,3.1863972244534766,0.5144520985162377,65.8381525590742,1.0762918525127656,24.535555543530545,41.30259701554366,0.40102774564942556,0.6752641068633399,24.936583289179968,41.977861122407,9.617504423289883,0.0,99.19194444444445 +NG,80.372310906666,13.819023440188797,39.557576490863816,8.641524301473853,19.760765397495266,9.169181271047236,186.734573950555,80.18486078573926,0.0,141.46698671684436,44.387071919013835,10.384024487841591,13.789666475279082,7.391941124160933,17.800179339394454,13.43821278685975,125.67139313470406,81.29826081736151,0.0,85.33907109028121,3.4201273086846045,15.342477694688231,9.162390411860274,27.92499541523311,331.98656148730413,0.0016936958751923449,5.574506816789147,194.4928579316821,0.0016936958751923449,11.607993191831378,0.0,22.96786501955535,0.0,97.34333852744622,10.592495992600144,4.514991908109179,7.406098768146668,4.000539809592942,3.1863972244534766,0.5144520985162377,65.8381525590742,1.0762918525127656,24.535555543530545,41.30259701554366,0.40102774564942556,0.6752641068633399,24.936583289179968,41.977861122407,9.617504423289883,0.0,99.19194444444445 +BJ,9.013524638476808,0.3101884291816804,2.9493710498792414,0.46966649265929955,0.8994425859975202,0.5225613236098434,14.707442876350305,3.2109690075577566,1.0394319361311932,12.917508381676768,3.331227385925306,0.6703495155149303,0.5145564766645554,0.1474601925610313,0.6152129573524242,0.5008566027580501,6.372976998442289,3.2609990773297515,0.49778118919238035,4.610057850668179,0.0,0.5294209566488663,0.2556105646307224,0.7850315212795888,21.595300894617896,2.930797080591121e-05,0.11576818031650793,12.601361047221133,2.930797080591121e-05,0.7063829371769783,0.0,0.8568475768010727,0.0,7.314941153102202,0.28222504862054787,0.16389228335959946,0.16668508620881756,0.08771820600448986,0.11553996241173031,0.0761740773551096,0.28667223358914384,0.003606845181239247,0.0,0.28667223358914384,0.002084374742649007,0.0015224704385902405,0.002084374742649007,0.28819470402773406,0.0,0.08770259971899795,0.3666666666666667 diff --git a/data/temp_hard_coded/transport_data.csv b/data/temp_hard_coded/transport_data.csv new file mode 100644 index 000000000..fd0ba5595 --- /dev/null +++ b/data/temp_hard_coded/transport_data.csv @@ -0,0 +1,123 @@ +country,number cars,average fuel efficiency +AL,563106,0.4 +AR,21633587,0.758 +AU,18326236,0.753 +AT,7421647,0.634 +AZ,1330551,0.755 +BD,2879708,0.858 +BY,4192291,0.795 +BE,7330718,0.714 +BJ,469761,0.324 +BO,1711005,0.593 +BA,978229,0.863 +BW,653274,0.679 +BR,93867016,0.552 +BG,4031748,0.805 +CI,905537,0.689 +KH,3751715,0.364 +CM,758145,0.462 +CA,23923806,0.682 +CL,4960945,0.689 +CN,294694457,0.914 +CO,13477996,0.588 +CR,1991398,0.314 +HR,1996056,0.634 +CU,633369,0.957 +CY,650805,0.696 +CZ,7325789,0.83 +DK,3131673,0.671 +DO,3854038,0.737 +EC,1925368,0.583 +EG,8412673,0.775 +SV,1008080,0.519 +ER,72405,0.696 +EE,865040,0.873 +ET,708416,0.531 
+FI,5217850,0.761 +FR,42363000,0.576 +GE,1126470,0.578 +DE,56622000,0.786 +GH,2066943,0.446 +GR,9489299,0.752 +GT,3250194,0.6 +HN,1694504,0.605 +HU,4022798,0.728 +IS,289501,0.598 +IN,210023289,0.885 +ID,128398594,0.692 +IR,30377065,0.751 +IQ,5775777,0.794 +IE,2573961,0.679 +IL,3239305,0.739 +IT,52581575,0.67 +JM,541316,0.757 +JP,81602046,0.825 +JO,1502420,0.708 +KZ,4383120,0.939 +KE,2979910,0.471 +KW,2001940,0.847 +KG,993000,0.722 +LV,803628,0.571 +LB,1866407,0.756 +LY,3553497,0.607 +LT,1391568,0.538 +LU,466472,0.332 +MY,27613120,0.703 +MT,358947,0.769 +MU,507676,0.745 +MX,40205671,0.649 +MN,841537,0.889 +ME,211219,0.766 +MA,3791469,0.712 +MZ,698864,0.432 +MM,6381136,0.634 +NA,371281,0.446 +NP,2339169,0.56 +NL,10757655,0.801 +NZ,3656300,0.551 +NE,436420,0.408 +NG,11733425,0.646 +MK,442962,0.785 +NO,3969612,0.602 +OM,1370913,0.792 +PK,18352500,0.711 +PA,1288573,0.636 +PY,1871947,0.069 +PE,5604789,0.597 +PH,9251565,0.722 +PL,27409106,0.843 +PT,6590094,0.633 +QA,1330487,0.818 +KR,25680967,0.837 +MD,894253,0.753 +RO,7014661,0.776 +RU,54014259,0.838 +SA,6895799,0.741 +SN,468051,0.611 +RS,2282401,0.845 +SG,933534,0.848 +SK,2606412,0.793 +SI,1468439,0.585 +SS,69647,0.349 +ES,32986384,0.647 +LK,6795469,0.523 +SD,1252740,0.408 +SR,228388,0.663 +SE,6102914,0.467 +CH,5980512,0.552 +SY,2396544,0.76 +TR,21090424,0.802 +TJ,439972,0.665 +TH,37338139,0.753 +TG,64118,0.227 +TT,831803,0.862 +TN,2015601,0.747 +UA,14433709,0.886 +AE,3391125,0.79 +GB,38388214,0.715 +TZ,2163623,0.424 +US,281312446,0.666 +UY,2342026,0.447 +VE,7999760,0.677 +VN,50666855,0.777 +ZW,1198584,0.777 diff --git a/data/unsd_transactions.csv b/data/unsd_transactions.csv new file mode 100644 index 000000000..a23602e12 --- /dev/null +++ b/data/unsd_transactions.csv @@ -0,0 +1,39 @@ +Transaction;clean_name +consumption not elsewhere specified (industry);other +consumption by food and tobacco ;food and tobacco +consumption by non-metallic minerals ;non-metallic minerals +consumption by non-ferrous metals;non-ferrous metals +consumption by non-metallic minerals;non-metallic minerals +consumption by iron and steel;iron and steel +consumption by paper, pulp and print;paper pulp and print +consumption by food and tobacco;food and tobacco +consumption by chemical and petrochemical;chemical and petrochemical +consumption by machinery;machinery +consumption by textile and leather;textile and leather +consumption by construction;construction +consumption by mining and quarrying;mining and quarrying +consumption by transport equipment ;transport equipment +consumption by non-ferrous metals ;non-ferrous metals +consumption by wood and wood products ;wood and wood products +consumption by machinery ;machinery +consumption by mining and quarrying ;mining and quarrying +consumption by construction ;construction +consumption by textile and leather ;textile and leather +consumption by chemical and petrochemical industry;chemical and petrochemical +consumption by industries not elsewhere specified;other +consumption by non-ferrous metals industry;non-ferrous metals +consumption by non-metallic minerals industry;non-metallic minerals +consumption by mining and quarrying industry;mining and quarrying +consumption by food, beverage and tobacco industry;food and tobacco +consumption by iron and steel industry;iron and steel +consumption by transport equipment industry;transport equipment +consumption by machinery industry;machinery +consumption by wood and wood products industry;wood and wood products +consumption by construction industry;construction 
+consumption by wood and wood products;wood and wood products +consumption by transport equipment;transport equipment +consumption by food and tobacco industry;food and tobacco +consumption by textile and leather industry;textile and leather +consumption by other manufacturing, construction and non-fuel;other +consumption by chemical and petrochemicalindustry;chemical and petrochemical +consumption by chemical industry;chemical and petrochemical diff --git a/doc/configtables/licenses.csv b/doc/configtables/licenses.csv index 08044668e..6277fa75e 100644 --- a/doc/configtables/licenses.csv +++ b/doc/configtables/licenses.csv @@ -9,3 +9,8 @@ "data/raw/WorldPop/*","CC","x",,,,https://www.worldpop.org/ "data/raw/GDP/*","CC1.0",,,,,https://datadryad.org/stash/dataset/doi:10.5061/dryad.dk1j0 "data/osm/*","ODbL",,,,,https://www.openstreetmap.org/copyright +"data/demand/unsd/*","Custom","x",,,,"https://unstats.un.org/unsd/energystats/data/" +"data/industrial_database.csv","CC","x",,,,https://globalenergymonitor.org/projects/global-steel-plant-tracker/download-data/ +"data/industrial_database.csv","CC4.0","x",,,,https://www.cgfi.ac.uk/spatial-finance-initiative/geoasset-project/cement/ +"data/industrial_database.csv","CC4.0","x",,,,https://www.cgfi.ac.uk/spatial-finance-initiative/geoasset-project/pulp-and-paper-mill-database-for-latin-america/ +"data/airports.csv","Public Domain",,,,,https://ourairports.com/data/ diff --git a/doc/how_to_contribute.rst b/doc/how_to_contribute.rst index 8fffc1123..f2b312535 100644 --- a/doc/how_to_contribute.rst +++ b/doc/how_to_contribute.rst @@ -42,8 +42,8 @@ Add a new test if you want to contribute new functionality to the config. We currently perform *multiple* integration tests, which means various workflows need to work. All test configs are built by updating the ``config.tutorial.yaml`` with the configs in ``pypsa-earth/test/*.yaml``. - * You can test your contribution locally with ``snakemake --cores 4 run_tests``. This will build test configs and executes them. - * Run ``snakemake -j1 build_test_configs`` to build and analyse locally the test configs. + * You can test your contribution locally with ``make test``. + * See the Makefile for further information on which configurations are tested. To contribute a test: diff --git a/doc/release_notes.rst b/doc/release_notes.rst index 07e93bee7..838a42011 100644 --- a/doc/release_notes.rst +++ b/doc/release_notes.rst @@ -10,10 +10,13 @@ Upcoming release ================ Please add descriptive release notes like in `PyPSA-Eur `__. -E.g. if a new rule becomes available describe how to use it `snakemake -j1 run_tests` and in one sentence what it does. +E.g. if a new rule becomes available, describe how to use it (`make test`) and in one sentence what it does. **New Features and Major Changes** +* The workflow configuration now supports incremental changes to the default configuration, both in the `config.yaml` and in configfiles passed to snakemake via `--configfile myconfig.yaml`. Users may therefore include only those settings in their `config.yaml` which differ from the default configuration. One can think of the new `config.yaml` as a list of arguments in a Python function that already have a default. So in principle the `config.yaml` could now be empty, and the workflow would still run. `PR #1053 `_ + +* Local tests are now run with `make test`. This uses a `Makefile` which runs snakemake calls with different configurations. `PR #1053 `_ **Minor Changes and bug-fixing** @@ -21,7 +24,7 @@ E.g.
if a new rule becomes available describe how to use it `snakemake -j1 run_t PyPSA-Earth 0.4.1 ================= -**New Features and Major Changes** +**New Features and Major Changes (19th September 2024)** * Add functionality to modify the cost assumptions using config files `PR #1097 `__ diff --git a/envs/environment.mac.yaml b/envs/environment.mac.yaml deleted file mode 100644 index 96c7cf837..000000000 --- a/envs/environment.mac.yaml +++ /dev/null @@ -1,87 +0,0 @@ -# SPDX-FileCopyrightText: PyPSA-Earth and PyPSA-Eur Authors -# -# SPDX-License-Identifier: AGPL-3.0-or-later - -name: pypsa-earth -channels: -- conda-forge -- bioconda -- gurobi -dependencies: -- python>=3.8 -- pip -- mamba # esp. for the Windows build - -- pypsa>=0.24, <0.25 -# - atlite>=0.2.4 # until https://github.com/PyPSA/atlite/issues/244 is not merged -- dask -- powerplantmatching>=0.5.7, <=0.5.15 -- earth-osm>=2.1 -- atlite - - # Dependencies of the workflow itself -- xlrd -- openpyxl -- seaborn -- snakemake-minimal<8 -- memory_profiler -- ruamel.yaml<=0.17.26 -- pytables -- lxml -- numpy -- pandas -- geopandas>=0.11.0, <=0.14.3 -- fiona<=1.9.6 -- xarray>=2023.11.0, <2023.12.0 -- netcdf4 -- networkx -- scipy -- pydoe2 -- shapely!=2.0.4 -- pre-commit -- pyomo -- matplotlib<=3.5.2 -- reverse-geocode -- country_converter -- pyogrio -- numba -- py7zr - - # Keep in conda environment when calling ipython -- ipython # Jupyter notebook requirement -- ipykernel -- jupyterlab - - # GIS dependencies: -- cartopy -- descartes -- rasterio!=1.2.10 -- rioxarray - - # Plotting -- geoviews -- hvplot -- graphviz -- contextily -- graphviz - - # PyPSA-Eur-Sec Dependencies -- geopy -- tqdm -- pytz -- country_converter - - # Cloud download -# - googledrivedownloader # Commented until https://github.com/ndrplz/google-drive-downloader/pull/28 is merged: PR installed using pip -# Default solver for tests (required for CI) -- glpk -- ipopt -- gurobi - -- pip: - - git+https://github.com/davide-f/google-drive-downloader@master # google drive with fix for virus scan - - git+https://github.com/FRESNA/vresutils@master # until new pip release > 0.3.1 (strictly) - - tsam>=1.1.0 - - chaospy # latest version only available on pip diff --git a/envs/environment.yaml b/envs/environment.yaml index 3d9915a49..d98f772c3 100644 --- a/envs/environment.yaml +++ b/envs/environment.yaml @@ -15,7 +15,7 @@ dependencies: - pypsa>=0.24, <0.25 # - atlite>=0.2.4 # until https://github.com/PyPSA/atlite/issues/244 is not merged - dask -- powerplantmatching>=0.5.7, <=0.5.15 +- powerplantmatching - earth-osm>=2.1 - atlite @@ -31,7 +31,7 @@ dependencies: - numpy - pandas - geopandas>=0.11.0, <=0.14.3 -- fiona<=1.9.6 +- fiona<1.10.0 - xarray>=2023.11.0, <2023.12.0 - netcdf4 - networkx - scipy - pydoe2 - shapely!=2.0.4 - pre-commit - pyomo - matplotlib<=3.5.2 - reverse-geocode - country_converter - pyogrio - numba - py7zr @@ -77,11 +77,14 @@ dependencies: # Default solver for tests (required for CI) - glpk -- ipopt<3.13.3 +- ipopt - gurobi - pip: + - earth-osm>=2.2 # until the conda release is out + - powerplantmatching>=0.5.19 # until the conda release is out - git+https://github.com/davide-f/google-drive-downloader@master # google drive with fix for virus scan - git+https://github.com/FRESNA/vresutils@master # until new pip release > 0.3.1 (strictly) - tsam>=1.1.0 - chaospy # latest version only available on pip + - fake_useragent diff --git a/scripts/_helpers.py b/scripts/_helpers.py index 93a6cbc38..37152217e 100644 --- a/scripts/_helpers.py +++ b/scripts/_helpers.py @@ -5,12 +5,14 @@ # -*- coding: utf-8 -*- +import io import logging import os import pathlib import shutil import subprocess import sys +import time
import urllib import country_converter as coco @@ -22,12 +24,14 @@ import requests import snakemake as sm import yaml +from fake_useragent import UserAgent from pypsa.clustering.spatial import _make_consense from pypsa.components import component_attrs, components from pypsa.descriptors import Dict from shapely.geometry import Point from snakemake.script import Snakemake from tqdm import tqdm +from vresutils.costdata import annuity logger = logging.getLogger(__name__) @@ -89,6 +93,14 @@ def handle_exception(exc_type, exc_value, exc_traceback): ) +def copy_default_files(): + fn = pathlib.Path("config.yaml") + if not fn.exists(): + fn.write_text( + "# Write down config entries differing from config.default.yaml\n\nrun: {}" + ) + + def create_logger(logger_name, level=logging.INFO): """ Create a logger for a module and adds a handler needed to capture in logs @@ -147,38 +159,6 @@ def read_osm_config(*args): return tuple([osm_config[a] for a in args]) -def sets_path_to_root(root_directory_name, n=8): - """ - Search and sets path to the given root directory (root/path/file). - - Parameters - ---------- - root_directory_name : str - Name of the root directory. - n : int - Number of folders the function will check upwards/root directed. - """ - - repo_name = root_directory_name - n0 = n - - while n >= 0: - n -= 1 - # if repo_name is current folder name, stop and set path - if repo_name == pathlib.Path(".").absolute().name: - repo_path = get_current_directory_path() # current_path - os.chdir(repo_path) # change dir_path to repo_path - print("This is the repository path: ", repo_path) - print("Had to go %d folder(s) up." % (n0 - 1 - n)) - break - # if repo_name NOT current folder name for 5 levels then stop - if n == 0: - print("Can't find the repo path.") - # if repo_name NOT current folder name, go one directory higher - else: - change_to_script_dir(".") # change to the upper folder - - def configure_logging(snakemake, skip_handlers=False): """ Configure the basic behaviour for the logging module. @@ -271,6 +251,42 @@ def load_network(import_name=None, custom_components=None): ) +def load_network_for_plots( + fn, tech_costs, cost_config, elec_config, combine_hydro_ps=True +): + import pypsa + from add_electricity import load_costs, update_transmission_costs + + n = pypsa.Network(fn) + + n.loads["carrier"] = n.loads.bus.map(n.buses.carrier) + " load" + n.stores["carrier"] = n.stores.bus.map(n.buses.carrier) + + n.links["carrier"] = ( + n.links.bus0.map(n.buses.carrier) + "-" + n.links.bus1.map(n.buses.carrier) + ) + n.lines["carrier"] = "AC line" + n.transformers["carrier"] = "AC transformer" + + n.lines["s_nom"] = n.lines["s_nom_min"] + n.links["p_nom"] = n.links["p_nom_min"] + + if combine_hydro_ps: + n.storage_units.loc[ + n.storage_units.carrier.isin({"PHS", "hydro"}), "carrier" + ] = "hydro+PHS" + + # if the carrier was not set on the heat storage units + # bus_carrier = n.storage_units.bus.map(n.buses.carrier) + # n.storage_units.loc[bus_carrier == "heat","carrier"] = "water tanks" + + Nyears = n.snapshot_weightings.objective.sum() / 8760.0 + costs = load_costs(tech_costs, cost_config, elec_config, Nyears) + update_transmission_costs(n, costs) + + return n + + def update_p_nom_max(n): """ If extendable carriers (solar/onwind/...) have capacity >= 0, e.g. 
existing @@ -414,11 +430,72 @@ def dl_progress(count, block_size, total_size): data = urllib.parse.urlencode(data).encode() if headers: - opener = urllib.request.build_opener() - opener.addheaders = headers - urllib.request.install_opener(opener) + req = urllib.request.Request(url, headers=headers) + with urllib.request.urlopen(req) as response: + with open(file, "wb") as f: + f.write(response.read()) - urllib.request.urlretrieve(url, file, reporthook=dl_progress, data=data) + else: + urllib.request.urlretrieve(url, file, reporthook=dl_progress, data=data) + + +def content_retrieve(url, data=None, headers=None, max_retries=3, backoff_factor=0.3): + """ + Retrieve the content of a URL with improved robustness. + + This function uses a more robust approach to handle permission issues + and avoid being blocked by the server. It implements exponential backoff + for retries and rotates user agents. + + Parameters + ---------- + url : str + URL to retrieve the content from + data : dict, optional + Data for the request, by default None + headers : dict, optional + Headers for the request, defaults to a fake user agent + If no headers are wanted at all, pass an empty dict. + max_retries : int, optional + Maximum number of retries, by default 3 + backoff_factor : float, optional + Factor to apply between attempts, by default 0.3 + """ + if headers is None: + ua = UserAgent() + headers = { + "User-Agent": ua.random, + "Upgrade-Insecure-Requests": "1", + "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8", + "Accept-Language": "en-US,en;q=0.5", + "Accept-Encoding": "gzip, deflate, br", + "DNT": "1", + "Connection": "keep-alive", + "Referer": "https://www.google.com/", + } + + session = requests.Session() + + for i in range(max_retries): + try: + response = session.get(url, headers=headers, data=data) + response.raise_for_status() + return io.BytesIO(response.content) + except ( + requests.exceptions.RequestException, + requests.exceptions.HTTPError, + ) as e: + if i == max_retries - 1: # last attempt + raise + else: + # Exponential backoff + wait_time = backoff_factor * (2**i) + np.random.uniform(0, 0.1) + time.sleep(wait_time) + + # Rotate user agent for next attempt + headers["User-Agent"] = UserAgent().random + + raise Exception("Max retries exceeded") def get_aggregation_strategies(aggregation_strategies): @@ -438,7 +515,7 @@ def get_aggregation_strategies(aggregation_strategies): return bus_strategies, generator_strategies -def mock_snakemake(rule_name, **wildcards): +def mock_snakemake(rule_name, root_dir=None, submodule_dir=None, **wildcards): """ This function is expected to be executed from the "scripts"-directory of the snakemake project.
It returns a snakemake.script.Snakemake object, @@ -456,57 +533,72 @@ def mock_snakemake(rule_name, **wildcards): """ script_dir = pathlib.Path(__file__).parent.resolve() - assert ( - pathlib.Path.cwd().resolve() == script_dir - ), f"mock_snakemake has to be run from the repository scripts directory {script_dir}" - os.chdir(script_dir.parent) - for p in sm.SNAKEFILE_CHOICES: - if pathlib.Path(p).exists(): - snakefile = p - break - workflow = sm.Workflow( - snakefile, overwrite_configfiles=[], rerun_triggers=[] - ) # overwrite_config=config - workflow.include(snakefile) - workflow.global_resources = {} + if root_dir is None: + root_dir = script_dir.parent + else: + root_dir = pathlib.Path(root_dir).resolve() + + user_in_script_dir = pathlib.Path.cwd().resolve() == script_dir + if str(submodule_dir) in __file__: + # the submodule_dir path is only needed to locate the project dir + os.chdir(pathlib.Path(__file__[: __file__.find(str(submodule_dir))])) + elif user_in_script_dir: + os.chdir(root_dir) + elif pathlib.Path.cwd().resolve() != root_dir: + raise RuntimeError( + "mock_snakemake has to be run from the repository root" + f" {root_dir} or scripts directory {script_dir}" + ) try: - rule = workflow.get_rule(rule_name) - except Exception as exception: - print( - exception, - f"The {rule_name} might be a conditional rule in the Snakefile.\n" - f"Did you enable {rule_name} in the config?", + for p in sm.SNAKEFILE_CHOICES: + if pathlib.Path(p).exists(): + snakefile = p + break + workflow = sm.Workflow( + snakefile, overwrite_configfiles=[], rerun_triggers=[] + ) # overwrite_config=config + workflow.include(snakefile) + workflow.global_resources = {} + try: + rule = workflow.get_rule(rule_name) + except Exception as exception: + print( + exception, + f"The {rule_name} might be a conditional rule in the Snakefile.\n" + f"Did you enable {rule_name} in the config?", + ) + raise + dag = sm.dag.DAG(workflow, rules=[rule]) + wc = Dict(wildcards) + job = sm.jobs.Job(rule, dag, wc) + + def make_accessable(*ios): + for io in ios: + for i in range(len(io)): + io[i] = pathlib.Path(io[i]).absolute() + + make_accessable(job.input, job.output, job.log) + snakemake = Snakemake( + job.input, + job.output, + job.params, + job.wildcards, + job.threads, + job.resources, + job.log, + job.dag.workflow.config, + job.rule.name, + None, ) - raise - dag = sm.dag.DAG(workflow, rules=[rule]) - wc = Dict(wildcards) - job = sm.jobs.Job(rule, dag, wc) - - def make_accessable(*ios): - for io in ios: - for i in range(len(io)): - io[i] = pathlib.Path(io[i]).absolute() - - make_accessable(job.input, job.output, job.log) - snakemake = Snakemake( - job.input, - job.output, - job.params, - job.wildcards, - job.threads, - job.resources, - job.log, - job.dag.workflow.config, - job.rule.name, - None, - ) - snakemake.benchmark = job.benchmark + snakemake.benchmark = job.benchmark - # create log and output dir if not existent - for path in list(snakemake.log) + list(snakemake.output): - build_directory(path) + # create log and output dirs if they do not exist + for path in list(snakemake.log) + list(snakemake.output): + build_directory(path) - os.chdir(script_dir) + finally: + if user_in_script_dir: + os.chdir(script_dir) return snakemake @@ -869,6 +961,35 @@ def get_relative_path(path, start_path="."): return pathlib.Path(path).relative_to(start_path) +# PYPSA-EARTH-SEC + + +def prepare_costs( + cost_file: str, USD_to_EUR: float, fill_values: dict, Nyears: float | int = 1 +): + # set all asset costs and other parameters + costs =
pd.read_csv(cost_file, index_col=[0, 1]).sort_index() + + # correct units to MW and EUR + costs.loc[costs.unit.str.contains("/kW"), "value"] *= 1e3 + costs.loc[costs.unit.str.contains("USD"), "value"] *= USD_to_EUR + + # min_count=1 is important to generate NaNs which are then filled by fillna + costs = ( + costs.loc[:, "value"].unstack(level=1).groupby("technology").sum(min_count=1) + ) + costs = costs.fillna(fill_values) + + def annuity_factor(v): + return annuity(v["lifetime"], v["discount rate"]) + v["FOM"] / 100 + + costs["fixed"] = [ + annuity_factor(v) * v["investment"] * Nyears for i, v in costs.iterrows() + ] + + return costs + + def create_network_topology(n, prefix, connector=" <-> ", bidirectional=True): """ Create a network topology like the power transmission network. @@ -922,6 +1043,33 @@ def make_index(c): return topo +def create_dummy_data(n, sector): + ind = n.buses.index[n.buses.carrier == "AC"] + + if sector == "industry": + col = [ + "electricity", + "coal", + "coke", + "solid biomass", + "methane", + "hydrogen", + "low-temperature heat", + "naphtha", + "process emission", + "process emission from feedstock", + "current electricity", + ] + else: + raise Exception("sector not found") + data = ( + np.random.randint(10, 500, size=(len(ind), len(col))) * 1000 * 1 + ) # TODO replace the trailing 1 with the temporal resolution + + return pd.DataFrame(data, index=ind, columns=col) + + def cycling_shift(df, steps=1): """ Cyclic shift on index of pd.Series|pd.DataFrame by number of steps. @@ -932,6 +1080,34 @@ def cycling_shift(df, steps=1): return df +def override_component_attrs(directory): + """Tell PyPSA that links can have multiple outputs by + overriding the component_attrs. This can be done for + as many buses as you need with format bus{i} for i = 2,3,4,5,.... + See https://pypsa.org/doc/components.html#link-with-multiple-outputs-or-inputs + + Parameters + ---------- + directory : string + Folder where component attributes to override are stored + analogous to ``pypsa/component_attrs``, e.g. `links.csv`. + + Returns + ------- + Dictionary of overridden component attributes. + """ + + attrs = {k: v.copy() for k, v in component_attrs.items()} + + for component, list_name in components.list_name.items(): + fn = f"{directory}/{list_name}.csv" + if pathlib.Path(fn).is_file(): + overrides = pd.read_csv(fn, index_col=0, na_values="n/a") + attrs[component] = overrides.combine_first(attrs[component]) + + return attrs + + def get_gadm_filename(country_code, file_prefix="gadm41_"): """ Function to get three digits country code for GADM. @@ -1041,7 +1217,7 @@ def download_gadm( return gadm_input_file_gpkg, gadm_filename -def get_gadm_layer_name(country_code, file_prefix, layer_id, code_layer): +def get_gadm_layer_name(file_prefix, layer_id): if file_prefix == "gadm41_": return "ADM_ADM_" + str(layer_id) @@ -1156,9 +1332,7 @@ def get_gadm_layer( # when layer id is negative or larger than the number of layers, select the last layer layer_id = len(list_layers) - 1 code_layer = np.mod(layer_id, len(list_layers)) - layer_name = get_gadm_layer_name( - country_code, file_prefix, layer_id, code_layer - ) + layer_name = get_gadm_layer_name(file_prefix, layer_id) # read gpkg file geo_df_temp = gpd.read_file( @@ -1291,34 +1465,6 @@ def locate_bus( ].item() # looks for closest one shape=node -def override_component_attrs(directory): - """Tell PyPSA that links can have multiple outputs by - overriding the component_attrs.
This can be done for - as many buses as you need with format busi for i = 2,3,4,5,.... - See https://pypsa.org/doc/components.html#link-with-multiple-outputs-or-inputs - - Parameters - ---------- - directory : string - Folder where component attributes to override are stored - analogous to ``pypsa/component_attrs``, e.g. `links.csv`. - - Returns - ------- - Dictionary of overridden component attributes. - """ - - attrs = Dict({k: v.copy() for k, v in component_attrs.items()}) - - for component, list_name in components.list_name.items(): - fn = f"{directory}/{list_name}.csv" - if pathlib.Path(fn).is_file(): - overrides = pd.read_csv(fn, index_col=0, na_values="n/a") - attrs[component] = overrides.combine_first(attrs[component]) - - return attrs - - def get_conv_factors(sector): """ Create a dictionary with all the conversion factors for the standard net calorific value @@ -1367,6 +1513,7 @@ def get_conv_factors(sector): "natural gas liquids": 0.01228, "oil shale": 0.00247, "other bituminous coal": 0.005556, + "other kerosene": 0.01216, "paraffin waxes": 0.01117, "patent fuel": 0.00575, "peat": 0.00271, @@ -1387,30 +1534,35 @@ def aggregate_fuels(sector): gas_fuels = [ "blast furnace gas", "natural gas (including lng)", - "natural gas liquids", ] oil_fuels = [ - "bitumen", + "additives and oxygenates", "aviation gasoline", "bitumen", "conventional crude oil", "crude petroleum", "ethane", "fuel oil", "gas oil/ diesel oil", + "gasoline-type jet fuel", "kerosene-type jet fuel", "liquefied petroleum gas (lpg)", "lubricants", "motor gasoline", "naphtha", - "patent fuel", + "natural gas liquids", + "other kerosene", + "paraffin waxes", "patent fuel", "petroleum coke", "refinery gas", ] coal_fuels = [ "anthracite", + "blast furnace gas", "brown coal", "brown coal briquettes", + "coal coke", + "coal tar", "coke-oven coke", "coke-oven gas", "coking coal", @@ -1418,9 +1570,12 @@ def aggregate_fuels(sector): "gasworks gas", "hard coal", "lignite", + "oil shale", "other bituminous coal", + "patent fuel", "peat", "peat products", + "recovered gases", "sub-bituminous coal", ] @@ -1432,6 +1587,11 @@ def aggregate_fuels(sector): "biodiesel", "charcoal", "black liquor", + "bio jet kerosene", + "animal waste", + "industrial waste", + "municipal wastes", + "vegetal waste", ] electricity = ["electricity"] @@ -1461,7 +1621,7 @@ def modify_commodity(commodity): return commodity.strip().casefold() -def safe_divide(numerator, denominator): +def safe_divide(numerator, denominator, default_value=np.nan): """ Safe division function that returns ``default_value`` (NaN by default) when the denominator is zero. """ @@ -1471,7 +1631,7 @@ def safe_divide(numerator, denominator): logging.warning( f"Division by zero: {numerator} / {denominator}, returning NaN." ) - return np.nan + return default_value def normed(x): diff --git a/scripts/add_brownfield.py b/scripts/add_brownfield.py new file mode 100644 index 000000000..ce5e243f8 --- /dev/null +++ b/scripts/add_brownfield.py @@ -0,0 +1,259 @@ +# -*- coding: utf-8 -*- +# SPDX-FileCopyrightText: PyPSA-Earth and PyPSA-Eur Authors +# +# SPDX-License-Identifier: AGPL-3.0-or-later +""" +Prepares brownfield data from the previous planning horizon.
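+ +Assets carried over from the previous horizon keep their optimised capacities as fixed, non-extendable values; assets that have outlived their lifetime or whose optimised capacity falls below the configured threshold are dropped, and, if H2 retrofitting is enabled, gas pipeline capacity already retrofitted to hydrogen is subtracted from today's grid.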
+""" + +import logging + +import numpy as np +import pandas as pd +import pypsa +from _helpers import mock_snakemake +from add_existing_baseyear import add_build_year_to_new_assets + +logger = logging.getLogger(__name__) +idx = pd.IndexSlice + + +def add_brownfield(n, n_p, year): + logger.info(f"Preparing brownfield for the year {year}") + + # electric transmission grid set optimised capacities of previous as minimum + n.lines.s_nom_min = n_p.lines.s_nom_opt + dc_i = n.links[n.links.carrier == "DC"].index + n.links.loc[dc_i, "p_nom_min"] = n_p.links.loc[dc_i, "p_nom_opt"] + + for c in n_p.iterate_components(["Link", "Generator", "Store"]): + attr = "e" if c.name == "Store" else "p" + + # first, remove generators, links and stores that track + # CO2 or global EU values since these are already in n + n_p.mremove(c.name, c.df.index[c.df.lifetime == np.inf]) + + # remove assets whose build_year + lifetime < year + n_p.mremove(c.name, c.df.index[c.df.build_year + c.df.lifetime < year]) + + # remove assets if their optimized nominal capacity is lower than a threshold + # since CHP heat Link is proportional to CHP electric Link, make sure threshold is compatible + chp_heat = c.df.index[ + (c.df[f"{attr}_nom_extendable"] & c.df.index.str.contains("urban central")) + & c.df.index.str.contains("CHP") + & c.df.index.str.contains("heat") + ] + + threshold = snakemake.params.threshold_capacity + + if not chp_heat.empty: + threshold_chp_heat = ( + threshold + * c.df.efficiency[chp_heat.str.replace("heat", "electric")].values + * c.df.p_nom_ratio[chp_heat.str.replace("heat", "electric")].values + / c.df.efficiency[chp_heat].values + ) + n_p.mremove( + c.name, + chp_heat[c.df.loc[chp_heat, f"{attr}_nom_opt"] < threshold_chp_heat], + ) + + n_p.mremove( + c.name, + c.df.index[ + (c.df[f"{attr}_nom_extendable"] & ~c.df.index.isin(chp_heat)) + & (c.df[f"{attr}_nom_opt"] < threshold) + ], + ) + + # copy over assets but fix their capacity + c.df[f"{attr}_nom"] = c.df[f"{attr}_nom_opt"] + c.df[f"{attr}_nom_extendable"] = False + + n.import_components_from_dataframe(c.df, c.name) + + # copy time-dependent + selection = n.component_attrs[c.name].type.str.contains( + "series" + ) & n.component_attrs[c.name].status.str.contains("Input") + for tattr in n.component_attrs[c.name].index[selection]: + n.import_series_from_dataframe(c.pnl[tattr], c.name, tattr) + + # deal with gas network + pipe_carrier = ["gas pipeline"] + if snakemake.params.H2_retrofit: + # drop capacities of previous year to avoid duplicating + to_drop = n.links.carrier.isin(pipe_carrier) & (n.links.build_year != year) + n.mremove("Link", n.links.loc[to_drop].index) + + # subtract the already retrofitted from today's gas grid capacity + h2_retrofitted_fixed_i = n.links[ + (n.links.carrier == "H2 pipeline retrofitted") + & (n.links.build_year != year) + ].index + gas_pipes_i = n.links[n.links.carrier.isin(pipe_carrier)].index + CH4_per_H2 = 1 / snakemake.params.H2_retrofit_capacity_per_CH4 + fr = "H2 pipeline retrofitted" + to = "gas pipeline" + # today's pipe capacity + pipe_capacity = n.links.loc[gas_pipes_i, "p_nom"] + # already retrofitted capacity from gas -> H2 + already_retrofitted = ( + n.links.loc[h2_retrofitted_fixed_i, "p_nom"] + .rename(lambda x: x.split("-2")[0].replace(fr, to)) + .groupby(level=0) + .sum() + ) + remaining_capacity = ( + pipe_capacity + - CH4_per_H2 + * already_retrofitted.reindex(index=pipe_capacity.index).fillna(0) + ) + n.links.loc[gas_pipes_i, "p_nom"] = remaining_capacity + else: + new_pipes = 
+def disable_grid_expansion_if_limit_hit(n):
+    """
+    Check if transmission expansion limit is already reached; then turn off.
+
+    In particular, this function checks if the total transmission
+    capital cost or volume implied by s_nom_min and p_nom_min are
+    numerically close to the respective global limit set in
+    n.global_constraints. If so, the nominal capacities are set to the
+    minimum and extendable is turned off; the corresponding global
+    constraint is then dropped.
+    """
+    cols = {"cost": "capital_cost", "volume": "length"}
+    for limit_type in ["cost", "volume"]:
+        glcs = n.global_constraints.query(
+            f"type == 'transmission_expansion_{limit_type}_limit'"
+        )
+
+        for name, glc in glcs.iterrows():
+            total_expansion = (
+                (
+                    n.lines.query("s_nom_extendable")
+                    .eval(f"s_nom_min * {cols[limit_type]}")
+                    .sum()
+                )
+                + (
+                    n.links.query("carrier == 'DC' and p_nom_extendable")
+                    .eval(f"p_nom_min * {cols[limit_type]}")
+                    .sum()
+                )
+            ).sum()
+
+            # Allow small numerical differences
+            if np.abs(glc.constant - total_expansion) / glc.constant < 1e-6:
+                logger.info(
+                    f"Transmission expansion {limit_type} is already reached, disabling expansion and limit"
+                )
+                extendable_acs = n.lines.query("s_nom_extendable").index
+                n.lines.loc[extendable_acs, "s_nom_extendable"] = False
+                n.lines.loc[extendable_acs, "s_nom"] = n.lines.loc[
+                    extendable_acs, "s_nom_min"
+                ]
+
+                extendable_dcs = n.links.query(
+                    "carrier == 'DC' and p_nom_extendable"
+                ).index
+                n.links.loc[extendable_dcs, "p_nom_extendable"] = False
+                n.links.loc[extendable_dcs, "p_nom"] = n.links.loc[
+                    extendable_dcs, "p_nom_min"
+                ]
+
+                n.global_constraints.drop(name, inplace=True)
+
+
+# def adjust_renewable_profiles(n, input_profiles, params, year):
+#     """
+#     Adjusts renewable profiles according to the renewable technology specified,
+#     using the latest year below or equal to the selected year.
+# """ + +# # spatial clustering +# cluster_busmap = pd.read_csv(snakemake.input.cluster_busmap, index_col=0).squeeze() +# simplify_busmap = pd.read_csv( +# snakemake.input.simplify_busmap, index_col=0 +# ).squeeze() +# clustermaps = simplify_busmap.map(cluster_busmap) +# clustermaps.index = clustermaps.index.astype(str) + +# # temporal clustering +# dr = pd.date_range(**params["snapshots"], freq="h") +# snapshotmaps = ( +# pd.Series(dr, index=dr).where(lambda x: x.isin(n.snapshots), pd.NA).ffill() +# ) + +# for carrier in params["carriers"]: +# if carrier == "hydro": +# continue +# with xr.open_dataset(getattr(input_profiles, "profile_" + carrier)) as ds: +# if ds.indexes["bus"].empty or "year" not in ds.indexes: +# continue + +# closest_year = max( +# (y for y in ds.year.values if y <= year), default=min(ds.year.values) +# ) + +# p_max_pu = ( +# ds["profile"] +# .sel(year=closest_year) +# .transpose("time", "bus") +# .to_pandas() +# ) + +# # spatial clustering +# weight = ds["weight"].sel(year=closest_year).to_pandas() +# weight = weight.groupby(clustermaps).transform(normed_or_uniform) +# p_max_pu = (p_max_pu * weight).T.groupby(clustermaps).sum().T +# p_max_pu.columns = p_max_pu.columns + f" {carrier}" + +# # temporal_clustering +# p_max_pu = p_max_pu.groupby(snapshotmaps).mean() + +# # replace renewable time series +# n.generators_t.p_max_pu.loc[:, p_max_pu.columns] = p_max_pu + + +if __name__ == "__main__": + if "snakemake" not in globals(): + + snakemake = mock_snakemake( + "add_brownfield", + simpl="", + clusters="10", + ll="c1.0", + opts="Co2L", + planning_horizons="2030", + sopts="144H", + discountrate=0.071, + demand="AB", + h2export="120", + ) + + logger.info(f"Preparing brownfield from the file {snakemake.input.network_p}") + + year = int(snakemake.wildcards.planning_horizons) + + n = pypsa.Network(snakemake.input.network) + + # TODO + # adjust_renewable_profiles(n, snakemake.input, snakemake.params, year) + + add_build_year_to_new_assets(n, year) + + n_p = pypsa.Network(snakemake.input.network_p) + + add_brownfield(n, n_p, year) + + disable_grid_expansion_if_limit_hit(n) + + n.meta = dict(snakemake.config, **dict(wildcards=dict(snakemake.wildcards))) + n.export_to_netcdf(snakemake.output[0]) diff --git a/scripts/add_electricity.py b/scripts/add_electricity.py index af68d1ad1..71aac94dd 100755 --- a/scripts/add_electricity.py +++ b/scripts/add_electricity.py @@ -90,15 +90,14 @@ import pypsa import xarray as xr from _helpers import ( - change_to_script_dir, configure_logging, create_logger, mock_snakemake, normed, read_csv_nafix, - sets_path_to_root, update_p_nom_max, ) +from powerplantmatching.export import map_country_bus idx = pd.IndexSlice @@ -129,8 +128,6 @@ def _add_missing_carriers_from_costs(n, costs, carriers): costs.columns.to_series().loc[lambda s: s.str.endswith("_emissions")].values ) suptechs = missing_carriers.str.split("-").str[0] - if "csp" in suptechs: - suptechs = suptechs.str.replace("csp", "csp-tower") emissions = costs.loc[suptechs, emissions_cols].fillna(0.0) emissions.index = missing_carriers n.import_components_from_dataframe(emissions, "Carrier") @@ -191,6 +188,7 @@ def load_costs(tech_costs, config, elec_config, Nyears=1): config["rooftop_share"] * costs.at["solar-rooftop", "capital_cost"] + (1 - config["rooftop_share"]) * costs.at["solar-utility", "capital_cost"] ) + costs.loc["csp"] = costs.loc["csp-tower"] def costs_for_storage(store, link1, link2=None, max_hours=1.0): capital_cost = link1["capital_cost"] + max_hours * store["capital_cost"] @@ 
-373,13 +371,11 @@ def attach_wind_and_solar(
             )
         )
     else:
-        capital_cost = costs.at[
-            "csp-tower" if tech == "csp" else tech, "capital_cost"
-        ]
+        capital_cost = costs.at[tech, "capital_cost"]
 
     if not df.query("carrier == @tech").empty:
         buses = n.buses.loc[ds.indexes["bus"]]
-        caps = pm.export.map_country_bus(df.query("carrier == @tech"), buses)
+        caps = map_country_bus(df.query("carrier == @tech"), buses)
         caps = caps.groupby(["bus"]).p_nom.sum()
         caps = pd.Series(data=caps, index=ds.indexes["bus"]).fillna(0)
     else:
@@ -397,13 +393,9 @@ def attach_wind_and_solar(
             p_nom_max=ds["p_nom_max"].to_pandas(),
             p_max_pu=ds["profile"].transpose("time", "bus").to_pandas(),
             weight=ds["weight"].to_pandas(),
-            marginal_cost=costs.at[
-                "csp-tower" if suptech == "csp" else suptech, "marginal_cost"
-            ],
+            marginal_cost=costs.at[suptech, "marginal_cost"],
             capital_cost=capital_cost,
-            efficiency=costs.at[
-                "csp-tower" if suptech == "csp" else suptech, "efficiency"
-            ],
+            efficiency=costs.at[suptech, "efficiency"],
         )
 
@@ -776,7 +768,8 @@ def estimate_renewable_capacities_irena(
         (
             n.generators_t.p_max_pu[tech_i].mean()
             * n.generators.loc[tech_i, "p_nom_max"]
-        )  # maximal yearly generation
+        )
+        # maximal yearly generation
         .groupby(n.generators.bus.map(n.buses.country))
         .transform(lambda s: normed(s) * tech_capacities.at[s.name])
         .where(lambda s: s > 0.1, 0.0)
@@ -821,9 +814,7 @@ def add_nice_carrier_names(n, config):
 
 if __name__ == "__main__":
     if "snakemake" not in globals():
-        change_to_script_dir(__file__)
         snakemake = mock_snakemake("add_electricity")
-        sets_path_to_root("pypsa-earth")
 
     configure_logging(snakemake)
diff --git a/scripts/add_existing_baseyear.py b/scripts/add_existing_baseyear.py
new file mode 100644
index 000000000..61e0d560c
--- /dev/null
+++ b/scripts/add_existing_baseyear.py
@@ -0,0 +1,651 @@
+# -*- coding: utf-8 -*-
+# SPDX-FileCopyrightText: PyPSA-Earth and PyPSA-Eur Authors
+#
+# SPDX-License-Identifier: AGPL-3.0-or-later
+"""
+Adds existing power and heat generation capacities for initial planning
+horizon.
+"""
+
+import logging
+from types import SimpleNamespace
+
+import country_converter as coco
+import numpy as np
+import pandas as pd
+import powerplantmatching as pm
+import pypsa
+import xarray as xr
+from _helpers import mock_snakemake
+from prepare_sector_network import define_spatial, prepare_costs
+
+logger = logging.getLogger(__name__)
+cc = coco.CountryConverter()
+idx = pd.IndexSlice
+spatial = SimpleNamespace()
+
+
+def add_build_year_to_new_assets(n, baseyear):
+    """
+    Parameters
+    ----------
+    n : pypsa.Network
+    baseyear : int
+        year in which optimized assets are built
+    """
+    # Give assets with lifetimes and no build year the build year baseyear
+    for c in n.iterate_components(["Link", "Generator", "Store"]):
+        assets = c.df.index[(c.df.lifetime != np.inf) & (c.df.build_year == 0)]
+        c.df.loc[assets, "build_year"] = baseyear
+
+        # add -baseyear to name
+        rename = pd.Series(c.df.index, c.df.index)
+        rename[assets] += f"-{baseyear}"
+        c.df.rename(index=rename, inplace=True)
+
+        # rename time-dependent
+        selection = n.component_attrs[c.name].type.str.contains(
+            "series"
+        ) & n.component_attrs[c.name].status.str.contains("Input")
+        for attr in n.component_attrs[c.name].index[selection]:
+            c.pnl[attr] = c.pnl[attr].rename(columns=rename)
+
+
+def add_existing_renewables(df_agg):
+    """
+    Append existing renewables to the df_agg pd.DataFrame with the conventional
+    power plants.
+    """
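+    # Sketch of the columns this function fills in per asset (the values and
+    # the generated index name below are illustrative only):
+    #
+    #                     Fueltype  Capacity  DateIn  lifetime  DateOut  cluster_bus
+    #   DE0 1-solar-2015  solar        120.5    2015      25.0     2039  DE0 1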
+ """ + tech_map = {"solar": "PV", "onwind": "Onshore", "offwind": "Offshore"} + + countries = snakemake.config["countries"] + irena = pm.data.IRENASTAT().powerplant.convert_country_to_alpha2() + irena = irena.query("Country in @countries") + irena = irena.groupby(["Technology", "Country", "Year"]).Capacity.sum() + + irena = irena.unstack().reset_index() + + for carrier, tech in tech_map.items(): + df = ( + irena[irena.Technology.str.contains(tech)] + .drop(columns=["Technology"]) + .set_index("Country") + .reindex(countries, fill_value=0.0) + .fillna(0.0) + ) + df.columns = df.columns.astype(int) + + # calculate yearly differences + df.insert(loc=0, value=0.0, column="1999") + df = df.diff(axis=1).drop("1999", axis=1).clip(lower=0) + + # distribute capacities among nodes according to capacity factor + # weighting with nodal_fraction + elec_buses = n.buses.index[n.buses.carrier == "AC"].union( + n.buses.index[n.buses.carrier == "DC"] + ) + nodal_fraction = pd.Series(0.0, elec_buses) + + for country in n.buses.loc[elec_buses, "country"].unique(): + gens = n.generators.index[ + (n.generators.index.str[:2] == country) + & (n.generators.carrier == carrier) + ] + cfs = n.generators_t.p_max_pu[gens].mean() + cfs_key = cfs / cfs.sum() + nodal_fraction.loc[n.generators.loc[gens, "bus"]] = cfs_key.groupby( + n.generators.loc[gens, "bus"] + ).sum() + + nodal_df = df.loc[n.buses.loc[elec_buses, "country"]] + nodal_df.index = elec_buses + nodal_df = nodal_df.multiply(nodal_fraction, axis=0) + + for year in nodal_df.columns: + for node in nodal_df.index: + name = f"{node}-{carrier}-{year}" + capacity = nodal_df.loc[node, year] + if capacity > 0.0: + df_agg.at[name, "Fueltype"] = carrier + df_agg.at[name, "Capacity"] = capacity + df_agg.at[name, "DateIn"] = year + df_agg.at[name, "lifetime"] = costs.at[carrier, "lifetime"] + df_agg.at[name, "DateOut"] = ( + year + costs.at[carrier, "lifetime"] - 1 + ) + df_agg.at[name, "cluster_bus"] = node + + +def add_power_capacities_installed_before_baseyear(n, grouping_years, costs, baseyear): + """ + Parameters + ---------- + n : pypsa.Network + grouping_years : + intervals to group existing capacities + costs : + to read lifetime to estimate YearDecomissioning + baseyear : int + """ + logger.debug( + f"Adding power capacities installed before {baseyear} from powerplants.csv" + ) + + df_agg = pd.read_csv(snakemake.input.powerplants, index_col=0) + + rename_fuel = { + "Hard Coal": "coal", + "Lignite": "lignite", + "Nuclear": "nuclear", + "Oil": "oil", + "OCGT": "OCGT", + "CCGT": "CCGT", + "Bioenergy": "urban central solid biomass CHP", + } + + # Replace Fueltype "Natural Gas" with the respective technology (OCGT or CCGT) + df_agg.loc[df_agg["Fueltype"] == "Natural Gas", "Fueltype"] = df_agg.loc[ + df_agg["Fueltype"] == "Natural Gas", "Technology" + ] + + fueltype_to_drop = [ + "Hydro", + "Wind", + "Solar", + "Geothermal", + "Waste", + "Other", + "CCGT, Thermal", + ] + + technology_to_drop = ["Pv", "Storage Technologies"] + + # drop unused fueltyps and technologies + df_agg.drop(df_agg.index[df_agg.Fueltype.isin(fueltype_to_drop)], inplace=True) + df_agg.drop(df_agg.index[df_agg.Technology.isin(technology_to_drop)], inplace=True) + df_agg.Fueltype = df_agg.Fueltype.map(rename_fuel) + + # Intermediate fix for DateIn & DateOut + # Fill missing DateIn + # TODO: revise CHP + biomass_i = df_agg.loc[df_agg.Fueltype == "urban central solid biomass CHP"].index + if biomass_i.empty: + mean = 0 + else: + mean = df_agg.loc[biomass_i, "DateIn"].mean() + df_agg.loc[biomass_i, 
"DateIn"] = df_agg.loc[biomass_i, "DateIn"].fillna(int(mean)) + # Fill missing DateOut + dateout = ( + df_agg.loc[biomass_i, "DateIn"] + + snakemake.params.costs["fill_values"]["lifetime"] + ) + df_agg.loc[biomass_i, "DateOut"] = df_agg.loc[biomass_i, "DateOut"].fillna(dateout) + + # drop assets which are already phased out / decommissioned + phased_out = df_agg[df_agg["DateOut"] < baseyear].index + df_agg.drop(phased_out, inplace=True) + + # assign clustered bus + busmap_s = pd.read_csv(snakemake.input.busmap_s, index_col=0).squeeze() + busmap = pd.read_csv(snakemake.input.busmap, index_col=0).squeeze() + + inv_busmap = {} + for k, v in busmap.items(): + inv_busmap[v] = inv_busmap.get(v, []) + [k] + + clustermaps = busmap_s.map(busmap) + clustermaps.index = clustermaps.index.astype(int) + + df_agg["cluster_bus"] = df_agg.bus.map(clustermaps) + + # include renewables in df_agg + add_existing_renewables(df_agg) + + df_agg["grouping_year"] = np.take( + grouping_years, np.digitize(df_agg.DateIn, grouping_years, right=True) + ) + + # calculate (adjusted) remaining lifetime before phase-out (+1 because assuming + # phase out date at the end of the year) + df_agg["lifetime"] = df_agg.DateOut - df_agg["grouping_year"] + 1 + + df = df_agg.pivot_table( + index=["grouping_year", "Fueltype"], + columns="cluster_bus", + values="Capacity", + aggfunc="sum", + ) + + lifetime = df_agg.pivot_table( + index=["grouping_year", "Fueltype"], + columns="cluster_bus", + values="lifetime", + aggfunc="mean", # currently taken mean for clustering lifetimes + ) + + carrier = { + "OCGT": "gas", + "CCGT": "gas", + "coal": "coal", + "oil": "oil", + "lignite": "lignite", + "nuclear": "uranium", + "urban central solid biomass CHP": "biomass", + } + + for grouping_year, generator in df.index: + # capacity is the capacity in MW at each node for this + capacity = df.loc[grouping_year, generator] + capacity = capacity[~capacity.isna()] + capacity = capacity[ + capacity > snakemake.params.existing_capacities["threshold_capacity"] + ] + suffix = "-ac" if generator == "offwind" else "" + name_suffix = f" {generator}{suffix}-{grouping_year}" + asset_i = capacity.index + name_suffix + if generator in ["solar", "onwind", "offwind"]: + # to consider electricity grid connection costs or a split between + # solar utility and rooftop as well, rather take cost assumptions + # from existing network than from the cost database + capital_cost = n.generators.loc[ + n.generators.carrier == generator + suffix, "capital_cost" + ].mean() + marginal_cost = n.generators.loc[ + n.generators.carrier == generator + suffix, "marginal_cost" + ].mean() + # check if assets are already in network (e.g. 
for 2020) + already_build = n.generators.index.intersection(asset_i) + new_build = asset_i.difference(n.generators.index) + + # this is for the year 2020 + if not already_build.empty: + n.generators.loc[already_build, "p_nom_min"] = capacity.loc[ + already_build.str.replace(name_suffix, "") + ].values + new_capacity = capacity.loc[new_build.str.replace(name_suffix, "")] + + if "m" in snakemake.wildcards.clusters: + for ind in new_capacity.index: + # existing capacities are split evenly among regions in every country + inv_ind = list(inv_busmap[ind]) + + # for offshore the splitting only includes coastal regions + inv_ind = [ + i for i in inv_ind if (i + name_suffix) in n.generators.index + ] + + p_max_pu = n.generators_t.p_max_pu[ + [i + name_suffix for i in inv_ind] + ] + p_max_pu.columns = [i + name_suffix for i in inv_ind] + + n.madd( + "Generator", + [i + name_suffix for i in inv_ind], + bus=ind, + carrier=generator, + p_nom=new_capacity[ind] + / len(inv_ind), # split among regions in a country + marginal_cost=marginal_cost, + capital_cost=capital_cost, + efficiency=costs.at[generator, "efficiency"], + p_max_pu=p_max_pu, + build_year=grouping_year, + lifetime=costs.at[generator, "lifetime"], + ) + + else: + # TODO: revision of this line to avoid this hardfix + # try: + p_max_pu = n.generators_t.p_max_pu[ + capacity.index + f" {generator}{suffix}-{baseyear}" + ] + # except: + # p_max_pu = n.generators_t.p_max_pu[ + # capacity.index + f" {generator}{suffix}" + # ] + + if not new_build.empty: + n.madd( + "Generator", + new_capacity.index, + suffix=" " + name_suffix, + bus=new_capacity.index, + carrier=generator, + p_nom=new_capacity, + marginal_cost=marginal_cost, + capital_cost=capital_cost, + efficiency=costs.at[generator, "efficiency"], + p_max_pu=p_max_pu.rename(columns=n.generators.bus), + build_year=grouping_year, + lifetime=costs.at[generator, "lifetime"], + ) + + else: + if generator not in vars(spatial).keys(): + logger.debug(f"Carrier type {generator} not in spatial data, skipping") + continue + + bus0 = vars(spatial)[carrier[generator]].nodes + if "EU" not in vars(spatial)[carrier[generator]].locations: + bus0 = bus0.intersection(capacity.index + " " + carrier[generator]) + + # check for missing bus + missing_bus = pd.Index(bus0).difference(n.buses.index) + if not missing_bus.empty: + logger.info(f"add buses {bus0}") + n.madd( + "Bus", + bus0, + carrier=generator, + location=vars(spatial)[carrier[generator]].locations, + unit="MWh_el", + ) + + already_build = n.links.index.intersection(asset_i) + new_build = asset_i.difference(n.links.index) + lifetime_assets = lifetime.loc[grouping_year, generator].dropna() + + # this is for the year 2020 + if not already_build.empty: + n.links.loc[already_build, "p_nom_min"] = capacity.loc[ + already_build.str.replace(name_suffix, "") + ].values + + if not new_build.empty: + new_capacity = capacity.loc[new_build.str.replace(name_suffix, "")] + + if generator != "urban central solid biomass CHP": + n.madd( + "Link", + new_capacity.index, + suffix=name_suffix, + bus0=bus0, + bus1=new_capacity.index, + bus2="co2 atmosphere", + carrier=generator, + marginal_cost=costs.at[generator, "efficiency"] + * costs.at[generator, "VOM"], # NB: VOM is per MWel + capital_cost=costs.at[generator, "efficiency"] + * costs.at[generator, "fixed"], # NB: fixed cost is per MWel + p_nom=new_capacity / costs.at[generator, "efficiency"], + efficiency=costs.at[generator, "efficiency"], + efficiency2=costs.at[carrier[generator], "CO2 intensity"], + 
+                        build_year=grouping_year,
+                        lifetime=lifetime_assets.loc[new_capacity.index],
+                    )
+                else:
+                    key = "central solid biomass CHP"
+                    n.madd(
+                        "Link",
+                        new_capacity.index,
+                        suffix=name_suffix,
+                        bus0=spatial.biomass.df.loc[new_capacity.index]["nodes"].values,
+                        bus1=new_capacity.index,
+                        bus2=new_capacity.index + " urban central heat",
+                        carrier=generator,
+                        p_nom=new_capacity / costs.at[key, "efficiency"],
+                        capital_cost=costs.at[key, "fixed"]
+                        * costs.at[key, "efficiency"],
+                        marginal_cost=costs.at[key, "VOM"],
+                        efficiency=costs.at[key, "efficiency"],
+                        build_year=grouping_year,
+                        efficiency2=costs.at[key, "efficiency-heat"],
+                        lifetime=lifetime_assets.loc[new_capacity.index],
+                    )
+    # check if existing capacities are larger than technical potential
+    existing_large = n.generators[
+        n.generators["p_nom_min"] > n.generators["p_nom_max"]
+    ].index
+    if len(existing_large):
+        logger.warning(
+            f"Existing capacities larger than technical potential for {existing_large},\
+                        adjust technical potential to existing capacities"
+        )
+        n.generators.loc[existing_large, "p_nom_max"] = n.generators.loc[
+            existing_large, "p_nom_min"
+        ]
+
+
+def add_heating_capacities_installed_before_baseyear(
+    n,
+    baseyear,
+    grouping_years,
+    ashp_cop,
+    gshp_cop,
+    time_dep_hp_cop,
+    costs,
+    default_lifetime,
+):
+    """
+    Parameters
+    ----------
+    n : pypsa.Network
+    baseyear : last year covered in the existing capacities database
+    grouping_years : intervals to group existing capacities
+
+    A linear decommissioning of heating capacities from 2020 to 2045 is
+    currently assumed. Heating capacities are split between residential and
+    services proportionally to the heating load in both, with 50% of the
+    capacities in rural buses and 50% in urban buses.
+    """
+    logger.debug(f"Adding heating capacities installed before {baseyear}")
+
+    existing_heating = pd.read_csv(
+        snakemake.input.existing_heating_distribution, header=[0, 1], index_col=0
+    )
+
+    techs = existing_heating.columns.get_level_values(1).unique()
+
+    for name in existing_heating.columns.get_level_values(0).unique():
+        name_type = "central" if name == "urban central" else "decentral"
+
+        nodes = pd.Index(n.buses.location[n.buses.index.str.contains(f"{name} heat")])
+
+        if (name_type != "central") and options["electricity_distribution_grid"]:
+            nodes_elec = nodes + " low voltage"
+        else:
+            nodes_elec = nodes
+
+        heat_pump_type = "air" if "urban" in name else "ground"
+
+        # Add heat pumps
+        costs_name = f"decentral {heat_pump_type}-sourced heat pump"
+
+        cop = {"air": ashp_cop, "ground": gshp_cop}
+
+        if time_dep_hp_cop:
+            efficiency = cop[heat_pump_type][nodes]
+        else:
+            efficiency = costs.at[costs_name, "efficiency"]
+
+        for i, grouping_year in enumerate(grouping_years):
+            if int(grouping_year) + default_lifetime <= int(baseyear):
+                continue
+
+            # installation is assumed to be linear for the past default_lifetime years
+            ratio = (int(grouping_year) - int(grouping_years[i - 1])) / default_lifetime
+
+            n.madd(
+                "Link",
+                nodes,
+                suffix=f" {name} {heat_pump_type} heat pump-{grouping_year}",
+                bus0=nodes_elec,
+                bus1=nodes + " " + name + " heat",
+                carrier=f"{name} {heat_pump_type} heat pump",
+                efficiency=efficiency,
+                capital_cost=costs.at[costs_name, "efficiency"]
+                * costs.at[costs_name, "fixed"],
+                p_nom=existing_heating.loc[nodes, (name, f"{heat_pump_type} heat pump")]
+                * ratio
+                / costs.at[costs_name, "efficiency"],
+                build_year=int(grouping_year),
+                lifetime=costs.at[costs_name, "lifetime"],
+            )
+
+            # add resistive heater, gas boilers and oil boilers
+            n.madd(
+                "Link",
+                nodes,
+ suffix=f" {name} resistive heater-{grouping_year}", + bus0=nodes_elec, + bus1=nodes + " " + name + " heat", + carrier=name + " resistive heater", + efficiency=costs.at[f"{name_type} resistive heater", "efficiency"], + capital_cost=( + costs.at[f"{name_type} resistive heater", "efficiency"] + * costs.at[f"{name_type} resistive heater", "fixed"] + ), + p_nom=( + existing_heating.loc[nodes, (name, "resistive heater")] + * ratio + / costs.at[f"{name_type} resistive heater", "efficiency"] + ), + build_year=int(grouping_year), + lifetime=costs.at[f"{name_type} resistive heater", "lifetime"], + ) + + n.madd( + "Link", + nodes, + suffix=f" {name} gas boiler-{grouping_year}", + bus0="EU gas" if "EU gas" in spatial.gas.nodes else nodes + " gas", + bus1=nodes + " " + name + " heat", + bus2="co2 atmosphere", + carrier=name + " gas boiler", + efficiency=costs.at[f"{name_type} gas boiler", "efficiency"], + efficiency2=costs.at["gas", "CO2 intensity"], + capital_cost=( + costs.at[f"{name_type} gas boiler", "efficiency"] + * costs.at[f"{name_type} gas boiler", "fixed"] + ), + p_nom=( + existing_heating.loc[nodes, (name, "gas boiler")] + * ratio + / costs.at[f"{name_type} gas boiler", "efficiency"] + ), + build_year=int(grouping_year), + lifetime=costs.at[f"{name_type} gas boiler", "lifetime"], + ) + + n.madd( + "Link", + nodes, + suffix=f" {name} oil boiler-{grouping_year}", + bus0=spatial.oil.nodes, + bus1=nodes + " " + name + " heat", + bus2="co2 atmosphere", + carrier=name + " oil boiler", + efficiency=costs.at["decentral oil boiler", "efficiency"], + efficiency2=costs.at["oil", "CO2 intensity"], + capital_cost=costs.at["decentral oil boiler", "efficiency"] + * costs.at["decentral oil boiler", "fixed"], + p_nom=( + existing_heating.loc[nodes, (name, "oil boiler")] + * ratio + / costs.at["decentral oil boiler", "efficiency"] + ), + build_year=int(grouping_year), + lifetime=costs.at[f"{name_type} gas boiler", "lifetime"], + ) + + # delete links with p_nom=nan corresponding to extra nodes in country + n.mremove( + "Link", + [ + index + for index in n.links.index.to_list() + if str(grouping_year) in index and np.isnan(n.links.p_nom[index]) + ], + ) + + # delete links with capacities below threshold + threshold = snakemake.params.existing_capacities["threshold_capacity"] + n.mremove( + "Link", + [ + index + for index in n.links.index.to_list() + if str(grouping_year) in index and n.links.p_nom[index] < threshold + ], + ) + + +if __name__ == "__main__": + if "snakemake" not in globals(): + snakemake = mock_snakemake( + "add_existing_baseyear", + simpl="", + clusters="4", + ll="c1", + opts="Co2L", + planning_horizons="2030", + sopts="144H", + discountrate=0.071, + demand="DF", + h2export="120", + ) + + # configure_logging(snakemake) + # set_scenario_config(snakemake) + + # update_config_from_wildcards(snakemake.config, snakemake.wildcards) + + options = snakemake.params.sector + + baseyear = snakemake.params.baseyear + + n = pypsa.Network(snakemake.input.network) + + # define spatial resolution of carriers + spatial = define_spatial(n.buses[n.buses.carrier == "AC"].index, options) + add_build_year_to_new_assets(n, baseyear) + + Nyears = n.snapshot_weightings.generators.sum() / 8760.0 + costs = prepare_costs( + snakemake.input.costs, + snakemake.params.costs["USD2013_to_EUR2013"], + snakemake.params.costs["fill_values"], + Nyears, + ) + + grouping_years_power = snakemake.params.existing_capacities["grouping_years_power"] + grouping_years_heat = snakemake.params.existing_capacities["grouping_years_heat"] + 
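+    # grouping_years_* are ascending interval edges used to bin each asset's
+    # DateIn via np.digitize(..., right=True); a sketch with made-up values:
+    #
+    #   np.digitize([1979, 1992, 2019], [1980, 1990, 2000, 2010, 2020], right=True)
+    #   # -> [0, 2, 4], i.e. the assets are grouped to 1980, 2000 and 2020
+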
+    add_power_capacities_installed_before_baseyear(
+        n, grouping_years_power, costs, baseyear
+    )
+
+    # TODO: not implemented in -sec yet
+    # if options["heating"]:
+    #     time_dep_hp_cop = options["time_dep_hp_cop"]
+    #     ashp_cop = (
+    #         xr.open_dataarray(snakemake.input.cop_air_total)
+    #         .to_pandas()
+    #         .reindex(index=n.snapshots)
+    #     )
+    #     gshp_cop = (
+    #         xr.open_dataarray(snakemake.input.cop_soil_total)
+    #         .to_pandas()
+    #         .reindex(index=n.snapshots)
+    #     )
+    #     default_lifetime = snakemake.params.existing_capacities[
+    #         "default_heating_lifetime"
+    #     ]
+    #     add_heating_capacities_installed_before_baseyear(
+    #         n,
+    #         baseyear,
+    #         grouping_years_heat,
+    #         ashp_cop,
+    #         gshp_cop,
+    #         time_dep_hp_cop,
+    #         costs,
+    #         default_lifetime,
+    #     )
+
+    # if options.get("cluster_heat_buses", False):
+    #     cluster_heat_buses(n)
+
+    n.meta = dict(snakemake.config, **dict(wildcards=dict(snakemake.wildcards)))
+
+    # sanitize_carriers(n, snakemake.config)
+
+    n.export_to_netcdf(snakemake.output[0])
diff --git a/scripts/add_export.py b/scripts/add_export.py
new file mode 100644
index 000000000..46773c413
--- /dev/null
+++ b/scripts/add_export.py
@@ -0,0 +1,261 @@
+# -*- coding: utf-8 -*-
+# SPDX-FileCopyrightText: PyPSA-Earth and PyPSA-Eur Authors
+#
+# SPDX-License-Identifier: AGPL-3.0-or-later
+"""
+Proposed code structure:
+X read network (.nc-file)
+X add export bus
+X connect hydrogen buses (advanced: only ports, not all) to export bus
+X add store and connect to export bus
+X (add load and connect to export bus) only required if the "store" option fails
+
+Possible improvements:
+- Select port buses automatically (with both voronoi and gadm clustering). Use data/ports.csv?
+"""
+
+
+import logging
+
+import geopandas as gpd
+import numpy as np
+import pandas as pd
+import pypsa
+from _helpers import locate_bus, mock_snakemake, override_component_attrs, prepare_costs
+
+logger = logging.getLogger(__name__)
+
+
+def select_ports(
+    n,
+    export_ports_path,
+    gadm_level_val,
+    geo_crs_val,
+    file_prefix_val,
+    gadm_url_prefix_val,
+    contended_flag_val,
+    gadm_input_file_args_list,
+    shapes_path_val,
+    gadm_clustering_val,
+):
+    """
+    This function selects the buses where ports are located.
+    """
+
+    ports = pd.read_csv(
+        export_ports_path,
+        index_col=None,
+        keep_default_na=False,
+    ).squeeze()
+
+    ports = ports[ports.country.isin(countries)]
+    if len(ports) < 1:
+        logger.error(
+            "No export ports chosen, please add ports to the file data/export_ports.csv"
+        )
+
+    ports["gadm_{}".format(gadm_level_val)] = ports[["x", "y", "country"]].apply(
+        lambda port: locate_bus(
+            port[["x", "y"]],
+            port["country"],
+            gadm_level_val,
+            geo_crs_val,
+            file_prefix_val,
+            gadm_url_prefix_val,
+            gadm_input_file_args_list,
+            contended_flag_val,
+            path_to_gadm=shapes_path_val,
+            gadm_clustering=gadm_clustering_val,
+        ),
+        axis=1,
+    )
+
+    ports = ports.set_index("gadm_{}".format(gadm_level_val))
+
+    # Select the hydrogen buses based on nodes with ports
+    hydrogen_buses_ports = n.buses.loc[ports.index + " H2"]
+    hydrogen_buses_ports.index.name = "Bus"
+
+    return hydrogen_buses_ports
+
+
+def add_export(n, hydrogen_buses_ports, export_profile):
+    country_shape = gpd.read_file(snakemake.input["shapes_path"])
+    # Find most northwestern point in country shape and get x and y coordinates
+    country_shape = country_shape.to_crs(
+        "EPSG:3395"
+    )  # Project to Mercator projection (Projected)
+
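+    # EPSG:3395 is a projected (metric) CRS, so the min()/max() coordinates
+    # below are in metres rather than degrees.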
+    # Get the coordinates of the most western and most northern centroid of the country and add a small buffer
+    x_export = country_shape.geometry.centroid.x.min() - 2
+    y_export = country_shape.geometry.centroid.y.max() + 2
+
+    # add export bus
+    n.add(
+        "Bus",
+        "H2 export bus",
+        carrier="H2",
+        location="Earth",
+        x=x_export,
+        y=y_export,
+    )
+
+    # add export links
+    logger.info("Adding export links")
+    n.madd(
+        "Link",
+        names=hydrogen_buses_ports.index + " export",
+        bus0=hydrogen_buses_ports.index,
+        bus1="H2 export bus",
+        p_nom_extendable=True,
+    )
+
+    export_links = n.links[n.links.index.str.contains("export")]
+    logger.info(export_links)
+
+    # add store depending on config settings
+
+    if snakemake.params.store:
+        if snakemake.params.store_capital_costs == "no_costs":
+            capital_cost = 0
+        elif snakemake.params.store_capital_costs == "standard_costs":
+            capital_cost = costs.at[
+                "hydrogen storage tank type 1 including compressor", "fixed"
+            ]
+        else:
+            logger.error(
+                f"Value {snakemake.params.store_capital_costs} for ['export']['store_capital_costs'] is not valid"
+            )
+
+        n.add(
+            "Store",
+            "H2 export store",
+            bus="H2 export bus",
+            e_nom_extendable=True,
+            carrier="H2",
+            e_initial=0,  # actually not required, since e_cyclic=True
+            marginal_cost=0,
+            capital_cost=capital_cost,
+            e_cyclic=True,
+        )
+    else:
+        pass
+
+    # add load
+    n.add(
+        "Load",
+        "H2 export load",
+        bus="H2 export bus",
+        carrier="H2",
+        p_set=export_profile,
+    )
+
+    return
+
+
+def create_export_profile():
+    """
+    This function creates the export profile based on the annual export demand
+    and resamples it to the temporal resolution obtained from the wildcard.
+    """
+
+    export_h2 = float(snakemake.wildcards["h2export"]) * 1e6  # convert TWh to MWh
+
+    if snakemake.params.export_profile == "constant":
+        export_profile = export_h2 / 8760
+        snapshots = pd.date_range(freq="h", **snakemake.params.snapshots)
+        export_profile = pd.Series(export_profile, index=snapshots)
+
+    elif snakemake.params.export_profile == "ship":
+        # Import hydrogen export ship profile and check if it matches the export demand obtained from the wildcard
+        export_profile = pd.read_csv(snakemake.input.ship_profile, index_col=0)
+        export_profile.index = pd.to_datetime(export_profile.index)
+        export_profile = pd.Series(
+            export_profile["profile"], index=pd.to_datetime(export_profile.index)
+        )
+
+        if np.abs(export_profile.sum() - export_h2) > 1:  # Threshold of 1 MWh
+            logger.error(
+                f"Sum of ship profile ({export_profile.sum()/1e6} TWh) does not match export demand ({export_h2/1e6} TWh)"
+            )
+            raise ValueError(
+                f"Sum of ship profile ({export_profile.sum()/1e6} TWh) does not match export demand ({export_h2/1e6} TWh)"
+            )
+
+    # Resample to temporal resolution defined in wildcard "sopts" with pandas resample
+    sopts = snakemake.wildcards.sopts.split("-")
+    export_profile = export_profile.resample(sopts[0].casefold()).mean()
+
+    # revise logger msg
+    export_type = snakemake.params.export_profile
+    logger.info(
+        f"The yearly export demand is {export_h2/1e6} TWh, profile generated based on {export_type} method and resampled to {sopts[0]}"
+    )
+
+    return export_profile
+
+
+if __name__ == "__main__":
+    if "snakemake" not in globals():
+        snakemake = mock_snakemake(
+            "add_export",
+            simpl="",
+            clusters="10",
+            ll="c1.0",
+            opts="Co2L",
+            planning_horizons="2030",
+            sopts="144H",
+            discountrate="0.071",
+            demand="AB",
+            h2export="120",
+        )
+
+    overrides = override_component_attrs(snakemake.input.overrides)
+    n = pypsa.Network(snakemake.input.network, override_component_attrs=overrides)
+    export_ports = snakemake.input.export_ports
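+    # The h2export wildcard is an annual export demand in TWh: e.g. h2export="120"
+    # becomes 120 * 1e6 = 1.2e8 MWh in create_export_profile() and, for the
+    # "constant" profile, a flat load of 1.2e8 / 8760 (roughly 13,700 MW).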
+    countries = list(n.buses.country.unique())
+    gadm_level = snakemake.params.gadm_level
+    geo_crs = snakemake.params.geo_crs
+    file_prefix = snakemake.params.gadm_file_prefix
+    gadm_url_prefix = snakemake.params.gadm_url_prefix
+    contended_flag = snakemake.params.contended_flag
+    gadm_input_file_args = ["data", "raw", "gadm"]
+    shapes_path = snakemake.input["shapes_path"]
+    gadm_clustering = snakemake.params.alternative_clustering
+
+    # Create export profile
+    export_profile = create_export_profile()
+
+    # Prepare the costs dataframe
+    Nyears = n.snapshot_weightings.generators.sum() / 8760
+
+    costs = prepare_costs(
+        snakemake.input.costs,
+        snakemake.params.costs["USD2013_to_EUR2013"],
+        snakemake.params.costs["fill_values"],
+        Nyears,
+    )
+
+    # get hydrogen export buses/ports
+    hydrogen_buses_ports = select_ports(
+        n,
+        export_ports,
+        gadm_level,
+        geo_crs,
+        file_prefix,
+        gadm_url_prefix,
+        contended_flag,
+        gadm_input_file_args,
+        shapes_path,
+        gadm_clustering,
+    )
+
+    # add export value and components to network
+    add_export(n, hydrogen_buses_ports, export_profile)
+
+    n.export_to_netcdf(snakemake.output[0])
+
+    logger.info("Network successfully exported")
diff --git a/scripts/add_extra_components.py b/scripts/add_extra_components.py
index 18f248e94..7a649f9d6 100644
--- a/scripts/add_extra_components.py
+++ b/scripts/add_extra_components.py
@@ -57,12 +57,7 @@
 import numpy as np
 import pandas as pd
 import pypsa
-from _helpers import (
-    change_to_script_dir,
-    configure_logging,
-    create_logger,
-    mock_snakemake,
-)
+from _helpers import configure_logging, create_logger, mock_snakemake
 from add_electricity import (
     _add_missing_carriers_from_costs,
     add_nice_carrier_names,
@@ -109,7 +104,7 @@ def attach_stores(n, costs, config):
 
     _add_missing_carriers_from_costs(n, costs, carriers)
 
-    buses_i = n.buses.index
+    buses_i = n.buses.query("carrier == 'AC'").index
     bus_sub_dict = {k: n.buses[k].values for k in ["x", "y", "country"]}
 
     if "H2" in carriers:
@@ -192,20 +187,17 @@ def attach_stores(n, costs, config):
     if ("csp" in elec_opts["renewable_carriers"]) and (
         config["renewable"]["csp"]["csp_model"] == "advanced"
     ):
-        # get CSP generators and their buses
-        csp_gens = n.generators.query("carrier == 'csp'")
-        buses_csp_gens = n.buses.loc[csp_gens.bus]
-
-        csp_buses_i = csp_gens.index
-        c_buses_i = csp_gens.bus.values
-
-        csp_bus_sub_dict = {k: buses_csp_gens[k].values for k in ["x", "y", "country"]}
-
-        # add buses for csp
-        n.madd("Bus", csp_buses_i, carrier="csp", **csp_bus_sub_dict)
-
-        # change bus of existing csp generators
-        n.generators.loc[csp_gens.index, "bus"] = csp_buses_i
+        # add separate buses for csp
+        main_buses = n.generators.query("carrier == 'csp'").bus
+        csp_buses_i = n.madd(
+            "Bus",
+            main_buses + " csp",
+            carrier="csp",
+            x=n.buses.loc[main_buses, "x"].values,
+            y=n.buses.loc[main_buses, "y"].values,
+            country=n.buses.loc[main_buses, "country"].values,
+        )
+        n.generators.loc[main_buses.index, "bus"] = csp_buses_i
 
         # add stores for csp
         n.madd(
@@ -224,7 +216,7 @@ def attach_stores(n, costs, config):
             "Link",
             csp_buses_i,
             bus0=csp_buses_i,
-            bus1=c_buses_i,
+            bus1=main_buses.values,
             carrier="csp",
             efficiency=costs.at["csp-tower", "efficiency"],
             capital_cost=costs.at["csp-tower", "capital_cost"],
@@ -276,8 +268,7 @@ def attach_hydrogen_pipelines(n, costs, config):
 
 if __name__ == "__main__":
     if "snakemake" not in globals():
-        change_to_script_dir(__file__)
-        snakemake = mock_snakemake("add_extra_components", simpl="", clusters="20flex")
+        snakemake = 
mock_snakemake("add_extra_components", simpl="", clusters=10) configure_logging(snakemake) diff --git a/scripts/augmented_line_connections.py b/scripts/augmented_line_connections.py index 0bddebb30..433d2f3fc 100644 --- a/scripts/augmented_line_connections.py +++ b/scripts/augmented_line_connections.py @@ -33,12 +33,7 @@ import numpy as np import pandas as pd import pypsa -from _helpers import ( - change_to_script_dir, - configure_logging, - create_logger, - mock_snakemake, -) +from _helpers import configure_logging, create_logger, mock_snakemake from add_electricity import load_costs from networkx.algorithms import complement from networkx.algorithms.connectivity.edge_augmentation import k_edge_augmentation @@ -56,7 +51,6 @@ def haversine(p): if __name__ == "__main__": if "snakemake" not in globals(): - change_to_script_dir(__file__) snakemake = mock_snakemake( "augmented_line_connections", network="elec", simpl="", clusters="54" ) diff --git a/scripts/base_network.py b/scripts/base_network.py index a38527c9c..83c5a4830 100644 --- a/scripts/base_network.py +++ b/scripts/base_network.py @@ -64,7 +64,6 @@ import shapely.prepared import shapely.wkt from _helpers import ( - change_to_script_dir, configure_logging, create_logger, get_path_size, @@ -156,7 +155,6 @@ def _load_lines_from_osm(fp_osm_lines): lines["length"] /= 1e3 # m to km conversion lines["v_nom"] /= 1e3 # V to kV conversion lines = lines.loc[:, ~lines.columns.str.contains("^Unnamed")] # remove unnamed col - # lines = _remove_dangling_branches(lines, buses) # TODO: add dangling branch removal? return lines @@ -172,8 +170,6 @@ def _load_converters_from_osm(fp_osm_converters): dtype=dict(converter_id="str", bus0="str", bus1="str"), ).set_index("converter_id") - # converters = _remove_dangling_branches(converters, buses) - converters["carrier"] = "B2B" converters["dc"] = True @@ -189,7 +185,6 @@ def _load_transformers_from_osm(fp_osm_transformers): .rename(columns=dict(line_id="transformer_id")) .set_index("transformer_id") ) - # transformers = _remove_dangling_branches(transformers, buses) # TODO: add dangling branch removal? 
return transformers @@ -480,7 +475,6 @@ def base_network( if __name__ == "__main__": if "snakemake" not in globals(): - change_to_script_dir(__file__) snakemake = mock_snakemake("base_network") configure_logging(snakemake) diff --git a/scripts/build_base_energy_totals.py b/scripts/build_base_energy_totals.py new file mode 100644 index 000000000..c86985a8f --- /dev/null +++ b/scripts/build_base_energy_totals.py @@ -0,0 +1,473 @@ +# -*- coding: utf-8 -*- +# SPDX-FileCopyrightText: PyPSA-Earth and PyPSA-Eur Authors +# +# SPDX-License-Identifier: AGPL-3.0-or-later + +# -*- coding: utf-8 -*- +import glob +import logging +import pathlib +from io import BytesIO +from urllib.request import urlopen +from zipfile import ZipFile + +import country_converter as coco +import numpy as np +import pandas as pd +from _helpers import aggregate_fuels, get_conv_factors, mock_snakemake, modify_commodity + +_logger = logging.getLogger(__name__) + +pd.options.mode.chained_assignment = None + + +def calc_sector(sector): + for country in countries: + # print(country, sector) + df_co = df_yr[df_yr.country == country] + + if sector == "navigation": + df_sector = df_co.loc[ + (df_co["Commodity - Transaction"].str.lower().str.contains(sector)) + | ( + df_co["Commodity - Transaction"] + .str.lower() + .str.contains("marine bunkers") + ) + ] + + elif sector == "non energy use": + df_sector = df_co.loc[ + (df_co["Transaction"].str.lower().str.contains(sector)) + | ( + df_co["Transaction"] + .str.replace("-", " ") + .str.replace("uses", "use") + .str.lower() + .str.contains(sector) + ) + ] + elif sector == "other energy": + df_sector = df_co.loc[df_co["Transaction"].isin(other_energy)] + else: + df_sector = df_co.loc[ + df_co["Commodity - Transaction"].str.lower().str.contains(sector) + ] + # assert df_yr[df_yr["Commodity - Transaction"].str.contains(sector)]["Unit"].unique() == 'Metric tons, thousand', "Not all quantities have the expected unit: {}".format(expected_unit) + + if df_sector.empty: + if sector == "consumption by households": + energy_totals_base.at[country, "electricity residential"] = np.NaN + energy_totals_base.at[country, "residential oil"] = np.NaN + energy_totals_base.at[country, "residential biomass"] = np.NaN + energy_totals_base.at[country, "residential gas"] = np.NaN + energy_totals_base.at[country, "total residential space"] = np.NaN + energy_totals_base.at[country, "total residential water"] = np.NaN + + elif sector == "services": + energy_totals_base.at[country, "services electricity"] = np.NaN + energy_totals_base.at[country, "services oil"] = np.NaN + energy_totals_base.at[country, "services biomass"] = np.NaN + energy_totals_base.at[country, "services gas"] = np.NaN + energy_totals_base.at[country, "total services space"] = np.NaN + energy_totals_base.at[country, "total services water"] = np.NaN + + elif sector == "road": + energy_totals_base.at[country, "total road"] = np.NaN + + elif sector == "agriculture": + energy_totals_base.at[country, "agriculture electricity"] = np.NaN + energy_totals_base.at[country, "agriculture oil"] = np.NaN + energy_totals_base.at[country, "agriculture biomass"] = np.NaN + # energy_totals_base.at[country, "electricity rail"] = np.NaN + + elif sector == "rail": + energy_totals_base.at[country, "total rail"] = np.NaN + energy_totals_base.at[country, "electricity rail"] = np.NaN + + elif sector == "aviation": + energy_totals_base.at[country, "total international aviation"] = np.NaN + energy_totals_base.at[country, "total domestic aviation"] = np.NaN + + elif sector 
== "navigation": + energy_totals_base.at[country, "total international navigation"] = ( + np.NaN + ) + energy_totals_base.at[country, "total domestic navigation"] = np.NaN + + _logger.warning("No data for " + country + " in the sector " + sector + ".") + + else: + index_mass = df_sector.loc[ + df_sector["Unit"] == "Metric tons, thousand" + ].index + df_sector.loc[index_mass, "Quantity_TWh"] = df_sector.loc[index_mass].apply( + lambda x: x["Quantity"] * fuels_conv_toTWh[x["Commodity"]], axis=1 + ) + + index_energy = df_sector[ + df_sector["Unit"] == "Kilowatt-hours, million" + ].index + df_sector.loc[index_energy, "Quantity_TWh"] = df_sector.loc[ + index_energy + ].apply(lambda x: x["Quantity"] / 1e3, axis=1) + + index_energy_TJ = df_sector[df_sector["Unit"] == "Terajoules"].index + df_sector.loc[index_energy_TJ, "Quantity_TWh"] = df_sector.loc[ + index_energy_TJ + ].apply(lambda x: x["Quantity"] / 3600, axis=1) + + index_volume = df_sector[ + df_sector["Unit"] == "Cubic metres, thousand" + ].index + df_sector.loc[index_volume, "Quantity_TWh"] = df_sector.loc[ + index_volume + ].apply(lambda x: x["Quantity"] * fuels_conv_toTWh[x["Commodity"]], axis=1) + + sectors_dfs[sector] = df_sector.copy() + + if sector == "consumption by households": + if snakemake.params.shift_coal_to_elec: + condition = (df_sector.Commodity == "Electricity") | ( + df_sector.Commodity.isin(coal_fuels) + ) + else: + condition = df_sector.Commodity == "Electricity" + + energy_totals_base.at[country, "electricity residential"] = round( + df_sector[condition].Quantity_TWh.sum(), 4 + ) + energy_totals_base.at[country, "residential oil"] = round( + df_sector[df_sector.Commodity.isin(oil_fuels)].Quantity_TWh.sum(), 4 + ) + energy_totals_base.at[country, "residential biomass"] = round( + df_sector[ + df_sector.Commodity.isin(biomass_fuels) + ].Quantity_TWh.sum(), + 4, + ) + energy_totals_base.at[country, "residential gas"] = round( + df_sector[df_sector.Commodity.isin(gas_fuels)].Quantity_TWh.sum(), 4 + ) + energy_totals_base.at[country, "total residential space"] = ( + round( + df_sector[df_sector.Commodity.isin(heat)].Quantity_TWh.sum(), 4 + ) + * snakemake.params.space_heat_share + ) + energy_totals_base.at[country, "total residential water"] = round( + df_sector[df_sector.Commodity.isin(heat)].Quantity_TWh.sum(), 4 + ) * (1 - snakemake.params.space_heat_share) + + elif sector == "services": + if snakemake.params.shift_coal_to_elec: + condition = (df_sector.Commodity == "Electricity") | ( + df_sector.Commodity.isin(coal_fuels) + ) + else: + condition = df_sector.Commodity == "Electricity" + + energy_totals_base.at[country, "services electricity"] = round( + df_sector[condition].Quantity_TWh.sum(), + 4, + ) + energy_totals_base.at[country, "services oil"] = round( + df_sector[df_sector.Commodity.isin(oil_fuels)].Quantity_TWh.sum(), 4 + ) + energy_totals_base.at[country, "services biomass"] = round( + df_sector[ + df_sector.Commodity.isin(biomass_fuels) + ].Quantity_TWh.sum(), + 4, + ) + energy_totals_base.at[country, "services gas"] = round( + df_sector[df_sector.Commodity.isin(gas_fuels)].Quantity_TWh.sum(), 4 + ) + energy_totals_base.at[country, "total services space"] = ( + round( + df_sector[df_sector.Commodity.isin(heat)].Quantity_TWh.sum(), 4 + ) + * snakemake.params.space_heat_share + ) + energy_totals_base.at[country, "total services water"] = round( + df_sector[df_sector.Commodity.isin(heat)].Quantity_TWh.sum(), 4 + ) * (1 - snakemake.params.space_heat_share) + + elif sector == "road": + 
energy_totals_base.at[country, "total road"] = round( + df_sector.Quantity_TWh.sum(), 4 + ) + energy_totals_base.at[country, "road electricity"] = round( + df_sector[df_sector.Commodity == "Electricity"].Quantity_TWh.sum(), + 4, + ) + energy_totals_base.at[country, "road gas"] = round( + df_sector[df_sector.Commodity.isin(gas_fuels)].Quantity_TWh.sum(), 4 + ) + energy_totals_base.at[country, "road biomass"] = round( + df_sector[ + df_sector.Commodity.isin(biomass_fuels) + ].Quantity_TWh.sum(), + 4, + ) + energy_totals_base.at[country, "road oil"] = round( + df_sector[df_sector.Commodity.isin(oil_fuels)].Quantity_TWh.sum(), 4 + ) + + elif sector == "agriculture": + energy_totals_base.at[country, "agriculture electricity"] = round( + df_sector[ + (df_sector.Commodity == "Electricity") + ].Quantity_TWh.sum(), + 4, + ) + energy_totals_base.at[country, "agriculture oil"] = round( + df_sector[df_sector.Commodity.isin(oil_fuels)].Quantity_TWh.sum(), 4 + ) + energy_totals_base.at[country, "agriculture biomass"] = round( + df_sector[ + df_sector.Commodity.isin(biomass_fuels) + ].Quantity_TWh.sum(), + 4, + ) + energy_totals_base.at[country, "agriculture coal"] = round( + df_sector[df_sector.Commodity.isin(coal_fuels)].Quantity_TWh.sum(), + 4, + ) + # energy_totals_base.at[country, "electricity rail"] = round(df_house[(df_house.Commodity=="Electricity")].Quantity_TWh.sum(), 4) + + elif sector == "rail": + energy_totals_base.at[country, "total rail"] = round( + df_sector[ + (df_sector.Commodity == "Gas Oil/ Diesel Oil") + | (df_sector.Commodity == "Biodiesel") + | (df_sector.Commodity == "Electricity") + ].Quantity_TWh.sum(), + 4, + ) + energy_totals_base.at[country, "electricity rail"] = round( + df_sector[ + (df_sector.Commodity == "Electricity") + ].Quantity_TWh.sum(), + 4, + ) + + elif sector == "aviation": + energy_totals_base.at[country, "total international aviation"] = round( + df_sector[ + (df_sector.Commodity == "Kerosene-type Jet Fuel") + & (df_sector.Transaction == "International aviation bunkers") + ].Quantity_TWh.sum(), + 4, + ) + energy_totals_base.at[country, "total domestic aviation"] = round( + df_sector[ + (df_sector.Commodity == "Kerosene-type Jet Fuel") + & (df_sector.Transaction == "Consumption by domestic aviation") + ].Quantity_TWh.sum(), + 4, + ) + + elif sector == "navigation": + energy_totals_base.at[country, "total international navigation"] = ( + round( + df_sector[ + df_sector.Transaction == "International marine bunkers" + ].Quantity_TWh.sum(), + 4, + ) + ) + energy_totals_base.at[country, "total domestic navigation"] = round( + df_sector[ + df_sector.Transaction == "Consumption by domestic navigation" + ].Quantity_TWh.sum(), + 4, + ) + elif sector == "other energy": + if snakemake.params.shift_coal_to_elec: + condition = (df_sector.Commodity == "Electricity") | ( + df_sector.Commodity.isin(coal_fuels) + ) + else: + condition = df_sector.Commodity == "Electricity" + + energy_totals_base.at[country, "other electricity"] = round( + df_sector[condition].Quantity_TWh.sum(), 4 + ) + + energy_totals_base.at[country, "other oil"] = round( + df_sector[df_sector.Commodity.isin(oil_fuels)].Quantity_TWh.sum(), 4 + ) + energy_totals_base.at[country, "other biomass"] = round( + df_sector[ + df_sector.Commodity.isin(biomass_fuels) + ].Quantity_TWh.sum(), + 4, + ) + energy_totals_base.at[country, "other gas"] = round( + df_sector[df_sector.Commodity.isin(gas_fuels)].Quantity_TWh.sum(), + 4, + ) + energy_totals_base.at[country, "other heat"] = round( + 
+                df_sector[df_sector.Commodity.isin(heat)].Quantity_TWh.sum(),
+                4,
+            )
+        elif sector == "non energy use":
+            if snakemake.params.shift_coal_to_elec:
+                condition = (df_sector.Commodity == "Electricity") | (
+                    df_sector.Commodity.isin(coal_fuels)
+                )
+            else:
+                condition = df_sector.Commodity == "Electricity"
+
+            energy_totals_base.at[country, "non energy electricity"] = round(
+                df_sector[condition].Quantity_TWh.sum(), 4
+            )
+
+            energy_totals_base.at[country, "non energy oil"] = round(
+                df_sector[df_sector.Commodity.isin(oil_fuels)].Quantity_TWh.sum(), 4
+            )
+            energy_totals_base.at[country, "non energy biomass"] = round(
+                df_sector[
+                    df_sector.Commodity.isin(biomass_fuels)
+                ].Quantity_TWh.sum(),
+                4,
+            )
+            energy_totals_base.at[country, "non energy gas"] = round(
+                df_sector[df_sector.Commodity.isin(gas_fuels)].Quantity_TWh.sum(),
+                4,
+            )
+            energy_totals_base.at[country, "non energy heat"] = round(
+                df_sector[df_sector.Commodity.isin(heat)].Quantity_TWh.sum(),
+                4,
+            )
+        else:
+            _logger.error(f"Unknown sector {sector}")
+
+
+if __name__ == "__main__":
+    if "snakemake" not in globals():
+        snakemake = mock_snakemake(
+            "build_base_energy_totals",
+            simpl="",
+            clusters=19,
+            demand="AB",
+            planning_horizons=2030,
+        )
+
+    energy_stat_database = pd.read_excel(
+        snakemake.input.unsd_paths, index_col=0, header=0
+    )  # pd.read_excel("/nfs/home/haz43975/pypsa-earth-sec/scripts/Energy_Statistics_Database.xlsx"
+
+    # Load the links and make a dictionary
+    df = energy_stat_database.copy()
+    df = df.dropna(axis=0, subset=["Link"])
+    df = df.to_dict("dict")
+    d = df["Link"]
+
+    if snakemake.params.update_data:
+        # Delete any existing files to avoid duplication and double counting
+
+        files = glob.glob("data/demand/unsd/data/*.txt")
+        for f in files:
+            pathlib.Path(f).unlink(missing_ok=True)
+
+        # Feed the dictionary of links to the for loop, download and unzip all files
+        for key, value in d.items():
+            zipurl = value
+
+            with urlopen(zipurl) as zipresp:
+                with ZipFile(BytesIO(zipresp.read())) as zfile:
+                    zfile.extractall("data/demand/unsd/data")
+
+    path = "data/demand/unsd/data"
+
+    # Get the files from the path provided above
+    all_files = list(pathlib.Path("data/demand/unsd/data").glob("*.txt"))
+
+    # Create a dataframe from all downloaded files
+    df = pd.concat(
+        (pd.read_csv(f, encoding="utf8", sep=";") for f in all_files), ignore_index=True
+    )
+
+    # Split 'Commodity', 'Transaction' column to two
+    df[["Commodity", "Transaction", "extra"]] = df["Commodity - Transaction"].str.split(
+        " - ", expand=True
+    )
+
+    # Modify the commodity column, replacing typos and case-folding the strings
+    df["Commodity"] = df["Commodity"].map(modify_commodity)
+
+    # Remove Footnote and Estimate from 'Commodity - Transaction' column
+    df = df.loc[df["Commodity - Transaction"] != "Footnote"]
+    df = df.loc[df["Commodity - Transaction"] != "Estimate"]
+
+    # Create a column with iso2 country code
+    cc = coco.CountryConverter()
+    Country = pd.Series(df["Country or Area"])
+
+    df["country"] = cc.pandas_convert(series=Country, to="ISO2", not_found="not found")
+
+    # remove countries or areas that have no iso2 such as former countries names
+    df = df.loc[df["country"] != "not found"]
+
+    # Convert country column that contains lists for some country names that are identified with more than one country.
+    df["country"] = df["country"].astype(str)
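+    # country_converter maps e.g. "Algeria" -> "DZ"; ambiguous names can map to
+    # more than one ISO2 code (returned as a list), which is why the column is
+    # cast to str above and comma-containing entries are dropped below.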
+    # Remove all iso2 conversions for some country names that are identified with more than one country.
+    df = df[~df.country.str.contains(",", na=False)].reset_index(drop=True)
+
+    # Create a dictionary with all the conversion factors from ktons or m3 to TWh based on https://unstats.un.org/unsd/energy/balance/2014/05.pdf
+    fuels_conv_toTWh = get_conv_factors("industry")
+
+    # Fetch country list and demand base year from the config file
+    year = snakemake.params.base_year
+    countries = snakemake.params.countries
+
+    # Filter for the year and country
+    df_yr = df[df.Year == year]
+    df_yr = df_yr[df_yr.country.isin(countries)]
+
+    # Create an empty dataframe for energy_totals_base
+    energy_totals_cols = pd.read_csv("data/energy_totals_DF_2030.csv").columns
+    energy_totals_base = pd.DataFrame(columns=energy_totals_cols, index=countries)
+
+    # Lists that combine the different fuels in the dataset to the model's carriers
+    (
+        gas_fuels,
+        oil_fuels,
+        biomass_fuels,
+        coal_fuels,
+        heat,
+        electricity,
+    ) = aggregate_fuels("industry")
+
+    other_energy = [
+        "consumption not elsewhere specified (other)",
+        "Consumption not elsewhere specified (other)",
+        "Consumption by other consumers not elsewhere specified",
+        "consumption by other consumers not elsewhere specified",
+    ]
+
+    # non_energy = ['non energy uses', 'non-energy uses', 'consumption for non-energy uses', 'Consumption for non-energy uses', 'non-energy use']
+    # Create a dictionary to save the data if need to be checked
+    sectors_dfs = {}
+
+    # Run the function that processes the data for all the sectors
+    sectors = [
+        "consumption by households",
+        "road",
+        "rail",
+        "aviation",
+        "navigation",
+        "agriculture",
+        "services",
+        "other energy",
+        "non energy use",
+    ]
+    for sector in sectors:
+        calc_sector(sector)
+
+    # Export the base energy totals file
+    energy_totals_base.to_csv(snakemake.output.energy_totals_base)
diff --git a/scripts/build_base_industry_totals.py b/scripts/build_base_industry_totals.py
new file mode 100644
index 000000000..954a1a631
--- /dev/null
+++ b/scripts/build_base_industry_totals.py
@@ -0,0 +1,205 @@
+# -*- coding: utf-8 -*-
+# SPDX-FileCopyrightText: PyPSA-Earth and PyPSA-Eur Authors
+#
+# SPDX-License-Identifier: AGPL-3.0-or-later
+"""
+Created on Thu Jul 14 19:01:13 2022.
+"""
+
+import pathlib
+
+import country_converter as coco
+import pandas as pd
+from _helpers import (
+    aggregate_fuels,
+    get_conv_factors,
+    get_path,
+    mock_snakemake,
+    modify_commodity,
+    read_csv_nafix,
+)
+
+
+def calculate_end_values(df):
+    return (1 + df) ** no_years
+
+
+def create_industry_base_totals(df):
+    # Converting values of mass (ktons) to energy (TWh)
+    index_mass = df.loc[df["Unit"] == "Metric tons, thousand"].index
+    df.loc[index_mass, "Quantity_TWh"] = df.loc[index_mass].apply(
+        lambda x: x["Quantity"] * fuels_conv_toTWh.get(x["Commodity"], float("nan")),
+        axis=1,
+    )
+
+    # Converting values of energy (GWh) to energy (TWh)
+    index_energy = df[df["Unit"] == "Kilowatt-hours, million"].index
+    df.loc[index_energy, "Quantity_TWh"] = df.loc[index_energy].apply(
+        lambda x: x["Quantity"] / 1e3, axis=1
+    )
+
+    # Converting values of energy (TJ) to energy (TWh)
+    index_energy_TJ = df[df["Unit"] == "Terajoules"].index
+    df.loc[index_energy_TJ, "Quantity_TWh"] = df.loc[index_energy_TJ].apply(
+        lambda x: x["Quantity"] / 3600, axis=1
+    )
+
+    # Converting values of volume (thousand m3) to energy (TWh)
+    index_volume = df[df["Unit"] == "Cubic metres, thousand"].index
+    df.loc[index_volume, "Quantity_TWh"] = df.loc[index_volume].apply(
+        lambda x: x["Quantity"] * fuels_conv_toTWh[x["Commodity"]], axis=1
+    )
+
+    df["carrier"] = df["Commodity"].map(fuel_dict)
+
+    # Aggregating and grouping the dataframe
+    df_agg = (
+        df.groupby(["country", "carrier", "Transaction"])
+        .agg({"Quantity_TWh": "sum"})
+        .reset_index()
+    )
+    industry_totals_base = df_agg.pivot_table(
+        columns="Transaction", index=["country", "carrier"]
+    ).fillna(0.0)
+    industry_totals_base = industry_totals_base.droplevel(level=0, axis=1)
+    # industry_totals_base["other"] = 0
+
+    if not include_other:
+        # Drop the unspecified 'other' column when it is excluded from the workflow
+        print(
+            "Unspecified industries are not included; check thoroughly, as these values are sometimes significant for some countries"
+        )
+        industry_totals_base = industry_totals_base.drop("other", axis=1)
+
+    industry_totals_base = industry_totals_base.rename(
+        columns={"paper, pulp and print": "paper pulp and print"}
+    )
+
+    missing_columns = [
+        col for col in clean_industry_list if col not in industry_totals_base.columns
+    ]
+
+    # Add missing columns with all values set to 0
+    for col in missing_columns:
+        industry_totals_base[col] = 0
+
+    return industry_totals_base * 1e6  # change from TWh to MWh
+
+
+if __name__ == "__main__":
+    if "snakemake" not in globals():
+        snakemake = mock_snakemake(
+            "build_base_industry_totals",
+            planning_horizons=2030,
+            demand="EG",
+        )
+
+    # Load config file and wildcards
+
+    year = snakemake.params.base_year
+    countries = snakemake.params.countries
+
+    investment_year = int(snakemake.wildcards.planning_horizons)
+    demand_sc = snakemake.wildcards.demand
+    no_years = int(snakemake.wildcards.planning_horizons) - int(
+        snakemake.params.base_year
+    )
+    include_other = snakemake.params.other_industries
+
+    transaction = read_csv_nafix(
+        snakemake.input.transactions_path,
+        sep=";",
+    )
+
+    renaming_dict = transaction.set_index("Transaction")["clean_name"].to_dict()
+    clean_industry_list = list(transaction.clean_name.unique())
+
+    unsd_path = get_path(
+        pathlib.Path(snakemake.input["energy_totals_base"]).parent, "demand/unsd/data/"
+    )
+
+    # Collect all downloaded UNSD data files
+    all_files = list(pathlib.Path(unsd_path).glob("*.txt"))
+
+    # Create a dataframe from all downloaded files
+    df = pd.concat(
+        (pd.read_csv(f, encoding="utf8", sep=";") for f in all_files),
+        ignore_index=True,
+    )
+
+    # Split 'Commodity', 'Transaction' column to two
+    df[["Commodity", "Transaction", "extra"]] = df["Commodity - Transaction"].str.split(
+        " - ", expand=True
+    )
+
+    # Modify the commodity column, replacing typos and case-folding the strings
+    df["Commodity"] = df["Commodity"].map(modify_commodity)
+
+    df = df[
+        df.Commodity != "Other bituminous coal"
+    ]  # drop rows for this commodity to avoid double counting
+
+    # Fill missing values in the Transaction column and lower-case it
+    df["Transaction"] = df["Transaction"].fillna("NA")
+    df["Transaction"] = df["Transaction"].str.lower()
+    # Remove Footnote and Estimate from 'Commodity - Transaction' column
+    df = df.loc[df["Commodity - Transaction"] != "Footnote"]
+    df = df.loc[df["Commodity - Transaction"] != "Estimate"]
+
+    # Create a column with iso2 country code
+    cc = coco.CountryConverter()
+    Country = pd.Series(df["Country or Area"])
+
+    df["country"] = cc.pandas_convert(series=Country, to="ISO2", not_found="not found")
+
+    # remove countries or areas that have no iso2, such as former countries' names
+    df = df.loc[df["country"] != "not found"]
+
+    # Convert the country column to string; ambiguous names yield lists
+    df["country"] = df["country"].astype(str)
+
+    # Remove all iso2 conversions for country names that match more than one country
+    df = df[~df.country.str.contains(",", na=False)].reset_index(drop=True)
+
+    # Create a dictionary with all the conversion factors from ktons or m3 to TWh based on https://unstats.un.org/unsd/energy/balance/2014/05.pdf
+    fuels_conv_toTWh = get_conv_factors("industry")
+
+    # Lists that combine the different fuels in the dataset to the model's carriers
+
+    # Fetch the fuel categories from the helpers script
+    (
+        gas_fuels,
+        oil_fuels,
+        biomass_fuels,
+        coal_fuels,
+        heat,
+        electricity,
+    ) = aggregate_fuels("industry")
+
+    # Create fuel dictionary to use for mapping all fuels to the pypsa representative fuels
+    fuel_dict = {
+        element: var_name
+        for var_name, element_list in [
+            ("gas", gas_fuels),
+            ("oil", oil_fuels),
+            ("biomass", biomass_fuels),
+            ("heat", heat),
+            ("coal", coal_fuels),
+            ("electricity", electricity),
+        ]
+        for element in element_list
+    }
+
+    # Filter for the year and country
+    df_yr = df[df.Year == year]
+
+    df_yr = df_yr[df_yr.Transaction.isin(transaction.Transaction)]
+
+    df_yr["Transaction"] = df_yr["Transaction"].map(renaming_dict)
+
+    # Create the industry totals file
+    industry_totals_base = create_industry_base_totals(df_yr)
+
+    # Export the industry totals dataframe
+    industry_totals_base.to_csv(snakemake.output["base_industry_totals"])
diff --git a/scripts/build_bus_regions.py b/scripts/build_bus_regions.py
index 9af7f1be9..0af9bb4e7 100644
--- a/scripts/build_bus_regions.py
+++ b/scripts/build_bus_regions.py
@@ -47,13 +47,7 @@
 import numpy as np
 import pandas as pd
 import pypsa
-from _helpers import (
-    REGION_COLS,
-    change_to_script_dir,
-    configure_logging,
-    create_logger,
-    mock_snakemake,
-)
+from _helpers import REGION_COLS, configure_logging, create_logger, mock_snakemake
 from scipy.spatial import Voronoi
 from shapely.geometry import Polygon
 
@@ -150,7 +144,6 @@ def get_gadm_shape(
 
 if __name__ == "__main__":
     if "snakemake" not in globals():
-        change_to_script_dir(__file__)
         snakemake = mock_snakemake("build_bus_regions")
 
     configure_logging(snakemake)
diff --git a/scripts/build_clustered_population_layouts.py
b/scripts/build_clustered_population_layouts.py new file mode 100644 index 000000000..c88c50b49 --- /dev/null +++ b/scripts/build_clustered_population_layouts.py @@ -0,0 +1,55 @@ +# -*- coding: utf-8 -*- +# SPDX-FileCopyrightText: PyPSA-Earth and PyPSA-Eur Authors +# +# SPDX-License-Identifier: AGPL-3.0-or-later +""" +Build clustered population layouts. +""" + +import atlite +import geopandas as gpd +import pandas as pd +import xarray as xr +from _helpers import mock_snakemake, to_csv_nafix + +if __name__ == "__main__": + if "snakemake" not in globals(): + snakemake = mock_snakemake( + "build_clustered_population_layouts", + simpl="", + clusters=38, + ) + + cutout_path = snakemake.input.cutout + cutout = atlite.Cutout(cutout_path) + + clustered_regions = ( + gpd.read_file(snakemake.input.regions_onshore) + .set_index("name") + .buffer(0) + .squeeze() + ) + + I = cutout.indicatormatrix(clustered_regions) + + pop = {} + for item in ["total", "urban", "rural"]: + pop_layout = xr.open_dataarray(snakemake.input[f"pop_layout_{item}"]) + pop[item] = I.dot(pop_layout.stack(spatial=("y", "x"))) + + pop = pd.DataFrame(pop, index=clustered_regions.index) + + pop["ct"] = gpd.read_file(snakemake.input.regions_onshore).set_index("name").country + country_population = pop.total.groupby(pop.ct).sum() + pop["fraction"] = (pop.total / pop.ct.map(country_population)).fillna(0.0) + + to_csv_nafix(pop, snakemake.output.clustered_pop_layout) + + gdp_layout = xr.open_dataarray(snakemake.input["gdp_layout"]) + gdp = I.dot(gdp_layout.stack(spatial=("y", "x"))) + gdp = pd.DataFrame(gdp, index=clustered_regions.index, columns=["total"]) + + gdp["ct"] = gpd.read_file(snakemake.input.regions_onshore).set_index("name").country + country_gdp = gdp.total.groupby(gdp.ct).sum() + gdp["fraction"] = (gdp.total / gdp.ct.map(country_gdp)).fillna(0.0) + to_csv_nafix(gdp, snakemake.output.clustered_gdp_layout) diff --git a/scripts/build_cop_profiles.py b/scripts/build_cop_profiles.py new file mode 100644 index 000000000..afff7957c --- /dev/null +++ b/scripts/build_cop_profiles.py @@ -0,0 +1,44 @@ +# -*- coding: utf-8 -*- +# SPDX-FileCopyrightText: PyPSA-Earth and PyPSA-Eur Authors +# +# SPDX-License-Identifier: AGPL-3.0-or-later +""" +Build COP time series for air- or ground-sourced heat pumps. +""" + +import xarray as xr +from _helpers import mock_snakemake + + +def coefficient_of_performance(delta_T, source="air"): + """ + COP is function of temp difference source to sink. + + The quadratic regression is based on Staffell et al. (2012) + https://doi.org/10.1039/C2EE22653G. 
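+
+    As a worked example under this regression, an air-sourced heat pump at
+    delta_T = 25 K gives COP = 6.81 - 0.121 * 25 + 0.000630 * 25**2,
+    i.e. about 4.18.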
+ """ + if source == "air": + return 6.81 - 0.121 * delta_T + 0.000630 * delta_T**2 + elif source == "soil": + return 8.77 - 0.150 * delta_T + 0.000734 * delta_T**2 + else: + raise NotImplementedError("'source' must be one of ['air', 'soil']") + + +if __name__ == "__main__": + if "snakemake" not in globals(): + snakemake = mock_snakemake( + "build_cop_profiles", + simpl="", + clusters=15, + ) + + for area in ["total", "urban", "rural"]: + for source in ["air", "soil"]: + source_T = xr.open_dataarray(snakemake.input[f"temp_{source}_{area}"]) + + delta_T = snakemake.params.heat_pump_sink_T - source_T + + cop = coefficient_of_performance(delta_T, source) + + cop.to_netcdf(snakemake.output[f"cop_{source}_{area}"]) diff --git a/scripts/build_cutout.py b/scripts/build_cutout.py index 186e52ab6..951214bfc 100644 --- a/scripts/build_cutout.py +++ b/scripts/build_cutout.py @@ -97,19 +97,13 @@ import atlite import geopandas as gpd import pandas as pd -from _helpers import ( - change_to_script_dir, - configure_logging, - create_logger, - mock_snakemake, -) +from _helpers import configure_logging, create_logger, mock_snakemake logger = create_logger(__name__) if __name__ == "__main__": if "snakemake" not in globals(): - change_to_script_dir(__file__) snakemake = mock_snakemake("build_cutout", cutout="africa-2013-era5") configure_logging(snakemake) diff --git a/scripts/build_demand_profiles.py b/scripts/build_demand_profiles.py index 39414109f..154c5050e 100644 --- a/scripts/build_demand_profiles.py +++ b/scripts/build_demand_profiles.py @@ -51,7 +51,6 @@ import scipy.sparse as sparse import xarray as xr from _helpers import ( - change_to_script_dir, configure_logging, create_logger, get_path, @@ -59,7 +58,6 @@ normed, read_csv_nafix, read_osm_config, - sets_path_to_root, ) from shapely.prepared import prep from shapely.validation import make_valid @@ -296,10 +294,7 @@ def upsample(cntry, group): if __name__ == "__main__": if "snakemake" not in globals(): - change_to_script_dir(__file__) snakemake = mock_snakemake("build_demand_profiles") - sets_path_to_root("pypsa-earth") - configure_logging(snakemake) n = pypsa.Network(snakemake.input.base_network) diff --git a/scripts/build_existing_heating_distribution.py b/scripts/build_existing_heating_distribution.py new file mode 100644 index 000000000..4391e9789 --- /dev/null +++ b/scripts/build_existing_heating_distribution.py @@ -0,0 +1,176 @@ +# -*- coding: utf-8 -*- +# SPDX-FileCopyrightText: PyPSA-Earth and PyPSA-Eur Authors +# +# SPDX-License-Identifier: AGPL-3.0-or-later +""" +Builds table of existing heat generation capacities for initial planning +horizon. + +Existing heat generation capacities are distributed to nodes based on population. +Within the nodes, the capacities are distributed to sectors (residential and services) based on sectoral consumption and urban/rural based population distribution. + +Inputs: +------- +- Existing heating generators: `data/existing_heating_raw.csv` per country +- Population layout: `resources/{run_name}/pop_layout_s_.csv`. Output of `scripts/build_clustered_population_layout.py` +- Population layout with energy demands: `resources//pop_weighted_energy_totals_s_.csv` +- District heating share: `resources//district_heat_share_elec_s__.csv` + +Outputs: +-------- +- Existing heat generation capacities distributed to nodes: `resources/{run_name}/existing_heating_distribution_elec_s{simpl}_{clusters}_{planning_horizons}.csv` + +Relevant settings: +------------------ +.. 
code:: yaml
+    scenario:
+        planning_horizons
+    sector:
+        existing_capacities:
+
+Notes:
+------
+- Data for Albania, Montenegro and Macedonia is not included in the input database and assumed 0.
+- Coal and oil boilers are assimilated to oil boilers.
+- All ground-source heat pumps are assumed to be in rural areas and all air-source heat pumps in urban areas.
+
+References:
+-----------
+- "Mapping and analyses of the current and future (2020 - 2030) heating/cooling fuel deployment (fossil/renewables)" (https://energy.ec.europa.eu/publications/mapping-and-analyses-current-and-future-2020-2030-heatingcooling-fuel-deployment-fossilrenewables-1_en)
+"""
+import logging
+
+import country_converter as coco
+import numpy as np
+import pandas as pd
+from _helpers import mock_snakemake
+
+logger = logging.getLogger(__name__)
+
+cc = coco.CountryConverter()
+
+
+def build_existing_heating():
+    # Retrieve existing heating capacities. The data comes from the study
+    # "Mapping and analyses of the current and future (2020 - 2030)
+    # heating/cooling fuel deployment (fossil/renewables)"
+    # https://energy.ec.europa.eu/publications/mapping-and-analyses-current-and-future-2020-2030-heatingcooling-fuel-deployment-fossilrenewables-1_en
+    # file: "WP2_DataAnnex_1_BuildingTechs_ForPublication_201603.xls" -> "existing_heating_raw.csv".
+    # The data is for buildings only (i.e. NOT district heating) and represents the year 2012.
+    # TODO start from original file
+
+    existing_heating = pd.read_csv(
+        snakemake.input.existing_heating, index_col=0, header=0
+    )
+
+    # data for Albania, Montenegro and Macedonia not included in database
+    existing_heating.loc["Albania"] = np.nan
+    existing_heating.loc["Montenegro"] = np.nan
+    existing_heating.loc["Macedonia"] = np.nan
+
+    existing_heating.fillna(0.0, inplace=True)
+
+    # convert GW to MW
+    existing_heating *= 1e3
+
+    # take the DEFAULT fill values only after the unit conversion,
+    # so that they are in MW like the rest of the table
+    fillvalue_missing = existing_heating.loc["DEFAULT"]
+
+    existing_heating.index = cc.convert(existing_heating.index, to="iso2")
+
+    # coal and oil boilers are assimilated to oil boilers
+    existing_heating["oil boiler"] = (
+        existing_heating["oil boiler"] + existing_heating["coal boiler"]
+    )
+    existing_heating.drop(["coal boiler"], axis=1, inplace=True)
+
+    # distribute technologies to nodes by population
+    pop_layout = pd.read_csv(snakemake.input.clustered_pop_layout, index_col=0)
+
+    # fill missing rows
+    missing_countries = list(set(pop_layout.ct.unique()) - set(existing_heating.index))
+    if len(missing_countries) > 0:
+        logger.warning(
+            f"Missing existing heating capacity data for countries: {missing_countries}. Filling with DEFAULT values."
+ ) + for country in missing_countries: + existing_heating.loc[country] = fillvalue_missing + + nodal_heating = existing_heating.loc[pop_layout.ct] + nodal_heating.index = pop_layout.index + nodal_heating = nodal_heating.multiply(pop_layout.fraction, axis=0) + + district_heat_info = pd.read_csv(snakemake.input.district_heat_share, index_col=0) + urban_fraction = pop_layout["fraction"] + + energy_layout = pd.read_csv( + snakemake.input.clustered_pop_energy_layout, index_col=0 + ) + + uses = ["space", "water"] + sectors = ["residential", "services"] + + nodal_sectoral_totals = pd.DataFrame(dtype=float) + + for sector in sectors: + nodal_sectoral_totals[sector] = energy_layout[ + [f"total {sector} {use}" for use in uses] + ].sum(axis=1) + + nodal_sectoral_fraction = nodal_sectoral_totals.div( + nodal_sectoral_totals.sum(axis=1), axis=0 + ) + + nodal_heat_name_fraction = pd.DataFrame(index=district_heat_info.index, dtype=float) + + nodal_heat_name_fraction["urban central"] = 0.0 + + for sector in sectors: + nodal_heat_name_fraction[f"{sector} rural"] = nodal_sectoral_fraction[ + sector + ] * (1 - urban_fraction) + nodal_heat_name_fraction[f"{sector} urban decentral"] = ( + nodal_sectoral_fraction[sector] * urban_fraction + ) + + nodal_heat_name_tech = pd.concat( + { + name: nodal_heating.multiply(nodal_heat_name_fraction[name], axis=0) + for name in nodal_heat_name_fraction.columns + }, + axis=1, + names=["heat name", "technology"], + ) + + # move all ground HPs to rural, all air to urban + + for sector in sectors: + nodal_heat_name_tech[(f"{sector} rural", "ground heat pump")] += ( + nodal_heat_name_tech[("urban central", "ground heat pump")] + * nodal_sectoral_fraction[sector] + + nodal_heat_name_tech[(f"{sector} urban decentral", "ground heat pump")] + ) + nodal_heat_name_tech[(f"{sector} urban decentral", "ground heat pump")] = 0.0 + + nodal_heat_name_tech[ + (f"{sector} urban decentral", "air heat pump") + ] += nodal_heat_name_tech[(f"{sector} rural", "air heat pump")] + nodal_heat_name_tech[(f"{sector} rural", "air heat pump")] = 0.0 + + nodal_heat_name_tech[("urban central", "ground heat pump")] = 0.0 + + nodal_heat_name_tech.to_csv(snakemake.output.existing_heating_distribution) + + +if __name__ == "__main__": + if "snakemake" not in globals(): + snakemake = mock_snakemake( + "build_existing_heating_distribution", + simpl="", + clusters=4, + planning_horizons=2030, + demand="DF", + ) + + build_existing_heating() diff --git a/scripts/build_heat_demand.py b/scripts/build_heat_demand.py new file mode 100644 index 000000000..17651bdab --- /dev/null +++ b/scripts/build_heat_demand.py @@ -0,0 +1,41 @@ +# -*- coding: utf-8 -*- +# SPDX-FileCopyrightText: PyPSA-Earth and PyPSA-Eur Authors +# +# SPDX-License-Identifier: AGPL-3.0-or-later +""" +Build heat demand time series. 
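+
+The gridded heat demand computed by atlite (heating-degree-day based) is
+aggregated to the clustered onshore regions with a population-weighted
+indicator matrix, separately for the total, urban and rural layouts.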
+""" + +import atlite +import geopandas as gpd +import numpy as np +import pandas as pd +import xarray as xr +from _helpers import mock_snakemake + +if __name__ == "__main__": + if "snakemake" not in globals(): + snakemake = mock_snakemake("build_heat_demand", simpl="", clusters="10") + + time = pd.date_range(freq="h", **snakemake.params.snapshots) + cutout_config = snakemake.input.cutout + cutout = atlite.Cutout(cutout_config).sel(time=time) + + clustered_regions = ( + gpd.read_file(snakemake.input.regions_onshore) + .set_index("name") + .buffer(0) + .squeeze() + ) + + I = cutout.indicatormatrix(clustered_regions) + + for area in ["rural", "urban", "total"]: + pop_layout = xr.open_dataarray(snakemake.input[f"pop_layout_{area}"]) + + stacked_pop = pop_layout.stack(spatial=("y", "x")) + M = I.T.dot(np.diag(I.dot(stacked_pop))) + + heat_demand = cutout.heat_demand(matrix=M.T, index=clustered_regions.index) + + heat_demand.to_netcdf(snakemake.output[f"heat_demand_{area}"]) diff --git a/scripts/build_industrial_database.py b/scripts/build_industrial_database.py new file mode 100644 index 000000000..f28c2f8ef --- /dev/null +++ b/scripts/build_industrial_database.py @@ -0,0 +1,524 @@ +# -*- coding: utf-8 -*- +# SPDX-FileCopyrightText: PyPSA-Earth and PyPSA-Eur Authors +# +# SPDX-License-Identifier: AGPL-3.0-or-later + +import math + +import country_converter as coco +import numpy as np +import pandas as pd +import pycountry +import requests +from _helpers import content_retrieve, mock_snakemake +from geopy.geocoders import Nominatim + + +def get_cocode_from_name(df, country_column_name): + country_codes = {} + + for country in pycountry.countries: + country_codes[country.name] = country.alpha_2 + + df["country"] = df[country_column_name].map(country_codes) + return df + + +def get_cocode_from_coords(df): + geolocator = Nominatim(user_agent="geoapi") # Initialize geolocator + + # Initialize an empty list to store country codes + country_codes = [] + + for index, row in df.iterrows(): + # Get latitude and longitude from the row + latitude = row["Latitude"] + longitude = row["Longitude"] + + # Perform reverse geocoding to get location information + tries = 0 + location = None + while tries < 10: + try: + location = geolocator.reverse((latitude, longitude), exactly_one=True) + break + except: + tries += 1 + if tries == 10: + print( + "Country code of location ({},{}) could not be geocoded after 10 tries.".format( + latitude, longitude + ) + ) + + if location and location.raw.get("address", {}).get("country_code"): + # Extract and append the country code to the list + country_code = location.raw["address"]["country_code"].upper() + country_codes.append(country_code) + else: + country_codes.append(None) + + # Add the country code list as a new column to the DataFrame + df["country"] = country_codes + + return df + + +def create_steel_db(): + # Global Steel Plant Tracker data set you requested from Global Energy Monitor from the link below: + + # The following excel file was downloaded from the following webpage + # https://globalenergymonitor.org/wp-content/uploads/2023/03/Global-Steel-Plant-Tracker-2023-03.xlsx . The dataset contains 1433 Steel plants globally. 
+
+    url = "https://globalenergymonitor.org/wp-content/uploads/2023/03/Global-Steel-Plant-Tracker-2023-03.xlsx"
+
+    df_steel = pd.read_excel(
+        content_retrieve(url),
+        index_col=0,
+        sheet_name="Steel Plants",
+        header=0,
+    )
+
+    df_steel = df_steel[
+        [
+            "Plant name (English)",
+            "Country",
+            "Coordinates",
+            "Coordinate accuracy",
+            "Status",
+            "Start date",
+            "Plant age (years)",
+            "Nominal crude steel capacity (ttpa)",
+            "Nominal BOF steel capacity (ttpa)",
+            "Nominal EAF steel capacity (ttpa)",
+            "Nominal OHF steel capacity (ttpa)",
+            "Nominal iron capacity (ttpa)",
+            "Nominal BF capacity (ttpa)",
+            "Nominal DRI capacity (ttpa)",
+            "Ferronickel capacity (ttpa)",
+            "Sinter plant capacity (ttpa)",
+            "Coking plant capacity (ttpa)",
+            "Pelletizing plant capacity (ttpa)",
+            "Category steel product",
+            "Main production process",
+            "Municipality",
+        ]
+    ]
+
+    # Keep only operating steel plants
+    df_steel = df_steel.loc[df_steel["Status"] == "operating"]
+
+    # Create a column with iso2 country code
+    cc = coco.CountryConverter()
+    Country = pd.Series(df_steel["Country"])
+    df_steel["country"] = cc.pandas_convert(series=Country, to="ISO2")
+
+    # Split the Coordinates column into y (latitude) and x (longitude) columns
+    df_steel[["y", "x"]] = df_steel["Coordinates"].str.split(",", expand=True)
+
+    # Drop Coordinates column as it contains a ',' and is not needed anymore
+    df_steel = df_steel.drop(columns="Coordinates", axis=1)
+
+    # Fetch steel plants that use both DRI and BF techs and drop them from the main df
+    mixed_steel_plants = df_steel[
+        df_steel["Main production process"] == "integrated (BF and DRI)"
+    ].copy()
+    df_steel = df_steel.drop(mixed_steel_plants.index)
+
+    # Separate the two techs in two dataframes
+    DRI_share = mixed_steel_plants.copy()
+    BF_share = mixed_steel_plants.copy()
+    BF_share["Main production process"] = "integrated (BF)"
+    DRI_share["Main production process"] = "integrated (DRI)"
+
+    # Calculate the share of both techs according to the capacities of iron production
+    BF_share["Nominal crude steel capacity (ttpa)"] = BF_share[
+        "Nominal crude steel capacity (ttpa)"
+    ] * mixed_steel_plants.apply(
+        lambda x: x["Nominal BF capacity (ttpa)"] / x["Nominal iron capacity (ttpa)"],
+        axis=1,
+    )
+    DRI_share["Nominal crude steel capacity (ttpa)"] = (
+        mixed_steel_plants["Nominal crude steel capacity (ttpa)"]
+        - BF_share["Nominal crude steel capacity (ttpa)"]
+    )
+
+    # Add suffix to the index to differentiate between them in the main df
+    DRI_share.index += "_DRI"
+    BF_share.index += "_BF"
+
+    # Merge them back to the main df
+    df_steel = pd.concat([df_steel, BF_share, DRI_share])
+
+    # Remove plants with unknown production technology
+    unknown_ind = df_steel[
+        df_steel["Main production process"].str.contains("unknown")
+    ].index
+    df_steel = df_steel.drop(unknown_ind)
+    if len(unknown_ind) > 0:
+        print(
+            "dropped {0} steel/iron plants with unknown production technology of total {1} plants".format(
+                len(unknown_ind), len(df_steel)
+            )
+        )
+
+    # Dict to map the technology names of the source to those expected in the workflow
+    iron_techs = {
+        "electric": "Electric arc",
+        "integrated (BF)": "Integrated steelworks",
+        "integrated (DRI)": "DRI + Electric arc",
+        "ironmaking (BF)": "Integrated steelworks",
+        "ironmaking (DRI)": "DRI + Electric arc",
+        "oxygen": "Integrated steelworks",
+        "electric, oxygen": "Electric arc",
+    }
+
+    # Creating the necessary columns in the dataframe
+    iron_making = df_steel[
+        df_steel["Main production process"].str.contains("ironmaking")
+    ].index
+    df_steel.loc[iron_making, "Nominal crude steel capacity (ttpa)"] = df_steel.loc[
+        iron_making, "Nominal iron capacity (ttpa)"
+    ]
+    df_steel["unit"] = "kt/yr"
+    df_steel["quality"] = "exact"
+    df_steel = df_steel.reset_index()
+    df_steel = df_steel.rename(
+        columns={
+            "Nominal crude steel capacity (ttpa)": "capacity",
+            "Municipality": "location",
+            "Plant ID": "ID",
+        }
+    )
+    df_steel.capacity = pd.to_numeric(df_steel.capacity)
+    df_steel["technology"] = df_steel["Main production process"].apply(
+        lambda x: iron_techs[x]
+    )
+    # parse the coordinate strings to floats
+    df_steel.x = df_steel.x.astype(float)
+    df_steel.y = df_steel.y.astype(float)
+
+    return df_steel[
+        [
+            "country",
+            "y",
+            "x",
+            "location",
+            "technology",
+            "capacity",
+            "unit",
+            "quality",
+            "ID",
+        ]
+    ].dropna()
+
+
+def create_cement_db():
+    # -------------
+    # CEMENT
+    # -------------
+    # The following excel file was downloaded from the following webpage https://www.cgfi.ac.uk/spatial-finance-initiative/geoasset-project/cement/.
+    # The dataset contains 3117 cement plants globally.
+    fn = "https://www.cgfi.ac.uk/wp-content/uploads/2021/08/SFI-Global-Cement-Database-July-2021.xlsx"
+    storage_options = {"User-Agent": "Mozilla/5.0"}
+    cement_orig = pd.read_excel(
+        fn,
+        index_col=0,
+        storage_options=storage_options,
+        sheet_name="SFI_ALD_Cement_Database",
+        header=0,
+    )
+
+    df_cement = cement_orig.copy()
+    df_cement = df_cement[
+        [
+            "country",
+            "iso3",
+            "latitude",
+            "longitude",
+            "status",
+            "plant_type",
+            "capacity",
+            "year",
+            "city",
+        ]
+    ]
+    df_cement = df_cement.rename(
+        columns={
+            "country": "Country",
+            "latitude": "y",
+            "longitude": "x",
+            "city": "location",
+        }
+    )
+    df_cement["unit"] = "Kt/yr"
+    df_cement["technology"] = "Cement"
+    df_cement["capacity"] = df_cement["capacity"] * 1000
+    # Keep only operating cement plants
+    df_cement = df_cement.loc[df_cement["status"] == "Operating"]
+
+    # Create a column with iso2 country code
+    cc = coco.CountryConverter()
+    iso3 = pd.Series(df_cement["iso3"])
+    df_cement["country"] = cc.pandas_convert(series=iso3, to="ISO2")
+
+    # Many capacities are unknown; fill them with the country average
+    # (dropping them instead would reduce the dataframe from 3000+ to 1672 rows)
+    na_index = df_cement[df_cement.capacity.isna()].index
+    print(
+        "There are {} out of {} total cement plants with unknown capacities, setting value to country average".format(
+            len(na_index), len(df_cement)
+        )
+    )
+    avg_c_cap = df_cement.groupby(df_cement.country)["capacity"].mean()
+    df_cement["capacity"] = df_cement.apply(
+        lambda x: (
+            avg_c_cap[x["country"]] if math.isnan(x["capacity"]) else x["capacity"]
+        ),
+        axis=1,
+    )
+
+    df_cement["quality"] = "actual"
+    df_cement.loc[na_index, "quality"] = "actual"  # TODO change
+
+    df_cement = df_cement.reset_index()
+    df_cement = df_cement.rename(columns={"uid": "ID"})
+    df_cement.capacity = pd.to_numeric(df_cement.capacity)
+
+    return df_cement[
+        [
+            "country",
+            "y",
+            "x",
+            "location",
+            "technology",
+            "capacity",
+            "unit",
+            "quality",
+            "ID",
+        ]
+    ]
+
+
+def create_refineries_df():
+    # -------------
+    # OIL REFINERIES
+    # -------------
+    # The data were downloaded directly from the arcgis server using a query found on this webpage:
+    # https://www.arcgis.com/home/item.html?id=a6979b6bccbf4e719de3f703ea799259&sublayer=0#data
+    # and https://www.arcgis.com/home/item.html?id=a917ac2766bc47e1877071f0201b6280
+
+    # The dataset contains 536 global Oil refineries.
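+    # The query below fetches all features as JSON in a single request;
+    # resultRecordCount=537 is just above the 536 refineries in the layer.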
+
+    base_url = "https://services.arcgis.com"
+    facts = "/jDGuO8tYggdCCnUJ/arcgis/rest/services/Global_Oil_Refinery_Complex_and_Daily_Capacity/FeatureServer/0/query?f=json&where=1%3D1&returnGeometry=false&spatialRel=esriSpatialRelIntersects&outFields=*&orderByFields=FID%20ASC&resultOffset=0&resultRecordCount=537&cacheHint=true&quantizationParameters=%7B%22mode%22%3A%22edit%22%7D"
+
+    first_response = requests.get(base_url + facts)
+    response_list = first_response.json()
+
+    data = []
+    for response in response_list["features"]:
+        data.append(
+            {
+                "FID_": response["attributes"].get("FID_"),
+                "Company": response["attributes"].get("Company"),
+                "Name": response["attributes"].get("Name"),
+                "City": response["attributes"].get("City"),
+                "Facility": response["attributes"].get("Facility"),
+                "Prov_State": response["attributes"].get("Prov_State"),
+                "Country": response["attributes"].get("Country"),
+                "Address": response["attributes"].get("Address"),
+                "Zip": response["attributes"].get("Zip"),
+                "County": response["attributes"].get("County"),
+                "PADD": response["attributes"].get("PADD"),
+                "Capacity": response["attributes"].get("Capacity"),
+                "Longitude": response["attributes"].get("Longitude"),
+                "Latitude": response["attributes"].get("Latitude"),
+                "Markets": response["attributes"].get("Markets"),
+                "CORPORATIO": response["attributes"].get("CORPORATIO"),
+            }
+        )
+
+    df = pd.DataFrame(data)
+
+    df = get_cocode_from_name(df, "Country")
+
+    # Rows whose country could not be matched by name are geocoded from their
+    # coordinates; drop only on the country column, not on any NaN
+    df_nans = df[df.country.isna()]
+    df = df.dropna(subset=["country"], axis=0)
+
+    df_bylocation = get_cocode_from_coords(df_nans)
+
+    df_refineries = pd.concat([df, df_bylocation])
+
+    # Creating the necessary columns in the dataframe
+    # df_refineries["technology"] = df_refineries["Main production process"].apply(lambda x: iron_techs[x])
+    df_refineries["unit"] = "bpd"
+    df_refineries["quality"] = "exact"
+    df_refineries["technology"] = "HVC"
+
+    df_refineries = df_refineries.rename(
+        columns={
+            "Capacity": "capacity",
+            "Prov_State": "location",
+            "Latitude": "y",
+            "Longitude": "x",
+            "FID_": "ID",
+        }
+    )
+    df_refineries = df_refineries.reset_index()
+    df_refineries.capacity = pd.to_numeric(df_refineries.capacity)
+
+    return df_refineries[
+        [
+            "country",
+            "y",
+            "x",
+            "location",
+            "technology",
+            "capacity",
+            "unit",
+            "quality",
+            "ID",
+        ]
+    ]
+
+
+def create_paper_df():
+    # -------------
+    # Paper
+    # -------------
+    # The following excel file was downloaded from the Spatial Finance Initiative's
+    # GeoAsset project (https://www.cgfi.ac.uk/spatial-finance-initiative/geoasset-project/).
+    # The dataset contains pulp and paper mills.
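+    # Note: this sample covers pulp and paper mills in Latin America only;
+    # mills in other regions are not represented.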
+
+    fn = "https://www.cgfi.ac.uk/wp-content/uploads/2023/03/SFI_ALD_Pulp_Paper_Sample_LatAm_Jan_2023.xlsx"
+
+    storage_options = {"User-Agent": "Mozilla/5.0"}
+    paper_orig = pd.read_excel(
+        fn,
+        index_col=0,
+        storage_options=storage_options,
+        sheet_name="SFI_ALD_PPM_LatAm",
+        header=0,
+    )
+
+    df_paper = paper_orig.copy()
+    df_paper = df_paper[
+        [
+            "country",
+            "iso3",
+            "latitude",
+            "longitude",
+            "status",
+            "primary_product",
+            "capacity_paper",
+            "city",
+        ]
+    ]
+
+    df_paper = df_paper.rename(
+        columns={
+            "country": "Country",
+            "latitude": "y",
+            "longitude": "x",
+            "city": "location",
+            "capacity_paper": "capacity",
+        }
+    )
+    df_paper["unit"] = "10kt/yr"
+    df_paper["technology"] = "Paper"
+
+    # keep numeric capacities and set everything else to NaN
+    df_paper.capacity = df_paper.capacity.apply(
+        lambda x: x if isinstance(x, (int, float)) else np.nan
+    )
+
+    # Keep only operating paper plants
+    # df_paper = df_paper.loc[df_paper["status"] == "Operating"]
+
+    # Create a column with iso2 country code
+    cc = coco.CountryConverter()
+    iso3 = pd.Series(df_paper["iso3"])
+    df_paper["country"] = cc.pandas_convert(series=iso3, to="ISO2")
+
+    # Fill unknown capacities with the country average
+    na_index = df_paper[df_paper.capacity.isna()].index
+    print(
+        "There are {} out of {} total paper plants with unknown capacities, setting value to country average".format(
+            len(na_index), len(df_paper)
+        )
+    )
+    avg_c_cap = df_paper.groupby(df_paper.country)["capacity"].mean()
+
+    df_paper["capacity"] = df_paper.apply(
+        lambda x: (
+            avg_c_cap[x["country"]] if math.isnan(x["capacity"]) else x["capacity"]
+        ),
+        axis=1,
+    )
+
+    df_paper["quality"] = "actual"
+    df_paper.loc[na_index, "quality"] = "actual"  # TODO change
+    df_paper.capacity = pd.to_numeric(df_paper.capacity)
+
+    df_paper = df_paper.reset_index()
+    df_paper = df_paper.rename(columns={"uid": "ID"})
+
+    industrial_database_paper = df_paper[
+        [
+            "country",
+            "y",
+            "x",
+            "location",
+            "technology",
+            "capacity",
+            "unit",
+            "quality",
+            "ID",
+        ]
+    ]
+
+    no_infp_index = industrial_database_paper[
+        industrial_database_paper.y == "No information"
+    ].index
+    print(
+        "Dropping {} paper plants with no location information; filling remaining missing capacities with 1.0".format(
+            len(no_infp_index)
+        )
+    )
+    industrial_database_paper = industrial_database_paper.drop(no_infp_index)
+    industrial_database_paper.capacity = industrial_database_paper.capacity.fillna(1)
+
+    return industrial_database_paper
+
+
+if __name__ == "__main__":
+    if "snakemake" not in globals():
+        snakemake = mock_snakemake(
+            "build_industrial_database",
+            simpl="",
+            clusters="4",
+            ll="c1.0",
+            opts="Co2L",
+            planning_horizons="2030",
+            sopts="144H",
+            discountrate="0.071",
+            demand="DF",
+        )
+
+    industrial_database_steel = create_steel_db()
+    industrial_database_cement = create_cement_db()
+    industrial_database_refineries = create_refineries_df()
+    industrial_database_paper = create_paper_df()
+
+    industrial_database = pd.concat(
+        [
+            industrial_database_steel,
+            industrial_database_cement,
+            industrial_database_refineries,
+            industrial_database_paper,
+        ]
+    )
+
+    industrial_database.to_csv(
+        snakemake.output["industrial_database"], header=True, index=0
+    )
diff --git a/scripts/build_industrial_distribution_key.py b/scripts/build_industrial_distribution_key.py
new file mode 100644
index 000000000..6fbf66c39
--- /dev/null
+++ b/scripts/build_industrial_distribution_key.py
@@ -0,0 +1,208 @@
+# -*- coding: utf-8 -*-
+# SPDX-FileCopyrightText: PyPSA-Earth and PyPSA-Eur Authors
+#
+# SPDX-License-Identifier: AGPL-3.0-or-later
+"""
+Build industrial distribution keys from the industrial database.
+"""
+
+import logging
+from itertools import product
+
+import geopandas as gpd
+import pandas as pd
+from _helpers import locate_bus, mock_snakemake, three_2_two_digits_country
+
+logger = logging.getLogger(__name__)
+
+
+def map_industry_to_buses(
+    df,
+    countries_list,
+    gadm_level_val,
+    geo_crs_val,
+    file_prefix_val,
+    gadm_url_prefix_val,
+    contended_flag_val,
+    gadm_input_file_args_list,
+    shapes_path_val,
+    gadm_clustering_val,
+):
+    """
+    Load the database of industrial sites and map each site onto the bus
+    (GADM) region it is located in.
+
+    Function similar to aviation/shipping; uses locate_bus to disaggregate.
+    """
+    df = df[df.country.isin(countries_list)]
+    df["gadm_{}".format(gadm_level_val)] = df[["x", "y", "country"]].apply(
+        lambda site: locate_bus(
+            site[["x", "y"]].astype("float"),
+            site["country"],
+            gadm_level_val,
+            geo_crs_val,
+            file_prefix_val,
+            gadm_url_prefix_val,
+            gadm_input_file_args_list,
+            contended_flag_val,
+            path_to_gadm=shapes_path_val,
+            gadm_clustering=gadm_clustering_val,
+        ),
+        axis=1,
+    )
+
+    return df.set_index("gadm_" + str(gadm_level_val))
+
+
+def build_nodal_distribution_key(
+    industrial_database, regions, industry, countries
+):  # returns percentage of co2 emissions
+    """
+    Build nodal distribution keys for each sector.
+    """
+
+    # countries = regions["name"].str[:2].unique()
+
+    keys = pd.DataFrame(index=regions.name, columns=industry, dtype=float)
+
+    pop = pd.read_csv(
+        snakemake.input.clustered_pop_layout,
+        index_col=0,
+        keep_default_na=False,
+        na_values=[""],
+    )
+
+    gdp = pd.read_csv(
+        snakemake.input.clustered_gdp_layout,
+        index_col=0,
+        keep_default_na=False,
+        na_values=[""],
+    )
+
+    # pop["country"] = pop.index.str[:2]
+    keys["population"] = pop["total"].values / pop["total"].sum()
+
+    keys["gdp"] = gdp["total"].values / gdp["total"].sum()
+
+    for tech, country in product(industry, countries):
+        regions_ct = regions.name[regions.name.str.contains(country)]
+
+        facilities = industrial_database.query(
+            "country == @country and industry == @tech"
+        )
+        # TODO adapt for facilities with production values not emissions
+        if not facilities.empty:
+            indicator = facilities["capacity"]
+            if indicator.sum() == 0:
+                key = pd.Series(1 / len(facilities), facilities.index)
+            else:
+                # TODO BEWARE: this is a strong assumption
+                # indicator = indicator.fillna(0)
+                key = indicator / indicator.sum()
+            key = (
+                key.groupby(facilities.index).sum().reindex(regions_ct, fill_value=0.0)
+            )
+        else:
+            key = keys.loc[regions_ct, "gdp"]
+
+        keys.loc[regions_ct, tech] = key
+    keys["country"] = pop["ct"]
+    return keys
+
+
+def match_technology(df):
+    industry_mapping = {
+        "Integrated steelworks": "iron and steel",
+        "DRI + Electric arc": "iron and steel",
+        "Electric arc": "iron and steel",
+        "Cement": "non-metallic minerals",
+        "HVC": "chemical and petrochemical",
+        "Paper": "paper pulp and print",
+        "Aluminium": "non-ferrous metals",
+    }
+
+    df["industry"] = df["technology"].map(industry_mapping)
+    return df
+
+
+if __name__ == "__main__":
+    if "snakemake" not in globals():
+        snakemake = mock_snakemake(
+            "build_industrial_distribution_key",
+            simpl="",
+            clusters=12,
demand="AB", + planning_horizons=2050, + ) + + regions = gpd.read_file(snakemake.input.regions_onshore) + shapes_path = snakemake.input.shapes_path + gadm_level = snakemake.params.gadm_level + countries = snakemake.params.countries + gadm_clustering = snakemake.params.alternative_clustering + geo_crs = snakemake.params.geo_crs + file_prefix = snakemake.params.gadm_file_prefix + gadm_url_prefix = snakemake.params.gadm_url_prefix + contended_flag = snakemake.params.contended_flag + gadm_input_file_args = ["data", "raw", "gadm"] + + if regions["name"][0][ + :3 + ].isalpha(): # TODO clean later by changing all codes to 2 letters + regions["name"] = regions["name"].apply( + lambda name: three_2_two_digits_country(name[:3]) + name[3:] + ) + + if snakemake.params.industry_database: + logger.info( + "Using custom industry database from 'data/custom/industrial_database.csv' instead of default" + ) + geo_locs = pd.read_csv( + "data/custom/industrial_database.csv", + sep=",", + header=0, + keep_default_na=False, # , index_col=0 + ) + geo_locs["industry"] = geo_locs["technology"] + else: + logger.info("Using default industry database") + geo_locs = pd.read_csv( + snakemake.input.industrial_database, + sep=",", + header=0, + keep_default_na=False, # , index_col=0 + ) + geo_locs = geo_locs[geo_locs["country"].isin(countries)] + geo_locs["capacity"] = pd.to_numeric(geo_locs.capacity) + + # Call the function to add the "industry" column + df_with_industry = match_technology(geo_locs) + + geo_locs.capacity = pd.to_numeric(geo_locs.capacity) + + geo_locs = geo_locs[geo_locs.quality != "nonexistent"] + + industry = geo_locs.industry.unique() + + industrial_database = map_industry_to_buses( + geo_locs[geo_locs.quality != "unavailable"], + countries, + gadm_level, + geo_crs, + file_prefix, + gadm_url_prefix, + contended_flag, + gadm_input_file_args, + shapes_path, + gadm_clustering, + ) + + keys = build_nodal_distribution_key( + industrial_database, regions, industry, countries + ) + + keys.to_csv(snakemake.output.industrial_distribution_key) diff --git a/scripts/build_industry_demand.py b/scripts/build_industry_demand.py new file mode 100644 index 000000000..78bd7c42f --- /dev/null +++ b/scripts/build_industry_demand.py @@ -0,0 +1,318 @@ +# -*- coding: utf-8 -*- +# SPDX-FileCopyrightText: PyPSA-Earth and PyPSA-Eur Authors +# +# SPDX-License-Identifier: AGPL-3.0-or-later +""" +Created on Thu Jul 14 21:18:06 2022. 
+"""
+
+import logging
+from itertools import product
+
+import pandas as pd
+from _helpers import mock_snakemake, read_csv_nafix
+
+_logger = logging.getLogger(__name__)
+
+
+def calculate_end_values(df):
+    return (1 + df) ** no_years
+
+
+def country_to_nodal(industrial_production, keys):
+    # keys["country"] = keys.index.str[:2]  # TODO 2digit_3_digit adaptation needed
+
+    nodal_production = pd.DataFrame(
+        index=keys.index, columns=industrial_production.columns, dtype=float
+    )
+
+    countries = keys.country.unique()
+    sectors = industrial_production.columns
+
+    for country, sector in product(countries, sectors):
+        buses = keys.index[keys.country == country]
+
+        # fall back to the gdp key for sectors without a distribution key
+        if sector not in keys.columns or keys[sector].sum() == 0:
+            mapping = "gdp"
+        else:
+            mapping = sector
+
+        key = keys.loc[buses, mapping]
+        nodal_production.loc[buses, sector] = (
+            industrial_production.at[country, sector] * key
+        )
+
+    return nodal_production
+
+
+if __name__ == "__main__":
+    if "snakemake" not in globals():
+        snakemake = mock_snakemake(
+            "build_industry_demand",
+            simpl="",
+            clusters=10,
+            planning_horizons=2030,
+            demand="AB",
+        )
+
+    countries = snakemake.params.countries
+
+    if snakemake.params.industry_demand:
+        _logger.info(
+            "Fetching custom industry demand data... expecting file at 'data/custom/industry_demand_{0}_{1}.csv'".format(
+                snakemake.wildcards["demand"], snakemake.wildcards["planning_horizons"]
+            )
+        )
+
+        industry_demand = pd.read_csv(
+            "data/custom/industry_demand_{0}_{1}.csv".format(
+                snakemake.wildcards["demand"], snakemake.wildcards["planning_horizons"]
+            ),
+            index_col=[0, 1],
+        )
+        keys_path = snakemake.input.industrial_distribution_key
+
+        dist_keys = pd.read_csv(
+            keys_path, index_col=0, keep_default_na=False, na_values=[""]
+        )
+        production_base = pd.DataFrame(
+            1, columns=industry_demand.columns, index=countries
+        )
+        nodal_keys = country_to_nodal(production_base, dist_keys)
+
+        nodal_df = pd.DataFrame()
+
+        for country in countries:
+            nodal_production_tom_co = nodal_keys[
+                nodal_keys.index.to_series().str.startswith(country)
+            ]
+            industry_base_totals_co = industry_demand.loc[country]
+            # final energy consumption per node and industry (TWh/a)
+            nodal_df_co = nodal_production_tom_co.dot(industry_base_totals_co.T)
+            nodal_df = pd.concat([nodal_df, nodal_df_co])
+
+    else:
+        no_years = int(snakemake.wildcards.planning_horizons) - int(
+            snakemake.params.base_year
+        )
+
+        cagr = read_csv_nafix(snakemake.input.industry_growth_cagr, index_col=0)
+
+        # Building nodal industry production growth
+        for country in countries:
+            if country not in cagr.index:
+                cagr.loc[country] = cagr.loc["DEFAULT"]
+                _logger.warning(
+                    "No industry growth data for "
+                    + country
+                    + "; using default data instead."
+                )
+
+        cagr = cagr[cagr.index.isin(countries)]
+
+        growth_factors = calculate_end_values(cagr)
+
+        industry_base_totals = read_csv_nafix(
+            snakemake.input["base_industry_totals"], index_col=[0, 1]
+        )
+
+        production_base = cagr.map(lambda x: 1)
+        production_tom = production_base * growth_factors
+
+        # non-used line; commented out
+        # industry_totals = (production_tom * industry_base_totals).fillna(0)
+
+        industry_util_factor = snakemake.params.industry_util_factor
+
+        # Load distribution keys
+        keys_path = snakemake.input.industrial_distribution_key
+
+        dist_keys = pd.read_csv(
+            keys_path, index_col=0, keep_default_na=False, na_values=[""]
+        )
+
+        # production of industries per node compared to current
+        nodal_production_tom = country_to_nodal(production_tom, dist_keys)
+
+        clean_industry_list = [
+            "iron and steel",
+            "chemical and petrochemical",
+            "non-ferrous metals",
+            "non-metallic minerals",
+            "transport equipment",
+            "machinery",
+            "mining and quarrying",
+            "food and tobacco",
+            "paper pulp and print",
+            "wood and wood products",
+            "textile and leather",
+            "construction",
+            "other",
+        ]
+
+        emission_factors = {  # Based on JR data following PyPSA-EUR
+            "iron and steel": 0.025,
+            "chemical and petrochemical": 0.51,  # taken from HVC including process and feedstock
+            "non-ferrous metals": 1.5,  # taken from Aluminum primary
+            "non-metallic minerals": 0.542,  # taken for cement
+            "transport equipment": 0,
+            "machinery": 0,
+            "mining and quarrying": 0,  # assumed
+            "food and tobacco": 0,
+            "paper pulp and print": 0,
+            "wood and wood products": 0,
+            "textile and leather": 0,
+            "construction": 0,  # assumed
+            "other": 0,
+        }
+
+        # fill industry_base_totals
+        level_2nd = industry_base_totals.index.get_level_values(1).unique()
+        mlv_index = pd.MultiIndex.from_product([countries, level_2nd])
+        industry_base_totals = industry_base_totals.reindex(mlv_index, fill_value=0)
+
+        geo_locs = pd.read_csv(
+            snakemake.input.industrial_database,
+            sep=",",
+            header=0,
+            keep_default_na=False,
+            index_col=0,
+        )
+        geo_locs["capacity"] = pd.to_numeric(geo_locs.capacity)
+
+        def match_technology(df):
+            industry_mapping = {
+                "Integrated steelworks": "iron and steel",
+                "DRI + Electric arc": "iron and steel",
+                "Electric arc": "iron and steel",
+                "Cement": "non-metallic minerals",
+                "HVC": "chemical and petrochemical",
+                "Paper": "paper pulp and print",
+            }
+
+            df["industry"] = df["technology"].map(industry_mapping)
+            return df
+
+        # Calculating emissions
+
+        # get the subset of countries that also appear in the facility database
+        countries_geo = geo_locs.index.unique().intersection(countries)
+        geo_locs = match_technology(geo_locs).loc[countries_geo]
+
+        aluminium_year = snakemake.params.aluminium_year
+        AL = read_csv_nafix("data/AL_production.csv", index_col=0)
+        AL_prod_tom = AL.query("Year == @aluminium_year and index in @countries_geo")[
+            "production[ktons/a]"
+        ].reindex(countries_geo, fill_value=0.0)
+        AL_emissions = AL_prod_tom * emission_factors["non-ferrous metals"]
+
+        Steel_emissions = (
+            geo_locs[geo_locs.industry == "iron and steel"]
+            .groupby("country")
+            .sum()
+            .capacity
+            * 1000
+            * emission_factors["iron and steel"]
+            * industry_util_factor
+        )
+        NMM_emissions = (
+            geo_locs[geo_locs.industry == "non-metallic minerals"]
+            .groupby("country")
+            .sum()
+            .capacity
+            * 1000
+            * emission_factors["non-metallic minerals"]
+            * industry_util_factor
+        )
+        refinery_emissions = (
+            geo_locs[geo_locs.industry == "chemical and petrochemical"]
+            .groupby("country")
+            .sum()
+            .capacity
+            * emission_factors["chemical and petrochemical"]
+            * 0.136  # tonnes of oil per barrel
+            * 365  # days per year: convert bpd to t/a
+            * industry_util_factor
+        )
+
+        for country in countries:
+            industry_base_totals.loc[(country, "process emissions"), :] = 0
+            try:
+                industry_base_totals.loc[
+                    (country, "process emissions"), "non-metallic minerals"
+                ] = NMM_emissions.loc[country]
+            except KeyError:
+                pass
+
+            try:
+                industry_base_totals.loc[
+                    (country, "process emissions"), "iron and steel"
+                ] = Steel_emissions.loc[country]
+            except KeyError:
+                pass
+            try:
+                industry_base_totals.loc[
+                    (country, "process emissions"), "non-ferrous metals"
+                ] = AL_emissions.loc[country]
+            except KeyError:
+                pass
+            try:
+                industry_base_totals.loc[
+                    (country, "process emissions"), "chemical and petrochemical"
+                ] = refinery_emissions.loc[country]
+            except KeyError:
+                pass
+        industry_base_totals = industry_base_totals.sort_index()
+
+        all_carriers = [
+            "electricity",
+            "gas",
+            "coal",
+            "oil",
+            "hydrogen",
+            "biomass",
+            "low-temperature heat",
+        ]
+
+        # Fill missing carriers with 0s
+        for country in countries:
+            carriers_present = industry_base_totals.xs(country, level=0).index
+            missing_carriers = set(all_carriers) - set(carriers_present)
+            for carrier in missing_carriers:
+                # Add the missing carrier with a value of 0
+                industry_base_totals.loc[(country, carrier), :] = 0
+
+        # temporary fix: merge other manufacturing, construction and non-fuel into other and drop the column
+        other_cols = list(set(industry_base_totals.columns) - set(clean_industry_list))
+        if len(other_cols) > 0:
+            industry_base_totals["other"] += industry_base_totals[other_cols].sum(
+                axis=1
+            )
+            industry_base_totals.drop(columns=other_cols, inplace=True)
+
+        nodal_df = pd.DataFrame()
+
+        for country in countries:
+            nodal_production_tom_co = nodal_production_tom[
+                nodal_production_tom.index.to_series().str.startswith(country)
+            ]
+            industry_base_totals_co = industry_base_totals.loc[country]
+            # final energy consumption per node and industry (TWh/a)
+            nodal_df_co = nodal_production_tom_co.dot(industry_base_totals_co.T)
+            nodal_df = pd.concat([nodal_df, nodal_df_co])
+
+    rename_sectors = {
+        "elec": "electricity",
+        "biomass": "solid biomass",
+        "heat": "low-temperature heat",
+    }
+    nodal_df.rename(columns=rename_sectors, inplace=True)
+
+    nodal_df.index.name = "MWh/a (tCO2/a)"
+
+    nodal_df.to_csv(
+        snakemake.output.industrial_energy_demand_per_node, float_format="%.2f"
+    )
diff --git a/scripts/build_natura_raster.py b/scripts/build_natura_raster.py
index 2cea6f681..07a479e54 100644
--- a/scripts/build_natura_raster.py
+++ b/scripts/build_natura_raster.py
@@ -52,13 +52,7 @@
 import numpy as np
 import pandas as pd
 import rasterio as rio
-from _helpers import (
-    change_to_script_dir,
-    configure_logging,
-    create_logger,
-    get_path,
-    mock_snakemake,
-)
+from _helpers import configure_logging, create_logger, get_path, mock_snakemake
 from rasterio.features import geometry_mask
 from rasterio.warp import transform_bounds
 from shapely.ops import unary_union
@@ -184,11 +178,9 @@ def unify_protected_shape_areas(inputs, natura_crs, out_logging):
 
 if __name__ == "__main__":
     if "snakemake" not in globals():
-        change_to_script_dir(__file__)
         snakemake = mock_snakemake(
             "build_natura_raster", cutouts=["cutouts/africa-2013-era5.nc"]
         )
-
     configure_logging(snakemake)
 
     # get crs
diff --git a/scripts/build_osm_network.py b/scripts/build_osm_network.py
index 1ab20b70a..1ebbd6ca6 100644
--- a/scripts/build_osm_network.py
+++ b/scripts/build_osm_network.py
@@ -10,13 +10,11 @@
 import pandas as pd
 from _helpers import (
     build_directory,
-
change_to_script_dir, configure_logging, create_logger, mock_snakemake, read_geojson, read_osm_config, - sets_path_to_root, to_csv_nafix, ) from shapely.geometry import LineString, Point @@ -44,7 +42,7 @@ def line_endings_to_bus_conversion(lines): # tol in m -def set_substations_ids(buses, distance_crs, tol=2000): +def set_substations_ids(buses, distance_crs_val, tol=2000): """ Function to set substations ids to buses, accounting for location tolerance. @@ -63,7 +61,7 @@ def set_substations_ids(buses, distance_crs, tol=2000): buses["station_id"] = -1 # create temporary series to execute distance calculations using m as reference distances - temp_bus_geom = buses.geometry.to_crs(distance_crs) + temp_bus_geom = buses.geometry.to_crs(distance_crs_val) # set tqdm options for substation ids tqdm_kwargs_substation_ids = dict( @@ -115,7 +113,7 @@ def set_substations_ids(buses, distance_crs, tol=2000): buses.loc[buses.index[close_nodes], "station_id"] = sub_id -def set_lines_ids(lines, buses, distance_crs): +def set_lines_ids(lines, buses, distance_crs_val): """ Function to set line buses ids to the closest bus in the list. """ @@ -131,8 +129,8 @@ def set_lines_ids(lines, buses, distance_crs): lines["bus0"] = -1 lines["bus1"] = -1 - busesepsg = buses.to_crs(distance_crs) - linesepsg = lines.to_crs(distance_crs) + busesepsg = buses.to_crs(distance_crs_val) + linesepsg = lines.to_crs(distance_crs_val) for i, row in tqdm(linesepsg.iterrows(), **tqdm_kwargs_line_ids): # select buses having the voltage level of the current line @@ -343,7 +341,7 @@ def get_transformers(buses, lines): return df_transformers -def get_converters(buses, lines): +def get_converters(buses): """ Function to create fake converter lines that connect buses of the same station_id of different polarities. @@ -513,7 +511,7 @@ def set_lv_substations(buses): def merge_stations_lines_by_station_id_and_voltage( - lines, buses, geo_crs, distance_crs, tol=2000 + lines, buses, distance_crs_val, tol=2000 ): """ Function to merge close stations and adapt the line datasets to adhere to @@ -525,7 +523,7 @@ def merge_stations_lines_by_station_id_and_voltage( ) # set substation ids - set_substations_ids(buses, distance_crs, tol=tol) + set_substations_ids(buses, distance_crs_val, tol=tol) logger.info("Stage 3b/4: Merge substations with the same id") @@ -536,7 +534,7 @@ def merge_stations_lines_by_station_id_and_voltage( logger.info("Stage 3c/4: Specify the bus ids of the line endings") # set the bus ids to the line dataset - lines, buses = set_lines_ids(lines, buses, distance_crs) + lines, buses = set_lines_ids(lines, buses, distance_crs_val) # drop lines starting and ending in the same node lines.drop(lines[lines["bus0"] == lines["bus1"]].index, inplace=True) @@ -559,9 +557,7 @@ def merge_stations_lines_by_station_id_and_voltage( return lines, buses -def create_station_at_equal_bus_locations( - lines, buses, geo_crs, distance_crs, tol=2000 -): +def create_station_at_equal_bus_locations(lines, buses, distance_crs_val, tol=2000): # V1. 
Create station_id at same bus location # - We saw that buses are not connected exactly at one point, they are # usually connected to a substation "area" (analysed on maps) @@ -580,10 +576,10 @@ def create_station_at_equal_bus_locations( bus_all = buses # set substation ids - set_substations_ids(buses, distance_crs, tol=tol) + set_substations_ids(buses, distance_crs_val, tol=tol) # set the bus ids to the line dataset - lines, buses = set_lines_ids(lines, buses, distance_crs) + lines, buses = set_lines_ids(lines, buses, distance_crs_val) # update line endings lines = line_endings_to_bus_conversion(lines) @@ -628,7 +624,7 @@ def _split_linestring_by_point(linestring, points): return list_linestrings -def fix_overpassing_lines(lines, buses, distance_crs, tol=1): +def fix_overpassing_lines(lines, buses, distance_crs_val, tol=1): """ Function to avoid buses overpassing lines with no connection when the bus is within a given tolerance from the line. @@ -639,6 +635,8 @@ def fix_overpassing_lines(lines, buses, distance_crs, tol=1): Geodataframe of lines buses : GeoDataFrame Geodataframe of substations + distance_crs_val: str + Coordinate reference system tol : float Tolerance in meters of the distance between the substation and the line below which the line will be split @@ -647,8 +645,8 @@ def fix_overpassing_lines(lines, buses, distance_crs, tol=1): lines_to_add = [] # list of lines to be added lines_to_split = [] # list of lines that have been split - lines_epsgmod = lines.to_crs(distance_crs) - buses_epsgmod = buses.to_crs(distance_crs) + lines_epsgmod = lines.to_crs(distance_crs_val) + buses_epsgmod = buses.to_crs(distance_crs_val) # set tqdm options for substation ids tqdm_kwargs_substation_ids = dict( @@ -711,7 +709,7 @@ def fix_overpassing_lines(lines, buses, distance_crs, tol=1): df_to_add.set_index(lines.index[-1] + df_to_add.index, inplace=True) # update length - df_to_add["length"] = df_to_add.to_crs(distance_crs).geometry.length + df_to_add["length"] = df_to_add.to_crs(distance_crs_val).geometry.length # update line endings df_to_add = line_endings_to_bus_conversion(df_to_add) @@ -739,13 +737,13 @@ def force_ac_lines(df, col="tag_frequency"): # TODO: default frequency may be by country default_ac_frequency = 50 - df["tag_frequency"] = default_ac_frequency + df[col] = default_ac_frequency df["dc"] = False return df -def add_buses_to_empty_countries(country_list, fp_country_shapes, buses): +def add_buses_to_empty_countries(geo_crs_val, country_list, fp_country_shapes, buses): """ Function to add a bus for countries missing substation data. 
""" @@ -763,7 +761,7 @@ def add_buses_to_empty_countries(country_list, fp_country_shapes, buses): no_data_countries_shape = ( country_shapes[country_shapes.index.isin(no_data_countries) == True] .reset_index() - .to_crs(geo_crs) + .to_crs(geo_crs_val) ) length = len(no_data_countries) df = gpd.GeoDataFrame( @@ -783,7 +781,7 @@ def add_buses_to_empty_countries(country_list, fp_country_shapes, buses): "geometry": no_data_countries_shape["geometry"].centroid, "substation_lv": [True] * length, }, - crs=geo_crs, + crs=geo_crs_val, ).astype( buses.dtypes.to_dict() ) # keep the same dtypes as buses @@ -812,8 +810,8 @@ def built_network( outputs, build_osm_network_config, countries_config, - geo_crs, - distance_crs, + geo_crs_val, + distance_crs_val, force_ac=False, ): logger.info("Stage 1/5: Read input data") @@ -845,12 +843,14 @@ def built_network( tol = build_osm_network_config.get("overpassing_lines_tolerance", 1) logger.info("Stage 3/5: Avoid nodes overpassing lines: enabled with tolerance") - lines, buses = fix_overpassing_lines(lines, buses, distance_crs, tol=tol) + lines, buses = fix_overpassing_lines(lines, buses, distance_crs_val, tol=tol) else: logger.info("Stage 3/5: Avoid nodes overpassing lines: disabled") # Add bus to countries with no buses - buses = add_buses_to_empty_countries(countries_config, inputs.country_shapes, buses) + buses = add_buses_to_empty_countries( + geo_crs_val, countries_config, inputs.country_shapes, buses + ) # METHOD to merge buses with same voltage and within tolerance Step 4/5 if build_osm_network_config.get("group_close_buses", False): @@ -859,7 +859,7 @@ def built_network( f"Stage 4/5: Aggregate close substations: enabled with tolerance {tol} m" ) lines, buses = merge_stations_lines_by_station_id_and_voltage( - lines, buses, geo_crs, distance_crs, tol=tol + lines, buses, distance_crs_val, tol=tol ) else: logger.info("Stage 4/5: Aggregate close substations: disabled") @@ -870,7 +870,7 @@ def built_network( transformers = get_transformers(buses, lines) # get converters: currently modelled as links connecting buses with different polarity - converters = get_converters(buses, lines) + converters = get_converters(buses) logger.info("Save outputs") @@ -891,7 +891,6 @@ def built_network( if __name__ == "__main__": if "snakemake" not in globals(): - change_to_script_dir(__file__) snakemake = mock_snakemake("build_osm_network") configure_logging(snakemake) @@ -903,8 +902,6 @@ def built_network( build_osm_network = snakemake.params.build_osm_network countries = snakemake.params.countries - sets_path_to_root("pypsa-earth") - built_network( snakemake.input, snakemake.output, diff --git a/scripts/build_population_layouts.py b/scripts/build_population_layouts.py new file mode 100644 index 000000000..968702865 --- /dev/null +++ b/scripts/build_population_layouts.py @@ -0,0 +1,131 @@ +# -*- coding: utf-8 -*- +# SPDX-FileCopyrightText: PyPSA-Earth and PyPSA-Eur Authors +# +# SPDX-License-Identifier: AGPL-3.0-or-later +""" +Build mapping between grid cells and population (total, urban, rural) +""" +import multiprocessing as mp + +import atlite +import geopandas as gpd +import numpy as np +import pandas as pd +import xarray as xr +from _helpers import mock_snakemake, read_csv_nafix +from vresutils import shapes as vshapes + +if __name__ == "__main__": + if "snakemake" not in globals(): + snakemake = mock_snakemake( + "build_population_layouts", + planning_horizons=2030, + ) + + cutout_path = snakemake.input.cutout + cutout = atlite.Cutout(cutout_path) + + grid_cells = 
cutout.grid.geometry.to_list()
+
+    # nuts3 has columns country, gdp, pop, geometry
+    nuts3 = gpd.read_file(snakemake.input.nuts3_shapes).set_index("GADM_ID")
+
+    # Convert population to thousands of people, the unit used in PyPSA-Eur-Sec
+    nuts3["pop"] = nuts3["pop"] / 1000
+
+    # Indicator matrix NUTS3 -> grid cells
+    I = atlite.cutout.compute_indicatormatrix(nuts3.geometry, grid_cells)
+
+    # Indicator matrix grid cells -> NUTS3; in principle Iinv*I is the
+    # identity, but numerical imprecision means it is not exact
+    Iinv = cutout.indicatormatrix(nuts3.geometry)
+
+    countries = np.sort(nuts3.country.unique())
+
+    urban_percent_df = read_csv_nafix(
+        snakemake.input.urban_percent,
+        usecols=[0, 1, 4],
+        index_col=0,
+    )
+
+    # Filter for the year used in the workflow
+    urban_percent_df = urban_percent_df.loc[
+        (urban_percent_df["Year"] == int(snakemake.wildcards.planning_horizons))
+    ]
+
+    # Keep only the urban percent column
+    urban_percent_df = urban_percent_df[
+        ["Urban population as percentage of total population"]
+    ]
+
+    # Drop the index name
+    urban_percent_df.index.name = None
+
+    # Squeeze into a Series
+    urban_fraction = urban_percent_df.squeeze() / 100.0
+    urban_fraction = urban_fraction.groupby(urban_fraction.index).sum()
+
+    # population in each grid cell
+    pop_cells = pd.Series(I.dot(nuts3["pop"]))
+    gdp_cells = pd.Series(I.dot(nuts3["gdp"]))
+
+    # grid cell areas in km^2
+    with mp.Pool(processes=snakemake.threads) as pool:
+        cell_areas = pd.Series(pool.map(vshapes.area, grid_cells)) / 1e6
+
+    # population and GDP per km^2
+    density_cells_pop = pop_cells / cell_areas
+    density_cells_gdp = gdp_cells / cell_areas
+
+    # rural or urban population in grid cell
+    pop_rural = pd.Series(0.0, density_cells_pop.index)
+    pop_urban = pd.Series(0.0, density_cells_pop.index)
+
+    for ct in countries:
+        indicator_nuts3_ct = nuts3.country.apply(lambda x: 1.0 if x == ct else 0.0)
+
+        indicator_cells_ct = pd.Series(Iinv.T.dot(indicator_nuts3_ct))
+
+        density_cells_pop_ct = indicator_cells_ct * density_cells_pop
+        density_cells_gdp_ct = indicator_cells_ct * density_cells_gdp
+
+        pop_cells_ct = indicator_cells_ct * pop_cells
+        gdp_cells_ct = indicator_cells_ct * gdp_cells
+        # correct for the imprecision of Iinv*I
+        pop_ct = nuts3.loc[nuts3.country == ct, "pop"].sum()
+        pop_cells_ct *= pop_ct / pop_cells_ct.sum()
+
+        gdp_ct = nuts3.loc[nuts3.country == ct, "gdp"].sum()
+        gdp_cells_ct *= gdp_ct / gdp_cells_ct.sum()
+
+        # The lowest-density grid cells are classified as rural until the
+        # rural population fraction is reached
+        asc_density_i = density_cells_pop_ct.sort_values().index
+        asc_density_cumsum = pop_cells_ct[asc_density_i].cumsum() / pop_cells_ct.sum()
+        rural_fraction_ct = 1 - urban_fraction[ct]
+        pop_ct_rural_b = asc_density_cumsum < rural_fraction_ct
+        pop_ct_urban_b = ~pop_ct_rural_b
+
+        pop_ct_rural_b[indicator_cells_ct == 0.0] = False
+        pop_ct_urban_b[indicator_cells_ct == 0.0] = False
+
+        pop_rural += pop_cells_ct.where(pop_ct_rural_b, 0.0)
+        pop_urban += pop_cells_ct.where(pop_ct_urban_b, 0.0)
+
+    pop_cells = {"total": pop_cells}
+    pop_cells["rural"] = pop_rural
+    pop_cells["urban"] = pop_urban
+
+    for key, pop in pop_cells.items():
+        ycoords = ("y", cutout.coords["y"].data)
+        xcoords = ("x", cutout.coords["x"].data)
+        values = pop.values.reshape(cutout.shape)
+        pop_layout = xr.DataArray(values, [ycoords, xcoords])
+
+        pop_layout.to_netcdf(snakemake.output[f"pop_layout_{key}"])
+
+    # write the GDP layout on the same grid
+    ycoords = ("y", cutout.coords["y"].data)
+    xcoords = ("x", cutout.coords["x"].data)
+    values = gdp_cells.values.reshape(cutout.shape)
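A note on the rural/urban split above, which is hard to read in its vectorised form. A minimal sketch of the same idea on toy data (hypothetical numbers, plain pandas): cells are walked from least to most dense and counted as rural until the rural share of the population is reached.

import pandas as pd

# hypothetical per-cell population and population density for one country
pop = pd.Series([10.0, 40.0, 30.0, 20.0])
density = pd.Series([5.0, 80.0, 50.0, 10.0])
urban_fraction = 0.6  # assumed urban share of the population

# cumulative population share, walking cells from least to most dense;
# everything below (1 - urban_fraction) is classified as rural
order = density.sort_values().index
cum_share = pop[order].cumsum() / pop.sum()
is_rural = cum_share < (1 - urban_fraction)

pop_rural = pop[order].where(is_rural, 0.0)  # -> cells 0 and 3 are rural
pop_urban = pop[order].where(~is_rural, 0.0)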
gdp_layout = xr.DataArray(values, [ycoords, xcoords]) + gdp_layout.to_netcdf(snakemake.output[f"gdp_layout"]) diff --git a/scripts/build_powerplants.py b/scripts/build_powerplants.py index 28e60e8af..ee950d4e8 100644 --- a/scripts/build_powerplants.py +++ b/scripts/build_powerplants.py @@ -107,7 +107,6 @@ import pypsa import yaml from _helpers import ( - change_to_script_dir, configure_logging, create_logger, get_current_directory_path, @@ -215,7 +214,6 @@ def replace_natural_gas_technology(df: pd.DataFrame): if __name__ == "__main__": if "snakemake" not in globals(): - change_to_script_dir(__file__) snakemake = mock_snakemake("build_powerplants") configure_logging(snakemake) diff --git a/scripts/build_renewable_profiles.py b/scripts/build_renewable_profiles.py index 8e719c5dd..4c518c071 100644 --- a/scripts/build_renewable_profiles.py +++ b/scripts/build_renewable_profiles.py @@ -201,15 +201,9 @@ import pandas as pd import progressbar as pgb import xarray as xr -from _helpers import ( - change_to_script_dir, - configure_logging, - create_logger, - mock_snakemake, - sets_path_to_root, -) +from _helpers import configure_logging, create_logger, mock_snakemake from add_electricity import load_powerplants -from dask.distributed import Client, LocalCluster +from dask.distributed import Client from pypsa.geo import haversine from shapely.geometry import LineString, Point, box @@ -222,7 +216,7 @@ GEBCO_CRS = "EPSG:4326" -def check_cutout_match(cutout, geodf): +def check_cutout_match(cutout): cutout_box = box(*cutout.bounds) region_box = box(*regions.total_bounds) @@ -285,7 +279,7 @@ def get_hydro_capacities_annual_hydro_generation(fn, countries, year): return hydro_prod_by_country -def check_cutout_completness(cf): +def check_cutout_completeness(cf): """ Check if a cutout contains missed values. 
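For context on `check_cutout_match` above: it reads `cutout.bounds` and `regions.total_bounds`, so it amounts to a bounding-box containment test between the weather cutout and the model regions. A minimal sketch of that kind of check, with hypothetical bounds and assuming only shapely:

from shapely.geometry import box

# hypothetical bounding boxes as (minx, miny, maxx, maxy)
cutout_box = box(-20.0, 0.0, 25.0, 40.0)  # extent of the weather cutout
region_box = box(-10.0, 5.0, 20.0, 35.0)  # extent of the model regions

# if the regions are not fully covered by the cutout, renewable profiles
# would silently miss grid cells, so fail early
if not cutout_box.contains(region_box):
    raise ValueError("Weather cutout does not cover the model regions")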
@@ -491,9 +485,7 @@ def create_scaling_factor(
 
 if __name__ == "__main__":
     if "snakemake" not in globals():
-        change_to_script_dir(__file__)
         snakemake = mock_snakemake("build_renewable_profiles", technology="solar")
-        sets_path_to_root("pypsa-earth")
 
     configure_logging(snakemake)
 
@@ -501,7 +493,7 @@ def create_scaling_factor(
     countries = snakemake.params.countries
     paths = snakemake.input
     nprocesses = int(snakemake.threads)
-    noprogress = not snakemake.config["atlite"].get("show_progress", False)
+    noprogress = not snakemake.config["enable"]["progress_bar"]
     config = snakemake.params.renewable[snakemake.wildcards.technology]
     resource = config["resource"]
     correction_factor = config.get("correction_factor", 1.0)
@@ -526,12 +518,14 @@ def create_scaling_factor(
     # do not pull up, set_index does not work if geo dataframe is empty
     regions = regions.set_index("name").rename_axis("bus")
 
-    cluster = LocalCluster(n_workers=nprocesses, threads_per_worker=1)
-    client = Client(cluster, asynchronous=True)
+    if nprocesses > 1:
+        client = Client(n_workers=nprocesses, threads_per_worker=1)
+    else:
+        client = None
 
     cutout = atlite.Cutout(paths["cutout"])
-    check_cutout_match(cutout=cutout, geodf=regions)
+    check_cutout_match(cutout=cutout)
 
     if not snakemake.wildcards.technology.startswith("hydro"):
         # the region should be restricted for non-hydro technologies, as the hydro potential is calculated across hydrobasins which may span beyond the region of the country
@@ -754,7 +748,7 @@ def create_scaling_factor(
     capacity_factor = correction_factor * func(capacity_factor=True, **resource)
     layout = capacity_factor * area * capacity_per_sqkm
 
-    n_cells_lost = check_cutout_completness(capacity_factor)
+    n_cells_lost = check_cutout_completeness(capacity_factor)
 
     profile, capacities = func(
         matrix=availability.stack(spatial=["y", "x"]),
@@ -837,4 +831,6 @@ def create_scaling_factor(
     ds["profile"] = ds["profile"].where(ds["profile"] >= min_p_max_pu, 0)
 
     ds.to_netcdf(snakemake.output.profile)
-    client.shutdown()
+
+    if client is not None:
+        client.shutdown()
diff --git a/scripts/build_shapes.py b/scripts/build_shapes.py
index a642efee8..dea430737 100644
--- a/scripts/build_shapes.py
+++ b/scripts/build_shapes.py
@@ -19,14 +19,12 @@
 import xarray as xr
 from _helpers import (
     build_directory,
-    change_to_script_dir,
     configure_logging,
     create_logger,
     get_current_directory_path,
     get_gadm_layer,
     get_path,
     mock_snakemake,
-    sets_path_to_root,
     three_2_two_digits_country,
     two_2_three_digits_country,
 )
@@ -40,9 +38,6 @@
 from shapely.validation import make_valid
 from tqdm import tqdm
 
-sets_path_to_root("pypsa-earth")
-
-
 logger = create_logger(__name__)
@@ -145,7 +140,7 @@ def load_eez(countries_codes, geo_crs, eez_gpkg_file="./data/eez/eez_v11.gpkg"):
     """
     if not pathlib.Path(eez_gpkg_file).exists():
         raise Exception(
-            f"File EEZ {eez_gpkg_file} not found, please download it from https://www.marineregions.org/download_file.php?name=World_EEZ_v11_20191118_gpkg.zip and copy it in {pathlib.Path(EEZ_gpkg).parent}"
+            f"File EEZ {eez_gpkg_file} not found, please download it from https://www.marineregions.org/download_file.php?name=World_EEZ_v11_20191118_gpkg.zip and copy it in {pathlib.Path(eez_gpkg_file).parent}"
         )
 
     geodf_EEZ = gpd.read_file(eez_gpkg_file, engine="pyogrio").to_crs(geo_crs)
@@ -1137,14 +1132,12 @@ def get_gadm_shapes(
 
 if __name__ == "__main__":
     if "snakemake" not in globals():
-        change_to_script_dir(__file__)
         snakemake = mock_snakemake("build_shapes")
-        sets_path_to_root("pypsa-earth")
 
     configure_logging(snakemake)
 
     out = 
snakemake.output - EEZ_gpkg = snakemake.input["eez"] + eez_gpkg = snakemake.input["eez"] mem_mb = snakemake.resources["mem_mb"] countries_list = snakemake.params.countries @@ -1176,7 +1169,7 @@ def get_gadm_shapes( country_shapes_df.to_file(snakemake.output.country_shapes) offshore_shapes = get_eez( - countries_list, geo_crs, country_shapes_df, EEZ_gpkg, out_logging + countries_list, geo_crs, country_shapes_df, eez_gpkg, out_logging ) offshore_shapes.reset_index().to_file(snakemake.output.offshore_shapes) diff --git a/scripts/build_ship_profile.py b/scripts/build_ship_profile.py new file mode 100644 index 000000000..4ffa7837e --- /dev/null +++ b/scripts/build_ship_profile.py @@ -0,0 +1,88 @@ +# -*- coding: utf-8 -*- +# SPDX-FileCopyrightText: PyPSA-Earth and PyPSA-Eur Authors +# +# SPDX-License-Identifier: AGPL-3.0-or-later + +import logging + +import numpy as np +import pandas as pd +from _helpers import mock_snakemake + +logger = logging.getLogger(__name__) + + +def build_ship_profile(export_volume, ship_opts): + ship_capacity = ship_opts["ship_capacity"] + travel_time = ship_opts["travel_time"] + fill_time = ship_opts["fill_time"] + unload_time = ship_opts["unload_time"] + + landing = export_volume / ship_capacity # fraction of max delivery + pause_time = 8760 / landing - (fill_time + travel_time) + full_cycle = fill_time + travel_time + unload_time + pause_time + + max_transport = ship_capacity * 8760 / (fill_time + travel_time + unload_time) + print(f"The maximum transport capacity per ship is {max_transport:.2f} TWh/year") + + # throw error if max_transport < export_volume + if max_transport < export_volume: + ships = np.ceil(export_volume / max_transport) + print(f"Number of ships needed to export {export_volume} TWh/year is {ships}") + logger.info( + "Not enough ship capacity to export all hydrogen in one ship. 
Extending the number of ships to {}".format(
+                ships
+            )
+        )
+
+    # Set fill_time -> 1 and travel_time, unload_time, pause_time -> 0
+    ship = pd.Series(
+        [1.0] * fill_time + [0.0] * int(travel_time + unload_time + pause_time)
+    )
+    ship.name = "profile"
+    ship = pd.concat(
+        [ship] * 1000, ignore_index=True
+    )  # extend ship series to above 8760 hours
+
+    # Add index, cut profile after length of snapshots
+    snapshots = pd.date_range(freq="h", **snakemake.params.snapshots)
+    ship = ship[: len(snapshots)]
+    ship.index = snapshots
+
+    # Scale ship profile to export_volume
+    export_profile = ship / ship.sum() * export_volume * 1e6  # in MWh
+
+    # Check that the profile sums to the export demand
+    if abs(export_profile.sum() / 1e6 - export_volume) > 0.001:
+        raise ValueError(
+            f"Sum of ship profile ({export_profile.sum()/1e6} TWh) does not match export demand ({export_volume} TWh)"
+        )
+
+    return export_profile
+
+
+if __name__ == "__main__":
+    if "snakemake" not in globals():
+        snakemake = mock_snakemake(
+            "build_ship_profile",
+            h2export="120",
+        )
+
+    # Get parameters from config and wildcard
+    ship_opts = snakemake.params.ship_opts
+    export_volume = float(snakemake.wildcards.h2export)
+
+    # Create export profile
+    if export_volume > 0:
+        export_profile = build_ship_profile(export_volume, ship_opts)
+    else:
+        export_profile = pd.Series(
+            0,
+            index=pd.date_range(freq="h", **snakemake.params.snapshots),
+            name="profile",
+        )
+
+    # Save export profile
+    export_profile.to_csv(snakemake.output.ship_profile)
+
+    logger.info("Ship profile successfully created")
diff --git a/scripts/build_solar_thermal_profiles.py b/scripts/build_solar_thermal_profiles.py
new file mode 100644
index 000000000..6ffd6be74
--- /dev/null
+++ b/scripts/build_solar_thermal_profiles.py
@@ -0,0 +1,54 @@
+# -*- coding: utf-8 -*-
+# SPDX-FileCopyrightText: PyPSA-Earth and PyPSA-Eur Authors
+#
+# SPDX-License-Identifier: AGPL-3.0-or-later
+"""
+Build solar thermal collector time series.
+""" + +import atlite +import geopandas as gpd +import numpy as np +import pandas as pd +import xarray as xr +from _helpers import mock_snakemake + +if __name__ == "__main__": + if "snakemake" not in globals(): + + snakemake = mock_snakemake( + "build_solar_thermal_profiles", + simpl="", + clusters=15, + ) + + config = snakemake.params.solar_thermal_config + + time = pd.date_range(freq="h", **snakemake.params.snapshots) + cutout_config = snakemake.input.cutout + cutout = atlite.Cutout(cutout_config).sel(time=time) + + clustered_regions = ( + gpd.read_file(snakemake.input.regions_onshore) + .set_index("name") + .buffer(0) + .squeeze() + ) + + I = cutout.indicatormatrix(clustered_regions) + + for area in ["total", "rural", "urban"]: + pop_layout = xr.open_dataarray(snakemake.input[f"pop_layout_{area}"]) + + stacked_pop = pop_layout.stack(spatial=("y", "x")) + M = I.T.dot(np.diag(I.dot(stacked_pop))) + + nonzero_sum = M.sum(axis=0, keepdims=True) + nonzero_sum[nonzero_sum == 0.0] = 1.0 + M_tilde = M / nonzero_sum + + solar_thermal = cutout.solar_thermal( + **config, matrix=M_tilde.T, index=clustered_regions.index + ) + + solar_thermal.to_netcdf(snakemake.output[f"solar_thermal_{area}"]) diff --git a/scripts/build_temperature_profiles.py b/scripts/build_temperature_profiles.py new file mode 100644 index 000000000..bfa2ad598 --- /dev/null +++ b/scripts/build_temperature_profiles.py @@ -0,0 +1,58 @@ +# -*- coding: utf-8 -*- +# SPDX-FileCopyrightText: PyPSA-Earth and PyPSA-Eur Authors +# +# SPDX-License-Identifier: AGPL-3.0-or-later +""" +Build temperature profiles. +""" +import atlite +import geopandas as gpd +import numpy as np +import pandas as pd +import xarray as xr +from _helpers import mock_snakemake + +if __name__ == "__main__": + if "snakemake" not in globals(): + + snakemake = mock_snakemake( + "build_temperature_profiles", + simpl="", + clusters=900, + ) + + time = pd.date_range(freq="h", **snakemake.params.snapshots) + cutout_path = ( + snakemake.input.cutout + ) # os.path.abspath(snakemake.config["atlite"]["cutout"]) + + cutout = atlite.Cutout(cutout_path).sel(time=time) + + clustered_regions = ( + gpd.read_file(snakemake.input.regions_onshore) + .set_index("name") + .buffer(0) + .squeeze() + ) + + I = cutout.indicatormatrix(clustered_regions) + + for area in ["total", "rural", "urban"]: + pop_layout = xr.open_dataarray(snakemake.input[f"pop_layout_{area}"]) + + stacked_pop = pop_layout.stack(spatial=("y", "x")) + M = I.T.dot(np.diag(I.dot(stacked_pop))) + + nonzero_sum = M.sum(axis=0, keepdims=True) + nonzero_sum[nonzero_sum == 0.0] = 1.0 + M_tilde = M / nonzero_sum + + temp_air = cutout.temperature(matrix=M_tilde.T, index=clustered_regions.index) + + temp_air.to_netcdf(snakemake.output[f"temp_air_{area}"]) + + temp_soil = cutout.soil_temperature( + matrix=M_tilde.T, index=clustered_regions.index + ) + + temp_soil.to_netcdf(snakemake.output[f"temp_soil_{area}"]) diff --git a/scripts/build_test_configs.py b/scripts/build_test_configs.py index 0dec51ae0..e64a6d407 100644 --- a/scripts/build_test_configs.py +++ b/scripts/build_test_configs.py @@ -15,12 +15,7 @@ import collections.abc import copy -from _helpers import ( - change_to_script_dir, - get_current_directory_path, - get_path, - mock_snakemake, -) +from _helpers import get_current_directory_path, get_path, mock_snakemake from ruamel.yaml import YAML @@ -90,7 +85,6 @@ def create_test_config(default_config, diff_config, output_path): if __name__ == "__main__": if "snakemake" not in globals(): - change_to_script_dir(__file__) 
snakemake = mock_snakemake("build_test_configs") # Input paths diff --git a/scripts/clean_osm_data.py b/scripts/clean_osm_data.py index 2907d7f59..dbab231b3 100644 --- a/scripts/clean_osm_data.py +++ b/scripts/clean_osm_data.py @@ -11,7 +11,6 @@ import reverse_geocode as rg from _helpers import ( REGION_COLS, - change_to_script_dir, configure_logging, create_logger, get_path_size, @@ -1061,7 +1060,6 @@ def clean_data( if __name__ == "__main__": if "snakemake" not in globals(): - change_to_script_dir(__file__) snakemake = mock_snakemake("clean_osm_data") configure_logging(snakemake) diff --git a/scripts/cluster_network.py b/scripts/cluster_network.py index 92fe8d5da..b98b78730 100644 --- a/scripts/cluster_network.py +++ b/scripts/cluster_network.py @@ -131,13 +131,11 @@ import pypsa from _helpers import ( REGION_COLS, - change_to_script_dir, configure_logging, create_logger, get_aggregation_strategies, mock_snakemake, normed, - sets_path_to_root, update_p_nom_max, ) from add_electricity import load_costs @@ -379,8 +377,7 @@ def n_bounds(model, *n_id): ) -def busmap_for_gadm_clusters(inputs, n, gadm_level, geo_crs, country_list): - # gdf = get_GADM_layer(country_list, gadm_level, geo_crs) +def busmap_for_gadm_clusters(inputs, n, gadm_level): gdf = gpd.read_file(inputs.gadm_shapes) def locate_bus(coords, co): @@ -562,7 +559,6 @@ def clustering_for_n_clusters( n_clusters, alternative_clustering, gadm_layer_id, - geo_crs, country_list, distribution_cluster, build_shape_options, @@ -582,9 +578,7 @@ def clustering_for_n_clusters( if not isinstance(custom_busmap, pd.Series): if alternative_clustering: - busmap = busmap_for_gadm_clusters( - inputs, n, gadm_layer_id, geo_crs, country_list - ) + busmap = busmap_for_gadm_clusters(inputs, n, gadm_layer_id) else: busmap = busmap_for_n_clusters( inputs, @@ -654,12 +648,9 @@ def cluster_regions(busmaps, inputs, output): if __name__ == "__main__": if "snakemake" not in globals(): - change_to_script_dir(__file__) snakemake = mock_snakemake( "cluster_network", network="elec", simpl="", clusters="min" ) - sets_path_to_root("pypsa-earth") - configure_logging(snakemake) inputs, outputs, config = snakemake.input, snakemake.output, snakemake.config @@ -671,7 +662,6 @@ def cluster_regions(busmaps, inputs, output): gadm_layer_id = snakemake.params.build_shape_options["gadm_layer_id"] focus_weights = snakemake.params.get("focus_weights", None) country_list = snakemake.params.countries - geo_crs = snakemake.params.geo_crs renewable_carriers = pd.Index( [ @@ -753,7 +743,6 @@ def consense(x): n_clusters, alternative_clustering, gadm_layer_id, - geo_crs, country_list, distribution_cluster, snakemake.params.build_shape_options, diff --git a/scripts/copy_config.py b/scripts/copy_config.py new file mode 100644 index 000000000..6550b8988 --- /dev/null +++ b/scripts/copy_config.py @@ -0,0 +1,23 @@ +# -*- coding: utf-8 -*- +# SPDX-FileCopyrightText: PyPSA-Earth and PyPSA-Eur Authors +# +# SPDX-License-Identifier: AGPL-3.0-or-later + +from shutil import copy + +from _helpers import mock_snakemake + +files_to_copy = { + "./config.yaml": "config.yaml", + "./Snakefile": "Snakefile", + "./scripts/solve_network.py": "solve_network.py", + "./scripts/prepare_sector_network.py": "prepare_sector_network.py", +} + +if __name__ == "__main__": + if "snakemake" not in globals(): + snakemake = mock_snakemake("copy_config") + + directory = snakemake.output["folder"] + for f, name in files_to_copy.items(): + copy(f, directory + "/" + name) diff --git a/scripts/download_osm_data.py 
b/scripts/download_osm_data.py index 2a1e366d2..47aa61a2a 100644 --- a/scripts/download_osm_data.py +++ b/scripts/download_osm_data.py @@ -26,23 +26,16 @@ - ``data/osm/out``: Prepared power data as .geojson and .csv files per country - ``resources/osm/raw``: Prepared and per type (e.g. cable/lines) aggregated power data as .geojson and .csv files """ - -import sys - -print("sys path download_osm_data", sys.path) - import pathlib import shutil from _helpers import ( - change_to_script_dir, configure_logging, create_logger, get_current_directory_path, get_path, mock_snakemake, read_osm_config, - sets_path_to_root, ) from earth_osm import eo @@ -105,10 +98,7 @@ def convert_iso_to_geofk( if __name__ == "__main__": if "snakemake" not in globals(): - change_to_script_dir(__file__) snakemake = mock_snakemake("download_osm_data") - sets_path_to_root("pypsa-earth") - configure_logging(snakemake) run = snakemake.config.get("run", {}) @@ -129,6 +119,7 @@ def convert_iso_to_geofk( out_dir=store_path_resources, out_format=["csv", "geojson"], out_aggregate=True, + progress_bar=snakemake.config["enable"]["progress_bar"], ) out_path = get_path(store_path_resources, "out") diff --git a/scripts/make_statistics.py b/scripts/make_statistics.py index 18a2e3a23..a87eb2a17 100644 --- a/scripts/make_statistics.py +++ b/scripts/make_statistics.py @@ -32,12 +32,10 @@ import pypsa import xarray as xr from _helpers import ( - change_to_script_dir, create_country_list, create_logger, get_path_size, mock_snakemake, - sets_path_to_root, three_2_two_digits_country, to_csv_nafix, ) @@ -52,9 +50,7 @@ def _multi_index_scen(rulename, keys): def _mock_snakemake(rule, **kwargs): - change_to_script_dir(__file__) snakemake = mock_snakemake(rule, **kwargs) - sets_path_to_root("pypsa-earth") return snakemake @@ -587,11 +583,8 @@ def calculate_stats( if __name__ == "__main__": if "snakemake" not in globals(): - change_to_script_dir(__file__) snakemake = mock_snakemake("make_statistics") - sets_path_to_root("pypsa-earth") - fp_stats = snakemake.output["stats"] scenario = snakemake.params.scenario scenario_name = snakemake.config["run"]["name"] diff --git a/scripts/make_summary.py b/scripts/make_summary.py index 5ad6c5bfc..8867c2c6a 100644 --- a/scripts/make_summary.py +++ b/scripts/make_summary.py @@ -58,7 +58,6 @@ import pypsa from _helpers import ( build_directory, - change_to_script_dir, configure_logging, create_logger, get_path, @@ -542,7 +541,6 @@ def to_csv(dfs, dir): if __name__ == "__main__": if "snakemake" not in globals(): - change_to_script_dir(__file__) snakemake = mock_snakemake( "make_summary", simpl="", diff --git a/scripts/monte_carlo.py b/scripts/monte_carlo.py index 9ba7d4948..4f80059dc 100644 --- a/scripts/monte_carlo.py +++ b/scripts/monte_carlo.py @@ -67,13 +67,12 @@ wildcard {unc}, which is described in the config.yaml and created in the Snakefile as a range from 0 to (total number of) SAMPLES. 
""" - import chaospy import numpy as np import pandas as pd import pypsa import seaborn as sns -from _helpers import change_to_script_dir, configure_logging, create_logger +from _helpers import configure_logging, create_logger from pyDOE2 import lhs from scipy.stats import beta, gamma, lognorm, norm, qmc, triang from sklearn.preprocessing import MinMaxScaler, minmax_scale @@ -343,7 +342,6 @@ def validate_parameters( if __name__ == "__main__": if "snakemake" not in globals(): - change_to_script_dir(__file__) snakemake = mock_snakemake( "monte_carlo", simpl="", diff --git a/scripts/non_workflow/zenodo_handler.py b/scripts/non_workflow/zenodo_handler.py index ab65fd7d7..41d31ccbc 100644 --- a/scripts/non_workflow/zenodo_handler.py +++ b/scripts/non_workflow/zenodo_handler.py @@ -18,7 +18,7 @@ Relevant Settings ----------------- """ -from pathlib import Path +import pathlib import zenodopy @@ -65,7 +65,7 @@ metadata=METADATA, ) for path in UPLOAD_PATHS: - path = Path.joinpath(Path(ROOT), path) + path = pathlib.Path.joinpath(pathlib.Path(ROOT), path) zeno.upload_zip(source_dir=str(path)) @@ -76,7 +76,7 @@ metadata=METADATA, ) for path in UPLOAD_PATHS: - path = Path.joinpath(Path(ROOT), path) + path = pathlib.Path.joinpath(pathlib.Path(ROOT), path) if path.exists(): if TYPE == "upload": if path.exists(): diff --git a/scripts/non_workflow/zip_folder.py b/scripts/non_workflow/zip_folder.py index 63dbddf24..4931d3bde 100644 --- a/scripts/non_workflow/zip_folder.py +++ b/scripts/non_workflow/zip_folder.py @@ -11,8 +11,6 @@ import pathlib import zipfile -from _helpers import sets_path_to_root - # Zip the files from given directory that matches the filter @@ -40,11 +38,8 @@ def zipFilesInDir(dirName, zipFileName, filter, include_parent=True): if __name__ == "__main__": # Set path to this file - os.chdir(pathlib.Path(__file__).parent.absolute()) - # Required to set path to pypsa-earth - sets_path_to_root("pypsa-earth") - -# Execute zip function -# zipFilesInDir("./resources", "resources.zip", lambda x: True, include_parent=False) -zipFilesInDir("./data", "data.zip", lambda x: True, include_parent=False) -# zipFilesInDir("./cutouts", "cutouts.zip", lambda x: True, include_parent=False) + + # Execute zip function + # zipFilesInDir("./resources", "resources.zip", lambda x: True, include_parent=False) + zipFilesInDir("./data", "data.zip", lambda x: True, include_parent=False) + # zipFilesInDir("./cutouts", "cutouts.zip", lambda x: True, include_parent=False) diff --git a/scripts/override_respot.py b/scripts/override_respot.py new file mode 100644 index 000000000..665af0654 --- /dev/null +++ b/scripts/override_respot.py @@ -0,0 +1,109 @@ +# -*- coding: utf-8 -*- +# SPDX-FileCopyrightText: PyPSA-Earth and PyPSA-Eur Authors +# +# SPDX-License-Identifier: AGPL-3.0-or-later + +import pandas as pd +import pypsa +from _helpers import mock_snakemake, override_component_attrs + + +def override_values(tech, year, dr): + custom_res_t = pd.read_csv( + snakemake.input["custom_res_pot_{0}_{1}_{2}".format(tech, year, dr)], + index_col=0, + parse_dates=True, + ).filter(buses, axis=1) + + custom_res = ( + pd.read_csv( + snakemake.input["custom_res_ins_{0}_{1}_{2}".format(tech, year, dr)], + index_col=0, + ) + .filter(buses, axis=0) + .reset_index() + ) + + custom_res["Generator"] = custom_res["Generator"].apply(lambda x: x + " " + tech) + custom_res = custom_res.set_index("Generator") + + if tech.replace("-", " ") in n.generators.carrier.unique(): + to_drop = n.generators[n.generators.carrier == tech].index + 
n.mremove("Generator", to_drop) + + if snakemake.wildcards["planning_horizons"] == 2050: + directory = "results/" + snakemake.params.run.replace("2050", "2030") + n_name = snakemake.input.network.split("/")[-1].replace( + n.config["scenario"]["clusters"], "" + ) + df = pd.read_csv(directory + "/res_caps_" + n_name, index_col=0) + # df = pd.read_csv(snakemake.config["custom_data"]["existing_renewables"], index_col=0) + existing_res = df.loc[tech] + existing_res.index = existing_res.index.str.apply(lambda x: x + tech) + else: + existing_res = custom_res["installedcapacity"].values + + n.madd( + "Generator", + buses, + " " + tech, + bus=buses, + carrier=tech, + p_nom_extendable=True, + p_nom_max=custom_res["p_nom_max"].values, + # weight=ds["weight"].to_pandas(), + # marginal_cost=custom_res["fixedomEuroPKW"].values * 1000, + capital_cost=custom_res["annualcostEuroPMW"].values, + efficiency=1.0, + p_max_pu=custom_res_t, + lifetime=custom_res["lifetime"][0], + p_nom_min=existing_res, + ) + + +if __name__ == "__main__": + if "snakemake" not in globals(): + snakemake = mock_snakemake( + "override_respot", + simpl="", + clusters="16", + ll="c1.0", + opts="Co2L", + planning_horizons="2030", + sopts="3H", + demand="AP", + discountrate=0.071, + ) + + overrides = override_component_attrs(snakemake.input.overrides) + n = pypsa.Network(snakemake.input.network, override_component_attrs=overrides) + m = n.copy() + if snakemake.params.custom_data["renewables"]: + buses = list(n.buses[n.buses.carrier == "AC"].index) + energy_totals = pd.read_csv(snakemake.input.energy_totals, index_col=0) + countries = snakemake.params.countries + if snakemake.params.custom_data["renewables"]: + techs = snakemake.params.custom_data["renewables"] + year = snakemake.wildcards["planning_horizons"] + dr = snakemake.wildcards["discountrate"] + + m = n.copy() + + for tech in techs: + override_values(tech, year, dr) + + else: + print("No RES potential techs to override...") + + if snakemake.params.custom_data["elec_demand"]: + for country in countries: + n.loads_t.p_set.filter(like=country)[buses] = ( + ( + n.loads_t.p_set.filter(like=country)[buses] + / n.loads_t.p_set.filter(like=country)[buses].sum().sum() + ) + * energy_totals.loc[country, "electricity residential"] + * 1e6 + ) + + n.export_to_netcdf(snakemake.output[0]) diff --git a/scripts/plot_network.py b/scripts/plot_network.py index 07c0115e4..1dc782cbd 100644 --- a/scripts/plot_network.py +++ b/scripts/plot_network.py @@ -27,12 +27,11 @@ from _helpers import ( aggregate_costs, aggregate_p, - change_to_script_dir, configure_logging, create_logger, + load_network_for_plots, mock_snakemake, ) -from add_electricity import load_costs, update_transmission_costs from matplotlib.legend_handler import HandlerPatch from matplotlib.patches import Circle, Ellipse @@ -41,6 +40,20 @@ logger = create_logger(__name__) +def assign_location(n): + for c in n.iterate_components(n.one_port_components | n.branch_components): + i_find = pd.Series(c.df.index.str.find(" ", start=4), c.df.index) + + for i in i_find.value_counts().index: + # these have already been assigned defaults + if i == -1: + continue + + names = i_find.index[i_find == i] + + c.df.loc[names, "location"] = names.str[:i] + + def make_handler_map_to_scale_circles_as_in(ax, dont_resize_actively=False): fig = ax.get_figure() @@ -357,43 +370,704 @@ def split_costs(n): ax.grid(True, axis="y", color="k", linestyle="dotted") -def load_network_for_plots( - fn, tech_costs, cost_config, elec_config, combine_hydro_ps=True 
+#############################################
+# plot Hydrogen infrastructure map
+#############################################
+
+
+def plot_h2_infra(network):
+    n = network.copy()
+
+    # assign_location(n)
+
+    bus_size_factor = 1e5
+    linewidth_factor = 4e2
+    # MW below which not drawn
+    line_lower_threshold = 1e2
+    bus_color = "m"
+    link_color = "c"
+
+    # Drop non-electric buses so they don't clutter the plot
+    n.buses.drop(n.buses.index[n.buses.carrier != "AC"], inplace=True)
+
+    elec = n.links.index[n.links.carrier == "H2 Electrolysis"]
+
+    bus_sizes = (
+        n.links.loc[elec, "p_nom_opt"].groupby(n.links.loc[elec, "bus0"]).sum()
+        / bus_size_factor
+    )
+
+    # 
make a fake MultiIndex so that area is correct for legend + bus_sizes.index = pd.MultiIndex.from_product([bus_sizes.index, ["electrolysis"]]) + + n.links.drop(n.links.index[n.links.carrier != "H2 pipeline"], inplace=True) + + link_widths = n.links.p_nom_opt / linewidth_factor + link_widths[n.links.p_nom_opt < line_lower_threshold] = 0.0 + + n.links.bus0 = n.links.bus0.str.replace(" H2", "") + n.links.bus1 = n.links.bus1.str.replace(" H2", "") + + print(link_widths.sort_values()) + + print(n.links[["bus0", "bus1"]]) + + fig, ax = plt.subplots(subplot_kw={"projection": ccrs.PlateCarree()}) + + fig.set_size_inches(10.5, 9) + + n.plot( + bus_sizes=bus_sizes, + bus_colors={"electrolysis": bus_color}, + link_colors=link_color, + link_widths=link_widths, + branch_components=["Link"], + color_geomap={"ocean": "lightblue", "land": "oldlace"}, + ax=ax, + # boundaries=(-20, 0, 25, 40), + ) + + handles = make_legend_circles_for( + [5000, 1000], scale=bus_size_factor, facecolor=bus_color + ) + labels = ["{} GW".format(s) for s in (5, 1)] + l2 = ax.legend( + handles, + labels, + loc="upper left", + bbox_to_anchor=(0.01, 1.01), + labelspacing=0.8, + framealpha=1.0, + title="Electrolyzer capacity", + handler_map=make_handler_map_to_scale_circles_as_in(ax), + ) + ax.add_artist(l2) + + handles = [] + labels = [] + + for s in (5, 1): + handles.append( + plt.Line2D([0], [0], color=link_color, linewidth=s * 1e3 / linewidth_factor) + ) + labels.append("{} GW".format(s)) + l1_1 = ax.legend( + handles, + labels, + loc="upper left", + bbox_to_anchor=(0.32, 1.01), + framealpha=1, + labelspacing=0.8, + handletextpad=1.5, + title="H2 pipeline capacity", + ) + ax.add_artist(l1_1) + + # fig.savefig(snakemake.output.hydrogen, bbox_inches='tight', transparent=True, + fig.savefig( + snakemake.output.map.replace("-costs-all", "-h2_network"), bbox_inches="tight" + ) + + +def plot_smr(network): + n = network.copy() + + # assign_location(n) + + bus_size_factor = 1e5 + linewidth_factor = 1e3 + # MW below which not drawn + line_lower_threshold = 1e2 + bus_color = "m" + link_color = "c" + + n.links.loc[:, "p_nom_opt"] = n.links.loc[:, "p_nom_opt"] + # n.links.loc[n.links.carrier == "H2 Electrolysis"].p_nom_opt + + # Drop non-electric buses so they don't clutter the plot + n.buses.drop(n.buses.index[n.buses.carrier != "AC"], inplace=True) + + elec = n.links.index[n.links.carrier == "SMR"] + + bus_sizes = ( + n.links.loc[elec, "p_nom_opt"].groupby(n.links.loc[elec, "bus0"]).sum() + / bus_size_factor + ) + + # make a fake MultiIndex so that area is correct for legend + bus_sizes.index = pd.MultiIndex.from_product([bus_sizes.index, ["SMR"]]) + + # n.links.drop(n.links.index[n.links.carrier != "H2 pipeline"], inplace=True) + + # link_widths = n.links.p_nom_opt / linewidth_factor + # link_widths[n.links.p_nom_opt < line_lower_threshold] = 0.0 + + # n.links.bus0 = n.links.bus0.str.replace(" H2", "") + # n.links.bus1 = n.links.bus1.str.replace(" H2", "") + + # print(link_widths.sort_values()) + + # print(n.links[["bus0", "bus1"]]) + + fig, ax = plt.subplots(subplot_kw={"projection": ccrs.PlateCarree()}) + + fig.set_size_inches(10.5, 9) + bus_sizes.index = bus_sizes.index.set_levels( + bus_sizes.index.levels[0].str.replace(" gas", ""), level=0 + ) + n.plot( + bus_sizes=bus_sizes, + bus_colors={"SMR": "darkolivegreen"}, + # link_colors=link_color, + # link_widths=link_widths, + branch_components=["Link"], + color_geomap={"ocean": "lightblue", "land": "oldlace"}, + ax=ax, + # boundaries=(-20, 0, 25, 40), + ) + + handles = 
make_legend_circles_for( + [5000, 1000], scale=bus_size_factor, facecolor="darkolivegreen" + ) + labels = ["{} GW".format(s) for s in (5, 1)] + l2 = ax.legend( + handles, + labels, + loc="upper left", + bbox_to_anchor=(0.01, 1.01), + labelspacing=0.8, + framealpha=1.0, + title="SMR capacity", + handler_map=make_handler_map_to_scale_circles_as_in(ax), + ) + ax.add_artist(l2) + + handles = [] + labels = [] + + for s in (5, 1): + handles.append( + plt.Line2D([0], [0], color=link_color, linewidth=s * 1e3 / linewidth_factor) + ) + labels.append("{} GW".format(s)) + l1_1 = ax.legend( + handles, + labels, + loc="upper left", + bbox_to_anchor=(0.32, 1.01), + framealpha=1, + labelspacing=0.8, + handletextpad=1.5, + title="H2 pipeline capacity", + ) + ax.add_artist(l1_1) + + # fig.savefig(snakemake.output.hydrogen, bbox_inches='tight', transparent=True, + fig.savefig(snakemake.output.map.replace("-costs-all", "-SMR"), bbox_inches="tight") + + +def plot_transmission_topology(network): + n = network.copy() + bus_size_factor = 1e5 # Def 1e5 + linewidth_factor = 2e4 # Def 1e4 + line_lower_threshold = 1e2 # MW below which not drawn. Def 1e3 + + DC_lines = n.links[n.links.carrier == "DC"] + + n.links = n.links[n.links.carrier == "H2 pipeline"] + n.links.bus0 = n.links.bus0.str.replace(" H2", "") + n.links.bus1 = n.links.bus1.str.replace(" H2", "") + + n.lines = pd.concat([n.lines, DC_lines[["bus0", "bus1"]]]) + + n.madd("Line", names=DC_lines.index, bus0=DC_lines.bus0, bus1=DC_lines.bus1) + + fig = plt.figure() + fig.set_size_inches(10.5, 9) + + n.plot( + branch_components=["Link", "Line"], + # boundaries=(-20, 0, 25, 40), + color_geomap={"ocean": "lightblue", "land": "oldlace"}, + line_colors="darkblue", + link_colors="turquoise", + link_widths=5, + bus_sizes=0.03, + bus_colors="red", + line_widths=1, + ) + + # Legend + Elec_Circle = plt.Line2D( + [0], + [0], + marker="o", + color="darkblue", + label="Clustered node", + markerfacecolor="red", + markersize=10, + ) + elec_Line = plt.Line2D( + [0], + [0], + marker="_", + color="darkblue", + label="Existing Power Lines", + markerfacecolor="w", + markersize=16, + lw=4, + ) + + H2_Line = plt.Line2D( + [0], + [0], + marker="_", + color="turquoise", + label="Allowed H2 Pipeline Routes", + markerfacecolor="w", + markersize=16, + lw=4, + ) + + plt.legend(handles=[Elec_Circle, elec_Line, H2_Line], loc="upper left") + + fig.savefig( + snakemake.output.map.replace("-costs-all", "-full_topology"), + bbox_inches="tight", + ) + + +preferred_order = pd.Index( + [ + "transmission lines", + "hydroelectricity", + "hydro reservoir", + "run of river", + "pumped hydro storage", + "solid biomass", + "biogas", + "onshore wind", + "offshore wind", + "offshore wind (AC)", + "offshore wind (DC)", + "solar PV", + "solar thermal", + "solar", + "building retrofitting", + "ground heat pump", + "air heat pump", + "heat pump", + "resistive heater", + "power-to-heat", + "gas-to-power/heat", + "CHP", + "OCGT", + "gas boiler", + "gas", + "natural gas", + "helmeth", + "methanation", + "hydrogen storage", + "power-to-gas", + "power-to-liquid", + "battery storage", + "hot water storage", + "CO2 sequestration", + ] +) + + +def rename_techs(label): + prefix_to_remove = [ + "residential ", + "services ", + "urban ", + "rural ", + "central ", + "decentral ", + ] + + rename_if_contains = [ + "CHP", + "gas boiler", + "biogas", + "solar thermal", + "air heat pump", + "ground heat pump", + "resistive heater", + "Fischer-Tropsch", + ] + + rename_if_contains_dict = { + "water tanks": "hot water 
storage", + "retrofitting": "building retrofitting", + "H2": "hydrogen storage", + "battery": "battery storage", + "CCS": "CCS", + } + + rename = { + "solar": "solar PV", + "Sabatier": "methanation", + "offwind": "offshore wind", + "offwind-ac": "offshore wind (AC)", + "offwind-dc": "offshore wind (DC)", + "onwind": "onshore wind", + "ror": "hydroelectricity", + "hydro": "hydroelectricity", + "PHS": "hydroelectricity", + "co2 Store": "DAC", + "co2 stored": "CO2 sequestration", + "AC": "transmission lines", + "DC": "transmission lines", + "B2B": "transmission lines", + } + + for ptr in prefix_to_remove: + if label[: len(ptr)] == ptr: + label = label[len(ptr) :] + + for rif in rename_if_contains: + if rif in label: + label = rif + + for old, new in rename_if_contains_dict.items(): + if old in label: + label = new + + for old, new in rename.items(): + if old == label: + label = new + return label + + +def rename_techs_tyndp(tech): + tech = rename_techs(tech) + if "heat pump" in tech or "resistive heater" in tech: + return "power-to-heat" + elif tech in ["methanation", "hydrogen storage", "helmeth"]: + return "power-to-gas" + elif tech in ["OCGT", "CHP", "gas boiler"]: + return "gas-to-power/heat" + elif "solar" in tech: + return "solar" + elif tech == "Fischer-Tropsch": + return "power-to-liquid" + elif "offshore wind" in tech: + return "offshore wind" + else: + return tech + + +def plot_sector_map( + network, + components=[ + "links", + "generators", + "stores", + ], # "storage_units"], #TODO uncomment after adding storage units + bus_size_factor=2e10, + transmission=False, + geometry=True, ): + n = network.copy() + assign_location(n) + # Drop non-electric buses so they don't clutter the plot + n.buses.drop(n.buses.index[n.buses.carrier != "AC"], inplace=True) + + costs = pd.DataFrame(index=n.buses.index) + + for comp in components: + df_c = getattr(n, comp) + df_c["nice_group"] = df_c.carrier.map(rename_techs_tyndp) + + attr = "e_nom_opt" if comp == "stores" else "p_nom_opt" + + costs_c = ( + (df_c.capital_cost * df_c[attr]) + .groupby([df_c.location, df_c.nice_group]) + .sum() + .unstack() + .fillna(0.0) + ) + costs = pd.concat([costs, costs_c], axis=1) + + print(comp, costs) + costs = costs.groupby(costs.columns, axis=1).sum() + + costs.drop(list(costs.columns[(costs == 0.0).all()]), axis=1, inplace=True) + + new_columns = preferred_order.intersection(costs.columns).append( + costs.columns.difference(preferred_order) + ) + costs = costs[new_columns] - n = pypsa.Network(fn) + for item in new_columns: + if item not in tech_colors: + print("Warning!", item, "not in config/plotting/tech_colors") - n.loads["carrier"] = n.loads.bus.map(n.buses.carrier) + " load" - n.stores["carrier"] = n.stores.bus.map(n.buses.carrier) + costs = costs.stack() # .sort_index() - n.links["carrier"] = ( - n.links.bus0.map(n.buses.carrier) + "-" + n.links.bus1.map(n.buses.carrier) + n.links.drop( + n.links.index[(n.links.carrier != "DC") & (n.links.carrier != "B2B")], + inplace=True, ) - n.lines["carrier"] = "AC line" - n.transformers["carrier"] = "AC transformer" - n.lines["s_nom"] = n.lines["s_nom_min"] - n.links["p_nom"] = n.links["p_nom_min"] + # drop non-bus + to_drop = costs.index.levels[0].symmetric_difference(n.buses.index) + if len(to_drop) != 0: + print("dropping non-buses", list(to_drop)) + costs.drop(to_drop, level=0, inplace=True, axis=0) + + # make sure they are removed from index + costs.index = pd.MultiIndex.from_tuples(costs.index.values) + + # PDF has minimum width, so set these to zero + 
line_lower_threshold = 500.0 + line_upper_threshold = 1e4 + linewidth_factor = 2e3 + ac_color = "gray" + dc_color = "m" + + # if snakemake.wildcards["lv"] == "1.0": #TODO when we add wildcard lv + # should be zero + line_widths = n.lines.s_nom_opt - n.lines.s_nom + link_widths = n.links.p_nom_opt - n.links.p_nom + title = "Technologies" + + if transmission: + line_widths = n.lines.s_nom_opt + link_widths = n.links.p_nom_opt + linewidth_factor = 2e3 + line_lower_threshold = 0.0 + title = "Technologies" + else: + line_widths = n.lines.s_nom_opt - n.lines.s_nom_min + line_widths = ( + n.lines.s_nom_opt - n.lines.s_nom_opt + ) # TODO when we add wildcard lv + link_widths = n.links.p_nom_opt - n.links.p_nom_min + title = "Transmission reinforcement" + + if transmission: + line_widths = n.lines.s_nom_opt + link_widths = n.links.p_nom_opt + title = "Total transmission" + + line_widths.loc[line_widths < line_lower_threshold] = 0.0 + link_widths.loc[link_widths < line_lower_threshold] = 0.0 + + line_widths.loc[line_widths > line_upper_threshold] = line_upper_threshold + link_widths.loc[link_widths > line_upper_threshold] = line_upper_threshold + + fig, ax = plt.subplots(subplot_kw={"projection": ccrs.PlateCarree()}) + fig.set_size_inches(10.5, 9) + + n.plot( + bus_sizes=costs / bus_size_factor, + bus_colors=tech_colors, + line_colors=ac_color, + link_colors=dc_color, + line_widths=line_widths / linewidth_factor, + link_widths=link_widths / linewidth_factor, + ax=ax, + # boundaries=(-20, 0, 25, 40), + geomap="10m", + color_geomap={"ocean": "lightblue", "land": "oldlace"}, + ) + + handles = make_legend_circles_for( + [5e9, 1e9], scale=bus_size_factor, facecolor="gray" + ) + labels = ["{} b€/a".format(s) for s in (5, 1)] + l2 = ax.legend( + handles, + labels, + loc="upper left", + bbox_to_anchor=(0.33, 1.005), + labelspacing=1.0, + framealpha=1.0, + title="System cost", + fontsize=12, + handler_map=make_handler_map_to_scale_circles_as_in(ax), + ) + ax.add_artist(l2) + + handles = [] + labels = [] - if combine_hydro_ps: - n.storage_units.loc[ - n.storage_units.carrier.isin({"PHS", "hydro"}), "carrier" - ] = "hydro+PHS" + for s in list(plot_labeles.keys()): + handles.append(plt.Line2D([0], [0], color=tech_colors[s], linewidth=5)) + labels.append("{}".format(s)) - # if the carrier was not set on the heat storage units - # bus_carrier = n.storage_units.bus.map(n.buses.carrier) - # n.storage_units.loc[bus_carrier == "heat","carrier"] = "water tanks" + l1_1 = ax.legend( + handles, + labels, + loc="upper left", + bbox_to_anchor=(0.001, 1.002), + framealpha=1, + labelspacing=0.4, + handletextpad=1.5, + fontsize=10, + ) - Nyears = n.snapshot_weightings.objective.sum() / 8760.0 - costs = load_costs(tech_costs, cost_config, elec_config, Nyears) - update_transmission_costs(n, costs) + ax.add_artist(l1_1) - return n + # import matplotlib.patches as mpatches + # red_patch = mpatches.Patch(color='red', label='The red data') + # plt.legend(handles=[red_patch]) + + fig.savefig(snakemake.output.map, transparent=True, bbox_inches="tight") + fig.savefig( + snakemake.output.map.replace("pdf", "png"), + transparent=True, + bbox_inches="tight", + ) + # fig.savefig('plot_map.pdf', transparent=True, + # bbox_inches="tight")#, dpi=300) + + +plot_labeles = { + "onshore wind": "b", + "offshore wind": "c", + "hydroelectricity": "", + "solar": "y", + "power-to-gas": "#FF1493", + "gas-to-power/heat": "orange", + "power-to-heat": "", + "power-to-liquid": "", + "DAC": "", + "electricity distribution grid": "", +} + + +nice_names = { 
+ "OCGT": "Gas", + "OCGT marginal": "Gas (marginal)", + "offwind": "offshore wind", + "onwind": "onshore wind", + "battery": "Battery storage", + "lines": "Transmission lines", + "AC line": "AC lines", + "AC-AC": "DC lines", + "ror": "Run of river", +} + +nice_names_n = { + "offwind": "offshore\nwind", + "onwind": "onshore\nwind", + "OCGT": "Gas", + "H2": "Hydrogen\nstorage", + "OCGT marginal": "Gas (marginal)", + "lines": "transmission\nlines", + "ror": "run of river", +} if __name__ == "__main__": if "snakemake" not in globals(): - change_to_script_dir(__file__) snakemake = mock_snakemake( "plot_network", network="elec", @@ -407,49 +1081,64 @@ def load_network_for_plots( configure_logging(snakemake) - # load africa shape to identify borders of the image - africa_shape = gpd.read_file(snakemake.input.africa_shape)["geometry"].iloc[0] + if snakemake.rule == "plot_network": - set_plot_style() + # load africa shape to identify borders of the image + africa_shape = gpd.read_file(snakemake.input.africa_shape)["geometry"].iloc[0] - opts = snakemake.params.plotting - map_figsize = opts["map"]["figsize"] - map_boundaries = opts["map"]["boundaries"] + set_plot_style() - if len(map_boundaries) != 4: - map_boundaries = africa_shape.boundary.bounds + opts = snakemake.params.plotting + map_figsize = opts["map"]["figsize"] + map_boundaries = opts["map"]["boundaries"] - n = load_network_for_plots( - snakemake.input.network, - snakemake.input.tech_costs, - snakemake.params.costs, - snakemake.params.electricity, - ) + if len(map_boundaries) != 4: + map_boundaries = africa_shape.boundary.bounds - scenario_opts = snakemake.wildcards.opts.split("-") + n = load_network_for_plots( + snakemake.input.network, + snakemake.input.tech_costs, + snakemake.params.costs, + snakemake.params.electricity, + ) - fig, ax = plt.subplots( - figsize=map_figsize, subplot_kw={"projection": ccrs.PlateCarree()} - ) - plot_map(n, ax, snakemake.wildcards.attr, opts) + scenario_opts = snakemake.wildcards.opts.split("-") - fig.savefig(snakemake.output.only_map, dpi=150, bbox_inches="tight") + fig, ax = plt.subplots( + figsize=map_figsize, subplot_kw={"projection": ccrs.PlateCarree()} + ) + plot_map(n, ax, snakemake.wildcards.attr, opts) + + fig.savefig(snakemake.output.only_map, dpi=150, bbox_inches="tight") - ax1 = fig.add_axes([-0.115, 0.625, 0.2, 0.2]) - plot_total_energy_pie(n, ax1) + ax1 = fig.add_axes([-0.115, 0.625, 0.2, 0.2]) + plot_total_energy_pie(n, ax1) - ax2 = fig.add_axes([-0.075, 0.1, 0.1, 0.45]) - plot_total_cost_bar(n, ax2) + ax2 = fig.add_axes([-0.075, 0.1, 0.1, 0.45]) + plot_total_cost_bar(n, ax2) - ll = snakemake.wildcards.ll - ll_type = ll[0] - ll_factor = ll[1:] - lbl = dict(c="line cost", v="line volume")[ll_type] - amnt = "{ll} x today's".format(ll=ll_factor) if ll_factor != "opt" else "optimal" - fig.suptitle( - "Expansion to {amount} {label} at {clusters} clusters".format( - amount=amnt, label=lbl, clusters=snakemake.wildcards.clusters + ll = snakemake.wildcards.ll + ll_type = ll[0] + ll_factor = ll[1:] + lbl = dict(c="line cost", v="line volume")[ll_type] + amnt = ( + "{ll} x today's".format(ll=ll_factor) if ll_factor != "opt" else "optimal" ) - ) + fig.suptitle( + "Expansion to {amount} {label} at {clusters} clusters".format( + amount=amnt, label=lbl, clusters=snakemake.wildcards.clusters + ) + ) + + fig.savefig(snakemake.output.ext, transparent=True, bbox_inches="tight") + + if snakemake.rule == "plot_sector_network": + + n = pypsa.Network(snakemake.input.network) - fig.savefig(snakemake.output.ext, 
transparent=True, bbox_inches="tight") + tech_colors = snakemake.config["plotting"]["tech_colors"] + plot_sector_map(n, transmission=False) + plot_transmission_topology(n) + if snakemake.config["sector"]["SMR"]: + plot_smr(n) + plot_h2_infra(n) diff --git a/scripts/plot_summary.py b/scripts/plot_summary.py index d89ef53ec..a2618bdc1 100644 --- a/scripts/plot_summary.py +++ b/scripts/plot_summary.py @@ -19,13 +19,7 @@ import matplotlib.pyplot as plt import pandas as pd -from _helpers import ( - change_to_script_dir, - configure_logging, - create_logger, - get_path, - mock_snakemake, -) +from _helpers import configure_logging, create_logger, get_path, mock_snakemake logger = create_logger(__name__) @@ -223,7 +217,6 @@ def plot_energy(infn, snmk, fn=None): if __name__ == "__main__": if "snakemake" not in globals(): - change_to_script_dir(__file__) snakemake = mock_snakemake( "plot_summary", summary="energy", diff --git a/scripts/prepare_airports.py b/scripts/prepare_airports.py new file mode 100644 index 000000000..79cb91018 --- /dev/null +++ b/scripts/prepare_airports.py @@ -0,0 +1,115 @@ +# -*- coding: utf-8 -*- +# SPDX-FileCopyrightText: PyPSA-Earth and PyPSA-Eur Authors +# +# SPDX-License-Identifier: AGPL-3.0-or-later + +import pandas as pd +from _helpers import mock_snakemake + +# logger = logging.getLogger(__name__) + + +def download_airports(): + """ + Downloads the world airports as .csv File in addition to runnways + information. + + The following csv file was downloaded from the webpage + https://ourairports.com/data/ + as a .csv file. The dataset contains 74844 airports. + """ + fn = "https://davidmegginson.github.io/ourairports-data/airports.csv" + storage_options = {"User-Agent": "Mozilla/5.0"} + airports_csv = pd.read_csv( + fn, index_col=0, storage_options=storage_options, encoding="utf8" + ) + + fn = "https://davidmegginson.github.io/ourairports-data/runways.csv" + storage_options = {"User-Agent": "Mozilla/5.0"} + runways_csv = pd.read_csv( + fn, index_col=0, storage_options=storage_options, encoding="utf8" + ) + + return (airports_csv, runways_csv) + + +if __name__ == "__main__": + if "snakemake" not in globals(): + snakemake = mock_snakemake("prepare_airports") + + # run = snakemake.config.get("run", {}) + # RDIR = run["name"] + "/" if run.get("name") else "" + # store_path_data = Path.joinpath(Path().cwd(), "data") + # country_list = country_list_to_geofk(snakemake.config["countries"])' + + # Prepare downloaded data + airports_csv = download_airports()[0].copy() + airports_csv = airports_csv[ + [ + "ident", + "type", + "name", + "latitude_deg", + "longitude_deg", + "elevation_ft", + "continent", + "iso_country", + "iso_region", + "municipality", + "scheduled_service", + "iata_code", + ] + ] + airports_csv.loc[airports_csv["iso_country"].isnull(), "iso_country"] = "NA" + airports_csv = airports_csv.rename(columns={"latitude_deg": "y"}) + airports_csv = airports_csv.rename(columns={"longitude_deg": "x"}) + + runways_csv = download_airports()[1].copy() + runways_csv = runways_csv[ + ["airport_ident", "length_ft", "width_ft", "surface", "lighted", "closed"] + ] + runways_csv = runways_csv.drop_duplicates(subset=["airport_ident"]) + + airports_original = pd.merge( + airports_csv, runways_csv, how="left", left_on="ident", right_on="airport_ident" + ) + airports_original = airports_original.drop("airport_ident", axis=1) + + df = airports_original.copy() + + # Keep only airports that are of type medium and large + df = df.loc[df["type"].isin(["large_airport", "medium_airport"])] + 
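The per-country weighting computed further below follows a groupby-merge-divide pattern; a toy sketch with hypothetical airports:

import pandas as pd

df = pd.DataFrame(
    {"country": ["MA", "MA", "NG"], "airport_size_nr": [1, 4, 1]}
)

totals = (
    df.groupby("country")["airport_size_nr"]
    .sum()
    .rename("Total_airport_size_nr")
    .reset_index()
)
airports = df.merge(totals, on="country", how="left")

# each airport's share of its country's total size score
airports["fraction"] = airports["airport_size_nr"] / airports["Total_airport_size_nr"]
# -> 0.2 and 0.8 for the two MA airports, 1.0 for the NG airport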
+ # Filtering out the military airbases and keeping only commercial airports + df = df[~df.iata_code.isnull()] + + # Keep only airports that have schedules + df = df.loc[df["scheduled_service"].isin(["yes"])] + + df.insert(2, "airport_size_nr", 1) + df.loc[df["type"].isin(["medium_airport"]), "airport_size_nr"] = 1 + df.loc[df["type"].isin(["large_airport"]), "airport_size_nr"] = ( + snakemake.params.airport_sizing_factor + ) + + # Calculate the number of total airports size + df1 = df.copy() + df1 = df1.groupby(["iso_country"]).sum("airport_size_nr") + df1 = df1[["airport_size_nr"]] + df1 = df1.rename(columns={"airport_size_nr": "Total_airport_size_nr"}).reset_index() + + # Merge dataframes to get additional info on runnway for most ports + airports = pd.merge( + df, df1, how="left", left_on="iso_country", right_on="iso_country" + ) + + # Calculate fraction based on size + airports["fraction"] = ( + airports["airport_size_nr"] / airports["Total_airport_size_nr"] + ) + + # Rename columns + airports = airports.rename(columns={"iso_country": "country"}) + + # Save + airports.to_csv(snakemake.output[0], sep=",", encoding="utf-8", header="true") diff --git a/scripts/prepare_db.py b/scripts/prepare_db.py new file mode 100644 index 000000000..21a4122cd --- /dev/null +++ b/scripts/prepare_db.py @@ -0,0 +1,496 @@ +# -*- coding: utf-8 -*- +# SPDX-FileCopyrightText: PyPSA-Earth and PyPSA-Eur Authors +# +# SPDX-License-Identifier: AGPL-3.0-or-later +""" +Created on Sun May 30 18:11:07 2021. + +@author: haz43975 +""" + + +# -*- coding: utf-8 -*- +""" +Created on Tue May 4 10:22:36 2021 + +@author: haz43975 +""" + +import matplotlib.pyplot as plt +import pandas as pd +import pypsa +from _helpers import mock_snakemake + +# %% + +# base_path = os.path.dirname(os.path.realpath(__file__)) +# dataset_paths = {'IGG': os.path.join(base_path, 'IGG', 'data'), +# 'EMAP': os.path.join(base_path, 'EMAP', 'data')} + +if __name__ == "__main__": + if "snakemake" not in globals(): + + snakemake = mock_snakemake( + "prepare_db", + simpl="", + clusters="244", + ll="c1.0", + opts="Co2L1", + planning_horizons="2030", + sopts="720H", + discountrate=0.071, + demand="AP", + h2export="0", + ) + + n0 = pypsa.Network(snakemake.input.network) + + tech_colors = snakemake.params.tech_colors + + +# %% +# def summary_h2(n, t): +t = 720 + +n = n0.copy() +# n = pypsa.Network("../results/MA_REALISTIC_2030/postnetworks/elec_s_195_ec_lc1.0_Co2L_3H_2030_0.071_AP_428export.nc") +# n = pypsa.Network("../results/MA_REALISTIC_2030_Q0_NoGreeness/postnetworks/elec_s_198_ec_lc1.0_Co2L_3H_2030_0.071_AP_0export.nc") +# n = pypsa.Network("../results/MA_REALISTIC_2030_Q0_oilnew_13/postnetworks/elec_s_213_ec_lc1.0_Co2L_720H_2030_0.071_AP_0export.nc") +summary_index = (n0.buses.loc[n0.buses.carrier == "AC"].index).sort_values() + +nodes = n.buses.loc[n.buses.carrier == "AC"].index.tolist() +gens = n.generators_t.p.rename_axis(None, axis=1) * t # /1e3 +loads = n.loads_t.p.rename_axis(None, axis=1) * t # /1e3 +stores = n.stores_t.p.rename_axis(None, axis=1) * t # /1e3 +storage = n.storage_units_t.p.rename_axis(None, axis=1) * t # /1e3 + +pipelines_h2 = n.links_t.p0.filter(like="H2 pipeline") +ac_lines = n.lines_t.p0.rename(columns=dict(n.lines.bus0 + " -> " + n.lines.bus1)) + +dc_lines = n.links_t.p0[ + n.links_t.p0.columns.intersection(n.links[n.links.carrier == "DC"].index.tolist()) +].rename( + columns=dict( + n.links[n.links.carrier == "DC"].bus0 + + " -> " + + n.links[n.links.carrier == "DC"].bus1 + ) +) + +summary_h2 = 
diff --git a/scripts/prepare_db.py b/scripts/prepare_db.py
new file mode 100644
index 000000000..21a4122cd
--- /dev/null
+++ b/scripts/prepare_db.py
@@ -0,0 +1,496 @@
+# -*- coding: utf-8 -*-
+# SPDX-FileCopyrightText: PyPSA-Earth and PyPSA-Eur Authors
+#
+# SPDX-License-Identifier: AGPL-3.0-or-later
+"""
+Created on Sun May 30 18:11:07 2021.
+
+@author: haz43975
+"""
+
+import matplotlib.pyplot as plt
+import pandas as pd
+import pypsa
+from _helpers import mock_snakemake
+
+# %%
+
+# base_path = os.path.dirname(os.path.realpath(__file__))
+# dataset_paths = {'IGG': os.path.join(base_path, 'IGG', 'data'),
+#                  'EMAP': os.path.join(base_path, 'EMAP', 'data')}
+
+if __name__ == "__main__":
+    if "snakemake" not in globals():
+
+        snakemake = mock_snakemake(
+            "prepare_db",
+            simpl="",
+            clusters="244",
+            ll="c1.0",
+            opts="Co2L1",
+            planning_horizons="2030",
+            sopts="720H",
+            discountrate=0.071,
+            demand="AP",
+            h2export="0",
+        )
+
+    n0 = pypsa.Network(snakemake.input.network)
+
+    tech_colors = snakemake.params.tech_colors
+
+
+# %%
+# def summary_h2(n, t):
+t = 720
+
+n = n0.copy()
+# n = pypsa.Network("../results/MA_REALISTIC_2030/postnetworks/elec_s_195_ec_lc1.0_Co2L_3H_2030_0.071_AP_428export.nc")
+# n = pypsa.Network("../results/MA_REALISTIC_2030_Q0_NoGreeness/postnetworks/elec_s_198_ec_lc1.0_Co2L_3H_2030_0.071_AP_0export.nc")
+# n = pypsa.Network("../results/MA_REALISTIC_2030_Q0_oilnew_13/postnetworks/elec_s_213_ec_lc1.0_Co2L_720H_2030_0.071_AP_0export.nc")
+summary_index = (n0.buses.loc[n0.buses.carrier == "AC"].index).sort_values()
+
+nodes = n.buses.loc[n.buses.carrier == "AC"].index.tolist()
+gens = n.generators_t.p.rename_axis(None, axis=1) * t  # /1e3
+loads = n.loads_t.p.rename_axis(None, axis=1) * t  # /1e3
+stores = n.stores_t.p.rename_axis(None, axis=1) * t  # /1e3
+storage = n.storage_units_t.p.rename_axis(None, axis=1) * t  # /1e3
+
+pipelines_h2 = n.links_t.p0.filter(like="H2 pipeline")
+ac_lines = n.lines_t.p0.rename(columns=dict(n.lines.bus0 + " -> " + n.lines.bus1))
+
+dc_lines = n.links_t.p0[
+    n.links_t.p0.columns.intersection(n.links[n.links.carrier == "DC"].index.tolist())
+].rename(
+    columns=dict(
+        n.links[n.links.carrier == "DC"].bus0
+        + " -> "
+        + n.links[n.links.carrier == "DC"].bus1
+    )
+)
+
+summary_h2 = pd.DataFrame(index=n0.buses.loc[n0.buses.carrier == "AC"].index)
+
+solar = (gens.filter(regex="solar$")).reset_index()
+
+summary_elec = pd.DataFrame(index=n0.buses.loc[n0.buses.carrier == "AC"].index)
+
+db = pd.DataFrame(columns=["node_id", "carrier", "flow", "tech", "value"])
+
+names = {"g": "Generator"}
+
+
+def populate_db(tech_col, carrier, flow, tech, ngv=False):  # TODO Add scenario id
+    global db
+    dbf = tech_col.copy()
+    # if tech != 'ac':
+    #     dbf.name = dbf.name.str.replace(' ' + tech, '')
+    dbf = (
+        dbf.stack()
+        .reset_index(level=0)
+        .rename(columns={"snapshot": "DateTime", 0: "value"})
+        .reset_index()
+        .rename(columns={"index": "node_id"})
+    )
+    dbf.node_id = dbf.node_id.str.replace(" " + tech, "")
+    # dbf.columns = ['node_id', 'value']
+    dbf["carrier"] = carrier
+    dbf["flow"] = flow
+    dbf["tech"] = tech
+    if flow == "s":
+        dbf["value"] = dbf["value"]
+    else:
+        if ngv:
+            dbf["value"] = -1 * abs(dbf["value"])
+        else:
+            dbf["value"] = abs(dbf["value"])
+
+    db = pd.concat([db, dbf])  # DataFrame.append was removed in pandas 2.0
+
+
+def add_gen(tech, carrier, reg=False):
+    if not reg:
+        tech_col = gens.filter(like=tech)
+    else:
+        tech_col = gens.filter(regex=tech + "$")
+
+    # tech_col.columns = tech_col.columns.str.replace(' ' + tech, '')
+    populate_db(tech_col, carrier, "g", tech, ngv=False)
+    # summary_elec['{0}_g_{1}'.format(carrier, tech.replace(' ', '_'))] = tech_col.sum()
+
+
+def add_load(tech, carrier, reg=False):
+    global db
+    if tech == "ac":
+        ac_labels = loads.stack().reset_index(level=1).level_1
+        ac_labels = ac_labels[ac_labels.str.len() < 11].unique()
+        tech_col = loads.filter(ac_labels.tolist())
+        # ac_labels = loads.reset_index()[loads.reset_index().name.str.len() < 7].name.tolist()  # TODO hard coded
+        # tech_col = loads[ac_labels]
+
+        # summary_elec['elec_l_ac'] = loads
+    else:
+        if not reg:
+            tech_col = loads.filter(regex=tech)
+        else:
+            tech_col = loads.filter(regex=tech + "$")
+
+    populate_db(tech_col, carrier, "l", tech, ngv=True)
+
+    # tech_col.index = tech_col.name.apply(lambda x: x.replace(' ' + tech, ''))
+    # summary_elec['{0}_l_{1}'.format(carrier, tech.replace(' ', '_'))] = -tech_col[0]
+
+
+# Check node_id in db
+
+
+def add_conv(tech, carrier, p, ngv, reg=False):
+    global db
+    if p == 0:
+        links = n.links_t.p0.rename_axis(None, axis=1) * t  # /1e3
+    elif p == 1:
+        links = n.links_t.p1.rename_axis(None, axis=1) * t  # /1e3
+    elif p == 2:
+        links = n.links_t.p2.rename_axis(None, axis=1) * t  # /1e3
+    elif p == 3:
+        links = n.links_t.p3.rename_axis(None, axis=1) * t  # /1e3
+    else:
+        links = n.links_t.p4.rename_axis(None, axis=1) * t  # /1e3
+
+    if tech == "battery charger" or tech == "battery discharger":
+        drop_list = links.filter(like="home battery").columns.tolist()
+        tech_col = links.drop(drop_list, axis=1).filter(like=tech)
+    else:
+        if not reg:
+            tech_col = links.filter(like=tech)
+        else:
+            tech_col = links.filter(regex=tech + "$")
+    populate_db(tech_col, carrier, "c", tech, ngv)
+
+
+# tech_col.index = tech_col.name.apply(lambda x: x.replace(' ' + tech, ''))
+# summary_elec['{0}_c_{1}'.format(carrier, tech.replace(' ', '_'))] = -tech_col[0]
+
+
+def add_store(tech, carrier, reg=False):
+    global db
+    if not reg:
+        tech_col = stores.filter(like=tech)
+    else:
+        tech_col = stores.filter(regex=tech + "$")
+
+    if tech == "co2 atmosphere" or tech == "co2 stored":
+        tech_col *= -1
+    populate_db(tech_col, carrier, "s", tech)
+
+
+# tech_col.index = tech_col.name.apply(lambda x: x.replace(' ' + tech, ''))
+# summary_elec['{0}_s_{1}'.format(carrier, tech.replace(' ', '_'))] = tech_col[0]
+
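[Editor's note] For orientation, `populate_db` above reshapes a wide snapshots-by-columns frame into the long `(node_id, carrier, flow, tech, value)` records that `db` accumulates. A minimal sketch of the same stack/rename chain on toy data (all names hypothetical):

    import pandas as pd

    snapshots = pd.date_range("2013-01-01", periods=2, freq="h", name="snapshot")
    wide = pd.DataFrame({"MA0 solar": [1.0, 2.0], "MA1 solar": [3.0, 4.0]}, index=snapshots)
    long = (
        wide.stack()
        .reset_index(level=0)
        .rename(columns={"snapshot": "DateTime", 0: "value"})
        .reset_index()
        .rename(columns={"index": "node_id"})
    )
    long["node_id"] = long["node_id"].str.replace(" solar", "")
    print(long)  # one row per (node, snapshot) pair
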
+
+
+def add_storage(
+    tech, carrier, reg=False
+):  # TODO check: previously commented out because there were no storage units
+    global db
+    if not reg:
+        tech_col = storage.filter(like=tech)
+    else:
+        tech_col = storage.filter(regex=tech + "$")
+    populate_db(tech_col, carrier, "s", tech)
+    # tech_col.index = tech_col.name.apply(lambda x: x.replace(' ' + tech, ''))
+    # summary_elec['{0}_s_{1}'.format(carrier, tech.replace(' ', '_'))] = tech_col[0]
+
+
+def net_flow(co_code, tech, carrier, flow):
+    global db
+    if tech == "h2flow":
+        tech_df = pipelines_h2
+    elif tech == "acflow":
+        tech_df = ac_lines
+    elif tech == "dcflow":
+        tech_df = dc_lines
+    else:
+        pass
+
+    inflow = tech_df.filter(regex="{}$".format(co_code)).sum(axis=1) * t
+    outflow = tech_df.filter(like="{} -> ".format(co_code)).sum(axis=1) * t
+
+    dbf = pd.DataFrame()
+    dbf["DateTime"] = inflow.index.copy()
+    dbf["node_id"] = co_code
+    dbf["carrier"] = carrier
+    dbf["flow"] = flow
+    dbf["tech"] = tech
+    dbf["value"] = (inflow - outflow).reset_index(drop=True)  # /10**6
+    db = pd.concat([db, dbf])  # DataFrame.append was removed in pandas 2.0
+    return dbf
+
+
+temp = pd.DataFrame(data=nodes)
+temp[0].apply(net_flow, args=("h2flow", "h2", "t"))
+temp[0].apply(net_flow, args=("acflow", "hv", "t"))
+temp[0].apply(net_flow, args=("dcflow", "hv", "t"))
+
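[Editor's note] The `net_flow` bookkeeping above hinges on two column filters: `regex="{}$"` catches lines ending at a node (its inflows), while `like="{} -> "` catches lines leaving it (its outflows). A toy check with hypothetical line names (note the end-anchored regex also matches any longer name ending in the node string):

    import pandas as pd

    flows = pd.DataFrame({"MA0 -> MA1": [2.0], "MA1 -> MA2": [5.0]})
    inflow = flows.filter(regex="MA1$").sum(axis=1)     # lines arriving at MA1
    outflow = flows.filter(like="MA1 -> ").sum(axis=1)  # lines leaving MA1
    print((inflow - outflow).iloc[0])  # -3.0, i.e. net export from MA1
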
"heat") +add_gen("urban central solar thermal collector", "heat") + +add_load("residential rural heat", "heat") +add_load("services rural heat", "heat") +add_load("urban central heat", "heat") +add_load("low-temperature heat for industry", "heat") + +add_conv("residential rural water tanks charger", "heat", 0, True) +add_conv("services rural water tanks charger", "heat", 0, True) +add_conv("urban central water tanks charger", "heat", 0, True) +add_conv("residential rural ground heat pump", "heat", 1, False) +add_conv("residential rural water tanks discharger", "heat", 1, False) +add_conv("residential rural resistive heater", "heat", 1, False) +add_conv("residential rural gas boiler", "heat", 1, False) +add_conv("services rural ground heat pump", "heat", 1, False) +add_conv("services rural water tanks discharger", "heat", 1, False) +add_conv("services rural resistive heater", "heat", 1, False) +add_conv("services rural gas boiler", "heat", 1, False) +add_conv("urban central air heat pump", "heat", 1, False) +add_conv("urban central water tanks discharger", "heat", 1, False) +add_conv("urban central resistive heater", "heat", 1, False) +add_conv("urban central gas boiler", "heat", 1, False) +add_conv("H2 Fuel Cell", "heat", 2, False) +add_conv("urban central gas CHP", "heat", 2, False) +add_conv("urban central gas CHP CC", "heat", 2, False) +add_conv("urban central solid biomass CHP", "heat", 2, False) +add_conv("urban central solid biomass CHP CC", "heat", 2, False) +add_conv("DAC", "heat", 3, True) +add_conv("Fischer-Tropsch", "heat", 3, False) + + +add_conv("services urban decentral DAC", "co2", 0, False) +add_conv("urban central DAC", "co2", 0, False) + +add_conv("process emissions", "co2", 1, True, True) +add_conv("process emissions CC", "co2", 1, True) +add_conv("co2 vent", "co2", 1, True) + +add_conv("OCGT", "co2", 2, True) +add_conv("biogas to gas", "co2", 2, False) +add_conv("biomass EOP", "co2", 2, True) +add_conv("residential rural gas boiler", "co2", 2, True) +add_conv("services rural gas boiler", "co2", 2, True) +add_conv("residential urban decentral gas boiler", "co2", 2, True) +add_conv("services urban decentral gas boiler", "co2", 2, True) +add_conv("urban central gas boiler", "co2", 2, True) +add_conv("solid biomass for industry CC", "co2", 2, False) +add_conv("gas for industry", "co2", 2, True) +add_conv("gas for industry CC", "co2", 2, True) + + +add_load("industry oil emissions", "co2") +add_load("shipping oil emissions", "co2") +add_load("land transport oil emissions", "co2") +add_load("aviation oil emissions", "co2") +add_load("residential oil emissions", "co2") +add_load("residential biomass emissions", "co2") +add_load("services biomass emissions", "co2") + +# add_store("co2 stored", "co2") +add_store("co2 atmosphere", "co2") + +add_conv("Fischer-Tropsch", "oil", 1, False) + +add_load("naphtha for industry", "oil") +add_load("residential oil", "oil", reg=True) +add_load("rail transport oil", "oil", reg=True) +add_load("agriculture oil", "oil", reg=True) +add_load("shipping oil", "oil", reg=True) +add_load("land transport oil", "oil", reg=True) # mistakenly add oil emissions +add_load("kerosene for aviation", "oil") +add_store("oil Store", "oil", 1) +add_gen("oil", "oil") + +# add_load("gas for industry", "gas") +add_conv("OCGT", "gas", 0, True) +add_conv("residential rural gas boiler", "gas", 0, True) +add_conv("services rural gas boiler", "gas", 0, True) +add_conv("residential urban decentral gas boiler", "gas", 0, True) +add_conv("services urban decentral gas 
boiler", "gas", 0, True) +add_conv("gas for industry", "gas", 0, True, True) +add_conv("gas for industry CC", "gas", 0, True) +add_conv("urban central gas boiler", "gas", 0, True) + +add_conv("Sabatier", "gas", 1, False) +add_conv("helmeth", "gas", 1, False) + +add_store("gas Store", "gas") +add_gen("gas", "gas") + +add_conv("biogas to gas", "gas", 1, False) + +add_conv("gas for industry", "gas", 1, True) +add_conv("gas for industry CC", "gas", 1, True) + + +add_conv("co2 vent", "co2 stored", 0, True) +# add_conv("CO2 pipeline", "co2 stored", 0, True) +add_conv("DAC", "co2 stored", 1, True) + +add_conv("Fischer-Tropsch", "co2 stored", 2, False) +add_conv("Sabatier", "co2 stored", 2, False) +add_conv("helmeth", "co2 stored", 2, False) +add_conv("process emissions CC", "co2 stored", 2, True) + +add_conv("solid biomass for industry CC", "co2 stored", 3, True) +add_conv("gas for industry CC", "co2 stored", 3, True) + +# add_store("co2 stored") +add_store("co2 stored", "co2 stored") + + +# add +# REMEMBER TO ADD ALL DECENTRAL SHIT +# %% + +# summary_elec['h2_t_pipeline'] = summary_elec.apply(lambda row: h2_net_flow(n0, row.name, 24), axis=1) + +h2_flows = pd.DataFrame(index=pipelines_h2.index.copy(), columns=["node_id", "flow"]) + +# summary_elec['h2_balance'] = summary_elec.sum(axis=1) +# summary_elec=summary_elec.apply(lambda x: round(x, 2)) + +db.reset_index(drop=True, inplace=True) +# round(db).to_csv('db_fraction.csv') +round(db).to_csv(snakemake.output.db) +yearly_agg = round(db.groupby([db.node_id, db.carrier, db.flow, db.tech]).sum() / 1e3) + + +# yearly_agg.to_csv('summary_db.csv') +# yearly_agg.to_csv(snakemake.output.yr_agg) +# %% +def calc_energy_flow(carrier, node_id): + agg = yearly_agg.reset_index() + agg = agg[(agg.carrier == carrier)] + agg.value = agg.value.apply(int) + if node_id == "all": + agg = agg.groupby("tech").sum().reset_index() + else: + agg = agg[agg.node_id.str.contains(node_id)].groupby("tech").sum().reset_index() + return agg + + +def fetch_data_2(carrier, node_id): + agg = db[db.DateTime == "2013-02-01"].drop("DateTime", axis=1) + + return agg[(agg.carrier == carrier) & (agg.node_id == node_id)] + + +def energy_pie(carrier, node_id, sign): + sign_dict = {1: "generation", -1: "consumption"} + agg = yearly_agg.reset_index() + agg = agg[(agg.carrier == carrier) & (agg.value * sign > 0)] + + if node_id == "all": + agg = agg[(agg.carrier == carrier) & (agg.value * sign > 0) & (agg.flow != "t")] + else: + agg = agg[agg.node_id.str.contains(node_id)] + agg = agg.groupby("tech").sum().reset_index() + agg["pct"] = round(agg["value"] / agg.value.sum(), 3) + if agg.pct.sum() < 1: + agg = agg.append( + pd.DataFrame([["other", 0, 1 - agg.pct.sum()]], columns=agg.columns) + ) + agg = agg[agg.pct > 0.009] + + fig1, ax1 = plt.subplots() # figsize=(6, 4)) + ax1.pie( + agg.pct, + labels=agg.tech, + autopct="%1.0f%%", + colors=[tech_colors.get(key) for key in agg.tech.values.tolist()], + explode=[0.05] * len(agg), + ) + ax1.axis("equal") + plt.title( + "Yearly aggregate {0} of {1} at {2} node(s)\n".format( + sign_dict[sign], carrier, node_id + ) + + "Value = {} GWh".format(round(agg.value.sum(), 1)), + # bbox={"facecolor": "0.8", "pad": 5}, + ) + plt.show() + fig1.savefig( + "Yearly_aggregate_{0}_of_{1}_at_{2}_node(s).png".format( + sign_dict[sign], carrier, node_id + ), + dpi=100, + ) diff --git a/scripts/prepare_energy_totals.py b/scripts/prepare_energy_totals.py new file mode 100644 index 000000000..3d1388419 --- /dev/null +++ b/scripts/prepare_energy_totals.py @@ -0,0 +1,293 
diff --git a/scripts/prepare_energy_totals.py b/scripts/prepare_energy_totals.py
new file mode 100644
index 000000000..3d1388419
--- /dev/null
+++ b/scripts/prepare_energy_totals.py
@@ -0,0 +1,293 @@
+# -*- coding: utf-8 -*-
+# SPDX-FileCopyrightText: PyPSA-Earth and PyPSA-Eur Authors
+#
+# SPDX-License-Identifier: AGPL-3.0-or-later
+import logging
+
+from _helpers import mock_snakemake, read_csv_nafix
+
+_logger = logging.getLogger(__name__)
+
+
+def get(item, investment_year=None):
+    """
+    Return the value for the given investment year if item is a
+    year-indexed dict, otherwise return item unchanged.
+    """
+    if isinstance(item, dict):
+        return item[investment_year]
+    else:
+        return item
+
+
+def calculate_end_values(df):
+    return (1 + df) ** no_years
+
+
+if __name__ == "__main__":
+    if "snakemake" not in globals():
+        snakemake = mock_snakemake(
+            "prepare_energy_totals",
+            simpl="",
+            clusters=32,
+            demand="EG",
+            planning_horizons=2030,
+        )
+
+    countries = snakemake.params.countries
+    # countries = ["NG", "BJ"]
+    investment_year = int(snakemake.wildcards.planning_horizons)
+    demand_sc = snakemake.wildcards.demand  # loading the demand scenario wildcard
+
+    base_energy_totals = read_csv_nafix("data/energy_totals_base.csv", index_col=0)
+    growth_factors_cagr = read_csv_nafix(
+        snakemake.input.growth_factors_cagr, index_col=0
+    )
+    efficiency_gains_cagr = read_csv_nafix(
+        snakemake.input.efficiency_gains_cagr, index_col=0
+    )
+    fuel_shares = read_csv_nafix(snakemake.input.fuel_shares, index_col=0)
+    district_heating = read_csv_nafix(snakemake.input.district_heating, index_col=0)
+
+    no_years = int(snakemake.wildcards.planning_horizons) - int(
+        snakemake.params.base_year
+    )
+    growth_factors = calculate_end_values(growth_factors_cagr)
+    efficiency_gains = calculate_end_values(efficiency_gains_cagr)
+
+    for country in countries:
+        if country not in efficiency_gains.index:
+            efficiency_gains.loc[country] = efficiency_gains.loc["DEFAULT"]
+            _logger.warning(
+                "No efficiency gains cagr data for "
+                + country
+                + "; using default data instead."
+            )
+        if country not in growth_factors.index:
+            growth_factors.loc[country] = growth_factors.loc["DEFAULT"]
+            _logger.warning(
+                "No growth factors cagr data for "
+                + country
+                + "; using default data instead."
+            )
+        if country not in fuel_shares.index:
+            fuel_shares.loc[country] = fuel_shares.loc["DEFAULT"]
+            _logger.warning(
+                "No fuel share data for " + country + "; using default data instead."
+            )
+        if country not in district_heating.index:
+            district_heating.loc[country] = district_heating.loc["DEFAULT"]
+            _logger.warning(
+                "No district heating data for " + country + "; using default data instead."
+ ) + + efficiency_gains = efficiency_gains[efficiency_gains.index.isin(countries)] + fuel_shares = fuel_shares[fuel_shares.index.isin(countries)] + district_heating = district_heating[district_heating.index.isin(countries)] + growth_factors = growth_factors[growth_factors.index.isin(countries)] + + options = snakemake.params.sector_options + + fuel_cell_share = get( + options["land_transport_fuel_cell_share"], + demand_sc + "_" + str(investment_year), + ) + electric_share = get( + options["land_transport_electric_share"], demand_sc + "_" + str(investment_year) + ) + + hydrogen_shipping_share = get( + options["shipping_hydrogen_share"], demand_sc + "_" + str(investment_year) + ) + + energy_totals = ( + base_energy_totals + * efficiency_gains.loc[countries] + * growth_factors.loc[countries] + ) + + # Residential + efficiency_heat_oil_to_elec = snakemake.params.sector_options[ + "efficiency_heat_oil_to_elec" + ] + efficiency_heat_biomass_to_elec = snakemake.params.sector_options[ + "efficiency_heat_biomass_to_elec" + ] + efficiency_heat_gas_to_elec = snakemake.params.sector_options[ + "efficiency_heat_gas_to_elec" + ] + + energy_totals["electricity residential space"] = ( + base_energy_totals["total residential space"] + + ( + fuel_shares["biomass to elec heat share"] + * fuel_shares["biomass residential heat share"] + * (fuel_shares["space to water heat share"]) + * base_energy_totals["residential biomass"] + * efficiency_heat_biomass_to_elec + ) + + ( + fuel_shares["oil to elec heat share"] + * fuel_shares["oil residential heat share"] + * (fuel_shares["space to water heat share"]) + * base_energy_totals["residential oil"] + * efficiency_heat_oil_to_elec + ) + + ( + fuel_shares["gas to elec heat share"] + * fuel_shares["gas residential heat share"] + * (fuel_shares["space to water heat share"]) + * base_energy_totals["residential gas"] + * efficiency_heat_gas_to_elec + ) + ) + + energy_totals["electricity residential water"] = ( + base_energy_totals["total residential water"] + + ( + fuel_shares["biomass to elec heat share"] + * fuel_shares["biomass residential heat share"] + * (1 - fuel_shares["space to water heat share"]) + * base_energy_totals["residential biomass"] + * efficiency_heat_biomass_to_elec + ) + + ( + fuel_shares["oil to elec heat share"] + * fuel_shares["oil residential heat share"] + * (1 - fuel_shares["space to water heat share"]) + * base_energy_totals["residential oil"] + * efficiency_heat_oil_to_elec + ) + + ( + fuel_shares["gas to elec heat share"] + * fuel_shares["gas residential heat share"] + * (1 - fuel_shares["space to water heat share"]) + * base_energy_totals["residential gas"] + * efficiency_heat_gas_to_elec + ) + ) + + energy_totals["residential heat oil"] = ( + base_energy_totals["residential oil"] + * fuel_shares["oil residential heat share"] + * (1 - fuel_shares["oil to elec heat share"]) + * efficiency_gains["residential heat oil"] + * growth_factors["residential heat oil"] + ) + + energy_totals["residential oil"] = ( + base_energy_totals["residential oil"] + * (1 - fuel_shares["oil residential heat share"]) + * (1 - fuel_shares["oil to elec share"]) + * efficiency_gains["residential oil"] + * growth_factors["residential oil"] + ) + + energy_totals["residential heat biomass"] = ( + base_energy_totals["residential biomass"] + * fuel_shares["biomass residential heat share"] + * (1 - fuel_shares["biomass to elec heat share"]) + * efficiency_gains["residential heat biomass"] + * growth_factors["residential heat biomass"] + ) + + energy_totals["residential 
biomass"] = ( + base_energy_totals["residential biomass"] + * (1 - fuel_shares["biomass residential heat share"]) + * (1 - fuel_shares["biomass to elec share"]) + * efficiency_gains["residential biomass"] + * growth_factors["residential biomass"] + ) + + energy_totals["residential heat gas"] = ( + base_energy_totals["residential gas"] + * fuel_shares["gas residential heat share"] + * (1 - fuel_shares["gas to elec heat share"]) + * efficiency_gains["residential heat gas"] + * growth_factors["residential heat gas"] + ) + + energy_totals["residential gas"] = ( + base_energy_totals["residential gas"] + * (1 - fuel_shares["gas residential heat share"]) + * (1 - fuel_shares["gas to elec share"]) + * efficiency_gains["residential gas"] + * growth_factors["residential gas"] + ) + + energy_totals["total residential space"] = energy_totals[ + "electricity residential space" + ] + ( + energy_totals["residential heat oil"] + + energy_totals["residential heat biomass"] + + energy_totals["residential heat gas"] + ) * ( + fuel_shares["space to water heat share"] + ) + + energy_totals["total residential water"] = energy_totals[ + "electricity residential water" + ] + ( + energy_totals["residential heat oil"] + + energy_totals["residential heat biomass"] + + energy_totals["residential heat gas"] + ) * ( + 1 - fuel_shares["space to water heat share"] + ) + + energy_totals["electricity residential"] = ( + energy_totals["electricity residential"] + + ( + fuel_shares["oil to elec share"] + * (1 - fuel_shares["oil residential heat share"]) + * base_energy_totals["residential oil"] + ) + + ( + fuel_shares["biomass to elec share"] + * (1 - fuel_shares["biomass residential heat share"]) + * base_energy_totals["residential biomass"] + ) + + ( + fuel_shares["gas to elec share"] + * (1 - fuel_shares["gas residential heat share"]) + * base_energy_totals["residential gas"] + ) + ) + + # Road + energy_totals["total road"] = ( + (1 - fuel_cell_share - electric_share) + * efficiency_gains["total road ice"] + * base_energy_totals["total road"] + + fuel_cell_share + * efficiency_gains["total road fcev"] + * base_energy_totals["total road"] + + electric_share + * efficiency_gains["total road ev"] + * base_energy_totals["total road"] + ) * growth_factors["total road"] + + # Navigation + energy_totals["total domestic navigation"] = ( + (1 - hydrogen_shipping_share) + * efficiency_gains["total navigation oil"] + * base_energy_totals["total domestic navigation"] + + hydrogen_shipping_share + * efficiency_gains["total navigation hydrogen"] + * base_energy_totals["total domestic navigation"] + ) * growth_factors["total domestic navigation"] + + energy_totals["total international navigation"] = ( + (1 - hydrogen_shipping_share) + * efficiency_gains["total navigation oil"] + * base_energy_totals["total international navigation"] + + hydrogen_shipping_share + * efficiency_gains["total navigation hydrogen"] + * base_energy_totals["total international navigation"] + ) * growth_factors["total international navigation"] + + energy_totals["district heat share"] = district_heating["current"] + + energy_totals["electricity services space"] = 0 + energy_totals["electricity services water"] = 0 + + energy_totals.fillna(0).to_csv(snakemake.output.energy_totals) diff --git a/scripts/prepare_gas_network.py b/scripts/prepare_gas_network.py new file mode 100644 index 000000000..d7b50ba59 --- /dev/null +++ b/scripts/prepare_gas_network.py @@ -0,0 +1,554 @@ +# -*- coding: utf-8 -*- +# SPDX-FileCopyrightText: PyPSA-Earth and PyPSA-Eur Authors +# +# 
SPDX-License-Identifier: AGPL-3.0-or-later
+"""
+Prepare gas network.
+"""
+
+import logging
+
+logger = logging.getLogger(__name__)
+
+import pathlib
+import zipfile
+
+import geopandas as gpd
+import pandas as pd
+from _helpers import content_retrieve, mock_snakemake, progress_retrieve
+from build_shapes import get_gadm_shapes
+from pyproj import CRS
+from pypsa.geo import haversine_pts
+from shapely.geometry import Point
+from shapely.ops import unary_union
+
+if __name__ == "__main__":
+    if "snakemake" not in globals():
+        snakemake = mock_snakemake(
+            "prepare_gas_network",
+            simpl="",
+            clusters="10",
+        )
+
+
+def download_IGGIELGN_gas_network():
+    """
+    Download the SciGRID_gas IGGIELGN gas network dataset as a zip archive.
+
+    The archive is retrieved from https://zenodo.org/record/4767098 and
+    extracted to data/gas_network/scigrid-gas.
+    """
+
+    url = "https://zenodo.org/record/4767098/files/IGGIELGN.zip"
+
+    # Save locations
+    zip_fn = pathlib.Path("IGGIELGN.zip")
+    to_fn = pathlib.Path("data/gas_network/scigrid-gas")
+
+    logger.info(f"Downloading databundle from '{url}'.")
+    progress_retrieve(url, zip_fn)
+
+    logger.info("Extracting databundle.")
+    zipfile.ZipFile(zip_fn).extractall(to_fn)
+
+    zip_fn.unlink()
+
+    logger.info(f"Gas infrastructure data available in '{to_fn}'.")
+
+
+def download_GGIT_gas_network():
+    """
+    Download the GEM Global Gas Infrastructure Tracker dataset as .xlsx.
+
+    The xlsx file is downloaded from the webpage
+    https://globalenergymonitor.org/projects/global-gas-infrastructure-tracker/
+    The dataset contains 3144 pipelines.
+    """
+    url = "https://globalenergymonitor.org/wp-content/uploads/2022/12/GEM-GGIT-Gas-Pipelines-December-2022.xlsx"
+    GGIT_gas_pipeline = pd.read_excel(
+        content_retrieve(url),
+        index_col=0,
+        sheet_name="Gas Pipelines 2022-12-16",
+        header=0,
+    )
+
+    return GGIT_gas_pipeline
+
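[Editor's note] As a numerical cross-check of `diameter_to_capacity` defined just below: each linear segment should pass through the two anchor points quoted in its docstring, which is what fixes the slopes and intercepts used there. A minimal standalone sketch restating those constants:

    # Anchor points (diameter_mm -> capacity_MW) from the docstring below.
    anchors = [(0, 0), (500, 1500), (600, 5000), (900, 11250), (1200, 21700)]

    # Piecewise-linear segments (slope, intercept) as in diameter_to_capacity.
    segments = [(3.0, 0.0), (35.0, -16000.0), (125.0 / 6, -7500.0), (104.5 / 3, -20100.0)]

    for (d0, c0), (d1, c1), (m, a) in zip(anchors, anchors[1:], segments):
        # each segment must pass through both of its anchor points
        assert abs(m * d0 + a - c0) < 1e-6 and abs(m * d1 + a - c1) < 1e-6
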
+
+def diameter_to_capacity(pipe_diameter_mm):
+    """
+    Calculate pipe capacity in MW based on diameter in mm.
+
+    20 inch (500 mm), 50 bar -> 1.5 GW CH4 pipe capacity (LHV)
+    24 inch (600 mm), 50 bar -> 5 GW CH4 pipe capacity (LHV)
+    36 inch (900 mm), 50 bar -> 11.25 GW CH4 pipe capacity (LHV)
+    48 inch (1200 mm), 80 bar -> 21.7 GW CH4 pipe capacity (LHV)
+
+    Based on p.15 of
+    https://gasforclimate2050.eu/wp-content/uploads/2020/07/2020_European-Hydrogen-Backbone_Report.pdf
+    """
+    # slopes definitions
+    m0 = (1500 - 0) / (500 - 0)
+    m1 = (5000 - 1500) / (600 - 500)
+    m2 = (11250 - 5000) / (900 - 600)
+    m3 = (21700 - 11250) / (1200 - 900)
+
+    # intercepts
+    a0 = 0
+    a1 = -16000
+    a2 = -7500
+    a3 = -20100
+
+    if pipe_diameter_mm < 500:
+        return a0 + m0 * pipe_diameter_mm
+    elif pipe_diameter_mm < 600:
+        return a1 + m1 * pipe_diameter_mm
+    elif pipe_diameter_mm < 900:
+        return a2 + m2 * pipe_diameter_mm
+    else:
+        return a3 + m3 * pipe_diameter_mm
+
+
+def inch_to_mm(len_inch):
+    return len_inch / 0.0393701
+
+
+def bcm_to_MW(cap_bcm):
+    return cap_bcm * 9769444.44 / 8760
+
+
+def correct_Diameter_col(value):
+    value = str(value)
+    # Check if the value contains a comma
+    if "," in value:
+        # Split the value by comma and convert each part to a float
+        diameter_values = [float(val) for val in value.split(",")]
+        # Calculate the mean of the values
+        return sum(diameter_values) / len(diameter_values)
+    elif "/" in value:
+        # Split the value by slash and convert each part to a float
+        diameter_values = [float(val) for val in value.split("/")]
+        # Calculate the mean of the values
+        return sum(diameter_values) / len(diameter_values)
+    elif "-" in value:
+        # Split the value by hyphen and convert each part to a float
+        diameter_values = [float(val) for val in value.split("-")]
+        # Calculate the mean of the values
+        return sum(diameter_values) / len(diameter_values)
+    else:
+        # Return the original value for rows without a separator
+        return float(value)
+
+
+def prepare_GGIT_data(GGIT_gas_pipeline):
+    df = GGIT_gas_pipeline.copy().reset_index()
+
+    # Drop rows containing "--" in the 'WKTFormat' column
+    df = df[df["WKTFormat"] != "--"]
+
+    # Keep only pipelines whose status is listed in the config
+    df = df[df["Status"].isin(snakemake.params.gas_config["network_data_GGIT_status"])]
+
+    # Convert the WKT column to a GeoDataFrame
+    df = gpd.GeoDataFrame(df, geometry=gpd.GeoSeries.from_wkt(df["WKTFormat"]))
+
+    # Set the CRS to EPSG:4326
+    df.crs = CRS.from_epsg(4326)
+
+    # Convert CRS to EPSG:3857 so we can measure distances
+    df = df.to_crs(epsg=3857)
+
+    # Convert and correct diameter column to be in mm
+    df.loc[df["DiameterUnits"] == "mm", "diameter_mm"] = df.loc[
+        df["DiameterUnits"] == "mm", "Diameter"
+    ].apply(correct_Diameter_col)
+    df.loc[df["DiameterUnits"] == "in", "diameter_mm"] = (
+        df.loc[df["DiameterUnits"] == "in", "Diameter"]
+        .apply(correct_Diameter_col)
+        .apply(lambda d: inch_to_mm(float(d)))
+    )
+
+    # Convert Bcm/y to MW
+    df["CapacityBcm/y"] = pd.to_numeric(df["CapacityBcm/y"], errors="coerce")
+    df["capacity [MW]"] = df["CapacityBcm/y"].apply(lambda d: bcm_to_MW(d))
+
+    # Get capacity from diameter for rows where no capacity is given
+    df.loc[df["CapacityBcm/y"] == "--", "capacity [MW]"] = df.loc[
+        df["CapacityBcm/y"] == "--", "diameter_mm"
+    ].apply(lambda d: diameter_to_capacity(int(d)))
+    df["diameter_mm"] = pd.to_numeric(
+        df["diameter_mm"], errors="coerce", downcast="integer"
+    )
+    df.loc[pd.isna(df["CapacityBcm/y"]), "capacity [MW]"] = df.loc[
+        pd.isna(df["CapacityBcm/y"]), "diameter_mm"
+    ].apply(lambda d: diameter_to_capacity(d))
+
+    return df
+
+
+def load_IGGIELGN_data(fn):
+    df = 
gpd.read_file(fn) + param = df.param.apply(pd.Series) + method = df.method.apply(pd.Series)[["diameter_mm", "max_cap_M_m3_per_d"]] + method.columns = method.columns + "_method" + df = pd.concat([df, param, method], axis=1) + to_drop = ["param", "uncertainty", "method", "tags"] + to_drop = df.columns.intersection(to_drop) + df.drop(to_drop, axis=1, inplace=True) + return df + + +def prepare_IGGIELGN_data( + df, + length_factor=1.5, + correction_threshold_length=4, + correction_threshold_p_nom=8, + bidirectional_below=10, +): # Taken from pypsa-eur and adapted + # extract start and end from LineString + df["point0"] = df.geometry.apply(lambda x: Point(x.coords[0])) + df["point1"] = df.geometry.apply(lambda x: Point(x.coords[-1])) + + conversion_factor = 437.5 # MCM/day to MWh/h + df["p_nom"] = df.max_cap_M_m3_per_d * conversion_factor + + # for inferred diameters, assume 500 mm rather than 900 mm (more conservative) + df.loc[df.diameter_mm_method != "raw", "diameter_mm"] = 500.0 + + keep = [ + "name", + "diameter_mm", + "is_H_gas", + "is_bothDirection", + "length_km", + "p_nom", + "max_pressure_bar", + "start_year", + "point0", + "point1", + "geometry", + ] + to_rename = { + "is_bothDirection": "bidirectional", + "is_H_gas": "H_gas", + "start_year": "build_year", + "length_km": "length", + } + df = df[keep].rename(columns=to_rename) + + df.bidirectional = df.bidirectional.astype(bool) + df.H_gas = df.H_gas.astype(bool) + + # short lines below 10 km are assumed to be bidirectional + short_lines = df["length"] < bidirectional_below + df.loc[short_lines, "bidirectional"] = True + + # correct all capacities that deviate correction_threshold factor + # to diameter-based capacities, unless they are NordStream pipelines + # also all capacities below 0.5 GW are now diameter-based capacities + df["p_nom_diameter"] = df.diameter_mm.apply(diameter_to_capacity) + ratio = df.p_nom / df.p_nom_diameter + not_nordstream = df.max_pressure_bar < 220 + df.p_nom.update( + df.p_nom_diameter.where( + (df.p_nom <= 500) + | ((ratio > correction_threshold_p_nom) & not_nordstream) + | ((ratio < 1 / correction_threshold_p_nom) & not_nordstream) + ) + ) + + # lines which have way too discrepant line lengths + # get assigned haversine length * length factor + df["length_haversine"] = df.apply( + lambda p: length_factor + * haversine_pts([p.point0.x, p.point0.y], [p.point1.x, p.point1.y]), + axis=1, + ) + ratio = df.eval("length / length_haversine") + df["length"].update( + df.length_haversine.where( + (df["length"] < 20) + | (ratio > correction_threshold_length) + | (ratio < 1 / correction_threshold_length) + ) + ) + + # Convert CRS to EPSG:3857 so we can measure distances + df = df.to_crs(epsg=3857) + + return df + + +def load_bus_region(onshore_path, pipelines): + """ + Load pypsa-earth-sec onshore regions. + + TODO: Think about including Offshore regions but only for states that have offshore pipelines. 
+ """ + bus_regions_onshore = gpd.read_file(onshore_path) + # Convert CRS to EPSG:3857 so we can measure distances + bus_regions_onshore = bus_regions_onshore.to_crs(epsg=3857) + + bus_regions_onshore = bus_regions_onshore.rename({"name": "gadm_id"}, axis=1).loc[ + :, ["gadm_id", "geometry"] + ] + + if snakemake.params.alternative_clustering: + countries_list = snakemake.params.countries_list + layer_id = snakemake.params.layer_id + update = snakemake.params.update + out_logging = snakemake.params.out_logging + year = snakemake.params.year + nprocesses = snakemake.params.nprocesses + contended_flag = snakemake.params.contended_flag + geo_crs = snakemake.params.geo_crs + file_prefix = snakemake.params.gadm_file_prefix + gadm_url_prefix = snakemake.params.gadm_url_prefix + gadm_input_file_args = ["data", "gadm"] + + bus_regions_onshore = get_gadm_shapes( + False, + False, + countries_list, + geo_crs, + file_prefix, + gadm_url_prefix, + gadm_input_file_args, + contended_flag, + None, + layer_id, + update, + out_logging, + year, + nprocesses=nprocesses, + ) + + bus_regions_onshore = bus_regions_onshore.rename(columns={"GADM_ID": "gadm_id"}) + bus_regions_onshore = bus_regions_onshore.to_crs(epsg=3857) + + country_borders = unary_union(bus_regions_onshore.geometry) + + # Create a new GeoDataFrame containing the merged polygon + country_borders = gpd.GeoDataFrame(geometry=[country_borders], crs=pipelines.crs) + + return bus_regions_onshore, country_borders + + +def get_states_in_order(pipeline, bus_regions_onshore): + states_p = [] + + if pipeline.geom_type == "LineString": + # Interpolate points along the LineString with a given step size (e.g., 5) + step_size = 10000 + interpolated_points = [ + pipeline.interpolate(i) for i in range(0, int(pipeline.length), step_size) + ] + interpolated_points.append( + pipeline.interpolate(pipeline.length) + ) # Add the last point + + elif pipeline.geom_type == "MultiLineString": + interpolated_points = [] + # Iterate over each LineString within the MultiLineString + for line in pipeline.geoms: + # Interpolate points along each LineString with a given step size (e.g., 5) + step_size = 10000 + interpolated_points_line = [ + line.interpolate(i) for i in range(0, int(line.length), step_size) + ] + interpolated_points_line.append( + line.interpolate(line.length) + ) # Add the last point + interpolated_points.extend(interpolated_points_line) + + # Check each interpolated point against the state geometries + for point in interpolated_points: + for index, state_row in bus_regions_onshore.iterrows(): + if state_row.geometry.contains(point): + gadm_id = state_row["gadm_id"] + if gadm_id not in states_p: + states_p.append(gadm_id) + break # Stop checking other states once a match is found + + return states_p + + +def parse_states(pipelines, bus_regions_onshore): + # Parse the states of the points which are connected by the pipeline geometry object + pipelines["nodes"] = None + pipelines["states_passed"] = None + pipelines["amount_states_passed"] = None + + for pipeline, row in pipelines.iterrows(): + states_p = get_states_in_order(row.geometry, bus_regions_onshore) + # states_p = pd.unique(states_p) + row["states_passed"] = states_p + row["amount_states_passed"] = len(states_p) + row["nodes"] = list(zip(states_p[0::1], states_p[1::1])) + pipelines.loc[pipeline] = row + print( + "The maximum number of states which are passed by one single pipeline amounts to {}.".format( + pipelines.states_passed.apply(lambda n: len(n)).max() + ) + ) + return pipelines + + +def 
cluster_gas_network(pipelines, bus_regions_onshore, length_factor=1.25):
+    # drop intra-state pipelines (those passing through fewer than two states)
+    pipelines_interstate = pipelines.drop(
+        pipelines.loc[pipelines.amount_states_passed < 2].index
+    )
+
+    # Convert CRS to EPSG:3857 so we can measure distances
+    pipelines_interstate = pipelines_interstate.to_crs(epsg=3857)  # 3857
+
+    # Perform overlay operation to split lines by polygons
+    pipelines_interstate = gpd.overlay(
+        pipelines_interstate, bus_regions_onshore, how="intersection"
+    )
+
+    column_set = ["ProjectID", "nodes", "gadm_id", "capacity [MW]"]
+
+    if snakemake.params.gas_config["network_data"] == "IGGIELGN":
+        pipelines_per_state = (
+            pipelines_interstate.rename(
+                {"p_nom": "capacity [MW]", "name": "ProjectID"}, axis=1
+            )
+            .loc[:, column_set]
+            .reset_index(drop=True)
+        )
+    elif snakemake.params.gas_config["network_data"] == "GGIT":
+        pipelines_per_state = pipelines_interstate.loc[:, column_set].reset_index(
+            drop=True
+        )
+
+    # Explode the column containing lists of tuples
+    df_exploded = pipelines_per_state.explode("nodes").reset_index(drop=True)
+
+    # Create new columns for the tuples
+    df_exploded.insert(0, "bus1", pd.DataFrame(df_exploded["nodes"].tolist())[1])
+    df_exploded.insert(0, "bus0", pd.DataFrame(df_exploded["nodes"].tolist())[0])
+
+    # Drop the original column
+    df_exploded.drop("nodes", axis=1, inplace=True)
+
+    # Reset the index if needed
+    df_exploded.reset_index(drop=True, inplace=True)
+
+    # Check whether the value in 'gadm_id' appears in either 'bus0' or 'bus1'
+    def check_existence(row):
+        return row["gadm_id"] in [row["bus0"], row["bus1"]]
+
+    # Apply the custom function to each row and keep only the rows that satisfy the condition
+    df_filtered = df_exploded[df_exploded.apply(check_existence, axis=1)]
+    df_grouped = df_filtered.groupby(["bus0", "bus1", "ProjectID"], as_index=False).agg(
+        {
+            "capacity [MW]": "first",
+        }
+    )
+
+    # Rename columns to match pypsa-earth-sec format
+    df_grouped = df_grouped.rename({"capacity [MW]": "capacity"}, axis=1).loc[
+        :, ["bus0", "bus1", "capacity"]
+    ]
+
+    # Group by buses to get the average length and the sum of capacities of all
+    # pipelines between any two states on the route.
+ grouped = df_grouped.groupby(["bus0", "bus1"], as_index=False).agg( + {"capacity": "sum"} + ) + states1 = bus_regions_onshore.copy() + states1 = states1.set_index("gadm_id") + + # Create center points for each polygon and store them in a new column 'center_point' + states1["center_point"] = ( + states1["geometry"].to_crs(3857).centroid.to_crs(4326) + ) # ----> If haversine_pts method for length calc is used + # states1['center_point'] = states1['geometry'].centroid + + # Create an empty DataFrame to store distances + distance_data = [] + + # Iterate over all combinations of polygons + for i in range(len(states1)): + for j in range(len(states1)): + if i != j: + polygon1 = states1.iloc[i] + polygon2 = states1.iloc[j] + + # Calculate Haversine distance + distance = haversine_pts( + [ + Point(polygon1["center_point"].coords[0]).x, + Point(polygon1["center_point"].coords[-1]).y, + ], + [ + Point(polygon2["center_point"].coords[0]).x, + Point(polygon2["center_point"].coords[-1]).y, + ], + ) # ----> If haversine_pts method for length calc is used + + # Store the distance along with polygon IDs or other relevant information + polygon_id1 = states1.index[i] + polygon_id2 = states1.index[j] + distance_data.append([polygon_id1, polygon_id2, distance]) + + # Create a DataFrame from the distance data + distance_df = pd.DataFrame(distance_data, columns=["bus0", "bus1", "distance"]) + + merged_df = pd.merge(grouped, distance_df, on=["bus0", "bus1"], how="left") + + merged_df["length"] = merged_df["distance"] * length_factor + + merged_df = merged_df.drop("distance", axis=1) + + merged_df["GWKm"] = (merged_df["capacity"] / 1000) * merged_df["length"] + + return merged_df + + +if not snakemake.params.custom_gas_network: + if snakemake.params.gas_config["network_data"] == "GGIT": + pipelines = download_GGIT_gas_network() + pipelines = prepare_GGIT_data(pipelines) + + elif snakemake.params.gas_config["network_data"] == "IGGIELGN": + download_IGGIELGN_gas_network() + + gas_network = "data/gas_network/scigrid-gas/data/IGGIELGN_PipeSegments.geojson" + + pipelines = load_IGGIELGN_data(gas_network) + pipelines = prepare_IGGIELGN_data(pipelines) + + bus_regions_onshore = load_bus_region(snakemake.input.regions_onshore, pipelines)[0] + bus_regions_onshore.geometry = bus_regions_onshore.geometry.buffer(0) + country_borders = load_bus_region(snakemake.input.regions_onshore, pipelines)[1] + + pipelines = parse_states(pipelines, bus_regions_onshore) + + if len(pipelines.loc[pipelines.amount_states_passed >= 2]) > 0: + + pipelines = cluster_gas_network( + pipelines, bus_regions_onshore, length_factor=1.25 + ) + + pipelines.to_csv(snakemake.output.clustered_gas_network, index=False) + + average_length = pipelines["length"].mean() + print("average_length = ", average_length) + + total_system_capacity = pipelines["GWKm"].sum() + print("total_system_capacity = ", total_system_capacity) + + else: + print( + "The following countries have no existing Natural Gas network between the chosen bus regions:\n" + + ", ".join(bus_regions_onshore.country.unique().tolist()) + ) + + # Create an empty DataFrame with the specified column names + pipelines = {"bus0": [], "bus1": [], "capacity": [], "length": [], "GWKm": []} + + pipelines = pd.DataFrame(pipelines) + pipelines.to_csv(snakemake.output.clustered_gas_network, index=False) diff --git a/scripts/prepare_heat_data.py b/scripts/prepare_heat_data.py new file mode 100644 index 000000000..d50e1ea72 --- /dev/null +++ b/scripts/prepare_heat_data.py @@ -0,0 +1,173 @@ +# -*- coding: utf-8 
-*- +# SPDX-FileCopyrightText: PyPSA-Earth and PyPSA-Eur Authors +# +# SPDX-License-Identifier: AGPL-3.0-or-later +from itertools import product + +import pandas as pd +import pypsa +import pytz +import xarray as xr +from _helpers import mock_snakemake + + +def generate_periodic_profiles(dt_index, nodes, weekly_profile, localize=None): + """ + Give a 24*7 long list of weekly hourly profiles, generate this for each + country for the period dt_index, taking account of time zones and summer + time. + """ + + weekly_profile = pd.Series(weekly_profile, range(24 * 7)) + + week_df = pd.DataFrame(index=dt_index, columns=nodes) + + for node in nodes: + timezone = pytz.timezone(pytz.country_timezones[node[:2]][0]) + tz_dt_index = dt_index.tz_convert(timezone) + week_df[node] = [24 * dt.weekday() + dt.hour for dt in tz_dt_index] + week_df[node] = week_df[node].map(weekly_profile) + + week_df = week_df.tz_localize(localize) + + return week_df + + +def prepare_heat_data(n): + ashp_cop = ( + xr.open_dataarray(snakemake.input.cop_air_total) + .to_pandas() + .reindex(index=n.snapshots) + ) + gshp_cop = ( + xr.open_dataarray(snakemake.input.cop_soil_total) + .to_pandas() + .reindex(index=n.snapshots) + ) + + solar_thermal = ( + xr.open_dataarray(snakemake.input.solar_thermal_total) + .to_pandas() + .reindex(index=n.snapshots) + ) + # 1e3 converts from W/m^2 to MW/(1000m^2) = kW/m^2 + solar_thermal = options["solar_cf_correction"] * solar_thermal / 1e3 + + energy_totals = pd.read_csv( + snakemake.input.energy_totals_name, + index_col=0, + keep_default_na=False, + na_values=[""], + ) + + nodal_energy_totals = energy_totals.loc[pop_layout.ct].fillna(0.0) + nodal_energy_totals.index = pop_layout.index + # # district heat share not weighted by population + district_heat_share = nodal_energy_totals["district heat share"] # .round(2) + nodal_energy_totals = nodal_energy_totals.multiply(pop_layout.fraction, axis=0) + + # copy forward the daily average heat demand into each hour, so it can be multiplied by the intraday profile + daily_space_heat_demand = ( + xr.open_dataarray(snakemake.input.heat_demand_total) + .to_pandas() + .reindex(index=n.snapshots, method="ffill") + ) + + intraday_profiles = pd.read_csv( + snakemake.input.heat_profile, index_col=0 + ) # TODO GHALAT + + sectors = ["residential", "services"] + uses = ["water", "space"] + + heat_demand = {} + electric_heat_supply = {} + for sector, use in product(sectors, uses): + weekday = list(intraday_profiles[f"{sector} {use} weekday"]) + weekend = list(intraday_profiles[f"{sector} {use} weekend"]) + weekly_profile = weekday * 5 + weekend * 2 + intraday_year_profile = generate_periodic_profiles( + daily_space_heat_demand.index.tz_localize("UTC"), + nodes=daily_space_heat_demand.columns, + weekly_profile=weekly_profile, + ) + + if use == "space": + heat_demand_shape = daily_space_heat_demand * intraday_year_profile + else: + heat_demand_shape = intraday_year_profile + + heat_demand[f"{sector} {use}"] = ( + heat_demand_shape / heat_demand_shape.sum() + ).multiply( + nodal_energy_totals[f"total {sector} {use}"] + ) * 1e6 # TODO v0.0.2 + electric_heat_supply[f"{sector} {use}"] = ( + heat_demand_shape / heat_demand_shape.sum() + ).multiply( + nodal_energy_totals[f"electricity {sector} {use}"] + ) * 1e6 # TODO v0.0.2 + + heat_demand = pd.concat(heat_demand, axis=1) + electric_heat_supply = pd.concat(electric_heat_supply, axis=1) + + # subtract from electricity load since heat demand already in heat_demand #TODO v0.1 + # electric_nodes = 
n.loads.index[n.loads.carrier == "electricity"] + # n.loads_t.p_set[electric_nodes] = ( + # n.loads_t.p_set[electric_nodes] + # - electric_heat_supply.groupby(level=1, axis=1).sum()[electric_nodes] + # ) + + return ( + nodal_energy_totals, + heat_demand, + ashp_cop, + gshp_cop, + solar_thermal, + district_heat_share, + ) + + +if __name__ == "__main__": + if "snakemake" not in globals(): + snakemake = mock_snakemake( + "prepare_heat_data", + simpl="", + clusters="10", + planning_horizons=2030, + demand="DF", + ) + + n = pypsa.Network(snakemake.input.network) + + # Get pop_layout + pop_layout = pd.read_csv( + snakemake.input.clustered_pop_layout, + index_col=0, + keep_default_na=False, + na_values=[""], + ) + + # Add options + options = snakemake.config["sector"] + + # Get Nyears + Nyears = n.snapshot_weightings.generators.sum() / 8760 + + # Prepare transport data + ( + nodal_energy_totals, + heat_demand, + ashp_cop, + gshp_cop, + solar_thermal, + district_heat_share, + ) = prepare_heat_data(n) + + # Save the generated output files to snakemake paths + nodal_energy_totals.to_csv(snakemake.output.nodal_energy_totals) + heat_demand.to_csv(snakemake.output.heat_demand) + ashp_cop.to_csv(snakemake.output.ashp_cop) + gshp_cop.to_csv(snakemake.output.gshp_cop) + solar_thermal.to_csv(snakemake.output.solar_thermal) + district_heat_share.to_csv(snakemake.output.district_heat_share) diff --git a/scripts/prepare_network.py b/scripts/prepare_network.py index e0f061537..ec8ea9366 100755 --- a/scripts/prepare_network.py +++ b/scripts/prepare_network.py @@ -65,7 +65,6 @@ import pypsa import requests from _helpers import ( - change_to_script_dir, configure_logging, create_logger, get_current_directory_path, @@ -94,13 +93,9 @@ def download_emission_data(): with requests.get(url) as rq: with open("data/co2.zip", "wb") as file: file.write(rq.content) - root_path = get_current_directory_path() - file_path = get_path(root_path, "data/co2.zip") + file_path = "data/co2.zip" with ZipFile(file_path, "r") as zipObj: - zipObj.extract( - "v60_CO2_excl_short-cycle_org_C_1970_2018.xls", - get_path(root_path, "data"), - ) + zipObj.extract("v60_CO2_excl_short-cycle_org_C_1970_2018.xls", "data") pathlib.Path(file_path).unlink(missing_ok=True) return "v60_CO2_excl_short-cycle_org_C_1970_2018.xls" except requests.exceptions.RequestException as e: @@ -328,7 +323,6 @@ def set_line_nom_max(n, s_nom_max_set=np.inf, p_nom_max_set=np.inf): if __name__ == "__main__": if "snakemake" not in globals(): - change_to_script_dir(__file__) snakemake = mock_snakemake( "prepare_network", simpl="", diff --git a/scripts/prepare_ports.py b/scripts/prepare_ports.py new file mode 100644 index 000000000..beb4f6ebb --- /dev/null +++ b/scripts/prepare_ports.py @@ -0,0 +1,87 @@ +# -*- coding: utf-8 -*- +# SPDX-FileCopyrightText: PyPSA-Earth and PyPSA-Eur Authors +# +# SPDX-License-Identifier: AGPL-3.0-or-later + +import country_converter as coco +import pandas as pd +from _helpers import mock_snakemake + + +def download_ports(): + """ + Downloads the world ports index csv File and NOT as shape or other because + it is updated on a monthly basis. + + The following csv file was downloaded from the webpage + https://msi.nga.mil/Publications/WPI + as a csv file that is updated monthly as mentioned on the webpage. The dataset contains 3711 ports. 
+    """
+    fn = "https://msi.nga.mil/api/publications/download?type=view&key=16920959/SFH00000/UpdatedPub150.csv"
+    wpi_csv = pd.read_csv(fn, index_col=0)
+
+    return wpi_csv
+
+
+if __name__ == "__main__":
+    if "snakemake" not in globals():
+        snakemake = mock_snakemake("prepare_ports")
+
+    df = download_ports().copy()
+
+    # Add ISO2 country code for each country
+    df = df.rename(
+        columns={
+            "Country Code": "country_full_name",
+            "Latitude": "y",
+            "Longitude": "x",
+            "Main Port Name": "name",
+        }
+    )
+    df["country"] = df.country_full_name.apply(
+        lambda x: coco.convert(names=x, to="ISO2", not_found=None)
+    )
+
+    # Drop small islands that have no ISO2:
+    df = df[df.country_full_name != "Wake Island"]
+    df = df[df.country_full_name != "Johnson Atoll"]
+    df = df[df.country_full_name != "Midway Islands"]
+
+    # Select the columns that we need to keep
+    df = df.reset_index()
+    df = df[
+        [
+            "World Port Index Number",
+            "Region Name",
+            "name",
+            "Alternate Port Name",
+            "country",
+            "World Water Body",
+            "Liquified Natural Gas Terminal Depth (m)",
+            "Harbor Size",
+            "Harbor Type",
+            "Harbor Use",
+            "country_full_name",
+            "y",
+            "x",
+        ]
+    ]
+
+    # Drop ports that are very small or of unknown size (19 unknown-size ports
+    # in total; not suitable for H2 - checked visually)
+    ports = df.loc[df["Harbor Size"].isin(["Small", "Large", "Medium"])]
+
+    ports.insert(8, "Harbor_size_nr", 1)
+    ports.loc[ports["Harbor Size"].isin(["Small"]), "Harbor_size_nr"] = 1
+    ports.loc[ports["Harbor Size"].isin(["Medium"]), "Harbor_size_nr"] = 2
+    ports.loc[ports["Harbor Size"].isin(["Large"]), "Harbor_size_nr"] = 3
+
+    df1 = ports.copy()
+    df1 = df1.groupby(["country_full_name"]).sum("Harbor_size_nr")
+    df1 = df1[["Harbor_size_nr"]]
+    df1 = df1.rename(columns={"Harbor_size_nr": "Total_Harbor_size_nr"})
+
+    ports = ports.set_index("country_full_name").join(df1, how="left")
+
+    ports["fraction"] = ports["Harbor_size_nr"] / ports["Total_Harbor_size_nr"]
+
+    ports.to_csv(snakemake.output[0], sep=",", encoding="utf-8", header=True)
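[Editor's note] A brief aside on the ISO2 mapping used above: with `not_found=None`, `country_converter` returns inputs it cannot match unchanged instead of substituting a placeholder, which is presumably why the island territories without ISO2 codes are dropped explicitly right after the conversion. A minimal sketch (input names hypothetical):

    import country_converter as coco

    # Matched names come back as ISO2; unmatched names pass through unchanged
    # when not_found=None, so they must be filtered out separately.
    for name in ["Morocco", "Somewhere Unrecognized"]:
        print(name, "->", coco.convert(names=name, to="ISO2", not_found=None))
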
diff --git a/scripts/prepare_sector_network.py b/scripts/prepare_sector_network.py
new file mode 100644
index 000000000..bbf4d2acd
--- /dev/null
+++ b/scripts/prepare_sector_network.py
@@ -0,0 +1,2816 @@
+# -*- coding: utf-8 -*-
+# SPDX-FileCopyrightText: PyPSA-Earth and PyPSA-Eur Authors
+#
+# SPDX-License-Identifier: AGPL-3.0-or-later
+
+import logging
+import re
+from types import SimpleNamespace
+
+import numpy as np
+import pandas as pd
+import pypsa
+from _helpers import (
+    create_network_topology,
+    cycling_shift,
+    locate_bus,
+    mock_snakemake,
+    override_component_attrs,
+    prepare_costs,
+    safe_divide,
+)
+
+logger = logging.getLogger(__name__)
+
+spatial = SimpleNamespace()
+
+
+def add_lifetime_wind_solar(n, costs):
+    """
+    Add lifetime for solar and wind generators.
+    """
+    for carrier in ["solar", "onwind", "offwind"]:
+        gen_i = n.generators.index.str.contains(carrier)
+        n.generators.loc[gen_i, "lifetime"] = costs.at[carrier, "lifetime"]
+
+
+def add_carrier_buses(n, carrier, nodes=None):
+    """
+    Add buses to connect e.g. coal, nuclear and oil plants.
+    """
+
+    if nodes is None:
+        nodes = vars(spatial)[carrier].nodes
+    location = vars(spatial)[carrier].locations
+
+    # skip if carrier already exists
+    if carrier in n.carriers.index:
+        return
+
+    if not isinstance(nodes, pd.Index):
+        nodes = pd.Index(nodes)
+
+    n.add("Carrier", carrier, co2_emissions=costs.at[carrier, "CO2 intensity"])
+
+    n.madd("Bus", nodes, location=location, carrier=carrier)
+
+    # capital cost could be corrected to e.g. 0.2 EUR/kWh * annuity and O&M
+    n.madd(
+        "Store",
+        nodes + " Store",
+        bus=nodes,
+        e_nom_extendable=True,
+        e_cyclic=True,
+        carrier=carrier,
+    )
+
+    n.madd(
+        "Generator",
+        nodes,
+        bus=nodes,
+        p_nom_extendable=True,
+        carrier=carrier,
+        marginal_cost=costs.at[carrier, "fuel"],
+    )
+
+
+def add_generation(n, costs):
+    """
+    Add conventional generation as specified in the config.
+
+    Args:
+        n (network): PyPSA prenetwork
+        costs (dataframe): technology cost assumptions
+    """
+
+    logger.info("adding electricity generation")
+
+    # Not required, because nodes are already defined in "nodes"
+    # nodes = pop_layout.index
+
+    fallback = {"OCGT": "gas"}
+    conventionals = options.get("conventional_generation", fallback)
+
+    for generator, carrier in conventionals.items():
+        add_carrier_buses(n, carrier)
+        carrier_nodes = vars(spatial)[carrier].nodes
+        n.madd(
+            "Link",
+            spatial.nodes + " " + generator,
+            bus0=carrier_nodes,
+            bus1=spatial.nodes,
+            bus2="co2 atmosphere",
+            marginal_cost=costs.at[generator, "efficiency"]
+            * costs.at[generator, "VOM"],  # NB: VOM is per MWel
+            # NB: fixed cost is per MWel
+            capital_cost=costs.at[generator, "efficiency"]
+            * costs.at[generator, "fixed"],
+            p_nom_extendable=True,
+            carrier=generator,
+            efficiency=costs.at[generator, "efficiency"],
+            efficiency2=costs.at[carrier, "CO2 intensity"],
+            lifetime=costs.at[generator, "lifetime"],
+        )
+
+
+def add_oil(n, costs):
+    """
+    Add the oil carrier and bus to the network.
+
+    The if-statements are required in case oil was already added from
+    config["sector"]["conventional_generation"]. Oil is copper-plated.
+    """
+    # TODO function will not be necessary if conventionals are added using "add_carrier_buses()"
+    # TODO before using add_carrier_buses: remove_elec_base_techs(n), otherwise carriers are added double
+    # spatial.gas = SimpleNamespace()
+
+    spatial.oil = SimpleNamespace()
+
+    if options["oil"]["spatial_oil"]:
+        spatial.oil.nodes = spatial.nodes + " oil"
+        spatial.oil.locations = spatial.nodes
+    else:
+        spatial.oil.nodes = ["Africa oil"]
+        spatial.oil.locations = ["Africa"]
+
+    if "oil" not in n.carriers.index:
+        n.add("Carrier", "oil")
+
+    # Set the carrier's "co2_emissions" to 0, because the emissions of oil
+    # usage taken from spatial.oil.nodes are accounted for separately
+    # (directly linked to the co2 atmosphere bus); setting it to 0 here avoids
+    # double counting. Be aware to link oil emissions to the co2 atmosphere bus.
+    n.carriers.loc["oil", "co2_emissions"] = 0
+    # print("co2_emissions of oil set to 0 for testing")  # TODO add logger.info
+
+    n.madd(
+        "Bus",
+        spatial.oil.nodes,
+        location=spatial.oil.locations,
+        carrier="oil",
+    )
+
+    e_initial = (snakemake.config["fossil_reserves"]).get("oil", 0) * 1e6
+    # could correct to e.g. 
0.001 EUR/kWh * annuity and O&M + n.madd( + "Store", + [oil_bus + " Store" for oil_bus in spatial.oil.nodes], + bus=spatial.oil.nodes, + e_nom_extendable=True, + e_cyclic=False, + carrier="oil", + e_initial=e_initial, + marginal_cost=costs.at["oil", "fuel"], + ) + + # TODO check non-unique generators + n.madd( + "Generator", + spatial.oil.nodes, + bus=spatial.oil.nodes, + p_nom_extendable=True, + carrier="oil", + marginal_cost=costs.at["oil", "fuel"], + ) + + +def add_gas(n): + spatial.gas = SimpleNamespace() + + if options["gas"]["spatial_gas"]: + spatial.gas.nodes = spatial.nodes + " gas" + spatial.gas.locations = spatial.nodes + spatial.gas.biogas = spatial.nodes + " biogas" + spatial.gas.industry = spatial.nodes + " gas for industry" + if snakemake.config["sector"]["cc"]: + spatial.gas.industry_cc = spatial.nodes + " gas for industry CC" + spatial.gas.biogas_to_gas = spatial.nodes + " biogas to gas" + else: + spatial.gas.nodes = ["Africa gas"] + spatial.gas.locations = ["Africa"] + spatial.gas.biogas = ["Africa biogas"] + spatial.gas.industry = ["gas for industry"] + if snakemake.config["sector"]["cc"]: + spatial.gas.industry_cc = ["gas for industry CC"] + spatial.gas.biogas_to_gas = ["Africa biogas to gas"] + + spatial.gas.df = pd.DataFrame(vars(spatial.gas), index=spatial.nodes) + + gas_nodes = vars(spatial)["gas"].nodes + + add_carrier_buses(n, "gas", gas_nodes) + + +def H2_liquid_fossil_conversions(n, costs): + """ + Function to add conversions between H2 and liquid fossil Carrier and bus is + added in add_oil, which later on might be switched to add_generation. + """ + + n.madd( + "Link", + spatial.nodes + " Fischer-Tropsch", + bus0=spatial.nodes + " H2", + bus1=spatial.oil.nodes, + bus2=spatial.co2.nodes, + carrier="Fischer-Tropsch", + efficiency=costs.at["Fischer-Tropsch", "efficiency"], + capital_cost=costs.at["Fischer-Tropsch", "fixed"] + * costs.at[ + "Fischer-Tropsch", "efficiency" + ], # Use efficiency to convert from EUR/MW_FT/a to EUR/MW_H2/a + efficiency2=-costs.at["oil", "CO2 intensity"] + * costs.at["Fischer-Tropsch", "efficiency"], + p_nom_extendable=True, + p_min_pu=options.get("min_part_load_fischer_tropsch", 0), + lifetime=costs.at["Fischer-Tropsch", "lifetime"], + ) + + +def add_hydrogen(n, costs): + "function to add hydrogen as an energy carrier with its conversion technologies from and to AC" + + n.add("Carrier", "H2") + + n.madd( + "Bus", + spatial.nodes + " H2", + location=spatial.nodes, + carrier="H2", + x=n.buses.loc[list(spatial.nodes)].x.values, + y=n.buses.loc[list(spatial.nodes)].y.values, + ) + + if snakemake.config["sector"]["hydrogen"]["hydrogen_colors"]: + n.madd( + "Bus", + nodes + " grid H2", + location=nodes, + carrier="grid H2", + x=n.buses.loc[list(nodes)].x.values, + y=n.buses.loc[list(nodes)].y.values, + ) + + n.madd( + "Link", + nodes + " H2 Electrolysis", + bus0=nodes, + bus1=nodes + " grid H2", + p_nom_extendable=True, + carrier="H2 Electrolysis", + efficiency=costs.at["electrolysis", "efficiency"], + capital_cost=costs.at["electrolysis", "fixed"], + lifetime=costs.at["electrolysis", "lifetime"], + ) + + n.madd( + "Link", + nodes + " grid H2", + bus0=nodes + " grid H2", + bus1=nodes + " H2", + p_nom_extendable=True, + carrier="grid H2", + efficiency=1, + capital_cost=0, + ) + + else: + n.madd( + "Link", + nodes + " H2 Electrolysis", + bus1=nodes + " H2", + bus0=nodes, + p_nom_extendable=True, + carrier="H2 Electrolysis", + efficiency=costs.at["electrolysis", "efficiency"], + capital_cost=costs.at["electrolysis", "fixed"], + 
lifetime=costs.at["electrolysis", "lifetime"], + ) + + n.madd( + "Link", + spatial.nodes + " H2 Fuel Cell", + bus0=spatial.nodes + " H2", + bus1=spatial.nodes, + p_nom_extendable=True, + carrier="H2 Fuel Cell", + efficiency=costs.at["fuel cell", "efficiency"], + # NB: fixed cost is per MWel + capital_cost=costs.at["fuel cell", "fixed"] + * costs.at["fuel cell", "efficiency"], + lifetime=costs.at["fuel cell", "lifetime"], + ) + + cavern_nodes = pd.DataFrame() + + if snakemake.config["sector"]["hydrogen"]["underground_storage"]: + if snakemake.config["custom_data"]["h2_underground"]: + custom_cavern = pd.read_csv( + "data/custom/h2_underground_{0}_{1}.csv".format( + demand_sc, investment_year + ) + ) + # countries = n.buses.country.unique().to_list() + countries = snakemake.config["countries"] + custom_cavern = custom_cavern[custom_cavern.country.isin(countries)] + + cavern_nodes = n.buses[n.buses.country.isin(custom_cavern.country)] + + h2_pot = custom_cavern.set_index("id_region")["storage_cap_MWh"] + + h2_capital_cost = costs.at["hydrogen storage underground", "fixed"] + + n.madd( + "Bus", + nodes + " H2 UHS", + location=nodes, + carrier="H2 UHS", + x=n.buses.loc[list(nodes)].x.values, + y=n.buses.loc[list(nodes)].y.values, + ) + + n.madd( + "Store", + cavern_nodes.index + " H2 UHS", + bus=cavern_nodes.index + " H2 UHS", + e_nom_extendable=True, + e_nom_max=h2_pot.values, + e_cyclic=True, + carrier="H2 UHS", + capital_cost=h2_capital_cost, + ) + + n.madd( + "Link", + nodes + " H2 UHS charger", + bus0=nodes + " H2", + bus1=nodes + " H2 UHS", + carrier="H2 UHS charger", + p_nom_extendable=True, + ) + + n.madd( + "Link", + nodes + " H2 UHS discharger", + bus0=nodes + " H2 UHS", + bus1=nodes + " H2", + carrier="H2 UHS discharger", + efficiency=1, + p_nom_extendable=True, + ) + + else: + h2_salt_cavern_potential = pd.read_csv( + snakemake.input.h2_cavern, index_col=0 + ).squeeze() + h2_cavern_ct = h2_salt_cavern_potential[~h2_salt_cavern_potential.isna()] + cavern_nodes = n.buses[n.buses.country.isin(h2_cavern_ct.index)] + + h2_capital_cost = costs.at["hydrogen storage underground", "fixed"] + + # assumptions: weight storage potential in a country by population + # TODO: fix with real geographic potentials + # convert TWh to MWh with 1e6 + h2_pot = h2_cavern_ct.loc[cavern_nodes.country] + h2_pot.index = cavern_nodes.index + + # distribute underground potential equally over all nodes #TODO change with real data + s = pd.Series(h2_pot.index, index=h2_pot.index) + country_codes = s.str[:2] + code_counts = country_codes.value_counts() + fractions = country_codes.map(code_counts).rdiv(1) + h2_pot = h2_pot * fractions * 1e6 + + n.madd( + "Bus", + nodes + " H2 UHS", + location=nodes, + carrier="H2 UHS", + x=n.buses.loc[list(nodes)].x.values, + y=n.buses.loc[list(nodes)].y.values, + ) + + n.madd( + "Store", + cavern_nodes.index + " H2 UHS", + bus=cavern_nodes.index + " H2 UHS", + e_nom_extendable=True, + e_nom_max=h2_pot.values, + e_cyclic=True, + carrier="H2 UHS", + capital_cost=h2_capital_cost, + ) + + n.madd( + "Link", + nodes + " H2 UHS charger", + bus0=nodes, + bus1=nodes + " H2 UHS", + carrier="H2 UHS charger", + capital_cost=0, + p_nom_extendable=True, + ) + + n.madd( + "Link", + nodes + " H2 UHS discharger", + bus0=nodes, + bus1=nodes + " H2 UHS", + carrier="H2 UHS discharger", + efficiency=1, + capital_cost=0, + p_nom_extendable=True, + ) + + # hydrogen stored overground (where not already underground) + h2_capital_cost = costs.at[ + "hydrogen storage tank type 1 including compressor", 
"fixed" + ] + nodes_overground = nodes + n.madd( + "Store", + nodes_overground + " H2 Store Tank", + bus=nodes_overground + " H2", + e_nom_extendable=True, + e_cyclic=True, + carrier="H2 Store Tank", + capital_cost=h2_capital_cost, + ) + + # Hydrogen network: + # ----------------- + def add_links_repurposed_H2_pipelines(): + n.madd( + "Link", + h2_links.index + " repurposed", + bus0=h2_links.bus0.values + " H2", + bus1=h2_links.bus1.values + " H2", + p_min_pu=-1, + p_nom_extendable=True, + p_nom_max=h2_links.capacity.values + * 0.8, # https://gasforclimate2050.eu/wp-content/uploads/2020/07/2020_European-Hydrogen-Backbone_Report.pdf + length=h2_links.length.values, + capital_cost=costs.at["H2 (g) pipeline repurposed", "fixed"] + * h2_links.length.values, + carrier="H2 pipeline repurposed", + lifetime=costs.at["H2 (g) pipeline repurposed", "lifetime"], + ) + + def add_links_new_H2_pipelines(): + n.madd( + "Link", + h2_links.index, + bus0=h2_links.bus0.values + " H2", + bus1=h2_links.bus1.values + " H2", + p_min_pu=-1, + p_nom_extendable=True, + length=h2_links.length.values, + capital_cost=costs.at["H2 (g) pipeline", "fixed"] * h2_links.length.values, + carrier="H2 pipeline", + lifetime=costs.at["H2 (g) pipeline", "lifetime"], + ) + + def add_links_elec_routing_new_H2_pipelines(): + attrs = ["bus0", "bus1", "length"] + h2_links = pd.DataFrame(columns=attrs) + + candidates = pd.concat( + { + "lines": n.lines[attrs], + "links": n.links.loc[n.links.carrier == "DC", attrs], + } + ) + + for candidate in candidates.index: + buses = [ + candidates.at[candidate, "bus0"], + candidates.at[candidate, "bus1"], + ] + buses.sort() + name = f"H2 pipeline {buses[0]} -> {buses[1]}" + if name not in h2_links.index: + h2_links.at[name, "bus0"] = buses[0] + h2_links.at[name, "bus1"] = buses[1] + h2_links.at[name, "length"] = candidates.at[candidate, "length"] + + n.madd( + "Link", + h2_links.index, + bus0=h2_links.bus0.values + " H2", + bus1=h2_links.bus1.values + " H2", + p_min_pu=-1, + p_nom_extendable=True, + length=h2_links.length.values, + capital_cost=costs.at["H2 (g) pipeline", "fixed"] * h2_links.length.values, + carrier="H2 pipeline", + lifetime=costs.at["H2 (g) pipeline", "lifetime"], + ) + + # Add H2 Links: + if snakemake.config["sector"]["hydrogen"]["network"]: + h2_links = pd.read_csv(snakemake.input.pipelines) + + # Order buses to detect equal pairs for bidirectional pipelines + buses_ordered = h2_links.apply(lambda p: sorted([p.bus0, p.bus1]), axis=1) + + if snakemake.config["build_osm_network"]["force_ac"]: + # Appending string for carrier specification '_AC' + h2_links["bus0"] = buses_ordered.str[0] + "_AC" + h2_links["bus1"] = buses_ordered.str[1] + "_AC" + + # Create index column + h2_links["buses_idx"] = ( + "H2 pipeline " + h2_links["bus0"] + " -> " + h2_links["bus1"] + ) + + # Aggregate pipelines applying mean on length and sum on capacities + h2_links = h2_links.groupby("buses_idx").agg( + {"bus0": "first", "bus1": "first", "length": "mean", "capacity": "sum"} + ) + + if len(h2_links) > 0: + if snakemake.config["sector"]["hydrogen"]["gas_network_repurposing"]: + add_links_elec_routing_new_H2_pipelines() + if snakemake.config["sector"]["hydrogen"]["network_routes"] == "greenfield": + add_links_elec_routing_new_H2_pipelines() + else: + add_links_new_H2_pipelines() + else: + print( + "No existing gas network; applying greenfield for H2 network" + ) # TODO change to logger.info + add_links_elec_routing_new_H2_pipelines() + + if snakemake.config["sector"]["hydrogen"]["hydrogen_colors"]: + 
nuclear_gens_bus = n.generators[ + n.generators.carrier == "nuclear" + ].bus.values + buses_with_nuclear = n.buses.loc[nuclear_gens_bus] + buses_with_nuclear_ind = n.buses.loc[nuclear_gens_bus].index + + n.madd( + "Bus", + nuclear_gens_bus + " nuclear electricity", + location=buses_with_nuclear_ind, + carrier="nuclear electricity", + x=buses_with_nuclear.x.values, + y=buses_with_nuclear.y.values, + ) + + n.madd( + "Bus", + nuclear_gens_bus + " pink H2", + location=buses_with_nuclear_ind, + carrier="pink H2", + x=buses_with_nuclear.x.values, + y=buses_with_nuclear.y.values, + ) + + n.generators.loc[n.generators.carrier == "nuclear", "bus"] = ( + n.generators.loc[n.generators.carrier == "nuclear", "bus"] + + " nuclear electricity" + ) + + n.madd( + "Link", + buses_with_nuclear_ind + " nuclear-to-grid", + bus0=buses_with_nuclear_ind + " nuclear electricity", + bus1=buses_with_nuclear_ind, + carrier="nuclear-to-grid", + capital_cost=0, + p_nom_extendable=True, + ) + + n.madd( + "Link", + buses_with_nuclear_ind + " high-temp electrolysis", + bus0=buses_with_nuclear_ind + " nuclear electricity", + bus1=buses_with_nuclear_ind + " pink H2", + carrier="high-temp electrolysis", + p_nom_extendable=True, + efficiency=costs.at["electrolysis", "efficiency"] + 0.1, + capital_cost=costs.at["electrolysis", "fixed"] + + costs.at["electrolysis", "fixed"] * 0.1, + lifetime=costs.at["electrolysis", "lifetime"], + ) + + n.madd( + "Link", + buses_with_nuclear_ind + " pink H2", + bus0=buses_with_nuclear_ind + " pink H2", + bus1=buses_with_nuclear_ind + " H2", + carrier="pink H2", + capital_cost=0, + p_nom_extendable=True, + ) + + +def define_spatial(nodes, options): + """ + Namespace for spatial. + + Parameters + ---------- + nodes : list-like + """ + + global spatial + + spatial.nodes = nodes + + # biomass + + spatial.biomass = SimpleNamespace() + + if options["biomass_transport"]: + spatial.biomass.nodes = nodes + " solid biomass" + spatial.biomass.locations = nodes + spatial.biomass.industry = nodes + " solid biomass for industry" + spatial.biomass.industry_cc = nodes + " solid biomass for industry CC" + else: + spatial.biomass.nodes = ["Africa solid biomass"] + spatial.biomass.locations = ["Africa"] + spatial.biomass.industry = ["solid biomass for industry"] + spatial.biomass.industry_cc = ["solid biomass for industry CC"] + + spatial.biomass.df = pd.DataFrame(vars(spatial.biomass), index=nodes) + + # co2 + + spatial.co2 = SimpleNamespace() + + if options["co2_network"]: + spatial.co2.nodes = nodes + " co2 stored" + spatial.co2.locations = nodes + spatial.co2.vents = nodes + " co2 vent" + else: + spatial.co2.nodes = ["co2 stored"] + spatial.co2.locations = ["Africa"] + spatial.co2.vents = ["co2 vent"] + + spatial.co2.df = pd.DataFrame(vars(spatial.co2), index=nodes) + + return spatial + + +def add_biomass(n, costs): + logger.info("adding biomass") + + # TODO get biomass potentials dataset and enable spatially resolved potentials + + # Get biomass and biogas potentials from config and convert from TWh to MWh + biomass_pot = snakemake.config["sector"]["solid_biomass_potential"] * 1e6 # MWh + biogas_pot = snakemake.config["sector"]["biogas_potential"] * 1e6 # MWh + logger.info("Biomass and Biogas potential fetched from config") + + # Convert from total to nodal potentials, + biomass_pot_spatial = biomass_pot / len(spatial.biomass.nodes) + biogas_pot_spatial = biogas_pot / len(spatial.gas.biogas) + logger.info("Biomass potentials spatially resolved equally across all nodes") + + n.add("Carrier", "biogas") + 
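# ---- Illustrative sketch (editor's example, not part of the patch) -----
# define_spatial above toggles each carrier between per-node buses and a
# single copper-plated "Africa" bus via SimpleNamespace attributes. A
# minimal runnable version of that pattern, with toy node names:
from types import SimpleNamespace

import pandas as pd

def toy_define_spatial(nodes, co2_network):
    spatial = SimpleNamespace()
    spatial.nodes = nodes
    spatial.co2 = SimpleNamespace()
    if co2_network:
        spatial.co2.nodes = nodes + " co2 stored"  # one bus per node
        spatial.co2.locations = nodes
    else:
        spatial.co2.nodes = ["co2 stored"]  # single aggregated bus
        spatial.co2.locations = ["Africa"]
    return spatial

toy_nodes = pd.Index(["DZ0 0", "MA0 0"])
print(toy_define_spatial(toy_nodes, True).co2.nodes)
# -> Index(['DZ0 0 co2 stored', 'MA0 0 co2 stored'], dtype='object')
print(toy_define_spatial(toy_nodes, False).co2.nodes)
# -> ['co2 stored']
# -------------------------------------------------------------------------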
n.add("Carrier", "solid biomass") + + n.madd( + "Bus", spatial.gas.biogas, location=spatial.biomass.locations, carrier="biogas" + ) + + n.madd( + "Bus", + spatial.biomass.nodes, + location=spatial.biomass.locations, + carrier="solid biomass", + ) + + n.madd( + "Store", + spatial.gas.biogas, + bus=spatial.gas.biogas, + carrier="biogas", + e_nom=biogas_pot_spatial, + marginal_cost=costs.at["biogas", "fuel"], + e_initial=biogas_pot_spatial, + ) + + n.madd( + "Store", + spatial.biomass.nodes, + bus=spatial.biomass.nodes, + carrier="solid biomass", + e_nom=biomass_pot_spatial, + marginal_cost=costs.at["solid biomass", "fuel"], + e_initial=biomass_pot_spatial, + ) + + biomass_gen = "biomass EOP" + n.madd( + "Link", + spatial.nodes + " biomass EOP", + bus0=spatial.biomass.nodes, + bus1=spatial.nodes, + marginal_cost=costs.at[biomass_gen, "efficiency"] + * costs.at[biomass_gen, "VOM"], # NB: VOM is per MWel + # NB: fixed cost is per MWel + capital_cost=costs.at[biomass_gen, "efficiency"] + * costs.at[biomass_gen, "fixed"], + p_nom_extendable=True, + carrier=biomass_gen, + efficiency=costs.at[biomass_gen, "efficiency"], + lifetime=costs.at[biomass_gen, "lifetime"], + ) + n.madd( + "Link", + spatial.gas.biogas_to_gas, + bus0=spatial.gas.biogas, + bus1=spatial.gas.nodes, + bus2="co2 atmosphere", + carrier="biogas to gas", + capital_cost=costs.loc["biogas upgrading", "fixed"], + marginal_cost=costs.loc["biogas upgrading", "VOM"], + efficiency2=-costs.at["gas", "CO2 intensity"], + p_nom_extendable=True, + ) + + if options["biomass_transport"]: + # TODO add biomass transport costs + transport_costs = pd.read_csv( + snakemake.input.biomass_transport_costs, + index_col=0, + keep_default_na=False, + ).squeeze() + + # add biomass transport + biomass_transport = create_network_topology( + n, "biomass transport ", bidirectional=False + ) + + # costs + countries_not_in_index = set(countries) - set(biomass_transport.index) + if countries_not_in_index: + logger.info( + "No transport values found for {0}, using default value of {1}".format( + ", ".join(countries_not_in_index), + snakemake.config["sector"]["biomass_transport_default_cost"], + ) + ) + + bus0_costs = biomass_transport.bus0.apply( + lambda x: transport_costs.get( + x[:2], snakemake.config["sector"]["biomass_transport_default_cost"] + ) + ) + bus1_costs = biomass_transport.bus1.apply( + lambda x: transport_costs.get( + x[:2], snakemake.config["sector"]["biomass_transport_default_cost"] + ) + ) + biomass_transport["costs"] = pd.concat([bus0_costs, bus1_costs], axis=1).mean( + axis=1 + ) + + n.madd( + "Link", + biomass_transport.index, + bus0=biomass_transport.bus0 + " solid biomass", + bus1=biomass_transport.bus1 + " solid biomass", + p_nom_extendable=True, + length=biomass_transport.length.values, + marginal_cost=biomass_transport.costs * biomass_transport.length.values, + capital_cost=1, + carrier="solid biomass transport", + ) + + # AC buses with district heating + urban_central = n.buses.index[n.buses.carrier == "urban central heat"] + if not urban_central.empty and options["chp"]: + urban_central = urban_central.str[: -len(" urban central heat")] + + key = "central solid biomass CHP" + + n.madd( + "Link", + urban_central + " urban central solid biomass CHP", + bus0=spatial.biomass.df.loc[urban_central, "nodes"].values, + bus1=urban_central, + bus2=urban_central + " urban central heat", + carrier="urban central solid biomass CHP", + p_nom_extendable=True, + capital_cost=costs.at[key, "fixed"] * costs.at[key, "efficiency"], + 
marginal_cost=costs.at[key, "VOM"],
+            efficiency=costs.at[key, "efficiency"],
+            efficiency2=costs.at[key, "efficiency-heat"],
+            lifetime=costs.at[key, "lifetime"],
+        )
+
+        if snakemake.config["sector"]["cc"]:
+            n.madd(
+                "Link",
+                urban_central + " urban central solid biomass CHP CC",
+                bus0=spatial.biomass.df.loc[urban_central, "nodes"].values,
+                bus1=urban_central,
+                bus2=urban_central + " urban central heat",
+                bus3="co2 atmosphere",
+                bus4=spatial.co2.df.loc[urban_central, "nodes"].values,
+                carrier="urban central solid biomass CHP CC",
+                p_nom_extendable=True,
+                capital_cost=costs.at[key, "fixed"] * costs.at[key, "efficiency"]
+                + costs.at["biomass CHP capture", "fixed"]
+                * costs.at["solid biomass", "CO2 intensity"],
+                marginal_cost=costs.at[key, "VOM"],
+                efficiency=costs.at[key, "efficiency"]
+                - costs.at["solid biomass", "CO2 intensity"]
+                * (
+                    costs.at["biomass CHP capture", "electricity-input"]
+                    + costs.at["biomass CHP capture", "compression-electricity-input"]
+                ),
+                efficiency2=costs.at[key, "efficiency-heat"]
+                + costs.at["solid biomass", "CO2 intensity"]
+                * (
+                    costs.at["biomass CHP capture", "heat-output"]
+                    + costs.at["biomass CHP capture", "compression-heat-output"]
+                    - costs.at["biomass CHP capture", "heat-input"]
+                ),
+                efficiency3=-costs.at["solid biomass", "CO2 intensity"]
+                * costs.at["biomass CHP capture", "capture_rate"],
+                efficiency4=costs.at["solid biomass", "CO2 intensity"]
+                * costs.at["biomass CHP capture", "capture_rate"],
+                lifetime=costs.at[key, "lifetime"],
+            )
+
+
+def add_co2(n, costs):
+    """Add the carbon carrier, its network and its storage units."""
+
+    # minus sign because it is the opposite of how fossil fuels are used:
+    # burning CH4 draws the CH4 store down and pushes the atmosphere store up
+    n.add("Carrier", "co2", co2_emissions=-1.0)
+
+    # this tracks CO2 in the atmosphere
+    n.add(
+        "Bus",
+        "co2 atmosphere",
+        location="Africa",  # TODO Ignored by PyPSA check
+        carrier="co2",
+    )
+
+    # can also be negative
+    n.add(
+        "Store",
+        "co2 atmosphere",
+        e_nom_extendable=True,
+        e_min_pu=-1,
+        carrier="co2",
+        bus="co2 atmosphere",
+    )
+
+    # this tracks CO2 stored, e.g. underground
+    n.madd(
+        "Bus",
+        spatial.co2.nodes,
+        location=spatial.co2.locations,
+        carrier="co2 stored",
+    )
+    """
+    co2_stored_x = n.buses.filter(like="co2 stored", axis=0).loc[:, "x"]
+    co2_stored_y = n.buses.loc[
+        n.buses[n.buses.carrier == "co2 stored"].location
+    ].y
+
+    n.buses[n.buses.carrier == "co2 stored"].x = co2_stored_x.values
+    n.buses[n.buses.carrier == "co2 stored"].y = co2_stored_y.values
+    """
+
+    n.madd(
+        "Link",
+        spatial.co2.vents,
+        bus0=spatial.co2.nodes,
+        bus1="co2 atmosphere",
+        carrier="co2 vent",
+        efficiency=1.0,
+        p_nom_extendable=True,
+    )
+
+    # logger.info("Adding CO2 network.")
+    co2_links = create_network_topology(n, "CO2 pipeline ")
+
+    cost_onshore = (
+        (1 - co2_links.underwater_fraction)
+        * costs.at["CO2 pipeline", "fixed"]
+        * co2_links.length
+    )
+    cost_submarine = (
+        co2_links.underwater_fraction
+        * costs.at["CO2 submarine pipeline", "fixed"]
+        * co2_links.length
+    )
+    capital_cost = cost_onshore + cost_submarine
+
+    n.madd(
+        "Link",
+        co2_links.index,
+        bus0=co2_links.bus0.values + " co2 stored",
+        bus1=co2_links.bus1.values + " co2 stored",
+        p_min_pu=-1,
+        p_nom_extendable=True,
+        length=co2_links.length.values,
+        capital_cost=capital_cost.values,
+        carrier="CO2 pipeline",
+        lifetime=costs.at["CO2 pipeline", "lifetime"],
+    )
+
+    n.madd(
+        "Store",
+        spatial.co2.nodes,
+        e_nom_extendable=True,
+        e_nom_max=np.inf,
+        capital_cost=options["co2_sequestration_cost"],
+        carrier="co2 stored",
+        bus=spatial.co2.nodes,
+    )
+
+
+def add_aviation(
+    n,
+    costs,
+    gadm_level_val,
+    geo_crs_val,
+    file_prefix_val,
+    gadm_url_prefix_val,
+    contended_flag_val,
+    gadm_input_file_args_list,
+    shapes_path_val,
+    gadm_clustering_val,
+):
+    all_aviation = ["total international aviation", "total domestic aviation"]
+
+    aviation_demand = (
+        energy_totals.loc[countries, all_aviation].sum(axis=1).sum()  # * 1e6 / 8760
+    )
+
+    airports = pd.read_csv(snakemake.input.airports, keep_default_na=False)
+    airports = airports[airports.country.isin(countries)]
+
+    gadm_level = options["gadm_level"]
+
+    airports["gadm_{}".format(gadm_level)] = airports[["x", "y", "country"]].apply(
+        lambda airport: locate_bus(
+            airport[["x", "y"]],
+            airport["country"],
+            gadm_level_val,
+            geo_crs_val,
+            file_prefix_val,
+            gadm_url_prefix_val,
+            gadm_input_file_args_list,
+            contended_flag_val,
+            path_to_gadm=shapes_path_val,
+            gadm_clustering=gadm_clustering_val,
+        ),
+        axis=1,
+    )
+    # To convert a 3-letter country code to 2 letters
+    # airports["gadm_{}".format(gadm_level)] = airports["gadm_{}".format(gadm_level)].apply(
+    #     lambda cocode: three_2_two_digits_country(cocode[:3]) + " " + cocode[4:-2])
+
+    airports = airports.set_index("gadm_{}".format(gadm_level))
+
+    ind = pd.DataFrame(n.buses.index[n.buses.carrier == "AC"])
+
+    ind = ind.set_index(n.buses.index[n.buses.carrier == "AC"])
+    airports["p_set"] = airports["fraction"].apply(
+        lambda frac: frac * aviation_demand * 1e6 / 8760
+    )
+
+    airports = pd.concat([airports, ind])
+
+    airports = airports.groupby(airports.index).sum()
+    n.madd(
+        "Load",
+        spatial.nodes,
+        suffix=" kerosene for aviation",
+        bus=spatial.oil.nodes,
+        carrier="kerosene for aviation",
+        p_set=airports["p_set"],
+    )
+
+    if snakemake.config["sector"]["international_bunkers"]:
+        co2 = airports["p_set"].sum() * costs.at["oil", "CO2 intensity"]
+    else:
+        domestic_to_total = energy_totals["total domestic aviation"] / (
+            energy_totals["total international aviation"]
+ + energy_totals["total domestic aviation"] + ) + + co2 = ( + airports["p_set"].sum() + * domestic_to_total + * costs.at["oil", "CO2 intensity"] + ).sum() + + n.add( + "Load", + "aviation oil emissions", + bus="co2 atmosphere", + carrier="oil emissions", + p_set=-co2, + ) + + +def add_storage(n, costs): + "function to add the different types of storage systems" + n.add("Carrier", "battery") + + n.madd( + "Bus", + spatial.nodes + " battery", + location=spatial.nodes, + carrier="battery", + x=n.buses.loc[list(spatial.nodes)].x.values, + y=n.buses.loc[list(spatial.nodes)].y.values, + ) + + n.madd( + "Store", + spatial.nodes + " battery", + bus=spatial.nodes + " battery", + e_cyclic=True, + e_nom_extendable=True, + carrier="battery", + capital_cost=costs.at["battery storage", "fixed"], + lifetime=costs.at["battery storage", "lifetime"], + ) + + n.madd( + "Link", + spatial.nodes + " battery charger", + bus0=spatial.nodes, + bus1=spatial.nodes + " battery", + carrier="battery charger", + efficiency=costs.at["battery inverter", "efficiency"] ** 0.5, + capital_cost=costs.at["battery inverter", "fixed"], + p_nom_extendable=True, + lifetime=costs.at["battery inverter", "lifetime"], + ) + + n.madd( + "Link", + spatial.nodes + " battery discharger", + bus0=spatial.nodes + " battery", + bus1=spatial.nodes, + carrier="battery discharger", + efficiency=costs.at["battery inverter", "efficiency"] ** 0.5, + marginal_cost=options["marginal_cost_storage"], + p_nom_extendable=True, + lifetime=costs.at["battery inverter", "lifetime"], + ) + + +def h2_hc_conversions(n, costs): + """ + Function to add the conversion technologies between H2 and hydrocarbons. + """ + if options["methanation"]: + n.madd( + "Link", + spatial.nodes, + suffix=" Sabatier", + bus0=spatial.nodes + " H2", + bus1=spatial.gas.nodes, + bus2=spatial.co2.nodes, + p_nom_extendable=True, + carrier="Sabatier", + efficiency=costs.at["methanation", "efficiency"], + efficiency2=-costs.at["methanation", "efficiency"] + * costs.at["gas", "CO2 intensity"], + # costs given per kW_gas + capital_cost=costs.at["methanation", "fixed"] + * costs.at["methanation", "efficiency"], + lifetime=costs.at["methanation", "lifetime"], + ) + + if options["helmeth"]: + n.madd( + "Link", + spatial.nodes, + suffix=" helmeth", + bus0=spatial.nodes, + bus1=spatial.gas.nodes, + bus2=spatial.co2.nodes, + carrier="helmeth", + p_nom_extendable=True, + efficiency=costs.at["helmeth", "efficiency"], + efficiency2=-costs.at["helmeth", "efficiency"] + * costs.at["gas", "CO2 intensity"], + capital_cost=costs.at["helmeth", "fixed"], + lifetime=costs.at["helmeth", "lifetime"], + ) + + if options["SMR CC"]: + if snakemake.config["sector"]["hydrogen"]["hydrogen_colors"]: + n.madd( + "Bus", + nodes + " blue H2", + location=nodes, + carrier="blue H2", + x=n.buses.loc[list(nodes)].x.values, + y=n.buses.loc[list(nodes)].y.values, + ) + + n.madd( + "Link", + spatial.nodes, + suffix=" SMR CC", + bus0=spatial.gas.nodes, + bus1=nodes + " blue H2", + bus2="co2 atmosphere", + bus3=spatial.co2.nodes, + p_nom_extendable=True, + carrier="SMR CC", + efficiency=costs.at["SMR CC", "efficiency"], + efficiency2=costs.at["gas", "CO2 intensity"] + * (1 - options["cc_fraction"]), + efficiency3=costs.at["gas", "CO2 intensity"] * options["cc_fraction"], + capital_cost=costs.at["SMR CC", "fixed"], + lifetime=costs.at["SMR CC", "lifetime"], + ) + + n.madd( + "Link", + nodes + " blue H2", + bus0=nodes + " blue H2", + bus1=nodes + " H2", + carrier="blue H2", + capital_cost=0, + p_nom_extendable=True, + ) + + else: 
+ n.madd( + "Link", + spatial.nodes, + suffix=" SMR CC", + bus0=spatial.gas.nodes, + bus1=nodes + " H2", + bus2="co2 atmosphere", + bus3=spatial.co2.nodes, + p_nom_extendable=True, + carrier="SMR CC", + efficiency=costs.at["SMR CC", "efficiency"], + efficiency2=costs.at["gas", "CO2 intensity"] + * (1 - options["cc_fraction"]), + efficiency3=costs.at["gas", "CO2 intensity"] * options["cc_fraction"], + capital_cost=costs.at["SMR CC", "fixed"], + lifetime=costs.at["SMR CC", "lifetime"], + ) + + if options["SMR"]: + if snakemake.config["sector"]["hydrogen"]["hydrogen_colors"]: + n.madd( + "Bus", + nodes + " grey H2", + location=nodes, + carrier="grey H2", + x=n.buses.loc[list(nodes)].x.values, + y=n.buses.loc[list(nodes)].y.values, + ) + + n.madd( + "Link", + nodes + " SMR", + bus0=spatial.gas.nodes, + bus1=nodes + " grey H2", + bus2="co2 atmosphere", + p_nom_extendable=True, + carrier="SMR", + efficiency=costs.at["SMR", "efficiency"], + efficiency2=costs.at["gas", "CO2 intensity"], + capital_cost=costs.at["SMR", "fixed"], + lifetime=costs.at["SMR", "lifetime"], + ) + + n.madd( + "Link", + nodes + " grey H2", + bus0=nodes + " grey H2", + bus1=nodes + " H2", + carrier="grey H2", + capital_cost=0, + p_nom_extendable=True, + ) + + else: + n.madd( + "Link", + nodes + " SMR", + bus0=spatial.gas.nodes, + bus1=nodes + " H2", + bus2="co2 atmosphere", + p_nom_extendable=True, + carrier="SMR", + efficiency=costs.at["SMR", "efficiency"], + efficiency2=costs.at["gas", "CO2 intensity"], + capital_cost=costs.at["SMR", "fixed"], + lifetime=costs.at["SMR", "lifetime"], + ) + + +def add_shipping( + n, + costs, + gadm_level_val, + geo_crs_val, + file_prefix_val, + gadm_url_prefix_val, + contended_flag_val, + gadm_input_file_args_list, + shapes_path_val, + gadm_clustering_val, +): + ports = pd.read_csv( + snakemake.input.ports, index_col=None, keep_default_na=False + ).squeeze() + ports = ports[ports.country.isin(countries)] + + gadm_level = options["gadm_level"] + + all_navigation = ["total international navigation", "total domestic navigation"] + + navigation_demand = ( + energy_totals.loc[countries, all_navigation].sum(axis=1).sum() # * 1e6 / 8760 + ) + + efficiency = ( + options["shipping_average_efficiency"] / costs.at["fuel cell", "efficiency"] + ) + + # check whether item depends on investment year + shipping_hydrogen_share = get( + options["shipping_hydrogen_share"], demand_sc + "_" + str(investment_year) + ) + + ports["gadm_{}".format(gadm_level)] = ports[["x", "y", "country"]].apply( + lambda port: locate_bus( + port[["x", "y"]], + port["country"], + gadm_level_val, + geo_crs_val, + file_prefix_val, + gadm_url_prefix_val, + gadm_input_file_args_list, + contended_flag_val, + path_to_gadm=shapes_path_val, + gadm_clustering=gadm_clustering_val, + ), + axis=1, + ) + + ports = ports.set_index("gadm_{}".format(gadm_level)) + + ind = pd.DataFrame(n.buses.index[n.buses.carrier == "AC"]) + ind = ind.set_index(n.buses.index[n.buses.carrier == "AC"]) + + ports["p_set"] = ports["fraction"].apply( + lambda frac: shipping_hydrogen_share + * frac + * navigation_demand + * efficiency + * 1e6 + / 8760 + # TODO double check the use of efficiency + ) # TODO use real data here + + ports = pd.concat([ports, ind]).drop("Bus", axis=1) + + # ports = ports.fillna(0.0) + ports = ports.groupby(ports.index).sum() + + if options["shipping_hydrogen_liquefaction"]: + n.madd( + "Bus", + nodes, + suffix=" H2 liquid", + carrier="H2 liquid", + location=spatial.nodes, + ) + + # link the H2 supply to liquified H2 + n.madd( + "Link", + 
spatial.nodes + " H2 liquefaction", + bus0=spatial.nodes + " H2", + bus1=spatial.nodes + " H2 liquid", + carrier="H2 liquefaction", + efficiency=costs.at["H2 liquefaction", "efficiency"], + capital_cost=costs.at["H2 liquefaction", "fixed"], + p_nom_extendable=True, + lifetime=costs.at["H2 liquefaction", "lifetime"], + ) + + shipping_bus = spatial.nodes + " H2 liquid" + else: + shipping_bus = spatial.nodes + " H2" + + if not ( + snakemake.config["policy_config"]["hydrogen"]["is_reference"] + and snakemake.config["policy_config"]["hydrogen"]["remove_h2_load"] + ): + n.madd( + "Load", + nodes, + suffix=" H2 for shipping", + bus=shipping_bus, + carrier="H2 for shipping", + p_set=ports["p_set"], + ) + + if shipping_hydrogen_share < 1: + shipping_oil_share = 1 - shipping_hydrogen_share + + ports["p_set"] = ports["fraction"].apply( + lambda frac: shipping_oil_share * frac * navigation_demand * 1e6 / 8760 + ) + + n.madd( + "Load", + spatial.nodes, + suffix=" shipping oil", + bus=spatial.oil.nodes, + carrier="shipping oil", + p_set=ports["p_set"], + ) + + if snakemake.config["sector"]["international_bunkers"]: + co2 = ports["p_set"].sum() * costs.at["oil", "CO2 intensity"] + else: + domestic_to_total = energy_totals["total domestic navigation"] / ( + energy_totals["total domestic navigation"] + + energy_totals["total international navigation"] + ) + + co2 = ( + ports["p_set"].sum() + * domestic_to_total + * costs.at["oil", "CO2 intensity"] + ).sum() + + n.add( + "Load", + "shipping oil emissions", + bus="co2 atmosphere", + carrier="shipping oil emissions", + p_set=-co2, + ) + + if "oil" not in n.buses.carrier.unique(): + n.madd("Bus", spatial.oil.nodes, location=spatial.oil.locations, carrier="oil") + if "oil" not in n.stores.carrier.unique(): + # could correct to e.g. 
0.001 EUR/kWh * annuity and O&M + n.madd( + "Store", + [oil_bus + " Store" for oil_bus in spatial.oil.nodes], + bus=spatial.oil.nodes, + e_nom_extendable=True, + e_cyclic=True, + carrier="oil", + ) + + if "oil" not in n.generators.carrier.unique(): + n.madd( + "Generator", + spatial.oil.nodes, + bus=spatial.oil.nodes, + p_nom_extendable=True, + carrier="oil", + marginal_cost=costs.at["oil", "fuel"], + ) + + +def add_industry(n, costs): + logger.info("adding industrial demand") + # 1e6 to convert TWh to MWh + + # industrial_demand.reset_index(inplace=True) + + # Add carrier Biomass + + n.madd( + "Bus", + spatial.biomass.industry, + location=spatial.biomass.locations, + carrier="solid biomass for industry", + ) + + if options["biomass_transport"]: + p_set = ( + industrial_demand.loc[spatial.biomass.locations, "solid biomass"].rename( + index=lambda x: x + " solid biomass for industry" + ) + / 8760 + ) + else: + p_set = industrial_demand["solid biomass"].sum() / 8760 + + n.madd( + "Load", + spatial.biomass.industry, + bus=spatial.biomass.industry, + carrier="solid biomass for industry", + p_set=p_set, + ) + + n.madd( + "Link", + spatial.biomass.industry, + bus0=spatial.biomass.nodes, + bus1=spatial.biomass.industry, + carrier="solid biomass for industry", + p_nom_extendable=True, + efficiency=1.0, + ) + if snakemake.config["sector"]["cc"]: + n.madd( + "Link", + spatial.biomass.industry_cc, + bus0=spatial.biomass.nodes, + bus1=spatial.biomass.industry, + bus2="co2 atmosphere", + bus3=spatial.co2.nodes, + carrier="solid biomass for industry CC", + p_nom_extendable=True, + capital_cost=costs.at["cement capture", "fixed"] + * costs.at["solid biomass", "CO2 intensity"], + efficiency=0.9, # TODO: make config option + efficiency2=-costs.at["solid biomass", "CO2 intensity"] + * costs.at["cement capture", "capture_rate"], + efficiency3=costs.at["solid biomass", "CO2 intensity"] + * costs.at["cement capture", "capture_rate"], + lifetime=costs.at["cement capture", "lifetime"], + ) + + # CARRIER = FOSSIL GAS + n.madd( + "Bus", + spatial.gas.industry, + location=spatial.gas.locations, + carrier="gas for industry", + ) + + gas_demand = industrial_demand.loc[spatial.nodes, "gas"] / 8760.0 + + if options["gas"]["spatial_gas"]: + spatial_gas_demand = gas_demand.rename(index=lambda x: x + " gas for industry") + else: + spatial_gas_demand = gas_demand.sum() + + n.madd( + "Load", + spatial.gas.industry, + bus=spatial.gas.industry, + carrier="gas for industry", + p_set=spatial_gas_demand, + ) + + n.madd( + "Link", + spatial.gas.industry, + bus0=spatial.gas.nodes, + bus1=spatial.gas.industry, + bus2="co2 atmosphere", + carrier="gas for industry", + p_nom_extendable=True, + efficiency=1.0, + efficiency2=costs.at["gas", "CO2 intensity"], + ) + if snakemake.config["sector"]["cc"]: + n.madd( + "Link", + spatial.gas.industry_cc, + bus0=spatial.gas.nodes, + bus1=spatial.gas.industry, + bus2="co2 atmosphere", + bus3=spatial.co2.nodes, + carrier="gas for industry CC", + p_nom_extendable=True, + capital_cost=costs.at["cement capture", "fixed"] + * costs.at["gas", "CO2 intensity"], + efficiency=0.9, + efficiency2=costs.at["gas", "CO2 intensity"] + * (1 - costs.at["cement capture", "capture_rate"]), + efficiency3=costs.at["gas", "CO2 intensity"] + * costs.at["cement capture", "capture_rate"], + lifetime=costs.at["cement capture", "lifetime"], + ) + + #################################################### CARRIER = HYDROGEN + + if not ( + snakemake.config["policy_config"]["hydrogen"]["is_reference"] + and 
snakemake.config["policy_config"]["hydrogen"]["remove_h2_load"] + ): + n.madd( + "Load", + nodes, + suffix=" H2 for industry", + bus=nodes + " H2", + carrier="H2 for industry", + p_set=industrial_demand["hydrogen"].apply(lambda frac: frac / 8760), + ) + + # CARRIER = LIQUID HYDROCARBONS + n.madd( + "Load", + spatial.nodes, + suffix=" naphtha for industry", + bus=spatial.oil.nodes, + carrier="naphtha for industry", + p_set=industrial_demand["oil"] / 8760, + ) + + # #NB: CO2 gets released again to atmosphere when plastics decay or kerosene is burned + # #except for the process emissions when naphtha is used for petrochemicals, which can be captured with other industry process emissions + # #tco2 per hour + # TODO kerosene for aviation should be added too but in the right func. + co2_release = [" naphtha for industry"] + # check land transport + + co2 = ( + n.loads.loc[spatial.nodes + co2_release, "p_set"].sum() + * costs.at["oil", "CO2 intensity"] + # - industrial_demand["process emission from feedstock"].sum() + # / 8760 + ) + + n.add( + "Load", + "industry oil emissions", + bus="co2 atmosphere", + carrier="industry oil emissions", + p_set=-co2, + ) + + co2 = ( + industrial_demand["coal"].sum() + * costs.at["coal", "CO2 intensity"] + # - industrial_demand["process emission from feedstock"].sum() + / 8760 + ) + + n.add( + "Load", + "industry coal emissions", + bus="co2 atmosphere", + carrier="industry coal emissions", + p_set=-co2, + ) + + ########################################################### CARRIER = HEAT + # TODO simplify bus expression + n.madd( + "Load", + spatial.nodes, + suffix=" low-temperature heat for industry", + bus=[ + ( + node + " urban central heat" + if node + " urban central heat" in n.buses.index + else node + " services urban decentral heat" + ) + for node in spatial.nodes + ], + carrier="low-temperature heat for industry", + p_set=industrial_demand.loc[spatial.nodes, "low-temperature heat"] / 8760, + ) + + ################################################## CARRIER = ELECTRICITY + + # # remove today's industrial electricity demand by scaling down total electricity demand + for ct in n.buses.country.dropna().unique(): + # TODO map onto n.bus.country + # TODO make sure to check this one, should AC have carrier pf "electricity"? 
+ loads_i = n.loads.index[ + (n.loads.index.str[:2] == ct) & (n.loads.carrier == "AC") + ] + if n.loads_t.p_set.columns.intersection(loads_i).empty: + continue + + industrial_elec = industrial_demand["electricity"] / 8760 + + n.madd( + "Load", + spatial.nodes, + suffix=" industry electricity", + bus=spatial.nodes, + carrier="industry electricity", + p_set=industrial_elec, + ) + + n.add("Bus", "process emissions", location="Africa", carrier="process emissions") + + # this should be process emissions fossil+feedstock + # then need load on atmosphere for feedstock emissions that are currently going to atmosphere via Link Fischer-Tropsch demand + n.madd( + "Load", + spatial.nodes, + suffix=" process emissions", + bus="process emissions", + carrier="process emissions", + p_set=-( + # industrial_demand["process emission from feedstock"]+ + industrial_demand["process emissions"] + ) + / 8760, + ) + + n.add( + "Link", + "process emissions", + bus0="process emissions", + bus1="co2 atmosphere", + carrier="process emissions", + p_nom_extendable=True, + efficiency=1.0, + ) + + # assume enough local waste heat for CC + if snakemake.config["sector"]["cc"]: + n.madd( + "Link", + spatial.co2.locations, + suffix=" process emissions CC", + bus0="process emissions", + bus1="co2 atmosphere", + bus2=spatial.co2.nodes, + carrier="process emissions CC", + p_nom_extendable=True, + capital_cost=costs.at["cement capture", "fixed"], + efficiency=1 - costs.at["cement capture", "capture_rate"], + efficiency2=costs.at["cement capture", "capture_rate"], + lifetime=costs.at["cement capture", "lifetime"], + ) + + +def get(item, investment_year=None): + """ + Check whether item depends on investment year. + """ + if isinstance(item, dict): + return item[investment_year] + else: + return item + + +""" +Missing data: + - transport + - aviation data + - nodal_transport_data + - cycling_shift + - dsm_profile + - avail_profile +""" + + +def add_land_transport(n, costs): + """ + Function to add land transport to network. + """ + # TODO options? + + logger.info("adding land transport") + + if options["dynamic_transport"]["enable"] == False: + fuel_cell_share = get( + options["land_transport_fuel_cell_share"], + demand_sc + "_" + str(investment_year), + ) + electric_share = get( + options["land_transport_electric_share"], + demand_sc + "_" + str(investment_year), + ) + + elif options["dynamic_transport"]["enable"] == True: + fuel_cell_share = options["dynamic_transport"][ + "land_transport_fuel_cell_share" + ][snakemake.wildcards.opts] + electric_share = options["dynamic_transport"]["land_transport_electric_share"][ + snakemake.wildcards.opts + ] + + ice_share = 1 - fuel_cell_share - electric_share + + logger.info("FCEV share: {}".format(fuel_cell_share)) + logger.info("EV share: {}".format(electric_share)) + logger.info("ICEV share: {}".format(ice_share)) + + assert ice_share >= 0, "Error, more FCEV and EV share than 1." 
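# ---- Illustrative sketch (editor's example, not part of the patch) -----
# The get() helper defined above lets an option be either a plain scalar or
# a dict keyed by "<demand scenario>_<investment year>". Hypothetical values:
def get(item, investment_year=None):
    if isinstance(item, dict):
        return item[investment_year]
    return item

assert get(0.15) == 0.15  # static option
assert get({"AB_2030": 0.15, "AB_2050": 0.40}, "AB_2030") == 0.15  # year-dependent

# the three land-transport shares must not exceed 1 in total
fuel_cell_share, electric_share = 0.15, 0.25
ice_share = 1 - fuel_cell_share - electric_share
assert ice_share >= 0
# -------------------------------------------------------------------------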
+ + # Nodes are already defined, remove it from here + # nodes = pop_layout.index + + if electric_share > 0: + n.add("Carrier", "Li ion") + + n.madd( + "Bus", + spatial.nodes, + location=spatial.nodes, + suffix=" EV battery", + carrier="Li ion", + x=n.buses.loc[list(spatial.nodes)].x.values, + y=n.buses.loc[list(spatial.nodes)].y.values, + ) + + p_set = ( + electric_share + * ( + transport[spatial.nodes] + + cycling_shift(transport[spatial.nodes], 1) + + cycling_shift(transport[spatial.nodes], 2) + ) + / 3 + ) + + n.madd( + "Load", + spatial.nodes, + suffix=" land transport EV", + bus=spatial.nodes + " EV battery", + carrier="land transport EV", + p_set=p_set, + ) + + p_nom = ( + nodal_transport_data["number cars"] + * options.get("bev_charge_rate", 0.011) + * electric_share + ) + + n.madd( + "Link", + spatial.nodes, + suffix=" BEV charger", + bus0=spatial.nodes, + bus1=spatial.nodes + " EV battery", + p_nom=p_nom, + carrier="BEV charger", + p_max_pu=avail_profile[spatial.nodes], + efficiency=options.get("bev_charge_efficiency", 0.9), + # These were set non-zero to find LU infeasibility when availability = 0.25 + # p_nom_extendable=True, + # p_nom_min=p_nom, + # capital_cost=1e6, #i.e. so high it only gets built where necessary + ) + + if electric_share > 0 and options["v2g"]: + n.madd( + "Link", + spatial.nodes, + suffix=" V2G", + bus1=spatial.nodes, + bus0=spatial.nodes + " EV battery", + p_nom=p_nom, + carrier="V2G", + p_max_pu=avail_profile[spatial.nodes], + efficiency=options.get("bev_charge_efficiency", 0.9), + ) + + if electric_share > 0 and options["bev_dsm"]: + e_nom = ( + nodal_transport_data["number cars"] + * options.get("bev_energy", 0.05) + * options["bev_availability"] + * electric_share + ) + + n.madd( + "Store", + spatial.nodes, + suffix=" battery storage", + bus=spatial.nodes + " EV battery", + carrier="battery storage", + e_cyclic=True, + e_nom=e_nom, + e_max_pu=1, + e_min_pu=dsm_profile[spatial.nodes], + ) + + if fuel_cell_share > 0: + if not ( + snakemake.config["policy_config"]["hydrogen"]["is_reference"] + and snakemake.config["policy_config"]["hydrogen"]["remove_h2_load"] + ): + n.madd( + "Load", + nodes, + suffix=" land transport fuel cell", + bus=nodes + " H2", + carrier="land transport fuel cell", + p_set=fuel_cell_share + / options["transport_fuel_cell_efficiency"] + * transport[nodes], + ) + + if ice_share > 0: + if "oil" not in n.buses.carrier.unique(): + n.madd( + "Bus", spatial.oil.nodes, location=spatial.oil.locations, carrier="oil" + ) + ice_efficiency = options["transport_internal_combustion_efficiency"] + + n.madd( + "Load", + spatial.nodes, + suffix=" land transport oil", + bus=spatial.oil.nodes, + carrier="land transport oil", + p_set=ice_share / ice_efficiency * transport[spatial.nodes], + ) + + co2 = ( + ice_share + / ice_efficiency + * transport[spatial.nodes].sum().sum() + / 8760 + * costs.at["oil", "CO2 intensity"] + ) + + n.add( + "Load", + "land transport oil emissions", + bus="co2 atmosphere", + carrier="land transport oil emissions", + p_set=-co2, + ) + + +def create_nodes_for_heat_sector(): + # TODO pop_layout + + # rural are areas with low heating density and individual heating + # urban are areas with high heating density + # urban can be split into district heating (central) and individual heating (decentral) + + ct_urban = pop_layout.urban.groupby(pop_layout.ct).sum() + # distribution of urban population within a country + pop_layout["urban_ct_fraction"] = pop_layout.urban / pop_layout.ct.map(ct_urban.get) + + sectors = ["residential", 
"services"] + + h_nodes = {} + urban_fraction = pop_layout.urban / pop_layout[["rural", "urban"]].sum(axis=1) + + for sector in sectors: + h_nodes[sector + " rural"] = pop_layout.index + h_nodes[sector + " urban decentral"] = pop_layout.index + + # maximum potential of urban demand covered by district heating + central_fraction = options["district_heating"]["potential"] + # district heating share at each node + dist_fraction_node = ( + district_heat_share["district heat share"] + * pop_layout["urban_ct_fraction"] + / pop_layout["fraction"] + ) + h_nodes["urban central"] = dist_fraction_node.index + # if district heating share larger than urban fraction -> set urban + # fraction to district heating share + urban_fraction = pd.concat([urban_fraction, dist_fraction_node], axis=1).max(axis=1) + # difference of max potential and today's share of district heating + diff = (urban_fraction * central_fraction) - dist_fraction_node + progress = get(options["district_heating"]["progress"], investment_year) + dist_fraction_node += diff * progress + # logger.info( + # "The current district heating share compared to the maximum", + # f"possible is increased by a progress factor of\n{progress}", + # "resulting in a district heating share of", # "\n{dist_fraction_node}", #TODO fix district heat share + # ) + + return h_nodes, dist_fraction_node, urban_fraction + + +def add_heat(n, costs): + # TODO options? + # TODO pop_layout? + + logger.info("adding heat") + + sectors = ["residential", "services"] + + h_nodes, dist_fraction, urban_fraction = create_nodes_for_heat_sector() + + # NB: must add costs of central heating afterwards (EUR 400 / kWpeak, 50a, 1% FOM from Fraunhofer ISE) + + # exogenously reduce space heat demand + if options["reduce_space_heat_exogenously"]: + dE = get(options["reduce_space_heat_exogenously_factor"], investment_year) + # print(f"assumed space heat reduction of {dE*100} %") + for sector in sectors: + heat_demand[sector + " space"] = (1 - dE) * heat_demand[sector + " space"] + + heat_systems = [ + "residential rural", + "services rural", + "residential urban decentral", + "services urban decentral", + "urban central", + ] + + for name in heat_systems: + name_type = "central" if name == "urban central" else "decentral" + + n.add("Carrier", name + " heat") + + n.madd( + "Bus", + h_nodes[name] + " {} heat".format(name), + location=h_nodes[name], + carrier=name + " heat", + ) + + ## Add heat load + + for sector in sectors: + # heat demand weighting + if "rural" in name: + factor = 1 - urban_fraction[h_nodes[name]] + elif "urban central" in name: + factor = dist_fraction[h_nodes[name]] + elif "urban decentral" in name: + factor = urban_fraction[h_nodes[name]] - dist_fraction[h_nodes[name]] + else: + raise NotImplementedError( + f" {name} not in " f"heat systems: {heat_systems}" + ) + + if sector in name: + heat_load = ( + heat_demand[[sector + " water", sector + " space"]] + .groupby(level=1, axis=1) + .sum()[h_nodes[name]] + .multiply(factor) + ) + + if name == "urban central": + heat_load = ( + heat_demand.groupby(level=1, axis=1) + .sum()[h_nodes[name]] + .multiply( + factor * (1 + options["district_heating"]["district_heating_loss"]) + ) + ) + + n.madd( + "Load", + h_nodes[name], + suffix=f" {name} heat", + bus=h_nodes[name] + f" {name} heat", + carrier=name + " heat", + p_set=heat_load, + ) + + ## Add heat pumps + + heat_pump_type = "air" if "urban" in name else "ground" + + costs_name = f"{name_type} {heat_pump_type}-sourced heat pump" + cop = {"air": ashp_cop, "ground": gshp_cop} 
+ efficiency = ( + cop[heat_pump_type][h_nodes[name]] + if options["time_dep_hp_cop"] + else costs.at[costs_name, "efficiency"] + ) + + n.madd( + "Link", + h_nodes[name], + suffix=f" {name} {heat_pump_type} heat pump", + bus0=h_nodes[name], + bus1=h_nodes[name] + f" {name} heat", + carrier=f"{name} {heat_pump_type} heat pump", + efficiency=efficiency, + capital_cost=costs.at[costs_name, "efficiency"] + * costs.at[costs_name, "fixed"], + p_nom_extendable=True, + lifetime=costs.at[costs_name, "lifetime"], + ) + + if options["tes"]: + n.add("Carrier", name + " water tanks") + + n.madd( + "Bus", + h_nodes[name] + f" {name} water tanks", + location=h_nodes[name], + carrier=name + " water tanks", + ) + + n.madd( + "Link", + h_nodes[name] + f" {name} water tanks charger", + bus0=h_nodes[name] + f" {name} heat", + bus1=h_nodes[name] + f" {name} water tanks", + efficiency=costs.at["water tank charger", "efficiency"], + carrier=name + " water tanks charger", + p_nom_extendable=True, + ) + + n.madd( + "Link", + h_nodes[name] + f" {name} water tanks discharger", + bus0=h_nodes[name] + f" {name} water tanks", + bus1=h_nodes[name] + f" {name} heat", + carrier=name + " water tanks discharger", + efficiency=costs.at["water tank discharger", "efficiency"], + p_nom_extendable=True, + ) + + if isinstance(options["tes_tau"], dict): + tes_time_constant_days = options["tes_tau"][name_type] + else: # TODO add logger + # logger.warning("Deprecated: a future version will require you to specify 'tes_tau' ", + # "for 'decentral' and 'central' separately.") + tes_time_constant_days = ( + options["tes_tau"] if name_type == "decentral" else 180.0 + ) + + # conversion from EUR/m^3 to EUR/MWh for 40 K diff and 1.17 kWh/m^3/K + capital_cost = ( + costs.at[name_type + " water tank storage", "fixed"] / 0.00117 / 40 + ) + + n.madd( + "Store", + h_nodes[name] + f" {name} water tanks", + bus=h_nodes[name] + f" {name} water tanks", + e_cyclic=True, + e_nom_extendable=True, + carrier=name + " water tanks", + standing_loss=1 - np.exp(-1 / 24 / tes_time_constant_days), + capital_cost=capital_cost, + lifetime=costs.at[name_type + " water tank storage", "lifetime"], + ) + + if options["boilers"]: + key = f"{name_type} resistive heater" + + n.madd( + "Link", + h_nodes[name] + f" {name} resistive heater", + bus0=h_nodes[name], + bus1=h_nodes[name] + f" {name} heat", + carrier=name + " resistive heater", + efficiency=costs.at[key, "efficiency"], + capital_cost=costs.at[key, "efficiency"] * costs.at[key, "fixed"], + p_nom_extendable=True, + lifetime=costs.at[key, "lifetime"], + ) + + key = f"{name_type} gas boiler" + + n.madd( + "Link", + h_nodes[name] + f" {name} gas boiler", + p_nom_extendable=True, + bus0=spatial.gas.nodes, + bus1=h_nodes[name] + f" {name} heat", + bus2="co2 atmosphere", + carrier=name + " gas boiler", + efficiency=costs.at[key, "efficiency"], + efficiency2=costs.at["gas", "CO2 intensity"], + capital_cost=costs.at[key, "efficiency"] * costs.at[key, "fixed"], + lifetime=costs.at[key, "lifetime"], + ) + + if options["solar_thermal"]: + n.add("Carrier", name + " solar thermal") + + n.madd( + "Generator", + h_nodes[name], + suffix=f" {name} solar thermal collector", + bus=h_nodes[name] + f" {name} heat", + carrier=name + " solar thermal", + p_nom_extendable=True, + capital_cost=costs.at[name_type + " solar thermal", "fixed"], + p_max_pu=solar_thermal[h_nodes[name]], + lifetime=costs.at[name_type + " solar thermal", "lifetime"], + ) + + if options["chp"] and name == "urban central": + # add gas CHP; biomass CHP is added 
in biomass section + n.madd( + "Link", + h_nodes[name] + " urban central gas CHP", + bus0=spatial.gas.nodes, + bus1=h_nodes[name], + bus2=h_nodes[name] + " urban central heat", + bus3="co2 atmosphere", + carrier="urban central gas CHP", + p_nom_extendable=True, + capital_cost=costs.at["central gas CHP", "fixed"] + * costs.at["central gas CHP", "efficiency"], + marginal_cost=costs.at["central gas CHP", "VOM"], + efficiency=costs.at["central gas CHP", "efficiency"], + efficiency2=costs.at["central gas CHP", "efficiency"] + / costs.at["central gas CHP", "c_b"], + efficiency3=costs.at["gas", "CO2 intensity"], + lifetime=costs.at["central gas CHP", "lifetime"], + ) + if snakemake.config["sector"]["cc"]: + n.madd( + "Link", + h_nodes[name] + " urban central gas CHP CC", + # bus0="Africa gas", + bus0=spatial.gas.nodes, + bus1=h_nodes[name], + bus2=h_nodes[name] + " urban central heat", + bus3="co2 atmosphere", + bus4=spatial.co2.df.loc[h_nodes[name], "nodes"].values, + carrier="urban central gas CHP CC", + p_nom_extendable=True, + capital_cost=costs.at["central gas CHP", "fixed"] + * costs.at["central gas CHP", "efficiency"] + + costs.at["biomass CHP capture", "fixed"] + * costs.at["gas", "CO2 intensity"], + marginal_cost=costs.at["central gas CHP", "VOM"], + efficiency=costs.at["central gas CHP", "efficiency"] + - costs.at["gas", "CO2 intensity"] + * ( + costs.at["biomass CHP capture", "electricity-input"] + + costs.at[ + "biomass CHP capture", "compression-electricity-input" + ] + ), + efficiency2=costs.at["central gas CHP", "efficiency"] + / costs.at["central gas CHP", "c_b"] + + costs.at["gas", "CO2 intensity"] + * ( + costs.at["biomass CHP capture", "heat-output"] + + costs.at["biomass CHP capture", "compression-heat-output"] + - costs.at["biomass CHP capture", "heat-input"] + ), + efficiency3=costs.at["gas", "CO2 intensity"] + * (1 - costs.at["biomass CHP capture", "capture_rate"]), + efficiency4=costs.at["gas", "CO2 intensity"] + * costs.at["biomass CHP capture", "capture_rate"], + lifetime=costs.at["central gas CHP", "lifetime"], + ) + + if options["chp"] and options["micro_chp"] and name != "urban central": + n.madd( + "Link", + h_nodes[name] + f" {name} micro gas CHP", + p_nom_extendable=True, + # bus0="Africa gas", + bus0=spatial.gas.nodes, + bus1=h_nodes[name], + bus2=h_nodes[name] + f" {name} heat", + bus3="co2 atmosphere", + carrier=name + " micro gas CHP", + efficiency=costs.at["micro CHP", "efficiency"], + efficiency2=costs.at["micro CHP", "efficiency-heat"], + efficiency3=costs.at["gas", "CO2 intensity"], + capital_cost=costs.at["micro CHP", "fixed"], + lifetime=costs.at["micro CHP", "lifetime"], + ) + + +def average_every_nhours(n, offset): + m = n.copy(with_time=False) + + snapshot_weightings = n.snapshot_weightings.resample(offset.casefold()).sum() + m.set_snapshots(snapshot_weightings.index) + m.snapshot_weightings = snapshot_weightings + + for c in n.iterate_components(): + pnl = getattr(m, c.list_name + "_t") + for k, df in c.pnl.items(): + if not df.empty: + if c.list_name == "stores" and k == "e_max_pu": + pnl[k] = df.resample(offset.casefold()).min() + elif c.list_name == "stores" and k == "e_min_pu": + pnl[k] = df.resample(offset.casefold()).max() + else: + pnl[k] = df.resample(offset.casefold()).mean() + + return m + + +def add_dac(n, costs): + heat_carriers = ["urban central heat", "services urban decentral heat"] + heat_buses = n.buses.index[n.buses.carrier.isin(heat_carriers)] + locations = n.buses.location[heat_buses] + + efficiency2 = -( + costs.at["direct air 
capture", "electricity-input"] + + costs.at["direct air capture", "compression-electricity-input"] + ) + efficiency3 = -( + costs.at["direct air capture", "heat-input"] + - costs.at["direct air capture", "compression-heat-output"] + ) + + n.madd( + "Link", + heat_buses.str.replace(" heat", " DAC"), + bus0="co2 atmosphere", + bus1=spatial.co2.df.loc[locations, "nodes"].values, + bus2=locations.values, + bus3=heat_buses, + carrier="DAC", + capital_cost=costs.at["direct air capture", "fixed"], + efficiency=1.0, + efficiency2=efficiency2, + efficiency3=efficiency3, + p_nom_extendable=True, + lifetime=costs.at["direct air capture", "lifetime"], + ) + + +def add_services(n, costs): + nhours = n.snapshot_weightings.generators.sum() + buses = spatial.nodes.intersection(n.loads_t.p_set.columns) + + profile_residential = normalize_by_country( + n.loads_t.p_set[buses].reindex(columns=spatial.nodes, fill_value=0.0) + ).fillna(0) + + p_set_elec = p_set_from_scaling( + "services electricity", profile_residential, energy_totals, nhours + ) + + n.madd( + "Load", + spatial.nodes, + suffix=" services electricity", + bus=spatial.nodes, + carrier="services electricity", + p_set=p_set_elec, + ) + p_set_biomass = p_set_from_scaling( + "services biomass", profile_residential, energy_totals, nhours + ) + + n.madd( + "Load", + spatial.nodes, + suffix=" services biomass", + bus=spatial.biomass.nodes, + carrier="services biomass", + p_set=p_set_biomass, + ) + + p_set_oil = p_set_from_scaling( + "services oil", profile_residential, energy_totals, nhours + ) + + n.madd( + "Load", + spatial.nodes, + suffix=" services oil", + bus=spatial.oil.nodes, + carrier="services oil", + p_set=p_set_oil, + ) + + # TODO check with different snapshot settings + co2 = p_set_oil.sum(axis=1).mean() * costs.at["oil", "CO2 intensity"] + + n.add( + "Load", + "services oil emissions", + bus="co2 atmosphere", + carrier="oil emissions", + p_set=-co2, + ) + + p_set_gas = p_set_from_scaling( + "services gas", profile_residential, energy_totals, nhours + ) + + n.madd( + "Load", + spatial.nodes, + suffix=" services gas", + bus=spatial.gas.nodes, + carrier="services gas", + p_set=p_set_gas, + ) + + # TODO check with different snapshot settings + co2 = p_set_gas.sum(axis=1).mean() * costs.at["gas", "CO2 intensity"] + + n.add( + "Load", + "services gas emissions", + bus="co2 atmosphere", + carrier="gas emissions", + p_set=-co2, + ) + + +def add_agriculture(n, costs): + n.madd( + "Load", + spatial.nodes, + suffix=" agriculture electricity", + bus=spatial.nodes, + carrier="agriculture electricity", + p_set=nodal_energy_totals.loc[spatial.nodes, "agriculture electricity"] + * 1e6 + / 8760, + ) + + n.madd( + "Load", + spatial.nodes, + suffix=" agriculture oil", + bus=spatial.oil.nodes, + carrier="agriculture oil", + p_set=nodal_energy_totals.loc[spatial.nodes, "agriculture oil"] * 1e6 / 8760, + ) + co2 = ( + nodal_energy_totals.loc[spatial.nodes, "agriculture oil"] + * 1e6 + / 8760 + * costs.at["oil", "CO2 intensity"] + ).sum() + + n.add( + "Load", + "agriculture oil emissions", + bus="co2 atmosphere", + carrier="oil emissions", + p_set=-co2, + ) + + +def normalize_by_country(df, droplevel=False): + """ + Auxiliary function to normalize a dataframe by the country. 
+ + If droplevel is False (default), the country level is added to the + column index If droplevel is True, the original column format is + preserved + """ + ret = df.T.groupby(df.columns.str[:2]).apply(lambda x: x / x.sum().sum()).T + if droplevel: + return ret.droplevel(0, axis=1) + else: + return ret + + +def group_by_node(df, multiindex=False): + """ + Auxiliary function to group a dataframe by the node name. + """ + ret = df.T.groupby(df.columns.str.split(" ").str[0]).sum().T + if multiindex: + ret.columns = pd.MultiIndex.from_tuples(zip(ret.columns.str[:2], ret.columns)) + return ret + + +def normalize_and_group(df, multiindex=False): + """ + Function to concatenate normalize_by_country and group_by_node. + """ + return group_by_node( + normalize_by_country(df, droplevel=True), multiindex=multiindex + ) + + +def p_set_from_scaling(col, scaling, energy_totals, nhours): + """ + Function to create p_set from energy_totals, using the per-unit scaling + dataframe. + """ + return ( + 1e6 + / nhours + * scaling.mul(energy_totals[col], level=0).droplevel(level=0, axis=1) + ) + + +def add_residential(n, costs): + # need to adapt for many countries #TODO + + nhours = n.snapshot_weightings.generators.sum() + + heat_ind = ( + n.loads_t.p_set.filter(like="residential") + .filter(like="heat") + .dropna(axis=1) + .columns + ) + heat_shape_raw = normalize_by_country(n.loads_t.p_set[heat_ind]) + heat_shape = heat_shape_raw.rename( + columns=n.loads.bus.map(n.buses.location), level=1 + ) + heat_shape = heat_shape.T.groupby(level=[0, 1]).sum().T + + n.loads_t.p_set[heat_ind] = 1e6 * heat_shape_raw.mul( + energy_totals["total residential space"] + + energy_totals["total residential water"] + - energy_totals["residential heat biomass"] + - energy_totals["residential heat oil"] + - energy_totals["residential heat gas"], + level=0, + ).droplevel(level=0, axis=1).div(nhours) + + heat_oil_demand = p_set_from_scaling( + "residential heat oil", heat_shape, energy_totals, nhours + ) + heat_biomass_demand = p_set_from_scaling( + "residential heat biomass", heat_shape, energy_totals, nhours + ) + + heat_gas_demand = p_set_from_scaling( + "residential heat gas", heat_shape, energy_totals, nhours + ) + + res_index = spatial.nodes.intersection(n.loads_t.p_set.columns) + profile_residential_raw = normalize_by_country(n.loads_t.p_set[res_index]) + profile_residential = profile_residential_raw.rename( + columns=n.loads.bus.map(n.buses.location), level=1 + ) + profile_residential = profile_residential.T.groupby(level=[0, 1]).sum().T + + p_set_oil = ( + p_set_from_scaling( + "residential oil", profile_residential, energy_totals, nhours + ) + + heat_oil_demand + ) + + p_set_biomass = ( + p_set_from_scaling( + "residential biomass", profile_residential, energy_totals, nhours + ) + + heat_biomass_demand + ) + + p_set_gas = ( + p_set_from_scaling( + "residential gas", profile_residential, energy_totals, nhours + ) + + heat_gas_demand + ) + + n.madd( + "Load", + spatial.nodes, + suffix=" residential oil", + bus=spatial.oil.nodes, + carrier="residential oil", + p_set=p_set_oil, + ) + + # TODO: check 8760 compatibility with different snapshot settings + co2 = p_set_oil.sum(axis=1).mean() * costs.at["oil", "CO2 intensity"] + + n.add( + "Load", + "residential oil emissions", + bus="co2 atmosphere", + carrier="oil emissions", + p_set=-co2, + ) + n.madd( + "Load", + spatial.nodes, + suffix=" residential biomass", + bus=spatial.biomass.nodes, + carrier="residential biomass", + p_set=p_set_biomass, + ) + + n.madd( + "Load", + 
spatial.nodes, + suffix=" residential gas", + bus=spatial.gas.nodes, + carrier="residential gas", + p_set=p_set_gas, + ) + + # TODO: check 8760 compatibility with different snapshot settings + co2 = p_set_gas.sum(axis=1).mean() * costs.at["gas", "CO2 intensity"] + + n.add( + "Load", + "residential gas emissions", + bus="co2 atmosphere", + carrier="gas emissions", + p_set=-co2, + ) + + for country in countries: + rem_heat_demand = ( + energy_totals.loc[country, "total residential space"] + + energy_totals.loc[country, "total residential water"] + - energy_totals.loc[country, "residential heat biomass"] + - energy_totals.loc[country, "residential heat oil"] + - energy_totals.loc[country, "residential heat gas"] + ) + + heat_buses = (n.loads_t.p_set.filter(regex="heat").filter(like=country)).columns + + safe_division = safe_divide( + n.loads_t.p_set.filter(like=country)[heat_buses], + n.loads_t.p_set.filter(like=country)[heat_buses].sum().sum(), + ) + n.loads_t.p_set.loc[:, heat_buses] = np.where( + ~np.isnan(safe_division), + safe_division * rem_heat_demand * 1e6 / nhours, + 0.0, + ) + + # Revise residential electricity demand + buses = n.buses[n.buses.carrier == "AC"].index.intersection(n.loads_t.p_set.columns) + + profile_pu = normalize_by_country(n.loads_t.p_set[buses]).fillna(0) + n.loads_t.p_set.loc[:, buses] = p_set_from_scaling( + "electricity residential", profile_pu, energy_totals, nhours + ) + + +def add_custom_water_cost(n): + for country in countries: + water_costs = pd.read_csv( + "resources/custom_data/{}_water_costs.csv".format(country), + sep=",", + index_col=0, + ) + water_costs = water_costs.filter(like=country, axis=0).loc[spatial.nodes] + electrolysis_links = n.links.filter(like=country, axis=0).filter( + like="lectrolysis", axis=0 + ) + + elec_index = n.links[ + (n.links.carrier == "H2 Electrolysis") + & (n.links.bus0.str.contains(country)) + ].index + n.links.loc[elec_index, "marginal_cost"] = water_costs.values + # n.links.filter(like=country, axis=0).filter(like='lectrolysis', axis=0)["marginal_cost"] = water_costs.values + # n.links.filter(like=country, axis=0).filter(like='lectrolysis', axis=0).apply(lambda x: water_costs[x.index], axis=0) + # print(n.links.filter(like=country, axis=0).filter(like='lectrolysis', axis=0).marginal_cost) + + +def add_rail_transport(n, costs): + p_set_elec = nodal_energy_totals.loc[spatial.nodes, "electricity rail"] + p_set_oil = (nodal_energy_totals.loc[spatial.nodes, "total rail"]) - p_set_elec + + n.madd( + "Load", + spatial.nodes, + suffix=" rail transport oil", + bus=spatial.oil.nodes, + carrier=" rail transport oil", + p_set=p_set_oil * 1e6 / 8760, + ) + + n.madd( + "Load", + spatial.nodes, + suffix=" rail transport electricity", + bus=spatial.nodes, + carrier=" rail transport electricity", + p_set=p_set_elec * 1e6 / 8760, + ) + + +if __name__ == "__main__": + if "snakemake" not in globals(): + snakemake = mock_snakemake( + "prepare_sector_network", + simpl="", + clusters="19", + ll="c1.0", + opts="Co2L", + planning_horizons="2030", + sopts="72H", + discountrate="0.071", + demand="AB", + ) + + # Load population layout + pop_layout = pd.read_csv(snakemake.input.clustered_pop_layout, index_col=0) + + # Load all sector wildcards + options = snakemake.config["sector"] + gadm_level = options["gadm_level"] + shapes_path = snakemake.input["shapes_path"] + gadm_clustering = snakemake.config["cluster_options"]["alternative_clustering"] + geo_crs = snakemake.params.geo_crs + file_prefix = snakemake.params.gadm_file_prefix + gadm_url_prefix 
= snakemake.params.gadm_url_prefix + contended_flag = snakemake.params.contended_flag + gadm_input_file_args = ["data", "raw", "gadm"] + + # Load input network + overrides = override_component_attrs(snakemake.input.overrides) + n = pypsa.Network(snakemake.input.network, override_component_attrs=overrides) + + # Fetch the country list from the network + # countries = list(n.buses.country.unique()) + countries = snakemake.config["countries"] + # Locate all the AC buses + nodes = n.buses[ + n.buses.carrier == "AC" + ].index # TODO if you take nodes from the index of buses of n it's more than pop_layout + # clustering of regions must be double-checked. refer to regions onshore + + # Add location. TODO: move it into pypsa-earth + n.buses.location = n.buses.index + + # Set carrier of AC loads + n.loads.loc[nodes, "carrier"] = "AC" + + Nyears = n.snapshot_weightings.generators.sum() / 8760 + + # Fetch wildcards + investment_year = int(snakemake.wildcards.planning_horizons[-4:]) + demand_sc = snakemake.wildcards.demand # loading the demand scenario wildcard + pop_layout = pd.read_csv(snakemake.input.clustered_pop_layout, index_col=0) + + # Prepare the costs dataframe + costs = prepare_costs( + snakemake.input.costs, + snakemake.params.costs["USD2013_to_EUR2013"], + snakemake.params.costs["fill_values"], + Nyears, + ) + + # Define spatial for biomass and co2. They require the same spatial definition + spatial = define_spatial(pop_layout.index, options) + + if snakemake.config["foresight"] in ["myopic", "perfect"]: + add_lifetime_wind_solar(n, costs) + + # TODO logging + + nodal_energy_totals = pd.read_csv( + snakemake.input.nodal_energy_totals, + index_col=0, + keep_default_na=False, + na_values=[""], + ) + energy_totals = pd.read_csv( + snakemake.input.energy_totals, + index_col=0, + keep_default_na=False, + na_values=[""], + ) + # Get the data required for land transport + # TODO Leon, This contains transport demand, right? if so let's change it to transport_demand? + transport = pd.read_csv(snakemake.input.transport, index_col=0, parse_dates=True) + + avail_profile = pd.read_csv( + snakemake.input.avail_profile, index_col=0, parse_dates=True + ) + dsm_profile = pd.read_csv( + snakemake.input.dsm_profile, index_col=0, parse_dates=True + ) + nodal_transport_data = pd.read_csv( # TODO This only includes no. of cars, change name to something descriptive? + snakemake.input.nodal_transport_data, index_col=0 + ) + + # Load data required for the heat sector + heat_demand = pd.read_csv( + snakemake.input.heat_demand, index_col=0, header=[0, 1], parse_dates=True + ).fillna(0) + # Ground-sourced heatpump coefficient of performance + gshp_cop = pd.read_csv( + snakemake.input.gshp_cop, index_col=0, parse_dates=True + ) # only needed with heat dep. hp cop allowed from config + # TODO add option heat_dep_hp_cop to the config + + # Air-sourced heatpump coefficient of performance + ashp_cop = pd.read_csv( + snakemake.input.ashp_cop, index_col=0, parse_dates=True + ) # only needed with heat dep. 
hp cop allowed from config
+
+    # Solar thermal availability profiles
+    solar_thermal = pd.read_csv(
+        snakemake.input.solar_thermal, index_col=0, parse_dates=True
+    )
+    gshp_cop = pd.read_csv(snakemake.input.gshp_cop, index_col=0, parse_dates=True)
+
+    # Share of district heating at each node
+    district_heat_share = pd.read_csv(snakemake.input.district_heat_share, index_col=0)
+
+    # Load data required for aviation and navigation
+    # TODO follow the same structure as land transport and heat
+
+    # Load industry demand data
+    industrial_demand = pd.read_csv(
+        snakemake.input.industrial_demand, index_col=0, header=0
+    )  # * 1e6
+
+    ##########################################################################
+    ############## Functions adding different carriers and sectors ###########
+    ##########################################################################
+
+    add_co2(n, costs)  # TODO add costs
+
+    # TODO This might be transferred to add_generation, but before apply remove_elec_base_techs(n) from PyPSA-Eur-Sec
+    add_oil(n, costs)
+
+    add_gas(n)
+    add_generation(n, costs)
+
+    add_hydrogen(n, costs)  # TODO add costs
+
+    add_storage(n, costs)
+
+    H2_liquid_fossil_conversions(n, costs)
+
+    h2_hc_conversions(n, costs)
+    add_heat(n, costs)
+    add_biomass(n, costs)
+
+    add_industry(n, costs)
+
+    add_shipping(
+        n,
+        costs,
+        gadm_level,
+        geo_crs,
+        file_prefix,
+        gadm_url_prefix,
+        contended_flag,
+        gadm_input_file_args,
+        shapes_path,
+        gadm_clustering,
+    )
+
+    # add_aviation currently runs with dummy data
+    add_aviation(
+        n,
+        costs,
+        gadm_level,
+        geo_crs,
+        file_prefix,
+        gadm_url_prefix,
+        contended_flag,
+        gadm_input_file_args,
+        shapes_path,
+        gadm_clustering,
+    )
+
+    # prepare_transport_data(n)
+
+    add_land_transport(n, costs)
+
+    # if snakemake.config["custom_data"]["transport_demand"]:
+    add_rail_transport(n, costs)
+
+    # if snakemake.config["custom_data"]["custom_sectors"]:
+    add_agriculture(n, costs)
+    add_residential(n, costs)
+    add_services(n, costs)
+
+    sopts = snakemake.wildcards.sopts.split("-")
+
+    for o in sopts:
+        m = re.match(r"^\d+h$", o, re.IGNORECASE)
+        if m is not None:
+            n = average_every_nhours(n, m.group(0))
+            break
+
+    # TODO add co2 limit here, if necessary
+    # co2_limit_pu = eval(sopts[0][5:])
+    # co2_limit = co2_limit_pu *
+    # # Add co2 limit
+    # co2_limit = 1e9
+    # n.add(
+    #     "GlobalConstraint",
+    #     "CO2Limit",
+    #     carrier_attribute="co2_emissions",
+    #     sense="<=",
+    #     constant=co2_limit,
+    # )
+
+    if options["dac"]:
+        add_dac(n, costs)
+
+    if snakemake.config["custom_data"]["water_costs"]:
+        add_custom_water_cost(n)
+
+    n.export_to_netcdf(snakemake.output[0])
+
+    # TODO changes in case of myopic foresight
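The "<n>h" tokens in sopts trigger temporal averaging via average_every_nhours, which is defined elsewhere in the repository. A minimal sketch of the underlying idea, using a plain pandas series in place of the network time series; the 8760-hour index and the "72H" offset are illustrative, mirroring the sopts wildcard used above:

import pandas as pd

# Hourly snapshots for one year; a toy stand-in for n.snapshots.
snapshots = pd.date_range("2013-01-01", periods=8760, freq="H")
series = pd.Series(range(8760), index=snapshots, dtype=float)

# Time series are averaged over 72-hour blocks, while snapshot weightings
# are summed so that annual totals are preserved.
averaged = series.resample("72H").mean()
weightings = pd.Series(1.0, index=snapshots).resample("72H").sum()

print(len(averaged), weightings.iloc[0])  # 122 blocks, most weighted 72.0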
diff --git a/scripts/prepare_transport_data.py b/scripts/prepare_transport_data.py
new file mode 100644
index 000000000..315de4dec
--- /dev/null
+++ b/scripts/prepare_transport_data.py
@@ -0,0 +1,251 @@
+# -*- coding: utf-8 -*-
+# SPDX-FileCopyrightText: PyPSA-Earth and PyPSA-Eur Authors
+#
+# SPDX-License-Identifier: AGPL-3.0-or-later
+
+import numpy as np
+import pandas as pd
+import pypsa
+import pytz
+import xarray as xr
+from _helpers import mock_snakemake
+
+
+def transport_degree_factor(
+    temperature,
+    deadband_lower=15,
+    deadband_upper=20,
+    lower_degree_factor=0.5,
+    upper_degree_factor=1.6,
+):
+    """
+    Work out how much energy demand in vehicles increases due to heating and
+    cooling.
+
+    There is a deadband where there is no increase. Degree factors are the %
+    increase in demand compared to no heating/cooling fuel consumption.
+
+    Returns the per-unit increase in demand for each place and time.
+    """
+
+    dd = temperature.copy()
+
+    dd[(temperature > deadband_lower) & (temperature < deadband_upper)] = 0.0
+
+    dT_lower = deadband_lower - temperature[temperature < deadband_lower]
+    dd[temperature < deadband_lower] = lower_degree_factor / 100 * dT_lower
+
+    dT_upper = temperature[temperature > deadband_upper] - deadband_upper
+    dd[temperature > deadband_upper] = upper_degree_factor / 100 * dT_upper
+
+    return dd
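A worked example of the deadband logic above, assuming transport_degree_factor as just defined is in scope with its default parameters:

import pandas as pd

temperature = pd.Series([10.0, 17.0, 25.0])
dd = transport_degree_factor(temperature)

# 10 C sits 5 C below the deadband: 0.5 / 100 * 5 = 0.025 (2.5 % extra demand)
# 17 C sits inside the deadband:    0.0
# 25 C sits 5 C above the deadband: 1.6 / 100 * 5 = 0.08 (8 % extra demand)
print(dd.tolist())  # [0.025, 0.0, 0.08]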
+
+
+def generate_periodic_profiles(dt_index, nodes, weekly_profile, localize=None):
+    """
+    Given a 24*7-long list of weekly hourly profiles, generate the profile for
+    each country over the period dt_index, taking account of time zones and
+    summer time.
+    """
+
+    weekly_profile = pd.Series(weekly_profile, range(24 * 7))
+
+    week_df = pd.DataFrame(index=dt_index, columns=nodes)
+
+    for node in nodes:
+        timezone = pytz.timezone(pytz.country_timezones[node[:2]][0])
+        tz_dt_index = dt_index.tz_convert(timezone)
+        week_df[node] = [24 * dt.weekday() + dt.hour for dt in tz_dt_index]
+        week_df[node] = week_df[node].map(weekly_profile)
+
+    week_df = week_df.tz_localize(localize)
+
+    return week_df
+
+
+def prepare_transport_data(n):
+    """
+    Function to prepare the data required for the (land) transport sector.
+    """
+
+    energy_totals = pd.read_csv(
+        snakemake.input.energy_totals_name,
+        index_col=0,
+        keep_default_na=False,
+        na_values=[""],
+    )  # TODO change with real numbers
+
+    nodal_energy_totals = energy_totals.loc[pop_layout.ct].fillna(0.0)
+    nodal_energy_totals.index = pop_layout.index
+    # # district heat share not weighted by population
+    # district_heat_share = nodal_energy_totals["district heat share"].round(2)
+    nodal_energy_totals = nodal_energy_totals.multiply(pop_layout.fraction, axis=0)
+
+    # Get overall demand curve for all vehicles
+
+    traffic = pd.read_csv(
+        snakemake.input.traffic_data_KFZ, skiprows=2, usecols=["count"]
+    ).squeeze("columns")
+
+    # Generate profiles
+    transport_shape = generate_periodic_profiles(
+        dt_index=n.snapshots.tz_localize("UTC"),
+        nodes=pop_layout.index,
+        weekly_profile=traffic.values,
+    )
+
+    transport_shape = transport_shape / transport_shape.sum()
+
+    transport_data = pd.read_csv(
+        snakemake.input.transport_name, index_col=0, keep_default_na=False
+    )
+
+    nodal_transport_data = transport_data.reindex(pop_layout.ct, fill_value=0.0)
+
+    nodal_transport_data.index = pop_layout.index
+    nodal_transport_data["number cars"] = (
+        pop_layout["fraction"] * nodal_transport_data["number cars"]
+    )
+    nodal_transport_data.loc[
+        nodal_transport_data["average fuel efficiency"] == 0.0,
+        "average fuel efficiency",
+    ] = transport_data["average fuel efficiency"].mean()
+
+    # electric motors are more efficient, so alter transport demand
+
+    plug_to_wheels_eta = options.get("bev_plug_to_wheel_efficiency", 0.2)
+    battery_to_wheels_eta = plug_to_wheels_eta * options.get(
+        "bev_charge_efficiency", 0.9
+    )
+
+    efficiency_gain = (
+        nodal_transport_data["average fuel efficiency"] / battery_to_wheels_eta
+    )
+
+    # get heating demand for correction to demand time series
+    temperature = xr.open_dataarray(snakemake.input.temp_air_total).to_pandas()
+
+    # correction factors for vehicle heating
+    dd_ICE = transport_degree_factor(
+        temperature,
+        options["transport_heating_deadband_lower"],
+        options["transport_heating_deadband_upper"],
+        options["ICE_lower_degree_factor"],
+        options["ICE_upper_degree_factor"],
+    )
+
+    dd_EV = transport_degree_factor(
+        temperature,
+        options["transport_heating_deadband_lower"],
+        options["transport_heating_deadband_upper"],
+        options["EV_lower_degree_factor"],
+        options["EV_upper_degree_factor"],
+    )
+
+    # divide out the heating/cooling demand from ICE totals
+    # and multiply back in the heating/cooling demand for EVs
+    ice_correction = (transport_shape * (1 + dd_ICE)).sum() / transport_shape.sum()
+
+    if snakemake.config["custom_data"]["transport_demand"]:
+        energy_totals_transport = nodal_energy_totals["total road"]
+
+        transport = transport_shape.multiply(energy_totals_transport) * 1e6 * Nyears
+    else:
+        energy_totals_transport = (
+            nodal_energy_totals["total road"]
+            + nodal_energy_totals["total rail"]
+            - nodal_energy_totals["electricity rail"]
+        )
+        transport = (
+            (transport_shape.multiply(energy_totals_transport) * 1e6 * Nyears)
+            .divide(efficiency_gain * ice_correction)
+            .multiply(1 + dd_EV)
+        )
+
+    # derive plugged-in availability for PKW (passenger cars)
+
+    traffic = pd.read_csv(
+        snakemake.input.traffic_data_Pkw, skiprows=2, usecols=["count"]
+    ).squeeze("columns")
+
+    avail_max = options.get("bev_avail_max", 0.95)
+    avail_mean = options.get("bev_avail_mean", 0.8)
+
+    avail = avail_max - (avail_max - avail_mean) * (traffic - traffic.min()) / (
+        traffic.mean() - traffic.min()
+    )
+
+    avail_profile = generate_periodic_profiles(
+        dt_index=n.snapshots.tz_localize("UTC"),
+        nodes=pop_layout.index,
+        weekly_profile=avail.values,
+    )
+
+    dsm_week = np.zeros((24 * 7,))
+
+    dsm_week[(np.arange(0, 7, 1) * 24 + options["bev_dsm_restriction_time"])] = options[
+        "bev_dsm_restriction_value"
+    ]
+
+    dsm_profile = generate_periodic_profiles(
+        dt_index=n.snapshots.tz_localize("UTC"),
+        nodes=pop_layout.index,
+        weekly_profile=dsm_week,
+    )
+
+    return (
+        nodal_energy_totals,
+        transport,
+        avail_profile,
+        dsm_profile,
+        nodal_transport_data,
+    )
+
+
+if __name__ == "__main__":
+    if "snakemake" not in globals():
+        snakemake = mock_snakemake(
+            "prepare_transport_data",
+            simpl="",
+            clusters="74",
+            demand="AB",
+            planning_horizons=2030,
+        )
+
+    n = pypsa.Network(snakemake.input.network)
+
+    # Get population layout
+    pop_layout = pd.read_csv(
+        snakemake.input.clustered_pop_layout,
+        index_col=0,
+        keep_default_na=False,
+        na_values=[""],
+    )
+
+    # Add options
+    options = snakemake.config["sector"]
+
+    # Get Nyears
+    Nyears = n.snapshot_weightings.generators.sum() / 8760
+
+    # Prepare transport data
+    (
+        nodal_energy_totals,
+        transport,
+        avail_profile,
+        dsm_profile,
+        nodal_transport_data,
+    ) = prepare_transport_data(n)
+
+    # Save the generated output files to snakemake paths
+
+    # Transport demand per node per timestep
+    transport.to_csv(snakemake.output.transport)
+
+    # Available share of the battery to be used by the grid
+    avail_profile.to_csv(snakemake.output.avail_profile)
+
+    # Restrictions on state of charge of EVs
+    dsm_profile.to_csv(snakemake.output.dsm_profile)
+
+    # Nodal data on number of cars
+    nodal_transport_data.to_csv(snakemake.output.nodal_transport_data)
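The plugged-in availability computed in prepare_transport_data above interpolates between bev_avail_max at the quietest hour and bev_avail_mean at the average traffic level; hours busier than average fall below the mean availability. A self-contained numeric sketch with hypothetical traffic counts:

import pandas as pd

traffic = pd.Series([100.0, 200.0, 600.0])  # hypothetical hourly vehicle counts
avail_max, avail_mean = 0.95, 0.8  # the defaults used above

avail = avail_max - (avail_max - avail_mean) * (traffic - traffic.min()) / (
    traffic.mean() - traffic.min()
)

# min traffic -> 0.95, halfway to the mean -> 0.875, heavy traffic -> 0.575
print(avail.round(3).tolist())  # [0.95, 0.875, 0.575]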
diff --git a/scripts/prepare_transport_data_input.py b/scripts/prepare_transport_data_input.py
new file mode 100644
index 000000000..c4ce4e421
--- /dev/null
+++ b/scripts/prepare_transport_data_input.py
@@ -0,0 +1,152 @@
+# -*- coding: utf-8 -*-
+# SPDX-FileCopyrightText: PyPSA-Earth and PyPSA-Eur Authors
+#
+# SPDX-License-Identifier: AGPL-3.0-or-later
+import shutil
+
+import country_converter as coco
+import numpy as np
+import pandas as pd
+from _helpers import get_current_directory_path, get_path, mock_snakemake
+
+
+def download_number_of_vehicles():
+    """
+    Downloads the number of registered vehicles as a .csv file.
+
+    The data are retrieved as a .csv file from the WHO webpage
+    https://apps.who.int/gho/data/node.main.A995
+    """
+    fn = "https://apps.who.int/gho/athena/data/GHO/RS_194?filter=COUNTRY:*&ead=&x-sideaxis=COUNTRY;YEAR;DATASOURCE&x-topaxis=GHO&profile=crosstable&format=csv"
+    storage_options = {"User-Agent": "Mozilla/5.0"}
+
+    # Read the 'Data' sheet directly from the csv file at the provided URL
+    try:
+        Nbr_vehicles_csv = pd.read_csv(
+            fn, storage_options=storage_options, encoding="utf8"
+        )
+        print("File read successfully.")
+    except Exception as e:
+        print("Failed to read the file:", e)
+        return pd.DataFrame()
+
+    Nbr_vehicles_csv = Nbr_vehicles_csv.rename(
+        columns={
+            "Countries, territories and areas": "Country",
+            "Number of registered vehicles": "number cars",
+        }
+    )
+
+    # Add ISO2 country code for each country
+    cc = coco.CountryConverter()
+    Country = pd.Series(Nbr_vehicles_csv["Country"])
+    Nbr_vehicles_csv["country"] = cc.pandas_convert(
+        series=Country, to="ISO2", not_found="not found"
+    )
+
+    # Remove spaces and replace empty values with NaN
+    Nbr_vehicles_csv["number cars"] = (
+        Nbr_vehicles_csv["number cars"].str.replace(" ", "").replace("", np.nan)
+    )
+
+    # Drop rows with NaN values in 'number cars'
+    Nbr_vehicles_csv = Nbr_vehicles_csv.dropna(subset=["number cars"])
+
+    # Convert the 'number cars' column to integer
+    Nbr_vehicles_csv["number cars"] = Nbr_vehicles_csv["number cars"].astype(int)
+
+    return Nbr_vehicles_csv
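The WHO table stores vehicle counts with spaces as thousands separators; a toy illustration of the cleaning chain used above (str.replace, empty string to NaN, dropna, integer cast):

import numpy as np
import pandas as pd

raw = pd.DataFrame(
    {"Country": ["A", "B", "C"], "number cars": ["1 234 567", "", "89 000"]}
)

raw["number cars"] = raw["number cars"].str.replace(" ", "").replace("", np.nan)
raw = raw.dropna(subset=["number cars"])  # country B drops out
raw["number cars"] = raw["number cars"].astype(int)

print(raw["number cars"].tolist())  # [1234567, 89000]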
+
+
+def download_CO2_emissions():
+    """
+    Downloads the CO2 emissions from vehicles as a .csv file.
+
+    The dataset is downloaded from the following link:
+    https://data.worldbank.org/indicator/EN.CO2.TRAN.ZS?view=map
+    It covers years up to 2014. # TODO: Maybe search for more recent years.
+    """
+    url = (
+        "https://api.worldbank.org/v2/en/indicator/EN.CO2.TRAN.ZS?downloadformat=excel"
+    )
+
+    # Read the 'Data' sheet directly from the Excel file at the provided URL
+    try:
+        CO2_emissions = pd.read_excel(url, sheet_name="Data", skiprows=[0, 1, 2])
+        print("File read successfully.")
+    except Exception as e:
+        print("Failed to read the file:", e)
+        return pd.DataFrame()
+
+    CO2_emissions = CO2_emissions[
+        ["Country Name", "Country Code", "Indicator Name", "2014"]
+    ]
+
+    # Calculate efficiency based on CO2 emissions from transport (% of total fuel combustion)
+    CO2_emissions["average fuel efficiency"] = (100 - CO2_emissions["2014"]) / 100
+
+    # Add ISO2 country code for each country
+    CO2_emissions = CO2_emissions.rename(columns={"Country Name": "Country"})
+    cc = coco.CountryConverter()
+    Country = pd.Series(CO2_emissions["Country"])
+    CO2_emissions["country"] = cc.pandas_convert(
+        series=Country, to="ISO2", not_found="not found"
+    )
+
+    # Drop region names that have no ISO2:
+    CO2_emissions = CO2_emissions[CO2_emissions.country != "not found"]
+
+    return CO2_emissions
+
+
+if __name__ == "__main__":
+    if "snakemake" not in globals():
+        snakemake = mock_snakemake("prepare_transport_data_input")
+
+    # configure_logging(snakemake)
+
+    # run = snakemake.config.get("run", {})
+    # RDIR = run["name"] + "/" if run.get("name") else ""
+    # store_path_data = Path.joinpath(Path().cwd(), "data")
+    # country_list = country_list_to_geofk(snakemake.config["countries"])'
+
+    # Download and prepare vehicles_csv:
+    vehicles_csv = download_number_of_vehicles().copy()
+
+    # Download and prepare CO2_emissions_csv:
+    CO2_emissions_csv = download_CO2_emissions().copy()
+
+    if vehicles_csv.empty or CO2_emissions_csv.empty:
+        # In case one of the urls is not working, we can use the hard-coded data
+        src = get_path(
+            get_current_directory_path(), "data/temp_hard_coded/transport_data.csv"
+        )
+        dest = snakemake.output.transport_data_input
+        shutil.copy(src, dest)
+    else:
+        # Join the DataFrames by the 'country' column
+        merged_df = pd.merge(vehicles_csv, CO2_emissions_csv, on="country")
+        merged_df = merged_df[["country", "number cars", "average fuel efficiency"]]
+
+        # Drop rows with NaN values in 'average fuel efficiency'
+        merged_df = merged_df.dropna(subset=["average fuel efficiency"])
+
+        # Convert the 'average fuel efficiency' to float
+        merged_df["average fuel efficiency"] = merged_df[
+            "average fuel efficiency"
+        ].astype(float)
+
+        # Round the 'average fuel efficiency' to three decimal places
+        merged_df.loc[:, "average fuel efficiency"] = merged_df[
+            "average fuel efficiency"
+        ].round(3)
+
+        # Save the merged DataFrame to a CSV file
+        merged_df.to_csv(
+            snakemake.output.transport_data_input,
+            sep=",",
+            encoding="utf-8",
+            header=True,
+            index=False,
+        )
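The merge in the __main__ block above is an inner join on the ISO2 code, so countries missing from either source are silently dropped; a small sketch with made-up values:

import pandas as pd

vehicles = pd.DataFrame({"country": ["DE", "NG"], "number cars": [48_000_000, 11_000_000]})
co2 = pd.DataFrame({"country": ["DE", "KE"], "average fuel efficiency": [0.82, 0.79]})

merged = pd.merge(vehicles, co2, on="country")  # inner join by default
print(merged["country"].tolist())  # ['DE'] since NG and KE have no counterpart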
diff --git a/scripts/prepare_urban_percent.py b/scripts/prepare_urban_percent.py
new file mode 100644
index 000000000..47d3ce1f2
--- /dev/null
+++ b/scripts/prepare_urban_percent.py
@@ -0,0 +1,99 @@
+# -*- coding: utf-8 -*-
+# SPDX-FileCopyrightText: PyPSA-Earth and PyPSA-Eur Authors
+#
+# SPDX-License-Identifier: AGPL-3.0-or-later
+import pathlib
+
+import country_converter as coco
+import pandas as pd
+import py7zr
+import requests
+from _helpers import mock_snakemake
+
+
+def download_urban_percent():
+    """
+    Downloads the United Nations "Total and urban population, annual" .7z file
+    and extracts it as a csv file.
+
+    The file is downloaded as a .7z archive from the webpage
+    https://unctadstat.unctad.org/datacentre/
+    The dataset contains the urban percent for most countries from 1950 and
+    predictions until 2050.
+    """
+    url = "https://unctadstat-api.unctad.org/api/reportMetadata/US.PopTotal/bulkfile/355/en"
+
+    # Return an empty DataFrame if the download fails
+    urban_percent_orig = pd.DataFrame()
+
+    # Make a GET request to the URL
+    response = requests.get(url)
+
+    # Check if the request was successful (status code 200)
+    if response.status_code == 200:
+        # Extract the filename from the Content-Disposition header
+        content_disposition = response.headers.get("Content-Disposition")
+        if content_disposition:
+            filename = content_disposition.split("filename=")[1].strip('"')
+        else:
+            filename = "downloaded_file.csv.7z"  # Provide a default filename if Content-Disposition header is not present
+
+        # Write the content of the response to a file
+        with open(filename, "wb") as f:
+            f.write(response.content)
+
+        print(f"Urban percent downloaded successfully as {filename}")
+
+        # Extract the downloaded .7z file
+        with py7zr.SevenZipFile(filename, "r") as archive:
+            archive.extractall()
+
+        print("Urban percent extracted successfully")
+
+        # Read the extracted CSV file
+        csv_filename = pathlib.Path(
+            filename
+        ).stem  # Remove the .7z extension to get the CSV filename
+        urban_percent_orig = pd.read_csv(csv_filename)
+
+        print("Urban percent CSV file read successfully:")
+
+        # Remove the downloaded .7z and .csv files
+        pathlib.Path(filename).unlink(missing_ok=True)
+        pathlib.Path(csv_filename).unlink(missing_ok=True)
+
+    else:
+        print(f"Failed to download file: Status code {response.status_code}")
+
+    return urban_percent_orig
+
+
+if __name__ == "__main__":
+    if "snakemake" not in globals():
+        snakemake = mock_snakemake("prepare_urban_percent")
+
+    df = download_urban_percent().copy()
+
+    # Select the columns that we need to keep
+    df = df[
+        [
+            "Year",
+            "Economy Label",
+            "Absolute value in thousands",
+            "Urban population as percentage of total population",
+        ]
+    ]
+
+    # Keep only years 2020 and later
+    df = df.loc[(df["Year"] >= 2020)]
+
+    # Add ISO2 country code for each country
+    cc = coco.CountryConverter()
+    Economy_Label = pd.Series(df["Economy Label"])
+    df["country"] = cc.pandas_convert(
+        series=Economy_Label, to="ISO2", not_found="not found"
+    )
+
+    # Drop isos that were not found:
+    df = df.loc[df["country"] != "not found"]
+
+    df = df.set_index("country")
+
+    # Save
+    df.to_csv(snakemake.output[0], sep=",", encoding="utf-8", header=True)
diff --git a/scripts/retrieve_databundle_light.py b/scripts/retrieve_databundle_light.py
index 84b1c89b4..e38a7d7c5 100644
--- a/scripts/retrieve_databundle_light.py
+++ b/scripts/retrieve_databundle_light.py
@@ -89,16 +89,13 @@
 import pandas as pd
 import yaml
 from _helpers import (
-    change_to_script_dir,
     configure_logging,
     create_country_list,
     create_logger,
-    get_current_directory_path,
     get_path,
     get_relative_path,
     mock_snakemake,
     progress_retrieve,
-    sets_path_to_root,
 )
 from google_drive_downloader import GoogleDriveDownloader as gdd
 from tqdm import tqdm
@@ -338,9 +335,9 @@ def get_first_day_of_previous_month(date):
                 pathlib.Path(inner_zipname).unlink(missing_ok=True)
                 logger.info(f"{resource} - Successfully unzipped file '{fzip}'")
-        except Exception as e:
+        except:
             logger.warning(
-                f"Exception while unzipping file '{fzip}' for {resource_iter} with exception message '{e}': skipped file"
+                f"Exception while unzipping file '{fzip}' for {resource_iter}: skipped file"
             )
 
         # close and remove outer zip file
@@ -517,7 +514,7 @@ def download_and_unzip_hydrobasins(
             file_path=file_path,
             resource=resource,
             destination=destination,
-            headers=[("User-agent",
"Mozilla/5.0")], + headers={"User-agent": "Mozilla/5.0"}, hot_run=hot_run, unzip=True, disable_progress=disable_progress, @@ -805,7 +802,6 @@ def merge_hydrobasins_shape(config_hydrobasin, hydrobasins_level): "hybas_{0:s}_lev{1:02d}_v1c.shp".format(suffix, hydrobasins_level) for suffix in config_hydrobasin["urls"]["hydrobasins"]["suffixes"] ] - gpdf_list = [None] * len(files_to_merge) logger.info("Merging hydrobasins files into: " + output_fl) for i, f_name in tqdm(enumerate(files_to_merge)): @@ -818,26 +814,21 @@ def merge_hydrobasins_shape(config_hydrobasin, hydrobasins_level): if __name__ == "__main__": if "snakemake" not in globals(): - change_to_script_dir(__file__) snakemake = mock_snakemake("retrieve_databundle_light") + # TODO Make logging compatible with progressbar (see PR #102, PyPSA-Eur) configure_logging(snakemake) - sets_path_to_root("pypsa-earth") - - root_path = get_current_directory_path() + root_path = "." tutorial = snakemake.params.tutorial countries = snakemake.params.countries logger.info(f"Retrieving data for {len(countries)} countries.") - disable_progress = not snakemake.config.get("retrieve_databundle", {}).get( - "show_progress", True - ) - # load enable configuration config_enable = snakemake.config["enable"] # load databundle configuration config_bundles = load_databundle_config(snakemake.config["databundles"]) + disable_progress = not config_enable["progress_bar"] bundles_to_download = get_best_bundles( countries, config_bundles, tutorial, config_enable diff --git a/scripts/simplify_network.py b/scripts/simplify_network.py index f72d742a1..5dc3c5973 100644 --- a/scripts/simplify_network.py +++ b/scripts/simplify_network.py @@ -93,7 +93,6 @@ import pypsa import scipy as sp from _helpers import ( - change_to_script_dir, configure_logging, create_logger, get_aggregation_strategies, @@ -963,8 +962,8 @@ def merge_isolated_nodes(n, threshold, aggregation_strategies=dict()): if __name__ == "__main__": if "snakemake" not in globals(): - change_to_script_dir(__file__) snakemake = mock_snakemake("simplify_network", simpl="") + configure_logging(snakemake) n = pypsa.Network(snakemake.input.network) diff --git a/scripts/solve_network.py b/scripts/solve_network.py index f057b8823..fc86d8988 100755 --- a/scripts/solve_network.py +++ b/scripts/solve_network.py @@ -1,582 +1,1034 @@ -# -*- coding: utf-8 -*- -# SPDX-FileCopyrightText: PyPSA-Earth and PyPSA-Eur Authors -# -# SPDX-License-Identifier: AGPL-3.0-or-later - -# -*- coding: utf-8 -*- -""" -Solves linear optimal power flow for a network iteratively while updating -reactances. - -Relevant Settings ------------------ - -.. code:: yaml - - solving: - tmpdir: - options: - formulation: - clip_p_max_pu: - load_shedding: - noisy_costs: - nhours: - min_iterations: - max_iterations: - skip_iterations: - track_iterations: - solver: - name: - -.. seealso:: - Documentation of the configuration file ``config.yaml`` at - :ref:`electricity_cf`, :ref:`solving_cf`, :ref:`plotting_cf` - -Inputs ------- - -- ``networks/elec_s{simpl}_{clusters}_ec_l{ll}_{opts}.nc``: confer :ref:`prepare` - -Outputs -------- - -- ``results/networks/elec_s{simpl}_{clusters}_ec_l{ll}_{opts}.nc``: Solved PyPSA network including optimisation results - - .. image:: /img/results.png - :width: 40 % - -Description ------------ - -Total annual system costs are minimised with PyPSA. The full formulation of the -linear optimal power flow (plus investment planning) -is provided in the -`documentation of PyPSA `_. 
-The optimization is based on the ``pyomo=False`` setting in the :func:`network.lopf` and :func:`pypsa.linopf.ilopf` function. -Additionally, some extra constraints specified in :mod:`prepare_network` are added. - -Solving the network in multiple iterations is motivated through the dependence of transmission line capacities and impedances on values of corresponding flows. -As lines are expanded their electrical parameters change, which renders the optimisation bilinear even if the power flow -equations are linearized. -To retain the computational advantage of continuous linear programming, a sequential linear programming technique -is used, where in between iterations the line impedances are updated. -Details (and errors made through this heuristic) are discussed in the paper - -- Fabian Neumann and Tom Brown. `Heuristics for Transmission Expansion Planning in Low-Carbon Energy System Models `_), *16th International Conference on the European Energy Market*, 2019. `arXiv:1907.10548 `_. - -.. warning:: - Capital costs of existing network components are not included in the objective function, - since for the optimisation problem they are just a constant term (no influence on optimal result). - - Therefore, these capital costs are not included in ``network.objective``! - - If you want to calculate the full total annual system costs add these to the objective value. - -.. tip:: - The rule :mod:`solve_all_networks` runs - for all ``scenario`` s in the configuration file - the rule :mod:`solve_network`. -""" -import re - -import numpy as np -import pandas as pd -import pypsa -from _helpers import ( - build_directory, - change_to_script_dir, - configure_logging, - create_logger, - mock_snakemake, -) -from pypsa.descriptors import get_switchable_as_dense as get_as_dense -from pypsa.linopf import ( - define_constraints, - define_variables, - get_var, - ilopf, - join_exprs, - linexpr, - network_lopf, -) - -logger = create_logger(__name__) - - -def prepare_network(n, solve_opts): - if "clip_p_max_pu" in solve_opts: - for df in (n.generators_t.p_max_pu, n.storage_units_t.inflow): - df.where(df > solve_opts["clip_p_max_pu"], other=0.0, inplace=True) - - load_shedding = solve_opts.get("load_shedding") - if load_shedding: - n.add("Carrier", "Load") - buses_i = n.buses.query("carrier == 'AC'").index - if not np.isscalar(load_shedding): - load_shedding = 8e3 # Eur/kWh - # intersect between macroeconomic and surveybased - # willingness to pay - # http://journal.frontiersin.org/article/10.3389/fenrg.2015.00055/full) - # 1e2 is practical relevant, 8e3 good for debugging - n.madd( - "Generator", - buses_i, - " load", - bus=buses_i, - carrier="load", - sign=1e-3, # Adjust sign to measure p and p_nom in kW instead of MW - marginal_cost=load_shedding, - p_nom=1e9, # kW - ) - - if solve_opts.get("noisy_costs"): - for t in n.iterate_components(n.one_port_components): - # TODO: uncomment out to and test noisy_cost (makes solution unique) - # if 'capital_cost' in t.df: - # t.df['capital_cost'] += 1e1 + 2.*(np.random.random(len(t.df)) - 0.5) - if "marginal_cost" in t.df: - t.df["marginal_cost"] += 1e-2 + 2e-3 * ( - np.random.random(len(t.df)) - 0.5 - ) - - for t in n.iterate_components(["Line", "Link"]): - t.df["capital_cost"] += ( - 1e-1 + 2e-2 * (np.random.random(len(t.df)) - 0.5) - ) * t.df["length"] - - if solve_opts.get("nhours"): - nhours = solve_opts["nhours"] - n.set_snapshots(n.snapshots[:nhours]) - n.snapshot_weightings[:] = 8760.0 / nhours - - return n - - -def add_CCL_constraints(n, config): - 
agg_p_nom_limits = config["electricity"].get("agg_p_nom_limits") - - try: - agg_p_nom_minmax = pd.read_csv(agg_p_nom_limits, index_col=list(range(2))) - except IOError: - logger.exception( - "Need to specify the path to a .csv file containing " - "aggregate capacity limits per country in " - "config['electricity']['agg_p_nom_limit']." - ) - logger.info( - "Adding per carrier generation capacity constraints for " "individual countries" - ) - - gen_country = n.generators.bus.map(n.buses.country) - # cc means country and carrier - p_nom_per_cc = ( - pd.DataFrame( - { - "p_nom": linexpr((1, get_var(n, "Generator", "p_nom"))), - "country": gen_country, - "carrier": n.generators.carrier, - } - ) - .dropna(subset=["p_nom"]) - .groupby(["country", "carrier"]) - .p_nom.apply(join_exprs) - ) - minimum = agg_p_nom_minmax["min"].dropna() - if not minimum.empty: - minconstraint = define_constraints( - n, p_nom_per_cc[minimum.index], ">=", minimum, "agg_p_nom", "min" - ) - maximum = agg_p_nom_minmax["max"].dropna() - if not maximum.empty: - maxconstraint = define_constraints( - n, p_nom_per_cc[maximum.index], "<=", maximum, "agg_p_nom", "max" - ) - - -def add_EQ_constraints(n, o, scaling=1e-1): - float_regex = "[0-9]*\.?[0-9]+" - level = float(re.findall(float_regex, o)[0]) - if o[-1] == "c": - ggrouper = n.generators.bus.map(n.buses.country) - lgrouper = n.loads.bus.map(n.buses.country) - sgrouper = n.storage_units.bus.map(n.buses.country) - else: - ggrouper = n.generators.bus - lgrouper = n.loads.bus - sgrouper = n.storage_units.bus - load = ( - n.snapshot_weightings.generators - @ n.loads_t.p_set.groupby(lgrouper, axis=1).sum() - ) - inflow = ( - n.snapshot_weightings.stores - @ n.storage_units_t.inflow.groupby(sgrouper, axis=1).sum() - ) - inflow = inflow.reindex(load.index).fillna(0.0) - rhs = scaling * (level * load - inflow) - lhs_gen = ( - linexpr( - (n.snapshot_weightings.generators * scaling, get_var(n, "Generator", "p").T) - ) - .T.groupby(ggrouper, axis=1) - .apply(join_exprs) - ) - lhs_spill = ( - linexpr( - ( - -n.snapshot_weightings.stores * scaling, - get_var(n, "StorageUnit", "spill").T, - ) - ) - .T.groupby(sgrouper, axis=1) - .apply(join_exprs) - ) - lhs_spill = lhs_spill.reindex(lhs_gen.index).fillna("") - lhs = lhs_gen + lhs_spill - define_constraints(n, lhs, ">=", rhs, "equity", "min") - - -def add_BAU_constraints(n, config): - ext_c = n.generators.query("p_nom_extendable").carrier.unique() - mincaps = pd.Series( - config["electricity"].get("BAU_mincapacities", {key: 0 for key in ext_c}) - ) - lhs = ( - linexpr((1, get_var(n, "Generator", "p_nom"))) - .groupby(n.generators.carrier) - .apply(join_exprs) - ) - define_constraints(n, lhs, ">=", mincaps[lhs.index], "Carrier", "bau_mincaps") - - maxcaps = pd.Series( - config["electricity"].get("BAU_maxcapacities", {key: np.inf for key in ext_c}) - ) - lhs = ( - linexpr((1, get_var(n, "Generator", "p_nom"))) - .groupby(n.generators.carrier) - .apply(join_exprs) - ) - define_constraints(n, lhs, "<=", maxcaps[lhs.index], "Carrier", "bau_maxcaps") - - -def add_SAFE_constraints(n, config): - peakdemand = ( - 1.0 + config["electricity"]["SAFE_reservemargin"] - ) * n.loads_t.p_set.sum(axis=1).max() - conv_techs = config["plotting"]["conv_techs"] - exist_conv_caps = n.generators.query( - "~p_nom_extendable & carrier in @conv_techs" - ).p_nom.sum() - ext_gens_i = n.generators.query("carrier in @conv_techs & p_nom_extendable").index - lhs = linexpr((1, get_var(n, "Generator", "p_nom")[ext_gens_i])).sum() - rhs = peakdemand - exist_conv_caps - 
define_constraints(n, lhs, ">=", rhs, "Safe", "mintotalcap") - - -def add_operational_reserve_margin_constraint(n, config): - reserve_config = config["electricity"]["operational_reserve"] - EPSILON_LOAD = reserve_config["epsilon_load"] - EPSILON_VRES = reserve_config["epsilon_vres"] - CONTINGENCY = reserve_config["contingency"] - - # Reserve Variables - reserve = get_var(n, "Generator", "r") - lhs = linexpr((1, reserve)).sum(1) - - # Share of extendable renewable capacities - ext_i = n.generators.query("p_nom_extendable").index - vres_i = n.generators_t.p_max_pu.columns - if not ext_i.empty and not vres_i.empty: - capacity_factor = n.generators_t.p_max_pu[vres_i.intersection(ext_i)] - renewable_capacity_variables = get_var(n, "Generator", "p_nom")[ - vres_i.intersection(ext_i) - ] - lhs += linexpr( - (-EPSILON_VRES * capacity_factor, renewable_capacity_variables) - ).sum(1) - - # Total demand at t - demand = n.loads_t.p.sum(1) - - # VRES potential of non extendable generators - capacity_factor = n.generators_t.p_max_pu[vres_i.difference(ext_i)] - renewable_capacity = n.generators.p_nom[vres_i.difference(ext_i)] - potential = (capacity_factor * renewable_capacity).sum(1) - - # Right-hand-side - rhs = EPSILON_LOAD * demand + EPSILON_VRES * potential + CONTINGENCY - - define_constraints(n, lhs, ">=", rhs, "Reserve margin") - - -def update_capacity_constraint(n): - gen_i = n.generators.index - ext_i = n.generators.query("p_nom_extendable").index - fix_i = n.generators.query("not p_nom_extendable").index - - dispatch = get_var(n, "Generator", "p") - reserve = get_var(n, "Generator", "r") - - capacity_fixed = n.generators.p_nom[fix_i] - - p_max_pu = get_as_dense(n, "Generator", "p_max_pu") - - lhs = linexpr((1, dispatch), (1, reserve)) - - if not ext_i.empty: - capacity_variable = get_var(n, "Generator", "p_nom") - lhs += linexpr((-p_max_pu[ext_i], capacity_variable)).reindex( - columns=gen_i, fill_value="" - ) - - rhs = (p_max_pu[fix_i] * capacity_fixed).reindex(columns=gen_i, fill_value=0) - - define_constraints(n, lhs, "<=", rhs, "Generators", "updated_capacity_constraint") - - -def add_operational_reserve_margin(n, sns, config): - """ - Build reserve margin constraints based on the formulation given in - https://genxproject.github.io/GenX/dev/core/#Reserves. - """ - - define_variables(n, 0, np.inf, "Generator", "r", axes=[sns, n.generators.index]) - - add_operational_reserve_margin_constraint(n, config) - - update_capacity_constraint(n) - - -def add_battery_constraints(n): - nodes = n.buses.index[n.buses.carrier == "battery"] - if nodes.empty or ("Link", "p_nom") not in n.variables.index: - return - link_p_nom = get_var(n, "Link", "p_nom") - lhs = linexpr( - (1, link_p_nom[nodes + " charger"]), - ( - -n.links.loc[nodes + " discharger", "efficiency"].values, - link_p_nom[nodes + " discharger"].values, - ), - ) - define_constraints(n, lhs, "=", 0, "Link", "charger_ratio") - - -def add_RES_constraints(n, res_share): - lgrouper = n.loads.bus.map(n.buses.country) - ggrouper = n.generators.bus.map(n.buses.country) - sgrouper = n.storage_units.bus.map(n.buses.country) - cgrouper = n.links.bus0.map(n.buses.country) - - logger.warning( - "The add_RES_constraints functionality is still work in progress. " - "Unexpected results might be incurred, particularly if " - "temporal clustering is applied or if an unexpected change of technologies " - "is subject to the obtimisation." 
- ) - - load = ( - n.snapshot_weightings.generators - @ n.loads_t.p_set.groupby(lgrouper, axis=1).sum() - ) - - rhs = res_share * load - - res_techs = [ - "solar", - "onwind", - "offwind-dc", - "offwind-ac", - "battery", - "hydro", - "ror", - ] - charger = ["H2 electrolysis", "battery charger"] - discharger = ["H2 fuel cell", "battery discharger"] - - gens_i = n.generators.query("carrier in @res_techs").index - stores_i = n.storage_units.query("carrier in @res_techs").index - charger_i = n.links.query("carrier in @charger").index - discharger_i = n.links.query("carrier in @discharger").index - - # Generators - lhs_gen = ( - linexpr( - (n.snapshot_weightings.generators, get_var(n, "Generator", "p")[gens_i].T) - ) - .T.groupby(ggrouper, axis=1) - .apply(join_exprs) - ) - - # StorageUnits - lhs_dispatch = ( - ( - linexpr( - ( - n.snapshot_weightings.stores, - get_var(n, "StorageUnit", "p_dispatch")[stores_i].T, - ) - ) - .T.groupby(sgrouper, axis=1) - .apply(join_exprs) - ) - .reindex(lhs_gen.index) - .fillna("") - ) - - lhs_store = ( - ( - linexpr( - ( - -n.snapshot_weightings.stores, - get_var(n, "StorageUnit", "p_store")[stores_i].T, - ) - ) - .T.groupby(sgrouper, axis=1) - .apply(join_exprs) - ) - .reindex(lhs_gen.index) - .fillna("") - ) - - # Stores (or their resp. Link components) - # Note that the variables "p0" and "p1" currently do not exist. - # Thus, p0 and p1 must be derived from "p" (which exists), taking into account the link efficiency. - lhs_charge = ( - ( - linexpr( - ( - -n.snapshot_weightings.stores, - get_var(n, "Link", "p")[charger_i].T, - ) - ) - .T.groupby(cgrouper, axis=1) - .apply(join_exprs) - ) - .reindex(lhs_gen.index) - .fillna("") - ) - - lhs_discharge = ( - ( - linexpr( - ( - n.snapshot_weightings.stores.apply( - lambda r: r * n.links.loc[discharger_i].efficiency - ), - get_var(n, "Link", "p")[discharger_i], - ) - ) - .groupby(cgrouper, axis=1) - .apply(join_exprs) - ) - .reindex(lhs_gen.index) - .fillna("") - ) - - # signs of resp. terms are coded in the linexpr. - # todo: for links (lhs_charge and lhs_discharge), account for snapshot weightings - lhs = lhs_gen + lhs_dispatch + lhs_store + lhs_charge + lhs_discharge - - define_constraints(n, lhs, "=", rhs, "RES share") - - -def extra_functionality(n, snapshots): - """ - Collects supplementary constraints which will be passed to - ``pypsa.linopf.network_lopf``. - - If you want to enforce additional custom constraints, this is a good location to add them. - The arguments ``opts`` and ``snakemake.config`` are expected to be attached to the network. 
- """ - opts = n.opts - config = n.config - if "BAU" in opts and n.generators.p_nom_extendable.any(): - add_BAU_constraints(n, config) - if "SAFE" in opts and n.generators.p_nom_extendable.any(): - add_SAFE_constraints(n, config) - if "CCL" in opts and n.generators.p_nom_extendable.any(): - add_CCL_constraints(n, config) - reserve = config["electricity"].get("operational_reserve", {}) - if reserve.get("activate"): - add_operational_reserve_margin(n, snapshots, config) - for o in opts: - if "RES" in o: - res_share = float(re.findall("[0-9]*\.?[0-9]+$", o)[0]) - add_RES_constraints(n, res_share) - for o in opts: - if "EQ" in o: - add_EQ_constraints(n, o) - add_battery_constraints(n) - - -def solve_network(n, config, opts="", **kwargs): - solver_options = config["solving"]["solver"].copy() - solver_name = solver_options.pop("name") - cf_solving = config["solving"]["options"] - track_iterations = cf_solving.get("track_iterations", False) - min_iterations = cf_solving.get("min_iterations", 4) - max_iterations = cf_solving.get("max_iterations", 6) - - # add to network for extra_functionality - n.config = config - n.opts = opts - - if cf_solving.get("skip_iterations", False): - network_lopf( - n, - solver_name=solver_name, - solver_options=solver_options, - extra_functionality=extra_functionality, - **kwargs, - ) - else: - ilopf( - n, - solver_name=solver_name, - solver_options=solver_options, - track_iterations=track_iterations, - min_iterations=min_iterations, - max_iterations=max_iterations, - extra_functionality=extra_functionality, - **kwargs, - ) - return n - - -if __name__ == "__main__": - if "snakemake" not in globals(): - change_to_script_dir(__file__) - snakemake = mock_snakemake( - "solve_network", - simpl="", - clusters="54", - ll="copt", - opts="Co2L-1H", - ) - configure_logging(snakemake) - - tmpdir = snakemake.params.solving.get("tmpdir") - if tmpdir is not None: - build_directory(tmpdir, just_parent_directory=False) - opts = snakemake.wildcards.opts.split("-") - solve_opts = snakemake.params.solving["options"] - - n = pypsa.Network(snakemake.input[0]) - if snakemake.params.augmented_line_connection.get("add_to_snakefile"): - n.lines.loc[n.lines.index.str.contains("new"), "s_nom_min"] = ( - snakemake.params.augmented_line_connection.get("min_expansion") - ) - n = prepare_network(n, solve_opts) - - n = solve_network( - n, - config=snakemake.config, - opts=opts, - solver_dir=tmpdir, - solver_logfile=snakemake.log.solver, - ) - n.meta = dict(snakemake.config, **dict(wildcards=dict(snakemake.wildcards))) - n.export_to_netcdf(snakemake.output[0]) - logger.info(f"Objective function: {n.objective}") - logger.info(f"Objective constant: {n.objective_constant}") +# -*- coding: utf-8 -*- +# SPDX-FileCopyrightText: PyPSA-Earth and PyPSA-Eur Authors +# +# SPDX-License-Identifier: AGPL-3.0-or-later + +# -*- coding: utf-8 -*- +""" +Solves linear optimal power flow for a network iteratively while updating +reactances. + +Relevant Settings +----------------- + +.. code:: yaml + + solving: + tmpdir: + options: + formulation: + clip_p_max_pu: + load_shedding: + noisy_costs: + nhours: + min_iterations: + max_iterations: + skip_iterations: + track_iterations: + solver: + name: + +.. 
seealso:: + Documentation of the configuration file ``config.yaml`` at + :ref:`electricity_cf`, :ref:`solving_cf`, :ref:`plotting_cf` + +Inputs +------ + +- ``networks/elec_s{simpl}_{clusters}_ec_l{ll}_{opts}.nc``: confer :ref:`prepare` + +Outputs +------- + +- ``results/networks/elec_s{simpl}_{clusters}_ec_l{ll}_{opts}.nc``: Solved PyPSA network including optimisation results + + .. image:: /img/results.png + :width: 40 % + +Description +----------- + +Total annual system costs are minimised with PyPSA. The full formulation of the +linear optimal power flow (plus investment planning) +is provided in the +`documentation of PyPSA `_. +The optimization is based on the ``pyomo=False`` setting in the :func:`network.lopf` and :func:`pypsa.linopf.ilopf` function. +Additionally, some extra constraints specified in :mod:`prepare_network` are added. + +Solving the network in multiple iterations is motivated through the dependence of transmission line capacities and impedances on values of corresponding flows. +As lines are expanded their electrical parameters change, which renders the optimisation bilinear even if the power flow +equations are linearized. +To retain the computational advantage of continuous linear programming, a sequential linear programming technique +is used, where in between iterations the line impedances are updated. +Details (and errors made through this heuristic) are discussed in the paper + +- Fabian Neumann and Tom Brown. `Heuristics for Transmission Expansion Planning in Low-Carbon Energy System Models `_), *16th International Conference on the European Energy Market*, 2019. `arXiv:1907.10548 `_. + +.. warning:: + Capital costs of existing network components are not included in the objective function, + since for the optimisation problem they are just a constant term (no influence on optimal result). + + Therefore, these capital costs are not included in ``network.objective``! + + If you want to calculate the full total annual system costs add these to the objective value. + +.. tip:: + The rule :mod:`solve_all_networks` runs + for all ``scenario`` s in the configuration file + the rule :mod:`solve_network`. 
+""" +import logging +import re + +import numpy as np +import pandas as pd +import pypsa +from _helpers import ( + build_directory, + configure_logging, + create_logger, + mock_snakemake, + override_component_attrs, +) +from pypsa.descriptors import get_switchable_as_dense as get_as_dense +from pypsa.linopf import ( + define_constraints, + define_variables, + get_var, + ilopf, + join_exprs, + linexpr, + network_lopf, +) +from pypsa.linopt import define_constraints, get_var, join_exprs, linexpr + +logger = create_logger(__name__) +pypsa.pf.logger.setLevel(logging.WARNING) + + +def prepare_network(n, solve_opts): + if "clip_p_max_pu" in solve_opts: + for df in ( + n.generators_t.p_max_pu, + n.generators_t.p_min_pu, + n.storage_units_t.inflow, + ): + df.where(df > solve_opts["clip_p_max_pu"], other=0.0, inplace=True) + + if "lv_limit" in n.global_constraints.index: + n.line_volume_limit = n.global_constraints.at["lv_limit", "constant"] + n.line_volume_limit_dual = n.global_constraints.at["lv_limit", "mu"] + + if solve_opts.get("load_shedding"): + n.add("Carrier", "Load") + n.madd( + "Generator", + n.buses.index, + " load", + bus=n.buses.index, + carrier="load", + sign=1e-3, # Adjust sign to measure p and p_nom in kW instead of MW + marginal_cost=1e2, # Eur/kWh + # intersect between macroeconomic and surveybased + # willingness to pay + # http://journal.frontiersin.org/article/10.3389/fenrg.2015.00055/full + p_nom=1e9, # kW + ) + + if solve_opts.get("noisy_costs"): + for t in n.iterate_components(): + # if 'capital_cost' in t.df: + # t.df['capital_cost'] += 1e1 + 2.*(np.random.random(len(t.df)) - 0.5) + if "marginal_cost" in t.df: + np.random.seed(174) + t.df["marginal_cost"] += 1e-2 + 2e-3 * ( + np.random.random(len(t.df)) - 0.5 + ) + + for t in n.iterate_components(["Line", "Link"]): + np.random.seed(123) + t.df["capital_cost"] += ( + 1e-1 + 2e-2 * (np.random.random(len(t.df)) - 0.5) + ) * t.df["length"] + + if solve_opts.get("nhours"): + nhours = solve_opts["nhours"] + n.set_snapshots(n.snapshots[:nhours]) + n.snapshot_weightings[:] = 8760.0 / nhours + + if snakemake.config["foresight"] == "myopic": + add_land_use_constraint(n) + + return n + + +def add_CCL_constraints(n, config): + agg_p_nom_limits = config["electricity"].get("agg_p_nom_limits") + + try: + agg_p_nom_minmax = pd.read_csv(agg_p_nom_limits, index_col=list(range(2))) + except IOError: + logger.exception( + "Need to specify the path to a .csv file containing " + "aggregate capacity limits per country in " + "config['electricity']['agg_p_nom_limit']." 
+ ) + logger.info( + "Adding per carrier generation capacity constraints for " "individual countries" + ) + + gen_country = n.generators.bus.map(n.buses.country) + # cc means country and carrier + p_nom_per_cc = ( + pd.DataFrame( + { + "p_nom": linexpr((1, get_var(n, "Generator", "p_nom"))), + "country": gen_country, + "carrier": n.generators.carrier, + } + ) + .dropna(subset=["p_nom"]) + .groupby(["country", "carrier"]) + .p_nom.apply(join_exprs) + ) + minimum = agg_p_nom_minmax["min"].dropna() + if not minimum.empty: + minconstraint = define_constraints( + n, p_nom_per_cc[minimum.index], ">=", minimum, "agg_p_nom", "min" + ) + maximum = agg_p_nom_minmax["max"].dropna() + if not maximum.empty: + maxconstraint = define_constraints( + n, p_nom_per_cc[maximum.index], "<=", maximum, "agg_p_nom", "max" + ) + + +def add_EQ_constraints(n, o, scaling=1e-1): + float_regex = "[0-9]*\.?[0-9]+" + level = float(re.findall(float_regex, o)[0]) + if o[-1] == "c": + ggrouper = n.generators.bus.map(n.buses.country) + lgrouper = n.loads.bus.map(n.buses.country) + sgrouper = n.storage_units.bus.map(n.buses.country) + else: + ggrouper = n.generators.bus + lgrouper = n.loads.bus + sgrouper = n.storage_units.bus + load = ( + n.snapshot_weightings.generators + @ n.loads_t.p_set.groupby(lgrouper, axis=1).sum() + ) + inflow = ( + n.snapshot_weightings.stores + @ n.storage_units_t.inflow.groupby(sgrouper, axis=1).sum() + ) + inflow = inflow.reindex(load.index).fillna(0.0) + rhs = scaling * (level * load - inflow) + lhs_gen = ( + linexpr( + (n.snapshot_weightings.generators * scaling, get_var(n, "Generator", "p").T) + ) + .T.groupby(ggrouper, axis=1) + .apply(join_exprs) + ) + lhs_spill = ( + linexpr( + ( + -n.snapshot_weightings.stores * scaling, + get_var(n, "StorageUnit", "spill").T, + ) + ) + .T.groupby(sgrouper, axis=1) + .apply(join_exprs) + ) + lhs_spill = lhs_spill.reindex(lhs_gen.index).fillna("") + lhs = lhs_gen + lhs_spill + define_constraints(n, lhs, ">=", rhs, "equity", "min") + + +def add_BAU_constraints(n, config): + ext_c = n.generators.query("p_nom_extendable").carrier.unique() + mincaps = pd.Series( + config["electricity"].get("BAU_mincapacities", {key: 0 for key in ext_c}) + ) + lhs = ( + linexpr((1, get_var(n, "Generator", "p_nom"))) + .groupby(n.generators.carrier) + .apply(join_exprs) + ) + define_constraints(n, lhs, ">=", mincaps[lhs.index], "Carrier", "bau_mincaps") + + maxcaps = pd.Series( + config["electricity"].get("BAU_maxcapacities", {key: np.inf for key in ext_c}) + ) + lhs = ( + linexpr((1, get_var(n, "Generator", "p_nom"))) + .groupby(n.generators.carrier) + .apply(join_exprs) + ) + define_constraints(n, lhs, "<=", maxcaps[lhs.index], "Carrier", "bau_maxcaps") + + +def add_SAFE_constraints(n, config): + peakdemand = ( + 1.0 + config["electricity"]["SAFE_reservemargin"] + ) * n.loads_t.p_set.sum(axis=1).max() + conv_techs = config["plotting"]["conv_techs"] + exist_conv_caps = n.generators.query( + "~p_nom_extendable & carrier in @conv_techs" + ).p_nom.sum() + ext_gens_i = n.generators.query("carrier in @conv_techs & p_nom_extendable").index + lhs = linexpr((1, get_var(n, "Generator", "p_nom")[ext_gens_i])).sum() + rhs = peakdemand - exist_conv_caps + define_constraints(n, lhs, ">=", rhs, "Safe", "mintotalcap") + + +def add_operational_reserve_margin_constraint(n, config): + reserve_config = config["electricity"]["operational_reserve"] + EPSILON_LOAD = reserve_config["epsilon_load"] + EPSILON_VRES = reserve_config["epsilon_vres"] + CONTINGENCY = reserve_config["contingency"] + + # 
Reserve Variables + reserve = get_var(n, "Generator", "r") + lhs = linexpr((1, reserve)).sum(1) + + # Share of extendable renewable capacities + ext_i = n.generators.query("p_nom_extendable").index + vres_i = n.generators_t.p_max_pu.columns + if not ext_i.empty and not vres_i.empty: + capacity_factor = n.generators_t.p_max_pu[vres_i.intersection(ext_i)] + renewable_capacity_variables = get_var(n, "Generator", "p_nom")[ + vres_i.intersection(ext_i) + ] + lhs += linexpr( + (-EPSILON_VRES * capacity_factor, renewable_capacity_variables) + ).sum(1) + + # Total demand at t + demand = n.loads_t.p.sum(1) + + # VRES potential of non extendable generators + capacity_factor = n.generators_t.p_max_pu[vres_i.difference(ext_i)] + renewable_capacity = n.generators.p_nom[vres_i.difference(ext_i)] + potential = (capacity_factor * renewable_capacity).sum(1) + + # Right-hand-side + rhs = EPSILON_LOAD * demand + EPSILON_VRES * potential + CONTINGENCY + + define_constraints(n, lhs, ">=", rhs, "Reserve margin") + + +def update_capacity_constraint(n): + gen_i = n.generators.index + ext_i = n.generators.query("p_nom_extendable").index + fix_i = n.generators.query("not p_nom_extendable").index + + dispatch = get_var(n, "Generator", "p") + reserve = get_var(n, "Generator", "r") + + capacity_fixed = n.generators.p_nom[fix_i] + + p_max_pu = get_as_dense(n, "Generator", "p_max_pu") + + lhs = linexpr((1, dispatch), (1, reserve)) + + if not ext_i.empty: + capacity_variable = get_var(n, "Generator", "p_nom") + lhs += linexpr((-p_max_pu[ext_i], capacity_variable)).reindex( + columns=gen_i, fill_value="" + ) + + rhs = (p_max_pu[fix_i] * capacity_fixed).reindex(columns=gen_i, fill_value=0) + + define_constraints(n, lhs, "<=", rhs, "Generators", "updated_capacity_constraint") + + +def add_operational_reserve_margin(n, sns, config): + """ + Build reserve margin constraints based on the formulation given in + https://genxproject.github.io/GenX/dev/core/#Reserves. + """ + + define_variables(n, 0, np.inf, "Generator", "r", axes=[sns, n.generators.index]) + + add_operational_reserve_margin_constraint(n, config) + + update_capacity_constraint(n) + + +def add_battery_constraints(n): + nodes = n.buses.index[n.buses.carrier == "battery"] + if nodes.empty or ("Link", "p_nom") not in n.variables.index: + return + link_p_nom = get_var(n, "Link", "p_nom") + lhs = linexpr( + (1, link_p_nom[nodes + " charger"]), + ( + -n.links.loc[nodes + " discharger", "efficiency"].values, + link_p_nom[nodes + " discharger"].values, + ), + ) + define_constraints(n, lhs, "=", 0, "Link", "charger_ratio") + + +def add_RES_constraints(n, res_share): + lgrouper = n.loads.bus.map(n.buses.country) + ggrouper = n.generators.bus.map(n.buses.country) + sgrouper = n.storage_units.bus.map(n.buses.country) + cgrouper = n.links.bus0.map(n.buses.country) + + logger.warning( + "The add_RES_constraints functionality is still work in progress. " + "Unexpected results might be incurred, particularly if " + "temporal clustering is applied or if an unexpected change of technologies " + "is subject to the obtimisation." 
+ ) + + load = ( + n.snapshot_weightings.generators + @ n.loads_t.p_set.groupby(lgrouper, axis=1).sum() + ) + + rhs = res_share * load + + res_techs = [ + "solar", + "onwind", + "offwind-dc", + "offwind-ac", + "battery", + "hydro", + "ror", + ] + charger = ["H2 electrolysis", "battery charger"] + discharger = ["H2 fuel cell", "battery discharger"] + + gens_i = n.generators.query("carrier in @res_techs").index + stores_i = n.storage_units.query("carrier in @res_techs").index + charger_i = n.links.query("carrier in @charger").index + discharger_i = n.links.query("carrier in @discharger").index + + # Generators + lhs_gen = ( + linexpr( + (n.snapshot_weightings.generators, get_var(n, "Generator", "p")[gens_i].T) + ) + .T.groupby(ggrouper, axis=1) + .apply(join_exprs) + ) + + # StorageUnits + lhs_dispatch = ( + ( + linexpr( + ( + n.snapshot_weightings.stores, + get_var(n, "StorageUnit", "p_dispatch")[stores_i].T, + ) + ) + .T.groupby(sgrouper, axis=1) + .apply(join_exprs) + ) + .reindex(lhs_gen.index) + .fillna("") + ) + + lhs_store = ( + ( + linexpr( + ( + -n.snapshot_weightings.stores, + get_var(n, "StorageUnit", "p_store")[stores_i].T, + ) + ) + .T.groupby(sgrouper, axis=1) + .apply(join_exprs) + ) + .reindex(lhs_gen.index) + .fillna("") + ) + + # Stores (or their resp. Link components) + # Note that the variables "p0" and "p1" currently do not exist. + # Thus, p0 and p1 must be derived from "p" (which exists), taking into account the link efficiency. + lhs_charge = ( + ( + linexpr( + ( + -n.snapshot_weightings.stores, + get_var(n, "Link", "p")[charger_i].T, + ) + ) + .T.groupby(cgrouper, axis=1) + .apply(join_exprs) + ) + .reindex(lhs_gen.index) + .fillna("") + ) + + lhs_discharge = ( + ( + linexpr( + ( + n.snapshot_weightings.stores.apply( + lambda r: r * n.links.loc[discharger_i].efficiency + ), + get_var(n, "Link", "p")[discharger_i], + ) + ) + .groupby(cgrouper, axis=1) + .apply(join_exprs) + ) + .reindex(lhs_gen.index) + .fillna("") + ) + + # signs of resp. terms are coded in the linexpr. 
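+    # Illustrative aggregation (a sketch of the intended per-country balance,
+    # written with hypothetical symbols, not additional model code):
+    #   lhs_c = sum_t w_t * (gen_t + dispatch_t - store_t
+    #                        - p_charger_t + eta_d * p_discharger_t)
+    # i.e. charger links draw electricity from the bus (negative sign), while
+    # discharger links return eta_d * p to the bus, as applied above.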
+ # todo: for links (lhs_charge and lhs_discharge), account for snapshot weightings + lhs = lhs_gen + lhs_dispatch + lhs_store + lhs_charge + lhs_discharge + + define_constraints(n, lhs, "=", rhs, "RES share") + + +def add_land_use_constraint(n): + if "m" in snakemake.wildcards.clusters: + _add_land_use_constraint_m(n) + else: + _add_land_use_constraint(n) + + +def _add_land_use_constraint(n): + # warning: this will miss existing offwind which is not classed AC-DC and has carrier 'offwind' + + for carrier in ["solar", "onwind", "offwind-ac", "offwind-dc"]: + existing = ( + n.generators.loc[n.generators.carrier == carrier, "p_nom"] + .groupby(n.generators.bus.map(n.buses.location)) + .sum() + ) + existing.index += " " + carrier + "-" + snakemake.wildcards.planning_horizons + n.generators.loc[existing.index, "p_nom_max"] -= existing + + n.generators.p_nom_max.clip(lower=0, inplace=True) + + +def _add_land_use_constraint_m(n): + # if generators clustering is lower than network clustering, land_use accounting is at generators clusters + + planning_horizons = snakemake.config["scenario"]["planning_horizons"] + grouping_years = snakemake.config["existing_capacities"]["grouping_years"] + current_horizon = snakemake.wildcards.planning_horizons + + for carrier in ["solar", "onwind", "offwind-ac", "offwind-dc"]: + existing = n.generators.loc[n.generators.carrier == carrier, "p_nom"] + ind = list( + set( + [ + i.split(sep=" ")[0] + " " + i.split(sep=" ")[1] + for i in existing.index + ] + ) + ) + + previous_years = [ + str(y) + for y in planning_horizons + grouping_years + if y < int(snakemake.wildcards.planning_horizons) + ] + + for p_year in previous_years: + ind2 = [ + i for i in ind if i + " " + carrier + "-" + p_year in existing.index + ] + sel_current = [i + " " + carrier + "-" + current_horizon for i in ind2] + sel_p_year = [i + " " + carrier + "-" + p_year for i in ind2] + n.generators.loc[sel_current, "p_nom_max"] -= existing.loc[ + sel_p_year + ].rename(lambda x: x[:-4] + current_horizon) + + n.generators.p_nom_max.clip(lower=0, inplace=True) + + +def add_h2_network_cap(n, cap): + h2_network = n.links.loc[n.links.carrier == "H2 pipeline"] + if h2_network.index.empty or ("Link", "p_nom") not in n.variables.index: + return + h2_network_cap = get_var(n, "Link", "p_nom") + subset_index = h2_network.index.intersection(h2_network_cap.index) + lhs = linexpr( + (h2_network.loc[subset_index, "length"], h2_network_cap[subset_index]) + ).sum() + # lhs = linexpr((1, h2_network_cap[h2_network.index])).sum() + rhs = cap * 1000 + define_constraints(n, lhs, "<=", rhs, "h2_network_cap") + + +def H2_export_yearly_constraint(n): + res = [ + "csp", + "rooftop-solar", + "solar", + "onwind", + "onwind2", + "offwind", + "offwind2", + "ror", + ] + res_index = n.generators.loc[n.generators.carrier.isin(res)].index + + weightings = pd.DataFrame( + np.outer(n.snapshot_weightings["generators"], [1.0] * len(res_index)), + index=n.snapshots, + columns=res_index, + ) + res = join_exprs( + linexpr((weightings, get_var(n, "Generator", "p")[res_index])) + ) # single line sum + + load_ind = n.loads[n.loads.carrier == "AC"].index.intersection( + n.loads_t.p_set.columns + ) + + load = ( + n.loads_t.p_set[load_ind].sum(axis=1) * n.snapshot_weightings["generators"] + ).sum() + + h2_export = n.loads.loc["H2 export load"].p_set * 8760 + + lhs = res + + include_country_load = snakemake.config["policy_config"]["yearly"][ + "re_country_load" + ] + + if include_country_load: + elec_efficiency = ( + n.links.filter(like="Electrolysis", 
axis=0).loc[:, "efficiency"].mean()
+        )
+        rhs = h2_export * (1 / elec_efficiency) + load
+    else:
+        # 0.7 approximates the electrolyzer efficiency  # TODO obtain value from network
+        rhs = h2_export * (1 / 0.7)
+
+    con = define_constraints(n, lhs, ">=", rhs, "H2ExportConstraint", "RESproduction")
+
+
+def monthly_constraints(n, n_ref):
+    res_techs = [
+        "csp",
+        "rooftop-solar",
+        "solar",
+        "onwind",
+        "onwind2",
+        "offwind",
+        "offwind2",
+        "ror",
+    ]
+    allowed_excess = snakemake.config["policy_config"]["hydrogen"]["allowed_excess"]
+
+    res_index = n.generators.loc[n.generators.carrier.isin(res_techs)].index
+
+    weightings = pd.DataFrame(
+        np.outer(n.snapshot_weightings["generators"], [1.0] * len(res_index)),
+        index=n.snapshots,
+        columns=res_index,
+    )
+
+    res = linexpr((weightings, get_var(n, "Generator", "p")[res_index])).sum(
+        axis=1
+    )  # single line sum
+    res = res.groupby(res.index.month).sum()
+
+    electrolysis = get_var(n, "Link", "p")[
+        n.links.index[n.links.index.str.contains("H2 Electrolysis")]
+    ]
+    weightings_electrolysis = pd.DataFrame(
+        np.outer(
+            n.snapshot_weightings["generators"], [1.0] * len(electrolysis.columns)
+        ),
+        index=n.snapshots,
+        columns=electrolysis.columns,
+    )
+
+    elec_input = linexpr((-allowed_excess * weightings_electrolysis, electrolysis)).sum(
+        axis=1
+    )
+
+    elec_input = elec_input.groupby(elec_input.index.month).sum()
+
+    if snakemake.config["policy_config"]["hydrogen"]["additionality"]:
+        res_ref = n_ref.generators_t.p[res_index] * weightings
+        res_ref = res_ref.groupby(n_ref.generators_t.p.index.month).sum().sum(axis=1)
+
+        elec_input_ref = (
+            n_ref.links_t.p0.loc[
+                :, n_ref.links_t.p0.columns.str.contains("H2 Electrolysis")
+            ]
+            * weightings_electrolysis
+        )
+        elec_input_ref = (
+            -elec_input_ref.groupby(elec_input_ref.index.month).sum().sum(axis=1)
+        )
+
+        for i in range(len(res.index)):
+            lhs = res.iloc[i] + "\n" + elec_input.iloc[i]
+            rhs = res_ref.iloc[i] + elec_input_ref.iloc[i]
+            con = define_constraints(
+                n, lhs, ">=", rhs, f"RESconstraints_{i}", f"REStarget_{i}"
+            )
+
+    else:
+        for i in range(len(res.index)):
+            lhs = res.iloc[i] + "\n" + elec_input.iloc[i]
+
+            con = define_constraints(
+                n, lhs, ">=", 0.0, f"RESconstraints_{i}", f"REStarget_{i}"
+            )
+    # else:
+    # logger.info("ignoring H2 export constraint as wildcard is set to 0")
+
+
+def add_chp_constraints(n):
+    electric_bool = (
+        n.links.index.str.contains("urban central")
+        & n.links.index.str.contains("CHP")
+        & n.links.index.str.contains("electric")
+    )
+    heat_bool = (
+        n.links.index.str.contains("urban central")
+        & n.links.index.str.contains("CHP")
+        & n.links.index.str.contains("heat")
+    )
+
+    electric = n.links.index[electric_bool]
+    heat = n.links.index[heat_bool]
+
+    electric_ext = n.links.index[electric_bool & n.links.p_nom_extendable]
+    heat_ext = n.links.index[heat_bool & n.links.p_nom_extendable]
+
+    electric_fix = n.links.index[electric_bool & ~n.links.p_nom_extendable]
+    heat_fix = n.links.index[heat_bool & ~n.links.p_nom_extendable]
+
+    link_p = get_var(n, "Link", "p")
+
+    if not electric_ext.empty:
+        link_p_nom = get_var(n, "Link", "p_nom")
+
+        # ratio of output heat to electricity set by p_nom_ratio
+        lhs = linexpr(
+            (
+                n.links.loc[electric_ext, "efficiency"]
+                * n.links.loc[electric_ext, "p_nom_ratio"],
+                link_p_nom[electric_ext],
+            ),
+            (-n.links.loc[heat_ext, "efficiency"].values, link_p_nom[heat_ext].values),
+        )
+
+        define_constraints(n, lhs, "=", 0, "chplink", "fix_p_nom_ratio")
+
+        # top_iso_fuel_line for extendable
+        lhs = linexpr(
+            (1,
link_p[heat_ext]), + (1, link_p[electric_ext].values), + (-1, link_p_nom[electric_ext].values), + ) + + define_constraints(n, lhs, "<=", 0, "chplink", "top_iso_fuel_line_ext") + + if not electric_fix.empty: + # top_iso_fuel_line for fixed + lhs = linexpr((1, link_p[heat_fix]), (1, link_p[electric_fix].values)) + + rhs = n.links.loc[electric_fix, "p_nom"].values + + define_constraints(n, lhs, "<=", rhs, "chplink", "top_iso_fuel_line_fix") + + if not electric.empty: + # backpressure + lhs = linexpr( + ( + n.links.loc[electric, "c_b"].values * n.links.loc[heat, "efficiency"], + link_p[heat], + ), + (-n.links.loc[electric, "efficiency"].values, link_p[electric].values), + ) + + define_constraints(n, lhs, "<=", 0, "chplink", "backpressure") + + +def add_co2_sequestration_limit(n, sns): + co2_stores = n.stores.loc[n.stores.carrier == "co2 stored"].index + + if co2_stores.empty or ("Store", "e") not in n.variables.index: + return + + vars_final_co2_stored = get_var(n, "Store", "e").loc[sns[-1], co2_stores] + + lhs = linexpr((1, vars_final_co2_stored)).sum() + rhs = ( + n.config["sector"].get("co2_sequestration_potential", 5) * 1e6 + ) # TODO change 200 limit (Europe) + + name = "co2_sequestration_limit" + define_constraints( + n, lhs, "<=", rhs, "GlobalConstraint", "mu", axes=pd.Index([name]), spec=name + ) + + +def set_h2_colors(n): + blue_h2 = get_var(n, "Link", "p")[ + n.links.index[n.links.index.str.contains("blue H2")] + ] + + pink_h2 = get_var(n, "Link", "p")[ + n.links.index[n.links.index.str.contains("pink H2")] + ] + + fuelcell_ind = n.loads[n.loads.carrier == "land transport fuel cell"].index + + other_ind = n.loads[ + (n.loads.carrier == "H2 for industry") + | (n.loads.carrier == "H2 for shipping") + | (n.loads.carrier == "H2") + ].index + + load_fuelcell = ( + n.loads_t.p_set[fuelcell_ind].sum(axis=1) * n.snapshot_weightings["generators"] + ).sum() + + load_other_h2 = n.loads.loc[other_ind].p_set.sum() * 8760 + + load_h2 = load_fuelcell + load_other_h2 + + weightings_blue = pd.DataFrame( + np.outer(n.snapshot_weightings["generators"], [1.0] * len(blue_h2.columns)), + index=n.snapshots, + columns=blue_h2.columns, + ) + + weightings_pink = pd.DataFrame( + np.outer(n.snapshot_weightings["generators"], [1.0] * len(pink_h2.columns)), + index=n.snapshots, + columns=pink_h2.columns, + ) + + total_blue = linexpr((weightings_blue, blue_h2)).sum().sum() + + total_pink = linexpr((weightings_pink, pink_h2)).sum().sum() + + rhs_blue = load_h2 * snakemake.config["sector"]["hydrogen"]["blue_share"] + rhs_pink = load_h2 * snakemake.config["sector"]["hydrogen"]["pink_share"] + + define_constraints(n, total_blue, "=", rhs_blue, "blue_h2_share") + + define_constraints(n, total_pink, "=", rhs_pink, "pink_h2_share") + + +def add_existing(n): + if snakemake.wildcards["planning_horizons"] == "2050": + directory = ( + "results/" + + "Existing_capacities/" + + snakemake.config["run"].replace("2050", "2030") + ) + n_name = ( + snakemake.input.network.split("/")[-1] + .replace(str(snakemake.config["scenario"]["clusters"][0]), "") + .replace(str(snakemake.config["costs"]["discountrate"][0]), "") + .replace("_presec", "") + .replace(".nc", ".csv") + ) + df = pd.read_csv(directory + "/electrolyzer_caps_" + n_name, index_col=0) + existing_electrolyzers = df.p_nom_opt.values + + h2_index = n.links[n.links.carrier == "H2 Electrolysis"].index + n.links.loc[h2_index, "p_nom_min"] = existing_electrolyzers + + # n_name = snakemake.input.network.split("/")[-1].replace(str(snakemake.config["scenario"]["clusters"][0]), 
"").\ + # replace(".nc", ".csv").replace(str(snakemake.config["costs"]["discountrate"][0]), "") + df = pd.read_csv(directory + "/res_caps_" + n_name, index_col=0) + + for tech in snakemake.config["custom_data"]["renewables"]: + # df = pd.read_csv(snakemake.config["custom_data"]["existing_renewables"], index_col=0) + existing_res = df.loc[tech] + existing_res.index = existing_res.index.str.apply(lambda x: x + tech) + tech_index = n.generators[n.generators.carrier == tech].index + n.generators.loc[tech_index, tech] = existing_res + + +def extra_functionality(n, snapshots): + """ + Collects supplementary constraints which will be passed to + ``pypsa.linopf.network_lopf``. + + If you want to enforce additional custom constraints, this is a good location to add them. + The arguments ``opts`` and ``snakemake.config`` are expected to be attached to the network. + """ + opts = n.opts + config = n.config + if "BAU" in opts and n.generators.p_nom_extendable.any(): + add_BAU_constraints(n, config) + if "SAFE" in opts and n.generators.p_nom_extendable.any(): + add_SAFE_constraints(n, config) + if "CCL" in opts and n.generators.p_nom_extendable.any(): + add_CCL_constraints(n, config) + reserve = config["electricity"].get("operational_reserve", {}) + if reserve.get("activate"): + add_operational_reserve_margin(n, snapshots, config) + for o in opts: + if "RES" in o: + res_share = float(re.findall("[0-9]*\.?[0-9]+$", o)[0]) + add_RES_constraints(n, res_share) + for o in opts: + if "EQ" in o: + add_EQ_constraints(n, o) + add_battery_constraints(n) + + if ( + snakemake.config["policy_config"]["hydrogen"]["temporal_matching"] + == "h2_yearly_matching" + ): + if snakemake.config["policy_config"]["hydrogen"]["additionality"] == True: + logger.info( + "additionality is currently not supported for yearly constraints, proceeding without additionality" + ) + logger.info("setting h2 export to yearly greenness constraint") + H2_export_yearly_constraint(n) + + elif ( + snakemake.config["policy_config"]["hydrogen"]["temporal_matching"] + == "h2_monthly_matching" + ): + if not snakemake.config["policy_config"]["hydrogen"]["is_reference"]: + logger.info("setting h2 export to monthly greenness constraint") + monthly_constraints(n, n_ref) + else: + logger.info("preparing reference case for additionality constraint") + + elif ( + snakemake.config["policy_config"]["hydrogen"]["temporal_matching"] + == "no_res_matching" + ): + logger.info("no h2 export constraint set") + + else: + raise ValueError( + 'H2 export constraint is invalid, check config["policy_config"]' + ) + + if snakemake.config["sector"]["hydrogen"]["network"]: + if snakemake.config["sector"]["hydrogen"]["network_limit"]: + add_h2_network_cap( + n, snakemake.config["sector"]["hydrogen"]["network_limit"] + ) + + if snakemake.config["sector"]["hydrogen"]["set_color_shares"]: + logger.info("setting H2 color mix") + set_h2_colors(n) + + add_co2_sequestration_limit(n, snapshots) + + +def solve_network(n, config, solving={}, opts="", **kwargs): + set_of_options = solving["solver"]["options"] + cf_solving = solving["options"] + + solver_options = solving["solver_options"][set_of_options] if set_of_options else {} + solver_name = solving["solver"]["name"] + + track_iterations = cf_solving.get("track_iterations", False) + min_iterations = cf_solving.get("min_iterations", 4) + max_iterations = cf_solving.get("max_iterations", 6) + + # add to network for extra_functionality + n.config = config + n.opts = opts + + if cf_solving.get("skip_iterations", False): + 
network_lopf( + n, + solver_name=solver_name, + solver_options=solver_options, + extra_functionality=extra_functionality, + **kwargs, + ) + else: + ilopf( + n, + solver_name=solver_name, + solver_options=solver_options, + track_iterations=track_iterations, + min_iterations=min_iterations, + max_iterations=max_iterations, + extra_functionality=extra_functionality, + **kwargs, + ) + return n + + +if __name__ == "__main__": + if "snakemake" not in globals(): + snakemake = mock_snakemake( + "solve_network", + simpl="", + clusters="54", + ll="copt", + opts="Co2L-1H", + ) + + configure_logging(snakemake) + + tmpdir = snakemake.params.solving.get("tmpdir") + if tmpdir is not None: + build_directory(tmpdir, just_parent_directory=False) + opts = snakemake.wildcards.opts.split("-") + solving = snakemake.params.solving + + is_sector_coupled = "sopts" in snakemake.wildcards.keys() + + if is_sector_coupled: + overrides = override_component_attrs(snakemake.input.overrides) + n = pypsa.Network(snakemake.input.network, override_component_attrs=overrides) + else: + n = pypsa.Network(snakemake.input.network) + + if snakemake.params.augmented_line_connection.get("add_to_snakefile"): + n.lines.loc[n.lines.index.str.contains("new"), "s_nom_min"] = ( + snakemake.params.augmented_line_connection.get("min_expansion") + ) + + if ( + snakemake.config["custom_data"]["add_existing"] + and snakemake.wildcards.planning_horizons == "2050" + and is_sector_coupled + ): + add_existing(n) + + if ( + snakemake.config["policy_config"]["hydrogen"]["additionality"] + and not snakemake.config["policy_config"]["hydrogen"]["is_reference"] + and snakemake.config["policy_config"]["hydrogen"]["temporal_matching"] + != "no_res_matching" + and is_sector_coupled + ): + n_ref_path = snakemake.config["policy_config"]["hydrogen"]["path_to_ref"] + n_ref = pypsa.Network(n_ref_path) + else: + n_ref = None + + n = prepare_network(n, solving["options"]) + + n = solve_network( + n, + config=snakemake.config, + solving=solving, + opts=opts, + solver_dir=tmpdir, + solver_logfile=snakemake.log.solver, + ) + n.meta = dict(snakemake.config, **dict(wildcards=dict(snakemake.wildcards))) + n.export_to_netcdf(snakemake.output[0]) + logger.info(f"Objective function: {n.objective}") + logger.info(f"Objective constant: {n.objective_constant}") diff --git a/test/config.custom.yaml b/test/config.custom.yaml index 6caf4ef67..a596a932d 100644 --- a/test/config.custom.yaml +++ b/test/config.custom.yaml @@ -3,8 +3,7 @@ # SPDX-License-Identifier: CC0-1.0 ### CHANGES TO CONFIG.TUTORIAL.YAML ### -retrieve_databundle: # required to be "false" for nice CI test output - show_progress: false +version: 0.5.0 run: name: "custom" diff --git a/test/config.landlock.yaml b/test/config.landlock.yaml index c56bdd968..913211f29 100644 --- a/test/config.landlock.yaml +++ b/test/config.landlock.yaml @@ -3,8 +3,7 @@ # SPDX-License-Identifier: CC0-1.0 ### CHANGES TO CONFIG.TUTORIAL.YAML ### -retrieve_databundle: # required to be "false" for nice CI test output - show_progress: false +version: 0.5.0 countries: ["BW"] diff --git a/test/config.monte_carlo.yaml b/test/config.monte_carlo.yaml index b51e53819..034dd51cd 100644 --- a/test/config.monte_carlo.yaml +++ b/test/config.monte_carlo.yaml @@ -3,8 +3,7 @@ # SPDX-License-Identifier: CC0-1.0 ### CHANGES TO CONFIG.TUTORIAL.YAML ### -retrieve_databundle: # required to be "false" for nice CI test output - show_progress: false +version: 0.5.0 monte_carlo: options: diff --git a/test/config.test1.yaml b/test/config.test1.yaml new file mode 
100644
index 000000000..792f60767
--- /dev/null
+++ b/test/config.test1.yaml
@@ -0,0 +1,54 @@
+# SPDX-FileCopyrightText: PyPSA-Earth and PyPSA-Eur Authors
+#
+# SPDX-License-Identifier: AGPL-3.0-or-later
+
+version: 0.5.0
+tutorial: true
+
+run:
+  name: test1
+  shared_cutouts: true
+
+scenario:
+  clusters: # number of nodes in Europe, any integer between 37 (1 node per country-zone) and several hundred
+  - 4
+  ll:
+  - "c1"
+
+countries: ["NG", "BJ"]
+
+
+electricity:
+  extendable_carriers:
+    Store: []
+    Link: []
+
+  co2limit: 7.75e7
+
+export:
+  h2export: [120] # Yearly export demand in TWh
+  store: true # [True, False] # specifies whether an export store to balance demand is implemented
+  store_capital_costs: "no_costs" # ["standard_costs", "no_costs"] # specifies the costs of the export store "standard_costs" takes CAPEX of "hydrogen storage tank type 1 including compressor"
+
+existing_capacities:
+  grouping_years_power: [1960, 1965, 1970, 1975, 1980, 1985, 1990, 1995, 2000, 2005, 2010, 2015, 2020, 2025, 2030]
+  grouping_years_heat: [1980, 1985, 1990, 1995, 2000, 2005, 2010, 2015, 2019] # these should not extend beyond 2020
+  threshold_capacity: 10
+  default_heating_lifetime: 20
+  conventional_carriers:
+  - lignite
+  - coal
+  - oil
+  - uranium
+
+sector:
+  solid_biomass_potential: 10 # TWh/a, Potential of whole modelled area
+  gadm_level: 2
+snapshots:
+  # arguments to pd.date_range
+  start: "2013-03-1"
+  end: "2013-03-7"
+
+solving:
+  solver:
+    name: gurobi
diff --git a/test/config.test_myopic.yaml b/test/config.test_myopic.yaml
new file mode 100644
index 000000000..bede7c639
--- /dev/null
+++ b/test/config.test_myopic.yaml
@@ -0,0 +1,540 @@
+# SPDX-FileCopyrightText: PyPSA-Earth and PyPSA-Eur Authors
+#
+# SPDX-License-Identifier: AGPL-3.0-or-later
+
+version: 0.5.0
+logging_level: INFO
+tutorial: true
+
+results_dir: results/
+summary_dir: results/
+costs_dir: data/ #TODO change to the equivalent of technology data
+
+run:
+  name: "test_myopic" # use this to keep track of runs with different settings
+  name_subworkflow: "tutorial" # scenario name of the pypsa-earth subworkflow
+  shared_cutouts: true # set to true to share the default cutout(s) across runs; Note: value false requires build_cutout to be enabled
+foresight: myopic
+
+# option to disable the subworkflow to ease the analyses
+disable_subworkflow: true
+
+scenario:
+  simpl: # only relevant for PyPSA-Eur
+  - ""
+  clusters: # number of nodes in Europe, any integer between 37 (1 node per country-zone) and several hundred
+  - 4
+  planning_horizons: # investment years for myopic and perfect; or costs year for overnight
+  - 2030
+  ll:
+  - "c1"
+  opts:
+  - "Co2L"
+  sopts:
+  - "24H"
+  demand:
+  - "DF"
+
+
+policy_config:
+  hydrogen:
+    temporal_matching: "no_res_matching" #either "h2_yearly_matching", "h2_monthly_matching", "no_res_matching"
+    spatial_matching: false
+    additionality: false # RE electricity is equal to the amount required for additional hydrogen export compared to the 0 export case ("reference_case")
+    allowed_excess: 1.0
+    is_reference: false # Whether or not this network is a reference case network, relevant only if additionality is _true_
+    remove_h2_load: false #Whether or not to remove the h2 load from the network, relevant only if is_reference is _true_
+    path_to_ref: "" # Path to the reference case network for additionality calculation, relevant only if additionality is _true_ and is_reference is _false_
+    re_country_load: false # Set to "True" to force the RE electricity to be equal to the electricity required for
hydrogen export and the country electricity load. "False" excludes the country electricity load from the constraint.
+
+cluster_options:
+  alternative_clustering: true
+
+countries: ['NG', 'BJ']
+
+demand_data:
+  update_data: true # if true, the workflow downloads the energy balances data saved in data/demand/unsd/data again. Turn on for the first run.
+  base_year: 2019
+
+  other_industries: false # Whether or not to include industries that are not specified. Some countries have exaggerated numbers; check carefully.
+  aluminium_year: 2019 # Year of the aluminium demand data specified in `data/AL_production.csv`
+
+
+enable:
+  retrieve_cost_data: true # if true, the workflow overwrites the cost data saved in data/costs again
+  retrieve_irena: true #If true, downloads the IRENA data
+
+fossil_reserves:
+  oil: 100 #TWh Maybe redundant
+
+
+export:
+  h2export: [120] # Yearly export demand in TWh
+  store: true # [True, False] # specifies whether an export store to balance demand is implemented
+  store_capital_costs: "no_costs" # ["standard_costs", "no_costs"] # specifies the costs of the export store "standard_costs" takes CAPEX of "hydrogen storage tank type 1 including compressor"
+  export_profile: "ship" # use "ship" or "constant"
+  ship:
+    ship_capacity: 0.4 # TWh # 0.05 TWh for new ones, 0.003 TWh for Susio Frontier, 0.4 TWh according to Hampp2021: "Corresponds to 11360 t H2 (l) with LHV of 33.3333 Mwh/t_H2. Cihlar et al 2020 based on IEA 2019, Table 3-B"
+    travel_time: 288 # hours # From Agadir to Rotterdam and back (12*24)
+    fill_time: 24 # hours, for 48h see Hampp2021
+    unload_time: 24 # hours for 48h see Hampp2021
+
+custom_data:
+  renewables: [] # ['csp', 'rooftop-solar', 'solar']
+  elec_demand: false
+  heat_demand: false
+  industry_demand: false
+  industry_database: false
+  transport_demand: false
+  water_costs: false
+  h2_underground: false
+  add_existing: false
+  custom_sectors: false
+  gas_network: false # If "True" then a custom .csv file must be placed in "resources/custom_data/pipelines.csv" , If "False" the user can choose btw "greenfield" or Model built-in datasets. Please refer to ["sector"] below.
+
+
+costs: # Costs used in PyPSA-Earth-Sec. Year depends on the wildcard planning_horizon in the scenario section
+  version: v0.6.2
+  lifetime: 25 #default lifetime
+  # From a Lion Hirth paper, also reflects average of Noothout et al 2016
+  discountrate: [0.071] #, 0.086, 0.111]
+  # [EUR/USD] ECB: https://www.ecb.europa.eu/stats/exchange/eurofxref/html/eurofxref-graph-usd.en.html # noqa: E501
+  USD2013_to_EUR2013: 0.7532
+
+  # Marginal and capital costs can be overwritten
+  # capital_cost:
+  #   onwind: 500
+  marginal_cost:
+    solar: 0.01
+    onwind: 0.015
+    offwind: 0.015
+    hydro: 0.
+    H2: 0.
+    battery: 0.
+
+  emission_prices: # only used with the option Ep (emission prices)
+    co2: 0.
+
+  lines:
+    length_factor: 1.25 #to estimate offwind connection costs
+
+
+industry:
+  reference_year: 2015
+
+solar_thermal:
+  clearsky_model: simple
+  orientation:
+    slope: 45.
+    azimuth: 180.
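+
+# Sketch of the grouping convention below (an assumption based on the usual
+# PyPSA-Eur myopic handling, not verified against this workflow): existing
+# plants are binned into the grouping_years intervals, e.g. a unit
+# commissioned in 1972 falls into the 1970 bin and is retired once the
+# lifetime assigned to that bin has elapsed.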
+
+existing_capacities:
+  grouping_years_power: [1960, 1965, 1970, 1975, 1980, 1985, 1990, 1995, 2000, 2005, 2010, 2015, 2020, 2025, 2030]
+  grouping_years_heat: [1980, 1985, 1990, 1995, 2000, 2005, 2010, 2015, 2019] # these should not extend beyond 2020
+  threshold_capacity: 10
+  default_heating_lifetime: 20
+  conventional_carriers:
+  - lignite
+  - coal
+  - oil
+  - uranium
+
+sector:
+  gas:
+    spatial_gas: true # ALWAYS TRUE
+    network: false # ALWAYS FALSE for now (NOT USED)
+    network_data: GGIT # Global dataset -> 'GGIT' , European dataset -> 'IGGIELGN'
+    network_data_GGIT_status: ['Construction', 'Operating', 'Idle', 'Shelved', 'Mothballed', 'Proposed']
+  hydrogen:
+    network: true
+    H2_retrofit_capacity_per_CH4: 0.6
+    network_limit: 2000 #GWkm
+    network_routes: gas # "gas" or "greenfield". If "gas" -> the network data are fetched from ["sector"]["gas"]["network_data"]. If "greenfield" -> the network follows the topology of electrical transmission lines
+    gas_network_repurposing: true # If true -> ["sector"]["gas"]["network"] is automatically false
+    underground_storage: false
+    hydrogen_colors: false
+    set_color_shares: false
+    blue_share: 0.40
+    pink_share: 0.05
+  coal:
+    shift_to_elec: true # If true, residential and services demand of coal is shifted to electricity. If false, the final energy demand of coal is disregarded
+
+
+  international_bunkers: false #Whether or not to count the emissions of international aviation and navigation
+
+  oil:
+    spatial_oil: true
+
+  district_heating:
+    potential: 0.3 #maximum fraction of urban demand which can be supplied by district heating
+    #increase of today's district heating demand to potential maximum district heating share
+    #progress = 0 means today's district heating share, progress = 1 means the maximum fraction of urban demand is supplied by district heating
+    progress: 1
+    # 2020: 0.0
+    # 2030: 0.3
+    # 2040: 0.6
+    # 2050: 1.0
+    district_heating_loss: 0.15
+  reduce_space_heat_exogenously: true # reduces space heat demand by a given factor (applied before losses in DH)
+  # this can represent e.g. building renovation, building demolition, or if
+  # the factor is negative: increasing floor area, increased thermal comfort, population growth
+  reduce_space_heat_exogenously_factor: 0.29 # per unit reduction in space heat demand
+  # the default factors are determined by the LTS scenario from http://tool.european-calculator.eu/app/buildings/building-types-area/?levers=1ddd4444421213bdbbbddd44444ffffff11f411111221111211l212221
+  # 2020: 0.10 # this results in a space heat demand reduction of 10%
+  # 2025: 0.09 # first heat demand increases compared to 2020 because of larger floor area per capita
+  # 2030: 0.09
+  # 2035: 0.11
+  # 2040: 0.16
+  # 2045: 0.21
+  # 2050: 0.29
+
+  tes: true
+  tes_tau: # 180 day time constant for centralised, 3 day for decentralised
+    decentral: 3
+    central: 180
+  boilers: true
+  oil_boilers: false
+  chp: true
+  micro_chp: false
+  solar_thermal: true
+  heat_pump_sink_T: 55 #Celsius, based on DTU / large area radiators; used in build_cop_profiles.py
+  time_dep_hp_cop: true #time dependent heat pump coefficient of performance
+  solar_cf_correction: 0.788457 # = 1/1.2683
+  bev_plug_to_wheel_efficiency: 0.2 #kWh/km from EPA https://www.fueleconomy.gov/feg/ for Tesla Model S
+  bev_charge_efficiency: 0.9 #BEV (dis-)charging efficiency
+  transport_heating_deadband_upper: 20.
+  transport_heating_deadband_lower: 15.
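+  # Sketch of how the deadband settings are typically applied (assuming the
+  # PyPSA-Eur convention; the exact implementation lives in the demand-building
+  # scripts): at ambient temperature T the transport energy demand is scaled
+  # by (1 + dd/100), with
+  #   dd = lower_degree_factor * (deadband_lower - T)  if T < deadband_lower
+  #   dd = upper_degree_factor * (T - deadband_upper)  if T > deadband_upper
+  #   dd = 0                                           otherwise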
+  ICE_lower_degree_factor: 0.375 #in per cent increase in fuel consumption per degree above deadband
+  ICE_upper_degree_factor: 1.6
+  EV_lower_degree_factor: 0.98
+  EV_upper_degree_factor: 0.63
+  bev_avail_max: 0.95
+  bev_avail_mean: 0.8
+  bev_dsm_restriction_value: 0.75 #Set to 0 for no restriction on BEV DSM
+  bev_dsm_restriction_time: 7 #Time at which SOC of BEV has to be dsm_restriction_value
+  v2g: true #allows feed-in to grid from EV battery
+  bev_dsm: true #turns on EV battery
+  bev_energy: 0.05 #average battery size in MWh
+  bev_availability: 0.5 #share of cars that do smart charging
+  transport_fuel_cell_efficiency: 0.5
+  transport_internal_combustion_efficiency: 0.3
+  industry_util_factor: 0.7
+
+  biomass_transport: true # biomass transport between nodes
+  biomass_transport_default_cost: 0.1 #EUR/km/MWh
+  solid_biomass_potential: 40 # TWh/a, Potential of whole modelled area
+  biogas_potential: 0.5 # TWh/a, Potential of whole modelled area
+
+  efficiency_heat_oil_to_elec: 0.9
+  efficiency_heat_biomass_to_elec: 0.9
+  efficiency_heat_gas_to_elec: 0.9
+
+  dynamic_transport:
+    enable: false # If "True", then the BEV and FCEV shares are obtained depending on the "Co2L"-wildcard (e.g. "Co2L0.70: 0.10"). If "False", then the shares are obtained depending on the "demand" wildcard and "planning_horizons" wildcard as listed below (e.g. "DF_2050: 0.08")
+    land_transport_electric_share:
+      Co2L2.0: 0.00
+      Co2L1.0: 0.01
+      Co2L0.90: 0.03
+      Co2L0.80: 0.06
+      Co2L0.70: 0.10
+      Co2L0.60: 0.17
+      Co2L0.50: 0.27
+      Co2L0.40: 0.40
+      Co2L0.30: 0.55
+      Co2L0.20: 0.69
+      Co2L0.10: 0.80
+      Co2L0.00: 0.88
+    land_transport_fuel_cell_share:
+      Co2L2.0: 0.01
+      Co2L1.0: 0.01
+      Co2L0.90: 0.01
+      Co2L0.80: 0.01
+      Co2L0.70: 0.01
+      Co2L0.60: 0.01
+      Co2L0.50: 0.01
+      Co2L0.40: 0.01
+      Co2L0.30: 0.01
+      Co2L0.20: 0.01
+      Co2L0.10: 0.01
+      Co2L0.00: 0.01
+
+  land_transport_fuel_cell_share: # 1 means all FCEVs HERE
+    BU_2030: 0.00
+    AP_2030: 0.004
+    NZ_2030: 0.02
+    DF_2030: 0.01
+    AB_2030: 0.01
+    BU_2050: 0.00
+    AP_2050: 0.06
+    NZ_2050: 0.28
+    DF_2050: 0.08
+
+  land_transport_electric_share: # 1 means all EVs # This leads to problems when non-zero HERE
+    BU_2030: 0.00
+    AP_2030: 0.075
+    NZ_2030: 0.13
+    DF_2030: 0.01
+    AB_2030: 0.01
+    BU_2050: 0.00
+    AP_2050: 0.42
+    NZ_2050: 0.68
+    DF_2050: 0.011
+
+  co2_network: true
+  co2_sequestration_potential: 200 #MtCO2/a sequestration potential for Europe
+  co2_sequestration_cost: 10 #EUR/tCO2 for sequestration of CO2
+  hydrogen_underground_storage: true
+  shipping_hydrogen_liquefaction: false
+  shipping_average_efficiency: 0.4 #For conversion of fuel oil to propulsion in 2011
+
+  shipping_hydrogen_share: #1.0
+    BU_2030: 0.00
+    AP_2030: 0.00
+    NZ_2030: 0.10
+    DF_2030: 0.05
+    AB_2030: 0.05
+    BU_2050: 0.00
+    AP_2050: 0.25
+    NZ_2050: 0.36
+    DF_2050: 0.12
+
+  gadm_level: 1
+  h2_cavern: true
+  marginal_cost_storage: 0
+  methanation: true
+  helmeth: true
+  dac: true
+  SMR: true
+  SMR CC: true
+  cc_fraction: 0.9
+  cc: true
+  space_heat_share: 0.6 # the share of space heating from all heating. Remainder goes to water heating.
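+  # Worked example (illustrative only): with space_heat_share: 0.6, a region
+  # with 100 MWh/a of total heat demand is split into 60 MWh/a of space
+  # heating and 40 MWh/a of water heating before supply options are attached.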
+ airport_sizing_factor: 3 + + min_part_load_fischer_tropsch: 0.9 + + conventional_generation: # generator : carrier + OCGT: gas + #Gen_Test: oil # Just for testing purposes + +# snapshots are originally set in PyPSA-Earth/config.yaml but used again by PyPSA-Earth-Sec +snapshots: + # arguments to pd.date_range + start: "2013-03-1" + end: "2013-03-7" + inclusive: "left" # end is not inclusive + +# atlite: +# cutout: ./cutouts/africa-2013-era5.nc + +build_osm_network: # TODO: To Remove this once we merge pypsa-earth and pypsa-earth-sec + force_ac: false # When true, it forces all components (lines and substation) to be AC-only. To be used if DC assets create problem. + +solving: + #tmpdir: "path/to/tmp" + options: + formulation: kirchhoff + clip_p_max_pu: 1.e-2 + load_shedding: true + noisy_costs: true + skip_iterations: true + track_iterations: false + min_iterations: 4 + max_iterations: 6 + + solver: + name: cbc + + mem: 30000 #memory in MB; 20 GB enough for 50+B+I+H2; 100 GB for 181+B+I+H2 + +plotting: + map: + boundaries: [-11, 30, 34, 71] + color_geomap: + ocean: white + land: whitesmoke + costs_max: 10 + costs_threshold: 0.2 + energy_max: 20000 + energy_min: -20000 + energy_threshold: 15 + vre_techs: + - onwind + - offwind-ac + - offwind-dc + - solar + - ror + renewable_storage_techs: + - PHS + - hydro + conv_techs: + - OCGT + - CCGT + - Nuclear + - Coal + storage_techs: + - hydro+PHS + - battery + - H2 + load_carriers: + - AC load + AC_carriers: + - AC line + - AC transformer + link_carriers: + - DC line + - Converter AC-DC + heat_links: + - heat pump + - resistive heater + - CHP heat + - CHP electric + - gas boiler + - central heat pump + - central resistive heater + - central CHP heat + - central CHP electric + - central gas boiler + heat_generators: + - gas boiler + - central gas boiler + - solar thermal collector + - central solar thermal collector + tech_colors: + SMR CC: "darkblue" + gas for industry CC: "brown" + process emissions CC: "gray" + CO2 pipeline: "gray" + onwind: "dodgerblue" + onshore wind: "#235ebc" + offwind: "#6895dd" + offshore wind: "#6895dd" + offwind-ac: "c" + offshore wind (AC): "#6895dd" + offwind-dc: "#74c6f2" + offshore wind (DC): "#74c6f2" + wave: '#004444' + hydro: '#3B5323' + hydro reservoir: '#3B5323' + ror: '#78AB46' + run of river: '#78AB46' + hydroelectricity: 'blue' + solar: "orange" + solar PV: "#f9d002" + solar thermal: coral + solar rooftop: '#ffef60' + OCGT: wheat + OCGT marginal: sandybrown + OCGT-heat: '#ee8340' + gas boiler: '#ee8340' + gas boilers: '#ee8340' + gas boiler marginal: '#ee8340' + gas-to-power/heat: 'brown' + gas: brown + natural gas: brown + SMR: '#4F4F2F' + oil: '#B5A642' + oil boiler: '#B5A677' + lines: k + transmission lines: k + H2: m + H2 liquefaction: m + hydrogen storage: m + battery: slategray + battery storage: slategray + home battery: '#614700' + home battery storage: '#614700' + Nuclear: r + Nuclear marginal: r + nuclear: r + uranium: r + Coal: k + coal: k + Coal marginal: k + Lignite: grey + lignite: grey + Lignite marginal: grey + CCGT: '#ee8340' + CCGT marginal: '#ee8340' + heat pumps: '#76EE00' + heat pump: '#76EE00' + air heat pump: '#76EE00' + ground heat pump: '#40AA00' + power-to-heat: 'red' + resistive heater: pink + Sabatier: '#FF1493' + methanation: '#FF1493' + power-to-gas: 'purple' + power-to-liquid: 'darkgreen' + helmeth: '#7D0552' + DAC: 'deeppink' + co2 stored: '#123456' + CO2 sequestration: '#123456' + CC: k + co2: '#123456' + co2 vent: '#654321' + agriculture heat: '#D07A7A' + agriculture 
machinery oil: '#1e1e1e' + agriculture machinery oil emissions: '#111111' + agriculture electricity: '#222222' + solid biomass for industry co2 from atmosphere: '#654321' + solid biomass for industry co2 to stored: '#654321' + solid biomass for industry CC: '#654321' + gas for industry co2 to atmosphere: '#654321' + gas for industry co2 to stored: '#654321' + Fischer-Tropsch: '#44DD33' + kerosene for aviation: '#44BB11' + naphtha for industry: '#44FF55' + land transport oil: '#44DD33' + water tanks: '#BBBBBB' + hot water storage: '#BBBBBB' + hot water charging: '#BBBBBB' + hot water discharging: '#999999' + # CO2 pipeline: '#999999' + CHP: r + CHP heat: r + CHP electric: r + PHS: g + Ambient: k + Electric load: b + Heat load: r + heat: darkred + rural heat: '#880000' + central heat: '#b22222' + decentral heat: '#800000' + low-temperature heat for industry: '#991111' + process heat: '#FF3333' + heat demand: darkred + electric demand: k + Li ion: grey + district heating: '#CC4E5C' + retrofitting: purple + building retrofitting: purple + BEV charger: grey + V2G: grey + land transport EV: grey + electricity: k + gas for industry: '#333333' + solid biomass for industry: '#555555' + industry electricity: '#222222' + industry new electricity: '#222222' + process emissions to stored: '#444444' + process emissions to atmosphere: '#888888' + process emissions: '#222222' + oil emissions: '#666666' + industry oil emissions: '#666666' + land transport oil emissions: '#666666' + land transport fuel cell: '#AAAAAA' + biogas: '#800000' + solid biomass: '#DAA520' + today: '#D2691E' + shipping: '#6495ED' + shipping oil: "#6495ED" + shipping oil emissions: "#6495ED" + electricity distribution grid: 'y' + solid biomass transport: green + H2 for industry: "#222222" + H2 for shipping: "#6495ED" + biomass EOP: "green" + biomass: "green" + high-temp electrolysis: "magenta" diff --git a/test/config.tutorial_noprogress.yaml b/test/config.tutorial_noprogress.yaml deleted file mode 100644 index a726a9c68..000000000 --- a/test/config.tutorial_noprogress.yaml +++ /dev/null @@ -1,7 +0,0 @@ -# SPDX-FileCopyrightText: PyPSA-Earth and PyPSA-Eur Authors -# -# SPDX-License-Identifier: CC0-1.0 - -### CHANGES TO CONFIG.TUTORIAL.YAML ### -retrieve_databundle: # required to be "false" for nice CI test output - show_progress: false diff --git a/test/test_build_powerplants.py b/test/test_build_powerplants.py index 68f406f04..34a118c9a 100644 --- a/test/test_build_powerplants.py +++ b/test/test_build_powerplants.py @@ -9,6 +9,7 @@ import sys import pandas as pd +import pytest import yaml sys.path.append("./scripts") @@ -98,7 +99,11 @@ def test_replace_natural_gas_technology(): assert comparison_df.empty -def test_add_power_plants(get_config_dict): +@pytest.mark.parametrize( + "strategy,expected", + [("replace", (4, 19)), ("false", (34, 18)), ("merge", (38, 20))], +) +def test_add_power_plants(get_config_dict, strategy, expected): """ Verify what returned by add_power_plants. 
""" @@ -113,47 +118,12 @@ def test_add_power_plants(get_config_dict): config_dict["countries"] = ["NG"] - # replace - config_dict["electricity"]["custom_powerplants"] = "replace" - powerplants_assignment_strategy = config_dict["electricity"]["custom_powerplants"] + powerplants_assignment_strategy = strategy if isinstance(ppl_query, str): power_plants_config["main_query"] = ppl_query countries_names = ["Nigeria"] power_plants_config["target_countries"] = countries_names - ppl_replace = add_power_plants( - custom_powerplants_file_path, - power_plants_config, - powerplants_assignment_strategy, - countries_names, - ) - assert ppl_replace.shape == (4, 19) - - # false - config_dict["electricity"]["custom_powerplants"] = "false" - powerplants_assignment_strategy = config_dict["electricity"]["custom_powerplants"] - if isinstance(ppl_query, str): - power_plants_config["main_query"] = ppl_query - countries_names = ["Nigeria"] - power_plants_config["target_countries"] = countries_names - ppl_false = add_power_plants( - custom_powerplants_file_path, - power_plants_config, - powerplants_assignment_strategy, - countries_names, - ) - # The number of powerplants returned by powerplantmatching - # may vary depending on the version of powerplantmatching - # The numbers below refer to version 0.15.5 - assert ppl_false.shape == (31, 18) - - # merge - config_dict["electricity"]["custom_powerplants"] = "merge" - powerplants_assignment_strategy = config_dict["electricity"]["custom_powerplants"] - if isinstance(ppl_query, str): - power_plants_config["main_query"] = ppl_query - countries_names = ["Nigeria"] - power_plants_config["target_countries"] = countries_names - ppl_merge = add_power_plants( + ppl = add_power_plants( custom_powerplants_file_path, power_plants_config, powerplants_assignment_strategy, @@ -161,5 +131,5 @@ def test_add_power_plants(get_config_dict): ) # The number of powerplants returned by powerplantmatching # may vary depending on the version of powerplantmatching - # The numbers below refer to version 0.15.5 - assert ppl_merge.shape == (35, 20) + # The numbers below refer to version 0.6.0 + assert ppl.shape == expected From 196d2554acc53db097fbfe50f41852d2e8519ac1 Mon Sep 17 00:00:00 2001 From: Fabrizio Finozzi Date: Thu, 14 Nov 2024 14:46:07 +0100 Subject: [PATCH 40/40] code: new changes based on upstream --- scripts/_helpers.py | 18 +++++++---- scripts/build_base_industry_totals.py | 6 ++-- scripts/build_demand_profiles.py | 6 ++-- scripts/build_natura_raster.py | 5 +-- scripts/build_shapes.py | 16 ++++------ scripts/cluster_network.py | 4 +-- scripts/download_osm_data.py | 4 +-- scripts/make_statistics.py | 19 ++++++----- scripts/make_summary.py | 4 +-- scripts/monte_carlo.py | 6 ++-- scripts/prepare_urban_percent.py | 11 +++---- scripts/retrieve_databundle_light.py | 46 +++++++++++---------------- 12 files changed, 63 insertions(+), 82 deletions(-) diff --git a/scripts/_helpers.py b/scripts/_helpers.py index f0103f799..67ea3355a 100644 --- a/scripts/_helpers.py +++ b/scripts/_helpers.py @@ -1408,6 +1408,7 @@ def locate_bus( gadm_url_prefix, gadm_input_file_args, contended_flag, + col_name="name", path_to_gadm=None, update=False, out_logging=False, @@ -1445,8 +1446,9 @@ def locate_bus( out_logging = true, enables output logging gadm_clustering : bool gadm_cluster = true, to enable clustering + col_name: str + column to use to filter the GeoDataFrame """ - col = "name" if not gadm_clustering: gdf = gpd.read_file(path_to_gadm) else: @@ -1473,11 +1475,11 @@ def locate_bus( update, 
out_logging, ) - col = "GID_{}".format(gadm_level) + col_name = "GID_{}".format(gadm_level) # gdf.set_index("GADM_ID", inplace=True) gdf_co = gdf[ - gdf[col].str.contains(co) + gdf[col_name].str.contains(co) ] # geodataframe of entire continent - output of prev function {} are placeholders # in strings - conditional formatting # insert any variable into that place using .format - extract string and filter for those containing co (MA) @@ -1485,12 +1487,12 @@ def locate_bus( try: return gdf_co[gdf_co.contains(point)][ - col + col_name ].item() # filter gdf_co which contains point and returns the bus except ValueError: return gdf_co[gdf_co.geometry == min(gdf_co.geometry, key=(point.distance))][ - col + col_name ].item() # looks for closest one shape=node @@ -1566,7 +1568,9 @@ def aggregate_fuels(sector): ] oil_fuels = [ - "additives and oxygenates" "aviation gasoline" "bitumen", + "additives and oxygenates", + "aviation gasoline", + "bitumen", "conventional crude oil", "crude petroleum", "ethane", @@ -1580,7 +1584,7 @@ def aggregate_fuels(sector): "naphtha", "natural gas liquids", "other kerosene", - "paraffin waxes" "patent fuel", + "paraffin waxes", "petroleum coke", "refinery gas", ] diff --git a/scripts/build_base_industry_totals.py b/scripts/build_base_industry_totals.py index 954a1a631..8320f99cc 100644 --- a/scripts/build_base_industry_totals.py +++ b/scripts/build_base_industry_totals.py @@ -8,8 +8,6 @@ @author: user """ -import pathlib - import country_converter as coco import pandas as pd from _helpers import ( @@ -117,11 +115,11 @@ def create_industry_base_totals(df): clean_industry_list = list(transaction.clean_name.unique()) unsd_path = get_path( - pathlib.Path(snakemake.input["energy_totals_base"]).parent, "demand/unsd/data/" + get_path(snakemake.input["energy_totals_base"]).parent, "demand/unsd/data/" ) # Get the files from the path provided in the OP - all_files = list(pathlib.Path(unsd_path).glob("*.txt")) + all_files = list(get_path(unsd_path).glob("*.txt")) # Create a dataframe from all downloaded files df = pd.concat( diff --git a/scripts/build_demand_profiles.py b/scripts/build_demand_profiles.py index 0906ae25c..83118d6c1 100644 --- a/scripts/build_demand_profiles.py +++ b/scripts/build_demand_profiles.py @@ -124,12 +124,12 @@ def get_load_paths_gegis(ssp_parentfolder, config): for continent in region_load: sel_ext = ".nc" for ext in [".nc", ".csv"]: - load_path = get_path(BASE_DIR, load_dir, str(continent) + str(ext)) - if pathlib.Path(load_path).exists(): + load_path = get_path(BASE_DIR, str(load_dir), str(continent) + str(ext)) + if get_path(load_path).exists(): sel_ext = ext break file_name = str(continent) + str(sel_ext) - load_path = get_path(load_dir, file_name) + load_path = get_path(str(load_dir), file_name) load_paths.append(load_path) file_names.append(file_name) diff --git a/scripts/build_natura_raster.py b/scripts/build_natura_raster.py index 07a479e54..bc4233cef 100644 --- a/scripts/build_natura_raster.py +++ b/scripts/build_natura_raster.py @@ -45,7 +45,6 @@ The output is a raster file with the name `natura.tiff` in the folder `resources/natura/`. 
""" import os -import pathlib import atlite import geopandas as gpd @@ -68,9 +67,7 @@ def get_fileshapes(list_paths, accepted_formats=(".shp",)): list_fileshapes = [] for lf in list_paths: - if pathlib.Path( - lf - ).is_dir(): # if it is a folder, then list all shapes files contained + if get_path(lf).is_dir(): # if it is a folder, then list all shapes files contained # loop over all dirs and subdirs for path, subdirs, files in os.walk(lf): # loop over all files diff --git a/scripts/build_shapes.py b/scripts/build_shapes.py index d8879393a..76bacd687 100644 --- a/scripts/build_shapes.py +++ b/scripts/build_shapes.py @@ -6,7 +6,6 @@ # -*- coding: utf-8 -*- import multiprocessing as mp -import pathlib import shutil from itertools import takewhile from operator import attrgetter @@ -22,7 +21,6 @@ BASE_DIR, configure_logging, create_logger, - get_current_directory_path, get_gadm_layer, get_path, mock_snakemake, @@ -117,7 +115,7 @@ def country_cover(country_shapes, eez_shapes=None, out_logging=False, distance=0 def save_to_geojson(df, fn): - pathlib.Path(fn).unlink(missing_ok=True) # remove file if it exists + get_path(fn).unlink(missing_ok=True) # remove file if it exists if not isinstance(df, gpd.GeoDataFrame): df = gpd.GeoDataFrame(dict(geometry=df)) @@ -139,9 +137,9 @@ def load_eez(countries_codes, geo_crs, eez_gpkg_file="./data/eez/eez_v11.gpkg"): The dataset shall be downloaded independently by the user (see guide) or together with pypsa-earth package. """ - if not pathlib.Path(eez_gpkg_file).exists(): + if not get_path(eez_gpkg_file).exists(): raise Exception( - f"File EEZ {eez_gpkg_file} not found, please download it from https://www.marineregions.org/download_file.php?name=World_EEZ_v11_20191118_gpkg.zip and copy it in {pathlib.Path(eez_gpkg).parent}" + f"File EEZ {eez_gpkg_file} not found, please download it from https://www.marineregions.org/download_file.php?name=World_EEZ_v11_20191118_gpkg.zip and copy it in {get_path(eez_gpkg).parent}" ) geodf_EEZ = gpd.read_file(eez_gpkg_file, engine="pyogrio").to_crs(geo_crs) @@ -303,7 +301,7 @@ def download_WorldPop_standard( BASE_DIR, "data", "WorldPop", WorldPop_filename ) # Input filepath tif - if not pathlib.Path(WorldPop_inputfile).exists() or update is True: + if not get_path(WorldPop_inputfile).exists() or update is True: if out_logging: logger.warning( f"Stage 3 of 5: {WorldPop_filename} does not exist, downloading to {WorldPop_inputfile}" @@ -395,9 +393,9 @@ def convert_gdp(name_file_nc, year=2015, out_logging=False): ) # Input filepath nc # Check if file exists, otherwise throw exception - if not pathlib.Path(GDP_nc).exists(): + if not get_path(GDP_nc).exists(): raise Exception( - f"File {name_file_nc} not found, please download it from https://datadryad.org/stash/dataset/doi:10.5061/dryad.dk1j0 and copy it in {pathlib.Path(GDP_nc).parent}" + f"File {name_file_nc} not found, please download it from https://datadryad.org/stash/dataset/doi:10.5061/dryad.dk1j0 and copy it in {get_path(GDP_nc).parent}" ) # open nc dataset @@ -439,7 +437,7 @@ def load_gdp( BASE_DIR, "data", "GDP", name_file_tif ) # Input filepath tif - if update | (not pathlib.Path(GDP_tif).exists()): + if update | (not get_path(GDP_tif).exists()): if out_logging: logger.warning( f"Stage 5 of 5: File {name_file_tif} not found, the file will be produced by processing {name_file_nc}" diff --git a/scripts/cluster_network.py b/scripts/cluster_network.py index b98b78730..551776e36 100644 --- a/scripts/cluster_network.py +++ b/scripts/cluster_network.py @@ -121,7 +121,6 @@ 
:align: center """ -import pathlib from functools import reduce import geopandas as gpd @@ -134,6 +133,7 @@ configure_logging, create_logger, get_aggregation_strategies, + get_path, mock_snakemake, normed, update_p_nom_max, @@ -626,7 +626,7 @@ def clustering_for_n_clusters( def save_to_geojson(s, fn): - pathlib.Path(fn).unlink(missing_ok=True) + get_path(fn).unlink(missing_ok=True) df = s.reset_index() schema = {**gpd.io.file.infer_schema(df), "geometry": "Unknown"} df.to_file(fn, driver="GeoJSON", schema=schema) diff --git a/scripts/download_osm_data.py b/scripts/download_osm_data.py index 88d3db273..b36472087 100644 --- a/scripts/download_osm_data.py +++ b/scripts/download_osm_data.py @@ -26,14 +26,12 @@ - ``data/osm/out``: Prepared power data as .geojson and .csv files per country - ``resources/osm/raw``: Prepared and per type (e.g. cable/lines) aggregated power data as .geojson and .csv files """ -import pathlib import shutil from _helpers import ( BASE_DIR, configure_logging, create_logger, - get_current_directory_path, get_path, mock_snakemake, read_osm_config, @@ -135,7 +133,7 @@ def convert_iso_to_geofk( for name in names: for f in out_formats: new_file_name = get_path(store_path_resources, f"all_raw_{name}s.{f}") - old_files = list(pathlib.Path(out_path).glob(f"*{name}.{f}")) + old_files = list(get_path(out_path).glob(f"*{name}.{f}")) # if file is missing, create empty file, otherwise rename it and move it if not old_files: with open(new_file_name, "w") as f: diff --git a/scripts/make_statistics.py b/scripts/make_statistics.py index a87eb2a17..e71070329 100644 --- a/scripts/make_statistics.py +++ b/scripts/make_statistics.py @@ -24,8 +24,6 @@ This rule creates a dataframe containing in the columns the relevant statistics for the current run. 
""" -import pathlib - import geopandas as gpd import numpy as np import pandas as pd @@ -34,6 +32,7 @@ from _helpers import ( create_country_list, create_logger, + get_path, get_path_size, mock_snakemake, three_2_two_digits_country, @@ -127,7 +126,7 @@ def collect_basic_osm_stats(path, rulename, header): """ Collect basic statistics on OSM data: number of items """ - if pathlib.Path(path).is_file() and get_path_size(path) > 0: + if get_path(path).is_file() and get_path_size(path) > 0: df = gpd.read_file(path) n_elem = len(df) @@ -146,7 +145,7 @@ def collect_network_osm_stats(path, rulename, header, metric_crs="EPSG:3857"): - length of the stored shapes - length of objects with tag_frequency == 0 (DC elements) """ - if pathlib.Path(path).is_file() and get_path_size(path) > 0: + if get_path(path).is_file() and get_path_size(path) > 0: df = gpd.read_file(path) n_elem = len(df) obj_length = ( @@ -248,7 +247,7 @@ def collect_bus_regions_stats(bus_region_rule="build_bus_regions"): df = pd.DataFrame() - if pathlib.Path(fp_onshore).is_file() and pathlib.Path(fp_offshore).is_file(): + if get_path(fp_onshore).is_file() and get_path(fp_offshore).is_file(): gdf_onshore = gpd.read_file(fp_onshore) gdf_offshore = gpd.read_file(fp_offshore) @@ -290,7 +289,7 @@ def capacity_stats(df): else: return df.groupby("carrier").p_nom.sum().astype(float) - if pathlib.Path(network_path).is_file(): + if get_path(network_path).is_file(): n = pypsa.Network(network_path) lines_length = float((n.lines.length * n.lines.num_parallel).sum()) @@ -345,7 +344,7 @@ def collect_shape_stats(rulename="build_shapes", area_crs="ESRI:54009"): """ snakemake = _mock_snakemake(rulename) - if not pathlib.Path(snakemake.output.africa_shape).is_file(): + if not get_path(snakemake.output.africa_shape).is_file(): return pd.DataFrame() df_continent = gpd.read_file(snakemake.output.africa_shape) @@ -356,7 +355,7 @@ def collect_shape_stats(rulename="build_shapes", area_crs="ESRI:54009"): .geometry.area.iloc[0] ) - if not pathlib.Path(snakemake.output.gadm_shapes).is_file(): + if not get_path(snakemake.output.gadm_shapes).is_file(): return pd.DataFrame() df_gadm = gpd.read_file(snakemake.output.gadm_shapes) @@ -470,7 +469,7 @@ def collect_renewable_stats(rulename, technology): """ snakemake = _mock_snakemake(rulename, technology=technology) - if pathlib.Path(snakemake.output.profile).is_file(): + if get_path(snakemake.output.profile).is_file(): res = xr.open_dataset(snakemake.output.profile) if technology == "hydro": @@ -503,7 +502,7 @@ def add_computational_stats(df, snakemake, column_name=None): comp_data = [np.nan] * 3 # total_time, mean_load and max_memory if snakemake.benchmark: - if not pathlib.Path(snakemake.benchmark).is_file(): + if not get_path(snakemake.benchmark).is_file(): return df bench_data = pd.read_csv(snakemake.benchmark, delimiter="\t") diff --git a/scripts/make_summary.py b/scripts/make_summary.py index 8867c2c6a..26b66157d 100644 --- a/scripts/make_summary.py +++ b/scripts/make_summary.py @@ -52,8 +52,6 @@ Replacing *summaries* with *plots* creates nice colored maps of the results. 
""" -import pathlib - import pandas as pd import pypsa from _helpers import ( @@ -503,7 +501,7 @@ def make_summaries(networks_dict, inputs, cost_config, elec_config, country="all for label, filename in networks_dict.items(): print(label, filename) - if not pathlib.Path(filename).exists(): + if not get_path(filename).exists(): print("does not exist!!") continue diff --git a/scripts/monte_carlo.py b/scripts/monte_carlo.py index 4f80059dc..726f0ed78 100644 --- a/scripts/monte_carlo.py +++ b/scripts/monte_carlo.py @@ -112,7 +112,7 @@ def monte_carlo_sampling_pydoe2( lh = rescale_distribution(lh, uncertainties_values) discrepancy = qmc.discrepancy(lh) logger.info( - "Discrepancy is:", discrepancy, " more details in function documentation." + f"Discrepancy is: {discrepancy} more details in function documentation." ) return lh @@ -142,7 +142,7 @@ def monte_carlo_sampling_chaospy( lh = rescale_distribution(lh, uncertainties_values) discrepancy = qmc.discrepancy(lh) logger.info( - "Discrepancy is:", discrepancy, " more details in function documentation." + f"Discrepancy is: {discrepancy} more details in function documentation." ) return lh @@ -185,7 +185,7 @@ def monte_carlo_sampling_scipy( lh = rescale_distribution(lh, uncertainties_values) discrepancy = qmc.discrepancy(lh) logger.info( - "Discrepancy is:", discrepancy, " more details in function documentation." + f"Discrepancy is: {discrepancy} more details in function documentation." ) return lh diff --git a/scripts/prepare_urban_percent.py b/scripts/prepare_urban_percent.py index 47d3ce1f2..df5bb46df 100644 --- a/scripts/prepare_urban_percent.py +++ b/scripts/prepare_urban_percent.py @@ -2,13 +2,12 @@ # SPDX-FileCopyrightText: PyPSA-Earth and PyPSA-Eur Authors # # SPDX-License-Identifier: AGPL-3.0-or-later -import pathlib import country_converter as coco import pandas as pd import py7zr import requests -from _helpers import mock_snakemake +from _helpers import get_path, mock_snakemake def download_urban_percent(): @@ -47,16 +46,14 @@ def download_urban_percent(): print(f"Urban percent extracted successfully") # Read the extracted CSV file - csv_filename = pathlib.Path( - filename - ).stem # Remove the .7z extension to get the CSV filename + csv_filename = get_path(filename).stem # Remove the .7z extension to get the CSV filename urban_percent_orig = pd.read_csv(csv_filename) print("Urban percent CSV file read successfully:") # Remove the downloaded .7z and .csv files - pathlib.Path(filename).unlink(missing_ok=True) - pathlib.Path(csv_filename).unlink(missing_ok=True) + get_path(filename).unlink(missing_ok=True) + get_path(csv_filename).unlink(missing_ok=True) else: print(f"Failed to download file: Status code {response.status_code}") diff --git a/scripts/retrieve_databundle_light.py b/scripts/retrieve_databundle_light.py index 240fa19e9..7ecdc158a 100644 --- a/scripts/retrieve_databundle_light.py +++ b/scripts/retrieve_databundle_light.py @@ -81,7 +81,6 @@ """ import datetime as dt -import pathlib import re from zipfile import ZipFile @@ -94,7 +93,6 @@ create_country_list, create_logger, get_path, - get_relative_path, mock_snakemake, progress_retrieve, ) @@ -158,7 +156,7 @@ def download_and_unzip_zenodo(config, root_path, hot_run=True, disable_progress= with ZipFile(file_path, "r") as zipObj: # Extract all the contents of zip file in current directory zipObj.extractall(path=destination) - pathlib.Path(file_path).unlink(missing_ok=True) + get_path(file_path).unlink(missing_ok=True) logger.info(f"Downloaded resource '{resource}' from cloud 
'{url}'.") except Exception as e: logger.warning( @@ -221,7 +219,7 @@ def download_and_unzip_gdrive(config, root_path, hot_run=True, disable_progress= # if hot run enabled if hot_run: # remove file - pathlib.Path(file_path).unlink(missing_ok=True) + get_path(file_path).unlink(missing_ok=True) # download file from google drive gdd.download_file_from_google_drive( file_id=file_id, @@ -286,7 +284,7 @@ def get_first_day_of_previous_month(date): ) if hot_run: - pathlib.Path(file_path).unlink(missing_ok=True) + get_path(file_path).unlink(missing_ok=True) downloaded = False @@ -333,7 +331,7 @@ def get_first_day_of_previous_month(date): nested_zip.extractall(path=dest_nested) # remove inner zip file - pathlib.Path(inner_zipname).unlink(missing_ok=True) + get_path(inner_zipname).unlink(missing_ok=True) logger.info(f"{resource} - Successfully unzipped file '{fzip}'") except: @@ -343,7 +341,7 @@ def get_first_day_of_previous_month(date): # close and remove outer zip file zip_obj.close() - pathlib.Path(file_path).unlink(missing_ok=True) + get_path(file_path).unlink(missing_ok=True) logger.info( f"Downloaded resource '{resource_iter}' from cloud '{url_iter}'." @@ -394,7 +392,7 @@ def download_and_unpack( True when download is successful, False otherwise """ if hot_run: - pathlib.Path(file_path).unlink(missing_ok=True) + get_path(file_path).unlink(missing_ok=True) try: logger.info(f"Downloading resource '{resource}' from cloud '{url}'.") @@ -408,7 +406,7 @@ def download_and_unpack( with ZipFile(file_path, "r") as zipfile: zipfile.extractall(path=destination) - pathlib.Path(file_path).unlink(missing_ok=True) + get_path(file_path).unlink(missing_ok=True) logger.info(f"Downloaded resource '{resource}' from cloud '{url}'.") return True except Exception as e: @@ -418,9 +416,9 @@ def download_and_unpack( return False -def download_and_unzip_direct(config, root_path, hot_run=True, disable_progress=False): +def download_and_unzip_direct(config, hot_run=True, disable_progress=False): """ - download_and_unzip_direct(config, root_path, dest_path, hot_run=True, + download_and_unzip_direct(config, dest_path, hot_run=True, disable_progress=False) Function to download the data by category from a direct url with no processing. 
@@ -430,8 +428,6 @@ def download_and_unzip_direct(config, root_path, hot_run=True, disable_progress=
     ------
     config : Dict
         Configuration data for the category to download
-    root_path : str
-        Absolute path of the repository
     hot_run : Bool (default True)
         When true the data are downloaded
         When false, the workflow is run without downloading and unzipping
@@ -446,7 +442,7 @@ def download_and_unzip_direct(config, root_path, hot_run=True, disable_progress=
     destination = get_path(BASE_DIR, config["destination"])
     url = config["urls"]["direct"]
 
-    file_path = get_path(destination, pathlib.Path(url).name)
+    file_path = get_path(destination, get_path(url).name)
 
     unzip = config.get("unzip", False)
 
@@ -461,10 +457,10 @@ def download_and_unzip_direct(config, root_path, hot_run=True, disable_progress=
 
 
 def download_and_unzip_hydrobasins(
-    config, root_path, hot_run=True, disable_progress=False
+    config, hot_run=True, disable_progress=False
 ):
     """
-    download_and_unzip_basins(config, root_path, dest_path, hot_run=True,
+    download_and_unzip_hydrobasins(config, hot_run=True,
     disable_progress=False)
 
     Function to download and unzip the data for hydrobasins from HydroBASINS database
 
@@ -484,8 +480,6 @@ def download_and_unzip_hydrobasins(
     ------
     config : Dict
         Configuration data for the category to download
-    root_path : str
-        Absolute path of the repository
     hot_run : Bool (default True)
         When true the data are downloaded
         When false, the workflow is run without downloading and unzipping
@@ -508,7 +502,7 @@ def download_and_unzip_hydrobasins(
 
     for rg in suffix_list:
         url = url_templ + "hybas_" + rg + "_lev" + level_code + "_v1c.zip"
-        file_path = get_path(destination, pathlib.Path(url).name)
+        file_path = get_path(destination, get_path(url).name)
 
         all_downloaded &= download_and_unpack(
             url=url,
@@ -524,9 +518,9 @@ def download_and_unzip_hydrobasins(
     return all_downloaded
 
 
-def download_and_unzip_post(config, root_path, hot_run=True, disable_progress=False):
+def download_and_unzip_post(config, hot_run=True, disable_progress=False):
     """
-    download_and_unzip_post(config, root_path, dest_path, hot_run=True,
+    download_and_unzip_post(config, hot_run=True,
     disable_progress=False)
 
     Function to download the data by category from a post request.
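For orientation, the hydrobasins hunk above builds one archive URL per regional suffix and hands each to download_and_unpack. A self-contained sketch of that assembly; the values of url_templ, level_code and suffix_list are placeholders (in the workflow they come from the rule's configuration, e.g. the suffixes under config["urls"]["hydrobasins"]):

    # Placeholder values, invented for illustration only.
    url_templ = "https://example.com/hydrobasins/"
    level_code = "06"
    suffix_list = ["af", "eu", "sa"]

    for rg in suffix_list:
        url = url_templ + "hybas_" + rg + "_lev" + level_code + "_v1c.zip"
        print(url)  # e.g. https://example.com/hydrobasins/hybas_af_lev06_v1c.zip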
@@ -535,8 +529,6 @@ def download_and_unzip_post(config, root_path, hot_run=True, disable_progress=Fa ------ config : Dict Configuration data for the category to download - root_path : str - Absolute path of the repository hot_run : Bool (default True) When true the data are downloaded When false, the workflow is run without downloading and unzipping @@ -555,10 +547,10 @@ def download_and_unzip_post(config, root_path, hot_run=True, disable_progress=Fa # remove url feature url = postdata.pop("url") - file_path = get_path(destination, pathlib.Path(url).name) + file_path = get_path(destination, get_path(url).name) if hot_run: - pathlib.Path(file_path).unlink(missing_ok=True) + get_path(file_path).unlink(missing_ok=True) # try: logger.info(f"Downloading resource '{resource}' from cloud '{url}'.") @@ -576,7 +568,7 @@ def download_and_unzip_post(config, root_path, hot_run=True, disable_progress=Fa with ZipFile(file_path, "r") as zipfile: zipfile.extractall(path=destination) - pathlib.Path(file_path).unlink(missing_ok=True) + get_path(file_path).unlink(missing_ok=True) logger.info(f"Downloaded resource '{resource}' from cloud '{url}'.") # except: # logger.warning(f"Failed download resource '{resource}' from cloud '{url}'.") @@ -804,7 +796,7 @@ def merge_hydrobasins_shape(config_hydrobasin, hydrobasins_level): for suffix in config_hydrobasin["urls"]["hydrobasins"]["suffixes"] ] gpdf_list = [None] * len(files_to_merge) - logger.info("Merging hydrobasins files into: " + output_fl) + logger.info(f"Merging hydrobasins files into: {output_fl}") for i, f_name in tqdm(enumerate(files_to_merge)): gpdf_list[i] = gpd.read_file(get_path(basins_path, f_name)) fl_merged = gpd.GeoDataFrame(pd.concat(gpdf_list)).drop_duplicates(