diff --git a/notebooks/01_classification.ipynb b/notebooks/01_classification.ipynb new file mode 100644 index 0000000..c0841c4 --- /dev/null +++ b/notebooks/01_classification.ipynb @@ -0,0 +1,52 @@ +{ + "cells": [ + { + "cell_type": "raw", + "metadata": {}, + "source": [ + "---\n", + "title: Classification\n", + "author: Pikall Nikolas\n", + "keep-ipynb: true\n", + "---" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "vscode": { + "languageId": "python" + } + }, + "outputs": [], + "source": [ + "#| echo: true\n", + "#| code-fold: true\n", + "# Imports\n", + "import os\n", + "import xarray as xr\n", + "import numpy as np\n", + "import datetime as dt\n", + "import cmcrameri as cmc\n", + "import folium\n", + "from dotenv import dotenv_values\n", + "\n", + "from eodag import EODataAccessGateway, SearchResult, setup_logging\n", + "from rasterio.crs import CRS\n", + "\n", + "# Setup logging\n", + "setup_logging(0)" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "01_classification", + "language": "python", + "name": "01_classification" + } + }, + "nbformat": 4, + "nbformat_minor": 4 +} diff --git a/notebooks/02_floodmapping.ipynb b/notebooks/02_floodmapping.ipynb new file mode 100644 index 0000000..b0d5d75 --- /dev/null +++ b/notebooks/02_floodmapping.ipynb @@ -0,0 +1,352 @@ +{ + "cells": [ + { + "cell_type": "raw", + "metadata": {}, + "source": [ + "---\n", + "title: Reverend Bayes updates our Belief in Flood Detection\n", + "subtitle: How a 275-year-old idea helps map the extent of floods\n", + "keep-ipynb: true\n", + "code-fold: true\n", + "---" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "![Image from [wikipedia](https://upload.wikimedia.org/wikipedia/commons/d/d4/Thomas_Bayes.gif)](https://upload.wikimedia.org/wikipedia/commons/d/d4/Thomas_Bayes.gif)" + ] + }, + { + "cell_type": "code", + "metadata": {}, + "source": [ + "%matplotlib widget\n", + "\n", + "import numpy as np\n", + "import xarray as xr\n", + "import datetime\n", + "import matplotlib.pyplot as plt\n", + "\n", + "from scipy.stats import norm\n", + "from eomaps import Maps" + ], + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "code", + "metadata": {}, + "source": [ + "sig0_dc = xr.open_dataset('../data/s1_parameters/S1_CSAR_IWGRDH/SIG0/V1M1R1/EQUI7_EU020M/E054N006T3/SIG0_20180228T043908__VV_D080_E054N006T3_EU020M_V1M1R1_S1AIWGRDH_TUWIEN.nc')" + ], + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## From Backscattering to Flood Mapping\n", + "\n", + "This notebook explains how microwave ($\sigma^0$) backscattering (@fig-area) can be used to map the extent of a flood. In this exercise we replicate the work of @bauer-marschallinger_satellite-based_2022 on the TU Wien Bayesian-based flood mapping algorithm.\n", + "\n", + "In the following lines we create a map with EOmaps [@quast_getting_2024] of the $\sigma^0$ backscattering values."
+ ] + }, + { + "cell_type": "code", + "metadata": {}, + "source": [ + "#| label: fig-area\n", + "#| fig-cap: 'Area targeted for $\\sigma^0$ backscattering is the Greek region of Thessaly, which experienced a major flood in February of 2018.'\n", + "m = Maps(ax=121, crs=3857)\n", + "m.set_data(data=sig0_dc, x=\"x\", y=\"y\", parameter=\"SIG0\", crs=Maps.CRS.Equi7_EU)\n", + "m.plot_map()\n", + "m.add_colorbar(label=\"$\\sigma^0$ (dB)\", orientation=\"vertical\", hist_bins=30)\n", + "m.add_scalebar(n=5)\n", + "m2 = m.new_map(ax=122, crs=3857)\n", + "m2.set_extent(m.get_extent())\n", + "m2.add_wms.OpenStreetMap.add_layer.default()\n", + "m.apply_layout(\n", + " {\n", + " 'figsize': [7.32, 4.59],\n", + " '0_map': [0.05, 0.18, 0.35, 0.64],\n", + " '1_cb': [0.8125, 0.1, 0.1, 0.8],\n", + " '1_cb_histogram_size': 0.8,\n", + " '2_map': [0.4375, 0.18, 0.35, 0.64]\n", + " }\n", + " )\n", + "m.show()" + ], + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Microwave Backscattering over Land and Water\n", + "\n", + "Reverend Bayes was concerned with two events, one (the *hypothesis*) occurring before the other (the *evidence*). If we know its cause, it is easy to logically deduce the probability of an effect. However, in this case we want to deduce the probability of a cause from an observed effect, also known as \"reversed probability\". In the case of flood mapping, we have $\\sigma^0$ backscatter observations over land (the effect) and we want to deduce the probability of flooding ($F$) and non-flooding ($NF$). \n", + "\n", + "In other words, we want to know the probability of flooding $P(F)$ given a pixel's $\\sigma^0$:\n", + "\n", + "$$P(F|\\sigma^0)$$\n", + "\n", + "and the probability of a pixel being not flooded $P(NF)$ given a certain $\\sigma^0$:\n", + "\n", + "$$P(NF|\\sigma^0).$$\n", + "\n", + "Bayes showed that these can be deduced from the observation that forward and reversed probability are equal, so that:\n", + "\n", + "$$P(F|\\sigma^0)P(\\sigma^0) = P(\\sigma^0|F)P(F)$$\n", + "\n", + "and\n", + "\n", + "$$P(NF|\\sigma^0)P(\\sigma^0) = P(\\sigma^0|NF)P(NF).$$\n", + "\n", + "\n", + "The forward probability of $\\sigma^0$ given the occurrence of flooding ($P(\\sigma^0|F)$) and $\\sigma^0$ given no flooding ($P(\\sigma^0|NF)$) can be extracted from past information on backscattering over land and water surfaces. As seen in the sketch below (@fig-sat), the characteristics of backscattering over land and water differ considerably.\n", + "\n", + "![Schematic backscattering over land and water. Image from [Geological Survey Ireland](https://www.gsi.ie/images/images/SAR_mapping_land_water.jpg)](https://www.gsi.ie/images/images/SAR_mapping_land_water.jpg){#fig-sat}\n", + "\n", + "## Likelihoods\n", + "\n", + "The so-called likelihoods of $P(\\sigma^0|F)$ and $P(\\sigma^0|NF)$ can thus be calculated from past backscattering information. Without going into the details of how these likelihoods are calculated, you can **click** on a pixel of the map to plot the likelihoods of $\\sigma^0$ being governed by land or water." + ] + }, + { + "cell_type": "code", + "metadata": {}, + "source": [ + "#| label: fig-lik\n", + "#| fig-cap: 'Likelihoods for $\\sigma^0$ being associated with land or water for 1 pixel in the Greek area of Thessaly. Likelihoods are calculated over a range of $\\sigma^0$. The pixel''s observed $\\sigma^0$ is given with a vertical line. 
Click on the map to re-calculate and update this figure for another pixel in the study area. Map created with EOmaps [@quast_getting_2024].'\n", + "\n", + "RANGE = np.arange(-30, 0, 0.1)\n", + "hparam_dc = xr.open_dataset('../data/tuw_s1_harpar/S1_CSAR_IWGRDH/SIG0-HPAR/V0M2R3/EQUI7_EU020M/E054N006T3/D080.nc')\n", + "plia_dc = xr.open_dataset('../data/s1_parameters/S1_CSAR_IWGRDH/PLIA-TAG/V01R03/EQUI7_EU020M/E054N006T3/PLIA-TAG-MEAN_20200101T000000_20201231T235959__D080_E054N006T3_EU020M_V01R03_S1IWGRDH.nc')\n", + "sig0_dc['id'] = (('y', 'x'), np.arange(sig0_dc.SIG0.size).reshape(sig0_dc.SIG0.shape))\n", + "hparam_dc['id'] = (('y', 'x'), np.arange(sig0_dc.SIG0.size).reshape(sig0_dc.SIG0.shape))\n", + "plia_dc['id'] = (('y', 'x'), np.arange(sig0_dc.SIG0.size).reshape(sig0_dc.SIG0.shape))\n", + "\n", + "def calc_water_likelihood(id, x):\n", + " point = plia_dc.where(plia_dc.id == id, drop=True)\n", + " wbsc_mean = point.PLIA * -0.394181 + -4.142015\n", + " wbsc_std = 2.754041\n", + " return norm.pdf(x, wbsc_mean.to_numpy(), wbsc_std).flatten()\n", + "\n", + "def expected_land_backscatter(data, dtime_str):\n", + " w = np.pi * 2 / 365\n", + " dt = datetime.datetime.strptime(dtime_str, \"%Y-%m-%d\")\n", + " t = dt.timetuple().tm_yday\n", + " wt = w * t\n", + "\n", + " M0 = data.M0\n", + " S1 = data.S1\n", + " S2 = data.S2\n", + " S3 = data.S3\n", + " C1 = data.C1\n", + " C2 = data.C2\n", + " C3 = data.C3\n", + " hm_c1 = (M0 + S1 * np.sin(wt)) + (C1 * np.cos(wt))\n", + " hm_c2 = ((hm_c1 + S2 * np.sin(2 * wt)) + C2 * np.cos(2 * wt))\n", + " hm_c3 = ((hm_c2 + S3 * np.sin(3 * wt)) + C3 * np.cos(3 * wt))\n", + " return hm_c3\n", + "\n", + "def calc_land_likelihood(id, x):\n", + " point = hparam_dc.where(hparam_dc.id == id, drop=True)\n", + " lbsc_mean = expected_land_backscatter(point, '2018-02-01')\n", + " lbsc_std = point.STD\n", + " return norm.pdf(x, lbsc_mean.to_numpy(), lbsc_std.to_numpy()).flatten()\n", + "\n", + "def calc_likelihoods(id, x):\n", + " if isinstance(x, list):\n", + " x = np.arange(x[0], x[1], 0.1)\n", + " water_likelihood, land_likelihood = calc_water_likelihood(id=id, x=x), calc_land_likelihood(id=id, x=x)\n", + " return water_likelihood, land_likelihood\n", + "\n", + "def view_bayes_flood(sig0_dc, calc_posteriors=None, bayesian_flood_decision=None):\n", + "\n", + " # initialize a map on top\n", + " m = Maps(ax=122, layer=\"data\", crs=Maps.CRS.Equi7_EU)\n", + "\n", + " # initialize 2 matplotlib plot-axes next to the map\n", + " ax_upper = m.f.add_subplot(221)\n", + " ax_upper.set_ylabel(\"likelihood\")\n", + " ax_upper.set_xlabel(\"$\\sigma^0 (dB)$\")\n", + "\n", + " ax_lower = m.f.add_subplot(223)\n", + " ax_lower.set_ylabel(\"probability\")\n", + " ax_lower.set_xlabel(\"$\\sigma^0 (dB)$\")\n", + "\n", + " # -------- assign data to the map and plot it\n", + " if bayesian_flood_decision is not None:\n", + " # add map\n", + " m2 = m.new_layer(layer=\"map\")\n", + " m2.add_wms.OpenStreetMap.add_layer.default()\n", + " flood_classification = bayesian_flood_decision(sig0_dc.id, sig0_dc.SIG0)\n", + " sig0_dc[\"decision\"] = (('y', 'x'), flood_classification.reshape(sig0_dc.SIG0.shape))\n", + " sig0_dc[\"decision\"] = sig0_dc.decision.where(sig0_dc.SIG0.notnull())\n", + " sig0_dc[\"decision\"] = sig0_dc.decision.where(sig0_dc.decision==0)\n", + " m.set_data(data=sig0_dc, x=\"x\", y=\"y\", parameter=\"decision\", crs=Maps.CRS.Equi7_EU)\n", + " m.plot_map()\n", + " m.show_layer(\"map\", (\"data\", 0.5))\n", + " m.apply_layout(\n", + " {\n", + " 'figsize': [7.32, 4.59],\n", + " 
'0_map': [0.44573, 0.11961, 0.3375, 0.75237],\n", + "            '1_': [0.10625, 0.5781, 0.3125, 0.29902],\n", + "            '2_': [0.10625, 0.11961, 0.3125, 0.29902],\n", + "        }\n", + "        )\n", + "\n", + "    else:\n", + "        m.set_data(data=sig0_dc, x=\"x\", y=\"y\", parameter=\"SIG0\", crs=Maps.CRS.Equi7_EU)\n", + "        m.plot_map()\n", + "        m.add_colorbar(label=\"$\sigma^0$ (dB)\", orientation=\"vertical\", hist_bins=30)\n", + "        m.apply_layout(\n", + "            {\n", + "            'figsize': [7.32, 4.59],\n", + "            '0_map': [0.44573, 0.11961, 0.3375, 0.75237],\n", + "            '1_': [0.10625, 0.5781, 0.3125, 0.29902],\n", + "            '2_': [0.10625, 0.11961, 0.3125, 0.29902],\n", + "            '3_cb': [0.8, 0.09034, 0.1, 0.85],\n", + "            '3_cb_histogram_size': 0.8\n", + "        }\n", + "        )\n", + "\n", + "    def update_plots(ID, **kwargs):\n", + "\n", + "        # get the data\n", + "        value = sig0_dc.where(sig0_dc.id == ID, drop=True).SIG0.to_numpy()\n", + "        y1_pdf, y2_pdf = calc_water_likelihood(ID, RANGE), calc_land_likelihood(ID, RANGE)\n", + "\n", + "        # plot the lines and vline\n", + "        (water,) = ax_upper.plot(RANGE, y1_pdf, 'k-', lw=2, label=\"water\")\n", + "        (land,) = ax_upper.plot(RANGE, y2_pdf, 'r-', lw=5, alpha=0.6, label=\"land\")\n", + "        value_left = ax_upper.vlines(x=value, ymin=0, ymax=np.max((y1_pdf, y2_pdf)), lw=3, label=\"observed\")\n", + "        ax_upper.legend(loc=\"upper left\")\n", + "\n", + "        # add all artists as \"temporary pick artists\" so that they\n", + "        # are removed when the next datapoint is selected\n", + "        for a in [water, land, value_left]:\n", + "            m.cb.pick.add_temporary_artist(a)\n", + "\n", + "        if calc_posteriors is not None:\n", + "            f_post, nf_post = calc_posteriors(y1_pdf, y2_pdf)\n", + "            (f,) = ax_lower.plot(RANGE, f_post, 'k-', lw=2, label=\"flood\")\n", + "            (nf,) = ax_lower.plot(RANGE, nf_post, 'r-', lw=5, alpha=0.6, label=\"non-flood\")\n", + "            value_right = ax_lower.vlines(x=value, ymin=-0.1, ymax=1.1, lw=3, label=\"observed\")\n", + "            ax_lower.legend(loc=\"upper left\")\n", + "            for a in [f, nf, value_right]:\n", + "                m.cb.pick.add_temporary_artist(a)\n", + "\n", + "        # re-compute axis limits based on the new artists\n", + "        ax_upper.relim()\n", + "        ax_upper.autoscale()\n", + "\n", + "    m.cb.pick.attach(update_plots)\n", + "    m.cb.pick.attach.mark(permanent=False, buffer=1, fc=\"none\", ec=\"r\")\n", + "    m.cb.pick.attach.mark(permanent=False, buffer=2, fc=\"none\", ec=\"r\", ls=\":\")\n", + "    m.show()\n", + "\n", + "view_bayes_flood(sig0_dc)" + ], + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Posteriors\n", + "\n", + "Having calculated the likelihoods, we can now move on to calculate the probability of (non-)flooding given a pixel's $\sigma^0$. These so-called *posteriors* need one more piece of information, as can be seen in the equations above. We need the probability that a pixel is flooded $P(F)$ or not flooded $P(NF)$. Of course, these are the figures we've been trying to find this whole time. We don't actually have them yet, so what can we do? In Bayesian statistics, we can just start with our best guess. These guesses are called our \"priors\", because they are the beliefs we hold *prior* to looking at the data. This subjective prior belief is the foundation of Bayesian statistics, and we use the likelihoods we just calculated to update our belief in this particular hypothesis. This updated belief is called the \"posterior\".\n", + "\n", + "Let's say that our best estimate for the chance of flooding versus non-flooding of a pixel is 50-50: a coin flip. We can now also calculate the probability of backscattering $P(\sigma^0)$ as the weighted average of the water and land likelihoods, which ensures that our posteriors range between 0 and 1.\n",
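+ "\n", + "Written out in full, the posterior we are after is\n", + "\n", + "$$P(F|\sigma^0) = \frac{P(\sigma^0|F)P(F)}{P(\sigma^0|F)P(F) + P(\sigma^0|NF)P(NF)},$$\n", + "\n", + "and analogously for $P(NF|\sigma^0)$. As a minimal numeric sketch with invented likelihood values (purely to make the weighting concrete; the real likelihoods come from the functions above):\n", + "\n", + "```python\n", + "# Toy example with hypothetical numbers, not part of the actual algorithm\n", + "p_f = p_nf = 0.5                        # 50-50 priors: a coin flip\n", + "lik_f, lik_nf = 0.30, 0.05              # assumed P(sigma0|F) and P(sigma0|NF)\n", + "evidence = lik_f * p_f + lik_nf * p_nf  # P(sigma0) as the weighted average\n", + "p_flood = lik_f * p_f / evidence        # ~0.86; p_nonflood = 1 - p_flood\n", + "```\n",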
+ "\n", + "The following code block shows how we combine these priors with the likelihoods to calculate the posteriors." + ] + }, + { + "cell_type": "code", + "metadata": {}, + "source": [ + "def calc_posteriors(water_likelihood, land_likelihood):\n", + "    evidence = (water_likelihood * 0.5) + (land_likelihood * 0.5)\n", + "    return (water_likelihood * 0.5) / evidence, (land_likelihood * 0.5) / evidence" + ], + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "We can now plot the posterior probabilities of flooding and non-flooding and compare these to the pixel's measured $\sigma^0$. **Click** on a pixel to calculate the posterior probability." + ] + }, + { + "cell_type": "code", + "metadata": {}, + "source": [ + "#| label: fig-post\n", + "#| fig-cap: 'Posterior probabilities for $\sigma^0$ of 1 pixel being associated with land or water in the Greek area of Thessaly. Click on the map to re-calculate and update this figure for another pixel in the study area. Map created with EOmaps [@quast_getting_2024].'\n", + "view_bayes_flood(sig0_dc, calc_posteriors)" + ], + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Flood Classification\n", + "\n", + "We are now ready to combine all this information and classify the pixels according to the probability of flooding given the backscatter value of each pixel. Here we simply check whether the probability of flooding is higher than that of non-flooding:" + ] + }, + { + "cell_type": "code", + "metadata": {}, + "source": [ + "def bayesian_flood_decision(id, sig0_dc):\n", + "    nf_post_prob, f_post_prob = calc_posteriors(*calc_likelihoods(id, sig0_dc))\n", + "    return np.greater(f_post_prob, nf_post_prob)" + ], + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "**Click** on a point in the map below to see the likelihoods and posterior distributions (in the left-hand subplots)." + ] + }, + { + "cell_type": "code", + "metadata": {}, + "source": [ + "#| label: fig-clas\n", + "#| fig-cap: 'The flood extent of the Greek region of Thessaly based on Bayesian probabilities is shown on the map, superimposed on an OpenStreetMap background. Click on a pixel to generate the point''s water and land likelihoods as well as the posterior probabilities.
Map created with EOmaps [@quast_getting_2024].'\n", + "view_bayes_flood(sig0_dc, calc_posteriors, bayesian_flood_decision)" + ], + "execution_count": null, + "outputs": [] + } + ], + "metadata": { + "kernelspec": { + "name": "02_floodmapping", + "language": "python", + "display_name": "02_floodmapping" + } + }, + "nbformat": 4, + "nbformat_minor": 4 +} \ No newline at end of file diff --git a/notebooks/Makefile b/notebooks/Makefile new file mode 100644 index 0000000..a50f195 --- /dev/null +++ b/notebooks/Makefile @@ -0,0 +1,45 @@ +.ONESHELL: +SHELL = /bin/bash +.PHONY: help clean environment kernel post-render data +YML = $(wildcard chapters/*.yml) +QMD = $(wildcard chapters/*.qmd) +REQ = $(basename $(notdir $(YML))) +CONDA_ENV_DIR := $(foreach i,$(REQ),$(shell conda info --base)/envs/$(i)) +KERNEL_DIR := $(foreach i,$(REQ),$(shell jupyter --data-dir)/kernels/$(i)) +CONDA_ACTIVATE = source $$(conda info --base)/etc/profile.d/conda.sh ; conda activate ; conda activate + help: + @echo "make clean" + @echo " clean all jupyter checkpoints" + @echo "make environment" + @echo " create a conda environment" + @echo "make kernel" + @echo " make ipykernel based on conda lock file" + clean: + rm --force --recursive .ipynb_checkpoints/ + for i in $(REQ); do conda remove -n $$i --all -y ; jupyter kernelspec uninstall -y $$i ; done + $(CONDA_ENV_DIR): + for i in $(YML); do conda env create -f $$i; done + environment: $(CONDA_ENV_DIR) + @echo -e "conda environments are ready." + $(KERNEL_DIR): $(CONDA_ENV_DIR) + pip install --upgrade pip + pip install jupyter + for i in $(REQ); do $(CONDA_ACTIVATE) $$i ; python -m ipykernel install --user --name $$i --display-name $$i ; conda deactivate; done + kernel: $(KERNEL_DIR) + @echo -e "conda jupyter kernel is ready." + post-render: + for i in $(QMD); do quarto convert $$i; done + - mv chapters/*.ipynb notebooks/ >/dev/null 2>&1 + - for f in chapters/*.quarto_ipynb ; do mv -- "$$f" "$${f%.quarto_ipynb}.ipynb" >/dev/null 2>&1; done + cp Makefile notebooks/ + data: + wget -q -P ./data https://cloud.geo.tuwien.ac.at/s/AezWMFwmFQJsKyr/download/floodmap.zip + cd data && unzip -n floodmap.zip && rm floodmap.zip \ No newline at end of file diff --git a/notebooks/references.ipynb b/notebooks/references.ipynb new file mode 100644 index 0000000..a15416e --- /dev/null +++ b/notebooks/references.ipynb @@ -0,0 +1,23 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# References {.unnumbered}\n", + "\n", + "::: {#refs}\n", + ":::" + ] + } + ], + "metadata": { + "kernelspec": { + "name": "python3", + "language": "python", + "display_name": "Python 3 (ipykernel)" + } + }, + "nbformat": 4, + "nbformat_minor": 4 +} \ No newline at end of file