diff --git a/Chapter 5 Statistical Hypothesis Testing.ipynb b/Chapter 5 Statistical Hypothesis Testing.ipynb
new file mode 100644
index 0000000..9feca34
--- /dev/null
+++ b/Chapter 5 Statistical Hypothesis Testing.ipynb
@@ -0,0 +1,1175 @@
+{
+ "nbformat": 4,
+ "nbformat_minor": 0,
+ "metadata": {
+ "colab": {
+ "name": "Chapter 5: Statistical Hypothesis Testing.ipynb",
+ "provenance": [],
+ "collapsed_sections": [
+ "WJhmgDxsVHEO",
+ "MkgcD5YRqY2t",
+ "YdiAgoIkqkgY",
+ "zPLd6dYaxMkT",
+ "OJMPHhRcyblx",
+ "CvS6L1Js4duu",
+ "7ob5bPMC7wnc",
+ "i9LHPNrU_aNd",
+ "LROas1z3DtJL",
+ "7c95dMu8HLoA",
+ "QFnla-p6YV94",
+ "7JMLIct9Ydku",
+ "IWdTeJz-ZuWc",
+ "6sR1d3BHS9Hh",
+ "_beFJ80gTH85",
+ "uXF1GqteUu0E",
+ "PpZ12cY4Vv_5",
+ "uCyetBhlW3E3"
+ ]
+ },
+ "kernelspec": {
+ "name": "python3",
+ "display_name": "Python 3"
+ },
+ "language_info": {
+ "name": "python"
+ }
+ },
+ "cells": [
+ {
+ "cell_type": "markdown",
+ "source": [
+ "# **Chapter 5: Statistical Hypothesis Testing**"
+ ],
+ "metadata": {
+ "id": "RZ4sKXgZqP5R"
+ }
+ },
+ {
+ "cell_type": "markdown",
+ "source": [
+ "**Table of Content:**\n",
+ "\n",
+ "- [Import Libraries](#Import_Libraries)\n",
+ "- [5.1. Normality Tests](#Normality_Tests)\n",
+ " - [5.1.1. Shapiro-Wilk Test](#Shapiro-Wilk_Test)\n",
+ " - [5.1.2. D’Agostino’s $K^2$ Test](#D’Agostino’s_Test)\n",
+ " - [5.1.3. Anderson-Darling Test](#Anderson-Darling_Test)\n",
+ "- [5.2. Correlation Tests](#Correlation_Tests)\n",
+ " - [5.2.1. Pearson’s Correlation Coefficient](#Pearson’s_Correlation_Coefficient)\n",
+ " - [5.2.2. Spearman’s Rank Correlation](#Spearman’s_Rank_Correlation)\n",
+ " - [5.2.3. Kendall’s Rank Correlation](#Kendall’s_Rank_Correlation)\n",
+ " - [5.2.4. Chi-Squared Test](#Chi-Squared_Test)\n",
+ "- [5.3. Stationary Tests](#Stationary_Tests)\n",
+ " - [5.3.1. Augmented Dickey-Fuller Unit Root Test](#Augmented_Dickey-Fuller_Unit_Root_Test)\n",
+ " - [5.3.2. Kwiatkowski-Phillips-Schmidt-Shin Test](#Kwiatkowski-Phillips-Schmidt-Shin_Test) \n",
+ "- [5.4. Other Tests](#Other_Tests)\n",
+ " - [5.4.1. Mann-Whitney U-Test](#Mann-Whitney_U-Test)\n",
+ " - [5.4.2. Wilcoxon Signed-Rank Test](#Wilcoxon_Signed-Rank-Test)\n",
+ " - [5.4.3. Kruskal-Wallis H Test](#Kruskal-Wallis_H_Test)\n",
+ " - [5.4.4. Friedman Test](#Friedman_Test) "
+ ],
+ "metadata": {
+ "id": "V9XOxBPqCABJ"
+ }
+ },
+ {
+ "cell_type": "markdown",
+ "source": [
+ "\n",
+ "\n",
+ "## **Import Libraries**"
+ ],
+ "metadata": {
+ "id": "WJhmgDxsVHEO"
+ }
+ },
+ {
+ "cell_type": "code",
+ "source": [
+ "!pip install --upgrade scipy"
+ ],
+ "metadata": {
+ "colab": {
+ "base_uri": "https://localhost:8080/"
+ },
+ "id": "vXStgb2JU6c0",
+ "outputId": "f227901d-8551-4fde-bb9a-4bc5e3bef1ab"
+ },
+ "execution_count": 1,
+ "outputs": [
+ {
+ "output_type": "stream",
+ "name": "stdout",
+ "text": [
+ "Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/\n",
+ "Requirement already satisfied: scipy in /usr/local/lib/python3.7/dist-packages (1.4.1)\n",
+ "Collecting scipy\n",
+ " Downloading scipy-1.7.3-cp37-cp37m-manylinux_2_12_x86_64.manylinux2010_x86_64.whl (38.1 MB)\n",
+ "\u001b[K |████████████████████████████████| 38.1 MB 1.3 MB/s \n",
+ "\u001b[?25hRequirement already satisfied: numpy<1.23.0,>=1.16.5 in /usr/local/lib/python3.7/dist-packages (from scipy) (1.21.6)\n",
+ "Installing collected packages: scipy\n",
+ " Attempting uninstall: scipy\n",
+ " Found existing installation: scipy 1.4.1\n",
+ " Uninstalling scipy-1.4.1:\n",
+ " Successfully uninstalled scipy-1.4.1\n",
+ "\u001b[31mERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts.\n",
+ "albumentations 0.1.12 requires imgaug<0.2.7,>=0.2.5, but you have imgaug 0.2.9 which is incompatible.\u001b[0m\n",
+ "Successfully installed scipy-1.7.3\n"
+ ]
+ }
+ ]
+ },
+ {
+ "cell_type": "code",
+ "source": [
+ "import pandas as pd\n",
+ "import numpy as np\n",
+ "import matplotlib.pyplot as plt\n",
+ "import matplotlib.patches as mpatches\n",
+ "import seaborn as sns\n",
+ "import math\n",
+ "from scipy import stats\n",
+ "from scipy.stats import norm\n",
+ "from scipy.stats import chi2\n",
+ "from scipy.stats import t\n",
+ "from scipy.stats import f\n",
+ "from scipy.stats import bernoulli\n",
+ "from scipy.stats import binom\n",
+ "from scipy.stats import nbinom\n",
+ "from scipy.stats import geom\n",
+ "from scipy.stats import poisson\n",
+ "from scipy.stats import uniform\n",
+ "from scipy.stats import randint\n",
+ "from scipy.stats import expon\n",
+ "from scipy.stats import gamma\n",
+ "from scipy.stats import beta\n",
+ "from scipy.stats import weibull_min\n",
+ "from scipy.stats import hypergeom\n",
+ "from scipy.stats import shapiro\n",
+ "from scipy.stats import pearsonr\n",
+ "from scipy.stats import normaltest\n",
+ "from scipy.stats import anderson\n",
+ "from scipy.stats import spearmanr\n",
+ "from scipy.stats import kendalltau\n",
+ "from scipy.stats import chi2_contingency\n",
+ "from scipy.stats import ttest_ind\n",
+ "from scipy.stats import ttest_rel\n",
+ "from scipy.stats import mannwhitneyu\n",
+ "from scipy.stats import wilcoxon\n",
+ "from scipy.stats import kruskal\n",
+ "from scipy.stats import friedmanchisquare\n",
+ "from statsmodels.tsa.stattools import adfuller\n",
+ "from statsmodels.tsa.stattools import kpss\n",
+ "from statsmodels.stats.weightstats import ztest\n",
+ "from scipy.integrate import quad\n",
+ "from IPython.display import display, Latex\n",
+ "\n",
+ "import warnings\n",
+ "warnings.filterwarnings('ignore')\n",
+ "warnings.simplefilter(action='ignore', category=FutureWarning)"
+ ],
+ "metadata": {
+ "colab": {
+ "base_uri": "https://localhost:8080/"
+ },
+ "id": "ZPuphzTmU-P8",
+ "outputId": "db6527ea-c413-4457-e747-8f817f99006b"
+ },
+ "execution_count": 2,
+ "outputs": [
+ {
+ "output_type": "stream",
+ "name": "stderr",
+ "text": [
+ "/usr/local/lib/python3.7/dist-packages/statsmodels/tools/_testing.py:19: FutureWarning: pandas.util.testing is deprecated. Use the functions in the public API at pandas.testing instead.\n",
+ " import pandas.util.testing as tm\n"
+ ]
+ }
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "source": [
+ "\n",
+ "\n",
+ "## **5.1. Normality Tests:**"
+ ],
+ "metadata": {
+ "id": "MkgcD5YRqY2t"
+ }
+ },
+ {
+ "cell_type": "markdown",
+ "source": [
+ "\n",
+ "\n",
+ "### **5.1.1. Shapiro-Wilk Test:**"
+ ],
+ "metadata": {
+ "id": "YdiAgoIkqkgY"
+ }
+ },
+ {
+ "cell_type": "markdown",
+ "source": [
+ "$H_0$ : The sample has a Normal (Gaussian) distribution\n",
+ "\n",
+ "$H_1$ : The sample does not have a Normal (Gaussian) distribution.\n",
+ "\n",
+ "Assumptions: \n",
+ "* Observations in each sample are independent and identically distributed (iid).\n",
+ "\n",
+ "$\\\\ $\n",
+ "\n",
+ "[Shapiro-Wilk Test Doc](https://docs.scipy.org/doc/scipy/reference/generated/scipy.stats.shapiro.html)"
+ ],
+ "metadata": {
+ "id": "5THGAbaPqrjg"
+ }
+ },
+ {
+ "cell_type": "code",
+ "source": [
+ "N = 100\n",
+ "alpha = 0.05\n",
+ "np.random.seed(1)\n",
+ "data = np.random.normal(0, 1, N)\n",
+ "\n",
+ "Test_statistic, p_value = shapiro(data)\n",
+ "print(f'Test_statistic_shapiro = {Test_statistic}, p_value = {p_value}', '\\n')\n",
+ "\n",
+ "if p_value < alpha:\n",
+ "\tprint(f'Since p_value < {alpha}, reject null hypothesis. Therefore, The data is probably normal.')\n",
+ "else:\n",
+ "\tprint(f'Since p_value > {alpha}, the null hypothesis cannot be rejected. Therefore, The data is not probably normal.')"
+ ],
+ "metadata": {
+ "id": "oEXRlb4QwyRK",
+ "colab": {
+ "base_uri": "https://localhost:8080/"
+ },
+ "outputId": "c857ada2-317e-4851-b71f-356d76a9cb22"
+ },
+ "execution_count": 3,
+ "outputs": [
+ {
+ "output_type": "stream",
+ "name": "stdout",
+ "text": [
+ "Test_statistic_shapiro = 0.9920045137405396, p_value = 0.8215526342391968 \n",
+ "\n",
+ "Since p_value > 0.05, the null hypothesis cannot be rejected. Therefore, The data is not probably normal.\n"
+ ]
+ }
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "source": [
+ "\n",
+ "\n",
+ "### **5.1.2. D’Agostino’s $K^2$ Test:**"
+ ],
+ "metadata": {
+ "id": "zPLd6dYaxMkT"
+ }
+ },
+ {
+ "cell_type": "markdown",
+ "source": [
+ "$H_0$ : The sample has a Normal (Gaussian) distribution\n",
+ "\n",
+ "$H_1$ : The sample does not have a Normal (Gaussian) distribution.\n",
+ "\n",
+ "Assumptions: \n",
+ "* Observations in each sample are independent and identically distributed (iid).\n",
+ "\n",
+ "\n",
+ "\n",
+ "\n",
+ "$\\\\ $\n",
+ "\n",
+ "[D’Agostino’s $K^2$ Test Doc](https://docs.scipy.org/doc/scipy/reference/generated/scipy.stats.normaltest.html)"
+ ],
+ "metadata": {
+ "id": "AZnwcLLAxVJI"
+ }
+ },
+ {
+ "cell_type": "code",
+ "source": [
+ "N = 100\n",
+ "alpha = 0.05\n",
+ "np.random.seed(1)\n",
+ "data = np.random.normal(0, 1, N)\n",
+ "\n",
+ "Test_statistic, p_value = normaltest(data)\n",
+ "print(f\"Test_statistic_D'Agostino's K-squared = {Test_statistic}, p_value = {p_value}\", \"\\n\")\n",
+ "\n",
+ "if p_value < alpha:\n",
+ "\tprint(f'Since p_value < {alpha}, reject null hypothesis. Therefore, The data is probably normal.')\n",
+ "else:\n",
+ "\tprint(f'Since p_value > {alpha}, the null hypothesis cannot be rejected. Therefore, The data is not probably normal.')"
+ ],
+ "metadata": {
+ "colab": {
+ "base_uri": "https://localhost:8080/"
+ },
+ "outputId": "f5f2c919-1de4-415d-a57e-d478d55e4456",
+ "id": "AwDtLtHkxVJK"
+ },
+ "execution_count": 4,
+ "outputs": [
+ {
+ "output_type": "stream",
+ "name": "stdout",
+ "text": [
+ "Test_statistic_D'Agostino's K-squared = 0.10202388832581702, p_value = 0.9502673203169621 \n",
+ "\n",
+ "Since p_value > 0.05, the null hypothesis cannot be rejected. Therefore, The data is not probably normal.\n"
+ ]
+ }
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "source": [
+ "\n",
+ "\n",
+ "### **5.1.3. Anderson-Darling Test:**"
+ ],
+ "metadata": {
+ "id": "OJMPHhRcyblx"
+ }
+ },
+ {
+ "cell_type": "markdown",
+ "source": [
+ "$H_0$ : The sample has a Normal (Gaussian) distribution\n",
+ "\n",
+ "$H_1$ : The sample does not have a Normal (Gaussian) distribution.\n",
+ "\n",
+ "Assumptions: \n",
+ "* Observations in each sample are independent and identically distributed (iid).\n",
+ "\n",
+ "$\\\\ $\n",
+ "\n",
+ "[Anderson-Darling Test Doc](https://docs.scipy.org/doc/scipy/reference/generated/scipy.stats.anderson.html)\n",
+ "\n",
+ "Critical values provided are for the following significance levels:\n",
+ "\n",
+ "normal/exponential:\n",
+ "\n",
+ "$15\\%, 10\\%, 5\\%, 2.5\\%, 1\\%$\n",
+ "\n",
+ "logistic:\n",
+ "\n",
+ "$25\\%, 10\\%, 5\\%, 2.5\\%, 1\\%, 0.5\\%$\n",
+ "\n",
+ "Gumbel:\n",
+ "\n",
+ "$25\\%, 10\\%, 5\\%, 2.5\\%, 1\\%$\n",
+ "\n",
+ "If the test statistic is larger than these critical values then for the corresponding significance level, the null hypothesis that the data come from the chosen distribution can be rejected."
+ ],
+ "metadata": {
+ "id": "gU9cF9z9ybly"
+ }
+ },
+ {
+ "cell_type": "code",
+ "source": [
+ "N = 100\n",
+ "np.random.seed(1)\n",
+ "data = np.random.normal(0, 1, N)\n",
+ "\n",
+ "Test_statistic, critical_values, significance_level = anderson(data, dist='norm')\n",
+ "print(f'Test_statistic_anderson = {Test_statistic}', '\\n')\n",
+ "\n",
+ "for i in range(len(critical_values)):\n",
+ " sl, cv = significance_level[i], critical_values[i]\n",
+ " if Test_statistic > cv:\n",
+ " print(f'(Test statistic = {Test_statistic}) > (critical value = {sl}%), therefore for the corresponding significance level, the null hpothesis cannot be rejected.')\n",
+ " else:\n",
+ " print(f'(Test statistic = {Test_statistic}) > (critical value = {sl}%), therefore for the corresponding significance level, the null hpothesis is rejected.')"
+ ],
+ "metadata": {
+ "colab": {
+ "base_uri": "https://localhost:8080/"
+ },
+ "id": "G041pJhiy4ra",
+ "outputId": "eb4d9936-b41a-4218-865d-eed99eb45eb7"
+ },
+ "execution_count": 5,
+ "outputs": [
+ {
+ "output_type": "stream",
+ "name": "stdout",
+ "text": [
+ "Test_statistic_anderson = 0.2196508855594459 \n",
+ "\n",
+ "(Test statistic = 0.2196508855594459) > (critical value = 15.0%), therefore for the corresponding significance level, the null hpothesis is rejected.\n",
+ "(Test statistic = 0.2196508855594459) > (critical value = 10.0%), therefore for the corresponding significance level, the null hpothesis is rejected.\n",
+ "(Test statistic = 0.2196508855594459) > (critical value = 5.0%), therefore for the corresponding significance level, the null hpothesis is rejected.\n",
+ "(Test statistic = 0.2196508855594459) > (critical value = 2.5%), therefore for the corresponding significance level, the null hpothesis is rejected.\n",
+ "(Test statistic = 0.2196508855594459) > (critical value = 1.0%), therefore for the corresponding significance level, the null hpothesis is rejected.\n"
+ ]
+ }
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "source": [
+ "Note that you can use Anderson-Darling test for other distributions. \n",
+ "\n",
+ "The valid values are: {‘norm’, ‘expon’, ‘logistic’, ‘gumbel’, ‘gumbel_l’, ‘gumbel_r’, ‘extreme1’}"
+ ],
+ "metadata": {
+ "id": "JJ5mE7YozixQ"
+ }
+ },
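+ {
+ "cell_type": "markdown",
+ "source": [
+ "For example, here is a minimal sketch (illustrative, not part of the original text) that checks exponentially distributed data against the exponential distribution; `data_exp` and `result` are names introduced only for this example."
+ ],
+ "metadata": {}
+ },
+ {
+ "cell_type": "code",
+ "source": [
+ "# Illustrative sketch: Anderson-Darling test against the exponential distribution\n",
+ "np.random.seed(1)\n",
+ "data_exp = np.random.exponential(scale=1.0, size=100)\n",
+ "\n",
+ "result = anderson(data_exp, dist='expon')\n",
+ "print(f'Test_statistic_anderson (expon) = {result.statistic}', '\\n')\n",
+ "\n",
+ "for sl, cv in zip(result.significance_level, result.critical_values):\n",
+ "    if result.statistic < cv:\n",
+ "        print(f'At the {sl}% significance level, the statistic is below the critical value ({cv}), so the null hypothesis cannot be rejected.')\n",
+ "    else:\n",
+ "        print(f'At the {sl}% significance level, the statistic exceeds the critical value ({cv}), so the null hypothesis is rejected.')"
+ ],
+ "metadata": {},
+ "execution_count": null,
+ "outputs": []
+ },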
+ {
+ "cell_type": "markdown",
+ "source": [
+ "\n",
+ "\n",
+ "## **5.2. Correlation Tests:**"
+ ],
+ "metadata": {
+ "id": "CvS6L1Js4duu"
+ }
+ },
+ {
+ "cell_type": "markdown",
+ "source": [
+ "\n",
+ "\n",
+ "### **5.2.1. Pearson’s Correlation Coefficient:**"
+ ],
+ "metadata": {
+ "id": "7ob5bPMC7wnc"
+ }
+ },
+ {
+ "cell_type": "markdown",
+ "source": [
+ "Tests whether two data sample have a linear relationship.\n",
+ "\n",
+ "$H_0$: The two data are independent.\n",
+ "\n",
+ "$H_1$: There is a dependency between the two data.\n",
+ "\n",
+ "Assumptions:\n",
+ "* Observations in each data sample are independent and identically distributed (iid).\n",
+ "* Observations in each data sample are normally distributed.\n",
+ "* Observations in each data sample have the same variance.\n",
+ "\n",
+ "$\\\\ $\n",
+ "\n",
+ "[Pearson’s Correlation Coefficient Doc](https://docs.scipy.org/doc/scipy/reference/generated/scipy.stats.pearsonr.html)"
+ ],
+ "metadata": {
+ "id": "yNCyRmLS8NIj"
+ }
+ },
+ {
+ "cell_type": "code",
+ "source": [
+ "N = 10\n",
+ "alpha = 0.05\n",
+ "np.random.seed(1)\n",
+ "data1 = np.random.normal(0, 1, N)\n",
+ "data2 = np.random.normal(0, 1, N) + 2\n",
+ "\n",
+ "Test_statistic, p_value = pearsonr(data1, data2)\n",
+ "print(f\"Test_statistic_Pearson's Correlation = {Test_statistic}, p_value = {p_value}\", \"\\n\")\n",
+ "\n",
+ "if p_value < alpha:\n",
+ "\tprint(f'Since p_value < {alpha}, reject null hypothesis. Therefore, Two data are probably dependent.')\n",
+ "else:\n",
+ "\tprint(f'Since p_value > {alpha}, the null hypothesis cannot be rejected. Therefore, Two data are probably independent.')"
+ ],
+ "metadata": {
+ "colab": {
+ "base_uri": "https://localhost:8080/"
+ },
+ "id": "P6ENjsEd0lbp",
+ "outputId": "2eb12605-e669-466e-91ff-905169eb6995"
+ },
+ "execution_count": 6,
+ "outputs": [
+ {
+ "output_type": "stream",
+ "name": "stdout",
+ "text": [
+ "Test_statistic_Pearson's Correlation = 0.6556177144470315, p_value = 0.03957633895447448 \n",
+ "\n",
+ "Since p_value < 0.05, reject null hypothesis. Therefore, Two data are probably dependent.\n"
+ ]
+ }
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "source": [
+ "This test is parametric."
+ ],
+ "metadata": {
+ "id": "WbRpE09hGQXz"
+ }
+ },
+ {
+ "cell_type": "markdown",
+ "source": [
+ "\n",
+ "\n",
+ "### **5.2.2. Spearman’s Rank Correlation:**"
+ ],
+ "metadata": {
+ "id": "i9LHPNrU_aNd"
+ }
+ },
+ {
+ "cell_type": "markdown",
+ "source": [
+ "Tests whether two data samples have a monotonic relationship.\n",
+ "\n",
+ "$H_0$: The two data are independent.\n",
+ "\n",
+ "$H_1$: There is a dependency between the two data.\n",
+ "\n",
+ "Assumptions:\n",
+ "* Observations in each data sample are independent and identically distributed (iid).\n",
+ "* Observations in each data sample can be ranked.\n",
+ "\n",
+ "$\\\\ $\n",
+ "\n",
+ "[Spearman’s Rank Correlation Doc](https://docs.scipy.org/doc/scipy/reference/generated/scipy.stats.spearmanr.html)"
+ ],
+ "metadata": {
+ "id": "s4Itzt6K_kaj"
+ }
+ },
+ {
+ "cell_type": "code",
+ "source": [
+ "N = 10\n",
+ "alpha = 0.05\n",
+ "np.random.seed(1)\n",
+ "data1 = np.random.normal(0, 1, N)\n",
+ "data2 = np.random.normal(0, 1, N) + 2\n",
+ "\n",
+ "Test_statistic, p_value = spearmanr(data1, data2, alternative = 'two-sided')\n",
+ "print(f\"Test_statistic_Spearman's Rank Correlation = {Test_statistic}, p_value = {p_value}\", \"\\n\")\n",
+ "\n",
+ "if p_value < alpha:\n",
+ "\tprint(f'Since p_value < {alpha}, reject null hypothesis. Therefore, Two data are probably dependent.')\n",
+ "else:\n",
+ "\tprint(f'Since p_value > {alpha}, the null hypothesis cannot be rejected. Therefore, Two data are probably independent.')"
+ ],
+ "metadata": {
+ "colab": {
+ "base_uri": "https://localhost:8080/"
+ },
+ "id": "vSdyARWiAQuh",
+ "outputId": "17841159-7ace-4bc8-8cae-18dc3d2bccd5"
+ },
+ "execution_count": 7,
+ "outputs": [
+ {
+ "output_type": "stream",
+ "name": "stdout",
+ "text": [
+ "Test_statistic_Spearman's Rank Correlation = 0.7818181818181817, p_value = 0.007547007781067878 \n",
+ "\n",
+ "Since p_value < 0.05, reject null hypothesis. Therefore, Two data are probably dependent.\n"
+ ]
+ }
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "source": [
+ "Alternative hypothesis can be {‘two-sided’, ‘less’, ‘greater’}.\n",
+ "\n",
+ "'two-sided': the correlation is non-zero\n",
+ "\n",
+ "'less': the correlation is negative (less than zero)\n",
+ "\n",
+ "'greater': the correlation is positive (greater than zero)"
+ ],
+ "metadata": {
+ "id": "wibHxhC4BO5b"
+ }
+ },
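+ {
+ "cell_type": "markdown",
+ "source": [
+ "A minimal sketch (illustrative, not part of the original text): the same kind of data tested one-sided with `alternative='greater'`, i.e. $H_1$ is that the correlation is positive."
+ ],
+ "metadata": {}
+ },
+ {
+ "cell_type": "code",
+ "source": [
+ "# Illustrative sketch: one-sided Spearman test (H1: the correlation is positive)\n",
+ "np.random.seed(1)\n",
+ "data1 = np.random.normal(0, 1, 10)\n",
+ "data2 = np.random.normal(0, 1, 10) + 2\n",
+ "\n",
+ "stat_greater, p_greater = spearmanr(data1, data2, alternative='greater')\n",
+ "print(f\"alternative='greater': Test_statistic = {stat_greater}, p_value = {p_greater}\")"
+ ],
+ "metadata": {},
+ "execution_count": null,
+ "outputs": []
+ },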
+ {
+ "cell_type": "markdown",
+ "source": [
+ "\n",
+ "\n",
+ "### **5.2.3. Kendall’s Rank Correlation:**"
+ ],
+ "metadata": {
+ "id": "LROas1z3DtJL"
+ }
+ },
+ {
+ "cell_type": "markdown",
+ "source": [
+ "Tests whether two data samples have a monotonic relationship.\n",
+ "\n",
+ "$H_0$: The two data are independent.\n",
+ "\n",
+ "$H_1$: There is a dependency between the two data.\n",
+ "\n",
+ "Assumptions:\n",
+ "* Observations in each data sample are independent and identically distributed (iid).\n",
+ "* Observations in each data sample can be ranked.\n",
+ "\n",
+ "$\\\\ $\n",
+ "\n",
+ "[Kendall’s Rank Correlation Doc](https://docs.scipy.org/doc/scipy/reference/generated/scipy.stats.kendalltau.html)"
+ ],
+ "metadata": {
+ "id": "084h0SpxDzCX"
+ }
+ },
+ {
+ "cell_type": "code",
+ "source": [
+ "N = 10\n",
+ "alpha = 0.05\n",
+ "np.random.seed(1)\n",
+ "data1 = np.random.normal(0, 1, N)\n",
+ "data2 = np.random.normal(0, 1, N) + 2\n",
+ "\n",
+ "Test_statistic, p_value = kendalltau(data1, data2)\n",
+ "print(f\"Test_statistic_Kendall's Rank Correlation = {Test_statistic}, p_value = {p_value}\", \"\\n\")\n",
+ "\n",
+ "if p_value < alpha:\n",
+ "\tprint(f'Since p_value < {alpha}, reject null hypothesis. Therefore, Two data are probably dependent.')\n",
+ "else:\n",
+ "\tprint(f'Since p_value > {alpha}, the null hypothesis cannot be rejected. Therefore, Two data are probably independent.')"
+ ],
+ "metadata": {
+ "colab": {
+ "base_uri": "https://localhost:8080/"
+ },
+ "id": "Gzxj53vaEUzF",
+ "outputId": "0f7af409-1a8d-4dbe-f656-63902010e85c"
+ },
+ "execution_count": 8,
+ "outputs": [
+ {
+ "output_type": "stream",
+ "name": "stdout",
+ "text": [
+ "Test_statistic_Kendall's Rank Correlation = 0.6, p_value = 0.016666115520282188 \n",
+ "\n",
+ "Since p_value < 0.05, reject null hypothesis. Therefore, Two data are probably dependent.\n"
+ ]
+ }
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "source": [
+ "\n",
+ "\n",
+ "### **5.2.4. Chi-Squared Test:**"
+ ],
+ "metadata": {
+ "id": "7c95dMu8HLoA"
+ }
+ },
+ {
+ "cell_type": "markdown",
+ "source": [
+ "Tests whether two categorical variables are related or independent.\n",
+ "\n",
+ "$H_0$: The two data are independent.\n",
+ "\n",
+ "$H_1$: There is a dependency between the two data.\n",
+ "\n",
+ "Assumptions:\n",
+ "* Observations used in the calculation of the contingency table are independent.\n",
+ "* 25 or more examples in each cell of the contingency table.\n",
+ "\n",
+ "$\\\\ $\n",
+ "\n",
+ "[Chi-Squared Doc](https://docs.scipy.org/doc/scipy/reference/generated/scipy.stats.chi2_contingency.html)\n",
+ "\n",
+ "$\\\\ $\n",
+ "\n",
+ "degrees of freedom: $(rows - 1) * (cols - 1)$"
+ ],
+ "metadata": {
+ "id": "tcV0_bRnHRI8"
+ }
+ },
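+ {
+ "cell_type": "markdown",
+ "source": [
+ "For example, the $2 \\times 3$ table used below has dof $= (2 - 1) \\times (3 - 1) = 2$, which matches the `dof` value returned by `chi2_contingency`."
+ ],
+ "metadata": {}
+ },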
+ {
+ "cell_type": "code",
+ "source": [
+ "N = 10\n",
+ "alpha = 0.05\n",
+ "table = [[10, 20, 30],\n",
+ "\t\t\t [6, 9, 17]]\n",
+ "\n",
+ "Test_statistic, p_value, dof, expected = chi2_contingency(table)\n",
+ "print(f\"Test_statistic_Chi-Squared = {Test_statistic}, p_value = {p_value}, df = {dof}, \\n\", f\"Expected = {expected}\",\"\\n\")\n",
+ "\n",
+ "if p_value < alpha:\n",
+ "\tprint(f'Since p_value < {alpha}, reject null hypothesis. Therefore, Two data are probably dependent.')\n",
+ "else:\n",
+ "\tprint(f'Since p_value > {alpha}, the null hypothesis cannot be rejected. Therefore, Two data are probably independent.')"
+ ],
+ "metadata": {
+ "colab": {
+ "base_uri": "https://localhost:8080/"
+ },
+ "id": "0Ik8RPjBHOwr",
+ "outputId": "aa7be138-2f00-429d-f6ba-eda5f09a94fd"
+ },
+ "execution_count": 9,
+ "outputs": [
+ {
+ "output_type": "stream",
+ "name": "stdout",
+ "text": [
+ "Test_statistic_Chi-Squared = 0.27157465150403504, p_value = 0.873028283380073, df = 2, \n",
+ " Expected = [[10.43478261 18.91304348 30.65217391]\n",
+ " [ 5.56521739 10.08695652 16.34782609]] \n",
+ "\n",
+ "Since p_value > 0.05, the null hypothesis cannot be rejected. Therefore, Two data are probably independent.\n"
+ ]
+ }
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "source": [
+ "\n",
+ "\n",
+ "## **5.3. Stationary Tests:**"
+ ],
+ "metadata": {
+ "id": "QFnla-p6YV94"
+ }
+ },
+ {
+ "cell_type": "markdown",
+ "source": [
+ "\n",
+ "\n",
+ "### **5.3.1. Augmented Dickey-Fuller Unit Root Test:**"
+ ],
+ "metadata": {
+ "id": "7JMLIct9Ydku"
+ }
+ },
+ {
+ "cell_type": "markdown",
+ "source": [
+ "Tests whether a time series has a unit root, e.g. has a trend or more generally is autoregressive.\n",
+ "\n",
+ "$H_0$: A unit root is present (series is non-stationary).\n",
+ "\n",
+ "$H_1$: A unit root is not present (series is stationary).\n",
+ "\n",
+ "Assumptions:\n",
+ "* Observations in are temporally ordered.\n",
+ "\n",
+ "$\\\\ $\n",
+ "\n",
+ "[Augmented Dickey-Fuller Unit Root Test Doc](https://www.statsmodels.org/dev/generated/statsmodels.tsa.stattools.adfuller.html)"
+ ],
+ "metadata": {
+ "id": "FbuyfZBIYoPj"
+ }
+ },
+ {
+ "cell_type": "code",
+ "source": [
+ "alpha = 0.05\n",
+ "data = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9]\n",
+ "\n",
+ "Test_statistic, p_value, lags, obs, crit, t = adfuller(data)\n",
+ "print(f\"Test_statistic_Mann-Whitney = {Test_statistic}, p_value = {p_value}\", \"\\n\")\n",
+ "\n",
+ "if p_value < alpha:\n",
+ "\tprint(f'Since p_value < {alpha}, reject null hypothesis. Therefore, the series is probably stationary.')\n",
+ "else:\n",
+ "\tprint(f'Since p_value > {alpha}, the null hypothesis cannot be rejected. Therefore, the series is probably non-stationary.')"
+ ],
+ "metadata": {
+ "colab": {
+ "base_uri": "https://localhost:8080/"
+ },
+ "id": "HnKfeoZfYXPt",
+ "outputId": "ad4c4075-ecda-4be5-f482-940bb90840b1"
+ },
+ "execution_count": 10,
+ "outputs": [
+ {
+ "output_type": "stream",
+ "name": "stdout",
+ "text": [
+ "Test_statistic_Mann-Whitney = 0.5171974540944098, p_value = 0.9853865316323872 \n",
+ "\n",
+ "Since p_value > 0.05, the null hypothesis cannot be rejected. Therefore, the series is probably non-stationary.\n"
+ ]
+ }
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "source": [
+ "\n",
+ "\n",
+ "### **5.3.2. Kwiatkowski-Phillips-Schmidt-Shin Test:**"
+ ],
+ "metadata": {
+ "id": "IWdTeJz-ZuWc"
+ }
+ },
+ {
+ "cell_type": "markdown",
+ "source": [
+ "Tests whether a time series is trend stationary or not.\n",
+ "\n",
+ "$H_0$: The time series is trend-stationary.\n",
+ "\n",
+ "$H_1$: The time series is not trend-stationary.\n",
+ "\n",
+ "Assumptions:\n",
+ "* Observations in are temporally ordered.\n",
+ "\n",
+ "$\\\\ $\n",
+ "\n",
+ "[Kwiatkowski-Phillips-Schmidt-Shin Test Doc](https://www.statsmodels.org/stable/generated/statsmodels.tsa.stattools.kpss.html#statsmodels.tsa.stattools.kpss)"
+ ],
+ "metadata": {
+ "id": "IrK61Uv-ZuWc"
+ }
+ },
+ {
+ "cell_type": "code",
+ "source": [
+ "alpha = 0.05\n",
+ "data = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9]\n",
+ "\n",
+ "Test_statistic, p_value, lags, crit = kpss(data)\n",
+ "print(f\"Test_statistic_Kwiatkowski = {Test_statistic}, p_value = {p_value}\", \"\\n\")\n",
+ "\n",
+ "if p_value < alpha:\n",
+ "\tprint(f'Since p_value < {alpha}, reject null hypothesis. Therefore, the series is probably not trend-stationary.')\n",
+ "else:\n",
+ "\tprint(f'Since p_value > {alpha}, the null hypothesis cannot be rejected. Therefore, the series is probably trend-stationary.')"
+ ],
+ "metadata": {
+ "colab": {
+ "base_uri": "https://localhost:8080/"
+ },
+ "outputId": "d1d6462e-7130-418d-b277-e34f419ad68b",
+ "id": "EtboNoivZuWd"
+ },
+ "execution_count": 11,
+ "outputs": [
+ {
+ "output_type": "stream",
+ "name": "stdout",
+ "text": [
+ "Test_statistic_Kwiatkowski = 0.4099630996309963, p_value = 0.072860732917674 \n",
+ "\n",
+ "Since p_value > 0.05, the null hypothesis cannot be rejected. Therefore, the series is probably trend-stationary.\n"
+ ]
+ }
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "source": [
+ "\n",
+ "\n",
+ "## **5.4. Other Tests:**"
+ ],
+ "metadata": {
+ "id": "6sR1d3BHS9Hh"
+ }
+ },
+ {
+ "cell_type": "markdown",
+ "source": [
+ "\n",
+ "\n",
+ "### **5.4.1. Mann-Whitney U-Test:**"
+ ],
+ "metadata": {
+ "id": "_beFJ80gTH85"
+ }
+ },
+ {
+ "cell_type": "markdown",
+ "source": [
+ "Tests whether the distributions of two independent samples are equal or not.\n",
+ "\n",
+ "$H_0$: The distributions of both samples are equal.\n",
+ "\n",
+ "$H_1$: The distributions of both samples are not equal.\n",
+ "\n",
+ "Assumptions:\n",
+ "* Observations in each sample are independent and identically distributed (iid).\n",
+ "* Observations in each sample can be ranked.\n",
+ "\n",
+ "$\\\\ $\n",
+ "\n",
+ "[Mann-Whitney U Test Doc](https://docs.scipy.org/doc/scipy/reference/generated/scipy.stats.mannwhitneyu.html)"
+ ],
+ "metadata": {
+ "id": "xDJkmODOTOrP"
+ }
+ },
+ {
+ "cell_type": "code",
+ "source": [
+ "N = 10\n",
+ "alpha = 0.05\n",
+ "data1 = np.random.normal(0, 1, N)\n",
+ "data2 = np.random.normal(0, 1, N)\n",
+ "\n",
+ "Test_statistic, p_value = mannwhitneyu(data1, data2, alternative='two-sided')\n",
+ "print(f\"Test_statistic_Mann-Whitney = {Test_statistic}, p_value = {p_value}\", \"\\n\")\n",
+ "\n",
+ "if p_value < alpha:\n",
+ "\tprint(f'Since p_value < {alpha}, reject null hypothesis. Therefore, Two data distributions are probably not equal.')\n",
+ "else:\n",
+ "\tprint(f'Since p_value > {alpha}, the null hypothesis cannot be rejected. Therefore, Two data distributions are probably equal.')"
+ ],
+ "metadata": {
+ "colab": {
+ "base_uri": "https://localhost:8080/"
+ },
+ "id": "UJaGUqmfTjr4",
+ "outputId": "0e6858f5-0e32-4345-a83a-ce2b2ed6b517"
+ },
+ "execution_count": 12,
+ "outputs": [
+ {
+ "output_type": "stream",
+ "name": "stdout",
+ "text": [
+ "Test_statistic_Mann-Whitney = 61.0, p_value = 0.4273553138978077 \n",
+ "\n",
+ "Since p_value > 0.05, the null hypothesis cannot be rejected. Therefore, Two data distributions are probably equal.\n"
+ ]
+ }
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "source": [
+ "\n",
+ "\n",
+ "### **5.4.2. Wilcoxon Signed-Rank Test:**"
+ ],
+ "metadata": {
+ "id": "uXF1GqteUu0E"
+ }
+ },
+ {
+ "cell_type": "markdown",
+ "source": [
+ "Tests whether the distributions of two paired samples are equal or not.\n",
+ "\n",
+ "$H_0$: The distributions of both samples are equal.\n",
+ "\n",
+ "$H_1$: The distributions of both samples are not equal.\n",
+ "\n",
+ "Assumptions:\n",
+ "* Observations in each sample are independent and identically distributed (iid).\n",
+ "* Observations in each sample can be ranked.\n",
+ "* Observations across each sample are paired.\n",
+ "\n",
+ "$\\\\ $\n",
+ "\n",
+ "[Wilcoxon Signed-Rank Test Doc](https://docs.scipy.org/doc/scipy/reference/generated/scipy.stats.wilcoxon.html)"
+ ],
+ "metadata": {
+ "id": "wQUAp_96Uu0E"
+ }
+ },
+ {
+ "cell_type": "code",
+ "source": [
+ "N = 10\n",
+ "alpha = 0.05\n",
+ "data1 = np.random.normal(0, 1, N)\n",
+ "data2 = np.random.normal(0, 1, N)\n",
+ "\n",
+ "Test_statistic, p_value = wilcoxon(data1, data2, alternative='two-sided')\n",
+ "print(f\"Test_statistic_Wilcoxon = {Test_statistic}, p_value = {p_value}\", \"\\n\")\n",
+ "\n",
+ "if p_value < alpha:\n",
+ "\tprint(f'Since p_value < {alpha}, reject null hypothesis. Therefore, Two data distributions are probably not equal.')\n",
+ "else:\n",
+ "\tprint(f'Since p_value > {alpha}, the null hypothesis cannot be rejected. Therefore, Two data distributions are probably equal.')"
+ ],
+ "metadata": {
+ "colab": {
+ "base_uri": "https://localhost:8080/"
+ },
+ "outputId": "96500652-9c3f-4794-fb0e-6fdf3302b134",
+ "id": "_45cKDykUu0F"
+ },
+ "execution_count": 13,
+ "outputs": [
+ {
+ "output_type": "stream",
+ "name": "stdout",
+ "text": [
+ "Test_statistic_Wilcoxon = 24.0, p_value = 0.76953125 \n",
+ "\n",
+ "Since p_value > 0.05, the null hypothesis cannot be rejected. Therefore, Two data distributions are probably equal.\n"
+ ]
+ }
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "source": [
+ "\n",
+ "\n",
+ "### **5.4.3. Kruskal-Wallis H Test:**"
+ ],
+ "metadata": {
+ "id": "PpZ12cY4Vv_5"
+ }
+ },
+ {
+ "cell_type": "markdown",
+ "source": [
+ "Tests whether the distributions of two or more independent samples are equal or not.\n",
+ "\n",
+ "$H_0$: The distributions of all samples are equal.\n",
+ "\n",
+ "$H_1$: The distributions of one or more samples are not equal.\n",
+ "\n",
+ "Assumptions:\n",
+ "* Observations in each sample are independent and identically distributed (iid).\n",
+ "* Observations in each sample can be ranked.\n",
+ "\n",
+ "$\\\\ $\n",
+ "\n",
+ "[Kruskal-Wallis H Test Doc](https://docs.scipy.org/doc/scipy/reference/generated/scipy.stats.kruskal.html)"
+ ],
+ "metadata": {
+ "id": "yPeW-V0QVv_6"
+ }
+ },
+ {
+ "cell_type": "code",
+ "source": [
+ "N = 10\n",
+ "alpha = 0.05\n",
+ "data1 = np.random.normal(0, 1, N)\n",
+ "data2 = np.random.normal(0, 1, N)\n",
+ "\n",
+ "Test_statistic, p_value = kruskal(data1, data2)\n",
+ "print(f\"Test_statistic_Wilcoxon = {Test_statistic}, p_value = {p_value}\", \"\\n\")\n",
+ "\n",
+ "if p_value < alpha:\n",
+ "\tprint(f'Since p_value < {alpha}, reject null hypothesis. Therefore, Two data distributions are probably not equal.')\n",
+ "else:\n",
+ "\tprint(f'Since p_value > {alpha}, the null hypothesis cannot be rejected. Therefore, Two data distributions are probably equal.')"
+ ],
+ "metadata": {
+ "colab": {
+ "base_uri": "https://localhost:8080/"
+ },
+ "outputId": "5f856ccd-41f9-420d-997c-0e3507d0ca44",
+ "id": "gZfSggVCVv_7"
+ },
+ "execution_count": 14,
+ "outputs": [
+ {
+ "output_type": "stream",
+ "name": "stdout",
+ "text": [
+ "Test_statistic_Wilcoxon = 1.462857142857132, p_value = 0.22647606604348455 \n",
+ "\n",
+ "Since p_value > 0.05, the null hypothesis cannot be rejected. Therefore, Two data distributions are probably equal.\n"
+ ]
+ }
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "source": [
+ "\n",
+ "\n",
+ "### **5.4.4. Friedman Test:**"
+ ],
+ "metadata": {
+ "id": "uCyetBhlW3E3"
+ }
+ },
+ {
+ "cell_type": "markdown",
+ "source": [
+ "Tests whether the distributions of two or more paired samples are equal or not.\n",
+ "\n",
+ "$H_0$: The distributions of both samples are equal.\n",
+ "\n",
+ "$H_1$: The distributions of both samples are not equal.\n",
+ "\n",
+ "Assumptions:\n",
+ "* Observations in each sample are independent and identically distributed (iid).\n",
+ "* Observations in each sample can be ranked.\n",
+ "* Observations across each sample are paired.\n",
+ "\n",
+ "$\\\\ $\n",
+ "\n",
+ "[Friedman Test Doc](https://docs.scipy.org/doc/scipy/reference/generated/scipy.stats.friedmanchisquare.html)"
+ ],
+ "metadata": {
+ "id": "fTbP_K-AW3E5"
+ }
+ },
+ {
+ "cell_type": "code",
+ "source": [
+ "alpha = 0.05\n",
+ "data1 = [0.873, 2.817, 0.121, -0.945, -0.055, -1.436, 0.360, -1.478, -1.637, -1.869]\n",
+ "data2 = [1.142, -0.432, -0.938, -0.729, -0.846, -0.157, 0.500, 1.183, -1.075, -0.169]\n",
+ "data3 = [-0.208, 0.696, 0.928, -1.148, -0.213, 0.229, 0.137, 0.269, -0.870, -1.204]\n",
+ "\n",
+ "Test_statistic, p_value = friedmanchisquare(data1, data2, data3)\n",
+ "print(f\"Test_statistic_Friedman = {Test_statistic}, p_value = {p_value}\", \"\\n\")\n",
+ "\n",
+ "if p_value < alpha:\n",
+ "\tprint(f'Since p_value < {alpha}, reject null hypothesis. Therefore, data distributions are probably not equal.')\n",
+ "else:\n",
+ "\tprint(f'Since p_value > {alpha}, the null hypothesis cannot be rejected. Therefore, data distributions are probably equal.')"
+ ],
+ "metadata": {
+ "colab": {
+ "base_uri": "https://localhost:8080/"
+ },
+ "outputId": "042a5986-7d2e-4c15-b2f7-5a72f8714294",
+ "id": "w0if4yFkW3E5"
+ },
+ "execution_count": 15,
+ "outputs": [
+ {
+ "output_type": "stream",
+ "name": "stdout",
+ "text": [
+ "Test_statistic_Friedman = 0.8000000000000114, p_value = 0.6703200460356356 \n",
+ "\n",
+ "Since p_value > 0.05, the null hypothesis cannot be rejected. Therefore, data distributions are probably equal.\n"
+ ]
+ }
+ ]
+ }
+ ]
+}
\ No newline at end of file