From fe25b68a72ee5e89f78a9e4dd5321c1b97cfada0 Mon Sep 17 00:00:00 2001 From: Milton Pividori Date: Mon, 20 Nov 2023 02:18:44 -0700 Subject: [PATCH] add notebook to analyze the relationship between UTY and KDM6a --- .../20_00-gtex_whole_blood-UTY_KDM6A.ipynb | 4634 +++++++++++++++++ .../py/20_00-gtex_whole_blood-UTY_KDM6A.py | 720 +++ 2 files changed, 5354 insertions(+) create mode 100644 nbs/99_manuscript/coefs_comp/20_00-gtex_whole_blood-UTY_KDM6A.ipynb create mode 100644 nbs/99_manuscript/coefs_comp/py/20_00-gtex_whole_blood-UTY_KDM6A.py diff --git a/nbs/99_manuscript/coefs_comp/20_00-gtex_whole_blood-UTY_KDM6A.ipynb b/nbs/99_manuscript/coefs_comp/20_00-gtex_whole_blood-UTY_KDM6A.ipynb new file mode 100644 index 00000000..befeccf5 --- /dev/null +++ b/nbs/99_manuscript/coefs_comp/20_00-gtex_whole_blood-UTY_KDM6A.ipynb @@ -0,0 +1,4634 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "id": "b9c35e72-e087-4511-9899-5da25f3ff7ba", + "metadata": { + "papermill": { + "duration": 0.013796, + "end_time": "2023-11-20T08:55:56.439073", + "exception": false, + "start_time": "2023-11-20T08:55:56.425277", + "status": "completed" + }, + "tags": [] + }, + "source": [ + "# Description" + ] + }, + { + "cell_type": "markdown", + "id": "2422b873-5148-44ef-9884-0dd0fa8e69fa", + "metadata": { + "papermill": { + "duration": 0.013051, + "end_time": "2023-11-20T08:55:56.465902", + "exception": false, + "start_time": "2023-11-20T08:55:56.452851", + "status": "completed" + }, + "tags": [] + }, + "source": [ + "This notebook analyzes more closely the pattern between the gene pair *UTY* / *KDM6A*. The analyses are focused on Reviewer 2's comment:\n", + "\n", + "```\n", + "In Figure 4, while there is a visible difference between the correlation of male samples, the CCC values are still quite close. For example, this can be observed in Brain Cerebellum and Small Intestine Terminal Ileum. 
Please address this.\n", + "```" + ] + }, + { + "cell_type": "markdown", + "id": "5d4ae4df-624d-4951-917c-880462cfb658", + "metadata": { + "papermill": { + "duration": 0.012554, + "end_time": "2023-11-20T08:55:56.490793", + "exception": false, + "start_time": "2023-11-20T08:55:56.478239", + "status": "completed" + }, + "tags": [] + }, + "source": [ + "# Modules" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "id": "2dcd0e52-236a-44cd-9cd9-71cca1232338", + "metadata": { + "execution": { + "iopub.execute_input": "2023-11-20T08:55:56.512994Z", + "iopub.status.busy": "2023-11-20T08:55:56.512464Z", + "iopub.status.idle": "2023-11-20T08:55:57.280770Z", + "shell.execute_reply": "2023-11-20T08:55:57.280425Z" + }, + "papermill": { + "duration": 0.778767, + "end_time": "2023-11-20T08:55:57.282390", + "exception": false, + "start_time": "2023-11-20T08:55:56.503623", + "status": "completed" + }, + "tags": [] + }, + "outputs": [], + "source": [ + "import pandas as pd\n", + "\n", + "from scipy.stats import pearsonr, spearmanr\n", + "import matplotlib.pyplot as plt\n", + "import seaborn as sns\n", + "import numpy as np\n", + "\n", + "from ccc import conf\n", + "from ccc.coef import ccc" + ] + }, + { + "cell_type": "markdown", + "id": "e9ef3211-8d35-4b4b-be45-2de384b6fdee", + "metadata": { + "papermill": { + "duration": 0.010558, + "end_time": "2023-11-20T08:55:57.300232", + "exception": false, + "start_time": "2023-11-20T08:55:57.289674", + "status": "completed" + }, + "tags": [] + }, + "source": [ + "# Settings" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "id": "3e9781c8-faf2-41ec-a5c3-ab84dec6a7fa", + "metadata": { + "execution": { + "iopub.execute_input": "2023-11-20T08:55:57.313597Z", + "iopub.status.busy": "2023-11-20T08:55:57.313511Z", + "iopub.status.idle": "2023-11-20T08:55:57.315760Z", + "shell.execute_reply": "2023-11-20T08:55:57.315469Z" + }, + "papermill": { + "duration": 0.009823, + "end_time": "2023-11-20T08:55:57.316518", + 
"exception": false, + "start_time": "2023-11-20T08:55:57.306695", + "status": "completed" + }, + "tags": [] + }, + "outputs": [], + "source": [ + "# this gene pair was originally found with ccc on whole blood\n", + "# interesting: https://clincancerres.aacrjournals.org/content/26/21/5567.figures-only\n", + "gene0_id, gene1_id = \"ENSG00000147050.14\", \"ENSG00000183878.15\"\n", + "gene0_symbol, gene1_symbol = \"KDM6A\", \"UTY\"" + ] + }, + { + "cell_type": "markdown", + "id": "649a237d-595e-4bd7-a607-7c1773b36cb5", + "metadata": { + "papermill": { + "duration": 0.026613, + "end_time": "2023-11-20T08:55:57.349663", + "exception": false, + "start_time": "2023-11-20T08:55:57.323050", + "status": "completed" + }, + "tags": [] + }, + "source": [ + "# Paths" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "id": "1ea8a834-34dd-480f-b85f-5706aa4db334", + "metadata": { + "execution": { + "iopub.execute_input": "2023-11-20T08:55:57.363287Z", + "iopub.status.busy": "2023-11-20T08:55:57.363162Z", + "iopub.status.idle": "2023-11-20T08:55:57.365443Z", + "shell.execute_reply": "2023-11-20T08:55:57.365163Z" + }, + "papermill": { + "duration": 0.009932, + "end_time": "2023-11-20T08:55:57.366200", + "exception": false, + "start_time": "2023-11-20T08:55:57.356268", + "status": "completed" + }, + "tags": [] + }, + "outputs": [], + "source": [ + "TISSUE_DIR = conf.GTEX[\"DATA_DIR\"] / \"data_by_tissue\"\n", + "assert TISSUE_DIR.exists()" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "id": "f869a719-5302-4c5b-b496-0878617ea3d3", + "metadata": { + "execution": { + "iopub.execute_input": "2023-11-20T08:55:57.379862Z", + "iopub.status.busy": "2023-11-20T08:55:57.379650Z", + "iopub.status.idle": "2023-11-20T08:55:57.383787Z", + "shell.execute_reply": "2023-11-20T08:55:57.383383Z" + }, + "papermill": { + "duration": 0.011846, + "end_time": "2023-11-20T08:55:57.384630", + "exception": false, + "start_time": "2023-11-20T08:55:57.372784", + "status": "completed" + 
}, + "tags": [] + }, + "outputs": [ + { + "data": { + "text/plain": [ + "PosixPath('/opt/manuscript/content/images/coefs_comp/kdm6a_vs_uty')" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "OUTPUT_FIGURE_DIR = (\n", + " conf.MANUSCRIPT[\"FIGURES_DIR\"]\n", + " / \"coefs_comp\"\n", + " / f\"{gene0_symbol.lower()}_vs_{gene1_symbol.lower()}\"\n", + ")\n", + "OUTPUT_FIGURE_DIR.mkdir(parents=True, exist_ok=True)\n", + "display(OUTPUT_FIGURE_DIR)" + ] + }, + { + "cell_type": "markdown", + "id": "35453032-07b1-4e68-9f65-f8ec0d0ebcd1", + "metadata": { + "papermill": { + "duration": 0.006516, + "end_time": "2023-11-20T08:55:57.397804", + "exception": false, + "start_time": "2023-11-20T08:55:57.391288", + "status": "completed" + }, + "tags": [] + }, + "source": [ + "# Data" + ] + }, + { + "cell_type": "markdown", + "id": "76b3390f-360a-45e8-b38d-f14f92823125", + "metadata": { + "papermill": { + "duration": 0.006556, + "end_time": "2023-11-20T08:55:57.410974", + "exception": false, + "start_time": "2023-11-20T08:55:57.404418", + "status": "completed" + }, + "tags": [] + }, + "source": [ + "## GTEx metadata" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "id": "e2dffcd4-3280-4f2e-8e5b-1797c6333894", + "metadata": { + "execution": { + "iopub.execute_input": "2023-11-20T08:55:57.425110Z", + "iopub.status.busy": "2023-11-20T08:55:57.424812Z", + "iopub.status.idle": "2023-11-20T08:55:57.460560Z", + "shell.execute_reply": "2023-11-20T08:55:57.460088Z" + }, + "papermill": { + "duration": 0.043947, + "end_time": "2023-11-20T08:55:57.461533", + "exception": false, + "start_time": "2023-11-20T08:55:57.417586", + "status": "completed" + }, + "tags": [] + }, + "outputs": [], + "source": [ + "gtex_metadata = pd.read_pickle(conf.GTEX[\"DATA_DIR\"] / \"gtex_v8-sample_metadata.pkl\")" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "id": "f762c76d-31b6-46e3-9660-3352112c9326", + "metadata": { + "execution": { + 
"iopub.execute_input": "2023-11-20T08:55:57.475718Z", + "iopub.status.busy": "2023-11-20T08:55:57.475409Z", + "iopub.status.idle": "2023-11-20T08:55:57.479139Z", + "shell.execute_reply": "2023-11-20T08:55:57.478744Z" + }, + "papermill": { + "duration": 0.011791, + "end_time": "2023-11-20T08:55:57.479973", + "exception": false, + "start_time": "2023-11-20T08:55:57.468182", + "status": "completed" + }, + "tags": [] + }, + "outputs": [ + { + "data": { + "text/plain": [ + "(22951, 66)" + ] + }, + "execution_count": 6, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "gtex_metadata.shape" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "id": "870baa0e-4f5b-44c7-b32d-f3763f74a2b7", + "metadata": { + "execution": { + "iopub.execute_input": "2023-11-20T08:55:57.494199Z", + "iopub.status.busy": "2023-11-20T08:55:57.493904Z", + "iopub.status.idle": "2023-11-20T08:55:57.515276Z", + "shell.execute_reply": "2023-11-20T08:55:57.514837Z" + }, + "papermill": { + "duration": 0.029424, + "end_time": "2023-11-20T08:55:57.516109", + "exception": false, + "start_time": "2023-11-20T08:55:57.486685", + "status": "completed" + }, + "tags": [] + }, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
SUBJIDSEXAGEDTHHRDYSMATSSCRSMCENTERSMPTHNTSSMRINSMTSSMTSD...SME1ANTISMSPLTRDSMBSMMRTSME1SNSESME1PCTSSMRRNARTSME1MPRTSMNUM5CDSMDPMPRTSME2PCTS
SAMPID
GTEX-1117F-0003-SM-58Q7GGTEX-1117FFemale60-694.0NaNB1NaNNaNBloodWhole Blood...NaNNaNNaNNaNNaNNaNNaNNaNNaNNaN
GTEX-1117F-0003-SM-5DWSBGTEX-1117FFemale60-694.0NaNB1NaNNaNBloodWhole Blood...NaNNaNNaNNaNNaNNaNNaNNaNNaNNaN
GTEX-1117F-0003-SM-6WBT7GTEX-1117FFemale60-694.0NaNB1NaNNaNBloodWhole Blood...NaNNaNNaNNaNNaNNaNNaNNaNNaNNaN
GTEX-1117F-0011-R10a-SM-AHZ7FGTEX-1117FFemale60-694.0NaNB1, A1NaNNaNBrainBrain - Frontal Cortex (BA9)...NaNNaNNaNNaNNaNNaNNaNNaNNaNNaN
GTEX-1117F-0011-R10b-SM-CYKQ8GTEX-1117FFemale60-694.0NaNB1, A1NaN7.2BrainBrain - Frontal Cortex (BA9)...NaNNaNNaNNaNNaNNaNNaNNaNNaNNaN
\n", + "

5 rows × 66 columns

\n", + "
" + ], + "text/plain": [ + " SUBJID SEX AGE DTHHRDY SMATSSCR \\\n", + "SAMPID \n", + "GTEX-1117F-0003-SM-58Q7G GTEX-1117F Female 60-69 4.0 NaN \n", + "GTEX-1117F-0003-SM-5DWSB GTEX-1117F Female 60-69 4.0 NaN \n", + "GTEX-1117F-0003-SM-6WBT7 GTEX-1117F Female 60-69 4.0 NaN \n", + "GTEX-1117F-0011-R10a-SM-AHZ7F GTEX-1117F Female 60-69 4.0 NaN \n", + "GTEX-1117F-0011-R10b-SM-CYKQ8 GTEX-1117F Female 60-69 4.0 NaN \n", + "\n", + " SMCENTER SMPTHNTS SMRIN SMTS \\\n", + "SAMPID \n", + "GTEX-1117F-0003-SM-58Q7G B1 NaN NaN Blood \n", + "GTEX-1117F-0003-SM-5DWSB B1 NaN NaN Blood \n", + "GTEX-1117F-0003-SM-6WBT7 B1 NaN NaN Blood \n", + "GTEX-1117F-0011-R10a-SM-AHZ7F B1, A1 NaN NaN Brain \n", + "GTEX-1117F-0011-R10b-SM-CYKQ8 B1, A1 NaN 7.2 Brain \n", + "\n", + " SMTSD ... SME1ANTI \\\n", + "SAMPID ... \n", + "GTEX-1117F-0003-SM-58Q7G Whole Blood ... NaN \n", + "GTEX-1117F-0003-SM-5DWSB Whole Blood ... NaN \n", + "GTEX-1117F-0003-SM-6WBT7 Whole Blood ... NaN \n", + "GTEX-1117F-0011-R10a-SM-AHZ7F Brain - Frontal Cortex (BA9) ... NaN \n", + "GTEX-1117F-0011-R10b-SM-CYKQ8 Brain - Frontal Cortex (BA9) ... 
NaN \n", + "\n", + " SMSPLTRD SMBSMMRT SME1SNSE SME1PCTS SMRRNART \\\n", + "SAMPID \n", + "GTEX-1117F-0003-SM-58Q7G NaN NaN NaN NaN NaN \n", + "GTEX-1117F-0003-SM-5DWSB NaN NaN NaN NaN NaN \n", + "GTEX-1117F-0003-SM-6WBT7 NaN NaN NaN NaN NaN \n", + "GTEX-1117F-0011-R10a-SM-AHZ7F NaN NaN NaN NaN NaN \n", + "GTEX-1117F-0011-R10b-SM-CYKQ8 NaN NaN NaN NaN NaN \n", + "\n", + " SME1MPRT SMNUM5CD SMDPMPRT SME2PCTS \n", + "SAMPID \n", + "GTEX-1117F-0003-SM-58Q7G NaN NaN NaN NaN \n", + "GTEX-1117F-0003-SM-5DWSB NaN NaN NaN NaN \n", + "GTEX-1117F-0003-SM-6WBT7 NaN NaN NaN NaN \n", + "GTEX-1117F-0011-R10a-SM-AHZ7F NaN NaN NaN NaN \n", + "GTEX-1117F-0011-R10b-SM-CYKQ8 NaN NaN NaN NaN \n", + "\n", + "[5 rows x 66 columns]" + ] + }, + "execution_count": 7, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "gtex_metadata.head()" + ] + }, + { + "cell_type": "markdown", + "id": "85a68693-1bae-4473-bd18-086caf5820eb", + "metadata": { + "papermill": { + "duration": 0.006772, + "end_time": "2023-11-20T08:55:57.529828", + "exception": false, + "start_time": "2023-11-20T08:55:57.523056", + "status": "completed" + }, + "tags": [] + }, + "source": [ + "## Gene Ensembl ID -> Symbol mapping" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "id": "3cf14b08-658d-46bc-9581-c9afd70be177", + "metadata": { + "execution": { + "iopub.execute_input": "2023-11-20T08:55:57.544600Z", + "iopub.status.busy": "2023-11-20T08:55:57.544199Z", + "iopub.status.idle": "2023-11-20T08:55:57.562921Z", + "shell.execute_reply": "2023-11-20T08:55:57.562384Z" + }, + "papermill": { + "duration": 0.027167, + "end_time": "2023-11-20T08:55:57.563910", + "exception": false, + "start_time": "2023-11-20T08:55:57.536743", + "status": "completed" + }, + "tags": [] + }, + "outputs": [], + "source": [ + "gene_map = pd.read_pickle(conf.GTEX[\"DATA_DIR\"] / \"gtex_gene_id_symbol_mappings.pkl\")" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "id": 
"a2564d82-f685-4bc4-8ab9-5c09988830ae", + "metadata": { + "execution": { + "iopub.execute_input": "2023-11-20T08:55:57.579072Z", + "iopub.status.busy": "2023-11-20T08:55:57.578653Z", + "iopub.status.idle": "2023-11-20T08:55:57.634878Z", + "shell.execute_reply": "2023-11-20T08:55:57.634425Z" + }, + "papermill": { + "duration": 0.064994, + "end_time": "2023-11-20T08:55:57.635818", + "exception": false, + "start_time": "2023-11-20T08:55:57.570824", + "status": "completed" + }, + "tags": [] + }, + "outputs": [], + "source": [ + "gene_map = gene_map.set_index(\"gene_ens_id\")[\"gene_symbol\"].to_dict()" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "id": "ced72294-1843-427e-8dc0-3f57572fd9fd", + "metadata": { + "execution": { + "iopub.execute_input": "2023-11-20T08:55:57.650500Z", + "iopub.status.busy": "2023-11-20T08:55:57.650207Z", + "iopub.status.idle": "2023-11-20T08:55:57.652491Z", + "shell.execute_reply": "2023-11-20T08:55:57.652119Z" + }, + "papermill": { + "duration": 0.010461, + "end_time": "2023-11-20T08:55:57.653268", + "exception": false, + "start_time": "2023-11-20T08:55:57.642807", + "status": "completed" + }, + "tags": [] + }, + "outputs": [], + "source": [ + "assert gene_map[\"ENSG00000145309.5\"] == \"CABS1\"" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "id": "e7d49359-73b7-4e13-b404-24bbcce77172", + "metadata": { + "execution": { + "iopub.execute_input": "2023-11-20T08:55:57.667993Z", + "iopub.status.busy": "2023-11-20T08:55:57.667737Z", + "iopub.status.idle": "2023-11-20T08:55:57.670315Z", + "shell.execute_reply": "2023-11-20T08:55:57.669870Z" + }, + "papermill": { + "duration": 0.010763, + "end_time": "2023-11-20T08:55:57.671152", + "exception": false, + "start_time": "2023-11-20T08:55:57.660389", + "status": "completed" + }, + "tags": [] + }, + "outputs": [], + "source": [ + "assert gene_map[gene0_id] == gene0_symbol\n", + "assert gene_map[gene1_id] == gene1_symbol" + ] + }, + { + "cell_type": "markdown", + "id": 
"80037782-e5d0-4bf3-a7bf-ee6cb22e064e", + "metadata": { + "papermill": { + "duration": 0.006923, + "end_time": "2023-11-20T08:55:57.685112", + "exception": false, + "start_time": "2023-11-20T08:55:57.678189", + "status": "completed" + }, + "tags": [] + }, + "source": [ + "## Get male/female sample IDs" + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "id": "5122cc00-b5da-4327-a0e3-d0d4829b1986", + "metadata": { + "execution": { + "iopub.execute_input": "2023-11-20T08:55:57.700015Z", + "iopub.status.busy": "2023-11-20T08:55:57.699725Z", + "iopub.status.idle": "2023-11-20T08:55:57.707873Z", + "shell.execute_reply": "2023-11-20T08:55:57.707403Z" + }, + "papermill": { + "duration": 0.016514, + "end_time": "2023-11-20T08:55:57.708660", + "exception": false, + "start_time": "2023-11-20T08:55:57.692146", + "status": "completed" + }, + "tags": [] + }, + "outputs": [ + { + "data": { + "text/plain": [ + "count 22951\n", + "unique 2\n", + "top Male\n", + "freq 15046\n", + "Name: SEX, dtype: object" + ] + }, + "execution_count": 12, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "gtex_metadata[\"SEX\"].describe()" + ] + }, + { + "cell_type": "code", + "execution_count": 13, + "id": "a31f26d7-91dc-468d-81b1-f6e563d7b88d", + "metadata": { + "execution": { + "iopub.execute_input": "2023-11-20T08:55:57.723690Z", + "iopub.status.busy": "2023-11-20T08:55:57.723392Z", + "iopub.status.idle": "2023-11-20T08:55:57.734583Z", + "shell.execute_reply": "2023-11-20T08:55:57.734034Z" + }, + "papermill": { + "duration": 0.019757, + "end_time": "2023-11-20T08:55:57.735493", + "exception": false, + "start_time": "2023-11-20T08:55:57.715736", + "status": "completed" + }, + "tags": [] + }, + "outputs": [], + "source": [ + "male_samples = gtex_metadata[gtex_metadata[\"SEX\"] == \"Male\"].index.tolist()" + ] + }, + { + "cell_type": "code", + "execution_count": 14, + "id": "275350ae-f69f-4351-9a3e-0363a423487c", + "metadata": { + "execution": { + 
"iopub.execute_input": "2023-11-20T08:55:57.750681Z", + "iopub.status.busy": "2023-11-20T08:55:57.750313Z", + "iopub.status.idle": "2023-11-20T08:55:57.754061Z", + "shell.execute_reply": "2023-11-20T08:55:57.753594Z" + }, + "papermill": { + "duration": 0.012315, + "end_time": "2023-11-20T08:55:57.754893", + "exception": false, + "start_time": "2023-11-20T08:55:57.742578", + "status": "completed" + }, + "tags": [] + }, + "outputs": [ + { + "data": { + "text/plain": [ + "15046" + ] + }, + "execution_count": 14, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "len(male_samples)" + ] + }, + { + "cell_type": "code", + "execution_count": 15, + "id": "7f9ddb05-4328-4a98-921d-16c3bdff1c67", + "metadata": { + "execution": { + "iopub.execute_input": "2023-11-20T08:55:57.769965Z", + "iopub.status.busy": "2023-11-20T08:55:57.769676Z", + "iopub.status.idle": "2023-11-20T08:55:57.773333Z", + "shell.execute_reply": "2023-11-20T08:55:57.772871Z" + }, + "papermill": { + "duration": 0.012109, + "end_time": "2023-11-20T08:55:57.774177", + "exception": false, + "start_time": "2023-11-20T08:55:57.762068", + "status": "completed" + }, + "tags": [] + }, + "outputs": [ + { + "data": { + "text/plain": [ + "['GTEX-111CU-0003-SM-58Q95',\n", + " 'GTEX-111CU-0003-SM-5DWTR',\n", + " 'GTEX-111CU-0003-SM-6WBUD',\n", + " 'GTEX-111CU-0126-SM-5GZWZ',\n", + " 'GTEX-111CU-0226-SM-5GZXC']" + ] + }, + "execution_count": 15, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "male_samples[:5]" + ] + }, + { + "cell_type": "code", + "execution_count": 16, + "id": "dede5dad-0df7-40a1-8ea5-eab06fc3b9af", + "metadata": { + "execution": { + "iopub.execute_input": "2023-11-20T08:55:57.789526Z", + "iopub.status.busy": "2023-11-20T08:55:57.789169Z", + "iopub.status.idle": "2023-11-20T08:55:57.797293Z", + "shell.execute_reply": "2023-11-20T08:55:57.796743Z" + }, + "papermill": { + "duration": 0.016816, + "end_time": "2023-11-20T08:55:57.798240", + "exception": 
false, + "start_time": "2023-11-20T08:55:57.781424", + "status": "completed" + }, + "tags": [] + }, + "outputs": [], + "source": [ + "female_samples = gtex_metadata[gtex_metadata[\"SEX\"] == \"Female\"].index.tolist()" + ] + }, + { + "cell_type": "code", + "execution_count": 17, + "id": "57609f27-8bb6-4377-827b-ae9d55537be1", + "metadata": { + "execution": { + "iopub.execute_input": "2023-11-20T08:55:57.813663Z", + "iopub.status.busy": "2023-11-20T08:55:57.813372Z", + "iopub.status.idle": "2023-11-20T08:55:57.817067Z", + "shell.execute_reply": "2023-11-20T08:55:57.816608Z" + }, + "papermill": { + "duration": 0.012367, + "end_time": "2023-11-20T08:55:57.817919", + "exception": false, + "start_time": "2023-11-20T08:55:57.805552", + "status": "completed" + }, + "tags": [] + }, + "outputs": [ + { + "data": { + "text/plain": [ + "7905" + ] + }, + "execution_count": 17, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "len(female_samples)" + ] + }, + { + "cell_type": "code", + "execution_count": 18, + "id": "e8fea00b-9119-440b-b33a-fadbc2131210", + "metadata": { + "execution": { + "iopub.execute_input": "2023-11-20T08:55:57.833423Z", + "iopub.status.busy": "2023-11-20T08:55:57.833136Z", + "iopub.status.idle": "2023-11-20T08:55:57.836776Z", + "shell.execute_reply": "2023-11-20T08:55:57.836311Z" + }, + "papermill": { + "duration": 0.012323, + "end_time": "2023-11-20T08:55:57.837601", + "exception": false, + "start_time": "2023-11-20T08:55:57.825278", + "status": "completed" + }, + "tags": [] + }, + "outputs": [ + { + "data": { + "text/plain": [ + "['GTEX-1117F-0003-SM-58Q7G',\n", + " 'GTEX-1117F-0003-SM-5DWSB',\n", + " 'GTEX-1117F-0003-SM-6WBT7',\n", + " 'GTEX-1117F-0011-R10a-SM-AHZ7F',\n", + " 'GTEX-1117F-0011-R10b-SM-CYKQ8']" + ] + }, + "execution_count": 18, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "female_samples[:5]" + ] + }, + { + "cell_type": "markdown", + "id": "e5e52d3d-2d87-488a-ab48-d105b85fe46b", + 
"metadata": { + "papermill": { + "duration": 0.007277, + "end_time": "2023-11-20T08:55:57.852283", + "exception": false, + "start_time": "2023-11-20T08:55:57.845006", + "status": "completed" + }, + "tags": [] + }, + "source": [ + "# Brain cerebellum" + ] + }, + { + "cell_type": "code", + "execution_count": 19, + "id": "137ce1ba-751a-4758-91dc-2ad1e2fb57d6", + "metadata": { + "execution": { + "iopub.execute_input": "2023-11-20T08:55:57.868126Z", + "iopub.status.busy": "2023-11-20T08:55:57.867764Z", + "iopub.status.idle": "2023-11-20T08:55:57.971907Z", + "shell.execute_reply": "2023-11-20T08:55:57.971473Z" + }, + "papermill": { + "duration": 0.11334, + "end_time": "2023-11-20T08:55:57.973180", + "exception": false, + "start_time": "2023-11-20T08:55:57.859840", + "status": "completed" + }, + "tags": [] + }, + "outputs": [], + "source": [ + "brain_cerebellum = (\n", + " pd.read_pickle(TISSUE_DIR / \"gtex_v8_data_brain_cerebellum.pkl\")\n", + " .loc[[gene0_id, gene1_id]]\n", + " .T.rename_axis(\"sample_id\")\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": 20, + "id": "6e2eb857-c33c-4a65-a401-db3ced529623", + "metadata": { + "execution": { + "iopub.execute_input": "2023-11-20T08:55:57.989734Z", + "iopub.status.busy": "2023-11-20T08:55:57.989634Z", + "iopub.status.idle": "2023-11-20T08:55:57.992488Z", + "shell.execute_reply": "2023-11-20T08:55:57.992134Z" + }, + "papermill": { + "duration": 0.011606, + "end_time": "2023-11-20T08:55:57.993202", + "exception": false, + "start_time": "2023-11-20T08:55:57.981596", + "status": "completed" + }, + "tags": [] + }, + "outputs": [ + { + "data": { + "text/plain": [ + "(241, 2)" + ] + }, + "execution_count": 20, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "brain_cerebellum.shape" + ] + }, + { + "cell_type": "code", + "execution_count": 21, + "id": "61a489c4-de22-46ec-b795-8199d68123ae", + "metadata": { + "execution": { + "iopub.execute_input": "2023-11-20T08:55:58.008703Z", + 
"iopub.status.busy": "2023-11-20T08:55:58.008476Z", + "iopub.status.idle": "2023-11-20T08:55:58.012908Z", + "shell.execute_reply": "2023-11-20T08:55:58.012553Z" + }, + "papermill": { + "duration": 0.012911, + "end_time": "2023-11-20T08:55:58.013590", + "exception": false, + "start_time": "2023-11-20T08:55:58.000679", + "status": "completed" + }, + "tags": [] + }, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
gene_ens_idENSG00000147050.14ENSG00000183878.15
sample_id
GTEX-111FC-3326-SM-5GZYV9.2686.988
GTEX-1128S-2826-SM-5N9DI12.9300.000
GTEX-117XS-3126-SM-5GIDP10.8407.389
GTEX-1192X-3226-SM-5987D10.7805.132
GTEX-11DXW-1026-SM-5H11K10.0607.823
\n", + "
" + ], + "text/plain": [ + "gene_ens_id ENSG00000147050.14 ENSG00000183878.15\n", + "sample_id \n", + "GTEX-111FC-3326-SM-5GZYV 9.268 6.988\n", + "GTEX-1128S-2826-SM-5N9DI 12.930 0.000\n", + "GTEX-117XS-3126-SM-5GIDP 10.840 7.389\n", + "GTEX-1192X-3226-SM-5987D 10.780 5.132\n", + "GTEX-11DXW-1026-SM-5H11K 10.060 7.823" + ] + }, + "execution_count": 21, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "brain_cerebellum.head()" + ] + }, + { + "cell_type": "code", + "execution_count": 22, + "id": "23a5e80d-afbf-4780-b84d-ba4553421f8b", + "metadata": { + "execution": { + "iopub.execute_input": "2023-11-20T08:55:58.029435Z", + "iopub.status.busy": "2023-11-20T08:55:58.029164Z", + "iopub.status.idle": "2023-11-20T08:55:58.034801Z", + "shell.execute_reply": "2023-11-20T08:55:58.034365Z" + }, + "papermill": { + "duration": 0.014433, + "end_time": "2023-11-20T08:55:58.035617", + "exception": false, + "start_time": "2023-11-20T08:55:58.021184", + "status": "completed" + }, + "tags": [] + }, + "outputs": [], + "source": [ + "brain_cerebellum_males = brain_cerebellum.loc[\n", + " brain_cerebellum.index.intersection(male_samples)\n", + "]" + ] + }, + { + "cell_type": "code", + "execution_count": 23, + "id": "38730891-f083-4c34-8530-30f511003abc", + "metadata": { + "execution": { + "iopub.execute_input": "2023-11-20T08:55:58.051552Z", + "iopub.status.busy": "2023-11-20T08:55:58.051264Z", + "iopub.status.idle": "2023-11-20T08:55:58.054819Z", + "shell.execute_reply": "2023-11-20T08:55:58.054358Z" + }, + "papermill": { + "duration": 0.012427, + "end_time": "2023-11-20T08:55:58.055626", + "exception": false, + "start_time": "2023-11-20T08:55:58.043199", + "status": "completed" + }, + "tags": [] + }, + "outputs": [ + { + "data": { + "text/plain": [ + "(174, 2)" + ] + }, + "execution_count": 23, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "brain_cerebellum_males.shape" + ] + }, + { + "cell_type": "code", + "execution_count": 
24, + "id": "2081927d-7970-4805-891c-c8bc76a2351f", + "metadata": { + "execution": { + "iopub.execute_input": "2023-11-20T08:55:58.071750Z", + "iopub.status.busy": "2023-11-20T08:55:58.071458Z", + "iopub.status.idle": "2023-11-20T08:55:58.076640Z", + "shell.execute_reply": "2023-11-20T08:55:58.076155Z" + }, + "papermill": { + "duration": 0.014105, + "end_time": "2023-11-20T08:55:58.077423", + "exception": false, + "start_time": "2023-11-20T08:55:58.063318", + "status": "completed" + }, + "tags": [] + }, + "outputs": [], + "source": [ + "brain_cerebellum_females = brain_cerebellum.loc[\n", + " brain_cerebellum.index.intersection(female_samples)\n", + "]" + ] + }, + { + "cell_type": "code", + "execution_count": 25, + "id": "23e75a4b-0c77-470d-9a7b-4d07909be735", + "metadata": { + "execution": { + "iopub.execute_input": "2023-11-20T08:55:58.097064Z", + "iopub.status.busy": "2023-11-20T08:55:58.096854Z", + "iopub.status.idle": "2023-11-20T08:55:58.105749Z", + "shell.execute_reply": "2023-11-20T08:55:58.105289Z" + }, + "papermill": { + "duration": 0.021495, + "end_time": "2023-11-20T08:55:58.106610", + "exception": false, + "start_time": "2023-11-20T08:55:58.085115", + "status": "completed" + }, + "tags": [] + }, + "outputs": [ + { + "data": { + "text/plain": [ + "(67, 2)" + ] + }, + "execution_count": 25, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "brain_cerebellum_females.shape" + ] + }, + { + "cell_type": "markdown", + "id": "47ed8fc3-e037-405a-bd25-010665c91132", + "metadata": { + "papermill": { + "duration": 0.007803, + "end_time": "2023-11-20T08:55:58.122565", + "exception": false, + "start_time": "2023-11-20T08:55:58.114762", + "status": "completed" + }, + "tags": [] + }, + "source": [ + "# Small intestine (terminal ileum)" + ] + }, + { + "cell_type": "code", + "execution_count": 26, + "id": "695487c0-35c7-4a5e-b91c-4e8ee47b0295", + "metadata": { + "execution": { + "iopub.execute_input": "2023-11-20T08:55:58.139576Z", + 
"iopub.status.busy": "2023-11-20T08:55:58.139132Z", + "iopub.status.idle": "2023-11-20T08:55:58.230646Z", + "shell.execute_reply": "2023-11-20T08:55:58.230353Z" + }, + "papermill": { + "duration": 0.100976, + "end_time": "2023-11-20T08:55:58.231502", + "exception": false, + "start_time": "2023-11-20T08:55:58.130526", + "status": "completed" + }, + "tags": [] + }, + "outputs": [], + "source": [ + "small_intestine = (\n", + " pd.read_pickle(TISSUE_DIR / \"gtex_v8_data_small_intestine_terminal_ileum.pkl\")\n", + " .loc[[gene0_id, gene1_id]]\n", + " .T.rename_axis(\"sample_id\")\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": 27, + "id": "a84a00a6-b51f-4a79-b056-0f515f526c95", + "metadata": { + "execution": { + "iopub.execute_input": "2023-11-20T08:55:58.247529Z", + "iopub.status.busy": "2023-11-20T08:55:58.247442Z", + "iopub.status.idle": "2023-11-20T08:55:58.250183Z", + "shell.execute_reply": "2023-11-20T08:55:58.249901Z" + }, + "papermill": { + "duration": 0.011604, + "end_time": "2023-11-20T08:55:58.250884", + "exception": false, + "start_time": "2023-11-20T08:55:58.239280", + "status": "completed" + }, + "tags": [] + }, + "outputs": [ + { + "data": { + "text/plain": [ + "(187, 2)" + ] + }, + "execution_count": 27, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "small_intestine.shape" + ] + }, + { + "cell_type": "code", + "execution_count": 28, + "id": "d9ef287c-3f2a-46da-99a8-2b6688379fc8", + "metadata": { + "execution": { + "iopub.execute_input": "2023-11-20T08:55:58.267183Z", + "iopub.status.busy": "2023-11-20T08:55:58.267093Z", + "iopub.status.idle": "2023-11-20T08:55:58.271467Z", + "shell.execute_reply": "2023-11-20T08:55:58.271146Z" + }, + "papermill": { + "duration": 0.013258, + "end_time": "2023-11-20T08:55:58.272186", + "exception": false, + "start_time": "2023-11-20T08:55:58.258928", + "status": "completed" + }, + "tags": [] + }, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
gene_ens_idENSG00000147050.14ENSG00000183878.15
sample_id
GTEX-111CU-1326-SM-5NQ8L7.5864.0370
GTEX-111YS-1426-SM-5GID813.5003.3700
GTEX-1122O-1326-SM-5H11F21.6800.0309
GTEX-117YX-0326-SM-5GICL8.9645.2350
GTEX-1192X-2526-SM-5986810.0106.3990
\n", + "
" + ], + "text/plain": [ + "gene_ens_id ENSG00000147050.14 ENSG00000183878.15\n", + "sample_id \n", + "GTEX-111CU-1326-SM-5NQ8L 7.586 4.0370\n", + "GTEX-111YS-1426-SM-5GID8 13.500 3.3700\n", + "GTEX-1122O-1326-SM-5H11F 21.680 0.0309\n", + "GTEX-117YX-0326-SM-5GICL 8.964 5.2350\n", + "GTEX-1192X-2526-SM-59868 10.010 6.3990" + ] + }, + "execution_count": 28, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "small_intestine.head()" + ] + }, + { + "cell_type": "code", + "execution_count": 29, + "id": "e485ed5e-6d56-4432-8fcb-dd2956dbe605", + "metadata": { + "execution": { + "iopub.execute_input": "2023-11-20T08:55:58.288638Z", + "iopub.status.busy": "2023-11-20T08:55:58.288544Z", + "iopub.status.idle": "2023-11-20T08:55:58.293896Z", + "shell.execute_reply": "2023-11-20T08:55:58.293520Z" + }, + "papermill": { + "duration": 0.014431, + "end_time": "2023-11-20T08:55:58.294649", + "exception": false, + "start_time": "2023-11-20T08:55:58.280218", + "status": "completed" + }, + "tags": [] + }, + "outputs": [], + "source": [ + "small_intestine_males = small_intestine.loc[\n", + " small_intestine.index.intersection(male_samples)\n", + "]" + ] + }, + { + "cell_type": "code", + "execution_count": 30, + "id": "558a0a54-558a-4af6-baec-4dd1189909cc", + "metadata": { + "execution": { + "iopub.execute_input": "2023-11-20T08:55:58.311601Z", + "iopub.status.busy": "2023-11-20T08:55:58.311263Z", + "iopub.status.idle": "2023-11-20T08:55:58.314724Z", + "shell.execute_reply": "2023-11-20T08:55:58.314331Z" + }, + "papermill": { + "duration": 0.01288, + "end_time": "2023-11-20T08:55:58.315541", + "exception": false, + "start_time": "2023-11-20T08:55:58.302661", + "status": "completed" + }, + "tags": [] + }, + "outputs": [ + { + "data": { + "text/plain": [ + "(120, 2)" + ] + }, + "execution_count": 30, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "small_intestine_males.shape" + ] + }, + { + "cell_type": "code", + "execution_count": 
31, + "id": "5094eb5e-7d8d-408f-9e7e-3f73fa8e91ed", + "metadata": { + "execution": { + "iopub.execute_input": "2023-11-20T08:55:58.332677Z", + "iopub.status.busy": "2023-11-20T08:55:58.332302Z", + "iopub.status.idle": "2023-11-20T08:55:58.337054Z", + "shell.execute_reply": "2023-11-20T08:55:58.336655Z" + }, + "papermill": { + "duration": 0.01416, + "end_time": "2023-11-20T08:55:58.337830", + "exception": false, + "start_time": "2023-11-20T08:55:58.323670", + "status": "completed" + }, + "tags": [] + }, + "outputs": [], + "source": [ + "small_intestine_females = small_intestine.loc[\n", + " small_intestine.index.intersection(female_samples)\n", + "]" + ] + }, + { + "cell_type": "code", + "execution_count": 32, + "id": "81168021-bd56-45f8-9acc-0d6e5a1c7ba5", + "metadata": { + "execution": { + "iopub.execute_input": "2023-11-20T08:55:58.354948Z", + "iopub.status.busy": "2023-11-20T08:55:58.354661Z", + "iopub.status.idle": "2023-11-20T08:55:58.358062Z", + "shell.execute_reply": "2023-11-20T08:55:58.357659Z" + }, + "papermill": { + "duration": 0.012847, + "end_time": "2023-11-20T08:55:58.358878", + "exception": false, + "start_time": "2023-11-20T08:55:58.346031", + "status": "completed" + }, + "tags": [] + }, + "outputs": [ + { + "data": { + "text/plain": [ + "(67, 2)" + ] + }, + "execution_count": 32, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "small_intestine_females.shape" + ] + }, + { + "cell_type": "markdown", + "id": "67e48d49-8a28-45b1-b8bd-0fd85db559b6", + "metadata": { + "papermill": { + "duration": 0.008076, + "end_time": "2023-11-20T08:55:58.375169", + "exception": false, + "start_time": "2023-11-20T08:55:58.367093", + "status": "completed" + }, + "tags": [] + }, + "source": [ + "# Compute correlation" + ] + }, + { + "cell_type": "markdown", + "id": "cfabd8a6-ac3b-4ba0-963e-689236871151", + "metadata": { + "papermill": { + "duration": 0.008133, + "end_time": "2023-11-20T08:55:58.391481", + "exception": false, + "start_time": 
"2023-11-20T08:55:58.383348", + "status": "completed" + }, + "tags": [] + }, + "source": [ + "## Brain cerebellum" + ] + }, + { + "cell_type": "markdown", + "id": "44e81899-ebee-4625-ba7f-7ce82dc25eca", + "metadata": { + "papermill": { + "duration": 0.008075, + "end_time": "2023-11-20T08:55:58.407755", + "exception": false, + "start_time": "2023-11-20T08:55:58.399680", + "status": "completed" + }, + "tags": [] + }, + "source": [ + "### CCC" + ] + }, + { + "cell_type": "code", + "execution_count": 33, + "id": "0adc0cb7-6a22-49be-aafc-894aebb42dde", + "metadata": { + "execution": { + "iopub.execute_input": "2023-11-20T08:55:58.424934Z", + "iopub.status.busy": "2023-11-20T08:55:58.424612Z", + "iopub.status.idle": "2023-11-20T08:55:59.027926Z", + "shell.execute_reply": "2023-11-20T08:55:59.027542Z" + }, + "papermill": { + "duration": 0.613147, + "end_time": "2023-11-20T08:55:59.029090", + "exception": false, + "start_time": "2023-11-20T08:55:58.415943", + "status": "completed" + }, + "tags": [] + }, + "outputs": [ + { + "data": { + "text/plain": [ + "(0.3674627535203262, 0.000999000999000999)" + ] + }, + "execution_count": 33, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "ccc(brain_cerebellum_males, pvalue_n_perms=1000)" + ] + }, + { + "cell_type": "code", + "execution_count": 34, + "id": "2d8d145a-f9bd-42eb-89ae-beee42621083", + "metadata": { + "execution": { + "iopub.execute_input": "2023-11-20T08:55:59.049822Z", + "iopub.status.busy": "2023-11-20T08:55:59.049738Z", + "iopub.status.idle": "2023-11-20T08:55:59.386803Z", + "shell.execute_reply": "2023-11-20T08:55:59.386444Z" + }, + "papermill": { + "duration": 0.349521, + "end_time": "2023-11-20T08:55:59.388102", + "exception": false, + "start_time": "2023-11-20T08:55:59.038581", + "status": "completed" + }, + "tags": [] + }, + "outputs": [ + { + "data": { + "text/plain": [ + "(0.10427807486631016, 0.015984015984015984)" + ] + }, + "execution_count": 34, + "metadata": {}, + 
"output_type": "execute_result" + } + ], + "source": [ + "ccc(brain_cerebellum_females, pvalue_n_perms=1000)" + ] + }, + { + "cell_type": "markdown", + "id": "439a1bce-e610-47b8-aa63-d49c5402c727", + "metadata": { + "papermill": { + "duration": 0.015606, + "end_time": "2023-11-20T08:55:59.419596", + "exception": false, + "start_time": "2023-11-20T08:55:59.403990", + "status": "completed" + }, + "tags": [] + }, + "source": [ + "### Pearson" + ] + }, + { + "cell_type": "code", + "execution_count": 35, + "id": "d7ef4ad5-afbe-4365-9d90-3fecd0f382cc", + "metadata": { + "execution": { + "iopub.execute_input": "2023-11-20T08:55:59.453572Z", + "iopub.status.busy": "2023-11-20T08:55:59.453491Z", + "iopub.status.idle": "2023-11-20T08:55:59.456801Z", + "shell.execute_reply": "2023-11-20T08:55:59.456505Z" + }, + "papermill": { + "duration": 0.022649, + "end_time": "2023-11-20T08:55:59.458058", + "exception": false, + "start_time": "2023-11-20T08:55:59.435409", + "status": "completed" + }, + "tags": [] + }, + "outputs": [ + { + "data": { + "text/plain": [ + "(0.8570928082720543, 2.0135444417639445e-51)" + ] + }, + "execution_count": 35, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "pearsonr(brain_cerebellum_males.iloc[:, 0], brain_cerebellum_males.iloc[:, 1])" + ] + }, + { + "cell_type": "code", + "execution_count": 36, + "id": "4b4bb182-418b-4ef6-997d-0a7569a794c4", + "metadata": { + "execution": { + "iopub.execute_input": "2023-11-20T08:55:59.483075Z", + "iopub.status.busy": "2023-11-20T08:55:59.482995Z", + "iopub.status.idle": "2023-11-20T08:55:59.486455Z", + "shell.execute_reply": "2023-11-20T08:55:59.486022Z" + }, + "papermill": { + "duration": 0.01374, + "end_time": "2023-11-20T08:55:59.487298", + "exception": false, + "start_time": "2023-11-20T08:55:59.473558", + "status": "completed" + }, + "tags": [] + }, + "outputs": [ + { + "data": { + "text/plain": [ + "(0.21500691975584293, 0.08058770847518525)" + ] + }, + "execution_count": 36, + 
"metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "pearsonr(brain_cerebellum_females.iloc[:, 0], brain_cerebellum_females.iloc[:, 1])" + ] + }, + { + "cell_type": "markdown", + "id": "105fd413-62ed-4fcf-baf6-35adfd5de8c6", + "metadata": { + "papermill": { + "duration": 0.008299, + "end_time": "2023-11-20T08:55:59.504020", + "exception": false, + "start_time": "2023-11-20T08:55:59.495721", + "status": "completed" + }, + "tags": [] + }, + "source": [ + "### Spearman" + ] + }, + { + "cell_type": "code", + "execution_count": 37, + "id": "e5712802-eb2c-4779-987f-ccd4817c6070", + "metadata": { + "execution": { + "iopub.execute_input": "2023-11-20T08:55:59.521582Z", + "iopub.status.busy": "2023-11-20T08:55:59.521432Z", + "iopub.status.idle": "2023-11-20T08:55:59.526539Z", + "shell.execute_reply": "2023-11-20T08:55:59.526078Z" + }, + "papermill": { + "duration": 0.014912, + "end_time": "2023-11-20T08:55:59.527335", + "exception": false, + "start_time": "2023-11-20T08:55:59.512423", + "status": "completed" + }, + "tags": [] + }, + "outputs": [ + { + "data": { + "text/plain": [ + "SpearmanrResult(correlation=0.7909406514931421, pvalue=1.5562774794999233e-38)" + ] + }, + "execution_count": 37, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "spearmanr(brain_cerebellum_males.iloc[:, 0], brain_cerebellum_males.iloc[:, 1])" + ] + }, + { + "cell_type": "code", + "execution_count": 38, + "id": "290b1ebf-91cc-4032-a0d8-fa147d39aaeb", + "metadata": { + "execution": { + "iopub.execute_input": "2023-11-20T08:55:59.545195Z", + "iopub.status.busy": "2023-11-20T08:55:59.544867Z", + "iopub.status.idle": "2023-11-20T08:55:59.549626Z", + "shell.execute_reply": "2023-11-20T08:55:59.549230Z" + }, + "papermill": { + "duration": 0.014564, + "end_time": "2023-11-20T08:55:59.550412", + "exception": false, + "start_time": "2023-11-20T08:55:59.535848", + "status": "completed" + }, + "tags": [] + }, + "outputs": [ + { + "data": { + "text/plain": 
[ + "SpearmanrResult(correlation=0.2225445085313031, pvalue=0.07028065513936946)" + ] + }, + "execution_count": 38, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "spearmanr(brain_cerebellum_females.iloc[:, 0], brain_cerebellum_females.iloc[:, 1])" + ] + }, + { + "cell_type": "markdown", + "id": "a1d1d5c7-dbc0-440a-8eec-31aa252868e6", + "metadata": { + "papermill": { + "duration": 0.008566, + "end_time": "2023-11-20T08:55:59.567627", + "exception": false, + "start_time": "2023-11-20T08:55:59.559061", + "status": "completed" + }, + "tags": [] + }, + "source": [ + "## Small intestine (terminal ileum)" + ] + }, + { + "cell_type": "markdown", + "id": "6fc2d04e-39fb-4d55-919b-3f2f830386a0", + "metadata": { + "papermill": { + "duration": 0.008572, + "end_time": "2023-11-20T08:55:59.584887", + "exception": false, + "start_time": "2023-11-20T08:55:59.576315", + "status": "completed" + }, + "tags": [] + }, + "source": [ + "### CCC" + ] + }, + { + "cell_type": "code", + "execution_count": 39, + "id": "ebe913d2-d02e-4515-9742-a9303f527395", + "metadata": { + "execution": { + "iopub.execute_input": "2023-11-20T08:55:59.603192Z", + "iopub.status.busy": "2023-11-20T08:55:59.602834Z", + "iopub.status.idle": "2023-11-20T08:56:00.192183Z", + "shell.execute_reply": "2023-11-20T08:56:00.191803Z" + }, + "papermill": { + "duration": 0.599662, + "end_time": "2023-11-20T08:56:00.193244", + "exception": false, + "start_time": "2023-11-20T08:55:59.593582", + "status": "completed" + }, + "tags": [] + }, + "outputs": [ + { + "data": { + "text/plain": [ + "(0.13137865911237015, 0.000999000999000999)" + ] + }, + "execution_count": 39, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "ccc(small_intestine_males, pvalue_n_perms=1000)" + ] + }, + { + "cell_type": "code", + "execution_count": 40, + "id": "f173bde0-9cf2-47fa-af40-739845d16247", + "metadata": { + "execution": { + "iopub.execute_input": "2023-11-20T08:56:00.214571Z", + 
"iopub.status.busy": "2023-11-20T08:56:00.214489Z", + "iopub.status.idle": "2023-11-20T08:56:00.557902Z", + "shell.execute_reply": "2023-11-20T08:56:00.557502Z" + }, + "papermill": { + "duration": 0.355935, + "end_time": "2023-11-20T08:56:00.559121", + "exception": false, + "start_time": "2023-11-20T08:56:00.203186", + "status": "completed" + }, + "tags": [] + }, + "outputs": [ + { + "data": { + "text/plain": [ + "(0.04191990815775257, 0.35264735264735264)" + ] + }, + "execution_count": 40, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "ccc(small_intestine_females, pvalue_n_perms=1000)" + ] + }, + { + "cell_type": "markdown", + "id": "867e5576-5224-46fe-ab7a-0366b8019b7b", + "metadata": { + "papermill": { + "duration": 0.016368, + "end_time": "2023-11-20T08:56:00.592088", + "exception": false, + "start_time": "2023-11-20T08:56:00.575720", + "status": "completed" + }, + "tags": [] + }, + "source": [ + "### Pearson" + ] + }, + { + "cell_type": "code", + "execution_count": 41, + "id": "75f54397-bfb4-467d-b329-fb159d87ba77", + "metadata": { + "execution": { + "iopub.execute_input": "2023-11-20T08:56:00.625863Z", + "iopub.status.busy": "2023-11-20T08:56:00.625776Z", + "iopub.status.idle": "2023-11-20T08:56:00.629256Z", + "shell.execute_reply": "2023-11-20T08:56:00.628844Z" + }, + "papermill": { + "duration": 0.021756, + "end_time": "2023-11-20T08:56:00.630532", + "exception": false, + "start_time": "2023-11-20T08:56:00.608776", + "status": "completed" + }, + "tags": [] + }, + "outputs": [ + { + "data": { + "text/plain": [ + "(0.5401927091270222, 1.918887071018949e-10)" + ] + }, + "execution_count": 41, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "pearsonr(small_intestine_males.iloc[:, 0], small_intestine_males.iloc[:, 1])" + ] + }, + { + "cell_type": "code", + "execution_count": 42, + "id": "181af358-ac4a-400e-bc46-5213cf866c50", + "metadata": { + "execution": { + "iopub.execute_input": 
"2023-11-20T08:56:00.658608Z", + "iopub.status.busy": "2023-11-20T08:56:00.658303Z", + "iopub.status.idle": "2023-11-20T08:56:00.662418Z", + "shell.execute_reply": "2023-11-20T08:56:00.662026Z" + }, + "papermill": { + "duration": 0.015931, + "end_time": "2023-11-20T08:56:00.663236", + "exception": false, + "start_time": "2023-11-20T08:56:00.647305", + "status": "completed" + }, + "tags": [] + }, + "outputs": [ + { + "data": { + "text/plain": [ + "(-0.12377555020943821, 0.3183161563369864)" + ] + }, + "execution_count": 42, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "pearsonr(small_intestine_females.iloc[:, 0], small_intestine_females.iloc[:, 1])" + ] + }, + { + "cell_type": "markdown", + "id": "f21bd584-c23b-4026-8caa-fcec19f7752e", + "metadata": { + "papermill": { + "duration": 0.008623, + "end_time": "2023-11-20T08:56:00.680667", + "exception": false, + "start_time": "2023-11-20T08:56:00.672044", + "status": "completed" + }, + "tags": [] + }, + "source": [ + "### Spearman" + ] + }, + { + "cell_type": "code", + "execution_count": 43, + "id": "f3eef972-8259-4a03-8b1e-24cb05cf7c57", + "metadata": { + "execution": { + "iopub.execute_input": "2023-11-20T08:56:00.699282Z", + "iopub.status.busy": "2023-11-20T08:56:00.698900Z", + "iopub.status.idle": "2023-11-20T08:56:00.703998Z", + "shell.execute_reply": "2023-11-20T08:56:00.703548Z" + }, + "papermill": { + "duration": 0.015248, + "end_time": "2023-11-20T08:56:00.704802", + "exception": false, + "start_time": "2023-11-20T08:56:00.689554", + "status": "completed" + }, + "tags": [] + }, + "outputs": [ + { + "data": { + "text/plain": [ + "SpearmanrResult(correlation=0.46555777415724725, pvalue=8.417733469299811e-08)" + ] + }, + "execution_count": 43, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "spearmanr(small_intestine_males.iloc[:, 0], small_intestine_males.iloc[:, 1])" + ] + }, + { + "cell_type": "code", + "execution_count": 44, + "id": 
"9dfdbf5a-fca2-4a80-a8af-fe5fa3887c66", + "metadata": { + "execution": { + "iopub.execute_input": "2023-11-20T08:56:00.723658Z", + "iopub.status.busy": "2023-11-20T08:56:00.723226Z", + "iopub.status.idle": "2023-11-20T08:56:00.727952Z", + "shell.execute_reply": "2023-11-20T08:56:00.727552Z" + }, + "papermill": { + "duration": 0.014948, + "end_time": "2023-11-20T08:56:00.728742", + "exception": false, + "start_time": "2023-11-20T08:56:00.713794", + "status": "completed" + }, + "tags": [] + }, + "outputs": [ + { + "data": { + "text/plain": [ + "SpearmanrResult(correlation=0.1041356321278035, pvalue=0.40167737622824995)" + ] + }, + "execution_count": 44, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "spearmanr(small_intestine_females.iloc[:, 0], small_intestine_females.iloc[:, 1])" + ] + }, + { + "cell_type": "markdown", + "id": "ecd455af-8f12-484d-9338-140f69aedb75", + "metadata": { + "papermill": { + "duration": 0.008966, + "end_time": "2023-11-20T08:56:00.746800", + "exception": false, + "start_time": "2023-11-20T08:56:00.737834", + "status": "completed" + }, + "tags": [] + }, + "source": [ + "# Compute correlation on all tissues, males only" + ] + }, + { + "cell_type": "code", + "execution_count": 45, + "id": "8b86aa69-c53b-4906-b8fd-fa97b9f90462", + "metadata": { + "execution": { + "iopub.execute_input": "2023-11-20T08:56:00.765779Z", + "iopub.status.busy": "2023-11-20T08:56:00.765437Z", + "iopub.status.idle": "2023-11-20T08:56:05.764878Z", + "shell.execute_reply": "2023-11-20T08:56:05.764531Z" + }, + "papermill": { + "duration": 5.01057, + "end_time": "2023-11-20T08:56:05.766475", + "exception": false, + "start_time": "2023-11-20T08:56:00.755905", + "status": "completed" + }, + "tags": [] + }, + "outputs": [], + "source": [ + "res_all_males = pd.DataFrame(\n", + " {\n", + " f.stem.split(\"_data_\")[1]: {\n", + " \"ccc\": ccc(data[gene0_id], data[gene1_id]),\n", + " \"pearson\": pearsonr(data[gene0_id], data[gene1_id])[0],\n", + " 
\"spearman\": spearmanr(data[gene0_id], data[gene1_id])[0],\n", + " }\n", + " for f in TISSUE_DIR.glob(\"*.pkl\")\n", + " if (\n", + " data := pd.read_pickle(f)\n", + " .T[[gene0_id, gene1_id]]\n", + " .reindex(male_samples)\n", + " .dropna()\n", + " )\n", + " is not None\n", + " and data.shape[0] > 10\n", + " }\n", + ").T.abs()" + ] + }, + { + "cell_type": "code", + "execution_count": 46, + "id": "93b8e40d-a049-4272-81b0-6c4f45fa1df6", + "metadata": { + "execution": { + "iopub.execute_input": "2023-11-20T08:56:05.801558Z", + "iopub.status.busy": "2023-11-20T08:56:05.801475Z", + "iopub.status.idle": "2023-11-20T08:56:05.804294Z", + "shell.execute_reply": "2023-11-20T08:56:05.804009Z" + }, + "papermill": { + "duration": 0.021576, + "end_time": "2023-11-20T08:56:05.805494", + "exception": false, + "start_time": "2023-11-20T08:56:05.783918", + "status": "completed" + }, + "tags": [] + }, + "outputs": [ + { + "data": { + "text/plain": [ + "(47, 3)" + ] + }, + "execution_count": 46, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "res_all_males.shape" + ] + }, + { + "cell_type": "code", + "execution_count": 47, + "id": "52c86cee-59b3-41a7-ac71-8a000d9c472f", + "metadata": { + "execution": { + "iopub.execute_input": "2023-11-20T08:56:05.840469Z", + "iopub.status.busy": "2023-11-20T08:56:05.840392Z", + "iopub.status.idle": "2023-11-20T08:56:05.844909Z", + "shell.execute_reply": "2023-11-20T08:56:05.844558Z" + }, + "papermill": { + "duration": 0.023309, + "end_time": "2023-11-20T08:56:05.846180", + "exception": false, + "start_time": "2023-11-20T08:56:05.822871", + "status": "completed" + }, + "tags": [] + }, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
cccpearsonspearman
colon_transverse0.2374850.7047030.691430
brain_amygdala0.5407820.8803060.914251
artery_coronary0.4104580.8610730.801901
artery_aorta0.4281660.8149620.810471
adrenal_gland0.2614430.6579660.682019
\n", + "
" + ], + "text/plain": [ + " ccc pearson spearman\n", + "colon_transverse 0.237485 0.704703 0.691430\n", + "brain_amygdala 0.540782 0.880306 0.914251\n", + "artery_coronary 0.410458 0.861073 0.801901\n", + "artery_aorta 0.428166 0.814962 0.810471\n", + "adrenal_gland 0.261443 0.657966 0.682019" + ] + }, + "execution_count": 47, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "res_all_males.head()" + ] + }, + { + "cell_type": "code", + "execution_count": 48, + "id": "58840394-da1e-499d-b3a3-4c2292a57fbe", + "metadata": { + "execution": { + "iopub.execute_input": "2023-11-20T08:56:05.873387Z", + "iopub.status.busy": "2023-11-20T08:56:05.873052Z", + "iopub.status.idle": "2023-11-20T08:56:05.884369Z", + "shell.execute_reply": "2023-11-20T08:56:05.883923Z" + }, + "papermill": { + "duration": 0.022072, + "end_time": "2023-11-20T08:56:05.885172", + "exception": false, + "start_time": "2023-11-20T08:56:05.863100", + "status": "completed" + }, + "tags": [] + }, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " 
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
cccpearsonspearman
small_intestine_terminal_ileum0.1313790.5401930.465558
stomach0.1866050.5460080.631823
liver0.1892740.6198300.618370
muscle_skeletal0.2054500.6485360.658019
testis0.2067040.6926610.690990
pituitary0.2175990.6298300.643730
colon_transverse0.2374850.7047030.691430
thyroid0.2552400.6759180.697297
adrenal_gland0.2614430.6579660.682019
colon_sigmoid0.3019710.7840190.769884
nerve_tibial0.3237450.8199430.772638
artery_tibial0.3366750.7820140.776460
prostate0.3475970.8040460.799923
cells_ebvtransformed_lymphocytes0.3628830.8752650.827843
brain_cerebellum0.3674630.8570930.790941
adipose_subcutaneous0.3805010.8269600.807961
lung0.3831480.7776880.767171
skin_sun_exposed_lower_leg0.3977420.8068170.799174
esophagus_mucosa0.4033100.8435000.850711
esophagus_muscularis0.4066280.8593940.849154
brain_cerebellar_hemisphere0.4100690.8819020.862051
artery_coronary0.4104580.8610730.801901
esophagus_gastroesophageal_junction0.4194010.8497060.843063
minor_salivary_gland0.4202460.8052880.804937
brain_anterior_cingulate_cortex_ba240.4261460.8352510.874480
adipose_visceral_omentum0.4274600.8775320.863902
artery_aorta0.4281660.8149620.810471
brain_frontal_cortex_ba90.4320360.8541440.876284
cells_cultured_fibroblasts0.4346820.7724100.791284
brain_cortex0.4438140.8848410.889380
pancreas0.4613640.8968960.871488
heart_atrial_appendage0.4877680.8350580.833513
spleen0.5069820.8078280.858734
skin_not_sun_exposed_suprapubic0.5280960.8557960.859677
brain_caudate_basal_ganglia0.5364270.8622350.894522
brain_amygdala0.5407820.8803060.914251
brain_putamen_basal_ganglia0.5500230.9153800.929241
brain_hippocampus0.5997050.8999030.916020
brain_nucleus_accumbens_basal_ganglia0.6065690.9205630.941653
brain_hypothalamus0.6093380.9286250.946419
whole_blood0.6145610.9113540.929595
bladder0.6230770.9476060.898901
heart_left_ventricle0.6322310.9244070.923212
brain_substantia_nigra0.6395690.9041600.922021
kidney_cortex0.6642560.9335450.927273
brain_spinal_cord_cervical_c10.7079890.9244380.923254
breast_mammary_tissue0.7194900.9130810.937118
\n", + "
" + ], + "text/plain": [ + " ccc pearson spearman\n", + "small_intestine_terminal_ileum 0.131379 0.540193 0.465558\n", + "stomach 0.186605 0.546008 0.631823\n", + "liver 0.189274 0.619830 0.618370\n", + "muscle_skeletal 0.205450 0.648536 0.658019\n", + "testis 0.206704 0.692661 0.690990\n", + "pituitary 0.217599 0.629830 0.643730\n", + "colon_transverse 0.237485 0.704703 0.691430\n", + "thyroid 0.255240 0.675918 0.697297\n", + "adrenal_gland 0.261443 0.657966 0.682019\n", + "colon_sigmoid 0.301971 0.784019 0.769884\n", + "nerve_tibial 0.323745 0.819943 0.772638\n", + "artery_tibial 0.336675 0.782014 0.776460\n", + "prostate 0.347597 0.804046 0.799923\n", + "cells_ebvtransformed_lymphocytes 0.362883 0.875265 0.827843\n", + "brain_cerebellum 0.367463 0.857093 0.790941\n", + "adipose_subcutaneous 0.380501 0.826960 0.807961\n", + "lung 0.383148 0.777688 0.767171\n", + "skin_sun_exposed_lower_leg 0.397742 0.806817 0.799174\n", + "esophagus_mucosa 0.403310 0.843500 0.850711\n", + "esophagus_muscularis 0.406628 0.859394 0.849154\n", + "brain_cerebellar_hemisphere 0.410069 0.881902 0.862051\n", + "artery_coronary 0.410458 0.861073 0.801901\n", + "esophagus_gastroesophageal_junction 0.419401 0.849706 0.843063\n", + "minor_salivary_gland 0.420246 0.805288 0.804937\n", + "brain_anterior_cingulate_cortex_ba24 0.426146 0.835251 0.874480\n", + "adipose_visceral_omentum 0.427460 0.877532 0.863902\n", + "artery_aorta 0.428166 0.814962 0.810471\n", + "brain_frontal_cortex_ba9 0.432036 0.854144 0.876284\n", + "cells_cultured_fibroblasts 0.434682 0.772410 0.791284\n", + "brain_cortex 0.443814 0.884841 0.889380\n", + "pancreas 0.461364 0.896896 0.871488\n", + "heart_atrial_appendage 0.487768 0.835058 0.833513\n", + "spleen 0.506982 0.807828 0.858734\n", + "skin_not_sun_exposed_suprapubic 0.528096 0.855796 0.859677\n", + "brain_caudate_basal_ganglia 0.536427 0.862235 0.894522\n", + "brain_amygdala 0.540782 0.880306 0.914251\n", + "brain_putamen_basal_ganglia 0.550023 0.915380 
0.929241\n", + "brain_hippocampus 0.599705 0.899903 0.916020\n", + "brain_nucleus_accumbens_basal_ganglia 0.606569 0.920563 0.941653\n", + "brain_hypothalamus 0.609338 0.928625 0.946419\n", + "whole_blood 0.614561 0.911354 0.929595\n", + "bladder 0.623077 0.947606 0.898901\n", + "heart_left_ventricle 0.632231 0.924407 0.923212\n", + "brain_substantia_nigra 0.639569 0.904160 0.922021\n", + "kidney_cortex 0.664256 0.933545 0.927273\n", + "brain_spinal_cord_cervical_c1 0.707989 0.924438 0.923254\n", + "breast_mammary_tissue 0.719490 0.913081 0.937118" + ] + }, + "execution_count": 48, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "res_all_males.sort_values(\"ccc\")" + ] + }, + { + "cell_type": "code", + "execution_count": 49, + "id": "21684890-5dc8-4ad4-a7bc-693caccb386e", + "metadata": { + "execution": { + "iopub.execute_input": "2023-11-20T08:56:05.904956Z", + "iopub.status.busy": "2023-11-20T08:56:05.904807Z", + "iopub.status.idle": "2023-11-20T08:56:05.915624Z", + "shell.execute_reply": "2023-11-20T08:56:05.915230Z" + }, + "papermill": { + "duration": 0.021518, + "end_time": "2023-11-20T08:56:05.916408", + "exception": false, + "start_time": "2023-11-20T08:56:05.894890", + "status": "completed" + }, + "tags": [] + }, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " 
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
cccpearsonspearman
small_intestine_terminal_ileum0.1313790.5401930.465558
stomach0.1866050.5460080.631823
liver0.1892740.6198300.618370
pituitary0.2175990.6298300.643730
muscle_skeletal0.2054500.6485360.658019
adrenal_gland0.2614430.6579660.682019
thyroid0.2552400.6759180.697297
testis0.2067040.6926610.690990
colon_transverse0.2374850.7047030.691430
cells_cultured_fibroblasts0.4346820.7724100.791284
lung0.3831480.7776880.767171
artery_tibial0.3366750.7820140.776460
colon_sigmoid0.3019710.7840190.769884
prostate0.3475970.8040460.799923
minor_salivary_gland0.4202460.8052880.804937
skin_sun_exposed_lower_leg0.3977420.8068170.799174
spleen0.5069820.8078280.858734
artery_aorta0.4281660.8149620.810471
nerve_tibial0.3237450.8199430.772638
adipose_subcutaneous0.3805010.8269600.807961
heart_atrial_appendage0.4877680.8350580.833513
brain_anterior_cingulate_cortex_ba240.4261460.8352510.874480
esophagus_mucosa0.4033100.8435000.850711
esophagus_gastroesophageal_junction0.4194010.8497060.843063
brain_frontal_cortex_ba90.4320360.8541440.876284
skin_not_sun_exposed_suprapubic0.5280960.8557960.859677
brain_cerebellum0.3674630.8570930.790941
esophagus_muscularis0.4066280.8593940.849154
artery_coronary0.4104580.8610730.801901
brain_caudate_basal_ganglia0.5364270.8622350.894522
cells_ebvtransformed_lymphocytes0.3628830.8752650.827843
adipose_visceral_omentum0.4274600.8775320.863902
brain_amygdala0.5407820.8803060.914251
brain_cerebellar_hemisphere0.4100690.8819020.862051
brain_cortex0.4438140.8848410.889380
pancreas0.4613640.8968960.871488
brain_hippocampus0.5997050.8999030.916020
brain_substantia_nigra0.6395690.9041600.922021
whole_blood0.6145610.9113540.929595
breast_mammary_tissue0.7194900.9130810.937118
brain_putamen_basal_ganglia0.5500230.9153800.929241
brain_nucleus_accumbens_basal_ganglia0.6065690.9205630.941653
heart_left_ventricle0.6322310.9244070.923212
brain_spinal_cord_cervical_c10.7079890.9244380.923254
brain_hypothalamus0.6093380.9286250.946419
kidney_cortex0.6642560.9335450.927273
bladder0.6230770.9476060.898901
\n", + "
" + ], + "text/plain": [ + " ccc pearson spearman\n", + "small_intestine_terminal_ileum 0.131379 0.540193 0.465558\n", + "stomach 0.186605 0.546008 0.631823\n", + "liver 0.189274 0.619830 0.618370\n", + "pituitary 0.217599 0.629830 0.643730\n", + "muscle_skeletal 0.205450 0.648536 0.658019\n", + "adrenal_gland 0.261443 0.657966 0.682019\n", + "thyroid 0.255240 0.675918 0.697297\n", + "testis 0.206704 0.692661 0.690990\n", + "colon_transverse 0.237485 0.704703 0.691430\n", + "cells_cultured_fibroblasts 0.434682 0.772410 0.791284\n", + "lung 0.383148 0.777688 0.767171\n", + "artery_tibial 0.336675 0.782014 0.776460\n", + "colon_sigmoid 0.301971 0.784019 0.769884\n", + "prostate 0.347597 0.804046 0.799923\n", + "minor_salivary_gland 0.420246 0.805288 0.804937\n", + "skin_sun_exposed_lower_leg 0.397742 0.806817 0.799174\n", + "spleen 0.506982 0.807828 0.858734\n", + "artery_aorta 0.428166 0.814962 0.810471\n", + "nerve_tibial 0.323745 0.819943 0.772638\n", + "adipose_subcutaneous 0.380501 0.826960 0.807961\n", + "heart_atrial_appendage 0.487768 0.835058 0.833513\n", + "brain_anterior_cingulate_cortex_ba24 0.426146 0.835251 0.874480\n", + "esophagus_mucosa 0.403310 0.843500 0.850711\n", + "esophagus_gastroesophageal_junction 0.419401 0.849706 0.843063\n", + "brain_frontal_cortex_ba9 0.432036 0.854144 0.876284\n", + "skin_not_sun_exposed_suprapubic 0.528096 0.855796 0.859677\n", + "brain_cerebellum 0.367463 0.857093 0.790941\n", + "esophagus_muscularis 0.406628 0.859394 0.849154\n", + "artery_coronary 0.410458 0.861073 0.801901\n", + "brain_caudate_basal_ganglia 0.536427 0.862235 0.894522\n", + "cells_ebvtransformed_lymphocytes 0.362883 0.875265 0.827843\n", + "adipose_visceral_omentum 0.427460 0.877532 0.863902\n", + "brain_amygdala 0.540782 0.880306 0.914251\n", + "brain_cerebellar_hemisphere 0.410069 0.881902 0.862051\n", + "brain_cortex 0.443814 0.884841 0.889380\n", + "pancreas 0.461364 0.896896 0.871488\n", + "brain_hippocampus 0.599705 0.899903 0.916020\n", + 
"brain_substantia_nigra 0.639569 0.904160 0.922021\n", + "whole_blood 0.614561 0.911354 0.929595\n", + "breast_mammary_tissue 0.719490 0.913081 0.937118\n", + "brain_putamen_basal_ganglia 0.550023 0.915380 0.929241\n", + "brain_nucleus_accumbens_basal_ganglia 0.606569 0.920563 0.941653\n", + "heart_left_ventricle 0.632231 0.924407 0.923212\n", + "brain_spinal_cord_cervical_c1 0.707989 0.924438 0.923254\n", + "brain_hypothalamus 0.609338 0.928625 0.946419\n", + "kidney_cortex 0.664256 0.933545 0.927273\n", + "bladder 0.623077 0.947606 0.898901" + ] + }, + "execution_count": 49, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "res_all_males.sort_values(\"pearson\")" + ] + }, + { + "cell_type": "code", + "execution_count": 50, + "id": "65fda98a-ebcb-421b-a286-3a89dd369b38", + "metadata": { + "execution": { + "iopub.execute_input": "2023-11-20T08:56:05.936868Z", + "iopub.status.busy": "2023-11-20T08:56:05.936719Z", + "iopub.status.idle": "2023-11-20T08:56:05.947502Z", + "shell.execute_reply": "2023-11-20T08:56:05.947098Z" + }, + "papermill": { + "duration": 0.021826, + "end_time": "2023-11-20T08:56:05.948275", + "exception": false, + "start_time": "2023-11-20T08:56:05.926449", + "status": "completed" + }, + "tags": [] + }, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " 
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
cccpearsonspearman
small_intestine_terminal_ileum0.1313790.5401930.465558
liver0.1892740.6198300.618370
stomach0.1866050.5460080.631823
pituitary0.2175990.6298300.643730
muscle_skeletal0.2054500.6485360.658019
adrenal_gland0.2614430.6579660.682019
testis0.2067040.6926610.690990
colon_transverse0.2374850.7047030.691430
thyroid0.2552400.6759180.697297
lung0.3831480.7776880.767171
colon_sigmoid0.3019710.7840190.769884
nerve_tibial0.3237450.8199430.772638
artery_tibial0.3366750.7820140.776460
brain_cerebellum0.3674630.8570930.790941
cells_cultured_fibroblasts0.4346820.7724100.791284
skin_sun_exposed_lower_leg0.3977420.8068170.799174
prostate0.3475970.8040460.799923
artery_coronary0.4104580.8610730.801901
minor_salivary_gland0.4202460.8052880.804937
adipose_subcutaneous0.3805010.8269600.807961
artery_aorta0.4281660.8149620.810471
cells_ebvtransformed_lymphocytes0.3628830.8752650.827843
heart_atrial_appendage0.4877680.8350580.833513
esophagus_gastroesophageal_junction0.4194010.8497060.843063
esophagus_muscularis0.4066280.8593940.849154
esophagus_mucosa0.4033100.8435000.850711
spleen0.5069820.8078280.858734
skin_not_sun_exposed_suprapubic0.5280960.8557960.859677
brain_cerebellar_hemisphere0.4100690.8819020.862051
adipose_visceral_omentum0.4274600.8775320.863902
pancreas0.4613640.8968960.871488
brain_anterior_cingulate_cortex_ba240.4261460.8352510.874480
brain_frontal_cortex_ba90.4320360.8541440.876284
brain_cortex0.4438140.8848410.889380
brain_caudate_basal_ganglia0.5364270.8622350.894522
bladder0.6230770.9476060.898901
brain_amygdala0.5407820.8803060.914251
brain_hippocampus0.5997050.8999030.916020
brain_substantia_nigra0.6395690.9041600.922021
heart_left_ventricle0.6322310.9244070.923212
brain_spinal_cord_cervical_c10.7079890.9244380.923254
kidney_cortex0.6642560.9335450.927273
brain_putamen_basal_ganglia0.5500230.9153800.929241
whole_blood0.6145610.9113540.929595
breast_mammary_tissue0.7194900.9130810.937118
brain_nucleus_accumbens_basal_ganglia0.6065690.9205630.941653
brain_hypothalamus0.6093380.9286250.946419
\n", + "
" + ], + "text/plain": [ + " ccc pearson spearman\n", + "small_intestine_terminal_ileum 0.131379 0.540193 0.465558\n", + "liver 0.189274 0.619830 0.618370\n", + "stomach 0.186605 0.546008 0.631823\n", + "pituitary 0.217599 0.629830 0.643730\n", + "muscle_skeletal 0.205450 0.648536 0.658019\n", + "adrenal_gland 0.261443 0.657966 0.682019\n", + "testis 0.206704 0.692661 0.690990\n", + "colon_transverse 0.237485 0.704703 0.691430\n", + "thyroid 0.255240 0.675918 0.697297\n", + "lung 0.383148 0.777688 0.767171\n", + "colon_sigmoid 0.301971 0.784019 0.769884\n", + "nerve_tibial 0.323745 0.819943 0.772638\n", + "artery_tibial 0.336675 0.782014 0.776460\n", + "brain_cerebellum 0.367463 0.857093 0.790941\n", + "cells_cultured_fibroblasts 0.434682 0.772410 0.791284\n", + "skin_sun_exposed_lower_leg 0.397742 0.806817 0.799174\n", + "prostate 0.347597 0.804046 0.799923\n", + "artery_coronary 0.410458 0.861073 0.801901\n", + "minor_salivary_gland 0.420246 0.805288 0.804937\n", + "adipose_subcutaneous 0.380501 0.826960 0.807961\n", + "artery_aorta 0.428166 0.814962 0.810471\n", + "cells_ebvtransformed_lymphocytes 0.362883 0.875265 0.827843\n", + "heart_atrial_appendage 0.487768 0.835058 0.833513\n", + "esophagus_gastroesophageal_junction 0.419401 0.849706 0.843063\n", + "esophagus_muscularis 0.406628 0.859394 0.849154\n", + "esophagus_mucosa 0.403310 0.843500 0.850711\n", + "spleen 0.506982 0.807828 0.858734\n", + "skin_not_sun_exposed_suprapubic 0.528096 0.855796 0.859677\n", + "brain_cerebellar_hemisphere 0.410069 0.881902 0.862051\n", + "adipose_visceral_omentum 0.427460 0.877532 0.863902\n", + "pancreas 0.461364 0.896896 0.871488\n", + "brain_anterior_cingulate_cortex_ba24 0.426146 0.835251 0.874480\n", + "brain_frontal_cortex_ba9 0.432036 0.854144 0.876284\n", + "brain_cortex 0.443814 0.884841 0.889380\n", + "brain_caudate_basal_ganglia 0.536427 0.862235 0.894522\n", + "bladder 0.623077 0.947606 0.898901\n", + "brain_amygdala 0.540782 0.880306 0.914251\n", + 
"brain_hippocampus 0.599705 0.899903 0.916020\n", + "brain_substantia_nigra 0.639569 0.904160 0.922021\n", + "heart_left_ventricle 0.632231 0.924407 0.923212\n", + "brain_spinal_cord_cervical_c1 0.707989 0.924438 0.923254\n", + "kidney_cortex 0.664256 0.933545 0.927273\n", + "brain_putamen_basal_ganglia 0.550023 0.915380 0.929241\n", + "whole_blood 0.614561 0.911354 0.929595\n", + "breast_mammary_tissue 0.719490 0.913081 0.937118\n", + "brain_nucleus_accumbens_basal_ganglia 0.606569 0.920563 0.941653\n", + "brain_hypothalamus 0.609338 0.928625 0.946419" + ] + }, + "execution_count": 50, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "res_all_males.sort_values(\"spearman\")" + ] + }, + { + "cell_type": "markdown", + "id": "cccf7fd4-329c-49e0-b6a7-3f449d78a73c", + "metadata": { + "papermill": { + "duration": 0.010108, + "end_time": "2023-11-20T08:56:05.968664", + "exception": false, + "start_time": "2023-11-20T08:56:05.958556", + "status": "completed" + }, + "tags": [] + }, + "source": [ + "# Plot of male samples" + ] + }, + { + "cell_type": "code", + "execution_count": 51, + "id": "37979e0a-5a5d-40d7-8071-1ecc54a11c06", + "metadata": { + "execution": { + "iopub.execute_input": "2023-11-20T08:56:05.989605Z", + "iopub.status.busy": "2023-11-20T08:56:05.989454Z", + "iopub.status.idle": "2023-11-20T08:56:05.993475Z", + "shell.execute_reply": "2023-11-20T08:56:05.993079Z" + }, + "papermill": { + "duration": 0.015537, + "end_time": "2023-11-20T08:56:05.994253", + "exception": false, + "start_time": "2023-11-20T08:56:05.978716", + "status": "completed" + }, + "tags": [] + }, + "outputs": [], + "source": [ + "def get_tissue_file(name):\n", + " \"\"\"\n", + " Given a part of a tissue name, it returns a file path to the\n", + " expression data for that tissue in GTEx. 
It fails if more than\n", + " one files are found.\n", + "\n", + " Args:\n", + " name: a string with the tissue name (or a part of it).\n", + "\n", + " Returns:\n", + " A Path object pointing to the gene expression file for the\n", + " given tissue.\n", + " \"\"\"\n", + " tissue_files = []\n", + " for f in TISSUE_DIR.glob(\"*.pkl\"):\n", + " if name in f.name:\n", + " tissue_files.append(f)\n", + "\n", + " assert len(tissue_files) == 1\n", + " return tissue_files[0]" + ] + }, + { + "cell_type": "code", + "execution_count": 52, + "id": "08d2da77-49d4-471b-9e6c-c1d27fc5b13b", + "metadata": { + "execution": { + "iopub.execute_input": "2023-11-20T08:56:06.015399Z", + "iopub.status.busy": "2023-11-20T08:56:06.015248Z", + "iopub.status.idle": "2023-11-20T08:56:06.018280Z", + "shell.execute_reply": "2023-11-20T08:56:06.017885Z" + }, + "papermill": { + "duration": 0.014842, + "end_time": "2023-11-20T08:56:06.019182", + "exception": false, + "start_time": "2023-11-20T08:56:06.004340", + "status": "completed" + }, + "tags": [] + }, + "outputs": [], + "source": [ + "# testing\n", + "_tmp = get_tissue_file(\"whole_blood\")\n", + "assert _tmp.exists()" + ] + }, + { + "cell_type": "code", + "execution_count": 53, + "id": "0d04139a-2fd5-4f09-81c7-8f7b6b87d478", + "metadata": { + "execution": { + "iopub.execute_input": "2023-11-20T08:56:06.040472Z", + "iopub.status.busy": "2023-11-20T08:56:06.040325Z", + "iopub.status.idle": "2023-11-20T08:56:06.043073Z", + "shell.execute_reply": "2023-11-20T08:56:06.042681Z" + }, + "papermill": { + "duration": 0.014423, + "end_time": "2023-11-20T08:56:06.043873", + "exception": false, + "start_time": "2023-11-20T08:56:06.029450", + "status": "completed" + }, + "tags": [] + }, + "outputs": [], + "source": [ + "def simplify_tissue_name(tissue_name):\n", + " return f\"{tissue_name[0].upper()}{tissue_name[1:].replace('_', ' ')}\"" + ] + }, + { + "cell_type": "code", + "execution_count": 54, + "id": "5e0026f0-e25f-4058-bba8-810887f523f5", + 
"metadata": { + "execution": { + "iopub.execute_input": "2023-11-20T08:56:06.065222Z", + "iopub.status.busy": "2023-11-20T08:56:06.064825Z", + "iopub.status.idle": "2023-11-20T08:56:06.067504Z", + "shell.execute_reply": "2023-11-20T08:56:06.067101Z" + }, + "papermill": { + "duration": 0.014174, + "end_time": "2023-11-20T08:56:06.068318", + "exception": false, + "start_time": "2023-11-20T08:56:06.054144", + "status": "completed" + }, + "tags": [] + }, + "outputs": [], + "source": [ + "assert simplify_tissue_name(\"whole_blood\") == \"Whole blood\"\n", + "assert simplify_tissue_name(\"uterus\") == \"Uterus\"" + ] + }, + { + "cell_type": "code", + "execution_count": 55, + "id": "30becd4a-14ad-467d-b462-54429c61008a", + "metadata": { + "execution": { + "iopub.execute_input": "2023-11-20T08:56:06.089570Z", + "iopub.status.busy": "2023-11-20T08:56:06.089169Z", + "iopub.status.idle": "2023-11-20T08:56:06.099591Z", + "shell.execute_reply": "2023-11-20T08:56:06.099184Z" + }, + "papermill": { + "duration": 0.021749, + "end_time": "2023-11-20T08:56:06.100320", + "exception": false, + "start_time": "2023-11-20T08:56:06.078571", + "status": "completed" + }, + "tags": [] + }, + "outputs": [], + "source": [ + "def plot_gene_pair(\n", + " tissue_name,\n", + " gene0,\n", + " gene1,\n", + " hue=None,\n", + " kind=\"hex\",\n", + " ylim=None,\n", + " bins=\"log\",\n", + " samples=None,\n", + " filename_suffix=\"\",\n", + "):\n", + " \"\"\"\n", + " It plots (joint plot) a gene pair from the given tissue. 
It saves the plot\n", + " for the manuscript.\n", + " \"\"\"\n", + " # merge gene expression with metadata\n", + " tissue_file = get_tissue_file(tissue_name)\n", + " if samples is not None:\n", + " tissue_data = (\n", + " pd.read_pickle(tissue_file).T[[gene0, gene1]].reindex(samples).dropna()\n", + " )\n", + " else:\n", + " tissue_data = pd.read_pickle(tissue_file).T[[gene0, gene1]]\n", + "\n", + " tissue_data = pd.merge(\n", + " tissue_data,\n", + " gtex_metadata,\n", + " how=\"inner\",\n", + " left_index=True,\n", + " right_index=True,\n", + " validate=\"one_to_one\",\n", + " )\n", + "\n", + " # get gene symbols\n", + " gene0_symbol, gene1_symbol = gene_map[gene0], gene_map[gene1]\n", + " display((gene0_symbol, gene1_symbol))\n", + "\n", + " # compute correlations for this gene pair\n", + " _clustermatch = ccc(tissue_data[gene0], tissue_data[gene1])\n", + " _pearson = pearsonr(tissue_data[gene0], tissue_data[gene1])[0]\n", + " _spearman = spearmanr(tissue_data[gene0], tissue_data[gene1])[0]\n", + "\n", + " _title = f\"{simplify_tissue_name(tissue_name)}\\n$c={_clustermatch:.2f}$ $p={_pearson:.2f}$ $s={_spearman:.2f}$\"\n", + "\n", + " other_args = {\n", + " \"kind\": kind, # if hue is None else \"scatter\",\n", + " \"rasterized\": True,\n", + " }\n", + " if hue is None:\n", + " other_args[\"hue_order\"] = None\n", + " else:\n", + " other_args[\"hue_order\"] = [\"Male\", \"Female\"]\n", + "\n", + " with sns.plotting_context(\"paper\", font_scale=1.5):\n", + " p = sns.jointplot(\n", + " data=tissue_data,\n", + " x=gene0,\n", + " y=gene1,\n", + " hue=hue,\n", + " **other_args,\n", + " # ylim=(0, 500),\n", + " )\n", + "\n", + " # if samples is not None:\n", + " # p.ax_joint.legend_.remove()\n", + "\n", + " if ylim is not None:\n", + " p.ax_joint.set_ylim(ylim)\n", + "\n", + " gene_x_id = p.ax_joint.get_xlabel()\n", + " gene_x_symbol = gene_map[gene_x_id]\n", + " p.ax_joint.set_xlabel(f\"{gene_x_symbol}\", fontstyle=\"italic\")\n", + "\n", + " gene_y_id = 
p.ax_joint.get_ylabel()\n", + " gene_y_symbol = gene_map[gene_y_id]\n", + " p.ax_joint.set_ylabel(f\"{gene_y_symbol}\", fontstyle=\"italic\")\n", + "\n", + " p.fig.suptitle(_title)\n", + "\n", + " # save\n", + " output_file = (\n", + " OUTPUT_FIGURE_DIR\n", + " / f\"gtex_{tissue_name}-{gene_x_symbol}_vs_{gene_y_symbol}{filename_suffix}.svg\"\n", + " )\n", + " display(output_file)\n", + "\n", + " plt.savefig(\n", + " output_file,\n", + " bbox_inches=\"tight\",\n", + " dpi=300,\n", + " facecolor=\"white\",\n", + " )\n", + "\n", + " return tissue_data" + ] + }, + { + "cell_type": "markdown", + "id": "a1203a1b-9192-43ee-8715-476aa7048ce6", + "metadata": { + "papermill": { + "duration": 0.010275, + "end_time": "2023-11-20T08:56:06.120691", + "exception": false, + "start_time": "2023-11-20T08:56:06.110416", + "status": "completed" + }, + "tags": [] + }, + "source": [ + "## Brain cerebellum (males)" + ] + }, + { + "cell_type": "code", + "execution_count": 56, + "id": "ad7a21de-2d8e-423e-8044-38d6f3b43808", + "metadata": { + "execution": { + "iopub.execute_input": "2023-11-20T08:56:06.141871Z", + "iopub.status.busy": "2023-11-20T08:56:06.141469Z", + "iopub.status.idle": "2023-11-20T08:56:06.815437Z", + "shell.execute_reply": "2023-11-20T08:56:06.815120Z" + }, + "papermill": { + "duration": 0.686272, + "end_time": "2023-11-20T08:56:06.817129", + "exception": false, + "start_time": "2023-11-20T08:56:06.130857", + "status": "completed" + }, + "tags": [] + }, + "outputs": [ + { + "data": { + "text/plain": [ + "('KDM6A', 'UTY')" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/plain": [ + "PosixPath('/opt/manuscript/content/images/coefs_comp/kdm6a_vs_uty/gtex_brain_cerebellum-KDM6A_vs_UTY-all.svg')" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "image/png": "\n", + "text/plain": [ + "
" + ] + }, + "metadata": { + "needs_background": "light" + }, + "output_type": "display_data" + } + ], + "source": [ + "_tissue_data = plot_gene_pair(\n", + " \"brain_cerebellum\",\n", + " gene0_id,\n", + " gene1_id,\n", + " hue=\"SEX\",\n", + " kind=\"scatter\",\n", + " samples=None,\n", + " filename_suffix=\"-all\",\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": 57, + "id": "f009b03f-b276-42ac-8f76-5377aac04b82", + "metadata": { + "execution": { + "iopub.execute_input": "2023-11-20T08:56:06.858869Z", + "iopub.status.busy": "2023-11-20T08:56:06.858782Z", + "iopub.status.idle": "2023-11-20T08:56:07.380845Z", + "shell.execute_reply": "2023-11-20T08:56:07.380537Z" + }, + "papermill": { + "duration": 0.544177, + "end_time": "2023-11-20T08:56:07.382358", + "exception": false, + "start_time": "2023-11-20T08:56:06.838181", + "status": "completed" + }, + "tags": [] + }, + "outputs": [ + { + "data": { + "text/plain": [ + "('KDM6A', 'UTY')" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/plain": [ + "PosixPath('/opt/manuscript/content/images/coefs_comp/kdm6a_vs_uty/gtex_brain_cerebellum-KDM6A_vs_UTY-males.svg')" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "image/png": "\n", + "text/plain": [ + "
" + ] + }, + "metadata": { + "needs_background": "light" + }, + "output_type": "display_data" + } + ], + "source": [ + "_tissue_data = plot_gene_pair(\n", + " \"brain_cerebellum\",\n", + " gene0_id,\n", + " gene1_id,\n", + " hue=\"SEX\",\n", + " kind=\"scatter\",\n", + " samples=male_samples,\n", + " filename_suffix=\"-males\",\n", + ")" + ] + }, + { + "cell_type": "markdown", + "id": "a3e88e70-6c2b-4a41-95f8-3e7d3214c307", + "metadata": { + "papermill": { + "duration": 0.021029, + "end_time": "2023-11-20T08:56:07.424778", + "exception": false, + "start_time": "2023-11-20T08:56:07.403749", + "status": "completed" + }, + "tags": [] + }, + "source": [ + "## Smalle intestine (males)" + ] + }, + { + "cell_type": "code", + "execution_count": 58, + "id": "4b35981a-a86c-4bca-afc2-d426d899f38e", + "metadata": { + "execution": { + "iopub.execute_input": "2023-11-20T08:56:07.466017Z", + "iopub.status.busy": "2023-11-20T08:56:07.465857Z", + "iopub.status.idle": "2023-11-20T08:56:07.951425Z", + "shell.execute_reply": "2023-11-20T08:56:07.951103Z" + }, + "papermill": { + "duration": 0.507235, + "end_time": "2023-11-20T08:56:07.953195", + "exception": false, + "start_time": "2023-11-20T08:56:07.445960", + "status": "completed" + }, + "tags": [] + }, + "outputs": [ + { + "data": { + "text/plain": [ + "('KDM6A', 'UTY')" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/plain": [ + "PosixPath('/opt/manuscript/content/images/coefs_comp/kdm6a_vs_uty/gtex_small_intestine_terminal_ileum-KDM6A_vs_UTY-all.svg')" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "image/png": "\n", + "text/plain": [ + "
" + ] + }, + "metadata": { + "needs_background": "light" + }, + "output_type": "display_data" + } + ], + "source": [ + "_tissue_data = plot_gene_pair(\n", + " \"small_intestine_terminal_ileum\",\n", + " gene0_id,\n", + " gene1_id,\n", + " hue=\"SEX\",\n", + " kind=\"scatter\",\n", + " samples=None,\n", + " filename_suffix=\"-all\",\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": 59, + "id": "193939bb-4af0-4995-af74-ab3d0ccd86b5", + "metadata": { + "execution": { + "iopub.execute_input": "2023-11-20T08:56:07.998867Z", + "iopub.status.busy": "2023-11-20T08:56:07.998697Z", + "iopub.status.idle": "2023-11-20T08:56:08.457065Z", + "shell.execute_reply": "2023-11-20T08:56:08.456756Z" + }, + "papermill": { + "duration": 0.481924, + "end_time": "2023-11-20T08:56:08.458042", + "exception": false, + "start_time": "2023-11-20T08:56:07.976118", + "status": "completed" + }, + "tags": [] + }, + "outputs": [ + { + "data": { + "text/plain": [ + "('KDM6A', 'UTY')" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/plain": [ + "PosixPath('/opt/manuscript/content/images/coefs_comp/kdm6a_vs_uty/gtex_small_intestine_terminal_ileum-KDM6A_vs_UTY-males.svg')" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "image/png": "\n", + "text/plain": [ + "
" + ] + }, + "metadata": { + "needs_background": "light" + }, + "output_type": "display_data" + } + ], + "source": [ + "_tissue_data = plot_gene_pair(\n", + " \"small_intestine_terminal_ileum\",\n", + " gene0_id,\n", + " gene1_id,\n", + " hue=\"SEX\",\n", + " kind=\"scatter\",\n", + " samples=male_samples,\n", + " filename_suffix=\"-males\",\n", + ")" + ] + }, + { + "cell_type": "markdown", + "id": "dafb2823-fdd8-40e0-919e-9bab0ead9f4d", + "metadata": { + "papermill": { + "duration": 0.023055, + "end_time": "2023-11-20T08:56:08.504159", + "exception": false, + "start_time": "2023-11-20T08:56:08.481104", + "status": "completed" + }, + "tags": [] + }, + "source": [ + "# Create final figure" + ] + }, + { + "cell_type": "code", + "execution_count": 60, + "id": "57090b29-80c6-4ae5-8d2e-a1f256f8a58c", + "metadata": { + "execution": { + "iopub.execute_input": "2023-11-20T08:56:08.545680Z", + "iopub.status.busy": "2023-11-20T08:56:08.545550Z", + "iopub.status.idle": "2023-11-20T08:56:08.557139Z", + "shell.execute_reply": "2023-11-20T08:56:08.556741Z" + }, + "papermill": { + "duration": 0.030886, + "end_time": "2023-11-20T08:56:08.557999", + "exception": false, + "start_time": "2023-11-20T08:56:08.527113", + "status": "completed" + }, + "tags": [] + }, + "outputs": [], + "source": [ + "from svgutils.compose import Figure, SVG, Panel" + ] + }, + { + "cell_type": "code", + "execution_count": 61, + "id": "e5a39467-e448-43a9-a9ba-9f947bfff1ed", + "metadata": { + "execution": { + "iopub.execute_input": "2023-11-20T08:56:08.582763Z", + "iopub.status.busy": "2023-11-20T08:56:08.582635Z", + "iopub.status.idle": "2023-11-20T08:56:08.592221Z", + "shell.execute_reply": "2023-11-20T08:56:08.591831Z" + }, + "papermill": { + "duration": 0.022872, + "end_time": "2023-11-20T08:56:08.592942", + "exception": false, + "start_time": "2023-11-20T08:56:08.570070", + "status": "completed" + }, + "tags": [] + }, + "outputs": [], + "source": [ + "Figure(\n", + " \"6.0767480cm\",\n", + " 
\"8.7045984cm\",\n", + " # Panel(\n", + " # SVG(OUTPUT_FIGURE_DIR / \"gtex_brain_cerebellum-KDM6A_vs_UTY-all.svg\").scale(0.005),\n", + " # SVG(OUTPUT_FIGURE_DIR / \"gtex_small_intestine-KDM6A_vs_UTY-all.svg\").scale(0.005).move(2, 0),\n", + " # ),\n", + " Panel(\n", + " SVG(OUTPUT_FIGURE_DIR / \"gtex_brain_cerebellum-KDM6A_vs_UTY-males.svg\").scale(\n", + " 0.005\n", + " ),\n", + " SVG(\n", + " OUTPUT_FIGURE_DIR\n", + " / \"gtex_small_intestine_terminal_ileum-KDM6A_vs_UTY-males.svg\"\n", + " )\n", + " .scale(0.005)\n", + " .move(2, 0),\n", + " ).move(0, 2.20),\n", + ").save(\n", + " OUTPUT_FIGURE_DIR\n", + " / \"gtex-KDM6A_vs_UTY-brain_cerebellum_and_small_intestine_terminal_ileum-males.svg\"\n", + ")" + ] + }, + { + "cell_type": "markdown", + "id": "5f462b54-b10e-4392-9dc8-719f84a9bff3", + "metadata": { + "papermill": { + "duration": 0.012083, + "end_time": "2023-11-20T08:56:08.617210", + "exception": false, + "start_time": "2023-11-20T08:56:08.605127", + "status": "completed" + }, + "tags": [] + }, + "source": [ + "Now open the file, reside to fit drawing to page, and add a white rectangle to the background." 
+ ] + }, + { + "cell_type": "markdown", + "id": "84fb982b-1dce-4985-99b2-86c74d1d7f33", + "metadata": { + "papermill": { + "duration": 0.012069, + "end_time": "2023-11-20T08:56:08.641484", + "exception": false, + "start_time": "2023-11-20T08:56:08.629415", + "status": "completed" + }, + "tags": [] + }, + "source": [ + "# Understand how CCC divides samples" + ] + }, + { + "cell_type": "markdown", + "id": "4d78acde-d32c-4660-bab4-30aa89222298", + "metadata": { + "papermill": { + "duration": 0.01217, + "end_time": "2023-11-20T08:56:08.665891", + "exception": false, + "start_time": "2023-11-20T08:56:08.653721", + "status": "completed" + }, + "tags": [] + }, + "source": [ + "## Prepare datasets" + ] + }, + { + "cell_type": "code", + "execution_count": 62, + "id": "bc5680c7-18d2-45ca-a7d4-3c0cb1d8b8f9", + "metadata": { + "execution": { + "iopub.execute_input": "2023-11-20T08:56:08.691974Z", + "iopub.status.busy": "2023-11-20T08:56:08.691490Z", + "iopub.status.idle": "2023-11-20T08:56:08.701524Z", + "shell.execute_reply": "2023-11-20T08:56:08.701168Z" + }, + "papermill": { + "duration": 0.023909, + "end_time": "2023-11-20T08:56:08.702252", + "exception": false, + "start_time": "2023-11-20T08:56:08.678343", + "status": "completed" + }, + "tags": [] + }, + "outputs": [], + "source": [ + "datasets_df = pd.DataFrame(\n", + " {\n", + " \"dataset\": \"Brain cerebellum (all)\",\n", + " \"x\": brain_cerebellum.iloc[:, 0],\n", + " \"y\": brain_cerebellum.iloc[:, 1],\n", + " }\n", + ")\n", + "\n", + "datasets_df = datasets_df.append(\n", + " pd.DataFrame(\n", + " {\n", + " \"dataset\": \"Small intestine (terminal ileum) (all)\",\n", + " \"x\": small_intestine.iloc[:, 0],\n", + " \"y\": small_intestine.iloc[:, 1],\n", + " }\n", + " ),\n", + " ignore_index=True,\n", + ")\n", + "\n", + "datasets_df = datasets_df.append(\n", + " pd.DataFrame(\n", + " {\n", + " \"dataset\": \"Brain cerebellum (males)\",\n", + " \"x\": brain_cerebellum_males.iloc[:, 0],\n", + " \"y\": 
brain_cerebellum_males.iloc[:, 1],\n", + " }\n", + " ),\n", + " ignore_index=True,\n", + ")\n", + "\n", + "datasets_df = datasets_df.append(\n", + " pd.DataFrame(\n", + " {\n", + " \"dataset\": \"Small intestine (terminal ileum) (males)\",\n", + " \"x\": small_intestine_males.iloc[:, 0],\n", + " \"y\": small_intestine_males.iloc[:, 1],\n", + " }\n", + " ),\n", + " ignore_index=True,\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": 63, + "id": "603dae3d-0fbe-4fe0-8639-181ba6f13a20", + "metadata": { + "execution": { + "iopub.execute_input": "2023-11-20T08:56:08.727410Z", + "iopub.status.busy": "2023-11-20T08:56:08.727248Z", + "iopub.status.idle": "2023-11-20T08:56:08.732824Z", + "shell.execute_reply": "2023-11-20T08:56:08.732470Z" + }, + "papermill": { + "duration": 0.019325, + "end_time": "2023-11-20T08:56:08.733558", + "exception": false, + "start_time": "2023-11-20T08:56:08.714233", + "status": "completed" + }, + "tags": [] + }, + "outputs": [], + "source": [ + "datasets = {\n", + " idx: df.drop(columns=\"dataset\") for idx, df in datasets_df.groupby(\"dataset\")\n", + "}" + ] + }, + { + "cell_type": "markdown", + "id": "79a733f1-c41f-41b7-a5d4-3341014176d8", + "metadata": { + "papermill": { + "duration": 0.012121, + "end_time": "2023-11-20T08:56:08.757944", + "exception": false, + "start_time": "2023-11-20T08:56:08.745823", + "status": "completed" + }, + "tags": [] + }, + "source": [ + "## Plot" + ] + }, + { + "cell_type": "code", + "execution_count": 64, + "id": "0109f1d9-22ff-42fd-8bf8-ab84f449cbbd", + "metadata": { + "execution": { + "iopub.execute_input": "2023-11-20T08:56:08.783470Z", + "iopub.status.busy": "2023-11-20T08:56:08.783036Z", + "iopub.status.idle": "2023-11-20T08:56:08.790958Z", + "shell.execute_reply": "2023-11-20T08:56:08.790544Z" + }, + "papermill": { + "duration": 0.021517, + "end_time": "2023-11-20T08:56:08.791733", + "exception": false, + "start_time": "2023-11-20T08:56:08.770216", + "status": "completed" + }, + "tags": [] 
+ }, + "outputs": [], + "source": [ + "def get_cm_line_points(x, y, max_parts, parts):\n", + " \"\"\"\n", + " Given two data vectors (x and y) and the max_parts and parts\n", + " returned from calling cm, this function returns two arrays with\n", + " scalars to draw the lines that separates clusters in x and y.\n", + " \"\"\"\n", + " # get the ccc partitions that maximize the coefficient\n", + " x_max_part = parts[0][max_parts[0]]\n", + " x_unique_k = {}\n", + " for k in np.unique(x_max_part):\n", + " data = x[x_max_part == k]\n", + " x_unique_k[k] = data.min(), data.max()\n", + " x_unique_k = sorted(x_unique_k.items(), key=lambda x: x[1][0])\n", + "\n", + " y_max_part = parts[1][max_parts[1]]\n", + " y_unique_k = {}\n", + " for k in np.unique(y_max_part):\n", + " data = y[y_max_part == k]\n", + " y_unique_k[k] = data.min(), data.max()\n", + " y_unique_k = sorted(y_unique_k.items(), key=lambda x: x[1][0])\n", + "\n", + " x_line_points, y_line_points = [], []\n", + "\n", + " for idx in range(len(x_unique_k) - 1):\n", + " k, (k_min, k_max) = x_unique_k[idx]\n", + " nk, (nk_min, nk_max) = x_unique_k[idx + 1]\n", + "\n", + " x_line_points.append((k_max + nk_min) / 2.0)\n", + "\n", + " for idx in range(len(y_unique_k) - 1):\n", + " k, (k_min, k_max) = y_unique_k[idx]\n", + " nk, (nk_min, nk_max) = y_unique_k[idx + 1]\n", + "\n", + " y_line_points.append((k_max + nk_min) / 2.0)\n", + "\n", + " return x_line_points, y_line_points" + ] + }, + { + "cell_type": "code", + "execution_count": 65, + "id": "55ecad23-cf11-4ed5-a81d-c7faa16ad8b9", + "metadata": { + "execution": { + "iopub.execute_input": "2023-11-20T08:56:08.817173Z", + "iopub.status.busy": "2023-11-20T08:56:08.817019Z", + "iopub.status.idle": "2023-11-20T08:56:09.300155Z", + "shell.execute_reply": "2023-11-20T08:56:09.299832Z" + }, + "papermill": { + "duration": 0.497529, + "end_time": "2023-11-20T08:56:09.301630", + "exception": false, + "start_time": "2023-11-20T08:56:08.804101", + "status": "completed" + }, + 
"tags": [] + }, + "outputs": [ + { + "data": { + "image/png": "\n", + "text/plain": [ + "
" + ] + }, + "metadata": { + "needs_background": "light" + }, + "output_type": "display_data" + } + ], + "source": [ + "with sns.plotting_context(\"paper\", font_scale=1.8):\n", + " g = sns.FacetGrid(\n", + " data=datasets_df,\n", + " col=\"dataset\",\n", + " col_order=[\n", + " # \"Brain cerebellum (all)\",\n", + " # \"Small intestine (terminal ileum) (all)\",\n", + " \"Brain cerebellum (males)\",\n", + " \"Small intestine (terminal ileum) (males)\",\n", + " ],\n", + " col_wrap=2,\n", + " height=5,\n", + " )\n", + " g.map(sns.scatterplot, \"x\", \"y\", s=50, alpha=1)\n", + " g.set_titles(row_template=\"{row_name}\", col_template=\"{col_name}\")\n", + "\n", + " for ds, ax in g.axes_dict.items():\n", + " df = datasets[ds].to_numpy()\n", + " x, y = df[:, 0], df[:, 1]\n", + "\n", + " # pearson and spearman\n", + " r = pearsonr(x, y)[0]\n", + " rs = spearmanr(x, y)[0]\n", + "\n", + " # ccc\n", + " c, max_parts, parts = ccc(x, y, return_parts=True)\n", + " c = ccc(x, y)\n", + "\n", + " x_line_points, y_line_points = get_cm_line_points(x, y, max_parts, parts)\n", + " for yp in y_line_points:\n", + " ax.hlines(y=yp, xmin=-0.5, xmax=30, color=\"r\", alpha=0.5)\n", + "\n", + " for xp in x_line_points:\n", + " ax.vlines(x=xp, ymin=-0.5, ymax=18, color=\"r\", alpha=0.5)\n", + "\n", + " # add text box for the statistics\n", + " stats = f\"$c$ = {c:.2f}\"\n", + " bbox = dict(boxstyle=\"round\", fc=\"white\", ec=\"black\", alpha=0.75)\n", + " ax.text(\n", + " 0.95,\n", + " 0.90,\n", + " stats,\n", + " fontsize=14,\n", + " bbox=bbox,\n", + " transform=ax.transAxes,\n", + " horizontalalignment=\"right\",\n", + " )\n", + "\n", + " plt.savefig(\n", + " OUTPUT_FIGURE_DIR\n", + " / \"gtex-KDM6A_vs_UTY-brain_cerebellum_and_small_intestine_terminal_ileum-clusters-males.png\",\n", + " # rasterized=True,\n", + " dpi=300,\n", + " bbox_inches=\"tight\",\n", + " facecolor=\"white\",\n", + " )" + ] + }, + { + "cell_type": "code", + "execution_count": 66, + "id": 
"65d1355b-5793-491f-9da6-436f9676accc", + "metadata": { + "execution": { + "iopub.execute_input": "2023-11-20T08:56:09.386253Z", + "iopub.status.busy": "2023-11-20T08:56:09.386119Z", + "iopub.status.idle": "2023-11-20T08:56:09.861742Z", + "shell.execute_reply": "2023-11-20T08:56:09.861241Z" + }, + "papermill": { + "duration": 0.537138, + "end_time": "2023-11-20T08:56:09.862842", + "exception": false, + "start_time": "2023-11-20T08:56:09.325704", + "status": "completed" + }, + "tags": [] + }, + "outputs": [ + { + "data": { + "image/png": "\n", + "text/plain": [ + "
" + ] + }, + "metadata": { + "needs_background": "light" + }, + "output_type": "display_data" + } + ], + "source": [ + "with sns.plotting_context(\"paper\", font_scale=1.8):\n", + " g = sns.FacetGrid(\n", + " data=datasets_df,\n", + " col=\"dataset\",\n", + " col_order=[\n", + " \"Brain cerebellum (all)\",\n", + " \"Small intestine (terminal ileum) (all)\",\n", + " # \"Brain cerebellum (males)\",\n", + " # \"Small intestine (terminal ileum) (males)\",\n", + " ],\n", + " col_wrap=2,\n", + " height=5,\n", + " )\n", + " g.map(sns.scatterplot, \"x\", \"y\", s=50, alpha=1)\n", + " g.set_titles(row_template=\"{row_name}\", col_template=\"{col_name}\")\n", + "\n", + " for ds, ax in g.axes_dict.items():\n", + " df = datasets[ds].to_numpy()\n", + " x, y = df[:, 0], df[:, 1]\n", + "\n", + " # pearson and spearman\n", + " r = pearsonr(x, y)[0]\n", + " rs = spearmanr(x, y)[0]\n", + "\n", + " # ccc\n", + " c, max_parts, parts = ccc(x, y, return_parts=True)\n", + " c = ccc(x, y)\n", + "\n", + " x_line_points, y_line_points = get_cm_line_points(x, y, max_parts, parts)\n", + " for yp in y_line_points:\n", + " ax.hlines(y=yp, xmin=-0.5, xmax=30, color=\"r\", alpha=0.5)\n", + "\n", + " for xp in x_line_points:\n", + " ax.vlines(x=xp, ymin=-0.5, ymax=18, color=\"r\", alpha=0.5)\n", + "\n", + " # add text box for the statistics\n", + " stats = f\"$c$ = {c:.2f}\"\n", + " bbox = dict(boxstyle=\"round\", fc=\"white\", ec=\"black\", alpha=0.75)\n", + " ax.text(\n", + " 0.95,\n", + " 0.90,\n", + " stats,\n", + " fontsize=14,\n", + " bbox=bbox,\n", + " transform=ax.transAxes,\n", + " horizontalalignment=\"right\",\n", + " )\n", + "\n", + " plt.savefig(\n", + " OUTPUT_FIGURE_DIR\n", + " / \"gtex-KDM6A_vs_UTY-brain_cerebellum_and_small_intestine_terminal_ileum-clusters-all.png\",\n", + " # rasterized=True,\n", + " dpi=300,\n", + " bbox_inches=\"tight\",\n", + " facecolor=\"white\",\n", + " )" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": 
"f00d3bab-20b4-489f-bbe8-ef449eda9488", + "metadata": { + "papermill": { + "duration": 0.019825, + "end_time": "2023-11-20T08:56:09.902585", + "exception": false, + "start_time": "2023-11-20T08:56:09.882760", + "status": "completed" + }, + "tags": [] + }, + "outputs": [], + "source": [] + } + ], + "metadata": { + "jupytext": { + "cell_metadata_filter": "all,-execution,-papermill,-trusted", + "notebook_metadata_filter": "-jupytext.text_representation.jupytext_version" + }, + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.9.12" + }, + "papermill": { + "default_parameters": {}, + "duration": 14.836242, + "end_time": "2023-11-20T08:56:10.343797", + "environment_variables": {}, + "exception": null, + "input_path": "nbs/99_manuscript/coefs_comp/20_00-gtex_whole_blood-UTY_KDM6A.ipynb", + "output_path": "nbs/99_manuscript/coefs_comp/20_00-gtex_whole_blood-UTY_KDM6A.run.ipynb", + "parameters": {}, + "start_time": "2023-11-20T08:55:55.507555", + "version": "2.3.4" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/nbs/99_manuscript/coefs_comp/py/20_00-gtex_whole_blood-UTY_KDM6A.py b/nbs/99_manuscript/coefs_comp/py/20_00-gtex_whole_blood-UTY_KDM6A.py new file mode 100644 index 00000000..e9fabb63 --- /dev/null +++ b/nbs/99_manuscript/coefs_comp/py/20_00-gtex_whole_blood-UTY_KDM6A.py @@ -0,0 +1,720 @@ +# --- +# jupyter: +# jupytext: +# cell_metadata_filter: all,-execution,-papermill,-trusted +# notebook_metadata_filter: -jupytext.text_representation.jupytext_version +# text_representation: +# extension: .py +# format_name: percent +# format_version: '1.3' +# kernelspec: +# display_name: Python 3 (ipykernel) +# language: python +# name: python3 +# --- + +# %% 
# %% [markdown] tags=[]
# # Description

# %% [markdown] tags=[]
# This notebook analyzes more closely the pattern between the gene pair *UTY* / *KDM6A*. The analyses focus on Reviewer 2's comment:
#
# ```
# In Figure 4, while there is a visible difference between the correlation of male samples, the CCC values are still quite close. For example, this can be observed in Brain Cerebellum and Small Intestine Terminal Ileum. Please address this.
# ```

# %% [markdown] tags=[]
# # Modules

# %% tags=[]
import pandas as pd

from scipy.stats import pearsonr, spearmanr
import matplotlib.pyplot as plt
import seaborn as sns
import numpy as np

from ccc import conf
from ccc.coef import ccc

# %% [markdown] tags=[]
# # Settings

# %% tags=[]
# this gene pair was originally found with ccc on whole blood
# interesting: https://clincancerres.aacrjournals.org/content/26/21/5567.figures-only
# Ensembl IDs (with version suffix) and the symbols they are expected to map to;
# the ID -> symbol correspondence is checked with asserts once the gene map
# is loaded.
gene0_id, gene1_id = "ENSG00000147050.14", "ENSG00000183878.15"
gene0_symbol, gene1_symbol = "KDM6A", "UTY"

# %% [markdown] tags=[]
# # Paths

# %% tags=[]
# directory with one pickle file per GTEx tissue
TISSUE_DIR = conf.GTEX["DATA_DIR"] / "data_by_tissue"
assert TISSUE_DIR.exists()

# %% tags=[]
# figures are written to <FIGURES_DIR>/coefs_comp/kdm6a_vs_uty
OUTPUT_FIGURE_DIR = (
    conf.MANUSCRIPT["FIGURES_DIR"]
    / "coefs_comp"
    / f"{gene0_symbol.lower()}_vs_{gene1_symbol.lower()}"
)
OUTPUT_FIGURE_DIR.mkdir(parents=True, exist_ok=True)
# NOTE(review): `display` is not imported in this file; presumably it is the
# builtin injected by IPython/Jupyter, so this only runs as a notebook.
display(OUTPUT_FIGURE_DIR)

# %% [markdown] tags=[]
# # Data

# %% [markdown] tags=[]
# ## GTEx metadata

# %% tags=[]
gtex_metadata = pd.read_pickle(conf.GTEX["DATA_DIR"] / "gtex_v8-sample_metadata.pkl")

# %% tags=[]
gtex_metadata.shape

# %% tags=[]
gtex_metadata.head()

# %% [markdown] tags=[]
# ## Gene Ensembl ID -> Symbol mapping

# %% tags=[]
gene_map = pd.read_pickle(conf.GTEX["DATA_DIR"] / "gtex_gene_id_symbol_mappings.pkl")

# %% tags=[]
# turn the mapping table into a plain dict: Ensembl ID -> gene symbol
gene_map = gene_map.set_index("gene_ens_id")["gene_symbol"].to_dict()

# %% tags=[]
# sanity check of the mapping with a known gene
assert gene_map["ENSG00000145309.5"] == "CABS1"

# %% tags=[]
# the gene pair under study must map to the expected symbols
assert gene_map[gene0_id] == gene0_symbol
assert gene_map[gene1_id] == gene1_symbol

# %% [markdown] tags=[]
# ## Get male/female sample IDs

# %% tags=[]
gtex_metadata["SEX"].describe()

# %% tags=[]
# sample IDs are taken from the index of the metadata table
male_samples = gtex_metadata[gtex_metadata["SEX"] == "Male"].index.tolist()

# %% tags=[]
len(male_samples)

# %% tags=[]
male_samples[:5]

# %% tags=[]
female_samples = gtex_metadata[gtex_metadata["SEX"] == "Female"].index.tolist()

# %% tags=[]
len(female_samples)

# %% tags=[]
female_samples[:5]

# %% [markdown] tags=[]
# # Brain cerebellum

# %% tags=[]
# select the two genes (rows of the stored data) and transpose, so the result
# has one row per sample and two columns: gene0 (first) and gene1 (second)
brain_cerebellum = (
    pd.read_pickle(TISSUE_DIR / "gtex_v8_data_brain_cerebellum.pkl")
    .loc[[gene0_id, gene1_id]]
    .T.rename_axis("sample_id")
)

# %% tags=[]
brain_cerebellum.shape

# %% tags=[]
brain_cerebellum.head()

# %% tags=[]
# only the samples of this tissue that are also in the list of male samples
brain_cerebellum_males = brain_cerebellum.loc[
    brain_cerebellum.index.intersection(male_samples)
]

# %% tags=[]
brain_cerebellum_males.shape

# %% tags=[]
brain_cerebellum_females = brain_cerebellum.loc[
    brain_cerebellum.index.intersection(female_samples)
]

# %% tags=[]
brain_cerebellum_females.shape

# %% [markdown] tags=[]
# # Small intestine (terminal ileum)

# %% tags=[]
# same layout as for brain cerebellum: samples in rows, gene0/gene1 in columns
small_intestine = (
    pd.read_pickle(TISSUE_DIR / "gtex_v8_data_small_intestine_terminal_ileum.pkl")
    .loc[[gene0_id, gene1_id]]
    .T.rename_axis("sample_id")
)

# %% tags=[]
small_intestine.shape

# %% tags=[]
small_intestine.head()

# %% tags=[]
small_intestine_males = small_intestine.loc[
    small_intestine.index.intersection(male_samples)
]

# %% tags=[]
small_intestine_males.shape

# %% tags=[]
small_intestine_females = small_intestine.loc[
    small_intestine.index.intersection(female_samples)
]

# %% tags=[]
small_intestine_females.shape

# %% [markdown] tags=[]
# # Compute correlation

# %% [markdown] tags=[]
# ## Brain cerebellum

# %% [markdown] tags=[]
# ### CCC

# %% tags=[]
# NOTE(review): the whole two-column dataframe is passed here; presumably ccc
# compares the dataframe columns (the two genes) with each other -- confirm
# against the ccc documentation.
ccc(brain_cerebellum_males, pvalue_n_perms=1000)

# %% tags=[]
ccc(brain_cerebellum_females, pvalue_n_perms=1000)

# %% [markdown] tags=[]
# ### Pearson

# %% tags=[]
pearsonr(brain_cerebellum_males.iloc[:, 0], brain_cerebellum_males.iloc[:, 1])

# %% tags=[]
pearsonr(brain_cerebellum_females.iloc[:, 0], brain_cerebellum_females.iloc[:, 1])

# %% [markdown] tags=[]
# ### Spearman

# %% tags=[]
spearmanr(brain_cerebellum_males.iloc[:, 0], brain_cerebellum_males.iloc[:, 1])

# %% tags=[]
spearmanr(brain_cerebellum_females.iloc[:, 0], brain_cerebellum_females.iloc[:, 1])

# %% [markdown] tags=[]
# ## Small intestine (terminal ileum)

# %% [markdown] tags=[]
# ### CCC

# %% tags=[]
ccc(small_intestine_males, pvalue_n_perms=1000)

# %% tags=[]
ccc(small_intestine_females, pvalue_n_perms=1000)

# %% [markdown] tags=[]
# ### Pearson

# %% tags=[]
pearsonr(small_intestine_males.iloc[:, 0], small_intestine_males.iloc[:, 1])

# %% tags=[]
pearsonr(small_intestine_females.iloc[:, 0], small_intestine_females.iloc[:, 1])

# %% [markdown] tags=[]
# ### Spearman

# %% tags=[]
spearmanr(small_intestine_males.iloc[:, 0], small_intestine_males.iloc[:, 1])

# %% tags=[]
spearmanr(small_intestine_females.iloc[:, 0], small_intestine_females.iloc[:, 1])

# %% [markdown] tags=[]
# # Compute correlation on all tissues, males only

# %% tags=[]
# One row per tissue (the key is the part of the file name after "_data_") and
# one column per coefficient, in absolute value.
#
# The walrus assignment in the `if` clause loads each tissue file once and
# makes `data` available to the value expression: `data` holds the two genes
# for the male samples present in that tissue (rows with missing values are
# dropped). The `is not None` test only exists to host that assignment; the
# effective filter is that tissues with 10 or fewer such samples are skipped.
res_all_males = pd.DataFrame(
    {
        f.stem.split("_data_")[1]: {
            "ccc": ccc(data[gene0_id], data[gene1_id]),
            "pearson": pearsonr(data[gene0_id], data[gene1_id])[0],
            "spearman": spearmanr(data[gene0_id], data[gene1_id])[0],
        }
        for f in TISSUE_DIR.glob("*.pkl")
        if (
            data := pd.read_pickle(f)
            .T[[gene0_id, gene1_id]]
            .reindex(male_samples)
            .dropna()
        )
        is not None
        and data.shape[0] > 10
    }
).T.abs()

# %% tags=[]
res_all_males.shape

# %% tags=[]
res_all_males.head()

# %% tags=[]
res_all_males.sort_values("ccc")

# %% tags=[]
res_all_males.sort_values("pearson")

# %% tags=[]
res_all_males.sort_values("spearman")


# %% [markdown] tags=[]
# # Plot of male samples

# %% tags=[]
def get_tissue_file(name):
    """
    Given a part of a tissue name, it returns a file path to the
    expression data for that tissue in GTEx. It fails if the name does not
    match exactly one file (that is, if no file or more than one file is
    found).

    Args:
        name: a string with the tissue name (or a part of it).

    Returns:
        A Path object pointing to the gene expression file for the
        given tissue.

    Raises:
        AssertionError: if zero or several files in TISSUE_DIR contain
            `name` in their file name.
    """
    tissue_files = [f for f in TISSUE_DIR.glob("*.pkl") if name in f.name]

    # an ambiguous or unknown tissue name must not silently pick a file
    assert len(tissue_files) == 1, (
        f"Expected exactly one tissue file matching '{name}', "
        f"found {len(tissue_files)}: {[f.name for f in tissue_files]}"
    )
    return tissue_files[0]


# %% tags=[]
# testing
_tmp = get_tissue_file("whole_blood")
assert _tmp.exists()


# %% tags=[]
def simplify_tissue_name(tissue_name):
    """
    Convert a tissue identifier into a title for plots: the first character
    is upper-cased and underscores are replaced by spaces
    (for example, "whole_blood" becomes "Whole blood").

    Args:
        tissue_name: a non-empty string with the tissue identifier.

    Returns:
        A string with the human-readable tissue name.
    """
    return f"{tissue_name[0].upper()}{tissue_name[1:].replace('_', ' ')}"


# %% tags=[]
assert simplify_tissue_name("whole_blood") == "Whole blood"
assert simplify_tissue_name("uterus") == "Uterus"


# %% tags=[]
def plot_gene_pair(
    tissue_name,
    gene0,
    gene1,
    hue=None,
    kind="hex",
    ylim=None,
    bins="log",
    samples=None,
    filename_suffix="",
):
    """
    It plots (joint plot) a gene pair from the given tissue. It saves the plot
    for the manuscript as an SVG file in OUTPUT_FIGURE_DIR.

    Args:
        tissue_name: tissue name (or a part of it) as accepted by
            get_tissue_file; it is also used in the title and file name.
        gene0: Ensembl ID of the gene shown in the x axis.
        gene1: Ensembl ID of the gene shown in the y axis.
        hue: name of a column in gtex_metadata used to color the points
            (if given, the hue order is fixed to "Male", "Female"), or None.
        kind: kind of joint plot, passed to seaborn's jointplot.
        ylim: limits of the y axis of the joint axes, or None to keep the
            default ones.
        bins: not used by this function; it is kept so calls that pass it
            keep working.
        samples: list of sample IDs to plot, or None to plot all the samples
            of the tissue. Samples not present in the tissue are discarded.
        filename_suffix: string appended to the output file name (before the
            extension).

    Returns:
        A dataframe with the plotted data: the expression of both genes
        merged with the GTEx metadata of each sample.
    """
    # read the expression of both genes, with samples in rows
    tissue_data = pd.read_pickle(get_tissue_file(tissue_name)).T[[gene0, gene1]]
    if samples is not None:
        # keep the requested samples only; those that are missing in this
        # tissue become rows of NaN with reindex and are dropped
        tissue_data = tissue_data.reindex(samples).dropna()

    # merge gene expression with metadata
    tissue_data = pd.merge(
        tissue_data,
        gtex_metadata,
        how="inner",
        left_index=True,
        right_index=True,
        validate="one_to_one",
    )

    # get gene symbols
    gene0_symbol, gene1_symbol = gene_map[gene0], gene_map[gene1]
    display((gene0_symbol, gene1_symbol))

    # compute correlations for this gene pair
    _clustermatch = ccc(tissue_data[gene0], tissue_data[gene1])
    _pearson = pearsonr(tissue_data[gene0], tissue_data[gene1])[0]
    _spearman = spearmanr(tissue_data[gene0], tissue_data[gene1])[0]

    _title = f"{simplify_tissue_name(tissue_name)}\n$c={_clustermatch:.2f}$ $p={_pearson:.2f}$ $s={_spearman:.2f}$"

    other_args = {
        "kind": kind,
        "rasterized": True,
        "hue_order": None if hue is None else ["Male", "Female"],
    }

    with sns.plotting_context("paper", font_scale=1.5):
        p = sns.jointplot(
            data=tissue_data,
            x=gene0,
            y=gene1,
            hue=hue,
            **other_args,
        )

        if ylim is not None:
            p.ax_joint.set_ylim(ylim)

        # the axis labels hold the Ensembl IDs (the column names); show the
        # gene symbols instead
        gene_x_id = p.ax_joint.get_xlabel()
        gene_x_symbol = gene_map[gene_x_id]
        p.ax_joint.set_xlabel(f"{gene_x_symbol}", fontstyle="italic")

        gene_y_id = p.ax_joint.get_ylabel()
        gene_y_symbol = gene_map[gene_y_id]
        p.ax_joint.set_ylabel(f"{gene_y_symbol}", fontstyle="italic")

        p.fig.suptitle(_title)

        # save
        output_file = (
            OUTPUT_FIGURE_DIR
            / f"gtex_{tissue_name}-{gene_x_symbol}_vs_{gene_y_symbol}{filename_suffix}.svg"
        )
        display(output_file)

        plt.savefig(
            output_file,
            bbox_inches="tight",
            dpi=300,
            facecolor="white",
        )

    return tissue_data


# %% [markdown] tags=[]
# ## Brain cerebellum (all samples and males only)

# %% tags=[]
_tissue_data = plot_gene_pair(
    "brain_cerebellum",
    gene0_id,
    gene1_id,
    hue="SEX",
    kind="scatter",
    samples=None,
    filename_suffix="-all",
)

# %% tags=[]
_tissue_data = plot_gene_pair(
    "brain_cerebellum",
    gene0_id,
    gene1_id,
    hue="SEX",
    kind="scatter",
    samples=male_samples,
    filename_suffix="-males",
)

# %% [markdown] tags=[]
# ## Small intestine (terminal ileum) (all samples and males only)

# %% tags=[]
_tissue_data = plot_gene_pair(
    "small_intestine_terminal_ileum",
    gene0_id,
    gene1_id,
    hue="SEX",
    kind="scatter",
    samples=None,
    filename_suffix="-all",
)

# %% tags=[]
_tissue_data = plot_gene_pair(
    "small_intestine_terminal_ileum",
    gene0_id,
    gene1_id,
    hue="SEX",
    kind="scatter",
    samples=male_samples,
    filename_suffix="-males",
)

# %% [markdown] tags=[]
# # Create final figure

# %% tags=[]
from svgutils.compose import Figure, SVG, Panel

# %% tags=[]
# combine the two "-males" joint plots saved above, side by side, into a
# single SVG file
Figure(
    "6.0767480cm",
    "8.7045984cm",
    # Panel(
    #     SVG(OUTPUT_FIGURE_DIR / "gtex_brain_cerebellum-KDM6A_vs_UTY-all.svg").scale(0.005),
    #     SVG(OUTPUT_FIGURE_DIR / "gtex_small_intestine-KDM6A_vs_UTY-all.svg").scale(0.005).move(2, 0),
    # ),
    Panel(
        SVG(OUTPUT_FIGURE_DIR / "gtex_brain_cerebellum-KDM6A_vs_UTY-males.svg").scale(
            0.005
        ),
        SVG(
            OUTPUT_FIGURE_DIR
            / "gtex_small_intestine_terminal_ileum-KDM6A_vs_UTY-males.svg"
        )
        .scale(0.005)
        .move(2, 0),
    ).move(0, 2.20),
).save(
    OUTPUT_FIGURE_DIR
    / "gtex-KDM6A_vs_UTY-brain_cerebellum_and_small_intestine_terminal_ileum-males.svg"
)

# %% [markdown] tags=[]
# Now open the file, resize the page to fit the drawing, and add a white rectangle to the background.

# %% [markdown] tags=[]
# # Understand how CCC divides samples

# %% [markdown] tags=[]
# ## Prepare datasets

# %% tags=[]
# Long-format dataframe with the four datasets stacked: "x" is the first
# column (gene0) and "y" the second column (gene1) of each dataset.
# pd.concat is used instead of DataFrame.append, which was removed in
# pandas 2.0; the resulting rows, columns and index are the same.
datasets_df = pd.concat(
    [
        pd.DataFrame(
            {
                "dataset": _dataset_name,
                "x": _dataset_data.iloc[:, 0],
                "y": _dataset_data.iloc[:, 1],
            }
        )
        for _dataset_name, _dataset_data in [
            ("Brain cerebellum (all)", brain_cerebellum),
            ("Small intestine (terminal ileum) (all)", small_intestine),
            ("Brain cerebellum (males)", brain_cerebellum_males),
            ("Small intestine (terminal ileum) (males)", small_intestine_males),
        ]
    ],
    ignore_index=True,
)

# %% tags=[]
# dataset name -> dataframe with the "x" and "y" columns of that dataset
datasets = {
    idx: df.drop(columns="dataset") for idx, df in datasets_df.groupby("dataset")
}


# %% [markdown] tags=[]
# ## Plot

# %% tags=[]
def _get_cluster_boundaries(values, partition):
    """
    Given a data vector and the cluster label of each of its elements, it
    returns the points that separate adjacent clusters.

    Args:
        values: 1d numpy array with the data.
        partition: 1d numpy array with the cluster label of each element
            in values.

    Returns:
        A list with one scalar per pair of adjacent clusters: the midpoint
        between the maximum value of a cluster and the minimum value of the
        next one (clusters are ordered by their minimum value).
    """
    # (min, max) of the data in each cluster, ordered by the minimum
    cluster_ranges = sorted(
        (
            (values[partition == k].min(), values[partition == k].max())
            for k in np.unique(partition)
        ),
        key=lambda cluster_range: cluster_range[0],
    )

    return [
        (current_max + next_min) / 2.0
        for (_, current_max), (next_min, _) in zip(cluster_ranges, cluster_ranges[1:])
    ]


def get_cm_line_points(x, y, max_parts, parts):
    """
    Given two data vectors (x and y) and the max_parts and parts
    returned from calling ccc (with return_parts=True), this function returns
    two lists with scalars to draw the lines that separate clusters in x
    and y.

    Args:
        x: 1d numpy array with the data in the x axis.
        y: 1d numpy array with the data in the y axis.
        max_parts: indexes of the partitions that maximize the coefficient;
            max_parts[0] selects the partition of x and max_parts[1] the
            partition of y.
        parts: partitions of x (parts[0]) and y (parts[1]); each partition
            assigns a cluster label to each element of the data vector.

    Returns:
        A tuple (x_line_points, y_line_points) with the positions of the
        lines that separate clusters in x and in y, respectively.
    """
    # get the ccc partitions that maximize the coefficient
    x_line_points = _get_cluster_boundaries(x, parts[0][max_parts[0]])
    y_line_points = _get_cluster_boundaries(y, parts[1][max_parts[1]])

    return x_line_points, y_line_points


# %% tags=[]
def plot_ccc_partitions(col_order, output_filename):
    """
    It plots, for each of the given datasets, a scatter plot of x and y with
    the lines that separate the clusters of the partitions that maximize ccc
    and a text box with the ccc value. It saves the figure in
    OUTPUT_FIGURE_DIR.

    Args:
        col_order: list with the names of the datasets to plot (values of the
            "dataset" column in datasets_df), one facet each.
        output_filename: name of the output file, inside OUTPUT_FIGURE_DIR.
    """
    with sns.plotting_context("paper", font_scale=1.8):
        g = sns.FacetGrid(
            data=datasets_df,
            col="dataset",
            col_order=col_order,
            col_wrap=2,
            height=5,
        )
        g.map(sns.scatterplot, "x", "y", s=50, alpha=1)
        g.set_titles(row_template="{row_name}", col_template="{col_name}")

        for ds, ax in g.axes_dict.items():
            df = datasets[ds].to_numpy()
            x, y = df[:, 0], df[:, 1]

            # ccc: a single call provides both the coefficient shown in the
            # text box and the partitions used to draw the lines
            c, max_parts, parts = ccc(x, y, return_parts=True)

            x_line_points, y_line_points = get_cm_line_points(x, y, max_parts, parts)
            for yp in y_line_points:
                ax.hlines(y=yp, xmin=-0.5, xmax=30, color="r", alpha=0.5)

            for xp in x_line_points:
                ax.vlines(x=xp, ymin=-0.5, ymax=18, color="r", alpha=0.5)

            # add text box for the statistics
            stats = f"$c$ = {c:.2f}"
            bbox = dict(boxstyle="round", fc="white", ec="black", alpha=0.75)
            ax.text(
                0.95,
                0.90,
                stats,
                fontsize=14,
                bbox=bbox,
                transform=ax.transAxes,
                horizontalalignment="right",
            )

        plt.savefig(
            OUTPUT_FIGURE_DIR / output_filename,
            dpi=300,
            bbox_inches="tight",
            facecolor="white",
        )


# %% tags=[]
plot_ccc_partitions(
    col_order=[
        "Brain cerebellum (males)",
        "Small intestine (terminal ileum) (males)",
    ],
    output_filename="gtex-KDM6A_vs_UTY-brain_cerebellum_and_small_intestine_terminal_ileum-clusters-males.png",
)

# %% tags=[]
plot_ccc_partitions(
    col_order=[
        "Brain cerebellum (all)",
        "Small intestine (terminal ileum) (all)",
    ],
    output_filename="gtex-KDM6A_vs_UTY-brain_cerebellum_and_small_intestine_terminal_ileum-clusters-all.png",
)

# %% tags=[]