-
Notifications
You must be signed in to change notification settings - Fork 28
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Create sample lookup JSON objects for frontend
- Loading branch information
Showing
4 changed files
with
7,703 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,264 @@ | ||
{ | ||
"cells": [ | ||
{ | ||
"cell_type": "markdown", | ||
"metadata": {}, | ||
"source": [ | ||
"# Export cancer-data to JSON for frontend" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": 1, | ||
"metadata": { | ||
"collapsed": false | ||
}, | ||
"outputs": [], | ||
"source": [ | ||
"import os\n", | ||
"import json\n", | ||
"import gzip\n", | ||
"\n", | ||
"import pandas" | ||
] | ||
}, | ||
{ | ||
"cell_type": "markdown", | ||
"metadata": {}, | ||
"source": [ | ||
"## gene_to_mutated_samples" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": 2, | ||
"metadata": { | ||
"collapsed": false | ||
}, | ||
"outputs": [], | ||
"source": [ | ||
"path = os.path.join('data', 'mutation-matrix.tsv.bz2')\n", | ||
"mutation_mat_df = pandas.read_table(path, index_col=0)" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": 3, | ||
"metadata": { | ||
"collapsed": false | ||
}, | ||
"outputs": [], | ||
"source": [ | ||
"gene_to_mutated_samples = dict()\n", | ||
"for entrez_gene_id, series in mutation_mat_df.iteritems():\n", | ||
" gene_to_mutated_samples[int(entrez_gene_id)] = list(series.index[series == 1])" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": 4, | ||
"metadata": { | ||
"collapsed": false | ||
}, | ||
"outputs": [ | ||
{ | ||
"data": { | ||
"text/plain": [ | ||
"'20.68 MB'" | ||
] | ||
}, | ||
"execution_count": 4, | ||
"metadata": {}, | ||
"output_type": "execute_result" | ||
} | ||
], | ||
"source": [ | ||
"path = os.path.join('data', 'json', 'gene_to_mutated_samples.json')\n", | ||
"with open(path, 'w') as write_file:\n", | ||
" json.dump(gene_to_mutated_samples, write_file, indent=2, sort_keys=True)\n", | ||
"'{:.2f} MB'.format(1e-6 * os.path.getsize(path))" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": 5, | ||
"metadata": { | ||
"collapsed": false | ||
}, | ||
"outputs": [ | ||
{ | ||
"data": { | ||
"text/plain": [ | ||
"'2.97 MB'" | ||
] | ||
}, | ||
"execution_count": 5, | ||
"metadata": {}, | ||
"output_type": "execute_result" | ||
} | ||
], | ||
"source": [ | ||
"path = os.path.join('data', 'json', 'gene_to_mutated_samples.json.gz')\n", | ||
"with gzip.open(path, 'wt') as write_file:\n", | ||
" json.dump(gene_to_mutated_samples, write_file, indent=2, sort_keys=True)\n", | ||
"'{:.2f} MB'.format(1e-6 * os.path.getsize(path))" | ||
] | ||
}, | ||
{ | ||
"cell_type": "markdown", | ||
"metadata": {}, | ||
"source": [ | ||
"## disease_to_samples" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": 6, | ||
"metadata": { | ||
"collapsed": false | ||
}, | ||
"outputs": [ | ||
{ | ||
"data": { | ||
"text/html": [ | ||
"<div>\n", | ||
"<table border=\"1\" class=\"dataframe\">\n", | ||
" <thead>\n", | ||
" <tr style=\"text-align: right;\">\n", | ||
" <th></th>\n", | ||
" <th>sample_id</th>\n", | ||
" <th>patient_id</th>\n", | ||
" <th>sample_type</th>\n", | ||
" <th>disease</th>\n", | ||
" <th>acronym</th>\n", | ||
" <th>organ_of_origin</th>\n", | ||
" <th>gender</th>\n", | ||
" <th>age_diagnosed</th>\n", | ||
" <th>dead</th>\n", | ||
" <th>days_survived</th>\n", | ||
" <th>recurred</th>\n", | ||
" <th>days_recurrence_free</th>\n", | ||
" <th>n_mutations</th>\n", | ||
" </tr>\n", | ||
" </thead>\n", | ||
" <tbody>\n", | ||
" <tr>\n", | ||
" <th>0</th>\n", | ||
" <td>TCGA-02-0047-01</td>\n", | ||
" <td>TCGA-02-0047</td>\n", | ||
" <td>Primary Tumor</td>\n", | ||
" <td>glioblastoma multiforme</td>\n", | ||
" <td>GBM</td>\n", | ||
" <td>Brain</td>\n", | ||
" <td>Male</td>\n", | ||
" <td>78.0</td>\n", | ||
" <td>1.0</td>\n", | ||
" <td>448.0</td>\n", | ||
" <td>NaN</td>\n", | ||
" <td>NaN</td>\n", | ||
" <td>39</td>\n", | ||
" </tr>\n", | ||
" <tr>\n", | ||
" <th>1</th>\n", | ||
" <td>TCGA-02-0055-01</td>\n", | ||
" <td>TCGA-02-0055</td>\n", | ||
" <td>Primary Tumor</td>\n", | ||
" <td>glioblastoma multiforme</td>\n", | ||
" <td>GBM</td>\n", | ||
" <td>Brain</td>\n", | ||
" <td>Female</td>\n", | ||
" <td>62.0</td>\n", | ||
" <td>1.0</td>\n", | ||
" <td>76.0</td>\n", | ||
" <td>NaN</td>\n", | ||
" <td>NaN</td>\n", | ||
" <td>33</td>\n", | ||
" </tr>\n", | ||
" </tbody>\n", | ||
"</table>\n", | ||
"</div>" | ||
], | ||
"text/plain": [ | ||
" sample_id patient_id sample_type disease \\\n", | ||
"0 TCGA-02-0047-01 TCGA-02-0047 Primary Tumor glioblastoma multiforme \n", | ||
"1 TCGA-02-0055-01 TCGA-02-0055 Primary Tumor glioblastoma multiforme \n", | ||
"\n", | ||
" acronym organ_of_origin gender age_diagnosed dead days_survived \\\n", | ||
"0 GBM Brain Male 78.0 1.0 448.0 \n", | ||
"1 GBM Brain Female 62.0 1.0 76.0 \n", | ||
"\n", | ||
" recurred days_recurrence_free n_mutations \n", | ||
"0 NaN NaN 39 \n", | ||
"1 NaN NaN 33 " | ||
] | ||
}, | ||
"execution_count": 6, | ||
"metadata": {}, | ||
"output_type": "execute_result" | ||
} | ||
], | ||
"source": [ | ||
"path = os.path.join('data', 'samples.tsv')\n", | ||
"sample_df = pandas.read_table(path)\n", | ||
"sample_df.head(2)" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": 7, | ||
"metadata": { | ||
"collapsed": false | ||
}, | ||
"outputs": [], | ||
"source": [ | ||
"disease_to_samples = {acronym: sorted(sample_ids) for acronym, sample_ids in sample_df.groupby('acronym').sample_id}" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": 8, | ||
"metadata": { | ||
"collapsed": false | ||
}, | ||
"outputs": [ | ||
{ | ||
"data": { | ||
"text/plain": [ | ||
"'0.17 MB'" | ||
] | ||
}, | ||
"execution_count": 8, | ||
"metadata": {}, | ||
"output_type": "execute_result" | ||
} | ||
], | ||
"source": [ | ||
"path = os.path.join('data', 'json', 'disease_to_samples.json')\n", | ||
"with open(path, 'w') as write_file:\n", | ||
" json.dump(disease_to_samples, write_file, indent=2, sort_keys=True)\n", | ||
"'{:.2f} MB'.format(1e-6 * os.path.getsize(path))" | ||
] | ||
} | ||
], | ||
"metadata": { | ||
"kernelspec": { | ||
"display_name": "Python [default]", | ||
"language": "python", | ||
"name": "python3" | ||
}, | ||
"language_info": { | ||
"codemirror_mode": { | ||
"name": "ipython", | ||
"version": 3 | ||
}, | ||
"file_extension": ".py", | ||
"mimetype": "text/x-python", | ||
"name": "python", | ||
"nbconvert_exporter": "python", | ||
"pygments_lexer": "ipython3", | ||
"version": "3.5.2" | ||
} | ||
}, | ||
"nbformat": 4, | ||
"nbformat_minor": 1 | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,2 +1,3 @@ | ||
# Ignore compressed files in this directory due to file size | ||
*.bz2 | ||
*.gz |
Oops, something went wrong.