Skip to content

Commit

Permalink
updated all files, should solve issue with proteomic
Browse files Browse the repository at this point in the history
  • Loading branch information
jclachance committed Nov 21, 2017
1 parent 4f452a1 commit ca1f675
Show file tree
Hide file tree
Showing 46 changed files with 63,582 additions and 3,180 deletions.
4 changes: 3 additions & 1 deletion BOFdat/dna.py
Original file line number Diff line number Diff line change
Expand Up @@ -56,6 +56,8 @@ def _convert_to_coefficient(model, ratio_genome, CELL_WEIGHT, DNA_RATIO):
# Transform the ratios into mmol/gDW
DNA_WEIGHT = CELL_WEIGHT * DNA_RATIO

DIPHOSPHATE_WEIGHT = 174.951262

base_to_bigg = {'A': model.metabolites.datp_c, 'T': model.metabolites.dttp_c,
'C': model.metabolites.dctp_c, 'G': model.metabolites.dgtp_c}
coefficients,metabolites = [],[]
Expand All @@ -65,7 +67,7 @@ def _convert_to_coefficient(model, ratio_genome, CELL_WEIGHT, DNA_RATIO):
ratio = ratio_genome.get(letter)
total_weight = ratio * DNA_WEIGHT
metab = base_to_bigg.get(letter)
mol_weight = metab.formula_weight
mol_weight = metab.formula_weight - DIPHOSPHATE_WEIGHT
mmols_per_cell = (total_weight / mol_weight) * 1000
mmols_per_gDW = mmols_per_cell / CELL_WEIGHT
coefficients.append(mmols_per_gDW)
Expand Down
Binary file modified BOFdat/dna.pyc
Binary file not shown.
77 changes: 32 additions & 45 deletions BOFdat/lipid.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,47 +5,50 @@
This module generates BOFsc for the lipid content of the cell.
"""


def _import_model(path_to_model):
    """
    Load a COBRA model from a JSON or SBML (XML) file.

    :param path_to_model: path to the model file; the format is chosen from the text following the last '.' in the path
    :return: the model object returned by the matching cobra.io loader
    :raises ValueError: if the extension is neither 'json' nor 'xml'
    """
    extension = path_to_model.split('.')[-1]
    # Reject unsupported formats before doing any work: the previous version only
    # printed a message and then crashed with UnboundLocalError on `return model`.
    if extension not in ('json', 'xml'):
        raise ValueError('Model format type not supported: %r (expected json or xml)' % extension)
    import cobra
    if extension == 'json':
        return cobra.io.load_json_model(path_to_model)
    return cobra.io.read_sbml_model(path_to_model)

def filter_for_model_metab(path_to_conversion_file, path_to_model):
    """
    Keep only the lipids of the conversion file whose BiGG identifier is a metabolite of the model.

    :param path_to_conversion_file: path to a CSV file whose first column holds the lipid names used in the lipidomic data and whose second column holds the matching BiGG identifiers. This file is generated through manual curation from the modeller.
    :param path_to_model: a path to the model, format supported are json and xml
    :return: updated dataframe (columns 'lipid_name' and 'lipid_id') with metabolites found in the model
    """
    import pandas as pd
    # Get the model
    model = _import_model(path_to_model)
    # Get the identifiers of the metabolites in the model (a set gives O(1) membership tests)
    model_metab_id = {m.id for m in model.metabolites}
    # Get to_bigg_dict: lipid name (first column) -> BiGG identifier (second column)
    to_bigg_df = pd.read_csv(path_to_conversion_file)
    lipid_names = to_bigg_df[to_bigg_df.columns[0]]
    lipid_ids = to_bigg_df[to_bigg_df.columns[1]]
    to_bigg_dict = dict(zip(lipid_names, lipid_ids))

    # Get the metabolites that are in the model.
    # The test is on the BiGG identifier (the value), not on the lipid name (the key).
    model_metab = {k: v for k, v in to_bigg_dict.items() if v in model_metab_id}

    # Get the metabolites that are not in the model but present in OMICs data
    non_model_metab = [k for k, v in to_bigg_dict.items() if v not in model_metab_id]
    if non_model_metab:
        print("These lipids were not found in the model but were present in your lipidomic data, "
              "consider adding them to your model: %s " % (non_model_metab,))

    # Materialize keys/values as lists so the constructor behaves the same on Python 2 and 3
    model_metab_df = pd.DataFrame({'lipid_name': list(model_metab.keys()),
                                   'lipid_id': list(model_metab.values())},
                                  columns=['lipid_name', 'lipid_id'])

    return model_metab_df

def generate_coefficients(path_to_lipidomic,path_to_bigg_dict,
path_to_model,
Expand Down Expand Up @@ -83,24 +86,10 @@ def make_compliant_lipidomic(path_to_lipidomic):

def make_compliant_bigg(path_to_bigg_dict):
    """
    Read the lipid name to BiGG identifier conversion file.

    :param path_to_bigg_dict: path to a CSV file made of a header row followed by two columns
    :return: a dataframe with the columns 'lipid_name' and 'lipid_id'
    """
    import pandas as pd
    # The file's own header row is skipped and replaced by the fixed column names,
    # so the downstream merge on 'lipid_name' works whatever the file's header says.
    return pd.read_csv(path_to_bigg_dict, names=['lipid_name', 'lipid_id'], skiprows=1)

#Operation 0.3
def import_model(path_to_model):
    # Kept only as an alias for code still calling the old name; the loading logic
    # lives in the module-level _import_model helper.
    return _import_model(path_to_model)

# Operation 1
def convert_lipidomics_to_bigg(lipidomic,to_bigg_dict):
def convert_lipidomics_to_bigg(lipid_abun,lipid_conv):
"""
This function generates a dictionary of BiGG identifiers that were generated through manual curation of the user
with their relative abundances.
Expand All @@ -112,20 +101,18 @@ def convert_lipidomics_to_bigg(lipidomic,to_bigg_dict):
:return: a dictionary containing BiGG identifiers and their relative abundances
"""
import pandas as pd
#Generate the dictionary
keys,values = [],[]
#Generate the dictionary of lipid_id and relative abundances
df = pd.merge(left=lipid_conv, right=lipid_abun, on='lipid_name')
df1 = pd.concat([df.lipid_id, df.abundance], axis=1)
grouped = df1.groupby('lipid_id').agg(lambda x: sum(x))

for i,row in lipidomic.iterrows():
keys.append(to_bigg_dict.get(row.lipid_name))
values.append(row.abundance)

return dict(zip(keys,values))
return dict(zip([i for i in grouped.index], [i for i in grouped.abundance]))

# Operation 2
def get_relative_abundance(bigg_abundance):
    """
    Normalize the abundances so that they sum to 1.

    :param bigg_abundance: a dictionary containing BiGG identifiers and their abundances
    :return: a dictionary mapping each identifier to its abundance divided by the total abundance
    :raises ZeroDivisionError: if the dictionary is not empty and its abundances sum to zero
    """
    # Calculate relative abundances
    total_peak = sum(bigg_abundance.values())
    # float() forces true division even when the abundances are integers under Python 2
    return {k: float(v) / total_peak for k, v in bigg_abundance.items()}

# Operation 3
def get_lipid_weight(model,compound_list,R_WEIGHT):
Expand Down Expand Up @@ -191,11 +178,11 @@ def calculate_coefficients(weight_dict,relative_abundance,LIPID_WEIGHT,CELL_WEIG
#0.2- Make data compliant for the rest of the functions
lipidomic_compliant = make_compliant_lipidomic(path_to_lipidomic)
bigg_compliant = make_compliant_bigg(path_to_bigg_dict)

#0.3- Get the model
model = import_model(path_to_model)
model = _import_model(path_to_model)
#1- Generate a dictionary of BiGG IDs and relative abundances
bigg_abundance = convert_lipidomics_to_bigg(lipidomic_compliant, bigg_compliant)

#2- Get the relative abundance of each lipid
rel_abundance = get_relative_abundance(bigg_abundance)
#3- Get the weight of each lipid species
Expand Down
Binary file modified BOFdat/lipid.pyc
Binary file not shown.
22 changes: 14 additions & 8 deletions BOFdat/protein.py
Original file line number Diff line number Diff line change
Expand Up @@ -54,8 +54,11 @@ def _import_model(path_to_model):
def _import_proteomic(path_to_proteomic):
    """
    Read the proteomic data into a dictionary of gene identifiers and abundances.

    :param path_to_proteomic: path to a CSV file made of a header row followed by two columns, read as 'gene_ID' and 'Mean'
    :return: a dictionary mapping each gene identifier (as str) to its abundance (as float)
    """
    import pandas as pd
    proteomics = pd.read_csv(path_to_proteomic, names=['gene_ID', 'Mean'], skiprows=1)
    # Rows with any missing value are dropped before the dictionary is built, so that
    # no NaN key or NaN abundance reaches the downstream normalization.
    if proteomics.isnull().values.any():
        print('Some proteins in your dataset do not have associated abundance, removing them')
        proteomics = proteomics.dropna()
    keys = [str(k) for k in proteomics.gene_ID]
    values = [float(v) for v in proteomics.Mean]
    return dict(zip(keys, values))

def _get_aa_composition(seq_dict):
Expand All @@ -67,7 +70,7 @@ def _get_aa_composition(seq_dict):
# Keys = amino acid by letter code
# Values = the occurrence of that amino acid
list_of_dict = []
for k,v seq_dict.iteritems():
for k,v in seq_dict.iteritems():
list_of_occurences = []
# Get the occurrence for each letter
for letter in AMINO_ACIDS:
Expand Down Expand Up @@ -119,7 +122,7 @@ def _get_norm_sum(normalized_dict):

return norm_sum

def _get_ratio(normalized_dict, norm_sum, PROTEIN_RATIO):
def _get_ratio(normalized_dict, norm_sum, PROTEIN_RATIO, CELL_WEIGHT):
# 2- Divide letter to norm_sum to get ratio of each amino acid in the cell
# based on proteomic data
ratio_dict = {'A': 0., 'C': 0., 'D': 0., 'E': 0., 'F': 0., 'G': 0., 'H': 0., 'I': 0.,
Expand All @@ -138,7 +141,8 @@ def _get_ratio(normalized_dict, norm_sum, PROTEIN_RATIO):
return ratio_dict

def _convert_to_coefficient(ratio_dict, path_to_model, CELL_WEIGHT):
model = import_model(path_to_model)
WATER_WEIGHT = 18.01528
model = _import_model(path_to_model)
# 3- Convert gram ratios to mmol/g Dry weight
'''
To verify that the normalized to grams to get to the total amount of protein
Expand All @@ -161,7 +165,7 @@ def _convert_to_coefficient(ratio_dict, path_to_model, CELL_WEIGHT):
# Get number of moles from number of grams
for letter in AMINO_ACIDS:
metab = letter_to_bigg.get(letter)
mol_weight = metab.formula_weight
mol_weight = metab.formula_weight - WATER_WEIGHT
grams = ratio_dict.get(letter)
mmols_per_cell = (grams / mol_weight) * 1000
mmols_per_gDW = mmols_per_cell / CELL_WEIGHT
Expand Down Expand Up @@ -192,15 +196,17 @@ def generate_coefficients(path_to_genbank, path_to_model, path_to_proteomic, CEL
"""
# Operations
# 1- Parse the genome, extract protein sequence, count and store amino acid composition of each protein
if PROTEIN_RATIO > 1.:
print('Must enter ratio, value between 0. and 1.')
seq_dict = _get_protein_sequence(path_to_genbank)
list_of_dict = _get_aa_composition(seq_dict)
normalized_dict = _normalize_aa_composition(list_of_dict,path_to_proteomic)

# 2- Get coefficients from experimental proteomics data
# Proteomics data should come in a 2 columns standard format protein_id:abundance
norm_sum = _get_norm_sum(normalized_dict)
ratio_dict = get_ratio(normalized_dict, norm_sum, PROTEIN_RATIO)
biomass_coefficients = convert_to_coefficient(ratio_dict,path_to_model, CELL_WEIGHT)
ratio_dict = _get_ratio(normalized_dict, norm_sum, PROTEIN_RATIO, CELL_WEIGHT)
biomass_coefficients = _convert_to_coefficient(ratio_dict,path_to_model, CELL_WEIGHT)

return biomass_coefficients

Expand Down
Binary file modified BOFdat/protein.pyc
Binary file not shown.
3 changes: 2 additions & 1 deletion BOFdat/rna.py
Original file line number Diff line number Diff line change
Expand Up @@ -178,6 +178,7 @@ def _total_coefficients(mRNA_fractions, tRNA_fractions, rRNA_fractions, mRNA_RAT


def _convert_to_mmolgDW(RNA_coefficients, model, RNA_RATIO, CELL_WEIGHT):
DIPHOSPHATE_WEIGHT = 174.951262
# Get coefficients for BIOMASS
# Transform the ratios into mmol/gDW
RNA_WEIGHT = CELL_WEIGHT * RNA_RATIO
Expand All @@ -190,7 +191,7 @@ def _convert_to_mmolgDW(RNA_coefficients, model, RNA_RATIO, CELL_WEIGHT):
ratio = RNA_coefficients.get(letter)
total_weight = ratio * RNA_WEIGHT
metab = rna_base_to_bigg.get(letter)
mol_weight = metab.formula_weight
mol_weight = metab.formula_weight - DIPHOSPHATE_WEIGHT
mmols_per_cell = (total_weight / mol_weight) * 1000
mmols_per_gDW = mmols_per_cell / CELL_WEIGHT
coefficients.append(mmols_per_gDW)
Expand Down
Binary file modified BOFdat/rna.pyc
Binary file not shown.
1 change: 1 addition & 0 deletions Example_usage/.~lock.lipidomic_conversion.csv#
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
,jean-christophe,jeanchristophe-OptiPlex-7040,20.11.2017 14:45,file:///home/jean-christophe/.config/libreoffice/4;
1 change: 0 additions & 1 deletion Example_usage/.~lock.maintenance.csv#

This file was deleted.

1 change: 1 addition & 0 deletions Example_usage/.~lock.new_lipidomic_abundances.csv#
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
,jean-christophe,jeanchristophe-OptiPlex-7040,20.11.2017 18:23,file:///home/jean-christophe/.config/libreoffice/4;
1 change: 1 addition & 0 deletions Example_usage/.~lock.new_lipidomic_conversion.csv#
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
,jean-christophe,jeanchristophe-OptiPlex-7040,20.11.2017 18:25,file:///home/jean-christophe/.config/libreoffice/4;
Loading

0 comments on commit ca1f675

Please sign in to comment.