diff --git a/doc/src/geckomat/gather_kcats/fuzzyKcatMatching.html b/doc/src/geckomat/gather_kcats/fuzzyKcatMatching.html index 12670628..86e8db83 100644 --- a/doc/src/geckomat/gather_kcats/fuzzyKcatMatching.html +++ b/doc/src/geckomat/gather_kcats/fuzzyKcatMatching.html @@ -197,430 +197,415 @@

SOURCE CODE ^find_inKEGG(org_name,phylDistStruct.names); 0103 %build an index for genus in the phyl dist struct 0104 %first just extract the genus (i.e. the first part of the name) -0105 phylDistStruct.genus = cell(length(phylDistStruct.names),1); -0106 for i = 1:length(phylDistStruct.genus) -0107 name = phylDistStruct.names{i}; -0108 phylDistStruct.genus{i} = lower(name(1:(strfind(name,' ')-1))); %convert all to lower case to avoid problems with case -0109 end -0110 %create a map for the genuses -0111 phylDistStruct.uniqueGenusList = unique(phylDistStruct.genus); -0112 phylDistStruct.genusHashMap = containers.Map(phylDistStruct.uniqueGenusList,1:length(phylDistStruct.uniqueGenusList)); -0113 phylDistStruct.uniqueGenusIndices = cell(length(phylDistStruct.uniqueGenusList),1); -0114 -0115 %Then for each genus create a list with indices to the names -0116 for i = 1:length(phylDistStruct.genus) -0117 matchInd = cell2mat(values(phylDistStruct.genusHashMap,phylDistStruct.genus(i))); -0118 phylDistStruct.uniqueGenusIndices{matchInd} = [phylDistStruct.uniqueGenusIndices{matchInd};i]; -0119 end +0105 phylDistStruct.genus = lower(regexprep(phylDistStruct.names,'\s.*','')); +0106 %create a map for the genuses +0107 phylDistStruct.uniqueGenusList = unique(phylDistStruct.genus); +0108 phylDistStruct.genusHashMap = containers.Map(phylDistStruct.uniqueGenusList,1:length(phylDistStruct.uniqueGenusList)); +0109 phylDistStruct.uniqueGenusIndices = cell(length(phylDistStruct.uniqueGenusList),1); +0110 +0111 %Then for each genus create a list with indices to the names +0112 for i = 1:length(phylDistStruct.genus) +0113 matchInd = cell2mat(values(phylDistStruct.genusHashMap,phylDistStruct.genus(i))); +0114 phylDistStruct.uniqueGenusIndices{matchInd} = [phylDistStruct.uniqueGenusIndices{matchInd};i]; +0115 end +0116 +0117 %Allocate output +0118 kcats = zeros(length(eccodes),1); +0119 mM = length(eccodes); 0120 -0121 %Allocate output -0122 kcats = zeros(length(eccodes),1); -0123 mM = length(eccodes); -0124 -0125 %Create empty kcatInfo -0126 %Legacy, no longer given as output, rather used to construct -0127 %kcatList.wildcardLvl and kcatList.origin. -0128 kcatInfo.info.org_s = zeros(mM,1); -0129 kcatInfo.info.rest_s = zeros(mM,1); -0130 kcatInfo.info.org_ns = zeros(mM,1); -0131 kcatInfo.info.rest_ns = zeros(mM,1); -0132 kcatInfo.info.org_sa = zeros(mM,1); -0133 kcatInfo.info.rest_sa = zeros(mM,1); -0134 kcatInfo.info.wcLevel = NaN(mM,1); -0135 kcatInfo.stats.queries = 0; -0136 kcatInfo.stats.org_s = 0; -0137 kcatInfo.stats.rest_s = 0; -0138 kcatInfo.stats.org_ns = 0; -0139 kcatInfo.stats.rest_ns = 0; -0140 kcatInfo.stats.org_sa = 0; -0141 kcatInfo.stats.rest_sa = 0; -0142 kcatInfo.stats.wc0 = 0; -0143 kcatInfo.stats.wc1 = 0; -0144 kcatInfo.stats.wc2 = 0; -0145 kcatInfo.stats.wc3 = 0; -0146 kcatInfo.stats.wc4 = 0; -0147 kcatInfo.stats.matrix = zeros(6,5); -0148 -0149 %build an EC index to speed things up a bit - many of the ECs appear -0150 %many times - unnecessary to compare them all -0151 %so, here, each EC string appears only once, and you get a vector with -0152 %indices to the rows in KCATcell -0153 [ECIndexIds,~,ic] = unique(KCATcell{1}); -0154 EcIndexIndices = cell(length(ECIndexIds),1); -0155 for i = 1:length(EcIndexIndices) -0156 EcIndexIndices{i} = find(ic == i).'; -0157 end -0158 -0159 %Apply force wildcard level -0160 while forceWClvl > 0 -0161 eccodes=regexprep(eccodes,'(.)*(\.\d+)(\.-)*$','$1\.-$3'); -0162 forceWClvl = forceWClvl - 1; -0163 end -0164 if forceWClvl == 1 -0165 eccodes = regexprep(eccodes,'.*','-\.-\.-\.-'); -0166 end -0167 -0168 progressbar('Gathering kcat values by fuzzy matching to BRENDA database') -0169 %Main loop: -0170 for i = 1:mM -0171 %Match: -0172 EC = eccodes{i}; -0173 if ~isempty(EC) -0174 EC = strsplit(EC,';'); -0175 %Try to match direct reaction: -0176 if ~isempty(substrates{i}) -0177 [kcats(i), kcatInfo.info,kcatInfo.stats] = iterativeMatch(EC,substrates{i},substrCoeffs{i},i,KCATcell,... -0178 kcatInfo.info,kcatInfo.stats,org_name,... -0179 phylDistStruct,org_index,SAcell,ECIndexIds,EcIndexIndices); -0180 end -0181 end -0182 progressbar(i/mM) -0183 end -0184 -0185 kcatList.source = 'brenda'; -0186 kcatList.rxns = model.ec.rxns(ecRxns); -0187 kcatList.substrates = substrates; -0188 kcatList.kcats = kcats; -0189 kcatList.eccodes = eccodes; -0190 kcatList.wildcardLvl = kcatInfo.info.wcLevel; -0191 kcatList.origin = NaN(numel(model.ec.rxns(ecRxns)),1); -0192 % This can be refactored, iterativeMatch and their nested functions can -0193 % just directly report the origin number. -0194 origin = [kcatInfo.info.org_s kcatInfo.info.rest_s kcatInfo.info.org_ns kcatInfo.info.rest_ns kcatInfo.info.org_sa kcatInfo.info.rest_sa]; -0195 for i=1:6 -0196 kcatList.origin(find(origin(:,i))) = i; -0197 end -0198 end -0199 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% -0200 function [kcat,dir,tot] =iterativeMatch(EC,subs,substrCoeff,i,KCATcell,dir,tot,... -0201 name,phylDist,org_index,SAcell,ECIndexIds,EcIndexIndices) -0202 %Will iteratively try to match the EC number to some registry in BRENDA, -0203 %using each time one additional wildcard. -0204 -0205 kcat = zeros(size(EC)); -0206 origin = zeros(size(EC)); -0207 matches = zeros(size(EC)); -0208 wc_num = ones(size(EC)).*1000; -0209 for k = 1:length(EC) -0210 success = false; -0211 while ~success -0212 %Atempt match: -0213 [kcat(k),origin(k),matches(k)] = mainMatch(EC{k},subs,substrCoeff,KCATcell,... -0214 name,phylDist,... -0215 org_index,SAcell,ECIndexIds,EcIndexIndices); -0216 %If any match found, ends. If not, introduces one extra wild card and -0217 %tries again: -0218 if origin(k) > 0 -0219 success = true; -0220 wc_num(k) = sum(EC{k}=='-'); -0221 else -0222 dot_pos = [2 strfind(EC{k},'.')]; -0223 wild_num = sum(EC{k}=='-'); -0224 wc_text = '-.-.-.-'; -0225 EC{k} = [EC{k}(1:dot_pos(4-wild_num)) wc_text(1:2*wild_num+1)]; -0226 end -0227 end -0228 end -0229 -0230 if sum(origin) > 0 -0231 %For more than one EC: Choose the maximum value among the ones with the -0232 %less amount of wildcards and the better origin: -0233 best_pos = (wc_num == min(wc_num)); -0234 new_origin = origin(best_pos); -0235 best_pos = (origin == min(new_origin(new_origin~=0))); -0236 max_pos = find(kcat == max(kcat(best_pos))); -0237 wc_num = wc_num(max_pos(1)); -0238 origin = origin(max_pos(1)); -0239 matches = matches(max_pos(1)); -0240 kcat = kcat(max_pos(1)); -0241 -0242 %Update dir and tot: -0243 dir.org_s(i) = matches*(origin == 1); -0244 dir.rest_s(i) = matches*(origin == 2); -0245 dir.org_ns(i) = matches*(origin == 3); -0246 dir.org_sa(i) = matches*(origin == 4); -0247 dir.rest_ns(i) = matches*(origin == 5); -0248 dir.rest_sa(i) = matches*(origin == 6); -0249 dir.wcLevel(i) = wc_num; -0250 tot.org_s = tot.org_s + (origin == 1); -0251 tot.rest_s = tot.rest_s + (origin == 2); -0252 tot.org_ns = tot.org_ns + (origin == 3); -0253 tot.org_sa = tot.org_sa + (origin == 4); -0254 tot.rest_ns = tot.rest_ns + (origin == 5); -0255 tot.rest_sa = tot.rest_sa + (origin == 6); -0256 tot.wc0 = tot.wc0 + (wc_num == 0); -0257 tot.wc1 = tot.wc1 + (wc_num == 1); -0258 tot.wc2 = tot.wc2 + (wc_num == 2); -0259 tot.wc3 = tot.wc3 + (wc_num == 3); -0260 tot.wc4 = tot.wc4 + (wc_num == 4); -0261 tot.queries = tot.queries + 1; -0262 tot.matrix(origin,wc_num+1) = tot.matrix(origin,wc_num+1) + 1; -0263 end -0264 -0265 end -0266 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% -0267 -0268 function [kcat,origin,matches] = mainMatch(EC,subs,substrCoeff,KCATcell,... -0269 name,phylDist,org_index,SAcell,ECIndexIds,EcIndexIndices) -0270 -0271 %First make the string matching. This takes time, so we only want to do -0272 %this once: -0273 %Relaxes matching if wild cards are present: -0274 wild = false; -0275 wild_pos = strfind(EC,'-'); -0276 if ~isempty(wild_pos) -0277 EC = EC(1:wild_pos(1)-1); -0278 wild = true; -0279 end -0280 stringMatchesEC_cell = extract_string_matches(EC,KCATcell{1},wild,ECIndexIds,EcIndexIndices); -0281 -0282 % Matching function prioritizing organism and substrate specificity when -0283 % available. -0284 -0285 origin = 0; -0286 %First try to match organism and substrate: -0287 [kcat,matches] = matchKcat(EC,subs,substrCoeff,KCATcell,name,true,false,... -0288 phylDist,org_index,SAcell,stringMatchesEC_cell,[]); -0289 if matches > 0 && ~wild % If wildcard, ignore substrate match -0290 origin = 1; -0291 %If no match, try the closest organism but match the substrate: -0292 else -0293 [kcat,matches] = matchKcat(EC,subs,substrCoeff,KCATcell,'',true,false,... -0294 phylDist,org_index,SAcell,stringMatchesEC_cell,[]); -0295 if matches > 0 && ~wild % If wildcard, ignore substrate match -0296 origin = 2; -0297 %If no match, try to match organism but with any substrate: -0298 else -0299 [kcat,matches] = matchKcat(EC,subs,substrCoeff,KCATcell,name,false,false,... -0300 phylDist,org_index,SAcell,stringMatchesEC_cell,[]); -0301 if matches > 0 -0302 origin = 3; -0303 %If no match, try to match organism but for any substrate (SA*MW): -0304 else -0305 %create matching index for SA, has not been needed until now -0306 stringMatchesSA = extract_string_matches(EC,SAcell{1},wild,[],[]); -0307 -0308 [kcat,matches] = matchKcat(EC,subs,substrCoeff,KCATcell,name,false,... -0309 true,phylDist,org_index,... -0310 SAcell,stringMatchesEC_cell,stringMatchesSA); -0311 if matches > 0 -0312 origin = 4; -0313 %If no match, try any organism and any substrate: -0314 else -0315 [kcat,matches] = matchKcat(EC,subs,substrCoeff,KCATcell,'',false,... -0316 false,phylDist,... -0317 org_index,SAcell,stringMatchesEC_cell,stringMatchesSA); -0318 if matches > 0 -0319 origin = 5; -0320 %Again if no match, look for any org and SA*MW -0321 else -0322 [kcat,matches] = matchKcat(EC,subs,substrCoeff,KCATcell,'',... -0323 false,true,phylDist,... -0324 org_index,SAcell,stringMatchesEC_cell,stringMatchesSA); -0325 if matches > 0 -0326 origin = 6; -0327 end -0328 end -0329 -0330 end -0331 end -0332 end -0333 end -0334 end -0335 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% -0336 function [kcat,matches] = matchKcat(EC,subs,substrCoeff,KCATcell,organism,... -0337 substrate,SA,phylDist,... -0338 org_index,SAcell,KCATcellMatches,SAcellMatches) -0339 -0340 %Will go through BRENDA and will record any match. Afterwards, it will -0341 %return the average value and the number of matches attained. -0342 kcat = []; -0343 matches = 0; -0344 -0345 if SA -0346 %SAcell{1},wild,[],[] -0347 EC_indexes = extract_indexes(SAcellMatches,[],SAcell{2},subs,substrate,... -0348 organism,org_index,phylDist); +0121 %Create empty kcatInfo +0122 %Legacy, no longer given as output, rather used to construct +0123 %kcatList.wildcardLvl and kcatList.origin. +0124 kcatInfo.info.org_s = zeros(mM,1); +0125 kcatInfo.info.rest_s = zeros(mM,1); +0126 kcatInfo.info.org_ns = zeros(mM,1); +0127 kcatInfo.info.rest_ns = zeros(mM,1); +0128 kcatInfo.info.org_sa = zeros(mM,1); +0129 kcatInfo.info.rest_sa = zeros(mM,1); +0130 kcatInfo.info.wcLevel = NaN(mM,1); +0131 kcatInfo.stats.queries = 0; +0132 kcatInfo.stats.org_s = 0; +0133 kcatInfo.stats.rest_s = 0; +0134 kcatInfo.stats.org_ns = 0; +0135 kcatInfo.stats.rest_ns = 0; +0136 kcatInfo.stats.org_sa = 0; +0137 kcatInfo.stats.rest_sa = 0; +0138 kcatInfo.stats.wc0 = 0; +0139 kcatInfo.stats.wc1 = 0; +0140 kcatInfo.stats.wc2 = 0; +0141 kcatInfo.stats.wc3 = 0; +0142 kcatInfo.stats.wc4 = 0; +0143 kcatInfo.stats.matrix = zeros(6,5); +0144 +0145 %build an EC index to speed things up a bit - many of the ECs appear +0146 %many times - unnecessary to compare them all +0147 %so, here, each EC string appears only once, and you get a vector with +0148 %indices to the rows in KCATcell +0149 [ECIndexIds,~,ic] = unique(KCATcell{1}); +0150 EcIndexIndices = cell(length(ECIndexIds),1); +0151 for i = 1:length(EcIndexIndices) +0152 EcIndexIndices{i} = find(ic == i).'; +0153 end +0154 +0155 %Apply force wildcard level +0156 while forceWClvl > 0 +0157 eccodes=regexprep(eccodes,'(.)*(\.\d+)(\.-)*$','$1\.-$3'); +0158 forceWClvl = forceWClvl - 1; +0159 end +0160 if forceWClvl == 1 +0161 eccodes = regexprep(eccodes,'.*','-\.-\.-\.-'); +0162 end +0163 +0164 progressbar('Gathering kcat values by fuzzy matching to BRENDA database') +0165 %Main loop: +0166 for i = 1:mM +0167 %Match: +0168 EC = eccodes{i}; +0169 if ~isempty(EC) +0170 EC = strsplit(EC,';'); +0171 %Try to match direct reaction: +0172 if ~isempty(substrates{i}) +0173 [kcats(i), kcatInfo.info,kcatInfo.stats] = iterativeMatch(EC,substrates{i},substrCoeffs{i},i,KCATcell,... +0174 kcatInfo.info,kcatInfo.stats,org_name,... +0175 phylDistStruct,org_index,SAcell,ECIndexIds,EcIndexIndices); +0176 end +0177 end +0178 progressbar(i/mM) +0179 end +0180 +0181 kcatList.source = 'brenda'; +0182 kcatList.rxns = model.ec.rxns(ecRxns); +0183 kcatList.substrates = substrates; +0184 kcatList.kcats = kcats; +0185 kcatList.eccodes = eccodes; +0186 kcatList.wildcardLvl = kcatInfo.info.wcLevel; +0187 kcatList.origin = NaN(numel(model.ec.rxns(ecRxns)),1); +0188 % This can be refactored, iterativeMatch and their nested functions can +0189 % just directly report the origin number. +0190 origin = [kcatInfo.info.org_s kcatInfo.info.rest_s kcatInfo.info.org_ns kcatInfo.info.rest_ns kcatInfo.info.org_sa kcatInfo.info.rest_sa]; +0191 for i=1:6 +0192 kcatList.origin(find(origin(:,i))) = i; +0193 end +0194 end +0195 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +0196 function [kcat,dir,tot] =iterativeMatch(EC,subs,substrCoeff,i,KCATcell,dir,tot,... +0197 name,phylDist,org_index,SAcell,ECIndexIds,EcIndexIndices) +0198 %Will iteratively try to match the EC number to some registry in BRENDA, +0199 %using each time one additional wildcard. +0200 +0201 kcat = zeros(size(EC)); +0202 origin = zeros(size(EC)); +0203 matches = zeros(size(EC)); +0204 wc_num = ones(size(EC)).*1000; +0205 for k = 1:length(EC) +0206 success = false; +0207 while ~success +0208 %Atempt match: +0209 [kcat(k),origin(k),matches(k)] = mainMatch(EC{k},subs,substrCoeff,KCATcell,... +0210 name,phylDist,... +0211 org_index,SAcell,ECIndexIds,EcIndexIndices); +0212 %If any match found, ends. If not, introduces one extra wild card and +0213 %tries again: +0214 if origin(k) > 0 +0215 success = true; +0216 wc_num(k) = sum(EC{k}=='-'); +0217 else +0218 dot_pos = [2 strfind(EC{k},'.')]; +0219 wild_num = sum(EC{k}=='-'); +0220 wc_text = '-.-.-.-'; +0221 EC{k} = [EC{k}(1:dot_pos(4-wild_num)) wc_text(1:2*wild_num+1)]; +0222 end +0223 end +0224 end +0225 +0226 if sum(origin) > 0 +0227 %For more than one EC: Choose the maximum value among the ones with the +0228 %less amount of wildcards and the better origin: +0229 best_pos = (wc_num == min(wc_num)); +0230 new_origin = origin(best_pos); +0231 best_pos = (origin == min(new_origin(new_origin~=0))); +0232 max_pos = find(kcat == max(kcat(best_pos))); +0233 wc_num = wc_num(max_pos(1)); +0234 origin = origin(max_pos(1)); +0235 matches = matches(max_pos(1)); +0236 kcat = kcat(max_pos(1)); +0237 +0238 %Update dir and tot: +0239 dir.org_s(i) = matches*(origin == 1); +0240 dir.rest_s(i) = matches*(origin == 2); +0241 dir.org_ns(i) = matches*(origin == 3); +0242 dir.org_sa(i) = matches*(origin == 4); +0243 dir.rest_ns(i) = matches*(origin == 5); +0244 dir.rest_sa(i) = matches*(origin == 6); +0245 dir.wcLevel(i) = wc_num; +0246 tot.org_s = tot.org_s + (origin == 1); +0247 tot.rest_s = tot.rest_s + (origin == 2); +0248 tot.org_ns = tot.org_ns + (origin == 3); +0249 tot.org_sa = tot.org_sa + (origin == 4); +0250 tot.rest_ns = tot.rest_ns + (origin == 5); +0251 tot.rest_sa = tot.rest_sa + (origin == 6); +0252 tot.wc0 = tot.wc0 + (wc_num == 0); +0253 tot.wc1 = tot.wc1 + (wc_num == 1); +0254 tot.wc2 = tot.wc2 + (wc_num == 2); +0255 tot.wc3 = tot.wc3 + (wc_num == 3); +0256 tot.wc4 = tot.wc4 + (wc_num == 4); +0257 tot.queries = tot.queries + 1; +0258 tot.matrix(origin,wc_num+1) = tot.matrix(origin,wc_num+1) + 1; +0259 end +0260 +0261 end +0262 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +0263 +0264 function [kcat,origin,matches] = mainMatch(EC,subs,substrCoeff,KCATcell,... +0265 name,phylDist,org_index,SAcell,ECIndexIds,EcIndexIndices) +0266 +0267 %First make the string matching. This takes time, so we only want to do +0268 %this once: +0269 %Relaxes matching if wild cards are present: +0270 wild = false; +0271 wild_pos = strfind(EC,'-'); +0272 if ~isempty(wild_pos) +0273 EC = EC(1:wild_pos(1)-1); +0274 wild = true; +0275 end +0276 stringMatchesEC_cell = extract_string_matches(EC,KCATcell{1},wild,ECIndexIds,EcIndexIndices); +0277 +0278 % Matching function prioritizing organism and substrate specificity when +0279 % available. +0280 +0281 origin = 0; +0282 %First try to match organism and substrate: +0283 [kcat,matches] = matchKcat(EC,subs,substrCoeff,KCATcell,name,true,false,... +0284 phylDist,org_index,SAcell,stringMatchesEC_cell,[]); +0285 if matches > 0 && ~wild % If wildcard, ignore substrate match +0286 origin = 1; +0287 %If no match, try the closest organism but match the substrate: +0288 else +0289 [kcat,matches] = matchKcat(EC,subs,substrCoeff,KCATcell,'',true,false,... +0290 phylDist,org_index,SAcell,stringMatchesEC_cell,[]); +0291 if matches > 0 && ~wild % If wildcard, ignore substrate match +0292 origin = 2; +0293 %If no match, try to match organism but with any substrate: +0294 else +0295 [kcat,matches] = matchKcat(EC,subs,substrCoeff,KCATcell,name,false,false,... +0296 phylDist,org_index,SAcell,stringMatchesEC_cell,[]); +0297 if matches > 0 +0298 origin = 3; +0299 %If no match, try to match organism but for any substrate (SA*MW): +0300 else +0301 %create matching index for SA, has not been needed until now +0302 stringMatchesSA = extract_string_matches(EC,SAcell{1},wild,[],[]); +0303 +0304 [kcat,matches] = matchKcat(EC,subs,substrCoeff,KCATcell,name,false,... +0305 true,phylDist,org_index,... +0306 SAcell,stringMatchesEC_cell,stringMatchesSA); +0307 if matches > 0 +0308 origin = 4; +0309 %If no match, try any organism and any substrate: +0310 else +0311 [kcat,matches] = matchKcat(EC,subs,substrCoeff,KCATcell,'',false,... +0312 false,phylDist,... +0313 org_index,SAcell,stringMatchesEC_cell,stringMatchesSA); +0314 if matches > 0 +0315 origin = 5; +0316 %Again if no match, look for any org and SA*MW +0317 else +0318 [kcat,matches] = matchKcat(EC,subs,substrCoeff,KCATcell,'',... +0319 false,true,phylDist,... +0320 org_index,SAcell,stringMatchesEC_cell,stringMatchesSA); +0321 if matches > 0 +0322 origin = 6; +0323 end +0324 end +0325 +0326 end +0327 end +0328 end +0329 end +0330 end +0331 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +0332 function [kcat,matches] = matchKcat(EC,subs,substrCoeff,KCATcell,organism,... +0333 substrate,SA,phylDist,... +0334 org_index,SAcell,KCATcellMatches,SAcellMatches) +0335 +0336 %Will go through BRENDA and will record any match. Afterwards, it will +0337 %return the average value and the number of matches attained. +0338 kcat = []; +0339 matches = 0; +0340 +0341 if SA +0342 %SAcell{1},wild,[],[] +0343 EC_indexes = extract_indexes(SAcellMatches,[],SAcell{2},subs,substrate,... +0344 organism,org_index,phylDist); +0345 +0346 kcat = SAcell{3}(EC_indexes); +0347 org_cell = SAcell{2}(EC_indexes); +0348 MW_BRENDA = SAcell{4}(EC_indexes); 0349 -0350 kcat = SAcell{3}(EC_indexes); -0351 org_cell = SAcell{2}(EC_indexes); -0352 MW_BRENDA = SAcell{4}(EC_indexes); -0353 -0354 else -0355 %KCATcell{1},wild,ECIndexIds,EcIndexIndices -0356 EC_indexes = extract_indexes(KCATcellMatches,KCATcell{2},KCATcell{3},... -0357 subs,substrate,organism,org_index,... -0358 phylDist); -0359 if substrate -0360 for j = 1:length(EC_indexes) -0361 indx = EC_indexes(j); -0362 for k = 1:length(subs) -0363 if (isempty(subs{k})) -0364 break; -0365 end -0366 %l = logical(strcmpi(model.metNames,subs{k}).*(model.S(:,i)~=0)); %I don't understand the .* (model.S(:,i)~=0) part, it shouldn't be needed?/JG; -0367 if ~isempty(subs{k}) && strcmpi(subs{k},KCATcell{2}(indx)) -0368 if KCATcell{4}(indx) > 0 -0369 coeff = min(substrCoeff); -0370 kCatTmp = KCATcell{4}(indx); -0371 kcat = [kcat;kCatTmp/coeff]; -0372 end -0373 end -0374 end -0375 end -0376 else -0377 kcat = KCATcell{4}(EC_indexes); -0378 end -0379 end -0380 %Return maximum value: -0381 if isempty(kcat) -0382 kcat = 0; -0383 else -0384 matches = length(kcat); -0385 [kcat,MaxIndx] = max(kcat); +0350 else +0351 %KCATcell{1},wild,ECIndexIds,EcIndexIndices +0352 EC_indexes = extract_indexes(KCATcellMatches,KCATcell{2},KCATcell{3},... +0353 subs,substrate,organism,org_index,... +0354 phylDist); +0355 if substrate +0356 for j = 1:length(EC_indexes) +0357 indx = EC_indexes(j); +0358 for k = 1:length(subs) +0359 if (isempty(subs{k})) +0360 break; +0361 end +0362 %l = logical(strcmpi(model.metNames,subs{k}).*(model.S(:,i)~=0)); %I don't understand the .* (model.S(:,i)~=0) part, it shouldn't be needed?/JG; +0363 if ~isempty(subs{k}) && strcmpi(subs{k},KCATcell{2}(indx)) +0364 if KCATcell{4}(indx) > 0 +0365 coeff = min(substrCoeff); +0366 kCatTmp = KCATcell{4}(indx); +0367 kcat = [kcat;kCatTmp/coeff]; +0368 end +0369 end +0370 end +0371 end +0372 else +0373 kcat = KCATcell{4}(EC_indexes); +0374 end +0375 end +0376 %Return maximum value: +0377 if isempty(kcat) +0378 kcat = 0; +0379 else +0380 matches = length(kcat); +0381 [kcat,MaxIndx] = max(kcat); +0382 end +0383 %Avoid SA*Mw values over the diffusion limit rate [Bar-Even et al. 2011] +0384 if kcat>(1E7) +0385 kcat = 1E7; 0386 end -0387 %Avoid SA*Mw values over the diffusion limit rate [Bar-Even et al. 2011] -0388 if kcat>(1E7) -0389 kcat = 1E7; -0390 end -0391 end -0392 -0393 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% -0394 %Make the string matches of the ECs. This is heavy, so only do it once! -0395 % -0396 function EC_indexes = extract_string_matches(EC,EC_cell,wild,ECIndexIds,EcIndexIndices) -0397 EC_indexes = []; -0398 EC_indexesOld = []; -0399 if wild -0400 if (~isempty(ECIndexIds)) %In some cases the EC_cell is not from KCatCell -0401 X = find(contains(ECIndexIds, EC)); -0402 for j = 1:length(X) -0403 EC_indexes = [EC_indexes,EcIndexIndices{X(j)}]; -0404 end -0405 else %Not optimized -0406 for j=1:length(EC_cell) -0407 if strfind(EC_cell{j},EC)==1 -0408 EC_indexes = [EC_indexes,j]; -0409 end -0410 end -0411 end -0412 else -0413 if (~isempty(ECIndexIds)) %In some cases the EC_cell is not from KCatCell -0414 mtch = find(strcmpi(EC,ECIndexIds)); -0415 if (~isempty(mtch)) -0416 EC_indexes = EcIndexIndices{mtch}; +0387 end +0388 +0389 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +0390 %Make the string matches of the ECs. This is heavy, so only do it once! +0391 % +0392 function EC_indexes = extract_string_matches(EC,EC_cell,wild,ECIndexIds,EcIndexIndices) +0393 EC_indexes = []; +0394 EC_indexesOld = []; +0395 if wild +0396 if (~isempty(ECIndexIds)) %In some cases the EC_cell is not from KCatCell +0397 X = find(contains(ECIndexIds, EC)); +0398 for j = 1:length(X) +0399 EC_indexes = [EC_indexes,EcIndexIndices{X(j)}]; +0400 end +0401 else %Not optimized +0402 for j=1:length(EC_cell) +0403 if strfind(EC_cell{j},EC)==1 +0404 EC_indexes = [EC_indexes,j]; +0405 end +0406 end +0407 end +0408 else +0409 if (~isempty(ECIndexIds)) %In some cases the EC_cell is not from KCatCell +0410 mtch = find(strcmpi(EC,ECIndexIds)); +0411 if (~isempty(mtch)) +0412 EC_indexes = EcIndexIndices{mtch}; +0413 end +0414 else %%Not optimized +0415 if ~isempty(EC_cell) +0416 EC_indexes = transpose(find(strcmpi(EC,EC_cell))); 0417 end -0418 else %%Not optimized -0419 if ~isempty(EC_cell) -0420 EC_indexes = transpose(find(strcmpi(EC,EC_cell))); -0421 end -0422 end -0423 end -0424 -0425 end -0426 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% -0427 %Extract the indexes of the entries in the BRENDA data that meet the -0428 %conditions specified by the search criteria -0429 function EC_indexes = extract_indexes(EC_indCellStringMatches,subs_cell,orgs_cell,subs,... -0430 substrate,organism, org_index,... -0431 phylDist) -0432 -0433 EC_indexes = EC_indCellStringMatches;%reuse so the string comparisons are not run many times -0434 -0435 %If substrate=true then it will extract only the substrates appereances -0436 %indexes in the EC subset from the BRENDA cell array -0437 -0438 if substrate -0439 if (~isempty(EC_indexes)) %optimization -0440 Subs_indexes = []; -0441 for l = 1:length(subs) -0442 if (isempty(subs{l})) -0443 break; -0444 end -0445 Subs_indexes = horzcat(Subs_indexes,EC_indexes(strcmpi(subs(l),... -0446 subs_cell(EC_indexes)))); -0447 end -0448 EC_indexes = Subs_indexes; -0449 end -0450 end -0451 -0452 EC_orgs = orgs_cell(EC_indexes); -0453 %If specific organism values are requested looks for all the organism -0454 %repetitions on the subset BRENDA cell array(EC_indexes) -0455 if string(organism) ~= '' -0456 EC_indexes = EC_indexes(strcmpi(string(organism),EC_orgs)); -0457 -0458 %If KEGG code was assigned to the organism (model) then it will look for -0459 %the Kcat value for the closest organism -0460 elseif org_index~='*' %&& org_index~='' -0461 KEGG_indexes = [];temp = []; -0462 -0463 %For relating a phyl dist between the modelled organism and the organisms -0464 %on the BRENDA cell array it should search for a KEGG code for each of -0465 %these -0466 for j=1:length(EC_indexes) -0467 %Assigns a KEGG index for those found on the KEGG struct -0468 orgs_index = find(strcmpi(orgs_cell(EC_indexes(j)),phylDist.names),1); -0469 if ~isempty(orgs_index) -0470 KEGG_indexes = [KEGG_indexes; orgs_index]; -0471 temp = [temp;EC_indexes(j)]; -0472 %For values related to organisms without KEGG code, then it will -0473 %look for KEGG code for the first organism with the same genus -0474 else -0475 org = orgs_cell{EC_indexes(j)}; -0476 orgGenus = lower(org(1:(strfind(org,' ')-1))); -0477 if isKey(phylDist.genusHashMap,orgGenus) %annoyingly, this seems to be needed -0478 matchInd = cell2mat(values(phylDist.genusHashMap,{orgGenus})); -0479 matches = phylDist.uniqueGenusIndices{matchInd}; -0480 k = matches(1); -0481 KEGG_indexes = [KEGG_indexes;k]; -0482 temp = [temp;EC_indexes(j)]; -0483 end -0484 end -0485 end -0486 %Update the EC_indexes cell array -0487 EC_indexes = temp; -0488 %Looks for the taxonomically closest organism and saves the index of -0489 %its appearences in the BRENDA cell -0490 if ~isempty(EC_indexes) -0491 distances = phylDist.distMat(org_index,KEGG_indexes); -0492 EC_indexes = EC_indexes(distances == min(distances)); -0493 end -0494 end -0495 end -0496 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% -0497 function org_index = find_inKEGG(org_name,names) -0498 org_index = find(strcmpi(org_name,names)); -0499 if numel(org_index)>1 -0500 org_index = org_index(1); -0501 elseif isempty(org_index) -0502 i=1; -0503 while isempty(org_index) && i<length(names) -0504 % TODO: refactor with regexprep, keeping only first word. -0505 str = names{i}; -0506 if strcmpi(org_name(1:strfind(org_name,' ')-1),... -0507 str(1:strfind(str,' ')-1)) -0508 org_index = i; -0509 end -0510 i = i+1; -0511 end -0512 if isempty(org_index);org_index = '*';end -0513 end -0514 end -0515 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% -0516 function phylDistStruct = KEGG_struct(phylpath) -0517 load(phylpath) -0518 phylDistStruct.ids = transpose(phylDistStruct.ids); -0519 phylDistStruct.names = transpose(phylDistStruct.names); -0520 -0521 %phylDistStruct.names = regexprep(phylDistStruct.names,'\s*\(.*',''); -0522 for i=1:length(phylDistStruct.names) -0523 pos = strfind(phylDistStruct.names{i}, ' ('); -0524 if ~isempty(pos) -0525 phylDistStruct.names{i} = phylDistStruct.names{i}(1:pos-1); -0526 end -0527 end -0528 end +0418 end +0419 end +0420 +0421 end +0422 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +0423 %Extract the indexes of the entries in the BRENDA data that meet the +0424 %conditions specified by the search criteria +0425 function EC_indexes = extract_indexes(EC_indCellStringMatches,subs_cell,orgs_cell,subs,... +0426 substrate,organism, org_index,... +0427 phylDist) +0428 +0429 EC_indexes = EC_indCellStringMatches;%reuse so the string comparisons are not run many times +0430 +0431 %If substrate=true then it will extract only the substrates appereances +0432 %indexes in the EC subset from the BRENDA cell array +0433 +0434 if substrate +0435 if (~isempty(EC_indexes)) %optimization +0436 Subs_indexes = []; +0437 for l = 1:length(subs) +0438 if (isempty(subs{l})) +0439 break; +0440 end +0441 Subs_indexes = horzcat(Subs_indexes,EC_indexes(strcmpi(subs(l),... +0442 subs_cell(EC_indexes)))); +0443 end +0444 EC_indexes = Subs_indexes; +0445 end +0446 end +0447 +0448 EC_orgs = orgs_cell(EC_indexes); +0449 %If specific organism values are requested looks for all the organism +0450 %repetitions on the subset BRENDA cell array(EC_indexes) +0451 if string(organism) ~= '' +0452 EC_indexes = EC_indexes(strcmpi(string(organism),EC_orgs)); +0453 +0454 %If KEGG code was assigned to the organism (model) then it will look for +0455 %the Kcat value for the closest organism +0456 elseif org_index~='*' %&& org_index~='' +0457 KEGG_indexes = [];temp = []; +0458 +0459 %For relating a phyl dist between the modelled organism and the organisms +0460 %on the BRENDA cell array it should search for a KEGG code for each of +0461 %these +0462 for j=1:length(EC_indexes) +0463 %Assigns a KEGG index for those found on the KEGG struct +0464 orgs_index = find(strcmpi(orgs_cell(EC_indexes(j)),phylDist.names),1); +0465 if ~isempty(orgs_index) +0466 KEGG_indexes = [KEGG_indexes; orgs_index]; +0467 temp = [temp;EC_indexes(j)]; +0468 %For values related to organisms without KEGG code, then it will +0469 %look for KEGG code for the first organism with the same genus +0470 else +0471 org = orgs_cell{EC_indexes(j)}; +0472 orgGenus = lower(regexprep(org,'\s.*','')); +0473 if isKey(phylDist.genusHashMap,orgGenus) %annoyingly, this seems to be needed +0474 matchInd = cell2mat(values(phylDist.genusHashMap,{orgGenus})); +0475 matches = phylDist.uniqueGenusIndices{matchInd}; +0476 k = matches(1); +0477 KEGG_indexes = [KEGG_indexes;k]; +0478 temp = [temp;EC_indexes(j)]; +0479 end +0480 end +0481 end +0482 %Update the EC_indexes cell array +0483 EC_indexes = temp; +0484 %Looks for the taxonomically closest organism and saves the index of +0485 %its appearences in the BRENDA cell +0486 if ~isempty(EC_indexes) +0487 distances = phylDist.distMat(org_index,KEGG_indexes); +0488 EC_indexes = EC_indexes(distances == min(distances)); +0489 end +0490 end +0491 end +0492 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +0493 function org_index = find_inKEGG(org_name,names) +0494 org_index = find(strcmpi(org_name,names)); +0495 if numel(org_index)>1 +0496 org_index = org_index(1); +0497 elseif isempty(org_index) +0498 org_name = regexprep(org_name,'\s.*',''); +0499 org_index = find(strcmpi(org_name,names)); +0500 if numel(org_index)>1 +0501 org_index = org_index(1); +0502 elseif isempty(org_index) +0503 org_index = '*'; +0504 end +0505 end +0506 end +0507 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +0508 function phylDistStruct = KEGG_struct(phylpath) +0509 load(phylpath) +0510 phylDistStruct.ids = transpose(phylDistStruct.ids); +0511 phylDistStruct.names = transpose(phylDistStruct.names); +0512 phylDistStruct.names = regexprep(phylDistStruct.names,'\s*\(.*',''); +0513 end
Generated by m2html © 2005
\ No newline at end of file diff --git a/doc/src/geckomat/gather_kcats/getStandardKcat.html b/doc/src/geckomat/gather_kcats/getStandardKcat.html index f1173f56..aff9e498 100644 --- a/doc/src/geckomat/gather_kcats/getStandardKcat.html +++ b/doc/src/geckomat/gather_kcats/getStandardKcat.html @@ -236,172 +236,177 @@

SOURCE CODE ^% Find reactions with GPR but without model.ec entry (for instance due to 0135 % no protein matching) 0136 rxnsMissingEnzyme = find(~cellfun(@isempty, model.grRules)); -0137 rxnsMissingEnzyme = find(and(~ismember(model.rxns(rxnsMissingEnzyme),model.ec.rxns), ~contains(model.rxns(rxnsMissingEnzyme),'usage_prot_'))); -0138 rxnsMissingGPR = [rxnsMissingGPR;rxnsMissingEnzyme]; -0139 -0140 % Get custom list of reaction IDs to ignore, if existing. First column -0141 % contains reaction IDs, second column contains reaction names for -0142 % reference only. -0143 if exist(fullfile(params.path,'data','pseudoRxns.tsv'),'file') -0144 fID = fopen(fullfile(params.path,'data','pseudoRxns.tsv')); -0145 fileData = textscan(fID,'%s %s','delimiter','\t'); -0146 fclose(fID); -0147 customRxns = fileData{1}; -0148 customRxns = find(ismember(model.rxns,customRxns)); -0149 else -0150 customRxns = []; -0151 end -0152 % Get and remove exchange, transport, spontaneous and pseudo reactions -0153 [~, exchangeRxns] = getExchangeRxns(model); -0154 transportRxns = getTransportRxns(model); -0155 [spontaneousRxns, ~] = modelAdapter.getSpontaneousReactions(model); -0156 pseudoRxns = contains(model.rxnNames,'pseudoreaction'); -0157 slimeRxns = contains(model.rxnNames,'SLIME rxn'); -0158 rxnsToIgnore = [customRxns; exchangeRxns; find(transportRxns); ... -0159 find(spontaneousRxns); find(pseudoRxns); find(slimeRxns)]; -0160 rxnsMissingGPR(ismember(rxnsMissingGPR, rxnsToIgnore)) = []; -0161 -0162 % Only add if not geckoLight & getStandardKcat was not run earlier -0163 if ~any(strcmp(model.mets,'prot_standard')) -0164 % Add a new gene to be consistent with ec field named standard -0165 proteinStdGenes.genes = 'standard'; -0166 if isfield(model,'geneShortNames') -0167 proteinStdGenes.geneShortNames = 'std'; -0168 end -0169 model = addGenesRaven(model, proteinStdGenes); -0170 -0171 if ~model.ec.geckoLight -0172 % Add a new metabolite named prot_standard -0173 proteinStdMets.mets = 'prot_standard'; -0174 proteinStdMets.metNames = proteinStdMets.mets; -0175 proteinStdMets.compartments = 'c'; -0176 if isfield(model,'metNotes') -0177 proteinStdMets.metNotes = 'Standard enzyme-usage pseudometabolite'; -0178 end -0179 model = addMets(model, proteinStdMets); -0180 -0181 % Add a protein usage reaction if not a light version -0182 proteinStdUsageRxn.rxns = {'usage_prot_standard'}; -0183 proteinStdUsageRxn.rxnNames = proteinStdUsageRxn.rxns; -0184 proteinStdUsageRxn.mets = {proteinStdMets.mets, 'prot_pool'}; -0185 proteinStdUsageRxn.stoichCoeffs = [-1, 1]; -0186 proteinStdUsageRxn.lb = -1000; -0187 proteinStdUsageRxn.ub = 0; -0188 proteinStdUsageRxn.grRules = proteinStdGenes.genes; -0189 -0190 model = addRxns(model, proteinStdUsageRxn); -0191 end -0192 % Update .ec structure in model -0193 model.ec.genes(end+1) = {'standard'}; -0194 model.ec.enzymes(end+1) = {'standard'}; -0195 model.ec.mw(end+1) = standardMW; -0196 model.ec.sequence(end+1) = {''}; -0197 % Additional info -0198 if isfield(model.ec,'concs') -0199 model.ec.concs(end+1) = nan(); -0200 end -0201 -0202 % Expand the enzyme rxns matrix -0203 model.ec.rxnEnzMat = [model.ec.rxnEnzMat, zeros(length(model.ec.rxns), 1)]; % 1 new enzyme -0204 model.ec.rxnEnzMat = [model.ec.rxnEnzMat; zeros(length(rxnsMissingGPR), length(model.ec.enzymes))]; % new rxns -0205 end -0206 stdMetIdx = find(strcmpi(model.ec.enzymes, 'standard')); -0207 -0208 % Remove previous standard kcat assignment -0209 oldStandardEnz = find(strcmp(model.ec.source,'standard')); -0210 if ~isempty(oldStandardEnz) -0211 oldStandardProt = logical(model.ec.rxnEnzMat(oldStandardEnz,stdMetIdx)); -0212 % If annotated with real enzyme => set kcat to zero -0213 model.ec.kcat(oldStandardEnz(~oldStandardProt)) = 0; -0214 model.ec.source(oldStandardEnz(~oldStandardProt)) = {''}; -0215 % If annotated with standard protein => remove entry -0216 model.ec.rxns(oldStandardEnz(oldStandardProt)) = []; -0217 model.ec.kcat(oldStandardEnz(oldStandardProt)) = []; -0218 model.ec.source(oldStandardEnz(oldStandardProt)) = []; -0219 model.ec.notes(oldStandardEnz(oldStandardProt)) = []; -0220 model.ec.eccodes(oldStandardEnz(oldStandardProt)) = []; -0221 model.ec.rxnEnzMat(oldStandardEnz(oldStandardProt),:) = []; -0222 end -0223 -0224 numRxns = length(model.ec.rxns); -0225 for i = 1:numel(rxnsMissingGPR) -0226 rxnIdx = rxnsMissingGPR(i); -0227 -0228 % Update .ec structure in model -0229 if ~model.ec.geckoLight -0230 model.ec.rxns(end+1) = model.rxns(rxnIdx); -0231 % Add prefix in case is light version -0232 else -0233 model.ec.rxns{end+1} = ['001_' model.rxns{rxnIdx}]; -0234 end -0235 -0236 if ~standard -0237 kcatSubSystemIdx = strcmpi(enzSubSystem_names, model.subSystems{rxnIdx}(1)); -0238 if all(kcatSubSystemIdx) -0239 model.ec.kcat(end+1) = kcatSubSystem(kcatSubSystemIdx); -0240 else -0241 model.ec.kcat(end+1) = standardKcat; -0242 end -0243 else -0244 model.ec.kcat(end+1) = standardKcat; -0245 end -0246 -0247 model.ec.source(end+1) = {'standard'}; -0248 model.ec.notes(end+1) = {''}; -0249 model.ec.eccodes(end+1) = {''}; -0250 -0251 % Update the enzyme rxns matrix -0252 model.ec.rxnEnzMat(numRxns+i, stdMetIdx) = 1; -0253 end -0254 % Get the rxns identifiers of the updated rxns -0255 rxnsMissingGPR = model.rxns(rxnsMissingGPR); -0256 -0257 if fillZeroKcat -0258 zeroKcat = model.ec.kcat == 0 | isnan(model.ec.kcat); -0259 model.ec.kcat(zeroKcat) = standardKcat; -0260 model.ec.source(zeroKcat) = {'standard'}; -0261 rxnsNoKcat = model.ec.rxns(zeroKcat); -0262 else -0263 rxnsNoKcat = []; -0264 end -0265 end -0266 -0267 function Cflat = flattenCell(C,strFlag) -0268 %FLATTENCELL Flatten a nested column cell array into a matrix cell array. -0269 % -0270 % CFLAT = FLATTENCELL(C) takes a column cell array in which one or more -0271 % entries is a nested cell array, and flattens it into a 2D matrix cell -0272 % array, where the nested entries are spread across new columns. -0273 % -0274 % CFLAT = FLATTENCELL(C,STRFLAG) if STRFLAG is TRUE, empty entries in the -0275 % resulting CFLAT will be replaced with empty strings {''}. Default = FALSE -0276 if nargin < 2 -0277 strFlag = false; -0278 end -0279 -0280 % determine which entries are cells -0281 cells = cellfun(@iscell,C); -0282 -0283 % determine number of elements in each nested cell -0284 cellsizes = cellfun(@numel,C); -0285 cellsizes(~cells) = 1; % ignore non-cell entries -0286 -0287 % flatten single-entry cells -0288 Cflat = C; -0289 Cflat(cells & (cellsizes == 1)) = cellfun(@(x) x{1},Cflat(cells & (cellsizes == 1)),'UniformOutput',false); -0290 -0291 % iterate through multi-entry cells -0292 multiCells = find(cellsizes > 1); -0293 for i = 1:length(multiCells) -0294 cellContents = Cflat{multiCells(i)}; -0295 Cflat(multiCells(i),1:length(cellContents)) = cellContents; -0296 end -0297 -0298 % change empty elements to strings, if specified -0299 if ( strFlag ) -0300 Cflat(cellfun(@isempty,Cflat)) = {''}; +0137 if model.ec.geckoLight +0138 ecRxnsList = unique(extractAfter(model.ec.rxns,4)); +0139 else +0140 ecRxnsList = model.ec.rxns; +0141 end +0142 rxnsMissingEnzyme = find(and(~ismember(model.rxns(rxnsMissingEnzyme),ecRxnsList), ~contains(model.rxns(rxnsMissingEnzyme),'usage_prot_'))); +0143 rxnsMissingGPR = [rxnsMissingGPR;rxnsMissingEnzyme]; +0144 +0145 % Get custom list of reaction IDs to ignore, if existing. First column +0146 % contains reaction IDs, second column contains reaction names for +0147 % reference only. +0148 if exist(fullfile(params.path,'data','pseudoRxns.tsv'),'file') +0149 fID = fopen(fullfile(params.path,'data','pseudoRxns.tsv')); +0150 fileData = textscan(fID,'%s %s','delimiter','\t'); +0151 fclose(fID); +0152 customRxns = fileData{1}; +0153 customRxns = find(ismember(model.rxns,customRxns)); +0154 else +0155 customRxns = []; +0156 end +0157 % Get and remove exchange, transport, spontaneous and pseudo reactions +0158 [~, exchangeRxns] = getExchangeRxns(model); +0159 transportRxns = getTransportRxns(model); +0160 [spontaneousRxns, ~] = modelAdapter.getSpontaneousReactions(model); +0161 pseudoRxns = contains(model.rxnNames,'pseudoreaction'); +0162 slimeRxns = contains(model.rxnNames,'SLIME rxn'); +0163 rxnsToIgnore = [customRxns; exchangeRxns; find(transportRxns); ... +0164 find(spontaneousRxns); find(pseudoRxns); find(slimeRxns)]; +0165 rxnsMissingGPR(ismember(rxnsMissingGPR, rxnsToIgnore)) = []; +0166 +0167 % Only add if not geckoLight & getStandardKcat was not run earlier +0168 if ~any(strcmp(model.mets,'prot_standard')) +0169 % Add a new gene to be consistent with ec field named standard +0170 proteinStdGenes.genes = 'standard'; +0171 if isfield(model,'geneShortNames') +0172 proteinStdGenes.geneShortNames = 'std'; +0173 end +0174 model = addGenesRaven(model, proteinStdGenes); +0175 +0176 if ~model.ec.geckoLight +0177 % Add a new metabolite named prot_standard +0178 proteinStdMets.mets = 'prot_standard'; +0179 proteinStdMets.metNames = proteinStdMets.mets; +0180 proteinStdMets.compartments = 'c'; +0181 if isfield(model,'metNotes') +0182 proteinStdMets.metNotes = 'Standard enzyme-usage pseudometabolite'; +0183 end +0184 model = addMets(model, proteinStdMets); +0185 +0186 % Add a protein usage reaction if not a light version +0187 proteinStdUsageRxn.rxns = {'usage_prot_standard'}; +0188 proteinStdUsageRxn.rxnNames = proteinStdUsageRxn.rxns; +0189 proteinStdUsageRxn.mets = {proteinStdMets.mets, 'prot_pool'}; +0190 proteinStdUsageRxn.stoichCoeffs = [-1, 1]; +0191 proteinStdUsageRxn.lb = -1000; +0192 proteinStdUsageRxn.ub = 0; +0193 proteinStdUsageRxn.grRules = proteinStdGenes.genes; +0194 +0195 model = addRxns(model, proteinStdUsageRxn); +0196 end +0197 % Update .ec structure in model +0198 model.ec.genes(end+1) = {'standard'}; +0199 model.ec.enzymes(end+1) = {'standard'}; +0200 model.ec.mw(end+1) = standardMW; +0201 model.ec.sequence(end+1) = {''}; +0202 % Additional info +0203 if isfield(model.ec,'concs') +0204 model.ec.concs(end+1) = nan(); +0205 end +0206 +0207 % Expand the enzyme rxns matrix +0208 model.ec.rxnEnzMat = [model.ec.rxnEnzMat, zeros(length(model.ec.rxns), 1)]; % 1 new enzyme +0209 model.ec.rxnEnzMat = [model.ec.rxnEnzMat; zeros(length(rxnsMissingGPR), length(model.ec.enzymes))]; % new rxns +0210 end +0211 stdMetIdx = find(strcmpi(model.ec.enzymes, 'standard')); +0212 +0213 % Remove previous standard kcat assignment +0214 oldStandardEnz = find(strcmp(model.ec.source,'standard')); +0215 if ~isempty(oldStandardEnz) +0216 oldStandardProt = logical(model.ec.rxnEnzMat(oldStandardEnz,stdMetIdx)); +0217 % If annotated with real enzyme => set kcat to zero +0218 model.ec.kcat(oldStandardEnz(~oldStandardProt)) = 0; +0219 model.ec.source(oldStandardEnz(~oldStandardProt)) = {''}; +0220 % If annotated with standard protein => remove entry +0221 model.ec.rxns(oldStandardEnz(oldStandardProt)) = []; +0222 model.ec.kcat(oldStandardEnz(oldStandardProt)) = []; +0223 model.ec.source(oldStandardEnz(oldStandardProt)) = []; +0224 model.ec.notes(oldStandardEnz(oldStandardProt)) = []; +0225 model.ec.eccodes(oldStandardEnz(oldStandardProt)) = []; +0226 model.ec.rxnEnzMat(oldStandardEnz(oldStandardProt),:) = []; +0227 end +0228 +0229 numRxns = length(model.ec.rxns); +0230 for i = 1:numel(rxnsMissingGPR) +0231 rxnIdx = rxnsMissingGPR(i); +0232 +0233 % Update .ec structure in model +0234 if ~model.ec.geckoLight +0235 model.ec.rxns(end+1) = model.rxns(rxnIdx); +0236 % Add prefix in case is light version +0237 else +0238 model.ec.rxns{end+1} = ['001_' model.rxns{rxnIdx}]; +0239 end +0240 +0241 if ~standard +0242 kcatSubSystemIdx = strcmpi(enzSubSystem_names, model.subSystems{rxnIdx}(1)); +0243 if all(kcatSubSystemIdx) +0244 model.ec.kcat(end+1) = kcatSubSystem(kcatSubSystemIdx); +0245 else +0246 model.ec.kcat(end+1) = standardKcat; +0247 end +0248 else +0249 model.ec.kcat(end+1) = standardKcat; +0250 end +0251 +0252 model.ec.source(end+1) = {'standard'}; +0253 model.ec.notes(end+1) = {''}; +0254 model.ec.eccodes(end+1) = {''}; +0255 +0256 % Update the enzyme rxns matrix +0257 model.ec.rxnEnzMat(numRxns+i, stdMetIdx) = 1; +0258 end +0259 % Get the rxns identifiers of the updated rxns +0260 rxnsMissingGPR = model.rxns(rxnsMissingGPR); +0261 +0262 if fillZeroKcat +0263 zeroKcat = model.ec.kcat == 0 | isnan(model.ec.kcat); +0264 model.ec.kcat(zeroKcat) = standardKcat; +0265 model.ec.source(zeroKcat) = {'standard'}; +0266 rxnsNoKcat = model.ec.rxns(zeroKcat); +0267 else +0268 rxnsNoKcat = []; +0269 end +0270 end +0271 +0272 function Cflat = flattenCell(C,strFlag) +0273 %FLATTENCELL Flatten a nested column cell array into a matrix cell array. +0274 % +0275 % CFLAT = FLATTENCELL(C) takes a column cell array in which one or more +0276 % entries is a nested cell array, and flattens it into a 2D matrix cell +0277 % array, where the nested entries are spread across new columns. +0278 % +0279 % CFLAT = FLATTENCELL(C,STRFLAG) if STRFLAG is TRUE, empty entries in the +0280 % resulting CFLAT will be replaced with empty strings {''}. Default = FALSE +0281 if nargin < 2 +0282 strFlag = false; +0283 end +0284 +0285 % determine which entries are cells +0286 cells = cellfun(@iscell,C); +0287 +0288 % determine number of elements in each nested cell +0289 cellsizes = cellfun(@numel,C); +0290 cellsizes(~cells) = 1; % ignore non-cell entries +0291 +0292 % flatten single-entry cells +0293 Cflat = C; +0294 Cflat(cells & (cellsizes == 1)) = cellfun(@(x) x{1},Cflat(cells & (cellsizes == 1)),'UniformOutput',false); +0295 +0296 % iterate through multi-entry cells +0297 multiCells = find(cellsizes > 1); +0298 for i = 1:length(multiCells) +0299 cellContents = Cflat{multiCells(i)}; +0300 Cflat(multiCells(i),1:length(cellContents)) = cellContents; 0301 end -0302 end +0302 +0303 % change empty elements to strings, if specified +0304 if ( strFlag ) +0305 Cflat(cellfun(@isempty,Cflat)) = {''}; +0306 end +0307 end
Generated by m2html © 2005
\ No newline at end of file diff --git a/doc/src/geckomat/limit_proteins/constrainEnzConcs.html b/doc/src/geckomat/limit_proteins/constrainEnzConcs.html index 9c3b7d13..cbf7db32 100644 --- a/doc/src/geckomat/limit_proteins/constrainEnzConcs.html +++ b/doc/src/geckomat/limit_proteins/constrainEnzConcs.html @@ -24,7 +24,7 @@

PURPOSE ^constrainEnzConcs

SYNOPSIS ^

-
function model = constrainEnzConcs(model)
+
function model = constrainEnzConcs(model, removeConstraints)

DESCRIPTION ^

 constrainEnzConcs
@@ -34,16 +34,20 @@ 

DESCRIPTION ^CROSS-REFERENCE INFORMATION ^

@@ -58,7 +62,7 @@

CROSS-REFERENCE INFORMATION ^
 
 
 <h2><a name=SOURCE CODE ^

-
0001 function model = constrainEnzConcs(model)
+
0001 function model = constrainEnzConcs(model, removeConstraints)
 0002 % constrainEnzConcs
 0003 %   Constrain enzyme usages by their concentration as provided in
 0004 %   model.ec.concs. For enzymes with non-NaN entries in model.ec.concs,
@@ -66,47 +70,59 @@ 

SOURCE CODE ^% but is rather constraint by the measured protein abundance. 0007 % 0008 % Input: -0009 % model an ecModel in GECKO 3 format (with ecModel.ec structure) with enzyme -0010 % concentrations in model.ec.concs -0011 % -0012 % Output: -0013 % model an ecModel constraint with available enzyme concentrations -0014 % -0015 % Note: to populate model.ec.concs you should run getProteomics. -0016 % -0017 % Usage: -0018 % model = constrainEnzConcs(model) -0019 -0020 %Enzyme with NaN entry in model.ec.concs => draw from prot_pool -0021 %Enzyme with numeric entry in model.ec.concs => exchange reaction with -0022 %enzyme level as UB +0009 % model an ecModel in GECKO 3 format (with ecModel.ec +0010 % structure) with enzyme concentrations in +0011 % model.ec.concs +0012 % removeConstraints logical, whether enzyme concentration +0013 % constraints should be removed (model.ec.concs +0014 % will remain unchanged). (optional, default false) +0015 % +0016 % Output: +0017 % model an ecModel constraint with available enzyme concentrations +0018 % +0019 % Note: To populate model.ec.concs you should run fillEnzConcs. +0020 % +0021 % Usage: +0022 % model = constrainEnzConcs(model, removeConstraints) 0023 -0024 %Get indices of usage reactions -0025 usageRxns = strcat('usage_prot_',model.ec.enzymes); -0026 [~, usageRxnsIdx] = ismember(usageRxns, model.rxns); +0024 %Enzyme with NaN entry in model.ec.concs => draw from prot_pool +0025 %Enzyme with numeric entry in model.ec.concs => exchange reaction with +0026 %enzyme level as UB 0027 -0028 if any(usageRxnsIdx == 0) -0029 error('Usage reactions are not defined for all enzymes. This is done by makeEcModel.') +0028 if nargin<2 +0029 removeConstraints = false; 0030 end -0031 %Get index of protein pool metabolite -0032 protPoolIdx = find(ismember(model.mets,'prot_pool')); -0033 if ~any(protPoolIdx) -0034 error('Cannot find protein pool pseudometabolite.') -0035 end -0036 -0037 %Protein that should be constraint by UB -0038 protCons = ~isnan(model.ec.concs); -0039 -0040 %Set all reactions to draw from prot_pool -0041 model.S(protPoolIdx, usageRxnsIdx(~protCons)) = 1; -0042 model.lb(usageRxnsIdx(protCons)) = -1000; -0043 -0044 %If non-NaN in model.ec.concs, then constrain by UB -0045 if any(protCons) -0046 % model.S(protPoolIdx, usageRxnsIdx(protCons)) = 0; % Since GECKO 3.2.0 -0047 model.lb(usageRxnsIdx(protCons)) = -model.ec.concs(protCons); -0048 end -0049 end

+0031 +0032 %Get indices of usage reactions +0033 usageRxns = strcat('usage_prot_',model.ec.enzymes); +0034 [~, usageRxnsIdx] = ismember(usageRxns, model.rxns); +0035 +0036 if any(usageRxnsIdx == 0) +0037 error('Usage reactions are not defined for all enzymes. This is done by makeEcModel.') +0038 end +0039 %Get index of protein pool metabolite +0040 protPoolIdx = find(ismember(model.mets,'prot_pool')); +0041 if ~any(protPoolIdx) +0042 error('Cannot find protein pool pseudometabolite.') +0043 end +0044 +0045 %Protein that should be constraint by UB +0046 if removeConstraints +0047 protCons = []; +0048 else +0049 protCons = ~isnan(model.ec.concs); +0050 end +0051 +0052 %Set all reactions to draw from prot_pool +0053 model.S(protPoolIdx, usageRxnsIdx) = 1; +0054 model.lb(usageRxnsIdx) = -1000; +0055 +0056 %If non-NaN in model.ec.concs, then constrain by UB +0057 if any(protCons) +0058 % model.S(protPoolIdx, usageRxnsIdx(protCons)) = 0; % Since GECKO 3.2.0 +0059 model.lb(usageRxnsIdx(protCons)) = -model.ec.concs(protCons); +0060 end +0061 end

Generated by m2html © 2005
\ No newline at end of file diff --git a/doc/src/geckomat/limit_proteins/constrainFluxData.html b/doc/src/geckomat/limit_proteins/constrainFluxData.html index edd488d4..7e1fcabf 100644 --- a/doc/src/geckomat/limit_proteins/constrainFluxData.html +++ b/doc/src/geckomat/limit_proteins/constrainFluxData.html @@ -28,7 +28,8 @@

SYNOPSIS ^DESCRIPTION ^

 constrainFluxData
-   Constrains fluxes
+   Constrains fluxes to the data that is provided in the fluxData
+   structure, which itself is read by loadFluxData from data/fluxData.tsv.
 
  Input:
    model           an ecModel in GECKO 3 format (with ecModel.ec structure)
@@ -63,10 +64,14 @@ 

DESCRIPTION ^CROSS-REFERENCE INFORMATION ^
 <h2><a name=SOURCE CODE ^

0001 function model = constrainFluxData(model, fluxData, condition, maxMinGrowth, looseStrictFlux, modelAdapter)
 0002 % constrainFluxData
-0003 %   Constrains fluxes
-0004 %
-0005 % Input:
-0006 %   model           an ecModel in GECKO 3 format (with ecModel.ec structure)
-0007 %   fluxData        structure with flux data
-0008 %                   conds       sampling condition
-0009 %                   Ptot        total protein (g/gDCW)
-0010 %                   grRate      growth rate (1/h)
-0011 %                   exchFluxes  exchange fluxes (mmol/gDCWh)
-0012 %                   exchMets    exchanged metabolites, matching exchFluxes
-0013 %                   exchRxnIDs  exchange reaction IDs, matching exchMets
-0014 %   condition       either index number or name of the sample condition in
-0015 %                   fluxData.conds (Optional, default = 1)
-0016 %   maxMinGrowth    'max' if the provided growth rate should be set as
-0017 %                   maximum growth rate (= upper bound), or 'min' if it
-0018 %                   should be set as minimum growth rate (= lower bound).
-0019 %                   The latter option is suitable if minimization of
-0020 %                   prot_pool_exchange is used as objective function. (Opt,
-0021 %                   default = 'max')
-0022 %   looseStrictFlux how strictly constrained the exchange fluxes should be,
-0023 %                   optional, default = 'loose'
-0024 %                   'loose' if the exchange fluxes should be constraint
-0025 %                           only by the "outer bounds". If exchFluxes(i)
-0026 %                           > 0, LB = 0 and UB = exchFluxes(i). If
-0027 %                           exchFluxes(i) < 0, LB = exchFluxes(i) and
-0028 %                           UB = 0
-0029 %                   0-100   LB and UB constraints are set with a specified
-0030 %                           percentage of variance around exchFluxes. If 10
-0031 %                           is specified, LB = exchFluxes*0.95 and UB =
-0032 %                           exchFluxes*1.05. This allows for 10% variance
-0033 %                           around the exchFluxes values, but strictly
-0034 %                           forces a flux through the exchRxns.
-0035 %   modelAdapter    a loaded model adapter (Optional, will otherwise use
-0036 %                   the default model adapter)
-0037 %
+0003 %   Constrains fluxes to the data that is provided in the fluxData
+0004 %   structure, which itself is read by loadFluxData from data/fluxData.tsv.
+0005 %
+0006 % Input:
+0007 %   model           an ecModel in GECKO 3 format (with ecModel.ec structure)
+0008 %   fluxData        structure with flux data
+0009 %                   conds       sampling condition
+0010 %                   Ptot        total protein (g/gDCW)
+0011 %                   grRate      growth rate (1/h)
+0012 %                   exchFluxes  exchange fluxes (mmol/gDCWh)
+0013 %                   exchMets    exchanged metabolites, matching exchFluxes
+0014 %                   exchRxnIDs  exchange reaction IDs, matching exchMets
+0015 %   condition       either index number or name of the sample condition in
+0016 %                   fluxData.conds (Optional, default = 1)
+0017 %   maxMinGrowth    'max' if the provided growth rate should be set as
+0018 %                   maximum growth rate (= upper bound), or 'min' if it
+0019 %                   should be set as minimum growth rate (= lower bound).
+0020 %                   The latter option is suitable if minimization of
+0021 %                   prot_pool_exchange is used as objective function. (Opt,
+0022 %                   default = 'max')
+0023 %   looseStrictFlux how strictly constrained the exchange fluxes should be,
+0024 %                   optional, default = 'loose'
+0025 %                   'loose' if the exchange fluxes should be constraint
+0026 %                           only by the "outer bounds". If exchFluxes(i)
+0027 %                           > 0, LB = 0 and UB = exchFluxes(i). If
+0028 %                           exchFluxes(i) < 0, LB = exchFluxes(i) and
+0029 %                           UB = 0
+0030 %                   0-100   LB and UB constraints are set with a specified
+0031 %                           percentage of variance around exchFluxes. If 10
+0032 %                           is specified, LB = exchFluxes*0.95 and UB =
+0033 %                           exchFluxes*1.05. This allows for 10% variance
+0034 %                           around the exchFluxes values, but strictly
+0035 %                           forces a flux through the exchRxns.
+0036 %   modelAdapter    a loaded model adapter (Optional, will otherwise use
+0037 %                   the default model adapter)
 0038 %
 0039 % Output:
 0040 %   model           an ecModel where fluxes are constraint
 0041 %
-0042 % Usage:
-0043 %   model = constrainFluxData(model, fluxData, condition, maxMinGrowth, looseStrictFlux, modelAdapter)
-0044 
-0045 if nargin < 6 || isempty(modelAdapter)
-0046     modelAdapter = ModelAdapterManager.getDefault();
-0047     if isempty(modelAdapter)
-0048         error('Either send in a modelAdapter or set the default model adapter in the ModelAdapterManager.')
-0049     end
-0050 end
-0051 params = modelAdapter.getParameters();
-0052 
-0053 if nargin < 5 || isempty(looseStrictFlux)
-0054     looseStrictFlux = 'loose';
+0042 % Note: If a provided constraint is either -1000 or 1000, then the function
+0043 % will update the reaction lower and upper bound to either allow uptake or
+0044 % excretion, irrespective of what option is given as the looseStrictFlux
+0045 % parameter.
+0046 %
+0047 % Usage:
+0048 %   model = constrainFluxData(model, fluxData, condition, maxMinGrowth, looseStrictFlux, modelAdapter)
+0049 
+0050 if nargin < 6 || isempty(modelAdapter)
+0051     modelAdapter = ModelAdapterManager.getDefault();
+0052     if isempty(modelAdapter)
+0053         error('Either send in a modelAdapter or set the default model adapter in the ModelAdapterManager.')
+0054     end
 0055 end
-0056 
-0057 if nargin < 4 || isempty(maxMinGrowth)
-0058     maxMinGrowth = 'max';
-0059 end
-0060 
-0061 if nargin < 2 || isempty(fluxData)
-0062     fluxData = loadFluxData(fullfile(params.path,'data','fluxData.tsv'),modelAdapter);
-0063 end
-0064 
-0065 if nargin < 3 || isempty(condition)
-0066     condition = 1;
-0067 elseif ~isnumeric(condition)
-0068     idx = find(strcmp(fluxData.conds,condition));
-0069     if isempty(condition)
-0070         error(['Condition ' condition ' cannot be found in fluxData'])
-0071     else
-0072         condition = idx;
-0073     end
-0074 end
-0075 
-0076 % Select the exchange fluxes of specified condition
-0077 fluxData.exchFluxes = fluxData.exchFluxes(condition,:);
-0078 
-0079 %Set original c-source to zero
-0080 model = setParam(model,'eq',params.c_source,0);
-0081 %Set growth
-0082 switch maxMinGrowth
-0083     case 'max'
-0084         model = setParam(model,'lb',params.bioRxn,0);
-0085         model = setParam(model,'ub',params.bioRxn,fluxData.grRate(condition));
-0086     case 'min'
-0087         model = setParam(model,'lb',params.bioRxn,fluxData.grRate(condition));
-0088         model = setParam(model,'ub',params.bioRxn,1000);
-0089 end
-0090 
-0091 negFlux = lt(fluxData.exchFluxes,0); % less than 0
-0092 ub = fluxData.exchFluxes(~negFlux);
-0093 posFlux = fluxData.exchRxnIDs(~negFlux);
-0094 lb = fluxData.exchFluxes(negFlux);
-0095 negFlux = fluxData.exchRxnIDs(negFlux);
-0096 
-0097 switch looseStrictFlux
-0098     case 'loose'
-0099         model = setParam(model,'lb',negFlux,lb);
-0100         model = setParam(model,'ub',negFlux,0);
-0101         model = setParam(model,'lb',posFlux,0);
-0102         model = setParam(model,'ub',posFlux,ub);
-0103     otherwise
-0104         model = setParam(model,'var',fluxData.exchRxnIDs,fluxData.exchFluxes,looseStrictFlux);
-0105 end
-0106 end
+0056 params = modelAdapter.getParameters(); +0057 +0058 if nargin < 5 || isempty(looseStrictFlux) +0059 looseStrictFlux = 'loose'; +0060 end +0061 +0062 if nargin < 4 || isempty(maxMinGrowth) +0063 maxMinGrowth = 'max'; +0064 end +0065 +0066 if nargin < 2 || isempty(fluxData) +0067 fluxData = loadFluxData(fullfile(params.path,'data','fluxData.tsv'),modelAdapter); +0068 end +0069 +0070 if nargin < 3 || isempty(condition) +0071 condition = 1; +0072 elseif ~isnumeric(condition) +0073 idx = find(strcmp(fluxData.conds,condition)); +0074 if isempty(condition) +0075 error(['Condition ' condition ' cannot be found in fluxData']) +0076 else +0077 condition = idx; +0078 end +0079 end +0080 +0081 % Select the exchange fluxes of specified condition +0082 fluxData.exchFluxes = fluxData.exchFluxes(condition,:); +0083 +0084 %Set original c-source to zero +0085 model = setParam(model,'eq',params.c_source,0); +0086 %Set growth +0087 switch maxMinGrowth +0088 case 'max' +0089 model = setParam(model,'lb',params.bioRxn,0); +0090 model = setParam(model,'ub',params.bioRxn,fluxData.grRate(condition)); +0091 case 'min' +0092 model = setParam(model,'lb',params.bioRxn,fluxData.grRate(condition)); +0093 model = setParam(model,'ub',params.bioRxn,1000); +0094 end +0095 +0096 negFlux = lt(fluxData.exchFluxes,0); % less than 0 +0097 ub = fluxData.exchFluxes(~negFlux); +0098 posFlux = fluxData.exchRxnIDs(~negFlux); +0099 lb = fluxData.exchFluxes(negFlux); +0100 negFlux = fluxData.exchRxnIDs(negFlux); +0101 +0102 switch looseStrictFlux +0103 case 'loose' +0104 model = setParam(model,'lb',negFlux,lb); +0105 model = setParam(model,'ub',negFlux,0); +0106 model = setParam(model,'lb',posFlux,0); +0107 model = setParam(model,'ub',posFlux,ub); +0108 otherwise +0109 model = setParam(model,'var',fluxData.exchRxnIDs,fluxData.exchFluxes,looseStrictFlux); +0110 end +0111 extremeFlux = find(abs(fluxData.exchFluxes)==1000); +0112 if any(extremeFlux) +0113 exchFlux = fluxData.exchFluxes(extremeFlux); +0114 if any(exchFlux==-1000) +0115 model = setParam(model,'lb',fluxData.exchRxnIDs(extremeFlux(exchFlux==-1000)),-1000); +0116 model = setParam(model,'ub',fluxData.exchRxnIDs(extremeFlux(exchFlux==-1000)),0); +0117 end +0118 if any(exchFlux==1000) +0119 model = setParam(model,'lb',fluxData.exchRxnIDs(extremeFlux(exchFlux==1000)),0); +0120 model = setParam(model,'ub',fluxData.exchRxnIDs(extremeFlux(exchFlux==1000)),1000); +0121 end +0122 end +0123 end

Generated by m2html © 2005
\ No newline at end of file diff --git a/doc/src/geckomat/utilities/ecFVA.html b/doc/src/geckomat/utilities/ecFVA.html index e2b18dc3..110325d0 100644 --- a/doc/src/geckomat/utilities/ecFVA.html +++ b/doc/src/geckomat/utilities/ecFVA.html @@ -128,11 +128,19 @@

SOURCE CODE ^function nUpdateProgressbar(~) -0071 progressbar(p/N); -0072 p = p + 1; -0073 end -0074 end

+0070 % Mapped flux might have swapped directionality: min/max might be swapped +0071 swapDir = minFlux > maxFlux; +0072 if any(swapDir) +0073 tmpFlux = minFlux(swapDir); +0074 minFlux(swapDir) = maxFlux(swapDir); +0075 maxFlux(swapDir) = tmpFlux; +0076 end +0077 +0078 function nUpdateProgressbar(~) +0079 progressbar(p/N); +0080 p = p + 1; +0081 end +0082 end
Generated by m2html © 2005
\ No newline at end of file diff --git a/doc/src/geckomat/utilities/plotEcFVA.html b/doc/src/geckomat/utilities/plotEcFVA.html index d356c40e..31b5b0d8 100644 --- a/doc/src/geckomat/utilities/plotEcFVA.html +++ b/doc/src/geckomat/utilities/plotEcFVA.html @@ -47,7 +47,9 @@

CROSS-REFERENCE INFORMATION ^
 </ul>
 <!-- crossreference -->
 
-
+<h2><a name=SUBFUNCTIONS ^

+

SOURCE CODE ^

0001 function plotEcFVA(minFlux, maxFlux)
@@ -62,30 +64,77 @@ 

SOURCE CODE ^% maxFlux vector of maximum flux values, matching minFlux. 0011 0012 numMods = size(minFlux,2); -0013 +0013 fluxRanges = cell(3,1); 0014 % Ignore zero flux reactions 0015 for i=1:numMods 0016 zeroFlux = abs(minFlux(:,i)) < 1e-10 & abs(maxFlux(:,i)) < 1e-10; 0017 minFlux(zeroFlux,i) = NaN; 0018 maxFlux(zeroFlux,i) = NaN; -0019 end -0020 -0021 % Calculate flux ranges -0022 fluxRange = maxFlux - minFlux; +0019 fluxRange = maxFlux(:,i) - minFlux(:,i); +0020 fluxRange(isnan(fluxRange)) = []; +0021 fluxRanges{i} = fluxRange; +0022 end 0023 0024 % Plot all together 0025 hold on 0026 legendText = cell(1,numel(numMods)); 0027 for i=1:numMods -0028 cdfplot(fluxRange(:,i)) -0029 legendText{i} = (['Model #' num2str(i) ' (median: ' num2str(median(fluxRange(:,i),'omitnan')) ')']); -0030 end -0031 set(gca, 'XScale', 'log', 'Xlim', [1e-7 1e4]) -0032 set(findall(gca, 'Type', 'Line'), 'LineWidth', 2) -0033 legend(legendText, 'Location','northwest') -0034 title('Flux variability (cumulative distribution)'); -0035 xlabel('Variability range (mmol/gDCWh)'); -0036 ylabel('Cumulative distribution');

+0028 fluxRange = fluxRanges{i}; +0029 cdfplot(fluxRange) +0030 legendText{i} = (['Model #' num2str(i) ' (median: ' num2str(median(fluxRange,'omitnan')) ')']); +0031 end +0032 set(gca, 'XScale', 'log', 'Xlim', [1e-7 1e4]) +0033 set(findall(gca, 'Type', 'Line'), 'LineWidth', 2) +0034 legend(legendText, 'Location','northwest') +0035 title('Flux variability (cumulative distribution)'); +0036 xlabel('Variability range (mmol/gDCWh)'); +0037 ylabel('Cumulative distribution'); +0038 hold off +0039 end +0040 +0041 function cdfplot(X) +0042 % cdfplot(X) +0043 % displays a plot of the Empirical Cumulative Distribution Function +0044 % (CDF) of the input array X in the current figure. The empirical +0045 % CDF y=F(x) is defined as the proportion of X values less than or equal to x. +0046 % If input X is a matrix, then cdfplot(X) parses it to the vector and +0047 % displays CDF of all values. +0048 % +0049 % EXAMPLE: +0050 % figure; +0051 % cdfplot(randn(1,100)); +0052 % hold on; +0053 % cdfplot(-log(1-rand(1,100))); +0054 % cdfplot(sqrt(randn(1,100).^2 + randn(1,100).^2)) +0055 % legend('Normal(0,1) CDF', 'Exponential(1) CDF', 'Rayleigh(1) CDF', 4) +0056 +0057 % Version 1.0 +0058 % Alex Podgaetsky, September 2003 +0059 % alex@wavion.co.il +0060 % +0061 % Revisions: +0062 % Version 1.0 - initial version +0063 +0064 tmp = sort(reshape(X,prod(size(X)),1)); +0065 Xplot = reshape([tmp tmp].',2*length(tmp),1); +0066 +0067 tmp = [1:length(X)].'/length(X); +0068 Yplot = reshape([tmp tmp].',2*length(tmp),1); +0069 Yplot = [0; Yplot(1:(end-1))]; +0070 +0071 figure(gcf); +0072 hp = plot(Xplot, Yplot); +0073 +0074 ColOrd = get(gca, 'ColorOrder'); +0075 ord = mod(length(get(gca,'Children')), size(ColOrd,1)); +0076 set(hp, 'Color', ColOrd((ord==0) + (ord>0)*ord, :)); +0077 if ~ishold +0078 xlabel('X', 'FontWeight','b','FontSize',12); +0079 ylabel('F(X)', 'FontWeight','b','FontSize',12); +0080 title('Empirical CDF', 'FontWeight','b','FontSize',12); +0081 grid on; +0082 end +0083 end
Generated by m2html © 2005
\ No newline at end of file diff --git a/doc/src/geckomat/utilities/reportEnzymeUsage.html b/doc/src/geckomat/utilities/reportEnzymeUsage.html index 19eff297..a9d017c7 100644 --- a/doc/src/geckomat/utilities/reportEnzymeUsage.html +++ b/doc/src/geckomat/utilities/reportEnzymeUsage.html @@ -92,87 +92,134 @@

SOURCE CODE ^% Highest capacity usage 0032 highUsageProt = find(usageData.capUsage > highCapUsage); -0033 [~,enzIdx] = ismember(usageData.protID(highUsageProt),ecModel.ec.enzymes); -0034 [row, col] = find(ecModel.ec.rxnEnzMat(:,enzIdx)); -0035 [row, ordered] = sort(row); -0036 highUsage.rxnID = ecModel.ec.rxns(row); -0037 [~, rxnIdx] = ismember(highUsage.rxnID,ecModel.rxns); -0038 highUsage.rxnName = ecModel.rxnNames(rxnIdx); -0039 protID = highUsageProt(col); -0040 geneID = ecModel.ec.genes(enzIdx(col)); -0041 highUsage.protID = usageData.protID(protID(ordered)); -0042 highUsage.geneID = geneID(ordered); -0043 highUsage.grRules = ecModel.grRules(rxnIdx); -0044 highUsage.capUsage = usageData.capUsage(protID(ordered)); -0045 highUsage.absUsage = usageData.absUsage(protID(ordered)); +0033 highEnzyme = usageData.protID(highUsageProt); +0034 [~,enzIdx] = ismember(highEnzyme,ecModel.ec.enzymes); +0035 geneIDs = ecModel.ec.genes(enzIdx); +0036 +0037 highUsage.protID = {}; +0038 highUsage.geneID = {}; +0039 highUsage.absUsage = []; +0040 highUsage.capUsage = []; +0041 highUsage.kcat = []; +0042 highUsage.source = {}; +0043 highUsage.rxnID = {}; +0044 highUsage.rxnNames = {}; +0045 highUsage.grRules = {}; 0046 -0047 usageReport.highCapUsage = struct2table(highUsage); -0048 -0049 % Highest absolute usage -0050 [~,topUse] = sort(usageData.absUsage,'descend'); -0051 topEnzyme = usageData.protID(topUse(1:topAbsUsage)); -0052 [~,b] = ismember(topEnzyme,ecModel.ec.enzymes); -0053 geneIDs = ecModel.ec.genes(b); -0054 topUsage.protID = {}; -0055 topUsage.geneID = {}; -0056 topUsage.absUsage = []; -0057 topUsage.percUsage = []; -0058 topUsage.kcat = []; -0059 topUsage.source = {}; -0060 topUsage.rxnID = {}; -0061 topUsage.rxnNames = {}; -0062 topUsage.grRules = {}; -0063 -0064 protPool = -ecModel.lb(strcmp(ecModel.rxns,'prot_pool_exchange')); -0065 -0066 for i=1:numel(topEnzyme) -0067 [rxns, kcat, idx, rxnNames, grRules] = getReactionsFromEnzyme(ecModel,topEnzyme{i}); -0068 rxnNumber = numel(rxns); -0069 % See if all reactions carried flux -0070 [~,rIdx] = ismember(rxns,ecModel.rxns); -0071 carriedFlux = usageData.fluxes(rIdx) > 1e-7; -0072 if isscalar(find(carriedFlux)) -0073 topUsage.protID(end+1,1) = topEnzyme(i); -0074 topUsage.geneID(end+1,1) = geneIDs(i); -0075 topUsage.absUsage(end+1,1) = usageData.absUsage(topUse(i)); -0076 topUsage.percUsage(end+1,1) = topUsage.absUsage(end,1)/protPool; -0077 topUsage.kcat(end+1,1) = kcat(carriedFlux); -0078 topUsage.source(end+1,1) = ecModel.ec.source(idx(carriedFlux)); -0079 topUsage.rxnID(end+1,1) = rxns(carriedFlux); -0080 topUsage.rxnNames(end+1,1) = rxnNames(carriedFlux); -0081 topUsage.grRules(end+1,1) = grRules(carriedFlux); -0082 else -0083 % Add one entry for combined usage -0084 topUsage.protID(end+1,1) = topEnzyme(i); -0085 topUsage.geneID(end+1,1) = geneIDs(i); -0086 topUsage.absUsage(end+1,1) = usageData.absUsage(topUse(i)); -0087 topUsage.percUsage(end+1,1) = topUsage.absUsage(end,1)/protPool*100; -0088 topUsage.kcat(end+1,1) = nan; -0089 topUsage.source{end+1,1} = '==='; -0090 topUsage.rxnID{end+1,1} = '==='; -0091 topUsage.rxnNames{end+1,1} = 'involved in multiple rxns, usage combined'; -0092 topUsage.grRules{end+1,1} = ''; -0093 % Recalculate reaction-specific usage -0094 rIdx = rIdx(carriedFlux); -0095 enzFlux = usageData.fluxes(rIdx); -0096 enzMet = strcat('prot_',topEnzyme{i}); -0097 [~, enzIdx] = ismember(enzMet,ecModel.mets); -0098 indUse = full(transpose(-ecModel.S(enzIdx,rIdx)).*enzFlux); -0099 rxnNumber = length(rIdx); -0100 topUsage.protID(end+1:end+rxnNumber,1) = topEnzyme(i); -0101 topUsage.geneID(end+1:end+rxnNumber,1) = geneIDs(i); -0102 topUsage.absUsage(end+1:end+rxnNumber,1) = indUse; -0103 topUsage.percUsage(end+1:end+rxnNumber,1) = topUsage.absUsage(end-(rxnNumber-1):end,1)/protPool*100; -0104 topUsage.kcat(end+1:end+rxnNumber,1) = kcat(carriedFlux); -0105 topUsage.source(end+1:end+rxnNumber,1) = ecModel.ec.source(idx(carriedFlux)); -0106 topUsage.rxnID(end+1:end+rxnNumber,1) = rxns(carriedFlux); -0107 topUsage.rxnNames(end+1:end+rxnNumber,1) = rxnNames(carriedFlux); -0108 topUsage.grRules(end+1:end+rxnNumber,1) = grRules(carriedFlux); -0109 end -0110 end -0111 usageReport.topAbsUsage = struct2table(topUsage); -0112 usageReport.totalUsageFlux = protPool; -0113 end +0047 for i=1:numel(enzIdx) +0048 [rxns, kcat, idx, rxnNames, grRules] = getReactionsFromEnzyme(ecModel,ecModel.ec.enzymes(enzIdx(i))); +0049 % See if all reactions carried flux +0050 [~,rIdx] = ismember(rxns,ecModel.rxns); +0051 carriedFlux = usageData.fluxes(rIdx) > 1e-7; +0052 if isscalar(find(carriedFlux)) +0053 highUsage.protID(end+1,1) = highEnzyme(i); +0054 highUsage.geneID(end+1,1) = geneIDs(i); +0055 highUsage.absUsage(end+1,1) = usageData.absUsage(enzIdx(i)); +0056 highUsage.capUsage(end+1,1) = usageData.capUsage(enzIdx(i)); +0057 highUsage.kcat(end+1,1) = kcat(carriedFlux); +0058 highUsage.source(end+1,1) = ecModel.ec.source(idx(carriedFlux)); +0059 highUsage.rxnID(end+1,1) = rxns(carriedFlux); +0060 highUsage.rxnNames(end+1,1) = rxnNames(carriedFlux); +0061 highUsage.grRules(end+1,1) = grRules(carriedFlux); +0062 else +0063 % Add one entry for combined usage +0064 highUsage.protID(end+1,1) = highEnzyme(i); +0065 highUsage.geneID(end+1,1) = geneIDs(i); +0066 highUsage.absUsage(end+1,1) = usageData.absUsage(enzIdx(i)); +0067 highUsage.capUsage(end+1,1) = usageData.capUsage(enzIdx(i)); +0068 highUsage.kcat(end+1,1) = nan; +0069 highUsage.source{end+1,1} = '==='; +0070 highUsage.rxnID{end+1,1} = '==='; +0071 highUsage.rxnNames{end+1,1} = 'involved in multiple rxns, usage combined, individual rxns below'; +0072 highUsage.grRules{end+1,1} = '==='; +0073 % Recalculate reaction-specific usage +0074 +0075 rIdx = rIdx(carriedFlux); +0076 enzFlux = usageData.fluxes(rIdx); +0077 enzMet = strcat('prot_',highEnzyme{i}); +0078 [~, enzEcIdx] = ismember(enzMet,ecModel.mets); +0079 indAbsUse = full(transpose(-ecModel.S(enzEcIdx,rIdx)).*enzFlux); +0080 indCapUse = (indAbsUse /sum(indAbsUse)) * usageData.capUsage(enzIdx(i)); +0081 +0082 rxnNumber = length(rIdx); +0083 highUsage.protID(end+1:end+rxnNumber,1) = highEnzyme(i); +0084 highUsage.geneID(end+1:end+rxnNumber,1) = geneIDs(i); +0085 highUsage.absUsage(end+1:end+rxnNumber,1) = indAbsUse; +0086 highUsage.capUsage(end+1:end+rxnNumber,1) = indCapUse; +0087 highUsage.kcat(end+1:end+rxnNumber,1) = kcat(carriedFlux); +0088 highUsage.source(end+1:end+rxnNumber,1) = ecModel.ec.source(idx(carriedFlux)); +0089 highUsage.rxnID(end+1:end+rxnNumber,1) = rxns(carriedFlux); +0090 highUsage.rxnNames(end+1:end+rxnNumber,1) = rxnNames(carriedFlux); +0091 highUsage.grRules(end+1:end+rxnNumber,1) = grRules(carriedFlux); +0092 end +0093 end +0094 +0095 usageReport.highCapUsage = struct2table(highUsage); +0096 +0097 % Highest absolute usage +0098 [~,topUse] = sort(usageData.absUsage,'descend'); +0099 topEnzyme = usageData.protID(topUse(1:topAbsUsage)); +0100 [~,b] = ismember(topEnzyme,ecModel.ec.enzymes); +0101 geneIDs = ecModel.ec.genes(b); +0102 topUsage.protID = {}; +0103 topUsage.geneID = {}; +0104 topUsage.absUsage = []; +0105 topUsage.percUsage = []; +0106 topUsage.kcat = []; +0107 topUsage.source = {}; +0108 topUsage.rxnID = {}; +0109 topUsage.rxnNames = {}; +0110 topUsage.grRules = {}; +0111 +0112 protPool = -ecModel.lb(strcmp(ecModel.rxns,'prot_pool_exchange')); +0113 +0114 for i=1:numel(topEnzyme) +0115 [rxns, kcat, idx, rxnNames, grRules] = getReactionsFromEnzyme(ecModel,topEnzyme{i}); +0116 % See if all reactions carried flux +0117 [~,rIdx] = ismember(rxns,ecModel.rxns); +0118 carriedFlux = usageData.fluxes(rIdx) > 1e-7; +0119 if isscalar(find(carriedFlux)) +0120 topUsage.protID(end+1,1) = topEnzyme(i); +0121 topUsage.geneID(end+1,1) = geneIDs(i); +0122 topUsage.absUsage(end+1,1) = usageData.absUsage(topUse(i)); +0123 topUsage.percUsage(end+1,1) = topUsage.absUsage(end,1)/protPool*100; +0124 topUsage.kcat(end+1,1) = kcat(carriedFlux); +0125 topUsage.source(end+1,1) = ecModel.ec.source(idx(carriedFlux)); +0126 topUsage.rxnID(end+1,1) = rxns(carriedFlux); +0127 topUsage.rxnNames(end+1,1) = rxnNames(carriedFlux); +0128 topUsage.grRules(end+1,1) = grRules(carriedFlux); +0129 else +0130 % Add one entry for combined usage +0131 topUsage.protID(end+1,1) = topEnzyme(i); +0132 topUsage.geneID(end+1,1) = geneIDs(i); +0133 topUsage.absUsage(end+1,1) = usageData.absUsage(topUse(i)); +0134 topUsage.percUsage(end+1,1) = topUsage.absUsage(end,1)/protPool*100; +0135 topUsage.kcat(end+1,1) = nan; +0136 topUsage.source{end+1,1} = '==='; +0137 topUsage.rxnID{end+1,1} = '==='; +0138 topUsage.rxnNames{end+1,1} = 'involved in multiple rxns, usage combined, individual rxns below'; +0139 topUsage.grRules{end+1,1} = '==='; +0140 % Recalculate reaction-specific usage +0141 rIdx = rIdx(carriedFlux); +0142 enzFlux = usageData.fluxes(rIdx); +0143 enzMet = strcat('prot_',topEnzyme{i}); +0144 [~, enzIdx] = ismember(enzMet,ecModel.mets); +0145 indUse = full(transpose(-ecModel.S(enzIdx,rIdx)).*enzFlux); +0146 rxnNumber = length(rIdx); +0147 topUsage.protID(end+1:end+rxnNumber,1) = topEnzyme(i); +0148 topUsage.geneID(end+1:end+rxnNumber,1) = geneIDs(i); +0149 topUsage.absUsage(end+1:end+rxnNumber,1) = indUse; +0150 topUsage.percUsage(end+1:end+rxnNumber,1) = topUsage.absUsage(end-(rxnNumber-1):end,1)/protPool*100; +0151 topUsage.kcat(end+1:end+rxnNumber,1) = kcat(carriedFlux); +0152 topUsage.source(end+1:end+rxnNumber,1) = ecModel.ec.source(idx(carriedFlux)); +0153 topUsage.rxnID(end+1:end+rxnNumber,1) = rxns(carriedFlux); +0154 topUsage.rxnNames(end+1:end+rxnNumber,1) = rxnNames(carriedFlux); +0155 topUsage.grRules(end+1:end+rxnNumber,1) = grRules(carriedFlux); +0156 end +0157 end +0158 usageReport.topAbsUsage = struct2table(topUsage); +0159 usageReport.totalUsageFlux = protPool; +0160 end
Generated by m2html © 2005
\ No newline at end of file diff --git a/src/geckomat/limit_proteins/constrainEnzConcs.m b/src/geckomat/limit_proteins/constrainEnzConcs.m index c9780db2..947816b6 100644 --- a/src/geckomat/limit_proteins/constrainEnzConcs.m +++ b/src/geckomat/limit_proteins/constrainEnzConcs.m @@ -50,8 +50,8 @@ end %Set all reactions to draw from prot_pool -model.S(protPoolIdx, usageRxnsIdx(~protCons)) = 1; -model.lb(usageRxnsIdx(protCons)) = -1000; +model.S(protPoolIdx, usageRxnsIdx) = 1; +model.lb(usageRxnsIdx) = -1000; %If non-NaN in model.ec.concs, then constrain by UB if any(protCons)