-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
Showing
9 changed files
with
576 additions
and
4 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,26 @@ | ||
id,representation_of,label,old_id,organ_label,organ_id | ||
kidney-ascending-thin-loop-of-henle,UBERON:0004193,loop of Henle ascending limb thin segment,#FTUAscendingThinLimb | ||
kidney-cortical-collecting-duct,UBERON:0004203,Cortical Collecting Duct,#FTUCorticalCollectingDuct | ||
kidney-descending-thin-loop-of-henle,UBERON:0001289,descending limb of loop of Henle,#FTUDescendingThinLimb | ||
kidney-inner-medullary-collecting-duct,UBERON:0004205,inner medullary collecting duct,#FTUInnerMedullaryCollectingDuct | ||
kidney-nephron,UBERON:0001285,nephron,#FTUNephron,Kidney,UBERON:0002113 | ||
kidney-outer-medullary-collecting-duct,UBERON:0004204,outer medullary collecting duct,#FTUOuterMedullaryCollectingDuct | ||
kidney-renal-corpuscle,UBERON:0001229,renal corpuscle,#FTURenalCorpuscle,Kidney,UBERON:0002113 | ||
kidney-thick-ascending-loop-of-henle,UBERON:0001291,thick ascending limb of loop of Henle,#FTUThickAscendingLimb | ||
large-intestine-crypt-lieberkuhn,UBERON:0001984,crypt of Lieberkuhn of large intestine,#FTUCryptOfLieberkuhn,Large Intestine,UBERON:0000059 | ||
liver-liver-lobule,UBERON:0004647,liver lobule,#FTULiverLobule_inset1,Liver,UBERON:0002107 | ||
liver-liver-lobule,UBERON:0004647,liver lobule,#FTULiverLobule_inset2,Liver,UBERON:0002107 | ||
lung-bronchial-submucosal-gland,UBERON:8410043,bronchus submucosal gland,#FTUBronchialSubmucosalGland | ||
lung-pulmonary-alveolus,UBERON:0002299,alveolus of lung,#FTUAlveoli,Lung,UBERON:0002048 | ||
pancreas-intercalated-duct,UBERON:0014726,intercalated duct of pancreas,#FTUIntercalatedDuct | ||
pancreas-islets-langerhans,UBERON:0000006,islet of Langerhans,#FTUIsletOfLangerhans,Pancreas,UBERON:0001264 | ||
pancreas-pancreatic-acinus,UBERON:0001263,pancreatic acinus,#FTUAcinus | ||
prostate-prostate-glandular-acinus,UBERON:0004179,prostate glandular acinus,#FTUProstateGlandularAcinus,Prostate Gland,UBERON:0002367 | ||
skin-dermal-papilla,UBERON:0001992,papillary layer of dermis,#FTUDermalPapilla | ||
skin-epidermal-ridge,UBERON:0013487,epidermal ridge of digit,#FTUEpidermalRidge | ||
spleen-red-pulp,UBERON:0001250,red pulp of spleen,#FTURedPulp_Inset1 | ||
spleen-red-pulp,UBERON:0001250,red pulp of spleen,#FTURedPulp_Inset2 | ||
spleen-white-pulp,UBERON:0001959,white pulp of spleen,#FTUWhitePulp_Inset1 | ||
spleen-white-pulp,UBERON:0001959,white pulp of spleen,#FTUWhitePulp_Inset2 | ||
thymus-thymus-lobule,UBERON:0002125,thymus lobule,#FTUThymusLobule_Inset1,Thymus,UBERON:0002370 | ||
thymus-thymus-lobule,UBERON:0002125,thymus lobule,#FTUThymusLobule_Inset2,Thymus,UBERON:0002370 |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,38 @@ | ||
title: Human Reference Atlas (HRA) | ||
description: 'Human Reference Atlas (HRA) <https://humanatlas.io>' | ||
creators: | ||
- fullName: Katy Börner | ||
firstName: Katy | ||
lastName: Börner | ||
orcid: 0000-0002-3321-6137 | ||
project_leads: | ||
- fullName: Katy Börner | ||
firstName: Katy | ||
lastName: Börner | ||
orcid: 0000-0002-3321-6137 | ||
reviewers: | ||
- fullName: Ellen M. Quardokus | ||
firstName: Ellen | ||
lastName: Quardokus | ||
orcid: 0000-0001-7655-4833 | ||
externalReviewers: [] | ||
creation_date: '2022-05-06' | ||
license: >- | ||
Creative Commons Attribution 4.0 International ([CC BY | ||
4.0](https://creativecommons.org/licenses/by/4.0/)) | ||
publisher: HuBMAP | ||
funders: | ||
- funder: National Institutes of Health | ||
awardNumber: OT2OD026671 | ||
hubmapId: HBM248.CBJV.556 | ||
doi: https://doi.org/10.48539/HBM248.CBJV.556 | ||
citation: >- | ||
Sanjay Jain; M. Todd Valerius; Yongqun He, HuBMAP ASCT+B Tables. Kidney v1.2 | ||
[https://doi.org/10.48539/HBM248.CBJV.556](https://doi.org/10.48539/HBM248.CBJV.556) | ||
citationOverall: >- | ||
Quardokus, Ellen, Bruce W. Herr II, Lisel Record, Katy Börner. 2022. [*HuBMAP | ||
ASCT+B | ||
Tables*](https://hubmapconsortium.github.io/ccf/pages/ccf-anatomical-structures.html). | ||
Accessed on May 6, 2022. | ||
datatable: | ||
- digital-objects.yaml |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,179 @@ | ||
import { readFileSync } from 'fs'; | ||
import { basename } from 'path'; | ||
|
||
/**
 * Lookup table from a normalized name (as derived from a markdown file name)
 * to the final digital-object name. Names that do not appear here are used
 * unchanged by the caller.
 *
 * The table has a null prototype so that a lookup such as
 * `NAME_REMAPPING['constructor']` yields `undefined` instead of an inherited
 * `Object.prototype` member, and it is frozen because it is shared,
 * module-level constant data.
 */
const NAME_REMAPPING = Object.freeze(
  Object.assign(Object.create(null), {
    'asctb-3d-models-crosswalk': 'asct-b-3d-models-crosswalk',
    'asctb-crosswalk': 'asct-b-2d-models-crosswalk',
    'bone-marrow-pelvis': 'bonemarrow-pelvis',
    'intestine-large': 'large-intestine',
    'ln-ibex': '1-human-lymph-node-ibex',
    'lymph-node-ibex': '1-human-lymph-node-ibex',
    'intestines-codex': '2-intestine-codex',
    'kidney-codex': '3-kidney-codex',
    'skin-celldive': '4-skin-cell-dive',
    'liver-sim': '5-liver-sims',
    'pancreas-codex': '6-pancreas-codex',
    'lung-celldive': '7-lung-cell-dive',
    'intestine-large-male': 'large-intestine-male',
    'intestine-large-female': 'large-intestine-female',
    'vasculature-male': 'blood-vasculature-male',
    'vasculature-female': 'blood-vasculature-female',
    vasculature: 'blood-vasculature',
    brain: 'allen-brain',
    'bone-marrow-and-blood': 'bonemarrow-pelvis',
  })
);
|
||
/**
 * Parses a release markdown file into the metadata fields of a digital object.
 *
 * The parser relies on three conventions visible in the code below:
 *  - the first line is a heading of the form `# <title> <version>`;
 *  - the description is the line right after a `### Description` heading;
 *  - every other field is a markdown table row `| **<Key>:** | <value> |`.
 */
export class HraMarkdownParser {
  /**
   * Reads `inputFile` synchronously and keeps its lines in `this.rawMd`.
   *
   * @param {string} inputFile - Path to the markdown file to parse.
   * @throws If the file cannot be read.
   */
  constructor(inputFile) {
    this.inputFile = inputFile;
    this.rawMd = readFileSync(inputFile)
      .toString()
      .replace(/&ouml;/g, 'ö')
      .trim()
      // Accept both LF and CRLF line endings.
      .split(/\r?\n/);
  }

  /**
   * @param {string} key - Metadata label without the surrounding `**` and `:`.
   * @returns {boolean} Whether some line contains `**<key>:**`.
   */
  hasKey(key) {
    return this.rawMd.some((l) => l.includes(`**${key}:**`));
  }

  /**
   * Returns the trimmed value cell of the first table row labelled `key`.
   *
   * @param {string} key - Metadata label.
   * @returns {string} The value, or `''` when the row or its value cell is missing.
   */
  getMetadata(key) {
    const row = this.rawMd.find((l) => l.includes(`**${key}:**`));
    if (row === undefined) {
      return '';
    }
    // `| **Key:** | value |` splits into ['', ' **Key:** ', ' value ', ''].
    const cell = row.split('|')[2];
    return cell === undefined ? '' : cell.trim();
  }

  /**
   * Splits a metadata value on `;` or `,` into trimmed items.
   *
   * @param {string} key - Metadata label.
   * @returns {string[]} The items; `['']` when the value is empty or missing.
   */
  getMultiValue(key) {
    return this.getMetadata(key)
      .replace(/&ouml;/g, 'ö')
      .split(/[;,] */)
      .map((n) => n.trim());
  }

  /**
   * Formats a date as `<Mon> <day>, <year>`, e.g. `May 6, 2022`.
   *
   * A date-only ISO string (`YYYY-MM-DD`) is parsed by `Date` as UTC midnight,
   * so it is formatted from its UTC fields; formatting it in local time would
   * yield the previous day in time zones west of UTC. Any other input is
   * parsed and formatted in local time.
   *
   * @param {string} dateStr - Date string to format.
   * @returns {string} The formatted date, or `''` when `dateStr` is not a valid date.
   */
  getAccessedDate(dateStr) {
    const months = ['Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun', 'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec'];
    const date = new Date(dateStr);
    if (Number.isNaN(date.getTime())) {
      return '';
    }
    if (/^\d{4}-\d{2}-\d{2}$/.test(dateStr)) {
      return `${months[date.getUTCMonth()]} ${date.getUTCDate()}, ${date.getUTCFullYear()}`;
    }
    return `${months[date.getMonth()]} ${date.getDate()}, ${date.getFullYear()}`;
  }

  /**
   * Builds author records by pairing the names under `nameKey` with the
   * ORCIDs under `orcidKey`, position by position.
   *
   * @param {string} nameKey - Label of the row holding the names.
   * @param {string} orcidKey - Label of the row holding the ORCIDs.
   * @returns {{fullName: string, firstName: string, lastName: string, orcid: (string|undefined)}[]}
   *   One record per name; `[]` when either row is missing.
   */
  getAuthors(nameKey, orcidKey) {
    if (!this.hasKey(nameKey) || !this.hasKey(orcidKey)) {
      return [];
    }
    const names = this.getMultiValue(nameKey);
    const orcids = this.getMultiValue(orcidKey).map((n) => {
      // Take the link text of `[id](url)`; a value that is not a markdown
      // link is used as-is rather than being truncated.
      const link = n.match(/\[([^\]]*)\]/);
      return (link ? link[1] : n).trim();
    });
    return names.map((fullName, index) => ({
      fullName,
      firstName: fullName.split(/ +/)[0],
      // A trailing " II" suffix is not treated as the last name.
      lastName: fullName.replace(/ II$/, '').split(/ +/).slice(-1)[0],
      orcid: orcids[index],
    }));
  }

  /**
   * Builds funder records by pairing funders with award numbers, position by
   * position.
   *
   * @param {string} funderKey - Label of the row holding the funders.
   * @param {string} awardKey - Label of the row holding the award numbers.
   * @returns {{funder: string, awardNumber: (string|undefined)}[]}
   *   One record per funder; `[]` when the funder row is missing.
   */
  getFunders(funderKey, awardKey) {
    // Same guard as getAuthors: no funder row means no funders, not one blank funder.
    if (!this.hasKey(funderKey)) {
      return [];
    }
    const funders = this.getMultiValue(funderKey);
    const awards = this.getMultiValue(awardKey);

    return funders.map((funder, index) => ({
      funder,
      awardNumber: awards[index],
    }));
  }

  /**
   * Derives the digital-object name from the input file name: strips the
   * `.md` extension, the digital-object type and `3d-`/`vh-` prefixes,
   * expands `f-`/`m-` and `-l`/`-r` abbreviations, moves the sex token behind
   * the organ name and finally applies `NAME_REMAPPING`.
   *
   * @returns {string} The digital-object name.
   */
  getName() {
    let name = basename(this.inputFile, '.md')
      .replace(this.getDoType() + '-', '')
      .replace(/^3d\-/, '')
      .replace(/^vh\-/, '')
      .replace(/^f-/, 'female-')
      .replace(/^m-/, 'male-')
      .replace(/-l$/, '-left')
      .replace(/-r$/, '-right')
      .replace(/-mapping$/, '-crosswalk');

    // 'female' must be tested first because it contains 'male'.
    let sex;
    if (name.includes('female')) {
      sex = 'female';
    } else if (name.includes('male')) {
      sex = 'male';
    }
    if (sex) {
      const hasLaterality = name.endsWith('-left') || name.endsWith('-right');
      const elts = name.split('-').filter((s) => s !== sex);

      // Format for reference organs = ${organ}-${sex}-${laterality "optional"}
      if (hasLaterality) {
        name = `${elts.slice(0, -1).join('-')}-${sex}-${elts.slice(-1).join('-')}`;
      } else {
        name = `${elts.join('-')}-${sex}`;
      }
    }

    // Own-property check so that a name such as 'constructor' is never
    // "remapped" to an inherited Object.prototype member.
    if (Object.prototype.hasOwnProperty.call(NAME_REMAPPING, name)) {
      name = NAME_REMAPPING[name] || name;
    }

    return name;
  }

  /**
   * @returns {string} The first line without its leading character and its
   *   last space-separated token (the version), minus a trailing comma.
   */
  getTitle() {
    return this.rawMd[0].slice(1).trim().split(' ').slice(0, -1).join(' ').trim().replace(/,$/, '');
  }

  /**
   * @returns {string} The last space-separated token of the first line.
   */
  getVersion() {
    return this.rawMd[0].slice(1).trim().split(' ').slice(-1)[0];
  }

  /**
   * @returns {string} The trimmed line following the `### Description`
   *   heading, or `''` when there is no such heading or no following line.
   */
  getDescription() {
    const headingIndex = this.rawMd.findIndex((n) => n.startsWith('### Description'));
    if (headingIndex === -1 || headingIndex + 1 >= this.rawMd.length) {
      return '';
    }
    return this.rawMd[headingIndex + 1].trim();
  }

  /**
   * Finds the label of a "How to Cite" row.
   *
   * @param {boolean} overall - `true` for the row whose label ends in
   *   `Overall:**`, `false` for the other "How to Cite" row.
   * @returns {string} The label without `*` and `:`, or `''` when no such row exists.
   */
  findHowToCiteKey(overall) {
    const row = this.rawMd.find((l) => l.includes('**How to Cite') && l.includes('Overall:**') === overall);
    if (row === undefined) {
      return '';
    }
    const labelCell = row.split('|')[1];
    return labelCell === undefined ? '' : labelCell.trim().replace(/\*/g, '').replace(/\:/g, '');
  }

  /**
   * @returns {string} Label of the item-specific "How to Cite" row, or `''`.
   */
  getHowToCiteKey() {
    return this.findHowToCiteKey(false);
  }

  /**
   * @returns {string} Label of the "How to Cite ... Overall" row, or `''`.
   */
  getHowToCiteOverallKey() {
    return this.findHowToCiteKey(true);
  }

  /**
   * @returns {string} The name of the directory containing the input file,
   *   with `ref-organs` normalized to `ref-organ`.
   */
  getDoType() {
    return this.inputFile.split('/').slice(-2)[0].replace('ref-organs', 'ref-organ');
  }

  /**
   * @returns {string} `<type>/<name>/<version>` identifying the digital object.
   */
  getDoString() {
    return [this.getDoType(), this.getName(), this.getVersion()].join('/');
  }

  /**
   * Collects every parsed field into one plain object. Fields whose row is
   * missing come back as `''` (or `[]` for lists) instead of throwing.
   *
   * @returns {object} The metadata record.
   */
  toJson() {
    const creationDate = this.getMetadata('Creation Date') || this.getMetadata('Date');

    // The DOI cell is normally a markdown link `[doi](url)`; fall back to the
    // raw cell text when it is not.
    const rawDoi = this.getMetadata('DOI');
    const doiLink = rawDoi.match(/\[([^\]]*)\]/);

    const citeKey = this.getHowToCiteKey();
    const citeOverallKey = this.getHowToCiteOverallKey();

    return {
      title: this.getTitle(),
      description: this.getDescription(),

      // Label spellings vary between files, so each variant is tried.
      creators: [
        ...this.getAuthors('Creator(s)', 'Creator ORCID(s)'),
        ...this.getAuthors('Creator(s)', 'Creator ORCID'),
      ],
      project_leads: this.getAuthors('Project Lead', 'Project Lead ORCID'),
      reviewers: [
        ...this.getAuthors('Reviewer(s)', 'Reviewers ORCID(s)'),
        ...this.getAuthors('Reviewer(s)', 'Reviewer ORCID(s)'),
        ...this.getAuthors('Internal Reviewer(s)', 'Internal Reviewer ORCID(s)'),
      ],
      externalReviewers: this.getAuthors('External Reviewer(s)', 'External Reviewer ORCID(s)'),

      creation_date: creationDate,
      creation_year: creationDate.split('-')[0],
      accessed_date: this.getAccessedDate(creationDate),

      license: this.getMetadata('License'),
      publisher: this.getMetadata('Publisher'),
      funders: this.getFunders('Funder', 'Award Number'),
      hubmapId: this.getMetadata('HuBMAP ID'),
      dataTable: this.getMetadata('Data Table') || this.getMetadata('3D Data') || this.getMetadata('2D Data'),
      doi: doiLink ? doiLink[1] : rawDoi,
      citation: citeKey ? this.getMetadata(citeKey) : '',
      citationOverall: citeOverallKey ? this.getMetadata(citeOverallKey) : '',
    };
  }
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,103 @@ | ||
import { existsSync, writeFileSync } from 'fs'; | ||
import { dump } from 'js-yaml'; | ||
import { resolve } from 'path'; | ||
import sh from 'shelljs'; | ||
import { HraMarkdownParser } from './md-parser.js'; | ||
import { split2dFtuCrosswalk } from './split-2d-ftu-crosswalk.js'; | ||
import { splitRefOrganCrosswalk } from './split-ref-organ-crosswalk.js'; | ||
|
||
/**
 * Materializes one digital object on disk: creates
 * `<doHome>/<type>/<name>/<version>/raw`, copies (and, for archives, extracts)
 * every data file linked from the markdown's data-table field into it, and
 * writes the remaining metadata as `metadata.yaml`.
 *
 * @param {object} context - Reads `context.doHome` (output root) and
 *   `context.ccfReleasesPath` (root that linked data files are resolved against).
 * @param {HraMarkdownParser} md - Parser for the markdown file being migrated.
 */
function writeDigitalObject(context, md) {
  const data = md.toJson();
  // Write out metadata.yaml
  const yamlDir = resolve(context.doHome, md.getDoType(), md.getName(), md.getVersion(), 'raw');
  sh.mkdir('-p', yamlDir);

  // Each `(https://...)` link target is reduced to its last three path
  // segments. `match` returns null when there is no link at all, which is
  // treated as "no data files" rather than crashing.
  const dataPaths = (data.dataTable.match(/\(https\:\/\/.*?\)/g) || []).map((u) =>
    u.slice(1, -1).split('/').slice(-3).join('/')
  );

  // Keys set to undefined are dropped from / reserved in the output record;
  // `datatable` replaces the raw `dataTable` markdown with plain file names.
  Object.assign(data, {
    type: undefined,
    name: undefined,
    version: undefined,
    creation_year: undefined,
    accessed_date: undefined,
    dataTable: undefined,
    datatable: [],
  });

  for (const inputSrcPath of dataPaths) {
    const archiveName = inputSrcPath.split('/').slice(-1)[0];
    const srcPath = resolve(context.ccfReleasesPath, inputSrcPath);

    sh.cp(srcPath, resolve(yamlDir, archiveName));

    // For archives, the listed data file is the archive name minus its
    // extension. Only the suffix is stripped, so an extension-like fragment
    // elsewhere in the name or in a directory component is left alone.
    // Paths are double-quoted so that spaces do not split shell arguments.
    let srcName = archiveName;
    if (srcPath.endsWith('.zip')) {
      srcName = archiveName.slice(0, -'.zip'.length);
      sh.exec(`unzip -o "${srcPath}" -d "${yamlDir}" "${srcName}"`);
    } else if (srcPath.endsWith('.bz2')) {
      srcName = archiveName.slice(0, -'.bz2'.length);
      sh.exec(`bunzip2 -c "${srcPath}" > "${resolve(yamlDir, srcName)}"`);
    } else if (srcPath.endsWith('.7z')) {
      srcName = archiveName.slice(0, -'.7z'.length);
      sh.exec(`7z e -aoa "${srcPath}" "-o${yamlDir}" "${srcName}"`);
    }
    const destPath = resolve(yamlDir, srcName);

    data.datatable.push(srcName);
    // Report (but do not abort on) a missing source or a failed copy/extract.
    if (!existsSync(srcPath) || !existsSync(destPath)) {
      console.log(md.inputFile, md.getDoType(), srcPath, destPath);
    }
  }

  // Non-crosswalk ref-organ and 2d-ftu objects also list a crosswalk.csv;
  // this function only records the name, it does not create that file.
  if (!md.getName().includes('crosswalk') && (md.getDoType() === 'ref-organ' || md.getDoType() === '2d-ftu')) {
    data.datatable.push('crosswalk.csv');
  }

  writeFileSync(resolve(yamlDir, 'metadata.yaml'), dump(data));
}
|
||
/**
 * Migrates every `v1.*` release found under `context.ccfReleasesPath`.
 *
 * Each markdown file matching `v1.*\/markdown/<type>/<name>.md` is written out
 * as a digital object and grouped by release version. For every version a
 * `collection/hra/<version>/raw` directory then receives the list of its
 * digital objects and a copy of `hra-metadata.yaml`, after which the 2D FTU
 * and reference-organ crosswalks are split per digital object.
 *
 * @param {object} context - Reads `context.ccfReleasesPath`,
 *   `context.processorHome` and `context.doHome`.
 */
export function migrateCcfReleases(context) {
  const inputDir = context.ccfReleasesPath;
  const srcDir = resolve(context.processorHome, 'src/migration/ccf-releases');

  // Release version -> digital-object strings belonging to that release.
  const collections = {};
  const markdownPaths = sh.ls(resolve(inputDir, 'v1.*/markdown/*/*.md'));

  for (const markdownPath of markdownPaths) {
    // The last five path segments are [<parent>, <version>, 'markdown', <type>, <file>].
    const segments = markdownPath.split('/').slice(-5);
    const collectionVersion = segments[1];
    const type = segments[3];
    const name = segments[4].replace('.md', '');

    const mdFile = resolve(inputDir, `${collectionVersion}/markdown/${type}/${name}.md`);
    const parser = new HraMarkdownParser(mdFile);
    writeDigitalObject(context, parser);

    if (!collections[collectionVersion]) {
      collections[collectionVersion] = [];
    }
    collections[collectionVersion].push(parser.getDoString());
  }

  const isCrosswalk = (doString) => doString.includes('crosswalk');

  for (const [version, digitalObjects] of Object.entries(collections)) {
    const yamlDir = resolve(context.doHome, `collection/hra/${version}/raw`);
    sh.mkdir('-p', yamlDir);

    writeFileSync(`${yamlDir}/digital-objects.yaml`, dump({ 'digital-objects': digitalObjects }));

    sh.cp(resolve(srcDir, 'hra-metadata.yaml'), `${yamlDir}/metadata.yaml`);

    // 2D FTU illustrations each get their share of the 2D FTU crosswalk.
    const ftuObjects = digitalObjects.filter((str) => str.startsWith('2d-ftu/'));
    const crosswalk = ftuObjects.find(isCrosswalk);
    for (const doString of ftuObjects) {
      if (!isCrosswalk(doString)) {
        split2dFtuCrosswalk(context, crosswalk, doString);
      }
    }

    // Reference organs each get their share of the reference-organ crosswalk.
    const refOrganObjects = digitalObjects.filter((str) => str.startsWith('ref-organ/'));
    const refOrganCrosswalk = refOrganObjects.find(isCrosswalk);
    for (const doString of refOrganObjects) {
      if (!isCrosswalk(doString)) {
        splitRefOrganCrosswalk(context, refOrganCrosswalk, doString);
      }
    }
  }
}
Oops, something went wrong.