From 8a2a294f737a67854dd18b1fe33b77dacc6e5323 Mon Sep 17 00:00:00 2001 From: kcleal Date: Wed, 31 Jul 2024 17:08:36 +0100 Subject: [PATCH] Updated gtf/gff3 name parsing --- src/hts_funcs.cpp | 56 +++++++++++++++++++++++++++------- test/test.gtf | 77 +++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 122 insertions(+), 11 deletions(-) create mode 100644 test/test.gtf diff --git a/src/hts_funcs.cpp b/src/hts_funcs.cpp index d1fbb8d..5b17d09 100644 --- a/src/hts_funcs.cpp +++ b/src/hts_funcs.cpp @@ -1464,9 +1464,15 @@ namespace HGW { b.strand = 0; } for (const auto &item : Utils::split(b.parts[8], ';')) { + if (kind == GFF3_NOI) { std::vector keyval = Utils::split(item, '='); - if (keyval[0] == "ID") { + if (keyval[0] == "Name") { + b.parent = keyval[1]; + b.name = keyval[1]; + break; + } + else if (keyval[0] == "ID") { b.name = keyval[1]; } else if (keyval[0] == "Parent") { @@ -1475,15 +1481,38 @@ namespace HGW { } } else { std::vector keyval = Utils::split(item, ' '); - if (keyval[0] == "transcript_id") { - b.name = keyval[1]; - } - else if (keyval[0] == "gene_id") { + if (keyval[0] == "gene_name") { b.parent = keyval[1]; + b.name = keyval[1]; break; + } else if (keyval[0] == "gene_id") { + b.parent = keyval[1]; + b.name = keyval[1]; + } else if (keyval[0] == "transcript_id") { + b.name = keyval[1]; } } +// if (kind == GFF3_NOI) { +// std::vector keyval = Utils::split(item, '='); +// if (keyval[0] == "ID") { +// b.name = keyval[1]; +// } +// else if (keyval[0] == "Parent") { +// b.parent = keyval[1]; +// break; +// } +// } else { +// std::vector keyval = Utils::split(item, ' '); +// if (keyval[0] == "transcript_id") { +// b.name = keyval[1]; +// } +// else if (keyval[0] == "gene_id") { +// b.parent = keyval[1]; +// break; +// } +// } + } allBlocks[b.chrom].add(b.start, b.end, b); } @@ -1640,6 +1669,7 @@ namespace HGW { return; } strand = 0; + if (kind > BCF_IDX) { // non indexed cached VCF_NOI / BED_NOI / GFF3 (todo) / GW_LABEL / STDIN? // add_to_dict==false, only BED and GW_LABEL files supported (iterate whole file) if (!add_to_dict) { @@ -1819,23 +1849,27 @@ namespace HGW { for (const auto &item : Utils::split(parts[8], ';')) { if (kind == GFF3_IDX) { std::vector keyval = Utils::split(item, '='); - if (keyval[0] == "ID") { - rid = keyval[1]; - } else if (keyval[0] == "Name") { + if (keyval[0] == "Name") { parent = keyval[1]; - rid = keyval[1]; + rid = keyval[1]; break; } + else if (keyval[0] == "ID") { + rid = keyval[1]; + } else if (keyval[0] == "Parent") { parent = keyval[1]; break; } } else { std::vector keyval = Utils::split(item, ' '); - if (keyval[0] == "gene_id") { + if (keyval[0] == "gene_name") { parent = keyval[1]; - rid = keyval[1]; + rid = keyval[1]; break; + } else if (keyval[0] == "gene_id") { + parent = keyval[1]; + rid = keyval[1]; } else if (keyval[0] == "transcript_id") { rid = keyval[1]; } diff --git a/test/test.gtf b/test/test.gtf new file mode 100644 index 0000000..cfb803e --- /dev/null +++ b/test/test.gtf @@ -0,0 +1,77 @@ +chr1 processed_transcript exon 11869 12227 . + . gene_id "ENSG00000223972"; transcript_id "ENST00000456328"; exon_number "1"; gene_name "DDX11L1"; gene_biotype "pseudogene"; transcript_name "DDX11L1-002"; exon_id "ENSE00002234944"; +chr1 processed_transcript exon 12613 12721 . + . gene_id "ENSG00000223972"; transcript_id "ENST00000456328"; exon_number "2"; gene_name "DDX11L1"; gene_biotype "pseudogene"; transcript_name "DDX11L1-002"; exon_id "ENSE00003582793"; +chr1 processed_transcript exon 13221 14409 . + . gene_id "ENSG00000223972"; transcript_id "ENST00000456328"; exon_number "3"; gene_name "DDX11L1"; gene_biotype "pseudogene"; transcript_name "DDX11L1-002"; exon_id "ENSE00002312635"; +chr1 transcribed_unprocessed_pseudogene exon 11872 12227 . + . gene_id "ENSG00000223972"; transcript_id "ENST00000515242"; exon_number "1"; gene_name "DDX11L1"; gene_biotype "pseudogene"; transcript_name "DDX11L1-201"; exon_id "ENSE00002234632"; +chr1 transcribed_unprocessed_pseudogene exon 12613 12721 . + . gene_id "ENSG00000223972"; transcript_id "ENST00000515242"; exon_number "2"; gene_name "DDX11L1"; gene_biotype "pseudogene"; transcript_name "DDX11L1-201"; exon_id "ENSE00003608237"; +chr1 transcribed_unprocessed_pseudogene exon 13225 14412 . + . gene_id "ENSG00000223972"; transcript_id "ENST00000515242"; exon_number "3"; gene_name "DDX11L1"; gene_biotype "pseudogene"; transcript_name "DDX11L1-201"; exon_id "ENSE00002306041"; +chr1 transcribed_unprocessed_pseudogene exon 11874 12227 . + . gene_id "ENSG00000223972"; transcript_id "ENST00000518655"; exon_number "1"; gene_name "DDX11L1"; gene_biotype "pseudogene"; transcript_name "DDX11L1-202"; exon_id "ENSE00002269724"; +chr1 transcribed_unprocessed_pseudogene exon 12595 12721 . + . gene_id "ENSG00000223972"; transcript_id "ENST00000518655"; exon_number "2"; gene_name "DDX11L1"; gene_biotype "pseudogene"; transcript_name "DDX11L1-202"; exon_id "ENSE00002270865"; +chr1 transcribed_unprocessed_pseudogene exon 13403 13655 . + . gene_id "ENSG00000223972"; transcript_id "ENST00000518655"; exon_number "3"; gene_name "DDX11L1"; gene_biotype "pseudogene"; transcript_name "DDX11L1-202"; exon_id "ENSE00002216795"; +chr1 transcribed_unprocessed_pseudogene exon 13661 14409 . + . gene_id "ENSG00000223972"; transcript_id "ENST00000518655"; exon_number "4"; gene_name "DDX11L1"; gene_biotype "pseudogene"; transcript_name "DDX11L1-202"; exon_id "ENSE00002303382"; +chr1 transcribed_unprocessed_pseudogene exon 12010 12057 . + . gene_id "ENSG00000223972"; transcript_id "ENST00000450305"; exon_number "1"; gene_name "DDX11L1"; gene_biotype "pseudogene"; transcript_name "DDX11L1-001"; exon_id "ENSE00001948541"; +chr1 transcribed_unprocessed_pseudogene exon 12179 12227 . + . gene_id "ENSG00000223972"; transcript_id "ENST00000450305"; exon_number "2"; gene_name "DDX11L1"; gene_biotype "pseudogene"; transcript_name "DDX11L1-001"; exon_id "ENSE00001671638"; +chr1 transcribed_unprocessed_pseudogene exon 12613 12697 . + . gene_id "ENSG00000223972"; transcript_id "ENST00000450305"; exon_number "3"; gene_name "DDX11L1"; gene_biotype "pseudogene"; transcript_name "DDX11L1-001"; exon_id "ENSE00001758273"; +chr1 transcribed_unprocessed_pseudogene exon 12975 13052 . + . gene_id "ENSG00000223972"; transcript_id "ENST00000450305"; exon_number "4"; gene_name "DDX11L1"; gene_biotype "pseudogene"; transcript_name "DDX11L1-001"; exon_id "ENSE00001799933"; +chr1 transcribed_unprocessed_pseudogene exon 13221 13374 . + . gene_id "ENSG00000223972"; transcript_id "ENST00000450305"; exon_number "5"; gene_name "DDX11L1"; gene_biotype "pseudogene"; transcript_name "DDX11L1-001"; exon_id "ENSE00001746346"; +chr1 transcribed_unprocessed_pseudogene exon 13453 13670 . + . gene_id "ENSG00000223972"; transcript_id "ENST00000450305"; exon_number "6"; gene_name "DDX11L1"; gene_biotype "pseudogene"; transcript_name "DDX11L1-001"; exon_id "ENSE00001863096"; +chr1 unprocessed_pseudogene exon 29321 29370 . - . gene_id "ENSG00000227232"; transcript_id "ENST00000438504"; exon_number "1"; gene_name "WASH7P"; gene_biotype "pseudogene"; transcript_name "WASH7P-202"; exon_id "ENSE00001718035"; +chr1 unprocessed_pseudogene exon 24738 24891 . - . gene_id "ENSG00000227232"; transcript_id "ENST00000438504"; exon_number "2"; gene_name "WASH7P"; gene_biotype "pseudogene"; transcript_name "WASH7P-202"; exon_id "ENSE00003624050"; +chr1 unprocessed_pseudogene exon 18268 18379 . - . gene_id "ENSG00000227232"; transcript_id "ENST00000438504"; exon_number "3"; gene_name "WASH7P"; gene_biotype "pseudogene"; transcript_name "WASH7P-202"; exon_id "ENSE00001642865"; +chr1 unprocessed_pseudogene exon 17915 18061 . - . gene_id "ENSG00000227232"; transcript_id "ENST00000438504"; exon_number "4"; gene_name "WASH7P"; gene_biotype "pseudogene"; transcript_name "WASH7P-202"; exon_id "ENSE00003638984"; +chr1 unprocessed_pseudogene exon 17602 17742 . - . gene_id "ENSG00000227232"; transcript_id "ENST00000438504"; exon_number "5"; gene_name "WASH7P"; gene_biotype "pseudogene"; transcript_name "WASH7P-202"; exon_id "ENSE00001699689"; +chr1 unprocessed_pseudogene exon 17233 17364 . - . gene_id "ENSG00000227232"; transcript_id "ENST00000438504"; exon_number "6"; gene_name "WASH7P"; gene_biotype "pseudogene"; transcript_name "WASH7P-202"; exon_id "ENSE00001656010"; +chr1 unprocessed_pseudogene exon 16854 17055 . - . gene_id "ENSG00000227232"; transcript_id "ENST00000438504"; exon_number "7"; gene_name "WASH7P"; gene_biotype "pseudogene"; transcript_name "WASH7P-202"; exon_id "ENSE00001760358"; +chr1 unprocessed_pseudogene exon 16607 16765 . - . gene_id "ENSG00000227232"; transcript_id "ENST00000438504"; exon_number "8"; gene_name "WASH7P"; gene_biotype "pseudogene"; transcript_name "WASH7P-202"; exon_id "ENSE00003618297"; +chr1 unprocessed_pseudogene exon 15904 15947 . - . gene_id "ENSG00000227232"; transcript_id "ENST00000438504"; exon_number "9"; gene_name "WASH7P"; gene_biotype "pseudogene"; transcript_name "WASH7P-202"; exon_id "ENSE00001375216"; +chr1 unprocessed_pseudogene exon 15796 15901 . - . gene_id "ENSG00000227232"; transcript_id "ENST00000438504"; exon_number "10"; gene_name "WASH7P"; gene_biotype "pseudogene"; transcript_name "WASH7P-202"; exon_id "ENSE00001388009"; +chr1 unprocessed_pseudogene exon 14970 15038 . - . gene_id "ENSG00000227232"; transcript_id "ENST00000438504"; exon_number "11"; gene_name "WASH7P"; gene_biotype "pseudogene"; transcript_name "WASH7P-202"; exon_id "ENSE00003497546"; +chr1 unprocessed_pseudogene exon 14363 14829 . - . gene_id "ENSG00000227232"; transcript_id "ENST00000438504"; exon_number "12"; gene_name "WASH7P"; gene_biotype "pseudogene"; transcript_name "WASH7P-202"; exon_id "ENSE00003511598"; +chr1 unprocessed_pseudogene exon 24734 24886 . - . gene_id "ENSG00000227232"; transcript_id "ENST00000541675"; exon_number "1"; gene_name "WASH7P"; gene_biotype "pseudogene"; transcript_name "WASH7P-204"; exon_id "ENSE00002254515"; +chr1 unprocessed_pseudogene exon 18268 18369 . - . gene_id "ENSG00000227232"; transcript_id "ENST00000541675"; exon_number "2"; gene_name "WASH7P"; gene_biotype "pseudogene"; transcript_name "WASH7P-204"; exon_id "ENSE00002303227"; +chr1 unprocessed_pseudogene exon 17915 18061 . - . gene_id "ENSG00000227232"; transcript_id "ENST00000541675"; exon_number "3"; gene_name "WASH7P"; gene_biotype "pseudogene"; transcript_name "WASH7P-204"; exon_id "ENSE00003638984"; +chr1 unprocessed_pseudogene exon 17606 17742 . - . gene_id "ENSG00000227232"; transcript_id "ENST00000541675"; exon_number "4"; gene_name "WASH7P"; gene_biotype "pseudogene"; transcript_name "WASH7P-204"; exon_id "ENSE00003629019"; +chr1 unprocessed_pseudogene exon 17498 17504 . - . gene_id "ENSG00000227232"; transcript_id "ENST00000541675"; exon_number "5"; gene_name "WASH7P"; gene_biotype "pseudogene"; transcript_name "WASH7P-204"; exon_id "ENSE00002285713"; +chr1 unprocessed_pseudogene exon 17233 17364 . - . gene_id "ENSG00000227232"; transcript_id "ENST00000541675"; exon_number "6"; gene_name "WASH7P"; gene_biotype "pseudogene"; transcript_name "WASH7P-204"; exon_id "ENSE00001656010"; +chr1 unprocessed_pseudogene exon 16854 17055 . - . gene_id "ENSG00000227232"; transcript_id "ENST00000541675"; exon_number "7"; gene_name "WASH7P"; gene_biotype "pseudogene"; transcript_name "WASH7P-204"; exon_id "ENSE00001760358"; +chr1 unprocessed_pseudogene exon 14970 15038 . - . gene_id "ENSG00000227232"; transcript_id "ENST00000541675"; exon_number "8"; gene_name "WASH7P"; gene_biotype "pseudogene"; transcript_name "WASH7P-204"; exon_id "ENSE00003497546"; +chr1 unprocessed_pseudogene exon 14363 14829 . - . gene_id "ENSG00000227232"; transcript_id "ENST00000541675"; exon_number "9"; gene_name "WASH7P"; gene_biotype "pseudogene"; transcript_name "WASH7P-204"; exon_id "ENSE00003511598"; +chr1 unprocessed_pseudogene exon 29321 29370 . - . gene_id "ENSG00000227232"; transcript_id "ENST00000423562"; exon_number "1"; gene_name "WASH7P"; gene_biotype "pseudogene"; transcript_name "WASH7P-201"; exon_id "ENSE00001718035"; +chr1 unprocessed_pseudogene exon 24738 24891 . - . gene_id "ENSG00000227232"; transcript_id "ENST00000423562"; exon_number "2"; gene_name "WASH7P"; gene_biotype "pseudogene"; transcript_name "WASH7P-201"; exon_id "ENSE00003603734"; +chr1 unprocessed_pseudogene exon 17915 18061 . - . gene_id "ENSG00000227232"; transcript_id "ENST00000423562"; exon_number "3"; gene_name "WASH7P"; gene_biotype "pseudogene"; transcript_name "WASH7P-201"; exon_id "ENSE00003513603"; +chr1 unprocessed_pseudogene exon 17606 17742 . - . gene_id "ENSG00000227232"; transcript_id "ENST00000423562"; exon_number "4"; gene_name "WASH7P"; gene_biotype "pseudogene"; transcript_name "WASH7P-201"; exon_id "ENSE00003565315"; +chr1 unprocessed_pseudogene exon 17233 17368 . - . gene_id "ENSG00000227232"; transcript_id "ENST00000423562"; exon_number "5"; gene_name "WASH7P"; gene_biotype "pseudogene"; transcript_name "WASH7P-201"; exon_id "ENSE00003685767"; +chr1 unprocessed_pseudogene exon 16858 17055 . - . gene_id "ENSG00000227232"; transcript_id "ENST00000423562"; exon_number "6"; gene_name "WASH7P"; gene_biotype "pseudogene"; transcript_name "WASH7P-201"; exon_id "ENSE00003553898"; +chr1 unprocessed_pseudogene exon 16607 16765 . - . gene_id "ENSG00000227232"; transcript_id "ENST00000423562"; exon_number "7"; gene_name "WASH7P"; gene_biotype "pseudogene"; transcript_name "WASH7P-201"; exon_id "ENSE00003621279"; +chr1 unprocessed_pseudogene exon 15796 15947 . - . gene_id "ENSG00000227232"; transcript_id "ENST00000423562"; exon_number "8"; gene_name "WASH7P"; gene_biotype "pseudogene"; transcript_name "WASH7P-201"; exon_id "ENSE00002030414"; +chr1 unprocessed_pseudogene exon 14970 15038 . - . gene_id "ENSG00000227232"; transcript_id "ENST00000423562"; exon_number "9"; gene_name "WASH7P"; gene_biotype "pseudogene"; transcript_name "WASH7P-201"; exon_id "ENSE00003591210"; +chr1 unprocessed_pseudogene exon 14363 14829 . - . gene_id "ENSG00000227232"; transcript_id "ENST00000423562"; exon_number "10"; gene_name "WASH7P"; gene_biotype "pseudogene"; transcript_name "WASH7P-201"; exon_id "ENSE00003693168"; +chr1 unprocessed_pseudogene exon 29534 29570 . - . gene_id "ENSG00000227232"; transcript_id "ENST00000488147"; exon_number "1"; gene_name "WASH7P"; gene_biotype "pseudogene"; transcript_name "WASH7P-001"; exon_id "ENSE00001890219"; +chr1 unprocessed_pseudogene exon 24738 24891 . - . gene_id "ENSG00000227232"; transcript_id "ENST00000488147"; exon_number "2"; gene_name "WASH7P"; gene_biotype "pseudogene"; transcript_name "WASH7P-001"; exon_id "ENSE00003507205"; +chr1 unprocessed_pseudogene exon 18268 18366 . - . gene_id "ENSG00000227232"; transcript_id "ENST00000488147"; exon_number "3"; gene_name "WASH7P"; gene_biotype "pseudogene"; transcript_name "WASH7P-001"; exon_id "ENSE00003477500"; +chr1 unprocessed_pseudogene exon 17915 18061 . - . gene_id "ENSG00000227232"; transcript_id "ENST00000488147"; exon_number "4"; gene_name "WASH7P"; gene_biotype "pseudogene"; transcript_name "WASH7P-001"; exon_id "ENSE00003565697"; +chr1 unprocessed_pseudogene exon 17606 17742 . - . gene_id "ENSG00000227232"; transcript_id "ENST00000488147"; exon_number "5"; gene_name "WASH7P"; gene_biotype "pseudogene"; transcript_name "WASH7P-001"; exon_id "ENSE00003475637"; +chr1 unprocessed_pseudogene exon 17233 17368 . - . gene_id "ENSG00000227232"; transcript_id "ENST00000488147"; exon_number "6"; gene_name "WASH7P"; gene_biotype "pseudogene"; transcript_name "WASH7P-001"; exon_id "ENSE00003502542"; +chr1 unprocessed_pseudogene exon 16858 17055 . - . gene_id "ENSG00000227232"; transcript_id "ENST00000488147"; exon_number "7"; gene_name "WASH7P"; gene_biotype "pseudogene"; transcript_name "WASH7P-001"; exon_id "ENSE00003553898"; +chr1 unprocessed_pseudogene exon 16607 16765 . - . gene_id "ENSG00000227232"; transcript_id "ENST00000488147"; exon_number "8"; gene_name "WASH7P"; gene_biotype "pseudogene"; transcript_name "WASH7P-001"; exon_id "ENSE00003621279"; +chr1 unprocessed_pseudogene exon 15796 15947 . - . gene_id "ENSG00000227232"; transcript_id "ENST00000488147"; exon_number "9"; gene_name "WASH7P"; gene_biotype "pseudogene"; transcript_name "WASH7P-001"; exon_id "ENSE00002030414"; +chr1 unprocessed_pseudogene exon 15005 15038 . - . gene_id "ENSG00000227232"; transcript_id "ENST00000488147"; exon_number "10"; gene_name "WASH7P"; gene_biotype "pseudogene"; transcript_name "WASH7P-001"; exon_id "ENSE00001935574"; +chr1 unprocessed_pseudogene exon 14404 14501 . - . gene_id "ENSG00000227232"; transcript_id "ENST00000488147"; exon_number "11"; gene_name "WASH7P"; gene_biotype "pseudogene"; transcript_name "WASH7P-001"; exon_id "ENSE00001843071"; +chr1 unprocessed_pseudogene exon 29534 29806 . - . gene_id "ENSG00000227232"; transcript_id "ENST00000538476"; exon_number "1"; gene_name "WASH7P"; gene_biotype "pseudogene"; transcript_name "WASH7P-203"; exon_id "ENSE00001378845"; +chr1 unprocessed_pseudogene exon 24737 24891 . - . gene_id "ENSG00000227232"; transcript_id "ENST00000538476"; exon_number "2"; gene_name "WASH7P"; gene_biotype "pseudogene"; transcript_name "WASH7P-203"; exon_id "ENSE00002317443"; +chr1 unprocessed_pseudogene exon 18268 18366 . - . gene_id "ENSG00000227232"; transcript_id "ENST00000538476"; exon_number "3"; gene_name "WASH7P"; gene_biotype "pseudogene"; transcript_name "WASH7P-203"; exon_id "ENSE00003682243"; +chr1 unprocessed_pseudogene exon 17915 18061 . - . gene_id "ENSG00000227232"; transcript_id "ENST00000538476"; exon_number "4"; gene_name "WASH7P"; gene_biotype "pseudogene"; transcript_name "WASH7P-203"; exon_id "ENSE00003638984"; +chr1 unprocessed_pseudogene exon 17602 17742 . - . gene_id "ENSG00000227232"; transcript_id "ENST00000538476"; exon_number "5"; gene_name "WASH7P"; gene_biotype "pseudogene"; transcript_name "WASH7P-203"; exon_id "ENSE00001699689"; +chr1 unprocessed_pseudogene exon 17233 17364 . - . gene_id "ENSG00000227232"; transcript_id "ENST00000538476"; exon_number "6"; gene_name "WASH7P"; gene_biotype "pseudogene"; transcript_name "WASH7P-203"; exon_id "ENSE00001656010"; +chr1 unprocessed_pseudogene exon 16858 17055 . - . gene_id "ENSG00000227232"; transcript_id "ENST00000538476"; exon_number "7"; gene_name "WASH7P"; gene_biotype "pseudogene"; transcript_name "WASH7P-203"; exon_id "ENSE00003632482"; +chr1 unprocessed_pseudogene exon 16748 16765 . - . gene_id "ENSG00000227232"; transcript_id "ENST00000538476"; exon_number "8"; gene_name "WASH7P"; gene_biotype "pseudogene"; transcript_name "WASH7P-203"; exon_id "ENSE00002275850"; +chr1 unprocessed_pseudogene exon 16607 16745 . - . gene_id "ENSG00000227232"; transcript_id "ENST00000538476"; exon_number "9"; gene_name "WASH7P"; gene_biotype "pseudogene"; transcript_name "WASH7P-203"; exon_id "ENSE00002241734"; +chr1 unprocessed_pseudogene exon 15904 15947 . - . gene_id "ENSG00000227232"; transcript_id "ENST00000538476"; exon_number "10"; gene_name "WASH7P"; gene_biotype "pseudogene"; transcript_name "WASH7P-203"; exon_id "ENSE00001375216"; +chr1 unprocessed_pseudogene exon 15796 15901 . - . gene_id "ENSG00000227232"; transcript_id "ENST00000538476"; exon_number "11"; gene_name "WASH7P"; gene_biotype "pseudogene"; transcript_name "WASH7P-203"; exon_id "ENSE00001388009"; +chr1 unprocessed_pseudogene exon 15000 15038 . - . gene_id "ENSG00000227232"; transcript_id "ENST00000538476"; exon_number "12"; gene_name "WASH7P"; gene_biotype "pseudogene"; transcript_name "WASH7P-203"; exon_id "ENSE00002215305"; +chr1 unprocessed_pseudogene exon 14411 14502 . - . gene_id "ENSG00000227232"; transcript_id "ENST00000538476"; exon_number "13"; gene_name "WASH7P"; gene_biotype "pseudogene"; transcript_name "WASH7P-203"; exon_id "ENSE00002295553"; +chr1 lincRNA exon 29554 30039 . + . gene_id "ENSG00000243485"; transcript_id "ENST00000473358"; exon_number "1"; gene_name "MIR1302-11"; gene_biotype "lincRNA"; transcript_name "MIR1302-11-001"; exon_id "ENSE00001947070"; +chr1 lincRNA exon 30564 30667 . + . gene_id "ENSG00000243485"; transcript_id "ENST00000473358"; exon_number "2"; gene_name "MIR1302-11"; gene_biotype "lincRNA"; transcript_name "MIR1302-11-001"; exon_id "ENSE00001922571"; +chr1 lincRNA exon 30976 31097 . + . gene_id "ENSG00000243485"; transcript_id "ENST00000473358"; exon_number "3"; gene_name "MIR1302-11"; gene_biotype "lincRNA"; transcript_name "MIR1302-11-001"; exon_id "ENSE00001827679"; +chr1 lincRNA exon 30267 30667 . + . gene_id "ENSG00000243485"; transcript_id "ENST00000469289"; exon_number "1"; gene_name "MIR1302-11"; gene_biotype "lincRNA"; transcript_name "MIR1302-11-002"; exon_id "ENSE00001841699"; +chr1 lincRNA exon 30976 31109 . + . gene_id "ENSG00000243485"; transcript_id "ENST00000469289"; exon_number "2"; gene_name "MIR1302-11"; gene_biotype "lincRNA"; transcript_name "MIR1302-11-002"; exon_id "ENSE00001890064"; +chr1 miRNA exon 30366 30503 . + . gene_id "ENSG00000243485"; transcript_id "ENST00000607096"; exon_number "1"; gene_name "MIR1302-11"; gene_biotype "lincRNA"; transcript_name "MIR1302-11-201"; exon_id "ENSE00003695741"; \ No newline at end of file