Skip to content

Commit

Permalink
[AN-338] Fix IGV handling of signed URLs for requester-pays, refine m…
Browse files Browse the repository at this point in the history
…etrics (#5211)
  • Loading branch information
eweitz authored Jan 10, 2025
1 parent c51e9c3 commit ff1fd37
Show file tree
Hide file tree
Showing 4 changed files with 110 additions and 20 deletions.
25 changes: 13 additions & 12 deletions src/components/IGVBrowser.js
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@ import { knownBucketRequesterPaysStatuses, requesterPaysProjectStore } from 'src
import * as Utils from 'src/libs/utils';
import { RequesterPaysModal } from 'src/workspaces/common/requester-pays/RequesterPaysModal';

// format for selectedFiles prop: [{ filePath, indexFilePath }]
// format for selectedFiles prop: [{ filePath, indexFilePath, isSignedUrl }]
const IGVBrowser = ({ selectedFiles, refGenome: { genome, reference }, workspace, onDismiss }) => {
const [loadingIgv, setLoadingIgv] = useState(true);
const [requesterPaysModal, setRequesterPaysModal] = useState(null);
Expand All @@ -24,25 +24,21 @@ const IGVBrowser = ({ selectedFiles, refGenome: { genome, reference }, workspace
const signal = useCancellation();

const addTracks = withErrorReporting('Unable to add tracks')(async (tracks) => {
const gsTracks = tracks.filter((track) => track.isSignedUrl === false);

// Select one file per each bucket represented in the tracks list.
const bucketExemplars = _.flow(
_.map(_.get('url')),
_.uniqBy((url) => {
const [bucket] = parseGsUri(url);
return bucket;
})
)(tracks);
)(gsTracks);

// Learn the requester pays status of each bucket.
// Requesting a file will store its requester pays status in knownBucketRequesterPaysStatuses.
const isRequesterPays = await Promise.all(
_.map(async (url) => {
// As seen in requester-pays access URLs resolved from a DRS URI, e.g. AnVIL
// `userProject` is required to know who to bill
if (url.startsWith('https') && url.includes('requestedBy=') && url.includes('userProject=')) {
return true;
}

const [bucket, file] = parseGsUri(url);

if (knownBucketRequesterPaysStatuses.get()[bucket] === undefined) {
Expand Down Expand Up @@ -84,20 +80,23 @@ const IGVBrowser = ({ selectedFiles, refGenome: { genome, reference }, workspace
}
}

_.forEach(({ name, url, indexURL }) => {
_.forEach(({ name, url, indexURL, isSignedUrl }) => {
const [bucket] = parseGsUri(url);
const userProjectParam = { userProject: knownBucketRequesterPaysStatuses.get()[bucket] ? userProject : undefined };

// Omit residual URL parameters from access URLs resolved via DRS Hub
const simpleUrl = _.last(url.split('/')).split('?')[0];

const fullUrl = isSignedUrl ? url : Utils.mergeQueryParams(userProjectParam, url);
const fullIndexUrl = isSignedUrl ? indexURL : Utils.mergeQueryParams(userProjectParam, indexURL);

// Enable viewing features upon searching most genes, without needing to zoom several times
const visibilityWindow = 75_000;

igvBrowser.current.loadTrack({
name: name || `${simpleUrl} (${url})`,
url: Utils.mergeQueryParams(userProjectParam, url),
indexURL: indexURL ? Utils.mergeQueryParams(userProjectParam, indexURL) : undefined,
url: fullUrl,
indexURL: indexURL ? fullIndexUrl : undefined,
visibilityWindow,
});
}, tracks);
Expand All @@ -118,7 +117,9 @@ const IGVBrowser = ({ selectedFiles, refGenome: { genome, reference }, workspace
igv.setGoogleOauthToken(() => saToken(workspace.workspace.googleProject));
igvBrowser.current = await igv.createBrowser(containerRef.current, options);

const initialTracks = _.map(({ filePath, indexFilePath }) => ({ url: filePath, indexURL: indexFilePath }), selectedFiles);
const initialTracks = _.map(({ filePath, indexFilePath, isSignedUrl }) => {
return { url: filePath, indexURL: indexFilePath, isSignedUrl };
}, selectedFiles);
addTracks(initialTracks);
} catch (e) {
reportError('Error loading IGV.js', e);
Expand Down
47 changes: 43 additions & 4 deletions src/components/IGVFileSelector.js
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,29 @@ const splitExtension = (fileUrl) => {
return [base, extension];
};

/**
 * Get file extension from URL, even if two-part / compound (e.g. "vcf.gz").
 *
 * Only the final path segment is inspected, so dots in the host name or in
 * parent directories (e.g. "storage.googleapis.com", "run.v2/") never end up
 * in the reported extension. Any query string is ignored.
 *
 * @param {string} fileUrl - File URL or path, optionally with a query string.
 * @returns {string} The last one or two dot-separated suffixes of the file
 *   name (e.g. "bam", "vcf.gz", "bam.bai"), or '' if the name has no extension.
 */
const getCompoundExtension = (fileUrl) => {
  // Drop the query string first: signed URLs carry parameters containing dots
  const pathOnly = fileUrl.split('?')[0];
  const fileName = pathOnly.split('/').pop();
  const nameParts = fileName.split('.');

  if (nameParts.length < 2) {
    // No dot in the file name, thus no extension to report
    return '';
  }

  const numExtensions = nameParts.length > 2 ? 2 : 1;
  return nameParts.slice(-numExtensions).join('.');
};

/**
 * Summarize an IGV file selection as flat properties for event metrics.
 *
 * @param {Array<{ filePath: string, indexFilePath: (string|false), isSignedUrl: boolean }>} selectedFiles
 *   Tracks chosen for IGV. `indexFilePath` is `false` for tracks without an
 *   index file (e.g. ".bed").
 * @param {{ genome: string }} refGenome - Selected reference genome.
 * @returns {{ igvNumTracks: number, igvFileExtensions: string[], igvIndexExtensions: string[], igvHasDrsUris: boolean, igvGenome: string }}
 *   Metric details; the extension arrays are parallel to `selectedFiles`.
 */
export const getIgvMetricDetails = (selectedFiles, refGenome) => {
  const igvNumTracks = selectedFiles.length;
  const igvHasDrsUris = selectedFiles.some((f) => f.isSignedUrl);
  const igvFileExtensions = selectedFiles.map((f) => getCompoundExtension(f.filePath));

  // Tracks without an index carry `indexFilePath: false`; report '' for them
  // rather than calling a string method on a boolean, and keep the array
  // aligned one-to-one with the tracks.
  const igvIndexExtensions = selectedFiles.map((f) => (f.indexFilePath ? getCompoundExtension(f.indexFilePath) : ''));

  const igvGenome = refGenome.genome;
  return {
    igvNumTracks,
    igvFileExtensions,
    igvIndexExtensions,
    igvHasDrsUris,
    igvGenome,
  };
};

const UUID_PATTERN = '[a-f0-9]{8}-[a-f0-9]{4}-[a-f0-9]{4}-[a-f0-9]{4}-[a-f0-9]{12}';

const UUID_REGEX = new RegExp(UUID_PATTERN);
Expand Down Expand Up @@ -130,17 +153,33 @@ export const getValidIgvFiles = async (values, signal) => {
}
});

const fileUrls = basicFileUrls.map((fus) => {
const url = new URL(fus);
url.isSignedUrl = false;
return url;
});

const accessUrls = await resolveValidIgvDrsUris(values, signal);
const fileUrlStrings = basicFileUrls.concat(accessUrls);
const fileUrls = fileUrlStrings.map((fus) => new URL(fus));
accessUrls.forEach((accessUrl) => {
const url = new URL(accessUrl);

// Reliably indicate this is an access URL that should not be modified
// downstream, as done e.g. for some requester-pays URLs not resolved
// via DRS Hub.
url.isSignedUrl = true;

fileUrls.push(url);
});

return fileUrls.flatMap((fileUrl) => {
const filePath = fileUrl.href;
const isSignedUrl = fileUrl.isSignedUrl;
if (fileUrl.pathname.endsWith('.bed')) {
return [{ filePath: fileUrl.href, indexFilePath: false }];
return [{ filePath, indexFilePath: false, isSignedUrl }];
}
const indexFileUrl = findIndexForFile(fileUrl, fileUrls);
if (indexFileUrl !== undefined) {
return [{ filePath: fileUrl.href, indexFilePath: indexFileUrl.href }];
return [{ filePath, indexFilePath: indexFileUrl.href, isSignedUrl }];
}
return [];
});
Expand Down
50 changes: 48 additions & 2 deletions src/components/IGVFileSelector.test.js
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
import { getValidIgvFiles, getValidIgvFilesFromAttributeValues, isDrsUri } from 'src/components/IGVFileSelector';
import { getIgvMetricDetails, getValidIgvFiles, getValidIgvFilesFromAttributeValues, isDrsUri } from 'src/components/IGVFileSelector';
import { DrsUriResolver } from 'src/libs/ajax/drs/DrsUriResolver';
import { isFeaturePreviewEnabled } from 'src/libs/feature-previews';

Expand All @@ -25,14 +25,17 @@ describe('getValidIgvFiles', () => {
{
filePath: 'gs://bucket/test2.bam',
indexFilePath: 'gs://bucket/test2.bai',
isSignedUrl: false,
},
{
filePath: 'gs://bucket/test3.bam',
indexFilePath: 'gs://bucket/test3.bam.bai',
isSignedUrl: false,
},
{
filePath: 'gs://bucket/test4.sorted.bam',
indexFilePath: 'gs://bucket/test4.sorted.bam.bai',
isSignedUrl: false,
},
]);
});
Expand All @@ -50,10 +53,12 @@ describe('getValidIgvFiles', () => {
{
filePath: 'gs://bucket/test2.cram',
indexFilePath: 'gs://bucket/test2.crai',
isSignedUrl: false,
},
{
filePath: 'gs://bucket/test3.cram',
indexFilePath: 'gs://bucket/test3.cram.crai',
isSignedUrl: false,
},
]);
});
Expand All @@ -77,22 +82,27 @@ describe('getValidIgvFiles', () => {
{
filePath: 'gs://bucket/test2.vcf',
indexFilePath: 'gs://bucket/test2.idx',
isSignedUrl: false,
},
{
filePath: 'gs://bucket/test3.vcf',
indexFilePath: 'gs://bucket/test3.vcf.idx',
isSignedUrl: false,
},
{
filePath: 'gs://bucket/test4.vcf',
indexFilePath: 'gs://bucket/test4.tbi',
isSignedUrl: false,
},
{
filePath: 'gs://bucket/test5.vcf',
indexFilePath: 'gs://bucket/test5.vcf.tbi',
isSignedUrl: false,
},
{
filePath: 'gs://bucket/test6.vcf.gz',
indexFilePath: 'gs://bucket/test6.vcf.gz.tbi',
isSignedUrl: false,
},
]);
});
Expand All @@ -102,6 +112,7 @@ describe('getValidIgvFiles', () => {
{
filePath: 'gs://bucket/test.bed',
indexFilePath: false,
isSignedUrl: false,
},
]);
});
Expand All @@ -111,6 +122,7 @@ describe('getValidIgvFiles', () => {
{
filePath: 'gs://bucket/test.bed',
indexFilePath: false,
isSignedUrl: false,
},
]);
});
Expand All @@ -126,6 +138,7 @@ describe('getValidIgvFiles', () => {
{
filePath: 'gs://datarepo-ab123456-bucket/cae37a2a-657f-4b04-9fef-59c215020078/5f5f634d-70f3-4914-9c71-9d14c7f98e60/test.bam',
indexFilePath: 'gs://datarepo-ab123456-bucket/cae37a2a-657f-4b04-9fef-59c215020078/2eeff61f-ae9e-41ae-bb40-909ff6bdfba8/test.bam.bai',
isSignedUrl: false,
},
]);
});
Expand All @@ -141,6 +154,7 @@ describe('getValidIgvFiles', () => {
filePath: 'gs://datarepo-ab123456-bucket/cae37a2a-657f-4b04-9fef-59c215020078/5f5f634d-70f3-4914-9c71-9d14c7f98e60/path/to/test.bam',
indexFilePath:
'gs://datarepo-ab123456-bucket/cae37a2a-657f-4b04-9fef-59c215020078/2eeff61f-ae9e-41ae-bb40-909ff6bdfba8/path/to/test.bam.bai',
isSignedUrl: false,
},
]);
});
Expand All @@ -155,6 +169,7 @@ describe('getValidIgvFiles', () => {
{
filePath: 'gs://datarepo-dev-ab123456-bucket/cae37a2a-657f-4b04-9fef-59c215020078/5f5f634d-70f3-4914-9c71-9d14c7f98e60/test.bam',
indexFilePath: 'gs://datarepo-dev-ab123456-bucket/cae37a2a-657f-4b04-9fef-59c215020078/2eeff61f-ae9e-41ae-bb40-909ff6bdfba8/test.bam.bai',
isSignedUrl: false,
},
]);
});
Expand All @@ -174,14 +189,17 @@ describe('getValidIgvFilesFromAttributeValues', () => {
{
filePath: 'gs://bucket/test1.bed',
indexFilePath: false,
isSignedUrl: false,
},
{
filePath: 'gs://bucket/test2.bed',
indexFilePath: false,
isSignedUrl: false,
},
{
filePath: 'gs://bucket/test3.bed',
indexFilePath: false,
isSignedUrl: false,
},
]);
});
Expand Down Expand Up @@ -225,7 +243,7 @@ describe('getValidIgvFilesFromAttributeValues', () => {
});

it('calls to resolve access URLs when two DRS URIs are found', async () => {
// An IGV selection must have a file (e.g. VCF) and an index file (TBI)
// An IGV selection generally must have a file (e.g. VCF) and an index file (TBI)
const fileDrsUri = 'drs://dg.4503:2802a94d-f540-499f-950a-db3c2a9f2dc4';
const indexFileDrsUri = 'drs://dg.4503:2802a94d-f540-499f-950a-11111111111';
const fileName = 'foo.vcf.gz';
Expand Down Expand Up @@ -268,7 +286,35 @@ describe('getValidIgvFilesFromAttributeValues', () => {
{
filePath: 'https://bucket/[email protected]&userProject=my-billing-project&signature=secret',
indexFilePath: 'https://bucket/[email protected]&userProject=my-billing-project&signature=secret',
isSignedUrl: true,
},
]);
});

// Checks the metric details derived from a mixed selection: one signed access
// URL (query string present) plus one plain gs:// track.
it('provides relevant component-specific logging metrics', () => {
  const selectedFiles = [
    {
      // Signed URL: the query parameters must not leak into the reported extensions
      filePath: 'https://bucket/foo.vcf.gz?requestedBy=user@example.com&userProject=my-billing-project&signature=secret',
      indexFilePath: 'https://bucket/foo.vcf.gz.tbi?requestedBy=user@example.com&userProject=my-billing-project&signature=secret',
      isSignedUrl: true,
    },
    {
      filePath: 'gs://datarepo-dev-ab123456-bucket/cae37a2a-657f-4b04-9fef-59c215020078/5f5f634d-70f3-4914-9c71-9d14c7f98e60/test.bam',
      indexFilePath: 'gs://datarepo-dev-ab123456-bucket/cae37a2a-657f-4b04-9fef-59c215020078/2eeff61f-ae9e-41ae-bb40-909ff6bdfba8/test.bam.bai',
      isSignedUrl: false,
    },
  ];

  const refGenome = { genome: 'hg38' };

  const igvDetails = getIgvMetricDetails(selectedFiles, refGenome);

  expect(igvDetails).toEqual({
    igvNumTracks: 2,
    igvFileExtensions: ['vcf.gz', 'bam'],
    igvIndexExtensions: ['gz.tbi', 'bam.bai'],
    igvHasDrsUris: true,
    igvGenome: 'hg38',
  });
});
});
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@ import { tools } from 'src/analysis/utils/tool-utils';
import { ButtonSecondary } from 'src/components/common';
import { icon } from 'src/components/icons';
import IGVBrowser from 'src/components/IGVBrowser';
import IGVFileSelector from 'src/components/IGVFileSelector';
import IGVFileSelector, { getIgvMetricDetails } from 'src/components/IGVFileSelector';
import { MenuButton } from 'src/components/MenuButton';
import { withModalDrawer } from 'src/components/ModalDrawer';
import { ModalToolButton } from 'src/components/ModalToolButton';
Expand Down Expand Up @@ -664,7 +664,11 @@ const EntitiesContent = ({
setShowToolSelector(false);
setIgvFiles(selectedFiles);
setIgvRefGenome(refGenome);
void Metrics().captureEvent(Events.workspaceDataOpenWithIGV, extractWorkspaceDetails(workspace.workspace));

const workspaceDetails = extractWorkspaceDetails(workspace.workspace);
const igvDetails = getIgvMetricDetails(selectedFiles, refGenome);
const details = Object.assign(igvDetails, workspaceDetails);
void Metrics().captureEvent(Events.workspaceDataOpenWithIGV, details);
},
entityMetadata,
entityKey,
Expand Down

0 comments on commit ff1fd37

Please sign in to comment.