Skip to content

Commit

Permalink
Looks like rtracklayer::import() works on Windows now with local file…
Browse files Browse the repository at this point in the history
…s. So we can now run all this code on Windows, unlike before. It's not working for remote BigWig files, but I don't know if that's due to lawremi/rtracklayer#83 or something else Windows-specific.
  • Loading branch information
lcolladotor committed May 21, 2024
1 parent 7e903b0 commit f36c5d5
Show file tree
Hide file tree
Showing 6 changed files with 86 additions and 106 deletions.
34 changes: 13 additions & 21 deletions R/coverage_matrix.R
Original file line number Diff line number Diff line change
Expand Up @@ -54,25 +54,22 @@
#'
#' @examples
#'
#' if (.Platform$OS.type != "windows") {
#' ## Workaround for https://github.com/lawremi/rtracklayer/issues/83
#' download_study("SRP002001", type = "mean")
#' download_study("SRP002001", type = "samples")
#' ## Workaround for https://github.com/lawremi/rtracklayer/issues/83
#' download_study("SRP002001", type = "mean")
#' download_study("SRP002001", type = "samples")
#'
#' ## Reading BigWig files is not supported by rtracklayer on Windows
#' ## Define expressed regions for study DRP002835, chrY
#' regions <- expressed_regions("SRP002001", "chrY",
#' cutoff = 5L,
#' maxClusterGap = 3000L,
#' outdir = "SRP002001"
#' )
#' ## Define expressed regions for study DRP002835, chrY
#' regions <- expressed_regions("SRP002001", "chrY",
#' cutoff = 5L,
#' maxClusterGap = 3000L,
#' outdir = "SRP002001"
#' )
#'
#' ## Now calculate the coverage matrix for this study
#' rse <- coverage_matrix("SRP002001", "chrY", regions, outdir = "SRP002001")
#' ## Now calculate the coverage matrix for this study
#' rse <- coverage_matrix("SRP002001", "chrY", regions, outdir = "SRP002001")
#'
#' ## One row per region
#' identical(length(regions), nrow(rse))
#' }
#' ## One row per region
#' identical(length(regions), nrow(rse))
coverage_matrix <- function(
project, chr, regions, chunksize = 1000,
bpparam = NULL, outdir = NULL, chrlen = NULL, verbose = TRUE,
Expand All @@ -82,11 +79,6 @@ coverage_matrix <- function(
stopifnot(is.character(chr) & length(chr) == 1)
stopifnot((is.numeric(chunksize) | is.integer(chunksize)) & length(chunksize) == 1)

## Windows-specific info
if (.Platform$OS.type == "windows") {
warning("rtracklayer does not support importing BigWig files on Windows, so this function might not work")
}

## Use table from the package
url_table <- recount::recount_url

Expand Down
26 changes: 10 additions & 16 deletions R/expressed_regions.R
Original file line number Diff line number Diff line change
Expand Up @@ -37,18 +37,17 @@
#' [railMatrix][derfinder::railMatrix]
#'
#' @examples
#' ## Define expressed regions for study DRP002835, chrY
#' if (.Platform$OS.type != "windows") {
#' ## Workaround for https://github.com/lawremi/rtracklayer/issues/83
#' download_study("SRP002001", type = "mean")
#' ## Define expressed regions for study SRP002001, chrY
#'
#' ## Workaround for https://github.com/lawremi/rtracklayer/issues/83
#' download_study("SRP002001", type = "mean")
#'
#' regions <- expressed_regions("SRP002001", "chrY",
#' cutoff = 5L,
#' maxClusterGap = 3000L,
#' outdir = "SRP002001"
#' )
#'
#' ## Reading BigWig files is not supported by rtracklayer on Windows
#' regions <- expressed_regions("SRP002001", "chrY",
#' cutoff = 5L,
#' maxClusterGap = 3000L,
#' outdir = "SRP002001"
#' )
#' }
#' \dontrun{
#' ## Define the regions for multiple chrs
#' regs <- sapply(chrs, expressed_regions, project = "SRP002001", cutoff = 5L)
Expand All @@ -65,11 +64,6 @@ expressed_regions <- function(
stopifnot(is.character(project) & length(project) == 1)
stopifnot(is.character(chr) & length(chr) == 1)

## Windows-specific info
if (.Platform$OS.type == "windows") {
warning("rtracklayer does not support importing BigWig files on Windows, so this function might not work")
}

## Use table from the package
url_table <- recount::recount_url

Expand Down
35 changes: 16 additions & 19 deletions man/coverage_matrix.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

21 changes: 10 additions & 11 deletions man/expressed_regions.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

58 changes: 28 additions & 30 deletions tests/testthat/test-data.R
Original file line number Diff line number Diff line change
Expand Up @@ -108,40 +108,38 @@ test_that("Scaling", {
), assay(rse_gene_SRP009615, 1))
})

if (.Platform$OS.type != "windows") {
regions <- expressed_regions("SRP002001", "chrY", cutoff = 5, outdir = tmpdir)
## Artificially remove the mean coverage file so that the file will have to
## get downloaded on the first test, then it'll be present for the second
## test
unlink(localfiles["mean_SRP002001.bw"])

test_that("Expressed regions", {
expect_equal(
regions,
expressed_regions("SRP002001", "chrY", cutoff = 5, outdir = tmpdir)
)
})
regions <- expressed_regions("SRP002001", "chrY", cutoff = 5, outdir = tmpdir)
## Artificially remove the mean coverage file so that the file will have to
## get downloaded on the first test, then it'll be present for the second
## test
unlink(localfiles["mean_SRP002001.bw"])

test_that("Expressed regions", {
expect_equal(
regions,
expressed_regions("SRP002001", "chrY", cutoff = 5, outdir = tmpdir)
)
})

rse_ER <- coverage_matrix("SRP002001", "chrY", regions, outdir = tmpdir)
## Same for the phenotype data and the sample bigwig file
unlink(localfiles["SRP002001.tsv"])
unlink(localfiles["SRR036661.bw"])

test_that("Coverage matrix", {
expect_equal(
rse_ER,
coverage_matrix("SRP002001", "chrY", regions, outdir = tmpdir)
)
expect_equal(
rse_ER,
coverage_matrix("SRP002001", "chrY", regions,
outdir = tmpdir,
chunksize = 500
)
rse_ER <- coverage_matrix("SRP002001", "chrY", regions, outdir = tmpdir)
## Same for the phenotype data and the sample bigwig file
unlink(localfiles["SRP002001.tsv"])
unlink(localfiles["SRR036661.bw"])

test_that("Coverage matrix", {
expect_equal(
rse_ER,
coverage_matrix("SRP002001", "chrY", regions, outdir = tmpdir)
)
expect_equal(
rse_ER,
coverage_matrix("SRP002001", "chrY", regions,
outdir = tmpdir,
chunksize = 500
)
})
}
)
})

## Check size once:
# > tmpdir
Expand Down
18 changes: 9 additions & 9 deletions vignettes/recount-quickstart.Rmd
Original file line number Diff line number Diff line change
Expand Up @@ -520,7 +520,7 @@ The [recount project](https://jhubiostatistics.shinyapps.io/recount/) also hosts

For an annotation-agnostic differential expression analysis, we first need to define the regions of interest. With `r Biocpkg('recount')` we can do so using the `expressed_regions()` function as shown below for the same study we studied earlier.

```{r 'download_bigwigs', eval = .Platform$OS.type != 'windows'}
```{r 'download_bigwigs'}
## Normally, one can use rtracklayer::import() to access remote parts of BigWig
## files without having to download the complete files. However, as of
## 2024-05-20 this doesn't seem to be working well. So this is a workaround to
Expand All @@ -529,7 +529,7 @@ download_study("SRP009615", type = "mean")
download_study("SRP009615", type = "samples")
```

```{r 'define_ers', eval = .Platform$OS.type != 'windows'}
```{r 'define_ers'}
## Define expressed regions for study SRP009615, only for chromosome Y
regions <- expressed_regions("SRP009615", "chrY",
cutoff = 5L,
Expand All @@ -546,7 +546,7 @@ Once the regions have been defined, you can export them into a BED file using `r

Having defined the expressed regions, we can now compute a coverage matrix for these regions. We can do so using the function `coverage_matrix()` from `r Biocpkg('recount')` as shown below. It returns a _RangedSummarizedExperiment_ object.

```{r 'compute_covMat', eval = .Platform$OS.type != 'windows'}
```{r 'compute_covMat'}
## Compute coverage matrix for study SRP009615, only for chromosome Y
system.time(rse_ER <- coverage_matrix("SRP009615", "chrY", regions, outdir = "SRP009615"))
Expand All @@ -557,7 +557,7 @@ rse_ER

The resulting count matrix has one row per region and one column per sample. The counts correspond to the number (or fraction) of reads overlapping the regions and are scaled by default to a library size of 40 million reads. For some differential expression methods, you might have to round this matrix into integers. We'll use `r Biocpkg('DESeq2')` to identify which expressed regions are differentially expressed.

```{r 'to_integer', eval = .Platform$OS.type != 'windows'}
```{r 'to_integer'}
## Round the coverage matrix to integers
covMat <- round(assays(rse_ER)$counts, 0)
```
Expand All @@ -568,7 +568,7 @@ You can use `scale = FALSE` if you want the raw base-pair coverage counts and th

We first need to get some phenotype information for these samples similar to the first analysis we did. We can get this data using `download_study()`.

```{r 'phenoData', eval = .Platform$OS.type != 'windows'}
```{r 'phenoData'}
## Get phenotype data for study SRP009615
pheno <- colData(rse_ER)
Expand All @@ -582,7 +582,7 @@ head(pheno)

Now that we have the necessary data, we can construct a `DESeqDataSet` object using the function `DESeqDataSetFromMatrix()` from `r Biocpkg('DESeq2')`.

```{r 'ers_dds', eval = .Platform$OS.type != 'windows'}
```{r 'ers_dds'}
## Build a DESeqDataSet
dds_ers <- DESeqDataSetFromMatrix(
countData = covMat, colData = pheno,
Expand All @@ -595,7 +595,7 @@ dds_ers <- DESeqDataSetFromMatrix(

With the `DESeqDataSet` object in place, we can then use the function `DESeq()` from `r Biocpkg('DESeq2')` to perform the differential expression analysis (between groups) as shown below.

```{r 'de_analysis_ers', eval = .Platform$OS.type != 'windows'}
```{r 'de_analysis_ers'}
## Perform differential expression analysis with DESeq2 at the ER-level
dds_ers <- DESeq(dds_ers,
test = "LRT", reduced = ~gene_target,
Expand All @@ -606,7 +606,7 @@ res_ers <- results(dds_ers)

We can then visually explore the results as shown in Figure \@ref(fig:maploters), like we did before in Figure \@ref(fig:maplot).

```{r 'maploters', eval = .Platform$OS.type != 'windows', fig.cap = "MA plot for group differences adjusted by gene target for SRP009615 using ER-level data (just chrY)."}
```{r 'maploters', fig.cap = "MA plot for group differences adjusted by gene target for SRP009615 using ER-level data (just chrY)."}
## Explore results
plotMA(res_ers, main = "DESeq2 results for SRP009615 (ER-level, chrY)")
```
Expand Down Expand Up @@ -670,7 +670,7 @@ If you are interested in using another annotation based on hg38 coordinates you

If you are re-processing a small set of samples, it simply might be easier to use `coverage_matrix()` as shown below using the annotation information provided by the `r Biocpkg('EnsDb.Hsapiens.v79')` package.

```{r "newer annotation", eval = .Platform$OS.type != 'windows'}
```{r "newer annotation"}
## Get the disjoint exons based on EnsDb.Hsapiens.v79 which matches hg38
exons <- reproduce_ranges("exon", db = "EnsDb.Hsapiens.v79")
Expand Down

0 comments on commit f36c5d5

Please sign in to comment.