diff --git a/R/coverage_matrix.R b/R/coverage_matrix.R index f55675f..2e56081 100644 --- a/R/coverage_matrix.R +++ b/R/coverage_matrix.R @@ -54,25 +54,22 @@ #' #' @examples #' -#' if (.Platform$OS.type != "windows") { -#' ## Workaround for https://github.com/lawremi/rtracklayer/issues/83 -#' download_study("SRP002001", type = "mean") -#' download_study("SRP002001", type = "samples") +#' ## Workaround for https://github.com/lawremi/rtracklayer/issues/83 +#' download_study("SRP002001", type = "mean") +#' download_study("SRP002001", type = "samples") #' -#' ## Reading BigWig files is not supported by rtracklayer on Windows -#' ## Define expressed regions for study DRP002835, chrY -#' regions <- expressed_regions("SRP002001", "chrY", -#' cutoff = 5L, -#' maxClusterGap = 3000L, -#' outdir = "SRP002001" -#' ) +#' ## Define expressed regions for study DRP002835, chrY +#' regions <- expressed_regions("SRP002001", "chrY", +#' cutoff = 5L, +#' maxClusterGap = 3000L, +#' outdir = "SRP002001" +#' ) #' -#' ## Now calculate the coverage matrix for this study -#' rse <- coverage_matrix("SRP002001", "chrY", regions, outdir = "SRP002001") +#' ## Now calculate the coverage matrix for this study +#' rse <- coverage_matrix("SRP002001", "chrY", regions, outdir = "SRP002001") #' -#' ## One row per region -#' identical(length(regions), nrow(rse)) -#' } +#' ## One row per region +#' identical(length(regions), nrow(rse)) coverage_matrix <- function( project, chr, regions, chunksize = 1000, bpparam = NULL, outdir = NULL, chrlen = NULL, verbose = TRUE, @@ -82,11 +79,6 @@ coverage_matrix <- function( stopifnot(is.character(chr) & length(chr) == 1) stopifnot((is.numeric(chunksize) | is.integer(chunksize)) & length(chunksize) == 1) - ## Windows-specific info - if (.Platform$OS.type == "windows") { - warning("rtracklayer does not support importing BigWig files on Windows, so this function might not work") - } - ## Use table from the package url_table <- recount::recount_url diff --git a/R/expressed_regions.R b/R/expressed_regions.R index 449ceec..bfa2a1b 100644 --- a/R/expressed_regions.R +++ b/R/expressed_regions.R @@ -37,18 +37,17 @@ #' [railMatrix][derfinder::railMatrix] #' #' @examples -#' ## Define expressed regions for study DRP002835, chrY -#' if (.Platform$OS.type != "windows") { -#' ## Workaround for https://github.com/lawremi/rtracklayer/issues/83 -#' download_study("SRP002001", type = "mean") +#' ## Define expressed regions for study SRP002001, chrY +#' +#' ## Workaround for https://github.com/lawremi/rtracklayer/issues/83 +#' download_study("SRP002001", type = "mean") +#' +#' regions <- expressed_regions("SRP002001", "chrY", +#' cutoff = 5L, +#' maxClusterGap = 3000L, +#' outdir = "SRP002001" +#' ) #' -#' ## Reading BigWig files is not supported by rtracklayer on Windows -#' regions <- expressed_regions("SRP002001", "chrY", -#' cutoff = 5L, -#' maxClusterGap = 3000L, -#' outdir = "SRP002001" -#' ) -#' } #' \dontrun{ #' ## Define the regions for multiple chrs #' regs <- sapply(chrs, expressed_regions, project = "SRP002001", cutoff = 5L) @@ -65,11 +64,6 @@ expressed_regions <- function( stopifnot(is.character(project) & length(project) == 1) stopifnot(is.character(chr) & length(chr) == 1) - ## Windows-specific info - if (.Platform$OS.type == "windows") { - warning("rtracklayer does not support importing BigWig files on Windows, so this function might not work") - } - ## Use table from the package url_table <- recount::recount_url diff --git a/man/coverage_matrix.Rd b/man/coverage_matrix.Rd index f1a625a..f8016b5 100644 --- a/man/coverage_matrix.Rd +++ b/man/coverage_matrix.Rd @@ -91,25 +91,22 @@ Note that you will need to run \link{scale_counts} after running } \examples{ -if (.Platform$OS.type != "windows") { - ## Workaround for https://github.com/lawremi/rtracklayer/issues/83 - download_study("SRP002001", type = "mean") - download_study("SRP002001", type = "samples") - - ## Reading BigWig files is not supported by rtracklayer on Windows - ## Define expressed regions for study DRP002835, chrY - regions <- expressed_regions("SRP002001", "chrY", - cutoff = 5L, - maxClusterGap = 3000L, - outdir = "SRP002001" - ) - - ## Now calculate the coverage matrix for this study - rse <- coverage_matrix("SRP002001", "chrY", regions, outdir = "SRP002001") - - ## One row per region - identical(length(regions), nrow(rse)) -} +## Workaround for https://github.com/lawremi/rtracklayer/issues/83 +download_study("SRP002001", type = "mean") +download_study("SRP002001", type = "samples") + +## Define expressed regions for study DRP002835, chrY +regions <- expressed_regions("SRP002001", "chrY", + cutoff = 5L, + maxClusterGap = 3000L, + outdir = "SRP002001" +) + +## Now calculate the coverage matrix for this study +rse <- coverage_matrix("SRP002001", "chrY", regions, outdir = "SRP002001") + +## One row per region +identical(length(regions), nrow(rse)) } \seealso{ \link{download_study}, \link[derfinder:findRegions]{findRegions}, diff --git a/man/expressed_regions.Rd b/man/expressed_regions.Rd index 9b95fc5..0a7b00e 100644 --- a/man/expressed_regions.Rd +++ b/man/expressed_regions.Rd @@ -52,18 +52,17 @@ identify the expressed regions (ERs) for a given chromosome. It returns a defined by \link[derfinder:findRegions]{findRegions}. } \examples{ -## Define expressed regions for study DRP002835, chrY -if (.Platform$OS.type != "windows") { - ## Workaround for https://github.com/lawremi/rtracklayer/issues/83 - download_study("SRP002001", type = "mean") +## Define expressed regions for study SRP002001, chrY + +## Workaround for https://github.com/lawremi/rtracklayer/issues/83 +download_study("SRP002001", type = "mean") + +regions <- expressed_regions("SRP002001", "chrY", + cutoff = 5L, + maxClusterGap = 3000L, + outdir = "SRP002001" +) - ## Reading BigWig files is not supported by rtracklayer on Windows - regions <- expressed_regions("SRP002001", "chrY", - cutoff = 5L, - maxClusterGap = 3000L, - outdir = "SRP002001" - ) -} \dontrun{ ## Define the regions for multiple chrs regs <- sapply(chrs, expressed_regions, project = "SRP002001", cutoff = 5L) diff --git a/tests/testthat/test-data.R b/tests/testthat/test-data.R index ef7c2e6..66e69f8 100644 --- a/tests/testthat/test-data.R +++ b/tests/testthat/test-data.R @@ -108,40 +108,38 @@ test_that("Scaling", { ), assay(rse_gene_SRP009615, 1)) }) -if (.Platform$OS.type != "windows") { - regions <- expressed_regions("SRP002001", "chrY", cutoff = 5, outdir = tmpdir) - ## Artificially remove the mean coverage file so that the file will have to - ## get downloaded on the first test, then it'll be present for the second - ## test - unlink(localfiles["mean_SRP002001.bw"]) - - test_that("Expressed regions", { - expect_equal( - regions, - expressed_regions("SRP002001", "chrY", cutoff = 5, outdir = tmpdir) - ) - }) +regions <- expressed_regions("SRP002001", "chrY", cutoff = 5, outdir = tmpdir) +## Artificially remove the mean coverage file so that the file will have to +## get downloaded on the first test, then it'll be present for the second +## test +unlink(localfiles["mean_SRP002001.bw"]) +test_that("Expressed regions", { + expect_equal( + regions, + expressed_regions("SRP002001", "chrY", cutoff = 5, outdir = tmpdir) + ) +}) - rse_ER <- coverage_matrix("SRP002001", "chrY", regions, outdir = tmpdir) - ## Same for the phenotype data and the sample bigwig file - unlink(localfiles["SRP002001.tsv"]) - unlink(localfiles["SRR036661.bw"]) - test_that("Coverage matrix", { - expect_equal( - rse_ER, - coverage_matrix("SRP002001", "chrY", regions, outdir = tmpdir) - ) - expect_equal( - rse_ER, - coverage_matrix("SRP002001", "chrY", regions, - outdir = tmpdir, - chunksize = 500 - ) +rse_ER <- coverage_matrix("SRP002001", "chrY", regions, outdir = tmpdir) +## Same for the phenotype data and the sample bigwig file +unlink(localfiles["SRP002001.tsv"]) +unlink(localfiles["SRR036661.bw"]) + +test_that("Coverage matrix", { + expect_equal( + rse_ER, + coverage_matrix("SRP002001", "chrY", regions, outdir = tmpdir) + ) + expect_equal( + rse_ER, + coverage_matrix("SRP002001", "chrY", regions, + outdir = tmpdir, + chunksize = 500 ) - }) -} + ) +}) ## Check size once: # > tmpdir diff --git a/vignettes/recount-quickstart.Rmd b/vignettes/recount-quickstart.Rmd index 97cfd47..63d4bb7 100644 --- a/vignettes/recount-quickstart.Rmd +++ b/vignettes/recount-quickstart.Rmd @@ -520,7 +520,7 @@ The [recount project](https://jhubiostatistics.shinyapps.io/recount/) also hosts For an annotation-agnostic differential expression analysis, we first need to define the regions of interest. With `r Biocpkg('recount')` we can do so using the `expressed_regions()` function as shown below for the same study we studied earlier. -```{r 'download_bigwigs', eval = .Platform$OS.type != 'windows'} +```{r 'download_bigwigs'} ## Normally, one can use rtracklayer::import() to access remote parts of BigWig ## files without having to download the complete files. However, as of ## 2024-05-20 this doesn't seem to be working well. So this is a workaround to @@ -529,7 +529,7 @@ download_study("SRP009615", type = "mean") download_study("SRP009615", type = "samples") ``` -```{r 'define_ers', eval = .Platform$OS.type != 'windows'} +```{r 'define_ers'} ## Define expressed regions for study SRP009615, only for chromosome Y regions <- expressed_regions("SRP009615", "chrY", cutoff = 5L, @@ -546,7 +546,7 @@ Once the regions have been defined, you can export them into a BED file using `r Having defined the expressed regions, we can now compute a coverage matrix for these regions. We can do so using the function `coverage_matrix()` from `r Biocpkg('recount')` as shown below. It returns a _RangedSummarizedExperiment_ object. -```{r 'compute_covMat', eval = .Platform$OS.type != 'windows'} +```{r 'compute_covMat'} ## Compute coverage matrix for study SRP009615, only for chromosome Y system.time(rse_ER <- coverage_matrix("SRP009615", "chrY", regions, outdir = "SRP009615")) @@ -557,7 +557,7 @@ rse_ER The resulting count matrix has one row per region and one column per sample. The counts correspond to the number (or fraction) of reads overlapping the regions and are scaled by default to a library size of 40 million reads. For some differential expression methods, you might have to round this matrix into integers. We'll use `r Biocpkg('DESeq2')` to identify which expressed regions are differentially expressed. -```{r 'to_integer', eval = .Platform$OS.type != 'windows'} +```{r 'to_integer'} ## Round the coverage matrix to integers covMat <- round(assays(rse_ER)$counts, 0) ``` @@ -568,7 +568,7 @@ You can use `scale = FALSE` if you want the raw base-pair coverage counts and th We first need to get some phenotype information for these samples similar to the first analysis we did. We can get this data using `download_study()`. -```{r 'phenoData', eval = .Platform$OS.type != 'windows'} +```{r 'phenoData'} ## Get phenotype data for study SRP009615 pheno <- colData(rse_ER) @@ -582,7 +582,7 @@ head(pheno) Now that we have the necessary data, we can construct a `DESeqDataSet` object using the function `DESeqDataSetFromMatrix()` from `r Biocpkg('DESeq2')`. -```{r 'ers_dds', eval = .Platform$OS.type != 'windows'} +```{r 'ers_dds'} ## Build a DESeqDataSet dds_ers <- DESeqDataSetFromMatrix( countData = covMat, colData = pheno, @@ -595,7 +595,7 @@ dds_ers <- DESeqDataSetFromMatrix( With the `DESeqDataSet` object in place, we can then use the function `DESeq()` from `r Biocpkg('DESeq2')` to perform the differential expression analysis (between groups) as shown below. -```{r 'de_analysis_ers', eval = .Platform$OS.type != 'windows'} +```{r 'de_analysis_ers'} ## Perform differential expression analysis with DESeq2 at the ER-level dds_ers <- DESeq(dds_ers, test = "LRT", reduced = ~gene_target, @@ -606,7 +606,7 @@ res_ers <- results(dds_ers) We can then visually explore the results as shown in Figure \@ref(fig:maploters), like we did before in Figure \@ref(fig:maplot). -```{r 'maploters', eval = .Platform$OS.type != 'windows', fig.cap = "MA plot for group differences adjusted by gene target for SRP009615 using ER-level data (just chrY)."} +```{r 'maploters', fig.cap = "MA plot for group differences adjusted by gene target for SRP009615 using ER-level data (just chrY)."} ## Explore results plotMA(res_ers, main = "DESeq2 results for SRP009615 (ER-level, chrY)") ``` @@ -670,7 +670,7 @@ If you are interested in using another annotation based on hg38 coordinates you If you are re-processing a small set of samples, it simply might be easier to use `coverage_matrix()` as shown below using the annotation information provided by the `r Biocpkg('EnsDb.Hsapiens.v79')` package. -```{r "newer annotation", eval = .Platform$OS.type != 'windows'} +```{r "newer annotation"} ## Get the disjoint exons based on EnsDb.Hsapiens.v79 which matches hg38 exons <- reproduce_ranges("exon", db = "EnsDb.Hsapiens.v79")