From e700e371a509801813d54c31957642052ace8599 Mon Sep 17 00:00:00 2001 From: Ken Wang Date: Mon, 16 Dec 2024 07:35:50 +0100 Subject: [PATCH] Add files via upload --- DESCRIPTION | 2 +- doc/NetID.Rmd | 2 +- inst/doc/NetID.R | 5 +++ inst/doc/NetID.Rmd | 8 ++++ inst/doc/NetID.html | 90 ++++++++++++++++++++------------------------- vignettes/NetID.Rmd | 8 ++++ 6 files changed, 63 insertions(+), 52 deletions(-) diff --git a/DESCRIPTION b/DESCRIPTION index 4be1586..6039226 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -16,5 +16,5 @@ License: MIT + file LICENSE Encoding: UTF-8 RoxygenNote: 7.3.2 NeedsCompilation: no -Packaged: 2024-12-15 21:33:02 UTC; weixu.wang +Packaged: 2024-12-16 01:04:56 UTC; weixu.wang Depends: R (>= 3.5.0) diff --git a/doc/NetID.Rmd b/doc/NetID.Rmd index bcd7120..51d6c53 100644 --- a/doc/NetID.Rmd +++ b/doc/NetID.Rmd @@ -195,7 +195,7 @@ dyn.out <- RunNetID(sce, velo=FALSE) ``` -We should note that the `FateDynamic` function provides a preliminary estimation of cell fate probabilities. However, accurate cell fate estimation relies on further refinements and the incorporation of prior information. If users have their own cell fate probability matrix (`fate_prob`) and `pseudotime`, they can directly plug in these values using the following function. +We note that the `FateDynamic` function provides a preliminary estimation of cell fate probabilities. However, accurate cell fate estimation relies on further refinements and the incorporation of prior information. If users have their own cell fate probability matrix (`fate_prob`) and `pseudotime`, they can directly plug in these values using the following function. ```{r Plugin cell fate, eval=FALSE, echo=TRUE, warning=FALSE,message=FALSE} dyn.out$LineageClass <- LineageClassifer(fate_prob, maxState = 10, cut_off = 0) diff --git a/inst/doc/NetID.R b/inst/doc/NetID.R index 2217403..137d91c 100644 --- a/inst/doc/NetID.R +++ b/inst/doc/NetID.R @@ -113,6 +113,11 @@ dyn.out <- RunNetID(sce, dynamicInfer = TRUE, velo=FALSE) +## ----Plugin cell fate, eval=FALSE, echo=TRUE, warning=FALSE,message=FALSE----- +# dyn.out$LineageClass <- LineageClassifer(fate_prob, maxState = 10, cut_off = 0) +# dyn.out$pseudotime <- pseudotime +# dyn.out$fate_prob <- fate_prob # cell fate probability matrix + ## ----plot cell fate probability, eval=TRUE, echo=TRUE, warning=FALSE,message=FALSE---- ## load basis information dyn.out$basis <- reducedDim(sce, "PCA")[,c(1,2)] diff --git a/inst/doc/NetID.Rmd b/inst/doc/NetID.Rmd index 8090be3..b8ba218 100644 --- a/inst/doc/NetID.Rmd +++ b/inst/doc/NetID.Rmd @@ -195,6 +195,14 @@ dyn.out <- RunNetID(sce, velo=FALSE) ``` +We note that the `FateDynamic` function provides a preliminary estimation of cell fate probabilities. However, accurate cell fate estimation relies on further refinements and the incorporation of prior information. If users have their own cell fate probability matrix (`fate_prob`) and `pseudotime`, they can directly plug in these values using the following function. + +```{r Plugin cell fate, eval=FALSE, echo=TRUE, warning=FALSE,message=FALSE} +dyn.out$LineageClass <- LineageClassifer(fate_prob, maxState = 10, cut_off = 0) +dyn.out$pseudotime <- pseudotime +dyn.out$fate_prob <- fate_prob # cell fate probability matrix +``` + NetID classifies the cells based on the fate probability matrix using a Gaussian Mixture Model. Next, we compute the lineage fate probability fold change to assign each cluster to a specific lineage. To visualize the cell fate probability in a PCA 2D space, the following function can be utilized: ```{r plot cell fate probability, eval=TRUE, echo=TRUE, warning=FALSE,message=FALSE} diff --git a/inst/doc/NetID.html b/inst/doc/NetID.html index a3a08c5..37f650c 100644 --- a/inst/doc/NetID.html +++ b/inst/doc/NetID.html @@ -10,7 +10,7 @@ - + A brief tutorial for estimating cell fate specific GRN using NetID @@ -697,7 +697,7 @@

A brief tutorial for estimating cell fate specific GRN using NetID

Weixu Wang1,2

1Institute of Computational Biology, Helmholtz Center Munich, Munich, Germany
2Human Phenome Institute, Fudan University, Shanghai, China

-

2024-12-15

+

2024-12-16

Package

NetID 0.1.2

@@ -793,7 +793,7 @@

2 Learning GRN skeleton from sket ## Using NetID to perform skeleton estimation... ## prune sampled neighbourhoods according to q-value... ## assign weight for edges using p-value... -## aggregated matrix: the number of genes:895; the number of samples:386 +## aggregated matrix: the number of genes:895; the number of samples:373 ## Done...

The computed global GRN is saved in dyn.out$skeleton.

names(dyn.out)
@@ -888,7 +888,7 @@

4 Inferring Fate Probability and ## Using NetID to perform skeleton estimation... ## prune sampled neighbourhoods according to q-value... ## assign weight for edges using p-value... -## aggregated matrix: the number of genes:897; the number of samples:356 +## aggregated matrix: the number of genes:897; the number of samples:360 ## Classify lineage for palantir fate prob... ## cluster1 cluster2 cluster3 cluster4 ## mpo+_neutrophils 130.98625350 1.79677466 1.169567288 9.853646e-02 @@ -900,6 +900,10 @@

4 Inferring Fate Probability and ## erythroblasts_stage2 19.975161059 ## Allow shared cell state between different lineage... ## Done... +

We note that the FateDynamic function provides a preliminary estimation of cell fate probabilities. However, accurate cell fate estimation relies on further refinements and the incorporation of prior information. If users have their own cell fate probability matrix (fate_prob) and pseudotime, they can directly plug in these values using the following function.

+
dyn.out$LineageClass <- LineageClassifer(fate_prob, maxState = 10, cut_off = 0)
+dyn.out$pseudotime <- pseudotime
+dyn.out$fate_prob <- fate_prob # cell fate probability matrix

NetID classifies the cells based on the fate probability matrix using a Gaussian Mixture Model. Next, we compute the lineage fate probability fold change to assign each cluster to a specific lineage. To visualize the cell fate probability in a PCA 2D space, the following function can be utilized:

## load basis information
 dyn.out$basis <- reducedDim(sce, "PCA")[,c(1,2)]
@@ -920,33 +924,22 @@ 

5 Learning cell fate specific GRN

The dyn.out object contains the skeleton of the global network (learned from all cells without lineage information) and cell fate probability information. NetID would run an L2-penalized Granger regression model for each target gene to re-calculate the regulatory coefficients of the skeleton, with using cell fate probability to define the “time-series”, and then aggregates the learned coefficients with the global coefficients through a rank-based method.

GRN <- FateCausal(dyn.out,L=30)
## Inferring GRN on Lineage 1,ncells = 110 ,readout = Gene Expression..
-## Inferring GRN on Lineage 2,ncells = 102 ,readout = Gene Expression..
-## Inferring GRN on Lineage 3,ncells = 144 ,readout = Gene Expression..
+## Inferring GRN on Lineage 2,ncells = 107 ,readout = Gene Expression..
+## Inferring GRN on Lineage 3,ncells = 143 ,readout = Gene Expression..
 ## Integrating Network Skeleton And Granger Causal Coefficient...

The output of FateCausal is a list object that contains the lineage-specific Gene Regulatory Network (GRN). In this context, grn represents the weighted unsigned network, while sign_grn represents the weighted signed network.

GRN$grn[[1]][1:10,1:10]
-
##        Abl1         Abt1 Aebp2          Aes Aff4 Agap3 Ahctf1 Ahr Aip
-## Aatf      0 0.000000e+00     0 0.000000e+00    0     0      0   0   0
-## Abl1      0 8.860347e-10     0 0.000000e+00    0     0      0   0   0
-## Abt1      0 0.000000e+00     0 0.000000e+00    0     0      0   0   0
-## Aebp2     0 0.000000e+00     0 0.000000e+00    0     0      0   0   0
-## Aes       0 0.000000e+00     0 0.000000e+00    0     0      0   0   0
-## Aff4      0 0.000000e+00     0 0.000000e+00    0     0      0   0   0
-## Agap3     0 0.000000e+00     0 0.000000e+00    0     0      0   0   0
-## Ahctf1    0 0.000000e+00     0 0.000000e+00    0     0      0   0   0
-## Ahr       0 0.000000e+00     0 0.000000e+00    0     0      0   0   0
-## Aip       0 0.000000e+00     0 7.096193e-09    0     0      0   0   0
-##                Akt1
-## Aatf   0.000000e+00
-## Abl1   0.000000e+00
-## Abt1   0.000000e+00
-## Aebp2  0.000000e+00
-## Aes    1.168183e-09
-## Aff4   0.000000e+00
-## Agap3  0.000000e+00
-## Ahctf1 0.000000e+00
-## Ahr    0.000000e+00
-## Aip    0.000000e+00
+
##        Abl1 Abt1 Aebp2          Aes Aff4 Agap3 Ahctf1 Ahr Aip         Akt1
+## Aatf      0    0     0 0.000000e+00    0     0      0   0   0 0.000000e+00
+## Abl1      0    0     0 0.000000e+00    0     0      0   0   0 0.000000e+00
+## Abt1      0    0     0 0.000000e+00    0     0      0   0   0 0.000000e+00
+## Aebp2     0    0     0 0.000000e+00    0     0      0   0   0 0.000000e+00
+## Aes       0    0     0 0.000000e+00    0     0      0   0   0 3.230875e-09
+## Aff4      0    0     0 0.000000e+00    0     0      0   0   0 0.000000e+00
+## Agap3     0    0     0 3.231243e-09    0     0      0   0   0 0.000000e+00
+## Ahctf1    0    0     0 0.000000e+00    0     0      0   0   0 0.000000e+00
+## Ahr       0    0     0 0.000000e+00    0     0      0   0   0 0.000000e+00
+## Aip       0    0     0 8.410626e-09    0     0      0   0   0 0.000000e+00

Rows represent targets, and columns represent regulators

@@ -957,23 +950,23 @@

6 Module Identification and Visua
## Using signed network, the regulation type is determined by granger regression model
 ## targets and regulators not the same
 ## Modularity values=
-##        Gata1 
-## 0.0005778051 
+##       Gata1 
+## 0.000607628 
 ## Starting Monte Carlo Runs
 ## Done for seed/module Gata1
result
## $ModuleList
 ## $ModuleList$Gata1
-##  [1] "Gata1"  "Ahctf1" "Bcl11a" "Btaf1"  "E2f4"   "Ell2"   "Epor"   "Foxo3" 
-##  [9] "Gfi1b"  "Gtf2h3" "Hdac7"  "Hdgf"   "Jmjd1c" "Klf1"   "Klf16"  "Klf3"  
-## [17] "Ldb1"   "Mbd2"   "Mllt3"  "Nfia"   "Nfix"   "Nrip1"  "Orc2"   "Pbx3"  
-## [25] "Pcgf6"  "Phf10"  "Polr3d" "Rfx2"   "Sox6"   "Sp4"    "Ssbp2"  "Ssbp3" 
-## [33] "Tal1"   "Tcf3"   "Upf1"   "Zbtb32" "Ikzf1"  "Mapk9"  "Ubp1"   "Mta3"  
+##  [1] "Gata1"  "Ahctf1" "Bcl11a" "Btaf1"  "E2f4"   "Ell2"   "Epor"   "Asb6"  
+##  [9] "Cbx3"   "Gfi1b"  "Gtf2h3" "Hdgf"   "Klf1"   "Klf16"  "Klf3"   "Ldb1"  
+## [17] "Mbd2"   "Mllt3"  "Nfia"   "Nfix"   "Nrip1"  "Orc2"   "Pbx3"   "Phf10" 
+## [25] "Polr3d" "Rfx2"   "Sox6"   "Sp4"    "Ssbp2"  "Ssbp3"  "Tal1"   "Tcf3"  
+## [33] "Upf1"   "Zbtb32" "Ikzf1"  "Taf12"  "Foxo3"  "Fubp3" 
 ## 
 ## 
 ## $ModuleTestResult
 ##          TF Module_size ModularityScore P_value
-## Gata1 Gata1          40    0.0005778051       0
+## Gata1 Gata1 38 0.000607628 0

Upon examining the ModuleFinder output, we retrieve the identified Gata1-related module along with its corresponding statistical test results. Notably, in the Erythroid branch, we successfully identified the Gata1-related regulatory module. To compare, we can ascertain whether a Gata1-related module can be recovered for the Neutrophils (Neu) branch.

result2 = ModuleFinder(GRN,"neutrophils",c("Gata1"),
                        method = "spinglass")
@@ -981,23 +974,21 @@

6 Module Identification and Visua ## targets and regulators not the same ## Modularity values= ## Gata1 -## 0.0002784286 +## 0.0003404084 ## Starting Monte Carlo Runs ## Done for seed/module Gata1
result2
## $ModuleList
 ## $ModuleList$Gata1
-##  [1] "Gata1"   "Ahctf1"  "Bcl11a"  "Btaf1"   "E2f4"    "Ell2"    "Epor"   
-##  [8] "Foxo3"   "Gfi1b"   "Gtf2h3"  "Hdgf"    "Jmjd1c"  "Klf1"    "Klf16"  
-## [15] "Klf3"    "Ldb1"    "Mbd2"    "Mllt3"   "Nfia"    "Nfix"    "Nrip1"  
-## [22] "Orc2"    "Phf10"   "Polr3d"  "Rfx2"    "Ssbp2"   "Ssbp3"   "Taf12"  
-## [29] "Tal1"    "Tcf3"    "Upf1"    "Sox6"    "Asb1"    "Brpf3"   "Ubp1"   
-## [36] "Zbtb32"  "Tfdp2"   "Dennd4a"
+##  [1] "Gata1"  "Ahctf1" "Bcl11a" "Btaf1"  "E2f4"   "Ell2"   "Epor"   "Gfi1b" 
+##  [9] "Gtf2h3" "Hdgf"   "Klf1"   "Klf16"  "Klf3"   "Ldb1"   "Mbd2"   "Mllt3" 
+## [17] "Nfia"   "Nfix"   "Nrip1"  "Orc2"   "Phf10"  "Rfx2"   "Ssbp3"  "Tal1"  
+## [25] "Tcf3"   "Brpf3" 
 ## 
 ## 
 ## $ModuleTestResult
 ##          TF Module_size ModularityScore P_value
-## Gata1 Gata1          38    0.0002784286       0
+## Gata1 Gata1 26 0.0003404084 0

As anticipated, in the Neu branch, we are unable to identify a Gata1-specific module. This observation further supports the notion that Gata1 plays a crucial role in Ery-specific differentiation.

Additionally, should we desire to discover smaller modules, it is possible to adjust the gamma parameter within the ModuleFinder function.

result = ModuleFinder(GRN,"erythroblasts_stage2",c("Gata1"),
@@ -1006,26 +997,25 @@ 

6 Module Identification and Visua ## targets and regulators not the same ## Modularity values= ## Gata1 -## 0.0005974315 +## 0.0006320134 ## Starting Monte Carlo Runs ## Done for seed/module Gata1

result
## $ModuleList
 ## $ModuleList$Gata1
 ##  [1] "Gata1"  "Ahctf1" "Bcl11a" "Btaf1"  "E2f4"   "Ell2"   "Epor"   "Gfi1b" 
-##  [9] "Gtf2h3" "Hdgf"   "Klf1"   "Klf16"  "Klf3"   "Ldb1"   "Mllt3"  "Nfia"  
-## [17] "Nfix"   "Nrip1"  "Orc2"   "Phf10"  "Rfx2"   "Ssbp2"  "Ssbp3"  "Tal1"  
-## [25] "Tcf3"   "Ikzf1" 
+##  [9] "Gtf2h3" "Klf1"   "Klf3"   "Ldb1"   "Mllt3"  "Nrip1"  "Phf10"  "Rfx2"  
+## [17] "Ssbp3"  "Tal1"  
 ## 
 ## 
 ## $ModuleTestResult
 ##          TF Module_size ModularityScore P_value
-## Gata1 Gata1          26    0.0005974315       0
+## Gata1 Gata1 18 0.0006320134 0

gamma serves as a parameter in the spinglass algorithm, allowing control over the size of the module.

For network visualization, NetID also offers a function designed to assist in visualizing the identified modules.

graphPlot(GRN,result,"Gata1","erythroblasts_stage2")
## targets and regulators not the same
-

+

7 Session information

diff --git a/vignettes/NetID.Rmd b/vignettes/NetID.Rmd index 8090be3..b8ba218 100644 --- a/vignettes/NetID.Rmd +++ b/vignettes/NetID.Rmd @@ -195,6 +195,14 @@ dyn.out <- RunNetID(sce, velo=FALSE) ``` +We note that the `FateDynamic` function provides a preliminary estimation of cell fate probabilities. However, accurate cell fate estimation relies on further refinements and the incorporation of prior information. If users have their own cell fate probability matrix (`fate_prob`) and `pseudotime`, they can directly plug in these values using the following function. + +```{r Plugin cell fate, eval=FALSE, echo=TRUE, warning=FALSE,message=FALSE} +dyn.out$LineageClass <- LineageClassifer(fate_prob, maxState = 10, cut_off = 0) +dyn.out$pseudotime <- pseudotime +dyn.out$fate_prob <- fate_prob # cell fate probability matrix +``` + NetID classifies the cells based on the fate probability matrix using a Gaussian Mixture Model. Next, we compute the lineage fate probability fold change to assign each cluster to a specific lineage. To visualize the cell fate probability in a PCA 2D space, the following function can be utilized: ```{r plot cell fate probability, eval=TRUE, echo=TRUE, warning=FALSE,message=FALSE}