Corrections and changes before CRAN submission

ghislainv · Jun 23, 2019 · 0fe7f9e · 0fe7f9e
1 parent cc7c758
commit 0fe7f9e
Show file tree

Hide file tree

Showing 10 changed files with 74 additions and 70 deletions.
diff --git a/.Rbuildignore b/.Rbuildignore
@@ -2,18 +2,29 @@
 # .Rbuildignore, see
 # http://cran.r-project.org/doc/manuals/R-exts.html#index-_002eRbuildignore-file
 
-# GitHub files
-.git/
+# Default
 ^.*\.Rproj$
 ^\.Rproj\.user$
+^README\.Rmd$
+^NEWS\.Rmd$
+^vignettes/figure/.*$
+^vignettes/cache/.*$
+vignettes/.*_files/
+vignettes/.*\.html
+vignettes/.*\.md
+^.git$
+^.gitignore$
 
 # pkgdown
-docs
-logos
-pkgdown
-README.Rmd
-vignettes/jSDM_boral.Rmd
-vignettes/proof.Rmd
+^docs$
+^logos$
+^pkgdown$
 
 # travis
 ^\.travis\.yml$
+
+# vignettes
+^vignettes/jSDM_boral\.Rmd$
+^vignettes/jagsboralmodel\.txt$
+^vignettes/proof\.Rmd$
+^vignettes/.*_cache$
diff --git a/R/zzz.R b/R/zzz.R
@@ -13,6 +13,7 @@
    # echo output to screen
    packageStartupMessage("##\n## jSDM R package \n",
                          "## For joint species distribution models \n",
+   					     "## https://ecology.ghislainv.fr/jSDM \n",
                          "##\n")
 }
 

diff --git a/README.md b/README.md
@@ -3,11 +3,11 @@
 
 # jSDM R Package <img src="man/figures/logo.png" align="right" alt="" width="120" />
 
+[![Travis
+CI](https://api.travis-ci.org/ghislainv/jSDM.svg?branch=master)](https://travis-ci.org/ghislainv/jSDM)
 [![CRAN
 Status](https://www.r-pkg.org/badges/version/jSDM)](https://cran.r-project.org/package=jSDM)
 [![Downloads](https://cranlogs.r-pkg.org/badges/jSDM)](https://cran.r-project.org/package=jSDM)
-[![Travis
-CI](https://api.travis-ci.org/ghislainv/jSDM.svg?branch=master)](https://travis-ci.org/ghislainv/jSDM)
 
 Package for fitting joint species distribution models (jSDM) in a
 hierarchical Bayesian framework (Warton *et al.*

diff --git a/man/frogs.Rd b/man/frogs.Rd
@@ -23,7 +23,7 @@
 }
 
 \source{
-Wilkinson, D. P.; Golding, N.; Guillera‐-Arroita, G.; Tingley, R. and McCarthy, M. A. (2018) \emph{A comparison of joint species distribution models for presence-absence data.} Methods in Ecology and Evolution.}
+Wilkinson, D. P.; Golding, N.; Guillera-Arroita, G.; Tingley, R. and McCarthy, M. A. (2018) \emph{A comparison of joint species distribution models for presence-absence data.} Methods in Ecology and Evolution.}
 
 \examples{
 data(frogs, package="jSDM")

diff --git a/man/jSDM_probit_block.Rd b/man/jSDM_probit_block.Rd
@@ -42,12 +42,11 @@ seed=1234, verbose=1)}
     \item{V_alpha_start}{Starting value for variance of random site effect must be a stricly positive scalar.}
 
     \item{W_start}{Starting values for latent variables must be either a scalar or a \eqn{nsite \times n_latent}{n_site x n_latent} matrix. If \code{W_start} takes a scalar value, then that value will serve for all of the Ws.}  
+
+    \item{shape}{Shape parameter of the Inverse-Gamma prior for the random site effect variance \code{V_alpha}. Must be a stricly positive scalar. Default to 0.5 for weak informative prior.}
 
-    \item{rate}{Rate parameter of the Inverse-Gamma prior for the variance \code{V_alpha} of random site effect must be a stricly positive scalar.}
-
-    \item{shape}{Shape parameter of the Inverse-Gamma prior for the variance \code{V_alpha} of random site effect must be a stricly positive scalar.}
+    \item{rate}{Rate parameter of the Inverse-Gamma prior for the random site effect variance \code{V_alpha}. Must be a stricly positive scalar. Default to 0.0005 for weak informative prior.}
 
-
   \item{mu_beta}{Means of the Normal priors for the \eqn{\beta}{beta} parameters of the suitability process. \code{mubeta} must be either a scalar or a p-length vector. If \code{mubeta} takes a scalar value, then that value will serve as the prior mean for all of the betas. The default value is set to 0 for an uninformative prior.}
 
   \item{V_beta}{Variances of the Normal priors for the \eqn{\beta}{beta} parameters of the suitability process. \code{Vbeta} must be either a scalar or a \eqn{p \times p}{p x p} symmetric positive semi-definite square matrix. If \code{Vbeta} takes a scalar value,

diff --git a/src/Rcpp_jSDM_probit_block.cpp b/src/Rcpp_jSDM_probit_block.cpp
@@ -85,7 +85,7 @@ Rcpp::List Rcpp_jSDM_probit_block(const int ngibbs, int nthin, int nburn,
   // Z latent (nsite*nsp)
   arma::mat Z_run; Z_run.zeros(NSITE,NSP);
   // probit_theta_ij = X_i*beta_j + W_i*lambda_j + alpha_i
-  arma::mat probit_theta_run ;probit_theta_run.zeros(NSITE,NSP);
+  arma::mat probit_theta_run; probit_theta_run.zeros(NSITE,NSP);
   // data 
   arma::mat data = arma::join_rows(X,W_run);
 
@@ -104,19 +104,12 @@ Rcpp::List Rcpp_jSDM_probit_block(const int ngibbs, int nthin, int nburn,
     // latent variable Z // 
 
     for (int j=0; j<NSP; j++) {
-
       for (int i=0; i<NSITE; i++) {
-
-        // Mean of the prior
-        double probit_theta = arma::as_scalar(data.row(i)*param_run.col(j)  + alpha_run(i));
-
         // Actualization
-        if ( Y(i,j) == 1) {
-          Z_run(i,j) = rtnorm(s,0,R_PosInf,probit_theta_run(i,j), 1);
-        }
-
-        if ( Y(i,j) == 0) {
-          Z_run(i,j) = rtnorm(s,R_NegInf,0,probit_theta_run(i,j), 1);
+        if (Y(i,j) == 0) {
+          Z_run(i,j) = rtnorm(s, R_NegInf, 0, probit_theta_run(i,j), 1);
+        } else {
+          Z_run(i,j) = rtnorm(s, 0, R_PosInf, probit_theta_run(i,j), 1);
         }
       }
     }
@@ -140,7 +133,7 @@ Rcpp::List Rcpp_jSDM_probit_block(const int ngibbs, int nthin, int nburn,
         if (l > j) {
           param_prop(NP+l) = 0;
         }
-        if ((l==j) & (param_prop(NP+l)< 0)) {
+        if ((l==j) & (param_prop(NP+l) < 0)) {
           param_prop(NP+l) = param_run(NP+l,j);
         }
       }
@@ -166,7 +159,7 @@ Rcpp::List Rcpp_jSDM_probit_block(const int ngibbs, int nthin, int nburn,
       W_run.row(i) = W_i.t();
     }
 
-    data = arma::join_rows(X,W_run);
+    data = arma::join_rows(X, W_run);
 
     ///////////////////////////////
     // vec alpha : Gibbs algorithm //
@@ -202,7 +195,7 @@ Rcpp::List Rcpp_jSDM_probit_block(const int ngibbs, int nthin, int nburn,
     for ( int i = 0; i < NSITE; i++ ) {
       for ( int j = 0; j < NSP; j++ ) {
         // probit(theta_ij) = X_i*beta_j + W_i*lambda_j + alpha_i 
-        probit_theta_run(i,j) = arma::as_scalar(data.row(i)*param_run.col(j)  + alpha_run(i));
+        probit_theta_run(i,j) = arma::as_scalar(data.row(i)*param_run.col(j) + alpha_run(i));
         // link function probit is the inverse of N(0,1) repartition function 
         double theta = gsl_cdf_ugaussian_P(probit_theta_run(i,j));
 
@@ -324,7 +317,8 @@ mod <- Rcpp_jSDM_probit_block(ngibbs=ngibbs, nthin=nthin, nburn=nburn,
                               Y=Y,T=visits,X=X,
                               param_start=param_start, Vparam=diag(c(rep(1.0E6,np),rep(10,nl))),
                               muparam = rep(0,np+nl), W_start=matrix(0,nsite,nl), VW=diag(rep(1,nl)),
-                              alpha_start=rep(0,nsite),Valpha_start=1, shape = 0.5, rate = 0.0005, seed=123, verbose=1)
+                              alpha_start=rep(0,nsite), Valpha_start=1, shape=0.5, rate=0.0005,
+                              seed=123, verbose=1)
 
 # ===================================================
 # Result analysis

diff --git a/src/Rcpp_jSDM_useful.cpp b/src/Rcpp_jSDM_useful.cpp
@@ -2652,10 +2652,10 @@ double rtnorm(
   double ALPHA = 1.837877066409345;             // = log(2*pi)
   int xsize=sizeof(Rtnorm::x)/sizeof(double);   // Length of table x
   int stop = false;
-  double sq2 = 7.071067811865475e-1;            // = 1/sqrt(2)
-  double sqpi = 1.772453850905516;              // = sqrt(pi)
+  //double sq2 = 7.071067811865475e-1;            // = 1/sqrt(2), unused var
+  //double sqpi = 1.772453850905516;              // = sqrt(pi), unused var
 
-  double r, z, e, ylk, simy, lbound, u, d, sim, Z, p;
+  double r, z, e, ylk, simy, lbound, u, d, sim; //, Z, p; unused var
   int i, ka, kb, k;
 
   // Scaling

diff --git a/src/Rcpp_jSDM_useful.h b/src/Rcpp_jSDM_useful.h
@@ -27,6 +27,6 @@ double invlogit (double x);
 // Returns the random variable x and its probability p(x).
 //  The code is based on the implementation by Guillaume Dollé, Vincent Mazet
 //  available from http://miv.u-strasbg.fr/mazet/rtnorm/.
-double rtnorm(gsl_rng *gen, double a,double b,const double mu,const double sigma);
+double rtnorm (gsl_rng *gen, double a,double b,const double mu,const double sigma);
 
 // End
diff --git a/vignettes/bib/biblio-jSDM.bib b/vignettes/bib/biblio-jSDM.bib
@@ -1,13 +1,13 @@
 % Encoding: UTF-8
 
 @Article{Warton2015,
-  author    = {Warton, David I. and Blanchet, F. Guillaume and O’Hara, Robert B. and Ovaskainen, Otso and Taskinen, Sara and Walker, Steven C. and Hui, Francis K.C.},
+  author    = {Warton, David I. and Blanchet, F. Guillaume and O'Hara, Robert B. and Ovaskainen, Otso and Taskinen, Sara and Walker, Steven C. and Hui, Francis K.C.},
   title     = {So Many Variables: Joint Modeling in Community Ecology},
   journal   = {Trends in Ecology \& Evolution},
   year      = {2015},
   volume    = {30},
   number    = {12},
-  pages     = {766--779},
+  pages     = {766-779},
   issn      = {0169-5347},
   doi       = {10.1016/j.tree.2015.09.007},
   url       = {http://dx.doi.org/10.1016/j.tree.2015.09.007},
@@ -21,12 +21,12 @@ @Article{Warton2015
 
 @article{Wilkinson2019,
 author = {Wilkinson, David P. and Golding, Nick and Guillera-Arroita, Gurutzeta and Tingley, Reid and McCarthy, Michael A.},
-title = {A comparison of joint species distribution models for presence–absence data},
+title = {A comparison of joint species distribution models for presence-absence data},
 journal = {Methods in Ecology and Evolution},
 volume = {10},
 number = {2},
 pages = {198-211},
-keywords = {biotic interactions, community assembly, hierarchical models, joint species distribution model, latent factors, presence–absence, residual correlation},
+keywords = {biotic interactions, community assembly, hierarchical models, joint species distribution model, latent factors, presence-absence, residual correlation},
 doi = {10.1111/2041-210X.13106},
 url = {https://besjournals.onlinelibrary.wiley.com/doi/abs/10.1111/2041-210X.13106},
 eprint = {https://besjournals.onlinelibrary.wiley.com/doi/pdf/10.1111/2041-210X.13106},

diff --git a/vignettes/jSDM.Rmd b/vignettes/jSDM.Rmd
@@ -31,7 +31,7 @@ knitr::opts_chunk$set(
 )
 ```
 
-Joint Species distribution models (jSDM) are useful tools to explain or predict species range and abundance from various environmental factors and species correlations [@Warton2015]. jSDM is becoming an increasingly popular statistical method in conservation biology. In this vignette, we illustrate the use of the `jSDM` R package wich aims at providing user-friendly statistical functions using field observations (occurence or abundance data) to fit jSDMs models. Package's functions are developped in a hierarchical Bayesian framework and use conjugate priors to estimate model's parameters. Using compiled C code for the Gibbs sampler reduce drastically the computation time. By making these new statistical tools available to the scientific community, we hope to democratize the use of more complex, but more realistic, statistical models for increasing knowledge in ecology and conserving biodiversity. 
+Joint Species distribution models (jSDM) are useful tools to explain or predict species range and abundance from various environmental factors and species correlations [@Warton2015]. jSDM is becoming an increasingly popular statistical method in conservation biology. In this vignette, we illustrate the use of the `jSDM` R package wich aims at providing user-friendly statistical functions using field observations (occurence or abundance data) to fit jSDMs models. Package's functions are developped in a hierarchical Bayesian framework and use conjugate priors to estimate model's parameters. Using compiled C++ code for the Gibbs sampler reduce drastically the computation time. By making these new statistical tools available to the scientific community, we hope to democratize the use of more complex, but more realistic, statistical models for increasing knowledge in ecology and conserving biodiversity. 
 
 Below, we show an example of the use of `jSDM` for fitting species distribution model to abundance data for 9 frog's species. Model types available in `jSDM` are not limited to those described in this example. `jSDM` includes various model types for occurrence data: 
 
@@ -40,25 +40,25 @@ Below, we show an example of the use of `jSDM` for fitting species distribution
 
 Referring to the models used in the articles @Warton2015 and @Albert1993, we define the following model :
 
-$$ \mathrm{probit}(\theta_{i,j}) =\alpha_i + \beta_{0j}+X_i'\beta_j+ W_i'\lambda_j $$
+$$ \mathrm{probit}(\theta_{ij}) =\alpha_i + \beta_{0j}+X_i'\beta_j+ W_i'\lambda_j $$
 
-- Link function probit: $\mathrm{probit} : q \rightarrow \Phi^{-1}(q)$ where $\Phi$ correspond to the repartition function of the reduced centred normal distribution.
+- Link function probit: $\mathrm{probit}: q \rightarrow \Phi^{-1}(q)$ where $\Phi$ correspond to the repartition function of the reduced centred normal distribution.
 
-- Response variable: $Y=(y_{i,j})^{i=1,\ldots,nsite}_{j=1,\ldots,nsp}$ with:
+- Response variable: $Y=(y_{ij})^{i=1,\ldots,nsite}_{j=1,\ldots,nsp}$ with:
 
-$$y_{i,j}=\begin{cases}
+$$y_{ij}=\begin{cases}
     0 & \text{ if species $j$ is absent on the site $i$}\\
-    1 &  \text{ if species  $j$ is present on the site $i$}
-    \end{cases}.$$
+    1 &  \text{ if species  $j$ is present on the site $i$}.
+    \end{cases}$$
 
-- Latent variable $z_{i,j} = \alpha_i + \beta_{0j}+X_i'\beta_j+ W_i'\lambda_j + \epsilon_{i,j}$, with $\forall i,j \ \epsilon_{i,j} \sim \mathcal{N}(0,1)$ and such as:
+- Latent variable $z_{ij} = \alpha_i + \beta_{0j} + X_i'\beta_j + W_i'\lambda_j + \epsilon_{i,j}$, with $\forall (i,j) \ \epsilon_{ij} \sim \mathcal{N}(0,1)$ and such that:
 
-$$y_{i,j}=\begin{cases}
-    1 & \text{ if } z_{i,j} > 0 \\
-    0 &  \text{ otherwise.}
+$$y_{ij}=\begin{cases}
+    1 & \text{if} \ z_{ij} > 0 \\
+    0 &  \text{otherwise.}
     \end{cases}$$
 
-It can be easily shown that: $y_{i,j}| z_{i,j} \sim \mathcal{B}ernouilli(\theta_{i,j})$. 
+It can be easily shown that: $y_{ij} \sim \mathcal{B}ernouilli(\theta_{ij})$. 
 
 - Latent variables: $W_i=(W_i^1,\ldots,W_i^q)$ where $q$ is the number of latent variables considered, which has to be fixed by the user (by default q=2).
 We assume that $W_i \sim \mathcal{N}(0,I_q)$ and we define the associated coefficients: $\lambda_j=(\lambda_j^1,\ldots, \lambda_j^q)'$. We use a prior distribution $\mathcal{N}(0,10)$ for all lambdas not concerned by constraints. 
@@ -68,7 +68,7 @@ The corresponding regression coefficients for each species $j$ are noted : $\bet
 
 - $\beta_{0j}$ correspond to the intercept for species $j$ which is assume to be a fixed effect. We use a prior distribution $\mathcal{N}(0,10^6)$ for all betas. 
 
-- $\alpha_i$ represents the random effect of site $i$ such as $\alpha_i \sim \mathcal{N}(0,V_{\alpha})$ and we assumed that $V_{\alpha} \sim \mathcal {IG}(0.5,\frac{1}{0.005})$ as prior distribution by default. 
+- $\alpha_i$ represents the random effect of site $i$ such as $\alpha_i \sim \mathcal{N}(0,V_{\alpha})$ and we assumed that $V_{\alpha} \sim \mathcal {IG}(\text{shape}=0.5, \text{rate}=0.005)$ as prior distribution by default. 
 
 
 # Data-set
@@ -86,27 +86,25 @@ We first load the `jSDM` library.
 library(jSDM)
 ```
 
-This data-set is available in the `jSDM` R package. It can be loaded  with the `data` command. The data is in "wide" format: each line is a site and the occurrence data (from Species_1 to Species_9) are in columns. A site is characterized by its x-y geographical coordinates, one discrete covariates and two other continuous. 
+This data-set is available in the `jSDM` R package. It can be loaded with the `data` command. The data is in "wide" format: each line is a site and the occurrence data (from Species_1 to Species_9) are in columns. A site is characterized by its x-y geographical coordinates, one discrete covariate and two other continuous covariates. 
 
 ```{r frogs-data}
 # frogs data
 data(frogs, package="jSDM")
 head(frogs)
 ```
 
-We rearrange the data in two data-sets, one for the observation (or detection) process and one for the suitability (or ecological) process. 
+We rearrange the data in two data-sets: a first one for the presence-absence observations for each species (columns) at each site (rows), and a second one for the site characteristics.
 
-We normalize the continuous data to facilitate MCMC convergence. 
+We also normalize the continuous explicative variables to facilitate MCMC convergence. 
 
 ```{r arranging-data}
+# data.obs
+PA_frogs <- frogs[,4:12]
 
-# data.suit
 # Normalized continuous variables
 Env_frogs <- cbind(scale(frogs[,1]),frogs[,2],scale(frogs[,3]))
-colnames(Env_frogs) <- colnames (frogs[,1:3])
-
-# data.obs
-PA_frogs <- frogs[,4:12]
+colnames(Env_frogs) <- colnames(frogs[,1:3])
 ```
 
 # Parameter inference
@@ -137,16 +135,17 @@ mod_jSDM_block_frogs <- jSDM_probit_block (
 # Analysis of the results
 
 ```{r plot-results}
-## alpha_i of 3 first sites
-plot(coda::as.mcmc(mod_jSDM_block_frogs$mcmc.alpha[,1:3]))
+## alpha_i of the first two sites
+plot(coda::as.mcmc(mod_jSDM_block_frogs$mcmc.alpha[,1:2]))
 
 ## Valpha
 par(mfrow=c(1,2))
-coda::traceplot(mod_jSDM_block_frogs$mcmc.Valpha, main = "V_alpha")
-coda::densplot(mod_jSDM_block_frogs$mcmc.Valpha, main = "V_alpha")
+coda::traceplot(mod_jSDM_block_frogs$mcmc.Valpha, main="V_alpha")
+coda::densplot(mod_jSDM_block_frogs$mcmc.Valpha, main="V_alpha")
 
 np <- nrow(mod_jSDM_block_frogs$model_spec$beta_start)
-## beta_j of two first species
+
+## beta_j of the first two species
 par(mfrow=c(np,2))
 for (j in 1:2) {
   for (p in 1:np) {
@@ -157,8 +156,8 @@ main = paste(colnames(mod_jSDM_block_frogs$mcmc.sp[[paste0("sp_",j)]])[p],
   }
 }
 
+## lambda_j of the first two species
 n_latent <- mod_jSDM_block_frogs$model_spec$n_latent
-## lambda_j of two first species
 par(mfrow=c(n_latent*2,2))
 for (j in 1:2) {
   for (l in 1:n_latent) {
@@ -169,18 +168,19 @@ for (j in 1:2) {
     }
   }
 
-## Latent variables W_i of two first sites
+## Latent variables W_i for the first two sites
 par(mfrow=c(1,2))
 for (l in 1:n_latent) {
   plot(mod_jSDM_block_frogs$mcmc.latent[[paste0("lv_",l)]][,1:2], main = paste0("Latent variable W_", l))
 }
 
 ## probit_theta
-plot(mod_jSDM_block_frogs$mcmc.Deviance,main = "Deviance")
-## Deviance
 par (mfrow=c(1,1))
 hist(mod_jSDM_block_frogs$probit_theta_pred, main = "Predicted probit theta", xlab ="predicted probit theta")
 
+## Deviance
+plot(mod_jSDM_block_frogs$mcmc.Deviance,main = "Deviance")
+
 ```
 
 # Matrice of correlations 
@@ -210,7 +210,6 @@ simdata$Covariate_1 <- rnorm(50)
 simdata$Covariate_3 <- rnorm(50)
 simdata$Covariate_2 <- rbinom(50,1,0.5)
 
-
 # Predictions 
 theta_pred <- predict.jSDM(mod_jSDM_block_frogs, newdata=simdata, Id_species=Id_species,
 						   Id_sites=Id_sites, type="mean")