Merge pull request #10 from modal-inria/rework

Code rework
modal-inria · Nov 18, 2022 · ef9344d · ef9344d
2 parents d3449b3 + 6f56195
commit ef9344d
Show file tree

Hide file tree

Showing 58 changed files with 3,604 additions and 3,163 deletions.
diff --git a/.Rbuildignore b/.Rbuildignore
@@ -2,5 +2,6 @@
 ^\.Rproj\.user$
 README.md
 ^\.github$
+.vscode
 src/.vscode
 CITATION.bib
diff --git a/.github/workflows/R-CMD-check.yaml b/.github/workflows/R-CMD-check.yaml
@@ -29,7 +29,7 @@ jobs:
       R_KEEP_PKG_SOURCE: yes
 
     steps:
-      - uses: actions/checkout@v2
+      - uses: actions/checkout@v3
 
       - uses: r-lib/actions/setup-pandoc@v1
 

diff --git a/.gitignore b/.gitignore
@@ -11,4 +11,4 @@ src/*.a
 *.spl
 *.synctex.gz
 *.tex
-src/.vscode
+.vscode
diff --git a/DESCRIPTION b/DESCRIPTION
@@ -1,11 +1,11 @@
 Package: Rankcluster
 Type: Package
 Title: Model-Based Clustering for Multivariate Partial Ranking Data
-Version: 0.94.5
-Date: 2021-01-26
+Version: 0.98.0
+Date: 2022-11-11
 Authors@R: c(person("Quentin", "Grimonprez", role = c("aut", "cre"), email = "quentingrim@yahoo.fr"),
-              person("Julien", "Jacques", role = "aut"),
-              person("Christophe", "Biernacki", role = "aut"))
+             person("Julien", "Jacques", role = "aut", email = "julien.jacques@univ-lyon2.fr"),
+             person("Christophe", "Biernacki", role = "aut", email = "christophe.biernacki@inria.fr"))
 Description: Implementation of a model-based clustering algorithm for
     ranking data (C. Biernacki, J. Jacques (2013) <doi:10.1016/j.csda.2012.08.008>). 
     Multivariate rankings as well as partial rankings are taken
@@ -24,4 +24,4 @@ LinkingTo: Rcpp, RcppEigen
 Suggests: knitr, rmarkdown, testthat
 VignetteBuilder: knitr
 Encoding: UTF-8
-RoxygenNote: 7.1.2
+RoxygenNote: 7.2.1
diff --git a/NEWS b/NEWS
@@ -1,5 +1,9 @@
-# 
+# v0.98.0 - 2022-11-11
 - add unit tests
+- improve numerical stability of probability computation (WIP)
+- refactor C++ code (WIP)
+- fix c++ warnings
+- lint R code
 
 # v0.94.5 - 2021-01-26
 - replace random_shuffle in C++
@@ -18,7 +22,49 @@
 - factorization
 - correct roxygen S4 tag
 
+# v0.94.1 - 2019-08-28
+- beautify R code
+- update Makevars
+- add src/Rankcluster_init.c file
+
+# v0.94 - 2016-07-29
+- fix bug in gibbs for ties and partial ranks
+
+# v0.93.1 - 2016-01-12
+- update documentation url
+- use r random number generator 
+
+# v0.92.9 - 2014-07-25
+- data and package documentation in their own files
+- parallelization with OPENMP
+- update doc
+
 # v0.92 - 2014-02-12
-- ranks have to be given to the package in the ranking notation (see convertRank function), with the following convention:
-  - missing positions are replaced by 0
-  - tied are replaced by the lowest position they share
+- remove Eigen and use RcppEigen
+- add probability function
+- ranks have to be given to the package in the ranking notation (to manage ties)
+- update doc
+
+# v0.91.6 - 2013-12-19
+- update doc
+- eigen licence/copyright
+
+# v0.91.5 - 2013-12-05
+- update eigen
+- add eigen copyright
+
+# v0.91 - 2013-11-03
+- correct c++ warnings: comparison between unsigned and signed int
+- correction in khi2 for partial data
+
+# v0.90.3 - 2013-09-13
+- add vignettes
+
+# v0.90.2 - 2013-09-05
+- fix error in examples due to randomness
+
+# v0.90.1 - 2013-08-30
+- correct limited development formula
+
+# v0.89 - 2013-08-28
+- on CRAN, now
diff --git a/R/ISRdistribution.R b/R/ISRdistribution.R
@@ -1,9 +1,9 @@
 #' @title Simulate a sample of ISR(pi,mu)
-#' 
+#'
 #' @description This function simulates univariate rankings data (ordering representation) according to the ISR(pi,mu).
 #'
 #' @param n size of the sample.
-#' @param pi dispersion parameter: probability of correct paired comparaison according to mu.
+#' @param pi dispersion parameter: probability of correct paired comparison according to mu.
 #' @param mu position parameter: modal ranking in ordering representation.
 #' @return a matrix with simulated ranks.
 #'
@@ -23,45 +23,54 @@
 #' result of the judge is o = (3, 1, 2) whereas the ranking
 #' result is r = (2, 3, 1).
 #'
-#' You can see the \link{convertRank} function to convert the simualted ranking drom ordering to ranking representation.
+#' You can see the \link{convertRank} function to convert the simulated ranking from ordering to ranking representation.
 #'
 #' @references
-#' [1] C.Biernacki and J.Jacques (2013), A generative model for rank data based on sorting algorithm, Computational Statistics and Data Analysis, 58, 162-176.
-#' 
+#' [1] C.Biernacki and J.Jacques (2013), A generative model for rank data based on sorting algorithm,
+#' Computational Statistics and Data Analysis, 58, 162-176.
+#'
 #' @examples
 #' x <- simulISR(30, 0.8, 1:4)
-#' 
+#'
 #' @author Julien Jacques
-#' 
+#'
 #' @export
-simulISR <- function(n, pi, mu)
-{
-  if (missing(n))
+simulISR <- function(n, pi, mu) {
+  if (missing(n)) {
     stop("n is missing")
-  if (missing(mu))
+  }
+  if (missing(mu)) {
     stop("mu is missing")
-  if (missing(pi))
+  }
+  if (missing(pi)) {
     stop("pi is missing")
-
-  if (!is.numeric(n) || (length(n) > 1))
+  }
+
+  if (!is.numeric(n) || (length(n) > 1)) {
     stop("n must be a strictly positive integer")
-  if ((n != round(n)) || (n <= 0))
+  }
+  if ((n != round(n)) || (n <= 0)) {
     stop("n must be a strictly positive integer")
-
-  if (!is.numeric(pi) || (length(pi) > 1))
+  }
+
+  if (!is.numeric(pi) || (length(pi) > 1)) {
     stop("pi must be a real between 0 and 1")
-  if ((pi > 1) || (pi < 0))
+  }
+  if ((pi > 1) || (pi < 0)) {
     stop("pi must be a real between 0 and 1")
-
-  if (!is.vector(mu, mode = "numeric"))
+  }
+
+  if (!is.vector(mu, mode = "numeric")) {
     stop("mu must be a complete rank")
-  if (!checkRank(mu))
+  }
+  if (!checkRank(mu)) {
     stop("mu must be a complete rank")
-
-
-
+  }
+
+
+
   res <- .Call("simulISRR", n, length(mu), mu, pi, PACKAGE = "Rankcluster")
-  
+
   return(res)
 }
 
@@ -70,17 +79,20 @@ simulISR <- function(n, pi, mu)
 #'
 #' @description It computes the probability of a (multivariate) rank x according to a ISR(mu, pi).
 #'
-#' @param x a vector or a matrix containing the rankings in ranking notation (see Details or \link{convertRank} function). 
+#' @param x a vector or a matrix containing the rankings in ranking notation (see Details or \link{convertRank} function).
 #' The rankings of each dimension are placed end to end. \code{x} must contain only full ranking (no partial or tied).
-#' @param pi a vector of size \code{p=length(m)}, where \code{p} is the number of dimension, containing the probabilities of a good comparison of the model (dispersion parameters).
-#' @param mu a vector of length \code{sum(m)} containing the modal ranks in ranking notation (see Details or \link{convertRank} function). 
+#' @param pi a vector of size \code{p=length(m)}, where \code{p} is the number of dimension, containing the probabilities of
+#' a good comparison of the model (dispersion parameters).
+#' @param mu a vector of length \code{sum(m)} containing the modal ranks in ranking notation (see Details or
+#' \link{convertRank} function).
 #' The rankings of each dimension are placed end to end. \code{mu} must contain only full ranking (no partial or tied).
 #' @param m a vector containing the size of ranks for each dimension.
-#' 
+#'
 #' @return the probability of \code{x} according to a ISR(mu, pi).
 #'
 #' @details
-#'  The ranks have to be given to the package in the ranking notation (see \link{convertRank} function), with the following convention:
+#' The ranks have to be given to the package in the ranking notation (see \link{convertRank} function),
+#' with the following convention:
 #'
 #' - missing positions are replaced by 0
 #'
@@ -105,101 +117,121 @@ simulISR <- function(n, pi, mu)
 #'
 #' @examples
 #' m <- c(4, 5)
-#' x = mu <- matrix(nrow = 1, ncol = 9)
-#' x[1:4] = c(1, 4, 2, 3)
-#' x[5:9] = c(3, 5, 2, 4, 1)
-#' mu[1:4] = 1:4
-#' mu[5:9] = c(3, 5, 4, 2, 1)
+#' x <- mu <- matrix(nrow = 1, ncol = 9)
+#' x[1:4] <- c(1, 4, 2, 3)
+#' x[5:9] <- c(3, 5, 2, 4, 1)
+#' mu[1:4] <- 1:4
+#' mu[5:9] <- c(3, 5, 4, 2, 1)
 #' pi <- c(0.75, 0.82)
 #'
 #' prob <- probability(x, mu, pi, m)
 #' prob
-#' 
+#'
 #' @author Quentin Grimonprez
-#' 
+#'
 #' @export
 #'
-probability <- function(x, mu, pi, m = length(mu))
-{
+probability <- function(x, mu, pi, m = length(mu)) {
   x <- matrix(x, ncol = sum(m))
   mu <- matrix(mu, ncol = sum(m))
-  
+
   checkProbability(x, mu, pi, m)
-  
+
   # convert to ordering
-  for (i in 1:length(m))
-  {
-    mu[1, (1 + cumsum(c(0, m))[i]):(cumsum(c(0, m))[i + 1])] = convertRank(mu[1, (1 + cumsum(c(0, m))[i]):(cumsum(c(0, m))[i + 1])])
-    for (j in 1:nrow(x))
-    {
-      x[j, (1 + cumsum(c(0, m))[i]):(cumsum(c(0, m))[i + 1])] = convertRank(x[j, (1 + cumsum(c(0, m))[i]):(cumsum(c(0, m))[i + 1])])
+  for (i in seq_along(m)) {
+    mu[1, (1 + cumsum(c(0, m))[i]):(cumsum(c(0, m))[i + 1])] <- convertRank(
+      mu[1, (1 + cumsum(c(0, m))[i]):(cumsum(c(0, m))[i + 1])]
+    )
+    for (j in seq_len(nrow(x))) {
+      x[j, (1 + cumsum(c(0, m))[i]):(cumsum(c(0, m))[i + 1])] <- convertRank(
+        x[j, (1 + cumsum(c(0, m))[i]):(cumsum(c(0, m))[i + 1])]
+      )
     }
-
   }
-  
+
   proba <- rep(NA, nrow(x))
-  for (j in seq_along(proba))
+  for (j in seq_along(proba)) {
     proba[j] <- .Call("computeProba", x[j, , drop = FALSE], mu, pi, m, PACKAGE = "Rankcluster")
-
+  }
+
   return(proba)
 }
 
 
-checkProbability <- function(x, mu, pi, m)
-{
+checkProbability <- function(x, mu, pi, m) {
   ### check parameters
-  if (missing(x))
+  if (missing(x)) {
     stop("x is missing.")
-  if (missing(mu))
+  }
+  if (missing(mu)) {
     stop("mu is missing.")
-  if (missing(pi))
+  }
+  if (missing(pi)) {
     stop("pi is missing.")
-
+  }
+
   # x
-  if (!(is.vector(x) || is.matrix(x)))
+  if (!(is.vector(x) || is.matrix(x))) {
     stop("x must be either a matrix or a vector.")
-  if (is.vector(x))
+  }
+  if (is.vector(x)) {
     x <- t(as.matrix(x))
-  if (!is.numeric(x))
+  }
+  if (!is.numeric(x)) {
     stop("x must be either a matrix or a vector of integer.")
-
+  }
+
   # mu
-  if (!(is.vector(mu) || is.matrix(mu)))
+  if (!(is.vector(mu) || is.matrix(mu))) {
     stop("mu must be either a matrix or a vector.")
-  if (is.vector(mu))
+  }
+  if (is.vector(mu)) {
     mu <- t(as.matrix(mu))
-  if (!is.numeric(mu))
+  }
+  if (!is.numeric(mu)) {
     stop("mu must be either a matrix or a vector of integer.")
-
+  }
+
   # pi
-  if (!is.numeric(pi))
+  if (!is.numeric(pi)) {
     stop("pi must be a vector of probabilities.")
-  if (!is.vector(pi))
+  }
+  if (!is.vector(pi)) {
     stop("pi must be a vector of probabilities.")
-  if ((min(pi) < 0) || max(pi) > 1)
+  }
+  if ((min(pi) < 0) || max(pi) > 1) {
     stop("pi must be a vector of probabilities.")
-
+  }
+
   # m
-  if (!is.numeric(m))
+  if (!is.numeric(m)) {
     stop("m must be a vector of integer.")
-  if (!is.vector(m))
+  }
+  if (!is.vector(m)) {
     stop("m must be a vector of integer.")
-  if (sum(unlist(lapply(m, is.wholenumber))) != length(m))
+  }
+  if (sum(unlist(lapply(m, is.wholenumber))) != length(m)) {
     stop("m contains non integer.")
-  if (sum(m) != ncol(x))
+  }
+  if (sum(m) != ncol(x)) {
     stop("sum(m) and the length of x do not match.")
-  if (sum(m) != length(mu))
+  }
+  if (sum(m) != length(mu)) {
     stop("sum(m) and the length of mu do not match.")
-  if (length(m) != length(pi))
+  }
+  if (length(m) != length(pi)) {
     stop("the length of pi and m do not match.")
-
+  }
+
   # check if mu contains ranks
-  for (i in 1:length(m))
-  {
-    if (!checkRank(mu[, (1 + cumsum(c(0, m))[i]):(cumsum(c(0, m))[i + 1])], m[i]))
+  for (i in seq_along(m)) {
+    if (!checkRank(mu[, (1 + cumsum(c(0, m))[i]):(cumsum(c(0, m))[i + 1])], m[i])) {
       stop("mu is not correct.")
-    for (j in 1:nrow(x))
-      if (!checkRank(x[j, (1 + cumsum(c(0, m))[i]):(cumsum(c(0, m))[i + 1])], m[i]))
+    }
+    for (j in seq_len(nrow(x))) {
+      if (!checkRank(x[j, (1 + cumsum(c(0, m))[i]):(cumsum(c(0, m))[i + 1])], m[i])) {
         stop("x is not correct.")
+      }
+    }
   }
 }