Skip to content

Commit

Permalink
Merge pull request #10 from modal-inria/rework
Browse files Browse the repository at this point in the history
Code rework
  • Loading branch information
Quentin62 authored Nov 18, 2022
2 parents d3449b3 + 6f56195 commit ef9344d
Show file tree
Hide file tree
Showing 58 changed files with 3,604 additions and 3,163 deletions.
1 change: 1 addition & 0 deletions .Rbuildignore
Original file line number Diff line number Diff line change
Expand Up @@ -2,5 +2,6 @@
^\.Rproj\.user$
README.md
^\.github$
.vscode
src/.vscode
CITATION.bib
2 changes: 1 addition & 1 deletion .github/workflows/R-CMD-check.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,7 @@ jobs:
R_KEEP_PKG_SOURCE: yes

steps:
- uses: actions/checkout@v2
- uses: actions/checkout@v3

- uses: r-lib/actions/setup-pandoc@v1

Expand Down
2 changes: 1 addition & 1 deletion .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -11,4 +11,4 @@ src/*.a
*.spl
*.synctex.gz
*.tex
src/.vscode
.vscode
10 changes: 5 additions & 5 deletions DESCRIPTION
Original file line number Diff line number Diff line change
@@ -1,11 +1,11 @@
Package: Rankcluster
Type: Package
Title: Model-Based Clustering for Multivariate Partial Ranking Data
Version: 0.94.5
Date: 2021-01-26
Version: 0.98.0
Date: 2022-11-11
Authors@R: c(person("Quentin", "Grimonprez", role = c("aut", "cre"), email = "quentingrim@yahoo.fr"),
person("Julien", "Jacques", role = "aut"),
person("Christophe", "Biernacki", role = "aut"))
person("Julien", "Jacques", role = "aut", email = "julien.jacques@univ-lyon2.fr"),
person("Christophe", "Biernacki", role = "aut", email = "christophe.biernacki@inria.fr"))
Description: Implementation of a model-based clustering algorithm for
ranking data (C. Biernacki, J. Jacques (2013) <doi:10.1016/j.csda.2012.08.008>).
Multivariate rankings as well as partial rankings are taken
Expand All @@ -24,4 +24,4 @@ LinkingTo: Rcpp, RcppEigen
Suggests: knitr, rmarkdown, testthat
VignetteBuilder: knitr
Encoding: UTF-8
RoxygenNote: 7.1.2
RoxygenNote: 7.2.1
54 changes: 50 additions & 4 deletions NEWS
Original file line number Diff line number Diff line change
@@ -1,5 +1,9 @@
#
# v0.98.0 - 2022-11-11
- add unit tests
- improve numerical stability of probability computation (WIP)
- refactor C++ code (WIP)
- fix c++ warnings
- lint R code

# v0.94.5 - 2021-01-26
- replace random_shuffle in C++
Expand All @@ -18,7 +22,49 @@
- factorization
- correct roxygen S4 tag

# v0.94.1 - 2019-08-28
- beautify R code
- update Makevars
- add src/Rankcluster_init.c file

# v0.94 - 2016-07-29
- fix bug in gibbs for ties and partial ranks

# v0.93.1 - 2016-01-12
- update documentation url
- use r random number generator

# v0.92.9 - 2014-07-25
- data and package documentation in their own files
- parallelization with OPENMP
- update doc

# v0.92 - 2014-02-12
- ranks have to be given to the package in the ranking notation (see convertRank function), with the following convention:
- missing positions are replaced by 0
- tied are replaced by the lowest position they share
- remove Eigen and use RcppEigen
- add probability function
- ranks have to be given to the package in the ranking notation (to manage ties)
- update doc

# v0.91.6 - 2013-12-19
- update doc
- eigen licence/copyright

# v0.91.5 - 2013-12-05
- update eigen
- add eigen copyright

# v0.91 - 2013-11-03
- correct c++ warnings: comparison between unsigned and signed int
- correction in khi2 for partial data

# v0.90.3 - 2013-09-13
- add vignettes

# v0.90.2 - 2013-09-05
- fix error in examples due to randomness

# v0.90.1 - 2013-08-30
- correct limited development formula

# v0.89 - 2013-08-28
- on CRAN, now
194 changes: 113 additions & 81 deletions R/ISRdistribution.R
Original file line number Diff line number Diff line change
@@ -1,9 +1,9 @@
#' @title Simulate a sample of ISR(pi,mu)
#'
#'
#' @description This function simulates univariate rankings data (ordering representation) according to the ISR(pi,mu).
#'
#' @param n size of the sample.
#' @param pi dispersion parameter: probability of correct paired comparaison according to mu.
#' @param pi dispersion parameter: probability of correct paired comparison according to mu.
#' @param mu position parameter: modal ranking in ordering representation.
#' @return a matrix with simulated ranks.
#'
Expand All @@ -23,45 +23,54 @@
#' result of the judge is o = (3, 1, 2) whereas the ranking
#' result is r = (2, 3, 1).
#'
#' You can see the \link{convertRank} function to convert the simualted ranking drom ordering to ranking representation.
#' You can see the \link{convertRank} function to convert the simulated ranking from ordering to ranking representation.
#'
#' @references
#' [1] C.Biernacki and J.Jacques (2013), A generative model for rank data based on sorting algorithm, Computational Statistics and Data Analysis, 58, 162-176.
#'
#' [1] C.Biernacki and J.Jacques (2013), A generative model for rank data based on sorting algorithm,
#' Computational Statistics and Data Analysis, 58, 162-176.
#'
#' @examples
#' x <- simulISR(30, 0.8, 1:4)
#'
#'
#' @author Julien Jacques
#'
#'
#' @export
simulISR <- function(n, pi, mu)
{
if (missing(n))
simulISR <- function(n, pi, mu) {
if (missing(n)) {
stop("n is missing")
if (missing(mu))
}
if (missing(mu)) {
stop("mu is missing")
if (missing(pi))
}
if (missing(pi)) {
stop("pi is missing")

if (!is.numeric(n) || (length(n) > 1))
}

if (!is.numeric(n) || (length(n) > 1)) {
stop("n must be a strictly positive integer")
if ((n != round(n)) || (n <= 0))
}
if ((n != round(n)) || (n <= 0)) {
stop("n must be a strictly positive integer")

if (!is.numeric(pi) || (length(pi) > 1))
}

if (!is.numeric(pi) || (length(pi) > 1)) {
stop("pi must be a real between 0 and 1")
if ((pi > 1) || (pi < 0))
}
if ((pi > 1) || (pi < 0)) {
stop("pi must be a real between 0 and 1")

if (!is.vector(mu, mode = "numeric"))
}

if (!is.vector(mu, mode = "numeric")) {
stop("mu must be a complete rank")
if (!checkRank(mu))
}
if (!checkRank(mu)) {
stop("mu must be a complete rank")



}



res <- .Call("simulISRR", n, length(mu), mu, pi, PACKAGE = "Rankcluster")

return(res)
}

Expand All @@ -70,17 +79,20 @@ simulISR <- function(n, pi, mu)
#'
#' @description It computes the probability of a (multivariate) rank x according to a ISR(mu, pi).
#'
#' @param x a vector or a matrix containing the rankings in ranking notation (see Details or \link{convertRank} function).
#' @param x a vector or a matrix containing the rankings in ranking notation (see Details or \link{convertRank} function).
#' The rankings of each dimension are placed end to end. \code{x} must contain only full ranking (no partial or tied).
#' @param pi a vector of size \code{p=length(m)}, where \code{p} is the number of dimension, containing the probabilities of a good comparison of the model (dispersion parameters).
#' @param mu a vector of length \code{sum(m)} containing the modal ranks in ranking notation (see Details or \link{convertRank} function).
#' @param pi a vector of size \code{p=length(m)}, where \code{p} is the number of dimension, containing the probabilities of
#' a good comparison of the model (dispersion parameters).
#' @param mu a vector of length \code{sum(m)} containing the modal ranks in ranking notation (see Details or
#' \link{convertRank} function).
#' The rankings of each dimension are placed end to end. \code{mu} must contain only full ranking (no partial or tied).
#' @param m a vector containing the size of ranks for each dimension.
#'
#'
#' @return the probability of \code{x} according to a ISR(mu, pi).
#'
#' @details
#' The ranks have to be given to the package in the ranking notation (see \link{convertRank} function), with the following convention:
#' The ranks have to be given to the package in the ranking notation (see \link{convertRank} function),
#' with the following convention:
#'
#' - missing positions are replaced by 0
#'
Expand All @@ -105,101 +117,121 @@ simulISR <- function(n, pi, mu)
#'
#' @examples
#' m <- c(4, 5)
#' x = mu <- matrix(nrow = 1, ncol = 9)
#' x[1:4] = c(1, 4, 2, 3)
#' x[5:9] = c(3, 5, 2, 4, 1)
#' mu[1:4] = 1:4
#' mu[5:9] = c(3, 5, 4, 2, 1)
#' x <- mu <- matrix(nrow = 1, ncol = 9)
#' x[1:4] <- c(1, 4, 2, 3)
#' x[5:9] <- c(3, 5, 2, 4, 1)
#' mu[1:4] <- 1:4
#' mu[5:9] <- c(3, 5, 4, 2, 1)
#' pi <- c(0.75, 0.82)
#'
#' prob <- probability(x, mu, pi, m)
#' prob
#'
#'
#' @author Quentin Grimonprez
#'
#'
#' @export
#'
probability <- function(x, mu, pi, m = length(mu))
{
probability <- function(x, mu, pi, m = length(mu)) {
x <- matrix(x, ncol = sum(m))
mu <- matrix(mu, ncol = sum(m))

checkProbability(x, mu, pi, m)

# convert to ordering
for (i in 1:length(m))
{
mu[1, (1 + cumsum(c(0, m))[i]):(cumsum(c(0, m))[i + 1])] = convertRank(mu[1, (1 + cumsum(c(0, m))[i]):(cumsum(c(0, m))[i + 1])])
for (j in 1:nrow(x))
{
x[j, (1 + cumsum(c(0, m))[i]):(cumsum(c(0, m))[i + 1])] = convertRank(x[j, (1 + cumsum(c(0, m))[i]):(cumsum(c(0, m))[i + 1])])
for (i in seq_along(m)) {
mu[1, (1 + cumsum(c(0, m))[i]):(cumsum(c(0, m))[i + 1])] <- convertRank(
mu[1, (1 + cumsum(c(0, m))[i]):(cumsum(c(0, m))[i + 1])]
)
for (j in seq_len(nrow(x))) {
x[j, (1 + cumsum(c(0, m))[i]):(cumsum(c(0, m))[i + 1])] <- convertRank(
x[j, (1 + cumsum(c(0, m))[i]):(cumsum(c(0, m))[i + 1])]
)
}

}

proba <- rep(NA, nrow(x))
for (j in seq_along(proba))
for (j in seq_along(proba)) {
proba[j] <- .Call("computeProba", x[j, , drop = FALSE], mu, pi, m, PACKAGE = "Rankcluster")

}

return(proba)
}


checkProbability <- function(x, mu, pi, m)
{
checkProbability <- function(x, mu, pi, m) {
### check parameters
if (missing(x))
if (missing(x)) {
stop("x is missing.")
if (missing(mu))
}
if (missing(mu)) {
stop("mu is missing.")
if (missing(pi))
}
if (missing(pi)) {
stop("pi is missing.")

}

# x
if (!(is.vector(x) || is.matrix(x)))
if (!(is.vector(x) || is.matrix(x))) {
stop("x must be either a matrix or a vector.")
if (is.vector(x))
}
if (is.vector(x)) {
x <- t(as.matrix(x))
if (!is.numeric(x))
}
if (!is.numeric(x)) {
stop("x must be either a matrix or a vector of integer.")

}

# mu
if (!(is.vector(mu) || is.matrix(mu)))
if (!(is.vector(mu) || is.matrix(mu))) {
stop("mu must be either a matrix or a vector.")
if (is.vector(mu))
}
if (is.vector(mu)) {
mu <- t(as.matrix(mu))
if (!is.numeric(mu))
}
if (!is.numeric(mu)) {
stop("mu must be either a matrix or a vector of integer.")

}

# pi
if (!is.numeric(pi))
if (!is.numeric(pi)) {
stop("pi must be a vector of probabilities.")
if (!is.vector(pi))
}
if (!is.vector(pi)) {
stop("pi must be a vector of probabilities.")
if ((min(pi) < 0) || max(pi) > 1)
}
if ((min(pi) < 0) || max(pi) > 1) {
stop("pi must be a vector of probabilities.")

}

# m
if (!is.numeric(m))
if (!is.numeric(m)) {
stop("m must be a vector of integer.")
if (!is.vector(m))
}
if (!is.vector(m)) {
stop("m must be a vector of integer.")
if (sum(unlist(lapply(m, is.wholenumber))) != length(m))
}
if (sum(unlist(lapply(m, is.wholenumber))) != length(m)) {
stop("m contains non integer.")
if (sum(m) != ncol(x))
}
if (sum(m) != ncol(x)) {
stop("sum(m) and the length of x do not match.")
if (sum(m) != length(mu))
}
if (sum(m) != length(mu)) {
stop("sum(m) and the length of mu do not match.")
if (length(m) != length(pi))
}
if (length(m) != length(pi)) {
stop("the length of pi and m do not match.")

}

# check if mu contains ranks
for (i in 1:length(m))
{
if (!checkRank(mu[, (1 + cumsum(c(0, m))[i]):(cumsum(c(0, m))[i + 1])], m[i]))
for (i in seq_along(m)) {
if (!checkRank(mu[, (1 + cumsum(c(0, m))[i]):(cumsum(c(0, m))[i + 1])], m[i])) {
stop("mu is not correct.")
for (j in 1:nrow(x))
if (!checkRank(x[j, (1 + cumsum(c(0, m))[i]):(cumsum(c(0, m))[i + 1])], m[i]))
}
for (j in seq_len(nrow(x))) {
if (!checkRank(x[j, (1 + cumsum(c(0, m))[i]):(cumsum(c(0, m))[i + 1])], m[i])) {
stop("x is not correct.")
}
}
}
}
Loading

0 comments on commit ef9344d

Please sign in to comment.