From 6f0da9a12ca7c904c1b76c2d58b4c7254274bb49 Mon Sep 17 00:00:00 2001 From: Jean-Baptiste-Camps Date: Sat, 5 May 2018 16:32:17 +0200 Subject: [PATCH] Igraph (#33) * switch to igraph for stemmata and added dotted lines for uncertainty' * basis of a function for a homemade stemma layout * added function layout_as_stemma * corrections * improved PCC.disagreement efficiency * some tests * simplified and documented PCC.Stemma output * plot virtual wits in grey * fixed namespace issue with V --- DESCRIPTION | 8 +- NAMESPACE | 3 +- R/PCC.R | 17 +- R/PCC.Stemma.R | 181 ++++++++++++++------- R/PCC.disagreement.R | 69 ++++---- R/PCC.overconflicting.R | 4 +- R/PCC.reconstructModel.R | 91 +++++------ R/layout_as_stemma.R | 103 ++++++++++++ man/PCC.Rd | 6 +- man/PCC.Stemma.Rd | 20 ++- man/PCC.buildGroup.Rd | 7 +- man/PCC.disagreement.Rd | 7 +- man/PCC.reconstructModel.Rd | 20 ++- man/layout_as_stemma.Rd | 52 ++++++ tests/testthat.R | 1 - tests/testthat/test-PCC.Stemma.R | 116 ++++++++----- tests/testthat/test-PCC.reconstructModel.R | 33 ++-- tests/testthat/test-layout_as_stemma.R | 69 ++++++++ tests/testthat/test-pcc-buildgroup.R | 2 +- 19 files changed, 588 insertions(+), 221 deletions(-) create mode 100644 R/layout_as_stemma.R create mode 100644 man/layout_as_stemma.Rd create mode 100644 tests/testthat/test-layout_as_stemma.R diff --git a/DESCRIPTION b/DESCRIPTION index 244a943..aacf36d 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,19 +1,19 @@ Package: stemmatology Type: Package Title: An R stemmatology package -Version: 0.2.2 -Date: 2014-07-16 +Version: 0.2.3 +Date: 2018-05-04 Author: Jean-Baptiste Camps ; Florian Cafiero Maintainer: Jean-Baptiste Camps Description: This package helps building and analysing the genealogy of textual or musical traditions. BugReports: https://github.com/Jean-Baptiste-Camps/stemmatology/issues -Imports: graphics, stats, utils, network, sna, cluster +Imports: graphics, stats, utils, network, sna, cluster, igraph Suggests: testthat, knitr, rmarkdown, covr -License: GPL-3 +License: GPL-3 | file LICENSE NeedsCompilation: no URL: https://github.com/Jean-Baptiste-Camps/stemmatology VignetteBuilder: knitr diff --git a/NAMESPACE b/NAMESPACE index 72cfc4d..26b9cd9 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -1,7 +1,8 @@ exportPattern("^[[:alpha:]]+") -importFrom("graphics", "axis", "barplot", "legend", "par", "plot") +importFrom("graphics", "axis", "barplot", "legend", "par", "plot","title") importFrom("stats", "fisher.test", "na.omit") importFrom("utils", "read.csv") importFrom("sna", "gplot", "gplot.layout.fruchtermanreingold") importFrom("network", "as.network", "network.vertex.names", "as.matrix.network", "set.vertex.attribute", "get.vertex.attribute") importFrom("cluster", "pam") +importFrom("igraph", "graph_from_edgelist", "layout_as_tree", "plot.igraph", "E", "union", "topo_sort","V","neighbors") diff --git a/R/PCC.R b/R/PCC.R index 703b5c5..e4138aa 100644 --- a/R/PCC.R +++ b/R/PCC.R @@ -1,5 +1,13 @@ "PCC" <- - function(x, omissionsAsReadings = FALSE, alternateReadings = FALSE, limit = 0, recoverNAs = TRUE, pauseAtPlot = FALSE, interactive = TRUE) { + function(x, + omissionsAsReadings = FALSE, + alternateReadings = FALSE, + limit = 0, + recoverNAs = TRUE, + layout_as_stemma = FALSE, + pauseAtPlot = FALSE, + interactive = TRUE) { + # Global shell for the PCC functions. Successively calls PCC.Exploratory # and PCC.Stemma on the dataset NB: TODO(JBC) this function should be # updated when we will have defined appropriate object classes for the @@ -8,6 +16,7 @@ # Here, we will need to have appropriate version of the command for the # various outputs of PCC.Exploratory if is.pccOverconflicting if (is.matrix(pccExploratory)) { + # What does this do? output = pccStemma(pccExploratory) } else { if (!is.matrix(pccExploratory)) { @@ -15,7 +24,7 @@ if (class(pccExploratory) == "pccConflicts" | class(pccExploratory) == "pccOverconflicting" | class(pccExploratory) == "pccContam") { - output = PCC.Stemma(pccExploratory$database, limit = limit, recoverNAs = recoverNAs) + output = PCC.Stemma(pccExploratory$database, limit = limit, recoverNAs = recoverNAs, layout_as_stemma = layout_as_stemma) } else { # if is.pccEquipollent if (class(pccExploratory) == "pccEquipollent") { @@ -24,8 +33,8 @@ } output = as.list(NULL) for (i in 1:length(pccExploratory$databases)) { - pccStemma = PCC.Stemma(pccExploratory$databases[[i]]) - legend("topright", paste("Alternative stemma", i, "out of", + pccStemma = PCC.Stemma(pccExploratory$databases[[i]], limit = limit, recoverNAs = recoverNAs, layout_as_stemma = layout_as_stemma) + graphics::title(sub = paste("Alternative stemma", i, "out of", length(pccExploratory))) if (i < length(pccExploratory$databases)) { readline("Press enter to proceed to next alternative stemma") diff --git a/R/PCC.Stemma.R b/R/PCC.Stemma.R index d2c4e63..493b56d 100644 --- a/R/PCC.Stemma.R +++ b/R/PCC.Stemma.R @@ -3,8 +3,10 @@ PCC.Stemma <- omissionsAsReadings = FALSE, limit = 0, recoverNAs = TRUE, + layout_as_stemma = FALSE, ask = TRUE, verbose = FALSE) { + # TODO(JBC): find a way to avoid redundancy for output ? # TODO(JBC): la sortie de cette fonction n'a pas de classe, il faudrait # l'implémenter # TODO(JBC): Modifier cette fonction pour pouvoir prendre en entrée une @@ -17,17 +19,35 @@ PCC.Stemma <- # PCC.Stemma(x[[i]], omissionsAsReadings = omissionsAsReadings, limit = # limit) output[[i]] = pccStemma # } return(output) # } Deuxième # possibilité, le contenu n'est qu'une seule table de variantes - # TODO: deport output creation to a function to avoid redundancy? - # Explore graphical parameters for stemmata - # , - # pad=1, - # label.pad = 0, - #label.pos=5, - # label.cex=0.7 tableVariantes = x edgelistGlobal = NULL # matrix(c(character(0), character(0)), ncol = 2) - modelsGlobal = as.list(NULL) - modelsByGroupGlobal = as.list(NULL) + models = matrix( + nrow = nrow(tableVariantes), + ncol = 0, + dimnames = list(dimnames(tableVariantes)[[1]])) + modelsByGroup = matrix( + nrow = 1, + ncol = 0, + dimnames = list("Models") + ) + fullDatabase = tableVariantes + # And now, we need to define an internal + # function to collate databases, to + # avoid redundancy + collateDbs <- function(x,y){ + myCols = colnames(y) + for(i in seq_len(length(myCols))){ + # If the columns already exists, + # replace it. + if(myCols[i] %in% colnames(x)){ + x[, myCols[i]] = y[,i] + } else { + # Otherwise, add it + x = cbind(x, y[,i, drop = FALSE]) + } + } + return(x) + } counter = 0 while (ncol(tableVariantes) > 3) { counter = counter + 1 @@ -36,23 +56,29 @@ PCC.Stemma <- if (identical(pccBuildGroup$groups, list())) { message("No group was found. Unable to build stemma.") # Plot the stemma at this step, if it exists + # TODO: deport stemma plotting to avoid redundancy ? if (!is.null(edgelistGlobal)) { - stemma = as.network(edgelistGlobal, - directed = TRUE, - matrix.type = "edgelist") - gplot( - stemma, - displaylabels, - label = network.vertex.names(stemma), - gmode = "digraph", - boxed.labels = TRUE, - usearrows = TRUE - ) + myNetwork = igraph::graph_from_edgelist(edgelistGlobal[,1:2, drop = FALSE], directed = TRUE) + if(layout_as_stemma){ + myLayout = layout_as_stemma(edgelistGlobal) + } + else{ + myLayout = layout_as_tree(myNetwork) + } + # Color the reconstructed wit. + # (i.e., nodes with names starting with { ) + # in grey, others in orange + # NB: this could be made more robust by using a + # vector of color attributes passed between functions + igraph::V(myNetwork)$color = "orange" + igraph::V(myNetwork)[grep('^{', igraph::V(myNetwork)$name, perl=TRUE)]$color = "grey" + igraph::plot.igraph(myNetwork, layout=myLayout) output = as.list(NULL) - output$edgelist = edgelistGlobal + output$fullDatabase = fullDatabase output$database = tableVariantes - output$modelsGlobal = modelsGlobal - output$modelsByGroupGlobal = modelsByGroupGlobal + output$edgelist = edgelistGlobal + output$models = models + output$modelsByGroup = modelsByGroup return(output) } else { return() @@ -66,33 +92,34 @@ PCC.Stemma <- verbose = verbose ) tableVariantes = pccReconstructModel$database + fullDatabase = collateDbs(fullDatabase, tableVariantes) if (!exists("tableVariantes")) { stop("No database found.") return(tableVariantes) } # Now we save the objects given out by PCC.reconstructModel edgelistGlobal = rbind(edgelistGlobal, pccReconstructModel$edgelist) - modelsGlobal[[counter]] = pccReconstructModel$models - modelsByGroupGlobal[[counter]] = pccReconstructModel$modelsByGroup + models = cbind(models,pccReconstructModel$models) + modelsByGroup = cbind(modelsByGroup,pccReconstructModel$modelsByGroup) } - stemma = as.network(edgelistGlobal, - directed = TRUE, - matrix.type = "edgelist") - gplot( - stemma, - displaylabels, - label = network.vertex.names(stemma), - gmode = "digraph", - boxed.labels = TRUE, - usearrows = TRUE - ) if (is.null(tableVariantes)) { # Job's done + myNetwork = igraph::graph_from_edgelist(edgelistGlobal[,1:2, drop = FALSE], directed = TRUE) + if(layout_as_stemma){ + myLayout = layout_as_stemma(edgelistGlobal) + } + else{ + myLayout = layout_as_tree(myNetwork) + } + igraph::V(myNetwork)$color = "orange" + igraph::V(myNetwork)[grep('^{', igraph::V(myNetwork)$name, perl=TRUE)]$color = "grey" + igraph::plot.igraph(myNetwork, layout=myLayout) output = as.list(NULL) - output$edgelist = edgelistGlobal + output$fullDatabase = fullDatabase output$database = tableVariantes - output$modelsGlobal = modelsGlobal - output$modelsByGroupGlobal = modelsByGroupGlobal + output$edgelist = edgelistGlobal + output$models = models + output$modelsByGroup = modelsByGroup return(output) } # There is now less than 4 manuscripts in the database. The method is no @@ -100,6 +127,15 @@ PCC.Stemma <- # and if not, we ask the user if he wants the end of the stemma if (ncol(tableVariantes) > 1) { if(ask){ + myNetwork = igraph::graph_from_edgelist(edgelistGlobal[,1:2, drop = FALSE], directed = TRUE) + if(layout_as_stemma){ + myLayout = layout_as_stemma(edgelistGlobal) + } else{ + myLayout = layout_as_tree(myNetwork) + } + igraph::V(myNetwork)$color = "orange" + igraph::V(myNetwork)[grep('^{', igraph::V(myNetwork)$name, perl=TRUE)]$color = "grey" + igraph::plot.igraph(myNetwork, layout=myLayout) writeLines( "There is now less than four manuscripts in the database.\nStemma building has now lost in accuracy. \nDo you want to continue anyway (take last step with caution) ?\n Y/N\n" ) @@ -111,10 +147,11 @@ PCC.Stemma <- } if (answer == "N") { output = as.list(NULL) - output$edgelist = edgelistGlobal + output$fullDatabase = fullDatabase output$database = tableVariantes - output$modelsGlobal = modelsGlobal - output$modelsByGroupGlobal = modelsByGroupGlobal + output$edgelist = edgelistGlobal + output$models = models + output$modelsByGroup = modelsByGroup return(output) } if (answer == "Y") { @@ -136,33 +173,55 @@ PCC.Stemma <- verbose = verbose ) tableVariantes = pccReconstructModel$database + fullDatabase = collateDbs(fullDatabase, tableVariantes) + models = cbind(models,pccReconstructModel$models) + modelsByGroup = cbind(modelsByGroup,pccReconstructModel$modelsByGroup) + # And here, because we want dashes for the (uncertain) relations + # established as the last step, we will create to + # separate networks with differente properties, before concatening + myNetworkCert = igraph::graph_from_edgelist(edgelistGlobal[,1:2, drop = FALSE], directed = TRUE) + # With full lines for all edges + igraph::E(myNetworkCert)$lty = 1 + # Then dashed for the uncertain ones + myNetworkUncert = igraph::graph_from_edgelist(pccReconstructModel$edgelist[,1:2, drop = FALSE], directed = TRUE) + igraph::E(myNetworkUncert)$lty = 3 + # Then unite them + myNetwork = igraph::union(myNetworkCert, myNetworkUncert, byname=TRUE) + # fusion lty_1 et lty_2 + igraph::E(myNetwork)$lty = ifelse(is.na(igraph::E(myNetwork)$lty_1), + igraph::E(myNetwork)$lty_2,igraph::E(myNetwork)$lty_1) + # Preparing edgelist for the output edgelistGlobal = rbind(edgelistGlobal, pccReconstructModel$edgelist) - modelsGlobal[[counter]] = pccReconstructModel$models - modelsByGroupGlobal[[counter]] = pccReconstructModel$modelsByGroup - stemma = as.network(edgelistGlobal, - directed = TRUE, - matrix.type = "edgelist") - gplot( - stemma, - displaylabels, - label = network.vertex.names(stemma), - gmode = "digraph", - boxed.labels = TRUE, - usearrows = TRUE - ) + # We can rely on how igraph sorts vertices (i.e., using unique on the + # edgelist turned, names <- unique(as.character(t(el))) ) + # to have the same indices for the vertices in the union + # and in the layout function + if(layout_as_stemma){ + myLayout = layout_as_stemma(edgelistGlobal) + } + else{ + myLayout = layout_as_tree(myNetwork) + } + # And plotting + igraph::V(myNetwork)$color = "orange" + igraph::V(myNetwork)[grep('^{', igraph::V(myNetwork)$name, perl=TRUE)]$color = "grey" + igraph::plot.igraph(myNetwork, layout=myLayout, main="Final stemma") + # and output output = as.list(NULL) - output$edgelist = edgelistGlobal + output$fullDatabase = fullDatabase output$database = tableVariantes - output$modelsGlobal = modelsGlobal - output$modelsByGroupGlobal = modelsByGroupGlobal + output$edgelist = edgelistGlobal + output$models = models + output$modelsByGroup = modelsByGroup return(output) } } else { output = as.list(NULL) - output$edgelist = edgelistGlobal + output$fullDatabase = fullDatabase output$database = tableVariantes - output$modelsGlobal = modelsGlobal - output$modelsByGroupGlobal = modelsByGroupGlobal + output$edgelist = edgelistGlobal + output$models = models + output$modelsByGroup = modelsByGroup return(output) } } diff --git a/R/PCC.disagreement.R b/R/PCC.disagreement.R index 8d8294b..0f38518 100644 --- a/R/PCC.disagreement.R +++ b/R/PCC.disagreement.R @@ -5,22 +5,27 @@ PCC.disagreement <- # disagreements, common and oriented omissions, the function also returns, # for information, the count of agreements between manuscripts. tableVariantes = as.matrix(x) - if (is.matrix(x) == FALSE | is.numeric(x) == FALSE) { + if (is.matrix(x) == FALSE || is.numeric(x) == FALSE) { stop ("Input is not a numeric matrix.") } - # tableVariantes = data.matrix(x) #We create tables crossing the whole set + # tableVariantes = data.matrix(x) + #We create tables crossing the whole set # of manuscripts. First, the one useful to build the stemma + myWit = dimnames(tableVariantes)[[2]] + myDimnames = list(myWit,myWit) + severeDisagreement = matrix(nrow = ncol(tableVariantes), ncol = ncol(tableVariantes), - dimnames = c(labels(tableVariantes)[2], labels(tableVariantes)[2])) + dimnames = myDimnames) benigneDisagreement = matrix(nrow = ncol(tableVariantes), ncol = ncol(tableVariantes), - dimnames = c(labels(tableVariantes)[2], labels(tableVariantes)[2])) + dimnames = myDimnames) omissionsInCommon = matrix(nrow = ncol(tableVariantes), ncol = ncol(tableVariantes), - dimnames = c(labels(tableVariantes)[2], labels(tableVariantes)[2])) + dimnames = myDimnames) omissionsOriented = matrix(nrow = ncol(tableVariantes), ncol = ncol(tableVariantes), - dimnames = c(labels(tableVariantes)[2], labels(tableVariantes)[2])) + dimnames = myDimnames) #Then the one being proposed as an FYI agreements = matrix(nrow = ncol(tableVariantes), ncol = ncol(tableVariantes), - dimnames = c(labels(tableVariantes)[2], labels(tableVariantes)[2])) + dimnames = myDimnames) + rm(myDimnames) # Je n'en mets pas pour les NA, qui, en tant que manque d'information, # peuvent être soit des lacunes, soit des oublis d'encodage, soit des # oublis de collation, etc. @@ -31,11 +36,13 @@ PCC.disagreement <- # First case: the lesson of the manuscript is unknown, and we don't do anything if (is.na(tableVariantes[i, j])) { } else { + myWitJ = myWit[j] #If the lesson is known, we compare it to every known lesson. ## Let's start by handling the omissions. if ((tableVariantes[i, j] == 0) && (omissionsAsReadings == FALSE)) { for (k in 1:ncol(tableVariantes)) { + myWitK = myWit[k] # On teste d'abord que l'on ne croise pas avec une # valeur manquante if (is.na(tableVariantes[i, k]) == FALSE) { @@ -43,22 +50,22 @@ PCC.disagreement <- # where an omission in one of the witness has # been compared to the other, so we set the two # values to 0 if they were NA - if (is.na(omissionsInCommon[colnames(tableVariantes)[j], - colnames(tableVariantes)[k]])) { - omissionsInCommon[colnames(tableVariantes)[j], colnames(tableVariantes)[k]] = 0 + if (is.na(omissionsInCommon[myWitJ, + myWitK])) { + omissionsInCommon[myWitJ, myWitK] = 0 } - if (is.na(omissionsOriented[colnames(tableVariantes)[j], - colnames(tableVariantes)[k]])) { - omissionsOriented[colnames(tableVariantes)[j], colnames(tableVariantes)[k]] = 0 + if (is.na(omissionsOriented[myWitJ, + myWitK])) { + omissionsOriented[myWitJ, myWitK] = 0 } # First case: common omission if (tableVariantes[i, j] == tableVariantes[i, k]) { # We use our own function for the table's incrementation. - omissionsInCommon[colnames(tableVariantes)[j],colnames(tableVariantes)[k]] = omissionsInCommon[colnames(tableVariantes)[j],colnames(tableVariantes)[k]] + 1 + omissionsInCommon[myWitJ,myWitK] = omissionsInCommon[myWitJ,myWitK] + 1 } # Second case: oriented omission if (tableVariantes[i, j] < tableVariantes[i, k]) { - omissionsOriented[colnames(tableVariantes)[j],colnames(tableVariantes)[k]] = omissionsOriented[colnames(tableVariantes)[j],colnames(tableVariantes)[k]] +1 + omissionsOriented[myWitJ,myWitK] = omissionsOriented[myWitJ,myWitK] +1 } } } @@ -67,8 +74,8 @@ PCC.disagreement <- for (k in 1:ncol(tableVariantes)) { # if k is nor NA, nor equal to 0 # ou que omissionsAsReadings == TRUE - if ((is.na(tableVariantes[i, k]) == FALSE) && ((tableVariantes[i, - k] != 0) | (omissionsAsReadings == TRUE))) { + myWitK = myWit[k] + if ((is.na(tableVariantes[i, k]) == FALSE) && ((tableVariantes[i, k] != 0) || (omissionsAsReadings == TRUE))) { # Careful, for this is a bit tricky : before # comparing the two readings, we already # have established something: the two @@ -80,25 +87,25 @@ PCC.disagreement <- # NB: this is where we remove NA, that will subsist only if # the category examined (agreements, disagreements, etc.) # between two manuscripts is completely empty. - if (is.na(agreements[colnames(tableVariantes)[j], - colnames(tableVariantes)[k]])) { - agreements[colnames(tableVariantes)[j], colnames(tableVariantes)[k]] = 0 + if (is.na(agreements[myWitJ, + myWitK])) { + agreements[myWitJ, myWitK] = 0 } - if (is.na(severeDisagreement[colnames(tableVariantes)[j], - colnames(tableVariantes)[k]])) { - severeDisagreement[colnames(tableVariantes)[j], - colnames(tableVariantes)[k]] = 0 + if (is.na(severeDisagreement[myWitJ, + myWitK])) { + severeDisagreement[myWitJ, + myWitK] = 0 } - if (is.na(benigneDisagreement[colnames(tableVariantes)[j], - colnames(tableVariantes)[k]])) { - benigneDisagreement[colnames(tableVariantes)[j], - colnames(tableVariantes)[k]] = 0 + if (is.na(benigneDisagreement[myWitJ, + myWitK])) { + benigneDisagreement[myWitJ, + myWitK] = 0 } # Now that it is done, let's proceed to # actual comparison. First case, il y a # accord if j = k, alors agreements++ if (tableVariantes[i, j] == tableVariantes[i, k]) { - agreements[colnames(tableVariantes)[j],colnames(tableVariantes)[k]] = agreements[colnames(tableVariantes)[j],colnames(tableVariantes)[k]] + 1 + agreements[myWitJ,myWitK] = agreements[myWitJ,myWitK] + 1 } # if j != k alors if (tableVariantes[i, j] != tableVariantes[i, k]) { @@ -119,10 +126,10 @@ PCC.disagreement <- # which(x,y) if ((tableVariantes[i, j] %in% tableSansjk) && (tableVariantes[i, k] %in% tableSansjk)) { - severeDisagreement[colnames(tableVariantes)[j],colnames(tableVariantes)[k]] = severeDisagreement[colnames(tableVariantes)[j],colnames(tableVariantes)[k]] +1 + severeDisagreement[myWitJ,myWitK] = severeDisagreement[myWitJ,myWitK] +1 } else { # sinon benigneDisagreements++ - benigneDisagreement[colnames(tableVariantes)[j],colnames(tableVariantes)[k]] = benigneDisagreement[colnames(tableVariantes)[j],colnames(tableVariantes)[k]] +1 + benigneDisagreement[myWitJ,myWitK] = benigneDisagreement[myWitJ,myWitK] +1 } } } diff --git a/R/PCC.overconflicting.R b/R/PCC.overconflicting.R index 9430ac0..2de6518 100644 --- a/R/PCC.overconflicting.R +++ b/R/PCC.overconflicting.R @@ -105,7 +105,7 @@ PCC.overconflicting<- # eux-mêmes overconflicting if (vertexAttributes[colnames(adjacencyTable)[j], 1] != "overconflicting") { - # si non, alors le nœud est undécidable + # si non, alors le noeud est undécidable undecidable = TRUE break() } @@ -116,7 +116,7 @@ PCC.overconflicting<- } } } - # À présent, on va attribuer aux nœuds du réseau les attributs de + # À présent, on va attribuer aux noeuds du réseau les attributs de # couleur. Pour ce faire, on doit récupérer les identifiants à partir des # étiquettes. vertexNameId = network.vertex.names(myNetwork) diff --git a/R/PCC.reconstructModel.R b/R/PCC.reconstructModel.R index 5376710..6e735f5 100644 --- a/R/PCC.reconstructModel.R +++ b/R/PCC.reconstructModel.R @@ -23,13 +23,16 @@ PCC.reconstructModel <- groups = x$groups tableVariantes = x$database newDatabase = x$database - modelsReconstructed = as.list(NULL) + modelsReconstructed = matrix( + nrow = nrow(tableVariantes), + ncol = 0, + dimnames = list(dimnames(tableVariantes)[[1]])) # modelsToAdd is the list that will contain # only the reconstructed models that should be # added to the database. modelsToAdd = as.list(NULL) # The edgelist that will contain the stemmatic information - edgelist = matrix(c(character(0), character(0)), ncol = 2) + edgelist = matrix(c(character(0), character(0), character(0)), ncol = 3) # We create a matrix of models for each group, but we have # to create the labels first groupsLabels = NULL @@ -52,7 +55,7 @@ PCC.reconstructModel <- labelMyMss = paste(myGroup, collapse = "") ##Debug: if (verbose) { - cat("Now comparing group", labelMyMss) + cat("Now comparing group", labelMyMss,"\n") } labelMyModel = paste("{", labelMyMss, "}", sep = "") myModel = matrix( @@ -132,7 +135,9 @@ PCC.reconstructModel <- "Reading not assessable for the group", labelMyMss, "at VL", - rownames(tableVariantes)[j]#, + rownames(tableVariantes)[j], + "\n" + #, #"\nThis can happen sometimes." ) } @@ -152,9 +157,6 @@ PCC.reconstructModel <- # We bind the mss from the group with the virtual model myGroupAndModel = cbind(tableVariantes[, myGroup], myModel) # We compare them - ####TODO(JBC): it might not be a good idea to have this level 1 function - #### call another level 1 function. - #### Perhaps the comparison should go in the higher level global function... myGroupComp = PCC.disagreement(myGroupAndModel) for (m in seq_len(length(myGroup))) { #TODO: only 0 or add an option to treat NA as zeros? (deal @@ -168,7 +170,7 @@ PCC.reconstructModel <- is.na(myGroupComp$omissionsOriented[myGroup[m], labelMyModel]))) { if (verbose) { cat(myGroup[m], "seems to be the model of group", - labelMyMss) + labelMyMss, "\n") } extantModel = c(extantModel, myGroup[m]) } else { @@ -185,7 +187,7 @@ PCC.reconstructModel <- myGroupComp$omissionsOriented[myGroup[m], labelMyModel], "omissions", - "\ntowards the virtual model.\nIt does not seem to be the model" + "\ntowards the virtual model.\nIt does not seem to be the model\n" ) } } @@ -263,7 +265,7 @@ PCC.reconstructModel <- cat( extantModel, "is the only ms. inside the group that seems to be the model of group", - labelMyMss + labelMyMss, "\n" ) } colnames(modelsByGroup)[i] = labelMyMss @@ -280,7 +282,10 @@ PCC.reconstructModel <- !is.na(myModel[r, ])) { # Debug: if(verbose){ - cat("Recovering reading of virtual model", labelMyModel, "at VL", r, "Value was", tableVariantes[r, extantModel], "will be", myModel[r,]) + cat("Recovering reading of virtual model", + labelMyModel, "at VL", r, "Value was", + tableVariantes[r, extantModel], + "will be", myModel[r,],"\n") } tableVariantes[r, extantModel] = myModel[r, ] } @@ -304,7 +309,7 @@ PCC.reconstructModel <- paste( "No ms inside group", labelMyMss, - "seems to be the model. We will proceed\n to a comparison with mss outside the group." + "seems to be the model.\nWe will proceed to a comparison with mss outside the group." ) ) #NB: si nous voulons être rigoureux, il faut que la base de données inclue également les mss retirés aux étapes précédentes? } @@ -327,7 +332,7 @@ PCC.reconstructModel <- is.na(myOthersComp$omissionsOriented[others[n], labelMyModel]) )) { if (verbose) { - cat(others[n], "seems to be the model.") + cat(others[n], "seems to be the model.\n") } extantModel = c(extantModel, others[n]) } else { @@ -344,7 +349,7 @@ PCC.reconstructModel <- myOthersComp$omissionsOriented[others[n], labelMyModel], "omissions", - "towards the virtual model. It does not seem to be the model" + "towards the virtual model.\nIt does not seem to be the model\n" ) } } @@ -366,7 +371,7 @@ PCC.reconstructModel <- } if (length(extantModel) == 1) { if (verbose) { - cat(extantModel, "seems to be the model of this group") + cat(extantModel, "seems to be the model of this group\n") } colnames(modelsByGroup)[i] = labelMyMss modelsByGroup[, i] = extantModel @@ -389,7 +394,7 @@ PCC.reconstructModel <- } } } else { - if(verbose){print("There are no other manuscript left in the database.")} + if(verbose){print("There are no other manuscript left in the database.\n")} } if (length(extantModel) == 0) { # If length is STILL equal to 0, then the manuscript is lost, and we keep @@ -409,21 +414,26 @@ PCC.reconstructModel <- } } ### Here we create the edgelist. - ### If we want to modify edge - ### length, it might be possible using the phylo package... See : - ### [R-sig-phylo] convert edge list to phylo object - ### https://stat.ethz.ch/pipermail/r-sig-phylo/2009-July/000404.html for - ### each manuscript in the group + ### for each manuscript in the group for (p in 1:length(myGroup)) { # if he is not the model if (myGroup[p] != modelsByGroup[i]) { # we add a link between the model and him in the edgelist - edgelist = rbind(edgelist, c(modelsByGroup[i], myGroup[p])) + # as well as a calculation of distance: + # total number of disagreements, and omissions both ways + + myDist = c( + myGroupComp$benigneDisagreement[myGroup[p],modelsByGroup[i]], + myGroupComp$omissionsOriented[myGroup[p],modelsByGroup[i]], + myGroupComp$omissionsOriented[modelsByGroup[i],myGroup[p]]) + myDist[is.na(myDist)] = 0 + myDist = sum(myDist) + edgelist = rbind(edgelist, c(modelsByGroup[i], myGroup[p], myDist)) # we add the wit. to the descripti (to be removed) list descripti = c(descripti, myGroup[p]) } } - modelsReconstructed[[i]] = myModel + modelsReconstructed = cbind(modelsReconstructed,myModel) } # modelsToAdd is the list containing the database for each virtual model, if # and only if the virtual model could not be identified with an existing @@ -454,34 +464,15 @@ PCC.reconstructModel <- # So, to avoid that, you have to set ,drop = FALSE database = tableVariantes[, nonDescripti, drop = FALSE] } - output$database = database # the edgelist - # Debug: plot the stemma (no edge length modification for the moment) - if(verbose){ - stemma = as.network(edgelist, directed = TRUE, matrix.type = "edgelist") - gplot(stemma, displaylabels, label = network.vertex.names(stemma), gmode = "digraph", - boxed.labels = TRUE, usearrows = TRUE) - } + output$database = database + # the edgelist + # Debug: plot the stemma + #if(verbose){ + # myNetwork = igraph::graph_from_edgelist(edgelist, directed = TRUE) + # igraph::plot.igraph(myNetwork, layout=layout_as_tree) + #} output$edgelist = edgelist # and the rest output$models = modelsReconstructed output$modelsByGroup = modelsByGroup - return(output) - ## Adjust edgelength using igraph => NB, incompatible avec network, donc fait buguer les autres fonctions... - # library(igraph) edgelist = matrix( - # c('A','B','B','A','A','B','C','D','F','G'), ncol = 2 ) g = - # graph(edgelist, directed = TRUE) Pour calculer le poids, qui est - # inversement proportionnel à la distance (désaccords+omissionsoreientées - # dans les deux sens), en le rendant égal à 1/d length = c(29, 12, 10, 0, - # 28) ## On a besoin que le poids soit une valeur positive pour que ça - # marche à peu près, et plus le poids est élevé, plus on va avoir des - # nœuds proches. Pour l'implémenter, il faudrait donc calculer le nombre - # maximal de désaccords dans toute la tradition, mettons par ex. 30, et - # soustraire pour chaque ms. son nombre de désaccord de ce total, par ex. - # pour un ms. ayant deux désaccords, on passe à poids = 28, etc. et pour - # celui en ayant 30 à 0 (la solution d'utiliser des poids négatifs fait - # buguer l'algorithme) E(g)$weight = length l = - # layout.fruchterman.reingold(g, weights=E(g)$weight, niter = 100000 ) - # plot(g, layout=l) Autre solution, plus dans l'esprit (mais ne gérant - # pas la contamination?), utiliser le module phylo. À partir d'une - # edgelist : - # https://stat.ethz.ch/pipermail/r-sig-phylo/2009-July/000405.html + return(output) } diff --git a/R/layout_as_stemma.R b/R/layout_as_stemma.R new file mode 100644 index 0000000..7adbe02 --- /dev/null +++ b/R/layout_as_stemma.R @@ -0,0 +1,103 @@ +layout_as_stemma <- + function(x) { + # A function to create an igraph layout for stemma, where + # witnesses as placed at a vertical distance from their parent + # consistent with the number of disagreements and omissions + # TODO: try to avoid horizontal overlapping of vertices + # # # VOIR SI ON VEUT ÇA x : a directed igraph graph + # x : an edgelist with distances in third column + #myNetwork = x + edgelist = x + + myNetwork = igraph::graph_from_edgelist(edgelist[, 1:2], directed = TRUE) + + # Let's find roots + roots = which(sapply(sapply(igraph::V(myNetwork),function(x) igraph::neighbors(myNetwork,x, mode="in")), length) == 0) + + myLayout = igraph::layout_as_tree(myNetwork, root = roots, mode = "out") + + #First, it's good to have a topological sort, to have root first + mySortedNet = igraph::topo_sort(myNetwork, mode = "out") + + for (i in seq_len(length(mySortedNet))) { + # For each node, we get connection and weight from + # the input edgelist with it's label + for (j in seq_len(length(edgelist[, 1][edgelist[, 1] == mySortedNet[i]$name]))) { + # We look towards wich node it is connected, and adjust their + # (vertical) position using the edgelist + # the new position is equal to the parent node position - dist from him + # (we need substraction to go top -> down) + # N.B.: with this calculation, a wit. derived from several models + # will be placed according to its distance with the last parent + # in the topological sort + # To get parent pos, we need to get its numeric index from its name + # Warning: this is going to get a bit ugly with cross-references + newPos = + myLayout[as.numeric(V(myNetwork)[mySortedNet[i]$name]) , 2] - + as.numeric(edgelist[, 3][edgelist[, 1] == mySortedNet[i]$name][j]) + # NB: as.numeric is used to get node index from its name + # and modify position of the children + myLayout[as.numeric(V(myNetwork)[edgelist[, 2][edgelist[, 1] == mySortedNet[i]$name][j]]) + , 2] = newPos + } + } + # # And now, let's try to fix superpositions + # # first, let's take all unique horizontal coords + # myYs = sort(unique(myLayout[, 2])) + # for (i in seq_len(length(myYs))) { + # # Do we have more than one node ? + # if (length(myLayout[, 2][myLayout[, 2] == myYs[i]]) > 1) { + # # Are some of them at the same horizontal placement ? + # myXs = sort(unique(myLayout[, 1][myLayout[, 2] == myYs[i]])) + # # if we have less unique value than values + # if (length(myXs) < length(myLayout[, 1][myLayout[, 2] == myYs[i]])) { + # # for each values, let's see if it is close to another node + # # (within myRange) + # myRange = 0.2 + # for (j in seq_len(length(myXs))) { + # if (length(myLayout[, 1][myLayout[, 2] == myYs[i] & + # (myLayout[, 1] >= (myXs[j] - myRange) & + # myLayout[, 1] <= (myXs[j] + myRange))]) + # > 1) { + # # And now, it becomes hard: let's try to avoid overlap + # # without creating new overlaps + # for (k in seq_len(length(myLayout[, 1][myLayout[, 2] == myYs[i] & + # (myLayout[, 1] >= (myXs[j] - myRange) & + # myLayout[, 1] <= (myXs[j] + myRange))]))) { + # # let's determine how much we want to move it, as a function + # # of its heigth (the higher, the more it is moved) + # mov = sqrt((min(myYs)- myYs[i])^2) + 1 + # # if it is in the first half, move it to the left, otherwise, + # # move it to the right + # myLength = length(myLayout[, 1][myLayout[, 2] == myYs[i] & + # (myLayout[, 1] >= (myXs[j] - myRange) & + # myLayout[, 1] <= (myXs[j] + myRange))]) + # if (k <= (myLength/2)){ + # myLayout[, 1][myLayout[, 2] == myYs[i] & + # (myLayout[, 1] >= (myXs[j] - myRange) & + # myLayout[, 1] <= (myXs[j] + myRange))][k] = + # myLayout[, 1][myLayout[, 2] == myYs[i] & + # (myLayout[, 1] >= (myXs[j] - myRange) & + # myLayout[, 1] <= (myXs[j] + myRange))][k] - mov * (1/k) + # } + # else{ + # myLayout[, 1][myLayout[, 2] == myYs[i] & + # (myLayout[, 1] >= (myXs[j] - myRange) & + # myLayout[, 1] <= (myXs[j] + myRange))][k] = + # myLayout[, 1][myLayout[, 2] == myYs[i] & + # (myLayout[, 1] >= (myXs[j] - myRange) & + # myLayout[, 1] <= (myXs[j] + myRange))][k] + mov * (1/(myLength - k + 1)) + # } + # } + # } + # } + # } + # } + # } + # A layout object to be passed to igraph + return(myLayout) + # Maybe should return + # a list, with + # graph: an igraph graph + # layout: a layout to be passed to igraph + } diff --git a/man/PCC.Rd b/man/PCC.Rd index 05c5727..7ae4624 100644 --- a/man/PCC.Rd +++ b/man/PCC.Rd @@ -7,7 +7,7 @@ Global shell for all the PCC functions, both exploratory and stemma-building. This command successively executes PCC.Exploratory and PCC.Stemma, while asking user for input when necessary. } \usage{ -PCC(x, omissionsAsReadings = FALSE, alternateReadings = FALSE, limit = 0, recoverNAs = TRUE, pauseAtPlot = FALSE, interactive = TRUE) +PCC(x, omissionsAsReadings = FALSE, alternateReadings = FALSE, limit = 0, recoverNAs = TRUE, layout_as_stemma = FALSE, pauseAtPlot = FALSE, interactive = TRUE) } %- maybe also 'usage' for other objects documented here. \arguments{ @@ -28,6 +28,10 @@ PCC(x, omissionsAsReadings = FALSE, alternateReadings = FALSE, limit = 0, recove } \item{recoverNAs}{ logical; if \code{TRUE}, when an actual witness or reconstructed subarchetype is identified to the reconstructed model of a group, every NA it has is recovered by taking the value of the reconstructed model; if \code{FALSE}, their NAs values are kept. Default: \code{TRUE}. +} +\item{layout_as_stemma}{logical; if TRUE, the witnesses will be placed vertically +according to the distance from their parent, as per the function \code{\link{layout_as_stemma}} (experimental!) +Default: FALSE } \item{pauseAtPlot}{logical; if \code{TRUE}, the algorithms stops at each plot during the execution of \code{PCC.contam} (by setting graphical parameter \code{ask = TRUE}). Default: \code{FALSE}. } diff --git a/man/PCC.Stemma.Rd b/man/PCC.Stemma.Rd index b4fc988..e469c48 100644 --- a/man/PCC.Stemma.Rd +++ b/man/PCC.Stemma.Rd @@ -6,13 +6,10 @@ PCC.Stemma: Building the Stemma Codicum } \description{The \code{PCC.Stemma} function calls successively the functions PCC.disagreement, PCC.buildgroup and PCC.reconstructModel to build a stemma codicum of the tradition studied. By default, it stops when less than four manuscripts are to be compared, as the possibility of errors becomes high. The user is however able to ask the algorithm its final answer for those last manuscripts. - -%% ~~ A concise (1-5 lines) description of what the function does. ~~ } \usage{ -PCC.Stemma(x, omissionsAsReadings = FALSE, limit = 0, recoverNAs= TRUE, ask = TRUE, verbose = FALSE) +PCC.Stemma(x, omissionsAsReadings = FALSE, limit = 0, recoverNAs= TRUE, layout_as_stemma = FALSE, ask = TRUE, verbose = FALSE) } -%- maybe also 'usage' for other objects documented here. \arguments{ \item{x}{a numeric matrix, with witnesses in columns, variant locations in rows, and readings coded by a number; or a character matrix, with witnesses in columns, variant locations in rows, and, in each cell, one or several readings, coded by numbers and separated by a comma (e.g. '1,2,3', if the witness has three different readings). } @@ -28,9 +25,12 @@ The maximum number of severe disagreements expected for witnesses to be in the s } \item{recoverNAs}{ logical; if \code{TRUE}, when an actual witness or reconstructed subarchetype is identified to the reconstructed model of a group, every NA it has is recovered by taking the value of the reconstructed model; if \code{FALSE}, their NAs values are kept. - Default: \code{TRUE}. } +\item{layout_as_stemma}{logical; if TRUE, the witnesses will be placed vertically +according to the distance from their parent, as per the function \code{\link{layout_as_stemma}} (experimental!) +Default: FALSE +} \item{ask}{ logical; if FALSE, decisions will be made without asking the user for input. Default: TRUE @@ -44,7 +44,13 @@ The maximum number of severe disagreements expected for witnesses to be in the s \details{ %% ~~ If necessary, more details than the description above ~~ } -\value{The function returns either a single object of class \code{"pccStemma"}, or a list containing several objects of class \code{"pccStemma"} (if multiple stemmatas were drawn) +\value{The function returns either a single list, or a list containing several lists (if multiple stemmata were drawn). Each list contains: +\item{fullDatabase}{The full database, with the new reconstructed models + and recovered NAs (if applicable).} + \item{database}{The same with the \emph{descripti} removed.} + \item{edgelist}{An edgelist expressing the relations between the witnesses of each group with, as a third column, the distances between witnesses.} + \item{models}{A list containing the database of readings for each model at the time of their reconstruction (i.e., before they are compared to extant witnesses).} + \item{modelsByGroup}{A matrix with, in columns the groups, and a single row containing the label of their model.} } \references{ Jean-Baptiste Camps et Florian Cafiero, « Genealogical variant locations and simplified stemma: a test case », in \emph{Analysis of Ancient and Medieval Texts and Manuscripts: Digital Approaches}, dir. Tara Andrews & Caroline Macé, Turnhout, 2015, p. 69‑93 (Lectio, 1), \url{http://halshs.archives-ouvertes.fr/halshs-01435633/}. @@ -63,7 +69,7 @@ Jean-Baptiste Camps (\email{jbcamps@hotmail.com}) & Florian Cafiero (\email{flor %% ~Make other sections like Warning with \section{Warning }{....} ~ \seealso{ -\code{\link{PCC.disagreement}}, \code{\link{PCC.buildGroup}}, \code{\link{PCC.reconstructModel}}. +\code{\link{PCC.disagreement}}, \code{\link{PCC.buildGroup}}, \code{\link{PCC.reconstructModel}}, \code{\link{layout_as_stemma}}. } \examples{ diff --git a/man/PCC.buildGroup.Rd b/man/PCC.buildGroup.Rd index 0d8440c..92bbd22 100644 --- a/man/PCC.buildGroup.Rd +++ b/man/PCC.buildGroup.Rd @@ -2,8 +2,7 @@ \name{PCC.buildGroup} \alias{PCC.buildGroup} %- Also NEED an '\alias' for EACH other topic documented here. -\title{PCC.buildgroup -} +\title{PCC.buildgroup} \description{ \code{PCC.buildgroup} groups together witnesses in relevant clusters. } @@ -17,12 +16,12 @@ A PCC.disagreement object. } \item{limit}{ The maximum number of severe disagreements allowed for two witnesses in the same group. Default (and advised) value: \code{0}. + } \item{ask}{ logical; if FALSE, decisions will be made without asking the user for input. Default: TRUE } } -} \details{ %% ~~ If necessary, more details than the description above ~~ } @@ -54,7 +53,5 @@ Jean-Baptiste Camps (\email{jbcamps@hotmail.com}) & Florian Cafiero \examples{ } -% Add one or more standard keywords, see file 'KEYWORDS' in the -% R documentation directory. \keyword{stemma} \keyword{stemmatology} diff --git a/man/PCC.disagreement.Rd b/man/PCC.disagreement.Rd index 29bdff2..9d1af1f 100644 --- a/man/PCC.disagreement.Rd +++ b/man/PCC.disagreement.Rd @@ -53,7 +53,12 @@ Jean-Baptiste Camps & Florian Cafiero \code{\link{PCC.Stemma}}, \code{\link{PCC.buildGroup}}, \code{\link{PCC.reconstructModel}}. } \examples{ - +#Load a tradition +data("fournival") +#Option: explore the tradition to see problems in variant locations +#PCC.Exploratory(fournival) +#Calculate disagreements +PCC.disagreement(fournival) } % Add one or more standard keywords, see file 'KEYWORDS' in the % R documentation directory. diff --git a/man/PCC.reconstructModel.Rd b/man/PCC.reconstructModel.Rd index 0b57647..fa50ae0 100644 --- a/man/PCC.reconstructModel.Rd +++ b/man/PCC.reconstructModel.Rd @@ -43,10 +43,9 @@ The function returns a list containing \item{fullDatabase}{The full database, with the new reconstructed models and recovered NAs (if applicable).} \item{database}{The same with the \emph{descripti} removed.} - \item{edgelist}{An edgelist expressing the relations between the witnesses of each group.} - \item{models}{A list containing the database of readings for each model.} + \item{edgelist}{An edgelist expressing the relations between the witnesses of each group with, as a third column, the distances between witnesses.} + \item{models}{A list containing the database of readings for each model at the time of their reconstruction (i.e., before they are compared to extant witnesses).} \item{modelsByGroup}{A matrix with, in columns the groups, and a single row containing the label of their model.} - } \references{ Jean-Baptiste Camps et Florian Cafiero, « Genealogical variant locations and simplified stemma: a test case », in \emph{Analysis of Ancient and Medieval Texts and Manuscripts: Digital Approaches}, dir. Tara Andrews & Caroline Macé, Turnhout, 2015, p. 69‑93 (Lectio, 1), \url{http://halshs.archives-ouvertes.fr/halshs-01435633/}. @@ -65,7 +64,20 @@ Jean-Baptiste Camps (\email{jbcamps@hotmail.com}) & Florian Cafiero %% ~Make other sections like Warning with \section{Warning }{....} ~ \examples{ - +#A fictional simple tradition +x = list(database = matrix( + c( + 1,0,1,1,1,1,1,1, + 1,0,1,2,2,2,1,2, + 1,0,0,3,2,1,NA,3, + 2,0,1,4,NA,1,1,1, + 2,1,2,5,2,1,1,4 + ), nrow = 8, ncol = 5, + dimnames = list(c("VL1","VL2","VL3","VL4","VL5","VL6","VL7","VL8"), + c("A","B","C","D","E"))), + groups = list(c("A", "B", "C"), c("D", "E"))) +#And now, reconstruct the groups +PCC.reconstructModel(x) } \seealso{ diff --git a/man/layout_as_stemma.Rd b/man/layout_as_stemma.Rd new file mode 100644 index 0000000..07edf24 --- /dev/null +++ b/man/layout_as_stemma.Rd @@ -0,0 +1,52 @@ +\name{layout_as_stemma} +\alias{layout_as_stemma} +%- Also NEED an '\alias' for EACH other topic documented here. +\title{layout_as_stemma} +\description{ +\code{layout_as_stemma} creates a tree-like layout from an edgelist, +where nodes are placed horizontally according to +a measure of distance from their parent node. +%% ~~ A concise (1-5 lines) description of what the function does. ~~ +} +\usage{ +layout_as_stemma(x) +} +%- maybe also 'usage' for other objects documented here. +\arguments{ + \item{x}{an edgelist containing, as a third column, the distance between the + two nodes.} +} +\details{ +The distance between the nodes will usually correspond to the number of +different readings (disagreements and omissions). If a node has several +parents, the function will consider only the distance from the last parent +in topological order.} +\value{ +A layout, i.e. a matrix of two columns, giving x,y coordinates for each node. +} +%\references{ +%% ~put references to the literature/web site here ~ +%} +\author{Jean-Baptiste Camps} +%\note{ +%% ~~further notes~~ +%} + +\section{Warning}{This function is experimental. Horizontal overlapping may occur +has a result.} + +\seealso{ +\code{\link{PCC.Stemma}}, \code{\link{PCC.reconstructModel}}.} +\examples{ +edgelist = structure( + c("{ABC}", "{ABC}", "{ABC}", "D", "A","A","G", + "A", "B", "C", "E", "F","G","H", + 1,5,3,10,3,4,5), .Dim = c(7L, 3L) + ) +g = igraph::graph_from_edgelist(edgelist[,1:2], directed = TRUE) +layout = layout_as_stemma(edgelist) +plot(g, layout = layout) +} +\keyword{stemmatology} +\keyword{stemma} +\keyword{graphs} diff --git a/tests/testthat.R b/tests/testthat.R index f1e4d0c..15bbd33 100644 --- a/tests/testthat.R +++ b/tests/testthat.R @@ -1,4 +1,3 @@ library(testthat) library(stemmatology) - test_check("stemmatology") diff --git a/tests/testthat/test-PCC.Stemma.R b/tests/testthat/test-PCC.Stemma.R index 518887a..5eb2f6b 100644 --- a/tests/testthat/test-PCC.Stemma.R +++ b/tests/testthat/test-PCC.Stemma.R @@ -13,13 +13,13 @@ test_that("PCC.Stemma works properly", { c("A","B","C","D","E"))) results = list( - edgelist = structure( - c( - "{ABC}","{ABC}","{ABC}","D","{D{ABC}}","{D{ABC}}", - "A","B","C","E","D","{ABC}" - ), - .Dim = c(6L, 2L) - ), + fullDatabase = structure( + c(1, 0, 1, 1, 1, 1, 1, 1, 1, 0, 1, 2, 2, 2, 1, 2, 1, + 0, 0, 3, 2, 1, NA, 3, 2, 0, 1, 4, 2, 1, 1, 1, 2, 1, 2, 5, 2, + 1, 1, 4, 1, 0, 1, NA, 2, 1, 1, 1, NA, 0, 1, 4, 2, 1, 1, 1), + .Dim = c(8L, 7L), + .Dimnames = list(c("VL1", "VL2", "VL3", "VL4", "VL5", "VL6", "VL7", "VL8"), + c("A", "B", "C", "D", "E", "{ABC}", "{D{ABC}}"))), database = structure( c(NA, 0, 1, 4, 2, 1, 1, 1), .Dim = c(8L, 1L), @@ -28,44 +28,86 @@ test_that("PCC.Stemma works properly", { "{D{ABC}}" ) ), - modelsGlobal = list(list( - structure( - c(1, 0, 1, NA, 2, 1, 1, 1), - .Dim = c(8L, 1L), - .Dimnames = list(c( - "VL1", "VL2", "VL3", "VL4", "VL5", "VL6", "VL7", "VL8" - ), "{ABC}") + edgelist = structure( + c( + "{ABC}","{ABC}","{ABC}","D","{D{ABC}}","{D{ABC}}", + "A","B","C","E","D","{ABC}", + "1","2","2","4","0","0" ), - structure( - c(2, NA, 1, NA, 2, 1, 1, 1), - .Dim = c(8L, 1L), + .Dim = c(6L, 3L) + ), + models = structure( + c(1, 0, 1, NA, 2, 1, 1, 1, + 2, NA, 1, NA, 2, 1, 1, 1, + NA, 0, 1, 4, 2, 1, 1, 1 + ), + .Dim = c(8L, 3L), .Dimnames = list(c( "VL1", "VL2", "VL3", "VL4", "VL5", "VL6", "VL7", "VL8" - ), "{DE}") - ) - ), list(structure( - c(NA, 0, 1, 4, 2, 1, 1, 1), - .Dim = c(8L, 1L), - .Dimnames = list( - c("VL1", "VL2", "VL3", "VL4", "VL5", "VL6", "VL7", "VL8"), - "{D{ABC}}" - ) - ))), - modelsByGroupGlobal = list( - structure( - c("{ABC}", "D"), - .Dim = 1:2, - .Dimnames = list("Models", c("ABC", "DE")) + ), c("{ABC}", "{DE}","{D{ABC}}")) ), - structure( - "{D{ABC}}", - .Dim = c(1L, 1L), - .Dimnames = list("Models", "D{ABC}") + modelsByGroup = structure( + c("{ABC}", "D","{D{ABC}}"), + .Dim = c(1L, 3L), + .Dimnames = list("Models", c("ABC", "DE","D{ABC}")) ) ) - ) expect_equal(PCC.Stemma(x, ask = FALSE), results) }) +test_that("PCC.Stemma works properly when no group can be built", { + # No group at first level + x = matrix( + c( + 1,1,1,0, + 1,2,1,1, + 1,2,2,2, + 2,1,1,1, + 2,1,1,2 + ), nrow = 4, ncol = 5, + dimnames = list(c("VL1","VL2","VL3","VL4"), + c("A","B","C","D","E"))) + + expect_equal(expect_message(PCC.Stemma(x, ask = FALSE)), NULL) + + # No group at second level with omissionsAsReadings to FALSE + + x = matrix( + c( + 1,1,1,0, + 1,2,1,1, + 1,2,2,2, + 2,1,1,1, + 2,1,1,2, + 2,0,1,2 + ), nrow = 4, ncol = 6, + dimnames = list(c("VL1","VL2","VL3","VL4"), + c("A","B","C","D","E","F"))) + + results = structure( + list(fullDatabase = matrix( + c( + 1,1,1,0, + 1,2,1,1, + 1,2,2,2, + 2,1,1,1, + 2,1,1,2, + 2,0,1,2 + ), nrow = 4, ncol = 6, + dimnames = list(c("VL1","VL2","VL3","VL4"), + c("A","B","C","D","E","F"))), + database = + structure(c(1, 1, 1, 0, 1, 2, 1, 1, 1, 2, 2, 2, 2, 1, 1, 1, 2, 1, 1, 2), + .Dim = 4:5, + .Dimnames = list(c("VL1", "VL2", "VL3", "VL4"), + c("A", "B", "C", "D", "E"))), + edgelist = structure(c("E", "F", "1"), .Dim = c(1L,3L)), + models = structure(c(2, 1, 1, 2), .Dim = c(4L, 1L), .Dimnames = list(c("VL1", "VL2", "VL3", "VL4"), "{EF}")), + modelsByGroup = structure("E", .Dim = c(1L, 1L), .Dimnames = list("Models", "EF")))) + + expect_equal(expect_message(PCC.Stemma(x, ask = FALSE)), results) + +}) + #TODO: implement more tests to see if options are passed along properly diff --git a/tests/testthat/test-PCC.reconstructModel.R b/tests/testthat/test-PCC.reconstructModel.R index b3fa7c0..e3b307b 100644 --- a/tests/testthat/test-PCC.reconstructModel.R +++ b/tests/testthat/test-PCC.reconstructModel.R @@ -19,12 +19,20 @@ test_that("Models are reconstructed correctly", { fullDatabase = structure( c(1, 0, 1, 1, 1, 1, 1, 1, 1, 0, 1, 2, 2, 2, 1, 2, 1, 0, 0, 3, 2, 1, NA, 3, 2, 0, 1, 4, 2, 1, 1, 1, 2, 1, 2, 5, 2, 1, 1, 4, 1, 0, 1, NA, 2, 1, 1, 1), .Dim = c(8L, 6L), .Dimnames = list(c("VL1", "VL2", "VL3", "VL4", "VL5", "VL6", "VL7", "VL8"), c("A", "B", "C", "D", "E", "{ABC}"))), database = structure(c(2, 0, 1, 4, 2, 1, 1, 1, 1, 0, 1, NA, 2, 1, 1, 1), .Dim = c(8L, 2L), .Dimnames = list(c("VL1", "VL2", "VL3", "VL4", "VL5", "VL6", "VL7", "VL8"), c("D", "{ABC}"))), - edgelist = structure(c("{ABC}", "{ABC}", "{ABC}", "D", "A", "B", "C", "E"), .Dim = c(4L, 2L)), - models = list( - structure(c(1, 0, 1, NA, 2, 1, 1, 1), .Dim = c(8L, 1L), .Dimnames = list(c("VL1", "VL2", "VL3", "VL4", "VL5", "VL6", "VL7", "VL8"), "{ABC}")), - structure(c(2, NA, 1, NA, 2, 1, 1, 1), .Dim = c(8L, 1L), .Dimnames = list(c("VL1", "VL2", "VL3", "VL4", "VL5", "VL6", "VL7", "VL8"), "{DE}"))), - modelsByGroup = structure(c("{ABC}", "D"), .Dim = 1:2, .Dimnames = list("Models", c("ABC", "DE")))) - + edgelist = structure( + c("{ABC}", "{ABC}", "{ABC}", "D", + "A", "B", "C", "E", + "1","2","2","4" + ), .Dim = c(4L, 3L)), + models = + matrix(c(1, 0, 1, NA, 2, 1, 1, 1,2, NA, 1, NA, 2, 1, 1, 1), + nrow = 8, ncol = 2, + dimnames = + list(c("VL1", "VL2", "VL3", "VL4", "VL5", "VL6", "VL7", "VL8"), + c("{ABC}","{DE}"))), + modelsByGroup = structure(c("{ABC}", "D"), + .Dim = 1:2, .Dimnames = list("Models", c("ABC", "DE")))) + expect_equal(PCC.reconstructModel(x), results) results = list( @@ -32,18 +40,21 @@ test_that("Models are reconstructed correctly", { c(1, 0, 1, 1, 1, 1, 1, 1, 1, 0, 1, 2, 2, 2, 1, 2, 1, 0, 0, 3, 2, 1, NA, 3, 2, 0, 1, 4, 2, 1, 1, 1, 2, 1, 2, 5, 2, 1, 1, 4, 1, 0, 1, NA, 2, 1, 1, 1), .Dim = c(8L, 6L), .Dimnames = list(c("VL1", "VL2", "VL3", "VL4", "VL5", "VL6", "VL7", "VL8"), c("A", "B", "C", "D", "E", "{ABC}"))), database = structure(c(2, 0, 1, 4, 2, 1, 1, 1, 1, 0, 1, NA, 2, 1, 1, 1), .Dim = c(8L, 2L), .Dimnames = list(c("VL1", "VL2", "VL3", "VL4", "VL5", "VL6", "VL7", "VL8"), c("D", "{ABC}" ))), - edgelist = structure(c("{ABC}", "{ABC}", "{ABC}", "D", "A", "B", "C", "E"), .Dim = c(4L, 2L)), - models = list(structure(c(1, 0, 1, NA, 2, 1, 1, 1), .Dim = c(8L, 1L), .Dimnames = list(c("VL1", "VL2", "VL3", "VL4", "VL5", "VL6", "VL7", "VL8"), "{ABC}")), structure(c(2, 0, 1, NA, 2, 1, 1, 1), .Dim = c(8L, 1L), .Dimnames = list( c("VL1", "VL2", "VL3", "VL4", "VL5", "VL6", "VL7", "VL8" ), "{DE}"))), + edgelist = structure( + c("{ABC}", "{ABC}", "{ABC}", "D", "A", "B", "C", "E", + "1","2","2","4" + ), .Dim = c(4L, 3L)), + models = + matrix(c(1, 0, 1, NA, 2, 1, 1, 1,2, 0, 1, NA, 2, 1, 1, 1), nrow=8, ncol=2, dimnames = list(c("VL1", "VL2", "VL3", "VL4", "VL5", "VL6", "VL7", "VL8"), c("{ABC}","{DE}"))), modelsByGroup = structure(c("{ABC}", "D" ), .Dim = 1:2, .Dimnames = list("Models", c("ABC", "DE")))) expect_equal(PCC.reconstructModel(x, omissionsAsReadings = TRUE), results) - result = PCC.reconstructModel(x, recoverNAs = FALSE) + result = expect_output(PCC.reconstructModel(x, recoverNAs = FALSE, verbose = TRUE)) expect_equal(result$fullDatabase[5,4], as.double(NA)) expect_equal(result$database[5,1], as.double(NA)) - expect_equal(result$models[[2]][5], 2) - + expect_equal(result$models[5,2], 2) }) #TODO: add more tests, for ask, verbose, etc. diff --git a/tests/testthat/test-layout_as_stemma.R b/tests/testthat/test-layout_as_stemma.R new file mode 100644 index 0000000..de58a6c --- /dev/null +++ b/tests/testthat/test-layout_as_stemma.R @@ -0,0 +1,69 @@ +context("layout_as_stemma") + +test_that("Layout as stemma works", { + + edgelist = structure( + c("{ABC}", "{ABC}", "{ABC}", "D", + "A", "B", "C", "E", + 1,5,3,10), .Dim = c(4L, 3L) + ) + + expect_equal( + object = layout_as_stemma(edgelist), + expected = structure( + c(-1, -2, -1, 0, 1, 1, + 1, 0, -4, -2, 1, -9), + .Dim = c(6L, 2L)) + ) + + # With more levels + edgelist = structure( + c("{ABC}", "{ABC}", "{ABC}", "D", "A","A","G", + "A", "B", "C", "E", "F","G","H", + 1,5,3,10,3,4,5), .Dim = c(7L, 3L) + ) + + layout = layout_as_stemma(edgelist) + + expect_equal(layout, structure(c(-1, -2, -1, 0, 1, 1, -2.5, -1.5, -1.5, + 3, 2, -2, 0, 3, -7, -1, -2, -7), .Dim = c(9L, 2L))) + + # With simple to handle contamination + edgelist = structure( + c("{ABC}", "{ABC}", "{ABC}", "D", "A","A","F", + "A", "B", "C", "E", "F","G","G", + 1,5,3,10,3,4,2), .Dim = c(7L, 3L) + ) + + layout = layout_as_stemma(edgelist) + expect_equal(layout, structure(c(-1, -2, -1, 0, 1, 1, -2.5, -1.5, + 2, 1, -3, -1, 2, -8, -2, -4), + .Dim = c(8L, 2L))) + + + # With hard to handle contamination + # visualisation problem due to original 'layout_as_tree' function ? + edgelist = structure( + c("{ABC}", "{ABC}", "{ABC}", "D", "A","A","D", + "A", "B", "C", "E", "F","G","B", + 1,5,3,10,3,4,2), .Dim = c(7L, 3L) + ) + + layout = layout_as_stemma(edgelist) + expect_equal(layout, structure(c(-1, -2, -1, 0, 1, 1, -2.5, -1.5, 2, 1, 0, -1, 2, + -8, -2, -3), .Dim = c(8L, 2L))) + + # With very hard to handle contamination + + edgelist = structure( + c("{ABC}", "{ABC}", "{ABC}", "D", "A","A","E", + "A", "B", "C", "E", "F","G","B", + 1,5,3,10,3,4,2), .Dim = c(7L, 3L) + ) + #myNetwork = igraph::graph_from_edgelist(edgelist[,1:2], directed = TRUE) + layout = layout_as_stemma(edgelist) + #plot(myNetwork, layout = layout) + expect_equal(layout,structure(c(-1, -2, -1, 0, 1, 1, -2.5, -1.5, 2, 1, -10, -1, 2, + -8, -2, -3), .Dim = c(8L, 2L))) + +}) diff --git a/tests/testthat/test-pcc-buildgroup.R b/tests/testthat/test-pcc-buildgroup.R index 2d233d6..d7224a3 100644 --- a/tests/testthat/test-pcc-buildgroup.R +++ b/tests/testthat/test-pcc-buildgroup.R @@ -1,4 +1,4 @@ -context("PCC.buildgroup") +context("PCC.buildGroup") test_that("Groups are built correctly", { #First, we fake a PCC.disagreement object