Skip to content

Commit

Permalink
Igraph (#33)
Browse files Browse the repository at this point in the history
* switch to igraph for stemmata and added dotted lines for uncertainty'

* basis of a function for a homemade stemma layout

* added function layout_as_stemma

* corrections

* improved PCC.disagreement efficiency

* some tests

* simplified and documented PCC.Stemma output

* plot virtual wits in grey

* fixed namespace issue with V
  • Loading branch information
Jean-Baptiste-Camps authored May 5, 2018
1 parent 180e2d0 commit 6f0da9a
Show file tree
Hide file tree
Showing 19 changed files with 588 additions and 221 deletions.
8 changes: 4 additions & 4 deletions DESCRIPTION
Original file line number Diff line number Diff line change
@@ -1,19 +1,19 @@
Package: stemmatology
Type: Package
Title: An R stemmatology package
Version: 0.2.2
Date: 2014-07-16
Version: 0.2.3
Date: 2018-05-04
Author: Jean-Baptiste Camps ; Florian Cafiero
Maintainer: Jean-Baptiste Camps <jbcamps@hotmail.com>
Description: This package helps building and analysing the genealogy of textual or musical traditions.
BugReports: https://github.com/Jean-Baptiste-Camps/stemmatology/issues
Imports: graphics, stats, utils, network, sna, cluster
Imports: graphics, stats, utils, network, sna, cluster, igraph
Suggests:
testthat,
knitr,
rmarkdown,
covr
License: GPL-3
License: GPL-3 | file LICENSE
NeedsCompilation: no
URL: https://github.com/Jean-Baptiste-Camps/stemmatology
VignetteBuilder: knitr
Expand Down
3 changes: 2 additions & 1 deletion NAMESPACE
Original file line number Diff line number Diff line change
@@ -1,7 +1,8 @@
exportPattern("^[[:alpha:]]+")
importFrom("graphics", "axis", "barplot", "legend", "par", "plot")
importFrom("graphics", "axis", "barplot", "legend", "par", "plot","title")
importFrom("stats", "fisher.test", "na.omit")
importFrom("utils", "read.csv")
importFrom("sna", "gplot", "gplot.layout.fruchtermanreingold")
importFrom("network", "as.network", "network.vertex.names", "as.matrix.network", "set.vertex.attribute", "get.vertex.attribute")
importFrom("cluster", "pam")
importFrom("igraph", "graph_from_edgelist", "layout_as_tree", "plot.igraph", "E", "union", "topo_sort","V","neighbors")
17 changes: 13 additions & 4 deletions R/PCC.R
Original file line number Diff line number Diff line change
@@ -1,5 +1,13 @@
"PCC" <-
function(x, omissionsAsReadings = FALSE, alternateReadings = FALSE, limit = 0, recoverNAs = TRUE, pauseAtPlot = FALSE, interactive = TRUE) {
function(x,
omissionsAsReadings = FALSE,
alternateReadings = FALSE,
limit = 0,
recoverNAs = TRUE,
layout_as_stemma = FALSE,
pauseAtPlot = FALSE,
interactive = TRUE) {

# Global shell for the PCC functions. Successively calls PCC.Exploratory
# and PCC.Stemma on the dataset NB: TODO(JBC) this function should be
# updated when we will have defined appropriate object classes for the
Expand All @@ -8,14 +16,15 @@
# Here, we will need to have appropriate version of the command for the
# various outputs of PCC.Exploratory if is.pccOverconflicting
if (is.matrix(pccExploratory)) {
# What does this do?
output = pccStemma(pccExploratory)
} else {
if (!is.matrix(pccExploratory)) {
# if is.pccConflicts|pccOverconflicting|pccContam (with no alternate yet)
if (class(pccExploratory) == "pccConflicts"
| class(pccExploratory) == "pccOverconflicting"
| class(pccExploratory) == "pccContam") {
output = PCC.Stemma(pccExploratory$database, limit = limit, recoverNAs = recoverNAs)
output = PCC.Stemma(pccExploratory$database, limit = limit, recoverNAs = recoverNAs, layout_as_stemma = layout_as_stemma)
} else {
# if is.pccEquipollent
if (class(pccExploratory) == "pccEquipollent") {
Expand All @@ -24,8 +33,8 @@
}
output = as.list(NULL)
for (i in 1:length(pccExploratory$databases)) {
pccStemma = PCC.Stemma(pccExploratory$databases[[i]])
legend("topright", paste("Alternative stemma", i, "out of",
pccStemma = PCC.Stemma(pccExploratory$databases[[i]], limit = limit, recoverNAs = recoverNAs, layout_as_stemma = layout_as_stemma)
graphics::title(sub = paste("Alternative stemma", i, "out of",
length(pccExploratory)))
if (i < length(pccExploratory$databases)) {
readline("Press enter to proceed to next alternative stemma")
Expand Down
181 changes: 120 additions & 61 deletions R/PCC.Stemma.R
Original file line number Diff line number Diff line change
Expand Up @@ -3,8 +3,10 @@ PCC.Stemma <-
omissionsAsReadings = FALSE,
limit = 0,
recoverNAs = TRUE,
layout_as_stemma = FALSE,
ask = TRUE,
verbose = FALSE) {
# TODO(JBC): find a way to avoid redundancy for output ?
# TODO(JBC): la sortie de cette fonction n'a pas de classe, il faudrait
# l'implémenter
# TODO(JBC): Modifier cette fonction pour pouvoir prendre en entrée une
Expand All @@ -17,17 +19,35 @@ PCC.Stemma <-
# PCC.Stemma(x[[i]], omissionsAsReadings = omissionsAsReadings, limit =
# limit) output[[i]] = pccStemma # } return(output) # } Deuxième
# possibilité, le contenu n'est qu'une seule table de variantes
# TODO: deport output creation to a function to avoid redundancy?
# Explore graphical parameters for stemmata
# ,
# pad=1,
# label.pad = 0,
#label.pos=5,
# label.cex=0.7
tableVariantes = x
edgelistGlobal = NULL # matrix(c(character(0), character(0)), ncol = 2)
modelsGlobal = as.list(NULL)
modelsByGroupGlobal = as.list(NULL)
models = matrix(
nrow = nrow(tableVariantes),
ncol = 0,
dimnames = list(dimnames(tableVariantes)[[1]]))
modelsByGroup = matrix(
nrow = 1,
ncol = 0,
dimnames = list("Models")
)
fullDatabase = tableVariantes
# And now, we need to define an internal
# function to collate databases, to
# avoid redundancy
collateDbs <- function(x,y){
myCols = colnames(y)
for(i in seq_len(length(myCols))){
# If the columns already exists,
# replace it.
if(myCols[i] %in% colnames(x)){
x[, myCols[i]] = y[,i]
} else {
# Otherwise, add it
x = cbind(x, y[,i, drop = FALSE])
}
}
return(x)
}
counter = 0
while (ncol(tableVariantes) > 3) {
counter = counter + 1
Expand All @@ -36,23 +56,29 @@ PCC.Stemma <-
if (identical(pccBuildGroup$groups, list())) {
message("No group was found. Unable to build stemma.")
# Plot the stemma at this step, if it exists
# TODO: deport stemma plotting to avoid redundancy ?
if (!is.null(edgelistGlobal)) {
stemma = as.network(edgelistGlobal,
directed = TRUE,
matrix.type = "edgelist")
gplot(
stemma,
displaylabels,
label = network.vertex.names(stemma),
gmode = "digraph",
boxed.labels = TRUE,
usearrows = TRUE
)
myNetwork = igraph::graph_from_edgelist(edgelistGlobal[,1:2, drop = FALSE], directed = TRUE)
if(layout_as_stemma){
myLayout = layout_as_stemma(edgelistGlobal)
}
else{
myLayout = layout_as_tree(myNetwork)
}
# Color the reconstructed wit.
# (i.e., nodes with names starting with { )
# in grey, others in orange
# NB: this could be made more robust by using a
# vector of color attributes passed between functions
igraph::V(myNetwork)$color = "orange"
igraph::V(myNetwork)[grep('^{', igraph::V(myNetwork)$name, perl=TRUE)]$color = "grey"
igraph::plot.igraph(myNetwork, layout=myLayout)
output = as.list(NULL)
output$edgelist = edgelistGlobal
output$fullDatabase = fullDatabase
output$database = tableVariantes
output$modelsGlobal = modelsGlobal
output$modelsByGroupGlobal = modelsByGroupGlobal
output$edgelist = edgelistGlobal
output$models = models
output$modelsByGroup = modelsByGroup
return(output)
} else {
return()
Expand All @@ -66,40 +92,50 @@ PCC.Stemma <-
verbose = verbose
)
tableVariantes = pccReconstructModel$database
fullDatabase = collateDbs(fullDatabase, tableVariantes)
if (!exists("tableVariantes")) {
stop("No database found.")
return(tableVariantes)
}
# Now we save the objects given out by PCC.reconstructModel
edgelistGlobal = rbind(edgelistGlobal, pccReconstructModel$edgelist)
modelsGlobal[[counter]] = pccReconstructModel$models
modelsByGroupGlobal[[counter]] = pccReconstructModel$modelsByGroup
models = cbind(models,pccReconstructModel$models)
modelsByGroup = cbind(modelsByGroup,pccReconstructModel$modelsByGroup)
}
stemma = as.network(edgelistGlobal,
directed = TRUE,
matrix.type = "edgelist")
gplot(
stemma,
displaylabels,
label = network.vertex.names(stemma),
gmode = "digraph",
boxed.labels = TRUE,
usearrows = TRUE
)
if (is.null(tableVariantes)) {
# Job's done
myNetwork = igraph::graph_from_edgelist(edgelistGlobal[,1:2, drop = FALSE], directed = TRUE)
if(layout_as_stemma){
myLayout = layout_as_stemma(edgelistGlobal)
}
else{
myLayout = layout_as_tree(myNetwork)
}
igraph::V(myNetwork)$color = "orange"
igraph::V(myNetwork)[grep('^{', igraph::V(myNetwork)$name, perl=TRUE)]$color = "grey"
igraph::plot.igraph(myNetwork, layout=myLayout)
output = as.list(NULL)
output$edgelist = edgelistGlobal
output$fullDatabase = fullDatabase
output$database = tableVariantes
output$modelsGlobal = modelsGlobal
output$modelsByGroupGlobal = modelsByGroupGlobal
output$edgelist = edgelistGlobal
output$models = models
output$modelsByGroup = modelsByGroup
return(output)
}
# There is now less than 4 manuscripts in the database. The method is no
# longer really efficient, so we check if the stemma building is over,
# and if not, we ask the user if he wants the end of the stemma
if (ncol(tableVariantes) > 1) {
if(ask){
myNetwork = igraph::graph_from_edgelist(edgelistGlobal[,1:2, drop = FALSE], directed = TRUE)
if(layout_as_stemma){
myLayout = layout_as_stemma(edgelistGlobal)
} else{
myLayout = layout_as_tree(myNetwork)
}
igraph::V(myNetwork)$color = "orange"
igraph::V(myNetwork)[grep('^{', igraph::V(myNetwork)$name, perl=TRUE)]$color = "grey"
igraph::plot.igraph(myNetwork, layout=myLayout)
writeLines(
"There is now less than four manuscripts in the database.\nStemma building has now lost in accuracy. \nDo you want to continue anyway (take last step with caution) ?\n Y/N\n"
)
Expand All @@ -111,10 +147,11 @@ PCC.Stemma <-
}
if (answer == "N") {
output = as.list(NULL)
output$edgelist = edgelistGlobal
output$fullDatabase = fullDatabase
output$database = tableVariantes
output$modelsGlobal = modelsGlobal
output$modelsByGroupGlobal = modelsByGroupGlobal
output$edgelist = edgelistGlobal
output$models = models
output$modelsByGroup = modelsByGroup
return(output)
}
if (answer == "Y") {
Expand All @@ -136,33 +173,55 @@ PCC.Stemma <-
verbose = verbose
)
tableVariantes = pccReconstructModel$database
fullDatabase = collateDbs(fullDatabase, tableVariantes)
models = cbind(models,pccReconstructModel$models)
modelsByGroup = cbind(modelsByGroup,pccReconstructModel$modelsByGroup)
# And here, because we want dashes for the (uncertain) relations
# established as the last step, we will create to
# separate networks with differente properties, before concatening
myNetworkCert = igraph::graph_from_edgelist(edgelistGlobal[,1:2, drop = FALSE], directed = TRUE)
# With full lines for all edges
igraph::E(myNetworkCert)$lty = 1
# Then dashed for the uncertain ones
myNetworkUncert = igraph::graph_from_edgelist(pccReconstructModel$edgelist[,1:2, drop = FALSE], directed = TRUE)
igraph::E(myNetworkUncert)$lty = 3
# Then unite them
myNetwork = igraph::union(myNetworkCert, myNetworkUncert, byname=TRUE)
# fusion lty_1 et lty_2
igraph::E(myNetwork)$lty = ifelse(is.na(igraph::E(myNetwork)$lty_1),
igraph::E(myNetwork)$lty_2,igraph::E(myNetwork)$lty_1)
# Preparing edgelist for the output
edgelistGlobal = rbind(edgelistGlobal, pccReconstructModel$edgelist)
modelsGlobal[[counter]] = pccReconstructModel$models
modelsByGroupGlobal[[counter]] = pccReconstructModel$modelsByGroup
stemma = as.network(edgelistGlobal,
directed = TRUE,
matrix.type = "edgelist")
gplot(
stemma,
displaylabels,
label = network.vertex.names(stemma),
gmode = "digraph",
boxed.labels = TRUE,
usearrows = TRUE
)
# We can rely on how igraph sorts vertices (i.e., using unique on the
# edgelist turned, names <- unique(as.character(t(el))) )
# to have the same indices for the vertices in the union
# and in the layout function
if(layout_as_stemma){
myLayout = layout_as_stemma(edgelistGlobal)
}
else{
myLayout = layout_as_tree(myNetwork)
}
# And plotting
igraph::V(myNetwork)$color = "orange"
igraph::V(myNetwork)[grep('^{', igraph::V(myNetwork)$name, perl=TRUE)]$color = "grey"
igraph::plot.igraph(myNetwork, layout=myLayout, main="Final stemma")
# and output
output = as.list(NULL)
output$edgelist = edgelistGlobal
output$fullDatabase = fullDatabase
output$database = tableVariantes
output$modelsGlobal = modelsGlobal
output$modelsByGroupGlobal = modelsByGroupGlobal
output$edgelist = edgelistGlobal
output$models = models
output$modelsByGroup = modelsByGroup
return(output)
}
} else {
output = as.list(NULL)
output$edgelist = edgelistGlobal
output$fullDatabase = fullDatabase
output$database = tableVariantes
output$modelsGlobal = modelsGlobal
output$modelsByGroupGlobal = modelsByGroupGlobal
output$edgelist = edgelistGlobal
output$models = models
output$modelsByGroup = modelsByGroup
return(output)
}
}
Loading

0 comments on commit 6f0da9a

Please sign in to comment.