diff --git a/DESCRIPTION b/DESCRIPTION index 6891cc47..9058a70e 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,7 +1,7 @@ Package: migraph Title: Tools for Multimodal Network Analysis -Version: 0.12.1 -Date: 2022-10-03 +Version: 0.12.2 +Date: 2022-10-12 Description: A set of tools for analysing multimodal networks. All functions operate with matrices, edge lists, and 'igraph', 'network', and 'tidygraph' objects, diff --git a/NAMESPACE b/NAMESPACE index 59ca03a5..1588a9b3 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -293,6 +293,7 @@ export(is_complex) export(is_connected) export(is_directed) export(is_edgelist) +export(is_eulerian) export(is_graph) export(is_labelled) export(is_migraph) @@ -357,11 +358,15 @@ export(node_core) export(node_coreness) export(node_cuts) export(node_degree) +export(node_diversity) +export(node_edge_betweenness) export(node_efficiency) export(node_effsize) export(node_eigenvector) export(node_equivalence) +export(node_fast_greedy) export(node_hierarchy) +export(node_homophily) export(node_is_core) export(node_is_cutpoint) export(node_is_isolate) @@ -374,12 +379,15 @@ export(node_path_census) export(node_power) export(node_quad_census) export(node_reach) +export(node_reciprocity) export(node_redundancy) export(node_regular_equivalence) export(node_strong_components) export(node_structural_equivalence) export(node_tie_census) +export(node_transitivity) export(node_triad_census) +export(node_walktrap) export(node_weak_components) export(read_edgelist) export(read_nodelist) diff --git a/NEWS.md b/NEWS.md index 6db5de96..0a55254e 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,3 +1,42 @@ +# migraph 0.12.2 + +## Package + +- Fixed several typos in the centrality vignette and reexported figures +- Added community detection vignette + +## Make + +- `create_lattice()` now conforms to other `create_*()` functions in how it interprets `"n"` + - from an inferred `"n"` for a one-mode network, it will create a transitive lattice of as even dimensions as possible 
+ - for a two-mode network, this depends on how balanced the two modes are, and is still a work in progress... (WIP) + +## Marks + +- Added `is_eulerian()` for a logical expression of whether the network has an Eulerian path + +## Measures + +- `network_smallworld()` now takes a method argument for different ways of calculating a small-world coefficient + - "omega" (the new default) offers a bounded range, -1 to 1 with values near 0 indicating a small-world, unlike the previous (now "sigma") metric. + - "SWI" is also included and offers a 0 to 1 range, but where 1 may not be realisable +- Added `node_diversity()` for calculating heterogeneity among each node's ego network +- Added `node_homophily()` for calculating homophilous ties among each node's ego network +- Added `node_reciprocity()` for calculating each node's reciprocity +- Added `node_transitivity()` for calculating each node's transitivity/clustering + +## Memberships + +- Added wrappers for several community detection algorithms from igraph, + unlike tidygraph these can operate on objects directly + - Added `node_walktrap()` + - Added `node_edge_betweenness()` + - Added `node_fast_greedy()` + +## Mapping + +- Reversed blue/red colour assignment for binary variables + # migraph 0.12.1 ## Manipulations @@ -19,7 +58,7 @@ ## Mapping -- Fixed `autographr()` tests to work with new version of `{ggraph}` +- Fixed `autographr()` tests to work with new version of `{ggraph}` (closed #247, thanks @henriquesposito) # migraph 0.12.0 diff --git a/R/make_create.R b/R/make_create.R index 443377f3..0d4a6d1c 100644 --- a/R/make_create.R +++ b/R/make_create.R @@ -89,11 +89,6 @@ create_complete <- function(n, directed = FALSE) { create_ring <- function(n, width = 1, directed = FALSE, ...) { n <- infer_n(n) - # Helper function - roll_over <- function(w) { - cbind(w[, ncol(w)], w[, 1:(ncol(w) - 1)]) - } - if (length(n) == 1) { if (width == 1) { out <- igraph::make_ring(n, directed, ...) 
@@ -203,13 +198,42 @@ create_tree <- function(n, #' @importFrom igraph make_lattice #' @examples #' autographr(create_lattice(5), layout = "kk") + -#' autographr(create_lattice(c(5,5))) + -#' autographr(create_lattice(c(5,5,5))) +#' autographr(create_lattice(c(5,5))) #' @export create_lattice <- function(n, directed = FALSE) { - if (is_migraph(n)) n <- network_dims(n) - igraph::make_lattice(n, directed = directed) + n <- infer_n(n) + + # Helper: all positive divisors of x, in ascending order + divisors <- function(x){ + y <- seq_len(x) + y[ x%%y == 0 ] + } + + if(length(n)== 1){ + # One-mode: use the middle divisor pair so the two dimensions are as even as possible + divs <- divisors(n) + if((length(divs) %% 2) == 0){ + dims <- c(divs[length(divs)/2], divs[length(divs)/2+1]) + } else dims <- c(stats::median(divs), stats::median(divs)) + igraph::make_lattice(dims, nei = 2, directed = directed) + } else { + # Two-mode (WIP): incidence matrix built from the diagonal plus its column-rolled copy + divs1 <- divisors(n[1]) + divs2 <- divisors(n[2]) + divs1 <- divs1[-c(1, length(divs1))] + divs2 <- divs2[-c(1, length(divs2))] + divs1 <- intersect(divs1, c(divs2+1, divs2-1)) + divs2 <- intersect(divs2, c(divs1+1, divs1-1)) + mat <- matrix(0, n[1], n[2]) + diag(mat) <- 1 + w <- roll_over(mat) + mat <- mat + w + mat[lower.tri(mat)] <- 0 + out <- mat[rowSums(mat)==2,] + out <- do.call(rbind, replicate(nrow(mat)/nrow(out), out, simplify=FALSE)) + as_igraph(out) + } } #' @describeIn create Creates a graph in which the nodes are clustered @@ -311,3 +335,8 @@ infer_membership <- function(n, membership) { } membership } + +# Rotates the columns of a matrix one place to the right (last column becomes first) +roll_over <- function(w) { + cbind(w[, ncol(w)], w[, seq_len(ncol(w) - 1)]) +} diff --git a/R/mark_is.R b/R/mark_is.R index 78cdac5b..ff468109 100644 --- a/R/mark_is.R +++ b/R/mark_is.R @@ -372,3 +372,13 @@ is_perfect_matching <- function(object, mark = "type"){ matches <- to_matching(object, mark = mark) network_ties(matches)*2 == network_nodes(matches) } + +#' @describeIn is Tests whether there is an Eulerian path for a network +#' where that path passes through every tie exactly once +#' @importFrom igraph has_eulerian_path +#' @examples +#' is_eulerian(ison_brandes) +#' @export +is_eulerian <- 
function(object){ + igraph::has_eulerian_path(as_igraph(object)) +} diff --git a/R/measure_closure.R b/R/measure_closure.R index 414596cf..1f2ab1a4 100644 --- a/R/measure_closure.R +++ b/R/measure_closure.R @@ -40,16 +40,36 @@ network_reciprocity <- function(object, method = "default") { object) } +#' @describeIn closure Calculate nodes' reciprocity +#' @examples +#' node_reciprocity(to_unweighted(ison_networkers)) +#' @export +node_reciprocity <- function(object) { + out <- as_matrix(object) + make_node_measure(rowSums(out * t(out))/rowSums(out), + object) +} + #' @describeIn closure Calculate transitivity in a network #' @importFrom igraph transitivity #' @examples -#' network_transitivity(ison_southern_women) +#' network_transitivity(ison_adolescents) #' @export network_transitivity <- function(object) { make_network_measure(igraph::transitivity(as_igraph(object)), object) } +#' @describeIn closure Calculate nodes' transitivity +#' @examples +#' node_transitivity(ison_adolescents) +#' @export +node_transitivity <- function(object) { + make_node_measure(igraph::transitivity(as_igraph(object), + type = "local"), + object) +} + #' @describeIn closure Calculate equivalence or reinforcement #' in a (usually two-mode) network #' @examples diff --git a/R/measure_diversity.R b/R/measure_diversity.R index 29a031a2..1e7ddca5 100644 --- a/R/measure_diversity.R +++ b/R/measure_diversity.R @@ -58,8 +58,23 @@ network_diversity <- function(object, attribute, clusters = NULL){ make_network_measure(blauout, object) } -#' @describeIn diversity Calculates the embeddedness of a node within the group -#' of nodes of the same attribute +#' @describeIn diversity Calculates the heterogeneity of each node's +#' local neighbourhood. 
+#' @examples +#' node_diversity(marvel_friends, "Gender") +#' node_diversity(marvel_friends, "Attractive") +#' @export +node_diversity <- function(object, attribute){ + out <- vapply(igraph::ego(as_igraph(object)), + function(x) network_diversity( + igraph::induced_subgraph(as_igraph(object), x), + attribute), + FUN.VALUE = numeric(1)) + make_node_measure(out, object) +} + +#' @describeIn diversity Calculates how embedded nodes in the network +#' are within groups of nodes with the same attribute #' @section network_homophily: #' Given a partition of a network into a number of mutually exclusive groups then #' The E-I index is the number of ties between (or _external_) nodes @@ -91,6 +106,21 @@ network_homophily <- function(object, attribute){ make_network_measure(ei, object) } +#' @describeIn diversity Calculates each node's embeddedness within groups +#' of nodes with the same attribute +#' @examples +#' node_homophily(marvel_friends, "Gender") +#' node_homophily(marvel_friends, "Attractive") +#' @export +node_homophily <- function(object, attribute){ + out <- vapply(igraph::ego(as_igraph(object)), + function(x) network_homophily( + igraph::induced_subgraph(as_igraph(object), x), + attribute), + FUN.VALUE = numeric(1)) + make_node_measure(out, object) +} + #' @describeIn diversity Calculates the degree assortativity in a graph. #' @importFrom igraph assortativity_degree #' @examples diff --git a/R/measure_features.R b/R/measure_features.R index 57500fe9..8f3c023a 100644 --- a/R/measure_features.R +++ b/R/measure_features.R @@ -74,6 +74,27 @@ network_modularity <- function(object, #' Small-world networks can be highly clustered and yet #' have short path lengths. #' @param times Integer of number of simulations. 
+#' @param method There are three small-world measures implemented: +#' - "sigma" is the original equation from Watts and Strogatz (1998), +#' \deqn{\frac{\frac{C}{C_r}}{\frac{L}{L_r}}}, +#' where \eqn{C} and \eqn{L} are the observed +#' clustering coefficient and path length, respectively, +#' and \eqn{C_r} and \eqn{L_r} are the averages obtained from +#' random networks of the same dimensions and density. +#' A \eqn{\sigma > 1} is considered to be small-world, +#' but this measure is highly sensitive to network size. +#' - "omega" (the default) is an update from Telesford et al. (2011), +#' \deqn{\frac{L_r}{L} - \frac{C}{C_l}}, +#' where \eqn{C_l} is the clustering coefficient for a lattice graph +#' with the same dimensions. +#' \eqn{\omega} ranges between -1 and 1, where values close to 0 indicate a small-world, +#' positive values a more random and negative values a more lattice-like network. +#' - "SWI" is an alternative proposed by Neal (2017), +#' \deqn{\frac{L - L_l}{L_r - L_l} \times \frac{C - C_r}{C_l - C_r}}, +#' where \eqn{L_l} is the average path length for a lattice graph +#' with the same dimensions. +#' \eqn{SWI} ranges between 0 and 1, where 1 is as close to a small-world as possible, +#' but where there may not be a network for which \eqn{SWI = 1}. #' @examples #' network_smallworld(ison_brandes) #' network_smallworld(ison_southern_women) @@ -81,30 +102,57 @@ network_modularity <- function(object, #' for how clustering is calculated #' @references #' Watts, Duncan J., and Steven H. Strogatz. 1998. -#' “Collective Dynamics of ‘Small-World’ Networks.” -#' _Nature_ 393(6684):440–42. -#' \doi{10.1038/30918}. +#' “Collective Dynamics of ‘Small-World’ Networks.” +#' _Nature_ 393(6684):440–42. +#' \doi{10.1038/30918}. +#' +#' Telesford QK, Joyce KE, Hayasaka S, Burdette JH, Laurienti PJ. 2011. +#' "The ubiquity of small-world networks". +#' _Brain Connectivity_ 1(5): 367–75. +#' \doi{10.1089/brain.2011.0038}. +#' +#' Neal Zachary P. 2017. +#' "How small is it? Comparing indices of small worldliness". +#' _Network Science_. 5 (1): 30–44. 
+#' \doi{10.1017/nws.2017.5}. #' @export -network_smallworld <- function(object, times = 100) { +network_smallworld <- function(object, + method = c("omega", "sigma", "SWI"), + times = 100) { + + method <- match.arg(method) if(is_twomode(object)){ - obsclust <- network_equivalency(object) - expclust <- mean(vapply(1:times, - function(x) network_equivalency(generate_random(object)), - FUN.VALUE = numeric(1))) + co <- network_equivalency(object) + cr <- mean(vapply(1:times, + function(x) network_equivalency(generate_random(object)), + FUN.VALUE = numeric(1))) + if(method %in% c("omega", "SWI")){ + cl <- network_equivalency(create_ring(object)) + } } else { - obsclust <- network_transitivity(object) - expclust <- mean(vapply(1:times, + co <- network_transitivity(object) + cr <- mean(vapply(1:times, function(x) network_transitivity(generate_random(object)), FUN.VALUE = numeric(1))) + if(method %in% c("omega", "SWI")){ + cl <- network_transitivity(create_lattice(object)) + } } - obspath <- network_length(object) - exppath <- mean(vapply(1:times, + lo <- network_length(object) + lr <- mean(vapply(1:times, function(x) network_length(generate_random(object)), FUN.VALUE = numeric(1))) + if(method == "SWI"){ + ll <- network_length(create_ring(object)) + } - make_network_measure((obsclust/expclust)/(obspath/exppath), + out <- switch(method, + "omega" = (lr/lo - co/cl), + "sigma" = (co/cr)/(lo/lr), + "SWI" = ((lo - ll)/(lr - ll))*((co - cr)/(cl - cr))) + make_network_measure(out, object) } diff --git a/R/member_community.R b/R/member_community.R index 6b91c306..b23b1367 100644 --- a/R/member_community.R +++ b/R/member_community.R @@ -62,3 +62,37 @@ node_kernighanlin <- function(object){ make_node_member(out, object) } +#' @describeIn community The walktrap algorithm +#' @param times Integer indicating number of simulations/walks used. +#' By default, `times=50`. 
+#' @examples +#' node_walktrap(ison_adolescents) +#' @export +node_walktrap <- function(object, times = 50){ + out <- igraph::cluster_walktrap(as_igraph(object), + steps=times)$membership + make_node_member(out, object) + +} + +#' @describeIn community The edge-betweenness algorithm +#' @examples +#' node_edge_betweenness(ison_adolescents) +#' @export +node_edge_betweenness <- function(object){ + out <- igraph::cluster_edge_betweenness(as_igraph(object) + )$membership + make_node_member(out, object) + +} + +#' @describeIn community The fast-greedy algorithm +#' @examples +#' node_fast_greedy(ison_adolescents) +#' @export +node_fast_greedy <- function(object){ + out <- igraph::cluster_fast_greedy(as_igraph(object) + )$membership + make_node_member(out, object) + +} \ No newline at end of file diff --git a/R/model_regression.R b/R/model_regression.R index bd6ad3f1..284587ed 100644 --- a/R/model_regression.R +++ b/R/model_regression.R @@ -578,7 +578,8 @@ plot.netlm <- function(x, ...){ distrib <- as.data.frame(distrib) names(distrib) <- x$names distrib$obs <- seq_len(nrow(distrib)) - distrib <- tidyr::pivot_longer(distrib, -.data$obs) + distrib <- tidyr::pivot_longer(distrib, + cols = 1:(ncol(distrib)-1)) distrib$coef <- rep(unname(x$coefficients), nrow(x$dist)) distrib$tstat <- rep(unname(x$tstat), nrow(x$dist)) distrib$name <- factor(distrib$name, x$names) @@ -597,7 +598,8 @@ plot.netlogit <- function(x, ...){ distrib <- as.data.frame(distrib) names(distrib) <- x$names distrib$obs <- seq_len(nrow(distrib)) - distrib <- tidyr::pivot_longer(distrib, -.data$obs) + distrib <- tidyr::pivot_longer(distrib, + cols = 1:(ncol(distrib)-1)) distrib$coef <- rep(unname(x$coefficients), nrow(x$dist)) distrib$tstat <- rep(unname(x$tstat), nrow(x$dist)) distrib$name <- factor(distrib$name, x$names) diff --git a/R/viz_autographr.R b/R/viz_autographr.R index 066b1f0b..83ef9072 100644 --- a/R/viz_autographr.R +++ b/R/viz_autographr.R @@ -311,7 +311,8 @@ autographr <- auto_graph <- 
function(object, size = nsize, shape = node_shape) + ggplot2::scale_colour_brewer(palette = "Set1", - # direction = -1, + direction = ifelse(length(unique(color_factor_node))==2, + -1,1), guide = "none") } else { p <- p + ggraph::geom_node_point(size = nsize, @@ -324,7 +325,8 @@ autographr <- auto_graph <- function(object, size = nsize, shape = node_shape) + ggplot2::scale_colour_brewer(palette = "Set1", - # direction = -1, + direction = ifelse(length(unique(color_factor_node))==2, + -1,1), guide = "none") } else { p <- p + ggraph::geom_node_point(size = nsize, diff --git a/README.Rmd b/README.Rmd index ba4ad43a..785a8ed8 100644 --- a/README.Rmd +++ b/README.Rmd @@ -218,8 +218,24 @@ and install using an adapted version of the following commands: To install from source the latest main version of `{migraph}` from Github, please install the `{remotes}` or `{devtools}` package from CRAN and then: -- For latest stable version: `remotes::install_github("snlab-ch/migraph")` -- For latest development version: `remotes::install_github("snlab-ch/migraph@develop")` +- For latest stable version: +`remotes::install_github("snlab-ch/migraph", build_vignettes = TRUE)` +- For latest development version: +`remotes::install_github("snlab-ch/migraph@develop", build_vignettes = TRUE)` + +### Vignettes + +To make sure you have the vignettes installed, +please either install the latest binary from CRAN for your OS, +or use the `remotes` command above to make sure that the vignettes are also built. + +Once you have installed the package with its vignettes built, +you can access them using the following command: +`browseVignettes("migraph")`. + +This gives you access to the compiled vignettes in html form, +as well as the original source (an Rmarkdown file), +and just the R code without any of the annotations. 
## Relationship to other packages diff --git a/README.md b/README.md index ba05d94a..1cff254f 100644 --- a/README.md +++ b/README.md @@ -77,19 +77,21 @@ can be used to work with and manipulate your data. properties, e.g.: - `is_acyclic()`, `is_bipartite()`, `is_complex()`, `is_connected()`, - `is_directed()`, `is_edgelist()`, `is_graph()`, `is_labelled()`, - `is_migraph()`, `is_multiplex()`, `is_signed()`, `is_twomode()`, + `is_directed()`, `is_edgelist()`, `is_eulerian()`, `is_graph()`, + `is_labelled()`, `is_migraph()`, `is_multiplex()`, + `is_perfect_matching()`, `is_signed()`, `is_twomode()`, `is_uniplex()`, `is_weighted()` `{migraph}`’s `to_*()` functions can be used on any class object to reformat or transform networks into networks with other properties, e.g.: -- `to_blocks()`, `to_edges()`, `to_giant()`, `to_main_component()`, - `to_matching()`, `to_mode1()`, `to_mode2()`, `to_multilevel()`, - `to_named()`, `to_onemode()`, `to_redirected()`, `to_simplex()`, - `to_subgraph()`, `to_ties()`, `to_twomode()`, `to_undirected()`, - `to_uniplex()`, `to_unnamed()`, `to_unsigned()`, `to_unweighted()` +- `to_anti()`, `to_blocks()`, `to_edges()`, `to_giant()`, + `to_main_component()`, `to_matching()`, `to_mode1()`, `to_mode2()`, + `to_multilevel()`, `to_named()`, `to_onemode()`, `to_redirected()`, + `to_simplex()`, `to_subgraph()`, `to_ties()`, `to_twomode()`, + `to_undirected()`, `to_uniplex()`, `to_unnamed()`, `to_unsigned()`, + `to_unweighted()` #### Making @@ -231,9 +233,22 @@ Github, please install the `{remotes}` or `{devtools}` package from CRAN and then: - For latest stable version: - `remotes::install_github("snlab-ch/migraph")` + `remotes::install_github("snlab-ch/migraph", build_vignettes = TRUE)` - For latest development version: - `remotes::install_github("snlab-ch/migraph@develop")` + `remotes::install_github("snlab-ch/migraph@develop", build_vignettes = TRUE)` + +### Vignettes + +To make sure you have the vignettes installed, please either install the 
+latest binary from CRAN for your OS, or use the `remotes` command above +to make sure that the vignettes are also built. + +Once you have installed the package with its vignettes built, you can +access them using the following command: `browseVignettes("migraph")`. + +This gives you access to the compiled vignettes in html form, as well as +the original source (an Rmarkdown file), and just the R code without any +of the annotations. ## Relationship to other packages diff --git a/man/closure.Rd b/man/closure.Rd index 37715fc3..9337b8bb 100644 --- a/man/closure.Rd +++ b/man/closure.Rd @@ -3,15 +3,21 @@ \name{closure} \alias{closure} \alias{network_reciprocity} +\alias{node_reciprocity} \alias{network_transitivity} +\alias{node_transitivity} \alias{network_equivalency} \alias{network_congruency} \title{Measures of network closure} \usage{ network_reciprocity(object, method = "default") +node_reciprocity(object) + network_transitivity(object) +node_transitivity(object) + network_equivalency(object) network_congruency(object, object2) @@ -43,8 +49,12 @@ spanning two two-mode networks that are closed by a fourth tie to establish a \itemize{ \item \code{network_reciprocity()}: Calculate reciprocity in a (usually directed) network +\item \code{node_reciprocity()}: Calculate nodes' reciprocity + \item \code{network_transitivity()}: Calculate transitivity in a network +\item \code{node_transitivity()}: Calculate nodes' transitivity + \item \code{network_equivalency()}: Calculate equivalence or reinforcement in a (usually two-mode) network @@ -53,7 +63,9 @@ in a (usually two-mode) network }} \examples{ network_reciprocity(ison_southern_women) -network_transitivity(ison_southern_women) +node_reciprocity(to_unweighted(ison_networkers)) +network_transitivity(ison_adolescents) +node_transitivity(ison_adolescents) network_equivalency(ison_southern_women) } \references{ diff --git a/man/community.Rd b/man/community.Rd index 10c39508..c930a0dd 100644 --- a/man/community.Rd +++ b/man/community.Rd @@ -3,9 +3,18 
@@ \name{community} \alias{community} \alias{node_kernighanlin} +\alias{node_walktrap} +\alias{node_edge_betweenness} +\alias{node_fast_greedy} \title{Community graph partitioning algorithms} \usage{ node_kernighanlin(object) + +node_walktrap(object, times = 50) + +node_edge_betweenness(object) + +node_fast_greedy(object) } \arguments{ \item{object}{An object of a migraph-consistent class: @@ -16,6 +25,9 @@ node_kernighanlin(object) \item network, from the \code{{network}} package \item tbl_graph, from the \code{{tidygraph}} package }} + +\item{times}{Integer indicating number of simulations/walks used. +By default, \code{times=50}.} } \description{ Community graph partitioning algorithms @@ -26,10 +38,19 @@ Community graph partitioning algorithms partitioning algorithm that results in a graph with two equally-sized communities +\item \code{node_walktrap()}: The walktrap algorithm + +\item \code{node_edge_betweenness()}: The edge-betweenness algorithm + +\item \code{node_fast_greedy()}: The fast-greedy algorithm + }} \examples{ node_kernighanlin(ison_adolescents) node_kernighanlin(ison_southern_women) +node_walktrap(ison_adolescents) +node_edge_betweenness(ison_adolescents) +node_fast_greedy(ison_adolescents) } \references{ Kernighan, Brian W., and Shen Lin. 1970. 
diff --git a/man/create.Rd b/man/create.Rd index 6caafc65..8db70d4c 100644 --- a/man/create.Rd +++ b/man/create.Rd @@ -113,8 +113,7 @@ autographr(create_tree(c(7,8), directed = TRUE)) + autographr(create_tree(15, directed = TRUE), "tree") + autographr(create_tree(15, directed = TRUE, width = 3), "tree") autographr(create_lattice(5), layout = "kk") + -autographr(create_lattice(c(5,5))) + -autographr(create_lattice(c(5,5,5))) +autographr(create_lattice(c(5,5))) autographr(create_components(10, membership = c(1,1,1,2,2,2,3,3,3,3))) autographr(create_components(c(10, 12))) autographr(create_core(6)) + diff --git a/man/diversity.Rd b/man/diversity.Rd index fff10fcf..43739407 100644 --- a/man/diversity.Rd +++ b/man/diversity.Rd @@ -3,14 +3,20 @@ \name{diversity} \alias{diversity} \alias{network_diversity} +\alias{node_diversity} \alias{network_homophily} +\alias{node_homophily} \alias{network_assortativity} \title{Measures of network diversity} \usage{ network_diversity(object, attribute, clusters = NULL) +node_diversity(object, attribute) + network_homophily(object, attribute) +node_homophily(object, attribute) + network_assortativity(object) } \arguments{ @@ -38,8 +44,14 @@ across this attribute. \item \code{network_diversity()}: Calculates the heterogeneity of ties across a network or within clusters by node attributes. -\item \code{network_homophily()}: Calculates the embeddedness of a node within the group -of nodes of the same attribute +\item \code{node_diversity()}: Calculates the heterogeneity of each node's +local neighbourhood. + +\item \code{network_homophily()}: Calculates how embedded nodes in the network +are within groups of nodes with the same attribute + +\item \code{node_homophily()}: Calculates each node's embeddedness within groups +of nodes with the same attribute \item \code{network_assortativity()}: Calculates the degree assortativity in a graph. 
@@ -77,8 +89,12 @@ marvel_friends <- to_unsigned(ison_marvel_relationships, "positive") network_diversity(marvel_friends, "Gender") network_diversity(marvel_friends, "Attractive") network_diversity(marvel_friends, "Gender", "Rich") +node_diversity(marvel_friends, "Gender") +node_diversity(marvel_friends, "Attractive") network_homophily(marvel_friends, "Gender") network_homophily(marvel_friends, "Attractive") +node_homophily(marvel_friends, "Gender") +node_homophily(marvel_friends, "Attractive") network_assortativity(mpn_elite_mex) } \references{ diff --git a/man/features.Rd b/man/features.Rd index 4a775725..3fd11cdb 100644 --- a/man/features.Rd +++ b/man/features.Rd @@ -18,7 +18,7 @@ network_factions(object, membership = NULL) network_modularity(object, membership = NULL, resolution = 1) -network_smallworld(object, times = 100) +network_smallworld(object, method = c("omega", "sigma", "SWI"), times = 100) network_balance(object) } @@ -37,6 +37,30 @@ network_balance(object) \item{resolution}{A proportion indicating the resolution scale. By default 1.} +\item{method}{There are three small-world measures implemented: +\itemize{ +\item "sigma" is the original equation from Watts and Strogatz (1998), +\deqn{\frac{\frac{C}{C_r}}{\frac{L}{L_r}}}, +where \eqn{C} and \eqn{L} are the observed +clustering coefficient and path length, respectively, +and \eqn{C_r} and \eqn{L_r} are the averages obtained from +random networks of the same dimensions and density. +A \eqn{\sigma > 1} is considered to be small-world, +but this measure is highly sensitive to network size. +\item "omega" (the default) is an update from Telesford et al. (2011), +\deqn{\frac{L_r}{L} - \frac{C}{C_l}}, +where \eqn{C_l} is the clustering coefficient for a lattice graph +with the same dimensions. +\eqn{\omega} ranges between -1 and 1, where values close to 0 indicate a small-world, +positive values a more random and negative values a more lattice-like network. 
+\item "SWI" is an alternative proposed by Neal (2017), +\deqn{\frac{L - L_l}{L_r - L_l} \times \frac{C - C_r}{C_l - C_r}}, +where \eqn{L_l} is the average path length for a lattice graph +with the same dimensions. +\eqn{SWI} ranges between 0 and 1, where 1 is as close to a small-world as possible, +but where there may not be a network for which \eqn{SWI = 1}. +}} + \item{times}{Integer of number of simulations.} } \description{ @@ -93,6 +117,16 @@ Watts, Duncan J., and Steven H. Strogatz. 1998. “Collective Dynamics of ‘Small-World’ Networks.” \emph{Nature} 393(6684):440–42. \doi{10.1038/30918}. + +Telesford QK, Joyce KE, Hayasaka S, Burdette JH, Laurienti PJ. 2011. +"The ubiquity of small-world networks". +\emph{Brain Connectivity} 1(5): 367–75. +\doi{10.1089/brain.2011.0038}. + +Neal Zachary P. 2017. +"How small is it? Comparing indices of small worldliness". +\emph{Network Science}. 5 (1): 30–44. +\doi{10.1017/nws.2017.5}. } \seealso{ \code{\link[=network_transitivity]{network_transitivity()}} and \code{\link[=network_equivalency]{network_equivalency()}} diff --git a/man/is.Rd b/man/is.Rd index a0952c7b..e9742c94 100644 --- a/man/is.Rd +++ b/man/is.Rd @@ -16,6 +16,7 @@ \alias{is_uniplex} \alias{is_acyclic} \alias{is_perfect_matching} +\alias{is_eulerian} \title{Marking networks based on their properties} \usage{ is_migraph(object) @@ -45,6 +46,8 @@ is_uniplex(object) is_acyclic(object) is_perfect_matching(object, mark = "type") + +is_eulerian(object) } \arguments{ \item{object}{An object of a migraph-consistent class: @@ -102,6 +105,10 @@ or multiple columns to the edgelist. 
\item \code{is_perfect_matching()}: Tests whether there is a matching for a network that covers every node in the network +\item \code{is_eulerian()}: Tests whether there is a Eulerian path for a network +where that path passes through every tie exactly once +@importFrom igraph has_eulerian_path + }} \examples{ is_twomode(ison_southern_women) @@ -114,6 +121,7 @@ is_complex(ison_southern_women) is_uniplex(ison_algebra) is_acyclic(ison_algebra) is_perfect_matching(ison_southern_women) +is_eulerian(ison_brandes) } \seealso{ Other marks: diff --git a/tests/testthat/test-make_create.R b/tests/testthat/test-make_create.R index fea80069..c9a9283c 100644 --- a/tests/testthat/test-make_create.R +++ b/tests/testthat/test-make_create.R @@ -49,4 +49,10 @@ test_that("core-periphery creation works", { # expect_s3_class(create_nest(2,4, as = "igraph"), "igraph") # expect_s3_class(create_nest(2,4, as = "tidygraph"), "tbl_graph") # }) -# + +test_that("create lattice works", { + expect_s3_class(create_lattice(4), "igraph") + expect_equal(network_nodes(create_lattice(5)), 5) + expect_false(is_directed(create_lattice(6))) + expect_true(is_directed(create_lattice(6, directed = TRUE))) +}) diff --git a/tests/testthat/test-manip_is.R b/tests/testthat/test-mark_is.R similarity index 77% rename from tests/testthat/test-manip_is.R rename to tests/testthat/test-mark_is.R index aad0cb4a..f1e145d9 100644 --- a/tests/testthat/test-manip_is.R +++ b/tests/testthat/test-mark_is.R @@ -2,9 +2,12 @@ test_that("is tests work", { expect_true(is_twomode(ison_southern_women)) expect_false(is_directed(ison_southern_women)) expect_false(is_weighted(ison_southern_women)) + expect_true(is_weighted(mpn_ryanair)) expect_true(is_labelled(ison_southern_women)) expect_true(is_connected(ison_southern_women)) expect_false(is_complex(ison_southern_women)) expect_true(is_graph(ison_southern_women)) + expect_true(is_migraph(ison_southern_women)) + expect_true(is_graph(ison_brandes)) 
expect_false(is_directed(as_network(ison_southern_women))) }) diff --git a/tests/testthat/test-measure_closure.R b/tests/testthat/test-measure_closure.R index 310a68dd..52c75030 100644 --- a/tests/testthat/test-measure_closure.R +++ b/tests/testthat/test-measure_closure.R @@ -38,3 +38,7 @@ test_that("three-mode clustering calculated correctly",{ expect_output(print(network_congruency(mat1, mat2))) }) +test_that("node_transitivity is reported correctly",{ + expect_length(node_transitivity(ison_algebra), network_nodes(ison_algebra)) + expect_s3_class(node_transitivity(ison_algebra), "node_measure") +}) diff --git a/tests/testthat/test-measure_diversity.R b/tests/testthat/test-measure_diversity.R index 3b046fce..4b352971 100644 --- a/tests/testthat/test-measure_diversity.R +++ b/tests/testthat/test-measure_diversity.R @@ -10,6 +10,12 @@ test_that("EI index function works", { expect_equal(as.numeric(network_homophily(mpn_elite_mex, "military")), -0.3675, tolerance = 0.001) }) +test_that("node_homophily function works", { + expect_length(node_homophily(mpn_elite_mex, "military"), + network_nodes(mpn_elite_mex)) + expect_s3_class(node_homophily(mpn_elite_mex, "military"), "node_measure") +}) + test_that("network_assortativity function works", { expect_length(network_assortativity(mpn_elite_mex), 1) expect_s3_class(network_assortativity(mpn_elite_mex), "network_measure") diff --git a/tests/testthat/test-measure_features.R b/tests/testthat/test-measure_features.R index ba686cf4..b41a04f6 100644 --- a/tests/testthat/test-measure_features.R +++ b/tests/testthat/test-measure_features.R @@ -2,7 +2,7 @@ set.seed(123) test_that("small-world metrics for two mode networks are calculated and displayed correctly", { expect_s3_class(network_smallworld(ison_southern_women), "network_measure") - expect_equal(as.numeric(network_smallworld(ison_southern_women)), 1.323, tolerance = 0.02) + expect_equal(as.numeric(network_smallworld(ison_southern_women)), -1.04, tolerance = 0.02) }) 
test_that("network_balance works", { diff --git a/tests/testthat/test-member_community.R b/tests/testthat/test-member_community.R index 3e65c25d..61448a4d 100644 --- a/tests/testthat/test-member_community.R +++ b/tests/testthat/test-member_community.R @@ -4,3 +4,21 @@ test_that("node_kernighanlin algorithm works", { network_nodes(mpn_elite_mex)) expect_false(any(node_kernighanlin(mpn_elite_mex) > 2)) }) + +test_that("node_edge_betweenness algorithm works", { + expect_s3_class(node_edge_betweenness(mpn_elite_mex), "node_member") + expect_length(node_edge_betweenness(mpn_elite_mex), + network_nodes(mpn_elite_mex)) +}) + +test_that("node_fast_greedy algorithm works", { + expect_s3_class(node_fast_greedy(ison_southern_women), "node_member") + expect_length(node_fast_greedy(ison_southern_women), + network_nodes(ison_southern_women)) +}) + +test_that("node_walktrap algorithm works", { + expect_s3_class(node_walktrap(ison_southern_women), "node_member") + expect_length(node_walktrap(ison_southern_women), + network_nodes(ison_southern_women)) +}) diff --git a/vignettes/p3centrality.Rmd b/vignettes/p3centrality.Rmd index 26a670c7..05dced1e 100644 --- a/vignettes/p3centrality.Rmd +++ b/vignettes/p3centrality.Rmd @@ -90,8 +90,8 @@ rowSums(mat) == colSums(mat) # Are they all equal? Why? # You can also just use a built in command in migraph though: node_degree(ison_brandes, normalized = FALSE) -#> Melinda Haley Mallory Pamela Florence Cora Olga -#> 1 1 1 3 3 2 3 3 +#> Becky Luna Rylee Jenna Deborah Claire Damian +#> 1 1 1 3 3 2 3 3 #> # ... with 4 more from this nodeset in the vector. ``` @@ -113,16 +113,16 @@ Fortunately, we can use functions from `{migraph}` to help: ```r node_betweenness(ison_brandes) -#> Melinda Haley Mallory Pamela Florence Cora Olga -#> 1 0 0 0.378 0.485 0.133 0.337 0.304 +#> Becky Luna Rylee Jenna Deborah Claire Damian +#> 1 0 0 0.378 0.485 0.133 0.337 0.304 #> # ... with 4 more from this nodeset in the vector. 
node_closeness(ison_brandes) -#> Melinda Haley Mallory Pamela Florence Cora Olga -#> 1 0.278 0.278 0.370 0.455 0.435 0.476 0.455 +#> Becky Luna Rylee Jenna Deborah Claire Damian +#> 1 0.278 0.278 0.370 0.455 0.435 0.476 0.455 #> # ... with 4 more from this nodeset in the vector. node_eigenvector(ison_brandes) -#> Melinda Haley Mallory Pamela Florence Cora Olga -#> 1 0.106 0.106 0.277 0.510 0.437 0.615 0.631 +#> Becky Luna Rylee Jenna Deborah Claire Damian +#> 1 0.106 0.106 0.277 0.510 0.437 0.615 0.631 #> # ... with 4 more from this nodeset in the vector. # TASK: Can you create degree distributions for each of these? ``` @@ -161,7 +161,7 @@ ison_brandes %>% ```r ison_brandes %>% - add_node_attribute("color", node_is_min(node_closeness(ison_brandes))) %>% + add_node_attribute("color", node_is_max(node_closeness(ison_brandes))) %>% autographr(node_color = "color") ``` @@ -169,7 +169,7 @@ ison_brandes %>% ```r ison_brandes %>% - add_node_attribute("color", node_is_min(node_eigenvector(ison_brandes))) %>% + add_node_attribute("color", node_is_max(node_eigenvector(ison_brandes))) %>% autographr(node_color = "color") ``` diff --git a/vignettes/p3centrality.Rmd.orig b/vignettes/p3centrality.Rmd.orig index 2687a308..69196e3c 100644 --- a/vignettes/p3centrality.Rmd.orig +++ b/vignettes/p3centrality.Rmd.orig @@ -100,10 +100,10 @@ ison_brandes %>% add_node_attribute("color", node_is_max(node_betweenness(ison_brandes))) %>% autographr(node_color = "color") ison_brandes %>% - add_node_attribute("color", node_is_min(node_closeness(ison_brandes))) %>% + add_node_attribute("color", node_is_max(node_closeness(ison_brandes))) %>% autographr(node_color = "color") ison_brandes %>% - add_node_attribute("color", node_is_min(node_eigenvector(ison_brandes))) %>% + add_node_attribute("color", node_is_max(node_eigenvector(ison_brandes))) %>% autographr(node_color = "color") ``` diff --git a/vignettes/p4community.Rmd b/vignettes/p4community.Rmd new file mode 100644 index 00000000..77bab7c9 
--- /dev/null +++ b/vignettes/p4community.Rmd @@ -0,0 +1,564 @@ +--- +title: "4. Community" +author: "James Hollway" +output: rmarkdown::html_vignette +vignette: > + %\VignetteIndexEntry{4. Community} + %\VignetteEngine{knitr::rmarkdown} + %\VignetteEncoding{UTF-8} +--- + + + +# Working with Multiplex Networks + +The data we're going to use here is included in the `{migraph}` package. +This dataset is multiplex, meaning that it contains +several different types of ties: friendship, social and task interactions. + + +```r +# library(migraph) +data("ison_algebra", package = "migraph") +# ?migraph::ison_algebra +``` + +Note that you do not need to load the package using `library()` to get the data, +but you probably want to load it all the same so that you have access to all the functions. +Now you know how to create new matrices in R, load .csv files, +saved .RData files, and data from packages! + +The network is anonymous, but I think it would be nice to add some names, +even if it's just pretend. +Luckily, `{migraph}` has a function for this. +This makes plotting the network just a wee bit more accessible and interpretable: + + +```r +ison_algebra <- to_named(ison_algebra) +autographr(ison_algebra) +``` + +![](teaching/addingnames-1.png) + +Note that you will likely get a different set of names, +as they are assigned randomly from a pool of (American) first names. + +As a multiplex network, +there are actually three different types of tie in this network. 
+We can extract them and investigate them separately using `to_uniplex()`: + + +```r +(friends <- to_uniplex(ison_algebra, "friends")) +#> # A tbl_graph: 16 nodes and 62 edges +#> # +#> # A directed simple graph with 3 components +#> # +#> # Node Data: 16 × 1 (active) +#> name +#> +#> 1 Jude +#> 2 Tristan +#> 3 Max +#> 4 Colby +#> 5 Mathew +#> 6 Julia +#> # … with 10 more rows +#> # +#> # Edge Data: 62 × 3 +#> from to weight +#> +#> 1 2 1 1 +#> 2 2 7 1 +#> 3 2 8 1 +#> # … with 59 more rows +gfriend <- autographr(friends) + ggtitle("Friendship") +(social <- to_uniplex(ison_algebra, "social")) +#> # A tbl_graph: 16 nodes and 129 edges +#> # +#> # A directed simple graph with 1 component +#> # +#> # Node Data: 16 × 1 (active) +#> name +#> +#> 1 Jude +#> 2 Tristan +#> 3 Max +#> 4 Colby +#> 5 Mathew +#> 6 Julia +#> # … with 10 more rows +#> # +#> # Edge Data: 129 × 3 +#> from to weight +#> +#> 1 1 5 1.2 +#> 2 1 8 0.15 +#> 3 1 9 2.85 +#> # … with 126 more rows +gsocial <- autographr(social) + ggtitle("Social") +(tasks <- to_uniplex(ison_algebra, "tasks")) +#> # A tbl_graph: 16 nodes and 88 edges +#> # +#> # A directed simple graph with 1 component +#> # +#> # Node Data: 16 × 1 (active) +#> name +#> +#> 1 Jude +#> 2 Tristan +#> 3 Max +#> 4 Colby +#> 5 Mathew +#> 6 Julia +#> # … with 10 more rows +#> # +#> # Edge Data: 88 × 3 +#> from to weight +#> +#> 1 1 5 0.3 +#> 2 1 9 0.3 +#> 3 1 10 0.3 +#> # … with 85 more rows +gtask <- autographr(tasks) + ggtitle("Task") +gfriend + gsocial + gtask +``` + +![](teaching/separatingnets-1.png) + +Note also that these are weighted networks. +`autographr()` automatically registers these different weights and plots them. + +# Cohesion + +Let's concentrate on the task network for now and calculate a few basic +measures of cohesion: density, reciprocity, transitivity, and components. 
+ +## Density + +Because this is a directed network, we can calculate the density as: + + +```r +network_ties(tasks)/(network_nodes(tasks)*(network_nodes(tasks)-1)) +#> [1] 0.3666667 +``` + +but we can also just use the `{migraph}` function... + + +```r +network_density(tasks) +#> [1] 0.367 +``` + +Note that the various measures in `{migraph}` print results to three decimal places +by default, but the underlying value retains its full precision. +So same result... Is this high or low? + +## Closure + +Next let's calculate reciprocity. + + +```r +network_reciprocity(tasks) +#> [1] 0.932 +``` + +And let's calculate transitivity. + + +```r +network_transitivity(tasks) +#> [1] 0.568 +``` + +What can we say about task closure in this network? +Would we expect it to be higher or lower than for `social`? +Is it? + +## Components + +Now let's look at the friend network. + + +```r +network_components(friends) +#> [1] 4 +network_components(to_undirected(friends)) +#> [1] 3 +``` + +How many components are there? Why are the results different? + +We can use the membership vector in the resulting object to color nodes: + + +```r +friends <- friends %>% + mutate(weak_comp = node_components(to_undirected(friends)), + strong_comp = node_components(friends)) +autographr(friends, node_color = "weak_comp") + ggtitle("Weak components") + +autographr(friends, node_color = "strong_comp") + ggtitle("Strong components") +``` + +![](teaching/comp-memb-1.png) + +# Community Detection + +Ok, the friendship network has 3-4 components, but how many 'groups' are there? +Just visually, it looks like there are two denser clusters within the main component. + +Today we'll use the friend subgraph for exploring community detection methods. 
+For clarity and simplicity, +we will concentrate on the main component (the so-called 'giant' component) +and consider friendship undirected: + + +```r +(friends <- to_giant(friends)) +#> # A tbl_graph: 14 nodes and 62 edges +#> # +#> # A directed simple graph with 1 component +#> # +#> # Node Data: 14 × 3 (active) +#> name weak_comp strong_comp +#> +#> 1 Jude 1 4 +#> 2 Tristan 1 3 +#> 3 Max 1 3 +#> 4 Mathew 1 3 +#> 5 Julia 1 3 +#> 6 Joel 1 3 +#> # … with 8 more rows +#> # +#> # Edge Data: 62 × 3 +#> from to weight +#> +#> 1 2 1 1 +#> 2 2 6 1 +#> 3 2 7 1 +#> # … with 59 more rows +(friends <- to_undirected(friends)) +#> # A tbl_graph: 14 nodes and 42 edges +#> # +#> # An undirected simple graph with 1 component +#> # +#> # Node Data: 14 × 3 (active) +#> name weak_comp strong_comp +#> +#> 1 Jude 1 4 +#> 2 Tristan 1 3 +#> 3 Max 1 3 +#> 4 Mathew 1 3 +#> 5 Julia 1 3 +#> 6 Joel 1 3 +#> # … with 8 more rows +#> # +#> # Edge Data: 42 × 3 +#> from to weight +#> +#> 1 1 2 1 +#> 2 1 4 1 +#> 3 3 4 1 +#> # … with 39 more rows +autographr(friends) +``` + +![](teaching/manip-fri-1.png) + +Comparing `friends` before and after these operations, +you'll notice the number of ties decreases as reciprocated directed ties +are consolidated into single undirected ties, +and the number of nodes decreases as the couple of isolates are removed. + +There is no one single best community detection algorithm. +Instead there are several, each with their strengths and weaknesses. +Since this is a rather small network, we'll focus on the following methods: +walktrap, edge betweenness, and fast greedy. +`{igraph}` also includes others though too; all are named cluster_... +As you use them, consider how they portray clusters and consider which one(s) +afford a sensible view of the social world as cohesively organized. 
+ +## Walktrap + +This algorithm detects communities through a series of short random walks, +with the idea that nodes encountered on any given random walk +are more likely to be within a community than not. +It was proposed by Pons and Latapy (2005). + +The algorithm initially treats all nodes as communities of their own, then +merges them into larger communities, still larger communities, and so on. +In each step a new community is created from two other communities, +and its ID will be one larger than the largest community ID so far. +This means that before the first merge we have n communities +(the number of vertices in the graph) numbered from zero to n-1. +The first merge creates community n, the second community n+1, etc. +This merge history is returned by the function: +` # ?igraph::cluster_walktrap` + +Note the "steps=" argument that specifies the length of the random walks. +While `{igraph}` sets this to 4 by default, +which is what is recommended by Pons and Latapy, +Waugh et al (2009) found that for many groups (Congresses), +these lengths did not provide the maximum modularity score. +To be thorough in their attempts to optimize modularity, they ran the walktrap +algorithm 50 times for each group (using random walks of lengths 1–50) and +selected the network partition with the highest modularity value from those 50. +They call this the "maximum modularity partition" and insert the parenthetical +"(though, strictly speaking, this cannot be proven to be the optimum without +computationally-prohibitive exhaustive enumeration (Brandes et al. 2008))." + +So let's try and get a community classification using the walktrap algorithm +with path lengths of the random walks specified to be 50. 
+ + +```r +friend_wt <- node_walktrap(friends, times=50) +friend_wt # note that it prints pretty, but underlying it's just a vector: +#> 1 +#> 2, 6, 7, 12, 13 +#> 2 +#> 1, 3, 4, 5, 8, 9, 10, 11, 14 +c(friend_wt) +#> [1] 2 1 2 2 2 1 1 2 2 2 2 1 1 2 +``` + +This says that dividing the graph into 2 communities maximises modularity, +one with the nodes 2, 6, 7, 12, 13, and the other 1, 3, 4, 5, 8, 9, 10, 11, 14, +resulting in a modularity of 0.2695578. + +We can also visualise the clusters on the original network. +How does the following look? Plausible? + + +```r +friends <- friends %>% + mutate(walk_comm = friend_wt) +autographr(friends, node_color = "walk_comm") +``` + +![](teaching/walkplot-1.png) + +```r +# to be fancy, we could even draw the group borders around the nodes +autographr(friends, node_group = "walk_comm") +``` + +![](teaching/walkplot-2.png) + +```r +# or both! +autographr(friends, + node_color = "walk_comm", + node_group = "walk_comm") + + ggtitle("Walktrap", + subtitle = round(network_modularity(friends, friend_wt), 3)) +``` + +![](teaching/walkplot-3.png) + +This can be helpful when polygons overlap to better identify membership. +Or use node color and size to indicate other attributes... + +## Edge Betweenness + +Edge betweenness is like betweenness centrality but for ties not nodes. +The edge-betweenness score of an edge measures the number of +shortest paths from one vertex to another that go through it. + +The idea of the edge-betweenness based community structure detection is that +it is likely that edges connecting separate clusters have high edge-betweenness, +as all the shortest paths from one cluster to another must traverse through them. +So if we iteratively remove the edge with the highest edge-betweenness score +we will get a hierarchical map (dendrogram) of the communities in the graph. + +The following works similarly to walktrap, but no need to set a step length. 
+ + +```r +friend_eb <- node_edge_betweenness(friends) +#> Warning in +#> igraph::cluster_edge_betweenness(as_igraph(object)): +#> At core/community/edge_betweenness.c:493 : +#> Membership vector will be selected based on the +#> highest modularity score. +#> Warning in +#> igraph::cluster_edge_betweenness(as_igraph(object)): +#> At core/community/edge_betweenness.c:500 : +#> Modularity calculation with weighted edge +#> betweenness community detection might not make +#> sense -- modularity treats edge weights as +#> similarities while edge betwenness treats them +#> as distances. +friend_eb +#> 1 +#> 1, 3, 4, 5, 8, 9, 10, 11, 14 +#> 2 +#> 2, 6, 7, 12, 13 +``` + +How does community membership differ here from that found by walktrap? + +We can see how the edge betweenness community detection method works +here: http://jfaganuk.github.io/2015/01/24/basic-network-analysis/ + +To visualise the result: + + +```r +friends <- friends %>% + mutate(eb_comm = friend_eb) +autographr(friends, + node_color = "eb_comm", + node_group = "eb_comm") + + ggtitle("Edge-betweenness", + subtitle = round(network_modularity(friends, friend_eb), 3)) +``` + +![](teaching/ebplot-1.png) + +For more on this algorithm, see M Newman and M Girvan: Finding and +evaluating community structure in networks, Physical Review E 69, 026113 +(2004), https://arxiv.org/abs/cond-mat/0308217. + +## Fast Greedy + +This algorithm is the Clauset-Newman-Moore algorithm. +Whereas edge betweenness was divisive (top-down), +the fast greedy algorithm is agglomerative (bottom-up). + +At each step, the algorithm seeks a merge that would most increase modularity. +This is very fast, but has the disadvantage of being a greedy algorithm, +so it might not produce the best overall community partitioning, +although I personally find it both useful and in many cases quite "accurate". + + +```r +friend_fg <- node_fast_greedy(friends) +friend_fg # Does this result in a different community partition? 
+#> 1 +#> 3, 4, 5, 10 +#> 2 +#> 2, 6, 7, 12, 13 +#> 3 +#> 1, 8, 9, 11, 14 +network_modularity(friends, friend_fg) # Compare this to the edge betweenness procedure +#> [1] 0.28 + +# Again, we can visualise these communities in different ways: +friends <- friends %>% + mutate(fg_comm = friend_fg) +autographr(friends, + node_color = "fg_comm", + node_group = "fg_comm") + + ggtitle("Fast-greedy", + subtitle = round(network_modularity(friends, friend_fg), 3)) +``` + +![](teaching/fg-1.png) + +See A Clauset, MEJ Newman, C Moore: +Finding community structure in very large networks, +https://arxiv.org/abs/cond-mat/0408187 + +# Two-mode network: Southern women + +The next dataset is also available in migraph. +Let's take a look at the loaded objects. + + +```r +data("ison_southern_women") +ison_southern_women +#> IGRAPH f8d9f5f UN-B 32 93 -- +#> + attr: type (v/l), name (v/c) +#> + edges from f8d9f5f (vertex names): +#> [1] EVELYN --E1 EVELYN --E2 EVELYN --E3 +#> [4] EVELYN --E4 EVELYN --E5 EVELYN --E6 +#> [7] EVELYN --E8 EVELYN --E9 LAURA --E1 +#> [10] LAURA --E2 LAURA --E3 LAURA --E5 +#> [13] LAURA --E6 LAURA --E7 LAURA --E8 +#> [16] THERESA--E2 THERESA--E3 THERESA--E4 +#> [19] THERESA--E5 THERESA--E6 THERESA--E7 +#> [22] THERESA--E8 THERESA--E9 BRENDA --E1 +#> + ... omitted several edges +autographr(ison_southern_women, node_color = "type") +``` + +![](teaching/setup-women-1.png) + +```r +autographr(ison_southern_women, "railway", node_color = "type") +``` + +![](teaching/setup-women-2.png) + +## Project two-mode network into two one-mode networks + +Now what if we are only interested in one part of the network? +For that, we can obtain a 'projection' of the two-mode network. +There are two ways of doing this. +The hard way... 
+ + +```r +twomode_matrix <- as_matrix(ison_southern_women) +women_matrix <- twomode_matrix %*% t(twomode_matrix) +event_matrix <- t(twomode_matrix) %*% twomode_matrix +``` + +Or the easy way + + +```r +women_graph <- to_mode1(ison_southern_women) +autographr(women_graph) +``` + +![](teaching/easyway-1.png) + +```r +event_graph <- to_mode2(ison_southern_women) +autographr(event_graph) +``` + +![](teaching/easyway-2.png) + +`{migraph}` also includes several other options for how to construct the projection. +Please see the help file for more details. + + +```r +autographr(to_mode2(ison_southern_women, similarity = "jaccard")) + ggtitle("Jaccard") + +autographr(to_mode2(ison_southern_women, similarity = "rand")) + ggtitle("Rand") + +autographr(to_mode2(ison_southern_women, similarity = "pearson")) + ggtitle("Pearson") + +autographr(to_mode2(ison_southern_women, similarity = "yule")) + ggtitle("Yule's Q") +``` + +![](teaching/otherway-1.png) + +Which women/events 'bind' which events/women? +Let's return to the question of cohesion. + + +```r +network_equivalency(ison_southern_women) +#> [1] 0.487 +network_transitivity(women_graph) +#> [1] 0.928 +network_transitivity(event_graph) +#> [1] 0.831 +``` + +What do we learn from this? + +# Task/Unit Test + +1. What is the difference between communities and components? +2. Produce a plot comparing 3 community detection procedures used here on a +(women) projection of the ison_southern_women dataset. Identify which you prefer, and explain why. +3. Explain in no more than a paragraph why projection can lead to misleading transitivity measures. +4. Explain in no more than a paragraph how structural balance might lead to group identity. diff --git a/vignettes/p4community.Rmd.orig b/vignettes/p4community.Rmd.orig new file mode 100644 index 00000000..bae522c2 --- /dev/null +++ b/vignettes/p4community.Rmd.orig @@ -0,0 +1,352 @@ +--- +title: "4. 
Community" +author: "James Hollway" +output: rmarkdown::html_vignette +vignette: > + %\VignetteIndexEntry{4. Community} + %\VignetteEngine{knitr::rmarkdown} + %\VignetteEncoding{UTF-8} +--- + +```{r, include = FALSE} +#| purl = FALSE +knitr::opts_chunk$set( + collapse = TRUE, + comment = "#>", + fig.cap = "", + fig.path = "teaching/" +) +``` + +# Working with Multiplex Networks + +The data we're going to use here is included in the `{migraph}` package. +This dataset is multiplex, meaning that it contains +several different types of ties: friendship, social and task interactions. + +```{r setup} +# library(migraph) +data("ison_algebra", package = "migraph") +# ?migraph::ison_algebra +``` + +Note that you do not need to load the package using `library()` to get the data, +but you probably want to load it all the same so that you have access to all the functions. +Now you know how to create new matrices in R, load .csv files, +saved .RData files, and data from packages! + +The network is anonymous, but I think it would be nice to add some names, +even if it's just pretend. +Luckily, `{migraph}` has a function for this. +This makes plotting the network just a wee bit more accessible and interpretable: + +```{r addingnames} +ison_algebra <- to_named(ison_algebra) +autographr(ison_algebra) +``` + +Note that you will likely get a different set of names, +as they are assigned randomly from a pool of (American) first names. + +As a multiplex network, +there are actually three different types of tie in this network. 
+We can extract them and investigate them separately using `to_uniplex()`: + +```{r separatingnets} +(friends <- to_uniplex(ison_algebra, "friends")) +gfriend <- autographr(friends) + ggtitle("Friendship") +(social <- to_uniplex(ison_algebra, "social")) +gsocial <- autographr(social) + ggtitle("Social") +(tasks <- to_uniplex(ison_algebra, "tasks")) +gtask <- autographr(tasks) + ggtitle("Task") +gfriend + gsocial + gtask +``` + +Note also that these are weighted networks. +`autographr()` automatically registers these different weights and plots them. + +# Cohesion + +Let's concentrate on the task network for now and calculate a few basic +measures of cohesion: density, reciprocity, transitivity, and components. + +## Density + +Because this is a directed network, we can calculate the density as: + +```{r dens-explicit} +network_ties(tasks)/(network_nodes(tasks)*(network_nodes(tasks)-1)) +``` + +but we can also just use the `{migraph}` function... + +```{r dens} +network_density(tasks) +``` + +Note that the various measures in `{migraph}` print results to three decimal places +by default, but the underlying value retains its full precision. +So same result... Is this high or low? + +## Closure + +Next let's calculate reciprocity. + +```{r recip} +network_reciprocity(tasks) +``` + +And let's calculate transitivity. + +```{r trans} +network_transitivity(tasks) +``` + +What can we say about task closure in this network? +Would we expect it to be higher or lower than for `social`? +Is it? + +## Components + +Now let's look at the friend network. + +```{r comp-no} +network_components(friends) +network_components(to_undirected(friends)) +``` + +How many components are there? Why are the results different? 
+ +We can use the membership vector in the resulting object to color nodes: + +```{r comp-memb} +friends <- friends %>% + mutate(weak_comp = node_components(to_undirected(friends)), + strong_comp = node_components(friends)) +autographr(friends, node_color = "weak_comp") + ggtitle("Weak components") + +autographr(friends, node_color = "strong_comp") + ggtitle("Strong components") +``` + +# Community Detection + +Ok, the friendship network has 3-4 components, but how many 'groups' are there? +Just visually, it looks like there are two denser clusters within the main component. + +Today we'll use the friend subgraph for exploring community detection methods. +For clarity and simplicity, +we will concentrate on the main component (the so-called 'giant' component) +and consider friendship undirected: + +```{r manip-fri} +(friends <- to_giant(friends)) +(friends <- to_undirected(friends)) +autographr(friends) +``` + +Comparing `friends` before and after these operations, +you'll notice the number of ties decreases as reciprocated directed ties +are consolidated into single undirected ties, +and the number of nodes decreases as the couple of isolates are removed. + +There is no one single best community detection algorithm. +Instead there are several, each with their strengths and weaknesses. +Since this is a rather small network, we'll focus on the following methods: +walktrap, edge betweenness, and fast greedy. +`{igraph}` also includes others though too; all are named cluster_... +As you use them, consider how they portray clusters and consider which one(s) +afford a sensible view of the social world as cohesively organized. + +## Walktrap + +This algorithm detects communities through a series of short random walks, +with the idea that nodes encountered on any given random walk +are more likely to be within a community than not. +It was proposed by Pons and Latapy (2005). 
+ +The algorithm initially treats all nodes as communities of their own, then +merges them into larger communities, still larger communities, and so on. +In each step a new community is created from two other communities, +and its ID will be one larger than the largest community ID so far. +This means that before the first merge we have n communities +(the number of vertices in the graph) numbered from zero to n-1. +The first merge creates community n, the second community n+1, etc. +This merge history is returned by the function: +` # ?igraph::cluster_walktrap` + +Note the "steps=" argument that specifies the length of the random walks. +While `{igraph}` sets this to 4 by default, +which is what is recommended by Pons and Latapy, +Waugh et al (2009) found that for many groups (Congresses), +these lengths did not provide the maximum modularity score. +To be thorough in their attempts to optimize modularity, they ran the walktrap +algorithm 50 times for each group (using random walks of lengths 1–50) and +selected the network partition with the highest modularity value from those 50. +They call this the "maximum modularity partition" and insert the parenthetical +"(though, strictly speaking, this cannot be proven to be the optimum without +computationally-prohibitive exhaustive enumeration (Brandes et al. 2008))." + +So let's try and get a community classification using the walktrap algorithm +with path lengths of the random walks specified to be 50. + +```{r walk} +friend_wt <- node_walktrap(friends, times=50) +friend_wt # note that it prints pretty, but underlying it's just a vector: +c(friend_wt) +``` + +This says that dividing the graph into 2 communities maximises modularity, +one with the nodes `r which(friend_wt == 1)`, and the other `r which(friend_wt == 2)`, +resulting in a modularity of `r network_modularity(friends, friend_wt)`. + +We can also visualise the clusters on the original network. +How does the following look? Plausible? 
+ +```{r walkplot} +friends <- friends %>% + mutate(walk_comm = friend_wt) +autographr(friends, node_color = "walk_comm") +# to be fancy, we could even draw the group borders around the nodes +autographr(friends, node_group = "walk_comm") +# or both! +autographr(friends, + node_color = "walk_comm", + node_group = "walk_comm") + + ggtitle("Walktrap", + subtitle = round(network_modularity(friends, friend_wt), 3)) +``` + +This can be helpful when polygons overlap to better identify membership. +Or use node color and size to indicate other attributes... + +## Edge Betweenness + +Edge betweenness is like betweenness centrality but for ties not nodes. +The edge-betweenness score of an edge measures the number of +shortest paths from one vertex to another that go through it. + +The idea of the edge-betweenness based community structure detection is that +it is likely that edges connecting separate clusters have high edge-betweenness, +as all the shortest paths from one cluster to another must traverse through them. +So if we iteratively remove the edge with the highest edge-betweenness score +we will get a hierarchical map (dendrogram) of the communities in the graph. + +The following works similarly to walktrap, but no need to set a step length. + +```{r eb} +friend_eb <- node_edge_betweenness(friends) +friend_eb +``` + +How does community membership differ here from that found by walktrap? 
+ +We can see how the edge betweenness community detection method works +here: http://jfaganuk.github.io/2015/01/24/basic-network-analysis/ + +To visualise the result: + +```{r ebplot} +friends <- friends %>% + mutate(eb_comm = friend_eb) +autographr(friends, + node_color = "eb_comm", + node_group = "eb_comm") + + ggtitle("Edge-betweenness", + subtitle = round(network_modularity(friends, friend_eb), 3)) +``` + +For more on this algorithm, see M Newman and M Girvan: Finding and +evaluating community structure in networks, Physical Review E 69, 026113 +(2004), https://arxiv.org/abs/cond-mat/0308217. + +## Fast Greedy + +This algorithm is the Clauset-Newman-Moore algorithm. +Whereas edge betweenness was divisive (top-down), +the fast greedy algorithm is agglomerative (bottom-up). + +At each step, the algorithm seeks a merge that would most increase modularity. +This is very fast, but has the disadvantage of being a greedy algorithm, +so it might not produce the best overall community partitioning, +although I personally find it both useful and in many cases quite "accurate". + +```{r fg} +friend_fg <- node_fast_greedy(friends) +friend_fg # Does this result in a different community partition? +network_modularity(friends, friend_fg) # Compare this to the edge betweenness procedure + +# Again, we can visualise these communities in different ways: +friends <- friends %>% + mutate(fg_comm = friend_fg) +autographr(friends, + node_color = "fg_comm", + node_group = "fg_comm") + + ggtitle("Fast-greedy", + subtitle = round(network_modularity(friends, friend_fg), 3)) +``` + +See A Clauset, MEJ Newman, C Moore: +Finding community structure in very large networks, +https://arxiv.org/abs/cond-mat/0408187 + +# Two-mode network: Southern women + +The next dataset is also available in migraph. +Let's take a look at the loaded objects. 
+ +```{r setup-women} +data("ison_southern_women") +ison_southern_women +autographr(ison_southern_women, node_color = "type") +autographr(ison_southern_women, "railway", node_color = "type") +``` + +## Project two-mode network into two one-mode networks + +Now what if we are only interested in one part of the network? +For that, we can obtain a 'projection' of the two-mode network. +There are two ways of doing this. +The hard way... + +```{r hardway} +twomode_matrix <- as_matrix(ison_southern_women) +women_matrix <- twomode_matrix %*% t(twomode_matrix) +event_matrix <- t(twomode_matrix) %*% twomode_matrix +``` + +Or the easy way + +```{r easyway} +women_graph <- to_mode1(ison_southern_women) +autographr(women_graph) +event_graph <- to_mode2(ison_southern_women) +autographr(event_graph) +``` + +`{migraph}` also includes several other options for how to construct the projection. +Please see the help file for more details. + +```{r otherway} +autographr(to_mode2(ison_southern_women, similarity = "jaccard")) + ggtitle("Jaccard") + +autographr(to_mode2(ison_southern_women, similarity = "rand")) + ggtitle("Rand") + +autographr(to_mode2(ison_southern_women, similarity = "pearson")) + ggtitle("Pearson") + +autographr(to_mode2(ison_southern_women, similarity = "yule")) + ggtitle("Yule's Q") +``` + +Which women/events 'bind' which events/women? +Let's return to the question of cohesion. + +```{r twomode-cohesion} +network_equivalency(ison_southern_women) +network_transitivity(women_graph) +network_transitivity(event_graph) +``` + +What do we learn from this? + +# Task/Unit Test + +1. What is the difference between communities and components? +2. Produce a plot comparing 3 community detection procedures used here on a +(women) projection of the ison_southern_women dataset. Identify which you prefer, and explain why. +3. Explain in no more than a paragraph why projection can lead to misleading transitivity measures. +4. 
Explain in no more than a paragraph how structural balance might lead to group identity. diff --git a/vignettes/p7linearmodel.Rmd b/vignettes/p7linearmodel.Rmd index 7917851e..9419cde0 100644 --- a/vignettes/p7linearmodel.Rmd +++ b/vignettes/p7linearmodel.Rmd @@ -138,17 +138,13 @@ We can visualise this quite effectively using the `node_groups` argument: autographr(marvel_friends, node_group = "PowerOrigin", node_color = "Gender") -``` - -![](teaching/blaugroups-1.png) - -```r +#> Error in anchors[[i]]: subscript out of bounds autographr(marvel_friends, node_color = "Gender", node_size = "Intellect") ``` -![](teaching/blaugroups-2.png) +![](teaching/blaugroups-1.png) Ok, this tells us about how (un)even the distribution of these variables is in this network, but it doesn't necessarily tell us whether within this network there is homophily/heterophily. diff --git a/vignettes/p7linearmodel.Rmd.orig b/vignettes/p7linearmodel.Rmd.orig index d868dc12..45abf9b2 100644 --- a/vignettes/p7linearmodel.Rmd.orig +++ b/vignettes/p7linearmodel.Rmd.orig @@ -161,13 +161,13 @@ such as `test_random()`: ```{r rando} rand.gender <- test_random(marvel_friends, network_homophily, attribute = "Gender", - times = 20) + times = 200) rand.power <- test_random(marvel_friends, network_homophily, attribute = "PowerOrigin", - times = 20) + times = 200) rand.attract <- test_random(marvel_friends, network_homophily, attribute = "Attractive", - times = 20) + times = 200) plot(rand.gender) / plot(rand.power) / plot(rand.attract) diff --git a/vignettes/teaching/addingnames-1.png b/vignettes/teaching/addingnames-1.png index 7a95f155..990ba439 100644 Binary files a/vignettes/teaching/addingnames-1.png and b/vignettes/teaching/addingnames-1.png differ diff --git a/vignettes/teaching/comp-memb-1.png b/vignettes/teaching/comp-memb-1.png new file mode 100644 index 00000000..dd45bfa1 Binary files /dev/null and b/vignettes/teaching/comp-memb-1.png differ diff --git a/vignettes/teaching/constraintplot-1.png 
b/vignettes/teaching/constraintplot-1.png index 267fb16c..08251077 100644 Binary files a/vignettes/teaching/constraintplot-1.png and b/vignettes/teaching/constraintplot-1.png differ diff --git a/vignettes/teaching/easyway-1.png b/vignettes/teaching/easyway-1.png new file mode 100644 index 00000000..37ffdfd1 Binary files /dev/null and b/vignettes/teaching/easyway-1.png differ diff --git a/vignettes/teaching/easyway-2.png b/vignettes/teaching/easyway-2.png new file mode 100644 index 00000000..f0f4f0d1 Binary files /dev/null and b/vignettes/teaching/easyway-2.png differ diff --git a/vignettes/teaching/ebplot-1.png b/vignettes/teaching/ebplot-1.png new file mode 100644 index 00000000..9e968115 Binary files /dev/null and b/vignettes/teaching/ebplot-1.png differ diff --git a/vignettes/teaching/fg-1.png b/vignettes/teaching/fg-1.png new file mode 100644 index 00000000..9bed5eae Binary files /dev/null and b/vignettes/teaching/fg-1.png differ diff --git a/vignettes/teaching/ggid-1.png b/vignettes/teaching/ggid-1.png index a7a63b85..7a241310 100644 Binary files a/vignettes/teaching/ggid-1.png and b/vignettes/teaching/ggid-1.png differ diff --git a/vignettes/teaching/ggid-2.png b/vignettes/teaching/ggid-2.png index eadb9836..7ec9a98e 100644 Binary files a/vignettes/teaching/ggid-2.png and b/vignettes/teaching/ggid-2.png differ diff --git a/vignettes/teaching/ggid-3.png b/vignettes/teaching/ggid-3.png index b329dcea..94f57cb1 100644 Binary files a/vignettes/teaching/ggid-3.png and b/vignettes/teaching/ggid-3.png differ diff --git a/vignettes/teaching/ggid-4.png b/vignettes/teaching/ggid-4.png index 87d74ed4..9eaff157 100644 Binary files a/vignettes/teaching/ggid-4.png and b/vignettes/teaching/ggid-4.png differ diff --git a/vignettes/teaching/manip-fri-1.png b/vignettes/teaching/manip-fri-1.png new file mode 100644 index 00000000..b8767abf Binary files /dev/null and b/vignettes/teaching/manip-fri-1.png differ diff --git a/vignettes/teaching/multiplot-1.png 
b/vignettes/teaching/multiplot-1.png index 3a34a156..c0c5439b 100644 Binary files a/vignettes/teaching/multiplot-1.png and b/vignettes/teaching/multiplot-1.png differ diff --git a/vignettes/teaching/otherway-1.png b/vignettes/teaching/otherway-1.png new file mode 100644 index 00000000..17f10394 Binary files /dev/null and b/vignettes/teaching/otherway-1.png differ diff --git a/vignettes/teaching/perm-1.png b/vignettes/teaching/perm-1.png index d17504d3..0e36ab60 100644 Binary files a/vignettes/teaching/perm-1.png and b/vignettes/teaching/perm-1.png differ diff --git a/vignettes/teaching/separatingnets-1.png b/vignettes/teaching/separatingnets-1.png index b343be33..757a1ea1 100644 Binary files a/vignettes/teaching/separatingnets-1.png and b/vignettes/teaching/separatingnets-1.png differ diff --git a/vignettes/teaching/setup-women-1.png b/vignettes/teaching/setup-women-1.png new file mode 100644 index 00000000..8e90720c Binary files /dev/null and b/vignettes/teaching/setup-women-1.png differ diff --git a/vignettes/teaching/setup-women-2.png b/vignettes/teaching/setup-women-2.png new file mode 100644 index 00000000..c56641d7 Binary files /dev/null and b/vignettes/teaching/setup-women-2.png differ diff --git a/vignettes/teaching/walkplot-1.png b/vignettes/teaching/walkplot-1.png new file mode 100644 index 00000000..8ef14935 Binary files /dev/null and b/vignettes/teaching/walkplot-1.png differ diff --git a/vignettes/teaching/walkplot-2.png b/vignettes/teaching/walkplot-2.png new file mode 100644 index 00000000..867d4f7a Binary files /dev/null and b/vignettes/teaching/walkplot-2.png differ diff --git a/vignettes/teaching/walkplot-3.png b/vignettes/teaching/walkplot-3.png new file mode 100644 index 00000000..24cb2a4a Binary files /dev/null and b/vignettes/teaching/walkplot-3.png differ