Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add function to find densest subgraph #635

Open
wants to merge 17 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
31 changes: 31 additions & 0 deletions releasenotes/notes/densest_subgraph-1b068f69f80facd4.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
---
features:
- |
Added a new function, :func:`~.densest_subgraph_of_size`, which is used to return a
subgraph of a given size that has the highest degree of connectivity between the nodes.
For example, if you wanted to find the densest subgraph of 5 nodes in a hexagonal
lattice graph built with ``hexagonal_lattice_graph(4, 5)``:

.. jupyter-execute::

import rustworkx as rx
from rustworkx.visualization import mpl_draw

# Build the example graph to search.
graph = rx.generators.hexagonal_lattice_graph(4, 5)

# Ask for a 5-node subgraph; node_map is used below to test whether a node
# index of the input graph was selected.
subgraph, node_map = rx.densest_subgraph_of_size(graph, 5)
# NOTE(review): subgraph_edge_set is not used anywhere below.
subgraph_edge_set = set(subgraph.edge_list())
# Color the selected nodes red and every other node blue.
node_colors = []
for node in graph.node_indices():
    if node in node_map:
        node_colors.append('red')
    else:
        node_colors.append('blue')
    # Store the node index as the node payload so that labels=str draws it.
    graph[node] = node
# Color an edge red only when both of its endpoints were selected.
edge_colors = []
for edge in graph.edge_list():
    if edge[0] in node_map and edge[1] in node_map:
        edge_colors.append('red')
    else:
        edge_colors.append('blue')
mpl_draw(graph, with_labels=True, node_color=node_colors, edge_color=edge_colors, labels=str)
212 changes: 212 additions & 0 deletions rustworkx-core/src/dense_subgraph.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,212 @@
// Licensed under the Apache License, Version 2.0 (the "License"); you may
// not use this file except in compliance with the License. You may obtain
// a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
// WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
// License for the specific language governing permissions and limitations
// under the License.

use hashbrown::{HashMap, HashSet};
use std::hash::Hash;

use petgraph::prelude::*;
use petgraph::visit::{
EdgeCount, GraphProp, IntoEdgeReferences, IntoNeighbors, IntoNodeIdentifiers, NodeCount,
Visitable,
};

use rayon::prelude::*;

/// One candidate subgraph together with the scores used to rank it.
struct SubsetResult<N> {
    /// Number of neighbor links found between the selected nodes.
    count: usize,
    /// Weight-derived score of the candidate; lower is better.
    error: f64,
    /// The nodes that make up the candidate subgraph.
    map: Vec<N>,
}

/// Find the most densely connected k-subgraph
///
/// This function will return the node indices of the subgraph of `num_nodes` that is the
/// most densely connected.
///
/// This method does not provide any guarantees on the approximation as it
/// does a naive search using BFS traversal: every node is used once as a BFS
/// root, the first `num_nodes` visited nodes form a candidate, and the
/// candidate with the most internal connections is kept.
///
/// # Arguments
///
/// * `graph` - The graph to find densest subgraph in.
/// * `num_nodes` - The number of nodes in the subgraph to find
/// * `edge_weight_callback` - An optional callable that if specified will be
///   passed each edge reference of the graph and it is expected to
///   return a float value. If specified the lowest avg weight for edges in
///   a found subgraph will be a criteria for selection in addition to the
///   connectivity of the subgraph.
/// * `node_weight_callback` - An optional callable that if specified will be
///   passed the node id of each node in the graph and it is expected to
///   return a float value. If specified the lowest avg weight for node of
///   a found subgraph will be a criteria for selection in addition to the
///   connectivity of the subgraph.
///
/// # Returns
///
/// The node ids of the selected subgraph in BFS visitation order. The vector
/// is empty when `num_nodes` is `0`, when `num_nodes` exceeds the number of
/// nodes in the graph, or when no candidate was selected.
///
/// # Errors
///
/// The first error returned by either callback is propagated unchanged.
///
/// # Example:
///
/// ```rust
/// use std::convert::Infallible;
/// use rustworkx_core::petgraph::stable_graph::{StableDiGraph, NodeIndex};
/// use rustworkx_core::petgraph::visit::IntoEdgeReferences;
/// use rustworkx_core::generators::grid_graph;
/// use rustworkx_core::dense_subgraph::densest_subgraph;
///
/// type EdgeWeightType = Box<dyn FnMut(<&StableDiGraph<(), ()> as IntoEdgeReferences>::EdgeRef) -> Result<f64, Infallible>>;
/// type NodeWeightType = Box<dyn FnMut(NodeIndex) -> Result<f64, Infallible>>;
///
/// let graph: StableDiGraph<(), ()> = grid_graph(
///     Some(10),
///     Some(10),
///     None,
///     || {()},
///     || {()},
///     false
/// ).unwrap();
/// let subgraph_nodes = densest_subgraph(&graph, 10, None::<EdgeWeightType>, None::<NodeWeightType>).unwrap();
///
/// let expected = vec![
///     NodeIndex::new(7), NodeIndex::new(8), NodeIndex::new(17), NodeIndex::new(9),
///     NodeIndex::new(18), NodeIndex::new(27), NodeIndex::new(19), NodeIndex::new(28),
///     NodeIndex::new(37), NodeIndex::new(29)
/// ];
///
/// assert_eq!(subgraph_nodes, expected);
/// ```
pub fn densest_subgraph<G, H, F, E>(
    graph: G,
    num_nodes: usize,
    edge_weight_callback: Option<H>,
    node_weight_callback: Option<F>,
) -> Result<Vec<G::NodeId>, E>
where
    G: IntoNodeIdentifiers
        + IntoEdgeReferences
        + EdgeCount
        + GraphProp
        + NodeCount
        + IntoNeighbors
        + Visitable
        + Sync,
    G::NodeId: Eq + Hash + Send + Sync,
    F: FnMut(G::NodeId) -> Result<f64, E>,
    H: FnMut(G::EdgeRef) -> Result<f64, E>,
{
    let node_indices: Vec<G::NodeId> = graph.node_identifiers().collect();

    // Evaluate the edge callback once per edge, keyed by `[source, target]`.
    // Undirected graphs get both orientations so lookups by `[node, neighbor]`
    // succeed whichever endpoint the traversal starts from.
    let edge_weight_map: Option<HashMap<[G::NodeId; 2], f64>> = match edge_weight_callback {
        Some(mut callback) => {
            let mut weights = HashMap::with_capacity(graph.edge_count());
            for edge in graph.edge_references() {
                let source = edge.source();
                let target = edge.target();
                let weight = callback(edge)?;
                weights.insert([source, target], weight);
                if !graph.is_directed() {
                    weights.insert([target, source], weight);
                }
            }
            Some(weights)
        }
        None => None,
    };

    // Evaluate the node callback once per node and record the graph-wide
    // average, which is the baseline a candidate's own average is compared to.
    let mut avg_node_error: f64 = 0.;
    let node_weight_map: Option<HashMap<G::NodeId, f64>> = match node_weight_callback {
        Some(mut callback) => {
            let mut weights = HashMap::with_capacity(graph.node_count());
            for node in graph.node_identifiers() {
                let weight = callback(node)?;
                avg_node_error += weight;
                weights.insert(node, weight);
            }
            avg_node_error /= graph.node_count() as f64;
            Some(weights)
        }
        None => None,
    };

    // Nothing can be selected for an empty request, and no BFS can ever
    // collect more nodes than the graph holds. Returning here (after the
    // callbacks ran, so their errors still propagate) also avoids allocating
    // `num_nodes`-sized buffers in every parallel task.
    if num_nodes == 0 || num_nodes > node_indices.len() {
        return Ok(Vec::new());
    }

    let use_weights = edge_weight_map.is_some() || node_weight_map.is_some();

    let reduce_identity_fn = || -> SubsetResult<G::NodeId> {
        SubsetResult {
            count: 0,
            map: Vec::new(),
            error: f64::INFINITY,
        }
    };

    // With weights a candidate replaces the current best only if it is at
    // least as connected AND has no larger error; without weights it must be
    // strictly better connected, so ties keep the current best.
    let reduce_fn =
        |best: SubsetResult<G::NodeId>, curr: SubsetResult<G::NodeId>| -> SubsetResult<G::NodeId> {
            let curr_wins = if use_weights {
                curr.count >= best.count && curr.error <= best.error
            } else {
                curr.count > best.count
            };
            if curr_wins {
                curr
            } else {
                best
            }
        };

    let best_result = node_indices
        .into_par_iter()
        .filter_map(|index| {
            // Take the first `num_nodes` nodes in BFS order from this root; a
            // root that cannot reach that many nodes yields no candidate.
            let mut bfs = Bfs::new(&graph, index);
            let mut bfs_vec: Vec<G::NodeId> = Vec::with_capacity(num_nodes);
            let mut bfs_set: HashSet<G::NodeId> = HashSet::with_capacity(num_nodes);
            while bfs_vec.len() < num_nodes {
                let node = bfs.next(&graph)?;
                bfs_vec.push(node);
                bfs_set.insert(node);
            }

            // Every neighbor link that stays inside the selected node set.
            let mut subgraph: Vec<[G::NodeId; 2]> = Vec::with_capacity(num_nodes);
            for node in &bfs_vec {
                subgraph.extend(
                    graph
                        .neighbors(*node)
                        .filter(|nbr| bfs_set.contains(nbr))
                        .map(|nbr| [*node, nbr]),
                );
            }

            // Average edge weight of the candidate. Without an edge callback
            // the neutral factor 1 is used so that a node-only weighting still
            // contributes to the product below (a base of 0 would erase it).
            // NOTE: a candidate without any internal link (e.g. a single node
            // without a self loop) gives 0/0 = NaN here, which never compares
            // `<=` and is therefore never selected by `reduce_fn`.
            let edge_error = match &edge_weight_map {
                Some(map) => {
                    subgraph.iter().map(|edge| map[edge]).sum::<f64>() / subgraph.len() as f64
                }
                None => 1.,
            };
            // Candidates whose average node weight is above the graph-wide
            // average are scaled by `num_nodes * excess`; all others are left
            // unscaled.
            let node_penalty = match &node_weight_map {
                Some(map) => {
                    let subgraph_node_error_avg =
                        bfs_vec.iter().map(|node| map[node]).sum::<f64>() / num_nodes as f64;
                    let node_error_diff = subgraph_node_error_avg - avg_node_error;
                    if node_error_diff > 0. {
                        num_nodes as f64 * node_error_diff
                    } else {
                        1.
                    }
                }
                None => 1.,
            };

            Some(SubsetResult {
                count: subgraph.len(),
                error: edge_error * node_penalty,
                map: bfs_vec,
            })
        })
        .reduce(reduce_identity_fn, reduce_fn);
    Ok(best_result.map)
}
1 change: 1 addition & 0 deletions rustworkx-core/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -109,6 +109,7 @@ pub mod planar;
pub mod shortest_path;
pub mod traversal;
// These modules define additional data structures
pub mod dense_subgraph;
pub mod dictmap;
pub mod distancemap;
mod min_scored;
Expand Down
27 changes: 27 additions & 0 deletions rustworkx/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -1873,6 +1873,33 @@ def longest_simple_path(graph):
found in the graph. If the graph is empty ``None`` will be returned instead.
:rtype: NodeIndices
"""
raise TypeError("Invalid Input Type %s for graph" % type(graph))


@_rustworkx_dispatch
def densest_subgraph_of_size(
    graph, num_nodes, /, edge_weight_callback=None, node_weight_callback=None
):
    """Find a connected and dense subgraph of a given size in a graph.

    This method does not provide any guarantees on the approximation of the optimal solution as it
    does a naive search using BFS traversal.

    :param graph: The graph to find the densest subgraph in. This can be a
        :class:`~rustworkx.PyGraph` or a :class:`~rustworkx.PyDiGraph`.
    :param int num_nodes: The number of nodes in the subgraph to find
    :param func edge_weight_callback: An optional callable that if specified
        will be called for each edge in the graph and it is expected to
        return a float value. If specified the lowest avg weight for edges in
        a found subgraph will be a criteria for selection in addition to the
        connectivity of the subgraph.
    :param func node_weight_callback: An optional callable that if specified
        will be called for each node in the graph and it is expected to
        return a float value. If specified the lowest avg weight for nodes in
        a found subgraph will be a criteria for selection in addition to the
        connectivity of the subgraph.
    :returns: A tuple of the subgraph found and a :class:`~.NodeMap` of the
        mapping of node indices in the input ``graph`` to the index in the
        output subgraph.

    :rtype: (subgraph, node_map)
    """
    # Fallback body of the dispatch function: raises for any graph argument
    # that reaches it.
    raise TypeError("Invalid Input Type %s for graph" % type(graph))


@_rustworkx_dispatch
Expand Down
9 changes: 9 additions & 0 deletions rustworkx/__init__.pyi
Original file line number Diff line number Diff line change
Expand Up @@ -232,6 +232,8 @@ from .rustworkx import steiner_tree as steiner_tree
from .rustworkx import metric_closure as metric_closure
from .rustworkx import digraph_union as digraph_union
from .rustworkx import graph_union as graph_union
from .rustworkx import digraph_densest_subgraph_of_size as digraph_densest_subgraph_of_size
from .rustworkx import graph_densest_subgraph_of_size as graph_densest_subgraph_of_size
from .rustworkx import NodeIndices as NodeIndices
from .rustworkx import PathLengthMapping as PathLengthMapping
from .rustworkx import PathMapping as PathMapping
Expand Down Expand Up @@ -602,3 +604,10 @@ def longest_simple_path(graph: PyGraph[_S, _T] | PyDiGraph[_S, _T]) -> NodeIndic
def isolates(graph: PyGraph[_S, _T] | PyDiGraph[_S, _T]) -> NodeIndices: ...
def two_color(graph: PyGraph[_S, _T] | PyDiGraph[_S, _T]) -> dict[int, int]: ...
def is_bipartite(graph: PyGraph[_S, _T] | PyDiGraph[_S, _T]) -> bool: ...
# Stub for the dispatching entry point: accepts either graph class. The edge
# callback is typed over the edge payload (_T) and the node callback over the
# node payload (_S).
# NOTE(review): the return is annotated as PyGraph even when a PyDiGraph is
# passed in - confirm against the digraph implementation.
def densest_subgraph_of_size(
    graph: PyGraph[_S, _T] | PyDiGraph[_S, _T],
    num_nodes: int,
    /,
    edge_weight_callback: Callable[[_T], float] | None = ...,
    node_weight_callback: Callable[[_S], float] | None = ...,
) -> tuple[PyGraph[_S, _T], NodeMap]: ...
17 changes: 17 additions & 0 deletions rustworkx/rustworkx.pyi
Original file line number Diff line number Diff line change
Expand Up @@ -1012,6 +1012,23 @@ def graph_union(
merge_edges: bool = ...,
) -> PyGraph[_S, _T]: ...

# Densest Subgraph

# Undirected variant. The node callback is typed over the node payload (_S),
# matching the dispatching ``densest_subgraph_of_size`` stub; the edge callback
# is typed over the edge payload (_T).
def graph_densest_subgraph_of_size(
    graph: PyGraph[_S, _T],
    num_nodes: int,
    /,
    edge_weight_callback: Callable[[_T], float] | None = ...,
    node_weight_callback: Callable[[_S], float] | None = ...,
) -> tuple[PyGraph[_S, _T], NodeMap]: ...
# Directed variant. The node callback is typed over the node payload (_S),
# matching the dispatching ``densest_subgraph_of_size`` stub, and the returned
# subgraph is annotated with the same graph class as the input.
# NOTE(review): the PyDiGraph return type assumes the binding returns a
# directed subgraph - confirm against the Rust implementation.
def digraph_densest_subgraph_of_size(
    graph: PyDiGraph[_S, _T],
    num_nodes: int,
    /,
    edge_weight_callback: Callable[[_T], float] | None = ...,
    node_weight_callback: Callable[[_S], float] | None = ...,
) -> tuple[PyDiGraph[_S, _T], NodeMap]: ...

# Iterators

_T_co = TypeVar("_T_co", covariant=True)
Expand Down
Loading