From 8059b649e203ecd1881196c4ae5767f9c13e2f92 Mon Sep 17 00:00:00 2001
From: pere verges
Date: Wed, 8 Nov 2023 15:48:12 -0800
Subject: [PATCH] Experiment 1,2,3

---
 GraphHD_v2/graphhd_std_centrality.py | 512 +++++++++++++++++++++++++++
 1 file changed, 512 insertions(+)
 create mode 100644 GraphHD_v2/graphhd_std_centrality.py

diff --git a/GraphHD_v2/graphhd_std_centrality.py b/GraphHD_v2/graphhd_std_centrality.py
new file mode 100644
index 00000000..b8daca12
--- /dev/null
+++ b/GraphHD_v2/graphhd_std_centrality.py
@@ -0,0 +1,512 @@
+import torch
+import torch.nn as nn
+import torch.nn.functional as F
+from tqdm import tqdm
+
+# Note: this example requires the torch_geometric library: https://pytorch-geometric.readthedocs.io
+from torch_geometric.datasets import TUDataset
+from torch_geometric.utils import to_networkx
+from torch_geometric.data import DataLoader
+from torch_geometric.utils import degree
+
+# Note: this example requires the networkx library: https://networkx.org
+import networkx as nx
+
+# Note: this example requires the torchmetrics library: https://torchmetrics.readthedocs.io
+import torchmetrics
+
+import torchhd
+from torchhd import embeddings
+from torchhd.models import Centroid
+import csv
+import numpy as np
+import time
+
+csv_file = "metrics/result" + str(time.time()) + ".csv"
+DIM = 10000
+
+
+def experiment(randomness=0, embed="random", dataset="MUTAG", metric="page_rank"):
+    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
+    print("Using {} device".format(device))
+
+    DIMENSIONS = DIM  # hypervectors dimension
+
+    # for other available datasets see: https://pytorch-geometric.readthedocs.io/en/latest/notes/data_cheatsheet.html?highlight=tudatasets
+    # dataset = "MUTAG"
+
+    graphs = TUDataset("../data", dataset)
+    train_size = int(0.7 * len(graphs))
+    test_size = len(graphs) - train_size
+    train_ld, test_ld = torch.utils.data.random_split(graphs, [train_size, test_size])
+
+    def sparse_stochastic_graph(G):
+        """
+        Returns a sparse adjacency matrix of the graph G.
+        The values indicate the probability of leaving a vertex.
+        This means that each column sums up to one.
+        """
+        rows, columns = G.edge_index
+        # Calculate the probability for each column
+        values_per_column = 1.0 / torch.bincount(columns, minlength=G.num_nodes)
+        values_per_node = values_per_column[columns]
+        size = (G.num_nodes, G.num_nodes)
+        return torch.sparse_coo_tensor(G.edge_index, values_per_node, size)
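+
+    # Node-ranking helpers: each scores the nodes of one graph with a centrality
+    # measure (mostly via networkx) and returns the node ids in ascending order
+    # of score. The Encoder below maps this ranking onto its node-id hypervector
+    # codebook.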
+
+    def centrality(data):
+        degree_centrality = data.edge_index[0].bincount(minlength=data.num_nodes)
+        degree_ranked_nodes = sorted(range(data.num_nodes), key=lambda node: degree_centrality[node])
+        return degree_ranked_nodes
+
+    def semi_local_centrality(data):
+        G = nx.Graph()
+
+        for i in range(data.edge_index.size(1)):
+            edge = tuple(data.edge_index[:, i].tolist())
+            G.add_edge(*edge)
+
+        # Calculate semi-local centrality using a custom approach
+        semi_local_centrality = []
+
+        for node in G.nodes():
+            ego_graph = nx.ego_graph(G, node, radius=2)  # Adjust the radius (2 in this case)
+            semi_local_centrality.append(len(ego_graph))
+
+        # Store the semi-local centrality scores in the PyTorch Geometric Data object
+        data.semi_local_centrality = torch.tensor(semi_local_centrality)
+
+        # Rank nodes based on semi-local centrality
+        semi_local_ranked_nodes = sorted(G.nodes(), key=lambda node: semi_local_centrality[node])
+        return semi_local_ranked_nodes
+
+    def degree_centrality(data):
+        G = to_networkx(data)
+
+        scores = nx.degree_centrality(G)
+        scores_nodes = sorted(G.nodes(), key=lambda node: scores[node])
+
+        return scores_nodes
+
+    def eigen_centrality(data):
+        G = to_networkx(data)
+
+        scores = nx.eigenvector_centrality(G, max_iter=1000)
+        scores_nodes = sorted(G.nodes(), key=lambda node: scores[node])
+
+        return scores_nodes
+
+    def katz_centrality(data):
+        G = to_networkx(data)
+
+        beta = 0.1
+        scores = nx.katz_centrality(G, beta=beta)
+        scores_nodes = sorted(G.nodes(), key=lambda node: scores[node])
+
+        return scores_nodes
+
+    def closeness_centrality(data):
+        G = to_networkx(data)
+
+        scores = nx.closeness_centrality(G)
+        scores_nodes = sorted(G.nodes(), key=lambda node: scores[node])
+
+        return scores_nodes
+
+    def incremental_closeness_centrality(data):
+        G = to_networkx(data)
+        G = G.to_undirected()
+        G.add_edges_from(data.edge_index.t().tolist())
+
+        scores = nx.incremental_closeness_centrality(G, G.edges())
+        scores_nodes = sorted(G.nodes(), key=lambda node: scores[node])
+
+        return scores_nodes
+
+    def current_flow_closeness_centrality(data):
+        G = to_networkx(data)
+        G = G.to_undirected()
+        scores = nx.current_flow_closeness_centrality(G)
+        scores_nodes = sorted(G.nodes(), key=lambda node: scores[node])
+
+        return scores_nodes
+
+    def information_centrality(data):
+        G = to_networkx(data)
+        G = G.to_undirected()
+        scores = nx.information_centrality(G)
+        scores_nodes = sorted(G.nodes(), key=lambda node: scores[node])
+
+        return scores_nodes
+
+    def betweenness_centrality(data):
+        G = to_networkx(data)
+
+        scores = nx.betweenness_centrality(G)
+        scores_nodes = sorted(G.nodes(), key=lambda node: scores[node])
+
+        return scores_nodes
+
+    def edge_betweenness_centrality(data):
+        G = to_networkx(data)
+
+        scores = nx.edge_betweenness_centrality(G)
+        scores_nodes = sorted(G.nodes(), key=lambda node: scores[node])
+
+        return scores_nodes
+
+    def current_flow_betweeness_centrality(data):
+        G = to_networkx(data)
+        G = G.to_undirected()
+
+        scores = nx.current_flow_betweenness_centrality(G)
+        scores_nodes = sorted(G.nodes(), key=lambda node: scores[node])
+
+        return scores_nodes
+
+    def edge_current_flow_betweeness_centrality(data):
+        G = to_networkx(data)
+        G = G.to_undirected()
+
+        scores = nx.edge_current_flow_betweenness_centrality(G)
+        scores_nodes = sorted(G.nodes(), key=lambda node: scores[node])
+
+        return scores_nodes
+
+    def communicability_betweeness_centrality(data):
+        G = to_networkx(data)
+        G = G.to_undirected()
+
+        scores = nx.communicability_betweenness_centrality(G)
+        scores_nodes = sorted(G.nodes(), key=lambda node: scores[node])
+
+        return scores_nodes
+
+    def load_centrality(data):
+        G = to_networkx(data)
+
+        scores = nx.load_centrality(G)
+        scores_nodes = sorted(G.nodes(), key=lambda node: scores[node])
+
+        return scores_nodes
+
+    def edge_load_centrality(data):
+        G = to_networkx(data)
+
+        scores = nx.edge_load_centrality(G)
+        scores_nodes = sorted(G.nodes(), key=lambda node: scores[node])
+
+        return scores_nodes
+
+    def subgraph_centrality(data):
+        G = to_networkx(data)
+        G = G.to_undirected()
+
+        scores = nx.subgraph_centrality(G)
+        scores_nodes = sorted(G.nodes(), key=lambda node: scores[node])
+
+        return scores_nodes
+
+    def subgraph_centrality_exp(data):
+        G = to_networkx(data)
+        G = G.to_undirected()
+
+        scores = nx.subgraph_centrality_exp(G)
+        scores_nodes = sorted(G.nodes(), key=lambda node: scores[node])
+
+        return scores_nodes
+
+    def estrada_index(data):
+        G = to_networkx(data)
+        G = G.to_undirected()
+
+        scores = nx.estrada_index(G)
+        scores_nodes = sorted(G.nodes(), key=lambda node: scores[node])
+
+        return scores_nodes
+
+    def harmonic_centrality(data):
+        G = to_networkx(data)
+
+        scores = nx.harmonic_centrality(G)
+        scores_nodes = sorted(G.nodes(), key=lambda node: scores[node])
+
+        return scores_nodes
+
+    def dispersion(data):
+        G = to_networkx(data)
+
+        scores = nx.dispersion(G)
+        scores_nodes = sorted(G.nodes(), key=lambda node: scores[node])
+
+        return scores_nodes
+
+    def global_reaching_centrality(data):
+        G = to_networkx(data)
+
+        scores = nx.global_reaching_centrality(G)
+        scores_nodes = sorted(G.nodes(), key=lambda node: scores[node])
+
+        return scores_nodes
+
+    def percolation_centrality(data):
+        G = to_networkx(data)
+
+        scores = nx.percolation_centrality(G)
+        scores_nodes = sorted(G.nodes(), key=lambda node: scores[node])
+
+        return scores_nodes
+
+    def second_order_centrality(data):
+        G = to_networkx(data)
+        G = G.to_undirected()
+
+        scores = nx.second_order_centrality(G)
+        scores_nodes = sorted(G.nodes(), key=lambda node: scores[node])
+
+        return scores_nodes
+
+    def trophic_levels(data):
+        G = to_networkx(data)
+
+        scores = nx.trophic_levels(G)
+        scores_nodes = sorted(G.nodes(), key=lambda node: scores[node])
+
+        return scores_nodes
+
+    def trophic_differences(data):
+        G = to_networkx(data)
+
+        scores = nx.trophic_differences(G)
+        scores_nodes = sorted(G.nodes(), key=lambda node: scores[node])
+
+        return scores_nodes
+
+    def trophic_incoherence_parameter(data):
+        G = to_networkx(data)
+
+        scores = nx.trophic_incoherence_parameter(G)
+        scores_nodes = sorted(G.nodes(), key=lambda node: scores[node])
+
+        return scores_nodes
+
+    def voterank(data):
+        G = to_networkx(data)
+
+        scores = nx.voterank(G)
+        scores_nodes = sorted(G.nodes(), key=lambda node: scores[node])
+
+        return scores_nodes
+
+    def laplacian_centrality(data):
+        G = to_networkx(data)
+
+        scores = nx.laplacian_centrality(G)
+        scores_nodes = sorted(G.nodes(), key=lambda node: scores[node])
+
+        return scores_nodes
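+
+    # Power-iteration PageRank on the sparse column-stochastic matrix built by
+    # sparse_stochastic_graph: v <- alpha * S @ v + (1 - alpha) / N, stopping
+    # early once the L1 change in v drops below N * tol.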
+
+    def pagerank(G, alpha=0.85, max_iter=100, tol=1e-06):
+        N = G.num_nodes
+        M = sparse_stochastic_graph(G) * alpha
+        v = torch.zeros(N, device=G.edge_index.device) + 1 / N
+        p = torch.zeros(N, device=G.edge_index.device) + 1 / N
+        for _ in range(max_iter):
+            v_prev = v
+            v = M @ v + p * (1 - alpha)
+
+            err = (v - v_prev).abs().sum()
+            if tol is not None and err < N * tol:
+                return v
+
+        return v
+
+    def to_undirected(edge_index):
+        """
+        Returns the undirected edge_index
+        [[0, 1], [1, 0]] will result in [[0], [1]]
+        """
+        edge_index = edge_index.sort(dim=0)[0]
+        edge_index = torch.unique(edge_index, dim=1)
+        return edge_index
+
+    def min_max_graph_size(graph_dataset):
+        if len(graph_dataset) == 0:
+            return None, None
+
+        max_num_nodes = float("-inf")
+        min_num_nodes = float("inf")
+
+        for G in graph_dataset:
+            num_nodes = G.num_nodes
+            max_num_nodes = max(max_num_nodes, num_nodes)
+            min_num_nodes = min(min_num_nodes, num_nodes)
+
+        return min_num_nodes, max_num_nodes
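+
+    # Encoder: nodes are ranked with the selected centrality metric, the ranking
+    # is mapped onto the node-id codebook (Level embeddings by default, so nearby
+    # ranks receive similar hypervectors), each edge is the binding of its two
+    # endpoint hypervectors, and the graph hypervector is the multiset (bundle)
+    # of all edge hypervectors.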
+
+    class Encoder(nn.Module):
+        def __init__(self, out_features, size, metric):
+            super(Encoder, self).__init__()
+            self.out_features = out_features
+            self.metric = metric
+            if embed == "thermometer":
+                self.node_ids = embeddings.Thermometer(size, out_features, vsa=VSA)
+            elif embed == "circular":
+                self.node_ids = embeddings.Circular(size, out_features, vsa=VSA)
+            elif embed == "projection":
+                self.node_ids = embeddings.Projection(size, out_features, vsa=VSA)
+            elif embed == "sinusoid":
+                self.node_ids = embeddings.Sinusoid(size, out_features, vsa=VSA)
+            elif embed == "density":
+                self.node_ids = embeddings.Density(size, out_features, vsa=VSA)
+            else:
+                self.node_ids = embeddings.Level(size, out_features, vsa=VSA)
+
+        def forward(self, x):
+            if metric == "degree_centrality":
+                order = degree_centrality(x)
+            elif metric == "eigen_centrality":
+                order = eigen_centrality(x)
+            elif metric == "katz_centrality":
+                order = katz_centrality(x)
+            elif metric == "closeness_centrality":
+                order = closeness_centrality(x)
+            elif metric == "current_flow_closeness_centrality":
+                order = current_flow_closeness_centrality(x)
+            elif metric == "information_centrality":
+                order = information_centrality(x)
+            elif metric == "betweenness_centrality":
+                order = betweenness_centrality(x)
+            elif metric == "current_flow_betweeness_centrality":
+                order = current_flow_betweeness_centrality(x)
+            elif metric == "communicability_betweeness_centrality":
+                order = communicability_betweeness_centrality(x)
+            elif metric == "load_centrality":
+                order = load_centrality(x)
+            elif metric == "subgraph_centrality":
+                order = subgraph_centrality(x)
+            elif metric == "subgraph_centrality_exp":
+                order = subgraph_centrality_exp(x)
+            elif metric == "harmonic_centrality":
+                order = harmonic_centrality(x)
+            elif metric == "second_order_centrality":
+                order = second_order_centrality(x)
+            elif metric == "trophic_levels":
+                order = trophic_levels(x)
+            elif metric == "laplacian_centrality":
+                order = laplacian_centrality(x)
+            elif metric == "none":
+                order = list(range(x.num_nodes))
+            else:
+                pr = pagerank(x)
+                pr_sort, order = pr.sort()
+
+            node_id_hvs = torchhd.empty(x.num_nodes, self.out_features, VSA)
+            node_id_hvs[order] = self.node_ids.weight[: x.num_nodes]
+
+            row, col = to_undirected(x.edge_index)
+
+            hvs = torchhd.bind(node_id_hvs[row], node_id_hvs[col])
+            return torchhd.multiset(hvs)
+
+    min_graph_size, max_graph_size = min_max_graph_size(graphs)
+    encode = Encoder(DIMENSIONS, max_graph_size, metric)
+    encode = encode.to(device)
+
+    model = Centroid(DIMENSIONS, graphs.num_classes, VSA)
+    model = model.to(device)
+
+    train_t = time.time()
+    with torch.no_grad():
+        for samples in tqdm(train_ld, desc="Training"):
+            samples.edge_index = samples.edge_index.to(device)
+            samples.y = samples.y.to(device)
+
+            samples_hv = encode(samples).unsqueeze(0)
+            model.add(samples_hv, samples.y)
+    train_t = time.time() - train_t
+
+    accuracy = torchmetrics.Accuracy("multiclass", num_classes=graphs.num_classes)
+    # f1 = torchmetrics.F1Score(num_classes=graphs.num_classes, average='macro', multiclass=True)
+    f1 = torchmetrics.F1Score("multiclass", num_classes=graphs.num_classes)
+
+    test_t = time.time()
+    with torch.no_grad():
+        if VSA != "BSC":
+            model.normalize()
+
+        for samples in tqdm(test_ld, desc="Testing"):
+            samples.edge_index = samples.edge_index.to(device)
+
+            samples_hv = encode(samples).unsqueeze(0)
+            outputs = model(samples_hv, dot=True)
+            accuracy.update(outputs.cpu(), samples.y)
+            f1.update(outputs.cpu(), samples.y)
+    test_t = time.time() - test_t
+
+    acc = accuracy.compute().item() * 100
+    f = f1.compute().item() * 100
+    return acc, f, train_t, test_t
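+
+
+# Sweep configuration: every combination of VSA model, dataset, and node-ranking
+# metric is run REPETITIONS times; the averaged accuracy, F1, and train/test times
+# are appended to the CSV file declared at the top of the script.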
+REPETITIONS = 50
+RANDOMNESS = ["random"]
+# DATASET = ["PTC_FM", "MUTAG", "NCI1", "ENZYMES", "PROTEINS", "DD"]
+METRICS = [
+    "none",
+    "page_rank",
+    "degree_centrality",
+    "closeness_centrality",
+    "betweenness_centrality",
+    "load_centrality",
+    "subgraph_centrality",
+    "subgraph_centrality_exp",
+    "harmonic_centrality",
+]
+DATASET = ["PTC_FM", "MUTAG", "NCI1", "ENZYMES", "PROTEINS", "DD"]
+# VSAS = ["BSC", "MAP", "HRR", "FHRR"]
+VSAS = ["FHRR"]
+
+
+for VSA in VSAS:
+    for d in DATASET:
+        for METRIC in METRICS:
+            acc_final = []
+            f1_final = []
+            train_final = []
+            test_final = []
+            for i in RANDOMNESS:
+                acc_aux = []
+                f1_aux = []
+                train_aux = []
+                test_aux = []
+                for j in range(REPETITIONS):
+                    acc, f1, train_t, test_t = experiment(1, i, d, METRIC)
+                    acc_aux.append(acc)
+                    f1_aux.append(f1)
+                    train_aux.append(train_t)
+                    test_aux.append(test_t)
+                acc_final.append(round(sum(acc_aux) / REPETITIONS, 2))
+                f1_final.append(round(sum(f1_aux) / REPETITIONS, 2))
+                train_final.append(round(sum(train_aux) / REPETITIONS, 2))
+                test_final.append(round(sum(test_aux) / REPETITIONS, 2))
+
+            with open(csv_file, mode="a", newline="") as file:
+                writer = csv.writer(file)
+                writer.writerow(
+                    [
+                        "dataset",
+                        "dimensions",
+                        "train_time",
+                        "test_time",
+                        "accuracy",
+                        "f1",
+                        "VSA",
+                        "metric",
+                    ]
+                )
+                writer.writerows(
+                    [
+                        [
+                            d,
+                            DIM,
+                            train_final[0],
+                            test_final[0],
+                            acc_final[0],
+                            f1_final[0],
+                            VSA,
+                            METRIC,
+                        ]
+                    ]
+                )
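
The snippet below is a minimal, self-contained sketch (not part of the patch) of the encoding performed by Encoder.forward: nodes are ranked, the ranking is mapped onto a Level codebook, every edge becomes the binding of its endpoint hypervectors, and the graph hypervector is the multiset of those edge hypervectors. It reuses only torchhd calls that already appear in the patch (embeddings.Level, torchhd.empty, torchhd.bind, torchhd.multiset); the toy graph, the hand-written rankings, the encode_graph helper, and the final cosine_similarity check are illustrative assumptions, not part of the experiments.

    import torch
    import torchhd
    from torchhd import embeddings

    DIM = 10000        # hypervector dimensionality, as in the patch
    MAX_NODES = 8      # codebook capacity for this toy example

    node_ids = embeddings.Level(MAX_NODES, DIM)  # rank k -> k-th level hypervector

    def encode_graph(edge_index, num_nodes, order):
        # order[k] is the node that holds rank k (ascending centrality)
        node_hvs = torchhd.empty(num_nodes, DIM)
        node_hvs[order] = node_ids.weight[:num_nodes]
        row, col = edge_index
        edge_hvs = torchhd.bind(node_hvs[row], node_hvs[col])  # one hypervector per edge
        return torchhd.multiset(edge_hvs)                      # bundle all edges

    # toy path graph 0-1-2-3 with one direction kept per undirected edge
    edge_index = torch.tensor([[0, 1, 2], [1, 2, 3]])
    with torch.no_grad():  # mirror the patch: encoding needs no gradients
        g_a = encode_graph(edge_index, 4, order=[1, 2, 0, 3])  # one node ranking
        g_b = encode_graph(edge_index, 4, order=[2, 1, 0, 3])  # a slightly different ranking
    print(torchhd.cosine_similarity(g_a, g_b))  # similar structure and ranking -> high similarity

Because the level codebook assigns nearby ranks similar hypervectors, small changes in the centrality ordering only slightly perturb the graph hypervector, which is what makes the centrality-based orderings comparable in the sweep above.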