From 1d73ed7b81c7198cd59b6686ba2aa30c1f7193de Mon Sep 17 00:00:00 2001 From: Ronakshoghi Date: Mon, 11 Dec 2023 21:32:50 -0500 Subject: [PATCH] #Active Learning with distance penalty using a repulsive method to encourage exploration. --- examples/active_learning/QBC_SVC.py | 4 +- examples/active_learning/QBC_SVC_Batch.py | 17 ++-- examples/active_learning/Repulsive_Method.py | 84 ++++++++++++++++++++ 3 files changed, 91 insertions(+), 14 deletions(-) create mode 100644 examples/active_learning/Repulsive_Method.py diff --git a/examples/active_learning/QBC_SVC.py b/examples/active_learning/QBC_SVC.py index f4a1514..164c601 100644 --- a/examples/active_learning/QBC_SVC.py +++ b/examples/active_learning/QBC_SVC.py @@ -6,9 +6,9 @@ we prioritize data points where model predictions show high disagreement, leading to reduced variance in predictions. -Authers: Ronak Shoghi1, Lukas Morand2, Alexandere Hartmaier1 +Authors: Ronak Shoghi1, Lukas Morand2, Alexandere Hartmaier1 1: [ICAMS/Ruhr University Bochum, Germany] -2: [Fraunhofer Institute for Mechanics of Materials (IWM)] +2: [Fraunhofer Institute for Mechanics of Materials IWM, Freiburg, Germany] July 2023 """ diff --git a/examples/active_learning/QBC_SVC_Batch.py b/examples/active_learning/QBC_SVC_Batch.py index 1c04c98..eb79f4b 100644 --- a/examples/active_learning/QBC_SVC_Batch.py +++ b/examples/active_learning/QBC_SVC_Batch.py @@ -5,15 +5,15 @@ in training Machine Learning yield functions. Using the Query-By-Committee algorithm we prioritize data points where model predictions show high disagreement, leading to reduced variance in predictions. +A key enhancement in this approach is the application of a distance penalty. This feature aims to encourage exploration +of the model in the feature space by penalizing new data points that are too close to existing points. +Such a strategy ensures a more diverse and comprehensive sampling of the data space, +leading to potentially more robust and generalized models. 
-Authers: Ronak Shoghi1, Lukas Morand2, Alexandere Hartmaier1 -1: [ICAMS/Ruhr University Bochum, Germany] -2: [Fraunhofer Institute for Mechanics of Materials (IWM)] -July 2023 +December 2023 """ import sys -import numpy as np from scipy.spatial import distance_matrix sys.path.append('src/data-gen') sys.path.append('src/verify') @@ -27,7 +27,6 @@ from sklearn.metrics import classification_report from scipy.optimize import differential_evolution import matplotlib.pyplot as plt - matplotlib.use('Agg') print('pyLabFEA version', FE.__version__) @@ -44,10 +43,8 @@ def apply_repulsion(points, k, iterations, learning_rate): distances = distance_matrix(points, points) np.fill_diagonal(distances, np.inf) indices = np.argsort(distances, axis=1)[:, :k] # Indices of k nearest neighbors - # Initialize displacement vector displacement = np.zeros_like(points) - # Calculate repulsion from each of the k nearest neighbors for i, point in enumerate(points): neighbors = points[indices[i]] @@ -55,17 +52,13 @@ def apply_repulsion(points, k, iterations, learning_rate): distances_to_neighbors = distances[i, indices[i]].reshape(-1, 1) repulsion = diff / distances_to_neighbors ** 2 # Repulsion proportional to inverse square of distance displacement[i] = repulsion.sum(axis=0) - # Update points with displacement points += learning_rate * displacement - # Normalize to keep points on the sphere surface norms = np.linalg.norm(points, axis=1).reshape(-1, 1) points /= norms - return points - def spherical_to_cartesian(angles): """ Convert a list of 5 spherical angles to Cartesian coordinates. 
# Patch section: new file examples/active_learning/Repulsive_Method.py
# (new file mode 100644, index 0000000..0406072)
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Distribute points evenly on the surface of a unit sphere.

The module provides helpers to
  * draw random points on the unit sphere in an arbitrary number of
    dimensions (the 3D and 6D cases are the ones used by the examples),
  * compute the distance of every point to its k-th nearest neighbor, and
  * apply an iterative repulsion scheme that pushes each point away from its
    k nearest neighbors and re-projects it onto the sphere, which spreads the
    points more uniformly over the surface.

When executed as a script, the effect of the repulsion scheme is demonstrated
by plotting histograms of the k-nearest-neighbor distances before and after
the points have been relaxed. Importing the module has no side effects, so the
helpers can be reused from other scripts.
"""
import numpy as np
from scipy.spatial import distance_matrix

# Lower bound applied to neighbor distances before dividing by them, so that
# coincident points produce a zero contribution instead of NaN (0 / 0).
_MIN_DISTANCE = 1e-12


def generate_random_points_on_sphere(n_points, dimensions):
    """Generate random points on the surface of a unit sphere.

    Parameters
    ----------
    n_points : int
        Number of points to draw.
    dimensions : int
        Dimension of the embedding space (>= 2).

    Returns
    -------
    numpy.ndarray
        Array of shape (n_points, dimensions); every row has unit norm.

    Raises
    ------
    ValueError
        If `dimensions` is not an integer >= 2.
    """
    if not isinstance(dimensions, (int, np.integer)) or dimensions < 2:
        raise ValueError("dimensions must be an integer >= 2.")
    if dimensions == 3:
        # For 3D, use spherical coordinates; sampling cos(theta) uniformly
        # (rather than theta itself) is what makes the points uniform in area.
        phi = np.random.uniform(0, np.pi * 2, n_points)
        cos_theta = np.random.uniform(-1, 1, n_points)
        theta = np.arccos(cos_theta)
        x = np.sin(theta) * np.cos(phi)
        y = np.sin(theta) * np.sin(phi)
        z = np.cos(theta)
        return np.column_stack((x, y, z))
    # General case: an isotropic Gaussian sample normalized to unit length is
    # uniformly distributed on the sphere in any dimension.
    gaussians = np.random.normal(size=(n_points, dimensions))
    norm = np.linalg.norm(gaussians, axis=1)
    return gaussians / norm[:, np.newaxis]


def calculate_knn_distances(points, k):
    """Calculate the distance to the k-th nearest neighbor for each point.

    Parameters
    ----------
    points : array_like
        Array of shape (n_points, dimensions).
    k : int
        Rank of the neighbor (1 = nearest). Must satisfy
        1 <= k <= n_points - 1.

    Returns
    -------
    numpy.ndarray
        Array of shape (n_points,) with the k-th nearest neighbor distances.

    Raises
    ------
    ValueError
        If `k` is outside the valid range. Without this check k = 0 would
        index the masked self-distance and silently return inf.
    """
    n_points = len(points)
    if not 1 <= k <= n_points - 1:
        raise ValueError(
            "k must satisfy 1 <= k <= n_points - 1 "
            f"(got k={k}, n_points={n_points})."
        )
    distances = distance_matrix(points, points)
    np.fill_diagonal(distances, np.inf)  # Ignore distance to self
    # Only the k-th smallest entry per row is needed, so a partial sort is
    # sufficient and yields exactly the value a full sort would.
    return np.partition(distances, k - 1, axis=1)[:, k - 1]


def apply_repulsion(points, k, iterations, learning_rate):
    """Apply repulsion to increase the distance to the k nearest neighbors.

    In every iteration each point is displaced by the sum, over its k nearest
    neighbors, of ``(point - neighbor) / distance**2``. That is a push of
    magnitude 1/distance directed away from the neighbor. All displacements
    are computed from the same configuration, applied at once, and the points
    are then re-normalized so they stay on the unit sphere.

    Parameters
    ----------
    points : numpy.ndarray
        Float array of shape (n_points, dimensions). It is modified in place;
        pass a copy if the original coordinates are still needed.
    k : int
        Number of nearest neighbors that repel each point.
    iterations : int
        Number of repulsion steps.
    learning_rate : float
        Step size applied to the summed displacement.

    Returns
    -------
    numpy.ndarray
        The same array object as `points`, holding the relaxed coordinates.
    """
    for _ in range(iterations):
        distances = distance_matrix(points, points)
        np.fill_diagonal(distances, np.inf)
        # Indices of the k nearest neighbors of every point, shape (n, k)
        neighbor_idx = np.argsort(distances, axis=1)[:, :k]
        neighbor_dist = np.take_along_axis(distances, neighbor_idx, axis=1)
        # Coincident points have zero separation and no defined direction;
        # clamping makes their contribution 0 instead of NaN.
        neighbor_dist = np.maximum(neighbor_dist, _MIN_DISTANCE)
        # Vectors from every neighbor to the point, shape (n, k, dimensions)
        diff = points[:, np.newaxis, :] - points[neighbor_idx]
        repulsion = diff / neighbor_dist[:, :, np.newaxis] ** 2
        displacement = repulsion.sum(axis=1)
        # Update points with displacement
        points += learning_rate * displacement
        # Normalize to keep points on the sphere surface
        points /= np.linalg.norm(points, axis=1, keepdims=True)
    return points


def main(n_points=100, dimensions=6, k=5, iterations=60, learning_rate=0.01):
    """Relax random points on a sphere and plot the k-NN distance histograms.

    Parameters
    ----------
    n_points : int
        Number of points.
    dimensions : int
        Dimension of the embedding space.
    k : int
        Number of nearest neighbors used for repulsion and for the histogram.
    iterations : int
        Number of iterations of the repulsion algorithm.
    learning_rate : float
        Learning rate for the displacement.
    """
    # Imported here so that the numerical helpers above can be imported
    # without requiring matplotlib or selecting a plotting backend.
    import matplotlib.pyplot as plt

    # Generate initial random points on the surface of the sphere
    initial_points = generate_random_points_on_sphere(n_points, dimensions)
    initial_knn_distances = calculate_knn_distances(initial_points, k)
    # Apply the repulsion method on a copy, since it works in place
    final_points = apply_repulsion(
        np.copy(initial_points), k, iterations, learning_rate
    )
    final_knn_distances = calculate_knn_distances(final_points, k)
    # Plotting histograms of k-nearest neighbor distances
    plt.hist(initial_knn_distances, bins=20, alpha=0.7,
             label=f'Initial ({dimensions}D Sphere)')
    plt.hist(final_knn_distances, bins=20, alpha=0.7,
             label=f'Final ({dimensions}D Sphere)')
    plt.xlabel(f'{k} Nearest Neighboring Distance')
    plt.ylabel('Frequency')
    plt.legend()
    plt.show()


if __name__ == "__main__":
    main()