Corrected bugs discovered from tests. Restructured seed use to only ex…

…ist in train method. Added grid property to tiles plots. Corrected Readme. Improved install required
FrancoBobadilla · Nov 21, 2020 · 6d9e482 · 6d9e482
1 parent 26b780c
commit 6d9e482
Show file tree

Hide file tree

Showing 11 changed files with 419 additions and 58 deletions.
diff --git a/.gitignore b/.gitignore
@@ -143,5 +143,5 @@ cython_debug/
 # Jupyter Notebook related folders
 /examples/.ipynb_checkpoints/
 /examples/datasets/
-/examples/BootstrapCCpy
+/examples/BootstrapCCpy/
 /examples/BootstrapCCpy/__init__.py
diff --git a/README.md b/README.md
@@ -50,7 +50,7 @@ dimensional space and scale it to fit the observations to the range [0, 1]:
 ```python
 from sklearn import datasets, preprocessing
 
-blobs = datasets.make_blobs(n_samples=1000, n_features=5, centers=10)
+blobs = datasets.make_blobs(n_samples=1000, n_features=5, centers=10, random_state=1)
 scaler = preprocessing.MinMaxScaler()
 data = scaler.fit_transform(blobs[0])
 ```
@@ -73,7 +73,7 @@ NeuralMap was able to successfully **discover** and cluster all the **original b
 
 It's also possible to get the **U-matrix**:
 ```python
-nm.plot_unified_distane_matrix(data)
+nm.plot_unified_distance_matrix()
 ```
 <img src="https://github.com/FrancoBobadilla/NeuralMap/raw/master/examples/images/U-matrix.png" alt="U-matrix">
 

diff --git a/examples/CIDIE COVID features.ipynb b/examples/CIDIE COVID features.ipynb
@@ -0,0 +1,181 @@
+{
+ "cells": [
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "import sys\n",
+ "sys.path.insert(0, '../')"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "import json\n",
+ "import pandas as pd\n",
+ "import numpy as np\n",
+ "from sklearn.preprocessing import MinMaxScaler\n",
+ "from scipy.spatial.distance import cdist\n",
+ "from neural_map import NeuralMap, _plot"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "train_features = np.load('datasets/train_features.npy')\n",
+ "train_labels = np.load('datasets/train_labels.npy')\n",
+ "test_features = np.load('datasets/test_features.npy')\n",
+ "test_labels = np.load('datasets/test_labels.npy')\n",
+ "\n",
+ "train_features.shape, train_labels.shape, test_features.shape, test_labels.shape"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "scaler = MinMaxScaler()\n",
+ "\n",
+ "s_train_features = scaler.fit_transform(train_features)\n",
+ "s_test_features = scaler.transform(test_features)\n",
+ "\n",
+ "s_train_features.max(), s_train_features.min(), s_test_features.max(), s_test_features.min()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "dictionary = {}\n",
+ "\n",
+ "with open('datasets/features_training_correlation_10.json') as json_file: \n",
+ " dictionary = json.load(json_file)\n",
+ " dictionary['relative_positions'] = np.array(dictionary['relative_positions'])\n",
+ " dictionary['weights'] = np.array(dictionary['weights'])\n",
+ " \n",
+ "nm = NeuralMap(**dictionary)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "nm.plot_unified_distance_matrix(detailed=True, borders=True)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "nm.plot_analysis(data=s_train_features)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "nm.plot_analysis(data=s_test_features)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "nm.plot_labels(data=s_train_features, labels=train_labels)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "nm.plot_labels(data=s_test_features, labels=test_labels)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# Store the Euclidean distance (or whichever metric is in use) between each training feature and its winning node.\n",
+ "\n",
+ "train_quantization_error = np.ones(s_train_features.shape[0]) * np.nan\n",
+ "for i in range(s_train_features.shape[0]):\n",
+ " train_quantization_error[i] = nm.generate_activation_map(s_train_features[i]).min()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# Then, for the testing images, we do the following:\n",
+ "# 1) find the winning node\n",
+ "# 2) compute the distance between the testing image (features) and the training-set images (features) that map to that node.\n",
+ "# 3) show one image next to the other.\n",
+ "# A simple example is to run these steps with a training image and retrieve its class.\n",
+ "\n",
+ "selected_feature_index = 412\n",
+ "\n",
+ "if not ('mapped_train_features_indices' in vars() or 'mapped_train_features_indices' in globals()):\n",
+ " mapped_train_features_indices = nm.map_attachments(s_train_features, np.array(range(s_train_features.shape[0])))\n",
+ "selected_feature = s_test_features[selected_feature_index]\n",
+ "bmu = nm.get_best_matching_unit(selected_feature)\n",
+ "train_features_indices = mapped_train_features_indices[bmu]\n",
+ "distances = cdist(selected_feature[None], s_train_features[train_features_indices], nm.metric)[0]\n",
+ "\n",
+ "print(\"\\n\\n\\nSelecetd feature idnex:\\n\\n\", selected_feature_index)\n",
+ "print(\"\\n\\n\\nSelecetd feature:\\n\\n\", selected_feature)\n",
+ "print(\"\\n\\n\\nSelected feature class:\\n\\n\", test_labels[selected_feature_index])\n",
+ "print(\"\\n\\n\\nBest matching unit:\\n\\n\", bmu)\n",
+ "print(\"\\n\\n\\nIndices of training features that share the same node:\\n\\n\", mapped_train_features_indices[bmu])\n",
+ "print(\"\\n\\n\\nClasses of the training features that shares same node:\\n\\n\", train_labels[train_features_indices])\n",
+ "print(\"\\n\\n\\nDistances from selected features to training features:\\n\\n\", distances)"
+ ]
+ }
+ ],
+ "metadata": {
+ "kernelspec": {
+ "display_name": "Python 3",
+ "language": "python",
+ "name": "python3"
+ },
+ "language_info": {
+ "codemirror_mode": {
+ "name": "ipython",
+ "version": 3
+ },
+ "file_extension": ".py",
+ "mimetype": "text/x-python",
+ "name": "python",
+ "nbconvert_exporter": "python",
+ "pygments_lexer": "ipython3",
+ "version": "3.8.2"
+ }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 4
+}
diff --git a/examples/images/RP-HDBSCAN.png b/examples/images/RP-HDBSCAN.png
diff --git a/examples/images/U-matrix.png b/examples/images/U-matrix.png
diff --git a/examples/travelling salesman problem.ipynb b/examples/travelling salesman problem.ipynb
@@ -0,0 +1,143 @@
+{
+ "cells": [
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "from neural_map import NeuralMap\n",
+ "\n",
+ "# https://github.com/DiegoVicen/ntnu-som/blob/master/src/helper.py\n",
+ "# http://www.math.uwaterloo.ca/tsp/world/countries.html"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "import pandas as pd\n",
+ "\n",
+ "# towns = pd.read_csv('datasets/ar9152.tsp', delimiter=' ').values[:, [2, 1]]\n",
+ "# optimal_route_distance = 837377\n",
+ "\n",
+ "towns = pd.read_csv('http://www.math.uwaterloo.ca/tsp/world/uy734.tsp', delimiter=' ', skiprows=lambda x: (x <= 6 or x >= 741)).values[:, [2, 1]]\n",
+ "optimal_route_distance = 79114"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "import matplotlib.pyplot as plt\n",
+ "\n",
+ "def euclidean_distance_2d(X, Y):\n",
+ " return ((X[0] - Y[0]) ** 2 + (X[1] - Y[1]) ** 2) ** (1/2)\n",
+ "\n",
+ "def tsp(nm_instance, points, optimal_route_distance):\n",
+ " # find nearest neuron for each point\n",
+ " city_neurons = {}\n",
+ " for city_idx, city in enumerate(points):\n",
+ " idx = nm_instance.get_best_matching_unit(city)[1]\n",
+ " if idx not in city_neurons:\n",
+ " city_neurons[idx] = [city]\n",
+ " else:\n",
+ " print(\"hola\")\n",
+ " city_neurons[idx].append(city)\n",
+ "\n",
+ " # order cities according to neuron order\n",
+ " tsp_order = []\n",
+ " for neuron_idx in range(nm_instance.rows):\n",
+ " if neuron_idx in city_neurons:\n",
+ " tsp_order += city_neurons[neuron_idx]\n",
+ "\n",
+ " # calculate tsp distance for tsp_order\n",
+ " tsp_distance = euclidean_distance_2d(tsp_order[0], tsp_order[-1])\n",
+ " for idx in range(len(tsp_order)-1):\n",
+ " tsp_distance += euclidean_distance_2d(tsp_order[idx], tsp_order[idx + 1])\n",
+ " \n",
+ " # print total distance, optimal distance, and their relation\n",
+ " response = \"Travelling Salesman Problem\"\n",
+ " response += \"\\n total distance: \" + str(int(tsp_distance))\n",
+ " response += \"\\n optimal route ristance: \" + str(int(optimal_route_distance))\n",
+ " response += \"\\n total distance as percentage of optimal distance: \" + str(int(100 * tsp_distance / optimal_route_distance)) + \"%\"\n",
+ " print(response)\n",
+ " \n",
+ " # visualize route\n",
+ " n_towns = points.shape[0]\n",
+ " nodes = nm_instance.weights.reshape(-1, 2)\n",
+ " plt.figure(figsize=(12,10))\n",
+ " plt.scatter(points[:, 0], points[:, 1])\n",
+ " for i in range(n_towns * factor):\n",
+ " first = nodes[i % (n_towns * factor)]\n",
+ " second = nodes[(i + 1) % (n_towns * factor)]\n",
+ " plt.plot((first[0], second[0]), (first[1], second[1]))\n",
+ " plt.show()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {
+ "pycharm": {
+ "name": "#%%\n"
+ },
+ "scrolled": true
+ },
+ "outputs": [],
+ "source": [
+ "factor = 6\n",
+ "n_towns = towns.shape[0]\n",
+ "\n",
+ "nm = NeuralMap(variables=2, metric='euclidean', columns=1, rows=n_towns * factor, hexagonal=False, toroidal=True)\n",
+ "nm.train(data=towns)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "tsp(nm, towns, optimal_route_distance)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# import json\n",
+ "\n",
+ "# with open(\"datasets/argentina_som.json\", 'w') as outfile:\n",
+ "# json.dump(nm_dict, outfile)"
+ ]
+ }
+ ],
+ "metadata": {
+ "kernelspec": {
+ "display_name": "Python 3",
+ "language": "python",
+ "name": "python3"
+ },
+ "language_info": {
+ "codemirror_mode": {
+ "name": "ipython",
+ "version": 3
+ },
+ "file_extension": ".py",
+ "mimetype": "text/x-python",
+ "name": "python",
+ "nbconvert_exporter": "python",
+ "pygments_lexer": "ipython3",
+ "version": "3.8.2"
+ }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 1
+}
diff --git a/neural_map/_check_inputs.py b/neural_map/_check_inputs.py
@@ -120,9 +120,17 @@ def value_type(value, types):
 
  """
  if not isinstance(value, types):
+ if isinstance(types, (tuple, list)):
+ string_types = types[0].__name__
+ for i in range(1, len(types)):
+ string_types += ' or ' + types[i].__name__
+
+ else:
+ string_types = types.__name__
+
  raise ValueError(
  'Value {value} is {value_type}, but should be {types}!'
- .format(value=value, value_type=type(value), types=types)
+ .format(value=value, value_type=type(value).__name__, types=string_types)
  )
 
  return True
@@ -180,7 +188,7 @@ def function(func):
  if not callable(func):
  raise ValueError(
  'Value {func} is {value_type}, but should be callable!'
- .format(func=func, value_type=type(func)))
+ .format(func=func, value_type=type(func).__name__))
 
  return True
 
@@ -218,7 +226,7 @@ def numpy_matrix(data, expected_len):
  if not isinstance(data, ndarray):
  raise ValueError(
  'Input data is {data_type}, but should be {types}!'
- .format(data_type=type(data), types=ndarray)
+ .format(data_type=type(data).__name__, types=ndarray.__name__)
  )
 
  if not len(data.shape) == 2:

diff --git a/neural_map/_neighbourhood_functions.py b/neural_map/_neighbourhood_functions.py
@@ -60,8 +60,8 @@ def bubble(positions, bmu, radius, learning_rate):
 
  for i in range(positions.shape[0]):
  for j in range(positions.shape[1]):
- if ((positions[i, j, 0] - bmu[0]) ** 2 + (
- positions[i, j, 1] - bmu[1]) ** 2) / radius ** 2 < 1:
+ if (positions[i, j, 0] - bmu[0]) ** 2 + (positions[i, j, 1] - bmu[1]) ** 2 - \
+ radius ** 2 <= 0:
  res[i, j] = learning_rate
 
  else: