Skip to content

Commit

Permalink
Corrected bugs discovered from tests. Restructured seed use to only ex…
Browse files Browse the repository at this point in the history
…ist in train method. Added grid property to tiles plots. Corrected Readme. Improved install required
  • Loading branch information
FrancoBobadilla committed Nov 21, 2020
1 parent 26b780c commit 6d9e482
Show file tree
Hide file tree
Showing 11 changed files with 419 additions and 58 deletions.
2 changes: 1 addition & 1 deletion .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -143,5 +143,5 @@ cython_debug/
# Jupyter Notebook related folders
/examples/.ipynb_checkpoints/
/examples/datasets/
/examples/BootstrapCCpy
/examples/BootstrapCCpy/
/examples/BootstrapCCpy/__init__.py
4 changes: 2 additions & 2 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -50,7 +50,7 @@ dimensional space and scale it to fit the observations to the range [0, 1]:
```python
from sklearn import datasets, preprocessing

blobs = datasets.make_blobs(n_samples=1000, n_features=5, centers=10)
blobs = datasets.make_blobs(n_samples=1000, n_features=5, centers=10, random_state=1)
scaler = preprocessing.MinMaxScaler()
data = scaler.fit_transform(blobs[0])
```
Expand All @@ -73,7 +73,7 @@ NeuralMap was able to successfully **discover** and cluster all the **original b

It's also possible to get the **U-matrix**:
```python
nm.plot_unified_distane_matrix(data)
nm.plot_unified_distance_matrix()
```
<img src="https://github.com/FrancoBobadilla/NeuralMap/raw/master/examples/images/U-matrix.png" alt="U-matrix">

Expand Down
181 changes: 181 additions & 0 deletions examples/CIDIE COVID features.ipynb
Original file line number Diff line number Diff line change
@@ -0,0 +1,181 @@
{
"cells": [
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"import sys\n",
"sys.path.insert(0, '../')"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"import json\n",
"import pandas as pd\n",
"import numpy as np\n",
"from sklearn.preprocessing import MinMaxScaler\n",
"from scipy.spatial.distance import cdist\n",
"from neural_map import NeuralMap, _plot"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"train_features = np.load('datasets/train_features.npy')\n",
"train_labels = np.load('datasets/train_labels.npy')\n",
"test_features = np.load('datasets/test_features.npy')\n",
"test_labels = np.load('datasets/test_labels.npy')\n",
"\n",
"train_features.shape, train_labels.shape, test_features.shape, test_labels.shape"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"scaler = MinMaxScaler()\n",
"\n",
"s_train_features = scaler.fit_transform(train_features)\n",
"s_test_features = scaler.transform(test_features)\n",
"\n",
"s_train_features.max(), s_train_features.min(), s_test_features.max(), s_test_features.min()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"dictionary = {}\n",
"\n",
"with open('datasets/features_training_correlation_10.json') as json_file: \n",
" dictionary = json.load(json_file)\n",
" dictionary['relative_positions'] = np.array(dictionary['relative_positions'])\n",
" dictionary['weights'] = np.array(dictionary['weights'])\n",
" \n",
"nm = NeuralMap(**dictionary)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"nm.plot_unified_distance_matrix(detailed=True, borders=True)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"nm.plot_analysis(data=s_train_features)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"nm.plot_analysis(data=s_test_features)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"nm.plot_labels(data=s_train_features, labels=train_labels)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"nm.plot_labels(data=s_test_features, labels=test_labels)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Store the Euclidean distance (or whichever metric is in use) from each training feature to its winning node.\n",
"\n",
"train_quantization_error = np.ones(s_train_features.shape[0]) * np.nan\n",
"for i in range(s_train_features.shape[0]):\n",
" train_quantization_error[i] = nm.generate_activation_map(s_train_features[i]).min()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Then, for the testing images, we:\n",
"# 1) find the winning node\n",
"# 2) compute the distance between the testing image (features) and the training-set images (features) that map to that node.\n",
"# 3) show one image next to the other.\n",
"# A simple example is to run these steps with a training image and recover its class.\n",
"\n",
"selected_feature_index = 412\n",
"\n",
"if not ('mapped_train_features_indices' in vars() or 'mapped_train_features_indices' in globals()):\n",
" mapped_train_features_indices = nm.map_attachments(s_train_features, np.array(range(s_train_features.shape[0])))\n",
"selected_feature = s_test_features[selected_feature_index]\n",
"bmu = nm.get_best_matching_unit(selected_feature)\n",
"train_features_indices = mapped_train_features_indices[bmu]\n",
"distances = cdist(selected_feature[None], s_train_features[train_features_indices], nm.metric)[0]\n",
"\n",
"print(\"\\n\\n\\nSelected feature index:\\n\\n\", selected_feature_index)\n",
"print(\"\\n\\n\\nSelected feature:\\n\\n\", selected_feature)\n",
"print(\"\\n\\n\\nSelected feature class:\\n\\n\", test_labels[selected_feature_index])\n",
"print(\"\\n\\n\\nBest matching unit:\\n\\n\", bmu)\n",
"print(\"\\n\\n\\nIndices of training features that share the same node:\\n\\n\", mapped_train_features_indices[bmu])\n",
"print(\"\\n\\n\\nClasses of the training features that shares same node:\\n\\n\", train_labels[train_features_indices])\n",
"print(\"\\n\\n\\nDistances from selected features to training features:\\n\\n\", distances)"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.8.2"
}
},
"nbformat": 4,
"nbformat_minor": 4
}
Binary file modified examples/images/RP-HDBSCAN.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file modified examples/images/U-matrix.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
143 changes: 143 additions & 0 deletions examples/travelling salesman problem.ipynb
Original file line number Diff line number Diff line change
@@ -0,0 +1,143 @@
{
"cells": [
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"from neural_map import NeuralMap\n",
"\n",
"# https://github.com/DiegoVicen/ntnu-som/blob/master/src/helper.py\n",
"# http://www.math.uwaterloo.ca/tsp/world/countries.html"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"import pandas as pd\n",
"\n",
"# towns = pd.read_csv('datasets/ar9152.tsp', delimiter=' ').values[:, [2, 1]]\n",
"# optimal_route_distance = 837377\n",
"\n",
"towns = pd.read_csv('http://www.math.uwaterloo.ca/tsp/world/uy734.tsp', delimiter=' ', skiprows=lambda x: (x <= 6 or x >= 741)).values[:, [2, 1]]\n",
"optimal_route_distance = 79114"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"import matplotlib.pyplot as plt\n",
"\n",
"def euclidean_distance_2d(X, Y):\n",
" return ((X[0] - Y[0]) ** 2 + (X[1] - Y[1]) ** 2) ** (1/2)\n",
"\n",
"def tsp(nm_instance, points, optimal_route_distance):\n",
" # find nearest neuron for each point\n",
" city_neurons = {}\n",
" for city_idx, city in enumerate(points):\n",
" idx = nm_instance.get_best_matching_unit(city)[1]\n",
" if idx not in city_neurons:\n",
" city_neurons[idx] = [city]\n",
" else:\n",
" print(\"hola\")\n",
" city_neurons[idx].append(city)\n",
"\n",
" # order cities according to neuron order\n",
" tsp_order = []\n",
" for neuron_idx in range(nm_instance.rows):\n",
" if neuron_idx in city_neurons:\n",
" tsp_order += city_neurons[neuron_idx]\n",
"\n",
" # calculate tsp distance for tsp_order\n",
" tsp_distance = euclidean_distance_2d(tsp_order[0], tsp_order[-1])\n",
" for idx in range(len(tsp_order)-1):\n",
" tsp_distance += euclidean_distance_2d(tsp_order[idx], tsp_order[idx + 1])\n",
" \n",
" # print total distance, optimal distance, and their relation\n",
" response = \"Travelling Salesman Problem\"\n",
" response += \"\\n total distance: \" + str(int(tsp_distance))\n",
"    response += \"\\n  optimal route distance: \" + str(int(optimal_route_distance))\n",
" response += \"\\n total distance as percentage of optimal distance: \" + str(int(100 * tsp_distance / optimal_route_distance)) + \"%\"\n",
" print(response)\n",
" \n",
" # visualize route\n",
" n_towns = points.shape[0]\n",
" nodes = nm_instance.weights.reshape(-1, 2)\n",
" plt.figure(figsize=(12,10))\n",
" plt.scatter(points[:, 0], points[:, 1])\n",
" for i in range(n_towns * factor):\n",
" first = nodes[i % (n_towns * factor)]\n",
" second = nodes[(i + 1) % (n_towns * factor)]\n",
" plt.plot((first[0], second[0]), (first[1], second[1]))\n",
" plt.show()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"pycharm": {
"name": "#%%\n"
},
"scrolled": true
},
"outputs": [],
"source": [
"factor = 6\n",
"n_towns = towns.shape[0]\n",
"\n",
"nm = NeuralMap(variables=2, metric='euclidean', columns=1, rows=n_towns * factor, hexagonal=False, toroidal=True)\n",
"nm.train(data=towns)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"tsp(nm, towns, optimal_route_distance)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# import json\n",
"\n",
"# with open(\"datasets/argentina_som.json\", 'w') as outfile:\n",
"# json.dump(nm_dict, outfile)"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.8.2"
}
},
"nbformat": 4,
"nbformat_minor": 1
}
14 changes: 11 additions & 3 deletions neural_map/_check_inputs.py
Original file line number Diff line number Diff line change
Expand Up @@ -120,9 +120,17 @@ def value_type(value, types):
"""
if not isinstance(value, types):
if isinstance(types, (tuple, list)):
string_types = types[0].__name__
for i in range(1, len(types)):
string_types += ' or ' + types[i].__name__

else:
string_types = types.__name__

raise ValueError(
'Value {value} is {value_type}, but should be {types}!'
.format(value=value, value_type=type(value), types=types)
.format(value=value, value_type=type(value).__name__, types=string_types)
)

return True
Expand Down Expand Up @@ -180,7 +188,7 @@ def function(func):
if not callable(func):
raise ValueError(
'Value {func} is {value_type}, but should be callable!'
.format(func=func, value_type=type(func)))
.format(func=func, value_type=type(func).__name__))

return True

Expand Down Expand Up @@ -218,7 +226,7 @@ def numpy_matrix(data, expected_len):
if not isinstance(data, ndarray):
raise ValueError(
'Input data is {data_type}, but should be {types}!'
.format(data_type=type(data), types=ndarray)
.format(data_type=type(data).__name__, types=ndarray.__name__)
)

if not len(data.shape) == 2:
Expand Down
4 changes: 2 additions & 2 deletions neural_map/_neighbourhood_functions.py
Original file line number Diff line number Diff line change
Expand Up @@ -60,8 +60,8 @@ def bubble(positions, bmu, radius, learning_rate):

for i in range(positions.shape[0]):
for j in range(positions.shape[1]):
if ((positions[i, j, 0] - bmu[0]) ** 2 + (
positions[i, j, 1] - bmu[1]) ** 2) / radius ** 2 < 1:
if (positions[i, j, 0] - bmu[0]) ** 2 + (positions[i, j, 1] - bmu[1]) ** 2 - \
radius ** 2 <= 0:
res[i, j] = learning_rate

else:
Expand Down
Loading

0 comments on commit 6d9e482

Please sign in to comment.