Skip to content

Commit

Permalink
Parallelization via OpenMP
Browse files Browse the repository at this point in the history
see: lvdmaaten/bhtsne#18. These changes are inspired by https://github.com/maximsch2/bhtsne. This approach was selected as it requires minimal changes to parallelize the algorithm.

In particular, these changes correspond roughly to the changes in maximsch2/bhtsne@08d8a2a
  • Loading branch information
Daniel Rapp authored and Daniel Rapp committed Mar 30, 2017
1 parent 3521869 commit 4affd0a
Show file tree
Hide file tree
Showing 8 changed files with 73 additions and 59 deletions.
8 changes: 8 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -72,3 +72,11 @@ More Information
----------------

See *Barnes-Hut-SNE* (2013), L.J.P. van der Maaten. It is available on [arxiv](http://arxiv.org/abs/1301.3342).

On OSX
------
Run `brew install gcc --without-multilib` (to get OpenMP version of compiler)

You'll need to set environment variables for CC and CXX to get setuptools to build things properly.

`export CXX=/usr/local/bin/g++-6; export CC=/usr/local/bin/gcc-6; pip install -e .`
Empty file modified docker/dev/test.py
100644 → 100755
Empty file.
51 changes: 26 additions & 25 deletions setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,37 +26,38 @@
v2 = int(parts[1])
v3 = int(parts[2]) if len(parts) == 3 else None

if v2 >= 10:
# More than 10.10
extra_compile_args=['-I/System/Library/Frameworks/Accelerate.framework/Versions/A/Frameworks/vecLib.framework/Versions/A/Headers']
else:
extra_compile_args=['-I/System/Library/Frameworks/vecLib.framework/Headers']

ext_modules = [Extension(name='tsne.bh_sne',
sources=['tsne/bh_sne_src/sptree.cpp', 'tsne/bh_sne_src/tsne.cpp', 'tsne/bh_sne.pyx'],
include_dirs=[numpy.get_include(), 'tsne/bh_sne_src/'],
extra_compile_args=extra_compile_args + ['-ffast-math', '-O3'],
extra_link_args=['-Wl,-framework', '-Wl,Accelerate', '-lcblas'],
language='c++')]
sources=['tsne/bh_sne_src/sptree.cpp', 'tsne/bh_sne_src/tsne.cpp', 'tsne/bh_sne.pyx'],
include_dirs=[numpy.get_include(), 'tsne/bh_sne_src/'],
extra_compile_args=['-ffast-math', '-fopenmp', '-flto'],
extra_link_args=['-Wl,-framework', '-Wl,Accelerate', '-lgomp'],
language='c++'),

Extension(name='tsne.bh_sne_3d',
sources=['tsne/bh_sne_src/sptree.cpp', 'tsne/bh_sne_src/tsne.cpp', 'tsne/bh_sne_3d.pyx'],
include_dirs=[numpy.get_include(), 'tsne/bh_sne_src/'],
extra_compile_args=['-ffast-math', '-DTSNE3D', '-fopenmp', '-flto'],
extra_link_args=['-Wl,-framework', '-Wl,Accelerate', '-lgomp'],
language='c++')]

else:
# LINUX

ext_modules = [Extension(name='tsne.bh_sne',
sources=['tsne/bh_sne_src/sptree.cpp', 'tsne/bh_sne_src/tsne.cpp', 'tsne/bh_sne.pyx'],
include_dirs=[numpy.get_include(), '/usr/local/include', 'tsne/bh_sne_src/'],
library_dirs=['/usr/local/lib', '/usr/lib64/atlas'],
extra_compile_args=['-msse2', '-O3', '-fPIC', '-w', '-ffast-math'],
extra_link_args=['-Wl,-Bstatic', '-lcblas', '-Wl,-Bdynamic'],
language='c++'),
sources=['tsne/bh_sne_src/sptree.cpp', 'tsne/bh_sne_src/tsne.cpp', 'tsne/bh_sne.pyx'],
include_dirs=[numpy.get_include(), '/usr/local/include', 'tsne/bh_sne_src/'],
library_dirs=['/usr/local/lib', '/usr/lib64/atlas'],
extra_compile_args=['-msse2', '-fPIC', '-w', '-ffast-math', '-O2', '-fopenmp', '-flto'],
extra_link_args=['-lgomp'],
language='c++'),

Extension(name='tsne.bh_sne_3d',
sources=['tsne/bh_sne_src/sptree.cpp', 'tsne/bh_sne_src/tsne.cpp', 'tsne/bh_sne_3d.pyx'],
include_dirs=[numpy.get_include(), '/usr/local/include', 'tsne/bh_sne_src/'],
library_dirs=['/usr/local/lib', '/usr/lib64/atlas'],
extra_compile_args=['-msse2', '-O3', '-fPIC', '-w', '-ffast-math', '-DTSNE3D'],
extra_link_args=['-Wl,-Bstatic', '-lcblas', '-Wl,-Bdynamic'],
language='c++')]
sources=['tsne/bh_sne_src/sptree.cpp', 'tsne/bh_sne_src/tsne.cpp', 'tsne/bh_sne_3d.pyx'],
include_dirs=[numpy.get_include(), '/usr/local/include', 'tsne/bh_sne_src/'],
library_dirs=['/usr/local/lib', '/usr/lib64/atlas'],
extra_compile_args=['-msse2', '-fPIC', '-w', '-ffast-math', '-DTSNE3D', '-O2', '-fopenmp',
'-flto'],
extra_link_args=['-lgomp'],
language='c++')]

ext_modules = cythonize(ext_modules)

Expand All @@ -77,4 +78,4 @@
packages=find_packages(),
ext_modules=ext_modules,
install_requires=required
)
)
10 changes: 3 additions & 7 deletions tsne/bh_sne_src/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -1,15 +1,11 @@
CMAKE_MINIMUM_REQUIRED(VERSION 3.0 FATAL_ERROR)
PROJECT(bh_tsne CXX)
ENABLE_LANGUAGE(CXX)
set_target_properties(${TARGET} PROPERTIES LINKER_LANGUAGE CXX)

SET(CMAKE_MODULE_PATH "${CMAKE_CURRENT_SOURCE_DIR}/cmake" "${CMAKE_MODULE_PATH}")

SET(CMAKE_C_FLAGS "${CMAKE_C_FLAGS}")
SET(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -O3 -fPIC -ffast-math -funroll-loops -msse2")
SET(CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} -Wl,-framework -Wl,Accelerate -lcblas")
FIND_PACKAGE(OpenMP REQUIRED)

SET(EXECUTABLE_OUTPUT_PATH ${CMAKE_CURRENT_SOURCE_DIR})
SET(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -O2 -flto -ffast-math ${OpenMP_CXX_FLAGS}")
SET(CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} ${OpenMP_EXE_LINKER_FLAGS}")

file(GLOB SOURCES *.cpp)
add_executable(bh_tsne ${SOURCES})
Expand Down
6 changes: 3 additions & 3 deletions tsne/bh_sne_src/Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -2,8 +2,8 @@
#CFLAGS = -march=haswell -ffast-math -O3 -Rpass=loop-vectorize -Rpass-missed=loop-vectorize -Rpass-analysis=loop-vectorize
#CFLAGS = -march=haswell -ffast-math -O3

CXX = g++
CFLAGS = -ffast-math -O3
CXX = /usr/local/bin/g++-6
CFLAGS = -O2 -flto -ffast-math -fopenmp

all: bh_tsne bh_tsne_3d

Expand All @@ -20,7 +20,7 @@ tsne.o: tsne.cpp tsne.h sptree.h vptree.h
$(CXX) $(CFLAGS) -c tsne.cpp

tsne_3d.o: tsne.cpp tsne.h sptree.h vptree.h
$(CXX) $(CFLAGS) -DTSNE3D -c tsne.cpp
$(CXX) $(CFLAGS) -DTSNE3D -c tsne.cpp -o tsne_3d.o

clean:
rm -Rf *.o bh_tsne bh_tsne_3d
33 changes: 19 additions & 14 deletions tsne/bh_sne_src/sptree.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -56,12 +56,12 @@ Cell<NDims>::~Cell() {
}

template<int NDims>
double Cell<NDims>::getCorner(unsigned int d) {
double Cell<NDims>::getCorner(unsigned int d) const {
return corner[d];
}

template<int NDims>
double Cell<NDims>::getWidth(unsigned int d) {
double Cell<NDims>::getWidth(unsigned int d) const {
return width[d];
}

Expand All @@ -77,7 +77,7 @@ void Cell<NDims>::setWidth(unsigned int d, double val) {

// Checks whether a point lies in a cell
template<int NDims>
bool Cell<NDims>::containsPoint(double point[])
bool Cell<NDims>::containsPoint(double point[]) const
{
for(int d = 0; d < NDims; d++) {
if(corner[d] - width[d] > point[d]) return false;
Expand Down Expand Up @@ -348,11 +348,13 @@ unsigned int SPTree<NDims>::getDepth() {

// Compute non-edge forces using Barnes-Hut algorithm
template<int NDims>
void SPTree<NDims>::computeNonEdgeForces(unsigned int point_index, double theta, double neg_f[], double* sum_Q)
double SPTree<NDims>::computeNonEdgeForces(unsigned int point_index, double theta, double neg_f[]) const
{
double resultSum = 0;
double buff[NDims]; // make buff local for parallelization

// Make sure that we spend no time on empty nodes or self-interactions
if(cum_size == 0 || (is_leaf && size == 1 && index[0] == point_index)) return;
if(cum_size == 0 || (is_leaf && size == 1 && index[0] == point_index)) return resultSum;

// Compute distance between point and center-of-mass
double sqdist = .0;
Expand All @@ -375,33 +377,37 @@ void SPTree<NDims>::computeNonEdgeForces(unsigned int point_index, double theta,
// Compute and add t-SNE force between point and current node
sqdist = 1.0 / (1.0 + sqdist);
double mult = cum_size * sqdist;
*sum_Q += mult;
resultSum += mult;
mult *= sqdist;
for(unsigned int d = 0; d < NDims; d++) neg_f[d] += mult * buff[d];
}
else {

// Recursively apply Barnes-Hut to children
for(unsigned int i = 0; i < no_children; i++) children[i]->computeNonEdgeForces(point_index, theta, neg_f, sum_Q);
for(unsigned int i = 0; i < no_children; i++){
resultSum += children[i]->computeNonEdgeForces(point_index, theta, neg_f);
}
}
return resultSum;
}


// Computes edge forces
template<int NDims>
void SPTree<NDims>::computeEdgeForces(unsigned int* row_P, unsigned int* col_P, double* val_P, int N, double* pos_f)
void SPTree<NDims>::computeEdgeForces(unsigned int* row_P, unsigned int* col_P, double* val_P, int N, double* pos_f) const
{

// Loop over all edges in the graph
unsigned int ind1 = 0;
unsigned int ind2 = 0;
double sqdist;
#pragma omp parallel for schedule(static)
for(unsigned int n = 0; n < N; n++) {
unsigned int ind1 = n * NDims;
for(unsigned int i = row_P[n]; i < row_P[n + 1]; i++) {

double buff[NDims]; // make buff local for parallelization

// Compute pairwise distance and Q-value
sqdist = 1.0;
ind2 = col_P[i] * NDims;
double sqdist = 1.0;
unsigned int ind2 = col_P[i] * NDims;

for(unsigned int d = 0; d < NDims; d++) {
buff[d] = data[ind1 + d] - data[ind2 + d];
Expand All @@ -413,7 +419,6 @@ void SPTree<NDims>::computeEdgeForces(unsigned int* row_P, unsigned int* col_P,
// Sum positive force
for(unsigned int d = 0; d < NDims; d++) pos_f[ind1 + d] += sqdist * buff[d];
}
ind1 += NDims;
}
}

Expand Down
13 changes: 5 additions & 8 deletions tsne/bh_sne_src/sptree.h
Original file line number Diff line number Diff line change
Expand Up @@ -48,11 +48,11 @@ class Cell {
Cell(double* inp_corner, double* inp_width);
~Cell();

double getCorner(unsigned int d);
double getWidth(unsigned int d);
double getCorner(unsigned int d) const;
double getWidth(unsigned int d) const;
void setCorner(unsigned int d, double val);
void setWidth(unsigned int d, double val);
bool containsPoint(double point[]);
bool containsPoint(double point[]) const;
};

template <int NDims=2>
Expand All @@ -65,9 +65,6 @@ class SPTree
// Fixed constants
static const unsigned int QT_NODE_CAPACITY = 1;

// A buffer we use when doing force computations
double buff[NDims];

// Properties of this node in the tree
SPTree<NDims>* parent;
unsigned int dimension;
Expand Down Expand Up @@ -102,8 +99,8 @@ class SPTree
void rebuildTree();
void getAllIndices(unsigned int* indices);
unsigned int getDepth();
void computeNonEdgeForces(unsigned int point_index, double theta, double neg_f[], double* sum_Q);
void computeEdgeForces(unsigned int* row_P, unsigned int* col_P, double* val_P, int N, double* pos_f);
double computeNonEdgeForces(unsigned int point_index, double theta, double neg_f[]) const;
void computeEdgeForces(unsigned int* row_P, unsigned int* col_P, double* val_P, int N, double* pos_f) const;
void print();

private:
Expand Down
11 changes: 9 additions & 2 deletions tsne/bh_sne_src/tsne.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -228,8 +228,13 @@ void TSNE::computeGradient(double* P, unsigned int* inp_row_P, unsigned int* inp
double* pos_f = (double*) calloc(N * D, sizeof(double));
double* neg_f = (double*) calloc(N * D, sizeof(double));
if(pos_f == NULL || neg_f == NULL) { fprintf(stderr,"Memory allocation failed!\n"); exit(1); }

tree->computeEdgeForces(inp_row_P, inp_col_P, inp_val_P, N, pos_f);
for(int n = 0; n < N; n++) tree->computeNonEdgeForces(n, theta, neg_f + n * D, &sum_Q);

#pragma omp parallel for schedule(guided) reduction(+:sum_Q)
for(int n = 0; n < N; n++){
sum_Q += tree->computeNonEdgeForces(n, theta, neg_f + n * D);
}

// Compute final t-SNE gradient
for(int i = 0; i < N * D; i++) {
Expand Down Expand Up @@ -334,7 +339,9 @@ double TSNE::evaluateError(unsigned int* row_P, unsigned int* col_P, double* val
SPTree<NDIMS>* tree = new SPTree<NDIMS>(Y, N);
double* buff = (double*) calloc(D, sizeof(double));
double sum_Q = .0;
for(int n = 0; n < N; n++) tree->computeNonEdgeForces(n, theta, buff, &sum_Q);
for(int n = 0; n < N; n++){
sum_Q += tree->computeNonEdgeForces(n, theta, buff);
}

// Loop over all edges to compute t-SNE error
int ind1, ind2;
Expand Down

0 comments on commit 4affd0a

Please sign in to comment.