Skip to content

Commit

Permalink
Merge pull request #36 from GAIA-UNIL/add-circular-simulation-and-ti
Browse files: browse the repository at this point in the history
Add circular simulation and circular TI
  • Loading branch information
Mathieu Gravey authored Nov 14, 2019
2 parents d57e75e + c1d2cf9 commit 8f898c1
Show file tree
Hide file tree
Showing 9 changed files with 99 additions and 63 deletions.
2 changes: 1 addition & 1 deletion build/Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@ endif

export OS

dosentSupportOpenCL:= $(shell sh -c "echo 'int main(){}' | c++ -xc - -o /dev/null $OPENCL_LIB 2>/dev/null && echo 0 || echo 1 ")
dosentSupportOpenCL:= $(shell sh -c "echo 'int main(){}' | c++ -xc - -o /dev/null ${OPENCL_LIB} 2>/dev/null && echo 0 || echo 1 ")
ifeq ("$(dosentSupportOpenCL)","0")
export OPENCL_INC=-DWITH_OPENCL=1
export OPENCL_LIB
Expand Down
3 changes: 2 additions & 1 deletion include/CPUThreadDevice.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,7 @@
class CPUThreadDevice : public ComputeDeviceModule
{
public:
CPUThreadDevice(SharedMemoryManager* sharedMemoryManager,std::vector<g2s::OperationMatrix> coeficientMatrix, unsigned int threadRatio=1, bool withCrossMesurement=false);
CPUThreadDevice(SharedMemoryManager* sharedMemoryManager,std::vector<g2s::OperationMatrix> coeficientMatrix, unsigned int threadRatio=1, bool withCrossMesurement=false, bool circularTI=false);
~CPUThreadDevice();

bool candidateForPatern(std::vector<std::vector<int> > &neighborArrayVector, std::vector<std::vector<float> > &neighborValueArrayVector, std::vector<float> &variablesCoeficient, std::vector<float> delta0);
Expand Down Expand Up @@ -107,6 +107,7 @@ class CPUThreadDevice : public ComputeDeviceModule

bool _trueMismatch=true;
bool _crossMesurement=false;
bool _circularTI=false;
};

#endif
20 changes: 9 additions & 11 deletions include/DataImage.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -240,9 +240,8 @@ class DataImage{
{
finalValue*=_dims[i];
val[i]=position%_dims[i]+deltaVect[i];
if((val[i]<0)|| (val[i] >= int(_dims[i]))){
isOk=false;
}
isOk &= (val[i]>=0) && (val[i] < int(_dims[i]));
val[i] = ((val[i] % int(_dims[i])) + int(_dims[i])) % int(_dims[i]);
position/=_dims[i];
}

Expand All @@ -252,7 +251,6 @@ class DataImage{
finalValue+=val[i];
}


location=finalValue;

return isOk;
Expand Down Expand Up @@ -483,8 +481,8 @@ class DataImage{
int subVariablePosition=0;
for (unsigned int i = 0; i < _nbVariable; ++i)
{
convertionTypeVectorConstXmeassurement[i].push_back(P0);
convertionCoefVectorConstXmeassurement[i].push_back(1.f);
//convertionTypeVectorConstXmeassurement[i].push_back(P0);
//convertionCoefVectorConstXmeassurement[i].push_back(1.f);

if(_types[i]==Continuous){
std::vector<convertionType> convType;
Expand Down Expand Up @@ -528,14 +526,14 @@ class DataImage{
}
}
coeficientMatrix.push_back(regular);
coeficientMatrix.push_back(Xmeassurement);
if(forXMesurement)coeficientMatrix.push_back(Xmeassurement);

convertionTypeVectorConstVector.push_back(convertionTypeVectorConstRegular);
convertionTypeVectorConstVector.push_back(std::vector<std::vector<convertionType> >(_nbVariable));
convertionTypeVectorConstVector.push_back(convertionTypeVectorConstXmeassurement);
//convertionTypeVectorConstVector.push_back(std::vector<std::vector<convertionType> >(_nbVariable));
//convertionTypeVectorConstVector.push_back(convertionTypeVectorConstXmeassurement);
convertionCoefVectorConstVector.push_back(convertionCoefVectorConstRegular);
convertionCoefVectorConstVector.push_back(std::vector<std::vector<float> >(_nbVariable));
convertionCoefVectorConstVector.push_back(convertionCoefVectorConstXmeassurement);
//convertionCoefVectorConstVector.push_back(std::vector<std::vector<float> >(_nbVariable));
//convertionCoefVectorConstVector.push_back(convertionCoefVectorConstXmeassurement);
}

/*void generateCoef4Xcorr(std::vector<std::vector<float> > &variablesCoeficientMainVector, std::vector<std::vector<convertionType> > &convertionTypeVectorMainVector, bool needCrossMesurement, std::vector<std::vector<float> > categoriesValues){
Expand Down
3 changes: 2 additions & 1 deletion include/OpenCLGPUDevice.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -42,7 +42,7 @@ class OpenCLGPUDevice : public ComputeDeviceModule
static std::vector<unsigned> DeviceWithHostUnifiedMemory(unsigned platform_id);


OpenCLGPUDevice(SharedMemoryManager* sharedMemoryManager, std::vector<g2s::OperationMatrix> coeficientMatrix, unsigned int platform, unsigned int device, bool withCrossMesurement=false);
OpenCLGPUDevice(SharedMemoryManager* sharedMemoryManager, std::vector<g2s::OperationMatrix> coeficientMatrix, unsigned int platform, unsigned int device, bool withCrossMesurement=false, bool circularTI=false);
~OpenCLGPUDevice();

bool candidateForPatern(std::vector<std::vector<int> > &neighborArrayVector, std::vector<std::vector<float> > &neighborValueArrayVector, std::vector<float> &variablesCoeficient, std::vector<float> delta0);
Expand Down Expand Up @@ -112,6 +112,7 @@ class OpenCLGPUDevice : public ComputeDeviceModule

bool _trueMismatch=true;
bool _crossMesurement=false;
bool _circularTI=false;

std::vector<cl_mem> frenquencySpaceOutputArray_d;
std::vector<cl_mem> realSpaceArray_d;
Expand Down
12 changes: 6 additions & 6 deletions include/simulation.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@

void simulation(FILE *logFile,g2s::DataImage &di, std::vector<g2s::DataImage> &TIs, SamplingModule &samplingModule,
std::vector<std::vector<int> > &pathPosition, unsigned* solvingPath, unsigned numberOfPointToSimulate, g2s::DataImage *ii, float* seedAray, unsigned* importDataIndex, std::vector<unsigned> numberNeighbor,
std::vector<std::vector<float> > categoriesValues, unsigned nbThreads=1, bool fullStationary=false){
std::vector<std::vector<float> > categoriesValues, unsigned nbThreads=1, bool fullStationary=false, bool circularSim=false){

unsigned* posterioryPath=(unsigned*)malloc( sizeof(unsigned) * di.dataSize()/di._nbVariable);
memset(posterioryPath,255,sizeof(unsigned) * di.dataSize()/di._nbVariable);
Expand All @@ -50,7 +50,7 @@ void simulation(FILE *logFile,g2s::DataImage &di, std::vector<g2s::DataImage> &T
{
numberOfVariable+=categoriesValues[i].size()-1;
}
#pragma omp parallel for num_threads(nbThreads) schedule(dynamic,1) default(none) firstprivate( fullStationary, numberOfVariable,categoriesValues,numberOfPointToSimulate,posterioryPath, solvingPath, seedAray, numberNeighbor, importDataIndex, logFile, ii) shared( pathPosition, di, samplingModule, TIs)
#pragma omp parallel for num_threads(nbThreads) schedule(dynamic,1) default(none) firstprivate(circularSim, fullStationary, numberOfVariable,categoriesValues,numberOfPointToSimulate,posterioryPath, solvingPath, seedAray, numberNeighbor, importDataIndex, logFile, ii) shared( pathPosition, di, samplingModule, TIs)
for (unsigned int indexPath = 0; indexPath < numberOfPointToSimulate; ++indexPath){

// if(indexPath<TIs[0].dataSize()/TIs[0]._nbVariable-1000){
Expand Down Expand Up @@ -83,7 +83,7 @@ void simulation(FILE *logFile,g2s::DataImage &di, std::vector<g2s::DataImage> &T
unsigned dataIndex;
std::vector<int> vectorInDi=pathPosition[positionSearch];
vectorInDi.resize(di._dims.size(),0);
if(di.indexWithDelta(dataIndex, currentCell, vectorInDi))
if(di.indexWithDelta(dataIndex, currentCell, vectorInDi) || circularSim)
{
//add for
if(posterioryPath[dataIndex]<=indexPath){
Expand Down Expand Up @@ -271,7 +271,7 @@ void simulation(FILE *logFile,g2s::DataImage &di, std::vector<g2s::DataImage> &T

void simulationFull(FILE *logFile,g2s::DataImage &di, std::vector<g2s::DataImage> &TIs, SamplingModule &samplingModule,
std::vector<std::vector<int> > &pathPosition, unsigned* solvingPath, unsigned numberOfPointToSimulate, g2s::DataImage *ii, float* seedAray, unsigned* importDataIndex, std::vector<unsigned> numberNeighbor,
std::vector<std::vector<float> > categoriesValues, unsigned nbThreads=1, bool fullStationary=false){
std::vector<std::vector<float> > categoriesValues, unsigned nbThreads=1, bool fullStationary=false, bool circularSim=false){

unsigned* posterioryPath=(unsigned*)malloc( sizeof(unsigned) * di.dataSize());
memset(posterioryPath,255,sizeof(unsigned) * di.dataSize());
Expand All @@ -295,7 +295,7 @@ void simulationFull(FILE *logFile,g2s::DataImage &di, std::vector<g2s::DataImage
{
numberOfVariable+=categoriesValues[i].size()-1;
}
#pragma omp parallel for num_threads(nbThreads) schedule(dynamic,1) default(none) firstprivate(fullStationary, numberOfVariable, categoriesValues, numberOfPointToSimulate, \
#pragma omp parallel for num_threads(nbThreads) schedule(dynamic,1) default(none) firstprivate(circularSim, fullStationary, numberOfVariable, categoriesValues, numberOfPointToSimulate, \
posterioryPath, solvingPath, seedAray, numberNeighbor, importDataIndex, logFile, ii) shared( pathPosition, di, samplingModule, TIs)
for (unsigned int indexPath = 0; indexPath < numberOfPointToSimulate; ++indexPath){

Expand All @@ -320,7 +320,7 @@ void simulationFull(FILE *logFile,g2s::DataImage &di, std::vector<g2s::DataImage
unsigned dataIndex;
std::vector<int> vectorInDi=pathPosition[positionSearch];
vectorInDi.resize(di._dims.size(),0);
if(di.indexWithDelta(dataIndex, currentPosition, vectorInDi))
if(di.indexWithDelta(dataIndex, currentPosition, vectorInDi) || circularSim)
{
bool needToBeadd=false;
for (unsigned int i = 0; i < di._nbVariable; ++i)
Expand Down
38 changes: 21 additions & 17 deletions src/CPUThreadDevice.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -42,14 +42,15 @@
// #endif


CPUThreadDevice::CPUThreadDevice(SharedMemoryManager* sharedMemoryManager,std::vector<g2s::OperationMatrix> coeficientMatrix, unsigned int threadRatio, bool withCrossMesurement){
CPUThreadDevice::CPUThreadDevice(SharedMemoryManager* sharedMemoryManager,std::vector<g2s::OperationMatrix> coeficientMatrix, unsigned int threadRatio, bool withCrossMesurement, bool circularTI){
_coeficientMatrix=coeficientMatrix;

_deviceType=DT_cpuThreads;
_threadRatio=threadRatio;
int chip,core;
g2s::rdtscp(&chip, &core);
_crossMesurement=withCrossMesurement;
_circularTI=circularTI;
//printf("core %d, chip %d\n",core, chip );
_deviceID=chip;
_sharedMemoryManager=sharedMemoryManager;
Expand Down Expand Up @@ -249,7 +250,7 @@ unsigned CPUThreadDevice::cvtIndexToPosition(unsigned index){
for (int i = int(_fftSize.size()-1); i>=0; --i)
{
divFactor/=_fftSize[i];
position=position*_srcSize[i] + (_fftSize[i]-(index/(divFactor))%_fftSize[i]-_min[i]-1);
position=position*_srcSize[i] + ((_fftSize[i]-(index/(divFactor))%_fftSize[i]-_min[i]-1)+_srcSize[i])%_srcSize[i];
}

return position;
Expand Down Expand Up @@ -379,25 +380,28 @@ bool CPUThreadDevice::candidateForPatern(std::vector<std::vector<int> > &neighb
FFTW_PRECISION(execute_dft_c2r)(_pInv, _frenquencySpaceOutputArray[dataArrayIndex], _realSpaceArray[dataArrayIndex]);
dataType* realSpace= _realSpaceArray[dataArrayIndex];
//Remove forbidden/wrong values
for (size_t i = 0; i < _fftSize.size(); ++i)
if (!_circularTI)
{
unsigned blockSize=1;
for (size_t j = 0; j < i; ++j)
for (size_t i = 0; i < _fftSize.size(); ++i)
{
blockSize*=_fftSize[j];
}
blockSize*=_fftSize[i]-(_srcSize[i]-(_max[i]-_min[i]));
unsigned blockSize=1;
for (size_t j = 0; j < i; ++j)
{
blockSize*=_fftSize[j];
}
blockSize*=_fftSize[i]-(_srcSize[i]-(_max[i]-_min[i]));

unsigned delta=1;
for (size_t j = 0; j <= i; ++j)
{
delta*=_fftSize[j];
}
unsigned delta=1;
for (size_t j = 0; j <= i; ++j)
{
delta*=_fftSize[j];
}

#pragma omp parallel for default(none) num_threads(_threadRatio) /*proc_bind(close)*/ firstprivate(delta,blockSize,realSpace)
for (unsigned int j = 0; j < _realSpaceSize; j+=delta)
{
fillVectorized(realSpace,j,blockSize,-INFINITY);
#pragma omp parallel for default(none) num_threads(_threadRatio) /*proc_bind(close)*/ firstprivate(delta,blockSize,realSpace)
for (unsigned int j = 0; j < _realSpaceSize; j+=delta)
{
fillVectorized(realSpace,j,blockSize,-INFINITY);
}
}
}

Expand Down
38 changes: 21 additions & 17 deletions src/OpenCLGPUDevice.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -72,12 +72,13 @@ std::vector<unsigned> OpenCLGPUDevice::DeviceWithHostUnifiedMemory(unsigned plat
return result;
}

OpenCLGPUDevice::OpenCLGPUDevice(SharedMemoryManager* sharedMemoryManager,std::vector<g2s::OperationMatrix> coeficientMatrix, unsigned int platformIndex, unsigned int deviceIndex, bool withCrossMesurement){
OpenCLGPUDevice::OpenCLGPUDevice(SharedMemoryManager* sharedMemoryManager,std::vector<g2s::OperationMatrix> coeficientMatrix, unsigned int platformIndex, unsigned int deviceIndex, bool withCrossMesurement, bool circularTI){
_coeficientMatrix=coeficientMatrix;
_deviceType=DT_gpuOpenCL;
int chip,core;
g2s::rdtscp(&chip, &core);
_crossMesurement=withCrossMesurement;
_circularTI=circularTI;
//printf("core %d, chip %d\n",core, chip );
_deviceID=chip;
_sharedMemoryManager=sharedMemoryManager;
Expand Down Expand Up @@ -393,7 +394,7 @@ unsigned OpenCLGPUDevice::cvtIndexToPosition(unsigned index){
for (int i = _fftSize.size()-1; i>=0; --i)
{
divFactor/=_fftSize[i];
position=position*_srcSize[i] + (_fftSize[i]-(index/(divFactor))%_fftSize[i]-_min[i]-1);
position=position*_srcSize[i] + ((_fftSize[i]-(index/(divFactor))%_fftSize[i]-_min[i]-1)+_srcSize[i])%_srcSize[i];
}

return position;
Expand Down Expand Up @@ -525,25 +526,28 @@ bool OpenCLGPUDevice::candidateForPatern(std::vector<std::vector<int> > &neighb

dataType* realSpace= _realSpaceArray[dataArrayIndex];
//Remove forbidden/wrong values
for (int i = 0; i < _fftSize.size(); ++i)
if (!_circularTI)
{
unsigned blockSize=1;
for (int j = 0; j < i; ++j)
for (int i = 0; i < _fftSize.size(); ++i)
{
blockSize*=_fftSize[j];
}
blockSize*=_fftSize[i]-(_srcSize[i]-(_max[i]-_min[i]));
unsigned blockSize=1;
for (int j = 0; j < i; ++j)
{
blockSize*=_fftSize[j];
}
blockSize*=_fftSize[i]-(_srcSize[i]-(_max[i]-_min[i]));

unsigned delta=1;
for (int j = 0; j <= i; ++j)
{
delta*=_fftSize[j];
}
unsigned delta=1;
for (int j = 0; j <= i; ++j)
{
delta*=_fftSize[j];
}

#pragma omp parallel for default(none) num_threads(_threadRatio) /*proc_bind(close)*/ firstprivate(delta,blockSize,realSpace)
for (int j = 0; j < _realSpaceSize; j+=delta)
{
fillVectorized(realSpace,j,blockSize,-INFINITY);
#pragma omp parallel for default(none) num_threads(_threadRatio) /*proc_bind(close)*/ firstprivate(delta,blockSize,realSpace)
for (int j = 0; j < _realSpaceSize; j+=delta)
{
fillVectorized(realSpace,j,blockSize,-INFINITY);
}
}
}

Expand Down
22 changes: 18 additions & 4 deletions src/ds-l.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -281,6 +281,8 @@ int main(int argc, char const *argv[]) {
float nbCandidate=std::nanf("0"); // 1/f for QS
unsigned seed=std::chrono::high_resolution_clock::now().time_since_epoch().count();
g2s::DistanceType searchDistance=g2s::EUCLIDIEN;
bool conciderTiAsCircular=false;
bool circularSimulation=false;

if (arg.count("-nV") == 1)
{
Expand Down Expand Up @@ -344,6 +346,18 @@ int main(int argc, char const *argv[]) {
}
arg.erase("-md");

if (arg.count("-cti") == 1)
{
conciderTiAsCircular=true;
}
arg.erase("-cti");

if (arg.count("-csim") == 1)
{
circularSimulation=true;
}
arg.erase("-csim");


//add extra parameters here
float alpha=0;
Expand Down Expand Up @@ -767,22 +781,22 @@ int main(int argc, char const *argv[]) {

#endif

#pragma omp parallel for proc_bind(spread) num_threads(nbThreads) default(none) shared(computeDeviceModuleArray) firstprivate(nbThreadsLastLevel,coeficientMatrix, smm, nbThreads, needCrossMesurement)
#pragma omp parallel for proc_bind(spread) num_threads(nbThreads) default(none) shared(computeDeviceModuleArray) firstprivate(conciderTiAsCircular, nbThreadsLastLevel,coeficientMatrix, smm, nbThreads, needCrossMesurement)
for (unsigned int i = 0; i < nbThreads; ++i)
{
//#pragma omp critical (createDevices)
{
bool deviceCreated=false;
#ifdef WITH_OPENCL
if((!deviceCreated) && (i<gpuHostUnifiedMemory.size()) && withGPU){
OpenCLGPUDevice* signleThread=new OpenCLGPUDevice(smm, coeficientMatrix, 0,gpuHostUnifiedMemory[i], needCrossMesurement);
OpenCLGPUDevice* signleThread=new OpenCLGPUDevice(smm, coeficientMatrix, 0,gpuHostUnifiedMemory[i], needCrossMesurement,conciderTiAsCircular);
signleThread->setTrueMismatch(true);
computeDeviceModuleArray[i].push_back(signleThread);
deviceCreated=true;
}
#endif
if(!deviceCreated){
CPUThreadDevice* signleThread=new CPUThreadDevice(smm, coeficientMatrix, nbThreadsLastLevel, needCrossMesurement);
CPUThreadDevice* signleThread=new CPUThreadDevice(smm, coeficientMatrix, nbThreadsLastLevel, needCrossMesurement,conciderTiAsCircular);
signleThread->setTrueMismatch(true);
computeDeviceModuleArray[i].push_back(signleThread);
deviceCreated=true;
Expand All @@ -799,7 +813,7 @@ int main(int argc, char const *argv[]) {
auto begin = std::chrono::high_resolution_clock::now();

simulation(reportFile, DI, TIs, TSM, pathPosition, simulationPathIndex+beginPath, simulationPathSize-beginPath, (useUniqueTI4Sampling ? &idImage : nullptr ),
seedForIndex, importDataIndex, nbNeighbors, categoriesValues, nbThreads);
seedForIndex, importDataIndex, nbNeighbors, categoriesValues, nbThreads,circularSimulation);
auto end = std::chrono::high_resolution_clock::now();
double time = 1.0e-6 * std::chrono::duration_cast<std::chrono::nanoseconds>(end - begin).count();
fprintf(reportFile,"compuattion time: %7.2f s\n", time/1000);
Expand Down
Loading

0 comments on commit 8f898c1

Please sign in to comment.