diff --git a/examples/compare_batch_iterators.py b/examples/compare_batch_iterators.py
deleted file mode 100644
index ce57250f..00000000
--- a/examples/compare_batch_iterators.py
+++ /dev/null
@@ -1,75 +0,0 @@
-#!/usr/bin/env python
-from functools import partial
-from itertools import tee
-
-class Loader():
-    def example_batch_generator(self,n):
-        for batch in range(n):
-            yield batch
-
-class MPIModel():
-    def __init__(self,batch_generator):
-        self.batch_iterator = batch_generator
-
-    def train_epochs(self,M):
-        num_total = 8
-        for epoch in range(M):
-            num_so_far = 0
-            print ("Batch iter. summary: {}{}".format(self,self.batch_iterator))
-            for batch in self.batch_iterator():
-                num_so_far += 1
-
-                whatever=batch
-                print ("Next batch id: {}".format(batch))
-                if num_so_far > num_total: break
-            print "+++++++"
-
-
-class MPIModel_default():
-    def __init__(self,batch_generator):
-        self.batch_iterator = batch_generator
-
-    def train_epochs(self,M):
-        num_total = 8 #number of samples per epoch
-        batch_generator_func = self.batch_iterator()
-
-        for iepoch in range(M):
-            #print ("Batch iter. summary: {}{} epoch: {}".format(self,self.batch_iterator,iepoch))
-            num_so_far = 0
-
-            while num_so_far < num_total:
-                num_so_far += 1
-
-                try:
-                    batch = batch_generator_func.next()
-                except StopIteration:
-                    batch_generator_func = self.batch_iterator()
-                    batch = batch_generator_func.next()
-                print ("Next batch id: {}".format(batch))
-
-            print "+++++++"
-
-
-
-def main():
-    num_batches = 10
-    epochs = 3
-
-    loader = Loader()
-    batch_generator = partial(loader.example_batch_generator,n=num_batches)
-    my_example_class = MPIModel(batch_generator)
-    my_example_class.train_epochs(epochs)
-
-def main_default():
-    num_batches = 10
-    epochs = 3
-
-    loader = Loader()
-    batch_generator = partial(loader.example_batch_generator,n=num_batches)
-    my_example_class = MPIModel_default(batch_generator)
-    my_example_class.train_epochs(epochs)
-
-if __name__=='__main__':
-    import timeit
-    #print min(timeit.Timer(setup=main).repeat(7, 1000))
-    print min(timeit.Timer(setup=main_default).repeat(7, 1000))
diff --git a/examples/conf.yaml b/examples/conf.yaml
index 1b308cd8..a71947fb 100644
--- a/examples/conf.yaml
+++ b/examples/conf.yaml
@@ -10,7 +10,7 @@ paths:
     signal_prepath: '/signal_data/' #/signal_data/jet/
     shot_list_dir: '/shot_lists/'
    tensorboard_save_path: '/Graph/'
-    data: 'jet_data' #'d3d_to_jet_data' #'d3d_to_jet_data' # 'jet_to_d3d_data' #jet_data
+    data: jet_data #'d3d_to_jet_data' #'d3d_to_jet_data' # 'jet_to_d3d_data' #jet_data
     specific_signals: [] #['q95','li','ip','betan','energy','lm','pradcore','pradedge','pradtot','pin','torquein','tmamp1','tmamp2','tmfreq1','tmfreq2','pechin','energydt','ipdirect','etemp_profile','edens_profile'] #if left empty will use all valid signals defined on a machine. Only use if need a custom set
     executable: "mpi_learn.py"
     shallow_executable: "learn.py"
diff --git a/examples/guarantee_preprocessed.py b/examples/guarantee_preprocessed.py
index 3f005f0e..67826ad0 100644
--- a/examples/guarantee_preprocessed.py
+++ b/examples/guarantee_preprocessed.py
@@ -11,8 +11,6 @@ pprint(conf)
 from plasma.preprocessor.preprocess import guarantee_preprocessed
 
-os.environ["PYTHONHASHSEED"] = "0"
-
 #####################################################
 ####################PREPROCESSING####################
 #####################################################
diff --git a/examples/jenkins.sh b/examples/jenkins.sh
deleted file mode 100644
index 9d9087e6..00000000
--- a/examples/jenkins.sh
+++ /dev/null
@@ -1,16 +0,0 @@
-#!/bin/bash
-
-rm /tigress/alexeys/model_checkpoints/*
-
-ls ${PWD}
-
-module load anaconda
-module load cudatoolkit/8.0
-module load openmpi/intel-17.0/2.1.0/64 intel/17.0/64/17.0.4.196 intel-mkl/2017.3/4/64
-module load cudnn/cuda-8.0/6.0
-source activate PPPL
-
-export OMPI_MCA_btl="tcp,self,sm"
-
-echo $SLURM_NODELIST
-srun python mpi_learn.py
diff --git a/examples/mpi_learn.py b/examples/mpi_learn.py
index e6fd6503..a965a86b 100644
--- a/examples/mpi_learn.py
+++ b/examples/mpi_learn.py
@@ -24,8 +24,6 @@
 import random
 import numpy as np
 
-os.environ["PYTHONHASHSEED"] = "0"
-
 import matplotlib
 matplotlib.use('Agg')
diff --git a/Analyze Hyperparameter Tuning.ipynb b/examples/notebooks/Analyze Hyperparameter Tuning.ipynb
similarity index 100%
rename from Analyze Hyperparameter Tuning.ipynb
rename to examples/notebooks/Analyze Hyperparameter Tuning.ipynb
diff --git a/Signal Influence.ipynb b/examples/notebooks/Signal Influence.ipynb
similarity index 100%
rename from Signal Influence.ipynb
rename to examples/notebooks/Signal Influence.ipynb
diff --git a/examples/signal_influence.py b/examples/signal_influence.py
index 509af551..fc987177 100644
--- a/examples/signal_influence.py
+++ b/examples/signal_influence.py
@@ -26,8 +26,6 @@
 import copy
 from functools import partial
 
-os.environ["PYTHONHASHSEED"] = "0"
-
 import matplotlib
 matplotlib.use('Agg')
diff --git a/examples/simple_augmentation.py b/examples/simple_augmentation.py
index e6c05a78..3afd19c9 100644
--- a/examples/simple_augmentation.py
+++ b/examples/simple_augmentation.py
@@ -26,8 +26,6 @@
 import copy
 from functools import partial
 
-os.environ["PYTHONHASHSEED"] = "0"
-
 import matplotlib
 matplotlib.use('Agg')
diff --git a/jenkins-ci/jenkins.sh b/jenkins-ci/jenkins.sh
new file mode 100644
index 00000000..1fa10ffe
--- /dev/null
+++ b/jenkins-ci/jenkins.sh
@@ -0,0 +1,51 @@
+#!/bin/bash
+
+export OMPI_MCA_btl="tcp,self,sm"
+
+echo ${PWD}
+
+echo "Jenkins test Python3.6"
+rm /tigress/alexeys/model_checkpoints/*
+rm -rf /tigress/alexeys/processed_shots
+rm -rf /tigress/alexeys/processed_shotlists
+rm -rf /tigress/alexeys/normalization
+module load anaconda3/4.4.0
+source activate /tigress/alexeys/jenkins/.conda/envs/jenkins3
+module load cudatoolkit/8.0
+module load cudnn/cuda-8.0/6.0
+module load openmpi/cuda-8.0/intel-17.0/2.1.0/64
+module load intel/17.0/64/17.0.4.196
+
+echo ${PWD}
+cd /home/alexeys/jenkins/workspace/FRNM/PPPL
+echo ${PWD}
+python setup.py install
+
+echo $SLURM_NODELIST
+cd examples
+echo ${PWD}
+ls ${PWD}
+sed -i -e 's/num_epochs: 1000/num_epochs: 2/g' conf.yaml
+sed -i -e 's/data: jet_data/data: jenkins_jet/g' conf.yaml
+
+srun python mpi_learn.py
+
+echo "Jenkins test Python2.7"
+#rm /tigress/alexeys/model_checkpoints/*
+
+#source deactivate
+#module purge
+#module load anaconda/4.4.0
+#source activate /tigress/alexeys/jenkins/.conda/envs/jenkins2
+#module load cudatoolkit/8.0
+#module load cudnn/cuda-8.0/6.0
+#module load openmpi/cuda-8.0/intel-17.0/2.1.0/64
+#module load intel/17.0/64/17.0.4.196
+
+#cd ..
+#python setup.py install
+
+#echo $SLURM_NODELIST
+#cd examples
+#sed -i -e 's/data: jenkins_jet/data: jenkins_d3d/g' conf.yaml
+#srun python mpi_learn.py
diff --git a/jenkins-ci/run_jenkins.py b/jenkins-ci/run_jenkins.py
new file mode 100644
index 00000000..57be2675
--- /dev/null
+++ b/jenkins-ci/run_jenkins.py
@@ -0,0 +1,56 @@
+from plasma.utils.batch_jobs import generate_working_dirname,copy_files_to_environment,start_jenkins_job
+import yaml
+import sys,os,getpass
+import plasma.conf
+
+num_nodes = 4 #Set in the Jenkins project area!!
+test_matrix = [("Python3","jet_data"),("Python2","jet_data")]
+
+run_directory = "{}/{}/jenkins/".format(plasma.conf.conf['fs_path'],getpass.getuser())
+template_path = os.environ['PWD']
+conf_name = "conf.yaml"
+executable_name = "mpi_learn.py"
+
+def generate_conf_file(test_configuration,template_path = "../",save_path = "./",conf_name="conf.yaml"):
+    assert(template_path != save_path)
+    with open(os.path.join(template_path,conf_name), 'r') as yaml_file:
+        conf = yaml.load(yaml_file)
+    conf['training']['num_epochs'] = 2
+    conf['paths']['data'] = test_configuration[1]
+    if test_configuration[1] == "Python3":
+        conf['env']['name'] = "PPPL_dev3"
+        conf['env']['type'] = "anaconda3"
+    else:
+        conf['env']['name'] = "PPPL"
+        conf['env']['type'] = "anaconda"
+
+    with open(os.path.join(save_path,conf_name), 'w') as outfile:
+        yaml.dump(conf, outfile, default_flow_style=False)
+    return conf
+
+working_directory = generate_working_dirname(run_directory)
+os.makedirs(working_directory)
+
+os.system(" ".join(["cp -p",os.path.join(template_path,conf_name),working_directory]))
+os.system(" ".join(["cp -p",os.path.join(template_path,executable_name),working_directory]))
+
+#os.chdir(working_directory)
+#print("Going into {}".format(working_directory))
+
+for ci in test_matrix:
+    subdir = working_directory + "/{}/".format(ci[0])
+    os.makedirs(subdir)
+    copy_files_to_environment(subdir)
+    print("Making modified conf")
+    conf = generate_conf_file(ci,working_directory,subdir,conf_name)
+    print("Starting job")
+    if ci[1] == "Python3":
+        env_name = "PPPL_dev3"
+        env_type = "anaconda3"
+    else:
+        env_name = "PPPL"
+        env_type = "anaconda"
+    start_jenkins_job(subdir,num_nodes,executable_name,ci,env_name,env_type)
+
+
+print("submitted jobs.")
diff --git a/jenkins-ci/validate_jenkins.py b/jenkins-ci/validate_jenkins.py
new file mode 100644
index 00000000..6014fb2b
--- /dev/null
+++ b/jenkins-ci/validate_jenkins.py
@@ -0,0 +1,21 @@
+#!/usr/bin/env python
+
+import mpi4py as mmm
+print(mmm.__version__)
+
+import keras as kk
+print(kk.__version__)
+
+import tensorflow as tf
+print(tf.__version__)
+
+from mpi4py import MPI
+import sys
+
+size = MPI.COMM_WORLD.Get_size()
+rank = MPI.COMM_WORLD.Get_rank()
+name = MPI.Get_processor_name()
+
+sys.stdout.write(
+    "Hello, World! I am process %d of %d on %s.\n"
+    % (rank, size, name))
diff --git a/jenkins-ci/validate_jenkins.sh b/jenkins-ci/validate_jenkins.sh
new file mode 100644
index 00000000..f7d6b0d5
--- /dev/null
+++ b/jenkins-ci/validate_jenkins.sh
@@ -0,0 +1,25 @@
+#!/bin/bash
+
+export OMPI_MCA_btl="tcp,self,sm"
+
+module load anaconda3/4.4.0
+source activate /tigress/alexeys/jenkins/.conda/envs/jenkins3
+module load cudatoolkit/8.0
+module load cudnn/cuda-8.0/6.0
+module load openmpi/cuda-8.0/intel-17.0/2.1.0/64
+module load intel/17.0/64/17.0.4.196
+
+cd /home/alexeys/jenkins/workspace/FRNM/PPPL
+python setup.py install
+
+echo `which python`
+echo `which mpicc`
+
+echo ${PWD}
+echo $SLURM_NODELIST
+
+cd jenkins-ci
+echo ${PWD}
+ls ${PWD}
+
+srun python validate_jenkins.py
diff --git a/plasma/conf_parser.py b/plasma/conf_parser.py
index 1ded3a1a..15bf4dbe 100644
--- a/plasma/conf_parser.py
+++ b/plasma/conf_parser.py
@@ -6,6 +6,7 @@
 import uuid
 import yaml
+import hashlib
 
 def parameters(input_file):
     """Parse yaml file of configuration parameters."""
@@ -69,12 +70,17 @@
     #shot lists
     jet_carbon_wall = ShotListFiles(jet,params['paths']['shot_list_dir'],['CWall_clear.txt','CFC_unint.txt'],'jet carbon wall data')
     jet_iterlike_wall = ShotListFiles(jet,params['paths']['shot_list_dir'],['ILW_unint.txt','BeWall_clear.txt'],'jet iter like wall data')
+
+    jenkins_jet_carbon_wall = ShotListFiles(jet,params['paths']['shot_list_dir'],['jenkins_CWall_clear.txt','jenkins_CFC_unint.txt'],'Subset of jet carbon wall data for Jenkins tests')
+    jenkins_jet_iterlike_wall = ShotListFiles(jet,params['paths']['shot_list_dir'],['jenkins_ILW_unint.txt','jenkins_BeWall_clear.txt'],'Subset of jet iter like wall data for Jenkins tests')
+
     jet_full = ShotListFiles(jet,params['paths']['shot_list_dir'],['ILW_unint.txt','BeWall_clear.txt','CWall_clear.txt','CFC_unint.txt'],'jet full data')
 
     d3d_10000 = ShotListFiles(d3d,params['paths']['shot_list_dir'],['d3d_clear_10000.txt','d3d_disrupt_10000.txt'],'d3d data 10000 ND and D shots')
     d3d_1000 = ShotListFiles(d3d,params['paths']['shot_list_dir'],['d3d_clear_1000.txt','d3d_disrupt_1000.txt'],'d3d data 1000 ND and D shots')
     d3d_100 = ShotListFiles(d3d,params['paths']['shot_list_dir'],['d3d_clear_100.txt','d3d_disrupt_100.txt'],'d3d data 100 ND and D shots')
     d3d_full = ShotListFiles(d3d,params['paths']['shot_list_dir'],['d3d_clear_data_avail.txt','d3d_disrupt_data_avail.txt'],'d3d data since shot 125500')
+    d3d_jenkins = ShotListFiles(d3d,params['paths']['shot_list_dir'],['jenkins_d3d_clear.txt','jenkins_d3d_disrupt.txt'],'Subset of d3d data for Jenkins test')
 
     d3d_jb_full = ShotListFiles(d3d,params['paths']['shot_list_dir'],['shotlist_JaysonBarr_clear.txt','shotlist_JaysonBarr_disrupt.txt'],'d3d shots since 160000-170000')
     nstx_full = ShotListFiles(nstx,params['paths']['shot_list_dir'],['disrupt_nstx.txt'],'nstx shots (all are disruptive')
@@ -95,6 +101,10 @@
         params['paths']['shot_files'] = [jet_full]
         params['paths']['shot_files_test'] = []
         params['paths']['use_signals_dict'] = jet_signals
+    elif params['paths']['data'] == 'jenkins_jet':
+        params['paths']['shot_files'] = [jenkins_jet_carbon_wall]
+        params['paths']['shot_files_test'] = [jenkins_jet_iterlike_wall]
+        params['paths']['use_signals_dict'] = jet_signals
     elif params['paths']['data'] == 'd3d_data':
         params['paths']['shot_files'] = [d3d_full]
         params['paths']['shot_files_test'] = []
         params['paths']['use_signals_dict'] = d3d_signals
@@ -116,6 +126,12 @@
         params['paths']['shot_files'] = [d3d_full]
         params['paths']['shot_files_test'] = []
         params['paths']['use_signals_dict'] = d3d_signals
+    elif params['paths']['data'] == 'jenkins_d3d':
+        params['paths']['shot_files'] = [d3d_jenkins]
+        params['paths']['shot_files_test'] = []
+        params['paths']['use_signals_dict'] = {'q95':q95,'li':li,'ip':ip,'lm':lm,'betan':betan,'energy':energy,'dens':dens,'pradcore':pradcore,'pradedge':pradedge,'pin':pin,'torquein':torquein,'ipdirect':ipdirect,'iptarget':iptarget,'iperr':iperr,
+'etemp_profile':etemp_profile ,'edens_profile':edens_profile}
+
     #cross-machine
     elif params['paths']['data'] == 'jet_to_d3d_data':
@@ -167,7 +183,7 @@
     return params
 
 def get_unique_signal_hash(signals):
-    return hash(tuple(sorted(signals)))
+    return int(hashlib.md5(''.join(tuple(map(lambda x: x.description, sorted(signals)))).encode('utf-8')).hexdigest(),16)
 
 #make sure 1D signals come last! This is necessary for model builder.
 def sort_by_channels(list_of_signals):
diff --git a/plasma/models/builder.py b/plasma/models/builder.py
index 2e0fc0c0..0b223356 100644
--- a/plasma/models/builder.py
+++ b/plasma/models/builder.py
@@ -1,3 +1,4 @@
+from __future__ import division
 import keras
 from keras.models import Sequential, Model
 from keras.layers import Input
@@ -21,6 +22,7 @@
 from copy import deepcopy
 
 from plasma.utils.downloading import makedirs_process_safe
+import hashlib
 
 class LossHistory(Callback):
     def on_train_begin(self, logs=None):
@@ -39,7 +41,7 @@ def get_unique_id(self):
         this_conf = deepcopy(self.conf)
         #don't make hash dependent on number of epochs.
         this_conf['training']['num_epochs'] = 0
-        unique_id = hash(dill.dumps(this_conf))
+        unique_id = int(hashlib.md5((dill.dumps(this_conf).decode('unicode_escape')).encode('utf-8')).hexdigest(),16)
         return unique_id
 
     def get_0D_1D_indices(self):
@@ -139,20 +141,20 @@ def slicer_output_shape(input_shape,indices):
             for i in range(model_conf['num_conv_layers']):
                 div_fac = 2**i
-                pre_rnn_1D = Convolution1D(num_conv_filters/div_fac,size_conv_filters,padding='valid',activation='relu') (pre_rnn_1D)
-                pre_rnn_1D = Convolution1D(num_conv_filters/div_fac,1,padding='valid',activation='relu') (pre_rnn_1D)
+                pre_rnn_1D = Convolution1D(num_conv_filters//div_fac,size_conv_filters,padding='valid',activation='relu') (pre_rnn_1D)
+                pre_rnn_1D = Convolution1D(num_conv_filters//div_fac,1,padding='valid',activation='relu') (pre_rnn_1D)
                 pre_rnn_1D = MaxPooling1D(pool_size) (pre_rnn_1D)
             pre_rnn_1D = Flatten() (pre_rnn_1D)
             pre_rnn_1D = Dense(dense_size,activation='relu',kernel_regularizer=l2(dense_regularization),bias_regularizer=l2(dense_regularization),activity_regularizer=l2(dense_regularization)) (pre_rnn_1D)
-            pre_rnn_1D = Dense(dense_size/4,activation='relu',kernel_regularizer=l2(dense_regularization),bias_regularizer=l2(dense_regularization),activity_regularizer=l2(dense_regularization)) (pre_rnn_1D)
+            pre_rnn_1D = Dense(dense_size//4,activation='relu',kernel_regularizer=l2(dense_regularization),bias_regularizer=l2(dense_regularization),activity_regularizer=l2(dense_regularization)) (pre_rnn_1D)
             pre_rnn = Concatenate() ([pre_rnn_0D,pre_rnn_1D])
         else:
             pre_rnn = pre_rnn_input
 
         if model_conf['rnn_layers'] == 0 or model_conf['extra_dense_input']:
             pre_rnn = Dense(dense_size,activation='relu',kernel_regularizer=l2(dense_regularization),bias_regularizer=l2(dense_regularization),activity_regularizer=l2(dense_regularization)) (pre_rnn)
-            pre_rnn = Dense(dense_size/2,activation='relu',kernel_regularizer=l2(dense_regularization),bias_regularizer=l2(dense_regularization),activity_regularizer=l2(dense_regularization)) (pre_rnn)
-            pre_rnn = Dense(dense_size/4,activation='relu',kernel_regularizer=l2(dense_regularization),bias_regularizer=l2(dense_regularization),activity_regularizer=l2(dense_regularization)) (pre_rnn)
+            pre_rnn = Dense(dense_size//2,activation='relu',kernel_regularizer=l2(dense_regularization),bias_regularizer=l2(dense_regularization),activity_regularizer=l2(dense_regularization)) (pre_rnn)
+            pre_rnn = Dense(dense_size//4,activation='relu',kernel_regularizer=l2(dense_regularization),bias_regularizer=l2(dense_regularization),activity_regularizer=l2(dense_regularization)) (pre_rnn)
 
         pre_rnn_model = Model(inputs = pre_rnn_input,outputs=pre_rnn)
         x_input = Input(batch_shape = batch_input_shape)
diff --git a/plasma/models/shallow_runner.py b/plasma/models/shallow_runner.py
index 140454cc..b9ccf883 100644
--- a/plasma/models/shallow_runner.py
+++ b/plasma/models/shallow_runner.py
@@ -29,6 +29,8 @@
 
 from keras.utils.generic_utils import Progbar
 
+import hashlib
+
 debug_use_shots = 100000
 model_filename = "saved_model.pkl"
 dataset_path = "dataset.npz"
@@ -85,8 +87,8 @@ def load_shots(self,shot_list,is_inference=False,as_list=False,num_samples=np.Inf):
     def get_save_prepath(self):
         prepath = self.loader.conf['paths']['processed_prepath']
         use_signals = self.loader.conf['paths']['use_signals']
-        identifying_tuple = tuple(sorted(use_signals))
-        save_prepath = prepath + "shallow/use_signals_{}/".format(hash(identifying_tuple))
+        identifying_tuple = ''.join(tuple(map(lambda x: x.description, sorted(use_signals)))).encode('utf-8')
+        save_prepath = prepath + "shallow/use_signals_{}/".format(int(hashlib.md5(identifying_tuple).hexdigest(),16))
         return save_prepath
 
     def process(self,shot):
diff --git a/plasma/primitives/data.py b/plasma/primitives/data.py
index d6139f66..1354f506 100644
--- a/plasma/primitives/data.py
+++ b/plasma/primitives/data.py
@@ -1,3 +1,4 @@
+from __future__ import division
 import numpy as np
 import time
 import sys,os
@@ -121,8 +122,8 @@ def load_data(self,prepath,shot,dtype='float32'):
 
     def fetch_data_basic(self,machine,shot_num,c,path=None):
-    	if path is None:
-    		path = self.get_path(machine)
+        if path is None:
+            path = self.get_path(machine)
         success = False
         mapping = None
         try:
@@ -183,7 +184,8 @@ def __lt__(self,other):
         return self.description.__lt__(other.description)
 
     def __hash__(self):
-        return self.description.__hash__()
+        import hashlib
+        return int(hashlib.md5(self.description.encode('utf-8')).hexdigest(),16)
 
     def __str__(self):
         return self.description
@@ -205,7 +207,7 @@ def load_data(self,prepath,shot,dtype='float32'):
         if np.ndim(data) == 1:
             data = np.expand_dims(data,axis=0)
         #_ = data[0,0]
-        T = data.shape[0]/2 #time is stored twice, once for mapping and once for signal
+        T = data.shape[0]//2 #time is stored twice, once for mapping and once for signal
         mapping = data[:T,1:]
         remapping = np.linspace(self.mapping_range[0],self.mapping_range[1],self.num_channels)
         t = data[:T,0]
@@ -224,8 +226,12 @@
         sig_interp = np.zeros((timesteps,self.num_channels))
         for i in range(timesteps):
             _,order = np.unique(mapping[i,:],return_index=True) #make sure the mapping is ordered and unique
-            f = UnivariateSpline(mapping[i,order],sig[i,order],s=0,k=1,ext=0)
-            sig_interp[i,:] = f(remapping)
+            if sig[i,order].shape[0] > 2:
+                f = UnivariateSpline(mapping[i,order],sig[i,order],s=0,k=1,ext=0)
+                sig_interp[i,:] = f(remapping)
+            else:
+                print('Signal {}, shot {} has not enough points for linear interpolation. dfitpack.error: (m>k) failed for hidden m: fpcurf0:m=1'.format(self.description,shot.number))
+                return None,None,False
 
         return t,sig_interp,True
@@ -283,7 +289,7 @@ def get_channel_num(self,machine):
 
     def fetch_data(self,machine,shot_num,c):
         time,data,mapping,success = self.fetch_data_basic(machine,shot_num,c)
-        mapping = None   #we are not interested in the whole profile
+        mapping = None #we are not interested in the whole profile
         channel_num = self.get_channel_num(machine)
         if channel_num is not None and success:
             if np.ndim(data) != 2:
diff --git a/plasma/primitives/shots.py b/plasma/primitives/shots.py
index 383ed7e6..09685093 100644
--- a/plasma/primitives/shots.py
+++ b/plasma/primitives/shots.py
@@ -323,8 +323,8 @@ def __eq__(self,other):
         return self.get_id_str().__eq__(other.get_id_str())
 
     def __hash__(self):
-        return self.get_id_str().__hash__()
-
+        import hashlib
+        return int(hashlib.md5(self.get_id_str().encode('utf-8')).hexdigest(),16)
 
     def __str__(self):
         string = 'number: {}\n'.format(self.number)
diff --git a/plasma/utils/batch_jobs.py b/plasma/utils/batch_jobs.py
index f0bcea7f..56203b97 100644
--- a/plasma/utils/batch_jobs.py
+++ b/plasma/utils/batch_jobs.py
@@ -1,3 +1,4 @@
+from __future__ import division
 from pprint import pprint
 import yaml
 import datetime
@@ -11,8 +12,6 @@ def generate_working_dirname(run_directory):
     s += "_{}".format(uuid.uuid4())
     return run_directory + s
 
-
-
 def get_executable_name(conf):
     shallow = conf['model']['shallow']
     if shallow:
@@ -31,6 +30,36 @@
     sp.Popen("sbatch "+script,shell=True)
 
+def create_jenkins_script(subdir,num_nodes,executable_name,test_configuration,env_name="frnn",env_type="anaconda"):
+    filename = "jenkins_{}_{}.cmd".format(test_configuration[0],test_configuration[1]) #version of Python and the dataset
+    filepath = os.path.join(subdir,filename)
+    user = getpass.getuser()
+    with open(filepath,"w") as f:
+        f.write('#!/usr/bin/bash\n')
+        f.write('export OMPI_MCA_btl=\"tcp,self,sm\"\n')
+        f.write('echo \"Jenkins test {}\"\n'.format(test_configuration[1]))
+        f.write('rm /tigress/alexeys/model_checkpoints/*\n')
+        f.write('rm -rf /tigress/alexeys/processed_shots\n')
+        f.write('rm -rf /tigress/alexeys/processed_shotlists\n')
+        f.write('rm -rf /tigress/alexeys/normalization\n')
+        f.write('module load {}\n'.format(env_type))
+        f.write('source activate {}\n'.format(env_name))
+        f.write('module load cudatoolkit/8.0\n')
+        f.write('module load cudnn/cuda-8.0/6.0\n')
+        f.write('module load openmpi/cuda-8.0/intel-17.0/2.1.0/64\n')
+        f.write('module load intel/17.0/64/17.0.4.196\n')
+        f.write('cd /home/alexeys/jenkins/workspace/FRNM/PPPL\n')
+        f.write('python setup.py install\n')
+        f.write('cd {}\n'.format(subdir))
+        f.write('srun -N {} -n {} python {}\n'.format(num_nodes//2, num_nodes//2*4, executable_name))
+
+    return filepath
+
+def start_jenkins_job(subdir,num_nodes,executable_name,test_configuration,env_name,env_type):
+    os.system(" ".join(["cp -p",executable_name,subdir]))
+    script = create_jenkins_script(subdir,num_nodes,executable_name,test_configuration,env_name,env_type)
+    sp.Popen("sh "+script,shell=True)
+
 def start_pbs_job(subdir,num_nodes,i,conf,shallow,env_name="frnn",env_type="anaconda"):
     executable_name,use_mpi = get_executable_name(conf)
     os.system(" ".join(["cp -p",executable_name,subdir]))
@@ -53,7 +82,7 @@ def create_slurm_script(subdir,num_nodes,idx,executable_name,use_mpi,env_name="frnn",env_type="anaconda"):
 #        f.write('rm -f /tigress/{}/model_checkpoints/*.h5\n'.format(user))
         f.write('cd {}\n'.format(subdir))
         f.write('export OMPI_MCA_btl=\"tcp,self,sm\"\n')
-        f.write('srun env PYTHONHASHSEED=0 python {}\n'.format(executable_name))
+        f.write('srun python {}\n'.format(executable_name))
         f.write('echo "done."')
 
     return filepath
@@ -72,7 +101,7 @@ def create_pbs_script(subdir,num_nodes,idx,executable_name,use_mpi,env_name="frnn",env_type="anaconda"):
     f.write('module load tensorflow\n')
 #    f.write('rm $HOME/tigress/alexeys/model_checkpoints/*\n')
     f.write('cd {}\n'.format(subdir))
-    f.write('aprun -n {} -N1 env PYTHONHASHSEED=0 env KERAS_HOME={} singularity exec $TENSORFLOW_CONTAINER python3 {}\n'.format(str(num_nodes),subdir,executable_name))
+    f.write('aprun -n {} -N1 env KERAS_HOME={} singularity exec $TENSORFLOW_CONTAINER python3 {}\n'.format(str(num_nodes),subdir,executable_name))
     f.write('echo "done."')
 
     return filepath
diff --git a/plasma/version.py b/plasma/version.py
index 78235354..758ad1a2 100644
--- a/plasma/version.py
+++ b/plasma/version.py
@@ -1,6 +1,6 @@
 import re
 
-__version__ = "0.3.0"
+__version__ = "1.0.0"
 
 version = __version__
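
Note on the hashlib changes (an editor's sketch, not part of the patch): with the PYTHONHASHSEED=0 pinning removed from the scripts and job templates above, Python's builtin hash() of a string is salted per interpreter process (PEP 456, Python 3.3+), so any cache path or model ID derived from it would differ between runs and between MPI ranks. Hashing the UTF-8 bytes with hashlib.md5, as conf_parser.py, builder.py, shallow_runner.py, data.py, and shots.py now do, yields the same integer everywhere. A minimal illustration, where stable_id is a hypothetical helper rather than a function in this codebase:

import hashlib

def stable_id(description):
    # md5 of the UTF-8 bytes is identical in every process and every run;
    # builtin hash() of a str is salted per interpreter since Python 3.3.
    return int(hashlib.md5(description.encode('utf-8')).hexdigest(), 16)

print(stable_id('q95'))  # same integer on every run and every MPI rank
print(hash('q95'))       # varies between runs unless PYTHONHASHSEED is pinned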
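
Note on the / to // edits in builder.py and data.py (a sketch under the same assumptions): the new from __future__ import division lines make / true division on Python 2, matching Python 3, so expressions such as num_conv_filters/div_fac and data.shape[0]/2 would produce floats where an int is required (Keras layer widths, numpy slice bounds). Floor division keeps them integral:

from __future__ import division  # on Python 2, makes / behave as on Python 3

num_conv_filters, div_fac = 128, 2
print(num_conv_filters / div_fac)   # 64.0 -- a float; invalid as a layer width
print(num_conv_filters // div_fac)  # 64   -- an int; safe for Convolution1D/Dense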
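
Note on the interpolation guard in plasma/primitives/data.py (illustrative data, not from the repository): scipy's UnivariateSpline requires more data points than the spline degree (m > k), and a timestep whose mapping collapses to a single unique point previously aborted with the fpcurf0 error quoted in the new message; the patch now skips such shots instead. A sketch of the same check:

import numpy as np
from scipy.interpolate import UnivariateSpline

mapping = np.array([0.0, 0.5, 1.0])   # channel positions at one timestep
sig = np.array([1.2, 0.8, 1.5])       # measured values at those positions
remapping = np.linspace(0.0, 1.0, 5)  # fixed grid to interpolate onto

if sig.shape[0] > 2:  # mirror the patch's conservative "more than two points" check
    f = UnivariateSpline(mapping, sig, s=0, k=1, ext=0)  # s=0, k=1: piecewise-linear interpolant
    print(f(remapping))
else:
    print('too few points for linear interpolation; skipping shot')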