forked from mandalgrouptamu/SemiClassical-NAMD
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathrun.py
153 lines (125 loc) · 4.35 KB
/
run.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
import os,sys
# Make the modules under ./Model (relative to the current working directory)
# importable by bare name. os.getcwd() replaces spawning a `pwd` subprocess:
# no shell is needed and no pipe is left open.
sys.path.append(os.path.join(os.getcwd(), "Model"))
def getInput(input, key):
    """
    Return the value assigned to ``key`` in the lines of an input file.

    The first line that contains ``key`` anywhere in it is used. The value is
    the text between that line's first ``=`` and its next ``=`` (if any), cut
    at the first ``#`` so trailing comments are dropped, with newlines and
    all spaces removed.

    Parameters
    ----------
    input : list of str
        Lines of the input file (e.g. from ``readlines()``).
    key : str
        Text to look for in a line.

    Returns
    -------
    str
        The cleaned value, or ``""`` when no line contains ``key`` or the
        matching line has no ``=``.
    """
    try:
        # [0] fails when no line matches, [1] fails when the line has no "=";
        # both raise IndexError, which is the only "not found" condition.
        line = [i for i in input if key in i][0]
        txt = line.split("=")[1].split("#", 1)[0].replace("\n", "")
    except IndexError:
        txt = ""
    return txt.replace(" ", "")
def sbatch(filename, option="", pre=""):
    """
    Submit a job with ``sbatch`` and return its job id.

    The shell command executed is ``sbatch {pre} {filename} {option}``.

    Parameters
    ----------
    filename : str
        Script passed to ``sbatch``.
    option : str
        Text placed after the script name on the command line.
    pre : str
        Text placed between ``sbatch`` and the script name.

    Returns
    -------
    str
        The fourth whitespace-separated token of the command's standard
        output (presumably the id from a "Submitted batch job <id>" line;
        confirm against the installed Slurm version).

    Raises
    ------
    RuntimeError
        If the command's output has fewer than four tokens, i.e. no job id
        could be read (for example because the submission failed).
    """
    import os
    job = f"sbatch {pre} {filename} {option}"
    # Context manager so the pipe is closed as soon as the output is read.
    with os.popen(job) as pipe:
        submit = pipe.read()
    fields = submit.split()
    if len(fields) < 4:
        raise RuntimeError(
            f"Could not read a job id from the output of '{job}': {submit!r}"
        )
    # split() already discards newlines, so the token needs no further cleanup.
    return fields[3]
# ---------------------------------------------------------------------------
# Driver: read the input file, then launch the trajectories on the platform
# named by its "System" entry ("slurm", "htcondor", or anything else for a
# local run).
# ---------------------------------------------------------------------------
import importlib


def _read_lines(path):
    """Return all lines of the text file *path*; the file is closed on return."""
    with open(path, 'r') as handle:
        return handle.readlines()


def _load_ntraj(model):
    """Import the module named *model* and return its ``parameters.NTraj``."""
    # The module name comes from the input file; importing it runs that
    # module's top-level code, just as the former exec-based import did.
    return importlib.import_module(model).parameters.NTraj


# Input file: first command-line argument, falling back to "input.txt" when
# no argument is given or the given file cannot be opened.
# NOTE: the name `input` shadows the builtin; it is kept so the module-level
# names of this script stay unchanged.
try:
    inputfile = sys.argv[1]
    input = _read_lines(inputfile)
except (IndexError, OSError):
    inputfile = "input.txt"
    input = _read_lines(inputfile)

print(f"Reading {inputfile}")

# System
system = getInput(input, "System")

# SLURM
if system == "slurm":
    print("Running jobs in a HPCC")
    model = getInput(input, "Model")
    ntraj = _load_ntraj(model)
    nodes = int(getInput(input, "Nodes"))
    ncpus = int(getInput(input, "Cpus"))
    # Read once, outside the submission loop: the values do not change per
    # node, and `partition` is also needed after the loop (it used to be
    # undefined there when nodes == 0).
    partition = getInput(input, "Partition")
    method = getInput(input, "Method")
    totalTraj = ntraj * nodes * ncpus

    print(f"Using {nodes} Nodes each with {ncpus} CPUs")
    print("-"*50)
    print(f"Total Number of Trajectories = {totalTraj}")
    print(f"Trajectories per CPU = {ntraj}")
    print("-"*50)

    with open("output.log", "w+") as output:
        output.write(f"Total Number of Trajectories = {totalTraj}\n")
        output.write(f"Trajectories per CPU = {ntraj}\n")
        output.write(f"Using {nodes} Nodes each with {ncpus} CPUs\n")
        output.write("-"*50 + "\n")

    # Start from a fresh RUN directory.
    os.system("rm -rf RUN")
    os.mkdir("RUN")

    # Joined with explicit spaces so the option string does not depend on how
    # the source lines happen to be indented.
    options = " ".join([
        f"--partition {partition}",
        f"--ntasks-per-node {ncpus}",
        f"--job-name={model}-{method}",
        "--open-mode=append",
    ])

    ids = []
    for i in range(nodes):
        # Run the jobs: one submission per node, each with its own directory.
        os.mkdir(f"RUN/run-{i}")
        ids.append(sbatch("parallel.py", f"{inputfile} RUN/run-{i}", options))
        print(f"Submitted Job {ids[-1]}")

    # Gather and average: submitted with a dependency on every job above.
    jobs = ":".join(ids)
    pre = f"--dependency=afterok:{jobs} --partition {partition} --output=output.log --open-mode=append"
    sbatch("avg.py", inputfile, pre)

elif system == "htcondor":
    print("Running jobs in a HTC")
    # read input-------------------
    cpus = getInput(input, "Cpus")
    model = getInput(input, "Model")
    method = getInput(input, "Method")
    pylocation = getInput(input, "pylocation")
    # read input-------------------
    ntraj = _load_ntraj(model)
    print(f"Total Number of Trajectories = {int(float(ntraj) * float(cpus))}")

    # Start from a fresh RUN directory and work inside it.
    os.system("rm -rf RUN")
    os.mkdir("RUN")
    os.chdir("RUN")

    # The submit-file template is the same for every run: read it once and
    # rewrite its sixth line. The "pylocation" value from the input file is
    # used when present; otherwise the previously hard-coded path is kept.
    condor = _read_lines("../condor.sub")
    pyloc = pylocation or "/home/arkajitmandal/py3.tar.gz"
    condor[5] = f"pylocation = {pyloc}\n"

    for ic in range(int(cpus)):
        os.mkdir(f"run-{ic}")
        os.mkdir(f"run-{ic}/Model")
        os.mkdir(f"run-{ic}/Method")
        os.mkdir(f"run-{ic}/log")
        os.system(f"cp ../Model/{model}.py run-{ic}/Model/")
        os.system(f"cp ../Method/{method}.py run-{ic}/Method/")
        os.system(f"cp ../condor.sh run-{ic}")
        os.system(f"cp ../serial.py run-{ic}")
        with open(f"run-{ic}/condor.sub", "w+") as condorfile:
            condorfile.writelines(condor)
        # NOTE(review): always copies "input.txt", even when another input
        # file was given on the command line -- confirm this is intended.
        os.system(f"cp ../input.txt run-{ic}")
        # Submit from inside the run directory, then step back out.
        os.chdir(f"run-{ic}")
        os.system("condor_submit condor.sub")
        os.chdir("../")

# PC
else:
    print("Running jobs in your local machine (like a PC)")
    # Some messages: report only the cluster-specific keys that are actually
    # set in the input file (getInput returns "" for absent keys and never
    # raises, so the old try/except always listed all three).
    ignoreList = [
        key for key in ("Nodes", "Cpus", "Partition") if getInput(input, key)
    ]
    if ignoreList:
        print(f"Ignoring {ignoreList} in {inputfile}")

    model = getInput(input, "Model")
    ntraj = _load_ntraj(model)
    print("-"*50)
    print(f"Total Number of Trajectories = {ntraj}")
    print("-"*50)
    os.system(f"python3 serial.py {inputfile}")