submit.yml.erb
<%-
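# Turn the form selections (num_cores, node_type, cluster) into Slurm arguments; 0 cores requests a full node.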
cores = num_cores.to_i
if cores == 0 && cluster == "pitzer"
    # Small optimization for Pitzer: a full-node request submitted without a core-count
    # constraint ('any') can be scheduled on either p18 or p20 nodes; a specific node type
    # gets the matching constraint applied below.
base_slurm_args = ["--nodes", "1", "--exclusive"]
elsif cores == 0
    # 0 cores on Owens means the full 28-core node
    cores = 28
    base_slurm_args = ["--nodes", "1", "--ntasks-per-node", "#{cores}"]
else
base_slurm_args = ["--nodes", "1", "--ntasks-per-node", "#{cores}"]
end
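  # Map the selected node type onto the matching Slurm constraint or partition.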
  slurm_args = case node_type
  when "gpu-40core", "any-40core"
    base_slurm_args + ["--constraint", "40core"]
  when "gpu-48core", "any-48core"
    base_slurm_args + ["--constraint", "48core"]
  when "hugemem"
    base_slurm_args + ["--partition", "hugemem"]
  when "largemem"
    base_slurm_args + ["--partition", "largemem"]
  when "debug"
    base_slurm_args + ["--partition", "debug"]
  else
    base_slurm_args
  end
-%>
<% if cluster =~ /owens|pitzer|ascend|cardinal/ -%>
---
batch_connect:
template: "basic"
conn_params:
- jupyter_api
script:
<% if node_type =~ /gpu/ -%>
gpus_per_node: 1
<% end -%>
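  # Extra scheduler arguments passed through to Slurm unchanged.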
native:
<%- slurm_args.each do |arg| %>
- "<%= arg %>"
<%- end %>
<% elsif cluster =~ /kubernetes/
if node_type =~ /owens/
compute_cluster = "owens"
apps_path = "/usr/local"
# Memory per core with hyperthreading enabled
memory_mb = num_cores.to_i * 2200
elsif node_type =~ /pitzer/
compute_cluster = "pitzer"
apps_path = "/apps"
# Memory per core with hyperthreading enabled
memory_mb = num_cores.to_i * 4000
end
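  # Host paths bind-mounted into the container at the same location.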
mounts = {
'home' => OodSupport::User.new.home,
'support' => OodSupport::User.new('support').home,
'project' => '/fs/project',
'scratch' => '/fs/scratch',
'ess' => '/fs/ess',
}
-%>
---
script:
wall_time: "<%= bc_num_hours.to_i * 3600 %>"
<%- if node_type =~ /gpu/ -%>
gpus_per_node: 1
<%- end -%>
native:
container:
name: "jupyter"
image: "docker-registry.osc.edu/ondemand/ondemand-base-rhel7:0.10.0"
image_pull_policy: "IfNotPresent"
command: ["/bin/bash","-l","<%= staged_root %>/job_script_content.sh"]
restart_policy: 'OnFailure'
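      # Pass the submitting user's UID, GID and username into the container environment.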
env:
NB_UID: "<%= Etc.getpwnam(ENV['USER']).uid %>"
NB_USER: "<%= ENV['USER'] %>"
NB_GID: "<%= Etc.getpwnam(ENV['USER']).gid %>"
CLUSTER: "<%= compute_cluster %>"
KUBECONFIG: "/dev/null"
labels:
osc.edu/cluster: "<%= compute_cluster %>"
port: "8080"
cpu: "<%= num_cores %>"
memory: "<%= memory_mb %>Mi"
mounts:
<%- mounts.each_pair do |name, mount| -%>
- type: host
name: <%= name %>
host_type: Directory
path: <%= mount %>
destination_path: <%= mount %>
<%- end -%>
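      # Munge socket and Slurm configuration from the host, presumably so Slurm client commands can authenticate from inside the pod.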
- type: host
name: munge-socket
host_type: Socket
path: /var/run/munge/munge.socket.2
destination_path: /var/run/munge/munge.socket.2
- type: host
name: slurm-conf
host_type: Directory
path: /etc/slurm
destination_path: /etc/slurm
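      # SSSD pipes/config and nsswitch.conf give the container the same user and group lookups as the host.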
- type: host
name: sssd-pipes
host_type: Directory
path: /var/lib/sss/pipes
destination_path: /var/lib/sss/pipes
- type: host
name: sssd-conf
host_type: Directory
path: /etc/sssd
destination_path: /etc/sssd
- type: host
name: nsswitch
host_type: File
path: /etc/nsswitch.conf
destination_path: /etc/nsswitch.conf
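      # Lmod init script, Intel tools, and the cluster's application tree expose the host's software stack.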
- type: host
name: lmod-init
host_type: File
path: /apps/<%= compute_cluster %>/lmod/lmod.sh
destination_path: /etc/profile.d/lmod.sh
- type: host
name: intel
host_type: Directory
path: /nfsroot/<%= compute_cluster %>/opt/intel
destination_path: /opt/intel
- type: host
name: apps
host_type: Directory
path: /apps/<%= compute_cluster %>
destination_path: <%= apps_path %>
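    # Only schedule onto nodes labeled for Open OnDemand pods.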
node_selector:
node-role.kubernetes.io/ondemand: ''
<% end -%>