Skip to content

Commit

Permalink
[CI] Fix deadlock (#395)
Browse files: browse the repository at this point in the history
Also add an organization distinction when selecting an instance; increase the
timeout; allow collaborators to trigger the workflow; shut down instances by default.
  • Loading branch information
hjjq authored Dec 20, 2023
1 parent 9a2f213 commit 57d859a
Show file tree
Hide file tree
Showing 3 changed files with 18 additions and 16 deletions.
17 changes: 6 additions & 11 deletions .github/scripts/run_tests.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,20 +11,15 @@
def run_command(cmd):
cmd = " ".join(cmd)
print("Running command: " + cmd)
popen = subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE, text=True, shell=True)
outputs = []
for line in popen.stdout:
print(line, end='')
outputs.append(line)
popen.stdout.close()
ret = popen.wait()
process = subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE, text=True, shell=True)
stdout, stderr = process.communicate()
ret = process.returncode
if ret:
print('STDERR:')
for line in popen.stderr:
for line in stderr:
print(line, end='')
print(f'Command {cmd} failed with return code {ret}.')
return None
return outputs
raise RuntimeError(f'Command {cmd} failed with return code {ret}.')
return stdout

def get_bench_cmd(run_type, run_id, run_name, run_param_name, dtype):
# Get the name of the benchmark script from DB
Expand Down
3 changes: 2 additions & 1 deletion .github/scripts/start_instances.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@ def run_command(cmd):

# e.g., ' 1, 2, ,3,,' -> ['1', '2', '3']
hw_config_ids = os.environ.get('HW_CONFIG').replace(' ', '')
repo_org = os.environ.get('REPO_NAME').split('/')[0]
if hw_config_ids == 'all':
query = (
'SELECT id FROM hardware_config'
Expand All @@ -34,7 +35,7 @@ def run_command(cmd):
query = (
'SELECT cloud_provider_id, instance_id, hardware_config.name as hw_config FROM cloud_instance '
'JOIN hardware_config ON cloud_instance.hardware_config_id = hardware_config.id '
f'WHERE hardware_config_id = {hw_config_id} LIMIT 1'
f'WHERE hardware_config_id = {hw_config_id} AND cloud_instance.org = \'{repo_org}\' LIMIT 1'
)
cursor.execute(query)
rows = cursor.fetchall()
Expand Down
14 changes: 10 additions & 4 deletions .github/workflows/regression.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@ on:
description: 'Shut down GPU instances when finished.'
required: true
type: boolean
default: false
default: true
issue_comment:
types: [created]

Expand All @@ -29,8 +29,8 @@ jobs:
start_instances:
if: |
github.event_name == 'workflow_dispatch' ||
github.event_name == 'issue_comment' && github.event.issue.pull_request &&
contains(fromJSON('["MEMBER", "OWNER"]'), github.event.comment.author_association) &&
github.event_name == 'issue_comment' && github.event.issue.pull_request != '' &&
contains(fromJSON('["MEMBER", "OWNER", "COLLABORATOR"]'), github.event.comment.author_association) &&
contains(github.event.comment.body, '$hidet-ci launch')
runs-on: ubuntu-latest
outputs:
Expand All @@ -48,6 +48,7 @@ jobs:
run: timeout 900 python ./.github/scripts/start_instances.py
env:
HW_CONFIG: all
REPO_NAME: ${{ github.repository }}
# TODO: Allow launching only specified GPU instances

- name: Upload run configs
Expand All @@ -59,6 +60,7 @@ jobs:

run_tests:
needs: start_instances
timeout-minutes: 2880
strategy:
matrix:
hw_configs: ${{ fromJSON(needs.start_instances.outputs.hw_configs) }}
Expand Down Expand Up @@ -110,6 +112,7 @@ jobs:
name: run_configs

- name: Run tests
timeout-minutes: 2880
run: |
python hidet/.github/scripts/run_tests.py
env:
Expand Down Expand Up @@ -165,7 +168,10 @@ jobs:
HW_CONFIGS: ${{ needs.start_instances.outputs.hw_configs }}

stop_instances:
if: ${{ inputs.shutdown_instances }}
if: |
github.event_name == 'workflow_dispatch' && inputs.shutdown_instances ||
github.event_name == 'issue_comment' && github.event.issue.pull_request != '' &&
!contains(github.event.comment.body, '--keep')
runs-on: ubuntu-latest
needs: [start_instances, run_tests]
steps:
Expand Down

0 comments on commit 57d859a

Please sign in to comment.