Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

PTFE-1450 warn about failing queues #168

Merged
merged 15 commits into from
Mar 7, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 9 additions & 0 deletions bert_e/exceptions.py
Original file line number Diff line number Diff line change
Expand Up @@ -268,6 +268,11 @@ class RequestIntegrationBranches(TemplateException):
status = "in_progress"


class QueueBuildFailedMessage(TemplateException):
code = 136
template = "queue_build_failed.md"


# internal exceptions
class UnableToSendEmail(InternalException):
code = 201
Expand Down Expand Up @@ -583,3 +588,7 @@ class JobSuccess(SilentException):

class JobFailure(SilentException):
code = 308


class QueueBuildFailed(SilentException):
code = 309
34 changes: 34 additions & 0 deletions bert_e/templates/queue_build_failed.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
{% extends "message.md" %}

{% block title -%}
Queue build failed
{% endblock %}

{% block message %}

The corresponding build for the queue failed:

- Checkout the [status page]({{ frontend_url }}).
- Identify the failing build and review the logs.
tcarmet marked this conversation as resolved.
Show resolved Hide resolved
- If no issue is found, re-run the build.
nootal marked this conversation as resolved.
Show resolved Hide resolved
- If an issue is identified, checkout the steps below to remove
the pull request from the queue for further analysis and maybe rebase/merge.

<details>
<summary><b>Remove the pull request from the queue</b></summary>

- Add a `/wait` comment on this pull request.
- Click on login on the [status page]({{ frontend_url }}).
- Go into the [manage]({{ frontend_url }}/manage) page.
- Find the option called `Rebuild the queue` and click on it.
Bert-E will loop again on all pull requests to put the valid ones
in the queue again, while skipping the one with the `/wait` comment.
- Wait for the new queue to merge, then merge/rebase your pull request
with the latest changes to then work on a proper fix.
- Once the issue is fixed, delete the `/wait` comment and
follow the usual process to merge the pull request.

</details>


{% endblock %}
34 changes: 30 additions & 4 deletions bert_e/tests/test_bert_e.py
Original file line number Diff line number Diff line change
Expand Up @@ -5446,6 +5446,21 @@ def test_validation_with_failed_stabilization_branch_stacked(self):
qc.validate()
self.assertEqual(qc.mergeable_prs, [1])

def test_notify_pr_on_queue_fail(self):
pr = self.create_pr('bugfix/TEST-01', 'development/4.3')
with self.assertRaises(exns.Queued):
self.handle(pr.id, options=self.bypass_all, backtrace=True)
branch = f"q/{pr.id}/4.3/{pr.src_branch}"
self.set_build_status_on_branch_tip(branch, 'INPROGRESS')
with self.assertRaises(exns.NothingToDo):
self.handle(pr.id, options=self.bypass_all, backtrace=True)
self.set_build_status_on_branch_tip(branch, 'FAILED')
with self.assertRaises(exns.QueueBuildFailed):
self.handle(pr.id, options=self.bypass_all, backtrace=True)
# get last comment
comment = list(pr.get_comments())[-1].text
assert "Queue build failed" in comment

def test_system_nominal_case(self):
pr = self.create_pr('bugfix/TEST-00001', 'development/4.3')
self.handle(pr.id,
Expand Down Expand Up @@ -5490,11 +5505,16 @@ def test_system_nominal_case(self):
self.set_build_status_on_pr_id(pr.id, 'SUCCESSFUL')
self.set_build_status(sha1=sha1_w_5_1, state='SUCCESSFUL')
self.set_build_status(sha1=sha1_w_10_0, state='FAILED')
with self.assertRaises(exns.NothingToDo):
with self.assertRaises(exns.QueueBuildFailed):
self.handle(pr.id, options=self.bypass_all, backtrace=True)

with self.assertRaises(exns.QueueBuildFailed):
self.handle(pr.src_commit, options=self.bypass_all, backtrace=True)

self.set_build_status(sha1=sha1_w_10_0, state='INPROGRESS')
with self.assertRaises(exns.NothingToDo):
self.handle(pr.src_commit, options=self.bypass_all, backtrace=True)

self.set_build_status(sha1=sha1_w_10_0, state='SUCCESSFUL')
with self.assertRaises(exns.Merged):
self.handle(pr.src_commit, options=self.bypass_all, backtrace=True)
Expand Down Expand Up @@ -5937,7 +5957,7 @@ def test_pr_dev_and_hotfix_with_hotfix_merged_first(self):
sha1 = self.set_build_status_on_branch_tip(
'q/%d/10.0/bugfix/TEST-00001' % pr1.id, 'SUCCESSFUL')

with self.assertRaises(exns.NothingToDo):
with self.assertRaises(exns.QueueBuildFailed):
self.handle(sha1, options=self.bypass_all, backtrace=True)
self.assertEqual(self.prs_in_queue(), {pr0.id, pr1.id})

Expand Down Expand Up @@ -6059,6 +6079,12 @@ def test_pr_hotfix_alone(self):

sha1 = self.set_build_status_on_branch_tip(
'q/%d/10.0.0.1/bugfix/TEST-00000' % pr0.id, 'FAILED')
with self.assertRaises(exns.QueueBuildFailed):
self.handle(sha1, options=self.bypass_all, backtrace=True)
self.assertEqual(self.prs_in_queue(), {pr0.id})

sha1 = self.set_build_status_on_branch_tip(
'q/%d/10.0.0.1/bugfix/TEST-00000' % pr0.id, 'INPROGRESS')
with self.assertRaises(exns.NothingToDo):
self.handle(sha1, options=self.bypass_all, backtrace=True)
self.assertEqual(self.prs_in_queue(), {pr0.id})
Expand Down Expand Up @@ -6110,7 +6136,7 @@ def test_multi_branch_queues(self):
sha1 = self.set_build_status_on_branch_tip(
'q/%d/10.0/bugfix/TEST-00003' % pr3.id, 'SUCCESSFUL')

with self.assertRaises(exns.NothingToDo):
with self.assertRaises(exns.QueueBuildFailed):
self.handle(sha1, options=self.bypass_all, backtrace=True)
self.assertEqual(self.prs_in_queue(), {pr1.id, pr2.id, pr3.id,
pr4217.id})
Expand Down Expand Up @@ -6140,7 +6166,7 @@ def test_multi_branch_queues(self):
'q/%d/5.1/bugfix/TEST-00004' % pr4.id, 'SUCCESSFUL')
sha1 = self.set_build_status_on_branch_tip(
'q/%d/10.0/bugfix/TEST-00004' % pr4.id, 'FAILED')
with self.assertRaises(exns.NothingToDo):
with self.assertRaises(exns.QueueBuildFailed):
self.handle(sha1, options=self.bypass_all, backtrace=True)
self.assertEqual(self.prs_in_queue(), {pr2.id, pr3.id, pr4.id})

Expand Down
19 changes: 19 additions & 0 deletions bert_e/workflow/gitwaterflow/branches.py
Original file line number Diff line number Diff line change
Expand Up @@ -548,6 +548,25 @@ def validate(self):

self._validated = True

@property
def failed_prs(self):
"""Return a list PRs in which the build have failed in the queue."""
if not self._validated:
raise errors.QueuesNotValidated()

failed = []
for version in self._queues.keys():
qint = self._queues[version][QueueIntegrationBranch]
if qint:
qint = qint[0]
status = self.bbrepo.get_build_status(
qint.get_latest_commit(),
self.build_key
)
if status == 'FAILED':
failed.append(qint.pr_id)
return failed

def _recursive_lookup(self, queues):
"""Given a set of queues, remove all queues that can't be merged,
based on the build status obtained from the repository manager.
Expand Down
28 changes: 27 additions & 1 deletion bert_e/workflow/gitwaterflow/queueing.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,9 +29,30 @@
build_queue_collection)
from .integration import get_integration_branches
from typing import List


LOG = logging.getLogger(__name__)


def notify_queue_build_failed(failed_prs: List[int], job: QueuesJob):
"""Notify on the pull request that the queue build failed."""
# TODO: As this feature evolves, we might want to include
# the list of failed q/ branches in the message.
# Currently the drawback is that if the template changes a lot
# (one branch mentioned then two, then back to one)
# we will be sending multiple notifications to the user,
# in some cases with no good reason, and in other cases with a good reason.
# This becomes less of an issue if we focus on notifying the user
# only through build status checks.
for pr_id in failed_prs:
pull_request = job.project_repo.get_pull_request(pr_id)
notify_user(
job.settings, pull_request, exceptions.QueueBuildFailedMessage(
active_options=job.active_options,
frontend_url=job.bert_e.settings.frontend_url)
)


@job_handler(QueuesJob)
def handle_merge_queues(job):
"""Check merge queue and fast-forward development branches to the most
Expand All @@ -48,7 +69,12 @@ def handle_merge_queues(job):
job.bert_e.update_queue_status(queues)

if not queues.mergeable_prs:
raise exceptions.NothingToDo()
failed_prs = queues.failed_prs
if not failed_prs:
raise exceptions.NothingToDo()
else:
notify_queue_build_failed(failed_prs, job)
raise exceptions.QueueBuildFailed()

merge_queues(queues.mergeable_queues)

Expand Down
Loading