Skip to content

Commit

Permalink
PTFE-1450 warn about failing queues (#168)
Browse files Browse the repository at this point in the history
Co-authored-by: Charles Prost - Scality <[email protected]>
  • Loading branch information
tcarmet and nootal authored Mar 7, 2024
1 parent 4f2a3a0 commit c40630b
Show file tree
Hide file tree
Showing 5 changed files with 119 additions and 5 deletions.
9 changes: 9 additions & 0 deletions bert_e/exceptions.py
Original file line number Diff line number Diff line change
Expand Up @@ -268,6 +268,11 @@ class RequestIntegrationBranches(TemplateException):
status = "in_progress"


class QueueBuildFailedMessage(TemplateException):
code = 136
template = "queue_build_failed.md"


# internal exceptions
class UnableToSendEmail(InternalException):
code = 201
Expand Down Expand Up @@ -583,3 +588,7 @@ class JobSuccess(SilentException):

class JobFailure(SilentException):
code = 308


class QueueBuildFailed(SilentException):
code = 309
34 changes: 34 additions & 0 deletions bert_e/templates/queue_build_failed.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
{% extends "message.md" %}

{% block title -%}
Queue build failed
{% endblock %}

{% block message %}

The corresponding build for the queue failed:

- Checkout the [status page]({{ frontend_url }}).
- Identify the failing build and review the logs.
- If no issue is found, re-run the build.
- If an issue is identified, checkout the steps below to remove
the pull request from the queue for further analysis and maybe rebase/merge.

<details>
<summary><b>Remove the pull request from the queue</b></summary>

- Add a `/wait` comment on this pull request.
- Click on login on the [status page]({{ frontend_url }}).
- Go into the [manage]({{ frontend_url }}/manage) page.
- Find the option called `Rebuild the queue` and click on it.
Bert-E will loop again on all pull requests to put the valid ones
in the queue again, while skipping the one with the `/wait` comment.
- Wait for the new queue to merge, then merge/rebase your pull request
with the latest changes to then work on a proper fix.
- Once the issue is fixed, delete the `/wait` comment and
follow the usual process to merge the pull request.

</details>


{% endblock %}
34 changes: 30 additions & 4 deletions bert_e/tests/test_bert_e.py
Original file line number Diff line number Diff line change
Expand Up @@ -5446,6 +5446,21 @@ def test_validation_with_failed_stabilization_branch_stacked(self):
qc.validate()
self.assertEqual(qc.mergeable_prs, [1])

def test_notify_pr_on_queue_fail(self):
pr = self.create_pr('bugfix/TEST-01', 'development/4.3')
with self.assertRaises(exns.Queued):
self.handle(pr.id, options=self.bypass_all, backtrace=True)
branch = f"q/{pr.id}/4.3/{pr.src_branch}"
self.set_build_status_on_branch_tip(branch, 'INPROGRESS')
with self.assertRaises(exns.NothingToDo):
self.handle(pr.id, options=self.bypass_all, backtrace=True)
self.set_build_status_on_branch_tip(branch, 'FAILED')
with self.assertRaises(exns.QueueBuildFailed):
self.handle(pr.id, options=self.bypass_all, backtrace=True)
# get last comment
comment = list(pr.get_comments())[-1].text
assert "Queue build failed" in comment

def test_system_nominal_case(self):
pr = self.create_pr('bugfix/TEST-00001', 'development/4.3')
self.handle(pr.id,
Expand Down Expand Up @@ -5490,11 +5505,16 @@ def test_system_nominal_case(self):
self.set_build_status_on_pr_id(pr.id, 'SUCCESSFUL')
self.set_build_status(sha1=sha1_w_5_1, state='SUCCESSFUL')
self.set_build_status(sha1=sha1_w_10_0, state='FAILED')
with self.assertRaises(exns.NothingToDo):
with self.assertRaises(exns.QueueBuildFailed):
self.handle(pr.id, options=self.bypass_all, backtrace=True)

with self.assertRaises(exns.QueueBuildFailed):
self.handle(pr.src_commit, options=self.bypass_all, backtrace=True)

self.set_build_status(sha1=sha1_w_10_0, state='INPROGRESS')
with self.assertRaises(exns.NothingToDo):
self.handle(pr.src_commit, options=self.bypass_all, backtrace=True)

self.set_build_status(sha1=sha1_w_10_0, state='SUCCESSFUL')
with self.assertRaises(exns.Merged):
self.handle(pr.src_commit, options=self.bypass_all, backtrace=True)
Expand Down Expand Up @@ -5937,7 +5957,7 @@ def test_pr_dev_and_hotfix_with_hotfix_merged_first(self):
sha1 = self.set_build_status_on_branch_tip(
'q/%d/10.0/bugfix/TEST-00001' % pr1.id, 'SUCCESSFUL')

with self.assertRaises(exns.NothingToDo):
with self.assertRaises(exns.QueueBuildFailed):
self.handle(sha1, options=self.bypass_all, backtrace=True)
self.assertEqual(self.prs_in_queue(), {pr0.id, pr1.id})

Expand Down Expand Up @@ -6059,6 +6079,12 @@ def test_pr_hotfix_alone(self):

sha1 = self.set_build_status_on_branch_tip(
'q/%d/10.0.0.1/bugfix/TEST-00000' % pr0.id, 'FAILED')
with self.assertRaises(exns.QueueBuildFailed):
self.handle(sha1, options=self.bypass_all, backtrace=True)
self.assertEqual(self.prs_in_queue(), {pr0.id})

sha1 = self.set_build_status_on_branch_tip(
'q/%d/10.0.0.1/bugfix/TEST-00000' % pr0.id, 'INPROGRESS')
with self.assertRaises(exns.NothingToDo):
self.handle(sha1, options=self.bypass_all, backtrace=True)
self.assertEqual(self.prs_in_queue(), {pr0.id})
Expand Down Expand Up @@ -6110,7 +6136,7 @@ def test_multi_branch_queues(self):
sha1 = self.set_build_status_on_branch_tip(
'q/%d/10.0/bugfix/TEST-00003' % pr3.id, 'SUCCESSFUL')

with self.assertRaises(exns.NothingToDo):
with self.assertRaises(exns.QueueBuildFailed):
self.handle(sha1, options=self.bypass_all, backtrace=True)
self.assertEqual(self.prs_in_queue(), {pr1.id, pr2.id, pr3.id,
pr4217.id})
Expand Down Expand Up @@ -6140,7 +6166,7 @@ def test_multi_branch_queues(self):
'q/%d/5.1/bugfix/TEST-00004' % pr4.id, 'SUCCESSFUL')
sha1 = self.set_build_status_on_branch_tip(
'q/%d/10.0/bugfix/TEST-00004' % pr4.id, 'FAILED')
with self.assertRaises(exns.NothingToDo):
with self.assertRaises(exns.QueueBuildFailed):
self.handle(sha1, options=self.bypass_all, backtrace=True)
self.assertEqual(self.prs_in_queue(), {pr2.id, pr3.id, pr4.id})

Expand Down
19 changes: 19 additions & 0 deletions bert_e/workflow/gitwaterflow/branches.py
Original file line number Diff line number Diff line change
Expand Up @@ -548,6 +548,25 @@ def validate(self):

self._validated = True

@property
def failed_prs(self):
"""Return a list PRs in which the build have failed in the queue."""
if not self._validated:
raise errors.QueuesNotValidated()

failed = []
for version in self._queues.keys():
qint = self._queues[version][QueueIntegrationBranch]
if qint:
qint = qint[0]
status = self.bbrepo.get_build_status(
qint.get_latest_commit(),
self.build_key
)
if status == 'FAILED':
failed.append(qint.pr_id)
return failed

def _recursive_lookup(self, queues):
"""Given a set of queues, remove all queues that can't be merged,
based on the build status obtained from the repository manager.
Expand Down
28 changes: 27 additions & 1 deletion bert_e/workflow/gitwaterflow/queueing.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,9 +29,30 @@
build_queue_collection)
from .integration import get_integration_branches
from typing import List


LOG = logging.getLogger(__name__)


def notify_queue_build_failed(failed_prs: List[int], job: QueuesJob):
"""Notify on the pull request that the queue build failed."""
# TODO: As this feature evolves, we might want to include
# the list of failed q/ branches in the message.
# Currently the drawback is that if the template changes a lot
# (one branch mentioned then two, then back to one)
# we will be sending multiple notifications to the user,
# in some cases with no good reason, and in other cases with a good reason.
# This becomes less of an issue if we focus on notifying the user
# only through build status checks.
for pr_id in failed_prs:
pull_request = job.project_repo.get_pull_request(pr_id)
notify_user(
job.settings, pull_request, exceptions.QueueBuildFailedMessage(
active_options=job.active_options,
frontend_url=job.bert_e.settings.frontend_url)
)


@job_handler(QueuesJob)
def handle_merge_queues(job):
"""Check merge queue and fast-forward development branches to the most
Expand All @@ -48,7 +69,12 @@ def handle_merge_queues(job):
job.bert_e.update_queue_status(queues)

if not queues.mergeable_prs:
raise exceptions.NothingToDo()
failed_prs = queues.failed_prs
if not failed_prs:
raise exceptions.NothingToDo()
else:
notify_queue_build_failed(failed_prs, job)
raise exceptions.QueueBuildFailed()

merge_queues(queues.mergeable_queues)

Expand Down

0 comments on commit c40630b

Please sign in to comment.