daos-stack · ashleypittman · Dec 21, 2023 · Jan 2, 2024 · Jan 2, 2024 · Jan 2, 2024
diff --git a/.github/workflows/bash_unit_testing.yml b/.github/workflows/bash_unit_testing.yml
@@ -15,6 +15,7 @@ defaults:
 jobs:
   Test-gha-functions:
     name: Tests in ci/gha_functions.sh
+    if: github.repository == 'daos-stack/daos'
     runs-on: [self-hosted, light]
     steps:
       - name: Checkout code

diff --git a/.github/workflows/jenkins-status.yml b/.github/workflows/jenkins-status.yml
@@ -0,0 +1,16 @@
+name: Jenkins status report
+
+on:
+  pull_request:
+
+jobs:
+  # Check and report Jenkins test results.  Once stable this should switch to the check_suite
+  # trigger, and then check the PR which triggered it.
+  jenkins_check:
+    name: Check Jenkins results
+    if: github.repository == 'daos-stack/daos'
+    runs-on: [self-hosted, light]
+    steps:
+      - uses: actions/checkout@v4
+      - name: Run check
+        run: ./ci/jenkins_status.py --pr ${{ github.event.pull_request.number }}
diff --git a/.github/workflows/linting.yml b/.github/workflows/linting.yml
@@ -14,6 +14,14 @@ jobs:
   # This checks .py files only so misses SConstruct and SConscript files are not checked, rather
   # for these files check them afterwards.  The output-filter will not be installed for this part
   # so regressions will be detected but not annotated.
+
+  # For newly landed checks to work when testing PRs which have not been rebased it is
+  # necessary to check for the existence of scripts and files before calling them.
+  # This is because checks which annotate code have to check out the PR code rather than a merge
+  # commit so the line numbers are correct for reporting.  Any checks which simply return failure
+  # can check out the (default) merge commit and therefore do not need this protection.
+  # run: \[ ! -x ci/run_shellcheck.sh \] || ./ci/run_shellcheck.sh
+
   isort:
     name: Python isort
     runs-on: ubuntu-latest
@@ -43,10 +51,7 @@ jobs:
       - name: Add error parser
         run: echo -n "::add-matcher::ci/shellcheck-matcher.json"
       - name: Run Shellcheck
-        # The check will run with this file from the target branch but the code from the PR so
-        # test for this file before calling it to prevent failures on PRs where this check is
-        # in the target branch but the PR is not updated to include it.
-        run: \[ ! -x ci/run_shellcheck.sh \] || ./ci/run_shellcheck.sh
+        run: ./ci/run_shellcheck.sh
 
   log-check:
     name: Logging macro checking

diff --git a/ci/jenkins_status.py b/ci/jenkins_status.py
@@ -0,0 +1,212 @@
+#!/usr/bin/env python
+
+"""Parse Jenkins build test results"""
+
+import argparse
+import json
+import sys
+import urllib
+from urllib.request import urlopen
+
+JENKINS_HOME = "https://build.hpdd.intel.com/job/daos-stack"
+
+
+class TestResult:
+    """Represents a single Jenkins test result
+
+    Two results compare (and hash) equal when both their name and class name match, which
+    allows results to be stored in sets and compared across sets.
+
+    Attributes:
+        name: Display name.  A name of the form "<number>-<text>" is rewritten to
+            "<text> (<number>)", after dropping a trailing "-xxxx" suffix from the text.
+        cname: The "className" value of the test case.
+        passed, failed, skipped: Flags derived from the "status" value, at most one is True.
+        data: The raw test case dict this result was built from.
+        blocks: Enclosing block names supplied by the caller, may be None.
+    """
+
+    def __init__(self, data, blocks):
+        """Build a result from one test case entry.
+
+        Args:
+            data: dict with at least "name", "className" and "status" keys.
+            blocks: enclosing block names for the test, or None.
+        """
+        name = data["name"]
+        if "-" in name:
+            (num, full) = name.split("-", 1)
+            try:
+                test_num = int(num)
+            except ValueError:
+                # The prefix is not a test number, keep the name as reported.
+                pass
+            else:
+                # Drop a trailing "-xxxx" suffix.  The length is checked first so that short
+                # names do not raise IndexError.
+                if len(full) >= 5 and full[-5] == "-":
+                    full = full[:-5]
+                name = f"{full} ({test_num})"
+        self.name = name
+        self.cname = data["className"]
+        status = data["status"]
+        assert status in ("PASSED", "FIXED", "SKIPPED", "FAILED", "REGRESSION")
+        self.passed = status in ("PASSED", "FIXED")
+        self.failed = status in ("FAILED", "REGRESSION")
+        self.skipped = status == "SKIPPED"
+        self.data = data
+        self.blocks = blocks
+
+    def info(self, prefix=""):
+        """Return a string describing the test"""
+        return f"{prefix}{self.cname}\t\t{self.name}"
+
+    def full_info(self):
+        """Return a longer string describing the test
+
+        When the test case has error details the result is the enclosing blocks (in reverse
+        order) and the test joined with " / ", followed by the details on new lines.  Without
+        error details this is the same as info().
+        """
+
+        tcl = []
+        if self.blocks is not None:
+            tcl.extend(reversed(self.blocks))
+
+        tcl.append(f"{self.cname}.{self.name}")
+        # Use get() so that a test case without an "errorDetails" key is handled the same way
+        # as one where the value is empty.
+        details = self.data.get("errorDetails")
+        if details:
+            # Convert literal backslash-n sequences in the details into real newlines.
+            return " / ".join(tcl) + "\n" + details.replace("\\n", "\n")
+        return self.info()
+
+    # Needed for set operations to compare results across sets.
+    def __eq__(self, other):
+        if not isinstance(other, TestResult):
+            # Let Python fall back to its default handling for unrelated types.
+            return NotImplemented
+        return self.name == other.name and self.cname == other.cname
+
+    # Needed to be able to add results to sets.
+    def __hash__(self):
+        return hash((self.name, self.cname))
+
+    def __str__(self):
+        return self.name
+
+    def __repr__(self):
+        return f"Test result of {self.cname}"
+
+
+def je_load(job_name, jid=None, what=None, tree=None):
+    """Fetch a JSON document from Jenkins and return it decoded.
+
+    The URL is the job's page below JENKINS_HOME, optionally narrowed to one build (jid) and
+    one item of that build (what, only used when jid is given), with "/api/json" appended.
+    When tree is given it is passed as the "tree" query parameter.
+    """
+
+    build_path = ""
+    if jid:
+        build_path = f"/{jid}/{what}" if what else f"/{jid}"
+
+    query = f"?tree={tree}" if tree else ""
+
+    url = f"{JENKINS_HOME}/job/daos/job/{job_name}{build_path}/api/json{query}"
+
+    with urlopen(url) as response:  # nosec
+        return json.load(response)
+
+
+def show_job(job_name, jid):
+    """Parse one job
+
+    Return a list of failed test objects, or None if the build is not used.  A build is not
+    used when it has no test report, or, for jobs whose name does not start with "PR-", when
+    the first cause of its first action is not a timer trigger."""
+
+    if not job_name.startswith("PR-"):
+        first_action = je_load(job_name, jid=jid, tree="actions[causes]")["actions"][0]
+        if "causes" not in first_action:
+            return None
+        cause_class = first_action["causes"][0]["_class"]
+        if cause_class != "hudson.triggers.TimerTrigger$TimerTriggerCause":
+            return None
+
+    try:
+        report = je_load(job_name, jid=jid, what="testReport")
+    except urllib.error.HTTPError:
+        print(f"Job {jid} of {job_name} has no test results")
+        return None
+
+    print(f"Checking job {jid} of {job_name}")
+
+    assert not report["testActions"]
+
+    # Build a result for every test case, then keep only the failures.
+    results = (
+        TestResult(case, suite["enclosingBlockNames"])
+        for suite in report["suites"]
+        for case in suite["cases"]
+    )
+    return [result for result in results if result.failed]
+
+
+def test_against_job(all_failed, job_name, count):
+    """Check for failures in existing test runs
+
+    Takes set of failed tests, returns set of unexplained tests
+
+    Walks the builds of job_name in the order Jenkins lists them and collects the failed tests
+    of every build that show_job() returns results for.  Checking stops once count such builds
+    have been processed, or as soon as every test in all_failed has also failed in job_name.
+
+    Args:
+        all_failed: set of failed tests to look for.
+        job_name: name of the Jenkins job to compare against.
+        count: maximum number of builds with results to check.
+
+    Returns:
+        The set of tests from all_failed which did not fail in any of the checked builds.
+    """
+    data = je_load(job_name)
+    lcb = data["lastCompletedBuild"]["number"]
+    main_failed = set()
+    ccount = 0
+    for build in data["builds"]:
+        jid = build["number"]
+        if jid > lcb:
+            # Builds newer than the last completed one are skipped, as the message says and
+            # as main() does for the PR job.
+            print(f"Job {jid} is of {job_name} is still running, skipping")
+            continue
+        failed = show_job(job_name, jid)
+        if not isinstance(failed, list):
+            # show_job() returns None for builds which are not to be used.
+            continue
+        main_failed.update(failed)
+        ccount += 1
+        if count == ccount:
+            break
+
+        unexplained = all_failed.difference(main_failed)
+        if not unexplained:
+            print(f"Stopping checking at {ccount} builds, all failures explained")
+            break
+
+    ignore = all_failed.intersection(main_failed)
+    if ignore:
+        print(f"Tests which failed in the PR and have also failed in {job_name} builds.")
+        for test in ignore:
+            print(test.full_info())
+
+    return all_failed.difference(main_failed)
+
+
+def main():
+    """Check the results of a PR
+
+    Collects the failed tests from the first build of the PR job that has results, then removes
+    any which have also failed in the daily-testing, weekly-testing or master jobs.  Exits with
+    status 1 if any failures remain which were only seen in the PR.
+    """
+
+    parser = argparse.ArgumentParser(description="Check Jenkins test results")
+    parser.add_argument("--pr", type=int, required=True)
+    pr_job = f"PR-{parser.parse_args().pr}"
+
+    pr_data = je_load(pr_job)
+    last_completed = pr_data["lastCompletedBuild"]["number"]
+
+    all_failed = set()
+    for build in pr_data["builds"]:
+        build_id = build["number"]
+        if build_id > last_completed:
+            print(f"Job {build_id} is of {pr_job} is still running, skipping")
+            continue
+        failed = show_job(pr_job, build_id)
+        if isinstance(failed, list):
+            # Only the first listed build which has results is considered.
+            all_failed.update(failed)
+            break
+
+    if not all_failed:
+        print("No failed tests in PR, returning")
+        return
+
+    print(f"PR had failed {len(all_failed)} tests, checking against landings builds")
+
+    # Each landing job is only consulted while there are still unexplained failures.
+    for landing_job, limit in (("daily-testing", 14), ("weekly-testing", 4), ("master", 14)):
+        all_failed = test_against_job(all_failed, landing_job, limit)
+        if not all_failed:
+            return
+
+    print("Tests which only failed in the PR")
+    for test in all_failed:
+        print(test.full_info())
+    sys.exit(1)
+
+
+if __name__ == "__main__":
+    main()