From 7002fc2c471f04ce15ec3f3ddf2d2590183c3a76 Mon Sep 17 00:00:00 2001 From: Nashwan Azhari Date: Fri, 17 Jun 2022 13:52:00 +0300 Subject: [PATCH] Ensure Windows Periodic workflow errors out while still uploading results. This patch aims to ensure that any test failures in the Windows Periodic workflow will lead to the workflow being marked as failed (red) while still processing/uploading the JUnit result files to GCloud for them to show up in testgrid. Signed-off-by: Nashwan Azhari --- .github/workflows/windows-periodic.yml | 59 +++++++++++++++++++++++--- 1 file changed, 54 insertions(+), 5 deletions(-) diff --git a/.github/workflows/windows-periodic.yml b/.github/workflows/windows-periodic.yml index a45168982..2407d8e82 100644 --- a/.github/workflows/windows-periodic.yml +++ b/.github/workflows/windows-periodic.yml @@ -35,6 +35,10 @@ jobs: contents: 'read' id-token: 'write' strategy: + # NOTE(aznashwan): this will permit all other jobs from the matrix to finish and + # upload their results even if one has a failing non-test-task: + # (e.g. 
hitting resource limits in the `AZTestVMCreate` task) + fail-fast: false matrix: win_ver: [ltsc2019, ltsc2022] include: @@ -60,8 +64,11 @@ jobs: LOGS_DIR=$HOME/$STARTED_TIME echo "STARTED_TIME=$STARTED_TIME" >> $GITHUB_ENV echo "LOGS_DIR=$LOGS_DIR" >> $GITHUB_ENV - mkdir -p $LOGS_DIR/artifacts + echo "VM_INTEGRATION_LOGFILE=/c/Logs/integration.log" >> $GITHUB_ENV + echo "VM_CRI_INTEGRATION_LOGFILE=/c/Logs/cri-integration.log" >> $GITHUB_ENV + + mkdir -p $LOGS_DIR/artifacts jq -n --arg node temp --arg timestamp $STARTED_TIME '$timestamp|tonumber|{timestamp:.,$node}' > $LOGS_DIR/started.json - name: Generate ssh key pair @@ -152,13 +159,18 @@ jobs: ssh -i $HOME/.ssh/id_rsa ${{ env.SSH_OPTS }} azureuser@${{ env.VM_PUB_IP }} "cd c:\containerd ; make binaries" - name: RunIntegrationTests + id: RunIntegrationTests + # NOTE(aznashwan): this is set to continue-on-error to allow the workflow to run until + # the reports are converted/uploaded to GCloud so as to show up on testgrid.k8s.io too. + continue-on-error: true run: | ssh -i $HOME/.ssh/id_rsa ${{ env.SSH_OPTS }} azureuser@${{ env.VM_PUB_IP }} "sh.exe -s" << EOF cd /c/containerd export EXTRA_TESTFLAGS="-timeout=20m" - make integration | tee /c/Logs/integration.log + set -o pipefail + make integration | tee ${{ env.VM_INTEGRATION_LOGFILE }} EOF - ssh -i $HOME/.ssh/id_rsa ${{ env.SSH_OPTS }} azureuser@${{ env.VM_PUB_IP }} "sh.exe -c 'cat /c/Logs/integration.log | go-junit-report.exe > /c/Logs/junit_00.xml'" + echo '::set-output name=SUCCEEDED::1' - name: PrepareRepoList run: | @@ -176,14 +188,19 @@ jobs: scp -i $HOME/.ssh/id_rsa ${{ env.SSH_OPTS }} cri-test-images.yaml azureuser@${{ env.VM_PUB_IP }}:c:/cri-test-images.yaml - name: RunCRIIntegrationTests + id: RunCRIIntegrationTests + # NOTE(aznashwan): this is set to continue-on-error to allow the workflow to run until + # the reports are converted/uploaded to GCloud so as to show up on testgrid.k8s.io too. 
+ continue-on-error: true run: | ssh -i $HOME/.ssh/id_rsa ${{ env.SSH_OPTS }} azureuser@${{ env.VM_PUB_IP }} "sh.exe -s" < c:/Logs/junit_01.xml' " + echo '::set-output name=SUCCEEDED::1' - name: GetCritestRepo run: | @@ -194,6 +211,10 @@ jobs: ssh -i $HOME/.ssh/id_rsa ${{ env.SSH_OPTS }} azureuser@${{ env.VM_PUB_IP }} "sh.exe -c 'cd /c/cri-tools && make critest'" - name: RunCritest + id: RunCritest + # NOTE(aznashwan): this is set to continue-on-error to allow the workflow to run until + # the reports are converted/uploaded to GCloud so as to show up on testgrid.k8s.io too. + continue-on-error: true run: | # This test is exceedingly flaky only on ws2022 so skip for now to keep CI happy. # Info: https://github.com/containerd/containerd/issues/6652 @@ -205,11 +226,18 @@ jobs: ssh -i $HOME/.ssh/id_rsa ${{ env.SSH_OPTS }} azureuser@${{ env.VM_PUB_IP }} "powershell.exe -command { C:\containerd\bin\containerd.exe --log-level=debug --log-file=C:/logs/containerd.log --service-name containerd --register-service ; Set-Service containerd -StartupType Automatic; Start-Service containerd }" ssh -i $HOME/.ssh/id_rsa ${{ env.SSH_OPTS }} azureuser@${{ env.VM_PUB_IP }} "sh.exe -s" < /c/Logs/junit_integration.xml'" + ssh -i $HOME/.ssh/id_rsa ${{ env.SSH_OPTS }} azureuser@${{ env.VM_PUB_IP }} "sh.exe -c 'touch ${{ env.VM_CRI_INTEGRATION_LOGFILE }}; cat ${{ env.VM_CRI_INTEGRATION_LOGFILE }} | go-junit-report.exe > /c/Logs/junit_cri_integration.xml'" + + # Copy over all the JUnit reports: scp -i $HOME/.ssh/id_rsa ${{ env.SSH_OPTS }} azureuser@${{ env.VM_PUB_IP }}:c:/Logs/*.xml ${{ env.LOGS_DIR }}/artifacts/ for f in $(ls ${{ env.LOGS_DIR }}/artifacts/*.xml); do xmlstarlet ed -d "/testsuites/testsuite/properties" $f > ${{ env.LOGS_DIR }}/$(basename $f) @@ -250,6 +278,27 @@ jobs: destination: ${{ matrix.GOOGLE_BUCKET }}${{ env.STARTED_TIME}} parent: false + - name: Check all CI stages succeeded + uses: actions/github-script@v3 + with: + script: | + const stepResults = { + 
RunIntegrationTests: "${{ steps.RunIntegrationTests.outputs.SUCCEEDED }}", + RunCRIIntegrationTests: "${{ steps.RunCRIIntegrationTests.outputs.SUCCEEDED }}", + RunCritest: "${{ steps.RunCritest.outputs.SUCCEEDED }}", + }; + + const failedTasks = []; // steps that never set SUCCEEDED to "1" + for (const [step, result] of Object.entries(stepResults)) { + if (result !== "1") { + failedTasks.push(step); + } + } + + if (failedTasks.length !== 0) { + core.setFailed(`One or more CI stages have failed. Please review the outputs of the following steps: ${failedTasks.join(", ")}.`); + } + - name: ResourceCleanup if: always() uses: azure/CLI@v1