Files
containerd/.github/workflows/windows-periodic.yml
Nashwan Azhari 7002fc2c47 Ensure Windows Periodic workflow errors out while still uploading results.
This patch aims to ensure that any test failures in the Windows Periodic
workflow will lead to the workflow being marked as failed (red) while still
processing/uploading the JUnit result files to GCloud for them to show
up in testgrid.

Signed-off-by: Nashwan Azhari <nazhari@cloudbasesolutions.com>
2022-06-20 20:46:49 +03:00

308 lines
14 KiB
YAML

# Workflow intended to run containerd integration tests on Windows.
name: Windows Integration Tests
on:
workflow_dispatch:
workflow_call:
secrets:
AZURE_SUB_ID:
required: true
AZURE_CREDS:
required: true
GCP_SERVICE_ACCOUNT:
required: true
GCP_WORKLOAD_IDENTITY_PROVIDER:
required: true
env:
AZURE_DEFAULT_LOCATION: westeurope
AZURE_SUBSCRIPTION_ID: ${{ secrets.AZURE_SUB_ID }}
AZURE_DEFAULT_VM_SIZE: Standard_D2s_v3
PASSWORD: Passw0rdAdmin # temp for testing, will be generated
DEFAULT_ADMIN_USERNAME: azureuser
SSH_OPTS: "-o ServerAliveInterval=20 -o StrictHostKeyChecking=no -o UserKnownHostsFile=/dev/null"
REMOTE_VM_BIN_PATH: "c:\\containerd\\bin"
BUSYBOX_TESTING_IMAGE_REF: "k8s.gcr.io/e2e-test-images/busybox:1.29-2"
RESOURCE_CONSUMER_TESTING_IMAGE_REF: "k8s.gcr.io/e2e-test-images/resource-consumer:1.10"
WEBSERVER_TESTING_IMAGE_REF: "k8s.gcr.io/e2e-test-images/nginx:1.14-2"
jobs:
winIntegration:
# NOTE: the following permissions are required by `google-github-actions/auth`:
permissions:
contents: 'read'
id-token: 'write'
strategy:
# NOTE(aznashwan): this will permit all other jobs from the matrix to finish and
# upload their results even if one has a failing non-test-task:
# (e.g. hitting resource limits in the `AZTestVMCreate` task)
fail-fast: false
matrix:
win_ver: [ltsc2019, ltsc2022]
include:
- win_ver: ltsc2019
AZURE_IMG: "MicrosoftWindowsServer:WindowsServer:2019-Datacenter-with-Containers-smalldisk:17763.2565.220202"
AZURE_RESOURCE_GROUP: ctrd-integration-ltsc2019-${{ github.run_id }}
GOOGLE_BUCKET: "containerd-integration/logs/windows-ltsc2019/"
- win_ver: ltsc2022
AZURE_IMG: "MicrosoftWindowsServer:WindowsServer:2022-datacenter-smalldisk-g2:20348.524.220201"
AZURE_RESOURCE_GROUP: ctrd-integration-ltsc2022-${{ github.run_id }}
GOOGLE_BUCKET: "containerd-integration/logs/windows-ltsc2022/"
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v2
- name: Install required packages
run: |
sudo apt-get install xmlstarlet -y
- name: PrepareArtifacts
run: |
STARTED_TIME=$(date +%s)
LOGS_DIR=$HOME/$STARTED_TIME
echo "STARTED_TIME=$STARTED_TIME" >> $GITHUB_ENV
echo "LOGS_DIR=$LOGS_DIR" >> $GITHUB_ENV
echo "VM_INTEGRATION_LOGFILE=/c/Logs/integration.log" >> $GITHUB_ENV
echo "VM_CRI_INTEGRATION_LOGFILE=/c/Logs/cri-integration.log" >> $GITHUB_ENV
mkdir -p $LOGS_DIR/artifacts
jq -n --arg node temp --arg timestamp $STARTED_TIME '$timestamp|tonumber|{timestamp:.,$node}' > $LOGS_DIR/started.json
- name: Generate ssh key pair
run: |
mkdir -p $HOME/.ssh/
ssh-keygen -t rsa -b 4096 -C "ci@containerd.com" -f $HOME/.ssh/id_rsa -q -N ""
echo "SSH_PUB_KEY=$(cat ~/.ssh/id_rsa.pub)" >> $GITHUB_ENV
- name: AZLogin
uses: azure/login@v1
with:
creds: ${{ secrets.AZURE_CREDS }}
- name: AZResourceGroupCreate
uses: azure/CLI@v1
with:
inlinescript: |
az group create -n ${{ matrix.AZURE_RESOURCE_GROUP }} -l ${{ env.AZURE_DEFAULT_LOCATION }} --tags creationTimestamp=$(date -u '+%Y-%m-%dT%H:%M:%SZ')
- name: AZTestVMCreate
uses: azure/CLI@v1
with:
inlinescript: |
DETAILS=$(az vm create -n winTestVM --admin-username ${{ env.DEFAULT_ADMIN_USERNAME }} --admin-password ${{ env.PASSWORD }} --image ${{ matrix.AZURE_IMG }} -g ${{ matrix.AZURE_RESOURCE_GROUP }} --nsg-rule SSH --size ${{ env.AZURE_DEFAULT_VM_SIZE }} --public-ip-sku Standard -o json)
PUB_IP=$(echo $DETAILS | jq -r .publicIpAddress)
if [ "$PUB_IP" == "null" ]
then
RETRY=0
while [ "$PUB_IP" == "null" ] || [ $RETRY -le 5 ]
do
sleep 5
PUB_IP=$(az vm show -d -g ${{ matrix.AZURE_RESOURCE_GROUP }} -n winTestVM -o json --query publicIps | jq -r)
RETRY=$(( $RETRY + 1 ))
done
fi
if [ "$PUB_IP" == "null" ]
then
echo "failed to fetch public IP"
exit 1
fi
echo "VM_PUB_IP=$PUB_IP" >> $GITHUB_ENV
- name: EnableAZVMSSH
uses: azure/CLI@v1
with:
inlinescript: |
az vm run-command invoke --command-id RunPowerShellScript -n winTestVM -g ${{ matrix.AZURE_RESOURCE_GROUP }} --scripts @$GITHUB_WORKSPACE/script/setup/enable_ssh_windows.ps1 --parameters 'SSHPublicKey=${{ env.SSH_PUB_KEY }}'
- name: TestSSHConnection
run: |
if ! ssh -i $HOME/.ssh/id_rsa ${{ env.SSH_OPTS }} azureuser@${{ env.VM_PUB_IP }} "hostname";
then
exit 1
fi
- name: InstallContainerFeatureWS2022
if: ${{ matrix.win_ver == 'ltsc2022' }}
run: |
ssh -i $HOME/.ssh/id_rsa ${{ env.SSH_OPTS }} azureuser@${{ env.VM_PUB_IP }} "powershell.exe -command { Install-WindowsFeature -Name 'Containers' -Restart }"
- name: WaitForVMToRestart
if: ${{ matrix.win_ver == 'ltsc2022' }}
timeout-minutes: 5
run: |
# give the vm 30 seconds to actually stop. SSH server might actually respond while server is shutting down.
sleep 30
while [ ! $( ssh -i $HOME/.ssh/id_rsa ${{ env.SSH_OPTS }} azureuser@${{ env.VM_PUB_IP }} "hostname") ];
do
echo "Unable to connect to azurevm"
done
echo "Connection reestablished. VM restarted succesfully."
- name: CreateNatNetworkWS2022
if: ${{ matrix.win_ver == 'ltsc2022' }}
run: |
ssh -i $HOME/.ssh/id_rsa ${{ env.SSH_OPTS }} azureuser@${{ env.VM_PUB_IP }} "powershell.exe -command { curl.exe -L 'https://raw.githubusercontent.com/microsoft/SDN/master/Kubernetes/windows/hns.psm1' -o hns.psm1 }"
ssh -i $HOME/.ssh/id_rsa ${{ env.SSH_OPTS }} azureuser@${{ env.VM_PUB_IP }} "powershell.exe -command { Import-Module .\hns.psm1 ; New-HnsNetwork -Type NAT -Name nat -AddressPrefix 172.19.208.0/20 -Gateway 172.19.208.1 }"
- name: PrepareTestingEnv
run: |
scp -i $HOME/.ssh/id_rsa ${{ env.SSH_OPTS }} $GITHUB_WORKSPACE/script/setup/prepare_env_windows.ps1 azureuser@${{ env.VM_PUB_IP }}:/prepare_env_windows.ps1
ssh -i $HOME/.ssh/id_rsa ${{ env.SSH_OPTS }} azureuser@${{ env.VM_PUB_IP }} "c:\\prepare_env_windows.ps1"
- name: MakeContainerDBins
run: |
ssh -i $HOME/.ssh/id_rsa ${{ env.SSH_OPTS }} azureuser@${{ env.VM_PUB_IP }} "git clone http://github.com/containerd/containerd c:\\containerd "
ssh -i $HOME/.ssh/id_rsa ${{ env.SSH_OPTS }} azureuser@${{ env.VM_PUB_IP }} "cd c:\containerd ; make binaries"
- name: RunIntegrationTests
id: RunIntegrationTests
# NOTE(aznashwan): this is set to continue-on-error to allow the workflow to run until
# the reports are converted/uploaded to GCloud so as to show up on testgrid.k8s.io too.
continue-on-error: true
run: |
ssh -i $HOME/.ssh/id_rsa ${{ env.SSH_OPTS }} azureuser@${{ env.VM_PUB_IP }} "sh.exe -s" << EOF
cd /c/containerd
export EXTRA_TESTFLAGS="-timeout=20m"
set -o pipefail
make integration | tee ${{ env.VM_INTEGRATION_LOGFILE }}
EOF
echo '::set-output name=SUCCEEDED::1'
- name: PrepareRepoList
run: |
cat > repolist.toml << EOF
busybox = "${{ env.BUSYBOX_TESTING_IMAGE_REF }}"
ResourceConsumer = "${{ env.RESOURCE_CONSUMER_TESTING_IMAGE_REF }}"
EOF
cat > cri-test-images.yaml << EOF
defaultTestContainerImage: ${{ env.BUSYBOX_TESTING_IMAGE_REF }}
webServerTestImage: ${{ env.WEBSERVER_TESTING_IMAGE_REF }}
EOF
scp -i $HOME/.ssh/id_rsa ${{ env.SSH_OPTS }} repolist.toml azureuser@${{ env.VM_PUB_IP }}:c:/repolist.toml
scp -i $HOME/.ssh/id_rsa ${{ env.SSH_OPTS }} cri-test-images.yaml azureuser@${{ env.VM_PUB_IP }}:c:/cri-test-images.yaml
- name: RunCRIIntegrationTests
id: RunCRIIntegrationTests
# NOTE(aznashwan): this is set to continue-on-error to allow the workflow to run until
# the reports are converted/uploaded to GCloud so as to show up on testgrid.k8s.io too.
continue-on-error: true
run: |
ssh -i $HOME/.ssh/id_rsa ${{ env.SSH_OPTS }} azureuser@${{ env.VM_PUB_IP }} "sh.exe -s" <<EOF
cd c:/containerd
./script/setup/install-cni-windows
export TEST_IMAGE_LIST=c:/repolist.toml
set -o pipefail
make cri-integration | tee ${{ env.VM_CRI_INTEGRATION_LOGFILE }}
EOF
echo '::set-output name=SUCCEEDED::1'
- name: GetCritestRepo
run: |
ssh -i $HOME/.ssh/id_rsa ${{ env.SSH_OPTS }} azureuser@${{ env.VM_PUB_IP }} "git clone https://github.com/kubernetes-sigs/cri-tools c:/cri-tools"
- name: BuildCritest
run: |
ssh -i $HOME/.ssh/id_rsa ${{ env.SSH_OPTS }} azureuser@${{ env.VM_PUB_IP }} "sh.exe -c 'cd /c/cri-tools && make critest'"
- name: RunCritest
id: RunCritest
# NOTE(aznashwan): this is set to continue-on-error to allow the workflow to run until
# the reports are converted/uploaded to GCloud so as to show up on testgrid.k8s.io too.
continue-on-error: true
run: |
# This test is exceedingly flaky only on ws2022 so skip for now to keep CI happy.
# Info: https://github.com/containerd/containerd/issues/6652
SKIP=""
if [ '${{ matrix.win_ver }}' == 'ltsc2022' ];then
SKIP='-ginkgo.skip="runtime should support exec with tty=true and stdin=true"'
fi
ssh -i $HOME/.ssh/id_rsa ${{ env.SSH_OPTS }} azureuser@${{ env.VM_PUB_IP }} "powershell.exe -command { C:\containerd\bin\containerd.exe --log-level=debug --log-file=C:/logs/containerd.log --service-name containerd --register-service ; Set-Service containerd -StartupType Automatic; Start-Service containerd }"
ssh -i $HOME/.ssh/id_rsa ${{ env.SSH_OPTS }} azureuser@${{ env.VM_PUB_IP }} "sh.exe -s" <<EOF
sleep 5
set -o pipefail
c:/cri-tools/build/bin/critest.exe $SKIP --runtime-endpoint='npipe://./pipe/containerd-containerd' --test-images-file='c:/cri-test-images.yaml' --report-dir='c:/Logs' -ginkgo.junit-report="C:\Logs\junit_critest.xml" | tee c:/Logs/critest.log
EOF
echo '::set-output name=SUCCEEDED::1'
- name: PullLogsFromWinNode
run: |
# Generate JUnit reports from the stdouts of the tests:
ssh -i $HOME/.ssh/id_rsa ${{ env.SSH_OPTS }} azureuser@${{ env.VM_PUB_IP }} "sh.exe -c 'touch ${{ env.VM_INTEGRATION_LOGFILE }}; cat ${{ env.VM_INTEGRATION_LOGFILE }} | go-junit-report.exe > /c/Logs/junit_integration.xml'"
ssh -i $HOME/.ssh/id_rsa ${{ env.SSH_OPTS }} azureuser@${{ env.VM_PUB_IP }} "sh.exe -c 'touch ${{ env.VM_CRI_INTEGRATION_LOGFILE }}; cat ${{ env.VM_CRI_INTEGRATION_LOGFILE }} | go-junit-report.exe > /c/Logs/junit_cri_integration.xml'"
# Copy over all the JUnit reports:
scp -i $HOME/.ssh/id_rsa ${{ env.SSH_OPTS }} azureuser@${{ env.VM_PUB_IP }}:c:/Logs/*.xml ${{ env.LOGS_DIR }}/artifacts/
for f in $(ls ${{ env.LOGS_DIR }}/artifacts/*.xml); do
xmlstarlet ed -d "/testsuites/testsuite/properties" $f > ${{ env.LOGS_DIR }}/$(basename $f)
mv ${{ env.LOGS_DIR }}/$(basename $f) $f
done
- name: FinishJob
run: |
jq -n --arg result SUCCESS --arg timestamp $(date +%s) '$timestamp|tonumber|{timestamp:.,$result}' > ${{ env.LOGS_DIR }}/finished.json
echo "${{ env.STARTED_TIME }}" > ${{ github.workspace }}/latest-build.txt
- name: AssignGcpCreds
id: AssignGcpCreds
run: |
echo '::set-output name=GCP_SERVICE_ACCOUNT::${{ secrets.GCP_SERVICE_ACCOUNT }}'
echo '::set-output name=GCP_WORKLOAD_IDENTITY_PROVIDER::${{ secrets.GCP_WORKLOAD_IDENTITY_PROVIDER }}'
- name: AuthGcp
uses: google-github-actions/auth@v0
if: steps.AssignGcpCreds.outputs.GCP_SERVICE_ACCOUNT && steps.AssignGcpCreds.outputs.GCP_WORKLOAD_IDENTITY_PROVIDER
with:
service_account: ${{ secrets.GCP_SERVICE_ACCOUNT }}
workload_identity_provider: ${{ secrets.GCP_WORKLOAD_IDENTITY_PROVIDER }}
- name: UploadJobReport
uses: google-github-actions/upload-cloud-storage@v0.8.0
if: steps.AssignGcpCreds.outputs.GCP_SERVICE_ACCOUNT && steps.AssignGcpCreds.outputs.GCP_WORKLOAD_IDENTITY_PROVIDER
with:
path: ${{ github.workspace }}/latest-build.txt
destination: ${{ matrix.GOOGLE_BUCKET }}
parent: false
- name: UploadLogsDir
uses: google-github-actions/upload-cloud-storage@v0.8.0
if: steps.AssignGcpCreds.outputs.GCP_SERVICE_ACCOUNT && steps.AssignGcpCreds.outputs.GCP_WORKLOAD_IDENTITY_PROVIDER
with:
path: ${{ env.LOGS_DIR }}
destination: ${{ matrix.GOOGLE_BUCKET }}${{ env.STARTED_TIME}}
parent: false
- name: Check all CI stages succeeded
uses: actions/github-script@v3
with:
script: |
const stepResults = {
RunIntegrationTests: "${{ steps.RunIntegrationTests.outputs.SUCCEEDED }}",
RunCRIIntegrationTests: "${{ steps.RunCRIIntegrationTests.outputs.SUCCEEDED }}",
RunCritest: "${{ steps.RunCritest.outputs.SUCCEEDED }}",
};
let failedTasks = [];
for( [step, result] of Object.entries(stepResults) ) {
if (result != "1") {
failedTasks.push(step);
}
};
if (failedTasks.length != 0) {
core.setFailed(`One or more CI stages have failed. Please review the outputs of the following stepts: ${failedTasks}.`);
};
- name: ResourceCleanup
if: always()
uses: azure/CLI@v1
with:
inlinescript: |
az group delete -g ${{ matrix.AZURE_RESOURCE_GROUP }} --yes