# Workflow intended to run containerd integration tests on Windows.
#
# For each Windows version in the matrix it creates a throwaway Azure resource
# group and VM, enables SSH on it, builds containerd on the VM, runs the
# integration / CRI integration / critest suites over SSH, pulls the JUnit
# reports back, optionally uploads them to a GCS bucket, and finally deletes
# the resource group.

name: Windows Integration Tests

on:
  workflow_dispatch:
  workflow_call:
    secrets:
      AZURE_SUB_ID:
        required: true
      AZURE_CREDS:
        required: true
      GCP_SERVICE_ACCOUNT:
        required: true
      GCP_WORKLOAD_IDENTITY_PROVIDER:
        required: true

env:
  AZURE_DEFAULT_LOCATION: westeurope
  AZURE_SUBSCRIPTION_ID: ${{ secrets.AZURE_SUB_ID }}
  AZURE_DEFAULT_VM_SIZE: Standard_D2s_v3
  PASSWORD: Passw0rdAdmin  # temp for testing, will be generated
  DEFAULT_ADMIN_USERNAME: azureuser
  SSH_OPTS: "-o ServerAliveInterval=20 -o StrictHostKeyChecking=no -o UserKnownHostsFile=/dev/null"
  REMOTE_VM_BIN_PATH: "c:\\containerd\\bin"
  BUSYBOX_TESTING_IMAGE_REF: "registry.k8s.io/e2e-test-images/busybox:1.29-2"
  RESOURCE_CONSUMER_TESTING_IMAGE_REF: "registry.k8s.io/e2e-test-images/resource-consumer:1.10"
  WEBSERVER_TESTING_IMAGE_REF: "registry.k8s.io/e2e-test-images/nginx:1.14-2"

permissions:  # added using https://github.com/step-security/secure-workflows
  contents: read

jobs:
  winIntegration:
    # NOTE: the following permissions are required by `google-github-actions/auth`:
    permissions:
      contents: 'read'
      id-token: 'write'
    strategy:
      # NOTE(aznashwan): this will permit all other jobs from the matrix to finish and
      # upload their results even if one has a failing non-test-task:
      # (e.g. hitting resource limits in the `AZTestVMCreate` task)
      fail-fast: false
      matrix:
        win_ver: [ltsc2019, ltsc2022]
        include:
          - win_ver: ltsc2019
            AZURE_IMG: "MicrosoftWindowsServer:WindowsServer:2019-Datacenter-with-Containers-smalldisk:17763.4131.230311"
            AZURE_RESOURCE_GROUP: ctrd-integration-ltsc2019-${{ github.run_id }}
            GOOGLE_BUCKET: "containerd-integration/logs/windows-ltsc2019/"
          - win_ver: ltsc2022
            AZURE_IMG: "MicrosoftWindowsServer:WindowsServer:2022-datacenter-smalldisk-g2:20348.1607.230310"
            AZURE_RESOURCE_GROUP: ctrd-integration-ltsc2022-${{ github.run_id }}
            GOOGLE_BUCKET: "containerd-integration/logs/windows-ltsc2022/"
    runs-on: ubuntu-latest
    timeout-minutes: 90
    steps:
      - uses: actions/checkout@v3

      # xmlstarlet is used by the PullLogsFromWinNode step to post-process the
      # JUnit reports.
      - name: Install required packages
        run: |
          # Refresh the package index first so the install does not fail on a
          # stale index.
          sudo apt-get update
          sudo apt-get install xmlstarlet -y

      # Exports STARTED_TIME, LOGS_DIR and the in-VM log file paths to later
      # steps via $GITHUB_ENV, and writes the started.json marker.
      - name: PrepareArtifacts
        run: |
          STARTED_TIME=$(date +%s)
          LOGS_DIR=$HOME/$STARTED_TIME
          echo "STARTED_TIME=$STARTED_TIME" >> $GITHUB_ENV
          echo "LOGS_DIR=$LOGS_DIR" >> $GITHUB_ENV
          echo "VM_INTEGRATION_LOGFILE=/c/Logs/integration.log" >> $GITHUB_ENV
          echo "VM_CRI_INTEGRATION_LOGFILE=/c/Logs/cri-integration.log" >> $GITHUB_ENV

          mkdir -p $LOGS_DIR/artifacts

          jq -n --arg node temp --arg timestamp $STARTED_TIME '$timestamp|tonumber|{timestamp:.,$node}' > $LOGS_DIR/started.json

      # The public half of this key is passed to the VM by EnableAZVMSSH; the
      # private half is used by every ssh/scp call below.
      - name: Generate ssh key pair
        run: |
          mkdir -p $HOME/.ssh/
          ssh-keygen -t rsa -b 4096 -C "ci@containerd.com" -f $HOME/.ssh/id_rsa -q -N ""
          echo "SSH_PUB_KEY=$(cat ~/.ssh/id_rsa.pub)" >> $GITHUB_ENV

      - name: AZLogin
        uses: azure/login@v1
        with:
          creds: ${{ secrets.AZURE_CREDS }}

      - name: AZResourceGroupCreate
        uses: azure/CLI@v1
        with:
          inlinescript: |
            az group create -n ${{ matrix.AZURE_RESOURCE_GROUP }} -l ${{ env.AZURE_DEFAULT_LOCATION }} --tags creationTimestamp=$(date -u '+%Y-%m-%dT%H:%M:%SZ')

      # Creates the test VM and exports its public IP as VM_PUB_IP.
      - name: AZTestVMCreate
        uses: azure/CLI@v1
        with:
          inlinescript: |
            DETAILS=$(az vm create -n winTestVM \
              --admin-username ${{ env.DEFAULT_ADMIN_USERNAME }} \
              --admin-password ${{ env.PASSWORD }} \
              --image ${{ matrix.AZURE_IMG }} \
              -g ${{ matrix.AZURE_RESOURCE_GROUP }} \
              --nsg-rule SSH \
              --size ${{ env.AZURE_DEFAULT_VM_SIZE }} \
              --public-ip-sku Standard -o json)
            PUB_IP=$(echo $DETAILS | jq -r .publicIpAddress)
            if [ "$PUB_IP" == "null" ]
            then
              RETRY=0
              # Poll only while the IP is still missing AND attempts remain
              # (6 attempts, 5 seconds apart). Using `||` here would keep
              # polling after the IP was found and would never stop if the IP
              # stayed "null".
              while [ "$PUB_IP" == "null" ] && [ $RETRY -le 5 ]
              do
                sleep 5
                PUB_IP=$(az vm show -d -g ${{ matrix.AZURE_RESOURCE_GROUP }} -n winTestVM -o json --query publicIps | jq -r)
                RETRY=$(( $RETRY + 1 ))
              done
            fi

            if [ "$PUB_IP" == "null" ]
            then
              echo "failed to fetch public IP"
              exit 1
            fi
            echo "VM_PUB_IP=$PUB_IP" >> $GITHUB_ENV

      # Runs script/setup/enable_ssh_windows.ps1 on the VM, passing it the
      # generated public key.
      - name: EnableAZVMSSH
        uses: azure/CLI@v1
        with:
          inlinescript: |
            az vm run-command invoke --command-id RunPowerShellScript -n winTestVM -g ${{ matrix.AZURE_RESOURCE_GROUP }} --scripts @$GITHUB_WORKSPACE/script/setup/enable_ssh_windows.ps1 --parameters 'SSHPublicKey=${{ env.SSH_PUB_KEY }}'

      - name: TestSSHConnection
        run: |
          if ! ssh -i $HOME/.ssh/id_rsa ${{ env.SSH_OPTS }} azureuser@${{ env.VM_PUB_IP }} "hostname"; then
            exit 1
          fi

      - name: InstallContainerFeatureWS2022
        if: ${{ matrix.win_ver == 'ltsc2022' }}
        run: |
          ssh -i $HOME/.ssh/id_rsa ${{ env.SSH_OPTS }} azureuser@${{ env.VM_PUB_IP }} "powershell.exe -command { Install-WindowsFeature -Name 'Containers' -Restart }"

      - name: WaitForVMToRestart
        if: ${{ matrix.win_ver == 'ltsc2022' }}
        timeout-minutes: 5
        run: |
          # give the vm 30 seconds to actually stop. SSH server might actually respond while server is shutting down.
          sleep 30
          # Loop until `hostname` produces output over SSH. The substitution is
          # quoted so multi-word or empty output cannot break the test, and a
          # short pause separates attempts; the step-level timeout bounds the
          # total wait.
          while [ -z "$(ssh -i $HOME/.ssh/id_rsa ${{ env.SSH_OPTS }} azureuser@${{ env.VM_PUB_IP }} "hostname")" ]; do
            echo "Unable to connect to azurevm"
            sleep 5
          done
          echo "Connection reestablished. VM restarted succesfully."

      # Downloads hns.psm1 onto the VM and uses it to create a NAT network
      # named "nat".
      - name: CreateNatNetworkWS2022
        if: ${{ matrix.win_ver == 'ltsc2022' }}
        run: |
          ssh -i $HOME/.ssh/id_rsa ${{ env.SSH_OPTS }} azureuser@${{ env.VM_PUB_IP }} "powershell.exe -command { curl.exe -L 'https://raw.githubusercontent.com/microsoft/SDN/master/Kubernetes/windows/hns.psm1' -o hns.psm1 }"
          ssh -i $HOME/.ssh/id_rsa ${{ env.SSH_OPTS }} azureuser@${{ env.VM_PUB_IP }} "powershell.exe -command { Import-Module .\hns.psm1 ; New-HnsNetwork -Type NAT -Name nat -AddressPrefix 172.19.208.0/20 -Gateway 172.19.208.1 }"

      - name: PrepareTestingEnv
        run: |
          scp -i $HOME/.ssh/id_rsa ${{ env.SSH_OPTS }} $GITHUB_WORKSPACE/script/setup/prepare_env_windows.ps1 azureuser@${{ env.VM_PUB_IP }}:/prepare_env_windows.ps1
          ssh -i $HOME/.ssh/id_rsa ${{ env.SSH_OPTS }} azureuser@${{ env.VM_PUB_IP }} "c:\\prepare_env_windows.ps1"

      # Clones containerd on the VM (over HTTPS rather than plain HTTP) and
      # builds the binaries there.
      - name: MakeContainerDBins
        run: |
          ssh -i $HOME/.ssh/id_rsa ${{ env.SSH_OPTS }} azureuser@${{ env.VM_PUB_IP }} "git clone https://github.com/containerd/containerd c:\\containerd "
          ssh -i $HOME/.ssh/id_rsa ${{ env.SSH_OPTS }} azureuser@${{ env.VM_PUB_IP }} "cd c:\containerd ; make binaries"

      - name: RunIntegrationTests
        id: RunIntegrationTests
        # NOTE(aznashwan): this is set to continue-on-error to allow the workflow to run until
        # the reports are converted/uploaded to GCloud so as to show up on testgrid.k8s.io too.
        continue-on-error: true
        run: |
          ssh -i $HOME/.ssh/id_rsa ${{ env.SSH_OPTS }} azureuser@${{ env.VM_PUB_IP }} "sh.exe -s" << EOF
            cd /c/containerd
            export EXTRA_TESTFLAGS="-timeout=20m"
            set -o pipefail
            make integration | tee ${{ env.VM_INTEGRATION_LOGFILE }}
          EOF
          echo 'SUCCEEDED=1' >> $GITHUB_OUTPUT

      # Generates the image list files consumed by the CRI test steps and
      # copies them to c:/ on the VM.
      - name: PrepareRepoList
        run: |
          cat > repolist.toml << EOF
          busybox = "${{ env.BUSYBOX_TESTING_IMAGE_REF }}"
          ResourceConsumer = "${{ env.RESOURCE_CONSUMER_TESTING_IMAGE_REF }}"
          EOF

          cat > cri-test-images.yaml << EOF
          defaultTestContainerImage: ${{ env.BUSYBOX_TESTING_IMAGE_REF }}
          webServerTestImage: ${{ env.WEBSERVER_TESTING_IMAGE_REF }}
          EOF

          scp -i $HOME/.ssh/id_rsa ${{ env.SSH_OPTS }} repolist.toml azureuser@${{ env.VM_PUB_IP }}:c:/repolist.toml
          scp -i $HOME/.ssh/id_rsa ${{ env.SSH_OPTS }} cri-test-images.yaml azureuser@${{ env.VM_PUB_IP }}:c:/cri-test-images.yaml

      - name: RunCRIIntegrationTests
        id: RunCRIIntegrationTests
        # NOTE(aznashwan): this is set to continue-on-error to allow the workflow to run until
        # the reports are converted/uploaded to GCloud so as to show up on testgrid.k8s.io too.
        continue-on-error: true
        # NOTE(review): the remote script of this step was lost in the mangled
        # copy of this file (only `"sh.exe -s" <> $GITHUB_OUTPUT` remained, which
        # never sets SUCCEEDED). The heredoc below is a reconstruction modelled on
        # RunIntegrationTests; the install-cni-windows call, TEST_IMAGE_LIST and
        # the `cri-integration` make target are assumptions - confirm against the
        # project history before relying on it.
        run: |
          ssh -i $HOME/.ssh/id_rsa ${{ env.SSH_OPTS }} azureuser@${{ env.VM_PUB_IP }} "sh.exe -s" << EOF
            cd /c/containerd
            ./script/setup/install-cni-windows
            export TEST_IMAGE_LIST=c:/repolist.toml
            set -o pipefail
            make cri-integration | tee ${{ env.VM_CRI_INTEGRATION_LOGFILE }}
          EOF
          echo 'SUCCEEDED=1' >> $GITHUB_OUTPUT

      - name: GetCritestRepo
        run: |
          ssh -i $HOME/.ssh/id_rsa ${{ env.SSH_OPTS }} azureuser@${{ env.VM_PUB_IP }} "git clone https://github.com/kubernetes-sigs/cri-tools c:/cri-tools"

      - name: BuildCritest
        run: |
          ssh -i $HOME/.ssh/id_rsa ${{ env.SSH_OPTS }} azureuser@${{ env.VM_PUB_IP }} "sh.exe -c 'cd /c/cri-tools && make critest'"

      - name: RunCritest
        id: RunCritest
        # NOTE(aznashwan): this is set to continue-on-error to allow the workflow to run until
        # the reports are converted/uploaded to GCloud so as to show up on testgrid.k8s.io too.
        continue-on-error: true
        # NOTE(review): as with RunCRIIntegrationTests, the remote script of this
        # step was lost in the mangled copy of this file. The heredoc below is a
        # reconstruction: the critest binary path, the runtime endpoint and the
        # report/log locations are assumptions - confirm against the project
        # history. $SKIP is expanded on the runner because the heredoc delimiter
        # is unquoted.
        run: |
          # This test is exceedingly flaky only on ws2022 so skip for now to keep CI happy.
          # Info: https://github.com/containerd/containerd/issues/6652
          SKIP=""
          if [ '${{ matrix.win_ver }}' == 'ltsc2022' ];then
            SKIP='-ginkgo.skip="runtime should support exec with tty=true and stdin=true"'
          fi

          ssh -i $HOME/.ssh/id_rsa ${{ env.SSH_OPTS }} azureuser@${{ env.VM_PUB_IP }} "powershell.exe -command { C:\containerd\bin\containerd.exe --log-level=debug --log-file=C:/logs/containerd.log --service-name containerd --register-service ; Set-Service containerd -StartupType Automatic; Start-Service containerd }"
          ssh -i $HOME/.ssh/id_rsa ${{ env.SSH_OPTS }} azureuser@${{ env.VM_PUB_IP }} "sh.exe -s" << EOF
            sleep 5
            set -o pipefail
            c:/cri-tools/build/bin/windows/amd64/critest.exe $SKIP --runtime-endpoint='npipe:////./pipe/containerd-containerd' --test-images-file='c:/cri-test-images.yaml' --report-dir='c:/Logs' | tee c:/Logs/critest.log
          EOF
          echo 'SUCCEEDED=1' >> $GITHUB_OUTPUT

      - name: PullLogsFromWinNode
        run: |
          # Generate JUnit reports from the stdouts of the tests:
          ssh -i $HOME/.ssh/id_rsa ${{ env.SSH_OPTS }} azureuser@${{ env.VM_PUB_IP }} "sh.exe -c 'touch ${{ env.VM_INTEGRATION_LOGFILE }}; cat ${{ env.VM_INTEGRATION_LOGFILE }} | go-junit-report.exe > /c/Logs/junit_integration.xml'"
          ssh -i $HOME/.ssh/id_rsa ${{ env.SSH_OPTS }} azureuser@${{ env.VM_PUB_IP }} "sh.exe -c 'touch ${{ env.VM_CRI_INTEGRATION_LOGFILE }}; cat ${{ env.VM_CRI_INTEGRATION_LOGFILE }} | go-junit-report.exe > /c/Logs/junit_cri_integration.xml'"

          # Copy over all the JUnit reports:
          scp -i $HOME/.ssh/id_rsa ${{ env.SSH_OPTS }} azureuser@${{ env.VM_PUB_IP }}:c:/Logs/*.xml ${{ env.LOGS_DIR }}/artifacts/
          # Strip the <properties> element from every report, rewriting each
          # file in place via a temporary copy.
          for f in $(ls ${{ env.LOGS_DIR }}/artifacts/*.xml); do
            xmlstarlet ed -d "/testsuites/testsuite/properties" $f > ${{ env.LOGS_DIR }}/$(basename $f)
            mv ${{ env.LOGS_DIR }}/$(basename $f) $f
          done

      # NOTE(review): `result` is hard-coded to SUCCESS here, independent of the
      # SUCCEEDED outputs of the test steps above.
      - name: FinishJob
        run: |
          jq -n --arg result SUCCESS --arg timestamp $(date +%s) '$timestamp|tonumber|{timestamp:.,$result}' > ${{ env.LOGS_DIR }}/finished.json
          echo "${{ env.STARTED_TIME }}" > ${{ github.workspace }}/latest-build.txt

      # Re-exports the GCP secrets as step outputs so that the `if:` conditions
      # of the following steps can test whether they are set.
      - name: AssignGcpCreds
        id: AssignGcpCreds
        run: |
          echo 'GCP_SERVICE_ACCOUNT=${{ secrets.GCP_SERVICE_ACCOUNT }}' >> $GITHUB_OUTPUT
          echo 'GCP_WORKLOAD_IDENTITY_PROVIDER=${{ secrets.GCP_WORKLOAD_IDENTITY_PROVIDER }}' >> $GITHUB_OUTPUT

      - name: AuthGcp
        uses: google-github-actions/auth@v0
        if: steps.AssignGcpCreds.outputs.GCP_SERVICE_ACCOUNT && steps.AssignGcpCreds.outputs.GCP_WORKLOAD_IDENTITY_PROVIDER
        with:
          service_account: ${{ secrets.GCP_SERVICE_ACCOUNT }}
          workload_identity_provider: ${{ secrets.GCP_WORKLOAD_IDENTITY_PROVIDER }}

      - name: UploadJobReport
        uses: google-github-actions/upload-cloud-storage@v0.10.4
        if: steps.AssignGcpCreds.outputs.GCP_SERVICE_ACCOUNT && steps.AssignGcpCreds.outputs.GCP_WORKLOAD_IDENTITY_PROVIDER
        with:
          path: ${{ github.workspace }}/latest-build.txt
          destination: ${{ matrix.GOOGLE_BUCKET }}
          parent: false

      - name: UploadLogsDir
        uses: google-github-actions/upload-cloud-storage@v0.10.4
        if: steps.AssignGcpCreds.outputs.GCP_SERVICE_ACCOUNT && steps.AssignGcpCreds.outputs.GCP_WORKLOAD_IDENTITY_PROVIDER
        with:
          path: ${{ env.LOGS_DIR }}
          destination: ${{ matrix.GOOGLE_BUCKET }}${{ env.STARTED_TIME}}
          parent: false

      # The test steps run with continue-on-error, so this step is what
      # actually fails the job: any test step that did not write SUCCEEDED=1
      # is reported here.
      - name: Check all CI stages succeeded
        uses: actions/github-script@v6
        with:
          script: |
            const stepResults = {
              RunIntegrationTests: "${{ steps.RunIntegrationTests.outputs.SUCCEEDED }}",
              RunCRIIntegrationTests: "${{ steps.RunCRIIntegrationTests.outputs.SUCCEEDED }}",
              RunCritest: "${{ steps.RunCritest.outputs.SUCCEEDED }}",
            };

            let failedTasks = [];
            // Declare the loop bindings so they are not implicit globals.
            for (const [step, result] of Object.entries(stepResults)) {
              if (result != "1") {
                failedTasks.push(step);
              }
            };

            if (failedTasks.length != 0) {
              core.setFailed(`One or more CI stages have failed. Please review the outputs of the following stepts: ${failedTasks}.`);
            };

      # Runs even when earlier steps failed so the Azure resources are always
      # released.
      - name: ResourceCleanup
        if: always()
        uses: azure/CLI@v1
        with:
          inlinescript: |
            az group delete -g ${{ matrix.AZURE_RESOURCE_GROUP }} --yes