From 7c77b3540dd9d8049e38da446f7fb683ee043bf8 Mon Sep 17 00:00:00 2001 From: Nashwan Azhari Date: Mon, 6 Jun 2022 13:06:46 +0300 Subject: [PATCH] Add Workflow for running critest with Hyper-V Containers on Windows. Signed-off-by: Nashwan Azhari --- .../windows-hyperv-periodic-trigger.yml | 25 ++ .github/workflows/windows-hyperv-periodic.yml | 351 ++++++++++++++++++ 2 files changed, 376 insertions(+) create mode 100644 .github/workflows/windows-hyperv-periodic-trigger.yml create mode 100644 .github/workflows/windows-hyperv-periodic.yml diff --git a/.github/workflows/windows-hyperv-periodic-trigger.yml b/.github/workflows/windows-hyperv-periodic-trigger.yml new file mode 100644 index 000000000..3927ea99e --- /dev/null +++ b/.github/workflows/windows-hyperv-periodic-trigger.yml @@ -0,0 +1,25 @@ +# Workflow intended to periodically run the Windows Hyper-V Integration test workflow (on the cron schedule below, or manually via workflow_dispatch). + +name: Windows Hyper-V Periodic Tests + +on: + workflow_dispatch: + schedule: + - cron: "0 1 * * *" # once a day at 01:00 (GitHub evaluates schedules in UTC) + +jobs: + + triggerWinIntegration: + if: github.repository == 'containerd/containerd' # only run in the upstream repository, not in forks + # NOTE(aznashwan, 11/24/21): GitHub actions do not currently support referencing + # or evaluating any kind of variables in the `uses` clause, but this will + # ideally be added in the future in which case the hardcoded reference to the + # upstream containerd repository should be replaced with the following to + # potentially allow contributors to enable periodic Windows tests on forks as well: + # uses: "${{ github.repository }}/.github/workflows/windows-hyperv-periodic.yml@${{ github.ref_name }}" + uses: containerd/containerd/.github/workflows/windows-hyperv-periodic.yml@main + secrets: + AZURE_SUB_ID: "${{ secrets.AZURE_SUB_ID }}" + AZURE_CREDS: "${{ secrets.AZURE_CREDS }}" + GCP_SERVICE_ACCOUNT: "${{ secrets.GCP_SERVICE_ACCOUNT }}" + GCP_WORKLOAD_IDENTITY_PROVIDER: "${{ secrets.GCP_WORKLOAD_IDENTITY_PROVIDER }}" diff --git a/.github/workflows/windows-hyperv-periodic.yml 
b/.github/workflows/windows-hyperv-periodic.yml new file mode 100644 index 000000000..fd773d595 --- /dev/null +++ b/.github/workflows/windows-hyperv-periodic.yml @@ -0,0 +1,351 @@ +# Workflow intended to run containerd integration tests on Windows using Hyper-V Containers. + +name: Windows Hyper-V Integration Tests + +on: + workflow_dispatch: + workflow_call: + secrets: + AZURE_SUB_ID: + required: true + AZURE_CREDS: + required: true + GCP_SERVICE_ACCOUNT: + required: true + GCP_WORKLOAD_IDENTITY_PROVIDER: + required: true + +env: + AZURE_DEFAULT_LOCATION: westeurope + AZURE_SUBSCRIPTION_ID: ${{ secrets.AZURE_SUB_ID }} + AZURE_DEFAULT_VM_SIZE: Standard_D2s_v3 + PASSWORD: Passw0rdAdmin # temp for testing, will be generated. NOTE(review): hard-coded, publicly visible admin password for a VM that is opened to SSH; move to a secret or generate it per run + DEFAULT_ADMIN_USERNAME: azureuser + SSH_OPTS: "-o ServerAliveInterval=20 -o StrictHostKeyChecking=no -o UserKnownHostsFile=/dev/null" + REMOTE_VM_BIN_PATH: "c:\\containerd\\bin" + BUSYBOX_TESTING_IMAGE_REF: "registry.k8s.io/e2e-test-images/busybox:1.29-2" # registry.k8s.io replaces the frozen k8s.gcr.io registry + RESOURCE_CONSUMER_TESTING_IMAGE_REF: "registry.k8s.io/e2e-test-images/resource-consumer:1.10" + WEBSERVER_TESTING_IMAGE_REF: "registry.k8s.io/e2e-test-images/nginx:1.14-2" + HCSSHIM_TAG: "master" + + +jobs: + winIntegration: + # NOTE: the following permissions are required by `google-github-actions/auth`: + permissions: + contents: 'read' + id-token: 'write' + strategy: + # NOTE(aznashwan): this will permit all other jobs from the matrix to finish and + # upload their results even if one has a failing non-test-task: + # (e.g. 
hitting resource limits in the `AZTestVMCreate` task) + fail-fast: false + matrix: + win_ver: [ltsc2019, ltsc2022] + include: + - win_ver: ltsc2019 + AZURE_IMG: "MicrosoftWindowsServer:WindowsServer:2019-Datacenter-with-Containers-smalldisk:17763.2565.220202" + AZURE_RESOURCE_GROUP: ctrd-integration-ltsc2019-${{ github.run_id }} + GOOGLE_BUCKET: "containerd-integration/logs/windows-ltsc2019-hyperv/" + - win_ver: ltsc2022 + AZURE_IMG: "MicrosoftWindowsServer:WindowsServer:2022-datacenter-smalldisk-g2:20348.524.220201" + AZURE_RESOURCE_GROUP: ctrd-integration-ltsc2022-${{ github.run_id }} + GOOGLE_BUCKET: "containerd-integration/logs/windows-ltsc2022-hyperv/" + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v2 + + - name: Install required packages + run: | + sudo apt-get update && sudo apt-get install xmlstarlet -y # refresh the package index first so the install does not hit stale mirror entries + + - name: PrepareArtifacts + run: | + STARTED_TIME=$(date +%s) + LOGS_DIR=$HOME/$STARTED_TIME + echo "STARTED_TIME=$STARTED_TIME" >> $GITHUB_ENV + echo "LOGS_DIR=$LOGS_DIR" >> $GITHUB_ENV + + echo "VM_INTEGRATION_LOGFILE=/c/Logs/integration.log" >> $GITHUB_ENV + echo "VM_CRI_INTEGRATION_LOGFILE=/c/Logs/cri-integration.log" >> $GITHUB_ENV + + mkdir -p $LOGS_DIR/artifacts + jq -n --arg node temp --arg timestamp $STARTED_TIME '$timestamp|tonumber|{timestamp:.,$node}' > $LOGS_DIR/started.json + + - name: Generate ssh key pair + run: | + mkdir -p $HOME/.ssh/ + ssh-keygen -t rsa -b 4096 -C "ci@containerd.com" -f $HOME/.ssh/id_rsa -q -N "" + echo "SSH_PUB_KEY=$(cat ~/.ssh/id_rsa.pub)" >> $GITHUB_ENV + + - name: AZLogin + uses: azure/login@v1 + with: + creds: ${{ secrets.AZURE_CREDS }} + + - name: AZResourceGroupCreate + uses: azure/CLI@v1 + with: + inlinescript: | + az group create -n ${{ matrix.AZURE_RESOURCE_GROUP }} -l ${{ env.AZURE_DEFAULT_LOCATION }} --tags creationTimestamp=$(date -u '+%Y-%m-%dT%H:%M:%SZ') + + - name: AZTestVMCreate + uses: azure/CLI@v1 + with: + inlinescript: | + DETAILS=$(az vm create -n winTestVM --admin-username ${{ env.DEFAULT_ADMIN_USERNAME }} 
--admin-password ${{ env.PASSWORD }} --image ${{ matrix.AZURE_IMG }} -g ${{ matrix.AZURE_RESOURCE_GROUP }} --nsg-rule SSH --size ${{ env.AZURE_DEFAULT_VM_SIZE }} --public-ip-sku Standard -o json) + PUB_IP=$(echo $DETAILS | jq -r .publicIpAddress) + if [ "$PUB_IP" == "null" ] + then + RETRY=0 + while [ "$PUB_IP" == "null" ] && [ $RETRY -le 5 ] # keep polling only while the IP is still missing AND retries remain + do + sleep 5 + PUB_IP=$(az vm show -d -g ${{ matrix.AZURE_RESOURCE_GROUP }} -n winTestVM -o json --query publicIps | jq -r) + RETRY=$(( $RETRY + 1 )) + done + fi + + if [ "$PUB_IP" == "null" ] + then + echo "failed to fetch public IP" + exit 1 + fi + echo "VM_PUB_IP=$PUB_IP" >> $GITHUB_ENV + + - name: EnableAZVMSSH + uses: azure/CLI@v1 + with: + inlinescript: | + az vm run-command invoke --command-id RunPowerShellScript -n winTestVM -g ${{ matrix.AZURE_RESOURCE_GROUP }} --scripts @$GITHUB_WORKSPACE/script/setup/enable_ssh_windows.ps1 --parameters 'SSHPublicKey=${{ env.SSH_PUB_KEY }}' + + - name: TestSSHConnection + run: | + if ! ssh -i $HOME/.ssh/id_rsa ${{ env.SSH_OPTS }} azureuser@${{ env.VM_PUB_IP }} "hostname"; + then + exit 1 + fi + + - name: InstallAdditionalFeaturesWS2022 + if: ${{ matrix.win_ver == 'ltsc2022' }} + run: | + ssh -i $HOME/.ssh/id_rsa ${{ env.SSH_OPTS }} azureuser@${{ env.VM_PUB_IP }} "powershell.exe -command { Install-WindowsFeature -Name 'Containers' }" + # NOTE(aznashwan): the 2022 image needs Hyper-V to be explicitly enabled: + ssh -i $HOME/.ssh/id_rsa ${{ env.SSH_OPTS }} azureuser@${{ env.VM_PUB_IP }} "powershell.exe -command { Install-WindowsFeature -Name Hyper-V -IncludeAllSubFeature -IncludeManagementTools }" + ssh -i $HOME/.ssh/id_rsa ${{ env.SSH_OPTS }} azureuser@${{ env.VM_PUB_IP }} "shutdown.exe /r /t 0" + + + - name: WaitForVMToRestart + if: ${{ matrix.win_ver == 'ltsc2022' }} + timeout-minutes: 5 + run: | + # give the vm 30 seconds to actually stop. SSH server might actually respond while server is shutting down. + sleep 30 + while [ ! 
$( ssh -i $HOME/.ssh/id_rsa ${{ env.SSH_OPTS }} azureuser@${{ env.VM_PUB_IP }} "hostname") ]; + do + echo "Unable to connect to azurevm"; sleep 5 # back off between probes instead of spinning on refused connections + done + echo "Connection reestablished. VM restarted succesfully." + + - name: CreateNatNetworkWS2022 + if: ${{ matrix.win_ver == 'ltsc2022' }} + run: | + ssh -i $HOME/.ssh/id_rsa ${{ env.SSH_OPTS }} azureuser@${{ env.VM_PUB_IP }} "powershell.exe -command { curl.exe -L 'https://raw.githubusercontent.com/microsoft/SDN/master/Kubernetes/windows/hns.psm1' -o hns.psm1 }" + ssh -i $HOME/.ssh/id_rsa ${{ env.SSH_OPTS }} azureuser@${{ env.VM_PUB_IP }} "powershell.exe -command { Import-Module .\hns.psm1 ; New-HnsNetwork -Type NAT -Name nat -AddressPrefix 172.19.208.0/20 -Gateway 172.19.208.1 }" + + - name: PrepareTestingEnv + run: | + scp -i $HOME/.ssh/id_rsa ${{ env.SSH_OPTS }} $GITHUB_WORKSPACE/script/setup/prepare_env_windows.ps1 azureuser@${{ env.VM_PUB_IP }}:/prepare_env_windows.ps1 + ssh -i $HOME/.ssh/id_rsa ${{ env.SSH_OPTS }} azureuser@${{ env.VM_PUB_IP }} "c:\\prepare_env_windows.ps1" + + - name: MakeContainerDBins + run: | + ssh -i $HOME/.ssh/id_rsa ${{ env.SSH_OPTS }} azureuser@${{ env.VM_PUB_IP }} "git clone https://github.com/containerd/containerd c:\\containerd " + ssh -i $HOME/.ssh/id_rsa ${{ env.SSH_OPTS }} azureuser@${{ env.VM_PUB_IP }} "cd c:\containerd ; make binaries" + + - name: BuildHcsshim + run: | + # NOTE(aznashwan, 6/6/22): need to use tip of HCSSHIM for the following: + # https://github.com/microsoft/hcsshim/pull/1388 + ssh -i $HOME/.ssh/id_rsa ${{ env.SSH_OPTS }} ${{ env.DEFAULT_ADMIN_USERNAME }}@${{ env.VM_PUB_IP }} "git clone https://github.com/Microsoft/hcsshim c:\containerd\hcsshim" + ssh -i $HOME/.ssh/id_rsa ${{ env.SSH_OPTS }} ${{ env.DEFAULT_ADMIN_USERNAME }}@${{ env.VM_PUB_IP }} "cd c:\containerd\hcsshim; git fetch --tags origin $HCSSHIM_TAG ; \ + git checkout $HCSSHIM_TAG ; go build -mod=vendor -o ${{ env.REMOTE_VM_BIN_PATH }}\containerd-shim-runhcs-v1.exe .\cmd\containerd-shim-runhcs-v1" + + - name: 
RunIntegrationTests + id: RunIntegrationTests + # NOTE(aznashwan): this is set to continue-on-error to allow the workflow to run until + # the reports are converted/uploaded to GCloud so as to show up on testgrid.k8s.io too. + continue-on-error: true + run: | + ssh -i $HOME/.ssh/id_rsa ${{ env.SSH_OPTS }} azureuser@${{ env.VM_PUB_IP }} "sh.exe -s" << EOF + cd /c/containerd + export EXTRA_TESTFLAGS="-timeout=20m" + export USE_HYPERV=1 + set -o pipefail + make integration | tee ${{ env.VM_INTEGRATION_LOGFILE }} + EOF + echo "SUCCEEDED=1" >> $GITHUB_OUTPUT # step output read by the final "Check all CI stages succeeded" step + + - name: PrepareRepoList + run: | + cat > containerd-hyperv-config.toml << EOF + version = 2 + + [plugins."io.containerd.grpc.v1.cri".containerd] + default_runtime_name = "runhcs-wcow-hypervisor" + + [plugins."io.containerd.grpc.v1.cri".containerd.runtimes] + [plugins."io.containerd.grpc.v1.cri".containerd.runtimes.runhcs-wcow-hypervisor] + base_runtime_spec = "" + cni_conf_dir = "" + cni_max_conf_num = 0 + container_annotations = [] + pod_annotations = [] + privileged_without_host_devices = false + runtime_engine = "" + runtime_path = "" + runtime_root = "" + runtime_type = "io.containerd.runhcs.v1" + [plugins."io.containerd.grpc.v1.cri".containerd.runtimes.runhcs-wcow-hypervisor.options] + Debug = true + DebugType = 2 + SandboxPlatform = "windows/amd64" + SandboxIsolation = 1 + EOF + + cat > repolist.toml << EOF + busybox = "${{ env.BUSYBOX_TESTING_IMAGE_REF }}" + ResourceConsumer = "${{ env.RESOURCE_CONSUMER_TESTING_IMAGE_REF }}" + EOF + + cat > cri-test-images.yaml << EOF + defaultTestContainerImage: ${{ env.BUSYBOX_TESTING_IMAGE_REF }} + webServerTestImage: ${{ env.WEBSERVER_TESTING_IMAGE_REF }} + EOF + + scp -i $HOME/.ssh/id_rsa ${{ env.SSH_OPTS }} repolist.toml azureuser@${{ env.VM_PUB_IP }}:c:/repolist.toml + scp -i $HOME/.ssh/id_rsa ${{ env.SSH_OPTS }} cri-test-images.yaml azureuser@${{ env.VM_PUB_IP }}:c:/cri-test-images.yaml + scp -i $HOME/.ssh/id_rsa ${{ env.SSH_OPTS }} containerd-hyperv-config.toml 
azureuser@${{ env.VM_PUB_IP }}:c:/containerd-hyperv-config.toml + + # NOTE(aznashwan): in-tree integration tests will need some updates to run on + # Hyper-V containers so we skip this for now: + - name: RunCRIIntegrationTests + id: RunCRIIntegrationTests + # NOTE(aznashwan): this is set to continue-on-error to allow the workflow to run until + # the reports are converted/uploaded to GCloud so as to show up on testgrid.k8s.io too. + continue-on-error: true + run: | + ssh -i $HOME/.ssh/id_rsa ${{ env.SSH_OPTS }} azureuser@${{ env.VM_PUB_IP }} "sh.exe -s" < /c/Logs/junit_integration.xml'" + ssh -i $HOME/.ssh/id_rsa ${{ env.SSH_OPTS }} azureuser@${{ env.VM_PUB_IP }} "sh.exe -c 'touch ${{ env.VM_CRI_INTEGRATION_LOGFILE }}; cat ${{ env.VM_CRI_INTEGRATION_LOGFILE }} | go-junit-report.exe > /c/Logs/junit_cri_integration.xml'" + + # Copy over all the JUnit reports, then delete the properties element of every testsuite in place: + scp -i $HOME/.ssh/id_rsa ${{ env.SSH_OPTS }} azureuser@${{ env.VM_PUB_IP }}:c:/Logs/*.xml ${{ env.LOGS_DIR }}/artifacts/ + for f in $(ls ${{ env.LOGS_DIR }}/artifacts/*.xml); do + xmlstarlet ed -d "/testsuites/testsuite/properties" $f > ${{ env.LOGS_DIR }}/$(basename $f) + mv ${{ env.LOGS_DIR }}/$(basename $f) $f + done + + - name: FinishJob + run: | + jq -n --arg result SUCCESS --arg timestamp $(date +%s) '$timestamp|tonumber|{timestamp:.,$result}' > ${{ env.LOGS_DIR }}/finished.json + echo "${{ env.STARTED_TIME }}" > ${{ github.workspace }}/latest-build.txt + + - name: AssignGcpCreds + id: AssignGcpCreds + run: | + echo '::set-output name=GCP_SERVICE_ACCOUNT::${{ secrets.GCP_SERVICE_ACCOUNT }}' + echo '::set-output name=GCP_WORKLOAD_IDENTITY_PROVIDER::${{ secrets.GCP_WORKLOAD_IDENTITY_PROVIDER }}' + + - name: AuthGcp + uses: google-github-actions/auth@v0 + if: steps.AssignGcpCreds.outputs.GCP_SERVICE_ACCOUNT && steps.AssignGcpCreds.outputs.GCP_WORKLOAD_IDENTITY_PROVIDER + with: + service_account: ${{ secrets.GCP_SERVICE_ACCOUNT }} + workload_identity_provider: ${{ secrets.GCP_WORKLOAD_IDENTITY_PROVIDER }} + + - 
name: UploadJobReport + uses: google-github-actions/upload-cloud-storage@v0.8.0 + if: steps.AssignGcpCreds.outputs.GCP_SERVICE_ACCOUNT && steps.AssignGcpCreds.outputs.GCP_WORKLOAD_IDENTITY_PROVIDER + with: + path: ${{ github.workspace }}/latest-build.txt + destination: ${{ matrix.GOOGLE_BUCKET }} + parent: false + + - name: UploadLogsDir + uses: google-github-actions/upload-cloud-storage@v0.8.0 + if: steps.AssignGcpCreds.outputs.GCP_SERVICE_ACCOUNT && steps.AssignGcpCreds.outputs.GCP_WORKLOAD_IDENTITY_PROVIDER + with: + path: ${{ env.LOGS_DIR }} + destination: ${{ matrix.GOOGLE_BUCKET }}${{ env.STARTED_TIME}} + parent: false + + - name: Check all CI stages succeeded + uses: actions/github-script@v3 + with: + script: | + const stepResults = { + RunIntegrationTests: "${{ steps.RunIntegrationTests.outputs.SUCCEEDED }}", + RunCRIIntegrationTests: "${{ steps.RunCRIIntegrationTests.outputs.SUCCEEDED }}", + RunCritest: "${{ steps.RunCritest.outputs.SUCCEEDED }}", + }; + const failedTasks = []; + for (const [step, result] of Object.entries(stepResults)) { + if (result !== "1") { // a stage counts as passed only if it exported SUCCEEDED == "1" + failedTasks.push(step); + } + }; + if (failedTasks.length !== 0) { + core.setFailed(`One or more CI stages have failed. Please review the outputs of the following stepts: ${failedTasks}.`); + }; + + - name: ResourceCleanup + if: always() + uses: azure/CLI@v1 + with: + inlinescript: | + az group delete -g ${{ matrix.AZURE_RESOURCE_GROUP }} --yes