Add Workflow for running critest with Hyper-V Containers on Windows.

Signed-off-by: Nashwan Azhari <nazhari@cloudbasesolutions.com>
This commit is contained in:
Nashwan Azhari 2022-06-06 13:06:46 +03:00
parent 9494f0b806
commit 7c77b3540d
2 changed files with 376 additions and 0 deletions

View File

@ -0,0 +1,25 @@
# Workflow intended to periodically run the Windows Hyper-V Integration test workflow.
name: Windows Hyper-V Periodic Tests
on:
workflow_dispatch:
schedule:
- cron: "0 1 * * *"
jobs:
triggerWinIntegration:
if: github.repository == 'containerd/containerd'
# NOTE(aznashwan, 11/24/21): GitHub actions do not currently support referencing
# or evaluating any kind of variables in the `uses` clause, but this will
# ideally be added in the future in which case the hardcoded reference to the
# upstream containerd repository should be replaced with the following to
# potentially allow contributors to enable periodic Windows tests on forks as well:
# uses: "${{ github.repository }}/.github/workflows/windows-hyperv-periodic.yml@${{ github.ref_name }}"
uses: containerd/containerd/.github/workflows/windows-hyperv-periodic.yml@main
secrets:
AZURE_SUB_ID: "${{ secrets.AZURE_SUB_ID }}"
AZURE_CREDS: "${{ secrets.AZURE_CREDS }}"
GCP_SERVICE_ACCOUNT: "${{ secrets.GCP_SERVICE_ACCOUNT }}"
GCP_WORKLOAD_IDENTITY_PROVIDER: "${{ secrets.GCP_WORKLOAD_IDENTITY_PROVIDER }}"

View File

@ -0,0 +1,351 @@
# Workflow intended to run containerd integration tests on Windows using Hyper-V Containers.
name: Windows Hyper-V Integration Tests
on:
workflow_dispatch:
workflow_call:
secrets:
AZURE_SUB_ID:
required: true
AZURE_CREDS:
required: true
GCP_SERVICE_ACCOUNT:
required: true
GCP_WORKLOAD_IDENTITY_PROVIDER:
required: true
env:
AZURE_DEFAULT_LOCATION: westeurope
AZURE_SUBSCRIPTION_ID: ${{ secrets.AZURE_SUB_ID }}
AZURE_DEFAULT_VM_SIZE: Standard_D2s_v3
PASSWORD: Passw0rdAdmin # temp for testing, will be generated
DEFAULT_ADMIN_USERNAME: azureuser
SSH_OPTS: "-o ServerAliveInterval=20 -o StrictHostKeyChecking=no -o UserKnownHostsFile=/dev/null"
REMOTE_VM_BIN_PATH: "c:\\containerd\\bin"
BUSYBOX_TESTING_IMAGE_REF: "k8s.gcr.io/e2e-test-images/busybox:1.29-2"
RESOURCE_CONSUMER_TESTING_IMAGE_REF: "k8s.gcr.io/e2e-test-images/resource-consumer:1.10"
WEBSERVER_TESTING_IMAGE_REF: "k8s.gcr.io/e2e-test-images/nginx:1.14-2"
HCSSHIM_TAG: "master"
jobs:
winIntegration:
# NOTE: the following permissions are required by `google-github-actions/auth`:
permissions:
contents: 'read'
id-token: 'write'
strategy:
# NOTE(aznashwan): this will permit all other jobs from the matrix to finish and
# upload their results even if one has a failing non-test-task:
# (e.g. hitting resource limits in the `AZTestVMCreate` task)
fail-fast: false
matrix:
win_ver: [ltsc2019, ltsc2022]
include:
- win_ver: ltsc2019
AZURE_IMG: "MicrosoftWindowsServer:WindowsServer:2019-Datacenter-with-Containers-smalldisk:17763.2565.220202"
AZURE_RESOURCE_GROUP: ctrd-integration-ltsc2019-${{ github.run_id }}
GOOGLE_BUCKET: "containerd-integration/logs/windows-ltsc2019-hyperv/"
- win_ver: ltsc2022
AZURE_IMG: "MicrosoftWindowsServer:WindowsServer:2022-datacenter-smalldisk-g2:20348.524.220201"
AZURE_RESOURCE_GROUP: ctrd-integration-ltsc2022-${{ github.run_id }}
GOOGLE_BUCKET: "containerd-integration/logs/windows-ltsc2022-hyperv/"
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v2
- name: Install required packages
run: |
sudo apt-get install xmlstarlet -y
- name: PrepareArtifacts
run: |
STARTED_TIME=$(date +%s)
LOGS_DIR=$HOME/$STARTED_TIME
echo "STARTED_TIME=$STARTED_TIME" >> $GITHUB_ENV
echo "LOGS_DIR=$LOGS_DIR" >> $GITHUB_ENV
echo "VM_INTEGRATION_LOGFILE=/c/Logs/integration.log" >> $GITHUB_ENV
echo "VM_CRI_INTEGRATION_LOGFILE=/c/Logs/cri-integration.log" >> $GITHUB_ENV
mkdir -p $LOGS_DIR/artifacts
jq -n --arg node temp --arg timestamp $STARTED_TIME '$timestamp|tonumber|{timestamp:.,$node}' > $LOGS_DIR/started.json
- name: Generate ssh key pair
run: |
mkdir -p $HOME/.ssh/
ssh-keygen -t rsa -b 4096 -C "ci@containerd.com" -f $HOME/.ssh/id_rsa -q -N ""
echo "SSH_PUB_KEY=$(cat ~/.ssh/id_rsa.pub)" >> $GITHUB_ENV
- name: AZLogin
uses: azure/login@v1
with:
creds: ${{ secrets.AZURE_CREDS }}
- name: AZResourceGroupCreate
uses: azure/CLI@v1
with:
inlinescript: |
az group create -n ${{ matrix.AZURE_RESOURCE_GROUP }} -l ${{ env.AZURE_DEFAULT_LOCATION }} --tags creationTimestamp=$(date -u '+%Y-%m-%dT%H:%M:%SZ')
- name: AZTestVMCreate
uses: azure/CLI@v1
with:
inlinescript: |
DETAILS=$(az vm create -n winTestVM --admin-username ${{ env.DEFAULT_ADMIN_USERNAME }} --admin-password ${{ env.PASSWORD }} --image ${{ matrix.AZURE_IMG }} -g ${{ matrix.AZURE_RESOURCE_GROUP }} --nsg-rule SSH --size ${{ env.AZURE_DEFAULT_VM_SIZE }} --public-ip-sku Standard -o json)
PUB_IP=$(echo $DETAILS | jq -r .publicIpAddress)
if [ "$PUB_IP" == "null" ]
then
RETRY=0
while [ "$PUB_IP" == "null" ] || [ $RETRY -le 5 ]
do
sleep 5
PUB_IP=$(az vm show -d -g ${{ matrix.AZURE_RESOURCE_GROUP }} -n winTestVM -o json --query publicIps | jq -r)
RETRY=$(( $RETRY + 1 ))
done
fi
if [ "$PUB_IP" == "null" ]
then
echo "failed to fetch public IP"
exit 1
fi
echo "VM_PUB_IP=$PUB_IP" >> $GITHUB_ENV
- name: EnableAZVMSSH
uses: azure/CLI@v1
with:
inlinescript: |
az vm run-command invoke --command-id RunPowerShellScript -n winTestVM -g ${{ matrix.AZURE_RESOURCE_GROUP }} --scripts @$GITHUB_WORKSPACE/script/setup/enable_ssh_windows.ps1 --parameters 'SSHPublicKey=${{ env.SSH_PUB_KEY }}'
- name: TestSSHConnection
run: |
if ! ssh -i $HOME/.ssh/id_rsa ${{ env.SSH_OPTS }} azureuser@${{ env.VM_PUB_IP }} "hostname";
then
exit 1
fi
- name: InstallAdditionalFeaturesWS2022
if: ${{ matrix.win_ver == 'ltsc2022' }}
run: |
ssh -i $HOME/.ssh/id_rsa ${{ env.SSH_OPTS }} azureuser@${{ env.VM_PUB_IP }} "powershell.exe -command { Install-WindowsFeature -Name 'Containers' }"
# NOTE(aznashwan): the 2022 image needs Hyper-V to be explicitly enabled:
ssh -i $HOME/.ssh/id_rsa ${{ env.SSH_OPTS }} azureuser@${{ env.VM_PUB_IP }} "powershell.exe -command { Install-WindowsFeature -Name Hyper-V -IncludeAllSubFeature -IncludeManagementTools }"
ssh -i $HOME/.ssh/id_rsa ${{ env.SSH_OPTS }} azureuser@${{ env.VM_PUB_IP }} "shutdown.exe /r /t 0"
- name: WaitForVMToRestart
if: ${{ matrix.win_ver == 'ltsc2022' }}
timeout-minutes: 5
run: |
# give the vm 30 seconds to actually stop. SSH server might actually respond while server is shutting down.
sleep 30
while [ ! $( ssh -i $HOME/.ssh/id_rsa ${{ env.SSH_OPTS }} azureuser@${{ env.VM_PUB_IP }} "hostname") ];
do
echo "Unable to connect to azurevm"
done
echo "Connection reestablished. VM restarted succesfully."
- name: CreateNatNetworkWS2022
if: ${{ matrix.win_ver == 'ltsc2022' }}
run: |
ssh -i $HOME/.ssh/id_rsa ${{ env.SSH_OPTS }} azureuser@${{ env.VM_PUB_IP }} "powershell.exe -command { curl.exe -L 'https://raw.githubusercontent.com/microsoft/SDN/master/Kubernetes/windows/hns.psm1' -o hns.psm1 }"
ssh -i $HOME/.ssh/id_rsa ${{ env.SSH_OPTS }} azureuser@${{ env.VM_PUB_IP }} "powershell.exe -command { Import-Module .\hns.psm1 ; New-HnsNetwork -Type NAT -Name nat -AddressPrefix 172.19.208.0/20 -Gateway 172.19.208.1 }"
- name: PrepareTestingEnv
run: |
scp -i $HOME/.ssh/id_rsa ${{ env.SSH_OPTS }} $GITHUB_WORKSPACE/script/setup/prepare_env_windows.ps1 azureuser@${{ env.VM_PUB_IP }}:/prepare_env_windows.ps1
ssh -i $HOME/.ssh/id_rsa ${{ env.SSH_OPTS }} azureuser@${{ env.VM_PUB_IP }} "c:\\prepare_env_windows.ps1"
- name: MakeContainerDBins
run: |
ssh -i $HOME/.ssh/id_rsa ${{ env.SSH_OPTS }} azureuser@${{ env.VM_PUB_IP }} "git clone http://github.com/containerd/containerd c:\\containerd "
ssh -i $HOME/.ssh/id_rsa ${{ env.SSH_OPTS }} azureuser@${{ env.VM_PUB_IP }} "cd c:\containerd ; make binaries"
- name: BuildHcsshim
run: |
# NOTE(aznashwan, 6/6/22): need to use tip of HCSSHIM for the following:
# https://github.com/microsoft/hcsshim/pull/1388
ssh -i $HOME/.ssh/id_rsa ${{ env.SSH_OPTS }} ${{ env.DEFAULT_ADMIN_USERNAME }}@${{ env.VM_PUB_IP }} "git clone http://github.com/Microsoft/hcsshim c:\containerd\hcsshim"
ssh -i $HOME/.ssh/id_rsa ${{ env.SSH_OPTS }} ${{ env.DEFAULT_ADMIN_USERNAME }}@${{ env.VM_PUB_IP }} "cd c:\containerd\hcsshim; git fetch --tags origin $HCSSHIM_TAG ; \
git checkout $HCSSHIM_TAG ; go build -mod=vendor -o ${{ env.REMOTE_VM_BIN_PATH }}\containerd-shim-runhcs-v1.exe .\cmd\containerd-shim-runhcs-v1"
- name: RunIntegrationTests
id: RunIntegrationTests
# NOTE(aznashwan): this is set to continue-on-error to allow the workflow to run until
# the reports are converted/uploaded to GCloud so as to show up on testgrid.k8s.io too.
continue-on-error: true
run: |
ssh -i $HOME/.ssh/id_rsa ${{ env.SSH_OPTS }} azureuser@${{ env.VM_PUB_IP }} "sh.exe -s" << EOF
cd /c/containerd
export EXTRA_TESTFLAGS="-timeout=20m"
export USE_HYPERV=1
set -o pipefail
make integration | tee ${{ env.VM_INTEGRATION_LOGFILE }}
EOF
echo '::set-output name=SUCCEEDED::1'
- name: PrepareRepoList
run: |
cat > containerd-hyperv-config.toml << EOF
version = 2
[plugins."io.containerd.grpc.v1.cri".containerd]
default_runtime_name = "runhcs-wcow-hypervisor"
[plugins."io.containerd.grpc.v1.cri".containerd.runtimes]
[plugins."io.containerd.grpc.v1.cri".containerd.runtimes.runhcs-wcow-hypervisor]
base_runtime_spec = ""
cni_conf_dir = ""
cni_max_conf_num = 0
container_annotations = []
pod_annotations = []
privileged_without_host_devices = false
runtime_engine = ""
runtime_path = ""
runtime_root = ""
runtime_type = "io.containerd.runhcs.v1"
[plugins."io.containerd.grpc.v1.cri".containerd.runtimes.runhcs-wcow-hypervisor.options]
Debug = true
DebugType = 2
SandboxPlatform = "windows/amd64"
SandboxIsolation = 1
EOF
cat > repolist.toml << EOF
busybox = "${{ env.BUSYBOX_TESTING_IMAGE_REF }}"
ResourceConsumer = "${{ env.RESOURCE_CONSUMER_TESTING_IMAGE_REF }}"
EOF
cat > cri-test-images.yaml << EOF
defaultTestContainerImage: ${{ env.BUSYBOX_TESTING_IMAGE_REF }}
webServerTestImage: ${{ env.WEBSERVER_TESTING_IMAGE_REF }}
EOF
scp -i $HOME/.ssh/id_rsa ${{ env.SSH_OPTS }} repolist.toml azureuser@${{ env.VM_PUB_IP }}:c:/repolist.toml
scp -i $HOME/.ssh/id_rsa ${{ env.SSH_OPTS }} cri-test-images.yaml azureuser@${{ env.VM_PUB_IP }}:c:/cri-test-images.yaml
scp -i $HOME/.ssh/id_rsa ${{ env.SSH_OPTS }} containerd-hyperv-config.toml azureuser@${{ env.VM_PUB_IP }}:c:/containerd-hyperv-config.toml
# NOTE(aznashwan): in-tree integration tests will need some updates to on
# Hyper-V containers so we skip this for now:
- name: RunCRIIntegrationTests
id: RunCRIIntegrationTests
# NOTE(aznashwan): this is set to continue-on-error to allow the workflow to run until
# the reports are converted/uploaded to GCloud so as to show up on testgrid.k8s.io too.
continue-on-error: true
run: |
ssh -i $HOME/.ssh/id_rsa ${{ env.SSH_OPTS }} azureuser@${{ env.VM_PUB_IP }} "sh.exe -s" <<EOF
cd c:/containerd
./script/setup/install-cni-windows
export TEST_IMAGE_LIST=c:/repolist.toml
export USE_HYPERV=1
# NOTE: 'TestContainerdRestart' should be skipped as discussed in:
# https://github.com/containerd/containerd/pull/7025
export FOCUS="[^TestContainerdRestart$]"
set -o pipefail
make cri-integration | tee ${{ env.VM_CRI_INTEGRATION_LOGFILE }}
EOF
echo '::set-output name=SUCCEEDED::1'
- name: GetCritestRepo
run: |
ssh -i $HOME/.ssh/id_rsa ${{ env.SSH_OPTS }} azureuser@${{ env.VM_PUB_IP }} "git clone https://github.com/kubernetes-sigs/cri-tools c:/cri-tools"
- name: BuildCritest
run: |
ssh -i $HOME/.ssh/id_rsa ${{ env.SSH_OPTS }} azureuser@${{ env.VM_PUB_IP }} "sh.exe -c 'cd /c/cri-tools && make critest'"
- name: RunCritest
id: RunCritest
# NOTE(aznashwan): this is set to continue-on-error to allow the workflow to run until
# the reports are converted/uploaded to GCloud so as to show up on testgrid.k8s.io too.
continue-on-error: true
run: |
# This test is exceedingly flaky only on ws2022 so skip for now to keep CI happy.
# Info: https://github.com/containerd/containerd/issues/6652
SKIP=""
if [ '${{ matrix.win_ver }}' == 'ltsc2022' ];then
SKIP='-ginkgo.skip="runtime should support exec with tty=true and stdin=true"'
fi
ssh -i $HOME/.ssh/id_rsa ${{ env.SSH_OPTS }} azureuser@${{ env.VM_PUB_IP }} "powershell.exe -command { C:\containerd\bin\containerd.exe --log-level=debug --config=c:/containerd-hyperv-config.toml --log-file=C:/logs/containerd.log --service-name containerd --register-service ; Set-Service containerd -StartupType Automatic; Start-Service containerd }"
ssh -i $HOME/.ssh/id_rsa ${{ env.SSH_OPTS }} azureuser@${{ env.VM_PUB_IP }} "sh.exe -s" <<EOF
sleep 5
set -o pipefail
c:/cri-tools/build/bin/critest.exe $SKIP --runtime-endpoint='npipe://./pipe/containerd-containerd' --test-images-file='c:/cri-test-images.yaml' --report-dir='c:/Logs' -ginkgo.junit-report="C:\Logs\junit_critest.xml" | tee c:/Logs/critest.log
EOF
echo '::set-output name=SUCCEEDED::1'
- name: PullLogsFromWinNode
run: |
# Generate JUnit reports from the stdouts of the tests:
ssh -i $HOME/.ssh/id_rsa ${{ env.SSH_OPTS }} azureuser@${{ env.VM_PUB_IP }} "sh.exe -c 'touch ${{ env.VM_INTEGRATION_LOGFILE }}; cat ${{ env.VM_INTEGRATION_LOGFILE }} | go-junit-report.exe > /c/Logs/junit_integration.xml'"
ssh -i $HOME/.ssh/id_rsa ${{ env.SSH_OPTS }} azureuser@${{ env.VM_PUB_IP }} "sh.exe -c 'touch ${{ env.VM_CRI_INTEGRATION_LOGFILE }}; cat ${{ env.VM_CRI_INTEGRATION_LOGFILE }} | go-junit-report.exe > /c/Logs/junit_cri_integration.xml'"
# Copy over all the JUnit reports:
scp -i $HOME/.ssh/id_rsa ${{ env.SSH_OPTS }} azureuser@${{ env.VM_PUB_IP }}:c:/Logs/*.xml ${{ env.LOGS_DIR }}/artifacts/
for f in $(ls ${{ env.LOGS_DIR }}/artifacts/*.xml); do
xmlstarlet ed -d "/testsuites/testsuite/properties" $f > ${{ env.LOGS_DIR }}/$(basename $f)
mv ${{ env.LOGS_DIR }}/$(basename $f) $f
done
- name: FinishJob
run: |
jq -n --arg result SUCCESS --arg timestamp $(date +%s) '$timestamp|tonumber|{timestamp:.,$result}' > ${{ env.LOGS_DIR }}/finished.json
echo "${{ env.STARTED_TIME }}" > ${{ github.workspace }}/latest-build.txt
- name: AssignGcpCreds
id: AssignGcpCreds
run: |
echo '::set-output name=GCP_SERVICE_ACCOUNT::${{ secrets.GCP_SERVICE_ACCOUNT }}'
echo '::set-output name=GCP_WORKLOAD_IDENTITY_PROVIDER::${{ secrets.GCP_WORKLOAD_IDENTITY_PROVIDER }}'
- name: AuthGcp
uses: google-github-actions/auth@v0
if: steps.AssignGcpCreds.outputs.GCP_SERVICE_ACCOUNT && steps.AssignGcpCreds.outputs.GCP_WORKLOAD_IDENTITY_PROVIDER
with:
service_account: ${{ secrets.GCP_SERVICE_ACCOUNT }}
workload_identity_provider: ${{ secrets.GCP_WORKLOAD_IDENTITY_PROVIDER }}
- name: UploadJobReport
uses: google-github-actions/upload-cloud-storage@v0.8.0
if: steps.AssignGcpCreds.outputs.GCP_SERVICE_ACCOUNT && steps.AssignGcpCreds.outputs.GCP_WORKLOAD_IDENTITY_PROVIDER
with:
path: ${{ github.workspace }}/latest-build.txt
destination: ${{ matrix.GOOGLE_BUCKET }}
parent: false
- name: UploadLogsDir
uses: google-github-actions/upload-cloud-storage@v0.8.0
if: steps.AssignGcpCreds.outputs.GCP_SERVICE_ACCOUNT && steps.AssignGcpCreds.outputs.GCP_WORKLOAD_IDENTITY_PROVIDER
with:
path: ${{ env.LOGS_DIR }}
destination: ${{ matrix.GOOGLE_BUCKET }}${{ env.STARTED_TIME}}
parent: false
- name: Check all CI stages succeeded
uses: actions/github-script@v3
with:
script: |
const stepResults = {
RunIntegrationTests: "${{ steps.RunIntegrationTests.outputs.SUCCEEDED }}",
RunCRIIntegrationTests: "${{ steps.RunCRIIntegrationTests.outputs.SUCCEEDED }}",
RunCritest: "${{ steps.RunCritest.outputs.SUCCEEDED }}",
};
let failedTasks = [];
for( [step, result] of Object.entries(stepResults) ) {
if (result != "1") {
failedTasks.push(step);
}
};
if (failedTasks.length != 0) {
core.setFailed(`One or more CI stages have failed. Please review the outputs of the following stepts: ${failedTasks}.`);
};
- name: ResourceCleanup
if: always()
uses: azure/CLI@v1
with:
inlinescript: |
az group delete -g ${{ matrix.AZURE_RESOURCE_GROUP }} --yes