Significantly improve download artifact latencies on GCE Windows node boot.

This commit is contained in:
Jeremy Edwards
2020-11-17 16:40:05 -08:00
parent c98f6bf308
commit 155bda010a

View File

@@ -128,53 +128,132 @@ function Validate-SHA {
# It will loop through the URLs list until it has a success (forever by
# default, or for at most -Attempts download attempts when -Attempts is greater
# than zero). If successful, it will write the file to OutFile. You can
# optionally provide a Hash argument with an optional Algorithm, in which case
# it will attempt to validate the downloaded file against the hash. SHA512 will
# be used if -Algorithm is not provided.
# This function is idempotent, if OutFile already exists and has the correct Hash
# then the download will be skipped. If the Hash is incorrect, the file will be
# overwritten.
#
# Parameters:
#   Hash:      expected hash of the file; validation is skipped when empty.
#   Algorithm: hash algorithm name passed to Validate-SHA (default SHA512).
#   OutFile:   path the downloaded file is written to.
#   URLs:      candidate URLs, tried in order and then cycled.
#   Headers:   extra HTTP request headers passed through to Get-RemoteFile.
#   Attempts:  maximum number of download attempts across all URLs. Zero (the
#              default) or a negative value means retry without limit.
#
# Throws when -Attempts is greater than zero and that many attempts have been
# made without a successful (and, if requested, hash-validated) download.
function MustDownload-File {
  param (
    [parameter(Mandatory = $false)] [string]$Hash,
    [parameter(Mandatory = $false)] [string]$Algorithm = 'SHA512',
    [parameter(Mandatory = $true)] [string]$OutFile,
    [parameter(Mandatory = $true)] [System.Collections.Generic.List[String]]$URLs,
    [parameter(Mandatory = $false)] [System.Collections.IDictionary]$Headers = @{},
    [parameter(Mandatory = $false)] [int]$Attempts = 0
  )

  # If the file is already downloaded and matches the expected hash, skip the download.
  if ((Test-Path -Path $OutFile) -And -Not [string]::IsNullOrEmpty($Hash)) {
    try {
      # Validate-SHA is expected to throw when the hash does not match.
      Validate-SHA -Hash $Hash -Path $OutFile -Algorithm $Algorithm
      Log-Output "Skip download of ${OutFile}, it already exists with expected hash."
      return
    }
    catch {
      # The hash does not match the file on disk.
      # Proceed with the download and overwrite the file.
      Log-Output "${OutFile} exists but had wrong hash. Redownloading."
    }
  }

  $currentAttempt = 0
  while ($true) {
    foreach ($url in $URLs) {
      # Honor the caller-supplied limit instead of a hard-coded constant.
      # Non-positive values keep the historical "retry forever" behavior.
      if (($Attempts -gt 0) -And ($currentAttempt -ge $Attempts)) {
        throw "Attempted to download ${url} ${currentAttempt} times. Giving up."
      }
      $currentAttempt++

      # Attempt to download the file
      try {
        Get-RemoteFile -OutFile $OutFile -Url $url -Headers $Headers
      }
      catch {
        $message = $_.Exception.ToString()
        Log-Output "Failed to download file from ${url}. Will retry. Error: ${message}"
        continue
      }

      # Attempt to validate the hash
      if (-Not [string]::IsNullOrEmpty($Hash)) {
        try {
          Validate-SHA -Hash $Hash -Path $OutFile -Algorithm $Algorithm
        }
        catch {
          $message = $_.Exception.ToString()
          Log-Output "Hash validation of ${url} failed. Will retry. Error: ${message}"
          continue
        }
        Log-Output "Downloaded ${url} (${Algorithm} = ${Hash})"
        return
      }
      Log-Output "Downloaded ${url}"
      return
    }
  }
}
# Downloads a file via HTTP/HTTPS.
# If the file is stored in GCS and this is running on a GCE node with a service account
# with credentials that have the devstore.read_only auth scope the bearer token will be
# automatically added to download the file.
#
# Parameters:
#   OutFile: path to write the response body to; an existing file is overwritten.
#   Url:     the URL to download.
#   Headers: extra HTTP request headers to send. For GCS URLs on a node with
#            storage scope, any caller-supplied Authorization header is
#            replaced by the service account bearer token.
#
# Throws if the request fails, times out (5 minutes), returns a non-success
# status code, or the body cannot be written to OutFile.
function Get-RemoteFile {
  param (
    [parameter(Mandatory = $true)] [string]$OutFile,
    [parameter(Mandatory = $true)] [string]$Url,
    [parameter(Mandatory = $false)] [System.Collections.IDictionary]$Headers = @{}
  )

  $timeout = New-TimeSpan -Minutes 5

  # Initialize disposables up front so the finally block never acts on a
  # same-named variable inherited from a parent scope.
  $httpClient = $null
  $httpResponse = $null
  $outFileStream = $null
  try {
    # Use HttpClient in favor of WebClient.
    # https://docs.microsoft.com/en-us/dotnet/api/system.net.webclient?view=net-5.0#remarks
    $httpClient = New-Object -TypeName System.Net.Http.HttpClient
    $httpClient.Timeout = $timeout
    foreach ($key in $Headers.Keys) {
      $httpClient.DefaultRequestHeaders.Add($key, $Headers[$key])
    }

    # If the URL is for GCS and the node has dev storage scope, add the
    # service account OAuth2 bearer token to the request headers.
    # https://cloud.google.com/compute/docs/access/create-enable-service-accounts-for-instances#applications
    #
    # The pattern is single-quoted with backslash-escaped dots and the host is
    # anchored, so the token is only ever sent to storage.googleapis.com and
    # not to look-alike hosts such as storage.googleapis.com.example.org.
    if (($Url -match '^https://storage\.googleapis\.com([:/?#]|$)') -And $(Check-StorageScope)) {
      # Authorization is a single-valued header; drop any caller-supplied value
      # first so that Add() does not throw.
      [void]$httpClient.DefaultRequestHeaders.Remove("Authorization")
      $httpClient.DefaultRequestHeaders.Add("Authorization", "Bearer $(Get-Credentials)")
    }

    # Attempt to download the file. GetResult() rethrows on failure or
    # cancellation (timeout), so a canceled request can never be mistaken for
    # a successful download.
    $httpResponse = $httpClient.GetAsync([System.Uri]::new($Url)).GetAwaiter().GetResult()

    # Check if the request was successful.
    #
    # DO NOT replace with EnsureSuccessStatusCode(), it prints the
    # OAuth2 bearer token.
    if (-Not $httpResponse.IsSuccessStatusCode) {
      $statusCode = $httpResponse.StatusCode
      throw "Downloading ${Url} returned status code ${statusCode}, retrying."
    }

    $outFileStream = [System.IO.FileStream]::new($OutFile, [System.IO.FileMode]::Create, [System.IO.FileAccess]::Write)
    $httpResponse.Content.CopyToAsync($outFileStream).GetAwaiter().GetResult()
  }
  finally {
    # Release resources in reverse order of acquisition.
    if ($null -ne $outFileStream) {
      $outFileStream.Dispose()
    }
    if ($null -ne $httpResponse) {
      $httpResponse.Dispose()
    }
    if ($null -ne $httpClient) {
      $httpClient.Dispose()
    }
  }
}
# Returns the default service account token for the VM, retrieved from # Returns the default service account token for the VM, retrieved from
# the instance metadata. # the instance metadata.
function Get-Credentials { function Get-Credentials {