diff --git a/package.json b/package.json index d4c19dbd..3ca97330 100644 --- a/package.json +++ b/package.json @@ -1,6 +1,6 @@ { "name": "hooks", - "version": "0.3.12", + "version": "0.3.13", "description": "Three projects are included - k8s: a kubernetes hook implementation that spins up pods dynamically to run a job - docker: A hook implementation of the runner's docker implementation - A hook lib, which contains shared typescript definitions and utilities that the other packages consume", "main": "", "directories": { diff --git a/packages/k8s/package-lock.json b/packages/k8s/package-lock.json index b5908d3b..f0192c45 100644 --- a/packages/k8s/package-lock.json +++ b/packages/k8s/package-lock.json @@ -14,6 +14,7 @@ "@actions/io": "^1.1.2", "@kubernetes/client-node": "^0.16.3", "@types/lodash": "^4.14.191", + "exponential-backoff": "^3.1.1", "hooklib": "file:../hooklib" }, "devDependencies": { @@ -2242,6 +2243,11 @@ "node": "^10.13.0 || ^12.13.0 || ^14.15.0 || >=15.0.0" } }, + "node_modules/exponential-backoff": { + "version": "3.1.1", + "resolved": "https://registry.npmjs.org/exponential-backoff/-/exponential-backoff-3.1.1.tgz", + "integrity": "sha512-dX7e/LHVJ6W3DE1MHWi9S1EYzDESENfLrYohG2G++ovZrYOkm4Knwa0mc1cn84xJOR4KEU0WSchhLbd0UklbHw==" + }, "node_modules/extend": { "version": "3.0.2", "resolved": "https://registry.npmjs.org/extend/-/extend-3.0.2.tgz", @@ -6903,6 +6909,11 @@ "jest-message-util": "^27.5.1" } }, + "exponential-backoff": { + "version": "3.1.1", + "resolved": "https://registry.npmjs.org/exponential-backoff/-/exponential-backoff-3.1.1.tgz", + "integrity": "sha512-dX7e/LHVJ6W3DE1MHWi9S1EYzDESENfLrYohG2G++ovZrYOkm4Knwa0mc1cn84xJOR4KEU0WSchhLbd0UklbHw==" + }, "extend": { "version": "3.0.2", "resolved": "https://registry.npmjs.org/extend/-/extend-3.0.2.tgz", diff --git a/packages/k8s/package.json b/packages/k8s/package.json index afb6122a..bde5dac7 100644 --- a/packages/k8s/package.json +++ b/packages/k8s/package.json @@ -18,6 +18,7 @@ "@actions/io": "^1.1.2", "@kubernetes/client-node": "^0.16.3", "@types/lodash": "^4.14.191", + "exponential-backoff": "^3.1.1", "hooklib": "file:../hooklib" }, "devDependencies": { diff --git a/packages/k8s/src/k8s/index.ts b/packages/k8s/src/k8s/index.ts index d9fc98c3..6beebaf5 100644 --- a/packages/k8s/src/k8s/index.ts +++ b/packages/k8s/src/k8s/index.ts @@ -14,6 +14,7 @@ import { } from '../hooks/constants' import { PodPhase } from './utils' import * as fs from 'fs' +import { backOff } from 'exponential-backoff' const kc = new k8s.KubeConfig() @@ -222,38 +223,70 @@ export async function execPodStep( ): Promise { const exec = new k8s.Exec(kc) await new Promise(async function (resolve, reject) { + const backOffOptions = { + numOfAttempts: 3, + retry: (e, attemptNumber) => { + core.debug(e.toString()) + core.debug( + `an error occurred trying to execute command in pod, retrying (${attemptNumber}/3)` + ) + return true + } + } try { - await exec.exec( - namespace(), - podName, - containerName, - command, - process.stdout, - process.stderr, - stdin ?? null, - false /* tty */, - resp => { - // kube.exec returns an error if exit code is not 0, but we can't actually get the exit code - if (resp.status === 'Success') { - resolve(resp.code) - } else { - core.debug( - JSON.stringify({ - message: resp?.message, - details: resp?.details - }) - ) - reject(resp?.message) - } - } - ) - } catch (error) { - core.error(`Failed to exec pod step`) - core.error(error as Error) + await backOff(async () => { + await execInPod( + exec, + command, + podName, + containerName, + resolve, + reject, + stdin + ) + }, backOffOptions) + } catch (e) { + core.debug('something went wrong in calling pod exec') + reject(e) } }) } +async function execInPod( + exec: k8s.Exec, + command: string[], + podName: string, + containerName: string, + resolve: (value?: number | PromiseLike | undefined) => void, + reject: (reason?: any) => void, + stdin?: stream.Readable +): Promise { + await exec.exec( + namespace(), + podName, + containerName, + command, + process.stdout, + process.stderr, + stdin ?? null, + false /* tty */, + resp => { + // kube.exec returns an error if exit code is not 0, but we can't actually get the exit code + if (resp.status === 'Success') { + resolve(resp.code) + } else { + core.debug( + JSON.stringify({ + message: resp?.message, + details: resp?.details + }) + ) + reject(resp?.message) + } + } + ) +} + export async function waitForJobToComplete(jobName: string): Promise { const backOffManager = new BackOffManager() while (true) {