Skip to content

Commit

Permalink
Wait for 15 seconds for the qmp listener
Browse files Browse the repository at this point in the history
Make the code consitently wait/retry for 15 seconds - in some cases it
would only wait for 3 seconds. Fixes timeout on slow devices.

Signed-off-by: eriknordmark <erik@zededa.com>
(cherry picked from commit 55b3933)
  • Loading branch information
eriknordmark authored and OhmSpectator committed Oct 18, 2024
1 parent 46b423e commit 9e9192d
Show file tree
Hide file tree
Showing 2 changed files with 61 additions and 52 deletions.
44 changes: 16 additions & 28 deletions pkg/pillar/hypervisor/kvm.go
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
// Copyright (c) 2017-2023 Zededa, Inc.
// Copyright (c) 2017-2024 Zededa, Inc.
// SPDX-License-Identifier: Apache-2.0

package hypervisor
Expand Down Expand Up @@ -1158,36 +1158,24 @@ func (ctx KvmContext) CreateDomConfig(domainName string,
return nil
}

// waitForQmp does the waiting/retry in getQemuStatus, and ignores its returned
// status.
func waitForQmp(domainName string, available bool) error {
maxDelay := time.Second * 10
delay := time.Second
var waited time.Duration
var err error

for {
logrus.Infof("waitForQmp for %s %t: waiting for %v", domainName, available, delay)
if delay != 0 {
time.Sleep(delay)
waited += delay
}
sock := GetQmpExecutorSocket(domainName)
if _, err = getQemuStatus(sock); available == (err == nil) {
logrus.Infof("waitForQmp for %s %t done", domainName, available)
return nil
}
if waited > maxDelay {
// Give up
logrus.Warnf("waitForQmp for %s %t: giving up", domainName, available)
if available {
return logError("Giving up waiting to connect to QEMU Monitor Protocol socket %s from VM %s, error: %v", sock, domainName, err)
}
return logError("Giving up waiting to cleanup VM %s, QEMU Monitor Protocol socket %s is still available", domainName, sock)
}
delay = 2 * delay
if delay > time.Minute {
delay = time.Minute
}
sock := GetQmpExecutorSocket(domainName)
logrus.Infof("waitForQmp for %s %t",
domainName, available)
if _, err = getQemuStatus(sock); available == (err == nil) {
logrus.Infof("waitForQmp for %s %t done", domainName, available)
return nil
}
if available {
return logError("Giving up waiting to connect to QEMU Monitor Protocol socket %s from VM %s, error: %v",
sock, domainName, err)
}
return logError("Giving up waiting to cleanup VM %s, QEMU Monitor Protocol socket %s is still available",
domainName, sock)

}

// Start starts a domain
Expand Down
69 changes: 45 additions & 24 deletions pkg/pillar/hypervisor/qmp.go
Original file line number Diff line number Diff line change
@@ -1,3 +1,6 @@
// Copyright (c) 2020-2024 Zededa, Inc.
// SPDX-License-Identifier: Apache-2.0

package hypervisor

import (
Expand All @@ -15,71 +18,88 @@ import (
// this package implements subset of
// https://qemu.weilnetz.de/doc/qemu-qmp-ref.html

const sockTimeout = 10 * time.Second
const (
sockTimeout = 10 * time.Second
qmpRetries = 5
qmpRetrySleep = 3 * time.Second
)

func execRawCmd(socket, cmd string) ([]byte, error) {
var retry = 3
// execRawCmd will retry (if doReply is set) for qmpRetries times sleeping
// qmpRetrySleep between attempts
func execRawCmd(socket, cmd string, doRetry bool) ([]byte, error) {
var retry int
logrus.Debugf("executing QMP command: %s", cmd)
var err error
var monitor *qmp.SocketMonitor

if doRetry {
retry = qmpRetries
}
for retry >= 0 {
if monitor, err = qmp.NewSocketMonitor("unix", socket, sockTimeout); err == nil {
break
if monitor, err = qmp.NewSocketMonitor("unix", socket, sockTimeout); err != nil {
retry = retry - 1
time.Sleep(qmpRetrySleep)
continue
}
retry = retry - 1
time.Sleep(time.Second)
break
}

if err != nil {
return nil, err
}

if err = monitor.Connect(); err != nil {
// We might have reached retry zero in the above loop, in which case
// we will still try the Connect once.
for retry >= 0 {
if err = monitor.Connect(); err != nil {
retry = retry - 1
time.Sleep(qmpRetrySleep)
continue
}
defer monitor.Disconnect()
break
}
if err != nil {
return nil, err
}
defer monitor.Disconnect()

return monitor.Run([]byte(cmd))
}

func execContinue(socket string) error {
_, err := execRawCmd(socket, `{ "execute": "cont" }`)
_, err := execRawCmd(socket, `{ "execute": "cont" }`, true)
return err
}

func execStop(socket string) error {
_, err := execRawCmd(socket, `{ "execute": "stop" }`)
_, err := execRawCmd(socket, `{ "execute": "stop" }`, true)
return err
}

func execShutdown(socket string) error {
_, err := execRawCmd(socket, `{ "execute": "system_powerdown" }`)
_, err := execRawCmd(socket, `{ "execute": "system_powerdown" }`, true)
return err
}

func execQuit(socket string) error {
_, err := execRawCmd(socket, `{ "execute": "quit" }`)
_, err := execRawCmd(socket, `{ "execute": "quit" }`, true)
return err
}

func execVNCPassword(socket string, password string) error {
vncSetPwd := fmt.Sprintf(`{ "execute": "change-vnc-password", "arguments": { "password": "%s" } }`, password)
_, err := execRawCmd(socket, vncSetPwd)
_, err := execRawCmd(socket, vncSetPwd, true)
return err
}

// QmpExecDeviceDelete removes a device
func QmpExecDeviceDelete(socket, id string) error {
qmpString := fmt.Sprintf(`{ "execute": "device_del", "arguments": { "id": "%s"}}`, id)
_, err := execRawCmd(socket, qmpString)
_, err := execRawCmd(socket, qmpString, true)
return err
}

// QmpExecDeviceAdd adds a usb device via busnum/devnum
func QmpExecDeviceAdd(socket, id string, busnum, devnum uint16) error {
qmpString := fmt.Sprintf(`{ "execute": "device_add", "arguments": { "driver": "usb-host", "hostbus": %d, "hostaddr": %d, "id": "%s"} }`, busnum, devnum, id)
_, err := execRawCmd(socket, qmpString)
_, err := execRawCmd(socket, qmpString, true)
return err
}

Expand Down Expand Up @@ -121,13 +141,14 @@ func getQemuStatus(socket string) (types.SwState, error) {
// the status is unexpected, EVE stops QEMU and game over.
var errs error
state := types.UNKNOWN
for attempt := 1; attempt <= 3; attempt++ {
raw, err := execRawCmd(socket, `{ "execute": "query-status" }`)
for attempt := 1; attempt <= qmpRetries; attempt++ {
raw, err := execRawCmd(socket, `{ "execute": "query-status" }`,
false)
if err != nil {
err = fmt.Errorf("[attempt %d] qmp status failed for QMP socket '%s': err: '%v'; (JSON response: '%s')",
attempt, socket, err, raw)
errs = joinErrors(errs, err)
time.Sleep(time.Second)
time.Sleep(qmpRetrySleep)
continue
}

Expand All @@ -146,15 +167,15 @@ func getQemuStatus(socket string) (types.SwState, error) {
err = fmt.Errorf("[attempt %d] failed to parse QMP status response for QMP socket '%s': err: '%v'; (JSON response: '%s')",
attempt, socket, err, raw)
errs = joinErrors(errs, err)
time.Sleep(time.Second)
time.Sleep(qmpRetrySleep)
continue
}
var matched bool
if state, matched = qmpStatusMap[result.Return.Status]; !matched {
err = fmt.Errorf("[attempt %d] unknown QMP status '%s' for QMP socket '%s'; (JSON response: '%s')",
attempt, result.Return.Status, socket, raw)
errs = joinErrors(errs, err)
time.Sleep(time.Second)
time.Sleep(qmpRetrySleep)
continue
}

Expand Down

0 comments on commit 9e9192d

Please sign in to comment.