Skip to content

Commit

Permalink
pillar: Release CPUs on domain activation failure.
Browse files Browse the repository at this point in the history
This commit addresses an issue where CPUs assigned to a domain within
doActivate() are not released if the domain activation fails. The new
logic ensures that CPUs are properly released and the CPU mask in the
status is updated accordingly. This is achieved by introducing the
releaseCPUs function and calling it in the appropriate error handling
blocks within doActivate().

It is common for doActivate to fail in scenarios such as switching
application profiles that share the same adapter. In such cases, the
second application will fail to activate until the first one releases
the necessary adapter.

Signed-off-by: Nikolay Martyanov <nikolay@zededa.com>
  • Loading branch information
OhmSpectator authored and eriknordmark committed May 30, 2024
1 parent 19a8690 commit 10c924e
Showing 1 changed file with 22 additions and 0 deletions.
22 changes: 22 additions & 0 deletions pkg/pillar/cmd/domainmgr/domainmgr.go
Original file line number Diff line number Diff line change
Expand Up @@ -1270,6 +1270,9 @@ func updateNonPinnedCPUs(ctx *domainContext, config *types.DomainConfig, status
return nil
}

// assignCPUs assigns CPUs to the VM based on the configuration
// By the assignment, we mean that the CPUs are assigned in the CPUAllocator context to the given VM
// and the cpumask is updated in the *status*
func assignCPUs(ctx *domainContext, config *types.DomainConfig, status *types.DomainStatus) error {
if config.VmConfig.CPUsPinned { // Pin the CPU
cpusToAssign, err := ctx.cpuAllocator.Allocate(config.UUIDandVersion.UUID, config.VCpus)
Expand All @@ -1285,6 +1288,17 @@ func assignCPUs(ctx *domainContext, config *types.DomainConfig, status *types.Do
return nil
}

// releaseCPUs releases the CPUs that were previously assigned to the VM.
// The cpumask in the *status* is updated accordingly, and the CPUs are released in the CPUAllocator context.
func releaseCPUs(ctx *domainContext, config *types.DomainConfig, status *types.DomainStatus) {
if ctx.cpuPinningSupported && config.VmConfig.CPUsPinned && status.VmConfig.CPUs != "" {
if err := ctx.cpuAllocator.Free(config.UUIDandVersion.UUID); err != nil {
log.Errorf("Failed to free CPUs for %s: %s", config.DisplayName, err)
}
}
status.VmConfig.CPUs = ""
}

func handleCreate(ctx *domainContext, key string, config *types.DomainConfig) {

log.Functionf("handleCreate(%v) for %s",
Expand Down Expand Up @@ -1476,6 +1490,7 @@ func doActivate(ctx *domainContext, config types.DomainConfig,
status.PendingAdd = false
status.SetErrorDescription(*errDescription)
status.AdaptersFailed = true
releaseCPUs(ctx, &config, status)
publishDomainStatus(ctx, status)
releaseAdapters(ctx, config.IoAdapterList, config.UUIDandVersion.UUID,
nil)
Expand All @@ -1499,6 +1514,7 @@ func doActivate(ctx *domainContext, config types.DomainConfig,
status.PendingAdd = false
status.SetErrorNow(err.Error())
status.AdaptersFailed = true
releaseCPUs(ctx, &config, status)
publishDomainStatus(ctx, status)
releaseAdapters(ctx, config.IoAdapterList, config.UUIDandVersion.UUID,
nil)
Expand All @@ -1522,6 +1538,7 @@ func doActivate(ctx *domainContext, config types.DomainConfig,
snapshotID, config.UUIDandVersion.UUID, err)
log.Error(err.Error())
status.SetErrorNow(err.Error())
releaseCPUs(ctx, &config, status)
return
}

Expand Down Expand Up @@ -1550,6 +1567,7 @@ func doActivate(ctx *domainContext, config types.DomainConfig,
err := fmt.Errorf("doActivate: Failed to write cloud-init metadata file. Error %s", err)
log.Error(err.Error())
status.SetErrorNow(err.Error())
releaseCPUs(ctx, &config, status)
return
}

Expand All @@ -1560,6 +1578,7 @@ func doActivate(ctx *domainContext, config types.DomainConfig,
err := fmt.Errorf("doActivate: Failed to apply cloud-init config. Error %s", err)
log.Error(err.Error())
status.SetErrorNow(err.Error())
releaseCPUs(ctx, &config, status)
return
}
}
Expand All @@ -1575,6 +1594,7 @@ func doActivate(ctx *domainContext, config types.DomainConfig,
if err != nil {
log.Errorf("Failed to check disk format: %v", err.Error())
status.SetErrorNow(err.Error())
releaseCPUs(ctx, &config, status)
return
}
}
Expand All @@ -1592,6 +1612,7 @@ func doActivate(ctx *domainContext, config types.DomainConfig,
log.Errorf("Failed to create DomainStatus from %v: %s",
config, err)
status.SetErrorNow(err.Error())
releaseCPUs(ctx, &config, status)
return
}

Expand All @@ -1611,6 +1632,7 @@ func doActivate(ctx *domainContext, config types.DomainConfig,
log.Errorf("DomainCreate for %s: %s", status.DomainName, err)
status.BootFailed = true
status.SetErrorNow(err.Error())
releaseCPUs(ctx, &config, status)
publishDomainStatus(ctx, status)
return
}
Expand Down

0 comments on commit 10c924e

Please sign in to comment.