Skip to content

Commit

Permalink
Allow Limits To Be Greater Than Requests (#3748)
Browse files Browse the repository at this point in the history
* wip

Signed-off-by: Chris Martin <chris@cmartinit.co.uk>

* add tests

Signed-off-by: Chris Martin <chris@cmartinit.co.uk>

* lint

Signed-off-by: Chris Martin <chris@cmartinit.co.uk>

* lint

Signed-off-by: Chris Martin <chris@cmartinit.co.uk>

* lint

Signed-off-by: Chris Martin <chris@cmartinit.co.uk>

* code review

Signed-off-by: Chris Martin <chris@cmartinit.co.uk>

---------

Signed-off-by: Chris Martin <chris@cmartinit.co.uk>
Co-authored-by: Chris Martin <chris@cmartinit.co.uk>
  • Loading branch information
d80tb7 and d80tb7 authored Jun 25, 2024
1 parent d656816 commit 2b3e7f3
Show file tree
Hide file tree
Showing 4 changed files with 90 additions and 35 deletions.
3 changes: 3 additions & 0 deletions internal/armada/configuration/types.go
Original file line number Diff line number Diff line change
Expand Up @@ -125,6 +125,9 @@ type SubmissionConfig struct {
// will have activeDeadlineSeconds set to 1.
// Trumps DefaultActiveDeadline.
DefaultActiveDeadlineByResourceRequest map[string]time.Duration
// Maximum ratio of limits:requests per resource. Jobs who have a higher limits:resource ratio than this will be rejected.
// Any resource type missing from this map will default to 1.0.
MaxOversubscriptionByResourceRequest map[string]float64
}

// TODO: we can probably just typedef this to map[string]string
Expand Down
40 changes: 23 additions & 17 deletions internal/armada/submit/validation/submit_request.go
Original file line number Diff line number Diff line change
Expand Up @@ -3,8 +3,6 @@ package validation
import (
"fmt"

v1 "k8s.io/api/core/v1"

"github.com/pkg/errors"
"k8s.io/component-helpers/scheduling/corev1/nodeaffinity"

Expand Down Expand Up @@ -210,28 +208,36 @@ func validatePriorityClasses(j *api.JobSubmitRequestItem, config configuration.S
// Ensures that the JobSubmitRequestItem's limits and requests are equal.
// Also checks that any resources defined are above minimum values set in config
func validateResources(j *api.JobSubmitRequestItem, config configuration.SubmissionConfig) error {
// Function which tells us if two k8s resource lists contain exactly the same elements
resourceListEquals := func(a v1.ResourceList, b v1.ResourceList) bool {
if len(a) != len(b) {
return false
}
for k, v := range a {
if v != b[k] {
return false
}
}
return true
}

spec := j.GetMainPodSpec()
maxOversubscriptionByResource := config.MaxOversubscriptionByResourceRequest
if maxOversubscriptionByResource == nil {
maxOversubscriptionByResource = map[string]float64{}
}
for _, container := range spec.Containers {

if len(container.Resources.Requests) == 0 && len(container.Resources.Requests) == 0 {
return fmt.Errorf("container %v has no resources specified", container.Name)
}

if !resourceListEquals(container.Resources.Requests, container.Resources.Limits) {
return fmt.Errorf("container %v does not have resource request and limit equal (this is currently not supported)", container.Name)
if len(container.Resources.Requests) != len(container.Resources.Limits) {
return fmt.Errorf("container %v defines different resources for requests and limits", container.Name)
}

for resource, request := range container.Resources.Requests {
limit, ok := container.Resources.Limits[resource]
if !ok {
return fmt.Errorf("container %v defines %s for requests but not limits", container.Name, resource)
}
if limit.MilliValue() < request.MilliValue() {
return fmt.Errorf("container %v defines %s with limits smaller than requests", container.Name, resource)
}
maxOversubscription, ok := maxOversubscriptionByResource[resource.String()]
if !ok {
maxOversubscription = 1.0
}
if float64(limit.MilliValue()) > maxOversubscription*float64(request.MilliValue()) {
return fmt.Errorf("container %v defines %s with limits great than %.2f*requests", container.Name, resource, maxOversubscription)
}
}

for rc, containerRsc := range container.Resources.Requests {
Expand Down
69 changes: 64 additions & 5 deletions internal/armada/submit/validation/submit_request_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -788,9 +788,10 @@ func TestValidateResources(t *testing.T) {
}

tests := map[string]struct {
req *api.JobSubmitRequestItem
minJobResources v1.ResourceList
expectSuccess bool
req *api.JobSubmitRequestItem
minJobResources v1.ResourceList
maxOversubscriptionByResourceRequest map[string]float64
expectSuccess bool
}{
"Requests Missing": {
req: reqFromContainer(v1.Container{
Expand All @@ -808,13 +809,67 @@ func TestValidateResources(t *testing.T) {
}),
expectSuccess: false,
},
"Requests and limits different": {
"Limits Less Than Request": {
req: reqFromContainer(v1.Container{
Resources: v1.ResourceRequirements{
Requests: twoCpu,
Limits: oneCpu,
},
}),
expectSuccess: false,
maxOversubscriptionByResourceRequest: map[string]float64{
"cpu": 2.0,
},
},
"Limits And Requests specify different resources": {
req: reqFromContainer(v1.Container{
Resources: v1.ResourceRequirements{
Requests: v1.ResourceList{
v1.ResourceCPU: resource.MustParse("1"),
},
Limits: v1.ResourceList{
v1.ResourceCPU: resource.MustParse("1"),
v1.ResourceMemory: resource.MustParse("2Gi"),
},
},
}),
expectSuccess: false,
maxOversubscriptionByResourceRequest: map[string]float64{
"cpu": 2.0,
"memory": 2.0,
},
},
"Requests and limits different with MaxResourceOversubscriptionByResourceRequest undefined": {
req: reqFromContainer(v1.Container{
Resources: v1.ResourceRequirements{
Requests: oneCpu,
Limits: twoCpu,
},
}),
expectSuccess: false,
},
"Requests and limits different, passes MaxResourceOversubscriptionByResourceRequest": {
req: reqFromContainer(v1.Container{
Resources: v1.ResourceRequirements{
Requests: oneCpu,
Limits: twoCpu,
},
}),
maxOversubscriptionByResourceRequest: map[string]float64{
"cpu": 2.0,
},
expectSuccess: true,
},
"Requests and limits different, fails MaxResourceOversubscriptionByResourceRequest": {
req: reqFromContainer(v1.Container{
Resources: v1.ResourceRequirements{
Requests: oneCpu,
Limits: twoCpu,
},
}),
maxOversubscriptionByResourceRequest: map[string]float64{
"cpu": 1.9,
},
expectSuccess: false,
},
"Request and limits the same": {
Expand Down Expand Up @@ -846,7 +901,11 @@ func TestValidateResources(t *testing.T) {
}
for name, tc := range tests {
t.Run(name, func(t *testing.T) {
err := validateResources(tc.req, configuration.SubmissionConfig{})
submitConfg := configuration.SubmissionConfig{}
if tc.maxOversubscriptionByResourceRequest != nil {
submitConfg.MaxOversubscriptionByResourceRequest = tc.maxOversubscriptionByResourceRequest
}
err := validateResources(tc.req, submitConfg)
if tc.expectSuccess {
assert.NoError(t, err)
} else {
Expand Down
13 changes: 0 additions & 13 deletions internal/scheduler/jobdb/reconciliation.go
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,6 @@ package jobdb
import (
"github.com/gogo/protobuf/proto"
"github.com/pkg/errors"
v1 "k8s.io/api/core/v1"

armadamath "github.com/armadaproject/armada/internal/common/math"
armadaslices "github.com/armadaproject/armada/internal/common/slices"
Expand Down Expand Up @@ -250,18 +249,6 @@ func (jobDb *JobDb) schedulerJobFromDatabaseJob(dbJob *database.Job) (*Job, erro
return nil, errors.Wrapf(err, "error unmarshalling scheduling info for job %s", dbJob.JobID)
}

// Modify the resource requirements so that limits and requests point to the same object. This saves memory because
// we no longer have to store both objects, while it is safe because at the api we assert that limits and requests
// must be equal. Long term this is undesirable as if we ever want to have limits != requests this trick will not work.
// instead we should find a more efficient mechanism for representing this data
if schedulingInfo.GetPodRequirements() != nil {
resourceRequirements := schedulingInfo.GetPodRequirements().GetResourceRequirements()
schedulingInfo.GetPodRequirements().ResourceRequirements = v1.ResourceRequirements{
Limits: resourceRequirements.Limits,
Requests: resourceRequirements.Limits,
}
}

job, err := jobDb.NewJob(
dbJob.JobID,
dbJob.JobSet,
Expand Down

0 comments on commit 2b3e7f3

Please sign in to comment.