Skip to content

Commit

Permalink
Scheduler: a bit more logging (#3595)
Browse files Browse the repository at this point in the history
  • Loading branch information
robertdavidsmith authored May 19, 2024
1 parent 8e36ae2 commit 61841eb
Showing 1 changed file with 27 additions and 0 deletions.
27 changes: 27 additions & 0 deletions internal/scheduler/queue_scheduler.go
Original file line number Diff line number Diff line change
Expand Up @@ -2,14 +2,17 @@ package scheduler

import (
"container/heap"
"fmt"
"reflect"
"strconv"
"time"

"github.com/armadaproject/armada/internal/armada/configuration"

"github.com/pkg/errors"

"github.com/armadaproject/armada/internal/common/armadacontext"
armadamaps "github.com/armadaproject/armada/internal/common/maps"
schedulerconstraints "github.com/armadaproject/armada/internal/scheduler/constraints"
schedulercontext "github.com/armadaproject/armada/internal/scheduler/context"
"github.com/armadaproject/armada/internal/scheduler/fairness"
Expand Down Expand Up @@ -64,6 +67,15 @@ func (sch *QueueScheduler) Schedule(ctx *armadacontext.Context) (*SchedulerResul
var failedJobs []*schedulercontext.JobSchedulingContext
nodeIdByJobId := make(map[string]string)
additionalAnnotationsByJobId := make(map[string]map[string]string)
ctx.Info("Looping through candidate gangs...")

type queueStats struct {
gangCount int
jobCount int
time time.Duration
}

statsPerQueue := map[string]queueStats{}
for {
// Peek() returns the next gang to try to schedule. Call Clear() before calling Peek() again.
// Calling Clear() after (failing to) schedule ensures we get the next gang in order of smallest fair share.
Expand All @@ -89,6 +101,8 @@ func (sch *QueueScheduler) Schedule(ctx *armadacontext.Context) (*SchedulerResul
return nil, err
default:
}

start := time.Now()
if ok, unschedulableReason, err := sch.gangScheduler.Schedule(ctx, gctx); err != nil {
return nil, err
} else if ok {
Expand All @@ -112,12 +126,25 @@ func (sch *QueueScheduler) Schedule(ctx *armadacontext.Context) (*SchedulerResul
sch.candidateGangIterator.OnlyYieldEvictedForQueue(gctx.Queue)
}

duration := time.Now().Sub(start)
stats := statsPerQueue[gctx.Queue]
stats.gangCount++
stats.jobCount += gctx.Cardinality()
stats.time += duration
statsPerQueue[gctx.Queue] = stats
if duration.Seconds() > 1 {
ctx.Infof("Slow schedule: queue %s, gang cardinality %d, first job id %s, time %fs", gctx.Queue, gctx.Cardinality(), gctx.JobIds()[0], duration.Seconds())
}

// Clear() to get the next gang in order of smallest fair share.
// Calling clear here ensures the gang scheduled in this iteration is accounted for.
if err := sch.candidateGangIterator.Clear(); err != nil {
return nil, err
}
}
ctx.Infof("Finished looping through candidate gangs: details %v", armadamaps.MapValues(statsPerQueue, func(s queueStats) string {
return fmt.Sprintf("{gangs=%d, jobs=%d, time=%fs}", s.gangCount, s.jobCount, s.time.Seconds())
}))
if sch.schedulingContext.TerminationReason == "" {
sch.schedulingContext.TerminationReason = "no remaining candidate jobs"
}
Expand Down

0 comments on commit 61841eb

Please sign in to comment.