Skip to content

Commit

Permalink
Extend mtbf time range to also support hours and minutes.
Browse files Browse the repository at this point in the history
  • Loading branch information
snarlistic committed Mar 22, 2024
1 parent 7dcb3ca commit 541761d
Show file tree
Hide file tree
Showing 17 changed files with 175 additions and 135 deletions.
8 changes: 4 additions & 4 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@ kube-monkey works on an opt-in model and will only schedule terminations for Kub
Opt-in is done by setting the following labels on a k8s app:

**`kube-monkey/enabled`**: Set to **`"enabled"`** to opt-in to kube-monkey
**`kube-monkey/mtbf`**: Mean time between failure (in days). For example, if set to **`"3"`**, the k8s app can expect to have a Pod
**`kube-monkey/mtbf`**: Mean time between failures. Examples: `1` or `1d` (1 day), `4h` (4 hours), and `20m` (20 minutes). A recommended value is `2h` or `3h`, so that pods get killed at least a couple of times a day.
killed approximately every third weekday.
**`kube-monkey/identifier`**: A unique identifier for the k8s apps. This is used to identify the pods
that belong to a k8s app as Pods inherit labels from their k8s app. So, if kube-monkey detects that app `foo` has enrolled to be a victim, kube-monkey will look for all pods that have the label `kube-monkey/identifier: foo` to determine which pods are candidates for killing. The recommendation is to set this value to be the same as the app's name.
Expand All @@ -41,7 +41,7 @@ that belong to a k8s app as Pods inherit labels from their k8s app. So, if kube-
* if `random-max-percent`, provide a number from `0`-`100` to specify the max `%` of pods kube-monkey can kill
* if `fixed-percent`, provide a number from `0`-`100` to specify the `%` of pods to kill

#### Example of opted-in Deployment killing one pod per purge
#### Example of opted-in Deployment killing one pod once every two hours.

```yaml
---
Expand All @@ -56,7 +56,7 @@ spec:
labels:
kube-monkey/enabled: enabled
kube-monkey/identifier: monkey-victim
kube-monkey/mtbf: '2'
kube-monkey/mtbf: '2h'
kube-monkey/kill-mode: "fixed"
kube-monkey/kill-value: '1'
[... omitted ...]
Expand All @@ -74,7 +74,7 @@ metadata:
labels:
kube-monkey/enabled: enabled
kube-monkey/identifier: monkey-victim
kube-monkey/mtbf: '2'
kube-monkey/mtbf: '2h'
kube-monkey/kill-mode: "fixed"
kube-monkey/kill-value: '1'
spec:
Expand Down
103 changes: 89 additions & 14 deletions internal/pkg/calendar/calendar.go
Original file line number Diff line number Diff line change
@@ -1,7 +1,11 @@
package calendar

import (
"errors"
"fmt"
"math/rand"
"strconv"
"strings"
"time"

"github.com/golang/glog"
Expand Down Expand Up @@ -49,19 +53,90 @@ func NextRuntime(loc *time.Location, r int) time.Time {
return time.Date(year, month, day, r, 0, 0, 0, loc)
}

// ParseMtbf parses an mtbf (mean time between failures) value and returns it
// as a time.Duration.
//
// Accepted forms:
//   - a plain integer such as "2", interpreted as days for backward
//     compatibility with versions that only accepted whole days;
//   - an integer followed by a single "d" such as "2d", also meaning days;
//   - any string time.ParseDuration accepts, such as "4h", "20m" or "1h30m".
//
// An error is returned when mtbf is empty, cannot be parsed, is out of range
// for a time.Duration, or is shorter than one minute.
func ParseMtbf(mtbf string) (time.Duration, error) {
	// Reject the empty string explicitly instead of indexing into it.
	if mtbf == "" {
		return 0, errors.New("mtbf must not be empty")
	}

	// Largest number of whole days that still fits in a time.Duration.
	const maxDays = int64(1<<63-1) / int64(24*time.Hour)

	// The biggest unit time.ParseDuration understands is the hour, so a day
	// count is converted to hours here. Only a single trailing "d" is
	// stripped, and only when what remains is an integer; anything else
	// (for example "2dd" or "1hd") is handed to time.ParseDuration untouched
	// and rejected there.
	spec := mtbf
	if days, err := strconv.Atoi(strings.TrimSuffix(mtbf, "d")); err == nil {
		if int64(days) > maxDays || int64(days) < -maxDays {
			return 0, fmt.Errorf("mtbf %q is out of range", mtbf)
		}
		spec = fmt.Sprintf("%dh", int64(days)*24)
	}

	duration, err := time.ParseDuration(spec)
	if err != nil {
		return 0, err
	}
	// This check also rejects zero and negative values.
	if duration < time.Minute {
		return 0, errors.New("smallest valid mtbf is one minute.")
	}
	return duration, nil
}

// RandomTimeInRange returns a random time within the range specified by startHour and endHour
func RandomTimeInRange(startHour int, endHour int, loc *time.Location) time.Time {
// calculate the number of minutes in the range
minutesInRange := (endHour - startHour) * 60

// calculate a random minute-offset in range [0, minutesInRange)
r := rand.New(rand.NewSource(time.Now().UnixNano()))
randMinuteOffset := r.Intn(minutesInRange)
offsetDuration := time.Duration(randMinuteOffset) * time.Minute

// Add the minute offset to the start of the range to get a random
// time within the range
year, month, date := time.Now().Date()
rangeStart := time.Date(year, month, date, startHour, 0, 0, 0, loc)
return rangeStart.Add(offsetDuration)
// RandomTimeInRange returns the random kill times, expressed in loc, for a
// victim with the given mtbf.
//
// If mtbf cannot be parsed, the error is logged and a single time roughly ten
// years in the future is returned.
//
// If mtbf is one day or longer, exactly one time is returned: today's date in
// loc at startHour plus a random whole-minute offset in
// [0, (endHour-startHour)*60).
//
// If mtbf is shorter than one day, a chain of times is returned. Each time is
// the previous one (initially "now" in loc) plus a random number of seconds in
// [0, 2*mtbf); the chain stops at the first candidate that is not before
// today's endHour. The result may therefore be empty.
func RandomTimeInRange(mtbf string, startHour int, endHour int, loc *time.Location) []time.Time {
	meanInterval, err := ParseMtbf(mtbf)
	if err != nil {
		glog.Errorf("error parsing customized mtbf %s: %v", mtbf, err)
		return []time.Time{time.Now().Add(time.Duration(24*365*10) * time.Hour)}
	}

	// Seed one generator per call; re-seeding from the clock on every loop
	// iteration can repeat seeds when iterations run faster than the clock
	// resolution.
	rng := rand.New(rand.NewSource(time.Now().UnixNano()))

	if meanInterval >= 24*time.Hour {
		// calculate the number of minutes in the range
		minutesInRange := (endHour - startHour) * 60

		// calculate a random minute-offset in range [0, minutesInRange)
		offset := time.Duration(rng.Intn(minutesInRange)) * time.Minute

		// Take today's date in loc, not in the process-local zone, so the
		// range start falls on the correct calendar day when the two differ.
		year, month, day := time.Now().In(loc).Date()
		rangeStart := time.Date(year, month, day, startHour, 0, 0, 0, loc)
		return []time.Time{rangeStart.Add(offset)}
	}

	// The random window is twice the mean time between failures, so the
	// expected gap between consecutive kills is about one mtbf.
	windowSeconds := int64(2 * meanInterval / time.Second)

	var times []time.Time
	startTime := time.Now().In(loc)
	for {
		candidate := startTime.Add(time.Duration(rng.Int63n(windowSeconds)) * time.Second)

		// Today's start and end hour on the same date as the current start time.
		year, month, day := startTime.Date()
		todayStartTime := time.Date(year, month, day, startHour, 0, 0, 0, loc)
		todayEndTime := time.Date(year, month, day, endHour, 0, 0, 0, loc)

		// Now is earlier than the start hour. Per the original author this
		// only happens in tests; normal operation does not reach it.
		if startTime.Before(todayStartTime) {
			return []time.Time{todayStartTime}
		}
		// The candidate is at or past today's end hour: the schedule is done.
		if !candidate.Before(todayEndTime) {
			return times
		}

		glog.V(1).Infof("RandomTimeInRange calculate time %s", candidate)
		times = append(times, candidate)
		// Move start time up to the calculated random time
		startTime = candidate
	}
}
2 changes: 1 addition & 1 deletion internal/pkg/chaos/chaosmock.go
Original file line number Diff line number Diff line change
Expand Up @@ -73,7 +73,7 @@ func (vm *VictimMock) IsWhitelisted() bool {
}

func NewVictimMock() *VictimMock {
v := victims.New(KIND, NAME, NAMESPACE, IDENTIFIER, 1)
v := victims.New(KIND, NAME, NAMESPACE, IDENTIFIER, "1h")
return &VictimMock{
VictimBase: *v,
}
Expand Down
8 changes: 4 additions & 4 deletions internal/pkg/kubemonkey/kubemonkey.go
Original file line number Diff line number Diff line change
Expand Up @@ -44,10 +44,6 @@ func Run() error {
}

for {
// Calculate duration to sleep before next run
sleepDuration := durationToNextRun(config.RunHour(), config.Timezone())
time.Sleep(sleepDuration)

schedule, err := schedule.New()
if err != nil {
glog.Fatal(err.Error())
Expand All @@ -58,6 +54,10 @@ func Run() error {
}
fmt.Println(schedule)
ScheduleTerminations(schedule.Entries(), notificationsClient)

// Calculate duration to sleep before next run
sleepDuration := durationToNextRun(config.RunHour(), config.Timezone())
time.Sleep(sleepDuration)
}
}

Expand Down
28 changes: 19 additions & 9 deletions internal/pkg/schedule/schedule.go
Original file line number Diff line number Diff line change
Expand Up @@ -81,33 +81,43 @@ func New() (*Schedule, error) {
}

for _, victim := range victims {
killtime := CalculateKillTime()

if ShouldScheduleChaos(victim.Mtbf()) {
schedule.Add(chaos.New(killtime, victim))
mtbf := victim.Mtbf()
parsed_mtbf, err := calendar.ParseMtbf(mtbf)
if err != nil {
glog.Errorf("error parsing customized mtbf for %s/%s in namespace %s - %s: %v", victim.Kind(), victim.Name(), victim.Namespace(), mtbf, err)
continue
}
killtimes := CalculateKillTimes(mtbf)
one_day, _ := time.ParseDuration("24h")
// If the parsed mtbf value is less than one day we want to add the calculated kill times no matter
// what and otherwise we use probability to decide if we will schedule the calculated kill time.
if parsed_mtbf < one_day || ShouldScheduleChaos(float64(parsed_mtbf / one_day)) {
for _, killtime := range killtimes {
schedule.Add(chaos.New(killtime, victim))
}
}
}

return schedule, nil
}

func CalculateKillTime() time.Time {
func CalculateKillTimes(mtbf string) []time.Time {
loc := config.Timezone()
if config.DebugEnabled() && config.DebugScheduleImmediateKill() {
r := rand.New(rand.NewSource(time.Now().UnixNano()))
// calculate a second-offset in the next minute
secOffset := r.Intn(60)
return time.Now().In(loc).Add(time.Duration(secOffset) * time.Second)
return []time.Time{time.Now().In(loc).Add(time.Duration(secOffset) * time.Second)}
}
return calendar.RandomTimeInRange(config.StartHour(), config.EndHour(), loc)
return calendar.RandomTimeInRange(mtbf, config.StartHour(), config.EndHour(), loc)
}

func ShouldScheduleChaos(mtbf int) bool {
func ShouldScheduleChaos(mtbf float64) bool {
if config.DebugEnabled() && config.DebugForceShouldKill() {
return true
}

r := rand.New(rand.NewSource(time.Now().UnixNano()))
probability := 1 / float64(mtbf)
probability := 1 / mtbf
return probability > r.Float64()
}
12 changes: 6 additions & 6 deletions internal/pkg/schedule/schedule_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -87,16 +87,16 @@ func TestStringWithEntries(t *testing.T) {

func TestCalculateKillTimeRandom(t *testing.T) {
config.SetDefaults()
killtime := CalculateKillTime()
killtimes := CalculateKillTimes("1h")

scheduledTime := func() (success bool) {
if killtime.Hour() >= config.StartHour() && killtime.Hour() <= config.EndHour() {
if killtimes[0].Hour() >= config.StartHour() && killtimes[0].Hour() <= config.EndHour() {
success = true
}
return
}

assert.Equal(t, killtime.Location(), config.Timezone())
assert.Equal(t, killtimes[0].Location(), config.Timezone())
assert.Condition(t, scheduledTime)

}
Expand All @@ -105,10 +105,10 @@ func TestCalculateKillTimeNow(t *testing.T) {
config.SetDefaults()
viper.SetDefault(param.DebugEnabled, true)
viper.SetDefault(param.DebugScheduleImmediateKill, true)
killtime := CalculateKillTime()
killtimes := CalculateKillTimes("1h")

assert.Equal(t, killtime.Location(), config.Timezone())
assert.WithinDuration(t, killtime, time.Now(), time.Second*time.Duration(60))
assert.Equal(t, killtimes[0].Location(), config.Timezone())
assert.WithinDuration(t, killtimes[0], time.Now(), time.Second*time.Duration(60))
config.SetDefaults()
}

Expand Down
16 changes: 6 additions & 10 deletions internal/pkg/victims/factory/daemonsets/daemonsets.go
Original file line number Diff line number Diff line change
Expand Up @@ -2,8 +2,8 @@ package daemonsets

import (
"fmt"
"strconv"

"kube-monkey/internal/pkg/calendar"
"kube-monkey/internal/pkg/config"
"kube-monkey/internal/pkg/victims"

Expand Down Expand Up @@ -44,20 +44,16 @@ func identifier(kubekind *appsv1.DaemonSet) (string, error) {

// Read the mean-time-between-failures value defined by the DaemonSet
// in the label defined by config.MtbfLabelKey
func meanTimeBetweenFailures(kubekind *appsv1.DaemonSet) (int, error) {
func meanTimeBetweenFailures(kubekind *appsv1.DaemonSet) (string, error) {
mtbf, ok := kubekind.Labels[config.MtbfLabelKey]
if !ok {
return -1, fmt.Errorf("%T %s does not have %s label", kubekind, kubekind.Name, config.MtbfLabelKey)
return "", fmt.Errorf("%T %s does not have %s label", kubekind, kubekind.Name, config.MtbfLabelKey)
}

mtbfInt, err := strconv.Atoi(mtbf)
_, err := calendar.ParseMtbf(mtbf)
if err != nil {
return -1, err
return "", fmt.Errorf("error parsing mtbf %s: %v", mtbf, err)
}

if !(mtbfInt > 0) {
return -1, fmt.Errorf("Invalid value for label %s: %d", config.MtbfLabelKey, mtbfInt)
}

return mtbfInt, nil
return mtbf, nil
}
19 changes: 4 additions & 15 deletions internal/pkg/victims/factory/daemonsets/daemonsets_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,7 @@ func TestNew(t *testing.T) {
NAME,
map[string]string{
config.IdentLabelKey: IDENTIFIER,
config.MtbfLabelKey: "1",
config.MtbfLabelKey: "1h",
},
)
ds, err := New(&v1ds)
Expand All @@ -43,14 +43,14 @@ func TestNew(t *testing.T) {
assert.Equal(t, NAME, ds.Name())
assert.Equal(t, NAMESPACE, ds.Namespace())
assert.Equal(t, IDENTIFIER, ds.Identifier())
assert.Equal(t, 1, ds.Mtbf())
assert.Equal(t, "1h", ds.Mtbf())
}

func TestInvalidIdentifier(t *testing.T) {
v1ds := newDaemonSet(
NAME,
map[string]string{
config.MtbfLabelKey: "1",
config.MtbfLabelKey: "1h",
},
)
_, err := New(&v1ds)
Expand Down Expand Up @@ -78,16 +78,5 @@ func TestInvalidMtbf(t *testing.T) {
)
_, err = New(&v1ds)

assert.Errorf(t, err, "Expected an error if "+config.MtbfLabelKey+" label can't be converted a Int type")

v1ds = newDaemonSet(
NAME,
map[string]string{
config.IdentLabelKey: IDENTIFIER,
config.MtbfLabelKey: "0",
},
)
_, err = New(&v1ds)

assert.Errorf(t, err, "Expected an error if "+config.MtbfLabelKey+" label is lower than 1")
assert.Errorf(t, err, "Expected an error if "+config.MtbfLabelKey+" label can't be converted a time.Duration type")
}
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@ func TestEligibleDaemonSets(t *testing.T) {
NAME,
map[string]string{
"kube-monkey/identifier": "1",
"kube-monkey/mtbf": "1",
"kube-monkey/mtbf": "1h",
},
)

Expand All @@ -30,7 +30,7 @@ func TestIsEnrolled(t *testing.T) {
NAME,
map[string]string{
config.IdentLabelKey: "1",
config.MtbfLabelKey: "1",
config.MtbfLabelKey: "1h",
config.EnabledLabelKey: config.EnabledLabelValue,
},
)
Expand All @@ -49,7 +49,7 @@ func TestIsNotEnrolled(t *testing.T) {
NAME,
map[string]string{
config.IdentLabelKey: "1",
config.MtbfLabelKey: "1",
config.MtbfLabelKey: "1h",
config.EnabledLabelKey: "x",
},
)
Expand All @@ -66,7 +66,7 @@ func TestIsNotEnrolled(t *testing.T) {
func TestKillType(t *testing.T) {

ident := "1"
mtbf := "1"
mtbf := "1h"
killMode := "kill-mode"

v1ds := newDaemonSet(
Expand Down Expand Up @@ -104,7 +104,7 @@ func TestKillType(t *testing.T) {
func TestKillValue(t *testing.T) {

ident := "1"
mtbf := "1"
mtbf := "1h"
killValue := "0"

v1ds := newDaemonSet(
Expand Down
Loading

0 comments on commit 541761d

Please sign in to comment.