[release-15.0] Fix flakiness in VTOrc tests (#13489) #13534

Closed · wants to merge 2 commits
9 changes: 9 additions & 0 deletions go/test/endtoend/vtorc/general/vtorc_test.go
@@ -37,6 +37,7 @@ import (
// verify replication is setup
// verify that with multiple vtorc instances, we still only have 1 PlannedReparentShard call
func TestPrimaryElection(t *testing.T) {
defer utils.PrintVTOrcLogsOnFailure(t, clusterInfo.ClusterInstance)
defer cluster.PanicHandler(t)
utils.SetupVttabletsAndVTOrcs(t, clusterInfo, 2, 1, nil, cluster.VTOrcConfiguration{
PreventCrossDataCenterPrimaryFailover: true,
@@ -64,6 +65,7 @@ func TestPrimaryElection(t *testing.T) {
// verify rdonly is not elected, only replica
// verify replication is setup
func TestSingleKeyspace(t *testing.T) {
defer utils.PrintVTOrcLogsOnFailure(t, clusterInfo.ClusterInstance)
defer cluster.PanicHandler(t)
utils.SetupVttabletsAndVTOrcs(t, clusterInfo, 1, 1, []string{"--clusters_to_watch", "ks"}, cluster.VTOrcConfiguration{
PreventCrossDataCenterPrimaryFailover: true,
@@ -81,6 +83,7 @@ func TestSingleKeyspace(t *testing.T) {
// verify rdonly is not elected, only replica
// verify replication is setup
func TestKeyspaceShard(t *testing.T) {
defer utils.PrintVTOrcLogsOnFailure(t, clusterInfo.ClusterInstance)
defer cluster.PanicHandler(t)
utils.SetupVttabletsAndVTOrcs(t, clusterInfo, 1, 1, []string{"--clusters_to_watch", "ks/0"}, cluster.VTOrcConfiguration{
PreventCrossDataCenterPrimaryFailover: true,
@@ -100,6 +103,7 @@ func TestKeyspaceShard(t *testing.T) {
// 4. setup replication from non-primary, let vtorc repair
// 5. make instance A replicate from B and B from A, wait for repair
func TestVTOrcRepairs(t *testing.T) {
defer utils.PrintVTOrcLogsOnFailure(t, clusterInfo.ClusterInstance)
defer cluster.PanicHandler(t)
utils.SetupVttabletsAndVTOrcs(t, clusterInfo, 3, 0, nil, cluster.VTOrcConfiguration{
PreventCrossDataCenterPrimaryFailover: true,
@@ -216,6 +220,7 @@ func TestVTOrcRepairs(t *testing.T) {
func TestRepairAfterTER(t *testing.T) {
// test fails intermittently on CI, skip until it can be fixed.
t.SkipNow()
defer utils.PrintVTOrcLogsOnFailure(t, clusterInfo.ClusterInstance)
defer cluster.PanicHandler(t)
utils.SetupVttabletsAndVTOrcs(t, clusterInfo, 2, 0, nil, cluster.VTOrcConfiguration{
PreventCrossDataCenterPrimaryFailover: true,
@@ -252,6 +257,7 @@ func TestSemiSync(t *testing.T) {
// stop any vtorc instance running due to a previous test.
utils.StopVTOrcs(t, clusterInfo)
newCluster := utils.SetupNewClusterSemiSync(t)
defer utils.PrintVTOrcLogsOnFailure(t, newCluster.ClusterInstance)
utils.StartVTOrcs(t, newCluster, nil, cluster.VTOrcConfiguration{
PreventCrossDataCenterPrimaryFailover: true,
}, 1)
@@ -316,6 +322,7 @@ func TestSemiSync(t *testing.T) {

// TestVTOrcWithPrs tests that VTOrc works fine even when PRS is called from vtctld
func TestVTOrcWithPrs(t *testing.T) {
defer utils.PrintVTOrcLogsOnFailure(t, clusterInfo.ClusterInstance)
defer cluster.PanicHandler(t)
utils.SetupVttabletsAndVTOrcs(t, clusterInfo, 4, 0, nil, cluster.VTOrcConfiguration{
PreventCrossDataCenterPrimaryFailover: true,
@@ -364,6 +371,7 @@ func TestVTOrcWithPrs(t *testing.T) {

// TestMultipleDurabilities tests that VTOrc works with 2 keyspaces having 2 different durability policies
func TestMultipleDurabilities(t *testing.T) {
defer utils.PrintVTOrcLogsOnFailure(t, clusterInfo.ClusterInstance)
defer cluster.PanicHandler(t)
// Setup a normal cluster and start vtorc
utils.SetupVttabletsAndVTOrcs(t, clusterInfo, 1, 1, nil, cluster.VTOrcConfiguration{}, 1, "")
@@ -388,6 +396,7 @@ func TestDurabilityPolicySetLater(t *testing.T) {
// stop any vtorc instance running due to a previous test.
utils.StopVTOrcs(t, clusterInfo)
newCluster := utils.SetupNewClusterSemiSync(t)
defer utils.PrintVTOrcLogsOnFailure(t, newCluster.ClusterInstance)
keyspace := &newCluster.ClusterInstance.Keyspaces[0]
shard0 := &keyspace.Shards[0]
// Before starting VTOrc we explicitly want to set the durability policy of the keyspace to an empty string
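Each test in this file now registers `utils.PrintVTOrcLogsOnFailure` before `cluster.PanicHandler`. Because Go runs deferred calls in LIFO order, the log printer executes last, presumably after the panic handler has recovered any panic and recorded the failure, so `t.Failed()` reflects the final test state when the logs are printed. A minimal, illustrative sketch of that ordering (not part of this diff):

```go
package main

import "fmt"

func main() {
	// Deferred calls run last-in, first-out.
	defer fmt.Println("2: log printer (deferred first, runs last)")
	defer fmt.Println("1: panic handler (deferred second, runs first)")
	fmt.Println("0: test body")
}
```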
162 changes: 162 additions & 0 deletions go/test/endtoend/vtorc/primaryfailure/primary_failure_test.go
@@ -32,8 +32,22 @@
// covers the test case master-failover from orchestrator
// Also tests that VTOrc can handle multiple failures, if the durability policies allow it
func TestDownPrimary(t *testing.T) {
defer utils.PrintVTOrcLogsOnFailure(t, clusterInfo.ClusterInstance)
defer cluster.PanicHandler(t)
<<<<<<< HEAD

Check failure on line 37 in go/test/endtoend/vtorc/primaryfailure/primary_failure_test.go (GitHub Actions / Static Code Checks Etc): expected statement, found '<<'
<<<<<<< HEAD
utils.SetupVttabletsAndVTOrcs(t, clusterInfo, 2, 1, nil, cluster.VTOrcConfiguration{
=======
=======
>>>>>>> 888df9228e (Fix flakiness in VTOrc tests (#13489))

Check failure on line 42 in go/test/endtoend/vtorc/primaryfailure/primary_failure_test.go (GitHub Actions / Static Code Checks Etc): illegal character U+0023 '#'
// We specify the --wait-replicas-timeout to a small value because we spawn a cross-cell replica later in the test.
// If that replica is more advanced than the same-cell-replica, then we try to promote the cross-cell replica as an intermediate source.
// If we don't specify a small value of --wait-replicas-timeout, then we would end up waiting for 30 seconds for the dead-primary to respond, failing this test.
utils.SetupVttabletsAndVTOrcs(t, clusterInfo, 2, 1, []string{"--remote_operation_timeout=10s", "--wait-replicas-timeout=5s"}, cluster.VTOrcConfiguration{
<<<<<<< HEAD
>>>>>>> 888df9228e (Fix flakiness in VTOrc tests (#13489))

Check failure on line 48 in go/test/endtoend/vtorc/primaryfailure/primary_failure_test.go (GitHub Actions / Static Code Checks Etc): illegal character U+0023 '#'
=======
>>>>>>> 888df9228e (Fix flakiness in VTOrc tests (#13489))

Check failure on line 50 in go/test/endtoend/vtorc/primaryfailure/primary_failure_test.go (GitHub Actions / Static Code Checks Etc): illegal character U+0023 '#'
PreventCrossDataCenterPrimaryFailover: true,
}, 1, "semi_sync")
keyspace := &clusterInfo.ClusterInstance.Keyspaces[0]
@@ -83,9 +97,150 @@
utils.WaitForSuccessfulRecoveryCount(t, vtOrcProcess, logic.RecoverDeadPrimaryRecoveryName, 1)
}
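Note that the hunk above was committed with unresolved conflict markers, which is exactly what the static-check annotations flag. Assuming the intended resolution keeps the incoming 888df9228e side (it matches TestDeadPrimaryRecoversImmediately below), the setup call would presumably read:

```go
// Presumed conflict resolution; an assumption, not part of the committed diff.
// We specify --wait-replicas-timeout to a small value because we spawn a
// cross-cell replica later in the test; otherwise we would wait 30 seconds
// for the dead primary to respond, failing the test.
utils.SetupVttabletsAndVTOrcs(t, clusterInfo, 2, 1,
	[]string{"--remote_operation_timeout=10s", "--wait-replicas-timeout=5s"},
	cluster.VTOrcConfiguration{
		PreventCrossDataCenterPrimaryFailover: true,
	}, 1, "semi_sync")
```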

<<<<<<< HEAD

Check failure on line 100 in go/test/endtoend/vtorc/primaryfailure/primary_failure_test.go (GitHub Actions / Static Code Checks Etc): expected declaration, found '<<'
=======
// bring down primary before VTOrc has started, let vtorc repair.
func TestDownPrimaryBeforeVTOrc(t *testing.T) {
defer utils.PrintVTOrcLogsOnFailure(t, clusterInfo.ClusterInstance)
defer cluster.PanicHandler(t)
utils.SetupVttabletsAndVTOrcs(t, clusterInfo, 2, 1, nil, cluster.VTOrcConfiguration{}, 0, "none")
keyspace := &clusterInfo.ClusterInstance.Keyspaces[0]
shard0 := &keyspace.Shards[0]
curPrimary := shard0.Vttablets[0]

// Promote the first tablet as the primary
err := clusterInfo.ClusterInstance.VtctlclientProcess.InitializeShard(keyspace.Name, shard0.Name, clusterInfo.ClusterInstance.Cell, curPrimary.TabletUID)
require.NoError(t, err)

// find the replica and rdonly tablets
var replica, rdonly *cluster.Vttablet
for _, tablet := range shard0.Vttablets {

Check failure on line 117 in go/test/endtoend/vtorc/primaryfailure/primary_failure_test.go (GitHub Actions / Static Code Checks Etc): expected declaration, found 'for'
// we know we have only two replica tablets, so the one that is not the primary must be the other replica
if tablet.Alias != curPrimary.Alias && tablet.Type == "replica" {
replica = tablet
}
if tablet.Type == "rdonly" {
rdonly = tablet
}
}
assert.NotNil(t, replica, "could not find replica tablet")
assert.NotNil(t, rdonly, "could not find rdonly tablet")

// check that the replication is setup correctly before we failover
utils.CheckReplication(t, clusterInfo, curPrimary, []*cluster.Vttablet{rdonly, replica}, 10*time.Second)

// Make the current primary vttablet unavailable.
_ = curPrimary.VttabletProcess.TearDown()
err = curPrimary.MysqlctlProcess.Stop()
require.NoError(t, err)

// Start a VTOrc instance
utils.StartVTOrcs(t, clusterInfo, []string{"--remote_operation_timeout=10s"}, cluster.VTOrcConfiguration{
PreventCrossDataCenterPrimaryFailover: true,
}, 1)

vtOrcProcess := clusterInfo.ClusterInstance.VTOrcProcesses[0]

defer func() {
// we remove the tablet from our global list
utils.PermanentlyRemoveVttablet(clusterInfo, curPrimary)
}()

// check that the replica gets promoted
utils.CheckPrimaryTablet(t, clusterInfo, replica, true)

// also check that the replication is working correctly after failover
utils.VerifyWritesSucceed(t, clusterInfo, replica, []*cluster.Vttablet{rdonly}, 10*time.Second)
utils.WaitForSuccessfulRecoveryCount(t, vtOrcProcess, logic.RecoverDeadPrimaryRecoveryName, 1)
}

// TestDeadPrimaryRecoversImmediately tests VTOrc's ability to recover immediately if the primary is dead.
// The reason is that, unlike other recoveries, in DeadPrimary we don't call DiscoverInstance, since we know
// that the primary is unreachable. This helps us save a few seconds, depending on the value of the `RemoteOperationTimeout` flag.
func TestDeadPrimaryRecoversImmediately(t *testing.T) {
defer utils.PrintVTOrcLogsOnFailure(t, clusterInfo.ClusterInstance)
defer cluster.PanicHandler(t)
// We specify the --wait-replicas-timeout to a small value because we spawn a cross-cell replica later in the test.
// If that replica is more advanced than the same-cell-replica, then we try to promote the cross-cell replica as an intermediate source.
// If we don't specify a small value of --wait-replicas-timeout, then we would end up waiting for 30 seconds for the dead-primary to respond, failing this test.
utils.SetupVttabletsAndVTOrcs(t, clusterInfo, 2, 1, []string{"--remote_operation_timeout=10s", "--wait-replicas-timeout=5s"}, cluster.VTOrcConfiguration{
PreventCrossDataCenterPrimaryFailover: true,
}, 1, "semi_sync")
keyspace := &clusterInfo.ClusterInstance.Keyspaces[0]
shard0 := &keyspace.Shards[0]
// find primary from topo
curPrimary := utils.ShardPrimaryTablet(t, clusterInfo, keyspace, shard0)
assert.NotNil(t, curPrimary, "should have elected a primary")
vtOrcProcess := clusterInfo.ClusterInstance.VTOrcProcesses[0]
utils.WaitForSuccessfulRecoveryCount(t, vtOrcProcess, logic.ElectNewPrimaryRecoveryName, 1)

// find the replica and rdonly tablets
var replica, rdonly *cluster.Vttablet
for _, tablet := range shard0.Vttablets {

Check failure on line 179 in go/test/endtoend/vtorc/primaryfailure/primary_failure_test.go (GitHub Actions / Static Code Checks Etc): expected declaration, found 'for'
// we know we have only two replica tablets, so the one that is not the primary must be the other replica
if tablet.Alias != curPrimary.Alias && tablet.Type == "replica" {
replica = tablet
}
if tablet.Type == "rdonly" {
rdonly = tablet
}
}
assert.NotNil(t, replica, "could not find replica tablet")
assert.NotNil(t, rdonly, "could not find rdonly tablet")

// Start a cross-cell replica
crossCellReplica := utils.StartVttablet(t, clusterInfo, utils.Cell2, false)

// check that the replication is setup correctly before we failover
utils.CheckReplication(t, clusterInfo, curPrimary, []*cluster.Vttablet{rdonly, replica, crossCellReplica}, 10*time.Second)

// Make the current primary vttablet unavailable.
curPrimary.VttabletProcess.Kill()
err := curPrimary.MysqlctlProcess.Stop()
require.NoError(t, err)
defer func() {
// we remove the tablet from our global list
utils.PermanentlyRemoveVttablet(clusterInfo, curPrimary)
}()

// check that the replica gets promoted
utils.CheckPrimaryTablet(t, clusterInfo, replica, true)
utils.WaitForInstancePollSecondsExceededCount(t, vtOrcProcess, "InstancePollSecondsExceeded", 2, false)
// also check that the replication is working correctly after failover
utils.VerifyWritesSucceed(t, clusterInfo, replica, []*cluster.Vttablet{crossCellReplica}, 10*time.Second)
utils.WaitForSuccessfulRecoveryCount(t, vtOrcProcess, logic.RecoverDeadPrimaryRecoveryName, 1)

// Parse log file and find out how much time it took for DeadPrimary to recover.
logFile := path.Join(vtOrcProcess.LogDir, vtOrcProcess.LogFileName)
// log prefix printed at the end of analysis where we conclude we have DeadPrimary
t1 := extractTimeFromLog(t, logFile, "Proceeding with DeadPrimary recovery")
// log prefix printed at the end of recovery
t2 := extractTimeFromLog(t, logFile, "auditType:recover-dead-primary")
curr := time.Now().Format("2006-01-02")
timeLayout := "2006-01-02 15:04:05.000000"
timeStr1 := fmt.Sprintf("%s %s", curr, t1)
timeStr2 := fmt.Sprintf("%s %s", curr, t2)
time1, err := time.Parse(timeLayout, timeStr1)
if err != nil {
t.Errorf("unable to parse time %s", err.Error())
}
time2, err := time.Parse(timeLayout, timeStr2)
if err != nil {
t.Errorf("unable to parse time %s", err.Error())
}
diff := time2.Sub(time1)
fmt.Printf("The difference between %s and %s is %v seconds.\n", t1, t2, diff.Seconds())
// assert that it takes less than `remote_operation_timeout` to recover from `DeadPrimary`
// use the value provided in `remote_operation_timeout` flag to compare with.
// We are testing against 9.5 seconds to be safe and prevent flakiness.
assert.Less(t, diff.Seconds(), 9.5)
}

>>>>>>> 888df9228e (Fix flakiness in VTOrc tests (#13489))

Check failure on line 239 in go/test/endtoend/vtorc/primaryfailure/primary_failure_test.go (GitHub Actions / Static Code Checks Etc): illegal character U+0023 '#'
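The recovery-time measurement above depends on `extractTimeFromLog`, which is not shown in this diff. Since glog lines carry only a time of day (which is why the test prepends today's date before parsing), a plausible sketch of the helper could look like the following; the implementation here is an assumption, not the actual code:

```go
package primaryfailure

import (
	"os"
	"strings"
	"testing"

	"github.com/stretchr/testify/require"
)

// extractTimeFromLog returns the "15:04:05.000000" timestamp of the first
// log line containing substr. Hypothetical sketch of the unshown helper.
func extractTimeFromLog(t *testing.T, logFile string, substr string) string {
	content, err := os.ReadFile(logFile)
	require.NoError(t, err)
	for _, line := range strings.Split(string(content), "\n") {
		if strings.Contains(line, substr) {
			// glog line: I0714 15:04:05.000000   123 file.go:42] message
			fields := strings.Fields(line)
			if len(fields) >= 2 {
				return fields[1]
			}
		}
	}
	t.Errorf("could not find %q in %s", substr, logFile)
	return ""
}
```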
// Failover should not be cross data centers, according to the configuration file
// covers part of the test case master-failover-lost-replicas from orchestrator
func TestCrossDataCenterFailure(t *testing.T) {
defer utils.PrintVTOrcLogsOnFailure(t, clusterInfo.ClusterInstance)
defer cluster.PanicHandler(t)
utils.SetupVttabletsAndVTOrcs(t, clusterInfo, 2, 1, nil, cluster.VTOrcConfiguration{
PreventCrossDataCenterPrimaryFailover: true,
@@ -98,7 +253,7 @@

// find the replica and rdonly tablets
var replicaInSameCell, rdonly *cluster.Vttablet
for _, tablet := range shard0.Vttablets {

Check failure on line 256 in go/test/endtoend/vtorc/primaryfailure/primary_failure_test.go (GitHub Actions / Static Code Checks Etc): expected declaration, found 'for'
// we know we have only two replica tablets, so the one that is not the primary must be the other replica
if tablet.Alias != curPrimary.Alias && tablet.Type == "replica" {
replicaInSameCell = tablet
@@ -131,6 +286,7 @@
// Failover should not be cross data centers, according to the configuration file
// In case of no viable candidates, we should error out
func TestCrossDataCenterFailureError(t *testing.T) {
defer utils.PrintVTOrcLogsOnFailure(t, clusterInfo.ClusterInstance)
defer cluster.PanicHandler(t)
utils.SetupVttabletsAndVTOrcs(t, clusterInfo, 1, 1, nil, cluster.VTOrcConfiguration{
PreventCrossDataCenterPrimaryFailover: true,
@@ -143,7 +299,7 @@

// find the rdonly tablet
var rdonly *cluster.Vttablet
for _, tablet := range shard0.Vttablets {

Check failure on line 302 in go/test/endtoend/vtorc/primaryfailure/primary_failure_test.go (GitHub Actions / Static Code Checks Etc): expected declaration, found 'for'
if tablet.Type == "rdonly" {
rdonly = tablet
}
@@ -177,6 +333,7 @@
// Earlier any replicas that were not able to replicate from the previous primary
// were detected by vtorc and could be configured to have their sources detached
t.Skip()
defer utils.PrintVTOrcLogsOnFailure(t, clusterInfo.ClusterInstance)
defer cluster.PanicHandler(t)
utils.SetupVttabletsAndVTOrcs(t, clusterInfo, 2, 2, nil, cluster.VTOrcConfiguration{
PreventCrossDataCenterPrimaryFailover: true,
@@ -258,6 +415,7 @@
// This test checks that the promotion of a tablet succeeds if it passes the promotion lag test
// covers the test case master-failover-fail-promotion-lag-minutes-success from orchestrator
func TestPromotionLagSuccess(t *testing.T) {
defer utils.PrintVTOrcLogsOnFailure(t, clusterInfo.ClusterInstance)
defer cluster.PanicHandler(t)
utils.SetupVttabletsAndVTOrcs(t, clusterInfo, 2, 1, nil, cluster.VTOrcConfiguration{
ReplicationLagQuery: "select 59",
@@ -307,6 +465,7 @@
// Earlier vtorc used to check that the promotion lag between the new primary and the old one
// was smaller than the configured value, otherwise it would fail the promotion
t.Skip()
defer utils.PrintVTOrcLogsOnFailure(t, clusterInfo.ClusterInstance)
defer cluster.PanicHandler(t)
utils.SetupVttabletsAndVTOrcs(t, clusterInfo, 3, 1, nil, cluster.VTOrcConfiguration{
ReplicationLagQuery: "select 61",
@@ -359,6 +518,7 @@
// We explicitly set one of the replicas to Prefer promotion rule.
// That is the replica which should be promoted in case of primary failure
func TestDownPrimaryPromotionRule(t *testing.T) {
defer utils.PrintVTOrcLogsOnFailure(t, clusterInfo.ClusterInstance)
defer cluster.PanicHandler(t)
utils.SetupVttabletsAndVTOrcs(t, clusterInfo, 2, 1, nil, cluster.VTOrcConfiguration{
LockShardTimeoutSeconds: 5,
@@ -406,6 +566,7 @@
// That is the replica which should be promoted in case of primary failure
// It should also be caught up when it is promoted
func TestDownPrimaryPromotionRuleWithLag(t *testing.T) {
defer utils.PrintVTOrcLogsOnFailure(t, clusterInfo.ClusterInstance)
defer cluster.PanicHandler(t)
utils.SetupVttabletsAndVTOrcs(t, clusterInfo, 2, 1, nil, cluster.VTOrcConfiguration{
LockShardTimeoutSeconds: 5,
@@ -485,6 +646,7 @@
// We let a replica in our own cell lag. That is the replica which should be promoted in case of primary failure
// It should also be caught up when it is promoted
func TestDownPrimaryPromotionRuleWithLagCrossCenter(t *testing.T) {
defer utils.PrintVTOrcLogsOnFailure(t, clusterInfo.ClusterInstance)
defer cluster.PanicHandler(t)
utils.SetupVttabletsAndVTOrcs(t, clusterInfo, 2, 1, nil, cluster.VTOrcConfiguration{
LockShardTimeoutSeconds: 5,
3 changes: 2 additions & 1 deletion go/test/endtoend/vtorc/readtopologyinstance/main_test.go
@@ -40,6 +40,7 @@ func TestReadTopologyInstanceBufferable(t *testing.T) {
defer func() {
clusterInfo.ClusterInstance.Teardown()
}()
defer utils.PrintVTOrcLogsOnFailure(t, clusterInfo.ClusterInstance)
keyspace := &clusterInfo.ClusterInstance.Keyspaces[0]
shard0 := &keyspace.Shards[0]
oldArgs := os.Args
@@ -147,7 +148,7 @@ func TestReadTopologyInstanceBufferable(t *testing.T) {
assert.Equal(t, replicaInstance.ReadBinlogCoordinates.LogFile, primaryInstance.SelfBinlogCoordinates.LogFile)
assert.Greater(t, replicaInstance.ReadBinlogCoordinates.LogPos, int64(0))
assert.Equal(t, replicaInstance.ExecBinlogCoordinates.LogFile, primaryInstance.SelfBinlogCoordinates.LogFile)
assert.LessOrEqual(t, replicaInstance.ExecBinlogCoordinates.LogPos, replicaInstance.ReadBinlogCoordinates.LogPos)
assert.Greater(t, replicaInstance.ExecBinlogCoordinates.LogPos, uint32(0))
assert.Contains(t, replicaInstance.RelaylogCoordinates.LogFile, fmt.Sprintf("vt-0000000%d-relay", replica.TabletUID))
assert.Greater(t, replicaInstance.RelaylogCoordinates.LogPos, int64(0))
assert.Empty(t, replicaInstance.LastIOError)
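One detail worth noting in the changed assertion above: testify's `assert.Greater` refuses to compare operands of different concrete types, so the zero literal has to match the field's type, `uint32(0)` for the exec binlog position here versus `int64(0)` for the relay-log position. A tiny sketch, assuming that testify behavior:

```go
package coords_test

import (
	"testing"

	"github.com/stretchr/testify/assert"
)

// assert.Greater compares only values of the same concrete type, so the
// zero literal is cast to match the field being checked.
func TestLogPosTypeMatch(t *testing.T) {
	var execPos uint32 = 120
	assert.Greater(t, execPos, uint32(0)) // passes: both operands are uint32
	// assert.Greater(t, execPos, 0)      // would fail: int vs uint32
}
```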
60 changes: 60 additions & 0 deletions go/test/endtoend/vtorc/utils/utils.go
@@ -944,3 +944,63 @@ func WaitForSuccessfulRecoveryCount(t *testing.T, vtorcInstance *cluster.VTOrcPr
successCount := successfulRecoveriesMap[recoveryName]
assert.EqualValues(t, countExpected, successCount)
}
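For reference, the tests above consume this helper as a one-line assertion, for example in TestDownPrimary:

```go
// Assert that VTOrc recorded exactly one successful DeadPrimary recovery.
utils.WaitForSuccessfulRecoveryCount(t, vtOrcProcess, logic.RecoverDeadPrimaryRecoveryName, 1)
```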
<<<<<<< HEAD
=======

// WaitForInstancePollSecondsExceededCount waits for 30 seconds and then queries api/aggregated-discovery-metrics.
// It expects the count for the provided `keyName` to be at least `minCountExpected`, or exactly equal to it when `enforceEquality` is set.
func WaitForInstancePollSecondsExceededCount(t *testing.T, vtorcInstance *cluster.VTOrcProcess, keyName string, minCountExpected float64, enforceEquality bool) {
t.Helper()
var sinceInSeconds = 30
duration := time.Duration(sinceInSeconds)
time.Sleep(duration * time.Second)

statusCode, res, err := vtorcInstance.MakeAPICall("api/aggregated-discovery-metrics?seconds=" + strconv.Itoa(sinceInSeconds))
if err != nil {
assert.Fail(t, "Not able to call api/aggregated-discovery-metrics")
}
if statusCode == 200 {
resultMap := make(map[string]any)
err := json.Unmarshal([]byte(res), &resultMap)
if err != nil {
assert.Fail(t, "invalid response from api/aggregated-discovery-metrics")
}
successCount := resultMap[keyName]
if iSuccessCount, ok := successCount.(float64); ok {
if enforceEquality {
assert.Equal(t, iSuccessCount, minCountExpected)
} else {
assert.GreaterOrEqual(t, iSuccessCount, minCountExpected)
}
return
}
}
assert.Fail(t, "invalid response from api/aggregated-discovery-metrics")
}
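The `float64` type assertion above follows from encoding/json semantics: JSON numbers decoded into a `map[string]any` always arrive as `float64`. A standalone illustration:

```go
package main

import (
	"encoding/json"
	"fmt"
)

func main() {
	var m map[string]any
	_ = json.Unmarshal([]byte(`{"InstancePollSecondsExceeded": 2}`), &m)
	// Prints "float64 2": JSON numbers decode to float64 behind an any value.
	fmt.Printf("%T %v\n", m["InstancePollSecondsExceeded"], m["InstancePollSecondsExceeded"])
}
```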

// PrintVTOrcLogsOnFailure prints the VTOrc logs on failure of the test.
// This function is supposed to be called as the first defer command from the vtorc tests.
func PrintVTOrcLogsOnFailure(t *testing.T, clusterInstance *cluster.LocalProcessCluster) {
// If the test has not failed, then we don't need to print anything.
if !t.Failed() {
return
}

log.Errorf("Printing VTOrc logs")
for _, vtorc := range clusterInstance.VTOrcProcesses {
if vtorc == nil || vtorc.LogFileName == "" {
continue
}
filePath := path.Join(vtorc.LogDir, vtorc.LogFileName)
log.Errorf("Printing file - %s", filePath)
content, err := os.ReadFile(filePath)
if err != nil {
log.Errorf("Error while reading the file - %v", err)
}
log.Errorf("%s", string(content))
}
}
<<<<<<< HEAD
>>>>>>> 888df9228e (Fix flakiness in VTOrc tests (#13489))
=======
>>>>>>> 888df9228e (Fix flakiness in VTOrc tests (#13489))