Skip to content

Commit

Permalink
Add StalledDiskPrimary analysis and recovery
Browse files Browse the repository at this point in the history
  • Loading branch information
Joe Kelley committed Jun 3, 2024
1 parent f4591fb commit 886b312
Show file tree
Hide file tree
Showing 15 changed files with 314 additions and 49 deletions.
37 changes: 21 additions & 16 deletions go/vt/vtorc/config/config.go
Original file line number Diff line number Diff line change
Expand Up @@ -42,22 +42,23 @@ const (
)

var (
sqliteDataFile = "file::memory:?mode=memory&cache=shared"
instancePollTime = 5 * time.Second
snapshotTopologyInterval = 0 * time.Hour
reasonableReplicationLag = 10 * time.Second
auditFileLocation = ""
auditToBackend = false
auditToSyslog = false
auditPurgeDuration = 7 * 24 * time.Hour // Equivalent of 7 days
recoveryPeriodBlockDuration = 30 * time.Second
preventCrossCellFailover = false
waitReplicasTimeout = 30 * time.Second
tolerableReplicationLag = 0 * time.Second
topoInformationRefreshDuration = 15 * time.Second
recoveryPollDuration = 1 * time.Second
ersEnabled = true
convertTabletsWithErrantGTIDs = false
sqliteDataFile = "file::memory:?mode=memory&cache=shared"
instancePollTime = 5 * time.Second
snapshotTopologyInterval = 0 * time.Hour
reasonableReplicationLag = 10 * time.Second
auditFileLocation = ""
auditToBackend = false
auditToSyslog = false
auditPurgeDuration = 7 * 24 * time.Hour // Equivalent of 7 days
recoveryPeriodBlockDuration = 30 * time.Second
preventCrossCellFailover = false
waitReplicasTimeout = 30 * time.Second
tolerableReplicationLag = 0 * time.Second
topoInformationRefreshDuration = 15 * time.Second
recoveryPollDuration = 1 * time.Second
ersEnabled = true
convertTabletsWithErrantGTIDs = false
enableStalledDiskPrimaryAnalysis = false
)

// RegisterFlags registers the flags required by VTOrc
Expand All @@ -79,6 +80,7 @@ func RegisterFlags(fs *pflag.FlagSet) {
fs.DurationVar(&recoveryPollDuration, "recovery-poll-duration", recoveryPollDuration, "Timer duration on which VTOrc polls its database to run a recovery")
fs.BoolVar(&ersEnabled, "allow-emergency-reparent", ersEnabled, "Whether VTOrc should be allowed to run emergency reparent operation when it detects a dead primary")
fs.BoolVar(&convertTabletsWithErrantGTIDs, "change-tablets-with-errant-gtid-to-drained", convertTabletsWithErrantGTIDs, "Whether VTOrc should be changing the type of tablets with errant GTIDs to DRAINED")
fs.BoolVar(&enableStalledDiskPrimaryAnalysis, "enable-stalled-disk-primary-analysis", enableStalledDiskPrimaryAnalysis, "Whether VTOrc should be analyzing and recovering stalled disk primary failures")
}

// Configuration holds the vtorc configuration input, which can be provided by the user via a JSON-formatted file.
Expand All @@ -100,6 +102,7 @@ type Configuration struct {
TolerableReplicationLagSeconds int // Amount of replication lag that is considered acceptable for a tablet to be eligible for promotion when Vitess makes the choice of a new primary in PRS.
TopoInformationRefreshSeconds int // Timer duration on which VTOrc refreshes the keyspace and vttablet records from the topo-server.
RecoveryPollSeconds int // Timer duration on which VTOrc recovery analysis runs
EnableStalledDiskPrimaryAnalysis bool // Whether to enable the analysis and recovery of stalled disk primary failures
}

// ToJSONString will marshal this configuration as JSON
Expand Down Expand Up @@ -130,6 +133,7 @@ func UpdateConfigValuesFromFlags() {
Config.TolerableReplicationLagSeconds = int(tolerableReplicationLag / time.Second)
Config.TopoInformationRefreshSeconds = int(topoInformationRefreshDuration / time.Second)
Config.RecoveryPollSeconds = int(recoveryPollDuration / time.Second)
Config.EnableStalledDiskPrimaryAnalysis = enableStalledDiskPrimaryAnalysis
}

// ERSEnabled reports whether VTOrc is allowed to run ERS or not.
Expand Down Expand Up @@ -173,6 +177,7 @@ func newConfiguration() *Configuration {
WaitReplicasTimeoutSeconds: 30,
TopoInformationRefreshSeconds: 15,
RecoveryPollSeconds: 1,
EnableStalledDiskPrimaryAnalysis: false,
}
}

Expand Down
1 change: 1 addition & 0 deletions go/vt/vtorc/db/generate_base.go
Original file line number Diff line number Diff line change
Expand Up @@ -107,6 +107,7 @@ CREATE TABLE database_instance (
semi_sync_primary_status TINYint NOT NULL DEFAULT 0,
semi_sync_replica_status TINYint NOT NULL DEFAULT 0,
semi_sync_primary_clients int NOT NULL DEFAULT 0,
stalled_disk TINYint NOT NULL DEFAULT 0,
PRIMARY KEY (alias)
)`,
`
Expand Down
2 changes: 2 additions & 0 deletions go/vt/vtorc/inst/analysis.go
Original file line number Diff line number Diff line change
Expand Up @@ -56,6 +56,7 @@ const (
LockedSemiSyncPrimaryHypothesis AnalysisCode = "LockedSemiSyncPrimaryHypothesis"
LockedSemiSyncPrimary AnalysisCode = "LockedSemiSyncPrimary"
ErrantGTIDDetected AnalysisCode = "ErrantGTIDDetected"
StalledDiskPrimary AnalysisCode = "StalledDiskPrimary"
)

type StructureAnalysisCode string
Expand Down Expand Up @@ -130,6 +131,7 @@ type ReplicationAnalysis struct {
MaxReplicaGTIDMode string
MaxReplicaGTIDErrant string
IsReadOnly bool
IsStalledDisk bool
}

func (replicationAnalysis *ReplicationAnalysis) MarshalJSON() ([]byte, error) {
Expand Down
11 changes: 9 additions & 2 deletions go/vt/vtorc/inst/analysis_dao.go
Original file line number Diff line number Diff line change
Expand Up @@ -81,7 +81,7 @@ func GetReplicationAnalysis(keyspace string, shard string, hints *ReplicationAna
vitess_keyspace.durability_policy AS durability_policy,
vitess_shard.primary_timestamp AS shard_primary_term_timestamp,
primary_instance.read_only AS read_only,
MIN(primary_instance.gtid_errant) AS gtid_errant,
MIN(primary_instance.gtid_errant) AS gtid_errant,
MIN(primary_instance.alias) IS NULL AS is_invalid,
MIN(primary_instance.binary_log_file) AS binary_log_file,
MIN(primary_instance.binary_log_pos) AS binary_log_pos,
Expand Down Expand Up @@ -246,7 +246,8 @@ func GetReplicationAnalysis(keyspace string, shard string, hints *ReplicationAna
COUNT(
DISTINCT case when replica_instance.log_bin
AND replica_instance.log_replica_updates then replica_instance.major_version else NULL end
) AS count_distinct_logging_major_versions
) AS count_distinct_logging_major_versions,
primary_instance.stalled_disk != 0 AS is_stalled_disk
FROM
vitess_tablet
JOIN vitess_keyspace ON (
Expand Down Expand Up @@ -364,6 +365,7 @@ func GetReplicationAnalysis(keyspace string, shard string, hints *ReplicationAna
a.HeartbeatInterval = m.GetFloat64("heartbeat_interval")

a.IsReadOnly = m.GetUint("read_only") == 1
a.IsStalledDisk = m.GetBool("is_stalled_disk")

if !a.LastCheckValid {
analysisMessage := fmt.Sprintf("analysis: Alias: %+v, Keyspace: %+v, Shard: %+v, IsPrimary: %+v, LastCheckValid: %+v, LastCheckPartialSuccess: %+v, CountReplicas: %+v, CountValidReplicas: %+v, CountValidReplicatingReplicas: %+v, CountLaggingReplicas: %+v, CountDelayedReplicas: %+v",
Expand Down Expand Up @@ -411,6 +413,11 @@ func GetReplicationAnalysis(keyspace string, shard string, hints *ReplicationAna
} else if isInvalid {
a.Analysis = InvalidReplica
a.Description = "VTOrc hasn't been able to reach the replica even once since restart/shutdown"
} else if a.IsClusterPrimary && !a.LastCheckValid && a.IsStalledDisk {
a.Analysis = StalledDiskPrimary
a.Description = "Primary has a stalled disk"
ca.hasClusterwideAction = true
//
} else if a.IsClusterPrimary && !a.LastCheckValid && a.CountReplicas == 0 {
a.Analysis = DeadPrimaryWithoutReplicas
a.Description = "Primary cannot be reached by vtorc and has no replica"
Expand Down
31 changes: 27 additions & 4 deletions go/vt/vtorc/inst/analysis_dao_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -33,10 +33,10 @@ var (
// The initialSQL is a set of insert commands copied from a dump of an actual running VTOrc instance. Only the relevant insert commands are here.
// This is a dump taken from a test running 4 tablets: zone1-101 is the primary, zone1-100 is a replica, zone1-112 is an rdonly and zone2-200 is a cross-cell replica.
initialSQL = []string{
`INSERT INTO database_instance VALUES('zone1-0000000112','localhost',6747,'2022-12-28 07:26:04','2022-12-28 07:26:04',213696377,'8.0.31','ROW',1,1,'vt-0000000112-bin.000001',15963,'localhost',6714,8,4.0,1,1,'vt-0000000101-bin.000001',15583,'vt-0000000101-bin.000001',15583,0,0,1,'','',1,0,'vt-0000000112-relay-bin.000002',15815,0,1,0,'zone1','',0,0,0,1,'729a4cc4-8680-11ed-a104-47706090afbd:1-54','729a5138-8680-11ed-9240-92a06c3be3c2','2022-12-28 07:26:04','',1,0,0,'Homebrew','8.0','FULL',10816929,0,0,'ON',1,'729a4cc4-8680-11ed-a104-47706090afbd','','729a4cc4-8680-11ed-a104-47706090afbd,729a5138-8680-11ed-9240-92a06c3be3c2',1,1,'',1000000000000000000,1,0,0,0);`,
`INSERT INTO database_instance VALUES('zone1-0000000100','localhost',6711,'2022-12-28 07:26:04','2022-12-28 07:26:04',1094500338,'8.0.31','ROW',1,1,'vt-0000000100-bin.000001',15963,'localhost',6714,8,4.0,1,1,'vt-0000000101-bin.000001',15583,'vt-0000000101-bin.000001',15583,0,0,1,'','',1,0,'vt-0000000100-relay-bin.000002',15815,0,1,0,'zone1','',0,0,0,1,'729a4cc4-8680-11ed-a104-47706090afbd:1-54','729a5138-8680-11ed-acf8-d6b0ef9f4eaa','2022-12-28 07:26:04','',1,0,0,'Homebrew','8.0','FULL',10103920,0,1,'ON',1,'729a4cc4-8680-11ed-a104-47706090afbd','','729a4cc4-8680-11ed-a104-47706090afbd,729a5138-8680-11ed-acf8-d6b0ef9f4eaa',1,1,'',1000000000000000000,1,0,1,0);`,
`INSERT INTO database_instance VALUES('zone1-0000000101','localhost',6714,'2022-12-28 07:26:04','2022-12-28 07:26:04',390954723,'8.0.31','ROW',1,1,'vt-0000000101-bin.000001',15583,'',0,0,0,0,0,'',0,'',0,NULL,NULL,0,'','',0,0,'',0,0,0,0,'zone1','',0,0,0,1,'729a4cc4-8680-11ed-a104-47706090afbd:1-54','729a4cc4-8680-11ed-a104-47706090afbd','2022-12-28 07:26:04','',0,0,0,'Homebrew','8.0','FULL',11366095,1,1,'ON',1,'','','729a4cc4-8680-11ed-a104-47706090afbd',-1,-1,'',1000000000000000000,1,1,0,2);`,
`INSERT INTO database_instance VALUES('zone2-0000000200','localhost',6756,'2022-12-28 07:26:05','2022-12-28 07:26:05',444286571,'8.0.31','ROW',1,1,'vt-0000000200-bin.000001',15963,'localhost',6714,8,4.0,1,1,'vt-0000000101-bin.000001',15583,'vt-0000000101-bin.000001',15583,0,0,1,'','',1,0,'vt-0000000200-relay-bin.000002',15815,0,1,0,'zone2','',0,0,0,1,'729a4cc4-8680-11ed-a104-47706090afbd:1-54','729a497c-8680-11ed-8ad4-3f51d747db75','2022-12-28 07:26:05','',1,0,0,'Homebrew','8.0','FULL',10443112,0,1,'ON',1,'729a4cc4-8680-11ed-a104-47706090afbd','','729a4cc4-8680-11ed-a104-47706090afbd,729a497c-8680-11ed-8ad4-3f51d747db75',1,1,'',1000000000000000000,1,0,1,0);`,
`INSERT INTO database_instance VALUES('zone1-0000000112','localhost',6747,'2022-12-28 07:26:04','2022-12-28 07:26:04',213696377,'8.0.31','ROW',1,1,'vt-0000000112-bin.000001',15963,'localhost',6714,8,4.0,1,1,'vt-0000000101-bin.000001',15583,'vt-0000000101-bin.000001',15583,0,0,1,'','',1,0,'vt-0000000112-relay-bin.000002',15815,0,1,0,'zone1','',0,0,0,1,'729a4cc4-8680-11ed-a104-47706090afbd:1-54','729a5138-8680-11ed-9240-92a06c3be3c2','2022-12-28 07:26:04','',1,0,0,'Homebrew','8.0','FULL',10816929,0,0,'ON',1,'729a4cc4-8680-11ed-a104-47706090afbd','','729a4cc4-8680-11ed-a104-47706090afbd,729a5138-8680-11ed-9240-92a06c3be3c2',1,1,'',1000000000000000000,1,0,0,0,false);`,
`INSERT INTO database_instance VALUES('zone1-0000000100','localhost',6711,'2022-12-28 07:26:04','2022-12-28 07:26:04',1094500338,'8.0.31','ROW',1,1,'vt-0000000100-bin.000001',15963,'localhost',6714,8,4.0,1,1,'vt-0000000101-bin.000001',15583,'vt-0000000101-bin.000001',15583,0,0,1,'','',1,0,'vt-0000000100-relay-bin.000002',15815,0,1,0,'zone1','',0,0,0,1,'729a4cc4-8680-11ed-a104-47706090afbd:1-54','729a5138-8680-11ed-acf8-d6b0ef9f4eaa','2022-12-28 07:26:04','',1,0,0,'Homebrew','8.0','FULL',10103920,0,1,'ON',1,'729a4cc4-8680-11ed-a104-47706090afbd','','729a4cc4-8680-11ed-a104-47706090afbd,729a5138-8680-11ed-acf8-d6b0ef9f4eaa',1,1,'',1000000000000000000,1,0,1,0,false);`,
`INSERT INTO database_instance VALUES('zone1-0000000101','localhost',6714,'2022-12-28 07:26:04','2022-12-28 07:26:04',390954723,'8.0.31','ROW',1,1,'vt-0000000101-bin.000001',15583,'',0,0,0,0,0,'',0,'',0,NULL,NULL,0,'','',0,0,'',0,0,0,0,'zone1','',0,0,0,1,'729a4cc4-8680-11ed-a104-47706090afbd:1-54','729a4cc4-8680-11ed-a104-47706090afbd','2022-12-28 07:26:04','',0,0,0,'Homebrew','8.0','FULL',11366095,1,1,'ON',1,'','','729a4cc4-8680-11ed-a104-47706090afbd',-1,-1,'',1000000000000000000,1,1,0,2,false);`,
`INSERT INTO database_instance VALUES('zone2-0000000200','localhost',6756,'2022-12-28 07:26:05','2022-12-28 07:26:05',444286571,'8.0.31','ROW',1,1,'vt-0000000200-bin.000001',15963,'localhost',6714,8,4.0,1,1,'vt-0000000101-bin.000001',15583,'vt-0000000101-bin.000001',15583,0,0,1,'','',1,0,'vt-0000000200-relay-bin.000002',15815,0,1,0,'zone2','',0,0,0,1,'729a4cc4-8680-11ed-a104-47706090afbd:1-54','729a497c-8680-11ed-8ad4-3f51d747db75','2022-12-28 07:26:05','',1,0,0,'Homebrew','8.0','FULL',10443112,0,1,'ON',1,'729a4cc4-8680-11ed-a104-47706090afbd','','729a4cc4-8680-11ed-a104-47706090afbd,729a497c-8680-11ed-8ad4-3f51d747db75',1,1,'',1000000000000000000,1,0,1,0,false);`,
`INSERT INTO vitess_tablet VALUES('zone1-0000000100','localhost',6711,'ks','0','zone1',2,'0001-01-01 00:00:00+00:00',X'616c6961733a7b63656c6c3a227a6f6e653122207569643a3130307d20686f73746e616d653a226c6f63616c686f73742220706f72745f6d61703a7b6b65793a2267727063222076616c75653a363731307d20706f72745f6d61703a7b6b65793a227674222076616c75653a363730397d206b657973706163653a226b73222073686172643a22302220747970653a5245504c494341206d7973716c5f686f73746e616d653a226c6f63616c686f737422206d7973716c5f706f72743a363731312064625f7365727665725f76657273696f6e3a22382e302e3331222064656661756c745f636f6e6e5f636f6c6c6174696f6e3a3435');`,
`INSERT INTO vitess_tablet VALUES('zone1-0000000101','localhost',6714,'ks','0','zone1',1,'2022-12-28 07:23:25.129898+00:00',X'616c6961733a7b63656c6c3a227a6f6e653122207569643a3130317d20686f73746e616d653a226c6f63616c686f73742220706f72745f6d61703a7b6b65793a2267727063222076616c75653a363731337d20706f72745f6d61703a7b6b65793a227674222076616c75653a363731327d206b657973706163653a226b73222073686172643a22302220747970653a5052494d415259206d7973716c5f686f73746e616d653a226c6f63616c686f737422206d7973716c5f706f72743a36373134207072696d6172795f7465726d5f73746172745f74696d653a7b7365636f6e64733a31363732323132323035206e616e6f7365636f6e64733a3132393839383030307d2064625f7365727665725f76657273696f6e3a22382e302e3331222064656661756c745f636f6e6e5f636f6c6c6174696f6e3a3435');`,
`INSERT INTO vitess_tablet VALUES('zone1-0000000112','localhost',6747,'ks','0','zone1',3,'0001-01-01 00:00:00+00:00',X'616c6961733a7b63656c6c3a227a6f6e653122207569643a3131327d20686f73746e616d653a226c6f63616c686f73742220706f72745f6d61703a7b6b65793a2267727063222076616c75653a363734367d20706f72745f6d61703a7b6b65793a227674222076616c75653a363734357d206b657973706163653a226b73222073686172643a22302220747970653a52444f4e4c59206d7973716c5f686f73746e616d653a226c6f63616c686f737422206d7973716c5f706f72743a363734372064625f7365727665725f76657273696f6e3a22382e302e3331222064656661756c745f636f6e6e5f636f6c6c6174696f6e3a3435');`,
Expand Down Expand Up @@ -95,6 +95,29 @@ func TestGetReplicationAnalysisDecision(t *testing.T) {
keyspaceWanted: "ks",
shardWanted: "0",
codeWanted: PrimaryTabletDeleted,
}, {
name: "StalledDiskPrimary",
info: []*test.InfoForRecoveryAnalysis{{
TabletInfo: &topodatapb.Tablet{
Alias: &topodatapb.TabletAlias{Cell: "zon1", Uid: 100},
Hostname: "localhost",
Keyspace: "ks",
Shard: "0",
Type: topodatapb.TabletType_PRIMARY,
MysqlHostname: "localhost",
MysqlPort: 6709,
},
DurabilityPolicy: "none",
LastCheckValid: 0,
CountReplicas: 4,
CountValidReplicas: 4,
CountValidReplicatingReplicas: 0,
IsPrimary: 1,
IsStalledDisk: 1,
}},
keyspaceWanted: "ks",
shardWanted: "0",
codeWanted: StalledDiskPrimary,
}, {
name: "DeadPrimary",
info: []*test.InfoForRecoveryAnalysis{{
Expand Down
1 change: 1 addition & 0 deletions go/vt/vtorc/inst/instance.go
Original file line number Diff line number Diff line change
Expand Up @@ -93,6 +93,7 @@ type Instance struct {
IsUpToDate bool
IsRecentlyChecked bool
SecondsSinceLastSeen sql.NullInt64
StalledDisk bool

AllowTLS bool

Expand Down
21 changes: 15 additions & 6 deletions go/vt/vtorc/inst/instance_dao.go
Original file line number Diff line number Diff line change
Expand Up @@ -170,6 +170,7 @@ func ReadTopologyInstanceBufferable(tabletAlias string, latency *stopwatch.Named
var tablet *topodatapb.Tablet
var fs *replicationdatapb.FullStatus
readingStartTime := time.Now()
stalledDisk := false
instance := NewInstance()
instanceFound := false
partialSuccess := false
Expand Down Expand Up @@ -200,6 +201,9 @@ func ReadTopologyInstanceBufferable(tabletAlias string, latency *stopwatch.Named

fs, err = fullStatus(tabletAlias)
if err != nil {
if config.Config.EnableStalledDiskPrimaryAnalysis && strings.Contains(err.Error(), "stalled disk") {
stalledDisk = true
}
goto Cleanup
}
partialSuccess = true // We at least managed to read something from the server.
Expand Down Expand Up @@ -400,9 +404,10 @@ Cleanup:

// Something is wrong, could be network-wise. Record that we
// tried to check the instance. last_attempted_check is also
// updated on success by writeInstance.
// updated on success by writeInstance. If the reason is a
// stalled disk, we can record that as well.
latency.Start("backend")
_ = UpdateInstanceLastChecked(tabletAlias, partialSuccess)
_ = UpdateInstanceLastChecked(tabletAlias, partialSuccess, stalledDisk)
latency.Stop("backend")
return nil, err
}
Expand Down Expand Up @@ -847,6 +852,7 @@ func mkInsertOdkuForInstances(instances []*Instance, instanceWasActuallyFound bo
"semi_sync_primary_clients",
"semi_sync_replica_status",
"last_discovery_latency",
"stalled_disk",
}

values := make([]string, len(columns))
Expand Down Expand Up @@ -928,6 +934,7 @@ func mkInsertOdkuForInstances(instances []*Instance, instanceWasActuallyFound bo
args = append(args, instance.SemiSyncPrimaryClients)
args = append(args, instance.SemiSyncReplicaStatus)
args = append(args, instance.LastDiscoveryLatency.Nanoseconds())
args = append(args, instance.StalledDisk)
}

sql, err := mkInsertOdku("database_instance", columns, values, len(instances), insertIgnore)
Expand Down Expand Up @@ -973,17 +980,19 @@ func WriteInstance(instance *Instance, instanceWasActuallyFound bool, lastError

// UpdateInstanceLastChecked updates the last_checked timestamp in the vtorc backend database
// for a given instance
func UpdateInstanceLastChecked(tabletAlias string, partialSuccess bool) error {
func UpdateInstanceLastChecked(tabletAlias string, partialSuccess bool, stalledDisk bool) error {
writeFunc := func() error {
_, err := db.ExecVTOrc(`
update
database_instance
set
last_checked = NOW(),
last_check_partial_success = ?
last_check_partial_success = ?,
stalled_disk = ?
where
alias = ?`,
partialSuccess,
stalledDisk,
tabletAlias,
)
if err != nil {
Expand Down Expand Up @@ -1110,8 +1119,8 @@ func SnapshotTopologies() error {
alias, hostname, port, source_host, source_port, keyspace, shard, version)
select
UNIX_TIMESTAMP(NOW()),
vitess_tablet.alias, vitess_tablet.hostname, vitess_tablet.port,
database_instance.source_host, database_instance.source_port,
vitess_tablet.alias, vitess_tablet.hostname, vitess_tablet.port,
database_instance.source_host, database_instance.source_port,
vitess_tablet.keyspace, vitess_tablet.shard, database_instance.version
from
vitess_tablet left join database_instance using (alias, hostname, port)
Expand Down
Loading

0 comments on commit 886b312

Please sign in to comment.