Skip to content

Commit

Permalink
Revert "VTGate Buffering: Use a more accurate heuristic for determini…
Browse files Browse the repository at this point in the history
…ng if we're doing a reshard (vitessio#13856)"

This reverts commit b3262d7.
  • Loading branch information
timvaillancourt committed Jun 4, 2024
1 parent b6812a7 commit 96f84e9
Show file tree
Hide file tree
Showing 3 changed files with 14 additions and 244 deletions.
1 change: 0 additions & 1 deletion examples/local/scripts/vtgate-up.sh
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,6 @@ vtgate \
--tablet_types_to_wait PRIMARY,REPLICA \
--service_map 'grpc-vtgateservice' \
--pid_file $VTDATAROOT/tmp/vtgate.pid \
--enable_buffer \
--mysql_auth_server_impl none \
> $VTDATAROOT/tmp/vtgate.out 2>&1 &

Expand Down
36 changes: 12 additions & 24 deletions go/vt/discovery/keyspace_events.go
Original file line number Diff line number Diff line change
Expand Up @@ -23,14 +23,12 @@ import (

"google.golang.org/protobuf/proto"

"vitess.io/vitess/go/vt/key"
"vitess.io/vitess/go/vt/log"
"vitess.io/vitess/go/vt/proto/query"
topodatapb "vitess.io/vitess/go/vt/proto/topodata"
"vitess.io/vitess/go/vt/srvtopo"
"vitess.io/vitess/go/vt/topo"
"vitess.io/vitess/go/vt/topo/topoproto"

querypb "vitess.io/vitess/go/vt/proto/query"
topodatapb "vitess.io/vitess/go/vt/proto/topodata"
)

// KeyspaceEventWatcher is an auxiliary watcher that watches all availability incidents
Expand Down Expand Up @@ -67,7 +65,7 @@ type KeyspaceEvent struct {

type ShardEvent struct {
Tablet *topodatapb.TabletAlias
Target *querypb.Target
Target *query.Target
Serving bool
}

Expand Down Expand Up @@ -126,27 +124,17 @@ func (kss *keyspaceState) beingResharded(currentShard string) bool {
kss.mu.Lock()
defer kss.mu.Unlock()

// If the keyspace is gone, has no known availability events, or is in the middle of a
// MoveTables then the keyspace cannot be in the middle of a resharding operation.
// if the keyspace is gone, or if it has no known availability events, the keyspace
// cannot be in the middle of a resharding operation
if kss.deleted || kss.consistent {
return false
}

// If there are unequal and overlapping shards in the keyspace and any of them are
// currently serving then we assume that we are in the middle of a Reshard.
_, ckr, err := topo.ValidateShardName(currentShard)
if err != nil || ckr == nil { // Assume not and avoid potential panic
return false
}
// for all the known shards, try to find a primary shard besides the one we're trying to access
// and which is currently healthy. if there are other healthy primaries in the keyspace, it means
// we're in the middle of a resharding operation
for shard, sstate := range kss.shards {
if !sstate.serving || shard == currentShard {
continue
}
_, skr, err := topo.ValidateShardName(shard)
if err != nil || skr == nil { // Assume not and avoid potential panic
return false
}
if key.KeyRangesIntersect(ckr, skr) {
if shard != currentShard && sstate.serving {
return true
}
}
Expand All @@ -155,7 +143,7 @@ func (kss *keyspaceState) beingResharded(currentShard string) bool {
}

type shardState struct {
target *querypb.Target
target *query.Target
serving bool
externallyReparented int64
currentPrimary *topodatapb.TabletAlias
Expand Down Expand Up @@ -438,7 +426,7 @@ func (kew *KeyspaceEventWatcher) getKeyspaceStatus(keyspace string) *keyspaceSta
// This is not a fully accurate heuristic, but it's good enough that we'd want to buffer the
// request for the given target under the assumption that the reason why it cannot be completed
// right now is transitory.
func (kew *KeyspaceEventWatcher) TargetIsBeingResharded(target *querypb.Target) bool {
func (kew *KeyspaceEventWatcher) TargetIsBeingResharded(target *query.Target) bool {
if target.TabletType != topodatapb.TabletType_PRIMARY {
return false
}
Expand All @@ -458,7 +446,7 @@ func (kew *KeyspaceEventWatcher) TargetIsBeingResharded(target *querypb.Target)
// The shard state keeps track of the current primary and the last externally reparented time, which we can use
// to determine that there was a serving primary which now became non serving. This is only possible in a DemotePrimary
// RPC which are only called from ERS and PRS. So buffering will stop when these operations succeed.
func (kew *KeyspaceEventWatcher) PrimaryIsNotServing(target *querypb.Target) bool {
func (kew *KeyspaceEventWatcher) PrimaryIsNotServing(target *query.Target) bool {
if target.TabletType != topodatapb.TabletType_PRIMARY {
return false
}
Expand Down
221 changes: 2 additions & 219 deletions go/vt/discovery/keyspace_events_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -24,12 +24,10 @@ import (

"github.com/stretchr/testify/require"

"vitess.io/vitess/go/vt/topo"
"vitess.io/vitess/go/vt/topo/faketopo"

querypb "vitess.io/vitess/go/vt/proto/query"
topodatapb "vitess.io/vitess/go/vt/proto/topodata"
vschemapb "vitess.io/vitess/go/vt/proto/vschema"
"vitess.io/vitess/go/vt/topo"
"vitess.io/vitess/go/vt/topo/faketopo"
)

func TestSrvKeyspaceWithNilNewKeyspace(t *testing.T) {
Expand All @@ -40,7 +38,6 @@ func TestSrvKeyspaceWithNilNewKeyspace(t *testing.T) {
ts := faketopo.NewFakeTopoServer(factory)
ts2 := &fakeTopoServer{}
hc := NewHealthCheck(context.Background(), 1*time.Millisecond, time.Hour, ts, cell, "")
defer hc.Close()
kew := NewKeyspaceEventWatcher(context.Background(), ts2, hc, cell)
kss := &keyspaceState{
kew: kew,
Expand All @@ -58,220 +55,6 @@ func TestSrvKeyspaceWithNilNewKeyspace(t *testing.T) {
require.True(t, kss.onSrvKeyspace(nil, nil))
}

// TestKeyspaceEventTypes confirms that the keyspace event watcher determines
// that the unavailability event is caused by the correct scenario. We should
// consider it to be caused by a resharding operation when the following
// conditions are present:
// 1. The keyspace is inconsistent (in the middle of an availability event)
// 2. The target tablet is a primary
// 3. The keyspace has overlapping shards
// 4. The overlapping shard's tablet is serving
// And we should consider the cause to be a primary not serving when the
// following conditions exist:
// 1. The keyspace is inconsistent (in the middle of an availability event)
// 2. The target tablet is a primary
// 3. The target tablet is not serving
// 4. The shard's externallyReparented time is not 0
// 5. The shard's currentPrimary state is not nil
// We should never consider both as a possible cause given the same
// keyspace state.
func TestKeyspaceEventTypes(t *testing.T) {
cell := "cell"
keyspace := "testks"
factory := faketopo.NewFakeTopoFactory()
factory.AddCell(cell)
ts := faketopo.NewFakeTopoServer(factory)
ts2 := &fakeTopoServer{}
hc := NewHealthCheck(context.Background(), 1*time.Millisecond, time.Hour, ts, cell, "")
defer hc.Close()
kew := NewKeyspaceEventWatcher(context.Background(), ts2, hc, cell)

type testCase struct {
name string
kss *keyspaceState
shardToCheck string
expectResharding bool
expectPrimaryNotServing bool
}

testCases := []testCase{
{
name: "one to two resharding in progress",
kss: &keyspaceState{
kew: kew,
keyspace: keyspace,
shards: map[string]*shardState{
"-": {
target: &querypb.Target{
Keyspace: keyspace,
Shard: "-",
TabletType: topodatapb.TabletType_PRIMARY,
},
serving: false,
},
"-80": {
target: &querypb.Target{
Keyspace: keyspace,
Shard: "-80",
TabletType: topodatapb.TabletType_PRIMARY,
},
serving: true,
},
"80-": {
target: &querypb.Target{
Keyspace: keyspace,
Shard: "80-",
TabletType: topodatapb.TabletType_PRIMARY,
},
serving: false,
},
},
consistent: false,
},
shardToCheck: "-",
expectResharding: true,
expectPrimaryNotServing: false,
},
{
name: "two to four resharding in progress",
kss: &keyspaceState{
kew: kew,
keyspace: keyspace,
shards: map[string]*shardState{
"-80": {
target: &querypb.Target{
Keyspace: keyspace,
Shard: "-80",
TabletType: topodatapb.TabletType_PRIMARY,
},
serving: false,
},
"80-": {
target: &querypb.Target{
Keyspace: keyspace,
Shard: "80-",
TabletType: topodatapb.TabletType_PRIMARY,
},
serving: true,
},
"-40": {
target: &querypb.Target{
Keyspace: keyspace,
Shard: "-40",
TabletType: topodatapb.TabletType_PRIMARY,
},
serving: true,
},
"40-80": {
target: &querypb.Target{
Keyspace: keyspace,
Shard: "40-80",
TabletType: topodatapb.TabletType_PRIMARY,
},
serving: true,
},
"80-c0": {
target: &querypb.Target{
Keyspace: keyspace,
Shard: "80-c0",
TabletType: topodatapb.TabletType_PRIMARY,
},
serving: false,
},
"c0-": {
target: &querypb.Target{
Keyspace: keyspace,
Shard: "c0-",
TabletType: topodatapb.TabletType_PRIMARY,
},
serving: false,
},
},
consistent: false,
},
shardToCheck: "-80",
expectResharding: true,
expectPrimaryNotServing: false,
},
{
name: "unsharded primary not serving",
kss: &keyspaceState{
kew: kew,
keyspace: keyspace,
shards: map[string]*shardState{
"-": {
target: &querypb.Target{
Keyspace: keyspace,
Shard: "-",
TabletType: topodatapb.TabletType_PRIMARY,
},
serving: false,
externallyReparented: time.Now().UnixNano(),
currentPrimary: &topodatapb.TabletAlias{
Cell: cell,
Uid: 100,
},
},
},
consistent: false,
},
shardToCheck: "-",
expectResharding: false,
expectPrimaryNotServing: true,
},
{
name: "sharded primary not serving",
kss: &keyspaceState{
kew: kew,
keyspace: keyspace,
shards: map[string]*shardState{
"-80": {
target: &querypb.Target{
Keyspace: keyspace,
Shard: "-80",
TabletType: topodatapb.TabletType_PRIMARY,
},
serving: false,
externallyReparented: time.Now().UnixNano(),
currentPrimary: &topodatapb.TabletAlias{
Cell: cell,
Uid: 100,
},
},
"80-": {
target: &querypb.Target{
Keyspace: keyspace,
Shard: "80-",
TabletType: topodatapb.TabletType_PRIMARY,
},
serving: true,
},
},
consistent: false,
},
shardToCheck: "-80",
expectResharding: false,
expectPrimaryNotServing: true,
},
}

for _, tc := range testCases {
t.Run(tc.name, func(t *testing.T) {
kew.mu.Lock()
kew.keyspaces[keyspace] = tc.kss
kew.mu.Unlock()

require.NotNil(t, tc.kss.shards[tc.shardToCheck], "the specified shardToCheck of %q does not exist in the shardState", tc.shardToCheck)

resharding := kew.TargetIsBeingResharded(tc.kss.shards[tc.shardToCheck].target)
require.Equal(t, resharding, tc.expectResharding, "TargetIsBeingResharded should return %t", tc.expectResharding)

primaryDown := kew.PrimaryIsNotServing(tc.kss.shards[tc.shardToCheck].target)
require.Equal(t, primaryDown, tc.expectPrimaryNotServing, "PrimaryIsNotServing should return %t", tc.expectPrimaryNotServing)
})
}
}

type fakeTopoServer struct {
}

Expand Down

0 comments on commit 96f84e9

Please sign in to comment.