From 723151ce0ee0d431560300cb8589baea197d25d2 Mon Sep 17 00:00:00 2001 From: Michael Demmer Date: Mon, 8 Jul 2024 12:01:29 -0700 Subject: [PATCH] rework tabletbalancer interface to avoid unnecessary sort There's actually no need in the tabletgateway to ever sort the full list of potential tablet candidates, because each query attempt only goes to a single tablet, and then if that fails, the withRetry loop records which tablet failed and then re-enters the loop to try again. Therefore, refactor the balancer to replace ShuffleTablets with a simple Pick interface that just returns the best tablet to route to. --- go/vt/vtgate/balancer/balancer.go | 30 +++++++++++------------ go/vt/vtgate/balancer/balancer_test.go | 18 +++++++------- go/vt/vtgate/tabletgateway.go | 33 ++++++++++++++++++-------- 3 files changed, 46 insertions(+), 35 deletions(-) diff --git a/go/vt/vtgate/balancer/balancer.go b/go/vt/vtgate/balancer/balancer.go index 462ccfda901..30cf862f890 100644 --- a/go/vt/vtgate/balancer/balancer.go +++ b/go/vt/vtgate/balancer/balancer.go @@ -88,8 +88,9 @@ converge on the desired balanced query load. */ type TabletBalancer interface { - // Randomly shuffle the tablets into an order for routing queries - ShuffleTablets(target *querypb.Target, tablets []*discovery.TabletHealth) + // Pick is the main entry point to the balancer. Returns the best tablet out of the list + // for a given query to maintain the desired balanced allocation over multiple executions. + Pick(target *querypb.Target, tablets []*discovery.TabletHealth) *discovery.TabletHealth // Balancer debug page request DebugHandler(w http.ResponseWriter, r *http.Request) @@ -161,33 +162,30 @@ func (b *tabletBalancer) DebugHandler(w http.ResponseWriter, _ *http.Request) { fmt.Fprintf(w, "Allocations: %v\r\n", string(allocations)) } -// ShuffleTablets is the main entry point to the balancer. +// Pick is the main entry point to the balancer. // -// It shuffles the tablets into a preference list for routing a given query. -// However, since all tablets should be healthy, the query will almost always go -// to the first tablet in the list, so the balancer ranking algoritm randomly -// shuffles the list to break ties, then chooses a weighted random selection -// based on the balance alorithm to promote to the first in the set. -func (b *tabletBalancer) ShuffleTablets(target *querypb.Target, tablets []*discovery.TabletHealth) { +// Given the total allocation for the set of tablets, choose the best target +// by a weighted random sample so that over time the system will achieve the +// desired balanced allocation. +func (b *tabletBalancer) Pick(target *querypb.Target, tablets []*discovery.TabletHealth) *discovery.TabletHealth { numTablets := len(tablets) + if numTablets == 0 { + return nil + } allocationMap, totalAllocation := b.getAllocation(target, tablets) - rand.Shuffle(numTablets, func(i, j int) { tablets[i], tablets[j] = tablets[j], tablets[i] }) - - // Do another O(n) seek through the list to effect the weighted sample by picking - // a random point in the allocation space and seeking forward in the list of (randomized) - // tablets to that point, promoting the tablet to the head of the list. r := rand.Intn(totalAllocation) for i := 0; i < numTablets; i++ { flow := allocationMap[tablets[i].Tablet.Alias.Uid] if r < flow { - tablets[0], tablets[i] = tablets[i], tablets[0] - break + return tablets[i] } r -= flow } + + return tablets[0] } // To stick with integer arithmetic, use 1,000,000 as the full load diff --git a/go/vt/vtgate/balancer/balancer_test.go b/go/vt/vtgate/balancer/balancer_test.go index 1eb9e69fadf..b8a1095465e 100644 --- a/go/vt/vtgate/balancer/balancer_test.go +++ b/go/vt/vtgate/balancer/balancer_test.go @@ -223,7 +223,7 @@ func TestAllocateFlows(t *testing.T) { } } -func TestBalancedShuffle(t *testing.T) { +func TestBalancedPick(t *testing.T) { cases := []struct { test string tablets []*discovery.TabletHealth @@ -293,13 +293,13 @@ func TestBalancedShuffle(t *testing.T) { b := NewTabletBalancer(localCell, vtGateCells).(*tabletBalancer) for i := 0; i < N/len(vtGateCells); i++ { - b.ShuffleTablets(target, tablets) + th := b.Pick(target, tablets) if i == 0 { t.Logf("Target Flows %v, Balancer: %s\n", expectedPerCell, b.print()) t.Logf(b.print()) } - routed[tablets[0].Tablet.Alias.Uid]++ + routed[th.Tablet.Alias.Uid]++ } } @@ -334,25 +334,25 @@ func TestTopologyChanged(t *testing.T) { tablets = tablets[0:2] for i := 0; i < N; i++ { - b.ShuffleTablets(target, tablets) + th := b.Pick(target, tablets) allocation, totalAllocation := b.getAllocation(target, tablets) if totalAllocation != ALLOCATION/2 { t.Errorf("totalAllocation mismatch %s", b.print()) } - if allocation[allTablets[0].Tablet.Alias.Uid] != ALLOCATION/4 { + if allocation[th.Tablet.Alias.Uid] != ALLOCATION/4 { t.Errorf("allocation mismatch %s, cell %s", b.print(), allTablets[0].Tablet.Alias.Cell) } - if tablets[0].Tablet.Alias.Cell != "a" { + if th.Tablet.Alias.Cell != "a" { t.Errorf("shuffle promoted wrong tablet from cell %s", tablets[0].Tablet.Alias.Cell) } } // Run again with the full topology. Now traffic should go to cell b for i := 0; i < N; i++ { - b.ShuffleTablets(target, allTablets) + th := b.Pick(target, allTablets) allocation, totalAllocation := b.getAllocation(target, allTablets) @@ -360,11 +360,11 @@ func TestTopologyChanged(t *testing.T) { t.Errorf("totalAllocation mismatch %s", b.print()) } - if allocation[allTablets[0].Tablet.Alias.Uid] != ALLOCATION/4 { + if allocation[th.Tablet.Alias.Uid] != ALLOCATION/4 { t.Errorf("allocation mismatch %s, cell %s", b.print(), allTablets[0].Tablet.Alias.Cell) } - if allTablets[0].Tablet.Alias.Cell != "b" { + if th.Tablet.Alias.Cell != "b" { t.Errorf("shuffle promoted wrong tablet from cell %s", allTablets[0].Tablet.Alias.Cell) } } diff --git a/go/vt/vtgate/tabletgateway.go b/go/vt/vtgate/tabletgateway.go index eff7cdd02f4..06cb5174596 100644 --- a/go/vt/vtgate/tabletgateway.go +++ b/go/vt/vtgate/tabletgateway.go @@ -357,7 +357,7 @@ func (gw *TabletGateway) withRetry(ctx context.Context, target *querypb.Target, } // Determine whether or not to use the balancer or the standard affinity-based shuffle - useBalancer := false + useBalancer := balancerEnabled if balancerEnabled { if len(balancerKeyspaces) != 0 { for _, keyspace := range balancerKeyspaces { @@ -371,20 +371,33 @@ func (gw *TabletGateway) withRetry(ctx context.Context, target *querypb.Target, } } + var th *discovery.TabletHealth if useBalancer { - gw.balancer.ShuffleTablets(target, tablets) + // filter out the tablets that we've tried before (if any), then pick the best one + if len(invalidTablets) > 0 { + validTablets := make([]*discovery.TabletHealth, len(tablets)) + for _, t := range tablets { + if _, ok := invalidTablets[topoproto.TabletAliasString(t.Tablet.Alias)]; !ok { + validTablets = append(validTablets, th) + } + } + tablets = validTablets + } + + th = gw.balancer.Pick(target, tablets) + } else { + // shuffle sort the set of tablets with AZ-affinity, then pick the first one in the + // result set that we haven't tried before gw.shuffleTablets(gw.localCell, tablets) - } - - var th *discovery.TabletHealth - // skip tablets we tried before - for _, t := range tablets { - if _, ok := invalidTablets[topoproto.TabletAliasString(t.Tablet.Alias)]; !ok { - th = t - break + for _, t := range tablets { + if _, ok := invalidTablets[topoproto.TabletAliasString(t.Tablet.Alias)]; !ok { + th = t + break + } } } + if th == nil { // do not override error from last attempt. if err == nil {