Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add sql text counts stats to vtcombo,vtgate+vttablet #15897

Merged
merged 6 commits into from
Jun 4, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 7 additions & 0 deletions changelog/20.0/20.0.0/summary.md
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,7 @@
- **[Minor Changes](#minor-changes)**
- **[New Stats](#new-stats)**
- [VTTablet Query Cache Hits and Misses](#vttablet-query-cache-hits-and-misses)
- [VTGate and VTTablet Query Text Characters Processed](#vttablet-query-text-characters-processed)
- **[`SIGHUP` reload of gRPC client static auth creds](#sighup-reload-of-grpc-client-auth-creds)**
- **[VTAdmin](#vtadmin)**
- [Updated to node v20.12.2](#updated-node)
Expand Down Expand Up @@ -325,6 +326,12 @@ VTTablet exposes two new counter stats:
* `QueryCacheHits`: Query engine query cache hits
* `QueryCacheMisses`: Query engine query cache misses

### <a id="#vttablet-query-text-characters-processed"/>VTTablet Query Text Characters Processed

VTGate and VTTablet expose a new counter stat `QueryTextCharactersProcessed` to reflect the number of query text characters processed.

VTGate groups this metric by Operation, Keyspace and TabletType. On VTTablet it is grouped by Table, Plan and optionally Workload.

### <a id="sighup-reload-of-grpc-client-auth-creds"/>`SIGHUP` reload of gRPC client static auth creds

The internal gRPC client now caches the static auth credentials and supports reloading via the `SIGHUP` signal. Previous to v20 the credentials were not cached. They were re-loaded from disk on every use.
Expand Down
4 changes: 2 additions & 2 deletions go/vt/vtgate/plugin_mysql_server_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -349,7 +349,7 @@ func TestKillMethods(t *testing.T) {
func TestGracefulShutdown(t *testing.T) {
executor, _, _, _, _ := createExecutorEnv(t)

vh := newVtgateHandler(&VTGate{executor: executor, timings: timings, rowsReturned: rowsReturned, rowsAffected: rowsAffected})
vh := newVtgateHandler(&VTGate{executor: executor, timings: timings, rowsReturned: rowsReturned, rowsAffected: rowsAffected, queryTextCharsProcessed: queryTextCharsProcessed})
th := &testHandler{}
listener, err := mysql.NewListener("tcp", "127.0.0.1:", mysql.NewAuthServerNone(), th, 0, 0, false, false, 0, 0)
require.NoError(t, err)
Expand Down Expand Up @@ -379,7 +379,7 @@ func TestGracefulShutdown(t *testing.T) {
func TestGracefulShutdownWithTransaction(t *testing.T) {
executor, _, _, _, _ := createExecutorEnv(t)

vh := newVtgateHandler(&VTGate{executor: executor, timings: timings, rowsReturned: rowsReturned, rowsAffected: rowsAffected})
vh := newVtgateHandler(&VTGate{executor: executor, timings: timings, rowsReturned: rowsReturned, rowsAffected: rowsAffected, queryTextCharsProcessed: queryTextCharsProcessed})
th := &testHandler{}
listener, err := mysql.NewListener("tcp", "127.0.0.1:", mysql.NewAuthServerNone(), th, 0, 0, false, false, 0, 0)
require.NoError(t, err)
Expand Down
30 changes: 19 additions & 11 deletions go/vt/vtgate/vtgate.go
Original file line number Diff line number Diff line change
Expand Up @@ -209,6 +209,11 @@ var (
"VtgateApiRowsAffected",
"Rows affected by a write (DML) operation through the VTgate API",
[]string{"Operation", "Keyspace", "DbType"})

queryTextCharsProcessed = stats.NewCountersWithMultiLabels(
"VtgateQueryTextCharactersProcessed",
"Query text characters processed through the VTGate API",
[]string{"Operation", "Keyspace", "DbType"})
)

// VTGate is the rpc interface to vtgate. Only one instance
Expand All @@ -225,9 +230,10 @@ type VTGate struct {
// stats objects.
// TODO(sougou): This needs to be cleaned up. There
// are global vars that depend on this member var.
timings *stats.MultiTimings
rowsReturned *stats.CountersWithMultiLabels
rowsAffected *stats.CountersWithMultiLabels
timings *stats.MultiTimings
rowsReturned *stats.CountersWithMultiLabels
rowsAffected *stats.CountersWithMultiLabels
queryTextCharsProcessed *stats.CountersWithMultiLabels

// the throttled loggers for all errors, one per API entry
logExecute *logutil.ThrottledLogger
Expand Down Expand Up @@ -459,6 +465,7 @@ func (vtg *VTGate) Execute(ctx context.Context, mysqlCtx vtgateservice.MySQLConn
if err == nil {
vtg.rowsReturned.Add(statsKey, int64(len(qr.Rows)))
vtg.rowsAffected.Add(statsKey, int64(qr.RowsAffected))
vtg.queryTextCharsProcessed.Add(statsKey, int64(len(sql)))
return session, qr, nil
}

Expand Down Expand Up @@ -669,14 +676,15 @@ func (vtg *VTGate) HandlePanic(err *error) {

func newVTGate(executor *Executor, resolver *Resolver, vsm *vstreamManager, tc *TxConn, gw *TabletGateway) *VTGate {
return &VTGate{
executor: executor,
resolver: resolver,
vsm: vsm,
txConn: tc,
gw: gw,
timings: timings,
rowsReturned: rowsReturned,
rowsAffected: rowsAffected,
executor: executor,
resolver: resolver,
vsm: vsm,
txConn: tc,
gw: gw,
timings: timings,
rowsReturned: rowsReturned,
rowsAffected: rowsAffected,
queryTextCharsProcessed: queryTextCharsProcessed,

logExecute: logutil.NewThrottledLogger("Execute", 5*time.Second),
logPrepare: logutil.NewThrottledLogger("Prepare", 5*time.Second),
Expand Down
18 changes: 11 additions & 7 deletions go/vt/vttablet/tabletserver/query_engine.go
Original file line number Diff line number Diff line change
Expand Up @@ -187,8 +187,8 @@ type QueryEngine struct {

// stats
// Note: queryErrorCountsWithCode is similar to queryErrorCounts except it contains error code as an additional dimension
queryCounts, queryCountsWithTabletType, queryTimes, queryErrorCounts, queryErrorCountsWithCode, queryRowsAffected, queryRowsReturned *stats.CountersWithMultiLabels
queryCacheHits, queryCacheMisses *stats.CounterFunc
queryCounts, queryCountsWithTabletType, queryTimes, queryErrorCounts, queryErrorCountsWithCode, queryRowsAffected, queryRowsReturned, queryTextCharsProcessed *stats.CountersWithMultiLabels
queryCacheHits, queryCacheMisses *stats.CounterFunc

// stats flags
enablePerWorkloadTableMetrics bool
Expand Down Expand Up @@ -298,6 +298,7 @@ func NewQueryEngine(env tabletenv.Env, se *schema.Engine) *QueryEngine {
qe.queryTimes = env.Exporter().NewCountersWithMultiLabels("QueryTimesNs", "query times in ns", labels)
qe.queryRowsAffected = env.Exporter().NewCountersWithMultiLabels("QueryRowsAffected", "query rows affected", labels)
qe.queryRowsReturned = env.Exporter().NewCountersWithMultiLabels("QueryRowsReturned", "query rows returned", labels)
qe.queryTextCharsProcessed = env.Exporter().NewCountersWithMultiLabels("QueryTextCharactersProcessed", "query text characters processed", labels)
qe.queryErrorCounts = env.Exporter().NewCountersWithMultiLabels("QueryErrorCounts", "query error counts", labels)
qe.queryErrorCountsWithCode = env.Exporter().NewCountersWithMultiLabels("QueryErrorCountsWithCode", "query error counts with error code", []string{"Table", "Plan", "Code"})

Expand Down Expand Up @@ -559,30 +560,33 @@ func (qe *QueryEngine) QueryPlanCacheLen() (count int) {
}

// AddStats adds the given stats for the planName.tableName
func (qe *QueryEngine) AddStats(planType planbuilder.PlanType, tableName, workload string, tabletType topodata.TabletType, queryCount int64, duration, mysqlTime time.Duration, rowsAffected, rowsReturned, errorCount int64, errorCode string) {
func (qe *QueryEngine) AddStats(plan *TabletPlan, tableName, workload string, tabletType topodata.TabletType, queryCount int64, duration, mysqlTime time.Duration, rowsAffected, rowsReturned, errorCount int64, errorCode string) {
// table names can contain "." characters, replace them!
keys := []string{tableName, planType.String()}
keys := []string{tableName, plan.PlanID.String()}
// Only use the workload as a label if that's enabled in the configuration.
if qe.enablePerWorkloadTableMetrics {
keys = append(keys, workload)
}
qe.queryCounts.Add(keys, queryCount)
qe.queryTimes.Add(keys, int64(duration))
qe.queryErrorCounts.Add(keys, errorCount)
if plan.FullQuery != nil {
qe.queryTextCharsProcessed.Add(keys, int64(len(plan.FullQuery.Query)))
}

qe.queryCountsWithTabletType.Add([]string{tableName, planType.String(), tabletType.String()}, queryCount)
qe.queryCountsWithTabletType.Add([]string{tableName, plan.PlanID.String(), tabletType.String()}, queryCount)

// queryErrorCountsWithCode is similar to queryErrorCounts except we have an additional dimension
// of error code.
if errorCount > 0 {
errorKeys := []string{tableName, planType.String(), errorCode}
errorKeys := []string{tableName, plan.PlanID.String(), errorCode}
qe.queryErrorCountsWithCode.Add(errorKeys, errorCount)
}

// For certain plan types like select, we only want to add their metrics to rows returned
// But there are special cases like `SELECT ... INTO OUTFILE ''` which return positive rows affected
// So we check if it is positive and add that too.
switch planType {
switch plan.PlanID {
case planbuilder.PlanSelect, planbuilder.PlanSelectStream, planbuilder.PlanSelectImpossible, planbuilder.PlanShow, planbuilder.PlanOtherRead:
qe.queryRowsReturned.Add(keys, rowsReturned)
if rowsAffected > 0 {
Expand Down
45 changes: 34 additions & 11 deletions go/vt/vttablet/tabletserver/query_engine_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -635,9 +635,21 @@ func TestPlanCachePollution(t *testing.T) {
}

func TestAddQueryStats(t *testing.T) {
fakeSelectPlan := &TabletPlan{
Plan: &planbuilder.Plan{
PlanID: planbuilder.PlanSelect,
FullQuery: &sqlparser.ParsedQuery{Query: `select * from something where something=123`}, // 43 length
},
}
fakeInsertPlan := &TabletPlan{
Plan: &planbuilder.Plan{
PlanID: planbuilder.PlanInsert,
FullQuery: &sqlparser.ParsedQuery{Query: `insert into something (id, msg) values(123, 'hello world!')`}, // 59 length
},
}
testcases := []struct {
name string
planType planbuilder.PlanType
plan *TabletPlan
tableName string
tabletType topodata.TabletType
queryCount int64
Expand All @@ -654,12 +666,13 @@ func TestAddQueryStats(t *testing.T) {
expectedQueryTimes string
expectedQueryRowsAffected string
expectedQueryRowsReturned string
expectedQueryTextCharsProcessed string
expectedQueryErrorCounts string
expectedQueryErrorCountsWithCode string
}{
{
name: "select query",
planType: planbuilder.PlanSelect,
plan: fakeSelectPlan,
tableName: "A",
tabletType: topodata.TabletType_PRIMARY,
queryCount: 1,
Expand All @@ -674,12 +687,13 @@ func TestAddQueryStats(t *testing.T) {
expectedQueryTimes: `{"A.Select": 10}`,
expectedQueryRowsAffected: `{}`,
expectedQueryRowsReturned: `{"A.Select": 15}`,
expectedQueryTextCharsProcessed: `{"A.Select": 43}`,
expectedQueryErrorCounts: `{"A.Select": 0}`,
expectedQueryErrorCountsWithCode: `{}`,
expectedQueryCountsWithTableType: `{"A.Select.PRIMARY": 1}`,
}, {
name: "select query against a replica",
planType: planbuilder.PlanSelect,
plan: fakeSelectPlan,
tableName: "A",
tabletType: topodata.TabletType_REPLICA,
queryCount: 1,
Expand All @@ -694,12 +708,13 @@ func TestAddQueryStats(t *testing.T) {
expectedQueryTimes: `{"A.Select": 10}`,
expectedQueryRowsAffected: `{}`,
expectedQueryRowsReturned: `{"A.Select": 15}`,
expectedQueryTextCharsProcessed: `{"A.Select": 43}`,
expectedQueryErrorCounts: `{"A.Select": 0}`,
expectedQueryErrorCountsWithCode: `{}`,
expectedQueryCountsWithTableType: `{"A.Select.REPLICA": 1}`,
}, {
name: "select into query",
planType: planbuilder.PlanSelect,
plan: fakeSelectPlan,
tableName: "A",
tabletType: topodata.TabletType_PRIMARY,
queryCount: 1,
Expand All @@ -714,12 +729,13 @@ func TestAddQueryStats(t *testing.T) {
expectedQueryTimes: `{"A.Select": 10}`,
expectedQueryRowsAffected: `{"A.Select": 15}`,
expectedQueryRowsReturned: `{"A.Select": 0}`,
expectedQueryTextCharsProcessed: `{"A.Select": 43}`,
expectedQueryErrorCounts: `{"A.Select": 0}`,
expectedQueryErrorCountsWithCode: `{}`,
expectedQueryCountsWithTableType: `{"A.Select.PRIMARY": 1}`,
}, {
name: "error",
planType: planbuilder.PlanSelect,
plan: fakeSelectPlan,
tableName: "A",
tabletType: topodata.TabletType_PRIMARY,
queryCount: 1,
Expand All @@ -734,12 +750,13 @@ func TestAddQueryStats(t *testing.T) {
expectedQueryTimes: `{"A.Select": 10}`,
expectedQueryRowsAffected: `{}`,
expectedQueryRowsReturned: `{"A.Select": 0}`,
expectedQueryTextCharsProcessed: `{"A.Select": 43}`,
expectedQueryErrorCounts: `{"A.Select": 1}`,
expectedQueryErrorCountsWithCode: `{"A.Select.RESOURCE_EXHAUSTED": 1}`,
expectedQueryCountsWithTableType: `{"A.Select.PRIMARY": 1}`,
}, {
name: "insert query",
planType: planbuilder.PlanInsert,
plan: fakeInsertPlan,
tableName: "A",
tabletType: topodata.TabletType_PRIMARY,
queryCount: 1,
Expand All @@ -754,12 +771,13 @@ func TestAddQueryStats(t *testing.T) {
expectedQueryTimes: `{"A.Insert": 10}`,
expectedQueryRowsAffected: `{"A.Insert": 15}`,
expectedQueryRowsReturned: `{}`,
expectedQueryTextCharsProcessed: `{"A.Insert": 59}`,
expectedQueryErrorCounts: `{"A.Insert": 0}`,
expectedQueryErrorCountsWithCode: `{}`,
expectedQueryCountsWithTableType: `{"A.Insert.PRIMARY": 1}`,
}, {
name: "select query with per workload metrics",
planType: planbuilder.PlanSelect,
plan: fakeSelectPlan,
tableName: "A",
tabletType: topodata.TabletType_PRIMARY,
queryCount: 1,
Expand All @@ -774,12 +792,13 @@ func TestAddQueryStats(t *testing.T) {
expectedQueryTimes: `{"A.Select.some-workload": 10}`,
expectedQueryRowsAffected: `{}`,
expectedQueryRowsReturned: `{"A.Select.some-workload": 15}`,
expectedQueryTextCharsProcessed: `{"A.Select.some-workload": 43}`,
expectedQueryErrorCounts: `{"A.Select.some-workload": 0}`,
expectedQueryErrorCountsWithCode: `{}`,
expectedQueryCountsWithTableType: `{"A.Select.PRIMARY": 1}`,
}, {
name: "select into query with per workload metrics",
planType: planbuilder.PlanSelect,
plan: fakeSelectPlan,
tableName: "A",
tabletType: topodata.TabletType_PRIMARY,
queryCount: 1,
Expand All @@ -794,12 +813,13 @@ func TestAddQueryStats(t *testing.T) {
expectedQueryTimes: `{"A.Select.some-workload": 10}`,
expectedQueryRowsAffected: `{"A.Select.some-workload": 15}`,
expectedQueryRowsReturned: `{"A.Select.some-workload": 0}`,
expectedQueryTextCharsProcessed: `{"A.Select.some-workload": 43}`,
expectedQueryErrorCounts: `{"A.Select.some-workload": 0}`,
expectedQueryErrorCountsWithCode: `{}`,
expectedQueryCountsWithTableType: `{"A.Select.PRIMARY": 1}`,
}, {
name: "error with per workload metrics",
planType: planbuilder.PlanSelect,
plan: fakeSelectPlan,
tableName: "A",
tabletType: topodata.TabletType_PRIMARY,
queryCount: 1,
Expand All @@ -814,12 +834,13 @@ func TestAddQueryStats(t *testing.T) {
expectedQueryTimes: `{"A.Select.some-workload": 10}`,
expectedQueryRowsAffected: `{}`,
expectedQueryRowsReturned: `{"A.Select.some-workload": 0}`,
expectedQueryTextCharsProcessed: `{"A.Select.some-workload": 43}`,
expectedQueryErrorCounts: `{"A.Select.some-workload": 1}`,
expectedQueryErrorCountsWithCode: `{"A.Select.RESOURCE_EXHAUSTED": 1}`,
expectedQueryCountsWithTableType: `{"A.Select.PRIMARY": 1}`,
}, {
name: "insert query with per workload metrics",
planType: planbuilder.PlanInsert,
plan: fakeInsertPlan,
tableName: "A",
tabletType: topodata.TabletType_PRIMARY,
queryCount: 1,
Expand All @@ -834,6 +855,7 @@ func TestAddQueryStats(t *testing.T) {
expectedQueryTimes: `{"A.Insert.some-workload": 10}`,
expectedQueryRowsAffected: `{"A.Insert.some-workload": 15}`,
expectedQueryRowsReturned: `{}`,
expectedQueryTextCharsProcessed: `{"A.Insert.some-workload": 59}`,
expectedQueryErrorCounts: `{"A.Insert.some-workload": 0}`,
expectedQueryErrorCountsWithCode: `{}`,
expectedQueryCountsWithTableType: `{"A.Insert.PRIMARY": 1}`,
Expand All @@ -849,12 +871,13 @@ func TestAddQueryStats(t *testing.T) {
env := tabletenv.NewEnv(vtenv.NewTestEnv(), cfg, "TestAddQueryStats_"+testcase.name)
se := schema.NewEngine(env)
qe := NewQueryEngine(env, se)
qe.AddStats(testcase.planType, testcase.tableName, testcase.workload, testcase.tabletType, testcase.queryCount, testcase.duration, testcase.mysqlTime, testcase.rowsAffected, testcase.rowsReturned, testcase.errorCount, testcase.errorCode)
qe.AddStats(testcase.plan, testcase.tableName, testcase.workload, testcase.tabletType, testcase.queryCount, testcase.duration, testcase.mysqlTime, testcase.rowsAffected, testcase.rowsReturned, testcase.errorCount, testcase.errorCode)
assert.Equal(t, testcase.expectedQueryCounts, qe.queryCounts.String())
assert.Equal(t, testcase.expectedQueryCountsWithTableType, qe.queryCountsWithTabletType.String())
assert.Equal(t, testcase.expectedQueryTimes, qe.queryTimes.String())
assert.Equal(t, testcase.expectedQueryRowsAffected, qe.queryRowsAffected.String())
assert.Equal(t, testcase.expectedQueryRowsReturned, qe.queryRowsReturned.String())
assert.Equal(t, testcase.expectedQueryTextCharsProcessed, qe.queryTextCharsProcessed.String())
assert.Equal(t, testcase.expectedQueryErrorCounts, qe.queryErrorCounts.String())
assert.Equal(t, testcase.expectedQueryErrorCountsWithCode, qe.queryErrorCountsWithCode.String())
})
Expand Down
4 changes: 2 additions & 2 deletions go/vt/vttablet/tabletserver/query_executor.go
Original file line number Diff line number Diff line change
Expand Up @@ -139,12 +139,12 @@ func (qre *QueryExecutor) Execute() (reply *sqltypes.Result, err error) {
errCode = vtErrorCode.String()

if reply == nil {
qre.tsv.qe.AddStats(qre.plan.PlanID, tableName, qre.options.GetWorkloadName(), qre.targetTabletType, 1, duration, mysqlTime, 0, 0, 1, errCode)
qre.tsv.qe.AddStats(qre.plan, tableName, qre.options.GetWorkloadName(), qre.targetTabletType, 1, duration, mysqlTime, 0, 0, 1, errCode)
qre.plan.AddStats(1, duration, mysqlTime, 0, 0, 1)
return
}

qre.tsv.qe.AddStats(qre.plan.PlanID, tableName, qre.options.GetWorkloadName(), qre.targetTabletType, 1, duration, mysqlTime, int64(reply.RowsAffected), int64(len(reply.Rows)), 0, errCode)
qre.tsv.qe.AddStats(qre.plan, tableName, qre.options.GetWorkloadName(), qre.targetTabletType, 1, duration, mysqlTime, int64(reply.RowsAffected), int64(len(reply.Rows)), 0, errCode)
qre.plan.AddStats(1, duration, mysqlTime, reply.RowsAffected, uint64(len(reply.Rows)), 0)
qre.logStats.RowsAffected = int(reply.RowsAffected)
qre.logStats.Rows = reply.Rows
Expand Down
Loading