Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add sql text counts stats to vtcombo,vtgate+vttablet #15897

Merged
merged 6 commits into from
Jun 4, 2024
Merged
Show file tree
Hide file tree
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions go/vt/vtgate/plugin_mysql_server_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -349,7 +349,7 @@ func TestKillMethods(t *testing.T) {
func TestGracefulShutdown(t *testing.T) {
executor, _, _, _, _ := createExecutorEnv(t)

vh := newVtgateHandler(&VTGate{executor: executor, timings: timings, rowsReturned: rowsReturned, rowsAffected: rowsAffected})
vh := newVtgateHandler(&VTGate{executor: executor, timings: timings, rowsReturned: rowsReturned, rowsAffected: rowsAffected, sqlTextCounts: sqlTextCounts})
th := &testHandler{}
listener, err := mysql.NewListener("tcp", "127.0.0.1:", mysql.NewAuthServerNone(), th, 0, 0, false, false, 0, 0)
require.NoError(t, err)
Expand Down Expand Up @@ -379,7 +379,7 @@ func TestGracefulShutdown(t *testing.T) {
func TestGracefulShutdownWithTransaction(t *testing.T) {
executor, _, _, _, _ := createExecutorEnv(t)

vh := newVtgateHandler(&VTGate{executor: executor, timings: timings, rowsReturned: rowsReturned, rowsAffected: rowsAffected})
vh := newVtgateHandler(&VTGate{executor: executor, timings: timings, rowsReturned: rowsReturned, rowsAffected: rowsAffected, sqlTextCounts: sqlTextCounts})
th := &testHandler{}
listener, err := mysql.NewListener("tcp", "127.0.0.1:", mysql.NewAuthServerNone(), th, 0, 0, false, false, 0, 0)
require.NoError(t, err)
Expand Down
30 changes: 19 additions & 11 deletions go/vt/vtgate/vtgate.go
Original file line number Diff line number Diff line change
Expand Up @@ -209,6 +209,11 @@ var (
"VtgateApiRowsAffected",
"Rows affected by a write (DML) operation through the VTgate API",
[]string{"Operation", "Keyspace", "DbType"})

sqlTextCounts = stats.NewCountersWithMultiLabels(
"VtgateSQLTextCounts",
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

How does /metrics render this one? If it turns out to be vtgate_s_q_l_text_counts, we'll want to rename it :)
In general, for any metrics changes, it is good to post screenshots or embedded text from both /debug/vars and /metrics into the PR description. That serves as a visual check for what we are emitting.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

OK, now I understand the intent of this metric, it won't work as implemented. Counters should only be used for monotonically increasing numbers, prometheus will not let you reduce the value of a "counter".
The right type of metric to use here is a Gauge, which can take any value, and change up or down. If I had read the linked issue properly, I would have realized this during the original review.
The name of the metric is also misleading, because it is not a "count" of some specific kind of sql text, it is instead the length in text of the query that is being processed, so the name needs to reflect that.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

QueryTextProcessed will in fact be a better name for the metric.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The way I understand it, the metric would be more appropriately named QueryProcessedTextLengths

"Vtgate API query SQL text counts",
[]string{"Operation", "Keyspace", "DbType"})
)

// VTGate is the rpc interface to vtgate. Only one instance
Expand All @@ -225,9 +230,10 @@ type VTGate struct {
// stats objects.
// TODO(sougou): This needs to be cleaned up. There
// are global vars that depend on this member var.
timings *stats.MultiTimings
rowsReturned *stats.CountersWithMultiLabels
rowsAffected *stats.CountersWithMultiLabels
timings *stats.MultiTimings
rowsReturned *stats.CountersWithMultiLabels
rowsAffected *stats.CountersWithMultiLabels
sqlTextCounts *stats.CountersWithMultiLabels

// the throttled loggers for all errors, one per API entry
logExecute *logutil.ThrottledLogger
Expand Down Expand Up @@ -459,6 +465,7 @@ func (vtg *VTGate) Execute(ctx context.Context, mysqlCtx vtgateservice.MySQLConn
if err == nil {
vtg.rowsReturned.Add(statsKey, int64(len(qr.Rows)))
vtg.rowsAffected.Add(statsKey, int64(qr.RowsAffected))
vtg.sqlTextCounts.Add(statsKey, int64(len(sql)))
return session, qr, nil
}

Expand Down Expand Up @@ -669,14 +676,15 @@ func (vtg *VTGate) HandlePanic(err *error) {

func newVTGate(executor *Executor, resolver *Resolver, vsm *vstreamManager, tc *TxConn, gw *TabletGateway) *VTGate {
return &VTGate{
executor: executor,
resolver: resolver,
vsm: vsm,
txConn: tc,
gw: gw,
timings: timings,
rowsReturned: rowsReturned,
rowsAffected: rowsAffected,
executor: executor,
resolver: resolver,
vsm: vsm,
txConn: tc,
gw: gw,
timings: timings,
rowsReturned: rowsReturned,
rowsAffected: rowsAffected,
sqlTextCounts: sqlTextCounts,

logExecute: logutil.NewThrottledLogger("Execute", 5*time.Second),
logPrepare: logutil.NewThrottledLogger("Prepare", 5*time.Second),
Expand Down
18 changes: 11 additions & 7 deletions go/vt/vttablet/tabletserver/query_engine.go
Original file line number Diff line number Diff line change
Expand Up @@ -187,8 +187,8 @@ type QueryEngine struct {

// stats
// Note: queryErrorCountsWithCode is similar to queryErrorCounts except it contains error code as an additional dimension
queryCounts, queryCountsWithTabletType, queryTimes, queryErrorCounts, queryErrorCountsWithCode, queryRowsAffected, queryRowsReturned *stats.CountersWithMultiLabels
queryCacheHits, queryCacheMisses *stats.CounterFunc
queryCounts, queryCountsWithTabletType, queryTimes, queryErrorCounts, queryErrorCountsWithCode, queryRowsAffected, queryRowsReturned, querySQLTextCounts *stats.CountersWithMultiLabels
queryCacheHits, queryCacheMisses *stats.CounterFunc

// stats flags
enablePerWorkloadTableMetrics bool
Expand Down Expand Up @@ -298,6 +298,7 @@ func NewQueryEngine(env tabletenv.Env, se *schema.Engine) *QueryEngine {
qe.queryTimes = env.Exporter().NewCountersWithMultiLabels("QueryTimesNs", "query times in ns", labels)
qe.queryRowsAffected = env.Exporter().NewCountersWithMultiLabels("QueryRowsAffected", "query rows affected", labels)
qe.queryRowsReturned = env.Exporter().NewCountersWithMultiLabels("QueryRowsReturned", "query rows returned", labels)
qe.querySQLTextCounts = env.Exporter().NewCountersWithMultiLabels("QuerySQLTextCounts", "query sql text counts", labels)
qe.queryErrorCounts = env.Exporter().NewCountersWithMultiLabels("QueryErrorCounts", "query error counts", labels)
qe.queryErrorCountsWithCode = env.Exporter().NewCountersWithMultiLabels("QueryErrorCountsWithCode", "query error counts with error code", []string{"Table", "Plan", "Code"})

Expand Down Expand Up @@ -559,30 +560,33 @@ func (qe *QueryEngine) QueryPlanCacheLen() (count int) {
}

// AddStats adds the given stats for the planName.tableName
func (qe *QueryEngine) AddStats(planType planbuilder.PlanType, tableName, workload string, tabletType topodata.TabletType, queryCount int64, duration, mysqlTime time.Duration, rowsAffected, rowsReturned, errorCount int64, errorCode string) {
func (qe *QueryEngine) AddStats(plan *TabletPlan, tableName, workload string, tabletType topodata.TabletType, queryCount int64, duration, mysqlTime time.Duration, rowsAffected, rowsReturned, errorCount int64, errorCode string) {
// table names can contain "." characters, replace them!
keys := []string{tableName, planType.String()}
keys := []string{tableName, plan.PlanID.String()}
// Only use the workload as a label if that's enabled in the configuration.
if qe.enablePerWorkloadTableMetrics {
keys = append(keys, workload)
}
qe.queryCounts.Add(keys, queryCount)
qe.queryTimes.Add(keys, int64(duration))
qe.queryErrorCounts.Add(keys, errorCount)
if plan.FullQuery != nil {
qe.querySQLTextCounts.Add(keys, int64(len(plan.FullQuery.Query)))
}

qe.queryCountsWithTabletType.Add([]string{tableName, planType.String(), tabletType.String()}, queryCount)
qe.queryCountsWithTabletType.Add([]string{tableName, plan.PlanID.String(), tabletType.String()}, queryCount)

// queryErrorCountsWithCode is similar to queryErrorCounts except we have an additional dimension
// of error code.
if errorCount > 0 {
errorKeys := []string{tableName, planType.String(), errorCode}
errorKeys := []string{tableName, plan.PlanID.String(), errorCode}
qe.queryErrorCountsWithCode.Add(errorKeys, errorCount)
}

// For certain plan types like select, we only want to add their metrics to rows returned
// But there are special cases like `SELECT ... INTO OUTFILE ''` which return positive rows affected
// So we check if it is positive and add that too.
switch planType {
switch plan.PlanID {
case planbuilder.PlanSelect, planbuilder.PlanSelectStream, planbuilder.PlanSelectImpossible, planbuilder.PlanShow, planbuilder.PlanOtherRead:
qe.queryRowsReturned.Add(keys, rowsReturned)
if rowsAffected > 0 {
Expand Down
45 changes: 34 additions & 11 deletions go/vt/vttablet/tabletserver/query_engine_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -635,9 +635,21 @@ func TestPlanCachePollution(t *testing.T) {
}

func TestAddQueryStats(t *testing.T) {
fakeSelectPlan := &TabletPlan{
Plan: &planbuilder.Plan{
PlanID: planbuilder.PlanSelect,
FullQuery: &sqlparser.ParsedQuery{Query: `select * from something where something=123`}, // 43 length
},
}
fakeInsertPlan := &TabletPlan{
Plan: &planbuilder.Plan{
PlanID: planbuilder.PlanInsert,
FullQuery: &sqlparser.ParsedQuery{Query: `insert into something (id, msg) values(123, 'hello world!')`}, // 59 length
},
}
testcases := []struct {
name string
planType planbuilder.PlanType
plan *TabletPlan
tableName string
tabletType topodata.TabletType
queryCount int64
Expand All @@ -654,12 +666,13 @@ func TestAddQueryStats(t *testing.T) {
expectedQueryTimes string
expectedQueryRowsAffected string
expectedQueryRowsReturned string
expectedQuerySQLTextCounts string
expectedQueryErrorCounts string
expectedQueryErrorCountsWithCode string
}{
{
name: "select query",
planType: planbuilder.PlanSelect,
plan: fakeSelectPlan,
tableName: "A",
tabletType: topodata.TabletType_PRIMARY,
queryCount: 1,
Expand All @@ -674,12 +687,13 @@ func TestAddQueryStats(t *testing.T) {
expectedQueryTimes: `{"A.Select": 10}`,
expectedQueryRowsAffected: `{}`,
expectedQueryRowsReturned: `{"A.Select": 15}`,
expectedQuerySQLTextCounts: `{"A.Select": 43}`,
expectedQueryErrorCounts: `{"A.Select": 0}`,
expectedQueryErrorCountsWithCode: `{}`,
expectedQueryCountsWithTableType: `{"A.Select.PRIMARY": 1}`,
}, {
name: "select query against a replica",
planType: planbuilder.PlanSelect,
plan: fakeSelectPlan,
tableName: "A",
tabletType: topodata.TabletType_REPLICA,
queryCount: 1,
Expand All @@ -694,12 +708,13 @@ func TestAddQueryStats(t *testing.T) {
expectedQueryTimes: `{"A.Select": 10}`,
expectedQueryRowsAffected: `{}`,
expectedQueryRowsReturned: `{"A.Select": 15}`,
expectedQuerySQLTextCounts: `{"A.Select": 43}`,
expectedQueryErrorCounts: `{"A.Select": 0}`,
expectedQueryErrorCountsWithCode: `{}`,
expectedQueryCountsWithTableType: `{"A.Select.REPLICA": 1}`,
}, {
name: "select into query",
planType: planbuilder.PlanSelect,
plan: fakeSelectPlan,
tableName: "A",
tabletType: topodata.TabletType_PRIMARY,
queryCount: 1,
Expand All @@ -714,12 +729,13 @@ func TestAddQueryStats(t *testing.T) {
expectedQueryTimes: `{"A.Select": 10}`,
expectedQueryRowsAffected: `{"A.Select": 15}`,
expectedQueryRowsReturned: `{"A.Select": 0}`,
expectedQuerySQLTextCounts: `{"A.Select": 43}`,
expectedQueryErrorCounts: `{"A.Select": 0}`,
expectedQueryErrorCountsWithCode: `{}`,
expectedQueryCountsWithTableType: `{"A.Select.PRIMARY": 1}`,
}, {
name: "error",
planType: planbuilder.PlanSelect,
plan: fakeSelectPlan,
tableName: "A",
tabletType: topodata.TabletType_PRIMARY,
queryCount: 1,
Expand All @@ -734,12 +750,13 @@ func TestAddQueryStats(t *testing.T) {
expectedQueryTimes: `{"A.Select": 10}`,
expectedQueryRowsAffected: `{}`,
expectedQueryRowsReturned: `{"A.Select": 0}`,
expectedQuerySQLTextCounts: `{"A.Select": 43}`,
expectedQueryErrorCounts: `{"A.Select": 1}`,
expectedQueryErrorCountsWithCode: `{"A.Select.RESOURCE_EXHAUSTED": 1}`,
expectedQueryCountsWithTableType: `{"A.Select.PRIMARY": 1}`,
}, {
name: "insert query",
planType: planbuilder.PlanInsert,
plan: fakeInsertPlan,
tableName: "A",
tabletType: topodata.TabletType_PRIMARY,
queryCount: 1,
Expand All @@ -754,12 +771,13 @@ func TestAddQueryStats(t *testing.T) {
expectedQueryTimes: `{"A.Insert": 10}`,
expectedQueryRowsAffected: `{"A.Insert": 15}`,
expectedQueryRowsReturned: `{}`,
expectedQuerySQLTextCounts: `{"A.Insert": 59}`,
expectedQueryErrorCounts: `{"A.Insert": 0}`,
expectedQueryErrorCountsWithCode: `{}`,
expectedQueryCountsWithTableType: `{"A.Insert.PRIMARY": 1}`,
}, {
name: "select query with per workload metrics",
planType: planbuilder.PlanSelect,
plan: fakeSelectPlan,
tableName: "A",
tabletType: topodata.TabletType_PRIMARY,
queryCount: 1,
Expand All @@ -774,12 +792,13 @@ func TestAddQueryStats(t *testing.T) {
expectedQueryTimes: `{"A.Select.some-workload": 10}`,
expectedQueryRowsAffected: `{}`,
expectedQueryRowsReturned: `{"A.Select.some-workload": 15}`,
expectedQuerySQLTextCounts: `{"A.Select.some-workload": 43}`,
expectedQueryErrorCounts: `{"A.Select.some-workload": 0}`,
expectedQueryErrorCountsWithCode: `{}`,
expectedQueryCountsWithTableType: `{"A.Select.PRIMARY": 1}`,
}, {
name: "select into query with per workload metrics",
planType: planbuilder.PlanSelect,
plan: fakeSelectPlan,
tableName: "A",
tabletType: topodata.TabletType_PRIMARY,
queryCount: 1,
Expand All @@ -794,12 +813,13 @@ func TestAddQueryStats(t *testing.T) {
expectedQueryTimes: `{"A.Select.some-workload": 10}`,
expectedQueryRowsAffected: `{"A.Select.some-workload": 15}`,
expectedQueryRowsReturned: `{"A.Select.some-workload": 0}`,
expectedQuerySQLTextCounts: `{"A.Select.some-workload": 43}`,
expectedQueryErrorCounts: `{"A.Select.some-workload": 0}`,
expectedQueryErrorCountsWithCode: `{}`,
expectedQueryCountsWithTableType: `{"A.Select.PRIMARY": 1}`,
}, {
name: "error with per workload metrics",
planType: planbuilder.PlanSelect,
plan: fakeSelectPlan,
tableName: "A",
tabletType: topodata.TabletType_PRIMARY,
queryCount: 1,
Expand All @@ -814,12 +834,13 @@ func TestAddQueryStats(t *testing.T) {
expectedQueryTimes: `{"A.Select.some-workload": 10}`,
expectedQueryRowsAffected: `{}`,
expectedQueryRowsReturned: `{"A.Select.some-workload": 0}`,
expectedQuerySQLTextCounts: `{"A.Select.some-workload": 43}`,
expectedQueryErrorCounts: `{"A.Select.some-workload": 1}`,
expectedQueryErrorCountsWithCode: `{"A.Select.RESOURCE_EXHAUSTED": 1}`,
expectedQueryCountsWithTableType: `{"A.Select.PRIMARY": 1}`,
}, {
name: "insert query with per workload metrics",
planType: planbuilder.PlanInsert,
plan: fakeInsertPlan,
tableName: "A",
tabletType: topodata.TabletType_PRIMARY,
queryCount: 1,
Expand All @@ -834,6 +855,7 @@ func TestAddQueryStats(t *testing.T) {
expectedQueryTimes: `{"A.Insert.some-workload": 10}`,
expectedQueryRowsAffected: `{"A.Insert.some-workload": 15}`,
expectedQueryRowsReturned: `{}`,
expectedQuerySQLTextCounts: `{"A.Insert.some-workload": 59}`,
expectedQueryErrorCounts: `{"A.Insert.some-workload": 0}`,
expectedQueryErrorCountsWithCode: `{}`,
expectedQueryCountsWithTableType: `{"A.Insert.PRIMARY": 1}`,
Expand All @@ -849,12 +871,13 @@ func TestAddQueryStats(t *testing.T) {
env := tabletenv.NewEnv(vtenv.NewTestEnv(), cfg, "TestAddQueryStats_"+testcase.name)
se := schema.NewEngine(env)
qe := NewQueryEngine(env, se)
qe.AddStats(testcase.planType, testcase.tableName, testcase.workload, testcase.tabletType, testcase.queryCount, testcase.duration, testcase.mysqlTime, testcase.rowsAffected, testcase.rowsReturned, testcase.errorCount, testcase.errorCode)
qe.AddStats(testcase.plan, testcase.tableName, testcase.workload, testcase.tabletType, testcase.queryCount, testcase.duration, testcase.mysqlTime, testcase.rowsAffected, testcase.rowsReturned, testcase.errorCount, testcase.errorCode)
assert.Equal(t, testcase.expectedQueryCounts, qe.queryCounts.String())
assert.Equal(t, testcase.expectedQueryCountsWithTableType, qe.queryCountsWithTabletType.String())
assert.Equal(t, testcase.expectedQueryTimes, qe.queryTimes.String())
assert.Equal(t, testcase.expectedQueryRowsAffected, qe.queryRowsAffected.String())
assert.Equal(t, testcase.expectedQueryRowsReturned, qe.queryRowsReturned.String())
assert.Equal(t, testcase.expectedQuerySQLTextCounts, qe.querySQLTextCounts.String())
assert.Equal(t, testcase.expectedQueryErrorCounts, qe.queryErrorCounts.String())
assert.Equal(t, testcase.expectedQueryErrorCountsWithCode, qe.queryErrorCountsWithCode.String())
})
Expand Down
4 changes: 2 additions & 2 deletions go/vt/vttablet/tabletserver/query_executor.go
Original file line number Diff line number Diff line change
Expand Up @@ -139,12 +139,12 @@ func (qre *QueryExecutor) Execute() (reply *sqltypes.Result, err error) {
errCode = vtErrorCode.String()

if reply == nil {
qre.tsv.qe.AddStats(qre.plan.PlanID, tableName, qre.options.GetWorkloadName(), qre.targetTabletType, 1, duration, mysqlTime, 0, 0, 1, errCode)
qre.tsv.qe.AddStats(qre.plan, tableName, qre.options.GetWorkloadName(), qre.targetTabletType, 1, duration, mysqlTime, 0, 0, 1, errCode)
qre.plan.AddStats(1, duration, mysqlTime, 0, 0, 1)
return
}

qre.tsv.qe.AddStats(qre.plan.PlanID, tableName, qre.options.GetWorkloadName(), qre.targetTabletType, 1, duration, mysqlTime, int64(reply.RowsAffected), int64(len(reply.Rows)), 0, errCode)
qre.tsv.qe.AddStats(qre.plan, tableName, qre.options.GetWorkloadName(), qre.targetTabletType, 1, duration, mysqlTime, int64(reply.RowsAffected), int64(len(reply.Rows)), 0, errCode)
qre.plan.AddStats(1, duration, mysqlTime, reply.RowsAffected, uint64(len(reply.Rows)), 0)
qre.logStats.RowsAffected = int(reply.RowsAffected)
qre.logStats.Rows = reply.Rows
Expand Down
Loading