-
Notifications
You must be signed in to change notification settings - Fork 2.1k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Fix the race condition during vttablet startup #15731
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -29,6 +29,7 @@ import ( | |
"strings" | ||
"time" | ||
|
||
"vitess.io/vitess/go/mysql" | ||
"vitess.io/vitess/go/mysql/replication" | ||
"vitess.io/vitess/go/netutil" | ||
"vitess.io/vitess/go/vt/hook" | ||
|
@@ -174,8 +175,21 @@ func (mysqld *Mysqld) RestartReplication(hookExtraEnv map[string]string) error { | |
} | ||
|
||
// GetMysqlPort returns mysql port | ||
func (mysqld *Mysqld) GetMysqlPort() (int32, error) { | ||
qr, err := mysqld.FetchSuperQuery(context.TODO(), "SHOW VARIABLES LIKE 'port'") | ||
func (mysqld *Mysqld) GetMysqlPort(ctx context.Context) (int32, error) { | ||
// We can not use the connection pool here. This check runs very early | ||
// during MySQL startup when we still might be loading things like grants. | ||
// This means we need to use an isolated connection to avoid poisoning the | ||
// DBA connection pool for further queries. | ||
params, err := mysqld.dbcfgs.DbaConnector().MysqlParams() | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Changing it here to use a connection without pooling, which avoids the poisoning. I also fixed the […] |
||
if err != nil { | ||
return 0, err | ||
} | ||
conn, err := mysql.Connect(ctx, params) | ||
if err != nil { | ||
return 0, err | ||
} | ||
defer conn.Close() | ||
qr, err := conn.ExecuteFetch("SHOW VARIABLES LIKE 'port'", 1, false) | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. I know this is not super important here, but should the context be passed down to the actual query? We've seen various cases where MySQL ends up being "stuck" for whatever reason, not replying to incoming queries, and this would cause the […] There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. @arthurschreiber Do you mean as a general feature / refactor? It's not possible to pass in a context at the moment in the MySQL connection handling that Vitess does. That might be useful as a separate feature / change, but I think that's independent of what we're doing here? |
||
if err != nil { | ||
return 0, err | ||
} | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -366,7 +366,7 @@ func (tm *TabletManager) Start(tablet *topodatapb.Tablet, config *tabletenv.Tabl | |
if err := tm.checkPrimaryShip(ctx, si); err != nil { | ||
return err | ||
} | ||
if err := tm.checkMysql(); err != nil { | ||
if err := tm.checkMysql(ctx); err != nil { | ||
return err | ||
} | ||
if err := tm.initTablet(ctx); err != nil { | ||
|
@@ -702,7 +702,7 @@ func (tm *TabletManager) checkPrimaryShip(ctx context.Context, si *topo.ShardInf | |
return nil | ||
} | ||
|
||
func (tm *TabletManager) checkMysql() error { | ||
func (tm *TabletManager) checkMysql(ctx context.Context) error { | ||
appConfig, err := tm.DBConfigs.AppWithDB().MysqlParams() | ||
if err != nil { | ||
return err | ||
|
@@ -717,7 +717,7 @@ func (tm *TabletManager) checkMysql() error { | |
tm.tmState.UpdateTablet(func(tablet *topodatapb.Tablet) { | ||
tablet.MysqlHostname = tablet.Hostname | ||
}) | ||
mysqlPort, err := tm.MysqlDaemon.GetMysqlPort() | ||
mysqlPort, err := tm.MysqlDaemon.GetMysqlPort(ctx) | ||
if err != nil { | ||
log.Warningf("Cannot get current mysql port, will keep retrying every %v: %v", mysqlPortRetryInterval, err) | ||
go tm.findMysqlPort(mysqlPortRetryInterval) | ||
|
@@ -730,10 +730,18 @@ func (tm *TabletManager) checkMysql() error { | |
return nil | ||
} | ||
|
||
const portCheckTimeout = 5 * time.Second | ||
|
||
func (tm *TabletManager) getMysqlPort() (int32, error) { | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. It seems overkill to define a function that is used exactly once. Why can't all this be folded into findMysqlPort? It will grow from 10 lines to 15 lines. There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. @deepthi Because it's setting up a context and you can't defer that properly in a `for` loop. |
||
ctx, cancel := context.WithTimeout(context.Background(), portCheckTimeout) | ||
defer cancel() | ||
return tm.MysqlDaemon.GetMysqlPort(ctx) | ||
} | ||
|
||
func (tm *TabletManager) findMysqlPort(retryInterval time.Duration) { | ||
for { | ||
time.Sleep(retryInterval) | ||
mport, err := tm.MysqlDaemon.GetMysqlPort() | ||
mport, err := tm.getMysqlPort() | ||
if err != nil || mport == 0 { | ||
continue | ||
} | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Opted to also refactor this case slightly to avoid using the pool entirely so for any future readers it's also more obvious no pooling is actually used.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This is good. When I was looking at this yesterday, I had to navigate through the code to see that `NewDBConnection` returns a non-pooled connection.