From dff31b293c0538d32b69d9df41503da1887240bc Mon Sep 17 00:00:00 2001 From: JimMoen Date: Thu, 6 Jun 2024 11:01:38 +0800 Subject: [PATCH] perf(utils_sql): pre-build regexp --- apps/emqx_utils/src/emqx_utils_sql.erl | 55 ++++++++++++++++++-------- 1 file changed, 38 insertions(+), 17 deletions(-) diff --git a/apps/emqx_utils/src/emqx_utils_sql.erl b/apps/emqx_utils/src/emqx_utils_sql.erl index 958507e3fc3..081ad26c727 100644 --- a/apps/emqx_utils/src/emqx_utils_sql.erl +++ b/apps/emqx_utils/src/emqx_utils_sql.erl @@ -31,8 +31,44 @@ -type statement_type() :: select | insert | delete | update. -type value() :: null | binary() | number() | boolean() | [value()]. +-define(INSERT_RE_MP_KEY, insert_re_mp). +-define(INSERT_RE_BIN, << + %% case-insensitive + "(?i)^\\s*", + %% Group-1: insert into, table name and columns (when present). + %% All space characters suffixed to <TABLE_NAME> will be kept + %% `INSERT INTO <TABLE_NAME> [(<COLUMN>, ..)]` + "(insert\\s+into\\s+[^\\s\\(\\)]+\\s*(?:\\([^\\)]*\\))?)", + %% Keyword: `VALUES` + "\\s*values\\s*", + %% Group-2: literals value(s) or placeholder(s) with round brackets. + %% And the sub-pattern in brackets does not do any capturing + %% `([<VALUE> | <PLACEHOLDER>], ..])` + "(\\((?:[^()]++|(?2))*\\))", + "\\s*$" +>>). + -dialyzer({no_improper_lists, [escape_mysql/4, escape_prepend/4]}). +-on_load(put_insert_mp/0). + +put_insert_mp() -> + persistent_term:put({?MODULE, ?INSERT_RE_MP_KEY}, re:compile(?INSERT_RE_BIN)), + ok. + +%% The type is copied from stdlib/src/re.erl for compatibility with OTP 26, +%% since `re:mp()` is exported only from OTP 27 onward. +-type mp() :: {re_pattern, _, _, _, _}. +-spec get_insert_mp() -> {ok, mp()}. +get_insert_mp() -> + case persistent_term:get({?MODULE, ?INSERT_RE_MP_KEY}, undefined) of + undefined -> + ok = put_insert_mp(), + get_insert_mp(); + {ok, MP} -> + {ok, MP} + end. + -spec get_statement_type(iodata()) -> statement_type() | {error, unknown}. 
get_statement_type(Query) -> KnownTypes = #{ @@ -54,23 +90,8 @@ get_statement_type(Query) -> -spec parse_insert(iodata()) -> {ok, {_Statement :: binary(), _Rows :: binary()}} | {error, not_insert_sql}. parse_insert(SQL) -> - Pattern = << - %% case-insensitive - "(?i)^\\s*", - %% Group-1: insert into, table name and columns (when existed). - %% All space characters suffixed to <TABLE_NAME> will be kept - %% `INSERT INTO <TABLE_NAME> [(<COLUMN>, ..)]` - "(insert\\s+into\\s+[^\\s\\(\\)]+\\s*(?:\\([^\\)]*\\))?)", - %% Keyword: `VALUES` - "\\s*values\\s*", - %% Group-2: literals value(s) or placeholder(s) with round brackets. - %% And the sub-pattern in brackets does not do any capturing - %% `([<VALUE> | <PLACEHOLDER>], ..])` - "(\\((?:[^()]++|(?2))*\\))", - "\\s*$" - >>, - - case re:run(SQL, Pattern, [{capture, all_but_first, binary}]) of + {ok, MP} = get_insert_mp(), + case re:run(SQL, MP, [{capture, all_but_first, binary}]) of {match, [InsertInto, ValuesTemplate]} -> {ok, {InsertInto, ValuesTemplate}}; nomatch ->