Skip to content

Commit

Permalink
Merge pull request emqx#13189 from JimMoen/fix-apply-sqlserver-template
Browse files Browse the repository at this point in the history
fix(utils_sql): improve insert sql regular expression
  • Loading branch information
JimMoen authored Jun 7, 2024
2 parents eb21572 + dff31b2 commit 78a196e
Show file tree
Hide file tree
Showing 3 changed files with 174 additions and 11 deletions.
52 changes: 41 additions & 11 deletions apps/emqx_utils/src/emqx_utils_sql.erl
Original file line number Diff line number Diff line change
Expand Up @@ -31,8 +31,44 @@
-type statement_type() :: select | insert | delete | update.
-type value() :: null | binary() | number() | boolean() | [value()].

-define(INSERT_RE_MP_KEY, insert_re_mp).
-define(INSERT_RE_BIN, <<
%% case-insensitive
"(?i)^\\s*",
%% Group-1: insert into, table name and columns (when existed).
%% All space characters suffixed to <TABLE_NAME> will be kept
%% `INSERT INTO <TABLE_NAME> [(<COLUMN>, ..)]`
"(insert\\s+into\\s+[^\\s\\(\\)]+\\s*(?:\\([^\\)]*\\))?)",
%% Keyword: `VALUES`
"\\s*values\\s*",
%% Group-2: literals value(s) or placeholder(s) with round brackets.
%% And the sub-pattern in brackets does not do any capturing
%% `([<VALUE> | <PLACEHOLDER>], ..])`
"(\\((?:[^()]++|(?2))*\\))",
"\\s*$"
>>).

-dialyzer({no_improper_lists, [escape_mysql/4, escape_prepend/4]}).

-on_load(put_insert_mp/0).

put_insert_mp() ->
persistent_term:put({?MODULE, ?INSERT_RE_MP_KEY}, re:compile(?INSERT_RE_BIN)),
ok.

%% The type Copied from stdlib/src/re.erl to compatibility with OTP 26
%% Since `re:mp()` exported after OTP 27
-type mp() :: {re_pattern, _, _, _, _}.
-spec get_insert_mp() -> {ok, mp()}.
get_insert_mp() ->
case persistent_term:get({?MODULE, ?INSERT_RE_MP_KEY}, undefined) of
undefined ->
ok = put_insert_mp(),
get_insert_mp();
{ok, MP} ->
{ok, MP}
end.

-spec get_statement_type(iodata()) -> statement_type() | {error, unknown}.
get_statement_type(Query) ->
KnownTypes = #{
Expand All @@ -54,17 +90,11 @@ get_statement_type(Query) ->
-spec parse_insert(iodata()) ->
{ok, {_Statement :: binary(), _Rows :: binary()}} | {error, not_insert_sql}.
parse_insert(SQL) ->
case re:split(SQL, "((?i)values)", [{return, binary}]) of
[Part1, _, Part3] ->
case string:trim(Part1, leading) of
<<"insert", _/binary>> = InsertSQL ->
{ok, {InsertSQL, Part3}};
<<"INSERT", _/binary>> = InsertSQL ->
{ok, {InsertSQL, Part3}};
_ ->
{error, not_insert_sql}
end;
_ ->
{ok, MP} = get_insert_mp(),
case re:run(SQL, MP, [{capture, all_but_first, binary}]) of
{match, [InsertInto, ValuesTemplate]} ->
{ok, {InsertInto, ValuesTemplate}};
nomatch ->
{error, not_insert_sql}
end.

Expand Down
132 changes: 132 additions & 0 deletions apps/emqx_utils/test/emqx_utils_sql_SUITE.erl
Original file line number Diff line number Diff line change
@@ -0,0 +1,132 @@
%%--------------------------------------------------------------------
%% Copyright (c) 2022-2024 EMQ Technologies Co., Ltd. All Rights Reserved.
%%
%% Licensed under the Apache License, Version 2.0 (the "License");
%% you may not use this file except in compliance with the License.
%% You may obtain a copy of the License at
%%
%% http://www.apache.org/licenses/LICENSE-2.0
%%
%% Unless required by applicable law or agreed to in writing, software
%% distributed under the License is distributed on an "AS IS" BASIS,
%% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
%% See the License for the specific language governing permissions and
%% limitations under the License.
%%--------------------------------------------------------------------

-module(emqx_utils_sql_SUITE).

-compile(export_all).
-compile(nowarn_export_all).

-include_lib("eunit/include/eunit.hrl").

-import(emqx_utils_sql, [get_statement_type/1, parse_insert/1]).

all() ->
emqx_common_test_helpers:all(?MODULE).

t_get_statement_type(_) ->
?assertEqual(select, get_statement_type("SELECT * FROM abc")),
?assertEqual(insert, get_statement_type("INSERT INTO abc (c1, c2, c3)VALUES(1, 2, 3)")),
?assertEqual(update, get_statement_type("UPDATE abc SET c1 = 1, c2 = 2, c3 = 3")),
?assertEqual(delete, get_statement_type("DELETE FROM abc WHERE c1 = 1")),
?assertEqual({error, unknown}, get_statement_type("drop table books")).

t_parse_insert(_) ->
%% `values` in table name
run_pi(
<<"insert into tag_VALUES(tag_values,Timestamp) values (${tagvalues},${date})"/utf8>>,
{<<"insert into tag_VALUES(tag_values,Timestamp)"/utf8>>, <<"(${tagvalues},${date})"/utf8>>}
),
run_pi(
<<"INSERT INTO Values_таблица (идентификатор, имя, возраст) VALUES \t (${id}, 'Иван', 25) "/utf8>>,
{<<"INSERT INTO Values_таблица (идентификатор, имя, возраст)"/utf8>>,
<<"(${id}, 'Иван', 25)"/utf8>>}
),

%% `values` in column name
run_pi(
<<"insert into PI.dbo.tags(tag_values,Timestamp) values (${tagvalues},${date} )"/utf8>>,
{<<"insert into PI.dbo.tags(tag_values,Timestamp)"/utf8>>,
<<"(${tagvalues},${date} )"/utf8>>}
),

run_pi(
<<"INSERT INTO mqtt_test(payload, arrived) VALUES (${payload}, FROM_UNIXTIME((${timestamp}/1000)))"/utf8>>,
{<<"INSERT INTO mqtt_test(payload, arrived)"/utf8>>,
<<"(${payload}, FROM_UNIXTIME((${timestamp}/1000)))">>}
),

run_pi(
<<"insert into таблица (идентификатор,имя,возраст) VALUES(${id},'Алексей',30)"/utf8>>,
{<<"insert into таблица (идентификатор,имя,возраст)"/utf8>>,
<<"(${id},'Алексей',30)"/utf8>>}
),
run_pi(
<<"INSERT into 表格 (标识, 名字, 年龄) VALUES (${id}, '张三', 22)"/utf8>>,
{<<"INSERT into 表格 (标识, 名字, 年龄)"/utf8>>, <<"(${id}, '张三', 22)"/utf8>>}
),
run_pi(
<<" inSErt into 表格(标识,名字,年龄)values(${id},'李四', 35)"/utf8>>,
{<<"inSErt into 表格(标识,名字,年龄)"/utf8>>, <<"(${id},'李四', 35)"/utf8>>}
),
run_pi(
<<"insert into PI.dbo.tags( tag_value,Timestamp) VALUES\t\t( ${tagvalues}, ${date} )"/utf8>>,
{<<"insert into PI.dbo.tags( tag_value,Timestamp)"/utf8>>,
<<"( ${tagvalues}, ${date} )"/utf8>>}
),
run_pi(
<<"insert into PI.dbo.tags(tag_value , Timestamp )vALues(${tagvalues},${date})"/utf8>>,
{<<"insert into PI.dbo.tags(tag_value , Timestamp )"/utf8>>,
<<"(${tagvalues},${date})"/utf8>>}
),

run_pi(
<<"inSErt INTO table75 (column1, column2, column3) values (${one}, ${two},${three})"/utf8>>,
{<<"inSErt INTO table75 (column1, column2, column3)"/utf8>>,
<<"(${one}, ${two},${three})"/utf8>>}
),
run_pi(
<<"INSERT Into some_table values\t(${tag1}, ${tag2} )">>,
{<<"INSERT Into some_table "/utf8>>, <<"(${tag1}, ${tag2} )">>}
).

t_parse_insert_nested_brackets(_) ->
InsertPart = <<"INSERT INTO test_tab (val1, val2)">>,
ValueLs = [
<<"(ABS(POWER((2 * POWER(ABS( (-3 + 1) * 4), (2 * (1 + ABS( (-3 + 1) * 4))))), (3 - POWER(4, 2)))), ",
"POWER(ABS( (-3 + 1) * 4), (2 * (1 + ABS( (-3 + 1) * 4)))))">>,
<<"(GREATEST(LEAST(5, 10), ABS(-7)), LEAST(GREATEST(3, 2), 9))">>,
<<"(ABS(POWER((2 * 2), (3 - 1))), POWER(ABS(-3), (2 * (1 + 1))))">>,
<<"(SQRT(POWER(4, 2)), MOD((10 + 3), (2 * 5)))">>,
<<"(ROUND(CEIL(3.14159 * 2), 2), CEIL(ROUND(7.5, 1)))">>,
<<"(FLOOR(SQRT(ABS(-8.99))), SIGN(POWER(-2, 3)))">>,
<<"(TRUNCATE(RAND() * 100, 2), ROUND(RAND() * 10, 1))">>,
<<"(EXP(LOG(POWER(2, 3))), LOG(EXP(5)))">>,
<<"(COS(PI() / (3 - 1)), PI() / COS(PI() / 4))">>,
<<"(SIN(TAN(PI() / 4)), TAN(SIN(PI() / 6)))">>
],

[
run_pi(<<InsertPart/binary, " VALUES ", ValueL/binary>>, {InsertPart, ValueL})
|| ValueL <- ValueLs
].

t_parse_insert_failed(_) ->
run_pi("drop table books"),
run_pi("SELECT * FROM abc"),
run_pi("UPDATE abc SET c1 = 1, c2 = 2, c3 = 3"),
run_pi("DELETE FROM abc WHERE c1 = 1"),
run_pi("insert intotable(a,b)values(1,2)"),
run_pi("insert into (a,val)values(1,'val')").

run_pi(SQL) ->
?assertEqual({error, not_insert_sql}, parse_insert(SQL)),
ct:pal("SQL:~n~ts~n", [SQL]).

run_pi(SQL, {InsertPart, Values}) ->
{ok, {InsertPart0, Values0}} = parse_insert(SQL),
?assertEqual(InsertPart, InsertPart0),
?assertEqual(Values, Values0),
ct:pal("SQL:~n~ts~n", [SQL]).
1 change: 1 addition & 0 deletions changes/ce/fix-13189.en.md
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
Fixed an issue where the data integration with Microsoft SQL Server or MySQL could not use SQL templates with substring `values` in table name or column name.

0 comments on commit 78a196e

Please sign in to comment.