Replies: 10 comments
-
The same is true of HiveSQL. |
Beta Was this translation helpful? Give feedback.
-
Hi, the SQL task only supports one SQL statement. Could you paste a screenshot of the SQL task config page? |
Beta Was this translation helpful? Give feedback.
-
Dynamic partitioning requires setting the relevant environment variables before executing the SQL. |
Beta Was this translation helpful? Give feedback.
-
Hi, please paste a screenshot of the SQL task config. Thanks. |
Beta Was this translation helpful? Give feedback.
-
|
Beta Was this translation helpful? Give feedback.
-
Hi, the SQL task only supports a single SQL statement; you can move all of the set statements into the SQL parameters. |
Beta Was this translation helpful? Give feedback.
-
The above applies to the Hive datasource. |
Beta Was this translation helpful? Give feedback.
-
If you use a Spark datasource, it doesn't support the SQL parameters. You should use the shell task instead of the SQL task. |
Beta Was this translation helpful? Give feedback.
-
Spark SQL does not support SQL parameters. |
Beta Was this translation helpful? Give feedback.
-
Will later versions support it? It's important to me. |
Beta Was this translation helpful? Give feedback.
-
When DolphinScheduler connects to the Spark Thrift Server to execute SQL and sets the environment variable spark.sql.adaptive.enabled = true, the log shows "java.sql.SQLException: java.lang.IllegalArgumentException: spark.sql.adaptive.enabled should be boolean, but was true;" — but I did set a boolean value.
sql:
set spark.sql.adaptive.enabled = true;
set spark.sql.adaptive.shuffle.targetPostShuffleInputSize = 134217728;
set spark.sql.adaptive.join.enabled= true;
set spark.sql.autoBroadcastJoinThreshold = 20971520;
set spark.sql.hive.mergeFiles = true;
INSERT overwrite TABLE ods.ods_app_report_vo
partition (
month
='$[yyyy-MM-1]',day
='$[yyyy-MM-dd-1]')select
_id
,hid
,coid
,csid
,eqid
,type
,timestamp
,date
,status
,roomid
,errmsg
,_class
from ods.ods_app_report_vo_day as a
where a.
day
= '$[yyyy-MM-1]' distribute by rand();

Worker logs:
[INFO] 2020-09-30 23:31:16.902 - [taskAppId=TASK-6-43-218]:[499] - after replace sql , preparing : set spark.sql.adaptive.enabled=true;
set spark.sql.adaptive.shuffle.targetPostShuffleInputSize=134217728;
set spark.sql.adaptive.join.enabled=true;
set spark.sql.autoBroadcastJoinThreshold=20971520;
set spark.sql.hive.mergeFiles=true;
insert overwrite table ads.ads_community_equipment_house_view
partition (
month
='2020-09',day
='2020-09-29')select a.*,NVL(b.app_num,0) app_num
from (select
A.proince_id,
A.proinceName,
A.city_id,
A.city_name,
A.county_id,
A.county_name,
A.CO_ID,
A.CO_Name,
COUNT(distinct A.H_ID) as house_num,
COUNT(distinct A.CO_ID) as community_num,
COUNT(distinct A.EQ_AutoID) as equipment_num
--'2020-09' as month
from dws.dws_community_equipment_room_house_wide a where 1=1
-- and a.house_day<='2020-09-16' and a.community_day <='2020-09-16' and a.equipment_day <='2020-09-16'
GROUP BY
A.proince_id,
A.proinceName,
A.city_id,
A.city_name,
A.county_id,
A.county_name,
A.CO_ID,
A.CO_Name) a left join
(select
A.proince_id,
A.proinceName,
A.city_id,
A.city_name,
A.county_id,
A.county_name,
A.CO_ID,
A.CO_Name,
COUNT(distinct A.H_ID) as app_num
from dws.dws_community_equipment_room_house_wide a where A.H_LoginType='1'
--and a.house_day<='2020-09-16' and a.community_day <='2020-09-16' and a.equipment_day <='2020-09-16'
GROUP BY
A.proince_id,
A.proinceName,
A.city_id,
A.city_name,
A.county_id,
A.county_name,
A.CO_ID,
A.CO_Name) b on a.proince_id=b.proince_id and a.city_id=b.city_id and a.county_id=b.county_id and a.CO_ID=b.CO_ID distribute by rand();
[INFO] 2020-09-30 23:31:16.904 - [taskAppId=TASK-6-43-218]:[504] - Sql Params are replaced sql , parameters:
[INFO] 2020-09-30 23:31:16.904 - [taskAppId=TASK-6-43-218]:[52] - can't find udf function resource
[INFO] 2020-09-30 23:31:16.905 org.apache.hive.jdbc.Utils:[318] - Supplied authorities: 192.168.2.139:10000
[INFO] 2020-09-30 23:31:16.905 org.apache.hive.jdbc.Utils:[437] - Resolved authority: 192.168.2.139:10000
[INFO] 2020-09-30 23:31:16.945 - [taskAppId=TASK-6-43-218]:[418] - prepare statement replace sql : org.apache.hive.jdbc.HivePreparedStatement@29842163
[ERROR] 2020-09-30 23:31:16.952 - [taskAppId=TASK-6-43-218]:[242] - execute sql error
java.sql.SQLException: java.lang.IllegalArgumentException: spark.sql.adaptive.enabled should be boolean, but was true;
set spark.sql.adaptive.shuffle.targetPostShuffleInputSize=134217728;
set spark.sql.adaptive.join.enabled=true;
set spark.sql.autoBroadcastJoinThreshold=20971520;
set spark.sql.hive.mergeFiles=true;
insert overwrite table ads.ads_community_equipment_house_view
partition (
month
='2020-09',day
='2020-09-29')select a.*,NVL(b.app_num,0) app_num
from (select
A.proince_id,
A.proinceName,
A.city_id,
A.city_name,
A.county_id,
A.county_name,
A.CO_ID,
A.CO_Name,
COUNT(distinct A.H_ID) as house_num,
COUNT(distinct A.CO_ID) as community_num,
COUNT(distinct A.EQ_AutoID) as equipment_num
--'2020-09' as month
from dws.dws_community_equipment_room_house_wide a where 1=1
-- and a.house_day<='2020-09-16' and a.community_day <='2020-09-16' and a.equipment_day <='2020-09-16'
GROUP BY
A.proince_id,
A.proinceName,
A.city_id,
A.city_name,
A.county_id,
A.county_name,
A.CO_ID,
A.CO_Name) a left join
(select
A.proince_id,
A.proinceName,
A.city_id,
A.city_name,
A.county_id,
A.county_name,
A.CO_ID,
A.CO_Name,
COUNT(distinct A.H_ID) as app_num
from dws.dws_community_equipment_room_house_wide a where A.H_LoginType='1'
--and a.house_day<='2020-09-16' and a.community_day <='2020-09-16' and a.equipment_day <='2020-09-16'
GROUP BY
A.proince_id,
A.proinceName,
A.city_id,
A.city_name,
A.county_id,
A.county_name,
A.CO_ID,
A.CO_Name) b on a.proince_id=b.proince_id and a.city_id=b.city_id and a.county_id=b.county_id and a.CO_ID=b.CO_ID distribute by rand();
at org.apache.hive.jdbc.HiveStatement.waitForOperationToComplete(HiveStatement.java:348)
at org.apache.hive.jdbc.HiveStatement.execute(HiveStatement.java:251)
at org.apache.hive.jdbc.HiveStatement.executeUpdate(HiveStatement.java:448)
at org.apache.hive.jdbc.HivePreparedStatement.executeUpdate(HivePreparedStatement.java:119)
at org.apache.dolphinscheduler.server.worker.task.sql.SqlTask.executeFuncAndSql(SqlTask.java:236)
at org.apache.dolphinscheduler.server.worker.task.sql.SqlTask.handle(SqlTask.java:139)
at org.apache.dolphinscheduler.server.worker.runner.TaskExecuteThread.run(TaskExecuteThread.java:129)
at java.util.concurrent.Executors$RunnableAdapter.call(Executors.java:511)
at java.util.concurrent.FutureTask.run(FutureTask.java:266)
at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149)
at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624)
at java.lang.Thread.run(Thread.java:748)
[ERROR] 2020-09-30 23:31:16.962 - [taskAppId=TASK-6-43-218]:[145] - sql task error
java.lang.RuntimeException: execute sql error
at org.apache.dolphinscheduler.server.worker.task.sql.SqlTask.executeFuncAndSql(SqlTask.java:243)
at org.apache.dolphinscheduler.server.worker.task.sql.SqlTask.handle(SqlTask.java:139)
at org.apache.dolphinscheduler.server.worker.runner.TaskExecuteThread.run(TaskExecuteThread.java:129)
at java.util.concurrent.Executors$RunnableAdapter.call(Executors.java:511)
at java.util.concurrent.FutureTask.run(FutureTask.java:266)
at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149)
at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624)
at java.lang.Thread.run(Thread.java:748)
[ERROR] 2020-09-30 23:31:16.964 org.apache.dolphinscheduler.server.worker.runner.TaskExecuteThread:[140] - task scheduler failure
java.lang.RuntimeException: execute sql error
at org.apache.dolphinscheduler.server.worker.task.sql.SqlTask.executeFuncAndSql(SqlTask.java:243)
at org.apache.dolphinscheduler.server.worker.task.sql.SqlTask.handle(SqlTask.java:139)
at org.apache.dolphinscheduler.server.worker.runner.TaskExecuteThread.run(TaskExecuteThread.java:129)
at java.util.concurrent.Executors$RunnableAdapter.call(Executors.java:511)
at java.util.concurrent.FutureTask.run(FutureTask.java:266)
at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149)
at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624)
at java.lang.Thread.run(Thread.java:748)
Beta Was this translation helpful? Give feedback.
All reactions