二百一十九、Hive——HQL报错:Caused by: java.util.regex.PatternSyntaxException: Illegal repetition near index 1

一、目的

在海豚调度HQL的脚本任务时报错,Caused by: java.util.regex.PatternSyntaxException: Illegal repetition near index 1

二、原本HiveSQL

with t1 as(

select

       get_json_object(queue_json,’$.deviceNo’)   device_no,

       get_json_object(queue_json,’$.createTime’) create_time,

       get_json_object(queue_json,’$.laneNum’)    lane_num,

       get_json_object(queue_json,’$.queueList’)  queue_list,

       day

from hurys_dc_ods.ods_queue

    )

insert  overwrite  table  hurys_dc_dwd.dwd_queue partition(day)

select

        t1.device_no,

        t1.lane_num,

        t1.create_time,

        get_json_object(list_json,’$.laneNo’)         lane_no,

        get_json_object(list_json,’$.queueCount’)     queue_count,

        cast(get_json_object(list_json,’$.queueLen’)  as decimal(10,2))        queue_len,

        cast(get_json_object(list_json,’$.queueHead’)  as decimal(10,2))       queue_head,

        cast(get_json_object(list_json,’$.queueTail’)  as decimal(10,2))       queue_tail,

        date(t1.create_time) day

from t1
lateral view explode(split(regexp_replace(regexp_replace(queue_list,
                                                ‘\\[|\\]’,”) ,   –将json数组两边的中括号去掉

                                 ‘\\}\\,\\{‘,’\\}\\;\\{‘),  –将json数组元素之间的逗号换成分号

                  ‘\\;’) –以分号作为分隔符(split函数以分号作为分隔)

          )list_queue as list_json

where  device_no is not null   and  get_json_object(list_json,’$.queueLen’) >=0

group by t1.device_no, t1.lane_num, t1.create_time, get_json_object(list_json,’$.laneNo’), get_json_object(list_json,’$.queueCount’), cast(get_json_object(list_json,’$.queueLen’)  as decimal(10,2)), cast(get_json_object(list_json,’$.queueHead’)  as decimal(10,2)), cast(get_json_object(list_json,’$.queueTail’)  as decimal(10,2)), date(t1.create_time)

;

原本的HiveSQL在Hive中执行正常!!!

二百一十九、Hive——HQL报错:Caused by: java.util.regex.PatternSyntaxException: Illegal repetition near index 1

三、原因分析

lateral view explode(split(regexp_replace(regexp_replace(queue_list,
                                                ‘\\[|\\]’,”) ,  

                                 ‘\\}\\,\\{‘,’\\}\\;\\{‘), 

                   ‘\\;’) 

          )list_queue as list_json

因为+、*、|、\等符号在正则表达示中有相应的不同意义,所以在海豚调度HiveSQL时要进行转义处理。

一般采用加双斜杠进行转义,例如  ‘\\;’)   变成   ‘\\\\;’),或者 将需要转义的字符放在 [] 中。

不过,还是采用在\前面再添加\这种方法居多

四、修改海豚调度HQL的脚本

#! /bin/bash

source /etc/profile

nowdate=`date –date=’0 days ago’ “+%Y%m%d”`

yesdate=`date -d yesterday +%Y-%m-%d`

hive -e “

use hurys_dc_dwd;

set hive.exec.dynamic.partition=true;

set hive.exec.dynamic.partition.mode=nonstrict;

set hive.exec.max.dynamic.partitions.pernode=1000;

set hive.exec.max.dynamic.partitions=1500;

with t1 as(

select

       get_json_object(queue_json,’$.deviceNo’)   device_no,

       get_json_object(queue_json,’$.createTime’) create_time,

       get_json_object(queue_json,’$.laneNum’)    lane_num,

       get_json_object(queue_json,’$.queueList’)  queue_list,

       day

from hurys_dc_ods.ods_queue

    )

insert  overwrite  table  hurys_dc_dwd.dwd_queue partition(day=’$yesdate’)

select

        t1.device_no,

        t1.lane_num,

        t1.create_time,

        get_json_object(list_json,’$.laneNo’)         lane_no,

        get_json_object(list_json,’$.queueCount’)     queue_count,

        cast(get_json_object(list_json,’$.queueLen’)  as decimal(10,2))        queue_len,

        cast(get_json_object(list_json,’$.queueHead’)  as decimal(10,2))       queue_head,

        cast(get_json_object(list_json,’$.queueTail’)  as decimal(10,2))       queue_tail

from t1

lateral view explode(split(regexp_replace(regexp_replace(queue_list,
                                                ‘\\\\[|\\\\]’,”) ,      –将json数组两边的中括号去掉

                                 ‘\\\\}\\\\,\\\\{‘,’\\\\}\\\\;\\\\{‘),   –将json数组元素之间的逗号换成分号

                   ‘\\\\;’)   –以分号作为分隔符(split函数以分号作为分隔)

          )list_queue as list_json

where  device_no is not null   and  get_json_object(list_json,’$.queueLen’) >=0   and  date(t1.create_time) = ‘$yesdate’

group by t1.device_no, t1.lane_num, t1.create_time, get_json_object(list_json,’$.laneNo’), get_json_object(list_json,’$.queueCount’), cast(get_json_object(list_json,’$.queueLen’)  as decimal(10,2)), cast(get_json_object(list_json,’$.queueHead’)  as decimal(10,2)), cast(get_json_object(list_json,’$.queueTail’)  as decimal(10,2)), date(t1.create_time)

五、海豚脚本改好后执行任务

修改脚本后,海豚任务执行成功!又学会一招

二百一十九、Hive——HQL报错:Caused by: java.util.regex.PatternSyntaxException: Illegal repetition near index 1

六、在Hive的计算引擎为MapReduce时,脚本里最好不要添加Hive调优语句,否则可能会报错

(一)Hive的计算引擎为MapReduce

set hive.execution.engine;
--hive.execution.engine=mr

二百一十九、Hive——HQL报错:Caused by: java.util.regex.PatternSyntaxException: Illegal repetition near index 1

(二)在海豚脚本里添加Hive调优的SQL语句

set hive.vectorized.execution.enabled=false;

set hive.auto.convert.join=false;

set hive.exec.parallel=true;

set hive.support.concurrency=false;
二百一十九、Hive——HQL报错:Caused by: java.util.regex.PatternSyntaxException: Illegal repetition near index 1

(三)海豚任务运行报错

二百一十九、Hive——HQL报错:Caused by: java.util.regex.PatternSyntaxException: Illegal repetition near index 1

七、在Hive中执行的HiveSQL不能使用 \\\\; 只能使用 \\;,否则会报错

(一)原先执行成功的HQL   使用 \\;

with t1 as(
select
       get_json_object(queue_json,'$.deviceNo')   device_no,
       get_json_object(queue_json,'$.createTime') create_time,
       get_json_object(queue_json,'$.laneNum')    lane_num,
       get_json_object(queue_json,'$.queueList')  queue_list,
       day
from hurys_dc_ods.ods_queue
    )
insert  overwrite  table  hurys_dc_dwd.dwd_queue partition(day)
select
        t1.device_no,
        t1.lane_num,
        t1.create_time,
        get_json_object(list_json,'$.laneNo')         lane_no,
        get_json_object(list_json,'$.queueCount')     queue_count,
        cast(get_json_object(list_json,'$.queueLen')  as decimal(10,2))       queue_len,
        cast(get_json_object(list_json,'$.queueHead')  as decimal(10,2))       queue_head,
        cast(get_json_object(list_json,'$.queueTail')  as decimal(10,2))       queue_tail,
        date(t1.create_time) day
from t1
lateral view explode(split(regexp_replace(regexp_replace(queue_list,
                  '\\[|\\]','') ,   --将json数组两边的中括号去掉
         '\\}\\,\\{','\\}\\;\\{'),  --将json数组元素之间的逗号换成分号
  '\\;') --以分号作为分隔符(split函数以分号作为分隔)
          )list_queue as list_json
where  device_no is not null   and  get_json_object(list_json,'$.queueLen') >=0
group by t1.device_no, t1.lane_num, t1.create_time, get_json_object(list_json,'$.laneNo'), get_json_object(list_json,'$.queueCount'), cast(get_json_object(list_json,'$.queueLen')  as decimal(10,2)), cast(get_json_object(list_json,'$.queueHead')  as decimal(10,2)), cast(get_json_object(list_json,'$.queueTail')  as decimal(10,2)), date(t1.create_time)
;

二百一十九、Hive——HQL报错:Caused by: java.util.regex.PatternSyntaxException: Illegal repetition near index 1

(二)如果使用 \\\\;  则运行报错

with t1 as(
select
       get_json_object(queue_json,'$.deviceNo')   device_no,
       get_json_object(queue_json,'$.createTime') create_time,
       get_json_object(queue_json,'$.laneNum')    lane_num,
       get_json_object(queue_json,'$.queueList')  queue_list,
       day
from hurys_dc_ods.ods_queue
    )
insert  overwrite  table  hurys_dc_dwd.dwd_queue partition(day)
select
        t1.device_no,
        t1.lane_num,
        t1.create_time,
        get_json_object(list_json,'$.laneNo')         lane_no,
        get_json_object(list_json,'$.queueCount')     queue_count,
        cast(get_json_object(list_json,'$.queueLen')  as decimal(10,2))       queue_len,
        cast(get_json_object(list_json,'$.queueHead')  as decimal(10,2))       queue_head,
        cast(get_json_object(list_json,'$.queueTail')  as decimal(10,2))       queue_tail,
        date(t1.create_time) day
from t1
lateral view explode(split(regexp_replace(regexp_replace(queue_list,
               '\\\\[|\\\\]','') ,    --将json数组两边的中括号去掉
       '\\\\}\\\\,\\\\{','\\\\}\\\\;\\\\{'),   --将json数组元素之间的逗号换成分号
  '\\\\;') --以分号作为分隔符(split函数以分号作为分隔)
          )list_queue as list_json
where  device_no is not null   and  get_json_object(list_json,'$.queueLen') >=0
group by t1.device_no, t1.lane_num, t1.create_time, get_json_object(list_json,'$.laneNo'), get_json_object(list_json,'$.queueCount'), cast(get_json_object(list_json,'$.queueLen')  as decimal(10,2)), cast(get_json_object(list_json,'$.queueHead')  as decimal(10,2)), cast(get_json_object(list_json,'$.queueTail')  as decimal(10,2)), date(t1.create_time)
;

运行报错:[08S01][2] Error while processing statement: FAILED: Execution Error, return code 2 from org.apache.hadoop.hive.ql.exec.mr.MapRedTask

二百一十九、Hive——HQL报错:Caused by: java.util.regex.PatternSyntaxException: Illegal repetition near index 1

Hive中不能再转义,否则会报错!!!

八、总结一下

(一)在Hive的执行的HQL,不需要转义处理,例如使用 \\;

lateral view explode(split(regexp_replace(regexp_replace(queue_list,
                  '\\[|\\]','') ,   --将json数组两边的中括号去掉
           '\\}\\,\\{','\\}\\;\\{'),  --将json数组元素之间的逗号换成分号
'\\;') --以分号作为分隔符(split函数以分号作为分隔)

(二)在海豚调度执行HiveSQL脚本任务时,需要使用\\进行转义处理,例如\\\\;

lateral view explode(split(regexp_replace(regexp_replace(queue_list,
                                                ‘\\\\[|\\\\]’,”) ,      –将json数组两边的中括号去掉

                                 ‘\\\\}\\\\,\\\\{‘,’\\\\}\\\\;\\\\{‘),   –将json数组元素之间的逗号换成分号

                   ‘\\\\;’)   –以分号作为分隔符(split函数以分号作为分隔)

总是会遇到一些奇奇怪怪的问题,不过九九八十一难,也算是又经历一难吧,这个问题基本搞了一天。终于搞定了!

本文来自网络,不代表协通编程立场,如若转载,请注明出处:https://www.net2asp.com/7a39322113.html