WITH tmp as(
select
a.*,
if(b.guid is not null,null,a.guid) as nu? -- 是否新用戶的標(biāo)記:老用戶=null,新用戶=自己的guid
from
? (
? SELECT? * FROM dwd_apl_utm_dtl WHERE dt='2020-03-15'
? ) a?
? left join
? (
? ? SELECT * FROM dws_apl_hsu_rec WHERE dt='2020-03-15'
? ) b
? on a.guid=b.guid
)
INSERT INTO TABLE ads_apl_utm_ovw PARTITION(dt='2020-03-15')
SELECT
event['utm_source'],
event['utm_medium'],
event['utm_content'],
event['utm_campain'],
event['utm_term'],
count(1) as pv_cnts,
count(distinct sessionid) as se_cnts,
count(distinct guid) as uv_cnts,
count(distinct nu) as nu_cnts
FROM? tmp
group by? event['utm_source'],event['utm_medium'],event['utm_content'],event['utm_campain'],event['utm_term']
with cube;
Error: Error while compiling statement: FAILED: SemanticException [Error 10226]: An additional MR job is introduced since the cardinality of grouping sets is more than hive.new.job.grouping.set.cardinality. This functionality is not supported with distincts. Either set hive.new.job.grouping.set.cardinality to a high number (higher than the number of rows per input row due to grouping sets in the query), or rewrite the query to not use distincts. The number of rows per input row due to grouping sets is 32 (state=42000,code=10226)
因為用到??with cube
解決辦法:設(shè)置set hive.new.job.grouping.set.cardinality = 32
這個要設(shè)置的數(shù)温学,要大于group by 組合和個數(shù)聪建,如這里发侵,要組合5個,所以 2^5=32妆偏,所以要設(shè)置大于等于32