概述
DWD完成后术幔,接下來(lái)就是進(jìn)行DWS建模。將具有相同分析主題的DWD層數(shù)據(jù)梯影,聚合層寬表模型嘶朱,便于數(shù)據(jù)分析和計(jì)算。
ODDA建模
DWS模型
從訂單表光酣、用戶表和支付流水表疏遏,用戶行為表
從訂單表、訂單詳情表救军、商品表财异、用戶表和支付流水表,用戶購(gòu)買商品明細(xì)表
1. 建表
-- 進(jìn)入數(shù)據(jù)庫(kù)
use mall;
-- 創(chuàng)建用戶行為寬表
drop table if exists dws_user_action;
create external table dws_user_action
(
user_id string comment '用戶 id',
order_count bigint comment '下單次數(shù) ',
order_amount decimal(16,2) comment '下單金額 ',
payment_count bigint comment '支付次數(shù)',
payment_amount decimal(16,2) comment '支付金額 '
) COMMENT '每日用戶行為寬表'
PARTITIONED BY ( `dt` string)
stored as parquet
location '/warehouse/mall/dws/dws_user_action/'
tblproperties ("parquet.compression"="snappy");
-- 創(chuàng)建用戶購(gòu)買商品明細(xì)表
drop table if exists dws_sale_detail_daycount;
create external table dws_sale_detail_daycount
( user_id string comment '用戶 id',
sku_id string comment '商品 Id',
user_gender string comment '用戶性別',
user_age string comment '用戶年齡',
user_level string comment '用戶等級(jí)',
order_price decimal(10,2) comment '訂單價(jià)格',
sku_name string comment '商品名稱',
sku_tm_id string comment '品牌id',
sku_category3_id string comment '商品三級(jí)品類id',
sku_category2_id string comment '商品二級(jí)品類id',
sku_category1_id string comment '商品一級(jí)品類id',
sku_category3_name string comment '商品三級(jí)品類名稱',
sku_category2_name string comment '商品二級(jí)品類名稱',
sku_category1_name string comment '商品一級(jí)品類名稱',
spu_id string comment '商品 spu',
sku_num int comment '購(gòu)買個(gè)數(shù)',
order_count string comment '當(dāng)日下單單數(shù)',
order_amount string comment '當(dāng)日下單金額'
) COMMENT '用戶購(gòu)買商品明細(xì)表'
PARTITIONED BY ( `dt` string)
stored as parquet
location '/warehouse/mall/dws/dws_user_sale_detail_daycount/'
tblproperties ("parquet.compression"="snappy");
執(zhí)行建表語(yǔ)句:
hive -f /home/warehouse/sql/dws_ddl.sql
2. 腳本
#!/bin/bash
# 定義變量方便修改
APP=mall
hive=hive
# 如果是輸入的日期按照取輸入日期唱遭;如果沒(méi)輸入日期取當(dāng)前時(shí)間的前一天
if [ -n $1 ] ;then
log_date=$1
else
log_date=`date -d "-1 day" +%F`
fi
# 用戶行為寬表
function user_actions()
{
# 定義變量
APP=$1
hive=$2
log_date=$3
sql="
with
tmp_order as
(
select
user_id,
sum(oc.total_amount) order_amount,
count(*) order_count
from "$APP".dwd_order_info oc
where date_format(oc.create_time,'yyyy-MM-dd')='$log_date'
group by user_id
),
tmp_payment as
(
select
user_id,
sum(pi.total_amount) payment_amount,
count(*) payment_count
from "$APP".dwd_payment_info pi
where date_format(pi.payment_time,'yyyy-MM-dd')='$log_date'
group by user_id
)
insert overwrite table "$APP".dws_user_action partition(dt='$log_date')
select
user_actions.user_id,
sum(user_actions.order_count),
sum(user_actions.order_amount),
sum(user_actions.payment_count),
sum(user_actions.payment_amount)
from
(
select
user_id,
order_count,
order_amount ,
0 payment_count ,
0 payment_amount
from tmp_order
union all
select
user_id,
0,
0,
payment_count,
payment_amount
from tmp_payment
) user_actions
group by user_id;
"
$hive -e "$sql"
}
function user_sales()
{
# 定義變量
APP=$1
hive=$2
log_date=$3
sql="
set hive.exec.dynamic.partition.mode=nonstrict;
with
tmp_detail as
(
select
user_id,
sku_id,
sum(sku_num) sku_num ,
count(*) order_count ,
sum(od.order_price*sku_num) order_amount
from "$APP".dwd_order_detail od
where od.dt='$log_date' and user_id is not null
group by user_id, sku_id
)
insert overwrite table "$APP".dws_sale_detail_daycount partition(dt='$log_date')
select
tmp_detail.user_id,
tmp_detail.sku_id,
u.gender,
months_between('$log_date', u.birthday)/12 age,
u.user_level,
price,
sku_name,
tm_id,
category3_id ,
category2_id ,
category1_id ,
category3_name ,
category2_name ,
category1_name ,
spu_id,
tmp_detail.sku_num,
tmp_detail.order_count,
tmp_detail.order_amount
from tmp_detail
left join "$APP".dwd_user_info u
on u.id=tmp_detail.user_id and u.dt='$log_date'
left join "$APP".dwd_sku_info s on tmp_detail.sku_id =s.id and s.dt='$log_date';
"
$hive -e "$sql"
}
user_actions $APP $hive $log_date
3. 結(jié)果
hive> select * from dws_user_action limit 2;
OK
SLF4J: Failed to load class "org.slf4j.impl.StaticLoggerBinder".
SLF4J: Defaulting to no-operation (NOP) logger implementation
SLF4J: See http://www.slf4j.org/codes.html#StaticLoggerBinder for further details.
1 1 402 0 0 2021-03-24
10 1 205 1 205 2021-03-24
Time taken: 0.843 seconds, Fetched: 2 row(s)
hive> select * from dws_sale_detail_daycount limit 2;
OK
1 135 NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL 2 1 8958.0 2021-03-24
1 141 NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL 1 1 571.0 2021-03-24
Time taken: 0.119 seconds, Fetched: 2 row(s)