2. 第二層DW主題層
2.1 DWD明細(xì)層
2.1.1 用戶主題
DWD明細(xì)粒度事實(shí)層Hive數(shù)據(jù)庫建庫建表:
創(chuàng)建Hive庫并進(jìn)入:
create database if not exists dwd_nshop;
use dwd_nshop;
在DWD層開始入庫之前,我們有一張通用字典表,在mysql已經(jīng)導(dǎo)入,為后面的數(shù)據(jù)篩選做條件過濾:
2.1.1.1 用戶啟動(dòng)日志表【DWD】
CREATE external TABLE
IF NOT EXISTS dwd_nshop.dwd_nshop_actlog_launch (
user_id string COMMENT '用戶id',
device_num string COMMENT '設(shè)備號(hào)',
device_type string COMMENT '設(shè)備類型',
os string COMMENT '手機(jī)系統(tǒng)',
os_version string COMMENT '手機(jī)系統(tǒng)版本',
manufacturer string COMMENT '手機(jī)制造商',
carrier string COMMENT '電信運(yùn)營商',
network_type string COMMENT '網(wǎng)絡(luò)類型',
area_code string COMMENT '地區(qū)編碼',
launch_time_segment string COMMENT '啟動(dòng)時(shí)間段',
ct BIGINT COMMENT '產(chǎn)生時(shí)間'
) partitioned BY (bdp_day string) stored AS parquet location '/data/nshop/dw/user/dwd_nshop_actlog_launch/';
從ods層事件表里選擇action=02(啟動(dòng))的事件數(shù)據(jù)導(dǎo)入即可:
insert overwrite table dwd_nshop.dwd_nshop_actlog_launch partition(bdp_day="20200618")
select
customer_id,
device_num,
device_type,
os,
os_version,
manufacturer,
carrier,
network_type,
area_code,
from_unixtime(cast(ct/1000 as int),'HH') as launch_time_segment,
ct
from
ods_nshop.ods_nshop_01_useractlog
where
bdp_day = "20200618"
and
action = "02"
2.1.1.2 用戶產(chǎn)品瀏覽表【DWD】
CREATE external TABLE
IF NOT EXISTS dwd_nshop.dwd_nshop_actlog_pdtview (
user_id string COMMENT '用戶id',
device_num string COMMENT '設(shè)備號(hào)',
device_type string COMMENT '設(shè)備類型',
os string COMMENT '手機(jī)系統(tǒng)',
os_version string COMMENT '手機(jī)系統(tǒng)版本',
manufacturer string COMMENT '手機(jī)制造商',
carrier string COMMENT '電信運(yùn)營商',
network_type string COMMENT '網(wǎng)絡(luò)類型',
area_code string COMMENT '地區(qū)編碼',
target_id string COMMENT '產(chǎn)品ID',
ct BIGINT COMMENT '產(chǎn)生時(shí)間'
) partitioned BY (bdp_day string) stored AS parquet location '/data/nshop/dw/user/dwd_nshop_actlog_pdtview/'
從ods層的事件表里篩選出action為07或08的數(shù)據(jù)予借,即為用戶瀏覽數(shù)據(jù):
insert overwrite table dwd_nshop.dwd_nshop_actlog_pdtview partition(bdp_day="20200618")
select
customer_id,
device_num,
device_type,
os,
os_version,
manufacturer,
carrier,
network_type,
area_code,
get_json_object(extinfo,'$.target_id')as target_id,
ct
from
ods_nshop.ods_nshop_01_useractlog
where
bdp_day = "20200618"
and
action in('07','08')
2.1.1.3 用戶產(chǎn)品查詢表【DWD】
CREATE external TABLE
IF NOT EXISTS dwd_nshop.dwd_nshop_actlog_pdtsearch (
user_id string COMMENT '用戶id',
device_num string COMMENT '設(shè)備號(hào)',
device_type string COMMENT '設(shè)備類型',
os string COMMENT '手機(jī)系統(tǒng)',
os_version string COMMENT '手機(jī)系統(tǒng)版本',
manufacturer string COMMENT '手機(jī)制造商',
carrier string COMMENT '電信運(yùn)營商',
network_type string COMMENT '網(wǎng)絡(luò)類型',
area_code string COMMENT '地區(qū)編碼',
target_order string COMMENT '查詢排序方式',
target_keys string COMMENT '查詢內(nèi)容',
target_id string COMMENT '產(chǎn)品ID',
ct BIGINT COMMENT '產(chǎn)生時(shí)間'
) partitioned BY (bdp_day string) stored AS parquet location '/data/nshop/dw/user/dwd_nshop_actlog_pdtsearch/';
查看公共字典表:
我們知道轴或,選擇action為05(交互)和event_type為01(瀏覽)或04(滑動(dòng))的行為是用戶查詢行為,因此寫出下列sql語句
insert overwrite table dwd_nshop.dwd_nshop_actlog_pdtsearch partition(bdp_day="20200618")
select
customer_id,
device_num,
device_type,
os,
os_version,
manufacturer,
carrier,
network_type,
area_code,
get_json_object(extinfo,'$.target_order')as target_order,
get_json_object(extinfo,'$.target_keys')as target_keys,
get_json_object(extinfo,'$.target_id')as target_id,
ct
from
ods_nshop.ods_nshop_01_useractlog
--lateral view explode(split(regexp_replace(get_json_object(extinfo,'$target_ids'),'[\\[\\"\\]]',''),','))t as target_id
where
bdp_day = "20200618"
and
action = '05'
and
event_type in('01','04')
2.1.1.4 用戶產(chǎn)品關(guān)注表【DWD】
CREATE external TABLE
IF NOT EXISTS dwd_nshop.dwd_actlog_product_comment (
user_id string COMMENT '用戶id',
device_num string COMMENT '設(shè)備號(hào)',
device_type string COMMENT '設(shè)備類型',
os string COMMENT '手機(jī)系統(tǒng)',
os_version string COMMENT '手機(jī)系統(tǒng)版本',
manufacturer string COMMENT '手機(jī)制造商',
carrier string COMMENT '電信運(yùn)營商',
network_type string COMMENT '網(wǎng)絡(luò)類型',
area_code string COMMENT '地區(qū)編碼',
target_id string COMMENT '產(chǎn)品ID',
ct BIGINT COMMENT '產(chǎn)生時(shí)間'
) partitioned BY (bdp_day string) stored AS parquet location '/data/nshop/dw/user/dwd_actlog_product_comment/';
選擇action ='05'(交互)昌跌,event_type='02'(點(diǎn)擊),并且target_action='01'(店鋪關(guān)注)的事件即可:
with log_attend as(
select
customer_id,
device_num,
device_type,
os,
os_version,
manufacturer,
carrier,
network_type,
area_code,
get_json_object(extinfo,'$.target_action')as target_action,
get_json_object(extinfo,'$.target_id')as target_id,
extinfo,
ct,
bdp_day
from
ods_nshop.ods_nshop_01_useractlog
where
bdp_day = "20200618"
and
action ='05'
and
event_type='02'
)
insert overwrite table dwd_nshop.dwd_actlog_product_comment partition(bdp_day="20200618")
select
customer_id,
device_num,
device_type,
os,
os_version,
manufacturer,
carrier,
network_type,
area_code,
target_id,
ct
from
log_attend where target_action='01'
2.1.2 交易主題
2.1.2.1 交易訂單明細(xì)流水表【DWD】
CREATE external TABLE
IF NOT EXISTS dwd_nshop.dwd_nshop_orders_details (
order_id string COMMENT '訂單ID',
order_status INT COMMENT '訂單狀態(tài):5已收貨(完成)|6投訴 7退貨',
supplier_code string COMMENT '店鋪ID',
product_code string COMMENT '商品ID',
customer_id string COMMENT '用戶id',
device_type string COMMENT '設(shè)備類型',
os string COMMENT '手機(jī)系統(tǒng)',
os_version string COMMENT '手機(jī)系統(tǒng)版本',
manufacturer string COMMENT '手機(jī)制造商',
carrier string COMMENT '電信運(yùn)營商',
user_areacode string COMMENT '用戶所在地區(qū)',
consignee_zipcode string COMMENT '收貨人地址',
pay_type string COMMENT '支付類型:線上支付 10 網(wǎng)上銀行 11 微信 12 支付寶 | 線下支 付(貨到付款) 20 ',
pay_count INT COMMENT '支付次數(shù)',
product_price DECIMAL (5, 1) COMMENT '購買商品單價(jià)',
weighing_cost DECIMAL (2, 1) COMMENT '商品加權(quán)價(jià)格',
district_money DECIMAL (4, 1) COMMENT '優(yōu)惠金額',
is_activity int COMMENT '1:參加活動(dòng)|0:沒有參加活動(dòng)',
order_ctime BIGINT COMMENT '創(chuàng)建時(shí)間'
) partitioned BY (bdp_day string) stored AS parquet location '/data/nshop/dwd/order/dwd_nshop_orders_details/';
訂單明細(xì)流水表的數(shù)據(jù)主要是從ods層的訂單詳細(xì)表里抽取照雁,結(jié)合公共產(chǎn)品維度表查出產(chǎn)品相關(guān)信息避矢,另外還結(jié)合訂單支付記錄表的數(shù)據(jù),查出支付相關(guān)信息囊榜,sql語句如下:
with tborder as(
select
order_id,
order_status,
customer_id,
consignee_zipcode,
pay_type,
order_ctime,
from_unixtime(cast(order_ctime/1000 as bigint),'yyyyMMdd') as bdp_day
from ods_nshop.ods_02_orders
where
from_unixtime(cast(order_ctime/1000 as bigint),'yyyyMMdd') = '20200618'
),
tbdetail as(
select
a.order_id,
a.product_id,
a.product_cnt,
a.weighing_cost,
a.district_money,
a.is_activity,
b.supplier_code,
b.product_price,
from_unixtime(cast(a.order_detail_ctime/1000 as bigint),'yyyyMMdd') as bdp_day
from ods_nshop.ods_02_order_detail a
join ods_nshop.dim_pub_product b
on a.product_id=b.product_code
where
from_unixtime(cast(a.order_detail_ctime/1000 as bigint),'yyyyMMdd')='20200618'
),
pays as (
select
count(*) as pay_count,
order_id,
from_unixtime(cast(pay_ctime/1000 as bigint),'yyyyMMdd') as bdp_day
from ods_nshop.ods_02_orders_pay_records
where
from_unixtime(cast(pay_ctime/1000 as bigint),'yyyyMMdd')='20200618'
group by
order_id,
from_unixtime(cast(pay_ctime/1000 as bigint),'yyyyMMdd')
)
insert overwrite table dwd_nshop.dwd_nshop_orders_details partition (bdp_day)
select
tborder.order_id,
tborder.order_status,
tbdetail.supplier_code,
tbdetail.product_id,
tborder.customer_id,
tborder.consignee_zipcode,
tborder.pay_type,
pays.pay_count,
tbdetail.product_price,
tbdetail.product_cnt,
tbdetail.weighing_cost,
tbdetail.district_money,
tbdetail.is_activity,
tborder.order_ctime,
tborder.bdp_day
from tborder join tbdetail
on tborder.order_id=tbdetail.order_id
join pays
on tbdetail.order_id=pays.order_id
2.2.1 訂單支付表【DWD】
CREATE external TABLE
IF NOT EXISTS dwd_nshop.dwd_order_pay (
pay_id string COMMENT '訂單支付記錄ID',
order_id string COMMENT '訂單ID',
customer_id string COMMENT '用戶ID',
user_id string COMMENT '用戶id',
os string COMMENT '手機(jī)系統(tǒng)',
os_version string COMMENT '手機(jī)系統(tǒng)版本',
manufacturer string COMMENT '手機(jī)制造商',
carrier string COMMENT '電信運(yùn)營商',
network_type string COMMENT '網(wǎng)絡(luò)類型',
area_code string COMMENT '地區(qū)編碼',
longitude string COMMENT '經(jīng)度',
latitude string COMMENT '緯度',
page_id string COMMENT '行為所在頁',
target_action string COMMENT '參考點(diǎn)擊目標(biāo)類型說明',
target_id string COMMENT '訂單id',
target_price DOUBLE COMMENT '訂單價(jià)格',
target_status string COMMENT '支付狀態(tài):0 未支付| 1 支付中 | 2 已支付',
target_type string COMMENT '支付類型:線上支付 10 網(wǎng)上銀行 11 微信 12 支付寶 | 線 下支付(貨到付款) 20 ',
ct BIGINT COMMENT '產(chǎn)生時(shí)間'
) partitioned BY (bdp_day string) stored AS parquet location '/data/nshop/dwd/order/dwd_order_pay/'
2.1.3 營銷活動(dòng)主題
廣告投放數(shù)據(jù)表【DWD】
CREATE external TABLE
IF NOT EXISTS dwd_nshop.dwd_nshop_releasedatas (
customer_id string COMMENT '用戶id',
device_num string COMMENT '設(shè)備號(hào)',
device_type string COMMENT '設(shè)備類型',
os string COMMENT '手機(jī)系統(tǒng)',
os_version string COMMENT '手機(jī)系統(tǒng)版本',
manufacturer string COMMENT '手機(jī)制造商',
area_code string COMMENT '地區(qū)編碼',
release_sid string COMMENT '投放請求id',
release_ip string COMMENT '投放方ip',
release_session string COMMENT '投放會(huì)話id',
release_sources string COMMENT '投放渠道',
release_category string COMMENT '投放瀏覽產(chǎn)品分類',
release_product string COMMENT '投放瀏覽產(chǎn)品',
release_product_page string COMMENT '投放瀏覽產(chǎn)品頁',
ct BIGINT COMMENT '創(chuàng)建時(shí)間'
) partitioned BY (bdp_day string) stored AS parquet location '/data/nshop/dwd/release/dwd_nshop_releasedatas/'
這里需要關(guān)聯(lián)兩張中間表,一個(gè)是產(chǎn)品表亥宿,查出產(chǎn)品分類卸勺,一個(gè)是頁面布局表,選擇產(chǎn)品頁烫扼,兩張表結(jié)構(gòu)如下:
插入數(shù)據(jù):
with tbrelease as(
select
c.customer_id,
r.device_num ,
r.device_type ,
r.os ,
r.os_version ,
r.manufacturer ,
r.area_code ,
r.release_sid ,
parse_url(concat("http://127.0.0.1:8088/release?",'',r.release_params),'QUERY','ip')as release_ip,
r.release_session,
r.release_sources,
parse_url(concat("http://127.0.0.1:8088/release?",'',r.release_params),'QUERY','productPage')as release_product_page,
r.ct
from ods_nshop.ods_01_releasedatas r
join ods_nshop.ods_02_customer c
on r.customer_id=c.customer_id
where
bdp_day='20200618'
)
insert overwrite table dwd_nshop.dwd_nshop_releasedatas partition(bdp_day='20200618')
select
a.customer_id,
a.device_num ,
a.device_type ,
a.os ,
a.os_version ,
a.manufacturer,
a.area_code ,
a.release_sid ,
a.release_ip,
a.release_session,
a.release_sources,
f.category_code release_category,
p.page_target release_product,
a.release_product_page,
a.ct
from tbrelease a
join dim_nshop.dim_pub_page p
on a.release_product_page=p.page_code and p.page_type='4'
join dim_nshop.dim_pub_product f
on p.page_code=f.product_code;