1.創(chuàng)建數(shù)據(jù)庫(kù)
1)啟動(dòng)
hive
[atguigu@hadoop102 hive]$ nohup bin/hive --service metastore &
[atguigu@hadoop102 hive]$ nohup bin/hive --service hiveserver2 &
[atguigu@hadoop102 hive]$ bin/hive
2)顯示數(shù)據(jù)庫(kù)
hive (default)> show databases;
3)創(chuàng)建數(shù)據(jù)庫(kù)
hive (default)> create database gmall;
4)使用數(shù)據(jù)庫(kù)
hive (default)> use gmall;
2.ODS 層(用戶行為數(shù)據(jù))
2.1 創(chuàng)建啟動(dòng)日志表 ods_start_log
1)創(chuàng)建輸入數(shù)據(jù)是 lzo 輸出是 text啰脚,支持 json 解析的分區(qū)表
hive (gmall)> drop table if exists ods_start_log;
CREATE EXTERNAL TABLE ods_start_log (`line` string)
PARTITIONED BY (`dt` string)
STORED AS
INPUTFORMAT 'com.hadoop.mapred.DeprecatedLzoTextInputFormat'
OUTPUTFORMAT
'org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat'
LOCATION '/warehouse/gmall/ods/ods_start_log';
說(shuō)明 Hive 的 LZO 壓縮:https://cwiki.apache.org/confluence/display/Hive/LanguageManual+LZO
2)加載數(shù)據(jù)
hive (gmall)> load data inpath '/origin_data/gmall/log/topic_start/2020-03-10' into table gmall.ods_start_log partition(dt='2020-03-10');
3)查看是否加載成功
hive (gmall)> select * from ods_start_log where dt='2020-03-10' limit 2;
4)為 lzo 壓縮文件創(chuàng)建索引
[atguigu@hadoop102 hadoop-2.7.2]$ hadoop jar /opt/module/hadoop-
2.7.2/share/hadoop/common/hadoop-lzo-0.4.20. jar
com.hadoop.compression.lzo.DistributedLzoIndexer
/warehouse/gmall/ods/ods_start_log/dt=2020-03-10
2.2 創(chuàng)建事件日志表 ods_event_log
1)創(chuàng)建輸入數(shù)據(jù)是 lzo 輸出是 text枫攀,支持 json 解析的分區(qū)表
hive (gmall)> drop table if exists ods_event_log;
CREATE EXTERNAL TABLE ods_event_log(`line` string)
PARTITIONED BY (`dt` string)
STORED AS
INPUTFORMAT 'com.hadoop.mapred.DeprecatedLzoTextInputFormat'
OUTPUTFORMAT 'org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat'
LOCATION '/warehouse/gmall/ods/ods_event_log';
2)加載數(shù)據(jù)
hive (gmall)> load data inpath '/origin_data/gmall/log/topic_event/2020-03-10' into table gmall.ods_event_log partition(dt='2020-03-10');
注意:時(shí)間格式都配置成 YYYY-MM-DD 格式,這是 Hive 默認(rèn)支持的時(shí)間格式
3)查看是否加載成功
hive (gmall)> select * from ods_event_log where dt="2020-03-10" limit 2;
4)為 lzo 壓縮文件創(chuàng)建索引
[atguigu@hadoop102 hadoop-2.7.2]$ hadoop jar /opt/module/hadoop-
2.7.2/share/hadoop/common/hadoop-lzo-0.4.20. jar
com.hadoop.compression.lzo.DistributedLzoIndexer
/warehouse/gmall/ods/ods_event_log/dt=2020-03-10
2.3 Shell 中單引號(hào)和雙引號(hào)區(qū)別
1)在/home/atguigu/bin 創(chuàng)建一個(gè) test.sh 文件
[atguigu@hadoop102 bin]$ vim test.sh
在文件中添加如下內(nèi)容
#!/bin/bash
do_date=$1
echo '$do_date'
echo "$do_date"
echo "'$do_date'"
echo '"$do_date"'
echo `date`
2)查看執(zhí)行結(jié)果
[atguigu@hadoop102 bin]$ test.sh
2020-03-10
$do_date
2020-03-10
'2020-03-10'
"$do_date"
2020 年 05 月 02 日 星期四 21:02:08 CST
3)總結(jié):
(1)單引號(hào)不取變量值
(2)雙引號(hào)取變量值
(3)反引號(hào)`棋嘲,執(zhí)行引號(hào)中命令
(4)雙引號(hào)內(nèi)部嵌套單引號(hào)鸭限,取出變量值
(5)單引號(hào)內(nèi)部嵌套雙引號(hào),不取出變量值
2.4ODS 層加載數(shù)據(jù)腳本
1)在 hadoop102 的/home/atguigu/bin 目錄下創(chuàng)建腳本
[atguigu@hadoop102 bin]$ vim hdfs_to_ods_log.sh
在腳本中編寫(xiě)如下內(nèi)容
#!/bin/bash
# 定義變量方便修改
APP=gmall
hive=/opt/module/hive/bin/hive
# 如果是輸入的日期按照取輸入日期;如果沒(méi)輸入日期取當(dāng)前時(shí)間的前一天
if [ -n "$1" ] ;then
do_date=$1
else
do_date=`date -d "-1 day" +%F`
fi
echo "===日志日期為 $do_date==="
sql="
load data inpath '/origin_data/gmall/log/topic_start/$do_date' overwrite into table ${APP}.ods_start_log partition(dt='$do_date');
load data inpath '/origin_data/gmall/log/topic_event/$do_date' overwrite into table ${APP}.ods_event_log partition(dt='$do_date'); "
$hive -e "$sql"
hadoop jar /opt/module/hadoop-2.7.2/share/hadoop/common/hadoop-lzo-0.4.20.jar
com.hadoop.compression.lzo.DistributedLzoIndexer
/warehouse/gmall/ods/ods_start_log/dt=$do_date
hadoop jar /opt/module/hadoop-2.7.2/share/hadoop/common/hadoop-lzo-0.4.20.jar
com.hadoop.compression.lzo.DistributedLzoIndexer
/warehouse/gmall/ods/ods_event_log/dt=$do_date
說(shuō)明 1:
[ -n 變量值 ] 判斷變量的值铜异,是否為空
-- 變量的值,非空秸架,返回 true -- 變量的值揍庄,為空,返回 false
說(shuō)明 2:
查看 date 命令的使用东抹,[atguigu@hadoop102 ~]$ date --help
2)增加腳本執(zhí)行權(quán)限
[atguigu@hadoop102 bin]$ chmod 777 hdfs_to_ods_log.sh
3)腳本使用
[atguigu@hadoop102 module]$ hdfs_to_ods_log.sh 2020-03-11
4)查看導(dǎo)入數(shù)據(jù)
hive (gmall)> select * from ods_start_log where dt='2020-03-11' limit 2;
select * from ods_event_log where dt='2020-03-11' limit 2;
5)腳本執(zhí)行時(shí)間
企業(yè)開(kāi)發(fā)中一般在每日凌晨 30 分~1 點(diǎn)
3.ODS 層(業(yè)務(wù)數(shù)據(jù))
3.1 訂單表(增量及更新)
hive (gmall)>
drop table if exists ods_order_info;
create external table ods_order_info ( `id` string COMMENT '訂單號(hào)',
`final_total_amount` decimal(10,2) COMMENT '訂單金額',
`order_status` string COMMENT '訂單狀態(tài)',
`user_id` string COMMENT '用戶 id',
`out_trade_no` string COMMENT '支付流水號(hào)',
`create_time` string COMMENT '創(chuàng)建時(shí)間',
`operate_time` string COMMENT '操作時(shí)間',
`province_id` string COMMENT '省份 ID',
`benefit_reduce_amount` decimal(10,2) COMMENT '優(yōu)惠金額',
`original_total_amount` decimal(10,2) COMMENT '原價(jià)金額',
`feight_fee` decimal(10,2) COMMENT '運(yùn)費(fèi)' )
COMMENT '訂單表'
PARTITIONED BY (`dt` string)
row format delimited fields terminated by '\t'
STORED AS
INPUTFORMAT 'com.hadoop.mapred.DeprecatedLzoTextInputFormat'
OUTPUTFORMAT 'org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat'
location '/warehouse/gmall/ods/ods_order_info/';
3.2訂單詳情表(增量)
hive (gmall) > DROP TABLE
IF EXISTS ods_order_detail;
CREATE external TABLE ods_order_detail (
`id` string COMMENT '訂單編號(hào)',
`order_id` string COMMENT '訂單號(hào)',
`user_id` string COMMENT '用戶 id',
`sku_id` string COMMENT '商品 id',
`sku_name` string COMMENT '商品名稱',
`order_price` DECIMAL (10, 2) COMMENT '商品價(jià)格',
`sku_num` BIGINT COMMENT '商品數(shù)量',
`create_time` string COMMENT '創(chuàng)建時(shí)間'
) COMMENT '訂單詳情表' PARTITIONED BY (`dt` string) ROW format delimited FIELDS TERMINATED BY '\t' STORED AS INPUTFORMAT 'com.hadoop.mapred.DeprecatedLzoTextInputFormat' OUTPUTFORMAT 'org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat' location '/warehouse/gmall/ods/ods_order_detail/';
3.3SKU 商品表(全量)
hive (gmall) > DROP TABLE
IF EXISTS ods_sku_info;
CREATE external TABLE ods_sku_info (
`id` string COMMENT 'skuId',
`spu_id` string COMMENT 'spuid',
`price` DECIMAL (10, 2) COMMENT '價(jià)格',
`sku_name` string COMMENT '商品名稱',
`sku_desc` string COMMENT '商品描述',
`weight` string COMMENT '重量',
`tm_id` string COMMENT '品牌 id',
`category3_id` string COMMENT '品類 id',
`create_time` string COMMENT '創(chuàng)建時(shí)間'
) COMMENT 'SKU 商品表' PARTITIONED BY (`dt` string)
ROW format delimited FIELDS TERMINATED BY '\t'
STORED AS
INPUTFORMAT 'com.hadoop.mapred.DeprecatedLzoTextInputFormat'
OUTPUTFORMAT 'org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat'
location '/warehouse/gmall/ods/ods_sku_info/';
3.4用戶表(增量及更新)
hive (gmall) > DROP TABLE
IF EXISTS ods_user_info;
CREATE external TABLE ods_user_info (
`id` string COMMENT '用戶 id',
`name` string COMMENT '姓名',
`birthday` string COMMENT '生日',
`gender` string COMMENT '性別',
`email` string COMMENT '郵箱',
`user_level` string COMMENT '用戶等級(jí)',
`create_time` string COMMENT '創(chuàng)建時(shí)間',
`operate_time` string COMMENT '操作時(shí)間'
) COMMENT '用戶表' PARTITIONED BY (`dt` string)
ROW format delimited FIELDS TERMINATED BY '\t'
STORED AS
INPUTFORMAT 'com.hadoop.mapred.DeprecatedLzoTextInputFormat'
OUTPUTFORMAT 'org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat'
location '/warehouse/gmall/ods/ods_user_info/';
3.5商品一級(jí)分類表(全量)
hive (gmall) > DROP TABLE
IF EXISTS ods_base_category1;
CREATE external TABLE ods_base_category1 (
`id` string COMMENT 'id',
`name` string COMMENT '名稱'
) COMMENT '商品一級(jí)分類表'
PARTITIONED BY (`dt` string)
ROW format delimited FIELDS TERMINATED BY '\t'
STORED AS
INPUTFORMAT 'com.hadoop.mapred.DeprecatedLzoTextInputFormat'
OUTPUTFORMAT 'org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat'
location '/warehouse/gmall/ods/ods_base_category1/';
3.6商品二級(jí)分類表(全量)
hive (gmall) > DROP TABLE
IF EXISTS ods_base_category2;
CREATE external TABLE ods_base_category2 (
`id` string COMMENT ' id',
`name` string COMMENT '名稱',
category1_id string COMMENT '一級(jí)品類 id'
) COMMENT '商品二級(jí)分類表'
PARTITIONED BY (`dt` string)
ROW format delimited FIELDS TERMINATED BY '\t'
STORED AS
INPUTFORMAT 'com.hadoop.mapred.DeprecatedLzoTextInputFormat'
OUTPUTFORMAT 'org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat'
location '/warehouse/gmall/ods/ods_base_category2/';
3.7商品三級(jí)分類表(全量)
hive (gmall) > DROP TABLE
IF EXISTS ods_base_category3;
CREATE external TABLE ods_base_category3 (
`id` string COMMENT ' id',
`name` string COMMENT '名稱',
category2_id string COMMENT '二級(jí)品類 id'
) COMMENT '商品三級(jí)分類表'
PARTITIONED BY (`dt` string)
ROW format delimited FIELDS TERMINATED BY '\t'
STORED AS
INPUTFORMAT 'com.hadoop.mapred.DeprecatedLzoTextInputFormat'
OUTPUTFORMAT 'org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat'
location '/warehouse/gmall/ods/ods_base_category3/';
3.8支付流水表(增量)
hive (gmall) > DROP TABLE
IF EXISTS ods_payment_info;
CREATE external TABLE ods_payment_info (
`id` BIGINT COMMENT '編號(hào)',
`out_trade_no` string COMMENT '對(duì)外業(yè)務(wù)編號(hào)',
`order_id` string COMMENT '訂單編號(hào)',
`user_id` string COMMENT '用戶編號(hào)',
`alipay_trade_no` string COMMENT '支付寶交易流水編號(hào)',
`total_amount` DECIMAL (16, 2) COMMENT '支付金額',
`subject` string COMMENT '交易內(nèi)容',
`payment_type` string COMMENT '支付類型',
`payment_time` string COMMENT '支付時(shí)間'
) COMMENT '支付流水表'
PARTITIONED BY (`dt` string)
ROW format delimited FIELDS TERMINATED BY '\t'
STORED AS
INPUTFORMAT 'com.hadoop.mapred.DeprecatedLzoTextInputFormat'
OUTPUTFORMAT 'org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat'
location '/warehouse/gmall/ods/ods_payment_info/';
3.9 省份表(特殊)
hive (gmall) > DROP TABLE
IF EXISTS ods_base_province;
CREATE external TABLE ods_base_province (
`id` BIGINT COMMENT '編號(hào)',
`name` string COMMENT '省份名稱',
`region_id` string COMMENT '地區(qū) ID',
`area_code` string COMMENT '地區(qū)編碼',
`iso_code` string COMMENT 'iso 編碼'
) COMMENT '省份表'
ROW format delimited FIELDS TERMINATED BY '\t'
STORED AS
INPUTFORMAT 'com.hadoop.mapred.DeprecatedLzoTextInputFormat'
OUTPUTFORMAT 'org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat'
location '/warehouse/gmall/ods/ods_base_province/';
3.10 地區(qū)表(特殊)
hive (gmall) > DROP TABLE
IF EXISTS ods_base_region;
CREATE external TABLE ods_base_region (
`id` BIGINT COMMENT '編號(hào)',
`region_name` string COMMENT '地區(qū)名稱'
) COMMENT '地區(qū)表'
ROW format delimited FIELDS TERMINATED BY '\t'
STORED AS
INPUTFORMAT 'com.hadoop.mapred.DeprecatedLzoTextInputFormat'
OUTPUTFORMAT 'org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat'
location '/warehouse/gmall/ods/ods_base_region/';
3.11 品牌表(全量)
hive (gmall) > DROP TABLE
IF EXISTS ods_base_trademark;
CREATE external TABLE ods_base_trademark (
`tm_id` BIGINT COMMENT '編號(hào)',
`tm_name` string COMMENT '品牌名稱'
) COMMENT '品牌表'
PARTITIONED BY (`dt` string)
ROW format delimited FIELDS TERMINATED BY '\t'
STORED AS
INPUTFORMAT 'com.hadoop.mapred.DeprecatedLzoTextInputFormat'
OUTPUTFORMAT 'org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat'
location '/warehouse/gmall/ods/ods_base_trademark/';
3.12 訂單狀態(tài)表(增量)
hive (gmall) > DROP TABLE
IF EXISTS ods_order_status_log;
CREATE external TABLE ods_order_status_log (
`id` BIGINT COMMENT '編號(hào)',
`order_id` string COMMENT '訂單 ID',
`order_status` string COMMENT '訂單狀態(tài)',
`operate_time` string COMMENT '修改時(shí)間'
) COMMENT '訂單狀態(tài)表'
PARTITIONED BY (`dt` string)
ROW format delimited FIELDS TERMINATED BY '\t'
STORED AS
INPUTFORMAT 'com.hadoop.mapred.DeprecatedLzoTextInputFormat'
OUTPUTFORMAT 'org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat'
location '/warehouse/gmall/ods/ods_order_status_log/';
3.13 SPU 商品表(全量)
hive (gmall) > DROP TABLE
IF EXISTS ods_spu_info;
CREATE external TABLE ods_spu_info (
`id` string COMMENT 'spuid',
`spu_name` string COMMENT 'spu 名稱',
`category3_id` string COMMENT '品類 id',
`tm_id` string COMMENT '品牌 id'
) COMMENT 'SPU 商品表'
PARTITIONED BY (`dt` string)
ROW format delimited FIELDS TERMINATED BY '\t'
STORED AS
INPUTFORMAT 'com.hadoop.mapred.DeprecatedLzoTextInputFormat'
OUTPUTFORMAT 'org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat'
location '/warehouse/gmall/ods/ods_spu_info/';
3.14 商品評(píng)論表(增量)
hive (gmall) > DROP TABLE
IF EXISTS ods_comment_info;
CREATE external TABLE ods_comment_info (
`id` string COMMENT '編號(hào)',
`user_id` string COMMENT '用戶 ID',
`sku_id` string COMMENT '商品 sku',
`spu_id` string COMMENT '商品 spu',
`order_id` string COMMENT '訂單 ID',
`appraise` string COMMENT '評(píng)價(jià)',
`create_time` string COMMENT '評(píng)價(jià)時(shí)間'
) COMMENT '商品評(píng)論表'
PARTITIONED BY (`dt` string)
ROW format delimited FIELDS TERMINATED BY '\t'
STORED AS
INPUTFORMAT 'com.hadoop.mapred.DeprecatedLzoTextInputFormat'
OUTPUTFORMAT 'org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat'
location '/warehouse/gmall/ods/ods_comment_info/';
3.15 退單表(增量)
hive (gmall) > DROP TABLE
IF EXISTS ods_order_refund_info;
CREATE external TABLE ods_order_refund_info (
`id` string COMMENT '編號(hào)',
`user_id` string COMMENT '用戶 ID',
`order_id` string COMMENT '訂單 ID',
`sku_id` string COMMENT '商品 ID',
`refund_type` string COMMENT '退款類型',
`refund_num` BIGINT COMMENT '退款件數(shù)',
`refund_amount` DECIMAL (16, 2) COMMENT '退款金額',
`refund_reason_type` string COMMENT '退款原因類型',
`create_time` string COMMENT '退款時(shí)間'
) COMMENT '退單表'
PARTITIONED BY (`dt` string)
ROW format delimited FIELDS TERMINATED BY '\t'
STORED AS
INPUTFORMAT 'com.hadoop.mapred.DeprecatedLzoTextInputFormat'
OUTPUTFORMAT 'org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat'
location '/warehouse/gmall/ods/ods_order_refund_info/';
3.16 加購(gòu)表(全量)
hive (gmall) > DROP TABLE
IF EXISTS ods_cart_info;
CREATE external TABLE ods_cart_info (
`id` string COMMENT '編號(hào)',
`user_id` string COMMENT '用戶 id',
`sku_id` string COMMENT 'skuid',
`cart_price` string COMMENT '放入購(gòu)物車(chē)時(shí)價(jià)格',
`sku_num` string COMMENT '數(shù)量',
`sku_name` string COMMENT 'sku 名稱 (冗余)',
`create_time` string COMMENT '創(chuàng)建時(shí)間',
`operate_time` string COMMENT '修改時(shí)間',
`is_ordered` string COMMENT '是否已經(jīng)下單',
`order_time` string COMMENT '下單時(shí)間'
) COMMENT '加購(gòu)表'
PARTITIONED BY (`dt` string)
ROW format delimited FIELDS TERMINATED BY '\t'
STORED AS
INPUTFORMAT 'com.hadoop.mapred.DeprecatedLzoTextInputFormat'
OUTPUTFORMAT 'org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat'
location '/warehouse/gmall/ods/ods_cart_info/';
3.17 商品收藏表(全量)
hive (gmall) > DROP TABLE
IF EXISTS ods_favor_info;
CREATE external TABLE ods_favor_info (
`id` string COMMENT '編號(hào)',
`user_id` string COMMENT '用戶 id',
`sku_id` string COMMENT 'skuid',
`spu_id` string COMMENT 'spuid',
`is_cancel` string COMMENT '是否取消',
`create_time` string COMMENT '收藏時(shí)間',
`cancel_time` string COMMENT '取消時(shí)間'
) COMMENT '商品收藏表'
PARTITIONED BY (`dt` string)
ROW format delimited FIELDS TERMINATED BY '\t'
STORED AS
INPUTFORMAT 'com.hadoop.mapred.DeprecatedLzoTextInputFormat'
OUTPUTFORMAT 'org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat'
location '/warehouse/gmall/ods/ods_favor_info/';
3.18 優(yōu)惠券領(lǐng)用表(新增及變化)
hive (gmall) > DROP TABLE
IF EXISTS ods_coupon_use;
CREATE external TABLE ods_coupon_use (
`id` string COMMENT '編號(hào)',
`coupon_id` string COMMENT '優(yōu)惠券 ID',
`user_id` string COMMENT 'skuid',
`order_id` string COMMENT 'spuid',
`coupon_status` string COMMENT '優(yōu)惠券狀態(tài)',
`get_time` string COMMENT '領(lǐng)取時(shí)間',
`using_time` string COMMENT '使用時(shí)間(下單)',
`used_time` string COMMENT '使用時(shí)間(支付)'
) COMMENT '優(yōu)惠券領(lǐng)用表'
PARTITIONED BY (`dt` string)
ROW format delimited FIELDS TERMINATED BY '\t'
STORED AS
INPUTFORMAT 'com.hadoop.mapred.DeprecatedLzoTextInputFormat'
OUTPUTFORMAT 'org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat'
location '/warehouse/gmall/ods/ods_coupon_use/';
3.19 優(yōu)惠券表(全量)
hive (gmall) > DROP TABLE
IF EXISTS ods_coupon_info;
CREATE external TABLE ods_coupon_info (
`id` string COMMENT '購(gòu)物券編號(hào)',
`coupon_name` string COMMENT '購(gòu)物券名稱',
`coupon_type` string COMMENT '購(gòu)物券類型 1 現(xiàn)金券 2 折扣券 3 滿減券 4 滿件打折券',
`condition_amount` string COMMENT '滿額數(shù)',
`condition_num` string COMMENT '滿件數(shù)',
`activity_id` string COMMENT '活動(dòng)編號(hào)',
`benefit_amount` string COMMENT '減金額',
`benefit_discount` string COMMENT '折扣',
`create_time` string COMMENT '創(chuàng)建時(shí)間',
`range_type` string COMMENT '范圍類型 1蚂子、商品 2、品類 3缭黔、品牌',
`spu_id` string COMMENT '商品 id',
`tm_id` string COMMENT '品牌 id',
`category3_id` string COMMENT '品類 id',
`limit_num` string COMMENT '最多領(lǐng)用次數(shù)',
`operate_time` string COMMENT '修改時(shí)間',
`expire_time` string COMMENT '過(guò)期時(shí)間'
) COMMENT '優(yōu)惠券表'
PARTITIONED BY (`dt` string)
ROW format delimited FIELDS TERMINATED BY '\t'
STORED AS
INPUTFORMAT 'com.hadoop.mapred.DeprecatedLzoTextInputFormat'
OUTPUTFORMAT 'org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat'
location '/warehouse/gmall/ods/ods_coupon_info/';
3.20 活動(dòng)表(全量)
hive (gmall) > DROP TABLE
IF EXISTS ods_activity_info;
CREATE external TABLE ods_activity_info (
`id` string COMMENT '編號(hào)',
`activity_name` string COMMENT '活動(dòng)名稱',
`activity_type` string COMMENT '活動(dòng)類型',
`start_time` string COMMENT '開(kāi)始時(shí)間',
`end_time` string COMMENT '結(jié)束時(shí)間',
`create_time` string COMMENT '創(chuàng)建時(shí)間'
) COMMENT '活動(dòng)表'
PARTITIONED BY (`dt` string)
ROW format delimited FIELDS TERMINATED BY '\t'
STORED AS
INPUTFORMAT 'com.hadoop.mapred.DeprecatedLzoTextInputFormat'
OUTPUTFORMAT 'org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat'
location '/warehouse/gmall/ods/ods_activity_info/';
3.21 活動(dòng)訂單關(guān)聯(lián)表(增量)
hive (gmall) > DROP TABLE
IF EXISTS ods_activity_order;
CREATE external TABLE ods_activity_order (
`id` string COMMENT '編號(hào)',
`activity_id` string COMMENT '優(yōu)惠券 ID',
`order_id` string COMMENT 'skuid',
`create_time` string COMMENT '領(lǐng)取時(shí)間'
) COMMENT '活動(dòng)訂單關(guān)聯(lián)表'
PARTITIONED BY (`dt` string)
ROW format delimited FIELDS TERMINATED BY '\t'
STORED AS
INPUTFORMAT 'com.hadoop.mapred.DeprecatedLzoTextInputFormat'
OUTPUTFORMAT 'org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat'
location '/warehouse/gmall/ods/ods_activity_order/';
3.22 優(yōu)惠規(guī)則表(全量)
hive (gmall) > DROP TABLE
IF EXISTS ods_activity_rule;
CREATE external TABLE ods_activity_rule (
`id` string COMMENT '編號(hào)',
`activity_id` string COMMENT '活動(dòng) ID',
`condition_amount` string COMMENT '滿減金額',
`condition_num` string COMMENT '滿減件數(shù)',
`benefit_amount` string COMMENT '優(yōu)惠金額',
`benefit_discount` string COMMENT '優(yōu)惠折扣',
`benefit_level` string COMMENT '優(yōu)惠級(jí)別'
) COMMENT '優(yōu)惠規(guī)則表'
PARTITIONED BY (`dt` string)
ROW format delimited FIELDS TERMINATED BY '\t'
STORED AS
INPUTFORMAT 'com.hadoop.mapred.DeprecatedLzoTextInputFormat'
OUTPUTFORMAT 'org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat'
location '/warehouse/gmall/ods/ods_activity_rule/';
3.23 編碼字典表(全量)
hive (gmall) > DROP TABLE
IF EXISTS ods_base_dic;
CREATE external TABLE ods_base_dic (
`dic_code` string COMMENT '編號(hào)',
`dic_name` string COMMENT '編碼名稱',
`parent_code` string COMMENT '父編碼',
`create_time` string COMMENT '創(chuàng)建日期',
`operate_time` string COMMENT '操作日期'
) COMMENT '編碼字典表'
PARTITIONED BY (`dt` string)
ROW format delimited FIELDS TERMINATED BY '\t'
STORED AS
INPUTFORMAT 'com.hadoop.mapred.DeprecatedLzoTextInputFormat'
OUTPUTFORMAT 'org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat'
location '/warehouse/gmall/ods/ods_base_dic/';
3.24 ODS 層加載數(shù)據(jù)腳本
1)在/home/atguigu/bin 目錄下創(chuàng)建腳本 hdfs_to_ods_db.sh
[atguigu@hadoop102 bin]$ vim hdfs_to_ods_db.sh
在腳本中填寫(xiě)如下內(nèi)容
#!/bin/bash
APP=gmall
hive=/opt/module/hive/bin/hive
# 如果是輸入的日期按照取輸入日期食茎;如果沒(méi)輸入日期取當(dāng)前時(shí)間的前一天
if [ -n "$2" ] ;
then
do_date=$2
elsedo_date=`date -d "-1 day" +%F`
fisql1=" load data inpath '/origin_data/$APP/db/order_info/$do_date' OVERWRITE into table ${APP}.ods_order_info partition(dt='$do_date');
load data inpath '/origin_data/$APP/db/order_detail/$do_date' OVERWRITE into table ${APP}.ods_order_detail partition(dt='$do_date');
load data inpath '/origin_data/$APP/db/sku_info/$do_date' OVERWRITE into table ${APP}.ods_sku_info partition(dt='$do_date');
load data inpath '/origin_data/$APP/db/user_info/$do_date' OVERWRITE into table ${APP}.ods_user_info partition(dt='$do_date');
load data inpath '/origin_data/$APP/db/payment_info/$do_date' OVERWRITE into table ${APP}.ods_payment_info partition(dt='$do_date');
load data inpath '/origin_data/$APP/db/base_category1/$do_date' OVERWRITE into table ${APP}.ods_base_category1 partition(dt='$do_date');
load data inpath '/origin_data/$APP/db/base_category2/$do_date' OVERWRITE into table ${APP}.ods_base_category2 partition(dt='$do_date');
load data inpath '/origin_data/$APP/db/base_category3/$do_date' OVERWRITE into table ${APP}.ods_base_category3 partition(dt='$do_date');
load data inpath '/origin_data/$APP/db/base_trademark/$do_date' OVERWRITE into table ${APP}.ods_base_trademark partition(dt='$do_date');
load data inpath '/origin_data/$APP/db/activity_info/$do_date' OVERWRITE into table ${APP}.ods_activity_info partition(dt='$do_date');
load data inpath '/origin_data/$APP/db/activity_order/$do_date' OVERWRITE into table ${APP}.ods_activity_order partition(dt='$do_date');
load data inpath '/origin_data/$APP/db/cart_info/$do_date' OVERWRITE into table ${APP}.ods_cart_info partition(dt='$do_date');
load data inpath '/origin_data/$APP/db/comment_info/$do_date' OVERWRITE into table ${APP}.ods_comment_info partition(dt='$do_date');
load data inpath '/origin_data/$APP/db/coupon_info/$do_date' OVERWRITE into table ${APP}.ods_coupon_info partition(dt='$do_date');
load data inpath '/origin_data/$APP/db/coupon_use/$do_date' OVERWRITE into table ${APP}.ods_coupon_use partition(dt='$do_date');
load data inpath '/origin_data/$APP/db/favor_info/$do_date' OVERWRITE into table ${APP}.ods_favor_info partition(dt='$do_date');
load data inpath '/origin_data/$APP/db/order_refund_info/$do_date' OVERWRITE into table ${APP}.ods_order_refund_info partition(dt='$do_date');
load data inpath '/origin_data/$APP/db/order_status_log/$do_date' OVERWRITE into table ${APP}.ods_order_status_log partition(dt='$do_date');
load data inpath '/origin_data/$APP/db/spu_info/$do_date' OVERWRITE into table ${APP}.ods_spu_info partition(dt='$do_date');
load data inpath '/origin_data/$APP/db/activity_rule/$do_date' OVERWRITE into table ${APP}.ods_activity_rule partition(dt='$do_date');
load data inpath '/origin_data/$APP/db/base_dic/$do_date' OVERWRITE into table ${APP}.ods_base_dic partition(dt='$do_date'); "
sql2=" load data inpath '/origin_data/$APP/db/base_province/$do_date' OVERWRITE into table ${APP}.ods_base_province;
load data inpath '/origin_data/$APP/db/base_region/$do_date' OVERWRITE into table ${APP}.ods_base_region;
"case $1 in
"first")
{ $hive -e "$sql1"
$hive -e "$sql2"
};;
"all"){
$hive -e "$sql1"
};;
esac
2)修改權(quán)限
[atguigu@hadoop102 bin]$ chmod 777 hdfs_to_ods_db.sh
3)初次導(dǎo)入
[atguigu@hadoop102 bin]$ hdfs_to_ods_db.sh first 2020-03-10
4)每日導(dǎo)入
[atguigu@hadoop102 bin]$ hdfs_to_ods_db.sh all 2020-03-11
5)測(cè)試數(shù)據(jù)是否導(dǎo)入成功
hive (gmall)> select * from ods_order_detail where dt='2020-03-11';