1.下載安裝hive
https://mirrors.tuna.tsinghua.edu.cn/apache/hive/hive-3.1.1/
解壓
tar -zxvf apache-hive-3.1.1-bin.tar.gz
2.啟動 hadoop daemon 服務
因為 hive 依賴于 hdfs,所以需要啟動 hadoop 服務(wù)
$HADOOP_HOME/sbin/start-all.sh
3.配置 hive 并使用 mysql 存儲 metastore 信息
配置相關(guān)環(huán)境變量
vim /etc/profile
export HIVE_HOME=/home/hadoop/apache-hive-3.1.1-bin
export PATH=$HIVE_HOME/bin:$PATH
配置mysql:
cd $HIVE_HOME/scripts/metastore/upgrade/mysql # 進入 sql 腳本目錄,便于后面執(zhí)行腳本
service mysqld start
mysql -u root -p # root 登錄 mysql 服務器
>create user 'hive'@'localhost' identified by 'password'; # 創建 hive 用戶
>create database hive; # 創(chuàng)建 metastore 庫
>grant all on hive.* to 'hive'@'localhost'; # 賦予相關(guān)庫權(quán)限
>flush privileges; # 令權(quán)限生效
>use hive; # 切換到 hive 庫,確保表結構建在正確的數據庫中
>source hive-schema-3.1.0.mysql.sql # 初始化 metastore 相關表結構
配置 hive:
這里用 mysql 作為 hive metastore,因此需要下載 jdbc 連接庫,以 centos7 為例,下載 jdbc jar
wget https://dev.mysql.com/downloads/file/?id=485761 # 下載頁面
yum install mysql-connector-java-8.0.16-1.el7.noarch.rpm # 安裝 jdbc
ln -s /usr/share/java/mysql-connector-java.jar $HIVE_HOME/lib/mysql-connector-java.jar # 將組件 軟連接到 相關(guān)庫
vim $HIVE_HOME/conf/hive-site.xml
<configuration>
<property>
<name>javax.jdo.option.ConnectionURL</name>
<value>jdbc:mysql://localhost/hive?createDatabaseIfNotExist=true</value>
<description>metadata is stored in a MySQL server</description>
</property>
<property>
<name>javax.jdo.option.ConnectionDriverName</name>
<value>com.mysql.jdbc.Driver</value>
<description>MySQL JDBC driver class</description>
</property>
<property>
<name>javax.jdo.option.ConnectionUserName</name>
<value>hive</value>
<description>user name for connecting to mysql server</description>
</property>
<property>
<name>javax.jdo.option.ConnectionPassword</name>
<value>$hive_password</value>
<description>password for connecting to mysql server</description>
</property>
<property>
<name>datanucleus.autoCreateSchema</name>
<value>true</value>
</property>
<property>
<name>datanucleus.fixedDatastore</name>
<value>true</value>
</property>
</configuration>
上面是配置 mysql 信息,主要是三個:db_name, account, password,根據自己的情況修改。
啟動hive,創(chuàng)建一張外部表:
hdfs dfs -mkdir -p /dbtaobao/dataset/user_log # 創(chuàng)建表目錄
hdfs dfs -put ~/tmp.log /dbtaobao/dataset/user_log # 將 csv 文件作為 hive 外部數據源
[hadoop@hadoop1 mysql]$ head -n 10 ~/tmp.log
328862,323294,833,2882,2661,08,29,0,0,1,內(nèi)蒙古
328862,844400,1271,2882,2661,08,29,0,1,1,山西
328862,575153,1271,2882,2661,08,29,0,2,1,山西
328862,996875,1271,2882,2661,08,29,0,1,1,內(nèi)蒙古
328862,1086186,1271,1253,1049,08,29,0,0,2,浙江
328862,623866,1271,2882,2661,08,29,0,0,2,黑龍江
328862,542871,1467,2882,2661,08,29,0,5,2,四川
328862,536347,1095,883,1647,08,29,0,7,1,吉林
328862,364513,1271,2882,2661,08,29,0,1,2,貴州
328862,575153,1271,2882,2661,08,29,0,0,0,陜西
創(chuàng)建一張 hive 表
create external table dbtaobao.user_log(user_id INT,item_id INT,cat_id INT,merchant_id INT,brand_id INT,month STRING,day STRING,action INT,age_range INT,gender INT,province STRING) comment "first test table" row format delimited fields terminated by ',' stored as textFile location '/dbtaobao/dataset/user_log'; # create table ddl
上面創(chuàng)建 user_log 表的語句和 mysql 很想拿愧,其中特別說明下:
- row format delimited 告訴 hive csv 文件的一行對應(yīng) hive 表一行
- terminated by ',' 行內字段分隔符
- STORED AS TEXTFILE 存儲為文本格式, 另一種格式為 Sequence Files
- location 指定 hdfs 中的存儲位置