Foreground start (server side)
cd /export/servers/hive-1.1.0-cdh5.14.0
bin/hive --service hiveserver2
Background start (server side)
cd /export/servers/hive-1.1.0-cdh5.14.0
nohup bin/hive --service hiveserver2 &
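# A common variant (an assumption, not part of the original setup): redirect stdout/stderr to a log file so nohup.out does not grow unbounded
nohup bin/hive --service hiveserver2 > hiveserver2.log 2>&1 &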
### Connect to hiveserver2 with beeline
bin/beeline
beeline> !connect jdbc:hive2://node03.hadoop.com:10000
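# One-line alternative: pass the JDBC URL and user on the command line (host matches the example above; the user name root is an assumption, adjust for your cluster)
bin/beeline -u jdbc:hive2://node03.hadoop.com:10000 -n root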
Third interaction mode (client side)
### Use the -e option to run HQL statements directly
bin/hive -e "use myhive;select * from test;"
### Use the -f option to run HQL statements from a file
vim hive.sql
use myhive;select * from test;
bin/hive -f hive.sql
===============================================================
1. Create a database
create database if not exists myhive;
use myhive;
### Note: the storage location of hive tables is set by this property in hive-site.xml
<name>hive.metastore.warehouse.dir</name>
<value>/user/hive/warehouse</value>
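### With that default, each database maps to a directory under the warehouse path; for example (assuming the default value above):
hdfs dfs -ls /user/hive/warehouse/myhive.db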
### Create a database at a specified hdfs location
create database myhive2 location '/myhive2';
### Modify a database (alter database can change some database properties, but the database metadata, i.e. its name and location, cannot be changed)
alter database myhive2 set dbproperties('createtime'='20180611');
### Show database details
desc database myhive2; ## basic info
desc database extended myhive2; ## detailed info
### Drop a database
drop database myhive2; ## drops an empty database; fails if the database still contains tables
drop database myhive cascade; ## force-drops a database together with all of its tables
2. Create tables
################## General syntax ############
CREATE [EXTERNAL] TABLE [IF NOT EXISTS] table_name
[(col_name data_type [COMMENT col_comment], ...)]
[COMMENT table_comment]
[PARTITIONED BY (col_name data_type [COMMENT col_comment], ...)]
[CLUSTERED BY (col_name, col_name, ...)
[SORTED BY (col_name [ASC|DESC], ...)] INTO num_buckets BUCKETS]
[ROW FORMAT row_format]
[STORED AS file_format]
[LOCATION hdfs_path]
1) Managed (internal) tables
# a first hive table
use myhive;
create table stu(id int,name string);
insert into stu values (1, "zhangsan");
select * from stu;
# create a table and specify the field delimiter
create table if not exists stu2(id int, name string)
row format delimited fields terminated by '\t'
stored as textfile
location '/user/stu2';
# create a table from a query result
create table stu3 as select * from stu2;
# create a table from an existing table's schema
create table stu4 like stu2;
# check the table type
desc formatted stu2;
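# in the desc formatted output, the Table Type line reads MANAGED_TABLE here; an external table shows EXTERNAL_TABLE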
2) External tables
# teacher table
create external table techer (t_id string, t_name string)
row format delimited fields terminated by '\t';
# student table
create external table student (s_id string, s_name string, s_birth string, s_sex string)
row format delimited fields terminated by '\t';
# load data into the table from the local file system
load data local inpath '/export/servers/hivedatas/student.csv'
into table student;
# load and overwrite the existing data
load data local inpath '/export/servers/hivedatas/student.csv'
overwrite into table student;
# load data into the table from hdfs (upload the data to hdfs first; the load is really just a file move)
cd /export/servers/hivedatas
hdfs dfs -mkdir -p /hivedatas
hdfs dfs -put techer.csv /hivedatas/
load data inpath '/hivedatas/techer.csv' into table techer;
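# since load data inpath moves the file rather than copying it, a quick check shows it is gone from the source directory:
hdfs dfs -ls /hivedatas/ # techer.csv is no longer here
select * from techer limit 3;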
################### [Partitioned tables] #################
# syntax for creating a partitioned table
create table score(s_id string, c_id string, s_score int)
partitioned by (month string)
row format delimited fields terminated by '\t';
# create a table with multiple partition columns
create table score2 (s_id string, c_id string, s_score int)
partitioned by (year string, month string, day string)
row format delimited fields terminated by '\t';
# load data into a partitioned table
load data local inpath '/export/servers/hivedatas/score.csv'
into table score partition (month='201806');
# load data into a table with multiple partition columns
load data local inpath '/export/servers/hivedatas/score.csv'
into table score2 partition(year='2018', month='06', day='01');
# query several partitions together with union all
select * from score where month = '201806'
union all select * from score where month = '201805';
# show the partitions of a table
show partitions score;
# add a partition
alter table score add partition(month='201805');
# add multiple partitions at once
alter table score add partition(month='201804') partition(month = '201803');
# drop a partition
alter table score drop partition(month = '201806');
############### [Using partitioned tables] ############
hdfs dfs -mkdir -p /scoredatas/month=201806
hdfs dfs -put score.csv /scoredatas/month=201806/
create external table score4(s_id string, c_id string, s_score int)
partitioned by (month string)
row format delimited fields terminated by '\t'
location '/scoredatas';
# repair the table, i.e. rebuild the mapping between the table and its data files
# once the repair succeeds, the data is visible in the table
msck repair table score4;
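# equivalent by hand for this one partition (a sketch): register the partition and its location explicitly instead of running msck
alter table score4 add partition(month='201806') location '/scoredatas/month=201806';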
3) Bucketed tables
################### [Bucketed tables] #################
## bucketing splits the data on a chosen column:
## rows are hashed on that column and spread across several files (buckets)
set hive.enforce.bucketing=true; # enable hive's bucketing enforcement
set mapreduce.job.reduces=3; # set the number of reducers
# create the bucketed table; it is populated via insert overwrite
create table course (c_id string, c_name string, t_id string)
clustered by(c_id) into 3 buckets
row format delimited fields terminated by '\t';
# create a plain staging table, whose rows are loaded into the bucketed table with insert overwrite ... select
create table course_common (c_id string, c_name string, t_id string)
row format delimited fields terminated by '\t';
# load data into the staging table
load data local inpath '/export/servers/hivedatas/course.csv'
into table course_common;
# load the bucketed table via insert overwrite
insert overwrite table course
select * from course_common cluster by(c_id);
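# verify the bucketing: three bucket files should appear under the table directory (the path assumes the default warehouse dir shown earlier)
dfs -ls /user/hive/warehouse/myhive.db/course;
# sample a single bucket
select * from course tablesample(bucket 1 out of 3 on c_id);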
4) Alter tables
################# [Rename a table] ##################
## basic syntax
alter table old_table_name rename to new_table_name;
# rename score4 to score5
alter table score4 rename to score5;
################# [Add / change columns] ##################
desc score5; # show the table schema
alter table score5 add columns (mycol string, mysco string); ## add columns
alter table score5 change column mysco mysconew int; # change a column (rename mysco to mysconew, type int)
################# [Drop a table] ##################
drop table score5;
5) Loading data into hive tables
########### [Insert directly into a partitioned table] ###########
create table score3 like score;
insert into table score3 partition(month ='201807') values ('001','002','100');
########### [Insert via query] ###########
# load data with the load command
load data local inpath '/export/servers/hivedatas/score.csv'
overwrite into table score partition(month='201806');
# load data via a query
create table score4 like score;
insert overwrite table score4 partition(month = '201806')
select s_id, c_id, s_score from score;
########### [Multi-insert] ###########
# commonly used in production to split one table into two or more parts
# load data into score
load data local inpath '/export/servers/hivedatas/score.csv'
overwrite into table score partition(month='201806');
# create the first part
create table score_first(s_id string, c_id string)
partitioned by (month string)
row format delimited fields terminated by '\t';
# create the second part
create table score_second(c_id string, s_score int)
partitioned by (month string)
row format delimited fields terminated by '\t';
# load the first and second part from score in a single pass
from score
insert overwrite table score_first partition(month='201806') select s_id, c_id
insert overwrite table score_second partition(month = '201806') select c_id, s_score;
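# quick check that the split worked
select * from score_first where month = '201806' limit 3;
select * from score_second where month = '201806' limit 3;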
############ [Create a table from a query (as select)] #########
# save a query result into a new table
create table score5 as select * from score;
############ [Specify the data path with location at creation time] ############
# 1) specify the data path with location when creating the table (mainly for external tables)
create external table score6 (s_id string, c_id string, s_score int)
row format delimited fields terminated by '\t'
location '/myscore6';
# 2) upload the data to hdfs
hdfs dfs -mkdir -p /myscore6;
hdfs dfs -put score.csv /myscore6;
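# 3) query the table; hive reads whatever files sit under /myscore6
select * from score6;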
######## [Export and import hive table data (managed tables)] #######
# both paths are on hdfs
create table techer2 like techer;
export table techer to '/export/techer';
import table techer2 from '/export/techer';
6) Exporting data from hive tables
############## [insert export] ############
# 1) export a query result to the local file system
insert overwrite local directory '/export/servers/exporthive'
select * from score;
# 2) export a query result to the local file system, formatted
insert overwrite local directory '/export/servers/exporthive'
row format delimited fields terminated by '\t'
collection items terminated by '#'
select * from student;
# 3) export a query result to HDFS (no local keyword)
insert overwrite directory '/export/servers/exporthive'
row format delimited fields terminated by '\t'
collection items terminated by '#'
select * from score;
############## [Export to local with a hadoop command] ############
# run inside the hive cli
dfs -get /export/servers/exporthive/000000_0 /export/servers/exporthive/local.txt;
############## [Export with the hive shell] ############
# basic syntax
hive -f/-e 'statement or script' > file
bin/hive -e "select * from myhive.score;" \
> /export/servers/exporthive/score.txt
############## [Export to HDFS with export] ############
export table score to '/export/exporthive/score';
7) Truncating tables
#### only managed tables can be truncated
truncate table score6; # score6 is an external table, so this raises an error
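# score5 was recreated as a managed table in the as-select step above, so truncating it succeeds
truncate table score5;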
3. Hive functions
################# [Built-in functions] #################
show functions;
desc function upper;
desc function extended upper;
################# [User-defined functions] #################
1) UDF (one row in, one row out)
2) UDAF (many rows in, one row out)
3) UDTF (one row in, many rows out)
Programming steps:
(1) extend org.apache.hadoop.hive.ql.exec.UDF
(2) implement an evaluate method; evaluate supports overloading
Notes:
(1) a UDF must have a return type; it may return null, but the return type cannot be void
(2) prefer Hadoop writable types such as Text/LongWritable over plain java types in UDFs
###### Step 1: create a maven java project and add the dependencies
<repositories>
<repository>
<id>cloudera</id>
<url>https://repository.cloudera.com/artifactory/cloudera-repos/</url>
</repository>
</repositories>
<dependencies>
<dependency>
<groupId>org.apache.hadoop</groupId>
<artifactId>hadoop-common</artifactId>
<version>2.6.0-cdh5.14.0</version>
</dependency>
<dependency>
<groupId>org.apache.hive</groupId>
<artifactId>hive-exec</artifactId>
<version>1.1.0-cdh5.14.0</version>
</dependency>
</dependencies>
<build>
<plugins>
<plugin>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-compiler-plugin</artifactId>
<version>3.0</version>
<configuration>
<source>1.8</source>
<target>1.8</target>
<encoding>UTF-8</encoding>
</configuration>
</plugin>
<plugin>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-shade-plugin</artifactId>
<version>2.2</version>
<executions>
<execution>
<phase>package</phase>
<goals>
<goal>shade</goal>
</goals>
<configuration>
<filters>
<filter>
<artifact>*:*</artifact>
<excludes>
<exclude>META-INF/*.SF</exclude>
<exclude>META-INF/*.DSA</exclude>
<exclude>META-INF/*.RSA</exclude>
</excludes>
</filter>
</filters>
</configuration>
</execution>
</executions>
</plugin>
</plugins>
</build>
###### Step 2: write a java class that extends UDF and overloads evaluate
package cn.itcast.udf;

import org.apache.hadoop.hive.ql.exec.UDF;
import org.apache.hadoop.io.Text;

public class ItcastUDF extends UDF {
    public Text evaluate(final Text s) {
        if (null == s) {
            return null;
        }
        // return the input string in uppercase
        return new Text(s.toString().toUpperCase());
    }
}
###### Step 3: package the project and upload the jar to hive's lib directory
###### Step 4: add the jar in the hive client
# rename the jar
cd /export/servers/hive-1.1.0-cdh5.14.0/lib
mv original-day_06_hive_udf-1.0-SNAPSHOT.jar udf.jar
# add the jar in the hive client
add jar /export/servers/hive-1.1.0-cdh5.14.0/lib/udf.jar;
###### Step 5: create a function bound to the custom class
create temporary function touppercase as 'cn.itcast.udf.ItcastUDF';
###### Step 6: use the custom function
select touppercase('abc');
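# Optional (a sketch; assumes you first upload udf.jar to hdfs at the path shown): register a permanent function instead of a temporary one
create function touppercase as 'cn.itcast.udf.ItcastUDF' using jar 'hdfs:///user/hive/jars/udf.jar';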