1.安裝 jdk 配置環(huán)境變量
apt-get install openjdk-8-jdk
export JAVA_HOME=/usr/lib/jvm/java-8-openjdk-amd64/
export JRE_HOME=$JAVA_HOME/jre
export CLASSPATH=$JAVA_HOME/lib:$JRE_HOME/lib:$CLASSPATH
export PATH=$JAVA_HOME/bin:$JRE_HOME/bin:$PATH
將上述 export 語(yǔ)句寫入 /etc/profile 後,執(zhí)行以下命令使環(huán)境變量全局生效
source /etc/profile
2.下載 Hadoop
wget <Hadoop 下載地址,例如 Apache 鏡像上的 hadoop-2.6.1.tar.gz>
3.解壓 tar.gz
tar -zxvf hadoop-2.6.1.tar.gz
4.hadoop安裝
創(chuàng)建數(shù)據(jù)存放的文件夾
/home/hadoop
/home/hadoop/tmp
/home/hadoop/hdfs
/home/hadoop/hdfs/data
/home/hadoop/hdfs/name
5.創(chuàng)建目錄
mkdir /usr/local/hadoop
hadoop-2.6.1.tar.gz解壓至/usr/local/hadoop/下
6.配置各機(jī)器hosts
vi /etc/hosts
ip 機(jī)器名
ip 機(jī)器名
7.配置hadoop參數(shù)
7.1配置hadoop-2.6.1/etc/hadoop/core-site.xml
<configuration>
<property>
<name>fs.defaultFS</name>
<value>hdfs://hadoop:9000</value>
</property>
<property>
<name>hadoop.tmp.dir</name>
<value>file:/home/hadoop/tmp</value>
</property>
<property>
<name>io.file.buffer.size</name>
<value>131702</value>
</property>
</configuration>
7.2配置hadoop-2.6.1/etc/hadoop/hdfs-site.xml
<configuration>
<property>
<name>dfs.namenode.name.dir</name>
<value>file:/home/hadoop/hdfs/name</value>
</property>
<property>
<name>dfs.datanode.data.dir</name>
<value>file:/home/hadoop/hdfs/data</value>
</property>
<property>
<name>dfs.replication</name>
<value>1</value>
</property>
<property>
<name>dfs.namenode.secondary.http-address</name>
<value>hadoop:9001</value>
</property>
<property>
<name>dfs.webhdfs.enabled</name>
<value>true</value>
</property>
</configuration>
7.3配置hadoop-2.6.1/etc/hadoop/mapred-site.xml
<configuration>
<property>
<name>mapreduce.framework.name</name>
<value>yarn</value>
</property>
<property>
<name>mapreduce.jobhistory.address</name>
<value>hadoop:10020</value>
</property>
<property>
<name>mapreduce.jobhistory.webapp.address</name>
<value>hadoop:19888</value>
</property>
</configuration>
42.62.73.147
7.4配置hadoop-2.6.1/etc/hadoop/yarn-site.xml
<configuration>
<property>
<name>yarn.nodemanager.aux-services</name>
<value>mapreduce_shuffle</value>
</property>
<property>
<name>yarn.nodemanager.auxservices.mapreduce.shuffle.class</name>
<value>org.apache.hadoop.mapred.ShuffleHandler</value>
</property>
<property>
<name>yarn.resourcemanager.address</name>
<value>hadoop:8032</value>
</property>
<property>
<name>yarn.resourcemanager.scheduler.address</name>
<value>hadoop:8030</value>
</property>
<property>
<name>yarn.resourcemanager.resource-tracker.address</name>
<value>hadoop:8031</value>
</property>
<property>
<name>yarn.resourcemanager.admin.address</name>
<value>hadoop:8033</value>
</property>
<property>
<name>yarn.resourcemanager.webapp.address</name>
<value>hadoop:8088</value>
</property>
<property>
<name>yarn.nodemanager.resource.memory-mb</name>
<value>22528</value>
</property>
</configuration>
7.5配置hadoop-2.6.1/etc/hadoop/hadoop-env.sh和hadoop-2.6.1/etc/hadoop/yarn-env.sh
export JAVA_HOME=/usr/lib/jvm/java-8-openjdk-amd64
7.6配置slaves 添加從節(jié)點(diǎn)
vi hadoop-2.6.1/etc/hadoop/slaves
8.命令
初始化
bin/hdfs namenode -format
sbin/start-dfs.sh
sbin/start-yarn.sh
sbin/stop-dfs.sh
sbin/stop-yarn.sh
輸入命令jps可以看到相關(guān)信息
執(zhí)行jar
hadoop jar xxxxx.jar arg1 arg2
** hdfs命令 **
列出目錄下文件
hadoop fs -ls /
創(chuàng)建目錄
hadoop fs -mkdir /newdir
本地文件復(fù)制到HDFS
hadoop fs -copyFromLocal /home/input/a.txt /input/a.txt
HDFS文件復(fù)制到本地
hadoop fs -copyToLocal /input/a.txt /home/input/a.txt
刪除HDFS目錄及其中文件
hadoop fs -rm -f -r /output1
移動(dòng)文件
hadoop fs -mv URI [URI …] <dest>
停止job
hadoop job -kill <id>
關(guān)閉安全模式
hadoop dfsadmin -safemode leave
Cluster查看
http://192.168.1.100:8088/
HDFS查看
http://192.168.1.100:50070/
9.錯(cuò)誤
初始化報(bào)錯(cuò)
host = java.net.UnknownHostException: centos: centos
查看/etc/sysconfig/network文件
NETWORKING=yes
HOSTNAME=centos
HOSTNAME是centos, 無(wú)法在/etc/hosts中找到對(duì)應(yīng)IP
vi /etc/hosts,添加:
127.0.0.1 centos
啟動(dòng)報(bào)錯(cuò):
/hadoop-2.6.1/sbin/hadoop-daemon.sh: Permission denied
從節(jié)點(diǎn)hadoop目錄要有執(zhí)行權(quán)限
chmod -R 755 hadoop-2.6.1
ShuffleError
Error: org.apache.hadoop.mapreduce.task.reduce.Shuffle$ShuffleError: error in shuffle in fetcher#3
at org.apache.hadoop.mapreduce.task.reduce.Shuffle.run(Shuffle.java:134)
at org.apache.hadoop.mapred.ReduceTask.run(ReduceTask.java:376)
at org.apache.hadoop.mapred.YarnChild$2.run(YarnChild.java:163)
at java.security.AccessController.doPrivileged(Native Method)
at javax.security.auth.Subject.doAs(Subject.java:415)
at org.apache.hadoop.security.UserGroupInformation.doAs(UserGroupInformation.java:1656)
at org.apache.hadoop.mapred.YarnChild.main(YarnChild.java:158)
Caused by: java.lang.OutOfMemoryError: Java heap space
at org.apache.hadoop.io.BoundedByteArrayOutputStream.<init>(BoundedByteArrayOutputStream.java:56)
at org.apache.hadoop.io.BoundedByteArrayOutputStream.<init>(BoundedByteArrayOutputStream.java:46)
at org.apache.hadoop.mapreduce.task.reduce.InMemoryMapOutput.<init>(InMemoryMapOutput.java:63)
at org.apache.hadoop.mapreduce.task.reduce.MergeManagerImpl.unconditionalReserve(MergeManagerImpl.java:305)
at org.apache.hadoop.mapreduce.task.reduce.MergeManagerImpl.reserve(MergeManagerImpl.java:295)
at org.apache.hadoop.mapreduce.task.reduce.Fetcher.copyMapOutput(Fetcher.java:514)
at org.apache.hadoop.mapreduce.task.reduce.Fetcher.copyFromHost(Fetcher.java:336)
at org.apache.hadoop.mapreduce.task.reduce.Fetcher.run(Fetcher.java:193)
解決方法:在 mapred-site.xml 添加配置
<property>
<name>mapreduce.reduce.shuffle.memory.limit.percent</name>
<value>0.10</value>
</property>
15/11/30 20:15:46 INFO hdfs.DFSClient: Exception in createBlockOutputStream
java.io.IOException: Bad connect ack with firstBadLink as 192.168.1.200:50010
at org.apache.hadoop.hdfs.DFSOutputStream$DataStreamer.createBlockOutputStream(DFSOutputStream.java:1460)
at org.apache.hadoop.hdfs.DFSOutputStream$DataStreamer.nextBlockOutputStream(DFSOutputStream.java:1361)
at org.apache.hadoop.hdfs.DFSOutputStream$DataStreamer.run(DFSOutputStream.java:588)
解決方法:打開(kāi)192.168.1.200 50010端口防火墻