docker-Hadoop全家桶 - hadoop283

[TOC]

hadoop283.dockerfile

# hadoop283-jdk.dockerfile
# Builds a Hadoop 2.8.3 node image on top of a Debian 8 + OpenJDK 8 + sshd base.
FROM xiaows/debian8-jdk8-ssh:3.0
# MAINTAINER is deprecated (hadolint DL4000); use a label instead.
LABEL maintainer="XIAOWS <xiaows08@163.com>"

WORKDIR /root

# Install Hadoop: the pre-downloaded distribution (including its etc/hadoop
# configuration) must sit in the build context next to this Dockerfile.
COPY hadoop-2.8.3/ /usr/local/hadoop-2.8.3
# Alternative: keep the config outside the distribution and copy it in:
# COPY hadoop-conf/ $HADOOP_HOME/etc/hadoop/
ENV HADOOP_HOME=/usr/local/hadoop-2.8.3
ENV PATH=$PATH:$HADOOP_HOME/bin:$HADOOP_HOME/sbin

# Prepare HDFS storage dirs, slim the image (docs and Windows *.cmd launchers
# are not needed), move the helper scripts to /root, and pre-format the
# NameNode. Steps are chained with && (not ';') so any failing command aborts
# the build instead of being silently ignored.
RUN mkdir -p /data/hdfs/namenode && \
    mkdir -p /data/hdfs/datanode && \
    rm -rfv $HADOOP_HOME/share/doc/ && \
    rm -rfv $HADOOP_HOME/bin/*.cmd && \
    rm -rfv $HADOOP_HOME/sbin/*.cmd && \
    mv $HADOOP_HOME/etc/hadoop/start-hadoop.sh /root/ && \
    mv $HADOOP_HOME/etc/hadoop/run-wordcount.sh /root/ && \
    mv /root/Dockerfile-debian8-jdk8-ssh / && \
    chmod +x /root/*.sh && \
    hdfs namenode -format

# Keep a copy of this recipe inside the image for reference.
COPY hadoop283-jdk.dockerfile /
# ENTRYPOINT hdfs namenode -format

修改如下配置文件

  • $HADOOP_HOME/etc/hadoop/core-site.xml
<?xml version="1.0"?>
<!-- core-site.xml: core settings shared by all Hadoop daemons and clients. -->
<configuration>
    <property>
        <!-- Default filesystem URI: the NameNode runs on the container named
             hadoop-1 (see slaves / run-container.sh), RPC port 9000. -->
        <name>fs.defaultFS</name>
        <value>hdfs://hadoop-1:9000/</value>
    </property>
</configuration>
  • $HADOOP_HOME/etc/hadoop/hadoop-env.sh
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements.  See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership.  The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License.  You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# NOTE(review): this is the stock Hadoop 2.8.x hadoop-env.sh. The only local
# customization visible here is the explicit JAVA_HOME below, pointing at the
# OpenJDK 8 install shipped by the xiaows/debian8-jdk8-ssh base image.

# Set Hadoop-specific environment variables here.

# The only required environment variable is JAVA_HOME.  All others are
# optional.  When running a distributed configuration it is best to
# set JAVA_HOME in this file, so that it is correctly defined on
# remote nodes.

# The java implementation to use.
# Local customization: Debian 8 amd64 OpenJDK 8 path (matches the base image).
export JAVA_HOME=/usr/lib/jvm/java-8-openjdk-amd64

# The jsvc implementation to use. Jsvc is required to run secure datanodes
# that bind to privileged ports to provide authentication of data transfer
# protocol.  Jsvc is not required if SASL is configured for authentication of
# data transfer protocol using non-privileged ports.
#export JSVC_HOME=${JSVC_HOME}

export HADOOP_CONF_DIR=${HADOOP_CONF_DIR:-"/etc/hadoop"}

# Extra Java CLASSPATH elements.  Automatically insert capacity-scheduler.
for f in $HADOOP_HOME/contrib/capacity-scheduler/*.jar; do
  if [ "$HADOOP_CLASSPATH" ]; then
    export HADOOP_CLASSPATH=$HADOOP_CLASSPATH:$f
  else
    export HADOOP_CLASSPATH=$f
  fi
done

# The maximum amount of heap to use, in MB. Default is 1000.
#export HADOOP_HEAPSIZE=
#export HADOOP_NAMENODE_INIT_HEAPSIZE=""

# Extra Java runtime options.  Empty by default.
export HADOOP_OPTS="$HADOOP_OPTS -Djava.net.preferIPv4Stack=true"

# Command specific options appended to HADOOP_OPTS when specified
export HADOOP_NAMENODE_OPTS="-Dhadoop.security.logger=${HADOOP_SECURITY_LOGGER:-INFO,RFAS} -Dhdfs.audit.logger=${HDFS_AUDIT_LOGGER:-INFO,NullAppender} $HADOOP_NAMENODE_OPTS"
export HADOOP_DATANODE_OPTS="-Dhadoop.security.logger=ERROR,RFAS $HADOOP_DATANODE_OPTS"

export HADOOP_SECONDARYNAMENODE_OPTS="-Dhadoop.security.logger=${HADOOP_SECURITY_LOGGER:-INFO,RFAS} -Dhdfs.audit.logger=${HDFS_AUDIT_LOGGER:-INFO,NullAppender} $HADOOP_SECONDARYNAMENODE_OPTS"

export HADOOP_NFS3_OPTS="$HADOOP_NFS3_OPTS"
export HADOOP_PORTMAP_OPTS="-Xmx512m $HADOOP_PORTMAP_OPTS"

# The following applies to multiple commands (fs, dfs, fsck, distcp etc)
export HADOOP_CLIENT_OPTS="-Xmx512m $HADOOP_CLIENT_OPTS"
#HADOOP_JAVA_PLATFORM_OPTS="-XX:-UsePerfData $HADOOP_JAVA_PLATFORM_OPTS"

# On secure datanodes, user to run the datanode as after dropping privileges.
# This **MUST** be uncommented to enable secure HDFS if using privileged ports
# to provide authentication of data transfer protocol.  This **MUST NOT** be
# defined if SASL is configured for authentication of data transfer protocol
# using non-privileged ports.
export HADOOP_SECURE_DN_USER=${HADOOP_SECURE_DN_USER}

# Where log files are stored.  $HADOOP_HOME/logs by default.
#export HADOOP_LOG_DIR=${HADOOP_LOG_DIR}/$USER

# Where log files are stored in the secure data environment.
export HADOOP_SECURE_DN_LOG_DIR=${HADOOP_LOG_DIR}/${HADOOP_HDFS_USER}

###
# HDFS Mover specific parameters
###
# Specify the JVM options to be used when starting the HDFS Mover.
# These options will be appended to the options specified as HADOOP_OPTS
# and therefore may override any similar flags set in HADOOP_OPTS
#
# export HADOOP_MOVER_OPTS=""

###
# Advanced Users Only!
###

# The directory where pid files are stored. /tmp by default.
# NOTE: this should be set to a directory that can only be written to by 
#       the user that will run the hadoop daemons.  Otherwise there is the
#       potential for a symlink attack.
export HADOOP_PID_DIR=${HADOOP_PID_DIR}
export HADOOP_SECURE_DN_PID_DIR=${HADOOP_PID_DIR}

# A string representing this instance of hadoop. $USER by default.
export HADOOP_IDENT_STRING=$USER
  • $HADOOP_HOME/etc/hadoop/hdfs-site.xml
<?xml version="1.0"?>
<!-- hdfs-site.xml: HDFS daemon settings. The two storage paths match the
     directories created with mkdir -p in the Dockerfile's RUN step. -->
<configuration>
    <property>
        <!-- Where the NameNode persists the filesystem image and edit log. -->
        <name>dfs.namenode.name.dir</name>
        <value>file:///data/hdfs/namenode</value>
    </property>
    <property>
        <!-- Where each DataNode stores its block files. -->
        <name>dfs.datanode.data.dir</name>
        <value>file:///data/hdfs/datanode</value>
    </property>
    <property>
        <!-- Two replicas per block (cluster runs 3 datanodes: hadoop-1..3). -->
        <name>dfs.replication</name>
        <value>2</value>
    </property>
</configuration>
  • $HADOOP_HOME/etc/hadoop/mapred-site.xml
<?xml version="1.0"?>
<!-- mapred-site.xml: run MapReduce jobs on YARN instead of the legacy
     standalone JobTracker framework. -->
<configuration>
    <property>
        <name>mapreduce.framework.name</name>
        <value>yarn</value>
    </property>
</configuration>
  • $HADOOP_HOME/etc/hadoop/slaves
hadoop-1
hadoop-2
hadoop-3
#hadoop-4
  • $HADOOP_HOME/etc/hadoop/yarn-site.xml
<?xml version="1.0"?>
<!-- yarn-site.xml: YARN settings for the 3-container cluster. -->
<configuration>
    <property>
        <!-- Enable the shuffle auxiliary service MapReduce jobs require. -->
        <name>yarn.nodemanager.aux-services</name>
        <value>mapreduce_shuffle</value>
    </property>
    <property>
        <!-- Implementation class backing the mapreduce_shuffle service. -->
        <name>yarn.nodemanager.aux-services.mapreduce_shuffle.class</name>
        <value>org.apache.hadoop.mapred.ShuffleHandler</value>
    </property>
    <property>
        <!-- The ResourceManager runs on the hadoop-1 container. -->
        <name>yarn.resourcemanager.hostname</name>
        <value>hadoop-1</value>
    </property>
</configuration>
  • $HADOOP_HOME/etc/hadoop/run-wordcount.sh [可選]
#!/bin/bash
# Smoke-test the Hadoop cluster by running the WordCount example.
# Abort on the first failing command so errors are not silently ignored.
set -e

# create local input files (-p so the script can be re-run)
mkdir -p input
echo "Hello Docker" >input/file2.txt
echo "Hello Hadoop" >input/file1.txt

# create input directory on HDFS
hadoop fs -mkdir -p input

# put input files to HDFS
hdfs dfs -put ./input/* input

# run wordcount using the compiled examples jar and its "wordcount" driver.
# (The previous command pointed at the *-sources.jar, which contains .java
# files, not classes, and only worked by accident of the Hadoop classpath.)
hadoop jar $HADOOP_HOME/share/hadoop/mapreduce/hadoop-mapreduce-examples-2.8.3.jar wordcount input output

# print the input files
echo -e "\ninput file1.txt:"
hdfs dfs -cat input/file1.txt

echo -e "\ninput file2.txt:"
hdfs dfs -cat input/file2.txt

# print the output of wordcount
echo -e "\nwordcount output:"
hdfs dfs -cat output/part-r-00000
  • run-container.sh
#!/bin/bash
# run-container.sh (a.k.a. start-cluster.sh)
# Start a 3-node cluster from the given image on a user-defined Docker
# network, then open a shell in node 1.
#
# Usage: ./run-container.sh <image> <service-name> [network=cluster]

# Require BOTH arguments. The original test was [ $# = 0 ], so calling the
# script with only an image silently produced containers named "-1","-2","-3".
if [ $# -lt 2 ]; then
    echo "1. Please specify the docker-image of cluster !"
    echo "2. Please specify name of cluster service !"
    exit 1
fi

image_name=$1
service_name=$2
net_name=${3:-cluster}   # third argument optional; defaults to "cluster"

# Remove leftover containers from a previous run (no-op if none exist).
docker rm -f "${service_name}"-{1,2,3}

i=1
while [ "$i" -lt 4 ]; do
    echo "start ${service_name}-$i container..."
    docker run -itd \
        --net="${net_name}" \
        --hostname "${service_name}-$i" \
        --name "${service_name}-$i" \
        --restart=always \
        "$image_name"
    i=$((i + 1))
done

# Attach an interactive shell to the first (master) node.
docker exec -it "${service_name}-1" bash
  • 為了使鏡像構(gòu)建的盡量小比勉,可以刪除Hadoop中的doc/文檔 src/源碼以及bin/.cmd sbin/.cmd
  • 構(gòu)建鏡像
    docker build -f hadoop283.dockerfile -t xiaows/hadoop283:3.0 .
  • 啟動Hadoop集群
    ./run-container.sh xiaows/hadoop283:3.0 hadoop
最后編輯于
?著作權(quán)歸作者所有,轉(zhuǎn)載或內(nèi)容合作請聯(lián)系作者
  • 序言:七十年代末,一起剝皮案震驚了整個濱河市驹止,隨后出現(xiàn)的幾起案子浩聋,更是在濱河造成了極大的恐慌,老刑警劉巖臊恋,帶你破解...
    沈念sama閱讀 219,427評論 6 508
  • 序言:濱河連續(xù)發(fā)生了三起死亡事件衣洁,死亡現(xiàn)場離奇詭異,居然都是意外死亡抖仅,警方通過查閱死者的電腦和手機坊夫,發(fā)現(xiàn)死者居然都...
    沈念sama閱讀 93,551評論 3 395
  • 文/潘曉璐 我一進店門,熙熙樓的掌柜王于貴愁眉苦臉地迎上來撤卢,“玉大人环凿,你說我怎么就攤上這事》欧裕” “怎么了智听?”我有些...
    開封第一講書人閱讀 165,747評論 0 356
  • 文/不壞的土叔 我叫張陵,是天一觀的道長。 經(jīng)常有香客問我考赛,道長莉测,這世上最難降的妖魔是什么? 我笑而不...
    開封第一講書人閱讀 58,939評論 1 295
  • 正文 為了忘掉前任,我火速辦了婚禮腌零,結(jié)果婚禮上益涧,老公的妹妹穿的比我還像新娘。我一直安慰自己久免,他們只是感情好阎姥,可當我...
    茶點故事閱讀 67,955評論 6 392
  • 文/花漫 我一把揭開白布呼巴。 她就那樣靜靜地躺著御蒲,像睡著了一般。 火紅的嫁衣襯著肌膚如雪府瞄。 梳的紋絲不亂的頭發(fā)上碘箍,一...
    開封第一講書人閱讀 51,737評論 1 305
  • 那天团搞,我揣著相機與錄音多艇,去河邊找鬼峻黍。 笑死,一個胖子當著我的面吹牛挽拂,可吹牛的內(nèi)容都是我干的骨饿。 我是一名探鬼主播宏赘,決...
    沈念sama閱讀 40,448評論 3 420
  • 文/蒼蘭香墨 我猛地睜開眼察署,長吁一口氣:“原來是場噩夢啊……” “哼!你這毒婦竟也來了脐往?” 一聲冷哼從身側(cè)響起业簿,我...
    開封第一講書人閱讀 39,352評論 0 276
  • 序言:老撾萬榮一對情侶失蹤梅尤,失蹤者是張志新(化名)和其女友劉穎希太,沒想到半個月后誊辉,有當?shù)厝嗽跇淞掷锇l(fā)現(xiàn)了一具尸體,經(jīng)...
    沈念sama閱讀 45,834評論 1 317
  • 正文 獨居荒郊野嶺守林人離奇死亡,尸身上長有42處帶血的膿包…… 初始之章·張勛 以下內(nèi)容為張勛視角 年9月15日...
    茶點故事閱讀 37,992評論 3 338
  • 正文 我和宋清朗相戀三年拍屑,在試婚紗的時候發(fā)現(xiàn)自己被綠了僵驰。 大學時的朋友給我發(fā)了我未婚夫和他白月光在一起吃飯的照片。...
    茶點故事閱讀 40,133評論 1 351
  • 序言:一個原本活蹦亂跳的男人離奇死亡,死狀恐怖粉私,靈堂內(nèi)的尸體忽然破棺而出诺核,到底是詐尸還是另有隱情,我是刑警寧澤漓摩,帶...
    沈念sama閱讀 35,815評論 5 346
  • 正文 年R本政府宣布幌甘,位于F島的核電站锅风,受9級特大地震影響鞍泉,放射性物質(zhì)發(fā)生泄漏咖驮。R本人自食惡果不足惜,卻給世界環(huán)境...
    茶點故事閱讀 41,477評論 3 331
  • 文/蒙蒙 一、第九天 我趴在偏房一處隱蔽的房頂上張望砚嘴。 院中可真熱鬧涩拙,春花似錦兴泥、人聲如沸。這莊子的主人今日做“春日...
    開封第一講書人閱讀 32,022評論 0 22
  • 文/蒼蘭香墨 我抬頭看了看天上的太陽。三九已至,卻和暖如春谎势,著一層夾襖步出監(jiān)牢的瞬間脏榆,已是汗流浹背台谍。 一陣腳步聲響...
    開封第一講書人閱讀 33,147評論 1 272
  • 我被黑心中介騙來泰國打工坞生, 沒想到剛下飛機就差點兒被人妖公主榨干…… 1. 我叫王不留掷伙,地道東北人任柜。 一個月前我還...
    沈念sama閱讀 48,398評論 3 373
  • 正文 我出身青樓宙地,卻偏偏與公主長得像,于是被迫代替她去往敵國和親参袱。 傳聞我的和親對象是個殘疾皇子蓖柔,可洞房花燭夜當晚...
    茶點故事閱讀 45,077評論 2 355

推薦閱讀更多精彩內(nèi)容

  • (搭建集群部分借鑒了kiwenlau/hadoop-cluster-docker中的內(nèi)容,不過那里的基礎(chǔ)環(huán)境是Ub...
    ProteanBear閱讀 3,845評論 0 12
  • 一潜索、系統(tǒng)參數(shù)配置優(yōu)化 1竹习、系統(tǒng)內(nèi)核參數(shù)優(yōu)化配置 修改文件/etc/sysctl.conf列牺,添加如下配置瞎领,然后執(zhí)行s...
    張偉科閱讀 3,754評論 0 14
  • 使用搭建部署分布式集群一:環(huán)境準備: 1:首先要有一個Centos7操作系統(tǒng)九默,可以在虛擬機中安裝驼修。 2:在cent...
    yaoshiyou閱讀 310評論 0 1
  • 壹 看完電影已是傍晚時分乙各,心中的悲傷久久不能自已觅丰「咎眩回家后,好友在我的朋友圈留言轻掩,大意是唇牧,這是一部寫盡真實的電影聚唐,因...
    catbaker閱讀 460評論 0 4
  • 我有一頭長發(fā)陪蜻,它熱愛陽光 我已經(jīng)很久沒有去理發(fā)店,任自己的頭發(fā)瘋狂成長等恐,扎最簡單的馬尾课蔬,或者運動起來,就用皮筋草草...
    LD1993閱讀 364評論 0 1