ubuntu 12.04 LTS 기반으로 설치
1. Java 설치
http://www.ahnseungkyu.com/139
2. 패키지 설치
$ sudo apt-get install build-essential maven cmake libssl-dev
3. proxy 를 사용한다면 다음을 수정
$ vi /home/stack/.m2/settings.xml
<settings>
<proxies>
<proxy>
<active>true</active>
<protocol>http</protocol>
<host>xx.xx.xx.xx</host>
<port>8080</port>
<nonProxyHosts>localhost|127.0.0.1|192.168.75.136|192.168.230.136|ubuntu</nonProxyHosts>
...
$ cd /Hadoop-src/hadoop-2.4.0-src/hadoop-hdfs-project/hadoop-hdfs-httpfs/downloads
$ wget http://archive.apache.org/dist/tomcat/tomcat-6/v6.0.36/bin/apache-tomcat-6.0.36.tar.gz
$ keytool -v -alias mavensrv -import \
> -file /usr/share/ca-certificates/extra/XXX.crt \
> -keystore trust.jks
4. protocol buffer 소스 다운로드, 컴파일, 설치 (2.5 이상을 설치)
$ wget https://github.com/protocolbuffers/protobuf/releases/download/v2.5.0/protobuf-2.5.0.tar.gz  # googlecode.com 은 서비스 종료됨
$ tar xvfz protobuf-2.5.0.tar.gz
$ cd protobuf-2.5.0
$ ./configure
$ make
$ sudo make install # /usr/local/lib 에 관련 라이브러리가 설치됨
$ sudo ldconfig
5. Hadoop 소스 다운로드 및 패키징
$ wget http://apache.mirror.cdnetworks.com/hadoop/common/hadoop-2.4.0/hadoop-2.4.0-src.tar.gz
$ tar xvfz hadoop-2.4.0-src.tar.gz
$ cd hadoop-2.4.0-src
$ mvn package -Pdist,native -DskipTests -Dtar -X
6. 소스 파일 및 컴파일 된 바이너리 파일 찾기
$ cd ./hadoop-dist/target
$ cp -R ./hadoop-2.4.0/ ~/.
7. 하둡 환경변수 설정
$ vi ~/.bashrc
# Hadoop
export HADOOP_PREFIX="/home/stack/hadoop-2.4.0"
export PATH=$PATH:$HADOOP_PREFIX/bin
export PATH=$PATH:$HADOOP_PREFIX/sbin
export HADOOP_MAPRED_HOME=${HADOOP_PREFIX}
export HADOOP_COMMON_HOME=${HADOOP_PREFIX}
export HADOOP_HDFS_HOME=${HADOOP_PREFIX}
export YARN_HOME=${HADOOP_PREFIX}
# Native Path
export HADOOP_COMMON_LIB_NATIVE_DIR=${HADOOP_PREFIX}/lib/native
export HADOOP_OPTS="-Djava.library.path=$HADOOP_PREFIX/lib/native"
$ source ~/.bashrc
8. local 에서 ssh 자동 접속 설정 (키가 없다면 먼저 생성)
$ ssh-keygen -t rsa -P '' -f ~/.ssh/id_rsa
$ cat ~/.ssh/id_rsa.pub >> ~/.ssh/authorized_keys
9. hadoop-env.sh 설정
$ vi $HADOOP_PREFIX/etc/hadoop/hadoop-env.sh
export JAVA_HOME="/usr/local/jdk1.7.0_51"
export HADOOP_COMMON_LIB_NATIVE_DIR=$HADOOP_PREFIX/lib/native
export HADOOP_OPTS="-Djava.library.path=$HADOOP_PREFIX/lib/native"
10. yarn-env.sh 설정
$ vi $HADOOP_PREFIX/etc/hadoop/yarn-env.sh
export HADOOP_YARN_USER=${HADOOP_YARN_USER:-yarn}
export HADOOP_COMMON_LIB_NATIVE_DIR=$HADOOP_PREFIX/lib/native
export HADOOP_OPTS="-Djava.library.path=$HADOOP_PREFIX/lib/native"
11. Hadoop 데이터 및 시스템 파일을 저장할 디렉토리 생성 (hdfs-site.xml, mapred-site.xml 파일 참조)
$ mkdir -p ${HADOOP_PREFIX}/hadoop/dfs/name
$ mkdir -p ${HADOOP_PREFIX}/hadoop/dfs/data
$ mkdir -p ${HADOOP_PREFIX}/hadoop/mapred/system
$ mkdir -p ${HADOOP_PREFIX}/hadoop/mapred/local
12. core-site.xml 설정
$ vi $HADOOP_PREFIX/etc/hadoop/core-site.xml
<configuration>
<property>
<name>fs.defaultFS</name> <!-- fs.default.name 은 Hadoop 2.x 에서 deprecated -->
<value>hdfs://localhost:9000</value>
<final>true</final>
</property>
</configuration>
13. hdfs-site.xml 설정
$ vi $HADOOP_PREFIX/etc/hadoop/hdfs-site.xml
<configuration>
<property>
<name>dfs.namenode.name.dir</name>
<value>file:/home/stack/hadoop-2.4.0/hadoop/dfs/name</value>
<final>true</final>
</property>
<property>
<name>dfs.datanode.data.dir</name>
<value>file:/home/stack/hadoop-2.4.0/hadoop/dfs/data</value>
<final>true</final>
</property>
<property>
<name>dfs.permissions.enabled</name> <!-- dfs.permissions 는 deprecated -->
<value>false</value>
</property>
</configuration>
14. mapred-site.xml 설정
$ cp $HADOOP_PREFIX/etc/hadoop/mapred-site.xml.template $HADOOP_PREFIX/etc/hadoop/mapred-site.xml
$ vi $HADOOP_PREFIX/etc/hadoop/mapred-site.xml
<configuration>
<property>
<name>mapreduce.framework.name</name>
<value>yarn</value>
</property>
<property>
<name>mapred.system.dir</name>
<value>file:/home/stack/hadoop-2.4.0/hadoop/mapred/system</value>
<final>true</final>
</property>
<property>
<name>mapred.local.dir</name>
<value>file:/home/stack/hadoop-2.4.0/hadoop/mapred/local</value>
<final>true</final>
</property>
</configuration>
15. yarn-site.xml 설정
$ vi $HADOOP_PREFIX/etc/hadoop/yarn-site.xml
<configuration>
<!-- Site specific YARN configuration properties -->
<property>
<name>yarn.nodemanager.aux-services</name>
<value>mapreduce_shuffle</value>
</property>
<property>
<name>yarn.nodemanager.aux-services.mapreduce.shuffle.class</name>
<value>org.apache.hadoop.mapred.ShuffleHandler</value>
</property>
</configuration>
16. NameNode 포맷
$ hdfs namenode -format
17. 데몬 실행
$ cd ${HADOOP_PREFIX}/sbin
# hdfs 데몬 실행 (start-all.sh 은 deprecated 이며 hdfs+yarn 을 모두 실행함)
$ start-dfs.sh
# yarn 데몬 실행
$ start-yarn.sh
18. 하둡 데몬 확인 (Pseudo-Distributed Mode 일 때 5개가 떠 있어야 함)
$ jps
13861 NameNode # 네임노드
14347 SecondaryNameNode # 세컨더리 네임노드
14070 DataNode # 데이터 노드
14526 ResourceManager # yarn 리소스 매니저 (네임노드)
14745 NodeManager # yarn 노드 매니저 (데이터노드)
# yarn Resource Manager 접속
http://localhost:8088
# yarn Node Manager 접속
http://localhost:8042/node
# 네임노드 접속
http://localhost:50070
# 노드 리포트
$ hdfs dfsadmin -report