- Hadoop YARN startup
- Hadoop JobHistory/proxy server startup (Datanode8)
- HMaster (namenode)
- RegionServer + Thrift (datanode)
- HBase: create its directory on HDFS
- HBase: hbase-env.sh settings
- HBase startup
- HBase web UI
ID | Hostname | Address | Services | Data path (size) |
1 | Flume1.qs.com | 172.31.96.37 | Flume+kf+stm+zk | /data1/500GB |
2 | Namenode1.qs.com | 172.31.96.38 | Nn+hbs+dnsmasq | /data1/300GB |
3 | Datanode1.qs.com | 172.31.96.39 | Dn+zk+jn+hbr+kf+stm | /data1/500GB |
4 | Datanode2.qs.com | 172.31.96.40 | Dn+zk+jn+hbr+kf+stm | /data1/500GB |
5 | Datanode3.qs.com | 172.31.96.43 | Dn+zk+jn+hbr+kf+stm | /data1/500GB |
6 | Datanode4.qs.com | 172.31.96.44 | Dn+zk+jn+hbr+kf+stm | /data1/500GB |
7 | Flume2.qs.com | 172.31.96.46 | Flume+kf+stm+zk | /data1/500GB |
8 | Namenode2.qs.com | 172.31.96.47 | Nn+hbm | /data1/300GB |
9 | Datanode5.qs.com | 172.31.96.48 | Dn+zk+jn+hbr+kf+stm | /data1/500GB |
10 | Datanode6.qs.com | 172.31.96.49 | Dn+zk+jn+hbr+kf+stm | /data1/500GB |
11 | Datanode7.qs.com | 172.31.96.50 | Dn+zk+jn+hbr+kf+stm | /data1/500GB |
12 | Datanode8.qs.com | 172.31.96.51 | Dn+zk+jn+hbr+kf+stm | /data1/500GB |
DNS server: dnsmasq
apt-get update
apt-get install dnsmasq
mv /etc/dnsmasq.conf /etc/dnsmasq.conf.default
[root@Namenode1:172.31.96.38:~/tools]more /etc/dnsmasq.conf
no-hosts
addn-hosts=/etc/add_hosts
resolv-file=/etc/resolv.conf
#resolv-file=/etc/dnsmasq.resolv.conf
all-servers
no-dhcp-interface=eth0
listen-address=172.31.96.38,127.0.0.1
cache-size=5000
log-facility=syslog
[root@Namenode1:172.31.96.38:~/tools]more /etc/add_hosts
172.31.96.37 Flume1.qs.com
172.31.96.38 Namenode1.qs.com
172.31.96.39 Datanode1.qs.com
172.31.96.40 Datanode2.qs.com
172.31.96.43 Datanode3.qs.com
172.31.96.44 Datanode4.qs.com
172.31.96.46 Flume2.qs.com
172.31.96.47 Namenode2.qs.com
172.31.96.48 Datanode5.qs.com
172.31.96.49 Datanode6.qs.com
172.31.96.50 Datanode7.qs.com
172.31.96.51 Datanode8.qs.com
echo 'nameserver 172.31.96.38' >> /etc/resolv.conf ; grep 'nameserver' /etc/resolv.conf
/etc/init.d/dnsmasq restart
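To verify that dnsmasq answers for the cluster names (any entry from /etc/add_hosts will do; assumes dig from dnsutils is installed):
dig @172.31.96.38 Datanode1.qs.com +short    # expect 172.31.96.39
dig @127.0.0.1 Namenode2.qs.com +short       # expect 172.31.96.47, via the local listener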
Set up SSH trust between the nodes:
ssh-keygen -t rsa
cd ~/.ssh && cp id_rsa.pub authorized_keys
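To push the key to every node in one pass, a loop like the following can be used (a sketch; it assumes password-based root SSH is still allowed at this point, with the hostnames taken from the table above):
for h in Flume1 Flume2 Namenode1 Namenode2 Datanode1 Datanode2 Datanode3 Datanode4 Datanode5 Datanode6 Datanode7 Datanode8; do
  ssh-copy-id root@${h}.qs.com
done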
Set the Java environment variables:
[root@Namenode1:172.31.96.38:~]tail /etc/profile
export JAVA_HOME=/usr/lib/jdk1.7.0
export PATH=$JAVA_HOME/bin:$JAVA_HOME/jre/bin:$PATH
export CLASSPATH=$CLASSPATH:.:$JAVA_HOME/lib:$JAVA_HOME/jre/lib
export PKG_CONFIG_PATH=/usr/local/fuse/lib/pkgconfig:$PKG_CONFIG_PATH
export HADOOP_MAPRED_HOME=/usr/lib/hadoop-mapreduce
[root@Namenode1:172.31.96.38:~]java -version
java version "1.7.0_55"
Java(TM) SE Runtime Environment (build 1.7.0_55-b13)
Java HotSpot(TM) 64-Bit Server VM (build 24.55-b03, mixed mode)
Select the Java alternative (must be run on every server in the cluster):
update-alternatives --install /usr/bin/java java /usr/lib/jdk1.7.0/bin/java 300
update-alternatives --install /usr/bin/javac javac /usr/lib/jdk1.7.0/bin/javac 300
update-alternatives --config java
update-alternatives --config javac
ZooKeeper installation:
Clear out the hosts file (name resolution is handled by dnsmasq):
> /etc/hosts
apt-get install zookeeper zookeeper-server
[root@Flume1:172.31.96.37:~]more /etc/zookeeper/conf/zoo.cfg
maxClientCnxns=150
tickTime=2000
initLimit=20
syncLimit=20
dataDir=/var/lib/zookeeper
clientPort=2181
server.1=Flume1.qs.com:2888:3888
server.2=Datanode1.qs.com:2888:3888
server.3=Datanode2.qs.com:2888:3888
server.4=Datanode3.qs.com:2888:3888
server.5=Datanode8.qs.com:2888:3888
server.6=Flume2.qs.com:2888:3888
server.7=Datanode5.qs.com:2888:3888
server.8=Datanode6.qs.com:2888:3888
server.9=Datanode7.qs.com:2888:3888
Set the myid (the value is per-host):
/etc/init.d/zookeeper-server init --myid=1
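Each host's myid must match its server.N line in zoo.cfg. A sketch for setting all nine in one pass over SSH (the host order mirrors server.1 through server.9 above; note that init may refuse to run if dataDir is already populated):
i=1
for h in Flume1 Datanode1 Datanode2 Datanode3 Datanode8 Flume2 Datanode5 Datanode6 Datanode7; do
  ssh root@${h}.qs.com "/etc/init.d/zookeeper-server init --myid=${i}"
  i=$((i+1))
done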
Start ZooKeeper:
service zookeeper-server restart
root@Datanode7:/var/log# zookeeper-client
WATCHER::
WatchedEvent state:SyncConnected type:None path:null
ls /
[kafka, storm, hbase, hadoop-ha, zookeeper]
[zk: localhost:2181(CONNECTED) 1]
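A quick quorum health check using ZooKeeper's four-letter ruok command (each healthy member replies imok; assumes netcat is installed):
for h in Flume1 Datanode1 Datanode2 Datanode3 Datanode8 Flume2 Datanode5 Datanode6 Datanode7; do
  printf '%s: ' "$h"; echo ruok | nc ${h}.qs.com 2181; echo
done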
HDFS installation:
Hadoop master installation (run on the namenodes):
apt-get install hadoop-hdfs-zkfc hadoop-hdfs-namenode hadoop-yarn-resourcemanager
Hadoop data node installation (run on the datanodes):
apt-get install hadoop-yarn-nodemanager hadoop-hdfs-datanode hadoop-mapreduce hadoop-hdfs-journalnode
JobHistory and proxy server installation (run on Datanode8, where mapred-site.xml and yarn-site.xml place these services):
apt-get install hadoop-mapreduce-historyserver hadoop-yarn-proxyserver
[root@Namenode1:172.31.96.38:/etc/hadoop/conf]more core-site.xml
<?xml version="1.0"?>
<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
<configuration>
<property>
<name>fs.defaultFS</name>
<value>hdfs://dmcluster</value>
</property>
<property>
<name>dfs.nameservices</name>
<value>dmcluster</value>
</property>
<property>
<name>dfs.ha.namenodes.dmcluster</name>
<value>nn1,nn2</value>
</property>
<property>
<name>dfs.namenode.rpc-address.dmcluster.nn1</name>
<value>Namenode1.qs.com:8020</value>
</property>
<property>
<name>dfs.namenode.rpc-address.dmcluster.nn2</name>
<value>Namenode2.qs.com:8020</value>
</property>
<property>
<name>dfs.namenode.http-address.dmcluster.nn1</name>
<value>Namenode1.qs.com:50070</value>
</property>
<property>
<name>dfs.namenode.http-address.dmcluster.nn2</name>
<value>Namenode2.qs.com:50070</value>
</property>
<property>
<name>dfs.namenode.shared.edits.dir</name>
<value>qjournal://Namenode1.qs.com:8485;Namenode2.qs.com:8485;Datanode1.qs.com:8485;Datanode2.qs.com:8485;Datanode3.qs.com:8485;Datanode4.qs.com:8485;Datanode5.qs.com:8485;Datanode6.qs.com:8485;Datanode7.qs.com:8485/dmcluster</value>
</property>
<property>
<name>dfs.journalnode.edits.dir</name>
<value>/data1/dfs/jn</value>
</property>
<property>
<name>dfs.client.failover.proxy.provider.dmcluster</name>
<value>org.apache.hadoop.hdfs.server.namenode.ha.ConfiguredFailoverProxyProvider</value>
</property>
<property>
<name>dfs.ha.fencing.methods</name>
<value>sshfence(hdfs)</value>
</property>
<property>
<name>dfs.ha.fencing.ssh.private-key-files</name>
<value>/var/lib/hadoop-hdfs/.ssh/id_rsa</value>
</property>
<property>
<name>dfs.ha.fencing.ssh.connect-timeout</name>
<value>5000</value>
<description>
SSH connection timeout, in milliseconds, to use with the builtin
sshfence fencer.
</description>
</property>
<property>
<name>ha.zookeeper.quorum.dmcluster</name>
<value>Flume1.qs.com:2181,Flume2.qs.com:2181,Datanode1.qs.com:2181,Datanode2.qs.com:2181,Datanode3.qs.com:2181,Datanode5.qs.com:2181,Datanode6.qs.com:2181,Datanode7.qs.com:2181,Datanode8.qs.com:2181</value>
</property>
</configuration>
[root@Namenode1:172.31.96.38:/etc/hadoop/conf]more hdfs-site.xml
<?xml version="1.0"?>
<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
<configuration>
<property>
<name>dfs.name.dir</name>
<value>/var/lib/hadoop-hdfs/cache/hdfs/dfs/name</value>
</property>
<property>
<name>dfs.replication</name>
<value>3</value>
</property>
<property>
<name>dfs.ha.automatic-failover.enabled</name>
<value>true</value>
</property>
<property>
<name>dfs.permissions.superusergroup</name>
<value>hadoop</value>
</property>
<property>
<name>dfs.namenode.name.dir</name>
<value>/data1/dfs/nn</value>
</property>
<property>
<name>dfs.datanode.data.dir</name>
<value>/data1/dfs/dn</value>
</property>
<property>
<name>dfs.webhdfs.enabled</name>
<value>true</value>
</property>
</configuration>
[root@Namenode1:172.31.96.38:/etc/hadoop/conf]more slaves
Datanode1.qs.com
Datanode2.qs.com
Datanode3.qs.com
Datanode4.qs.com
Datanode5.qs.com
Datanode6.qs.com
Datanode7.qs.com
Datanode8.qs.com
[root@Namenode1:172.31.96.38:/etc/hadoop/conf]more mapred-site.xml
<?xml version="1.0"?>
<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
<configuration>
<property>
<name>mapreduce.framework.name</name>
<value>yarn</value>
</property>
<property>
<name>mapreduce.jobhistory.address</name>
<value>Datanode8.qs.com:10020</value>
</property>
<property>
<name>mapreduce.jobhistory.webapp.address</name>
<value>Datanode8.qs.com:19888</value>
</property>
</configuration>
[root@Namenode1:172.31.96.38:/etc/hadoop/conf]more yarn-site.xml
<configuration>
<property>
<name>yarn.resourcemanager.resource-tracker.address</name>
<value>Namenode1.qs.com:8031</value>
</property>
<property>
<name>yarn.resourcemanager.address</name>
<value>Namenode1.qs.com:8032</value>
</property>
<property>
<name>yarn.resourcemanager.scheduler.address</name>
<value>Namenode1.qs.com:8030</value>
</property>
<property>
<name>yarn.resourcemanager.admin.address</name>
<value>Namenode1.qs.com:8033</value>
</property>
<property>
<name>yarn.resourcemanager.webapp.address</name>
<value>Namenode1.qs.com:8088</value>
</property>
<property>
<description>Classpath for typical applications.</description>
<name>yarn.application.classpath</name>
<value>
$HADOOP_CONF_DIR,
$HADOOP_COMMON_HOME/*,$HADOOP_COMMON_HOME/lib/*,
$HADOOP_HDFS_HOME/*,$HADOOP_HDFS_HOME/lib/*,
$HADOOP_MAPRED_HOME/*,$HADOOP_MAPRED_HOME/lib/*,
$YARN_HOME/*,$YARN_HOME/lib/*
</value>
</property>
<property>
<name>yarn.nodemanager.aux-services</name>
<value>mapreduce.shuffle</value>
</property>
<property>
<name>yarn.nodemanager.aux-services.mapreduce.shuffle.class</name>
<value>org.apache.hadoop.mapred.ShuffleHandler</value>
</property>
<property>
<name>yarn.nodemanager.local-dirs</name>
<value>/data1/yarn/local</value>
</property>
<property>
<name>yarn.nodemanager.log-dirs</name>
<value>/data1/yarn/logs</value>
</property>
<property>
<description>Where to aggregate logs</description>
<name>yarn.nodemanager.remote-app-log-dir</name>
<value>hdfs://dmcluster/var/log/hadoop-yarn/apps</value>
</property>
<property>
<name>yarn.web-proxy.address</name>
<value>Datanode8.qs.com:54315</value>
</property>
</configuration>
[root@Namenode1:172.31.96.38:/etc/hadoop/conf] more container-executor.cfg
yarn.nodemanager.local-dirs=/data1/yarn/local
yarn.nodemanager.linux-container-executor.group=yarn
yarn.nodemanager.log-dirs=/data1/yarn/logs
banned.users=root
min.user.id=1
Create the data directories:
mkdir -pv /data1/dfs/{jn,nn,dn} ; chown hdfs:hdfs -R /data1/dfs/
mkdir -pv /data1/yarn/{local,logs} ; chown yarn:yarn -R /data1/yarn/
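These directories are needed on every node running the corresponding daemons. With root SSH trust in place, one way to fan the commands out (a sketch; the remote shell is assumed to be bash for the brace expansion):
for h in Namenode1 Namenode2 Datanode1 Datanode2 Datanode3 Datanode4 Datanode5 Datanode6 Datanode7 Datanode8; do
  ssh root@${h}.qs.com 'mkdir -pv /data1/dfs/{jn,nn,dn} /data1/yarn/{local,logs}; chown -R hdfs:hdfs /data1/dfs; chown -R yarn:yarn /data1/yarn'
done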
Set up SSH trust for the hdfs account (namenode1 and namenode2; required by the sshfence fencing method):
su - hdfs
ssh-keygen -t rsa
ssh-copy-id -i /var/lib/hadoop-hdfs/.ssh/id_rsa.pub [email protected]
ssh-copy-id -i /var/lib/hadoop-hdfs/.ssh/id_rsa.pub [email protected]
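sshfence only works if this login is passwordless; verify from each namenode before relying on automatic failover:
ssh -i /var/lib/hadoop-hdfs/.ssh/id_rsa [email protected] hostname    # should print the remote hostname without prompting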
Initialize ZKFC (run on namenode1):
hdfs@Namenode1:~$ hdfs zkfc -formatZK
Start the HDFS cluster:
User: root
Start the Hadoop journalnodes:
/etc/init.d/hadoop-hdfs-journalnode start
hdfs namenode -initializeSharedEdits (formats the JournalNodes; they must be running first)
Start the Hadoop namenodes:
sudo -u hdfs hadoop namenode -format (formats the NameNode; run on any one NN, exactly once)
1. Start the primary (formatted) NameNode:
$ sudo service hadoop-hdfs-namenode start
2. Start the standby NameNode:
$ sudo -u hdfs hdfs namenode -bootstrapStandby
$ sudo service hadoop-hdfs-namenode start
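Once both NameNodes are up, check the HA state (nn1 and nn2 are the IDs from dfs.ha.namenodes.dmcluster; one should report active, the other standby):
sudo -u hdfs hdfs haadmin -getServiceState nn1
sudo -u hdfs hdfs haadmin -getServiceState nn2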
Start the Hadoop datanodes:
/etc/init.d/hadoop-hdfs-datanode start
/etc/init.d/hadoop-yarn-nodemanager start
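To confirm the datanodes registered with the NameNode:
sudo -u hdfs hdfs dfsadmin -report | head -n 20    # the summary should count all eight datanodes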
Create the cluster directories on HDFS:
sudo -u hdfs hadoop fs -mkdir /tmp
sudo -u hdfs hadoop fs -chmod -R 1777 /tmp
sudo -u hdfs hadoop fs -mkdir -p /user/history
sudo -u hdfs hadoop fs -chmod -R 1777 /user/history
sudo -u hdfs hadoop fs -chown yarn /user/history
sudo -u hdfs hadoop fs -mkdir -p /var/log/hadoop-yarn
sudo -u hdfs hadoop fs -chown yarn:mapred /var/log/hadoop-yarn
Start the YARN and MapReduce services:
/etc/init.d/hadoop-yarn-resourcemanager start
/etc/init.d/hadoop-mapreduce-historyserver start
/etc/init.d/hadoop-yarn-proxyserver start
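To confirm the NodeManagers registered with the ResourceManager:
yarn node -list    # should list a NodeManager for each datanode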
Web UIs:
HDFS: http://172.31.96.38:50070/dfshealth.jsp
YARN cluster: http://172.31.96.38:8088/cluster
JobHistory: http://172.31.96.51:19888/jobhistory
MapReduce test command:
hadoop jar /usr/lib/hadoop-mapreduce/hadoop-mapreduce-examples-2.0.0-cdh4.6.0.jar wordcount /user/root/input/data.txt /user/root/output_data
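The job assumes the input file already exists in HDFS; a minimal preparation and result-inspection sketch (data.txt is an arbitrary local sample file):
sudo -u hdfs hadoop fs -mkdir -p /user/root
sudo -u hdfs hadoop fs -chown root /user/root
hadoop fs -mkdir -p /user/root/input
hadoop fs -put data.txt /user/root/input/
# after the job completes:
hadoop fs -cat /user/root/output_data/part-r-00000 | head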
HBase deployment:
apt-get install hbase-master (on the namenodes)
apt-get install hbase-regionserver hbase-thrift (on the datanodes)
HBase configuration:
The HMaster and regionserver+thrift nodes share the same main configuration file:
more /etc/hbase/conf/hbase-site.xml
<?xml version="1.0"?>
<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
<configuration>
<property>
<name>hbase.cluster.distributed</name>
<value>true</value>
</property>
<property>
<name>hbase.rootdir</name>
<value>hdfs://dmcluster/hbase</value>
</property>
<property>
<name>hbase.zookeeper.quorum</name>
<value>Flume1.qs.com,Flume2.qs.com,Datanode1.qs.com,Datanode2.qs.com,Datanode3.qs.com,Datanode5.qs.com,Datanode6.qs.com,Datanode7.qs.com,Datanode8.qs.com</value>
</property>
<property>
<name>hbase.replication</name>
<value>true</value>
</property>
<property>
<name>replication.source.ratio</name>
<value>1.0</value>
</property>
<property>
<name>replication.source.nb.capacity</name>
<value>1000</value>
</property>
<property>
<name>replication.replicationsource.implementation</name>
<value>com.ngdata.sep.impl.SepReplicationSource</value>
</property>
<property>
<name>hbase.regionserver.lease.period</name>
<value>240000</value>
<!-- default 60000 -->
</property>
<property>
<name>hbase.rpc.timeout</name>
<value>280000</value>
<!-- default 60000 -->
</property>
<property>
<name>hadoop.proxyuser.hbase.groups</name>
<value>solr,hdfs</value>
</property>
<property>
<name>hadoop.proxyuser.hbase.hosts</name>
<value>*</value>
</property>
<property>
<name>hbase.regionserver.handler.count</name>
<value>128</value>
</property>
<property>
<name>hbase.client.write.buffer</name>
<value>2097152</value>
</property>
<property>
<name>hbase.thrift.minWorkerThreads</name>
<value>16</value>
<description>
The "core size" of the thread pool. New threads are created on every
connection until this many threads are created.
</description>
</property>
<property>
<name>hbase.thrift.maxWorkerThreads</name>
<value>1000</value>
</property>
<property>
<name>hbase.thrift.maxQueuedRequests</name>
<value>1000</value>
</property>
</configuration>
root@Datanode7:/etc/hbase/conf# more /etc/hbase/conf/regionservers
Datanode1.qs.com
Datanode2.qs.com
Datanode3.qs.com
Datanode4.qs.com
Datanode5.qs.com
Datanode6.qs.com
Datanode7.qs.com
Datanode8.qs.com
Create the HBase directory on HDFS:
sudo -u hdfs hadoop fs -mkdir /hbase
sudo -u hdfs hadoop fs -chown hbase /hbase
In /etc/hbase/conf/hbase-env.sh:
export HBASE_HEAPSIZE=4000
export HBASE_OPTS="-Xmx4g -Xms4g -Xmn1024m -XX:+UseParNewGC -XX:+UseConcMarkSweepGC -XX:CMSInitiatingOccupancyFraction=70"
Start HBase:
sudo service hbase-master start (on the HMaster)
sudo service hbase-regionserver start (on the regionservers)
/usr/lib/hbase/bin/hbase-daemon.sh start thrift2 (on the regionserver + thrift nodes)
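A quick smoke test from the HBase shell (the table name 'smoke' is arbitrary):
echo "status 'simple'" | hbase shell
hbase shell <<'EOF'
create 'smoke', 'cf'
put 'smoke', 'r1', 'cf:c1', 'v1'
scan 'smoke'
disable 'smoke'
drop 'smoke'
EOF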
HBase web UI: http://172.31.96.38:60010/master-status