Environment:
3 × Ubuntu 16.04.2 machines
Architecture overview: all three nodes run ZooKeeper and a JournalNode, while ubuntu-node1 and ubuntu-node2 additionally host the active/standby NameNode and ResourceManager pairs. We will go straight through the deployment steps and briefly explain the underlying principles at the end.
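The configuration files below mix raw IP addresses with the hostnames ubuntu-node1/2/3. A minimal /etc/hosts sketch for every node, assuming ubuntu-node1 maps to 192.168.1.7, ubuntu-node2 to 192.168.1.8 and ubuntu-node3 to 192.168.1.9 (this mapping is an assumption; adjust it to your environment):

# /etc/hosts on all three machines (assumed hostname-to-IP mapping)
192.168.1.7 ubuntu-node1
192.168.1.8 ubuntu-node2
192.168.1.9 ubuntu-node3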
ZooKeeper deployment. The conf/zoo.cfg used on every node:
tickTime=2000
# The number of ticks that the initial
# synchronization phase can take
initLimit=10
# The number of ticks that can pass between
# sending a request and getting an acknowledgement
syncLimit=5
# the directory where the snapshot is stored.
# do not use /tmp for storage, /tmp here is just
# example sakes.
# the port at which the clients will connect
clientPort=2181
# the maximum number of client connections.
# increase this if you need to handle more clients
#maxClientCnxns=60
#
# Be sure to read the maintenance section of the
# administrator guide before turning on autopurge.
#
# http://zookeeper.apache.org/doc/current/zookeeperAdmin.html#sc_maintenance
#
# The number of snapshots to retain in dataDir
#autopurge.snapRetainCount=3
# Purge task interval in hours
# Set to "0" to disable auto purge feature
#autopurge.purgeInterval=1
dataDir=/apps/svr/install/apache-zookeeper-3.5.7-bin/data
dataLogDir=/apps/svr/install/apache-zookeeper-3.5.7-bin/log
server.0=192.168.1.7:2888:3888
server.1=192.168.1.8:2888:3888
server.2=192.168.1.9:2888:3888
Edit zoo.cfg as above on all three nodes, create the directories it references, create a myid file under the data directory on each node (matching that node's server.N id), and then start ZooKeeper.
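A minimal sketch of those steps, assuming the install path from the dataDir/dataLogDir values above and that the current host is server.0 (use 1 and 2 on the other two machines):

ZK_HOME=/apps/svr/install/apache-zookeeper-3.5.7-bin
# create the data and transaction-log directories referenced by zoo.cfg
mkdir -p $ZK_HOME/data $ZK_HOME/log
# this node's id: 0 on 192.168.1.7, 1 on 192.168.1.8, 2 on 192.168.1.9
echo 0 > $ZK_HOME/data/myid
# start the server, then check that the ensemble has elected a leader
$ZK_HOME/bin/zkServer.sh start
$ZK_HOME/bin/zkServer.sh status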
Hadoop HA deployment. Here we do everything in a single pass.
First, the configuration.
###core-site.xml
<?xml version="1.0" encoding="UTF-8"?>
<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
<configuration>
<!-- Note: the NameNode default port is 9000 in Hadoop 2.x and 9820 in Hadoop 3.x -->
<property>
<name>fs.defaultFS</name>
<value>hdfs://mycluster</value>
</property>
<!-- Note: create this temporary directory yourself -->
<property>
<name>hadoop.tmp.dir</name>
<value>/opt/hadoop/ha</value>
</property>
<property>
<name>ha.zookeeper.quorum</name>
<value>192.168.1.7:2181,192.168.1.8:2181,192.168.1.9:2181</value>
</property>
</configuration>
###hdfs-site.xml
<?xml version="1.0" encoding="UTF-8"?>
<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
<configuration>
<!-- Note: the replication factor defaults to 3 when not configured -->
<property>
<name>dfs.replication</name>
<value>1</value>
</property>
<property>
<!-- Secondary NameNode on node2; the port is 50090 in Hadoop 2.x. Strictly speaking this daemon is not needed once HA is enabled, because the standby NameNode performs checkpointing -->
<name>dfs.namenode.secondary.http-address</name>
<value>ubuntu-node2:50090</value>
</property>
<!-- Disable HDFS permission checking -->
<property>
<name>dfs.permissions.enabled</name>
<value>false</value>
</property>
<property>
<name>dfs.nameservices</name>
<value>mycluster</value>
</property>
<property>
<name>dfs.ha.namenodes.mycluster</name>
<value>node1,node2</value>
</property>
<property>
<name>dfs.namenode.rpc-address.mycluster.node1</name>
<value>ubuntu-node1:8020</value>
</property>
<property>
<name>dfs.namenode.rpc-address.mycluster.node2</name>
<value>ubuntu-node2:8020</value>
</property>
<property>
<name>dfs.namenode.http-address.mycluster.node1</name>
<value>ubuntu-node1:50070</value>
</property>
<property>
<name>dfs.namenode.http-address.mycluster.node2</name>
<value>ubuntu-node2:50070</value>
</property>
<property>
<name>dfs.namenode.shared.edits.dir</name>
<value>qjournal://ubuntu-node1:8485;ubuntu-node2:8485;ubuntu-node3:8485/mycluster</value>
</property>
<property>
<name>dfs.client.failover.proxy.provider.mycluster</name>
<value>org.apache.hadoop.hdfs.server.namenode.ha.ConfiguredFailoverProxyProvider</value>
</property>
<property>
<name>dfs.ha.fencing.methods</name>
<value>sshfence</value>
</property>
<property>
<name>dfs.ha.fencing.ssh.private-key-files</name>
<value>/home/ubuntu/.ssh/id_rsa</value>
</property>
<property>
<name>dfs.ha.automatic-failover.enabled</name>
<value>true</value>
</property>
<property>
<name>dfs.journalnode.edits.dir</name>
<value>/tmp/hadoop/journalnode/data</value>
</property>
</configuration>
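The sshfence fencing method configured above requires that each NameNode host can SSH to the other one without a password, using the private key named in dfs.ha.fencing.ssh.private-key-files. A minimal sketch, assuming the ubuntu user (run on ubuntu-node1, then do the same in the other direction on ubuntu-node2):

# generate a key pair for the ubuntu user if one does not exist yet
ssh-keygen -t rsa -f /home/ubuntu/.ssh/id_rsa -N ""
# allow passwordless login to the peer NameNode host
ssh-copy-id -i /home/ubuntu/.ssh/id_rsa.pub ubuntu@ubuntu-node2
# verify that fencing will be able to log in
ssh ubuntu@ubuntu-node2 hostname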
Startup. Start the JournalNode (QJM) daemons on an odd number of nodes, here all three:
sbin/hadoop-daemon.sh start journalnode
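The command has to be run on every JournalNode host. A small sketch that does it from one machine over SSH, assuming passwordless SSH between the nodes and that HADOOP_HOME points to the same Hadoop install path on every node (both are assumptions):

for host in ubuntu-node1 ubuntu-node2 ubuntu-node3; do
  # start the JournalNode daemon remotely, then confirm it is running
  ssh "$host" "$HADOOP_HOME/sbin/hadoop-daemon.sh start journalnode"
  ssh "$host" "jps | grep JournalNode"
done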
First, on namenode1 (ubuntu-node1), format the NameNode:
bin/hdfs namenode -format
Then, still on namenode1, initialize the HA state znode in ZooKeeper:
bin/hdfs zkfc -formatZK
Start HDFS:
sbin/start-dfs.sh
On namenode2, bootstrap the standby (this copies the formatted metadata over):
bin/hdfs namenode -bootstrapStandby
Start the NameNode on namenode2:
sbin/hadoop-daemon.sh start namenode
At this point the Hadoop HA setup is complete.
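As a quick sanity check, jps on each host should show roughly the daemons below (DataNode placement depends on your slaves file and is an assumption here):

# on ubuntu-node1 and ubuntu-node2 (the NameNode hosts)
jps    # expect NameNode, DFSZKFailoverController, JournalNode, QuorumPeerMain (plus DataNode if listed in slaves)
# on ubuntu-node3
jps    # expect JournalNode, QuorumPeerMain (plus DataNode if listed in slaves)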
Command to check a NameNode's HA state:
bin/hdfs haadmin -getServiceState <id>
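For example, using the NameNode ids defined in dfs.ha.namenodes.mycluster above:

bin/hdfs haadmin -getServiceState node1    # prints "active" or "standby"
bin/hdfs haadmin -getServiceState node2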
Next, the YARN HA setup.
###yarn-site.xml
<?xml version="1.0"?>
<configuration>
<!-- Enable ResourceManager HA -->
<property>
<name>yarn.resourcemanager.ha.enabled</name>
<value>true</value>
</property>
<property>
<name>yarn.resourcemanager.cluster-id</name>
<value>yarn-cluster</value>
</property>
<property>
<name>yarn.resourcemanager.ha.rm-ids</name>
<value>rm1,rm2</value>
</property>
<property>
<name>yarn.resourcemanager.hostname.rm1</name>
<value>ubuntu-node1</value>
</property>
<property>
<name>yarn.resourcemanager.hostname.rm2</name>
<value>ubuntu-node2</value>
</property>
<!-- ZooKeeper quorum -->
<property>
<name>yarn.resourcemanager.zk-address</name>
<value>192.168.1.7:2181,192.168.1.8:2181,192.168.1.9:2181</value>
</property>
<!-- Auxiliary service run on the NodeManagers; must be set to mapreduce_shuffle for MapReduce jobs to run -->
<property>
<name>yarn.nodemanager.aux-services</name>
<value>mapreduce_shuffle</value>
</property>
<!-- Enable automatic recovery of ResourceManager state -->
<property>
<name>yarn.resourcemanager.recovery.enabled</name>
<value>true</value>
</property>
<!-- Store the ResourceManager state in the ZooKeeper cluster -->
<property>
<name>yarn.resourcemanager.store.class</name>
<value>org.apache.hadoop.yarn.server.resourcemanager.recovery.ZKRMStateStore</value>
</property>
<!-- Log aggregation -->
<property>
<name>yarn.log-aggregation-enable</name>
<value>true</value>
</property>
<!-- Job history service -->
<property>
<name>yarn.log.server.url</name>
<value>http://ubuntu-node1:19888/jobhistory/logs/</value>
</property>
<!-- How long aggregated logs are retained on HDFS (seconds) -->
<property>
<name>yarn.log-aggregation.retain-seconds</name>
<value>86400</value>
</property>
</configuration>
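One note on the log settings above: yarn.log.server.url points at a MapReduce JobHistory server on ubuntu-node1:19888, which start-yarn.sh does not start. A sketch of starting it, assuming the standard Hadoop 2.x sbin layout:

# run on ubuntu-node1; serves aggregated job logs on port 19888
sbin/mr-jobhistory-daemon.sh start historyserver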
Startup command:
sbin/start-yarn.sh
Start the ResourceManager on the standby node manually (start-yarn.sh does not start it):
sbin/yarn-daemon.sh start resourcemanager
Command to check a ResourceManager's HA state:
bin/yarn rmadmin -getServiceState <id>
Command to manually transition a node to active; because automatic failover is enabled, --forcemanual is required:
bin/hdfs haadmin -transitionToActive [--forcemanual] <id>
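That command is the HDFS side. For YARN, the equivalent uses the rm ids defined in yarn.resourcemanager.ha.rm-ids above, for example:

bin/yarn rmadmin -transitionToActive --forcemanual rm1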
You are welcome to follow my WeChat official account.