Hadoop Parameter Summary
Linux parameters
The following OS-level settings are worth tuning (a shell sketch follows this list):
- File descriptor limit (ulimit -n)
- Maximum user processes (nproc); HBase in particular needs this raised, see the HBase book
- Disable or minimize the swap partition
- Set a reasonable read-ahead buffer size
- The Linux kernel I/O scheduler
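A minimal shell sketch of how these settings are typically checked and applied on a worker node. The hadoop user, the limit values, and the device name /dev/sdb are assumptions; adapt them to your distribution and disks.

```bash
# File descriptors: check the current limit, then raise it permanently
ulimit -n
# /etc/security/limits.conf (user and values are illustrative):
#   hadoop  soft  nofile  65536
#   hadoop  hard  nofile  65536
# Maximum user processes (nproc), which HBase also needs raised:
#   hadoop  soft  nproc   32000
#   hadoop  hard  nproc   32000

# Swap: keep the kernel from swapping JVM heaps
sysctl -w vm.swappiness=0      # or `swapoff -a` to disable swap entirely

# Read-ahead buffer on the data disks (value is in 512-byte sectors)
blockdev --setra 8192 /dev/sdb

# I/O scheduler: deadline (or noop) is commonly preferred over cfq for HDFS data disks
cat /sys/block/sdb/queue/scheduler
echo deadline > /sys/block/sdb/queue/scheduler
```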
JVM parameters
Hadoop parameter reference
Main configuration files: core-site.xml, hdfs-site.xml, yarn-site.xml (defaults come from the corresponding *-default.xml files); a sample override fragment follows the legend below.
Importance is indicated as follows:
- Important
- Normal
- Unimportant
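The parameters below are listed as name=default value; in practice only a subset is overridden in the site files. A minimal, illustrative core-site.xml fragment, assuming a hypothetical NameNode host and example (not recommended) values:

```xml
<!-- core-site.xml: overrides for defaults listed below (example values only) -->
<configuration>
  <property>
    <name>fs.defaultFS</name>
    <value>hdfs://namenode.example.com:8020</value>
  </property>
  <property>
    <name>io.file.buffer.size</name>
    <value>131072</value>   <!-- larger I/O buffer than the 4096-byte default -->
  </property>
  <property>
    <name>fs.trash.interval</name>
    <value>1440</value>     <!-- keep deleted files in .Trash for one day (minutes) -->
  </property>
</configuration>
```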
hadoop.common.configuration.version=0.23.0
hadoop.tmp.dir=/tmp/hadoop-${user.name}
hadoop.security.authorization=false
io.file.buffer.size=4096
io.compression.codecs=null
fs.defaultFS=file:///
fs.trash.interval=0
fs.trash.checkpoint.interval=0
dfs.ha.fencing.methods=null
dfs.ha.fencing.ssh.private-key-files=null
ha.zookeeper.quorum=null
ha.zookeeper.session-timeout.ms=5000
hadoop.http.staticuser.user=dr.who
fs.permissions.umask-mode=022
io.native.lib.available=true
hadoop.http.filter.initializers=org.apache.hadoop.http.lib.StaticUserWebFilter
hadoop.security.authentication=simple
hadoop.security.group.mapping=org.apache.hadoop.security.JniBasedUnixGroupsMappingWithFallback
hadoop.security.groups.cache.secs=300
hadoop.security.service.user.name.key=null
hadoop.security.uid.cache.secs=14400
hadoop.rpc.protection=authentication
hadoop.work.around.non.threadsafe.getpwuid=false
hadoop.kerberos.kinit.command=kinit
hadoop.security.auth_to_local=null
io.bytes.per.checksum=512
io.skip.checksum.errors=false
io.serializations=org.apache.hadoop.io.serializer.WritableSerialization,org.apache.hadoop.io.serializer.avro.AvroSpecificSerialization,org.apache.hadoop.io.serializer.avro.AvroReflectSerialization
io.seqfile.local.dir=${hadoop.tmp.dir}/io/local
io.map.index.skip=0
io.map.index.interval=128
fs.default.name=file:///
fs.AbstractFileSystem.file.impl=org.apache.hadoop.fs.local.LocalFs
fs.AbstractFileSystem.hdfs.impl=org.apache.hadoop.fs.Hdfs
fs.AbstractFileSystem.viewfs.impl=org.apache.hadoop.fs.viewfs.ViewFs
fs.ftp.host=0.0.0.0
fs.ftp.host.port=21
fs.df.interval=60000
fs.s3.block.size=67108864
fs.s3.buffer.dir=${hadoop.tmp.dir}/s3
fs.s3.maxRetries=4
fs.s3.sleepTimeSeconds=10
fs.automatic.close=true
fs.s3n.block.size=67108864
io.seqfile.compress.blocksize=1000000
io.seqfile.lazydecompress=true
io.seqfile.sorter.recordlimit=1000000
io.mapfile.bloom.size=1048576
io.mapfile.bloom.error.rate=0.005
hadoop.util.hash.type=murmur
ipc.client.idlethreshold=4000
ipc.client.kill.max=10
ipc.client.connection.maxidletime=10000
ipc.client.connect.max.retries=10
ipc.client.connect.max.retries.on.timeouts=45
ipc.server.listen.queue.size=128
ipc.server.tcpnodelay=false
ipc.client.tcpnodelay=false
hadoop.rpc.socket.factory.class.default=org.apache.hadoop.net.StandardSocketFactory
hadoop.rpc.socket.factory.class.ClientProtocol=null
hadoop.socks.server=null
net.topology.node.switch.mapping.impl=org.apache.hadoop.net.ScriptBasedMapping
net.topology.script.file.name=null
net.topology.script.number.args=100
net.topology.table.file.name=null
file.stream-buffer-size=4096
s3.stream-buffer-size=4096
kfs.stream-buffer-size=4096
ftp.stream-buffer-size=4096
tfile.io.chunk.size=1048576
hadoop.http.authentication.type=simple
hadoop.http.authentication.token.validity=36000
hadoop.http.authentication.signature.secret.file=${user.home}/hadoop-http-auth-signature-secret
hadoop.http.authentication.cookie.domain=null
hadoop.http.authentication.simple.anonymous.allowed=true
hadoop.http.authentication.kerberos.principal=HTTP/_HOST@LOCALHOST
hadoop.http.authentication.kerberos.keytab=${user.home}/hadoop.keytab
dfs.ha.fencing.ssh.connect-timeout=30000
ha.zookeeper.parent-znode=/hadoop-ha
ha.zookeeper.acl=world:anyone:rwcda
ha.zookeeper.auth=null
hadoop.ssl.keystores.factory.class=org.apache.hadoop.security.ssl.FileBasedKeyStoresFactory
hadoop.ssl.require.client.cert=false
hadoop.ssl.hostname.verifier=DEFAULT
hadoop.ssl.server.conf=ssl-server.xml
hadoop.ssl.client.conf=ssl-client.xml
hadoop.ssl.enabled=false
hadoop.jetty.logs.serve.aliases=true
ha.health-monitor.connect-retry-interval.ms=1000
ha.health-monitor.check-interval.ms=1000
ha.health-monitor.sleep-after-disconnect.ms=1000
ha.health-monitor.rpc-timeout.ms=45000
ha.failover-controller.new-active.rpc-timeout.ms=60000
ha.failover-controller.graceful-fence.rpc-timeout.ms=5000
ha.failover-controller.graceful-fence.connection.retries=1
ha.failover-controller.cli-check.rpc-timeout.ms=20000
hadoop.hdfs.configuration.version=1
Version of the configuration file.
dfs.datanode.address=0.0.0.0:50010
DataNode service address and port, used for data transfer; port 0 means any free port is chosen. The three DataNode endpoints are:

| Port     | Parameter                 | Default | Purpose                         |
|----------|---------------------------|---------|---------------------------------|
| xferPort | dfs.datanode.address      | 50010   | data streaming (block transfer) |
| infoPort | dfs.datanode.http.address | 50075   | HTTP (web UI)                   |
| ipcPort  | dfs.datanode.ipc.address  | 50020   | IPC commands                    |
dfs.datanode.http.address=0.0.0.0:50075
dfs.datanode.ipc.address=0.0.0.0:50020
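Restated as an hdfs-site.xml fragment, the three DataNode endpoints described above look like this; the values are simply the defaults, so override them only if the ports conflict on your hosts:

```xml
<!-- hdfs-site.xml: DataNode endpoints (defaults restated) -->
<configuration>
  <property>
    <name>dfs.datanode.address</name>        <!-- xferPort: block data transfer -->
    <value>0.0.0.0:50010</value>
  </property>
  <property>
    <name>dfs.datanode.http.address</name>   <!-- infoPort: HTTP (web UI) -->
    <value>0.0.0.0:50075</value>
  </property>
  <property>
    <name>dfs.datanode.ipc.address</name>    <!-- ipcPort: IPC commands -->
    <value>0.0.0.0:50020</value>
  </property>
</configuration>
```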
dfs.namenode.rpc-address=null
dfs.namenode.http-address=0.0.0.0:50070
dfs.datanode.du.reserved=0
dfs.namenode.name.dir.restore=false
dfs.namenode.edits.dir=${dfs.namenode.name.dir}
dfs.namenode.shared.edits.dir=null
dfs.namenode.edits.journal-plugin.qjournal=org.apache.hadoop.hdfs.qjournal.client.QuorumJournalManager
dfs.permissions.enabled=true
dfs.permissions.superusergroup=supergroup
dfs.datanode.data.dir=file://${hadoop.tmp.dir}/dfs/data
dfs.replication=3
dfs.replication.max=512
dfs.namenode.replication.min=1
dfs.blocksize=67108864
dfs.client.block.write.retries=3
dfs.client.block.write.replace-datanode-on-failure.enable=true
dfs.client.block.write.replace-datanode-on-failure.policy=DEFAULT
dfs.heartbeat.interval=3
dfs.namenode.handler.count=10
dfs.namenode.safemode.threshold-pct=0.999f
dfs.namenode.safemode.extension=30000
dfs.datanode.balance.bandwidthPerSec=1048576
dfs.hosts=null
dfs.hosts.exclude=null
dfs.stream-buffer-size=4096
dfs.namenode.num.extra.edits.retained=1000000
dfs.datanode.handler.count=10
dfs.datanode.failed.volumes.tolerated=0
dfs.namenode.support.allow.format=true
dfs.client.failover.max.attempts=15
dfs.client.failover.connection.retries=0
dfs.client.failover.connection.retries.on.timeouts=0
dfs.nameservices=null
dfs.nameservice.id=null
dfs.ha.namenodes.EXAMPLENAMESERVICE=null
dfs.ha.namenode.id=null
dfs.ha.automatic-failover.enabled=false
dfs.namenode.avoid.write.stale.datanode=false
dfs.journalnode.rpc-address=0.0.0.0:8485
dfs.journalnode.http-address=0.0.0.0:8480
dfs.namenode.audit.loggers=default
dfs.client.socket-timeout=60*1000
dfs.datanode.socket.write.timeout=8*60*1000
dfs.datanode.socket.reuse.keepalive=1000
dfs.namenode.logging.level=info
dfs.namenode.secondary.http-address=0.0.0.0:50090
dfs.https.enable=false
dfs.client.https.need-auth=false
dfs.https.server.keystore.resource=ssl-server.xml
dfs.client.https.keystore.resource=ssl-client.xml
dfs.datanode.https.address=0.0.0.0:50475
dfs.namenode.https-address=0.0.0.0:50470
dfs.datanode.dns.interface=default
dfs.datanode.dns.nameserver=default
dfs.namenode.backup.address=0.0.0.0:50100
dfs.namenode.backup.http-address=0.0.0.0:50105
dfs.namenode.replication.considerLoad=true
dfs.default.chunk.view.size=32768
dfs.namenode.name.dir=file://${hadoop.tmp.dir}/dfs/name
dfs.namenode.fs-limits.max-component-length=0
dfs.namenode.fs-limits.max-directory-items=0
dfs.namenode.fs-limits.min-block-size=1048576
dfs.namenode.fs-limits.max-blocks-per-file=1048576
dfs.block.access.token.enable=false
dfs.block.access.key.update.interval=600
dfs.block.access.token.lifetime=600
dfs.datanode.data.dir.perm=700
dfs.blockreport.intervalMsec=21600000
dfs.blockreport.initialDelay=0
dfs.datanode.directoryscan.interval=21600
dfs.datanode.directoryscan.threads=1
dfs.namenode.safemode.min.datanodes=0
dfs.namenode.max.objects=0
dfs.namenode.decommission.interval=30
dfs.namenode.decommission.nodes.per.interval=5
dfs.namenode.replication.interval=3
dfs.namenode.accesstime.precision=3600000
dfs.datanode.plugins=null
dfs.namenode.plugins=null
dfs.bytes-per-checksum=512
dfs.client-write-packet-size=65536
dfs.client.write.exclude.nodes.cache.expiry.interval.millis=600000
dfs.namenode.checkpoint.dir=file://${hadoop.tmp.dir}/dfs/namesecondary
dfs.namenode.checkpoint.edits.dir=${dfs.namenode.checkpoint.dir}
dfs.namenode.checkpoint.period=3600
dfs.namenode.checkpoint.txns=1000000
dfs.namenode.checkpoint.check.period=60
dfs.namenode.checkpoint.max-retries=3
dfs.namenode.num.checkpoints.retained=2
Recommendation: do not use the SecondaryNameNode (SNN); if it is not used, this setting can be ignored: dfs.namenode.num.extra.edits.retained=1000000
dfs.namenode.max.extra.edits.segments.retained=10000
dfs.namenode.delegation.key.update-interval=86400000
dfs.namenode.delegation.token.max-lifetime=604800000
dfs.namenode.delegation.token.renew-interval=86400000
dfs.image.compress=false
dfs.image.compression.codec=org.apache.hadoop.io.compress.DefaultCodec
dfs.image.transfer.timeout=600000
dfs.image.transfer.bandwidthPerSec=0
dfs.datanode.max.transfer.threads=4096
dfs.datanode.readahead.bytes=4193404
dfs.datanode.drop.cache.behind.reads=false
dfs.datanode.drop.cache.behind.writes=false
dfs.datanode.sync.behind.writes=false
dfs.client.failover.sleep.base.millis=500
dfs.client.failover.sleep.max.millis=15000
dfs.ha.log-roll.period=120
dfs.ha.tail-edits.period=60
dfs.ha.zkfc.port=8019
dfs.support.append=true
dfs.client.use.datanode.hostname=false
dfs.datanode.use.datanode.hostname=false
dfs.client.local.interfaces=null
dfs.namenode.kerberos.internal.spnego.principal=${dfs.web.authentication.kerberos.principal}
dfs.secondary.namenode.kerberos.internal.spnego.principal=${dfs.web.authentication.kerberos.principal}
dfs.namenode.avoid.read.stale.datanode=false
dfs.namenode.stale.datanode.interval=30000
dfs.namenode.write.stale.datanode.ratio=0.5f
dfs.namenode.invalidate.work.pct.per.iteration=0.32f
dfs.namenode.replication.work.multiplier.per.iteration=2
dfs.webhdfs.enabled=false
hadoop.fuse.connection.timeout=300
hadoop.fuse.timer.period=5
dfs.metrics.percentiles.intervals=null
dfs.encrypt.data.transfer=false
dfs.encrypt.data.transfer.algorithm=null
dfs.datanode.hdfs-blocks-metadata.enabled=true
dfs.client.file-block-storage-locations.num-threads=10
dfs.client.file-block-storage-locations.timeout=60
dfs.domain.socket.path=/var/run/hadoop-hdfs/dn._PORT
yarn.app.mapreduce.am.env=null
yarn.app.mapreduce.am.command-opts=-Xmx1024m
yarn.app.mapreduce.am.resource.mb=1536
yarn.resourcemanager.address=0.0.0.0:8032
yarn.resourcemanager.scheduler.address=0.0.0.0:8030
yarn.admin.acl=*
yarn.resourcemanager.admin.address=0.0.0.0:8033
yarn.resourcemanager.am.max-retries=1
yarn.resourcemanager.nodes.include-path=null
yarn.resourcemanager.nodes.exclude-path=null
yarn.resourcemanager.scheduler.class=org.apache.hadoop.yarn.server.resourcemanager.scheduler.fifo.FifoScheduler
yarn.scheduler.minimum-allocation-mb=1024
yarn.scheduler.maximum-allocation-mb=8192
yarn.resourcemanager.recovery.enabled=false
yarn.resourcemanager.store.class=null
yarn.resourcemanager.max-completed-applications=10000
yarn.nodemanager.address=0.0.0.0:0
yarn.nodemanager.env-whitelist=JAVA_HOME,HADOOP_COMMON_HOME,HADOOP_HDFS_HOME,HADOOP_CONF_DIR,YARN_HOME
yarn.nodemanager.delete.debug-delay-sec=0
yarn.nodemanager.local-dirs=${hadoop.tmp.dir}/nm-local-dir
yarn.nodemanager.log-dirs=${yarn.log.dir}/userlogs
yarn.log-aggregation-enable=false
yarn.log-aggregation.retain-seconds=-1
yarn.nodemanager.log.retain-seconds=10800
yarn.nodemanager.remote-app-log-dir=/tmp/logs
yarn.nodemanager.remote-app-log-dir-suffix=logs
yarn.nodemanager.resource.memory-mb=8192
yarn.nodemanager.vmem-pmem-ratio=2.1
yarn.nodemanager.webapp.address=0.0.0.0:8042
yarn.nodemanager.log-aggregation.compression-type=none
yarn.nodemanager.aux-services=null
yarn.nodemanager.aux-services.mapreduce.shuffle.class=org.apache.hadoop.mapred.ShuffleHandler
mapreduce.job.jar=null
mapreduce.job.hdfs-servers=${fs.defaultFS}
yarn.application.classpath=$HADOOP_CONF_DIR,$HADOOP_COMMON_HOME/share/hadoop/common/*,$HADOOP_COMMON_HOME/share/hadoop/common/lib/*,$HADOOP_HDFS_HOME/share/hadoop/hdfs/*,$HADOOP_HDFS_HOME/share/hadoop/hdfs/lib/*,$YARN_HOME/share/hadoop/yarn/*,$YARN_HOME/share/hadoop/yarn/lib/*
yarn.app.mapreduce.am.job.task.listener.thread-count=30
yarn.app.mapreduce.am.job.client.port-range=null
yarn.app.mapreduce.am.job.committer.cancel-timeout=60000
yarn.app.mapreduce.am.scheduler.heartbeat.interval-ms=1000
yarn.app.mapreduce.client-am.ipc.max-retries=3
yarn.app.mapreduce.client.max-retries=3
yarn.ipc.client.factory.class=null
yarn.ipc.serializer.type=protocolbuffers
yarn.ipc.server.factory.class=null
yarn.ipc.exception.factory.class=null
yarn.ipc.record.factory.class=null
yarn.ipc.rpc.class=org.apache.hadoop.yarn.ipc.HadoopYarnProtoRPC
yarn.resourcemanager.client.thread-count=50
yarn.am.liveness-monitor.expiry-interval-ms=600000
yarn.resourcemanager.principal=null
yarn.resourcemanager.scheduler.client.thread-count=50
yarn.resourcemanager.webapp.address=0.0.0.0:8088
yarn.resourcemanager.resource-tracker.address=0.0.0.0:8031
yarn.acl.enable=true
yarn.resourcemanager.admin.client.thread-count=1
yarn.resourcemanager.amliveliness-monitor.interval-ms=1000
yarn.resourcemanager.container.liveness-monitor.interval-ms=600000
yarn.resourcemanager.keytab=/etc/krb5.keytab
yarn.nm.liveness-monitor.expiry-interval-ms=600000
yarn.resourcemanager.nm.liveness-monitor.interval-ms=1000
yarn.resourcemanager.resource-tracker.client.thread-count=50
yarn.resourcemanager.delayed.delegation-token.removal-interval-ms=30000
yarn.resourcemanager.application-tokens.master-key-rolling-interval-secs=86400
yarn.resourcemanager.container-tokens.master-key-rolling-interval-secs=86400
yarn.nodemanager.admin-env=MALLOC_ARENA_MAX=$MALLOC_ARENA_MAX
yarn.nodemanager.container-executor.class=org.apache.hadoop.yarn.server.nodemanager.DefaultContainerExecutor
yarn.nodemanager.container-manager.thread-count=20
yarn.nodemanager.delete.thread-count=4