1.安装ubuntu-server14.04
2.ubuntu-server14.04安装docker
  sudo apt-get install docker.io    # 注意:Ubuntu下包名是docker.io(docker-io是CentOS/yum的包名)
3.pull hadoop-docker镜像
  docker pull sequenceiq/hadoop-docker:2.7.1
  docker run -it sequenceiq/hadoop-docker:2.7.1 /etc/bootstrap.sh -bash
  参考 https://github.com/sequenceiq/hadoop-docker

  https://hub.docker.com/r/uhopper/hadoop/ 这个好像已经带spark了

增加部署spark
4.yum install wget
5.下载安装spark
  wget http://d3kbcqa49mib13.cloudfront.net/spark-1.5.2-bin-without-hadoop.tgz
  mv spark-1.5.2-bin-without-hadoop.tgz /usr/local
  cd /usr/local
  tar -xf spark-1.5.2-bin-without-hadoop.tgz
  mv spark-1.5.2-bin-without-hadoop spark
  vi /etc/profile,增加:
    export CLASSPATH=.:$JAVA_HOME/lib/dt.jar:$JAVA_HOME/lib/tools.jar
    export SPARK_HOME=/usr/local/spark
    export PATH=$PATH:$SPARK_HOME/bin:$SPARK_HOME/sbin:$JAVA_HOME/bin
    export SPARK_DIST_CLASSPATH=$(/usr/local/hadoop/bin/hadoop classpath)
  source /etc/profile
  run-example SparkPi 10
  pyspark
6.保存镜像
  exit
  docker commit <容器id> hadoop-docker-spark
7.打开镜像
  sudo docker run -ti hadoop-docker-spark /etc/bootstrap.sh -bash
  source /etc/profile
  run-example SparkPi 10
  pyspark

测试:
  cd /usr/local/hadoop
  bin/hdfs dfs -put datafile /datafile
  pyspark
  >>> lines = sc.textFile("/datafile")
  >>> lines.count()
  >>> lines.first()
09-25 13:18
查看更多