import org.apache.spark.{SparkConf, SparkContext}
import scala.util.parsing.json.JSON

/**
 * Created with IntelliJ IDEA.
 * User: @别慌
 * Date: 2019-11-24
 * Time: 18:04
 * Description: Reads a local JSON file and parses each line with scala.util.parsing.json.JSON.
 */
object hello {
  def main(args: Array[String]): Unit = {
    // Create the Spark context (local mode for development)
    val conf = new SparkConf()
      .setAppName("WordFreq_Spark")
      .setMaster("local")
    val sc = new SparkContext(conf)

    // Read the JSON file; each element of the RDD is one line of text
    val lines = sc.textFile("D:\\杂七杂八\\瞎画\\test.json")
    // JSON.parseFull returns Option[Any]: Some(value) on success, None for a malformed line
    val parsed = lines.map(JSON.parseFull)
    println(parsed.collect().mkString("\t"))
    // Note: foreach runs on the executors; in local mode the output goes to this console
    parsed.foreach {
      case Some(value) => println(value)
      case None        => println("invalid line")
    }
    sc.stop()

    // Cluster version: word count against HDFS
    // val rdd = sc.textFile("hdfs://192.168.199.120:9000/words.txt")
    // val wc = rdd.flatMap(_.split(" "))
    //   .map(s => (s, 1))
    //   .reduceByKey((a, b) => a + b)
    //   .sortBy(_._2, true)
    //
    // for (arg <- wc.collect())
    //   print(arg + " ")
    // println()
    // wc.saveAsTextFile("hdfs://192.168.199.120:9000/tai")
    // sc.stop
  }
}
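Because JSON.parseFull returns an Option[Any], getting at a concrete field means matching on the underlying Map. Below is a minimal sketch of such an extraction, continuing from the parsed RDD above (it would slot in before sc.stop()); the "name" field is a hypothetical example, not something from the original test.json:

    // Sketch only: "name" is a hypothetical field; adjust to the actual JSON schema.
    val names = parsed.flatMap {
      // The Map's type parameters are erased at runtime, hence @unchecked
      case Some(map: Map[String, Any] @unchecked) => map.get("name").map(_.toString)
      case _                                      => None // malformed or non-object lines
    }
    names.collect().foreach(println)

The flatMap over an Option works because Scala implicitly converts Option to an Iterable, so failed parses simply drop out of the resulting RDD.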
<?xml version="1.0" encoding="UTF-8"?>
<project xmlns="http://maven.apache.org/POM/4.0.0"
         xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
         xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
    <modelVersion>4.0.0</modelVersion>

    <groupId>com.syllabus</groupId>
    <artifactId>chapter-3</artifactId>
    <version>1.0-SNAPSHOT</version>

    <!-- Extra repository from which dependency JARs can be downloaded -->
    <repositories>
        <repository>
            <id>1</id>
            <name>MAVEN-CENTRE</name>
            <url>http://central.maven.org/maven2/</url>
        </repository>
    </repositories>

    <!-- Dependencies -->
    <dependencies>
        <dependency>
            <groupId>org.scala-lang</groupId>
            <artifactId>scala-library</artifactId>
            <version>2.11.12</version>
        </dependency>
        <!-- https://mvnrepository.com/artifact/org.apache.spark/spark-core -->
        <dependency>
            <groupId>org.apache.spark</groupId>
            <artifactId>spark-core_2.11</artifactId>
            <version>2.4.4</version>
            <!-- Exclude Spark's transitive Hadoop and Scala dependencies so our own versions can be added -->
            <exclusions>
                <exclusion>
                    <groupId>org.apache.hadoop</groupId>
                    <artifactId>hadoop-client</artifactId>
                </exclusion>
                <exclusion>
                    <groupId>org.scala-lang</groupId>
                    <artifactId>scala-library</artifactId>
                </exclusion>
            </exclusions>
        </dependency>
        <!-- Our own Hadoop version -->
        <dependency>
            <groupId>org.apache.hadoop</groupId>
            <artifactId>hadoop-client</artifactId>
            <version>2.9.0</version>
        </dependency>
    </dependencies>

    <!-- Plugin for compiling the Scala code -->
    <build>
        <plugins>
            <plugin>
                <groupId>org.scala-tools</groupId>
                <artifactId>maven-scala-plugin</artifactId>
                <version>2.15.2</version>
                <executions>
                    <execution>
                        <id>scala-compile-first</id>
                        <goals>
                            <goal>compile</goal>
                        </goals>
                        <configuration>
                            <includes>
                                <include>**/*.scala</include>
                            </includes>
                        </configuration>
                    </execution>
                    <execution>
                        <id>scala-test-compile</id>
                        <goals>
                            <goal>testCompile</goal>
                        </goals>
                    </execution>
                </executions>
            </plugin>
        </plugins>
    </build>
</project>
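With this POM in place, the application JAR is built with Maven before submission. A typical invocation (a sketch; the output file name follows the artifactId and version above, while the spark-submit command below uses a JAR called comspark.jar, so the file was presumably renamed after building):

    mvn clean package
    # produces target/chapter-3-1.0-SNAPSHOT.jar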
When submitting to the cluster, the shell command is:

    ./bin/spark-submit --class hello --master spark://192.168.199.120:7077 --executor-memory 1G --total-executor-cores 3 comspark.jar

Here the Spark master port is 7077 and the HDFS port is 9000.
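Once the cluster job has written its word-count result with saveAsTextFile, the output can be inspected with the HDFS CLI. A sketch, assuming the output path hdfs://192.168.199.120:9000/tai from the commented-out code above:

    ./bin/hdfs dfs -cat hdfs://192.168.199.120:9000/tai/part-*

Each part-* file holds one partition of the (word, count) pairs, sorted by count.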