Versions: Spark 2.2.0; Kafka 0.10.0.0
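Before running the demo, the build needs the Spark SQL, Hive, and Streaming modules plus the Kafka 0.10 integration. A minimal build.sbt sketch, assuming a Scala 2.11 build (the default for Spark 2.2.0); adjust to your own Scala version:

// build.sbt -- dependencies this demo assumes
libraryDependencies ++= Seq(
  "org.apache.spark" %% "spark-sql"                  % "2.2.0",
  "org.apache.spark" %% "spark-hive"                 % "2.2.0",  // for enableHiveSupport()
  "org.apache.spark" %% "spark-streaming"            % "2.2.0",
  "org.apache.spark" %% "spark-streaming-kafka-0-10" % "2.2.0"
)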
import org.apache.kafka.clients.consumer.ConsumerRecord
import org.apache.kafka.common.serialization.StringDeserializer
import org.apache.log4j.{Level, Logger}
import org.apache.spark.rdd.RDD
import org.apache.spark.sql.SparkSession
import org.apache.spark.streaming.dstream.InputDStream
import org.apache.spark.streaming.kafka010.{ConsumerStrategies, KafkaUtils, LocationStrategies}
import org.apache.spark.streaming.{Seconds, StreamingContext}

object StreamingDemo {
  def main(args: Array[String]): Unit = {
    // Quiet the noisiest loggers so application output stays readable.
    Logger.getLogger("org.apache.spark").setLevel(Level.WARN)
    Logger.getLogger("org.eclipse.jetty.server").setLevel(Level.WARN)
    Logger.getLogger("org.apache.kafka.clients.consumer").setLevel(Level.WARN)

    // Use the HDFS URI as-is; wrapping it in java.io.File and calling
    // getAbsolutePath would mangle it into a local filesystem path.
    val warehouseLocation = "hdfs://user/hive/warehouse"
    // A broker list normally names distinct hosts (host1:9092,host2:9092,...).
    val bootstrapServers = "192.168.156.111:9092,192.168.156.111:9092,192.168.156.111:9092"

    // Streaming settings are read from the SparkConf, so set them on the builder;
    // spark.conf.set(...) after getOrCreate() only updates the session's runtime
    // config and does not reach the SparkContext.
    val spark: SparkSession = SparkSession
      .builder()
      .appName("Spark SQL To Hive")
      .master("local[4]")
      .config("spark.sql.warehouse.dir", warehouseLocation)
      .config("spark.streaming.concurrentJobs", "10")
      .config("spark.streaming.kafka.maxRetries", "50")
      .config("spark.streaming.stopGracefullyOnShutdown", "true")
      .config("spark.streaming.backpressure.enabled", "true")
      .config("spark.streaming.backpressure.initialRate", "5000")
      .config("spark.streaming.kafka.maxRatePerPartition", "3000")
      .enableHiveSupport()
      .getOrCreate()

    val ssc = new StreamingContext(spark.sparkContext, Seconds(5)) // 5-second batches

    // Kafka params; note auto-commit acks offsets on receipt, not after processing.
    val kafkaParams = Map[String, Object](
      "bootstrap.servers" -> bootstrapServers,
      "key.deserializer" -> classOf[StringDeserializer],
      "value.deserializer" -> classOf[StringDeserializer],
      "group.id" -> "test-consumer-group",
      "auto.offset.reset" -> "latest",
      "enable.auto.commit" -> (true: java.lang.Boolean)
    )

    val topics = Array("test")
    val stream: InputDStream[ConsumerRecord[String, String]] =
      KafkaUtils.createDirectStream[String, String](
        ssc, LocationStrategies.PreferConsistent,
        ConsumerStrategies.Subscribe[String, String](topics, kafkaParams))

    stream.foreachRDD { rdd =>
      val values: RDD[String] = rdd.map(_.value()).cache() // cache() only pays off if reused
      values.foreach(println)
    }

    ssc.start()
    ssc.awaitTermination()
  }
}
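One caveat worth knowing: with enable.auto.commit set to true, the consumer acknowledges offsets as records are polled, so a batch that fails mid-processing can be skipped on restart. A minimal sketch of the manual-commit alternative from the kafka010 integration, reusing the stream value from the demo and assuming enable.auto.commit has been switched to false in kafkaParams:

import org.apache.spark.streaming.kafka010.{CanCommitOffsets, HasOffsetRanges}

stream.foreachRDD { rdd =>
  // capture this batch's offset ranges before any transformation
  val offsetRanges = rdd.asInstanceOf[HasOffsetRanges].offsetRanges
  rdd.map(_.value()).foreach(println) // process the batch first
  // commit only after processing succeeds; commits are asynchronous
  stream.asInstanceOf[CanCommitOffsets].commitAsync(offsetRanges)
}

This keeps offset progress in Kafka itself (no external store) and gives at-least-once semantics: a crash before commitAsync simply replays the batch.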