I am trying to run a Hive SQL query from a Spark Scala application, and I get the error "No plan for HiveTableRelation" when the application queries a table stored on S3. Here is the code:

package com.testapp.data

import org.apache.log4j.{Logger, Level}
import com.amazonaws.auth.{AWSCredentials, BasicSessionCredentials, DefaultAWSCredentialsProviderChain}
import play.api.libs.json.{JsObject, JsString, JsValue, Json}
import org.apache.spark._
import org.apache.spark.SparkContext._
import org.apache.spark.sql.SparkSession
import org.jets3t.service.S3Service
import scala.sys.process._
import java.io.File
import org.apache.spark.sql.hive.HiveContext

object TestEnrich {

  def main(args: Array[String]) {
    Logger.getRootLogger.setLevel(Level.INFO)
    val controllerLogger = Logger.getLogger(this.getClass)

    val dt = args(0)
    val tm = args(1)
    println(s"enrich request $dt, $tm")
    val sparkConfig = new SparkConf()
      .setAppName("enricher")
      .set("spark.serializer", "org.apache.spark.serializer.KryoSerializer")
      .registerKryoClasses {
        Array(
          classOf[AWSCredentials],
          classOf[BasicSessionCredentials],
          classOf[DefaultAWSCredentialsProviderChain]
        )
      }

    val sparkContext = SparkContext.getOrCreate(sparkConfig)
    val spark = SparkSession.builder.config(sparkContext.getConf).enableHiveSupport().getOrCreate()

    spark.sqlContext.setConf("spark.sql.caseSensitive", "true")

    spark.sqlContext.setConf("javax.jdo.option.ConnectionURL", "xxxx")
    spark.sqlContext.setConf("javax.jdo.option.ConnectionDriverName", "com.mysql.jdbc.Driver")
    spark.sqlContext.setConf("javax.jdo.option.ConnectionUserName", "xxxx")
    spark.sqlContext.setConf("javax.jdo.option.ConnectionPassword", "xxxx")

    spark.sparkContext.hadoopConfiguration.set("fs.s3.impl", "org.apache.hadoop.fs.s3a.S3AFileSystem")
    spark.sparkContext.hadoopConfiguration.set("fs.s3.access.key", "xxxx");
    spark.sparkContext.hadoopConfiguration.set("fs.s3.secret.key", "xxxx");

    SparkSession.setDefaultSession(spark)
    SparkSession.clearActiveSession()

    import spark.sql
    import spark.implicits._

    val hiveContext = new org.apache.spark.sql.hive.HiveContext(spark.sparkContext)
    import hiveContext._
    hiveContext.sql("show databases").show()
    hiveContext.sql("use production")
    hiveContext.sql("show tables").show()
    // error is thrown in next line
    val data = hiveContext.sql(s"select * from raw_by_ts_events_nrt where dt='$dt' and tm='$tm' limit 5")
    data.show()

    sparkContext.stop()
  }
}

Here is the table creation statement (Presto-style DDL; in the Hive metastore it corresponds to an external ORC table on S3, partitioned by dt and tm):
CREATE TABLE hive.production.raw_by_ts_events_nrt (
   ts bigint,
   batchts bigint,
   eventid varchar,
   userid varchar,
   ...
   dt varchar,
   tm varchar
)
WITH (
   external_location = 's3a://pb-prod-raw-by-ts-events-nrt/',
   format = 'ORC',
   partitioned_by = ARRAY['dt','tm']
)

Here is the log:
18/08/28 06:50:05 INFO SessionState: Created local directory: /tmp/8efbfe25-22d0-43f2-8c65-9d4d27b1cb97_resources
18/08/28 06:50:05 INFO SessionState: Created HDFS directory: /tmp/hive/hadoop/8efbfe25-22d0-43f2-8c65-9d4d27b1cb97
18/08/28 06:50:05 INFO SessionState: Created local directory: /tmp/hadoop/8efbfe25-22d0-43f2-8c65-9d4d27b1cb97
18/08/28 06:50:05 INFO SessionState: Created HDFS directory: /tmp/hive/hadoop/8efbfe25-22d0-43f2-8c65-9d4d27b1cb97/_tmp_space.db
18/08/28 06:50:05 INFO HiveClientImpl: Warehouse location for Hive client (version 1.2.2) is hdfs:///user/spark/warehouse
18/08/28 06:50:06 INFO CodeGenerator: Code generated in 258.835732 ms
18/08/28 06:50:06 INFO CodeGenerator: Code generated in 15.359587 ms
+------------+
|databaseName|
+------------+
|     default|
|  production|
+------------+

18/08/28 06:50:06 INFO CodeGenerator: Code generated in 11.998794 ms
++
||
++
++

18/08/28 06:50:06 INFO CodeGenerator: Code generated in 22.778824 ms
18/08/28 06:50:06 INFO CodeGenerator: Code generated in 16.995158 ms
+----------+--------------------+-----------+
|  database|           tableName|isTemporary|
+----------+--------------------+-----------+
|production|raw_by_ts_events_nrt|      false|
+----------+--------------------+-----------+

18/08/28 06:50:06 INFO ContextCleaner: Cleaned accumulator 1
18/08/28 06:50:06 INFO ContextCleaner: Cleaned accumulator 2
18/08/28 06:50:06 INFO ContextCleaner: Cleaned accumulator 0
18/08/28 06:50:07 WARN Utils: Truncated the string representation of a plan since it was too large. This behavior can be adjusted by setting 'spark.debug.maxToStringFields' in SparkEnv.conf.
Exception in thread "main" java.lang.AssertionError: assertion failed: No plan for HiveTableRelation `production`.`raw_by_ts_events_nrt`, org.apache.hadoop.hive.ql.io.orc.OrcSerde, [ts#26L, batchts#27L, eventid#28, userid#29, eventname#30, pageloaduid#31, deltatime#32, adaction#33, adduration#34, aderrordescription#35, adispreload#36, admoduleisloaded#37, adnetwork#38, adplacement#39, adplayer#40, adprogress#41, adrejectreason#42, adtag#43, adtargeting#44, adtype#45, aduuid#46, articlecanonicalurl#47, articleformat#48, articleid#49, ... 113 more fields], [dt#163, tm#164]

        at scala.Predef$.assert(Predef.scala:170)
        at org.apache.spark.sql.catalyst.planning.QueryPlanner.plan(QueryPlanner.scala:93)
        at org.apache.spark.sql.catalyst.planning.QueryPlanner$$anonfun$2$$anonfun$apply$2.apply(QueryPlanner.scala:78)
        at org.apache.spark.sql.catalyst.planning.QueryPlanner$$anonfun$2$$anonfun$apply$2.apply(QueryPlanner.scala:75)
        at scala.collection.TraversableOnce$$anonfun$foldLeft$1.apply(TraversableOnce.scala:157)
        at scala.collection.TraversableOnce$$anonfun$foldLeft$1.apply(TraversableOnce.scala:157)
        at scala.collection.Iterator$class.foreach(Iterator.scala:893)
        at scala.collection.AbstractIterator.foreach(Iterator.scala:1336)
        at scala.collection.TraversableOnce$class.foldLeft(TraversableOnce.scala:157)
        at scala.collection.AbstractIterator.foldLeft(Iterator.scala:1336)
        at org.apache.spark.sql.catalyst.planning.QueryPlanner$$anonfun$2.apply(QueryPlanner.scala:75)
        at org.apache.spark.sql.catalyst.planning.QueryPlanner$$anonfun$2.apply(QueryPlanner.scala:67)
        at scala.collection.Iterator$$anon$12.nextCur(Iterator.scala:434)
        at scala.collection.Iterator$$anon$12.hasNext(Iterator.scala:440)
        at org.apache.spark.sql.catalyst.planning.QueryPlanner.plan(QueryPlanner.scala:93)
        at org.apache.spark.sql.catalyst.planning.QueryPlanner$$anonfun$2$$anonfun$apply$2.apply(QueryPlanner.scala:78)
        at org.apache.spark.sql.catalyst.planning.QueryPlanner$$anonfun$2$$anonfun$apply$2.apply(QueryPlanner.scala:75)
        at scala.collection.TraversableOnce$$anonfun$foldLeft$1.apply(TraversableOnce.scala:157)
        at scala.collection.TraversableOnce$$anonfun$foldLeft$1.apply(TraversableOnce.scala:157)
        at scala.collection.Iterator$class.foreach(Iterator.scala:893)
        at scala.collection.AbstractIterator.foreach(Iterator.scala:1336)
        at scala.collection.TraversableOnce$class.foldLeft(TraversableOnce.scala:157)
        at scala.collection.AbstractIterator.foldLeft(Iterator.scala:1336)
        at org.apache.spark.sql.catalyst.planning.QueryPlanner$$anonfun$2.apply(QueryPlanner.scala:75)
        at org.apache.spark.sql.catalyst.planning.QueryPlanner$$anonfun$2.apply(QueryPlanner.scala:67)
        at scala.collection.Iterator$$anon$12.nextCur(Iterator.scala:434)
        at scala.collection.Iterator$$anon$12.hasNext(Iterator.scala:440)
        at org.apache.spark.sql.catalyst.planning.QueryPlanner.plan(QueryPlanner.scala:93)
        at org.apache.spark.sql.catalyst.planning.QueryPlanner$$anonfun$2$$anonfun$apply$2.apply(QueryPlanner.scala:78)
        at org.apache.spark.sql.catalyst.planning.QueryPlanner$$anonfun$2$$anonfun$apply$2.apply(QueryPlanner.scala:75)
        at scala.collection.TraversableOnce$$anonfun$foldLeft$1.apply(TraversableOnce.scala:157)
        at scala.collection.TraversableOnce$$anonfun$foldLeft$1.apply(TraversableOnce.scala:157)
        at scala.collection.Iterator$class.foreach(Iterator.scala:893)
        at scala.collection.AbstractIterator.foreach(Iterator.scala:1336)
        at scala.collection.TraversableOnce$class.foldLeft(TraversableOnce.scala:157)
        at scala.collection.AbstractIterator.foldLeft(Iterator.scala:1336)
        at org.apache.spark.sql.catalyst.planning.QueryPlanner$$anonfun$2.apply(QueryPlanner.scala:75)
        at org.apache.spark.sql.catalyst.planning.QueryPlanner$$anonfun$2.apply(QueryPlanner.scala:67)
        at scala.collection.Iterator$$anon$12.nextCur(Iterator.scala:434)
        at scala.collection.Iterator$$anon$12.hasNext(Iterator.scala:440)
        at org.apache.spark.sql.catalyst.planning.QueryPlanner.plan(QueryPlanner.scala:93)
        at org.apache.spark.sql.catalyst.planning.QueryPlanner$$anonfun$2$$anonfun$apply$2.apply(QueryPlanner.scala:78)
        at org.apache.spark.sql.catalyst.planning.QueryPlanner$$anonfun$2$$anonfun$apply$2.apply(QueryPlanner.scala:75)
        at scala.collection.TraversableOnce$$anonfun$foldLeft$1.apply(TraversableOnce.scala:157)
        at scala.collection.TraversableOnce$$anonfun$foldLeft$1.apply(TraversableOnce.scala:157)
        at scala.collection.Iterator$class.foreach(Iterator.scala:893)
        at scala.collection.AbstractIterator.foreach(Iterator.scala:1336)
        at scala.collection.TraversableOnce$class.foldLeft(TraversableOnce.scala:157)
        at scala.collection.AbstractIterator.foldLeft(Iterator.scala:1336)
        at org.apache.spark.sql.catalyst.planning.QueryPlanner$$anonfun$2.apply(QueryPlanner.scala:75)
        at org.apache.spark.sql.catalyst.planning.QueryPlanner$$anonfun$2.apply(QueryPlanner.scala:67)
        at scala.collection.Iterator$$anon$12.nextCur(Iterator.scala:434)
        at scala.collection.Iterator$$anon$12.hasNext(Iterator.scala:440)
        at org.apache.spark.sql.catalyst.planning.QueryPlanner.plan(QueryPlanner.scala:93)
        at org.apache.spark.sql.catalyst.planning.QueryPlanner$$anonfun$2$$anonfun$apply$2.apply(QueryPlanner.scala:78)
        at org.apache.spark.sql.catalyst.planning.QueryPlanner$$anonfun$2$$anonfun$apply$2.apply(QueryPlanner.scala:75)
        at scala.collection.TraversableOnce$$anonfun$foldLeft$1.apply(TraversableOnce.scala:157)
        at scala.collection.TraversableOnce$$anonfun$foldLeft$1.apply(TraversableOnce.scala:157)
        at scala.collection.Iterator$class.foreach(Iterator.scala:893)
        at scala.collection.AbstractIterator.foreach(Iterator.scala:1336)
        at scala.collection.TraversableOnce$class.foldLeft(TraversableOnce.scala:157)
        at scala.collection.AbstractIterator.foldLeft(Iterator.scala:1336)
        at org.apache.spark.sql.catalyst.planning.QueryPlanner$$anonfun$2.apply(QueryPlanner.scala:75)
        at org.apache.spark.sql.catalyst.planning.QueryPlanner$$anonfun$2.apply(QueryPlanner.scala:67)
        at scala.collection.Iterator$$anon$12.nextCur(Iterator.scala:434)
        at scala.collection.Iterator$$anon$12.hasNext(Iterator.scala:440)
        at org.apache.spark.sql.catalyst.planning.QueryPlanner.plan(QueryPlanner.scala:93)
        at org.apache.spark.sql.execution.QueryExecution.sparkPlan$lzycompute(QueryExecution.scala:72)
        at org.apache.spark.sql.execution.QueryExecution.sparkPlan(QueryExecution.scala:68)
        at org.apache.spark.sql.execution.QueryExecution.executedPlan$lzycompute(QueryExecution.scala:77)
        at org.apache.spark.sql.execution.QueryExecution.executedPlan(QueryExecution.scala:77)
        at org.apache.spark.sql.Dataset.withAction(Dataset.scala:3249)
        at org.apache.spark.sql.Dataset.head(Dataset.scala:2484)
        at org.apache.spark.sql.Dataset.take(Dataset.scala:2698)
        at org.apache.spark.sql.Dataset.showString(Dataset.scala:254)
        at org.apache.spark.sql.Dataset.show(Dataset.scala:723)
        at org.apache.spark.sql.Dataset.show(Dataset.scala:682)
        at org.apache.spark.sql.Dataset.show(Dataset.scala:691)
        at com.playbuzz.data.TestEnrich$.main(TestEnrich.scala:90)
        at com.playbuzz.data.TestEnrich.main(TestEnrich.scala)
        at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
        at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)
        at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
        at java.lang.reflect.Method.invoke(Method.java:498)
        at org.apache.spark.deploy.JavaMainApplication.start(SparkApplication.scala:52)
        at org.apache.spark.deploy.SparkSubmit$.org$apache$spark$deploy$SparkSubmit$$runMain(SparkSubmit.scala:894)
        at org.apache.spark.deploy.SparkSubmit$.doRunMain$1(SparkSubmit.scala:198)
        at org.apache.spark.deploy.SparkSubmit$.submit(SparkSubmit.scala:228)
        at org.apache.spark.deploy.SparkSubmit$.main(SparkSubmit.scala:137)
        at org.apache.spark.deploy.SparkSubmit.main(SparkSubmit.scala)

As you can see, all the preceding statements execute successfully, but it fails on the select. Any help would be appreciated.

Best answer

If you are launching the application with spark-submit, you must include the spark.sql.catalogImplementation=hive configuration, like this:

spark-submit --master yarn --deploy-mode cluster --conf spark.sql.catalogImplementation=hive yourApplication.jar
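
If you are creating the session in code, as in the question (where a SparkContext is built first and the SparkSession is created from its conf afterwards), an alternative worth trying is to make sure the setting is in place before any SparkContext exists. Below is a minimal sketch, assuming Spark 2.x: enableHiveSupport() sets the same spark.sql.catalogImplementation=hive key, but it is a static setting that the session reads from the SparkContext's configuration, so it may have no effect if the context was already created without it.

import org.apache.spark.SparkConf
import org.apache.spark.sql.SparkSession

// Minimal sketch (assumes Spark 2.x): put the static conf on the SparkConf
// before any SparkContext exists...
val sparkConfig = new SparkConf()
  .setAppName("enricher")
  .set("spark.sql.catalogImplementation", "hive")

// ...and let the builder create the context itself. enableHiveSupport()
// sets the same spark.sql.catalogImplementation=hive key on the builder.
val spark = SparkSession.builder
  .config(sparkConfig)
  .enableHiveSupport()
  .getOrCreate()

// Sanity check: should print "hive" rather than "in-memory".
println(spark.conf.get("spark.sql.catalogImplementation"))

This is also why the --conf flag above works: spark-submit puts the key into the SparkConf before the driver's SparkContext starts.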

Regarding "scala - java.lang.AssertionError: assertion failed: No plan for HiveTableRelation", we found a similar question on Stack Overflow: https://stackoverflow.com/questions/52052179/
