Reading Hive Data with Spark and Solving Related Problems

  1. Example Code
    1. SparkHiveAPP main class

      Note:
      core-site.xml, hdfs-site.xml, yarn-site.xml, mapred-site.xml, and hive-site.xml must be placed under the resources directory; the program needs these configuration files at runtime.
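
      For reference, one possible layout (a sketch only; the env/ subdirectories correspond to the Maven profiles and resource filtering configured in the pom.xml below):

      src/main/resources/
          core-site.xml
          hdfs-site.xml
          yarn-site.xml
          mapred-site.xml
          hive-site.xml
          env/
              dev/     <- per-environment overrides, selected via profile.active
              test/
              prod/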

      import org.apache.log4j.{Level, Logger}
      import org.apache.spark.SparkConf
      import org.apache.spark.sql.SparkSession
      
      object SparkHiveAPP {
      
        def main(args: Array[String]): Unit = {
      
          Logger.getLogger("org").setLevel(Level.WARN)
          
          /**
            * Without System.setProperty("HADOOP_USER_NAME", "root"), the job fails with:
            * org.apache.hadoop.security.AccessControlException: Permission denied
            */
          System.setProperty("HADOOP_USER_NAME", "root")
          val conf = new SparkConf()
            .setIfMissing("spark.master", "local[2]")
            .set("spark.sql.warehouse.dir", "/user/hive/warehouse")
            .setAppName("Spark_Hive_APP")
      
          val spark: SparkSession = SparkSession.builder().config(conf)
            .enableHiveSupport()
            .getOrCreate()
      
          spark.sparkContext.setLogLevel("WARN")
      
          spark.sql("SELECT * FROM test.test1").show()
      
        }
      }
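
      The same table can also be read through the DataFrame API instead of raw SQL. Below is a minimal sketch (it assumes the same test.test1 table and the configuration files described above; the object name is illustrative):

      import org.apache.spark.sql.SparkSession

      object SparkHiveTableAPI {

        def main(args: Array[String]): Unit = {

          // Same prerequisite as above: avoids AccessControlException on HDFS
          System.setProperty("HADOOP_USER_NAME", "root")

          val spark: SparkSession = SparkSession.builder()
            .master("local[2]")
            .appName("Spark_Hive_Table_API")
            .enableHiveSupport()
            .getOrCreate()

          // spark.table resolves the name through the Hive metastore,
          // equivalent to spark.sql("SELECT * FROM test.test1")
          val df = spark.table("test.test1")
          df.printSchema()
          df.filter("id > 1").show()

          spark.stop()
        }
      }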
      
    2. pom.xml file
      <?xml version="1.0" encoding="UTF-8"?>
      <project xmlns="http://maven.apache.org/POM/4.0.0"
               xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
               xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
          <modelVersion>4.0.0</modelVersion>
          <groupId>com.cloudera</groupId>
          <artifactId>RemoteSubmitSparkToYarn</artifactId>
          <version>1.0-SNAPSHOT</version>
      
          <packaging>jar</packaging>
          <name>RemoteSubmitSparkToYarn</name>
      
          <repositories>
              <!-- Cloudera repository -->
              <repository>
                  <id>cloudera</id>
                  <url>https://repository.cloudera.com/artifactory/cloudera-repos/</url>
                  <name>Cloudera Repositories</name>
                  <releases>
                      <enabled>true</enabled>
                  </releases>
                  <snapshots>
                      <enabled>false</enabled>
                  </snapshots>
              </repository>
          </repositories>
      
          <properties>
              <project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
              <project.reporting.outputEncoding>UTF-8</project.reporting.outputEncoding>
              <java.version>1.8</java.version>
              <scala.version>2.11.12</scala.version>
              <hbase.version>1.3.0</hbase.version>
              <!--<spark.version>2.4.0-cdh6.1.1</spark.version>-->
              <hive.version>1.2.0</hive.version>
              <kafka.version>0.10.0.1</kafka.version>
              <spark.version>2.2.0</spark.version>
              <kafka.scope>compile</kafka.scope>
              <provided.scope>compile</provided.scope>
          </properties>
      
          <dependencies>
      
              <!-- HBase -->
              <!--<dependency>-->
              <!--<groupId>org.apache.hbase</groupId>-->
              <!--<artifactId>hbase-client</artifactId>-->
              <!--<version>${hbase.version}</version>-->
              <!--</dependency>-->
              <!--<dependency>-->
              <!--<groupId>org.apache.hbase</groupId>-->
              <!--<artifactId>hbase-server</artifactId>-->
              <!--<version>${hbase.version}</version>-->
              <!--<scope>${provided.scope}</scope>-->
              <!--</dependency>-->
      
              <!-- scala -->
              <dependency>
                  <groupId>org.scala-lang</groupId>
                  <artifactId>scala-library</artifactId>
                  <version>${scala.version}</version>
                  <scope>${provided.scope}</scope>
              </dependency>
              <dependency>
                  <groupId>org.scala-lang</groupId>
                  <artifactId>scala-compiler</artifactId>
                  <version>${scala.version}</version>
                  <scope>${provided.scope}</scope>
              </dependency>
              <dependency>
                  <groupId>org.scala-lang</groupId>
                  <artifactId>scala-reflect</artifactId>
                  <version>${scala.version}</version>
                  <scope>${provided.scope}</scope>
              </dependency>
              <dependency>
                  <groupId>org.apache.spark</groupId>
                  <artifactId>spark-core_2.11</artifactId>
                  <version>${spark.version}</version>
                  <exclusions>
                      <exclusion>
                          <groupId>org.glassfish.jersey.bundles.repackaged</groupId>
                          <artifactId>jersey-guava</artifactId>
                      </exclusion>
                  </exclusions>
                  <scope>${provided.scope}</scope>
              </dependency>
              <dependency>
                  <groupId>org.apache.spark</groupId>
                  <artifactId>spark-streaming_2.11</artifactId>
                  <version>${spark.version}</version>
                  <scope>${provided.scope}</scope>
              </dependency>
              <dependency>
                  <groupId>org.apache.spark</groupId>
                  <artifactId>spark-sql_2.11</artifactId>
                  <version>${spark.version}</version>
                  <scope>${provided.scope}</scope>
              </dependency>
              <dependency>
                  <groupId>org.apache.spark</groupId>
                  <artifactId>spark-hive_2.11</artifactId>
                  <version>${spark.version}</version>
                  <scope>${provided.scope}</scope>
              </dependency>
              <dependency>
                  <groupId>org.apache.hive</groupId>
                  <artifactId>hive-exec</artifactId>
                  <version>${hive.version}</version>
              </dependency>
              <dependency>
                  <groupId>org.apache.spark</groupId>
                  <artifactId>spark-yarn_2.11</artifactId>
                  <version>${spark.version}</version>
                  <scope>${provided.scope}</scope>
              </dependency>
              <dependency>
                  <groupId>org.apache.spark</groupId>
                  <artifactId>spark-sql-kafka-0-10_2.11</artifactId>
                  <version>${spark.version}</version>
                  <scope>${provided.scope}</scope>
              </dependency>
              <dependency>
                  <groupId>org.apache.spark</groupId>
                  <artifactId>spark-streaming-kafka-0-10_2.11</artifactId>
                  <version>${spark.version}</version>
                  <scope>${provided.scope}</scope>
              </dependency>
              <dependency>
                  <groupId>org.apache.kafka</groupId>
                  <artifactId>kafka_2.11</artifactId>
                  <version>${kafka.version}</version>
                  <scope>${kafka.scope}</scope>
              </dependency>
              <dependency>
                  <groupId>org.apache.kafka</groupId>
                  <artifactId>kafka-clients</artifactId>
                  <version>0.10.0.1</version>
                  <scope>${kafka.scope}</scope>
              </dependency>
          </dependencies>
      
          <build>
              <pluginManagement>
                  <plugins>
                      <plugin>
                          <groupId>org.apache.maven.plugins</groupId>
                          <artifactId>maven-compiler-plugin</artifactId>
                          <version>3.8.0</version>
                          <configuration>
                              <source>1.8</source>
                              <target>1.8</target>
                          </configuration>
                      </plugin>
                      <plugin>
                          <groupId>org.apache.maven.plugins</groupId>
                          <artifactId>maven-resources-plugin</artifactId>
                          <version>3.0.2</version>
                          <configuration>
                              <encoding>UTF-8</encoding>
                          </configuration>
                      </plugin>
                      <plugin>
                          <groupId>net.alchim31.maven</groupId>
                          <artifactId>scala-maven-plugin</artifactId>
                          <version>3.2.2</version>
                          <executions>
                              <execution>
                                  <goals>
                                      <goal>compile</goal>
                                      <goal>testCompile</goal>
                                  </goals>
                              </execution>
                          </executions>
                      </plugin>
                  </plugins>
              </pluginManagement>
              <plugins>
                  <plugin>
                      <groupId>net.alchim31.maven</groupId>
                      <artifactId>scala-maven-plugin</artifactId>
                      <executions>
                          <execution>
                              <id>scala-compile-first</id>
                              <phase>process-resources</phase>
                              <goals>
                                  <goal>add-source</goal>
                                  <goal>compile</goal>
                              </goals>
                          </execution>
                          <execution>
                              <id>scala-test-compile</id>
                              <phase>process-test-resources</phase>
                              <goals>
                                  <goal>testCompile</goal>
                              </goals>
                          </execution>
                      </executions>
                  </plugin>
      
                  <plugin>
                      <groupId>org.apache.maven.plugins</groupId>
                      <artifactId>maven-compiler-plugin</artifactId>
                      <executions>
                          <execution>
                              <phase>compile</phase>
                              <goals>
                                  <goal>compile</goal>
                              </goals>
                          </execution>
                      </executions>
                  </plugin>
      
                  <plugin>
                      <groupId>org.apache.maven.plugins</groupId>
                      <artifactId>maven-shade-plugin</artifactId>
                      <version>2.4.3</version>
                      <executions>
                          <execution>
                              <phase>package</phase>
                              <goals>
                                  <goal>shade</goal>
                              </goals>
                              <configuration>
                                  <filters>
                                      <filter>
                                          <artifact>*:*</artifact>
                                          <excludes>
                                              <exclude>META-INF/*.SF</exclude>
                                              <exclude>META-INF/*.DSA</exclude>
                                              <exclude>META-INF/*.RSA</exclude>
                                          </excludes>
                                      </filter>
                                  </filters>
                              </configuration>
                          </execution>
                      </executions>
                  </plugin>
              </plugins>
              <resources>
                  <resource>
                      <directory>${basedir}/src/main/resources</directory>
                      <excludes>
                          <exclude>env/*/*</exclude>
                      </excludes>
                      <includes>
                          <include>**/*</include>
                      </includes>
                  </resource>
                  <resource>
                      <directory>${basedir}/src/main/resources/env/${profile.active}</directory>
                      <includes>
                          <include>**/*.properties</include>
                          <include>**/*.xml</include>
                      </includes>
                  </resource>
              </resources>
          </build>
          <profiles>
              <profile>
                  <id>dev</id>
                  <properties>
                      <profile.active>dev</profile.active>
                  </properties>
                  <activation>
                      <activeByDefault>true</activeByDefault>
                  </activation>
              </profile>
              <profile>
                  <id>test</id>
                  <properties>
                      <profile.active>test</profile.active>
                  </properties>
              </profile>
              <profile>
                  <id>prod</id>
                  <properties>
                      <profile.active>prod</profile.active>
                  </properties>
              </profile>
          </profiles>
      </project>
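
      With the dev/test/prod profiles defined above, the environment whose resources get packaged is chosen at build time with Maven's -P flag (dev is the default via activeByDefault):

      mvn clean package            # default: dev profile
      mvn clean package -Ptest     # package test environment resources
      mvn clean package -Pprod     # package prod environment resources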
      
    3. Run output
      Using Spark's default log4j profile: org/apache/spark/log4j-defaults.properties
      18/06/27 10:30:40 INFO metastore: Trying to connect to metastore with URI thrift://cdh01:9083
      18/06/27 10:30:41 WARN ShellBasedUnixGroupsMapping: got exception trying to get groups for user root: GetLocalGroupsForUser error (1332): ?????????????????
      
      
      
      18/06/27 10:30:41 WARN UserGroupInformation: No groups available for user root
      18/06/27 10:30:41 INFO metastore: Connected to metastore.
      18/06/27 10:30:42 WARN NativeCodeLoader: Unable to load native-hadoop library for your platform... using builtin-java classes where applicable
      18/06/27 10:30:42 WARN UserGroupInformation: No groups available for user root
      18/06/27 10:30:42 WARN UserGroupInformation: No groups available for user root
      18/06/27 10:30:42 WARN UserGroupInformation: No groups available for user root
      18/06/27 10:30:42 WARN UserGroupInformation: No groups available for user root
      +---+--------+------------+
      | id|    name|       hobby|
      +---+--------+------------+
      |  1|zhangsan|[唱歌, 跳舞, 游泳]|
      |  2|    lisi|   [打遊戲, 籃球]|
      |  3|  wangwu|    [唱歌, 游泳]|
      +---+--------+------------+
      Process finished with exit code 0
      
  2. Problems Encountered
    1. winutils binary not found locally

      Error log:

      18/06/27 10:35:18 ERROR Shell: Failed to locate the winutils binary in the hadoop binary path
      java.io.IOException: Could not locate executable null\bin\winutils.exe in the Hadoop binaries.
          at org.apache.hadoop.util.Shell.getQualifiedBinPath(Shell.java:378)
          at org.apache.hadoop.util.Shell.getWinUtilsPath(Shell.java:393)
          at org.apache.hadoop.util.Shell.getGroupsForUserCommand(Shell.java:163)
          at org.apache.hadoop.security.ShellBasedUnixGroupsMapping.getUnixGroups(ShellBasedUnixGroupsMapping.java:84)
          at org.apache.hadoop.security.ShellBasedUnixGroupsMapping.getGroups(ShellBasedUnixGroupsMapping.java:52)
          at org.apache.hadoop.security.Groups$GroupCacheLoader.fetchGroupList(Groups.java:231)
          at org.apache.hadoop.security.Groups$GroupCacheLoader.load(Groups.java:211)
          at org.apache.hadoop.security.Groups$GroupCacheLoader.load(Groups.java:199)
          at com.google.common.cache.LocalCache$LoadingValueReference.loadFuture(LocalCache.java:3524)
          at com.google.common.cache.LocalCache$Segment.loadSync(LocalCache.java:2317)
          at com.google.common.cache.LocalCache$Segment.lockedGetOrLoad(LocalCache.java:2280)
          at com.google.common.cache.LocalCache$Segment.get(LocalCache.java:2195)
          at com.google.common.cache.LocalCache.get(LocalCache.java:3934)
          at com.google.common.cache.LocalCache.getOrLoad(LocalCache.java:3938)
          at com.google.common.cache.LocalCache$LocalLoadingCache.get(LocalCache.java:4821)
          at org.apache.hadoop.security.Groups.getGroups(Groups.java:173)
          at org.apache.hadoop.security.UserGroupInformation.getGroupNames(UserGroupInformation.java:1552)
          at org.apache.hadoop.hive.metastore.HiveMetaStoreClient.open(HiveMetaStoreClient.java:436)
          at org.apache.hadoop.hive.metastore.HiveMetaStoreClient.<init>(HiveMetaStoreClient.java:236)
          at org.apache.hadoop.hive.ql.metadata.SessionHiveMetaStoreClient.<init>(SessionHiveMetaStoreClient.java:74)
          at sun.reflect.NativeConstructorAccessorImpl.newInstance0(Native Method)
          at sun.reflect.NativeConstructorAccessorImpl.newInstance(NativeConstructorAccessorImpl.java:62)
          at sun.reflect.DelegatingConstructorAccessorImpl.newInstance(DelegatingConstructorAccessorImpl.java:45)
          at java.lang.reflect.Constructor.newInstance(Constructor.java:423)
          at org.apache.hadoop.hive.metastore.MetaStoreUtils.newInstance(MetaStoreUtils.java:1521)
          at org.apache.hadoop.hive.metastore.RetryingMetaStoreClient.<init>(RetryingMetaStoreClient.java:86)
          at org.apache.hadoop.hive.metastore.RetryingMetaStoreClient.getProxy(RetryingMetaStoreClient.java:132)
          at org.apache.hadoop.hive.metastore.RetryingMetaStoreClient.getProxy(RetryingMetaStoreClient.java:104)
          at org.apache.hadoop.hive.ql.metadata.Hive.createMetaStoreClient(Hive.java:3005)
          at org.apache.hadoop.hive.ql.metadata.Hive.getMSC(Hive.java:3024)
          at org.apache.hadoop.hive.ql.metadata.Hive.getAllDatabases(Hive.java:1234)
          at org.apache.hadoop.hive.ql.metadata.Hive.reloadFunctions(Hive.java:174)
          at org.apache.hadoop.hive.ql.metadata.Hive.<clinit>(Hive.java:166)
          at org.apache.hadoop.hive.ql.session.SessionState.start(SessionState.java:503)
          at org.apache.spark.sql.hive.client.HiveClientImpl.<init>(HiveClientImpl.scala:191)
          at sun.reflect.NativeConstructorAccessorImpl.newInstance0(Native Method)
          at sun.reflect.NativeConstructorAccessorImpl.newInstance(NativeConstructorAccessorImpl.java:62)
          at sun.reflect.DelegatingConstructorAccessorImpl.newInstance(DelegatingConstructorAccessorImpl.java:45)
          at java.lang.reflect.Constructor.newInstance(Constructor.java:423)
          at org.apache.spark.sql.hive.client.IsolatedClientLoader.createClient(IsolatedClientLoader.scala:264)
          at org.apache.spark.sql.hive.HiveUtils$.newClientForMetadata(HiveUtils.scala:362)
          at org.apache.spark.sql.hive.HiveUtils$.newClientForMetadata(HiveUtils.scala:266)
          at org.apache.spark.sql.hive.HiveExternalCatalog.client$lzycompute(HiveExternalCatalog.scala:66)
          at org.apache.spark.sql.hive.HiveExternalCatalog.client(HiveExternalCatalog.scala:65)
          at org.apache.spark.sql.hive.HiveExternalCatalog$$anonfun$databaseExists$1.apply$mcZ$sp(HiveExternalCatalog.scala:194)
          at org.apache.spark.sql.hive.HiveExternalCatalog$$anonfun$databaseExists$1.apply(HiveExternalCatalog.scala:194)
          at org.apache.spark.sql.hive.HiveExternalCatalog$$anonfun$databaseExists$1.apply(HiveExternalCatalog.scala:194)
          at org.apache.spark.sql.hive.HiveExternalCatalog.withClient(HiveExternalCatalog.scala:97)
          at org.apache.spark.sql.hive.HiveExternalCatalog.databaseExists(HiveExternalCatalog.scala:193)
          at org.apache.spark.sql.internal.SharedState.externalCatalog$lzycompute(SharedState.scala:105)
          at org.apache.spark.sql.internal.SharedState.externalCatalog(SharedState.scala:93)
          at org.apache.spark.sql.hive.HiveSessionStateBuilder.externalCatalog(HiveSessionStateBuilder.scala:39)
          at org.apache.spark.sql.hive.HiveSessionStateBuilder.catalog$lzycompute(HiveSessionStateBuilder.scala:54)
          at org.apache.spark.sql.hive.HiveSessionStateBuilder.catalog(HiveSessionStateBuilder.scala:52)
          at org.apache.spark.sql.hive.HiveSessionStateBuilder.catalog(HiveSessionStateBuilder.scala:35)
          at org.apache.spark.sql.internal.BaseSessionStateBuilder.build(BaseSessionStateBuilder.scala:289)
          at org.apache.spark.sql.SparkSession$.org$apache$spark$sql$SparkSession$$instantiateSessionState(SparkSession.scala:1050)
          at org.apache.spark.sql.SparkSession$$anonfun$sessionState$2.apply(SparkSession.scala:130)
          at org.apache.spark.sql.SparkSession$$anonfun$sessionState$2.apply(SparkSession.scala:130)
          at scala.Option.getOrElse(Option.scala:121)
          at org.apache.spark.sql.SparkSession.sessionState$lzycompute(SparkSession.scala:129)
          at org.apache.spark.sql.SparkSession.sessionState(SparkSession.scala:126)
          at org.apache.spark.sql.SparkSession$Builder$$anonfun$getOrCreate$5.apply(SparkSession.scala:938)
          at org.apache.spark.sql.SparkSession$Builder$$anonfun$getOrCreate$5.apply(SparkSession.scala:938)
          at scala.collection.mutable.HashMap$$anonfun$foreach$1.apply(HashMap.scala:130)
          at scala.collection.mutable.HashMap$$anonfun$foreach$1.apply(HashMap.scala:130)
          at scala.collection.mutable.HashTable$class.foreachEntry(HashTable.scala:236)
          at scala.collection.mutable.HashMap.foreachEntry(HashMap.scala:40)
          at scala.collection.mutable.HashMap.foreach(HashMap.scala:130)
          at org.apache.spark.sql.SparkSession$Builder.getOrCreate(SparkSession.scala:938)
          at com.cloudera.SparkHiveAPP$.main(SparkHiveAPP.scala:24)
          at com.cloudera.SparkHiveAPP.main(SparkHiveAPP.scala)
      

      Solution:

      1. Download the winutils binaries from https://github.com/steveloughran/winutils

      2. Set the HADOOP_HOME environment variable.
        For example, on the local machine: HADOOP_HOME=D:\winutils-master\hadoop-2.6.0

        Alternatively, set HADOOP_HOME as an environment variable in the IDEA run configuration.
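
        The winutils location can also be set programmatically before the SparkSession is created, since org.apache.hadoop.util.Shell also reads the hadoop.home.dir system property. A minimal sketch (the path is just this machine's example):

        // Must run before any Hadoop class is loaded; the path is an example
        System.setProperty("hadoop.home.dir", "D:\\winutils-master\\hadoop-2.6.0")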

    2. Cannot access the metastore: unable to instantiate SessionHiveMetaStoreClient

      Cause: after the HBase integration jars are added to the pom.xml above, accessing Hive throws the exception below, which differs from the error seen without the HBase jars. The solution is the same as for the previous problem (see also the diagnostic tip after the log below).

      Error log:

      log4j:WARN No appenders could be found for logger (org.apache.hadoop.util.Shell).
      log4j:WARN Please initialize the log4j system properly.
      log4j:WARN See http://logging.apache.org/log4j/1.2/faq.html#noconfig for more info.
      Using Spark's default log4j profile: org/apache/spark/log4j-defaults.properties
      18/06/27 10:47:01 INFO metastore: Trying to connect to metastore with URI thrift://cdh01:9083
      18/06/27 10:47:01 WARN Hive: Failed to access metastore. This class should not accessed in runtime.
      org.apache.hadoop.hive.ql.metadata.HiveException: java.lang.RuntimeException: Unable to instantiate org.apache.hadoop.hive.ql.metadata.SessionHiveMetaStoreClient
          at org.apache.hadoop.hive.ql.metadata.Hive.getAllDatabases(Hive.java:1236)
          at org.apache.hadoop.hive.ql.metadata.Hive.reloadFunctions(Hive.java:174)
          at org.apache.hadoop.hive.ql.metadata.Hive.<clinit>(Hive.java:166)
          at org.apache.hadoop.hive.ql.session.SessionState.start(SessionState.java:503)
          at org.apache.spark.sql.hive.client.HiveClientImpl.<init>(HiveClientImpl.scala:191)
          at sun.reflect.NativeConstructorAccessorImpl.newInstance0(Native Method)
          at sun.reflect.NativeConstructorAccessorImpl.newInstance(NativeConstructorAccessorImpl.java:62)
          at sun.reflect.DelegatingConstructorAccessorImpl.newInstance(DelegatingConstructorAccessorImpl.java:45)
          at java.lang.reflect.Constructor.newInstance(Constructor.java:423)
          at org.apache.spark.sql.hive.client.IsolatedClientLoader.createClient(IsolatedClientLoader.scala:264)
          at org.apache.spark.sql.hive.HiveUtils$.newClientForMetadata(HiveUtils.scala:362)
          at org.apache.spark.sql.hive.HiveUtils$.newClientForMetadata(HiveUtils.scala:266)
          at org.apache.spark.sql.hive.HiveExternalCatalog.client$lzycompute(HiveExternalCatalog.scala:66)
          at org.apache.spark.sql.hive.HiveExternalCatalog.client(HiveExternalCatalog.scala:65)
          at org.apache.spark.sql.hive.HiveExternalCatalog$$anonfun$databaseExists$1.apply$mcZ$sp(HiveExternalCatalog.scala:194)
          at org.apache.spark.sql.hive.HiveExternalCatalog$$anonfun$databaseExists$1.apply(HiveExternalCatalog.scala:194)
          at org.apache.spark.sql.hive.HiveExternalCatalog$$anonfun$databaseExists$1.apply(HiveExternalCatalog.scala:194)
          at org.apache.spark.sql.hive.HiveExternalCatalog.withClient(HiveExternalCatalog.scala:97)
          at org.apache.spark.sql.hive.HiveExternalCatalog.databaseExists(HiveExternalCatalog.scala:193)
          at org.apache.spark.sql.internal.SharedState.externalCatalog$lzycompute(SharedState.scala:105)
          at org.apache.spark.sql.internal.SharedState.externalCatalog(SharedState.scala:93)
          at org.apache.spark.sql.hive.HiveSessionStateBuilder.externalCatalog(HiveSessionStateBuilder.scala:39)
          at org.apache.spark.sql.hive.HiveSessionStateBuilder.catalog$lzycompute(HiveSessionStateBuilder.scala:54)
          at org.apache.spark.sql.hive.HiveSessionStateBuilder.catalog(HiveSessionStateBuilder.scala:52)
          at org.apache.spark.sql.hive.HiveSessionStateBuilder.catalog(HiveSessionStateBuilder.scala:35)
          at org.apache.spark.sql.internal.BaseSessionStateBuilder.build(BaseSessionStateBuilder.scala:289)
          at org.apache.spark.sql.SparkSession$.org$apache$spark$sql$SparkSession$$instantiateSessionState(SparkSession.scala:1050)
          at org.apache.spark.sql.SparkSession$$anonfun$sessionState$2.apply(SparkSession.scala:130)
          at org.apache.spark.sql.SparkSession$$anonfun$sessionState$2.apply(SparkSession.scala:130)
          at scala.Option.getOrElse(Option.scala:121)
          at org.apache.spark.sql.SparkSession.sessionState$lzycompute(SparkSession.scala:129)
          at org.apache.spark.sql.SparkSession.sessionState(SparkSession.scala:126)
          at org.apache.spark.sql.SparkSession$Builder$$anonfun$getOrCreate$5.apply(SparkSession.scala:938)
          at org.apache.spark.sql.SparkSession$Builder$$anonfun$getOrCreate$5.apply(SparkSession.scala:938)
          at scala.collection.mutable.HashMap$$anonfun$foreach$1.apply(HashMap.scala:130)
          at scala.collection.mutable.HashMap$$anonfun$foreach$1.apply(HashMap.scala:130)
          at scala.collection.mutable.HashTable$class.foreachEntry(HashTable.scala:236)
          at scala.collection.mutable.HashMap.foreachEntry(HashMap.scala:40)
          at scala.collection.mutable.HashMap.foreach(HashMap.scala:130)
          at org.apache.spark.sql.SparkSession$Builder.getOrCreate(SparkSession.scala:938)
          at com.cloudera.SparkHiveAPP$.main(SparkHiveAPP.scala:24)
          at com.cloudera.SparkHiveAPP.main(SparkHiveAPP.scala)
      Caused by: java.lang.RuntimeException: Unable to instantiate org.apache.hadoop.hive.ql.metadata.SessionHiveMetaStoreClient
          at org.apache.hadoop.hive.metastore.MetaStoreUtils.newInstance(MetaStoreUtils.java:1523)
          at org.apache.hadoop.hive.metastore.RetryingMetaStoreClient.<init>(RetryingMetaStoreClient.java:86)
          at org.apache.hadoop.hive.metastore.RetryingMetaStoreClient.getProxy(RetryingMetaStoreClient.java:132)
          at org.apache.hadoop.hive.metastore.RetryingMetaStoreClient.getProxy(RetryingMetaStoreClient.java:104)
          at org.apache.hadoop.hive.ql.metadata.Hive.createMetaStoreClient(Hive.java:3005)
          at org.apache.hadoop.hive.ql.metadata.Hive.getMSC(Hive.java:3024)
          at org.apache.hadoop.hive.ql.metadata.Hive.getAllDatabases(Hive.java:1234)
          ... 41 more
      Caused by: java.lang.reflect.InvocationTargetException
          at sun.reflect.NativeConstructorAccessorImpl.newInstance0(Native Method)
          at sun.reflect.NativeConstructorAccessorImpl.newInstance(NativeConstructorAccessorImpl.java:62)
          at sun.reflect.DelegatingConstructorAccessorImpl.newInstance(DelegatingConstructorAccessorImpl.java:45)
          at java.lang.reflect.Constructor.newInstance(Constructor.java:423)
          at org.apache.hadoop.hive.metastore.MetaStoreUtils.newInstance(MetaStoreUtils.java:1521)
          ... 47 more
      Caused by: java.lang.NullPointerException
          at java.lang.ProcessBuilder.start(ProcessBuilder.java:1012)
          at org.apache.hadoop.util.Shell.runCommand(Shell.java:482)
          at org.apache.hadoop.util.Shell.run(Shell.java:455)
          at org.apache.hadoop.util.Shell$ShellCommandExecutor.execute(Shell.java:702)
          at org.apache.hadoop.util.Shell.execCommand(Shell.java:791)
          at org.apache.hadoop.util.Shell.execCommand(Shell.java:774)
          at org.apache.hadoop.security.ShellBasedUnixGroupsMapping.getUnixGroups(ShellBasedUnixGroupsMapping.java:84)
          at org.apache.hadoop.security.ShellBasedUnixGroupsMapping.getGroups(ShellBasedUnixGroupsMapping.java:52)
          at org.apache.hadoop.security.Groups.getGroups(Groups.java:139)
          at org.apache.hadoop.security.UserGroupInformation.getGroupNames(UserGroupInformation.java:1474)
          at org.apache.hadoop.hive.metastore.HiveMetaStoreClient.open(HiveMetaStoreClient.java:436)
          at org.apache.hadoop.hive.metastore.HiveMetaStoreClient.<init>(HiveMetaStoreClient.java:236)
          at org.apache.hadoop.hive.ql.metadata.SessionHiveMetaStoreClient.<init>(SessionHiveMetaStoreClient.java:74)
          ... 52 more
      18/06/27 10:47:01 INFO metastore: Trying to connect to metastore with URI thrift://cdh01:9083
      Exception in thread "main" java.lang.IllegalArgumentException: Error while instantiating 'org.apache.spark.sql.hive.HiveSessionStateBuilder':
          at org.apache.spark.sql.SparkSession$.org$apache$spark$sql$SparkSession$$instantiateSessionState(SparkSession.scala:1053)
          at org.apache.spark.sql.SparkSession$$anonfun$sessionState$2.apply(SparkSession.scala:130)
          at org.apache.spark.sql.SparkSession$$anonfun$sessionState$2.apply(SparkSession.scala:130)
          at scala.Option.getOrElse(Option.scala:121)
          at org.apache.spark.sql.SparkSession.sessionState$lzycompute(SparkSession.scala:129)
          at org.apache.spark.sql.SparkSession.sessionState(SparkSession.scala:126)
          at org.apache.spark.sql.SparkSession$Builder$$anonfun$getOrCreate$5.apply(SparkSession.scala:938)
          at org.apache.spark.sql.SparkSession$Builder$$anonfun$getOrCreate$5.apply(SparkSession.scala:938)
          at scala.collection.mutable.HashMap$$anonfun$foreach$1.apply(HashMap.scala:130)
          at scala.collection.mutable.HashMap$$anonfun$foreach$1.apply(HashMap.scala:130)
          at scala.collection.mutable.HashTable$class.foreachEntry(HashTable.scala:236)
          at scala.collection.mutable.HashMap.foreachEntry(HashMap.scala:40)
          at scala.collection.mutable.HashMap.foreach(HashMap.scala:130)
          at org.apache.spark.sql.SparkSession$Builder.getOrCreate(SparkSession.scala:938)
          at com.cloudera.SparkHiveAPP$.main(SparkHiveAPP.scala:24)
          at com.cloudera.SparkHiveAPP.main(SparkHiveAPP.scala)
      Caused by: org.apache.spark.sql.AnalysisException: java.lang.RuntimeException: java.lang.RuntimeException: Unable to instantiate org.apache.hadoop.hive.ql.metadata.SessionHiveMetaStoreClient;
          at org.apache.spark.sql.hive.HiveExternalCatalog.withClient(HiveExternalCatalog.scala:106)
          at org.apache.spark.sql.hive.HiveExternalCatalog.databaseExists(HiveExternalCatalog.scala:193)
          at org.apache.spark.sql.internal.SharedState.externalCatalog$lzycompute(SharedState.scala:105)
          at org.apache.spark.sql.internal.SharedState.externalCatalog(SharedState.scala:93)
          at org.apache.spark.sql.hive.HiveSessionStateBuilder.externalCatalog(HiveSessionStateBuilder.scala:39)
          at org.apache.spark.sql.hive.HiveSessionStateBuilder.catalog$lzycompute(HiveSessionStateBuilder.scala:54)
          at org.apache.spark.sql.hive.HiveSessionStateBuilder.catalog(HiveSessionStateBuilder.scala:52)
          at org.apache.spark.sql.hive.HiveSessionStateBuilder.catalog(HiveSessionStateBuilder.scala:35)
          at org.apache.spark.sql.internal.BaseSessionStateBuilder.build(BaseSessionStateBuilder.scala:289)
          at org.apache.spark.sql.SparkSession$.org$apache$spark$sql$SparkSession$$instantiateSessionState(SparkSession.scala:1050)
          ... 15 more
      Caused by: java.lang.RuntimeException: java.lang.RuntimeException: Unable to instantiate org.apache.hadoop.hive.ql.metadata.SessionHiveMetaStoreClient
          at org.apache.hadoop.hive.ql.session.SessionState.start(SessionState.java:522)
          at org.apache.spark.sql.hive.client.HiveClientImpl.<init>(HiveClientImpl.scala:191)
          at sun.reflect.NativeConstructorAccessorImpl.newInstance0(Native Method)
          at sun.reflect.NativeConstructorAccessorImpl.newInstance(NativeConstructorAccessorImpl.java:62)
          at sun.reflect.DelegatingConstructorAccessorImpl.newInstance(DelegatingConstructorAccessorImpl.java:45)
          at java.lang.reflect.Constructor.newInstance(Constructor.java:423)
          at org.apache.spark.sql.hive.client.IsolatedClientLoader.createClient(IsolatedClientLoader.scala:264)
          at org.apache.spark.sql.hive.HiveUtils$.newClientForMetadata(HiveUtils.scala:362)
          at org.apache.spark.sql.hive.HiveUtils$.newClientForMetadata(HiveUtils.scala:266)
          at org.apache.spark.sql.hive.HiveExternalCatalog.client$lzycompute(HiveExternalCatalog.scala:66)
          at org.apache.spark.sql.hive.HiveExternalCatalog.client(HiveExternalCatalog.scala:65)
          at org.apache.spark.sql.hive.HiveExternalCatalog$$anonfun$databaseExists$1.apply$mcZ$sp(HiveExternalCatalog.scala:194)
          at org.apache.spark.sql.hive.HiveExternalCatalog$$anonfun$databaseExists$1.apply(HiveExternalCatalog.scala:194)
          at org.apache.spark.sql.hive.HiveExternalCatalog$$anonfun$databaseExists$1.apply(HiveExternalCatalog.scala:194)
          at org.apache.spark.sql.hive.HiveExternalCatalog.withClient(HiveExternalCatalog.scala:97)
          ... 24 more
      Caused by: java.lang.RuntimeException: Unable to instantiate org.apache.hadoop.hive.ql.metadata.SessionHiveMetaStoreClient
          at org.apache.hadoop.hive.metastore.MetaStoreUtils.newInstance(MetaStoreUtils.java:1523)
          at org.apache.hadoop.hive.metastore.RetryingMetaStoreClient.<init>(RetryingMetaStoreClient.java:86)
          at org.apache.hadoop.hive.metastore.RetryingMetaStoreClient.getProxy(RetryingMetaStoreClient.java:132)
          at org.apache.hadoop.hive.metastore.RetryingMetaStoreClient.getProxy(RetryingMetaStoreClient.java:104)
          at org.apache.hadoop.hive.ql.metadata.Hive.createMetaStoreClient(Hive.java:3005)
          at org.apache.hadoop.hive.ql.metadata.Hive.getMSC(Hive.java:3024)
          at org.apache.hadoop.hive.ql.session.SessionState.start(SessionState.java:503)
          ... 38 more
      Caused by: java.lang.reflect.InvocationTargetException
          at sun.reflect.NativeConstructorAccessorImpl.newInstance0(Native Method)
          at sun.reflect.NativeConstructorAccessorImpl.newInstance(NativeConstructorAccessorImpl.java:62)
          at sun.reflect.DelegatingConstructorAccessorImpl.newInstance(DelegatingConstructorAccessorImpl.java:45)
          at java.lang.reflect.Constructor.newInstance(Constructor.java:423)
          at org.apache.hadoop.hive.metastore.MetaStoreUtils.newInstance(MetaStoreUtils.java:1521)
          ... 44 more
      Caused by: java.lang.NullPointerException
          at java.lang.ProcessBuilder.start(ProcessBuilder.java:1012)
          at org.apache.hadoop.util.Shell.runCommand(Shell.java:482)
          at org.apache.hadoop.util.Shell.run(Shell.java:455)
          at org.apache.hadoop.util.Shell$ShellCommandExecutor.execute(Shell.java:702)
          at org.apache.hadoop.util.Shell.execCommand(Shell.java:791)
          at org.apache.hadoop.util.Shell.execCommand(Shell.java:774)
          at org.apache.hadoop.security.ShellBasedUnixGroupsMapping.getUnixGroups(ShellBasedUnixGroupsMapping.java:84)
          at org.apache.hadoop.security.ShellBasedUnixGroupsMapping.getGroups(ShellBasedUnixGroupsMapping.java:52)
          at org.apache.hadoop.security.Groups.getGroups(Groups.java:139)
          at org.apache.hadoop.security.UserGroupInformation.getGroupNames(UserGroupInformation.java:1474)
          at org.apache.hadoop.hive.metastore.HiveMetaStoreClient.open(HiveMetaStoreClient.java:436)
          at org.apache.hadoop.hive.metastore.HiveMetaStoreClient.<init>(HiveMetaStoreClient.java:236)
          at org.apache.hadoop.hive.ql.metadata.SessionHiveMetaStoreClient.<init>(SessionHiveMetaStoreClient.java:74)
          ... 49 more
      
      Process finished with exit code 1
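
      When a newly added dependency (such as the HBase jars here) triggers this kind of metastore failure, listing which Hadoop artifacts each dependency pulls in can help locate the conflicting jars; for example:

      mvn dependency:tree -Dincludes=org.apache.hadoop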
      
      