SPARK-11851: Unable to start Spark Thrift Server against a secured Hive metastore (GSS initiate failed)


Details

    • Type: Bug
    • Status: Resolved
    • Priority: Critical
    • Resolution: Duplicate
    • Affects Version/s: 1.5.0, 1.5.2
    • Fix Version/s: None
    • Component/s: SQL
    • Labels: None
    • Environment: CentOS x64

    Description

      I am using Spark 1.5.X to work with Hive 0.14.0.

      a. spark-defaults.conf:

      spark.sql.hive.metastore.version 0.14.0
      spark.sql.hive.metastore.jars hadoop 2.6.0 jars:hive 0.14.0 jars
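      # Note: the value above is the reporter's shorthand; spark.sql.hive.metastore.jars
      # expects a colon-separated JVM classpath covering the Hive 0.14.0 jars and the
      # matching Hadoop 2.6.0 jars. A hypothetical example (paths are illustrative
      # assumptions, not taken from this report):
      # spark.sql.hive.metastore.jars /opt/hive-0.14.0/lib/*:/opt/hadoop-2.6.0/share/hadoop/common/*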
      

      b. hive-site.xml

      <?xml version="1.0"?>
      <?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
      <configuration>
        <property>
          <name>javax.jdo.option.ConnectionURL</name>
          <!--<value>jdbc:mysql://my.domain/metastore</value> -->
          <value>jdbc:mysql://my.domain/metestore2</value>
          <description>the URL of the MySQL database</description>
        </property>
        <property>
          <name>javax.jdo.option.ConnectionDriverName</name>
          <value>com.mysql.jdbc.Driver</value>
        </property>
        <property>
          <name>javax.jdo.option.ConnectionUserName</name>
          <value>hive</value>
        </property>
        <property>
          <name>javax.jdo.option.ConnectionPassword</name>
          <value>hive</value>
        </property>
        <property>
          <name>datanucleus.autoCreateSchema</name>
          <value>false</value>
        </property>
        <property>
          <name>datanucleus.fixedDatastore</name>
          <value>true</value>
        </property>
        <property>
          <name>datanucleus.autoStartMechanism</name>
          <value>SchemaTable</value>
        </property>
      
        <property>
          <name>hive.exec.max.dynamic.partitions</name>
          <value>100000</value>
        </property>
        <property>
          <name>hive.exec.max.dynamic.partitions.pernode</name>
          <value>10000</value>
        </property>
        <!-- rename bug workaround https://issues.apache.org/jira/browse/HIVE-3815 -->
        <property>
          <name>fs.hdfs.impl.disable.cache</name>
          <value>false</value>
        </property>
        <property>
          <name>fs.file.impl.disable.cache</name>
          <value>false</value>
        </property>
        <!-- memory leak workaround https://issues.apache.org/jira/browse/HIVE-4501 -->
        <property>
          <name>hive.server2.thrift.http.max.worker.threads</name>
          <value>5000</value>
        </property>
        <property>
          <name>hive.metastore.warehouse.dir</name>
          <value>hdfs:/user/hive/warehouse</value>
        </property>
        <property>
          <name>mapred.output.compress</name>
          <value>true</value>
        </property>
        <property>
          <name>hive.exec.compress.output</name>
          <value>true</value>
        </property>
        <property>
          <name>mapred.output.compression.type</name>
          <value>BLOCK</value>
        </property>
        <property>
          <name>mapreduce.input.fileinputformat.split.minsize</name>
          <value>134217728</value>
        </property>
        <property>
          <name>mapreduce.input.fileinputformat.split.maxsize</name>
          <value>1000000000</value>
        </property>
        <property>
          <name>mapred.child.java.opts</name>
          <value>-Xmx1024m</value>
        </property>
        <property>
          <name>mapreduce.map.memory.mb</name>
          <value>1024</value>
        </property>
        <property>
          <name>mapreduce.reduce.memory.mb</name>
          <value>1024</value>
        </property>
        <!--
        <property>
          <name>hive.mapred.map.tasks.speculative.execution</name>
          <value>false</value>
        </property>
        -->
        <property>
          <name>hive.mapred.reduce.tasks.speculative.execution</name>
          <value>false</value>
        </property>
        <property>
          <name>mapred.map.tasks.speculative.execution</name>
          <value>false</value>
        </property>
        <property>
          <name>mapred.reduce.tasks.speculative.execution</name>
          <value>false</value>
        </property>
        <property>
          <name>mapreduce.job.queuename</name>
          <value>mapreduce</value>
        </property>
        <property>
          <name>hive.metastore.client.socket.timeout</name>
          <value>600</value>
        </property>
        <property>
          <name>hive.auto.convert.join.noconditionaltask.size</name>
          <value>671088000</value>
        </property>
        
        <property>
          <name>hive.server2.authentication</name>
          <value>KERBEROS</value>
        </property>
        <property>
          <name>hive.server2.authentication.kerberos.principal</name>
          <value>hive/_HOST@HADOOP.HAP</value>
        </property>
        <property>
          <name>hive.server2.authentication.kerberos.keytab</name>
          <value>/tmp/hive.keytab</value>
        </property>
      
        <property>
          <name>hive.metastore.sasl.enabled</name>
          <value>true</value>
        </property>
        <property>
          <name>hive.metastore.kerberos.keytab.file</name>
          <value>/export/keytabs_conf/hive.keytab</value>
        </property>
        <property>
          <name>hive.metastore.kerberos.principal</name>
          <value>hive/_HOST@HADOOP.HAP</value>
        </property>
      
        <property>
          <name>hive.metastore.uris</name>
          <value>thrift://my.domain:9083</value>
        </property>
      
        <property>
          <name>hive.server2.support.dynamic.service.discovery</name>
          <value>true</value>
        </property>
       
      
        <!--hive security-->
        <property>
          <name>hive.security.authorization.enabled</name>
          <value>true</value>
        </property>
        <property>
          <name>hive.security.authorization.createtable.owner.grants</name>
          <value>ALL</value>
        </property>
        <property>
          <name>hive.security.authorization.task.factory</name>
          <value>org.apache.hadoop.hive.ql.parse.authorization.HiveAuthorizationTaskFactoryImpl</value>
        </property>
        <property>
          <name>hive.server2.enable.doAs</name>
          <value>false</value>
        </property>
        <property>
          <name>hive.warehouse.subdir.inherit.perms</name>
          <value>true</value>
        </property>
      
        <!-- hive Storage Based Authorization-->
        <!--
        <property>
          <name>hive.metastore.pre.event.listeners</name>
          <value>org.apache.hadoop.hive.ql.security.authorization.AuthorizationPreEventListener</value>
        </property>
        <property>
          <name>hive.security.metastore.authorization.manager</name>
          <value>org.apache.hadoop.hive.ql.security.authorization.StorageBasedAuthorizationProvider</value>
        </property>
        <property>
          <name>hive.security.metastore.authenticator.manager</name>
          <value>org.apache.hadoop.hive.ql.security.HadoopDefaultMetastoreAuthenticator</value>
        </property>
        <property>
          <name>hive.security.metastore.authorization.auth.reads</name>
          <value>true</value>
        </property>
        -->
        <!--  SQL Standard Based Hive Authorization-->
        <property>
          <name>hive.users.in.admin.role</name>
          <value>hive,test109</value>
        </property>
        <property>
          <name>hive.security.authorization.manager</name>
          <value>org.apache.hadoop.hive.ql.security.authorization.plugin.sqlstd.SQLStdConfOnlyAuthorizerFactory</value>
        </property>
        <property>
          <name>hive.security.authenticator.manager</name>
          <value>org.apache.hadoop.hive.ql.security.SessionStateUserAuthenticator</value>
        </property>
      
        <property>
          <name>hive.server2.map.fair.scheduler.queue</name>
          <value>false</value>
        </property>
        
        <!-- https://issues.apache.org/jira/browse/SPARK-11021 -->
        <property>
          <name>hive.exec.stagingdir</name>
          <value>/tmp/hive/spark-stagingdir</value>
        </property>
        
      </configuration>
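
      For Spark to read this hive-site.xml, the file has to be visible on the driver's classpath; a common setup (an assumption here, not stated in the report) is to copy it into the Spark conf directory:

      cp hive-site.xml $SPARK_HOME/conf/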
      
      

      The steps to start Spark 1.5.x:

      1. Obtain a Kerberos TGT (the klist check after step 2 verifies it):

      kinit -kt /tmp/xx.keytab hive/xxx
      

      2. Start the Spark Thrift Server:

      sbin/start-thriftserver.sh --master yarn-client --num-executors 2
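
      To confirm that step 1 actually left a TGT in the ticket cache before the server comes up, standard MIT Kerberos tooling can be used (the expected ticket is inferred from the kinit command and the HADOOP.HAP realm above, not from the report):

      klist

      klist should list a krbtgt/HADOOP.HAP@HADOOP.HAP ticket for the hive principal.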
      

      The following exception is thrown during startup:

      15/11/19 15:39:59 ERROR TSaslTransport: SASL negotiation failure
      javax.security.sasl.SaslException: GSS initiate failed [Caused by GSSException: No valid credentials provided (Mechanism level: Failed to find any Kerberos tgt)]
      	at com.sun.security.sasl.gsskerb.GssKrb5Client.evaluateChallenge(GssKrb5Client.java:212)
      	at org.apache.thrift.transport.TSaslClientTransport.handleSaslStartMessage(TSaslClientTransport.java:94)
      	at org.apache.thrift.transport.TSaslTransport.open(TSaslTransport.java:253)
      	at org.apache.thrift.transport.TSaslClientTransport.open(TSaslClientTransport.java:37)
      	at org.apache.hadoop.hive.thrift.client.TUGIAssumingTransport$1.run(TUGIAssumingTransport.java:52)
      	at org.apache.hadoop.hive.thrift.client.TUGIAssumingTransport$1.run(TUGIAssumingTransport.java:49)
      	at java.security.AccessController.doPrivileged(Native Method)
      	at javax.security.auth.Subject.doAs(Subject.java:415)
      	at org.apache.hadoop.security.UserGroupInformation.doAs(UserGroupInformation.java:1628)
      	at org.apache.hadoop.hive.thrift.client.TUGIAssumingTransport.open(TUGIAssumingTransport.java:49)
      	at org.apache.hadoop.hive.metastore.HiveMetaStoreClient.open(HiveMetaStoreClient.java:358)
      	at org.apache.hadoop.hive.metastore.HiveMetaStoreClient.<init>(HiveMetaStoreClient.java:215)
      	at org.apache.hadoop.hive.ql.metadata.SessionHiveMetaStoreClient.<init>(SessionHiveMetaStoreClient.java:73)
      	at sun.reflect.NativeConstructorAccessorImpl.newInstance0(Native Method)
      	at sun.reflect.NativeConstructorAccessorImpl.newInstance(NativeConstructorAccessorImpl.java:57)
      	at sun.reflect.DelegatingConstructorAccessorImpl.newInstance(DelegatingConstructorAccessorImpl.java:45)
      	at java.lang.reflect.Constructor.newInstance(Constructor.java:526)
      	at org.apache.hadoop.hive.metastore.MetaStoreUtils.newInstance(MetaStoreUtils.java:1447)
      	at org.apache.hadoop.hive.metastore.RetryingMetaStoreClient.<init>(RetryingMetaStoreClient.java:63)
      	at org.apache.hadoop.hive.metastore.RetryingMetaStoreClient.getProxy(RetryingMetaStoreClient.java:73)
      	at org.apache.hadoop.hive.ql.metadata.Hive.createMetaStoreClient(Hive.java:2661)
      	at org.apache.hadoop.hive.ql.metadata.Hive.getMSC(Hive.java:2680)
      	at org.apache.hadoop.hive.ql.session.SessionState.start(SessionState.java:425)
      	at org.apache.spark.sql.hive.client.ClientWrapper.<init>(ClientWrapper.scala:171)
      	at sun.reflect.NativeConstructorAccessorImpl.newInstance0(Native Method)
      	at sun.reflect.NativeConstructorAccessorImpl.newInstance(NativeConstructorAccessorImpl.java:57)
      	at sun.reflect.DelegatingConstructorAccessorImpl.newInstance(DelegatingConstructorAccessorImpl.java:45)
      	at java.lang.reflect.Constructor.newInstance(Constructor.java:526)
      	at org.apache.spark.sql.hive.client.IsolatedClientLoader.liftedTree1$1(IsolatedClientLoader.scala:183)
      	at org.apache.spark.sql.hive.client.IsolatedClientLoader.<init>(IsolatedClientLoader.scala:179)
      	at org.apache.spark.sql.hive.HiveContext.metadataHive$lzycompute(HiveContext.scala:264)
      	at org.apache.spark.sql.hive.HiveContext.metadataHive(HiveContext.scala:186)
      	at org.apache.spark.sql.hive.HiveContext.setConf(HiveContext.scala:393)
      	at org.apache.spark.sql.SQLContext$$anonfun$5.apply(SQLContext.scala:229)
      	at org.apache.spark.sql.SQLContext$$anonfun$5.apply(SQLContext.scala:228)
      	at scala.collection.Iterator$class.foreach(Iterator.scala:727)
      	at scala.collection.AbstractIterator.foreach(Iterator.scala:1157)
      	at scala.collection.IterableLike$class.foreach(IterableLike.scala:72)
      	at scala.collection.AbstractIterable.foreach(Iterable.scala:54)
      	at org.apache.spark.sql.SQLContext.<init>(SQLContext.scala:228)
      	at org.apache.spark.sql.hive.HiveContext.<init>(HiveContext.scala:72)
      	at org.apache.spark.sql.hive.thriftserver.SparkSQLEnv$.init(SparkSQLEnv.scala:58)
      	at org.apache.spark.sql.hive.thriftserver.HiveThriftServer2$.main(HiveThriftServer2.scala:77)
      	at org.apache.spark.sql.hive.thriftserver.HiveThriftServer2.main(HiveThriftServer2.scala)
      	at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
      	at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:57)
      	at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
      	at java.lang.reflect.Method.invoke(Method.java:606)
      	at org.apache.spark.deploy.SparkSubmit$.org$apache$spark$deploy$SparkSubmit$$runMain(SparkSubmit.scala:672)
      	at org.apache.spark.deploy.SparkSubmit$.doRunMain$1(SparkSubmit.scala:180)
      	at org.apache.spark.deploy.SparkSubmit$.submit(SparkSubmit.scala:205)
      	at org.apache.spark.deploy.SparkSubmit$.main(SparkSubmit.scala:120)
      	at org.apache.spark.deploy.SparkSubmit.main(SparkSubmit.scala)
      Caused by: GSSException: No valid credentials provided (Mechanism level: Failed to find any Kerberos tgt)
      	at sun.security.jgss.krb5.Krb5InitCredential.getInstance(Krb5InitCredential.java:147)
      	at sun.security.jgss.krb5.Krb5MechFactory.getCredentialElement(Krb5MechFactory.java:121)
      	at sun.security.jgss.krb5.Krb5MechFactory.getMechanismContext(Krb5MechFactory.java:187)
      	at sun.security.jgss.GSSManagerImpl.getMechanismContext(GSSManagerImpl.java:223)
      	at sun.security.jgss.GSSContextImpl.initSecContext(GSSContextImpl.java:212)
      	at sun.security.jgss.GSSContextImpl.initSecContext(GSSContextImpl.java:179)
      	at com.sun.security.sasl.gsskerb.GssKrb5Client.evaluateChallenge(GssKrb5Client.java:193)
      	... 52 more
      15/11/19 15:39:59 WARN metastore: Failed to connect to the MetaStore Server...
      15/11/19 15:39:59 INFO metastore: Waiting 1 seconds before next connection attempt.
      

      Note: if I don't set the spark.sql.hive.metastore.* properties in spark-defaults.conf, the Spark Thrift Server starts up and passes Kerberos authentication; unsurprisingly, DML then hits version-incompatibility issues, because Spark's built-in Hive client does not match the Hive 0.14.0 metastore. The GSS failure above therefore appears only when the isolated metastore client is loaded via spark.sql.hive.metastore.jars.
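
      For reference, when those two properties are omitted, Spark 1.5 falls back to its bundled Hive client; per the Spark 1.5 configuration defaults this is equivalent to:

      spark.sql.hive.metastore.version 1.2.1
      spark.sql.hive.metastore.jars builtin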

People

Assignee: Unassigned
Reporter: bit1129
Votes: 1
Watchers: 7
