Submitting a Spark Job Remotely to YARN from IDEA in yarn-client Mode

  1. Sample code
    1. RemoteSubmitApp main class
      package com.cloudera
      
      import org.apache.kafka.clients.consumer.ConsumerConfig
      import org.apache.kafka.common.serialization.StringDeserializer
      import org.apache.log4j.Logger
      import org.apache.spark.SparkConf
      import org.apache.spark.streaming.dstream.DStream
      import org.apache.spark.streaming.{Seconds, StreamingContext}
      import org.apache.spark.streaming.kafka010.{ConsumerStrategies, KafkaUtils, LocationStrategies}
      
      object RemoteSubmitApp {
        val logger = Logger.getLogger(this.getClass)
      
        def main(args: Array[String]): Unit = {
          
          // User that submits the job
          //    System.setProperty("HADOOP_USER_NAME", "root")
          val conf = new SparkConf().setAppName("Remote_Submit_App")
            // Submit in yarn-client mode
            .setMaster("yarn-client")
            // ResourceManager hostname
            .set("yarn.resourcemanager.hostname", "cdh02")
            // Driver memory
            .set("spark.driver.memory", "1024M")
            // Executor memory
            .set("spark.executor.memory", "800M")
            // Number of executors
            .set("spark.executor.instances", "2")
            // YARN queue to submit to
            //      .set("spark.yarn.queue", "default")
            // Driver host, i.e. the IP address of the local machine
            .set("spark.driver.host", "192.168.1.26")
            // Serializer
            //      .set("spark.serializer", "org.apache.spark.serializer.KryoSerializer")
            // Path of the application jar; additional dependency jars can be added here, separated by commas
            .setJars(List("E:\\RemoteSubmitSparkToYarn\\target\\RemoteSubmitSparkToYarn-1.0-SNAPSHOT.jar"))
          val scc = new StreamingContext(conf, Seconds(30))
      
          scc.sparkContext.setLogLevel("WARN")
      //    scc.checkpoint("checkpoint")
          val topic = "remote_submit_test"
          val topicSet = topic.split(",").toSet
      
          val kafkaParams = Map[String, Object](
            ConsumerConfig.BOOTSTRAP_SERVERS_CONFIG -> "10.101.75.190:9092,10.101.75.191:9092,10.101.75.192:9092",
            ConsumerConfig.KEY_DESERIALIZER_CLASS_CONFIG -> classOf[StringDeserializer],
            ConsumerConfig.VALUE_DESERIALIZER_CLASS_CONFIG -> classOf[StringDeserializer],
            ConsumerConfig.GROUP_ID_CONFIG -> "remote_test",
            ConsumerConfig.AUTO_OFFSET_RESET_CONFIG -> "earliest",
            ConsumerConfig.ENABLE_AUTO_COMMIT_CONFIG -> (false: java.lang.Boolean)
          )
      
          val kafkaStreams = KafkaUtils.createDirectStream[String, String](
            scc,
            LocationStrategies.PreferConsistent,
            ConsumerStrategies.Subscribe[String, String](topicSet, kafkaParams)
          )
      
          val wordCounts: DStream[(String, Long)] = kafkaStreams.map(_.value())
            .flatMap(_.split(" "))
            .map(x => (x, 1L))
            .reduceByKey(_ + _)
          wordCounts.print()
      
          // Start the streaming context
          scc.start()
          scc.awaitTermination()
        }
      }
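
      To feed some test lines into the remote_submit_test topic while trying this out, a small producer sketch can be used (an illustration only: it assumes the kafka-clients dependency from the pom below and reuses the broker addresses from the example above):

      import java.util.Properties

      import org.apache.kafka.clients.producer.{KafkaProducer, ProducerRecord}

      object TestDataProducer {
        def main(args: Array[String]): Unit = {
          val props = new Properties()
          props.put("bootstrap.servers", "10.101.75.190:9092,10.101.75.191:9092,10.101.75.192:9092")
          props.put("key.serializer", "org.apache.kafka.common.serialization.StringSerializer")
          props.put("value.serializer", "org.apache.kafka.common.serialization.StringSerializer")

          val producer = new KafkaProducer[String, String](props)
          // Each line is split on spaces by the streaming job and counted per word
          Seq("hello spark streaming", "hello yarn client").foreach { line =>
            producer.send(new ProducerRecord[String, String]("remote_submit_test", line))
          }
          producer.flush()
          producer.close()
        }
      }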
      
    2. pom.xml
      <?xml version="1.0" encoding="UTF-8"?>
      <project xmlns="http://maven.apache.org/POM/4.0.0"
               xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
               xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
          <modelVersion>4.0.0</modelVersion>
          <groupId>com.cloudera</groupId>
          <artifactId>RemoteSubmitSparkToYarn</artifactId>
          <version>1.0-SNAPSHOT</version>
      
          <packaging>jar</packaging>
          <name>RemoteSubmitSparkToYarn</name>
      
          <repositories>
              <!-- Cloudera repository -->
              <repository>
                  <id>cloudera</id>
                  <url>https://repository.cloudera.com/artifactory/cloudera-repos/</url>
                  <name>Cloudera Repositories</name>
                  <releases>
                      <enabled>true</enabled>
                  </releases>
                  <snapshots>
                      <enabled>false</enabled>
                  </snapshots>
              </repository>
          </repositories>
      
          <properties>
              <project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
              <project.reporting.outputEncoding>UTF-8</project.reporting.outputEncoding>
              <java.version>1.8</java.version>
              <!--<spark.version>2.4.0-cdh6.1.1</spark.version>-->
              <spark.version>2.2.0</spark.version>
              <provided.scope>compile</provided.scope>
              <!--<provided.scope>provided</provided.scope>-->
          </properties>
      
          <dependencies>
              <!-- scala -->
              <dependency>
                  <groupId>org.scala-lang</groupId>
                  <artifactId>scala-library</artifactId>
                  <version>2.11.7</version>
                  <scope>${provided.scope}</scope>
              </dependency>
              <dependency>
                  <groupId>org.apache.spark</groupId>
                  <artifactId>spark-core_2.11</artifactId>
                  <version>${spark.version}</version>
                  <scope>${provided.scope}</scope>
              </dependency>
              <dependency>
                  <groupId>org.apache.spark</groupId>
                  <artifactId>spark-streaming_2.11</artifactId>
                  <version>${spark.version}</version>
                  <scope>${provided.scope}</scope>
              </dependency>
              <dependency>
                  <groupId>org.apache.spark</groupId>
                  <artifactId>spark-sql_2.11</artifactId>
                  <version>${spark.version}</version>
                  <scope>${provided.scope}</scope>
              </dependency>
              <dependency>
                  <groupId>org.apache.spark</groupId>
                  <artifactId>spark-hive_2.11</artifactId>
                  <version>${spark.version}</version>
                  <scope>${provided.scope}</scope>
              </dependency>
              <dependency>
                  <groupId>org.apache.spark</groupId>
                  <artifactId>spark-yarn_2.11</artifactId>
                  <version>${spark.version}</version>
                  <scope>${provided.scope}</scope>
              </dependency>
              <dependency>
                  <groupId>org.apache.spark</groupId>
                  <artifactId>spark-sql-kafka-0-10_2.11</artifactId>
                  <version>${spark.version}</version>
                  <scope>${provided.scope}</scope>
              </dependency>
              <dependency>
                  <groupId>org.apache.spark</groupId>
                  <artifactId>spark-streaming-kafka-0-10_2.11</artifactId>
                  <version>${spark.version}</version>
                  <scope>${provided.scope}</scope>
              </dependency>
              <dependency>
                  <groupId>org.apache.kafka</groupId>
                  <artifactId>kafka_2.11</artifactId>
                  <version>0.10.0.1</version>
                  <!--<scope>${provided.scope}</scope>-->
              </dependency>
              <dependency>
                  <groupId>org.apache.kafka</groupId>
                  <artifactId>kafka-clients</artifactId>
                  <version>0.11.0.2</version>
                  <!--<scope>${provided.scope}</scope>-->
              </dependency>
          </dependencies>
      
          <build>
              <pluginManagement>
                  <plugins>
                      <plugin>
                          <groupId>org.apache.maven.plugins</groupId>
                          <artifactId>maven-compiler-plugin</artifactId>
                          <version>3.8.0</version>
                          <configuration>
                              <source>1.8</source>
                              <target>1.8</target>
                          </configuration>
                      </plugin>
                      <plugin>
                          <groupId>org.apache.maven.plugins</groupId>
                          <artifactId>maven-resources-plugin</artifactId>
                          <version>3.0.2</version>
                          <configuration>
                              <encoding>UTF-8</encoding>
                          </configuration>
                      </plugin>
                      <plugin>
                          <groupId>net.alchim31.maven</groupId>
                          <artifactId>scala-maven-plugin</artifactId>
                          <version>3.2.2</version>
                          <executions>
                              <execution>
                                  <goals>
                                      <goal>compile</goal>
                                      <goal>testCompile</goal>
                                  </goals>
                              </execution>
                          </executions>
                      </plugin>
                  </plugins>
              </pluginManagement>
              <plugins>
                  <plugin>
                      <groupId>net.alchim31.maven</groupId>
                      <artifactId>scala-maven-plugin</artifactId>
                      <executions>
                          <execution>
                              <id>scala-compile-first</id>
                              <phase>process-resources</phase>
                              <goals>
                                  <goal>add-source</goal>
                                  <goal>compile</goal>
                              </goals>
                          </execution>
                          <execution>
                              <id>scala-test-compile</id>
                              <phase>process-test-resources</phase>
                              <goals>
                                  <goal>testCompile</goal>
                              </goals>
                          </execution>
                      </executions>
                  </plugin>
      
                  <plugin>
                      <groupId>org.apache.maven.plugins</groupId>
                      <artifactId>maven-compiler-plugin</artifactId>
                      <executions>
                          <execution>
                              <phase>compile</phase>
                              <goals>
                                  <goal>compile</goal>
                              </goals>
                          </execution>
                      </executions>
                  </plugin>
      
                  <plugin>
                      <groupId>org.apache.maven.plugins</groupId>
                      <artifactId>maven-shade-plugin</artifactId>
                      <version>2.4.3</version>
                      <executions>
                          <execution>
                              <phase>package</phase>
                              <goals>
                                  <goal>shade</goal>
                              </goals>
                              <configuration>
                                  <filters>
                                      <filter>
                                          <artifact>*:*</artifact>
                                          <excludes>
                                              <exclude>META-INF/*.SF</exclude>
                                              <exclude>META-INF/*.DSA</exclude>
                                              <exclude>META-INF/*.RSA</exclude>
                                          </excludes>
                                      </filter>
                                  </filters>
                              </configuration>
                          </execution>
                      </executions>
                  </plugin>
              </plugins>
              <resources>
                  <resource>
                      <directory>${basedir}/src/main/resources</directory>
                      <excludes>
                          <exclude>env/*/*</exclude>
                      </excludes>
                      <includes>
                          <include>**/*</include>
                      </includes>
                  </resource>
                  <resource>
                      <directory>${basedir}/src/main/resources/env/${profile.active}</directory>
                      <includes>
                          <include>**/*.properties</include>
                          <include>**/*.xml</include>
                      </includes>
                  </resource>
              </resources>
          </build>
          <profiles>
              <profile>
                  <id>dev</id>
                  <properties>
                      <profile.active>dev</profile.active>
                  </properties>
                  <activation>
                      <activeByDefault>true</activeByDefault>
                  </activation>
              </profile>
              <profile>
                  <id>test</id>
                  <properties>
                      <profile.active>test</profile.active>
                  </properties>
              </profile>
              <profile>
                  <id>prod</id>
                  <properties>
                      <profile.active>prod</profile.active>
                  </properties>
              </profile>
          </profiles>
      </project>
      
    3. Run output
      ......
      Connected to the target VM, address: '127.0.0.1:49723', transport: 'socket'
      Using Spark's default log4j profile: org/apache/spark/log4j-defaults.properties
      19/09/27 15:32:47 INFO SparkContext: Running Spark version 2.2.0
      19/09/27 15:32:47 WARN SparkConf: spark.master yarn-client is deprecated in Spark 2.0+, please instead use "yarn" with specified deploy mode.
      19/09/27 15:32:47 INFO SparkContext: Submitted application: Remote_Submit_App
      19/09/27 15:32:47 INFO SecurityManager: Changing view acls to: 110610172
      19/09/27 15:32:47 INFO SecurityManager: Changing modify acls to: 110610172
      19/09/27 15:32:47 INFO SecurityManager: Changing view acls groups to: 
      19/09/27 15:32:47 INFO SecurityManager: Changing modify acls groups to: 
      19/09/27 15:32:47 INFO SecurityManager: SecurityManager: authentication disabled; ui acls disabled; users  with view permissions: Set(110610172); groups with view permissions: Set(); users  with modify permissions: Set(110610172); groups with modify permissions: Set()
      19/09/27 15:32:48 INFO Utils: Successfully started service 'sparkDriver' on port 49747.
      19/09/27 15:32:48 INFO SparkEnv: Registering MapOutputTracker
      19/09/27 15:32:48 INFO SparkEnv: Registering BlockManagerMaster
      19/09/27 15:32:48 INFO BlockManagerMasterEndpoint: Using org.apache.spark.storage.DefaultTopologyMapper for getting topology information
      19/09/27 15:32:48 INFO BlockManagerMasterEndpoint: BlockManagerMasterEndpoint up
      19/09/27 15:32:48 INFO DiskBlockManager: Created local directory at C:\Users\110610172\AppData\Local\Temp\blockmgr-c580e3ec-3b0f-4365-8766-387e0c4a3947
      19/09/27 15:32:48 INFO MemoryStore: MemoryStore started with capacity 1989.6 MB
      19/09/27 15:32:48 INFO SparkEnv: Registering OutputCommitCoordinator
      19/09/27 15:32:48 INFO Utils: Successfully started service 'SparkUI' on port 4040.
      19/09/27 15:32:48 INFO SparkUI: Bound SparkUI to 0.0.0.0, and started at http://192.168.1.26:4040
      19/09/27 15:32:48 INFO SparkContext: Added JAR E:\RemoteSubmitSparkToYarn\target\RemoteSubmitSparkToYarn-1.0-SNAPSHOT.jar at spark://192.168.1.26:49747/jars/RemoteSubmitSparkToYarn-1.0-SNAPSHOT.jar with timestamp 1569569568596
      19/09/27 15:32:50 INFO ConfiguredRMFailoverProxyProvider: Failing over to rm381
      19/09/27 15:32:50 INFO Client: Requesting a new application from cluster with 7 NodeManagers
      19/09/27 15:32:50 INFO Client: Verifying our application has not requested more than the maximum memory capability of the cluster (12288 MB per container)
      19/09/27 15:32:50 INFO Client: Will allocate AM container, with 896 MB memory including 384 MB overhead
      19/09/27 15:32:50 INFO Client: Setting up container launch context for our AM
      19/09/27 15:32:50 INFO Client: Setting up the launch environment for our AM container
      19/09/27 15:32:50 INFO Client: Preparing resources for our AM container
      19/09/27 15:32:51 WARN Client: Neither spark.yarn.jars nor spark.yarn.archive is set, falling back to uploading libraries under SPARK_HOME.
      19/09/27 15:32:54 INFO Client: Uploading resource file:/C:/Users/110610172/AppData/Local/Temp/spark-46819e6c-4520-4e75-b7b0-0374e0020d36/__spark_libs__4420363360244802432.zip -> hdfs://cdh01:8020/user/110610172/.sparkStaging/application_1568096913481_0456/__spark_libs__4420363360244802432.zip
      19/09/27 15:32:54 WARN NativeCodeLoader: Unable to load native-hadoop library for your platform... using builtin-java classes where applicable
      19/09/27 15:32:57 INFO Client: Uploading resource file:/C:/Users/110610172/AppData/Local/Temp/spark-46819e6c-4520-4e75-b7b0-0374e0020d36/__spark_conf__4989294758151956703.zip -> hdfs://cdh01:8020/user/110610172/.sparkStaging/application_1568096913481_0456/__spark_conf__.zip
      19/09/27 15:32:57 INFO SecurityManager: Changing view acls to: 110610172
      19/09/27 15:32:57 INFO SecurityManager: Changing modify acls to: 110610172
      19/09/27 15:32:57 INFO SecurityManager: Changing view acls groups to: 
      19/09/27 15:32:57 INFO SecurityManager: Changing modify acls groups to: 
      19/09/27 15:32:57 INFO SecurityManager: SecurityManager: authentication disabled; ui acls disabled; users  with view permissions: Set(110610172); groups with view permissions: Set(); users  with modify permissions: Set(110610172); groups with modify permissions: Set()
      19/09/27 15:32:57 INFO Client: Submitting application application_1568096913481_0456 to ResourceManager
      19/09/27 15:32:57 INFO YarnClientImpl: Submitted application application_1568096913481_0456
      19/09/27 15:32:57 INFO SchedulerExtensionServices: Starting Yarn extension services with app application_1568096913481_0456 and attemptId None
      19/09/27 15:32:58 INFO Client: Application report for application_1568096913481_0456 (state: ACCEPTED)
      19/09/27 15:32:58 INFO Client: 
         client token: N/A
         diagnostics: N/A
         ApplicationMaster host: N/A
         ApplicationMaster RPC port: -1
         queue: root.users.110610172
         start time: 1569569577390
         final status: UNDEFINED
         tracking URL: http://cdh02:8088/proxy/application_1568096913481_0456/
         user: 110610172
      19/09/27 15:32:59 INFO Client: Application report for application_1568096913481_0456 (state: ACCEPTED)
      19/09/27 15:33:00 INFO Client: Application report for application_1568096913481_0456 (state: ACCEPTED)
      19/09/27 15:33:01 INFO Client: Application report for application_1568096913481_0456 (state: ACCEPTED)
      19/09/27 15:33:01 INFO YarnSchedulerBackend$YarnSchedulerEndpoint: ApplicationMaster registered as NettyRpcEndpointRef(spark-client://YarnAM)
      19/09/27 15:33:01 INFO YarnClientSchedulerBackend: Add WebUI Filter. org.apache.hadoop.yarn.server.webproxy.amfilter.AmIpFilter, Map(PROXY_HOSTS -> cdh01,cdh02, PROXY_URI_BASES -> http://cdh01:8088/proxy/application_1568096913481_0456,http://cdh02:8088/proxy/application_1568096913481_0456), /proxy/application_1568096913481_0456
      19/09/27 15:33:01 INFO JettyUtils: Adding filter: org.apache.hadoop.yarn.server.webproxy.amfilter.AmIpFilter
      19/09/27 15:33:02 INFO Client: Application report for application_1568096913481_0456 (state: RUNNING)
      19/09/27 15:33:02 INFO Client: 
         client token: N/A
         diagnostics: N/A
         ApplicationMaster host: 10.101.75.194
         ApplicationMaster RPC port: 0
         queue: root.users.110610172
         start time: 1569569577390
         final status: UNDEFINED
         tracking URL: http://cdh02:8088/proxy/application_1568096913481_0456/
         user: 110610172
      19/09/27 15:33:02 INFO YarnClientSchedulerBackend: Application application_1568096913481_0456 has started running.
      19/09/27 15:33:02 INFO Utils: Successfully started service 'org.apache.spark.network.netty.NettyBlockTransferService' on port 49796.
      19/09/27 15:33:02 INFO NettyBlockTransferService: Server created on 192.168.1.26:49796
      19/09/27 15:33:02 INFO BlockManager: Using org.apache.spark.storage.RandomBlockReplicationPolicy for block replication policy
      19/09/27 15:33:02 INFO BlockManagerMaster: Registering BlockManager BlockManagerId(driver, 192.168.1.26, 49796, None)
      19/09/27 15:33:02 INFO BlockManagerMasterEndpoint: Registering block manager 192.168.1.26:49796 with 1989.6 MB RAM, BlockManagerId(driver, 192.168.1.26, 49796, None)
      19/09/27 15:33:02 INFO BlockManagerMaster: Registered BlockManager BlockManagerId(driver, 192.168.1.26, 49796, None)
      19/09/27 15:33:02 INFO BlockManager: Initialized BlockManager: BlockManagerId(driver, 192.168.1.26, 49796, None)
      19/09/27 15:33:07 INFO YarnSchedulerBackend$YarnDriverEndpoint: Registered executor NettyRpcEndpointRef(spark-client://Executor) (10.101.75.190:10332) with ID 1
      19/09/27 15:33:07 INFO BlockManagerMasterEndpoint: Registering block manager cdh04:24916 with 246.9 MB RAM, BlockManagerId(1, cdh04, 24916, None)
      19/09/27 15:33:07 INFO YarnSchedulerBackend$YarnDriverEndpoint: Registered executor NettyRpcEndpointRef(spark-client://Executor) (10.101.75.190:10334) with ID 2
      19/09/27 15:33:08 INFO BlockManagerMasterEndpoint: Registering block manager cdh04:27337 with 246.9 MB RAM, BlockManagerId(2, cdh04, 27337, None)
      19/09/27 15:33:08 INFO YarnClientSchedulerBackend: SchedulerBackend is ready for scheduling beginning after reached minRegisteredResourcesRatio: 0.8
      19/09/27 15:33:08 WARN KafkaUtils: overriding enable.auto.commit to false for executor
      19/09/27 15:33:08 WARN KafkaUtils: overriding auto.offset.reset to none for executor
      19/09/27 15:33:08 WARN KafkaUtils: overriding executor group.id to spark-executor-remote_test
      19/09/27 15:33:08 WARN KafkaUtils: overriding receive.buffer.bytes to 65536 see KAFKA-3135
      -------------------------------------------
      Time: 1569569610000 ms
      -------------------------------------------
      (assigned,10)
      (serializer,2)
      (Setting,10)
      (rdd.count(),1)
      (class,2)
      (=,2)
      (newly,10)
      (partitions,10)
      
      -------------------------------------------
      Time: 1569569640000 ms
      -------------------------------------------
      
      -------------------------------------------
      Time: 1569569670000 ms
      -------------------------------------------
      ......
      
    4. Viewing the application on the cluster

      YARN --> Applications

      [screenshot: the submitted application listed on the YARN Applications page]
  2. Problems encountered
    1. Spark version mismatch between the client and the cluster

      Error log:

      19/09/27 11:01:38 ERROR TransportRequestHandler: Error while invoking RpcHandler#receive() for one-way message.
      java.io.InvalidClassException: org.apache.spark.rpc.RpcEndpointRef; local class incompatible: stream classdesc serialVersionUID = -1329125091869941550, local class serialVersionUID = 1835832137613908542
        at java.io.ObjectStreamClass.initNonProxy(ObjectStreamClass.java:616)
        at java.io.ObjectInputStream.readNonProxyDesc(ObjectInputStream.java:1630)
        at java.io.ObjectInputStream.readClassDesc(ObjectInputStream.java:1521)
        at java.io.ObjectInputStream.readNonProxyDesc(ObjectInputStream.java:1630)
        at java.io.ObjectInputStream.readClassDesc(ObjectInputStream.java:1521)
        at java.io.ObjectInputStream.readOrdinaryObject(ObjectInputStream.java:1781)
        at java.io.ObjectInputStream.readObject0(ObjectInputStream.java:1353)
        at java.io.ObjectInputStream.defaultReadFields(ObjectInputStream.java:2018)
        at java.io.ObjectInputStream.readSerialData(ObjectInputStream.java:1942)
        at java.io.ObjectInputStream.readOrdinaryObject(ObjectInputStream.java:1808)
        at java.io.ObjectInputStream.readObject0(ObjectInputStream.java:1353)
        at java.io.ObjectInputStream.readObject(ObjectInputStream.java:373)
        at org.apache.spark.serializer.JavaDeserializationStream.readObject(JavaSerializer.scala:75)
        at org.apache.spark.serializer.JavaSerializerInstance.deserialize(JavaSerializer.scala:108)
        at org.apache.spark.rpc.netty.NettyRpcEnv$$anonfun$deserialize$1$$anonfun$apply$1.apply(NettyRpcEnv.scala:267)
        at scala.util.DynamicVariable.withValue(DynamicVariable.scala:58)
        at org.apache.spark.rpc.netty.NettyRpcEnv.deserialize(NettyRpcEnv.scala:316)
        at org.apache.spark.rpc.netty.NettyRpcEnv$$anonfun$deserialize$1.apply(NettyRpcEnv.scala:266)
        at scala.util.DynamicVariable.withValue(DynamicVariable.scala:58)
        at org.apache.spark.rpc.netty.NettyRpcEnv.deserialize(NettyRpcEnv.scala:265)
        at org.apache.spark.rpc.netty.RequestMessage$.apply(NettyRpcEnv.scala:600)
        at org.apache.spark.rpc.netty.NettyRpcHandler.internalReceive(NettyRpcEnv.scala:651)
        at org.apache.spark.rpc.netty.NettyRpcHandler.receive(NettyRpcEnv.scala:643)
        at org.apache.spark.network.server.TransportRequestHandler.processOneWayMessage(TransportRequestHandler.java:178)
        at org.apache.spark.network.server.TransportRequestHandler.handle(TransportRequestHandler.java:107)
        at org.apache.spark.network.server.TransportChannelHandler.channelRead(TransportChannelHandler.java:118)
        at io.netty.channel.AbstractChannelHandlerContext.invokeChannelRead(AbstractChannelHandlerContext.java:357)
        at io.netty.channel.AbstractChannelHandlerContext.invokeChannelRead(AbstractChannelHandlerContext.java:343)
        at io.netty.channel.AbstractChannelHandlerContext.fireChannelRead(AbstractChannelHandlerContext.java:336)
        at io.netty.handler.timeout.IdleStateHandler.channelRead(IdleStateHandler.java:287)
        at io.netty.channel.AbstractChannelHandlerContext.invokeChannelRead(AbstractChannelHandlerContext.java:357)
        at io.netty.channel.AbstractChannelHandlerContext.invokeChannelRead(AbstractChannelHandlerContext.java:343)
        at io.netty.channel.AbstractChannelHandlerContext.fireChannelRead(AbstractChannelHandlerContext.java:336)
        at io.netty.handler.codec.MessageToMessageDecoder.channelRead(MessageToMessageDecoder.java:102)
        at io.netty.channel.AbstractChannelHandlerContext.invokeChannelRead(AbstractChannelHandlerContext.java:357)
        at io.netty.channel.AbstractChannelHandlerContext.invokeChannelRead(AbstractChannelHandlerContext.java:343)
        at io.netty.channel.AbstractChannelHandlerContext.fireChannelRead(AbstractChannelHandlerContext.java:336)
        at org.apache.spark.network.util.TransportFrameDecoder.channelRead(TransportFrameDecoder.java:85)
        at io.netty.channel.AbstractChannelHandlerContext.invokeChannelRead(AbstractChannelHandlerContext.java:357)
        at io.netty.channel.AbstractChannelHandlerContext.invokeChannelRead(AbstractChannelHandlerContext.java:343)
        at io.netty.channel.AbstractChannelHandlerContext.fireChannelRead(AbstractChannelHandlerContext.java:336)
        at io.netty.channel.DefaultChannelPipeline$HeadContext.channelRead(DefaultChannelPipeline.java:1294)
        at io.netty.channel.AbstractChannelHandlerContext.invokeChannelRead(AbstractChannelHandlerContext.java:357)
        at io.netty.channel.AbstractChannelHandlerContext.invokeChannelRead(AbstractChannelHandlerContext.java:343)
        at io.netty.channel.DefaultChannelPipeline.fireChannelRead(DefaultChannelPipeline.java:911)
        at io.netty.channel.nio.AbstractNioByteChannel$NioByteUnsafe.read(AbstractNioByteChannel.java:131)
        at io.netty.channel.nio.NioEventLoop.processSelectedKey(NioEventLoop.java:643)
        at io.netty.channel.nio.NioEventLoop.processSelectedKeysOptimized(NioEventLoop.java:566)
        at io.netty.channel.nio.NioEventLoop.processSelectedKeys(NioEventLoop.java:480)
        at io.netty.channel.nio.NioEventLoop.run(NioEventLoop.java:442)
        at io.netty.util.concurrent.SingleThreadEventExecutor$2.run(SingleThreadEventExecutor.java:131)
        at io.netty.util.concurrent.DefaultThreadFactory$DefaultRunnableDecorator.run(DefaultThreadFactory.java:144)
        at java.lang.Thread.run(Thread.java:745)
      

      Solution:

      Download the same Spark version as the one running on the cluster. Download address: https://archive.apache.org/dist/spark/
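
      To double-check which Spark version the project actually resolved before submitting, a minimal sketch (not part of the original article) is to print the client-side version and compare it with the one reported by the cluster:

      // Print the Spark version on the classpath of the IDEA project; it must match the cluster version
      println(s"Client Spark version: ${org.apache.spark.SPARK_VERSION}")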

    2. SPARK_HOME environment variable not set

      Error log:

      Exception in thread "main" java.lang.IllegalStateException: Library directory 'E:\RemoteSubmitSparkToYarn\assembly\target\scala-2.11\jars' does not exist; make sure Spark is built.
        at org.apache.spark.launcher.CommandBuilderUtils.checkState(CommandBuilderUtils.java:248)
        at org.apache.spark.launcher.CommandBuilderUtils.findJarsDir(CommandBuilderUtils.java:347)
        at org.apache.spark.launcher.YarnCommandBuilderUtils$.findJarsDir(YarnCommandBuilderUtils.scala:38)
        at org.apache.spark.deploy.yarn.Client.prepareLocalResources(Client.scala:526)
        at org.apache.spark.deploy.yarn.Client.createContainerLaunchContext(Client.scala:814)
        at org.apache.spark.deploy.yarn.Client.submitApplication(Client.scala:169)
        at org.apache.spark.scheduler.cluster.YarnClientSchedulerBackend.start(YarnClientSchedulerBackend.scala:56)
        at org.apache.spark.scheduler.TaskSchedulerImpl.start(TaskSchedulerImpl.scala:173)
        at org.apache.spark.SparkContext.<init>(SparkContext.scala:509)
        at org.apache.spark.streaming.StreamingContext$.createNewSparkContext(StreamingContext.scala:839)
        at org.apache.spark.streaming.StreamingContext.<init>(StreamingContext.scala:85)
        at com.cloudera.RemoteSubmitApp$.main(RemoteSubmitApp.scala:33)
        at com.cloudera.RemoteSubmitApp.main(RemoteSubmitApp.scala)
      

      Solution:
      Set the SPARK_HOME environment variable on the local machine.

      [screenshot: SPARK_HOME set as a system environment variable]

      Or set SPARK_HOME in the run configuration in IDEA.

      [screenshot: SPARK_HOME set in the IDEA run configuration]
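
      The run output above also warns that "Neither spark.yarn.jars nor spark.yarn.archive is set". As an alternative to depending on a local SPARK_HOME, the Spark jars can be uploaded to HDFS once and referenced explicitly; a hedged sketch (the HDFS directory /spark-jars is hypothetical and must be populated with the cluster's Spark 2.2.0 jars beforehand):

      // Assumption: the jars from $SPARK_HOME/jars were copied to hdfs://cdh01:8020/spark-jars/ in advance
      conf.set("spark.yarn.jars", "hdfs://cdh01:8020/spark-jars/*.jar")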

    3. Permission denied when writing to HDFS

    Error log:

    Caused by: org.apache.hadoop.ipc.RemoteException(org.apache.hadoop.security.AccessControlException): Permission denied: user=charles, access=WRITE, inode="/user":root:supergroup:drwxr-xr-x
      at org.apache.hadoop.hdfs.server.namenode.FSPermissionChecker.check(FSPermissionChecker.java:342)
      at org.apache.hadoop.hdfs.server.namenode.FSPermissionChecker.checkPermission(FSPermissionChecker.java:251)
      at org.apache.hadoop.hdfs.server.namenode.FSPermissionChecker.checkPermission(FSPermissionChecker.java:189)
      at org.apache.hadoop.hdfs.server.namenode.FSDirectory.checkPermission(FSDirectory.java:1744)
      at org.apache.hadoop.hdfs.server.namenode.FSDirectory.checkPermission(FSDirectory.java:1728)
      at org.apache.hadoop.hdfs.server.namenode.FSDirectory.checkAncestorAccess(FSDirectory.java:1687)
      at org.apache.hadoop.hdfs.server.namenode.FSDirMkdirOp.mkdirs(FSDirMkdirOp.java:60)
      at org.apache.hadoop.hdfs.server.namenode.FSNamesystem.mkdirs(FSNamesystem.java:2980)
      at org.apache.hadoop.hdfs.server.namenode.NameNodeRpcServer.mkdirs(NameNodeRpcServer.java:1096)
      at org.apache.hadoop.hdfs.protocolPB.ClientNamenodeProtocolServerSideTranslatorPB.mkdirs(ClientNamenodeProtocolServerSideTranslatorPB.java:652)
      at org.apache.hadoop.hdfs.protocol.proto.ClientNamenodeProtocolProtos$ClientNamenodeProtocol$2.callBlockingMethod(ClientNamenodeProtocolProtos.java)
      at org.apache.hadoop.ipc.ProtobufRpcEngine$Server$ProtoBufRpcInvoker.call(ProtobufRpcEngine.java:503)
      at org.apache.hadoop.ipc.RPC$Server.call(RPC.java:989)
      at org.apache.hadoop.ipc.Server$RpcCall.run(Server.java:868)
      at org.apache.hadoop.ipc.Server$RpcCall.run(Server.java:814)
      at java.security.AccessController.doPrivileged(Native Method)
      at javax.security.auth.Subject.doAs(Subject.java:422)
      at org.apache.hadoop.security.UserGroupInformation.doAs(UserGroupInformation.java:1886)
      at org.apache.hadoop.ipc.Server$Handler.run(Server.java:2603)
    

    Solution:
    Add the following line to the code so that the job is submitted as the root user.

    System.setProperty("HADOOP_USER_NAME", "root")
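
    Placement matters: the property has to be set before the SparkConf / StreamingContext is created, because Hadoop reads HADOOP_USER_NAME when it first determines the login user. A minimal placement sketch (illustration only):

    def main(args: Array[String]): Unit = {
      // Must run before any Spark/Hadoop client code touches HDFS or YARN
      System.setProperty("HADOOP_USER_NAME", "root")
      val conf = new SparkConf().setAppName("Remote_Submit_App")
      // ... remaining configuration as in the example above
    }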
    
    4. /etc/hadoop/conf.cloudera.yarn/topology.py cannot be executed

    Error log:

    java.io.IOException: Cannot run program "/etc/hadoop/conf.cloudera.yarn/topology.py" (in directory "E:\RemoteSubmitSparkToYarn"): CreateProcess error=2, The system cannot find the file specified.
      at java.lang.ProcessBuilder.start(ProcessBuilder.java:1048)
      at org.apache.hadoop.util.Shell.runCommand(Shell.java:519)
      at org.apache.hadoop.util.Shell.run(Shell.java:478)
      at org.apache.hadoop.util.Shell$ShellCommandExecutor.execute(Shell.java:766)
      at org.apache.hadoop.net.ScriptBasedMapping$RawScriptBasedMapping.runResolveCommand(ScriptBasedMapping.java:251)
      at org.apache.hadoop.net.ScriptBasedMapping$RawScriptBasedMapping.resolve(ScriptBasedMapping.java:188)
      at org.apache.hadoop.net.CachedDNSToSwitchMapping.resolve(CachedDNSToSwitchMapping.java:119)
      at org.apache.hadoop.yarn.util.RackResolver.coreResolve(RackResolver.java:101)
      at org.apache.hadoop.yarn.util.RackResolver.resolve(RackResolver.java:81)
      at org.apache.spark.scheduler.cluster.YarnScheduler.getRackForHost(YarnScheduler.scala:37)
      at org.apache.spark.scheduler.TaskSchedulerImpl$$anonfun$resourceOffers$1.apply(TaskSchedulerImpl.scala:337)
      at org.apache.spark.scheduler.TaskSchedulerImpl$$anonfun$resourceOffers$1.apply(TaskSchedulerImpl.scala:326)
      at scala.collection.Iterator$class.foreach(Iterator.scala:742)
      at scala.collection.AbstractIterator.foreach(Iterator.scala:1194)
      at scala.collection.IterableLike$class.foreach(IterableLike.scala:72)
      at scala.collection.AbstractIterable.foreach(Iterable.scala:54)
      at org.apache.spark.scheduler.TaskSchedulerImpl.resourceOffers(TaskSchedulerImpl.scala:326)
      at org.apache.spark.scheduler.cluster.CoarseGrainedSchedulerBackend$DriverEndpoint.org$apache$spark$scheduler$cluster$CoarseGrainedSchedulerBackend$DriverEndpoint$$makeOffers(CoarseGrainedSchedulerBackend.scala:237)
      at org.apache.spark.scheduler.cluster.CoarseGrainedSchedulerBackend$DriverEndpoint$$anonfun$receiveAndReply$1.applyOrElse(CoarseGrainedSchedulerBackend.scala:200)
      at org.apache.spark.rpc.netty.Inbox$$anonfun$process$1.apply$mcV$sp(Inbox.scala:105)
      at org.apache.spark.rpc.netty.Inbox.safelyCall(Inbox.scala:205)
      at org.apache.spark.rpc.netty.Inbox.process(Inbox.scala:101)
      at org.apache.spark.rpc.netty.Dispatcher$MessageLoop.run(Dispatcher.scala:213)
      at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1142)
      at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:617)
      at java.lang.Thread.run(Thread.java:745)
    

    Solution:
    In core-site.xml, change the net.topology.script.file.name property and comment out /etc/hadoop/conf.cloudera.yarn/topology.py.

      <property>
        <name>net.topology.script.file.name</name>
        <value><!--/etc/hadoop/conf.cloudera.yarn/topology.py--></value>
      </property>
    
    5. spark.driver.host not set

    Error log:

    cationMaster: Failed to connect to driver at 192.168.1.26:34010, retrying ...
    19/09/27 15:12:48 ERROR ApplicationMaster: Failed to connect to driver at 192.168.1.26:34010, retrying ...
    19/09/27 15:12:48 ERROR ApplicationMaster: Uncaught exception: 
    org.apache.spark.SparkException: Failed to connect to driver!
      at org.apache.spark.deploy.yarn.ApplicationMaster.waitForSparkDriver(ApplicationMaster.scala:577)
      at org.apache.spark.deploy.yarn.ApplicationMaster.runExecutorLauncher(ApplicationMaster.scala:433)
      at org.apache.spark.deploy.yarn.ApplicationMaster.run(ApplicationMaster.scala:256)
      at org.apache.spark.deploy.yarn.ApplicationMaster$$anonfun$main$1.apply$mcV$sp(ApplicationMaster.scala:764)
      at org.apache.spark.deploy.SparkHadoopUtil$$anon$2.run(SparkHadoopUtil.scala:67)
      at org.apache.spark.deploy.SparkHadoopUtil$$anon$2.run(SparkHadoopUtil.scala:66)
      at java.security.AccessController.doPrivileged(Native Method)
      at javax.security.auth.Subject.doAs(Subject.java:422)
      at org.apache.hadoop.security.UserGroupInformation.doAs(UserGroupInformation.java:1692)
      at org.apache.spark.deploy.SparkHadoopUtil.runAsSparkUser(SparkHadoopUtil.scala:66)
      at org.apache.spark.deploy.yarn.ApplicationMaster$.main(ApplicationMaster.scala:762)
      at org.apache.spark.deploy.yarn.ExecutorLauncher$.main(ApplicationMaster.scala:785)
      at org.apache.spark.deploy.yarn.ExecutorLauncher.main(ApplicationMaster.scala)
    

    Solution:

    This error occurs because the driver host was not set. In yarn-client mode the driver runs on the local machine, so spark.driver.host must be set to the local IP address; otherwise the ApplicationMaster cannot connect back to the driver.

    .set("spark.driver.host", "192.168.1.26")
    

Note:

  1. core-site.xml, hdfs-site.xml and yarn-site.xml must be placed under the resources directory; the program needs these cluster configurations at runtime (a quick classpath check is sketched after this list).
  2. After modifying the code, rebuild and repackage the jar; otherwise you will get errors such as "ERROR YarnScheduler: Lost executor 2 on cdh03: Container container_e01_1568096913481_0453_01_000005 exited from explicit termination".
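
  A minimal sketch of such a check (the file names are the ones listed above; this snippet is an illustration, not part of the original article):

    // Fail fast if the Hadoop/YARN client configs are missing from src/main/resources
    Seq("core-site.xml", "hdfs-site.xml", "yarn-site.xml").foreach { name =>
      require(getClass.getClassLoader.getResource(name) != null,
        s"$name not found on the classpath -- place it under src/main/resources")
    }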