-
Example code
-
RemoteSubmitApp main class
package com.cloudera

import org.apache.kafka.clients.consumer.ConsumerConfig
import org.apache.kafka.common.serialization.StringDeserializer
import org.apache.log4j.Logger
import org.apache.spark.SparkConf
import org.apache.spark.streaming.dstream.DStream
import org.apache.spark.streaming.{Seconds, StreamingContext}
import org.apache.spark.streaming.kafka010.{ConsumerStrategies, KafkaUtils, LocationStrategies}

object RemoteSubmitApp {

  val logger = Logger.getLogger(this.getClass)

  def main(args: Array[String]): Unit = {
    // Set the user that submits the job
    // System.setProperty("HADOOP_USER_NAME", "root")
    val conf = new SparkConf().setAppName("Remote_Submit_App")
      // Submit in yarn-client mode
      .setMaster("yarn-client")
      // Hostname of the ResourceManager
      .set("yarn.resourcemanager.hostname", "cdh02")
      // Driver memory
      .set("spark.driver.memory", "1024M")
      // Executor memory
      .set("spark.executor.memory", "800M")
      // Number of executors
      .set("spark.executor.instances", "2")
      // YARN queue to submit to
      // .set("spark.yarn.queue", "default")
      // Driver IP, i.e. the IP of the local machine
      .set("spark.driver.host", "192.168.1.26")
      // Serializer
      // .set("spark.serializer", "org.apache.spark.serializer.KryoSerializer")
      // Path of the application jar; additional dependency jars can be added here, comma separated
      .setJars(List("E:\\RemoteSubmitSparkToYarn\\target\\RemoteSubmitSparkToYarn-1.0-SNAPSHOT.jar"))

    val scc = new StreamingContext(conf, Seconds(30))
    scc.sparkContext.setLogLevel("WARN")
    // scc.checkpoint("checkpoint")

    val topic = "remote_submit_test"
    val topicSet = topic.split(",").toSet
    val kafkaParams = Map[String, Object](
      ConsumerConfig.BOOTSTRAP_SERVERS_CONFIG -> "10.101.75.190:9092,10.101.75.191:9092,10.101.75.192:9092",
      ConsumerConfig.KEY_DESERIALIZER_CLASS_CONFIG -> classOf[StringDeserializer],
      ConsumerConfig.VALUE_DESERIALIZER_CLASS_CONFIG -> classOf[StringDeserializer],
      ConsumerConfig.GROUP_ID_CONFIG -> "remote_test",
      ConsumerConfig.AUTO_OFFSET_RESET_CONFIG -> "earliest",
      ConsumerConfig.ENABLE_AUTO_COMMIT_CONFIG -> (false: java.lang.Boolean)
    )

    val kafkaStreams = KafkaUtils.createDirectStream[String, String](
      scc,
      LocationStrategies.PreferConsistent,
      ConsumerStrategies.Subscribe[String, String](topicSet, kafkaParams)
    )

    val wordCounts: DStream[(String, Long)] = kafkaStreams.map(_.value())
      .flatMap(_.split(" "))
      .map(x => (x, 1L))
      .reduceByKey(_ + _)

    wordCounts.print()

    // Start the stream
    scc.start()
    scc.awaitTermination()
  }
}
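Note that with enable.auto.commit set to false, this example never persists its Kafka offsets, so every restart re-reads the topic from the earliest offset. A minimal sketch of committing offsets back to Kafka after each batch, using the commitAsync API provided by spark-streaming-kafka-0-10 (added here for illustration; it is not part of the original example):

import org.apache.spark.streaming.kafka010.{CanCommitOffsets, HasOffsetRanges}

// Collect the offset ranges of each batch and commit them back to Kafka.
// commitAsync only queues the commit; it is actually sent on a later consumer poll.
kafkaStreams.foreachRDD { rdd =>
  val offsetRanges = rdd.asInstanceOf[HasOffsetRanges].offsetRanges
  kafkaStreams.asInstanceOf[CanCommitOffsets].commitAsync(offsetRanges)
}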
-
pom.xml file
<?xml version="1.0" encoding="UTF-8"?>
<project xmlns="http://maven.apache.org/POM/4.0.0"
         xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
         xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
    <modelVersion>4.0.0</modelVersion>

    <groupId>com.cloudera</groupId>
    <artifactId>RemoteSubmitSparkToYarn</artifactId>
    <version>1.0-SNAPSHOT</version>
    <packaging>jar</packaging>
    <name>RemoteSubmitSparkToYarn</name>

    <repositories>
        <!-- Cloudera repository -->
        <repository>
            <id>cloudera</id>
            <url>https://repository.cloudera.com/artifactory/cloudera-repos/</url>
            <name>Cloudera Repositories</name>
            <releases>
                <enabled>true</enabled>
            </releases>
            <snapshots>
                <enabled>false</enabled>
            </snapshots>
        </repository>
    </repositories>

    <properties>
        <project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
        <project.reporting.outputEncoding>UTF-8</project.reporting.outputEncoding>
        <java.version>1.8</java.version>
        <!--<spark.version>2.4.0-cdh6.1.1</spark.version>-->
        <spark.version>2.2.0</spark.version>
        <provided.scope>compile</provided.scope>
        <!--<provided.scope>provided</provided.scope>-->
    </properties>

    <dependencies>
        <!-- scala -->
        <dependency>
            <groupId>org.scala-lang</groupId>
            <artifactId>scala-library</artifactId>
            <version>2.11.7</version>
            <scope>${provided.scope}</scope>
        </dependency>
        <dependency>
            <groupId>org.apache.spark</groupId>
            <artifactId>spark-core_2.11</artifactId>
            <version>${spark.version}</version>
            <scope>${provided.scope}</scope>
        </dependency>
        <dependency>
            <groupId>org.apache.spark</groupId>
            <artifactId>spark-streaming_2.11</artifactId>
            <version>${spark.version}</version>
            <scope>${provided.scope}</scope>
        </dependency>
        <dependency>
            <groupId>org.apache.spark</groupId>
            <artifactId>spark-sql_2.11</artifactId>
            <version>${spark.version}</version>
            <scope>${provided.scope}</scope>
        </dependency>
        <dependency>
            <groupId>org.apache.spark</groupId>
            <artifactId>spark-hive_2.11</artifactId>
            <version>${spark.version}</version>
            <scope>${provided.scope}</scope>
        </dependency>
        <dependency>
            <groupId>org.apache.spark</groupId>
            <artifactId>spark-yarn_2.11</artifactId>
            <version>${spark.version}</version>
            <scope>${provided.scope}</scope>
        </dependency>
        <dependency>
            <groupId>org.apache.spark</groupId>
            <artifactId>spark-sql-kafka-0-10_2.11</artifactId>
            <version>${spark.version}</version>
            <scope>${provided.scope}</scope>
        </dependency>
        <dependency>
            <groupId>org.apache.spark</groupId>
            <artifactId>spark-streaming-kafka-0-10_2.11</artifactId>
            <version>${spark.version}</version>
            <scope>${provided.scope}</scope>
        </dependency>
        <dependency>
            <groupId>org.apache.kafka</groupId>
            <artifactId>kafka_2.11</artifactId>
            <version>0.10.0.1</version>
            <!--<scope>${provided.scope}</scope>-->
        </dependency>
        <dependency>
            <groupId>org.apache.kafka</groupId>
            <artifactId>kafka-clients</artifactId>
            <version>0.11.0.2</version>
            <!--<scope>${provided.scope}</scope>-->
        </dependency>
    </dependencies>

    <build>
        <pluginManagement>
            <plugins>
                <plugin>
                    <groupId>org.apache.maven.plugins</groupId>
                    <artifactId>maven-compiler-plugin</artifactId>
                    <version>3.8.0</version>
                    <configuration>
                        <source>1.8</source>
                        <target>1.8</target>
                    </configuration>
                </plugin>
                <plugin>
                    <groupId>org.apache.maven.plugins</groupId>
                    <artifactId>maven-resources-plugin</artifactId>
                    <version>3.0.2</version>
                    <configuration>
                        <encoding>UTF-8</encoding>
                    </configuration>
                </plugin>
                <plugin>
                    <groupId>net.alchim31.maven</groupId>
                    <artifactId>scala-maven-plugin</artifactId>
                    <version>3.2.2</version>
                    <executions>
                        <execution>
                            <goals>
                                <goal>compile</goal>
                                <goal>testCompile</goal>
                            </goals>
                        </execution>
                    </executions>
                </plugin>
            </plugins>
        </pluginManagement>
        <plugins>
            <plugin>
                <groupId>net.alchim31.maven</groupId>
                <artifactId>scala-maven-plugin</artifactId>
                <executions>
                    <execution>
                        <id>scala-compile-first</id>
                        <phase>process-resources</phase>
                        <goals>
                            <goal>add-source</goal>
                            <goal>compile</goal>
                        </goals>
                    </execution>
                    <execution>
                        <id>scala-test-compile</id>
                        <phase>process-test-resources</phase>
                        <goals>
                            <goal>testCompile</goal>
                        </goals>
                    </execution>
                </executions>
            </plugin>
            <plugin>
                <groupId>org.apache.maven.plugins</groupId>
                <artifactId>maven-compiler-plugin</artifactId>
                <executions>
                    <execution>
                        <phase>compile</phase>
                        <goals>
                            <goal>compile</goal>
                        </goals>
                    </execution>
                </executions>
            </plugin>
            <plugin>
                <groupId>org.apache.maven.plugins</groupId>
                <artifactId>maven-shade-plugin</artifactId>
                <version>2.4.3</version>
                <executions>
                    <execution>
                        <phase>package</phase>
                        <goals>
                            <goal>shade</goal>
                        </goals>
                        <configuration>
                            <filters>
                                <filter>
                                    <artifact>*:*</artifact>
                                    <excludes>
                                        <exclude>META-INF/*.SF</exclude>
                                        <exclude>META-INF/*.DSA</exclude>
                                        <exclude>META-INF/*.RSA</exclude>
                                    </excludes>
                                </filter>
                            </filters>
                        </configuration>
                    </execution>
                </executions>
            </plugin>
        </plugins>
        <resources>
            <resource>
                <directory>${basedir}/src/main/resources</directory>
                <excludes>
                    <exclude>env/*/*</exclude>
                </excludes>
                <includes>
                    <include>**/*</include>
                </includes>
            </resource>
            <resource>
                <directory>${basedir}/src/main/resources/env/${profile.active}</directory>
                <includes>
                    <include>**/*.properties</include>
                    <include>**/*.xml</include>
                </includes>
            </resource>
        </resources>
    </build>

    <profiles>
        <profile>
            <id>dev</id>
            <properties>
                <profile.active>dev</profile.active>
            </properties>
            <activation>
                <activeByDefault>true</activeByDefault>
            </activation>
        </profile>
        <profile>
            <id>test</id>
            <properties>
                <profile.active>test</profile.active>
            </properties>
        </profile>
        <profile>
            <id>prod</id>
            <properties>
                <profile.active>prod</profile.active>
            </properties>
        </profile>
    </profiles>
</project>
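To produce the jar that .setJars(...) points to, run the Maven package phase: the shade plugin bound above builds the fat jar, the dev profile is active by default, and -Ptest or -Pprod switches the env/ resource directory:

mvn clean package -Pdev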
-
Run output
......
Connected to the target VM, address: '127.0.0.1:49723', transport: 'socket'
Using Spark's default log4j profile: org/apache/spark/log4j-defaults.properties
19/09/27 15:32:47 INFO SparkContext: Running Spark version 2.2.0
19/09/27 15:32:47 WARN SparkConf: spark.master yarn-client is deprecated in Spark 2.0+, please instead use "yarn" with specified deploy mode.
19/09/27 15:32:47 INFO SparkContext: Submitted application: Remote_Submit_App
19/09/27 15:32:47 INFO SecurityManager: Changing view acls to: 110610172
19/09/27 15:32:47 INFO SecurityManager: Changing modify acls to: 110610172
19/09/27 15:32:47 INFO SecurityManager: Changing view acls groups to:
19/09/27 15:32:47 INFO SecurityManager: Changing modify acls groups to:
19/09/27 15:32:47 INFO SecurityManager: SecurityManager: authentication disabled; ui acls disabled; users with view permissions: Set(110610172); groups with view permissions: Set(); users with modify permissions: Set(110610172); groups with modify permissions: Set()
19/09/27 15:32:48 INFO Utils: Successfully started service 'sparkDriver' on port 49747.
19/09/27 15:32:48 INFO SparkEnv: Registering MapOutputTracker
19/09/27 15:32:48 INFO SparkEnv: Registering BlockManagerMaster
19/09/27 15:32:48 INFO BlockManagerMasterEndpoint: Using org.apache.spark.storage.DefaultTopologyMapper for getting topology information
19/09/27 15:32:48 INFO BlockManagerMasterEndpoint: BlockManagerMasterEndpoint up
19/09/27 15:32:48 INFO DiskBlockManager: Created local directory at C:\Users\110610172\AppData\Local\Temp\blockmgr-c580e3ec-3b0f-4365-8766-387e0c4a3947
19/09/27 15:32:48 INFO MemoryStore: MemoryStore started with capacity 1989.6 MB
19/09/27 15:32:48 INFO SparkEnv: Registering OutputCommitCoordinator
19/09/27 15:32:48 INFO Utils: Successfully started service 'SparkUI' on port 4040.
19/09/27 15:32:48 INFO SparkUI: Bound SparkUI to 0.0.0.0, and started at http://192.168.1.26:4040
19/09/27 15:32:48 INFO SparkContext: Added JAR E:\RemoteSubmitSparkToYarn\target\RemoteSubmitSparkToYarn-1.0-SNAPSHOT.jar at spark://192.168.1.26:49747/jars/RemoteSubmitSparkToYarn-1.0-SNAPSHOT.jar with timestamp 1569569568596
19/09/27 15:32:50 INFO ConfiguredRMFailoverProxyProvider: Failing over to rm381
19/09/27 15:32:50 INFO Client: Requesting a new application from cluster with 7 NodeManagers
19/09/27 15:32:50 INFO Client: Verifying our application has not requested more than the maximum memory capability of the cluster (12288 MB per container)
19/09/27 15:32:50 INFO Client: Will allocate AM container, with 896 MB memory including 384 MB overhead
19/09/27 15:32:50 INFO Client: Setting up container launch context for our AM
19/09/27 15:32:50 INFO Client: Setting up the launch environment for our AM container
19/09/27 15:32:50 INFO Client: Preparing resources for our AM container
19/09/27 15:32:51 WARN Client: Neither spark.yarn.jars nor spark.yarn.archive is set, falling back to uploading libraries under SPARK_HOME.
19/09/27 15:32:54 INFO Client: Uploading resource file:/C:/Users/110610172/AppData/Local/Temp/spark-46819e6c-4520-4e75-b7b0-0374e0020d36/__spark_libs__4420363360244802432.zip -> hdfs://cdh01:8020/user/110610172/.sparkStaging/application_1568096913481_0456/__spark_libs__4420363360244802432.zip
19/09/27 15:32:54 WARN NativeCodeLoader: Unable to load native-hadoop library for your platform... using builtin-java classes where applicable
19/09/27 15:32:57 INFO Client: Uploading resource file:/C:/Users/110610172/AppData/Local/Temp/spark-46819e6c-4520-4e75-b7b0-0374e0020d36/__spark_conf__4989294758151956703.zip -> hdfs://cdh01:8020/user/110610172/.sparkStaging/application_1568096913481_0456/__spark_conf__.zip
19/09/27 15:32:57 INFO SecurityManager: Changing view acls to: 110610172
19/09/27 15:32:57 INFO SecurityManager: Changing modify acls to: 110610172
19/09/27 15:32:57 INFO SecurityManager: Changing view acls groups to:
19/09/27 15:32:57 INFO SecurityManager: Changing modify acls groups to:
19/09/27 15:32:57 INFO SecurityManager: SecurityManager: authentication disabled; ui acls disabled; users with view permissions: Set(110610172); groups with view permissions: Set(); users with modify permissions: Set(110610172); groups with modify permissions: Set()
19/09/27 15:32:57 INFO Client: Submitting application application_1568096913481_0456 to ResourceManager
19/09/27 15:32:57 INFO YarnClientImpl: Submitted application application_1568096913481_0456
19/09/27 15:32:57 INFO SchedulerExtensionServices: Starting Yarn extension services with app application_1568096913481_0456 and attemptId None
19/09/27 15:32:58 INFO Client: Application report for application_1568096913481_0456 (state: ACCEPTED)
19/09/27 15:32:58 INFO Client:
     client token: N/A
     diagnostics: N/A
     ApplicationMaster host: N/A
     ApplicationMaster RPC port: -1
     queue: root.users.110610172
     start time: 1569569577390
     final status: UNDEFINED
     tracking URL: http://cdh02:8088/proxy/application_1568096913481_0456/
     user: 110610172
19/09/27 15:32:59 INFO Client: Application report for application_1568096913481_0456 (state: ACCEPTED)
19/09/27 15:33:00 INFO Client: Application report for application_1568096913481_0456 (state: ACCEPTED)
19/09/27 15:33:01 INFO Client: Application report for application_1568096913481_0456 (state: ACCEPTED)
19/09/27 15:33:01 INFO YarnSchedulerBackend$YarnSchedulerEndpoint: ApplicationMaster registered as NettyRpcEndpointRef(spark-client://YarnAM)
19/09/27 15:33:01 INFO YarnClientSchedulerBackend: Add WebUI Filter. org.apache.hadoop.yarn.server.webproxy.amfilter.AmIpFilter, Map(PROXY_HOSTS -> cdh01,cdh02, PROXY_URI_BASES -> http://cdh01:8088/proxy/application_1568096913481_0456,http://cdh02:8088/proxy/application_1568096913481_0456), /proxy/application_1568096913481_0456
19/09/27 15:33:01 INFO JettyUtils: Adding filter: org.apache.hadoop.yarn.server.webproxy.amfilter.AmIpFilter
19/09/27 15:33:02 INFO Client: Application report for application_1568096913481_0456 (state: RUNNING)
19/09/27 15:33:02 INFO Client:
     client token: N/A
     diagnostics: N/A
     ApplicationMaster host: 10.101.75.194
     ApplicationMaster RPC port: 0
     queue: root.users.110610172
     start time: 1569569577390
     final status: UNDEFINED
     tracking URL: http://cdh02:8088/proxy/application_1568096913481_0456/
     user: 110610172
19/09/27 15:33:02 INFO YarnClientSchedulerBackend: Application application_1568096913481_0456 has started running.
19/09/27 15:33:02 INFO Utils: Successfully started service 'org.apache.spark.network.netty.NettyBlockTransferService' on port 49796.
19/09/27 15:33:02 INFO NettyBlockTransferService: Server created on 192.168.1.26:49796
19/09/27 15:33:02 INFO BlockManager: Using org.apache.spark.storage.RandomBlockReplicationPolicy for block replication policy
19/09/27 15:33:02 INFO BlockManagerMaster: Registering BlockManager BlockManagerId(driver, 192.168.1.26, 49796, None)
19/09/27 15:33:02 INFO BlockManagerMasterEndpoint: Registering block manager 192.168.1.26:49796 with 1989.6 MB RAM, BlockManagerId(driver, 192.168.1.26, 49796, None)
19/09/27 15:33:02 INFO BlockManagerMaster: Registered BlockManager BlockManagerId(driver, 192.168.1.26, 49796, None)
19/09/27 15:33:02 INFO BlockManager: Initialized BlockManager: BlockManagerId(driver, 192.168.1.26, 49796, None)
19/09/27 15:33:07 INFO YarnSchedulerBackend$YarnDriverEndpoint: Registered executor NettyRpcEndpointRef(spark-client://Executor) (10.101.75.190:10332) with ID 1
19/09/27 15:33:07 INFO BlockManagerMasterEndpoint: Registering block manager cdh04:24916 with 246.9 MB RAM, BlockManagerId(1, cdh04, 24916, None)
19/09/27 15:33:07 INFO YarnSchedulerBackend$YarnDriverEndpoint: Registered executor NettyRpcEndpointRef(spark-client://Executor) (10.101.75.190:10334) with ID 2
19/09/27 15:33:08 INFO BlockManagerMasterEndpoint: Registering block manager cdh04:27337 with 246.9 MB RAM, BlockManagerId(2, cdh04, 27337, None)
19/09/27 15:33:08 INFO YarnClientSchedulerBackend: SchedulerBackend is ready for scheduling beginning after reached minRegisteredResourcesRatio: 0.8
19/09/27 15:33:08 WARN KafkaUtils: overriding enable.auto.commit to false for executor
19/09/27 15:33:08 WARN KafkaUtils: overriding auto.offset.reset to none for executor
19/09/27 15:33:08 WARN KafkaUtils: overriding executor group.id to spark-executor-remote_test
19/09/27 15:33:08 WARN KafkaUtils: overriding receive.buffer.bytes to 65536 see KAFKA-3135
-------------------------------------------
Time: 1569569610000 ms
-------------------------------------------
(assigned,10)
(serializer,2)
(Setting,10)
(rdd.count(),1)
(class,2)
(=,2)
(newly,10)
(partitions,10)

-------------------------------------------
Time: 1569569640000 ms
-------------------------------------------

-------------------------------------------
Time: 1569569670000 ms
-------------------------------------------
......
-
Viewing on the cluster
YARN --> Applications
-
Problems encountered
-
Problem caused by a Spark version mismatch
Error log:
19/09/27 11:01:38 ERROR TransportRequestHandler: Error while invoking RpcHandler#receive() for one-way message.
java.io.InvalidClassException: org.apache.spark.rpc.RpcEndpointRef; local class incompatible: stream classdesc serialVersionUID = -1329125091869941550, local class serialVersionUID = 1835832137613908542
    at java.io.ObjectStreamClass.initNonProxy(ObjectStreamClass.java:616)
    at java.io.ObjectInputStream.readNonProxyDesc(ObjectInputStream.java:1630)
    at java.io.ObjectInputStream.readClassDesc(ObjectInputStream.java:1521)
    at java.io.ObjectInputStream.readNonProxyDesc(ObjectInputStream.java:1630)
    at java.io.ObjectInputStream.readClassDesc(ObjectInputStream.java:1521)
    at java.io.ObjectInputStream.readOrdinaryObject(ObjectInputStream.java:1781)
    at java.io.ObjectInputStream.readObject0(ObjectInputStream.java:1353)
    at java.io.ObjectInputStream.defaultReadFields(ObjectInputStream.java:2018)
    at java.io.ObjectInputStream.readSerialData(ObjectInputStream.java:1942)
    at java.io.ObjectInputStream.readOrdinaryObject(ObjectInputStream.java:1808)
    at java.io.ObjectInputStream.readObject0(ObjectInputStream.java:1353)
    at java.io.ObjectInputStream.readObject(ObjectInputStream.java:373)
    at org.apache.spark.serializer.JavaDeserializationStream.readObject(JavaSerializer.scala:75)
    at org.apache.spark.serializer.JavaSerializerInstance.deserialize(JavaSerializer.scala:108)
    at org.apache.spark.rpc.netty.NettyRpcEnv$$anonfun$deserialize$1$$anonfun$apply$1.apply(NettyRpcEnv.scala:267)
    at scala.util.DynamicVariable.withValue(DynamicVariable.scala:58)
    at org.apache.spark.rpc.netty.NettyRpcEnv.deserialize(NettyRpcEnv.scala:316)
    at org.apache.spark.rpc.netty.NettyRpcEnv$$anonfun$deserialize$1.apply(NettyRpcEnv.scala:266)
    at scala.util.DynamicVariable.withValue(DynamicVariable.scala:58)
    at org.apache.spark.rpc.netty.NettyRpcEnv.deserialize(NettyRpcEnv.scala:265)
    at org.apache.spark.rpc.netty.RequestMessage$.apply(NettyRpcEnv.scala:600)
    at org.apache.spark.rpc.netty.NettyRpcHandler.internalReceive(NettyRpcEnv.scala:651)
    at org.apache.spark.rpc.netty.NettyRpcHandler.receive(NettyRpcEnv.scala:643)
    at org.apache.spark.network.server.TransportRequestHandler.processOneWayMessage(TransportRequestHandler.java:178)
    at org.apache.spark.network.server.TransportRequestHandler.handle(TransportRequestHandler.java:107)
    at org.apache.spark.network.server.TransportChannelHandler.channelRead(TransportChannelHandler.java:118)
    at io.netty.channel.AbstractChannelHandlerContext.invokeChannelRead(AbstractChannelHandlerContext.java:357)
    at io.netty.channel.AbstractChannelHandlerContext.invokeChannelRead(AbstractChannelHandlerContext.java:343)
    at io.netty.channel.AbstractChannelHandlerContext.fireChannelRead(AbstractChannelHandlerContext.java:336)
    at io.netty.handler.timeout.IdleStateHandler.channelRead(IdleStateHandler.java:287)
    at io.netty.channel.AbstractChannelHandlerContext.invokeChannelRead(AbstractChannelHandlerContext.java:357)
    at io.netty.channel.AbstractChannelHandlerContext.invokeChannelRead(AbstractChannelHandlerContext.java:343)
    at io.netty.channel.AbstractChannelHandlerContext.fireChannelRead(AbstractChannelHandlerContext.java:336)
    at io.netty.handler.codec.MessageToMessageDecoder.channelRead(MessageToMessageDecoder.java:102)
    at io.netty.channel.AbstractChannelHandlerContext.invokeChannelRead(AbstractChannelHandlerContext.java:357)
    at io.netty.channel.AbstractChannelHandlerContext.invokeChannelRead(AbstractChannelHandlerContext.java:343)
    at io.netty.channel.AbstractChannelHandlerContext.fireChannelRead(AbstractChannelHandlerContext.java:336)
    at org.apache.spark.network.util.TransportFrameDecoder.channelRead(TransportFrameDecoder.java:85)
    at io.netty.channel.AbstractChannelHandlerContext.invokeChannelRead(AbstractChannelHandlerContext.java:357)
    at io.netty.channel.AbstractChannelHandlerContext.invokeChannelRead(AbstractChannelHandlerContext.java:343)
    at io.netty.channel.AbstractChannelHandlerContext.fireChannelRead(AbstractChannelHandlerContext.java:336)
    at io.netty.channel.DefaultChannelPipeline$HeadContext.channelRead(DefaultChannelPipeline.java:1294)
    at io.netty.channel.AbstractChannelHandlerContext.invokeChannelRead(AbstractChannelHandlerContext.java:357)
    at io.netty.channel.AbstractChannelHandlerContext.invokeChannelRead(AbstractChannelHandlerContext.java:343)
    at io.netty.channel.DefaultChannelPipeline.fireChannelRead(DefaultChannelPipeline.java:911)
    at io.netty.channel.nio.AbstractNioByteChannel$NioByteUnsafe.read(AbstractNioByteChannel.java:131)
    at io.netty.channel.nio.NioEventLoop.processSelectedKey(NioEventLoop.java:643)
    at io.netty.channel.nio.NioEventLoop.processSelectedKeysOptimized(NioEventLoop.java:566)
    at io.netty.channel.nio.NioEventLoop.processSelectedKeys(NioEventLoop.java:480)
    at io.netty.channel.nio.NioEventLoop.run(NioEventLoop.java:442)
    at io.netty.util.concurrent.SingleThreadEventExecutor$2.run(SingleThreadEventExecutor.java:131)
    at io.netty.util.concurrent.DefaultThreadFactory$DefaultRunnableDecorator.run(DefaultThreadFactory.java:144)
    at java.lang.Thread.run(Thread.java:745)
Solution:
Use the same Spark version locally as on the cluster. Matching releases can be downloaded from: https://archive.apache.org/dist/spark/
-
Environment variable problem
Error log:
Exception in thread "main" java.lang.IllegalStateException: Library directory 'E:\RemoteSubmitSparkToYarn\assembly\target\scala-2.11\jars' does not exist; make sure Spark is built.
    at org.apache.spark.launcher.CommandBuilderUtils.checkState(CommandBuilderUtils.java:248)
    at org.apache.spark.launcher.CommandBuilderUtils.findJarsDir(CommandBuilderUtils.java:347)
    at org.apache.spark.launcher.YarnCommandBuilderUtils$.findJarsDir(YarnCommandBuilderUtils.scala:38)
    at org.apache.spark.deploy.yarn.Client.prepareLocalResources(Client.scala:526)
    at org.apache.spark.deploy.yarn.Client.createContainerLaunchContext(Client.scala:814)
    at org.apache.spark.deploy.yarn.Client.submitApplication(Client.scala:169)
    at org.apache.spark.scheduler.cluster.YarnClientSchedulerBackend.start(YarnClientSchedulerBackend.scala:56)
    at org.apache.spark.scheduler.TaskSchedulerImpl.start(TaskSchedulerImpl.scala:173)
    at org.apache.spark.SparkContext.<init>(SparkContext.scala:509)
    at org.apache.spark.streaming.StreamingContext$.createNewSparkContext(StreamingContext.scala:839)
    at org.apache.spark.streaming.StreamingContext.<init>(StreamingContext.scala:85)
    at com.cloudera.RemoteSubmitApp$.main(RemoteSubmitApp.scala:33)
    at com.cloudera.RemoteSubmitApp.main(RemoteSubmitApp.scala)
Solution:
Set the SPARK_HOME environment variable on the local machine (pointing at the Spark distribution downloaded above), or set SPARK_HOME in the run configuration in IDEA.
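As a guard, you can fail fast at the top of main when the variable is missing, with a clearer message than the launcher's (a minimal sketch; the message text is illustrative):

// SPARK_HOME must point at a local Spark distribution matching the cluster version;
// without it the YARN client cannot locate the local Spark jars and throws the
// IllegalStateException shown above.
require(sys.env.contains("SPARK_HOME"),
  "SPARK_HOME is not set; point it at a local Spark 2.2.0 distribution")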
-
Permission problem: Permission denied
Error log:
Caused by: org.apache.hadoop.ipc.RemoteException(org.apache.hadoop.security.AccessControlException): Permission denied: user=charles, access=WRITE, inode="/user":root:supergroup:drwxr-xr-x
    at org.apache.hadoop.hdfs.server.namenode.FSPermissionChecker.check(FSPermissionChecker.java:342)
    at org.apache.hadoop.hdfs.server.namenode.FSPermissionChecker.checkPermission(FSPermissionChecker.java:251)
    at org.apache.hadoop.hdfs.server.namenode.FSPermissionChecker.checkPermission(FSPermissionChecker.java:189)
    at org.apache.hadoop.hdfs.server.namenode.FSDirectory.checkPermission(FSDirectory.java:1744)
    at org.apache.hadoop.hdfs.server.namenode.FSDirectory.checkPermission(FSDirectory.java:1728)
    at org.apache.hadoop.hdfs.server.namenode.FSDirectory.checkAncestorAccess(FSDirectory.java:1687)
    at org.apache.hadoop.hdfs.server.namenode.FSDirMkdirOp.mkdirs(FSDirMkdirOp.java:60)
    at org.apache.hadoop.hdfs.server.namenode.FSNamesystem.mkdirs(FSNamesystem.java:2980)
    at org.apache.hadoop.hdfs.server.namenode.NameNodeRpcServer.mkdirs(NameNodeRpcServer.java:1096)
    at org.apache.hadoop.hdfs.protocolPB.ClientNamenodeProtocolServerSideTranslatorPB.mkdirs(ClientNamenodeProtocolServerSideTranslatorPB.java:652)
    at org.apache.hadoop.hdfs.protocol.proto.ClientNamenodeProtocolProtos$ClientNamenodeProtocol$2.callBlockingMethod(ClientNamenodeProtocolProtos.java)
    at org.apache.hadoop.ipc.ProtobufRpcEngine$Server$ProtoBufRpcInvoker.call(ProtobufRpcEngine.java:503)
    at org.apache.hadoop.ipc.RPC$Server.call(RPC.java:989)
    at org.apache.hadoop.ipc.Server$RpcCall.run(Server.java:868)
    at org.apache.hadoop.ipc.Server$RpcCall.run(Server.java:814)
    at java.security.AccessController.doPrivileged(Native Method)
    at javax.security.auth.Subject.doAs(Subject.java:422)
    at org.apache.hadoop.security.UserGroupInformation.doAs(UserGroupInformation.java:1886)
    at org.apache.hadoop.ipc.Server$Handler.run(Server.java:2603)
Solution:
Add the following line to the code so the job is submitted as the root user:

System.setProperty("HADOOP_USER_NAME", "root")
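A sketch of the placement, matching the commented-out line in the example code above. The property should be set before the StreamingContext is created, because Hadoop's UserGroupInformation reads HADOOP_USER_NAME once, when the HDFS client first initializes:

def main(args: Array[String]): Unit = {
  // Set before any Spark/Hadoop code touches HDFS, otherwise the staging
  // directory is still created as the local Windows user.
  System.setProperty("HADOOP_USER_NAME", "root")
  val conf = new SparkConf().setAppName("Remote_Submit_App")
  // ... remaining configuration as in the example code
}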
-
/etc/hadoop/conf.cloudera.yarn/topology.py problem
Error log:
java.io.IOException: Cannot run program "/etc/hadoop/conf.cloudera.yarn/topology.py" (in directory "E:\RemoteSubmitSparkToYarn"): CreateProcess error=2, The system cannot find the file specified
    at java.lang.ProcessBuilder.start(ProcessBuilder.java:1048)
    at org.apache.hadoop.util.Shell.runCommand(Shell.java:519)
    at org.apache.hadoop.util.Shell.run(Shell.java:478)
    at org.apache.hadoop.util.Shell$ShellCommandExecutor.execute(Shell.java:766)
    at org.apache.hadoop.net.ScriptBasedMapping$RawScriptBasedMapping.runResolveCommand(ScriptBasedMapping.java:251)
    at org.apache.hadoop.net.ScriptBasedMapping$RawScriptBasedMapping.resolve(ScriptBasedMapping.java:188)
    at org.apache.hadoop.net.CachedDNSToSwitchMapping.resolve(CachedDNSToSwitchMapping.java:119)
    at org.apache.hadoop.yarn.util.RackResolver.coreResolve(RackResolver.java:101)
    at org.apache.hadoop.yarn.util.RackResolver.resolve(RackResolver.java:81)
    at org.apache.spark.scheduler.cluster.YarnScheduler.getRackForHost(YarnScheduler.scala:37)
    at org.apache.spark.scheduler.TaskSchedulerImpl$$anonfun$resourceOffers$1.apply(TaskSchedulerImpl.scala:337)
    at org.apache.spark.scheduler.TaskSchedulerImpl$$anonfun$resourceOffers$1.apply(TaskSchedulerImpl.scala:326)
    at scala.collection.Iterator$class.foreach(Iterator.scala:742)
    at scala.collection.AbstractIterator.foreach(Iterator.scala:1194)
    at scala.collection.IterableLike$class.foreach(IterableLike.scala:72)
    at scala.collection.AbstractIterable.foreach(Iterable.scala:54)
    at org.apache.spark.scheduler.TaskSchedulerImpl.resourceOffers(TaskSchedulerImpl.scala:326)
    at org.apache.spark.scheduler.cluster.CoarseGrainedSchedulerBackend$DriverEndpoint.org$apache$spark$scheduler$cluster$CoarseGrainedSchedulerBackend$DriverEndpoint$$makeOffers(CoarseGrainedSchedulerBackend.scala:237)
    at org.apache.spark.scheduler.cluster.CoarseGrainedSchedulerBackend$DriverEndpoint$$anonfun$receiveAndReply$1.applyOrElse(CoarseGrainedSchedulerBackend.scala:200)
    at org.apache.spark.rpc.netty.Inbox$$anonfun$process$1.apply$mcV$sp(Inbox.scala:105)
    at org.apache.spark.rpc.netty.Inbox.safelyCall(Inbox.scala:205)
    at org.apache.spark.rpc.netty.Inbox.process(Inbox.scala:101)
    at org.apache.spark.rpc.netty.Dispatcher$MessageLoop.run(Dispatcher.scala:213)
    at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1142)
    at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:617)
    at java.lang.Thread.run(Thread.java:745)
Solution:
In the local copy of core-site.xml, modify the net.topology.script.file.name property and comment out the /etc/hadoop/conf.cloudera.yarn/topology.py value:

<property>
    <name>net.topology.script.file.name</name>
    <value><!--/etc/hadoop/conf.cloudera.yarn/topology.py--></value>
</property>
-
Driver IP not set
Error log:
...cationMaster: Failed to connect to driver at 192.168.1.26:34010, retrying ...
19/09/27 15:12:48 ERROR ApplicationMaster: Failed to connect to driver at 192.168.1.26:34010, retrying ...
19/09/27 15:12:48 ERROR ApplicationMaster: Uncaught exception:
org.apache.spark.SparkException: Failed to connect to driver!
    at org.apache.spark.deploy.yarn.ApplicationMaster.waitForSparkDriver(ApplicationMaster.scala:577)
    at org.apache.spark.deploy.yarn.ApplicationMaster.runExecutorLauncher(ApplicationMaster.scala:433)
    at org.apache.spark.deploy.yarn.ApplicationMaster.run(ApplicationMaster.scala:256)
    at org.apache.spark.deploy.yarn.ApplicationMaster$$anonfun$main$1.apply$mcV$sp(ApplicationMaster.scala:764)
    at org.apache.spark.deploy.SparkHadoopUtil$$anon$2.run(SparkHadoopUtil.scala:67)
    at org.apache.spark.deploy.SparkHadoopUtil$$anon$2.run(SparkHadoopUtil.scala:66)
    at java.security.AccessController.doPrivileged(Native Method)
    at javax.security.auth.Subject.doAs(Subject.java:422)
    at org.apache.hadoop.security.UserGroupInformation.doAs(UserGroupInformation.java:1692)
    at org.apache.spark.deploy.SparkHadoopUtil.runAsSparkUser(SparkHadoopUtil.scala:66)
    at org.apache.spark.deploy.yarn.ApplicationMaster$.main(ApplicationMaster.scala:762)
    at org.apache.spark.deploy.yarn.ExecutorLauncher$.main(ApplicationMaster.scala:785)
    at org.apache.spark.deploy.yarn.ExecutorLauncher.main(ApplicationMaster.scala)
Solution:
This error occurs because spark.driver.host is not set. Since the job runs in yarn-client mode, the driver is the local machine, so the property must be set to the local IP; otherwise the ApplicationMaster cannot find the driver.

.set("spark.driver.host", "192.168.1.26")
Notes:
- core-site.xml, hdfs-site.xml and yarn-site.xml must be placed under the resources directory; the program needs these configuration files at runtime (see the layout sketch after this list).
- After modifying the code, rebuild and repackage the jar; otherwise the job will fail with an exception like:
  ERROR YarnScheduler: Lost executor 2 on cdh03: Container container_e01_1568096913481_0453_01_000005 exited from explicit termination
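A sketch of the expected resources layout, assuming the default Maven source structure and the env/${profile.active} directories declared in the pom above:

src/main/resources/
├── core-site.xml
├── hdfs-site.xml
├── yarn-site.xml
└── env/
    ├── dev/
    ├── test/
    └── prod/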