SDC Spark-Submit 命令
exec /home/app/stream/spark/spark-2.2.0-hdp2.7/bin/spark-submit --master yarn --deploy-mode cluster --executor-memory 1024m --executor-cores 1 --num-executors 2 --archives /home/app/appdata/streamset/data/temp/cluster-pipeline-ClusterOriginKafkaToTrashTestMultiWorkerPerf90ece481-843c-4e5e-86fe-d17911ccbc5c-0/staging/libs.tar.gz,/home/app/appdata/streamset/data/temp/cluster-pipeline-ClusterOriginKafkaToTrashTestMultiWorkerPerf90ece481-843c-4e5e-86fe-d17911ccbc5c-0/staging/etc.tar.gz,/home/app/appdata/streamset/data/temp/cluster-pipeline-ClusterOriginKafkaToTrashTestMultiWorkerPerf90ece481-843c-4e5e-86fe-d17911ccbc5c-0/staging/resources.tar.gz --files /home/app/appdata/streamset/data/temp/cluster-pipeline-ClusterOriginKafkaToTrashTestMultiWorkerPerf90ece481-843c-4e5e-86fe-d17911ccbc5c-0/staging/log4j.properties --jars /home/app/stream/streamset/streamset-3.3.2_tag0529/libexec/bootstrap-libs/main/streamsets-datacollector-bootstrap-3.3.2-SNAPSHOT.jar,/home/app/stream/streamset/streamset-3.3.2_tag0529/streamsets-libs/streamsets-datacollector-edh-cluster-kafka_0_10-spark_2_0-lib/lib/kafka_2.11-0.10.0.1.jar,/home/app/stream/streamset/streamset-3.3.2_tag0529/streamsets-libs/streamsets-datacollector-edh-cluster-kafka_0_10-spark_2_0-lib/lib/kafka-clients-0.10.2.1.jar,/home/app/stream/streamset/streamset-3.3.2_tag0529/streamsets-libs/streamsets-datacollector-edh-cluster-kafka_0_10-spark_2_0-lib/lib/metrics-core-2.2.0.jar,/home/app/stream/streamset/streamset-3.3.2_tag0529/streamsets-libs/streamsets-datacollector-edh-cluster-kafka_0_10-spark_2_0-lib/lib/spark-streaming-kafka-0-10_2.11-2.1.0.jar,/home/app/stream/streamset/streamset-3.3.2_tag0529/container-lib/streamsets-datacollector-container-3.3.2-SNAPSHOT.jar,/home/app/stream/streamset/streamset-3.3.2_tag0529/container-lib/streamsets-datacollector-common-3.3.2-SNAPSHOT.jar,/home/app/stream/streamset/streamset-3.3.2_tag0529/api-lib/streamsets-datacollector-api-3.3.2-SNAPSHOT.jar,/home/app/stream/streamset/streamset-3.3.2_tag0529/libexec/bootstrap-libs/cluster/streamsets-datacollector-cluster-bootstrap-3.3.2-SNAPSHOT.jar --conf spark.running.mode=yarn --conf spark.streaming.dynamicAllocation.maxExecutors=16 --conf spark.driver.extraJavaOptions=-Duser.home=. --conf 'spark.executor.extraJavaOptions=-javaagent:./streamsets-datacollector-bootstrap-3.3.2-SNAPSHOT.jar -Duser.home=. -XX:+UseConcMarkSweepGC -XX:+UseParNewGC -Dlog4j.debug' --conf spark.executorEnv.JAVA_HOME=/home/app/java/jdk1.8.0_65 --conf spark.streaming.kafka.consumer.poll.max.retries=5 --conf spark.yarn.appMasterEnv.JAVA_HOME=/home/app/java/jdk1.8.0_65 --conf 'spark.driver.extraJavaOptions=-Xdebug -Xrunjdwp:transport=dt_socket,server=y,suspend=n,address=35053' --name 'StreamSets Data Collector: Cluster_OriginKafka_To_Trash_TestMultiWorkerPerf' --class com.streamsets.pipeline.BootstrapClusterStreaming /home/app/stream/streamset/streamset-3.3.2_tag0529/libexec/bootstrap-libs/cluster/streamsets-datacollector-cluster-bootstrap-api-3.3.2-SNAPSHOT.jar
分解命令
// 分解
spark-submit --master yarn
--deploy-mode cluster
--executor-memory 1024m
--executor-cores 1
--num-executors 2
--archives /home/app/appdata/streamset/data/temp/cluster-pipeline-ClusterOriginKafkaToTrashTestMultiWorkerPerf90ece481-843c-4e5e-86fe-d17911ccbc5c-0/staging/libs.tar.gz,/home/app/appdata/streamset/data/temp/cluster-pipeline-ClusterOriginKafkaToTrashTestMultiWorkerPerf90ece481-843c-4e5e-86fe-d17911ccbc5c-0/staging/etc.tar.gz,/home/app/appdata/streamset/data/temp/cluster-pipeline-ClusterOriginKafkaToTrashTestMultiWorkerPerf90ece481-843c-4e5e-86fe-d17911ccbc5c-0/staging/resources.tar.gz
--files /home/app/appdata/streamset/data/temp/cluster-pipeline-ClusterOriginKafkaToTrashTestMultiWorkerPerf90ece481-843c-4e5e-86fe-d17911ccbc5c-0/staging/log4j.properties
--jars /home/app/stream/streamset/streamset-3.3.2_tag0529/libexec/bootstrap-libs/main/streamsets-datacollector-bootstrap-3.3.2-SNAPSHOT.jar,/home/app/stream/streamset/streamset-3.3.2_tag0529/streamsets-libs/streamsets-datacollector-edh-cluster-kafka_0_10-spark_2_0-lib/lib/kafka_2.11-0.10.0.1.jar,/home/app/stream/streamset/streamset-3.3.2_tag0529/streamsets-libs/streamsets-datacollector-edh-cluster-kafka_0_10-spark_2_0-lib/lib/kafka-clients-0.10.2.1.jar,/home/app/stream/streamset/streamset-3.3.2_tag0529/streamsets-libs/streamsets-datacollector-edh-cluster-kafka_0_10-spark_2_0-lib/lib/metrics-core-2.2.0.jar,/home/app/stream/streamset/streamset-3.3.2_tag0529/streamsets-libs/streamsets-datacollector-edh-cluster-kafka_0_10-spark_2_0-lib/lib/spark-streaming-kafka-0-10_2.11-2.1.0.jar,/home/app/stream/streamset/streamset-3.3.2_tag0529/container-lib/streamsets-datacollector-container-3.3.2-SNAPSHOT.jar,/home/app/stream/streamset/streamset-3.3.2_tag0529/container-lib/streamsets-datacollector-common-3.3.2-SNAPSHOT.jar,/home/app/stream/streamset/streamset-3.3.2_tag0529/api-lib/streamsets-datacollector-api-3.3.2-SNAPSHOT.jar,/home/app/stream/streamset/streamset-3.3.2_tag0529/libexec/bootstrap-libs/cluster/streamsets-datacollector-cluster-bootstrap-3.3.2-SNAPSHOT.jar
--conf spark.running.mode=yarn
--conf spark.streaming.dynamicAllocation.maxExecutors=16
--conf spark.driver.extraJavaOptions=-Duser.home=.
--conf 'spark.executor.extraJavaOptions=-javaagent:./streamsets-datacollector-bootstrap-3.3.2-SNAPSHOT.jar -Duser.home=. -XX:+UseConcMarkSweepGC -XX:+UseParNewGC -Dlog4j.debug'
--conf spark.executorEnv.JAVA_HOME=/home/app/java/jdk1.8.0_65
--conf spark.streaming.kafka.consumer.poll.max.retries=5
--conf spark.yarn.appMasterEnv.JAVA_HOME=/home/app/java/jdk1.8.0_65
--conf 'spark.driver.extraJavaOptions=-Xdebug -Xrunjdwp:transport=dt_socket,server=y,suspend=n,address=35053'
--name 'StreamSets Data Collector: Cluster_OriginKafka_To_Trash_TestMultiWorkerPerf'
--class com.streamsets.pipeline.BootstrapClusterStreaming
/home/app/stream/streamset/streamset-3.3.2_tag0529/libexec/bootstrap-libs/cluster/streamsets-datacollector-cluster-bootstrap-api-3.3.2-SNAPSHOT.jar
Pipeline相關配置文件:
- info.json:
- pipeline.json:
- uiinfo.json:
- rules.json: