// Load the comma-separated text file as a single-partition RDD of (label, features) pairs.
// The label is read from column index 2152 and the feature vector is built from the
// first 2151 columns (indices 0 to 2150); column 2151 is not used.
val fileword = sc.textFile(path, 1).map { line =>
  // Split once and reuse the result for both the label and the features.
  val fields = line.split(",")
  // Slice before converting so that only the feature columns are parsed as Double;
  // the label column is kept as a String and is never passed through toDouble.
  val features = Vectors.dense(fields.take(2151).map(_.toDouble))
  (fields(2152), features)
}

// Print a ~10% sample (without replacement, fixed seed 2) as a quick sanity check.
fileword.sample(withReplacement = false, fraction = 0.1, seed = 2L).foreach(println)

// Define the column names and types of the resulting DataFrame.
val schema = StructType(
  Seq(
    StructField("label", StringType),
    StructField("features", VectorType)
  )
)

// Convert each (label, features) pair into a Row matching the schema.
val rowRDD = fileword.map { case (label, features) => Row(label, features) }

// Build the DataFrame from the Row RDD and the explicit schema.
val wordsDataFrame = sqlContext.createDataFrame(rowRDD, schema)

// Display up to the first 100 rows.
wordsDataFrame.show(100)