// RDD demo 1: derive the table schema from a case class.
// One record of people.txt: a name and an integer age.
case class Person(name: String, age: Int)

// Read the comma-separated text file and turn every line into a Person.
// The second field is trimmed before being parsed as an Int.
val rddpeople = sc.textFile("/sparksql/people.txt").map { line =>
  val fields = line.split(",")
  Person(fields(0), fields(1).trim.toInt)
}

// Expose the RDD to SQL under the name "rddTable".
// NOTE(review): calling registerTempTable on a plain RDD presumably relies on an
// implicit RDD-to-SchemaRDD conversion imported outside this snippet — confirm.
rddpeople.registerTempTable("rddTable")

// Print the name of every person aged 13 to 19 (inclusive).
sqlContext.sql("SELECT name FROM rddTable WHERE age >= 13 AND age <= 19").
  map(row => s"Name: ${row(0)}").
  collect().
  foreach(println)
// RDD demo 2: specify the schema programmatically instead of via a case class.
// Import the Spark SQL data types and Row.
import org.apache.spark.sql._

// Create a schema that matches the structure of the data:
// one nullable string column for each name in schemaString.
val schemaString = "name age"
val schema = StructType(
  schemaString.split(" ").map(fieldName => StructField(fieldName, StringType, nullable = true)))

// Create the row RDD: each line is split on "," into (name, trimmed age string).
val rowRDD = sc.textFile("/sparksql/people.txt").map(_.split(",")).map(p => Row(p(0), p(1).trim))

// Use applySchema to apply the schema to rowRDD, then register the result as "rddTable2".
val rddpeople2 = sqlContext.applySchema(rowRDD, schema)
rddpeople2.registerTempTable("rddTable2")

// Print the name of every person aged 13 to 19 (inclusive).
// NOTE(review): age is declared as StringType above, so this numeric comparison
// presumably relies on Spark SQL's implicit type coercion — confirm.
sqlContext.sql("SELECT name FROM rddTable2 WHERE age >= 13 AND age <= 19").map(t => "Name: " + t(0)).collect().foreach(println)
// Parquet demo: load a Parquet file and query it through SQL.
val parquetpeople = sqlContext.parquetFile("/sparksql/people.parquet")

// Make the loaded data queryable as "parquetTable".
parquetpeople.registerTempTable("parquetTable")

// Print the name of every person aged 25 or older.
sqlContext.sql("SELECT name FROM parquetTable WHERE age >= 25").
  map(row => s"Name: ${row(0)}").
  collect().
  foreach(println)
// JSON demo: load a JSON file and query it through SQL.
val jsonpeople = sqlContext.jsonFile("/sparksql/people.json")

// Make the loaded data queryable as "jsonTable".
jsonpeople.registerTempTable("jsonTable")

// Print the name of every person aged 25 or older.
sqlContext.sql("SELECT name FROM jsonTable WHERE age >= 25").
  map(row => s"Name: ${row(0)}").
  collect().
  foreach(println)
// jsonRdd
// Never cache the SchemaRDD first and then call registerAsTable on it.
// By default, compression of the in-memory columnar storage is turned off;
// to enable compression, the COMPRESS_CACHED setting has to be configured.
// Using the sqlContext cache:
sqlContext.cacheTable("rddTable")

// Re-run the teenager query (ages 13 to 19, inclusive) against "rddTable" after the cacheTable call.
sqlContext.sql("SELECT name FROM rddTable WHERE age >= 13 AND age <= 19").
  map(row => s"Name: ${row(0)}").
  collect().
  foreach(println)
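// Scala 2.10.4 itself limits case classes to 22 fields, which is inconvenient when using RDDs as a data source.
// sqlContext cannot join 3 tables at once; join them pairwise first, then join the results once more.
// sqlContext cannot insert data directly with VALUES.
// When writing a sqlContext application, the case class must be defined outside the object.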
// Print the name of every person in "parquetTable" aged 13 to 19 (inclusive).
sqlContext.sql("SELECT name FROM parquetTable WHERE age >= 13 AND age <= 19").
  map(row => s"Name: ${row(0)}").
  collect().
  foreach(println)