package SQL
import org.apache.spark.SparkContext
import org.apache.spark.sql._
/**
* Created by devan on 21/11/14.
* mail msdevanms@gmail.com
*/
object SparSQLCSV {

  /**
   * Loads a CSV file, builds a schema programmatically from a header string,
   * registers the data as a temp table and runs a SQL query over it.
   *
   * Fixes over the original:
   *  - drops the redundant flatMap over "\n": `sc.textFile` already yields one record per line;
   *  - splits with limit -1 so trailing empty fields are kept and every Row's arity
   *    matches the schema (a line like "x," previously produced a 1-column Row);
   *  - stops the SparkContext in a finally block so local executor threads are released
   *    even when the job fails.
   *
   * @param args unused command-line arguments
   */
  def main(args: Array[String]) {
    val sc = new SparkContext("local[*]", "home")
    try {
      val sqlContext = new org.apache.spark.sql.SQLContext(sc)
      val people = sc.textFile("/home/devan/Documents/dataset/peoplesTest.csv")
      val delimiter = ","
      // CSV header; each name becomes one nullable StringType column.
      val schemaString = "a,b".split(delimiter)
      val schema = StructType(schemaString.map(fieldName => StructField(fieldName, StringType, true)))
      // limit -1 preserves trailing empty fields so the Row width always equals the schema width.
      val rowRDD = people.map(line => Row.fromSeq(line.split(delimiter, -1)))
      val peopleSchemaRDD = sqlContext.applySchema(rowRDD, schema)
      peopleSchemaRDD.registerTempTable("people")
      sqlContext.sql("SELECT b FROM people").foreach(println)
    } finally {
      sc.stop() // always release the local cluster resources
    }
  }
}
Thursday, November 20, 2014
Spark SQL: automatic schema from csv using Header
Subscribe to:
Post Comments (Atom)
Your posts always bring a sense of joy and positivity. Thank you for your commitment to spreading happiness through your writing. Find more useful ideas in this article. Geometry Dash Free fast-paced gameplay demands quick reflexes and precise timing to conquer rhythmic challenges.
ReplyDelete