
1) DATAFRAME:

import org.apache.spark.sql.SparkSession
import org.apache.spark.sql.functions.desc

val spark = SparkSession.builder.master("local").appName("Spark session in Fresco").getOrCreate()

// Create a DataFrame from a list of (language, percent) tuples
val langPercentDF = spark.createDataFrame(List(("Scala", 35), ("Python", 30), ("R", 15), ("Java", 20)))
langPercentDF.show()

// Rename the default tuple columns (_1, _2) to meaningful names
val lpDF = langPercentDF.withColumnRenamed("_1", "language").withColumnRenamed("_2", "percent")
lpDF.orderBy(desc("percent")).show(false)
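The same result can also be reached by naming the columns at creation time with toDF instead of renaming them afterwards; a minimal sketch, assuming import spark.implicits._ is in scope (lpDF2 is an illustrative name):

// Alternative: name the columns up front (assumes import spark.implicits._)
import spark.implicits._
val lpDF2 = List(("Scala", 35), ("Python", 30), ("R", 15), ("Java", 20)).toDF("language", "percent")
lpDF2.orderBy(desc("percent")).show(false)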

2) DATASET:
import org.apache.spark.sql.SparkSession
import org.apache.spark.sql.functions.desc

val spark = SparkSession.builder.master("local").appName("Spark session in Fresco").getOrCreate()

// Dataset[Long] with values 5, 10, ..., 95 in a single column named "id"
val numDS = spark.range(5, 100, 5)
numDS.show()
numDS.orderBy(desc("id")).show(5)
numDS.describe().show()
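Because range() returns a typed Dataset, plain Scala functions can be applied element by element. A minimal sketch (not part of the original exercise; variable names are illustrative, and spark.implicits._ is assumed for the encoders):

// Typed operations on the Dataset (assumes import spark.implicits._)
import spark.implicits._
val tens = numDS.filter(n => n % 10 == 0)   // keep only multiples of 10
tens.show()
val doubled = numDS.map(n => n * 2)         // element-wise transformation
doubled.show(5)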

3) CREATE DATASET by JSON


{"name":"Rahul","age":"35"}
{"name":"Sachin","age":"46"}
import org.apache.spark.sql.SparkSession
val spark = SparkSession.builder.master("local").appName("Spark session in
Fresco").getOrCreate()
val peopleDS = spark.read.json("/projects/People.json")
peopleDS.show()
case class Person (name: String, age: String)
val personDS = spark.read.json("/projects/People.json").as[Person]
personDS.show()

{"name":"Rahul","age":"35"}
{"name":"Sachin","age":"46"}
import org.apache.spark.sql.SparkSession
val spark = SparkSession.builder.master("local").appName("Spark session in
Fresco").getOrCreate()
val peopleDS = spark.read.json("/projects/People.json")
peopleDS.show()
A plain Scala example using the same case class (note that the constructor arguments are (name, age), in that order):

case class Person(name: String, age: String)

object Main {
  def main(args: Array[String]): Unit = {
    val person1 = Person("Rahul", "35")
    val person2 = Person("Sachin", "46")
    println("Age of the Person1 is " + person1.age)
    println("Name of the Person1 is " + person1.name)
    println("Age of the Person2 is " + person2.age)
    println("Name of the Person2 is " + person2.name)
  }
}
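The typed Dataset from above supports the same object-style access. A minimal sketch using personDS and spark.implicits._ from the earlier snippet (the filter condition is illustrative only):

// Typed transformations on Dataset[Person]
val names = personDS.map(p => p.name)            // Dataset[String]
names.show()
val over40 = personDS.filter(p => p.age.toInt > 40)
over40.show()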

4) PARQUET
{"name":"Rahul","age":"35"}
{"name":"Sachin","age":"46"}
import org.apache.spark.sql.SparkSession
val spark = SparkSession.builder.master("local").appName("Spark session in
Fresco").getOrCreate()
val peopleDS = spark.read.json("/projects/People.json")
peopleDS.show()
val peoplePAR = peopleDS.write.parquet("/projects/challenge/data.parquet")
val sqlContext = new org.apache.spark.sql.SQLContext(sc)
val data = sqlContext.read.parquet("/projects/challenge/data.parquet")
data.show()
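If the output path already exists, the write above fails by default. A minimal sketch of handling that with a save mode, reusing the same (hypothetical) path from above:

// Overwrite any existing output instead of failing
peopleDS.write.mode("overwrite").parquet("/projects/challenge/data.parquet")

// Parquet preserves the schema, so the round-tripped data can be queried directly
data.select("name", "age").show()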

5) CSV Files
git clone https://github.com/frescoplaylab/Census.git
import org.apache.spark.sql.SparkSession

val spark = SparkSession.builder().master("local[1]").appName("Spark Session in Frescoplay").getOrCreate()

// Read the CSV with a header row, inferred column types, and malformed rows dropped
val dfs = spark.read.format("csv").option("header", "true").option("inferSchema", "true").option("mode", "DROPMALFORMED").load("/projects/challenge/Census/demography.csv")

// TotalPopulation is assumed to be a second DataFrame loaded elsewhere (it is not defined above);
// the join is on the shared "Total Population" column.
val joined = dfs.join(TotalPopulation, "Total Population")
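For a quick look at the loaded census data, a small illustrative query; "Total Population" is the only column name confirmed by the exercise, so any other columns would have to match the actual demography.csv header:

// Illustrative: sort and summarise by the "Total Population" column
import org.apache.spark.sql.functions.desc
dfs.orderBy(desc("Total Population")).show(10)
dfs.select("Total Population").describe().show()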
