Professional Documents
Culture Documents
# Distribute a small in-memory list of (string key, integer) records as an RDD.
# The keys are written as string literals: as bare names (pk, sm, ...) they
# would raise NameError unless defined elsewhere.
# NOTE(review): each record has two fields -- confirm this matches the column
# list used wherever Sample_data is turned into a DataFrame.
Sample_data = sc.parallelize([
    ('pk', 26),
    ('sm', 24),
    ('sr', 26),
    ('na', 23),
    ('pm', 26),
])
# Read a text file into an RDD of lines. The original typographic quotes and
# trailing ":from ..." annotations were not valid Python; the annotations are
# kept as comments and the paths are unchanged.
# NOTE(review): an HDFS location is normally given as a full URI
# (hdfs://host:port/path) -- confirm the intended path form.
sc.textFile('/local/folder/filename.csv')  # from local
sc.textFile('hdfs/folder/filename.csv')  # from hdfs file system
DataFrame
# Create a DataFrame from Sample_data with the columns Id, Model and Year.
# NOTE(review): three column names are supplied, so each row of Sample_data
# presumably needs three fields -- confirm against where Sample_data is built.
sample_df = spark.createDataFrame(Sample_data, ['Id', 'Model', 'Year'])
From CSV
# Load the sample CSV file; header=True and inferSchema=True are passed so the
# reader takes column names from the first line and infers column types.
sample_data_csv = spark.read.csv(
    '../Data/DataFrames_sample.csv',
    header=True,
    inferSchema=True,
)
From JSON
# Load the sample JSON file through the session's reader.
sample_data_json_df = spark.read.json('../Data/DataFrames_sample.json')