You are on page 1 of 1

from graphframes import *

# Load the vertex and edge tables from local CSV files through the spark-csv
# data source. Both reads pass header='true' and inferschema='true' to the
# reader.
# NOTE(review): `sqlContext` is not defined in this script; presumably it is
# the one supplied by the PySpark shell -- confirm before running standalone.
# Each chain is wrapped in parentheses so it can span several lines; a bare
# `vertices =` followed by a line break is a syntax error.
vertices = (
    sqlContext.read.format("com.databricks.spark.csv")
    .options(header='true', inferschema='true')
    .load("file:///home/cloudera/Downloads/vertices_file.csv")
)
edges = (
    sqlContext.read.format("com.databricks.spark.csv")
    .options(header='true', inferschema='true')
    .load("file:///home/cloudera/Downloads/edges_file.csv")
)

# Expose both DataFrames to Spark SQL under the table names "edges" and
# "vertices".
edges.registerTempTable("edges")
vertices.registerTempTable("vertices")

# Preview the first five rows of each table via SQL (vertices first, then
# edges, so the printed output keeps its order).
vertices_preview = sqlContext.sql("select * from vertices LIMIT 5")
vertices_preview.show()

edges_preview = sqlContext.sql("select * from edges LIMIT 5")
edges_preview.show()
# Build the graph from the vertex and edge DataFrames.
# NOTE(review): GraphFrames documents that the vertex table needs an `id`
# column and the edge table needs `src` and `dst` columns -- confirm the CSV
# headers match.
g = GraphFrame(vertices, edges)
# Function-call form of print: valid on Python 2 and Python 3 alike, whereas
# the statement form `print g` is a syntax error on Python 3.
print(g)

# Display the graph's vertex and edge tables.
g.vertices.show()

g.edges.show()

# Display the in-degree, out-degree and total degree tables.
g.inDegrees.show()

g.outDegrees.show()

g.degrees.show()

# Count vertices per distinct value of the "Experience" column.
exp = g.vertices.groupBy("Experience")
exp.count().show()

# Motif search: every directed edge e from a vertex a to a vertex b.
# The result has one column per named motif element (a, e, b).
motifs = g.find("(a)-[e]->(b)")
# DataFrame.count() returns a plain integer, which has no .show(); print it.
print(motifs.count())

# Project edge attributes out of the motif result. The columns must be
# addressed through the motif's edge name `e`; the original referenced an
# undefined `paths` variable and a non-existent `g.` prefix.
# NOTE(review): assumes the edges CSV has a `city` column -- confirm.
e2 = motifs.select("e.src", "e.dst", "e.city")


print(e2.count())

# Label propagation with maxIter=5; show the resulting table.
result = g.labelPropagation(maxIter=5)
result.show()

# PageRank run with a tolerance (tol=0.01) instead of an iteration count;
# the call returns a graph, whose vertex and edge tables are shown.
results = g.pageRank(resetProbability=0.15, tol=0.01)
results.vertices.show()

results.edges.show()

# PageRank with maxIter=5. The result is bound to `ranks`, the name the
# following line reads; the original assigned to `rank`, so the next line
# raised NameError.
ranks = g.pageRank(resetProbability=0.15, maxIter=5)
# Show the ten vertices with the highest `pagerank` value.
ranks.vertices.orderBy(ranks.vertices.pagerank.desc()).limit(10).show()

You might also like