You are on page 1 of 3

6.00 Handout, Lecture 20
(Not intended to make sense outside of lecture)

class Point(object):

class Cluster(object):

class ClusterSet(object):

class Mammal(Point):
    """A Point whose attributes can be rescaled by a named scaling scheme.

    The original attributes are handed to Point unchanged; scaleFeatures()
    then overwrites self.attrs with a scaled copy.
    """

    # Per-feature multipliers, selected by name in scaleFeatures().
    # Both schemes list eight factors, so at most eight features are supported.
    # NOTE(review): '1/max' presumably holds the reciprocal of each feature's
    # maximum value in the data set -- confirm against the data file.
    _SCALE_FACTORS = {
        'identity': [1, 1, 1, 1, 1, 1, 1, 1],
        '1/max': [1/3.0, 1/4.0, 1.0, 1.0, 1/4.0, 1/4.0, 1/6.0, 1/6.0],
    }

    def __init__(self, name, originalAttrs, scaledAttrs = None):
        """Initialise the underlying Point with originalAttrs.

        NOTE(review): scaledAttrs is accepted but never used; originalAttrs
        is passed to Point.__init__ in both attribute positions. Kept as-is
        because Point.__init__ is not visible here -- confirm this is intended.
        """
        Point.__init__(self, name, originalAttrs, originalAttrs)

    def scaleFeatures(self, key):
        """Set self.attrs to the original attributes scaled element-wise.

        key -- name of the scaling scheme: 'identity' or '1/max'.
        Raises KeyError for an unknown key and IndexError if there are more
        features than scale factors.
        """
        # Look the scheme up once so an unknown key fails before any work.
        factors = self._SCALE_FACTORS[key]
        features = self.getOriginalAttrs()
        self.attrs = [features[i] * factors[i] for i in range(len(features))]

def buildMammalPoints(fName, scaling):
    """Build one scaled Mammal point per entry read from fName.

    fName   -- passed straight to readMammalData, which returns a list of
               names and a parallel list of feature sequences.
    scaling -- scaling-scheme name passed to Mammal.scaleFeatures.
    Returns the list of Mammal points, in the order the names were read.
    """
    nameList, featureList = readMammalData(fName)
    points = []
    for i, name in enumerate(nameList):
        # featureList is indexed, not zipped, so a short feature list
        # still raises IndexError instead of silently dropping names.
        point = Mammal(name, pylab.array(featureList[i]))
        point.scaleFeatures(scaling)
        points.append(point)
    return points

#Use hierarchical clustering for mammals teeth
def test0(numClusters = 2, scaling = 'identity', printSteps = False,
          printHistory = True):
    """Cluster the mammal data hierarchically and print the final clusters.

    numClusters  -- target cluster count passed to ClusterSet.mergeN.
    scaling      -- scaling-scheme name passed to buildMammalPoints.
    printSteps   -- forwarded to mergeN as toPrint.
    printHistory -- guards the merge-history output (see note below).
    """
    points = buildMammalPoints('mammalTeeth.txt', scaling)
    # Start with every point in its own singleton cluster.
    cS = ClusterSet(Mammal)
    for p in points:
        cS.add(Cluster([p], Mammal))
    history = cS.mergeN(Cluster.maxLinkageDist, numClusters,
                        toPrint = printSteps)
    if printHistory:
        # NOTE(review): the handout leaves the body of this branch blank
        # (presumably it printed the merge steps recorded in `history`).
        # It is kept as an explicit placeholder, not guessed at.
        pass
    # NOTE(review): indentation was lost in the handout; the final report is
    # assumed to run regardless of printHistory -- confirm.
    clusters = cS.getClusters()
    # Single-string prints give the same output on Python 2 and Python 3.
    print('Final set of clusters:')
    for index, c in enumerate(clusters):
        print('C' + str(index) + ': ' + str(c))

def kmeans(points, k, cutoff, pointType, maxIters = 100,
           toPrint = False):
    """Partition points into k clusters by iterative centroid refinement.

    points    -- sequence of point objects providing distance(other).
    k         -- number of clusters; k initial centroids are sampled from
                 points without replacement.
    cutoff    -- iteration stops once the largest centroid change reported
                 by Cluster.update drops below this value.
    pointType -- passed through to the Cluster constructor.
    maxIters  -- upper bound on the number of refinement passes.
    toPrint   -- unused in this excerpt; kept for interface compatibility.

    Returns (clusters, maxDist), where maxDist is the largest distance from
    any member to its own cluster's centroid. A one-line summary is always
    printed before returning.
    """
    #Get k randomly chosen initial centroids
    initialCentroids = random.sample(points, k)
    #Create a singleton cluster for each centroid
    clusters = [Cluster([p], pointType) for p in initialCentroids]
    numIters = 0
    # Seed with cutoff so the loop body runs at least once.
    biggestChange = cutoff
    while biggestChange >= cutoff and numIters < maxIters:
        #Create a list containing k empty lists
        newClusters = [[] for _ in range(k)]
        # Clusters are not updated during the assignment pass, so each
        # centroid is fetched once per pass instead of once per point.
        centroids = [c.getCentroid() for c in clusters]
        for p in points:
            #Find the centroid closest to p
            index = 0
            smallestDistance = p.distance(centroids[0])
            # Start at 1: centroid 0 is already the running minimum. The
            # strict '<' keeps the lowest index on ties.
            for i in range(1, k):
                distance = p.distance(centroids[i])
                if distance < smallestDistance:
                    smallestDistance = distance
                    index = i
            #Add p to the list of points for the appropriate cluster
            newClusters[index].append(p)
        #Update each cluster and record how much the centroid has changed
        biggestChange = 0.0
        for i in range(len(clusters)):
            change = clusters[i].update(newClusters[i])
            biggestChange = max(biggestChange, change)
        numIters += 1
    #Calculate the coherence of the least coherent cluster
    maxDist = 0.0
    for c in clusters:
        centroid = c.getCentroid()
        for p in c.members():
            dist = p.distance(centroid)
            if dist > maxDist:
                maxDist = dist
    # Single-string print gives the same output on Python 2 and Python 3.
    print('Number of iterations = ' + str(numIters) +
          ' Max Diameter = ' + str(maxDist))
    return clusters, maxDist
MIT OpenCourseWare
http://ocw.mit.edu
6.00SC Introduction to Computer Science and Programming
Spring 2011

For information about citing these materials or our Terms of Use, visit: http://ocw.mit.edu/terms.

You might also like