You are on page 1 of 3

########## Data Manipulation (Pandas)

pip install pandas


pip install numpy

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
# The plotting package is spelled "matplotlib"; the misspelled module name
# cannot be imported.
from matplotlib import style

# Select the 'ggplot' style sheet for matplotlib plots.
style.use('ggplot')

# Sample data: one list per column, all of equal length.
web_stats = {
    'Day': [1, 2, 3, 4, 5, 6],
    'Visitors': [4, 4, 4, 5, 5, 6],
    'Bounce_Rate': [4, 4, 4, 5, 5, 6],
}

df = pd.DataFrame(web_stats)

# Quick ways to inspect a frame:
#   print(df)
#   print(df.head())
#   print(df.tail())
#   print(df.tail(2))

# Setting 'Day' as the index.
# Without inplace=True, set_index returns a new frame and leaves df untouched.
print(df.set_index('Day'))

# With inplace=True the index is changed on df itself.
df.set_index('Day', inplace=True)

# Both forms below select the same single column.
print(df['Visitors'])
print(df.Visitors)

# Select multiple columns by passing a list of column names.
print(df[['Visitors', 'Bounce_Rate']])

# Convert a column to a plain Python list.
print(df.Visitors.tolist())

# Convert several columns to a NumPy array.
print(np.array(df[['Visitors', 'Bounce_Rate']]))

###################
################### IO (Converter to anything)

import pandas as pd

# Load the source CSV, index it by its 'Date' column, and write it back out.
df = pd.read_csv('FileName.csv')
df.set_index('Date', inplace=True)
print(df.head())
df.to_csv('NewFile.csv')

# read_csv is a module-level pandas function (not a DataFrame method) and it
# returns a new frame, so the result has to be assigned.
df = pd.read_csv('NewFile.csv', index_col=0)

# Rename the column.
# NOTE(review): assigning a one-element list assumes the frame has exactly
# one data column besides the index - confirm against the real CSV.
df.columns = ['Austin_HPI']
print(df.head())

# Write without a header row ...
df.to_csv('NewFile2.csv', header=False)
# ... then supply the column names when reading the headerless file back.
df = pd.read_csv('NewFile2.csv', names=['Date', 'Austin_HPI'], index_col=0)
print(df.head())

# To HTML
df.to_html('eample.html')

# Column rename: the key must match the existing column name exactly,
# otherwise rename leaves the frame unchanged.
df.rename(columns={'Austin_HPI': '7th Digit of Autin'}, inplace=True)

######### Quandl
#########
pip install quandl

-> Housing price index (Freddie Mac)

import Quandl
import pandas as pd

# Read the API key from a local file. The context manager closes the file
# handle, and strip() drops any trailing newline so it is not sent as part
# of the token.
with open('quadlkey.text', 'r') as key_file:
    api_key = key_file.read().strip()

# Fetch the 'FMAC/HPI_AK' dataset and preview the first rows.
df = Quandl.get('FMAC/HPI_AK', authtoken=api_key)
print(df.head())

# Read the tables from a web page; the result is indexed like a list below.
# NOTE(review): the URL is a placeholder from the notes - replace it with the
# real page that lists the US states.
fiddy_states = pd.read_html('http://list of us')

# This is a list of tables.
print(fiddy_states)

# Only column 0 of the first table is needed.
# presumably this column holds the state abbreviations - verify against the page
print(fiddy_states[0][0])

# Skip the first entry of that column and print one dataset code per
# remaining value. The "_" separator matches the 'FMAC/HPI_AK' ticker format.
for abbv in fiddy_states[0][0][1:]:
    print("FMAC/HPI_" + str(abbv))

#Part 5 - Combining DataFrames


##############################

import pandas as pd

# Concatenate DataFrames that share the same columns: rows are stacked.
concat = pd.concat([df1, df2])

# Concatenate DataFrames with different columns: the result has the union of
# all columns, and cells a frame does not provide are filled with NaN.
concat = pd.concat([df1, df2, df3])

# Adding a Series as a new row at the end.
# The Series index labels act as column names; labels that are not columns of
# df1 show up as new columns in the result.
s = pd.Series([80, 20, 50], index=['Column1', 'Column2', 'Column3'])

# DataFrame.append was removed in pandas 2.0. Turning the Series into a
# one-row frame and concatenating it gives the same "append a row" result;
# ignore_index=True renumbers the rows from 0.
df4 = pd.concat([df1, s.to_frame().T], ignore_index=True)

print(df4)

#Part 6 - Merging & Joining Data Frames


########################################
import pandas as pd

# Three small example frames. df1 and df2 share the same columns but cover
# different index labels; df3 shares only the 'HPI' column with df1.
df1 = pd.DataFrame(
    dict(
        HPI=[80, 85, 88, 85],
        Int_rate=[2, 3, 2, 2],
        US_GDP_Thousands=[50, 55, 65, 55],
    ),
    index=[2001, 2002, 2003, 2004],
)

df2 = pd.DataFrame(
    dict(
        HPI=[80, 85, 88, 85],
        Int_rate=[2, 3, 2, 2],
        US_GDP_Thousands=[50, 55, 65, 55],
    ),
    index=[2005, 2006, 2007, 2008],
)

df3 = pd.DataFrame(
    dict(
        HPI=[80, 85, 88, 85],
        Unemployment=[7, 8, 9, 6],
        Low_tier_HPI=[50, 52, 50, 53],
    ),
    index=[2001, 2002, 2003, 2004],
)

# Merge: use when the index does not matter to you.
merged_on_hpi = df1.merge(df2, on='HPI')
print(merged_on_hpi)

# Merge on two key columns at once.
merged_on_two_keys = df1.merge(df2, on=['HPI', 'Int_rate'])
print(merged_on_two_keys)

# Join: use when the index matters to you.
# Both frames are re-indexed by 'HPI' so that join can align on it.
df1 = df1.set_index('HPI')
df3 = df3.set_index('HPI')

joined = df1.join(df3)
print(joined)

# Outer merge: keeps keys that appear in either frame.
outer_merged = df1.merge(df2, on='HPI', how='outer')
print(outer_merged)

##### Part - 7 -
################################

You might also like