# Pg. 24, Table 2

# Import required packages

import pandas as pd

# Load data

housing_df = pd.read_csv('WestRoxbury.csv')

housing_df.shape  # dimensions of the data frame: (rows, columns)
housing_df.head()  # first five rows
print(housing_df)  # print the data frame (pandas may abbreviate long output)

# Make the column names usable with dot notation by replacing spaces with '_'.
# Explicit form, one column at a time.
# NOTE(review): rename() ignores mapping keys that are not existing labels, so
# this is a no-op unless the CSV header is spelled exactly 'Total Value'.
housing_df = housing_df.rename(columns={'Total Value': 'Total_Value'})
# General form, all columns: trim surrounding whitespace, then swap the
# remaining spaces for underscores.
housing_df.columns = [
    label.strip().replace(' ', '_') for label in housing_df.columns
]

# Two ways of selecting the first four rows
housing_df.loc[0:3]  # label-based: both end points are included
housing_df.iloc[0:4]  # position-based: the stop position is excluded

# First 10 values of column Total_Value
housing_df['Total_Value'].iloc[0:10]

# Row at position 4 (the fifth row), restricted to the first 10 columns
housing_df.iloc[4, 0:10]
housing_df.iloc[4:5, 0:10]  # slicing the row keeps the result a data frame

# pd.concat glues non-adjacent column ranges into one new data frame.
# axis selects the direction of concatenation: 0 = rows, 1 = columns.
pd.concat(
    [
        housing_df.iloc[4:6, 0:2],
        housing_df.iloc[4:6, 4:6],
    ],
    axis=1,
)

# Select a whole column by name, then slice its first 10 rows
housing_df['Total_Value'][0:10]

# Descriptive statistics
print('Number of rows ', len(housing_df['Total_Value']))  # length of the column
print('Mean of Total_Value ', housing_df['Total_Value'].mean())  # column mean
housing_df.describe()  # summary statistics table for the data frame

# Table 4.3

# Load the cereals data and compute summary statistics.
cereals_df = pd.read_csv('Cereals.csv')

# NOTE(review): rename() silently ignores keys that are not existing column
# labels, so this line only has an effect if the CSV header contains
# 'CAT, MEDV' exactly as spelled here -- confirm the intended column name.
cereals_df = cereals_df.rename(columns={'CAT, MEDV': 'CAT_MEDV'})

cereals_df.describe()  # summary statistics table for the data frame

# Compute mean, standard deviation, min, max, median of column `cereals`.
# NOTE(review): dot notation requires a column literally named `cereals` --
# confirm against the CSV header.
print('Mean : ', cereals_df.cereals.mean())
print('Std. dev : ', cereals_df.cereals.std())
print('Min : ', cereals_df.cereals.min())
print('Max : ', cereals_df.cereals.max())
print('Median : ', cereals_df.cereals.median())

# Compute mean, standard dev., min, max, median for every numeric column and
# collect them in one table (one row per column, one column per statistic).
# numeric_only=True is passed to every aggregation so that text columns do not
# raise TypeError in mean/std/median (pandas >= 2.0) and all five statistics
# share the same row index.
pd.DataFrame({
    'mean': cereals_df.mean(numeric_only=True),
    'sd': cereals_df.std(numeric_only=True),
    'min': cereals_df.min(numeric_only=True),
    'max': cereals_df.max(numeric_only=True),
    'median': cereals_df.median(numeric_only=True),
})

# Python code in practice

import pandas as pd

# Load Cereals.csv (path is relative to the current working directory) into
# the data frame `df`.
df = pd.read_csv("Cereals.csv")


# import pandas

import pandas as pd

# import matplotlib

import matplotlib.pyplot as plt

# import seaborn

import seaborn as sns

%matplotlib inline

