You are on page 1 of 9

Python Programs

Q.1 WAP to create a Series that stores the area of some states in km² and find out the biggest and
smallest three areas from the given Series.

import pandas as pd
# Series of areas, labelled by place name.
# The original index literal was broken across two lines inside the string
# 'benglore', which is a syntax error; the list is re-wrapped between items.
ser1 = pd.Series(
    [34556, 87653, 456, 345, 200, 4356],
    index=['pune', 'mumbai', 'kolkatta', 'chennai', 'benglore', 'lonavala'],
)

# Sort once in descending order: the head holds the largest values and the
# tail holds the smallest ones (still listed largest-to-smallest).
by_area_desc = ser1.sort_values(ascending=False)

print("Top three biggest areas are:")
print(by_area_desc.head(3))
print("Smallest three areas are:")
print(by_area_desc.tail(3))

# Label-based lookup of a single entry.
print(ser1['mumbai'])

Q.2 WAP to create a data series and then change the indexes of the series object in any random order.

import pandas as pd
import numpy as np
# Build a labelled Series, then rearrange its rows by handing reindex()
# the same labels in a different order.
values = [100, 200, 300, 200, 400, 500]
labels = ['A', 'B', 'C', 'D', 'E', 'F']
s1 = pd.Series(data=values, index=labels)
print("Original Data Series:")
print(s1)

shuffled_labels = ['C', 'B', 'A', 'E', 'F', 'D']
s1 = s1.reindex(index=shuffled_labels)
print("Data Series of changing the order of the index:")
print(s1)
Q.3 Given a series object s4. Write a program to change the values at its 2nd row and 3rd row to
8000.

import pandas as pd
# Series with the default integer index 0..5.
s4 = pd.Series([2000, 3000, 3000, 5000, 2000, 1000])
print("Original series:")
print(s4)

# The 2nd and 3rd rows are positions 1 and 2. Use .iloc so the slice is
# unambiguously positional (end-exclusive) rather than relying on how plain
# [] interprets an integer slice on an integer-labelled Series.
s4.iloc[1:3] = 8000

print("Series object after changing value:")
print(s4)

Q.4 Create a pandas Series from a dictionary of values and from an ndarray

import pandas as pd
import numpy as np
# Source data for the two Series.
# The mapping is no longer named `dict`, which shadowed the builtin type.
data_dict = {'A': 'one', 'B': 'two', 'C': 'three', 'D': 'four'}
# np.nan is the supported spelling; the np.NaN alias was removed in NumPy 2.0.
arr1 = np.array([6700, 5600, 8760, 6700, np.nan])

# From a dictionary: keys become the index labels, values become the data.
s1 = pd.Series(data_dict)
# From an ndarray: data is the array shifted by 100, labelled by the
# original array values (so the last label and its value are both NaN).
s2 = pd.Series(data=arr1 + 100, index=arr1)

print("Series with data as dictionary")
print(s1)
print("Series with data as ndarray")
print(s2)
Q.6 Create a data frame for examination result and display row labels, column labels, data types of
each column and the dimensions

import pandas as pd
# Examination result table: one row per class with its pass percentage.
result = pd.DataFrame(
    {
        'class': ['IX', 'X', 'XI', 'XII'],
        'pass_percent': [100, 99, 98, 100],
    }
)
print(result)

# Row labels, column labels, per-column data types, then (rows, columns).
for detail in (result.index, result.columns, result.dtypes, result.shape):
    print(detail)

Q.7 WAP to print a DataFrame one column at a time and print only first three columns

import pandas as pd
import numpy as np
# Four-column frame; only the first three columns are printed below.
df = pd.DataFrame(
    {
        'letters': ['a', 'b', 'c', 'd'],
        'numbers': [1, 2, 3, 4],
        'symbols': ['@', '#', '$', '%'],
        'code': ['chhhg', 'rhfd', 'rhhd', 'tjjg'],
    }
)
print("first three columns of the dataframe are:")

# Pull columns 0, 1 and 2 by position, then print them one at a time.
first_col, second_col, third_col = (df.iloc[:, position] for position in range(3))
for column in (first_col, second_col, third_col):
    print(column)
Q.8 Replace all negative values with 0

import pandas as pd
# Two numeric columns containing a mix of positive and negative values.
df = pd.DataFrame(
    {
        'data1': [2, -6, 7, 4, -8, 5],
        'data2': [3, -8, 7, 6, 9, -5],
    }
)
print(df)
print("DataFrame after replacing -ve values with 0:")

# Boolean frame marking every negative cell; assign 0 to exactly those cells.
is_negative = df < 0
df[is_negative] = 0
print(df)

Q.9 Filter out rows based on different criteria such as duplicate rows.

import pandas as pd
# Marks table that deliberately contains repeated (name, marks) rows.
dic = {
    'name': ['rohit', 'ramesh', 'rasika', 'geeta', 'rohit', 'geeta'],
    'marksinIP': [85, 78, 90, 95, 85, 95],
}
marks = pd.DataFrame(dic)
print(marks)

# duplicated() flags the second and later occurrences of a repeated row.
# (The original called the misspelled `dulpicated`, which raises
# AttributeError.)
duplicate_rows = marks[marks.duplicated()]
print(duplicate_rows)

print("after filtering duplicate data:")
# keep=False flags every member of a repeated group, first occurrence included.
filter_rows = marks[marks.duplicated(keep=False)]
print(filter_rows)
Q.10 Create a Data Frame quarterly sales where each row contains the item category, item name,
and expenditure. Group the rows by the category and print the total expenditure per category.

import pandas as pd
# Quarterly sales: one row per item with its category and expenditure.
dic = {
    'itemcat': ['car', 'ac', 'machine', 'fridge', 'ac', 'car'],
    'itemname': ['ford', 'hitachi', 'godrej', 'lg', 'LG', 'Honda'],
    'expenditure': [700000, 50000, 34000, 20000, 60000, 800000],
}
sales = pd.DataFrame(dic)
print(sales)

# Group rows by category.
qs = sales.groupby('itemcat')
print("result after filtering dataframe:")

# Sum only the expenditure column within each category. The original
# indexed the GroupBy with the tuple ('itemcat', 'expenditure'), which
# pandas 2.x rejects, and which also selected the grouping key itself.
total_per_cat = qs['expenditure'].sum()
print(total_per_cat)

Q.11 Importing and exporting data between pandas and CSV file

import pandas as pd
# Import: load the CSV file from the current working directory into a
# DataFrame. The original closed the file name with a typographic quote (”),
# which is a syntax error; both quotes are now plain ASCII.
df = pd.read_csv("dataframe1.csv")
print(df)

import pandas as pd
# Export: assemble the frame column by column, then write it out as CSV.
itemcat = ['car', 'ac', 'machine', 'fridge', 'ac', 'car']
itemname = ['ford', 'hitachi', 'godrej', 'lg', 'LG', 'Honda']
expenditure = [700000, 50000, 34000, 20000, 60000, 800000]

df = pd.DataFrame(
    {
        'itemcat': itemcat,
        'itemname': itemname,
        'expenditure': expenditure,
    }
)
# Written relative to the current working directory.
df.to_csv("Export_Dataframe.csv")
Q.12 Import a CSV file into a DataFrame where separator is $.
 Make sure that the first row of the CSV file is considered as data and not column index
 Give column labels as ['ID','ITEM','PRICE']
 Use values of ID column as row labels

import pandas as pd
# Read a '$'-separated file.
#   header=None  -> the first line of the file is data, not column names
#   names=[...]  -> supply the column labels explicitly
#   index_col    -> use the ID column as the row labels
# The original wrote index_col and sep with typographic quotes (’ID’, “$”),
# which is a syntax error; they are plain ASCII quotes here.
df = pd.read_csv(
    "dataframe1.csv",
    header=None,
    names=['ID', 'ITEM', 'PRICE'],
    index_col='ID',
    sep="$",
)
print(df)

Q.13 Given the school result data, analyse the performance of the students on different parameters,
e.g. subject-wise or class-wise.

import matplotlib.pyplot as plt


# One record per subject: (name, passing percentage, bar colour).
records = [
    ('Physics', 89, 'red'),
    ('Chemistry', 90, 'green'),
    ('Maths', 95, 'blue'),
    ('Biology', 79, 'yellow'),
    ('Hindi', 85, 'purple'),
]
# Split the records back into the three parallel lists the plot call takes.
subject, Percentage, col = (list(field) for field in zip(*records))

# One coloured bar per subject.
plt.bar(subject, Percentage, color=col)

# Chart decoration.
plt.title("Bar graph for result analysis")
plt.xlabel("Subject-Wise")
plt.ylabel("Passing Percentage")
plt.grid(True)

plt.show()
Q.14 Create an appropriate chart for a DataFrame that has columns as rank=['1st','2nd','3rd'],
Percent_Science=[92,87,83],
Percent_Maths=[99,89,79],
Percent_Hindi=[89,85,83].
Also add titles and legends to the chart.

import matplotlib.pyplot as plt


import numpy as np
# Percentages scored by the top three ranks in each subject.
rank = ['1st', '2nd', '3rd']
Sci = [92, 87, 83]
Maths = [99, 89, 79]
Hindi = [89, 85, 83]

# One group of three side-by-side bars per rank; x is the left bar's position.
bar_width = 0.25
x = np.arange(len(rank))
plt.bar(x, Sci, width=bar_width, color='green', label='Science')
plt.bar(x + bar_width, Maths, width=bar_width, color='red', label='Maths')
plt.bar(x + 2 * bar_width, Hindi, width=bar_width, color='yellow', label='Hindi')

plt.xlabel("Position")
plt.ylabel("Percentage")
plt.title("Result Analysis")
plt.legend(loc='upper left')
# Put each rank label under the middle bar of its group.
plt.xticks(x + bar_width, rank)

# Save before showing: once the window opened by show() is closed the
# figure may be discarded, and a later savefig() would write an empty page.
plt.savefig("multibar.pdf")
# The original wrote `plt.show` without parentheses, so nothing was displayed.
plt.show()
Q.15. Create a histogram for a range of data that shows number of people in a particular age group.

import matplotlib.pyplot as plt

# Ages of the surveyed people.
population_age = [
    22, 23, 24, 25, 23, 21, 25, 26, 34, 35, 36, 45, 43, 42, 45, 56, 55, 50,
    54, 57, 67, 65, 68, 78, 77, 76, 76, 75, 74, 87, 86, 85, 78, 76, 89, 99,
    98, 97, 96, 95, 87, 65, 67, 64, 53, 20,
]

# Edges of the age groups: 20-40, 40-60, 60-80 and 80-100.
bins = [20, 40, 60, 80, 100]

# The original closed 'bar' and wrapped 'y' in typographic quotes (’),
# which is a syntax error; they are plain ASCII quotes here.
plt.hist(population_age, bins, histtype='bar', color='y')

# Tick marks on the bin edges so each bar sits between two labelled ages.
plt.xticks([20, 40, 60, 80, 100])
plt.xlabel("age group")
plt.ylabel("number of people")
plt.title("histogram")
plt.show()

You might also like