You are on page 1 of 2

Data analytics and Decision making Dr. Vishwesh Singbal

Handling missing values - Example 2


(Lines starting with # are comments. Do not copy the leading “>>” when copying the code.)

# Importing the pandas library


>>import pandas as pd

# Reading a CSV file named "weather_data.csv" and parsing the 'day' column as datetime
>>df = pd.read_csv("weather_data.csv", parse_dates=['day'])

# Checking the data type of the first element in the 'day' column
>>type(df.day[0])

# Setting the 'day' column as the index of the DataFrame


>>df.set_index('day', inplace=True)

# Displaying the DataFrame with the updated index


>>df

# Creating a new DataFrame 'new_df' by filling missing values with 0


>>new_df = df.fillna(0)
>>new_df

# Creating a new DataFrame 'new_df' by filling missing values with specified values
>>new_df = df.fillna({
'temperature': 0,
'windspeed': 0,
'event': 'No Event'
})
>>new_df

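# Creating a new DataFrame 'new_df' by forward-filling missing values (each gap takes the last valid value above it)
# Note: fillna(method=...) is deprecated in pandas 2.1 and later; df.ffill() / df.bfill() are the equivalent calls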


>>new_df = df.fillna(method="ffill")
>>new_df

# Creating a new DataFrame 'new_df' by backward-filling missing values down the index (each gap takes the next valid value below it in the same column)


>>new_df = df.fillna(method="bfill")
>>new_df

# Creating a new DataFrame 'new_df' by backward-filling missing values across columns (each gap takes the next valid value to its right in the same row)


>>new_df = df.fillna(method="bfill", axis="columns")
# axis is either "index" or "columns"
>>new_df

# Creating a new DataFrame 'new_df' by forward-filling missing values with a limit of 1


>>new_df = df.fillna(method="ffill", limit=1)
>>new_df

# Creating a new DataFrame 'new_df' by interpolating missing values


>>new_df = df.interpolate()
>>new_df
Data analytics and Decision making Dr. Vishwesh Singbal

# Creating a new DataFrame 'new_df' by time-based interpolation of missing values


>>new_df = df.interpolate(method="time")
>>new_df

# Creating a new DataFrame 'new_df' by dropping rows with any missing values
>>new_df = df.dropna()
>>new_df

# Creating a new DataFrame 'new_df' by dropping rows where all values are missing
>>new_df = df.dropna(how='all')
>>new_df

# Creating a new DataFrame 'new_df' by keeping only rows that have at least 1 non-null value (rows with fewer are dropped)
>>new_df = df.dropna(thresh=1)
>>new_df

# Creating a DatetimeIndex 'idx' covering every day from "01-01-2017" to "01-11-2017" (parsed month-first, i.e. 1 to 11 January 2017)
>>dt = pd.date_range("01-01-2017", "01-11-2017")
>>idx = pd.DatetimeIndex(dt)

# Reindexing the DataFrame 'df' with the new DatetimeIndex 'idx' (dates in 'idx' that are absent from 'df' become rows of NaN)


>>df.reindex(idx)

You might also like