You are on page 1 of 4

import pandas as pd

from sklearn.linear_model import LinearRegression


import matplotlib.pyplot as plt

# Load the full patient table from dataset.csv.
df = pd.read_csv('dataset.csv')

# Second DataFrame: drop rows whose salary is greater than 2500 and rows whose
# height is less than 170. Both conditions are combined into a single mask;
# filtering `df` twice in a row would make the second assignment throw away
# the result of the salary filter.
df2 = df[(df['salary'] <= 2500) & (df['height'] >= 170)]

# Third DataFrame: the loaded table plus one additional patient record.
# Every column maps to a single-item list so the mapping converts directly
# into a one-row DataFrame.
new_patient_data = {
    column: [value]
    for column, value in (
        ('patient', 'frank'),
        ('age', 35),
        ('height', 175),
        ('av.pulse', 80),
        ('salary', 2500),
    )
}
new_patient_df = pd.DataFrame(data=new_patient_data)

# ignore_index=True renumbers the rows 0..n-1 in the combined frame.
df3 = pd.concat(objs=[df, new_patient_df], ignore_index=True)

# Print the loaded DataFrame, the filtered DataFrame and the DataFrame with
# the added patient, each followed by an empty line.
for frame in (df, df2, df3):
    print(frame)
    print()

# Histogram of the 'height' column: 20 bins on a new 8x6 inch figure.
fig, ax = plt.subplots(figsize=(8, 6))
ax.hist(df['height'], bins=20, color='lightgreen', edgecolor='black')
ax.set_title('Height Distribution')
ax.set_xlabel('Height')
ax.set_ylabel('Frequency')
ax.grid(True)
plt.show()

# Rebuild the three DataFrames.
#
# pd.DataFrame() expects in-memory data (dict, list, array, ...), not a file
# path: passing the string 'dataset.csv' raises ValueError. The CSV file is
# therefore loaded with pd.read_csv().

# First DataFrame: the table as stored in dataset.csv.
df = pd.read_csv('dataset.csv')

# Second DataFrame: the rows left after removing salaries greater than 2500
# and heights less than 170.
df2 = df[(df['salary'] <= 2500) & (df['height'] >= 170)]

# New row of data for the new patient (one single-item list per column).
new_patient_data = {
    'patient': ['frank'],
    'age': [35],
    'height': [175],
    'av.pulse': [80],
    'salary': [2500],
}

# Create the one-row DataFrame before it is concatenated, so this section
# does not rely on a definition made earlier in the script.
new_patient_df = pd.DataFrame(new_patient_data)

# Third DataFrame: the main DataFrame with the new patient appended;
# ignore_index=True renumbers the rows of the result.
df3 = pd.concat([df, new_patient_df], ignore_index=True)

# Print the first, second and third DataFrame, separated by one empty line.
print(df, df2, df3, sep='\n\n')

### Part 2: Linear regression of salary on age

# Feature: selecting with a list of column names keeps X two-dimensional.
X = df[['age']]
# Target variable.
y = df['salary']

# fit() returns the estimator itself, so the model is created and fitted
# in one expression.
model = LinearRegression().fit(X, y)

# Predicted salary for every age the model was fitted on.
predictions = model.predict(X)

# Observed points in blue, fitted regression line in red, drawn on the
# current axes.
ax = plt.gca()
ax.scatter(X, y, color='blue')
ax.plot(X, predictions, color='red')
ax.set_title('Linear Regression')
ax.set_xlabel('Age')
ax.set_ylabel('Salary')
plt.show()

You might also like