# Page 1 of 1 (document-viewer text, not part of the script)

# Import libraries

import pandas as pd
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier, plot_tree

# Load the dataset from 'fertility.csv' (path is relative to the working
# directory the script is run from).
data = pd.read_csv('fertility.csv')
N = 10  # number of attributes (kept for reference; not used below)

# Data preprocessing
# Assuming 'Diagnosis' is the target variable, and the rest are features
X = data.drop(columns='Diagnosis')
y = data['Diagnosis']

# Split the data into training (80%) and testing (20%) sets.
# random_state is fixed so that repeated runs produce the same split.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# Create a Decision Tree Classifier; the fixed random_state makes repeated
# runs build the same tree.
clf = DecisionTreeClassifier(random_state=42)

# Train the model on the training data
clf.fit(X_train, y_train)

# Make predictions on the test data and report how many were correct, so the
# predictions are actually used rather than computed and discarded.
y_pred = clf.predict(X_test)
accuracy = (y_pred == y_test).mean()
print(f"Test accuracy: {accuracy:.2f}")

# Plot the Decision Tree
plt.figure(figsize=(12, 8))
# plot_tree assigns class_names positionally to the sorted classes the model
# was fitted on (clf.classes_). Deriving the names from the fitted model keeps
# the labels correct however the 'Diagnosis' column is encoded, instead of
# relying on a hard-coded ['N', 'O'] list matching that order.
plot_tree(
    clf,
    filled=True,
    feature_names=list(X.columns),
    class_names=[str(label) for label in clf.classes_],
)
# Per the original note: N means Normal and O means Altered
plt.show()
# Season refers to the season in which the analysis was performed
# Age refers to the age at the time of analysis
# SI refers to Surgical Intervention
# HF refers to High Fevers in the last year
# Alcohol refers to the frequency of alcohol consumption
# Sitting refers to the number of hours spent sitting per day

# You might also like (document-viewer text, not part of the script)