Professional Documents
Culture Documents
Aiml Lab5
Aiml Lab5
Shreyash hire
20190802114
Aim: To write a program to demonstrate the working of the Decision Tree based ID3
algorithm. Use an appropriate data set for building the decision tree and apply this
knowledge to classify a new sample, and to show how entropy and information
gain are computed in decision-tree learning.
Theory: A decision tree is built by computing the entropy of the dataset with
respect to the target column and, from it, the information gain of each
feature. The feature with the greatest information gain becomes the root node,
and repeating this process on the partitioned data eventually forms the
complete tree for any given dataset.
Output:
10/27/21, 12:37 PM 20190802114_AIMLLAB5
In [48]:
import pandas as pd
import numpy as np
In [49]:
# Load the Titanic passenger dataset used to train the ID3 decision tree.
getdata = pd.read_csv("Titanic_Dataset.csv")
In [50]:
# Preview the first five rows to sanity-check that the CSV loaded correctly.
getdata.head()
7.2500 NaN
0 1 0 3 1 2 1 1 2 3 1 3 3 4 1 1 4 5 0 3 21171
Futrelle,
Mrs.
Jacques
Heath
(Lily May Peel)
def calc_total_entropy(train_data, label, class_list):
    """Return the Shannon entropy of the *label* column over all of *train_data*.

    Args:
        train_data: DataFrame containing the label column.
        label: name of the target column.
        class_list: iterable of the distinct class values to account for.

    Returns:
        float entropy in bits (0.0 for a pure dataset).

    NOTE(review): the original extraction lost the function header and merged
    several lines; this is the reconstruction of the visible tail logic.
    """
    total_row = train_data.shape[0]
    total_entr = 0
    for c in class_list:
        total_class_count = train_data[train_data[label] == c].shape[0]
        # Guard against log2(0) (NaN) when a class from class_list is absent.
        if total_class_count != 0:
            probability = total_class_count / total_row
            total_entr += -probability * np.log2(probability)
    return total_entr
In [52]:
def calc_entropy(feature_value_data, label, class_list):
    """Return the entropy of the *label* column within one feature-value subset.

    Args:
        feature_value_data: DataFrame slice holding rows for a single feature value.
        label: name of the target column.
        class_list: iterable of the distinct class values.

    Returns:
        float entropy in bits (0.0 for a pure subset).
    """
    class_count = feature_value_data.shape[0]
    entropy = 0
    for c in class_list:
        # Repairs the extraction-truncated `.shape` -> `.shape[0]`.
        label_class_count = feature_value_data[feature_value_data[label] == c].shape[0]
        entropy_class = 0
        # Skip absent classes so we never take log2(0).
        if label_class_count != 0:
            probability_class = label_class_count / class_count
            entropy_class = -probability_class * np.log2(probability_class)
        # The accumulation was merged onto the wrong line in the original dump;
        # it belongs outside the `if` (adding 0 for absent classes).
        entropy += entropy_class
    return entropy
In [53]:
def calc_info_gain(feature_name, train_data, label, class_list):
    """Return the information gain of splitting *train_data* on *feature_name*.

    Information gain = total entropy - weighted entropy of each feature-value
    partition.

    NOTE(review): the extraction lost the loop body and return statement; this
    reconstruction follows the variables that survived (feature_value_list,
    total_row, feature_info) and the sibling helpers.
    """
    feature_value_list = train_data[feature_name].unique()
    total_row = train_data.shape[0]
    feature_info = 0.0
    for feature_value in feature_value_list:
        feature_value_data = train_data[train_data[feature_name] == feature_value]
        feature_value_count = feature_value_data.shape[0]
        # Entropy of this partition, weighted by the partition's probability.
        feature_value_entropy = calc_entropy(feature_value_data, label, class_list)
        feature_info += (feature_value_count / total_row) * feature_value_entropy
    return calc_total_entropy(train_data, label, class_list) - feature_info
In [54]:
def find_most_informative_feature(train_data, label, class_list):
    """Return the feature (column name) with the highest information gain.

    NOTE(review): the selection loop was lost in extraction; reconstructed from
    the surviving max_info_gain / max_info_feature scaffolding.
    """
    # Every column except the target is a candidate split feature.
    feature_list = train_data.columns.drop(label)
    max_info_gain = -1
    max_info_feature = None
    for feature in feature_list:
        feature_info_gain = calc_info_gain(feature, train_data, label, class_list)
        if max_info_gain < feature_info_gain:
            max_info_gain = feature_info_gain
            max_info_feature = feature
    return max_info_feature
In [55]:
def generate_sub_tree(feature_name, train_data, label, class_list):
    """Build the branch mapping for one feature and prune resolved rows.

    For each distinct value of *feature_name*: if every row carrying that value
    belongs to a single class, the branch maps to that class label and the rows
    are removed from train_data; otherwise the branch is marked "?" to be
    expanded recursively by make_tree.

    Returns:
        (tree, remaining_train_data) — the branch dict and the rows not yet
        resolved to a pure class.
    """
    feature_value_count_dict = train_data[feature_name].value_counts(sort=False)
    tree = {}
    # .items() replaces Series.iteritems(), which was removed in pandas 2.0.
    for feature_value, count in feature_value_count_dict.items():
        feature_value_data = train_data[train_data[feature_name] == feature_value]
        assigned_to_node = False
        for c in class_list:
            # Repairs the extraction-truncated `.shape[0`.
            class_count = feature_value_data[feature_value_data[label] == c].shape[0]
            # Pure node: every row with this feature value has class c.
            if class_count == count:
                tree[feature_value] = c
                train_data = train_data[train_data[feature_name] != feature_value]
                assigned_to_node = True
        if not assigned_to_node:
            tree[feature_value] = "?"
    # BUG FIX: the original block never returned; make_tree unpacks
    # `tree, train_data = generate_sub_tree(...)`.
    return tree, train_data
In [56]:
def make_tree(root, prev_feature_value, train_data, label, class_list):
    """Recursively grow the ID3 tree in place inside the *root* dict.

    Args:
        root: dict being mutated to hold the (sub)tree.
        prev_feature_value: branch value that led here, or None at the top level.
        train_data: remaining unresolved rows.
        label: target column name.
        class_list: distinct class values of the target.

    NOTE(review): extraction merged several lines and dropped the recursion over
    the "?" branches; reconstructed to match the surviving statements.
    """
    if train_data.shape[0] != 0:
        # Split on the feature with the greatest information gain.
        max_info_feature = find_most_informative_feature(train_data, label, class_list)
        tree, train_data = generate_sub_tree(max_info_feature, train_data, label, class_list)
        next_root = None
        if prev_feature_value is not None:
            # We arrived via a branch: nest the new split under that branch value.
            root[prev_feature_value] = dict()
            root[prev_feature_value][max_info_feature] = tree
            next_root = root[prev_feature_value][max_info_feature]
        else:
            # Top level: the chosen feature becomes the root of the tree.
            root[max_info_feature] = tree
            next_root = root[max_info_feature]
        # Expand every impure ("?") branch recursively on its own row subset.
        for node, branch in list(next_root.items()):
            if branch == "?":
                feature_value_data = train_data[train_data[max_info_feature] == node]
                make_tree(next_root, node, feature_value_data, label, class_list)
In [57]:
def id3(train_data_m, label):
    """Build and return an ID3 decision tree (nested dict) for *train_data_m*.

    Args:
        train_data_m: training DataFrame; left untouched by this function.
        label: name of the target column.

    Returns:
        Nested dict mapping feature -> {value -> subtree or class label}.
    """
    # Work on a copy so tree construction never touches the caller's frame.
    train_data = train_data_m.copy()
    tree = {}
    class_list = train_data[label].unique()
    # BUG FIX: the original passed train_data_m here, defeating the copy above.
    make_tree(tree, None, train_data, label, class_list)
    return tree
In [58]:
# Build the decision tree with 'Survived' as the target column and display it.
tree = id3(getdata, 'Survived')
print(tree)
0, 454: 1, 455: 0, 456: 1, 457: 0, 458: 1, 459: 1, 460: 0, 461: 1, 462: 0, 463: 0, 4
64: 0, 465: 0, 466: 0, 467: 0, 468: 0, 469: 0, 470: 1, 471: 0, 472: 0, 473: 1, 474:
1, 475: 0, 476: 0, 477: 0, 478: 0, 479: 0, 480: 1, 481: 0, 482: 0, 483: 0, 484: 1, 4
85: 1, 486: 0, 487: 1, 488: 0, 489: 0, 490: 1, 491: 0, 492: 0, 493: 0, 494: 0, 495:
0, 496: 0, 497: 1, 498: 0, 499: 0, 500: 0, 501: 0, 502: 0, 503: 0, 504: 0, 505: 1, 5
06: 0, 507: 1, 508: 1, 509: 0, 510: 1, 511: 1, 512: 0, 513: 1, 514: 1, 515: 0, 516:
0, 517: 1, 518: 0, 519: 1, 520: 0, 521: 1, 522: 0, 523: 0, 524: 1, 525: 0, 526: 0, 5
27: 1, 528: 0, 529: 0, 530: 0, 531: 1, 532: 0, 533: 0, 534: 1, 535: 0, 536: 1, 537:
0, 538: 1, 539: 0, 540: 1, 541: 1, 542: 0, 543: 0, 544: 1, 545: 0, 546: 0, 547: 1, 5
48: 1, 549: 0, 550: 1, 551: 1, 552: 0, 553: 0, 554: 1, 555: 1, 556: 0, 557: 1, 558:
0, 559: 1, 560: 1, 561: 0, 562: 0, 563: 0, 564: 0, 565: 0, 566: 0, 567: 0, 568: 0, 5
69: 0, 570: 1, 571: 1, 572: 1, 573: 1, 574: 1, 575: 0, 576: 0, 577: 1, 578: 1, 579:
0, 580: 1, 581: 1, 582: 1, 583: 0, 584: 0, 585: 0, 586: 1, 587: 0, 588: 1, 589: 0, 5
90: 0, 591: 0, 592: 1, 593: 0, 594: 0, 595: 0, 596: 0, 597: 1, 598: 0, 599: 0, 600:
1, 601: 1, 602: 0, 603: 0, 604: 0, 605: 1, 606: 0, 607: 0, 608: 1, 609: 1, 610: 1, 6
11: 0, 612: 0, 613: 1, 614: 0, 615: 0, 616: 1, 617: 0, 618: 0, 619: 1, 620: 0, 621:
0, 622: 1, 623: 1, 624: 0, 625: 0, 626: 0, 627: 0, 628: 1, 629: 0, 630: 0, 631: 1, 6
32: 0, 633: 1, 634: 0, 635: 0, 636: 1, 637: 0, 638: 0, 639: 0, 640: 0, 641: 0, 642:
1, 643: 0, 644: 1, 645: 1, 646: 1, 647: 0, 648: 1, 649: 0, 650: 1, 651: 0, 652: 1, 6
53: 0, 654: 1, 655: 0, 656: 0, 657: 0, 658: 0, 659: 0, 660: 0, 661: 1, 662: 0, 663:
0, 664: 0, 665: 1, 666: 0, 667: 0, 668: 0, 669: 0, 670: 1, 671: 1, 672: 0, 673: 0, 6
74: 1, 675: 0, 676: 0, 677: 0, 678: 1, 679: 0, 680: 1, 681: 0, 682: 1, 683: 0, 684:
0, 685: 0, 686: 0, 687: 0, 688: 0, 689: 0, 690: 1, 691: 1, 692: 1, 693: 1, 694: 0, 6
95: 0, 696: 0, 697: 0, 698: 1, 699: 0, 700: 0, 701: 1, 702: 1, 703: 0, 704: 0, 705:
0, 706: 0, 707: 1, 708: 1, 709: 1, 710: 1, 711: 1, 712: 0, 713: 1, 714: 0, 715: 0, 7
16: 0, 717: 1, 718: 1, 719: 0, 720: 0, 721: 1, 722: 0, 723: 0, 724: 0, 725: 1, 726:
0, 727: 1, 728: 1, 729: 0, 730: 0, 731: 1, 732: 0, 733: 0, 734: 0, 735: 0, 736: 0, 7
37: 0, 738: 1, 739: 0, 740: 0, 741: 1, 742: 0, 743: 1, 744: 0, 745: 1, 746: 0, 747:
0, 748: 1, 749: 0, 750: 0, 751: 1, 752: 1, 753: 0, 754: 0, 755: 1, 756: 1, 757: 0, 7
58: 0, 759: 0, 760: 1, 761: 0, 762: 0, 763: 1, 764: 1, 765: 0, 766: 1, 767: 0, 768:
0, 769: 0, 770: 0, 771: 0, 772: 0, 773: 0, 774: 0, 775: 1, 776: 0, 777: 0, 778: 1, 7
79: 0, 780: 1, 781: 1, 782: 1, 783: 0, 784: 0, 785: 0, 786: 0, 787: 1, 788: 0, 789:
1, 790: 0, 791: 0, 792: 0, 793: 0, 794: 0, 795: 0, 796: 0, 797: 1, 798: 1, 799: 0, 8
00: 0, 801: 0, 802: 1, 803: 1, 804: 1, 805: 1, 806: 0, 807: 0, 808: 0, 809: 0, 810:
1, 811: 0, 812: 0, 813: 0, 814: 0, 815: 0, 816: 0, 817: 0, 818: 0, 819: 0, 820: 0, 8
21: 1, 822: 1, 823: 0, 824: 1, 825: 0, 826: 0, 827: 0, 828: 1, 829: 1, 830: 1, 831:
1, 832: 1, 833: 0, 834: 0, 835: 0, 836: 1, 837: 0, 838: 0, 839: 1, 840: 1, 841: 0, 8
42: 0, 843: 1, 844: 0, 845: 0, 846: 0, 847: 0, 848: 0, 849: 0, 850: 1, 851: 0, 852:
0, 853: 0, 854: 1, 855: 0, 856: 1, 857: 1, 858: 1, 859: 1, 860: 0, 861: 0, 862: 0, 8
63: 1, 864: 0, 865: 0, 866: 1, 867: 1, 868: 0, 869: 0, 870: 1, 871: 0, 872: 1, 873:
0, 874: 0, 875: 1, 876: 1, 877: 0, 878: 0, 879: 0, 880: 1, 881: 1, 882: 0, 883: 0, 8
84: 0, 885: 0, 886: 0, 887: 0, 888: 1, 889: 0, 890: 1, 891: 0}}
In [59]:
def predict(tree, instance):
    """Classify *instance* by walking the decision tree recursively.

    Args:
        tree: nested dict produced by id3(), or a bare class label at a leaf.
        instance: mapping/Series of feature name -> value.

    Returns:
        The predicted class label, or None when the instance carries a feature
        value the tree never saw during training.
    """
    # Leaf: the tree has collapsed to a plain class label.
    if not isinstance(tree, dict):
        return tree
    feature = next(iter(tree))
    branches = tree[feature]
    value = instance[feature]
    # Unseen feature value: there is no branch to follow.
    if value not in branches:
        return None
    return predict(branches[value], instance)
In [60]:
def evaluate(tree, test_data_m, label):
    """Return the fraction of rows in *test_data_m* that the tree classifies correctly.

    Args:
        tree: decision tree produced by id3().
        test_data_m: DataFrame of labelled test rows.
        label: name of the target column.

    Returns:
        float accuracy in [0.0, 1.0]; 0.0 for an empty DataFrame.
    """
    correct_predict = 0
    wrong_predict = 0
    # BUG FIX: the original indexed with test_data_m.iloc[index] using the
    # label index from iterrows(), which breaks whenever the index is not a
    # clean 0..n-1 range; use the row yielded by iterrows() directly.
    for _, row in test_data_m.iterrows():
        if predict(tree, row) == row[label]:
            correct_predict += 1
        else:
            wrong_predict += 1
    total = correct_predict + wrong_predict
    # Guard the empty-input case instead of raising ZeroDivisionError.
    return correct_predict / total if total else 0.0
https://hub.gke2.mybinder.org/user/ipython-ipython-in-depth-8gov7k35/nbconvert/html/binder/20190802114_AIMLLAB5.ipynb?download=false 4/5
10/27/21, 12:37 PM 20190802114_AIMLLAB5
In [61]:
# Reload the dataset as the evaluation set. NOTE(review): this is the same
# file the tree was trained on, so the reported accuracy will be optimistic.
test_data_m = pd.read_csv("Titanic_Dataset.csv")
In [62]:
# Inspect the first and last rows of the evaluation set.
print(test_data_m.head())
print("")
print(test_data_m.tail())
print("")
Name Sex Age SibSp \ 0 Braund, Mr. Owen Harris male 22.0 1 1 Cumings, Mrs.
John Bradley (Florence Briggs Th... female 38.0 1 2 Heikkinen, Miss. Laina
female 26.0 0 3 Futrelle, Mrs. Jacques Heath (Lily May Peel) female 35.0 1
4 Allen, Mr. William Henry male 35.0 0
In [63]:
print("\n Accuracy: ", accuracy)
Accuracy: 1.0
In [ ]:
In [ ]: