You are on page 1of 6

Assignment3_200020094

March 14, 2024

[106]: import pandas as pd


import numpy as np
import matplotlib.pyplot as plt
from scipy.signal import find_peaks
from sklearn.cluster import KMeans
from scipy.spatial import ConvexHull

[3]: # Step 1: Read the data


# read_csv is called with its defaults, so the file is parsed as
# comma-separated text whose first row holds the column names.
data = pd.read_csv("material_data_Alankar.txt")

[4]: data

[4]: Elements Atomic_Number Electronegativity Atomic_Radius \


0 H 1 2.20 0.25
1 He 2 0.00 1.20
2 Li 3 0.98 1.45
3 Be 4 1.57 1.05
4 B 5 2.04 0.85
.. … … … …
90 U 92 1.38 1.75
91 Np 93 1.36 1.75
92 Pu 94 1.28 1.75
93 Am 95 1.30 1.75
94 Cm 96 1.30 1.76

Thermal_Conductivity Density Crystal_System


0 0.1805 0.09 HEX
1 0.1513 0.18 HCP
2 85.0000 530.00 BCC
3 190.0000 1850.00 HCP
4 27.0000 2340.00 RHO
.. … … …
90 27.0000 18950.00 ORTH
91 6.0000 20200.00 ORTH
92 6.0000 19840.00 MON
93 10.0000 13670.00 HCP
94 8.8000 13500.00 HCP

1
[95 rows x 7 columns]

[160]: # Step 2: Extract relevant columns


# Pull out the two columns used by every later step; both remain pandas
# Series that share the DataFrame's row index.
atomic_number, electronegativity = (
    data["Atomic_Number"],
    data["Electronegativity"],
)

[161]: # Step 3: Plot Electronegativity vs. Atomic Number


plt.figure(figsize=(10, 6))
# Individual elements as points ...
plt.scatter(atomic_number, electronegativity, c='b', label="Elements")
# ... joined by a dashed line in row order to make the periodic trend visible.
plt.plot(atomic_number, electronegativity, color='red', linestyle='--',
         label='Line Passing Through Points')

plt.xlabel("Atomic Number")
plt.ylabel("Electronegativity")
plt.title("Electronegativity vs. Atomic Number")
plt.grid(True)
# The labels passed above are only displayed once a legend is drawn.
plt.legend()
plt.show()

I was able to plot the line very easily because the rows in the data were already sorted by atomic
number; otherwise, we would first have to sort the rows by atomic number before plotting.

[18]: # Step 4: Identify peaks


# find_peaks returns (indices, properties); only the indices are kept.
# height=0 sets the minimum height a local maximum must have to be reported.
peaks = find_peaks(electronegativity, height=0)[0]

# Step 5: Count peaks
num_peaks = peaks.size
print("Number of peaks observed:", num_peaks)

2
Number of peaks observed: 15

[164]: # Step 5: Merge nearby peaks based on margin


margin = 4  # This threshold is adjustable

# Keep the first peak, then every peak whose atomic number lies more than
# `margin` beyond the peak immediately before it; peaks closer than that are
# treated as part of the preceding peak and dropped.
# Slicing (rather than peaks[0]) keeps this safe when no peaks were found.
merged_peaks = list(peaks[:1])
for previous_peak, current_peak in zip(peaks, peaks[1:]):
    # find_peaks returns positions, so look the values up positionally.
    gap = atomic_number.iloc[current_peak] - atomic_number.iloc[previous_peak]
    if gap > margin:
        merged_peaks.append(current_peak)

# Step 6: Count peaks
num_peaks = len(merged_peaks)
print("Number of merged peaks observed:", num_peaks)

Number of merged peaks observed: 11

[211]: plt.plot(atomic_number, electronegativity, color='red', linestyle='--',␣


↪label='Line Passing Through Points')

plt.scatter(atomic_number[merged_peaks], electronegativity[merged_peaks],␣
↪color='Brown', label="Peaks (Margined)")

plt.scatter(atomic_number[merged_peaks[6]], electronegativity[merged_peaks[6]],␣
↪color='Blue', label="Peaks (Margined)")

plt.scatter(atomic_number[merged_peaks[8]], electronegativity[merged_peaks[8]],␣
↪color='Blue', label="Peaks (Margined)")

[211]: <matplotlib.collections.PathCollection at 0x234c44c3790>

3
0.0.1 After merging peaks, the algorithm gives 11 peaks, whereas I observe 9 peaks
by eye
The peaks colored in brown total 9 (which matches what I see), and the blue ones, as indicated
in the graph above, are the extra peaks the algorithm has computed. This is very much possible
because there is so much noise around a peak that it creates a local maximum, which the algorithm
counts as a peak even though it is not a significant peak for our data.

[168]: # Step 7: Cluster elements based on the number of merged peaks


# Each cluster runs from just after the previous merged peak up to and
# including its own peak; the last cluster is extended to the end of the table.
# Explicit [start, stop) bounds also cover the single-peak case, where the
# one cluster must span every element instead of stopping at the peak.
clusters = {}
if len(merged_peaks) > 0:
    inner_bounds = [peak + 1 for peak in merged_peaks[:-1]]
    starts = [0] + inner_bounds
    stops = inner_bounds + [len(atomic_number)]
    # merged_peaks holds positions, hence the positional .iloc slicing.
    clusters = {
        idx: atomic_number.iloc[start:stop].tolist()
        for idx, (start, stop) in enumerate(zip(starts, stops))
    }

[169]: clusters.items()

[169]: dict_items([(0, [1, 2, 3, 4, 5, 6, 7, 8, 9]), (1, [10, 11, 12, 13, 14, 15, 16,
17]), (2, [18, 19, 20, 21, 22, 23, 24]), (3, [25, 26, 27, 28, 29, 30, 31, 32,
33, 34, 35, 36]), (4, [37, 38, 39, 40, 41, 42]), (5, [43, 44, 45, 46, 47, 48,
49, 50, 51, 52, 53]), (6, [54, 55, 56, 57, 58, 59, 60]), (7, [61, 62, 63, 64,
65, 66, 67, 68, 69]), (8, [70, 71, 72, 73, 74]), (9, [75, 76, 77, 78, 79]), (10,
[80, 81, 82, 83, 84, 85, 86, 88, 89, 90, 91, 92, 93, 94, 95, 96])])

[170]: # Step 8: Define a function to create a polygon from the bounding box
def create_polygon(x, y, margin, y_margin=None):
    """Return the corners of the padded bounding box around a set of points.

    Args:
        x: Non-empty sequence of x coordinates.
        y: Non-empty sequence of y coordinates.
        margin: Padding added on both sides along the x axis, and along the
            y axis too unless ``y_margin`` is given.
        y_margin: Optional separate padding for the y axis, useful when the
            two axes have very different scales. Defaults to ``margin``.

    Returns:
        A list of four ``(x, y)`` tuples in the order bottom-left, top-left,
        top-right, bottom-right.

    Raises:
        ValueError: If ``x`` or ``y`` is empty.
    """
    if y_margin is None:
        y_margin = margin
    x_min, x_max = min(x) - margin, max(x) + margin
    y_min, y_max = min(y) - y_margin, max(y) + y_margin
    return [(x_min, y_min), (x_min, y_max), (x_max, y_max), (x_max, y_min)]

[178]: # Step 9: Plot clusters with background shapes


plt.figure(figsize=(12, 8))
colors = ['r', 'g', 'y', 'm', 'c', 'k', 'brown', 'orange', 'purple', 'violet',
          'indigo']

# Per-cluster padding of the background box (first and last clusters get more).
margins = [2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2]
for idx, (cluster_idx, elements) in enumerate(clusters.items()):
    # `elements` holds atomic numbers, so membership must be tested against
    # the Atomic_Number values, not against the 0-based row index.
    in_cluster = atomic_number.isin(elements)
    cluster_x = atomic_number[in_cluster].tolist()
    cluster_y = electronegativity[in_cluster].tolist()

    # Wrap around so a different number of clusters cannot raise IndexError.
    color = colors[idx % len(colors)]
    plt.scatter(cluster_x, cluster_y, c=color, label=f"Cluster {cluster_idx+1}")

    # Create a polygon for the background shape
    polygon = create_polygon(cluster_x, cluster_y, margins[idx % len(margins)])
    plt.fill(*zip(*polygon), color=color, alpha=0.2)

plt.legend()
plt.show()

0.0.2 Now, applying KMeans Clustering


[210]: # Step 10: Apply KMeans clustering
# One row per element: (atomic number, electronegativity).
X = np.column_stack((atomic_number, electronegativity))
# A fixed random_state makes the centroid initialisation, and therefore the
# resulting clusters and plot, reproducible from run to run.
kmeans = KMeans(n_clusters=10, random_state=0)  # Adjust the number of clusters as needed
y_kmeans = kmeans.fit_predict(X)

# Step 11: Plot the clustered points
plt.figure(figsize=(12, 8))
plt.scatter(X[:, 0], X[:, 1], c=y_kmeans, cmap='viridis', label='Clusters',
            alpha=0.9)
plt.scatter(kmeans.cluster_centers_[:, 0], kmeans.cluster_centers_[:, 1],
            marker='*', s=200, c='red', label='Cluster Centers')

# Shade the convex hull of each cluster as a background region.
for i in range(len(kmeans.cluster_centers_)):
    cluster_points = X[y_kmeans == i]
    # A 2-D convex hull needs at least three points that are not all on one
    # line; skip the shading for clusters that cannot form one.
    if len(cluster_points) < 3:
        continue
    if np.linalg.matrix_rank(cluster_points - cluster_points[0]) < 2:
        continue
    hull = ConvexHull(cluster_points)
    plt.fill(cluster_points[hull.vertices, 0], cluster_points[hull.vertices, 1],
             alpha=0.15, color='blue')

plt.legend()
plt.show()

The above is the final result after applying the KMeans algorithm. This algorithm does a pretty
decent job at clustering.

[ ]:

You might also like