You are on page 1of 20

Untitled-1

June 6, 2021

1 Signal Processing - Homework


1.1 Taghiyev Hasan. CS-018.
[55]: import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import tabulate # For table creation
import statsmodels.api as sm

1:DATA (25%) A: Series(table 1, figure 1)(10%)

[56]: df = pd.read_csv('EARTHQUAKE.dat', delim_whitespace=True, header=None,␣


,→names=["YEAR", "MONTH", "DAY", "HOUR", "MAGNITUDE"] )

[57]: def year_fraction(row):


return row["YEAR"]+(row["MONTH"]/100)

[58]: def above_six():


return df.loc[(df["YEAR"]>=1910) & (df["YEAR"] <= 2017) & (df["MAGNITUDE"]␣
,→>= 6)]

[59]: matrix_B = pd.DataFrame()


df = above_six()
matrix_B["DATE"] = pd.Series(year_fraction(df))
matrix_B["MAGNITUDE"] = pd.Series(df["MAGNITUDE"])
matrix_B.plot(x="DATE", y="MAGNITUDE", title="Figure 1")
print(matrix_B.head(3))
plt.show()

DATE MAGNITUDE
199 1910.01 6.83
200 1910.01 6.59
201 1910.01 6.98

1
B: Categorization (Table2, Figure 2) (15%)

[60]: table2 = [[" 6 <= x < 7", matrix_B.loc[(matrix_B["MAGNITUDE"].between(6,7))].


,→count()["DATE"]],

["7 <= x < 8", matrix_B.loc[(matrix_B["MAGNITUDE"].between(7,8))].


,→count()["DATE"]],

["8 <= x < 9", matrix_B.loc[(matrix_B["MAGNITUDE"].between(8,9))].


,→count()["DATE"]],

["9 <= x", matrix_B.loc[(matrix_B["MAGNITUDE"] >= 9)].count()["DATE"]]]


tabulate.tabulate(table2, ["Magnitude", "Count"], tablefmt='html')

[60]: '<table>\n<thead>\n<tr><th>Magnitude </th><th style="text-align: right;">


Count</th></tr>\n</thead>\n<tbody>\n<tr><td>6 &lt;= x &lt; 7 </td><td
style="text-align: right;"> 10842</td></tr>\n<tr><td>7 &lt;= x &lt; 8 </td><td
style="text-align: right;"> 1165</td></tr>\n<tr><td>8 &lt;= x &lt; 9 </td><td
style="text-align: right;"> 80</td></tr>\n<tr><td>9 &lt;= x </td><td
style="text-align: right;"> 4</td></tr>\n</tbody>\n</table>'

[61]: table2 = pd.DataFrame({"Date": [], "Magnitude" :[], "Count": []})


for i in range(1910, 2017, 10):
table2 = table2.append({"Date": f"{i}-{i+10}", "Magnitude": "6 <= x < 7",␣
,→"Count": matrix_B.loc[ (matrix_B["MAGNITUDE"].between(6,7)) &␣

,→(matrix_B["DATE"].between(i, i+10)) ].count()["DATE"]}, ignore_index=True)

2
table2 = table2.append({"Date": f"{i}-{i+10}", "Magnitude": "7 <= x < 8",␣
,→"Count": matrix_B.loc[ (matrix_B["MAGNITUDE"].between(7,8)) &␣
,→(matrix_B["DATE"].between(i, i+10)) ].count()["DATE"]}, ignore_index=True)

table2 = table2.append({"Date": f"{i}-{i+10}", "Magnitude": "8 <= x < 9",␣


,→"Count": matrix_B.loc[ (matrix_B["MAGNITUDE"].between(8,9)) &␣

,→(matrix_B["DATE"].between(i, i+10)) ].count()["DATE"]}, ignore_index=True)

table2 = table2.append({"Date": f"{i}-{i+10}", "Magnitude": "9 <= x",␣


,→"Count": matrix_B.loc[ (matrix_B["MAGNITUDE"] >= 9) & (matrix_B["DATE"].

,→between(i, i+10)) ].count()["DATE"]}, ignore_index=True)

tabulate.tabulate(table2, ["Date", "Magnitude", "Count"], tablefmt='html')

[61]: '<table>\n<thead>\n<tr><th style="text-align: right;"> </th><th>Date


</th><th>Magnitude </th><th style="text-align: right;">
Count</th></tr>\n</thead>\n<tbody>\n<tr><td style="text-align: right;">
0</td><td>1910-1920</td><td>6 &lt;= x &lt; 7 </td><td style="text-align:
right;"> 428</td></tr>\n<tr><td style="text-align: right;">
1</td><td>1910-1920</td><td>7 &lt;= x &lt; 8 </td><td style="text-align:
right;"> 87</td></tr>\n<tr><td style="text-align: right;">
2</td><td>1910-1920</td><td>8 &lt;= x &lt; 9 </td><td style="text-align:
right;"> 10</td></tr>\n<tr><td style="text-align: right;">
3</td><td>1910-1920</td><td>9 &lt;= x </td><td style="text-align: right;">
0</td></tr>\n<tr><td style="text-align: right;"> 4</td><td>1920-1930</td><td>6
&lt;= x &lt; 7 </td><td style="text-align: right;"> 835</td></tr>\n<tr><td
style="text-align: right;"> 5</td><td>1920-1930</td><td>7 &lt;= x &lt; 8
</td><td style="text-align: right;"> 100</td></tr>\n<tr><td style="text-
align: right;"> 6</td><td>1920-1930</td><td>8 &lt;= x &lt; 9 </td><td
style="text-align: right;"> 6</td></tr>\n<tr><td style="text-align:
right;"> 7</td><td>1920-1930</td><td>9 &lt;= x </td><td style="text-align:
right;"> 0</td></tr>\n<tr><td style="text-align: right;">
8</td><td>1930-1940</td><td>6 &lt;= x &lt; 7 </td><td style="text-align:
right;"> 935</td></tr>\n<tr><td style="text-align: right;">
9</td><td>1930-1940</td><td>7 &lt;= x &lt; 8 </td><td style="text-align:
right;"> 111</td></tr>\n<tr><td style="text-align:
right;">10</td><td>1930-1940</td><td>8 &lt;= x &lt; 9 </td><td style="text-
align: right;"> 7</td></tr>\n<tr><td style="text-align:
right;">11</td><td>1930-1940</td><td>9 &lt;= x </td><td style="text-align:
right;"> 0</td></tr>\n<tr><td style="text-align:
right;">12</td><td>1940-1950</td><td>6 &lt;= x &lt; 7 </td><td style="text-
align: right;"> 815</td></tr>\n<tr><td style="text-align:
right;">13</td><td>1940-1950</td><td>7 &lt;= x &lt; 8 </td><td style="text-
align: right;"> 110</td></tr>\n<tr><td style="text-align:
right;">14</td><td>1940-1950</td><td>8 &lt;= x &lt; 9 </td><td style="text-
align: right;"> 10</td></tr>\n<tr><td style="text-align:
right;">15</td><td>1940-1950</td><td>9 &lt;= x </td><td style="text-align:
right;"> 0</td></tr>\n<tr><td style="text-align:
right;">16</td><td>1950-1960</td><td>6 &lt;= x &lt; 7 </td><td style="text-
align: right;"> 1173</td></tr>\n<tr><td style="text-align:

3
right;">17</td><td>1950-1960</td><td>7 &lt;= x &lt; 8 </td><td style="text-
align: right;"> 81</td></tr>\n<tr><td style="text-align:
right;">18</td><td>1950-1960</td><td>8 &lt;= x &lt; 9 </td><td style="text-
align: right;"> 7</td></tr>\n<tr><td style="text-align:
right;">19</td><td>1950-1960</td><td>9 &lt;= x </td><td style="text-align:
right;"> 0</td></tr>\n<tr><td style="text-align:
right;">20</td><td>1960-1970</td><td>6 &lt;= x &lt; 7 </td><td style="text-
align: right;"> 1080</td></tr>\n<tr><td style="text-align:
right;">21</td><td>1960-1970</td><td>7 &lt;= x &lt; 8 </td><td style="text-
align: right;"> 113</td></tr>\n<tr><td style="text-align:
right;">22</td><td>1960-1970</td><td>8 &lt;= x &lt; 9 </td><td style="text-
align: right;"> 9</td></tr>\n<tr><td style="text-align:
right;">23</td><td>1960-1970</td><td>9 &lt;= x </td><td style="text-align:
right;"> 2</td></tr>\n<tr><td style="text-align:
right;">24</td><td>1970-1980</td><td>6 &lt;= x &lt; 7 </td><td style="text-
align: right;"> 1125</td></tr>\n<tr><td style="text-align:
right;">25</td><td>1970-1980</td><td>7 &lt;= x &lt; 8 </td><td style="text-
align: right;"> 125</td></tr>\n<tr><td style="text-align:
right;">26</td><td>1970-1980</td><td>8 &lt;= x &lt; 9 </td><td style="text-
align: right;"> 8</td></tr>\n<tr><td style="text-align:
right;">27</td><td>1970-1980</td><td>9 &lt;= x </td><td style="text-align:
right;"> 0</td></tr>\n<tr><td style="text-align:
right;">28</td><td>1980-1990</td><td>6 &lt;= x &lt; 7 </td><td style="text-
align: right;"> 1104</td></tr>\n<tr><td style="text-align:
right;">29</td><td>1980-1990</td><td>7 &lt;= x &lt; 8 </td><td style="text-
align: right;"> 91</td></tr>\n<tr><td style="text-align:
right;">30</td><td>1980-1990</td><td>8 &lt;= x &lt; 9 </td><td style="text-
align: right;"> 1</td></tr>\n<tr><td style="text-align:
right;">31</td><td>1980-1990</td><td>9 &lt;= x </td><td style="text-align:
right;"> 0</td></tr>\n<tr><td style="text-align:
right;">32</td><td>1990-2000</td><td>6 &lt;= x &lt; 7 </td><td style="text-
align: right;"> 1155</td></tr>\n<tr><td style="text-align:
right;">33</td><td>1990-2000</td><td>7 &lt;= x &lt; 8 </td><td style="text-
align: right;"> 120</td></tr>\n<tr><td style="text-align:
right;">34</td><td>1990-2000</td><td>8 &lt;= x &lt; 9 </td><td style="text-
align: right;"> 5</td></tr>\n<tr><td style="text-align:
right;">35</td><td>1990-2000</td><td>9 &lt;= x </td><td style="text-align:
right;"> 0</td></tr>\n<tr><td style="text-align:
right;">36</td><td>2000-2010</td><td>6 &lt;= x &lt; 7 </td><td style="text-
align: right;"> 1209</td></tr>\n<tr><td style="text-align:
right;">37</td><td>2000-2010</td><td>7 &lt;= x &lt; 8 </td><td style="text-
align: right;"> 118</td></tr>\n<tr><td style="text-align:
right;">38</td><td>2000-2010</td><td>8 &lt;= x &lt; 9 </td><td style="text-
align: right;"> 10</td></tr>\n<tr><td style="text-align:
right;">39</td><td>2000-2010</td><td>9 &lt;= x </td><td style="text-align:
right;"> 1</td></tr>\n<tr><td style="text-align:
right;">40</td><td>2010-2020</td><td>6 &lt;= x &lt; 7 </td><td style="text-

4
align: right;"> 983</td></tr>\n<tr><td style="text-align:
right;">41</td><td>2010-2020</td><td>7 &lt;= x &lt; 8 </td><td style="text-
align: right;"> 109</td></tr>\n<tr><td style="text-align:
right;">42</td><td>2010-2020</td><td>8 &lt;= x &lt; 9 </td><td style="text-
align: right;"> 7</td></tr>\n<tr><td style="text-align:
right;">43</td><td>2010-2020</td><td>9 &lt;= x </td><td style="text-align:
right;"> 1</td></tr>\n</tbody>\n</table>'

[62]: table2_1 = pd.DataFrame({"Date": [], "Magnitude" :[], "Count": []})


table2_2 = pd.DataFrame({"Date": [], "Magnitude" :[], "Count": []})
table2_3 = pd.DataFrame({"Date": [], "Magnitude" :[], "Count": []})
for i in range(1910, 2018):
table2_1 = table2_1.append({"Date": f"{i}", "Magnitude": "6 <= x < 7",␣
,→"Count": matrix_B.loc[ (matrix_B["MAGNITUDE"].between(6,7)) &␣

,→(matrix_B["DATE"].between(i, i+1)) ].count()["DATE"]}, ignore_index=True)

table2_2 = table2_2.append({"Date": f"{i}", "Magnitude": "7 <= x < 8",␣


,→"Count": matrix_B.loc[ (matrix_B["MAGNITUDE"].between(7,8)) &␣

,→(matrix_B["DATE"].between(i, i+1)) ].count()["DATE"]}, ignore_index=True)

table2_3 = table2_3.append({"Date": f"{i}", "Magnitude": "8 <= x < 9",␣


,→"Count": matrix_B.loc[ (matrix_B["MAGNITUDE"].between(8,9)) &␣

,→(matrix_B["DATE"].between(i, i+1)) ].count()["DATE"]}, ignore_index=True)

fig, ax = plt.subplots()
ax.set_xticks(np.arange(0, 120, 10))
plt.title("Figure 2")
plt.plot(table2_1["Date"], table2_1["Count"], label="6 <= x < 7")
plt.plot(table2_2["Date"], table2_2["Count"], label="7 <= x < 8")
plt.plot(table2_3["Date"], table2_3["Count"], label="8 <= x < 9")
plt.legend()
plt.show()

5
[63]: matrix_C = pd.DataFrame({"Date": [], "Cat1":[], "Cat2": [], "Cat3":[]})
for i in range(1910, 2018):
cat1 = matrix_B.loc[(matrix_B["DATE"].between(i, i+1)) &␣
,→(matrix_B["MAGNITUDE"].between(6, 7))].count()["DATE"]

cat2 = matrix_B.loc[(matrix_B["DATE"].between(i, i+1)) &␣


,→(matrix_B["MAGNITUDE"].between(7, 8))].count()["DATE"]

cat3 = matrix_B.loc[(matrix_B["DATE"].between(i, i+1)) &␣


,→(matrix_B["MAGNITUDE"].between(8, 9))].count()["DATE"]

matrix_C = matrix_C.append({"Date": i, "Cat1": cat1, "Cat2": cat2, "Cat3":␣


,→cat3}, ignore_index=True)

# tabulate.tabulate(matrix_C, ["Year", "Category 1", "Category 2", "Category␣


,→3"], tablefmt='html')

2: MEAN and MEDIAN A: Mean (Table 3, Figure 3) (10%)

[64]: mean = matrix_B.mean(axis=0)["MAGNITUDE"]


std1 = matrix_B.std()["MAGNITUDE"]
std2 = std1*2
std3 = std1*3
table3 = pd.DataFrame(index=[0])
table3["mean"] = mean
table3["std1"] = std1
table3["std2"] = std2

6
table3["std3"] = std3

plt.figure(figsize=(6, 6))
plt.plot(matrix_B['MAGNITUDE'],
[mean for i in range(0, len(matrix_B['MAGNITUDE']))])
plt.plot(matrix_B['MAGNITUDE'], [
mean - std1 for i in range(0, len(matrix_B['MAGNITUDE']))
])
plt.plot(matrix_B['MAGNITUDE'], [
mean + std1 for i in range(0, len(matrix_B['MAGNITUDE']))
])
plt.plot(matrix_B['MAGNITUDE'], [
mean - std2 for i in range(0, len(matrix_B['MAGNITUDE']))
])
plt.plot(matrix_B['MAGNITUDE'], [
mean + std2 for i in range(0, len(matrix_B['MAGNITUDE']))
])
plt.plot(matrix_B['MAGNITUDE'], [
mean - std3 for i in range(0, len(matrix_B['MAGNITUDE']))
])
plt.plot(matrix_B['MAGNITUDE'], [
mean + std3 for i in range(0, len(matrix_B['MAGNITUDE']))
])
plt.xlabel("Magnitude")
plt.legend([
'MEAN', 'MEAN - STD1', 'MEAN + STD1', 'MEAN - STD2', 'MEAN + STD2',
'MEAN - STD3', 'MEAN + STD3'
])
plt.title("Figure 3")

[64]: Text(0.5, 1.0, 'Figure 3')

7
[65]: table3['BELOW_STD1'] = len(
matrix_B[matrix_B['MAGNITUDE'] < (mean + std1)])
table3['BETWEEN_STD1_STD2'] = len(
matrix_B[(matrix_B['MAGNITUDE'] < (mean + std2))
& (matrix_B['MAGNITUDE'] > (mean + std1))])
table3['ABOVE_STD3'] = len(
matrix_B[matrix_B['MAGNITUDE'] < (mean + std3)])
tabulate.tabulate(table3, ["Mean", "STD 1", "STD 2", "STD 3", "Below STD1",␣
,→"Between STD1 and STD2", "Above STD3"], tablefmt='html')

[65]: '<table>\n<thead>\n<tr><th style="text-align: right;"> </th><th style="text-


align: right;"> Mean</th><th style="text-align: right;"> STD 1</th><th
style="text-align: right;"> STD 2</th><th style="text-align: right;"> STD
3</th><th style="text-align: right;"> Below STD1</th><th style="text-align:
right;"> Between STD1 and STD2</th><th style="text-align: right;"> Above

8
STD3</th></tr>\n</thead>\n<tbody>\n<tr><td style="text-align: right;"> 0</td><td
style="text-align: right;">6.43237</td><td style="text-align:
right;">0.420982</td><td style="text-align: right;">0.841963</td><td
style="text-align: right;">1.26294</td><td style="text-align: right;">
10319</td><td style="text-align: right;"> 1062</td><td
style="text-align: right;"> 11782</td></tr>\n</tbody>\n</table>'

B: Median (Table 4, Figure 4) (10%)

[12]: median = matrix_B.median(axis=0)["MAGNITUDE"]


q1 = np.percentile(matrix_B["MAGNITUDE"], 25, interpolation = 'midpoint')
q3 = np.percentile(matrix_B["MAGNITUDE"], 75, interpolation = 'midpoint')
table4 = pd.DataFrame(index=[0])
table4["median"] = median
table4["q1"] = q1
table4["q3"] = q3
plt.plot(matrix_B["DATE"], matrix_B["MAGNITUDE"])
plt.plot(matrix_B["DATE"], [median for i in matrix_B["DATE"]])
plt.plot(matrix_B["DATE"], [q1 for i in matrix_B["DATE"]])
plt.plot(matrix_B["DATE"], [q3 for i in matrix_B["DATE"]])
plt.ylabel("Magnitude")
plt.xlabel("Year")
plt.legend(["Data", "Median", "Q1", "Q3"])
plt.title("Figure 4")
plt.show()

9
[13]: below_q1 = 0
between_q1_q3 = 0
above_q3 = 0
for i in matrix_B["MAGNITUDE"]:
if i < q1:
below_q1 += 1
if q1 < i < q3:
between_q1_q3 += 1
if i > q3:
above_q3 += 1
table4["below_q1"] = below_q1
table4["between_q1_q3"] = between_q1_q3
table4["above_q3"] = above_q3
tabulate.tabulate(table4, ["Median", "Q1", "Q3", "Below Q1", "Between Q1 and␣
,→Q3", "Above Q3"], tablefmt='html')

# print(below_q1, between_q1_q3, above_q3)

[13]: '<table>\n<thead>\n<tr><th style="text-align: right;"> </th><th style="text-


align: right;"> Median</th><th style="text-align: right;"> Q1</th><th
style="text-align: right;"> Q3</th><th style="text-align: right;"> Below
Q1</th><th style="text-align: right;"> Between Q1 and Q3</th><th style="text-
align: right;"> Above Q3</th></tr>\n</thead>\n<tbody>\n<tr><td style="text-
align: right;"> 0</td><td style="text-align: right;"> 6.31</td><td
style="text-align: right;">6.13</td><td style="text-align: right;"> 6.6</td><td
style="text-align: right;"> 2999</td><td style="text-align: right;">
5755</td><td style="text-align: right;">
2962</td></tr>\n</tbody>\n</table>'

[14]: matrix_D = pd.DataFrame({"Year": [], "Magnitude":[]})


for i in range(1910, 2011):
tmp = matrix_B.loc[matrix_B["DATE"].between(i-0.001, i+0.999)]
matrix_D = matrix_D.append( {"Year": i, "Magnitude": (tmp.
,→sum()["MAGNITUDE"] / tmp.count()["MAGNITUDE"])}, ignore_index=True)

matrix_D['Rolling_mean'] = matrix_D.iloc[:,1].rolling(window=10).mean()
matrix_D['Rolling_std1'] = matrix_D.iloc[:,1].rolling(window=10).std()
matrix_D['Rolling_std2'] = matrix_D.iloc[:,1].rolling(window=10).std()*2
matrix_D['Rolling_std3'] = matrix_D.iloc[:,1].rolling(window=10).std()*3
plt.plot(matrix_D["Year"], matrix_D["Rolling_mean"])
plt.plot(matrix_D["Year"], matrix_D["Rolling_std1"])
plt.plot(matrix_D["Year"], matrix_D["Rolling_std2"])
plt.plot(matrix_D["Year"], matrix_D["Rolling_std3"])
plt.legend(["Rolling Mean", "Rolling Std 1", "Rolling Std 2", "Rolling Std 3"])
plt.ylabel("Magnitude")
plt.xlabel("Year")
plt.title("Figure 5")

10
plt.show()

[15]: matrix_E = pd.DataFrame(matrix_D)


matrix_E = matrix_D[['Year', 'Magnitude']].copy()
matrix_E['Rolling_median'] = matrix_E.iloc[:,1].rolling(window=10).median()
matrix_E['Rolling_q1'] = matrix_E.iloc[:,1].rolling(window=10).quantile(.25,␣
,→interpolation = 'midpoint')

matrix_E['Rolling_q2'] = matrix_E.iloc[:,1].rolling(window=10).quantile(.75,␣
,→interpolation = 'midpoint')

plt.plot(matrix_E['Year'], matrix_E['Rolling_median'])
plt.plot(matrix_E['Year'], matrix_E['Rolling_q1'])
plt.plot(matrix_E['Year'], matrix_E['Rolling_q2'])
plt.legend(["Rolling Median", "Rolling Q1", "Rolling Q2"])
plt.title("Figure 6")
plt.ylabel("Magnitude")
plt.xlabel("Year")
plt.show()

11
[16]: table_7 = pd.DataFrame(index=[0])

thickness = (2.6 * (q3-q1))/pow(matrix_B.count()["DATE"], 1/3)


table_7["thickness_fraction"] = thickness
table_7["thickness"] = int(1/thickness)
n, bins, patches = plt.hist(matrix_B["DATE"], bins=int(1/thickness))
mode_index = n.argmax()
table_7["year_mode"] = str((bins[mode_index] + bins[mode_index+1])/2)
plt.title("Figure 7")
plt.xlabel("Year")
plt.ylabel("Times")
plt.plot()
plt.show()

n, bins, patches = plt.hist(matrix_B["MAGNITUDE"], bins=int(1/thickness))


mode_index = n.argmax()
table_7["magnitude_mode"] = str((bins[mode_index] + bins[mode_index+1])/2)
plt.title("Figure 7")
plt.xlabel("Magnitude")
plt.ylabel("Times")
plt.plot()
plt.show()

12
tabulate.tabulate( table_7, ["Thickness Fraction", "Thickness", "Year Mode",␣
,→"Magnitude Mode"], tablefmt='html' )

13
[16]: '<table>\n<thead>\n<tr><th style="text-align: right;"> </th><th style="text-
align: right;"> Thickness Fraction</th><th style="text-align: right;">
Thickness</th><th style="text-align: right;"> Year Mode</th><th style="text-
align: right;"> Magnitude Mode</th></tr>\n</thead>\n<tbody>\n<tr><td
style="text-align: right;"> 0</td><td style="text-align: right;">
0.0533343</td><td style="text-align: right;"> 18</td><td style="text-
align: right;"> 2008.19</td><td style="text-align: right;">
6.09861</td></tr>\n</tbody>\n</table>'

[17]: table_8 = pd.DataFrame({"Type":[], "Date":[], "Magnitude":[]})


# matrix_F = pd.DataFrame()
# matrix_ = matrix_B[['DATE', 'MAGNITUDE']].copy()
# matrix_F['DATE'] = matrix_F['DATE'].astype(int)
matrix_B.boxplot(column=["MAGNITUDE"], showfliers=False)
# matrix_B.reset_index()
min_value = matrix_B.loc[matrix_B["MAGNITUDE"].idxmin()]
max_value = matrix_B.loc[matrix_B["MAGNITUDE"].idxmax()]
table_8 = table_8.append({"Type": "Min Value", "Date": min_value["DATE"],␣
,→"Magnitude": min_value["MAGNITUDE"]}, ignore_index=True)

table_8 = table_8.append({"Type": "Max Value", "Date": max_value["DATE"],␣


,→"Magnitude": max_value["MAGNITUDE"]}, ignore_index=True)

# print(matrix_B["MAGNITUDE"].idxmax())
plt.title("Figure 8")
# fig, ax = plt.subplots()
# ax = matrix_F.boxplot(column=["MAGNITUDE"], by=["DATE"])
# ax = matrix_F.boxplot(column=["MAGNITUDE"])
# ax.set_xticks(np.arange(0, 120, 10))
# plt.boxplot(column=[matrix_F["MAGNITUDE"]], by=[matrix_F["DATE"]])
plt.show()
tabulate.tabulate(table_8, ["Type", "Date", "Magnitude"], tablefmt="html")

14
[17]: '<table>\n<thead>\n<tr><th style="text-align: right;"> </th><th>Type
</th><th style="text-align: right;"> Date</th><th style="text-align: right;">
Magnitude</th></tr>\n</thead>\n<tbody>\n<tr><td style="text-align: right;">
0</td><td>Min Value</td><td style="text-align: right;">1914.05</td><td
style="text-align: right;"> 6 </td></tr>\n<tr><td style="text-align:
right;"> 1</td><td>Max Value</td><td style="text-align: right;">1960.05</td><td
style="text-align: right;"> 9.55</td></tr>\n</tbody>\n</table>'

B: Histogram and Boxplots by parts (Figure 9, Figure 10). (15%)

[18]: matrix_F = matrix_B[['DATE', 'MAGNITUDE']].copy()


matrix_F = matrix_F.loc[(matrix_F["DATE"] < 2010) & (matrix_F["DATE"] >= 1910.
,→0)]

for i in range(1910, 2010, 20):


matrix_F.loc[matrix_F["DATE"].between(i, i+20), 'RANGE'] = f"{i}-{i+19}"

q1 = np.percentile(matrix_F["MAGNITUDE"], 25, interpolation = 'midpoint')


q3 = np.percentile(matrix_F["MAGNITUDE"], 75, interpolation = 'midpoint')

thickness = (2.6 * (q3-q1))/pow(matrix_F.count()["DATE"], 1/3)


plt.hist(matrix_F["RANGE"], bins=int(1/thickness))
plt.title("Figure 9")
plt.xlabel("Year range")
plt.ylabel("Times")
plt.plot()

15
plt.show()

plt.hist(matrix_B["MAGNITUDE"], bins=int(1/thickness))
plt.title("Figure 9")
plt.xlabel("Magnitude")
plt.ylabel("Times")
plt.plot()
plt.show()

16
[19]: fig, ax = plt.subplots(figsize=(7,6))
matrix_F.boxplot(column="MAGNITUDE", by="RANGE", ax=ax, showfliers=False)
plt.show()

17
[146]: fig, ax = plt.subplots(figsize=(8,7))

model = sm.formula.ols(formula='Cat1 ~ Date', data=matrix_C)


res = model.fit()
tmp = matrix_C.assign(fit=res.fittedvalues)
plt.plot(tmp["Date"], tmp["Cat1"], label="Category 1 (6 <= x < 7)")
plt.plot(tmp["Date"], tmp["fit"], label="Category 1 linear fit")
model = sm.formula.ols(formula='Cat2 ~ Date', data=matrix_C)
res = model.fit()
tmp = matrix_C.assign(fit=res.fittedvalues)
plt.plot(tmp["Date"], tmp["Cat2"], label="Category 2 (7 <= x < 8)")
plt.plot(tmp["Date"], tmp["fit"], label="Categoty 2 lenear fit")

model = sm.formula.ols(formula='Cat3 ~ Date', data=matrix_C)


res = model.fit()

18
tmp = matrix_C.assign(fit=res.fittedvalues)
plt.plot(tmp["Date"], tmp["Cat3"], label="Category 3 (8 <= x < 9)")
plt.plot(tmp["Date"], tmp["fit"], label="Category 3 linear fit", color="pink")
print(res.summary())
print(res.params)

plt.legend()
plt.show()

OLS Regression Results


==============================================================================
Dep. Variable: Cat3 R-squared: 0.003
Model: OLS Adj. R-squared: -0.006
Method: Least Squares F-statistic: 0.3103
Date: Thu, 03 Jun 2021 Prob (F-statistic): 0.579
Time: 17:11:11 Log-Likelihood: -127.10
No. Observations: 108 AIC: 258.2
Df Residuals: 106 BIC: 263.6
Df Model: 1
Covariance Type: nonrobust
==============================================================================
coef std err t P>|t| [0.025 0.975]
------------------------------------------------------------------------------
Intercept 3.4157 4.802 0.711 0.479 -6.106 12.937
Date -0.0014 0.002 -0.557 0.579 -0.006 0.003
==============================================================================
Omnibus: 8.836 Durbin-Watson: 1.923
Prob(Omnibus): 0.012 Jarque-Bera (JB): 9.442
Skew: 0.703 Prob(JB): 0.00890
Kurtosis: 2.650 Cond. No. 1.24e+05
==============================================================================

Notes:
[1] Standard Errors assume that the covariance matrix of the errors is correctly
specified.
[2] The condition number is large, 1.24e+05. This might indicate that there are
strong multicollinearity or other numerical problems.
Intercept 3.415681
Date -0.001362
dtype: float64

19
20

You might also like