Professional Documents
Culture Documents
#2. Box and Whisker plot - great for depicting numerical data (such as
number of sales made) through the quartiles
sns.boxplot( x=df["Sale_Status"], y=df["Verified_Date"] )
#3. Heat map - appropriate to use for conversion rate and revenue for
Qs 2,3,4
graphical representation of data where each value of a matrix is represented as a color.
Create a dataset df = pd.DataFrame(np.random.random((5,5)), columns=["a","b","c","d","e"])
Default heatmap p1 = sns.heatmap(df)
http://localhost:8888/nbconvert/html/Final%20Elucidate%20AI%20project%20(Batch_ID)%20.ipynb?download=false Page 2 of 10
Final Elucidate AI project (Batch_ID) 11/14/21, 9:01 PM
In [33]:
import numpy as n
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
http://localhost:8888/nbconvert/html/Final%20Elucidate%20AI%20project%20(Batch_ID)%20.ipynb?download=false Page 3 of 10
Final Elucidate AI project (Batch_ID) 11/14/21, 9:01 PM
In [34]:
Batch_ID = pd.read_csv('data_post2021.csv')
Batch_ID.head() #This column has 58 unique entries. The exact definition of this
#This column indicates the sequence we dialled the leads for the campaign
---------------------------------------------------------------------------
IsADirectoryError Traceback (most recent call last)
<ipython-input-34-72523357077b> in <module>
----> 1 Batch_ID = pd.read_csv('data_post2021.csv')
2 Batch_ID.head() #This column has 58 unique entries. The exact defini
tion of this column and each of its entries will be beneficial.
3 #This column indicates the sequence we dialled the leads for the campa
ign
/opt/anaconda3/lib/python3.8/site-packages/pandas/io/parsers.py in read_csv(fi
lepath_or_buffer, sep, delimiter, header, names, index_col, usecols, squeeze,
prefix, mangle_dupe_cols, dtype, engine, converters, true_values, false_values
, skipinitialspace, skiprows, skipfooter, nrows, na_values, keep_default_na, n
a_filter, verbose, skip_blank_lines, parse_dates, infer_datetime_format, keep_
date_col, date_parser, dayfirst, cache_dates, iterator, chunksize, compression
, thousands, decimal, lineterminator, quotechar, quoting, doublequote, escapec
har, comment, encoding, dialect, error_bad_lines, warn_bad_lines, delim_whites
pace, low_memory, memory_map, float_precision, storage_options)
608 kwds.update(kwds_defaults)
609
--> 610 return _read(filepath_or_buffer, kwds)
611
612
/opt/anaconda3/lib/python3.8/site-packages/pandas/io/parsers.py in _read(filep
ath_or_buffer, kwds)
460
461 # Create the parser.
--> 462 parser = TextFileReader(filepath_or_buffer, **kwds)
463
464 if chunksize or iterator:
/opt/anaconda3/lib/python3.8/site-packages/pandas/io/parsers.py in __init__(se
lf, f, engine, **kwds)
817 self.options["has_index_names"] = kwds["has_index_names"]
818
--> 819 self._engine = self._make_engine(self.engine)
820
821 def close(self):
/opt/anaconda3/lib/python3.8/site-packages/pandas/io/parsers.py in _make_engin
e(self, engine)
1048 )
1049 # error: Too many arguments for "ParserBase"
-> 1050 return mapping[engine](self.f, **self.options) # type: ignore
[call-arg]
1051
1052 def _failover_to_python(self):
/opt/anaconda3/lib/python3.8/site-packages/pandas/io/parsers.py in __init__(se
lf, src, **kwds)
http://localhost:8888/nbconvert/html/Final%20Elucidate%20AI%20project%20(Batch_ID)%20.ipynb?download=false Page 4 of 10
Final Elucidate AI project (Batch_ID) 11/14/21, 9:01 PM
1865
1866 # open handles
-> 1867 self._open_handles(src, kwds)
1868 assert self.handles is not None
1869 for key in ("storage_options", "encoding", "memory_map", "comp
ression"):
/opt/anaconda3/lib/python3.8/site-packages/pandas/io/parsers.py in _open_handl
es(self, src, kwds)
1360 Let the readers open IOHanldes after they are done with their
potential raises.
1361 """
-> 1362 self.handles = get_handle(
1363 src,
1364 "r",
/opt/anaconda3/lib/python3.8/site-packages/pandas/io/common.py in get_handle(p
ath_or_buf, mode, encoding, compression, memory_map, is_text, errors, storage_
options)
640 errors = "replace"
641 # Encoding
--> 642 handle = open(
643 handle,
644 ioargs.mode,
In [35]:
Batch_ID.shape #loading and inspecting data
---------------------------------------------------------------------------
NameError Traceback (most recent call last)
<ipython-input-35-373762c67413> in <module>
----> 1 Batch_ID.shape #loading and inspecting data
In [20]:
Batch_ID.dtypes #loading and inspecting data
---------------------------------------------------------------------------
NameError Traceback (most recent call last)
<ipython-input-20-148267869b91> in <module>
----> 1 Batch_ID.dtypes #loading and inspecting data
In [21]:
Batch_ID.columns #loading and inspecting data
---------------------------------------------------------------------------
NameError Traceback (most recent call last)
<ipython-input-21-ef58acf42f5c> in <module>
----> 1 Batch_ID.columns #loading and inspecting data
http://localhost:8888/nbconvert/html/Final%20Elucidate%20AI%20project%20(Batch_ID)%20.ipynb?download=false Page 5 of 10
Final Elucidate AI project (Batch_ID) 11/14/21, 9:01 PM
In [22]:
Batch_ID.apply('nunique') #loading and inspecting data
---------------------------------------------------------------------------
NameError Traceback (most recent call last)
<ipython-input-22-8ec1553fa572> in <module>
----> 1 Batch_ID.apply('nunique') #loading and inspecting data
In [23]:
Batch_ID = Batch_ID.drop([['CampaignID', 'Cust_ID', 'Effective_Date','Call_Start'
^
SyntaxError: invalid syntax
In [24]:
Batch_ID = Batch_ID.drop(['CampaignID'], axis=1)
---------------------------------------------------------------------------
NameError Traceback (most recent call last)
<ipython-input-24-99d56e58c6a8> in <module>
----> 1 Batch_ID = Batch_ID.drop(['CampaignID'], axis=1)
In [25]:
Batch_ID = Batch_ID.rename(columns={"Cust_Sex": "Cust_Gender"})
Batch_ID.head()
---------------------------------------------------------------------------
NameError Traceback (most recent call last)
<ipython-input-25-2d577942fb8f> in <module>
----> 1 Batch_ID = Batch_ID.rename(columns={"Cust_Sex": "Cust_Gender"})
2 Batch_ID.head()
In [26]:
Batch_ID = Batch_ID.rename(columns={"Avg_est_income": "Avg_income"})
Batch_ID.head()
---------------------------------------------------------------------------
NameError Traceback (most recent call last)
<ipython-input-26-e69b42e36748> in <module>
----> 1 Batch_ID = Batch_ID.rename(columns={"Avg_est_income": "Avg_income"})
2 Batch_ID.head()
http://localhost:8888/nbconvert/html/Final%20Elucidate%20AI%20project%20(Batch_ID)%20.ipynb?download=false Page 6 of 10
Final Elucidate AI project (Batch_ID) 11/14/21, 9:01 PM
In [27]:
print(Batch_ID.shape) #removing duplicates
duplicate_rows_df = df[df.duplicated()] #rows containing duplicate data
print(duplicate_rows_df.shape)
---------------------------------------------------------------------------
NameError Traceback (most recent call last)
<ipython-input-27-c4c486b6b206> in <module>
----> 1 print(Batch_ID.shape) #removing duplicates
2 duplicate_rows_df = df[df.duplicated()] #rows containing duplicate dat
a
3
4 print(duplicate_rows_df.shape)
In [28]:
Batch_ID = Batch_ID.drop_duplicates(keep='Verified_Date')
print(Batch_ID.shape)
---------------------------------------------------------------------------
NameError Traceback (most recent call last)
<ipython-input-28-db91953c01d4> in <module>
----> 1 Batch_ID = Batch_ID.drop_duplicates(keep='Verified_Date')
2 print(Batch_ID.shape)
In [29]:
Batch_ID.dtypes #data types
---------------------------------------------------------------------------
NameError Traceback (most recent call last)
<ipython-input-29-1cdb81cf53b0> in <module>
----> 1 Batch_ID.dtypes #data types
In [30]:
Batch_ID = Batch_ID.drop(["Verified_Date", "Postal_Cde","Effective_Date"], axis
Batch_ID.head()
---------------------------------------------------------------------------
NameError Traceback (most recent call last)
<ipython-input-30-964043ab9a00> in <module>
----> 1 Batch_ID = Batch_ID.drop(["Verified_Date", "Postal_Cde","Effective_Dat
e"], axis=1)
2 Batch_ID.head()
http://localhost:8888/nbconvert/html/Final%20Elucidate%20AI%20project%20(Batch_ID)%20.ipynb?download=false Page 7 of 10
Final Elucidate AI project (Batch_ID) 11/14/21, 9:01 PM
In [31]:
Batch_ID['Verified_Date'] = pd.to_datetime(Batch_ID['Verified_Date']) #needed to be
Batch_ID.info()
---------------------------------------------------------------------------
NameError Traceback (most recent call last)
<ipython-input-31-946ff7e96323> in <module>
----> 1 Batch_ID['Verified_Date'] = pd.to_datetime(Batch_ID['Verified_Date'])
#needed to be renamed
2 Batch_ID.info()
In [32]:
Batch_ID.Postal_Code = pd.to_int(Batch_ID["Postal_Code"]) #needed to be renamed
print(Batch_ID.dtypes)
---------------------------------------------------------------------------
AttributeError Traceback (most recent call last)
<ipython-input-32-6cdeb9dea063> in <module>
----> 1 Batch_ID.Postal_Code = pd.to_int(Batch_ID["Postal_Code"]) #needed to b
e renamed
2 print(Batch_ID.dtypes)
/opt/anaconda3/lib/python3.8/site-packages/pandas/__init__.py in __getattr__(n
ame)
242 return _SparseArray
243
--> 244 raise AttributeError(f"module 'pandas' has no attribute '{name}'")
245
246
In [40]:
Batch_ID["Postal_Code"] = Batch_ID["Postal_Code”].astype(int)
In [41]:
Batch_ID["Postal_Code"] = Batch_ID["Postal_Code"].astype(int)
---------------------------------------------------------------------------
NameError Traceback (most recent call last)
<ipython-input-41-bb0351a0cb47> in <module>
----> 1 Batch_ID["Postal_Code"] = Batch_ID["Postal_Code"].astype(int)
http://localhost:8888/nbconvert/html/Final%20Elucidate%20AI%20project%20(Batch_ID)%20.ipynb?download=false Page 8 of 10
Final Elucidate AI project (Batch_ID) 11/14/21, 9:01 PM
In [ ]:
Batch_ID.Cover_Amount = pd.to_int64(Batch_ID["Cover_Amount"]) #needed to be renamed
print(Batch_ID.dtypes)
In [ ]:
print(Batch_ID.isnull().sum()) #missing values
In [36]:
! pip install missingno
In [37]:
import missingno as msno
msno.matrix(Batch_ID);
---------------------------------------------------------------------------
NameError Traceback (most recent call last)
<ipython-input-37-9fa12fbf4e1c> in <module>
1 import missingno as msno
2
----> 3 msno.matrix(Batch_ID);
In [39]:
Batch_ID = Batch_ID([])
http://localhost:8888/nbconvert/html/Final%20Elucidate%20AI%20project%20(Batch_ID)%20.ipynb?download=false Page 9 of 10
Final Elucidate AI project (Batch_ID) 11/14/21, 9:01 PM
---------------------------------------------------------------------------
NameError Traceback (most recent call last)
<ipython-input-39-3200018f45d5> in <module>
----> 1 Batch_ID = Batch_ID([])
In [38]:
Batch_ID = Batch_ID.drop(["Verified_Date"], axis=1 #Verified_Date - doesnt look lik
Batch_ID = Batch_ID.drop(["Effective_Date"], axis=1 #Effective_Date -had 00:00.0 in
Batch_ID = Batch_ID.drop(["Date_of_Debit"], axis=1 #Date_of_Debit had 00:00.0 in ev
In [42]:
Batch_ID.dtypes
---------------------------------------------------------------------------
NameError Traceback (most recent call last)
<ipython-input-42-459ddd2e979d> in <module>
----> 1 Batch_ID.dtypes
In [ ]:
df.
In [ ]:
http://localhost:8888/nbconvert/html/Final%20Elucidate%20AI%20project%20(Batch_ID)%20.ipynb?download=false Page 10 of 10