Professional Documents
Culture Documents
Example - 1
In [1]:
import pandas as pd
In [11]:
Out[11]:
City Temp
0 Mumbai 25
1 Chennai 23
2 Nashik 22
3 Pune 21
4 Delhi 20
5 Banglore 26
In [12]:
Out[12]:
City Humidity
0 Pune 75
1 Mumbai 83
2 Chennai 85
3 Nashik 78
4 Delhi 53
5 Tamilnadu 69
localhost:8888/notebooks/Python by John/Daily Tasks/Daily Task 6 %26 7 - Explore Merge Function %26 Perform Data Cleaning.ipynb 1/23
7/11/22, 1:23 PM Daily Task 6 & 7 - Explore Merge Function & Perform Data Cleaning - Jupyter Notebook
In [13]:
weather = pd.merge(temp,humidity) ## It will merge only for same values in both dataframes
weather ## By default how = "inner" i.e intersection of both dat
Out[13]:
0 Mumbai 25 83
1 Chennai 23 85
2 Nashik 22 78
3 Pune 21 75
4 Delhi 20 53
In [17]:
Out[17]:
In [21]:
Out[21]:
localhost:8888/notebooks/Python by John/Daily Tasks/Daily Task 6 %26 7 - Explore Merge Function %26 Perform Data Cleaning.ipynb 2/23
7/11/22, 1:23 PM Daily Task 6 & 7 - Explore Merge Function & Perform Data Cleaning - Jupyter Notebook
In [20]:
Out[20]:
Example - 2
In [4]:
Out[4]:
0 HP Laptop Purchase
1 LG Washing Machine HR
2 Panasonic TV Quality
localhost:8888/notebooks/Python by John/Daily Tasks/Daily Task 6 %26 7 - Explore Merge Function %26 Perform Data Cleaning.ipynb 3/23
7/11/22, 1:23 PM Daily Task 6 & 7 - Explore Merge Function & Perform Data Cleaning - Jupyter Notebook
In [5]:
Out[5]:
1 LG Fridge HR
2 Panasonic TV Quality
3 Sony AC Design
In [8]:
Out[8]:
In [10]:
Out[10]:
===============================================
localhost:8888/notebooks/Python by John/Daily Tasks/Daily Task 6 %26 7 - Explore Merge Function %26 Perform Data Cleaning.ipynb 4/23
7/11/22, 1:23 PM Daily Task 6 & 7 - Explore Merge Function & Perform Data Cleaning - Jupyter Notebook
In [2]:
Out[2]:
SOLANKI DONA-VAI-
0 1/4/2017 Sal:1 2 1,690.00 3,380
PLASTICS 9100
SOLANKI LITE
1 1/4/2017 Sal:1 6 1,620.00 9,720
PLASTICS FOAM(1200)
VISHNU
SARNESWARA
2 1/4/2017 Sal:2 CHOTA 500 23 11,500
TRADERS
WINE
SARNESWARA LITE
3 1/4/2017 Sal:2 6 1,620.00 9,720
TRADERS FOAM(1200)
SARNESWARA DONA-VAI-
4 1/4/2017 Sal:2 5 1,690.00 8,450
TRADERS 9100
10*10
47285 31/03/2018 Sal:10042 Vkp 25 137 3,425
SHEET
localhost:8888/notebooks/Python by John/Daily Tasks/Daily Task 6 %26 7 - Explore Merge Function %26 Perform Data Cleaning.ipynb 5/23
7/11/22, 1:23 PM Daily Task 6 & 7 - Explore Merge Function & Perform Data Cleaning - Jupyter Notebook
In [3]:
Out[3]:
SILVER
0 1/4/2018 Sal:146 TP13 POUCH 50 85 4,250.00
9*12
DURGA
2 1/4/2018 Sal:146 TP13 10*12 1,600.00 5.5 8,800.00
Blue
DURGA
3 1/4/2018 Sal:146 TP13 13*16 400 11 4,400.00
BLUE
10*12
4 1/4/2018 Sal:146 TP13 SARAS- 600 8.1 4,860.00
NAT
HAMPI SPOON
44735 31/03/2019 Sal:9610 200 40 8,000.00
FOODS SOOFY
localhost:8888/notebooks/Python by John/Daily Tasks/Daily Task 6 %26 7 - Explore Merge Function %26 Perform Data Cleaning.ipynb 6/23
7/11/22, 1:23 PM Daily Task 6 & 7 - Explore Merge Function & Perform Data Cleaning - Jupyter Notebook
In [4]:
Out[4]:
BALAJI DONA-
0 1/4/2019 Sal:687 1 1,730.00 1,730.00
PLASTICS VAI-9100
BALAJI SMART
1 1/4/2019 Sal:687 1 1,730.00 1,730.00
PLASTICS BOUL(48)
BALAJI Vishnu
2 1/4/2019 Sal:688 110 18.5 2,035.00
PLASTICS Ice
3 28/3 0 0
BALAJI 100LEAF
4 1/4/2019 Sal:689 3 585 1,755.00
PLASTICS -SP
13*16
19171 10/10/2019 Sal:4935 K.SRIHARI WHITE 400 16 6,400.00
RK
In [5]:
import warnings
warnings.filterwarnings(action = 'ignore')
localhost:8888/notebooks/Python by John/Daily Tasks/Daily Task 6 %26 7 - Explore Merge Function %26 Perform Data Cleaning.ipynb 7/23
7/11/22, 1:23 PM Daily Task 6 & 7 - Explore Merge Function & Perform Data Cleaning - Jupyter Notebook
In [6]:
sales_complete_data = sales_data_2017.append([sales_data_2018,sales_data_2019])
sales_complete_data
Out[6]:
SOLANKI DONA-VAI-
0 1/4/2017 Sal:1 2 1,690.00 3,380.0
PLASTICS 9100
SOLANKI LITE
1 1/4/2017 Sal:1 6 1,620.00 9,720.0
PLASTICS FOAM(1200)
VISHNU
SARNESWARA
2 1/4/2017 Sal:2 CHOTA 500 23 11,500.0
TRADERS
WINE
SARNESWARA LITE
3 1/4/2017 Sal:2 6 1,620.00 9,720.0
TRADERS FOAM(1200)
SARNESWARA DONA-VAI-
4 1/4/2017 Sal:2 5 1,690.00 8,450.0
TRADERS 9100
13*16
19171 10/10/2019 Sal:4935 K.SRIHARI 400 16 6,400.0
WHITE RK
In [7]:
sales_complete_data.shape
Out[7]:
(111206, 9)
localhost:8888/notebooks/Python by John/Daily Tasks/Daily Task 6 %26 7 - Explore Merge Function %26 Perform Data Cleaning.ipynb 8/23
7/11/22, 1:23 PM Daily Task 6 & 7 - Explore Merge Function & Perform Data Cleaning - Jupyter Notebook
In [8]:
sales_complete_data.head(20)
Out[8]:
Voucher
Date Voucher Party Product Qty Rate Gross Disc
Amount
SOLANKI DONA-VAI-
0 1/4/2017 Sal:1 2 1,690.00 3,380.00 NaN 13,100.00
PLASTICS 9100
SOLANKI LITE
1 1/4/2017 Sal:1 6 1,620.00 9,720.00 NaN NaN
PLASTICS FOAM(1200)
VISHNU
SARNESWARA
2 1/4/2017 Sal:2 CHOTA 500 23 11,500.00 NaN 30,990.00
TRADERS
WINE
SARNESWARA LITE
3 1/4/2017 Sal:2 6 1,620.00 9,720.00 NaN NaN
TRADERS FOAM(1200)
SARNESWARA DONA-VAI-
4 1/4/2017 Sal:2 5 1,690.00 8,450.00 NaN NaN
TRADERS 9100
SARNESWARA CLASSIC
5 1/4/2017 Sal:2 1 1,320.00 1,320.00 NaN NaN
TRADERS ENJOY(750)
Vishnu
6 1/4/2017 Sal:898 Lock 100 30 3,000.00 100 5,400.00
250ml
BLACK
7 1/4/2017 Sal:898 Lock 100 26 2,600.00 100 NaN
DOG-350ML
9 try
VAMSI
10 1/4/2017 Sal:2497 KRISHNA Loose Items 1 800 800 NaN 800
FANCY
DUMMY
12
ENTRY
VAMSI
13 1/4/2017 Sal:9263 KRISHNA Loose Items 1 280 280 NaN 280
FANCY
15 dummy entry
17 dummy entry
LITE
18 2/4/2017 Sal:16 KPR 1 1,620.00 1,620.00 NaN 1,620.00
FOAM(1200)
BALAJI 90ML
19 3/4/2017 Sal:3 150 14.5 2,175.00 NaN 2,175.00
PLASTICS RANGEELA
localhost:8888/notebooks/Python by John/Daily Tasks/Daily Task 6 %26 7 - Explore Merge Function %26 Perform Data Cleaning.ipynb 9/23
7/11/22, 1:23 PM Daily Task 6 & 7 - Explore Merge Function & Perform Data Cleaning - Jupyter Notebook
In [9]:
Out[9]:
In [10]:
sales_cleaned_data.shape
Out[10]:
(95562, 6)
In [11]:
sales_complete_data.dtypes
Out[11]:
Date object
Voucher object
Party object
Product object
Qty object
Rate object
Gross object
Disc object
dtype: object
localhost:8888/notebooks/Python by John/Daily Tasks/Daily Task 6 %26 7 - Explore Merge Function %26 Perform Data Cleaning.ipynb 10/23
7/11/22, 1:23 PM Daily Task 6 & 7 - Explore Merge Function & Perform Data Cleaning - Jupyter Notebook
In [12]:
sales_cleaned_data.dtypes
Out[12]:
Date object
Voucher int64
Party object
Product object
Qty int64
Rate float64
dtype: object
In [15]:
sales_complete_data.info()
<class 'pandas.core.frame.DataFrame'>
dtypes: object(9)
In [17]:
sales_complete_data.isna().sum()
Out[17]:
Date 12591
Voucher 12557
Party 40
Product 12591
Qty 12557
Rate 12558
Gross 12558
Disc 105609
dtype: int64
localhost:8888/notebooks/Python by John/Daily Tasks/Daily Task 6 %26 7 - Explore Merge Function %26 Perform Data Cleaning.ipynb 11/23
7/11/22, 1:23 PM Daily Task 6 & 7 - Explore Merge Function & Perform Data Cleaning - Jupyter Notebook
In [18]:
sales_cleaned_data.isna().sum()
Out[18]:
Date 0
Voucher 0
Party 0
Product 0
Qty 0
Rate 1
dtype: int64
In [19]:
In [20]:
sales_complete_data
Out[20]:
SARNESWARA
3 1/4/2017 Sal:2 LITE FOAM(1200) 6 1,620.00
TRADERS
SARNESWARA
4 1/4/2017 Sal:2 DONA-VAI-9100 5 1,690.00
TRADERS
localhost:8888/notebooks/Python by John/Daily Tasks/Daily Task 6 %26 7 - Explore Merge Function %26 Perform Data Cleaning.ipynb 12/23
7/11/22, 1:23 PM Daily Task 6 & 7 - Explore Merge Function & Perform Data Cleaning - Jupyter Notebook
In [21]:
sales_complete_data.dropna(inplace = True)
sales_complete_data
Out[21]:
In [22]:
sales_complete_data.shape
Out[22]:
(98614, 6)
In [23]:
print(sales_complete_data["Party"].unique())
localhost:8888/notebooks/Python by John/Daily Tasks/Daily Task 6 %26 7 - Explore Merge Function %26 Perform Data Cleaning.ipynb 13/23
7/11/22, 1:23 PM Daily Task 6 & 7 - Explore Merge Function & Perform Data Cleaning - Jupyter Notebook
In [24]:
sales_complete_data.info()
<class 'pandas.core.frame.DataFrame'>
dtypes: object(6)
In [25]:
sales_complete_data.describe(include = "all" )
Out[25]:
In [26]:
sales_complete_data["Party"].value_counts()
Out[26]:
TP13 13056
K.SRIHARI 2537
KPR 2354
SVP-BUCHHI 1620
...
g.subharao 1
svr brandi 1
VS 1
SK.BABU 1
10-Jul 1
localhost:8888/notebooks/Python by John/Daily Tasks/Daily Task 6 %26 7 - Explore Merge Function %26 Perform Data Cleaning.ipynb 14/23
7/11/22, 1:23 PM Daily Task 6 & 7 - Explore Merge Function & Perform Data Cleaning - Jupyter Notebook
In [27]:
sales_complete_data["Voucher"]
Out[27]:
0 Sal:1
1 Sal:1
2 Sal:2
3 Sal:2
4 Sal:2
...
19167 Sal:4935
19168 Sal:4935
19169 Sal:4935
19170 Sal:4935
19171 Sal:4935
In [28]:
sales_complete_data["Voucher"] = sales_complete_data["Voucher"].str.replace("Sal:","")
sales_complete_data
Out[28]:
In [29]:
sales_complete_data.dtypes
Out[29]:
Date object
Voucher object
Party object
Product object
Qty object
Rate object
dtype: object
localhost:8888/notebooks/Python by John/Daily Tasks/Daily Task 6 %26 7 - Explore Merge Function %26 Perform Data Cleaning.ipynb 15/23
7/11/22, 1:23 PM Daily Task 6 & 7 - Explore Merge Function & Perform Data Cleaning - Jupyter Notebook
In [30]:
sales_cleaned_data
Out[30]:
In [31]:
sales_complete_data.groupby(by="Party")
sales_complete_data
Out[31]:
localhost:8888/notebooks/Python by John/Daily Tasks/Daily Task 6 %26 7 - Explore Merge Function %26 Perform Data Cleaning.ipynb 16/23
7/11/22, 1:23 PM Daily Task 6 & 7 - Explore Merge Function & Perform Data Cleaning - Jupyter Notebook
In [63]:
sales_complete_data.head()
Out[63]:
In [64]:
sales_complete_data.dtypes
Out[64]:
Date object
Voucher object
Party object
Product object
Qty object
Rate object
dtype: object
In [65]:
sales_cleaned_data.dtypes
Out[65]:
Date object
Voucher int64
Party object
Product object
Qty int64
Rate float64
dtype: object
localhost:8888/notebooks/Python by John/Daily Tasks/Daily Task 6 %26 7 - Explore Merge Function %26 Perform Data Cleaning.ipynb 17/23
7/11/22, 1:23 PM Daily Task 6 & 7 - Explore Merge Function & Perform Data Cleaning - Jupyter Notebook
In [66]:
sales_complete_data["Voucher"] = sales_complete_data["Voucher"].astype("int")
---------------------------------------------------------------------------
5905 results = [
5908 ]
5910 else:
File ~\anaconda3\lib\site-packages\pandas\core\internals\managers.py:419, in
BaseBlockManager.astype(self, dtype, copy, errors)
418 def astype(self: T, dtype, copy: bool = False, errors: str = "raise"
) -> T:
File ~\anaconda3\lib\site-packages\pandas\core\internals\managers.py:304, in
BaseBlockManager.apply(self, f, align_keys, ignore_failures, **kwargs)
303 else:
File ~\anaconda3\lib\site-packages\pandas\core\internals\blocks.py:580, in B
lock.astype(self, dtype, copy, errors)
562 """
564
(...)
576 Block
577 """
1291 try:
localhost:8888/notebooks/Python by John/Daily Tasks/Daily Task 6 %26 7 - Explore Merge Function %26 Perform Data Cleaning.ipynb 18/23
7/11/22, 1:23 PM Daily Task 6 & 7 - Explore Merge Function & Perform Data Cleaning - Jupyter Notebook
1236 else:
1151
ValueError: invalid literal for int() with base 10: ' '
In [60]:
Out[60]:
localhost:8888/notebooks/Python by John/Daily Tasks/Daily Task 6 %26 7 - Explore Merge Function %26 Perform Data Cleaning.ipynb 19/23
7/11/22, 1:23 PM Daily Task 6 & 7 - Explore Merge Function & Perform Data Cleaning - Jupyter Notebook
In [54]:
sales_complete_data.sort_values(by = "Voucher")
Out[54]:
19145 INV 19
26053 khadervali
26220 AUTULO
localhost:8888/notebooks/Python by John/Daily Tasks/Daily Task 6 %26 7 - Explore Merge Function %26 Perform Data Cleaning.ipynb 20/23
7/11/22, 1:23 PM Daily Task 6 & 7 - Explore Merge Function & Perform Data Cleaning - Jupyter Notebook
In [62]:
pd.to_datetime(sales_complete_data["Date"])
--------------------------------------------------------------------------
-
File ~\anaconda3\lib\site-packages\pandas\core\arrays\datetimes.py:2211, i
n objects_to_datetime64ns(data, dayfirst, yearfirst, utc, errors, require_
iso8601, allow_object, allow_mixed)
2210 try:
File ~\anaconda3\lib\site-packages\pandas\_libs\tslibs\conversion.pyx:360,
in pandas._libs.tslibs.conversion.datetime_to_datetime64()
----> 1 pd.to_datetime(sales_complete_data["Date"])
File ~\anaconda3\lib\site-packages\pandas\core\tools\datetimes.py:1047, in
to_datetime(arg, errors, dayfirst, yearfirst, utc, format, exact, unit, in
fer_datetime_format, origin, cache)
File ~\anaconda3\lib\site-packages\pandas\core\tools\datetimes.py:197, in
_maybe_cache(arg, format, cache, convert_listlike)
199 # GH#39882 and GH#35888 in case of None and NaT we get duplica
tes
File ~\anaconda3\lib\site-packages\pandas\core\tools\datetimes.py:402, in
_convert_listlike_datetimes(arg, format, name, tz, unit, errors, infer_dat
etime_format, dayfirst, yearfirst, exact)
403 arg,
404 dayfirst=dayfirst,
405 yearfirst=yearfirst,
406 utc=utc,
407 errors=errors,
408 require_iso8601=require_iso8601,
409 allow_object=True,
localhost:8888/notebooks/Python by John/Daily Tasks/Daily Task 6 %26 7 - Explore Merge Function %26 Perform Data Cleaning.ipynb 21/23
7/11/22, 1:23 PM Daily Task 6 & 7 - Explore Merge Function & Perform Data Cleaning - Jupyter Notebook
410 )
414 # is in UTC
File ~\anaconda3\lib\site-packages\pandas\core\arrays\datetimes.py:2217, i
n objects_to_datetime64ns(data, dayfirst, yearfirst, utc, errors, require_
iso8601, allow_object, allow_mixed)
2221 # is in UTC
File ~\anaconda3\lib\site-packages\pandas\core\arrays\datetimes.py:2199, i
n objects_to_datetime64ns(data, dayfirst, yearfirst, utc, errors, require_
iso8601, allow_object, allow_mixed)
2198 try:
2200 data.ravel("K"),
2201 errors=errors,
2202 utc=utc,
2203 dayfirst=dayfirst,
2204 yearfirst=yearfirst,
2205 require_iso8601=require_iso8601,
2206 allow_mixed=allow_mixed,
2207 )
File ~\anaconda3\lib\site-packages\pandas\_libs\tslibs\parsing.pyx:281, in
pandas._libs.tslibs.parsing.parse_datetime_string()
1367 else:
645 if len(res) == 0:
--> 646 raise ParserError("String does not contain a date: %s", timest
localhost:8888/notebooks/Python by John/Daily Tasks/Daily Task 6 %26 7 - Explore Merge Function %26 Perform Data Cleaning.ipynb 22/23
7/11/22, 1:23 PM Daily Task 6 & 7 - Explore Merge Function & Perform Data Cleaning - Jupyter Notebook
r)
648 try:
In [ ]:
localhost:8888/notebooks/Python by John/Daily Tasks/Daily Task 6 %26 7 - Explore Merge Function %26 Perform Data Cleaning.ipynb 23/23