You are on page 1 of 4

Jupyter QtConsole 4.2.0
Python 3.5.1 |Anaconda custom (x86_64)| (default, Dec 7 2015, 11:24:55)
Type "copyright", "credits" or "license" for more information.
IPython 4.1.2 -- An enhanced Interactive Python.
?         -> Introduction and overview of IPython's features.
%quickref -> Quick reference.
help      -> Python's own help system.
object?   -> Details about 'object', use 'object??' for extra details.
In [1]: import os
In [2]: import pandas as pd
In [3]: from pandas import read_csv, DataFrame
In [4]: os.chdir('/Applications/nychatest/FR2/Maximo/')
In [5]: os.chdir('/Applications/nychatest/FR2/Maximo/wo/')
In [6]: dfA = pd.read_csv('woFrameA.csv', dtype=str)
In [7]: columnNames = dfA.columns.values.tolist()
In [8]: len(columnNames)
Out[8]: 215
In [9]: resultFile = open('maximoColumnNames.py', 'w')
In [10]: import pprint
In [11]: resultFile.write('maximoColumnNames = ' + pprint.pformat(columnNames))
Out[11]: 3136
In [12]: resultFile.close()
In [13]: print('Done.')
Done.
In [14]: dfA['STATUS'].value_counts()
Out[14]:
CLOSE              9920039
CAN                1049590
APPR                518899
COMP                243926
WAPPR               166589
WTSCH                16642
INPRG                 2022
DISP                   971
PLANDOC                884
SCHED                  531
---------------        240
STATUS                 238
VIFAILRV               180
REVIEW                 155
CAPWORK                106
FAILSCH                 61
INSREV                   1
WMATL                    1
1

Name: STATUS, dtype: int64


In [15]: dfA.info()
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 11921075 entries, 0 to 11921074
Columns: 215 entries, Unnamed: 0 to Z.7
dtypes: object(215)
memory usage: 19.1+ GB
In [16]: del dfA
In [17]: dfB = pd.read_csv('woFrameB.csv', dtype=str)
In [18]: dfB.info()
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 11931241 entries, 0 to 11931240
Columns: 215 entries, Unnamed: 0 to Z.7
dtypes: object(215)
memory usage: 19.1+ GB
In [19]: dfB['STATUS'].value_counts()
Out[19]:
CLOSE              10366924
CAN                 1087275
APPR                 258380
COMP                 122212
WAPPR                 72402
WTSCH                 10927
SCHED                  4803
PLANDOC                4176
INPRG                  1691
DISP                   1014
VIFAILRV                264
FAILSCH                 261
---------------         240
STATUS                  238
REVIEW                  238
WMATL                   112
CAPWORK                  83
INSREV                    1
Name: STATUS, dtype: int64
In [20]: del dfB
In [21]: dfC = pd.read_csv('woFrameC.csv', dtype=str)
In [22]: dfC.info()
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 9660647 entries, 0 to 9660646
Columns: 215 entries, Unnamed: 0 to Z.7
dtypes: object(215)
memory usage: 15.5+ GB
In [23]: dfC['STATUS'].value_counts()
Out[23]:
CLOSE              6966675
WAPPR               900302
CAN                 761311
APPR                484938
2

WTSCH               392671
COMP                 83892
SCHED                48195
DISP                  9882
INPRG                 7048
FAILSCH               3417
PLANDOC               1311
VIFAILRV               210
---------------        195
STATUS                 193
WMATL                  161
REVIEW                 123
INSREV                  65
CAPWORK                 58
Name: STATUS, dtype: int64
In [24]: del dfC
In [25]: os.getcwd()
Out[25]: '/Applications/nychatest/FR2/Maximo/wo'
In [26]: os.chdir('/Applications/nychatest/FR2/Siebel')
In [27]: dfSiebel = pd.read_csv('siebelWOConcat.csv')
/Applications/anaconda/lib/python3.5/site-packages/IPython/core/interactiveshell.py:2723: DtypeWarning: Columns (6) have mixed types. Specify dtype option on import or set low_memory=False.
  interactivity=interactivity, compiler=compiler, result=result)
In [28]: dfSiebel.info()
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 13830385 entries, 0 to 13830384
Data columns (total 18 columns):
Unnamed: 0              int64
CONTACT_ID              object
SR_ID                   object
CREATED_D               object
UNIT_ID                 object
SR_NUM                  object
WORK_ORDER_NUM          object
LOCATION                object
LOCATION_TYPE           object
LOCATION_ID             object
DESCRIPTION             object
SCHEDULE_               object
SHIFT                   object
RESP_SCHEDULER          object
PRIORITY                int64
STATUS                  object
WORK_ORDER_ITEM         object
WORK_ORDER_COMPLAINT    object
dtypes: int64(2), object(16)
memory usage: 1.9+ GB
In [29]: dfSiebel['STATUS'].value_counts()
Out[29]:
Closed                          12321070
Cancelled                        1440606
Scheduled                          30465
3

Waiting on Scheduling              27889
Approved                            4604
Dispatched or Printed               2066
Failed to Schedule                  1494
In Progress                          509
Rescheduled                          275
Waiting on Approval                  122
Waiting on Items/Parts/Tools          63
Complete                              10
Error                                  6
Completed                              6
Waiting on Apporval                    4
Name: STATUS, dtype: int64

In [30]: