You are on page 1 of 3

C:\Users\Dima\Desktop\School Backup\Semester2\Python\D1\KurylovichDGisc9307D1\KurylovichDGisc9307D1.

py

Sunday, April 26, 2015 3:35 AM

#
#
#Title:         D1 Manipulate Street Address
#Course:        GISC9307D1
#Date Created:  04/02/2015
#Author:        Dmitry Kurylovich
#Purpose:       The purpose of this Python script is to input, manipulate and write a text
#               file containing farm addresses. The input file splits the farm data into
#               2 columns; the purpose of this script is to produce a text file that
#               separates/delimits the address into 7 separate columns + 2 raw columns.
#
#Note:          This script is supposed to run from C:\temp.
#---------------------------------------------------------------------------------------------------------
#Looks for the output directory; if it exists, the folder is deleted and a new
#directory is created.
import os
import shutil

# Folder that receives the processed output. It is wiped on every run so that
# results from a previous run can never be mixed with the new ones.
procDataDir = r'C:\temp\KurylovichDGisc9307D1\d1ProcData'

# Remove the folder (and everything in it) when it is left over from an earlier run.
if os.path.exists(procDataDir):
    shutil.rmtree(procDataDir)

# Create the folder when it is absent. The check is repeated on purpose: the
# folder is only created if it is really gone at this point.
if not os.path.exists(procDataDir):
    os.makedirs(procDataDir)
# Absolute locations of the raw farm list that is read (inFile) and of the
# tab-delimited result file that is written (outFile).
inFile = r'C:\temp\KurylovichDGisc9307D1\d1RawData\d1RawListOfFarms.txt'
outFile = r'C:\temp\KurylovichDGisc9307D1\d1ProcData\d1ResultListOfFarms.txt'

# Words recognised as a street suffix type and as a street direction.
# Both tuples can be extended to suit the dataset being processed.
sufTypeConst = (
    "Rd",
    "Ave",
    "St",
    "Dr",
    "Pky",
    "Pl",
    "Sq",
    "Lane",
)
dirConst = (
    'W',
    'E',
    'N',
    'S',
)

def splitStreet(street, sufTypes, dirs):
    """Break the street part of an address into its four output columns.

    street   -- text in front of the first ", " of the address,
                e.g. "1167 Lakeshore Rd W"
    sufTypes -- collection of recognised suffix types ("Rd", "Ave", ...)
    dirs     -- collection of recognised directions ("W", "E", ...)

    Returns the tuple (streetNum, streetName, sufType, direction). The first
    word is always taken as the street number. Suffix type and direction are
    optional and are returned as '' when they are missing.
    """
    words = street.split()
    if not words:
        return ('', '', '', '')

    streetNum = words[0]
    nameWords = words[1:]
    sufType = ''
    direction = ''

    # The optional parts are read from the END of the street text. This keeps
    # every value in its own column: a direction without a suffix type is not
    # shifted into the SufType column, and a street name that happens to
    # contain a suffix word ("St Paul St") stays in one piece.
    while nameWords:
        lastWord = nameWords[-1]
        if not direction and lastWord in dirs:
            direction = nameWords.pop()
        elif not sufType and lastWord in sufTypes:
            sufType = nameWords.pop()
        else:
            break

    return (streetNum, ' '.join(nameWords), sufType, direction)


def parseFarmLine(line, sufTypes, dirs):
    """Turn one raw input line into one tab-delimited output row.

    line     -- raw line "<FarmID>\\t<Address>", where the address looks like
                "1167 Lakeshore Rd W, St. Catharines, ON L2R 6P9"
    sufTypes -- collection of recognised suffix types
    dirs     -- collection of recognised directions

    Returns the row WITHOUT a trailing newline. The row always holds nine
    columns: FarmID, Address, StreetNum, StreetName, SufType, Dir, City,
    Province, PostalCode. Parts that are missing from the input are written
    as empty columns instead of raising an IndexError.
    """
    # Split the ID from the raw address on the tab; the raw address ends with
    # the line break, which is stripped here.
    fields = line.rstrip('\r\n').split('\t')
    farmId = fields[0]
    address = fields[1].rstrip() if len(fields) > 1 else ''

    # [street], [city], [province + postal code]
    parts = address.split(', ')
    streetNum, streetName, sufType, direction = splitStreet(
        parts[0], sufTypes, dirs)
    city = parts[1] if len(parts) > 1 else ''

    # [Province] [PostalCode1] [PostalCode2]; the postal code is written
    # without a space whether or not the input contained one.
    region = parts[2].split() if len(parts) > 2 else []
    province = region[0] if region else ''
    postalCode = ''.join(region[1:])

    return '\t'.join([farmId, address, streetNum, streetName, sufType,
                      direction, city, province, postalCode])


# The processing itself only runs when this file is executed as a script.
# inFile, outFile, sufTypeConst and dirConst are defined earlier in the script.
if __name__ == "__main__":
    # "with" closes both files even when a line cannot be processed, so the
    # files are never left locked.
    with open(inFile, 'r') as inHandler, open(outFile, 'w') as outHandler:
        outHandler.write(
            'FarmID\tAddress\tStreetNum\tStreetName\tSufType\tDir\tCity\tProvince\tPostalCode\n')

        # The first line of the input is a text header and is discarded.
        next(inHandler, None)

        # line example
        # 13\t1167 Lakeshore Rd W, St. Catharines, ON L2R 6P9
        for line in inHandler:
            # Blank lines (for example at the end of the file) carry no data.
            if not line.strip():
                continue
            outHandler.write(
                parseFarmLine(line, sufTypeConst, dirConst) + '\n')

-3-

Sunday, April 26, 2015 3:35 AM