You are on page 1 of 4

In[1]:

import pandas as pd
import numpy as np
from astropy.table import Table, Column

In[2]:
# Load the raw close-approach CSV into `sht`. The path is relative, so the
# file must be in the kernel's current working directory.
# NOTE(review): the file name suggests a CNEOS close-approach export - confirm
# the provenance and download date and record them in a markdown cell.
sht = pd.read_csv('cneos_closeapproach_data_raw.csv')

In[3]:
# Year of each close approach as a float array, taken from the first four
# characters of the date string (e.g. '1900-Jan-04 22:25 ± 00:02' -> 1900.0).
#
# Vectorised string slicing replaces the per-row `.loc[i]` loop: it is faster
# and no longer relies on `sht` having a default 0..n-1 index.
years = (
    sht['Close-Approach (CA) Date']
    .str.slice(0, 4)
    .astype(float)
    .to_numpy()
)
years

Out[3]:
array([1900., 1900., 1900., ..., 2019., 2019., 2019.])

In[4]:
# Month number (1-12) of each close approach as a float array, looked up from
# the three-letter abbreviation at characters 5-7 of the date string
# (e.g. '1900-Jan-04 ...' -> 'Jan' -> 1.0).
mo_convert = {
    'Jan': 1, 'Feb': 2, 'Mar': 3, 'Apr': 4,
    'May': 5, 'Jun': 6, 'Jul': 7, 'Aug': 8,
    'Sep': 9, 'Oct': 10, 'Nov': 11, 'Dec': 12,
}

month_abbr = sht['Close-Approach (CA) Date'].str.slice(5, 8)

# Series.map() yields NaN for keys missing from the dict; fail loudly instead,
# as the original dictionary lookup did.
unknown_abbr = set(month_abbr.dropna().unique()) - set(mo_convert)
if unknown_abbr:
    raise KeyError(
        'Unrecognised month abbreviation(s): {}'.format(sorted(unknown_abbr))
    )

months = month_abbr.map(mo_convert).to_numpy(dtype=float)
months

Out[4]:
array([1., 1., 1., ..., 7., 7., 7.])
In[5]:
# Day of month of each close approach as a float array, taken from
# characters 9-10 of the date string (e.g. '1900-Jan-04 ...' -> 4.0).
#
# No special case is needed for a leading zero: converting '04' to float
# already gives 4.0, so a single vectorised slice covers every row.
days = (
    sht['Close-Approach (CA) Date']
    .str.slice(9, 11)
    .astype(float)
    .to_numpy()
)
days

Out[5]:
array([ 4., 11., 29., ..., 24., 24., 24.])

In[6]:
# Minimum close-approach distance in au as a float array.
#
# The source column holds both units in one string, '<LD> | <au>'
# (e.g. '3.75 | 0.00962'). Splitting on the '|' delimiter takes the au part
# whatever its width; a fixed "last 7 characters" slice would silently
# mis-parse any value that is not exactly 7 characters long.
# A cell without '|' fails the float conversion instead of yielding a wrong
# number.
dist_au = (
    sht['CA Distance Minimum (LD | au)']
    .str.rsplit('|', n=1)
    .str[-1]
    .str.strip()
    .astype(float)
    .to_numpy()
)
dist_au

Out[6]:
array([0.00962, 0.03989, 0.02077, ..., 0.03135, 0.00239, 0.00644])

In[7]:
# Convert the free-text "Estimated Diameter" column (e.g. '250 m - 570 m')
# into numeric lower / upper bounds in metres: `size_ll` and `size_ul`.
# A 'k' directly before the unit's 'm' marks kilometres and scales that bound
# by 1000; each bound carries its own unit, so mixed ranges are handled.
#
# Behaviour compared with the character-scanning loop this replaces:
#   * Missing diameters ('n/a', empty, or NaN) give 0.0 for both bounds.
#     The old 'n/a' branch set 0 but then fell through to float(''), which
#     raises; pandas.read_csv also turns 'n/a' into NaN by default, so the
#     string comparison never matched in the first place.
#   * Text that is not of the form '<num> [k]m - <num> [k]m' gives NaN rather
#     than a number assembled from whatever digits happened to be found.
#   * No index arithmetic past the end of the string (the old [a+2] / [c+1]
#     probes could raise IndexError on short values).
diam_raw = sht['Estimated Diameter']
diam_text = diam_raw.astype(str).str.strip()
diam_missing = (diam_raw.isna() | diam_text.isin(['n/a', ''])).to_numpy(dtype=bool)

# Extracted columns: 0 = lower value, 1 = 'k' if lower bound is in km,
#                    2 = upper value, 3 = 'k' if upper bound is in km.
# Rows that do not match the pattern come back as all-NaN.
number = r'(\d+\.?\d*|\.\d+)'
diam_parts = diam_text.str.extract(
    r'^' + number + r'\s*(k?)m\s*-\s*' + number + r'\s*(k?)m$'
)

ll_factor = np.where(diam_parts[1].eq('k').fillna(False).to_numpy(dtype=bool), 1000.0, 1.0)
ul_factor = np.where(diam_parts[3].eq('k').fillna(False).to_numpy(dtype=bool), 1000.0, 1.0)

size_ll = pd.to_numeric(diam_parts[0]).to_numpy(dtype=float, na_value=np.nan) * ll_factor
size_ul = pd.to_numeric(diam_parts[2]).to_numpy(dtype=float, na_value=np.nan) * ul_factor

# Missing diameters are reported as 0.0, as the original cell intended.
size_ll[diam_missing] = 0.0
size_ul[diam_missing] = 0.0

In[8]:
# Assemble the raw columns and the derived numeric columns into one astropy
# Table. Each output column name sits next to its data so the two cannot
# drift out of step (the previous form paired 13 data columns with 13 names
# purely by position, and its string literals were broken across lines).
table_columns = [
    ('Object', sht['Object']),
    ('Date', sht['Close-Approach (CA) Date']),
    ('Year', years),
    ('Month', months),
    ('Day', days),
    ('Minimum Distance (LD | au)', sht['CA Distance Minimum (LD | au)']),
    ('Minimum Distance (au)', dist_au),
    ('V relative (km/s)', sht['V relative (km/s)']),
    ('V infinity (km/s)', sht['V infinity (km/s)']),
    ('H (mag)', sht['H (mag)']),
    # NOTE(review): 'Diamter' is a typo for 'Diameter', and this column holds
    # the raw text (with its own m / km units), not a value in metres.
    # The name is kept unchanged because it becomes a header in the exported
    # CSV; rename it only together with whatever reads that file.
    ('Estimated Diamter (m)', sht['Estimated Diameter']),
    ('Diameter_ll (m)', size_ll),
    ('Diameter_ul (m)', size_ul),
]

t = Table(
    [data for _, data in table_columns],
    names=tuple(name for name, _ in table_columns),
)
t

Out[6]:
<Table length=13101>
Object Date Year Month Day Minimum Minimum V V H (mag) Estimated Diameter_ll Diameter_ul
Distance Distance relative infinity Diamter (m) (m)
(LD | au) (au) (km/s) (km/s) (m)
object object float64 float64 float64 object float64 float64 float64 float64 object float64 float64
509352 1900-Jan-04 1900.0 1.0 4.0 3.75 | 0.00962 8.69 8.65 20.1 250 m - 250.0 570.0
(2007 22:25 ± 00:02 0.00962 570 m
AG)
(2014 1900-Jan-11 1900.0 1.0 11.0 15.52 | 0.03989 10.65 10.65 24.3 37 m - 82 37.0 82.0
SC324) 01:03 ± 00:17 0.03989 m
… … … … … … … … … … … … …
(2019 2019-Jul-24 0.93 | 54 m - 120
2019.0 7.0 24.0 0.00239 19.19 19.13 23.5 54.0 120.0
OD) 13:31 ± < 00:01 0.00239 m
(2019 2019-Jul-24 2.51 | 23 m - 52
2019.0 7.0 24.0 0.00644 9.05 9.0 25.3 23.0 52.0
OE) 14:36 ± < 00:01 0.00644 m

In[14]:
# Convert the astropy Table `t` back into a pandas DataFrame and display it.
df = t.to_pandas()
df
Out[14]:
Object Date Year Month Day Minimum Minimum V V H Estimated Diameter_ll Diameter_ul
Distance Distance relative infinity (mag) Diamter (m) (m)
(LD | au) (au) (km/s) (km/s) (m)
0 509352 1900-Jan-04 1900.0 1.0 4.0 3.75 | 0.00962 8.69 8.65 20.1 250 m - 250.0 570.0
(2007 22:25 ± 00:02 0.00962 570 m
AG)
1 (2014 1900-Jan-11 1900.0 1.0 11.0 15.52 | 0.03989 10.65 10.65 24.3 37 m - 82 37.0 82.0
SC324) 01:03 ± 00:17 0.03989 m
… … … … … … … … … … … … … …
13099 2019-Jul-24
(2019 0.93 | 54 m -
13:31 ± < 2019.0 7.0 24.0 0.00239 19.19 19.13 23.5 54.0 120.0
OD) 0.00239 120 m
00:01
13100 2019-Jul-24
(2019 2.51 | 23 m - 52
14:36 ± < 2019.0 7.0 24.0 0.00644 9.05 9.0 25.3 23.0 52.0
OE) 0.00644 m
00:01

13101 rows × 13 columns

In[15]:
# Write the cleaned table to a CSV file in the current working directory.
# NOTE(review): with the default `index=True`, pandas also writes the row
# index as an unnamed first column - confirm downstream readers expect that.
df.to_csv('meteor_data_cleaned.csv')

You might also like