You are on page 1of 3

07/11/2019 RNN - Urban - Jupyter Notebook

In [13]: import glob


import os
import librosa
import matplotlib.pyplot as plt
import tensorflow.compat.v1 as tf
from tensorflow.python.ops import rnn, rnn_cell
import numpy as np
%matplotlib inline
plt.style.use('ggplot')

tf.disable_v2_behavior()
print(tf.version.VERSION)

frames = 41
bands = 20

2.0.0

In [14]: def sliding_window(data, window_size):


start = 0
while start < len(data):
yield int(start), int(start + window_size)
start += (window_size / 2)

def extract_features(parent_dir, sub_dirs, bands = 20, frames = 41, file_ext="*.wav"):


window_size = 512 * (frames - 1)
mfccs = []
labels = []

for l, sub_dir in enumerate(sub_dirs):


for filename in glob.glob(os.path.join(parent_dir, sub_dir, file_ext)):
audio, Fs = librosa.load(filename)
label = filename.split('/')[2].split('-')[1]

for (start,end) in sliding_window(audio, window_size):


if(len(audio[start:end]) == window_size):
mfcc = librosa.feature.mfcc(y = audio[start:end], sr = Fs, n_mfcc = bands
mfccs.append(mfcc)
labels.append(label)

features = np.asarray(mfccs).reshape(len(mfccs), frames, bands)

return np.array(features), np.array(labels, dtype = np.int)

def one_hot_encode(labels):
n_labels = len(labels)

n_unique_labels = len(np.unique(labels))
one_hot_encode = np.zeros((n_labels, n_unique_labels))

one_hot_encode[np.arange(n_labels), labels] = 1

return one_hot_encode

In [15]: parent_dir = 'Sound-Data'

tr_sub_dirs = ['fold1', 'fold3']


ts_sub_dirs = ['fold2', 'fold4']

tr_features, tr_labels = extract_features(parent_dir, tr_sub_dirs, bands, frames)


tr_labels = one_hot_encode(tr_labels)

ts_features, ts_labels = extract_features(parent_dir, ts_sub_dirs, bands, frames)


ts_labels = one_hot_encode(ts_labels)

localhost:8889/notebooks/RNN - Urban.ipynb# 1/3


07/11/2019 RNN - Urban - Jupyter Notebook

In [19]: tf.reset_default_graph()

batch_size = 50
display_step = 200

# Network Parameters
n_input = bands
n_steps = frames
n_hidden = 320
n_classes = 10

learning_rate = 0.01
training_iterations = 4000

x = tf.placeholder("float", [None, n_steps, n_input])


y = tf.placeholder("float", [None, n_classes])

weight = tf.Variable(tf.random_normal([n_hidden, n_classes]))


bias = tf.Variable(tf.random_normal([n_classes]))

In [20]: def RNN(x, weight, bias):


cell = rnn_cell.LSTMCell(n_hidden, state_is_tuple = True)
cell = rnn_cell.MultiRNNCell([cell] * 1)
output, state = tf.nn.dynamic_rnn(cell, x, dtype = tf.float32)
output = tf.transpose(output, [1, 0, 2])
last = tf.gather(output, int(output.get_shape()[0]) - 1)

return tf.nn.softmax(tf.matmul(last, weight) + bias)

In [21]: prediction = RNN(x, weight, bias)

# Define loss and optimizer


loss_f = -tf.reduce_sum(y * tf.log(prediction))
optimizer = tf.train.AdamOptimizer(learning_rate = learning_rate).minimize(loss_f)

# Evaluate model
correct_pred = tf.equal(tf.argmax(prediction,1), tf.argmax(y,1))
accuracy = tf.reduce_mean(tf.cast(correct_pred, tf.float32))

# Initializing the variables


init = tf.global_variables_initializer()

/home/juansta/anaconda3/lib/python3.7/site-packages/tensorflow_core/python/framework/indexed
_slices.py:424: UserWarning: Converting sparse IndexedSlices to a dense Tensor of unknown sh
ape. This may consume a large amount of memory.
"Converting sparse IndexedSlices to a dense Tensor of unknown shape. "

localhost:8889/notebooks/RNN - Urban.ipynb# 2/3


07/11/2019 RNN - Urban - Jupyter Notebook

In [*]: with tf.Session() as session:


session.run(init)

for epoch in range(training_iterations):


offset = (epoch * batch_size) % (tr_labels.shape[0] - batch_size)
batch_x = tr_features[offset:(offset + batch_size), :, :]
batch_y = tr_labels[offset:(offset + batch_size), :]
_, c = session.run([optimizer, loss_f], feed_dict = {x: batch_x, y : batch_y})

if epoch % display_step == 0:
# Calculate batch accuracy
acc = session.run(accuracy, feed_dict={x: batch_x, y: batch_y})
# Calculate batch loss
loss = session.run(loss_f, feed_dict={x: batch_x, y: batch_y})
print("Iter " + str(epoch) + ", Minibatch Loss= " + \
"{:.6f}".format(loss) + ", Training Accuracy= " + \
"{:.5f}".format(acc))

print('Test accuracy: ',round(session.run(accuracy, feed_dict={x: ts_features, y: ts_labe

Iter 0, Minibatch Loss= 217.047424, Training Accuracy= 0.60000


Iter 200, Minibatch Loss= 68.556366, Training Accuracy= 0.50000
Iter 400, Minibatch Loss= 44.655693, Training Accuracy= 0.66000
Iter 600, Minibatch Loss= 61.274078, Training Accuracy= 0.50000
Iter 800, Minibatch Loss= 41.406906, Training Accuracy= 0.76000
Iter 1000, Minibatch Loss= 63.801964, Training Accuracy= 0.56000
Iter 1200, Minibatch Loss= 43.403629, Training Accuracy= 0.74000
Iter 1400, Minibatch Loss= 34.109589, Training Accuracy= 0.80000
Iter 1600, Minibatch Loss= 43.974281, Training Accuracy= 0.64000
Iter 1800, Minibatch Loss= 52.404976, Training Accuracy= 0.60000
Iter 2000, Minibatch Loss= 43.137421, Training Accuracy= 0.74000
Iter 2200, Minibatch Loss= 30.857113, Training Accuracy= 0.70000
Iter 2400, Minibatch Loss= 22.638533, Training Accuracy= 0.78000
Iter 2600, Minibatch Loss= 31.792271, Training Accuracy= 0.86000
Iter 2800, Minibatch Loss= 48.228432, Training Accuracy= 0.72000
Iter 3000, Minibatch Loss= 43.207615, Training Accuracy= 0.70000
Iter 3200, Minibatch Loss= 20.295015, Training Accuracy= 0.84000
Iter 3400, Minibatch Loss= 26.592484, Training Accuracy= 0.88000
Iter 3600, Minibatch Loss= 54.418182, Training Accuracy= 0.64000
Iter 3800, Minibatch Loss= 38.426224, Training Accuracy= 0.70000

In [ ]:

localhost:8889/notebooks/RNN - Urban.ipynb# 3/3

You might also like