You are on page 1 of 9

1/16/2021 Pertemuan 5

ACTIVITY PERTEMUAN 5

NAMA : Wisnu Trenggono Wirayuda

NPM : 57418379

KELAS : 3IA07
MATERI : Sentimen Analisis DGX - 1
MATA PRAKTIKUM : Pengantar Kecerdasan Buatan

In [21]:

import warnings
warnings.filterwarnings('ignore')
%matplotlib inline
import string
import numpy as np
import matplotlib
import matplotlib.pyplot as plt
import sklearn
matplotlib.rc('xtick', labelsize=14)
matplotlib.rc('ytick', labelsize=14)

In [22]:

# Read the labelled corpus into memory, one raw line per list element.
# Each line still carries its tab-separated label and trailing newline here;
# they are split off in a later cell.
with open("../../full_set.txt") as f:
    content = f.readlines()

In [23]:

content[0:10]
Out[23]:

['So there is no way for me to plug it in here in the US unless I go by a


converter.\t0\n',
'Good case, Excellent value.\t1\n',
'Great for the jawbone.\t1\n',
'Tied to charger for conversations lasting more than 45 minutes.MAJOR PRO
BLEMS!!\t0\n',
'The mic is great.\t1\n',
'I have to jiggle the plug to get it to line up right to get decent volum
e.\t0\n',
'If you have several dozen or several hundred contacts, then imagine the
fun of sending each of them one by one.\t0\n',
'If you are Razr owner...you must have this!\t1\n',
'Needless to say, I wasted my money.\t0\n',
'What a waste of money and time!.\t0\n']

In [24]:

## Trim leading/trailing white space (this also drops the trailing newline)
content = [line.strip() for line in content]
## Every line is "<sentence>\t<label>": split once, then pick each field
fields = [line.split("\t") for line in content]
sentences = [parts[0] for parts in fields]
labels = [parts[1] for parts in fields]

jupiternb.gunadarma.ac.id:8989/nbconvert/html/3IA07/Wisnu Trenggono Wirayuda/Pertemuan 5.ipynb?download=false 1/9


1/16/2021 Pertemuan 5

In [25]:

sentences[0:10]
Out[25]:

['So there is no way for me to plug it in here in the US unless I go by a


converter.',
'Good case, Excellent value.',
'Great for the jawbone.',
'Tied to charger for conversations lasting more than 45 minutes.MAJOR PRO
BLEMS!!',
'The mic is great.',
'I have to jiggle the plug to get it to line up right to get decent volum
e.',
'If you have several dozen or several hundred contacts, then imagine the
fun of sending each of them one by one.',
'If you are Razr owner...you must have this!',
'Needless to say, I wasted my money.',
'What a waste of money and time!.']

In [26]:

labels[0:10]
Out[26]:

['0', '1', '1', '0', '1', '0', '0', '1', '0', '0']

In [27]:

# Parse the string labels as int8 and remap {0, 1} -> {-1, +1} in one step.
y = 2 * np.array(labels, dtype='int8') - 1

In [28]:

##del str

In [29]:

def full_remove(x, removal_list):
    """Return ``x`` with every occurrence of each listed substring blanked out.

    Args:
        x: The input string.
        removal_list: Iterable of substrings to remove. Each occurrence is
            replaced by a single space (not deleted), so the words on either
            side of a removed substring stay separated.

    Returns:
        The string after all replacements have been applied, in list order.
    """
    for w in removal_list:
        x = x.replace(w, ' ')
    return x
## Remove digits ##
digits = list(string.digits)
remove_digits = [full_remove(sentence, digits) for sentence in sentences]
## Remove punctuation ##
punctuation = list(string.punctuation)
remove_punc = [full_remove(sentence, punctuation) for sentence in remove_digits]
## Lower-case everything and trim leading/trailing white space ##
sents_lower = [sentence.lower().strip() for sentence in remove_punc]

jupiternb.gunadarma.ac.id:8989/nbconvert/html/3IA07/Wisnu Trenggono Wirayuda/Pertemuan 5.ipynb?download=false 2/9


1/16/2021 Pertemuan 5

In [30]:

## Stop words to drop from every sentence ##
# Kept as a list, and deliberately short: words such as 'not', 'is' and
# 'for' are not listed here and so are left in place.
stop_set = [
    'the', 'a', 'an',
    'i', 'he', 'she', 'they',
    'to', 'of', 'it', 'from',
]

def removeStopWords(stopWords, txt):
    """Return ``txt`` with every whitespace-separated stop word removed.

    Args:
        stopWords: Container of words to drop; matching is exact and
            case-sensitive, so callers should lower-case ``txt`` first.
        txt: The input string.

    Returns:
        The remaining words joined by single spaces. Because the text is
        split on arbitrary whitespace and re-joined, runs of spaces collapse
        to one.
    """
    newtxt = ' '.join([word for word in txt.split() if word not in stopWords])
    return newtxt
# Apply the stop-word filter to every normalised sentence.
sents_processed = [removeStopWords(stop_set, sentence) for sentence in sents_lower]

In [31]:

sents_processed[0:20]
Out[31]:

['so there is no way for me plug in here in us unless go by converter',


'good case excellent value',
'great for jawbone',
'tied charger for conversations lasting more than minutes major problem
s',
'mic is great',
'have jiggle plug get line up right get decent volume',
'if you have several dozen or several hundred contacts then imagine fun s
ending each them one by one',
'if you are razr owner you must have this',
'needless say wasted my money',
'what waste money and time',
'and sound quality is great',
'was very impressed when going original battery extended battery',
'if two were seperated by mere ft started notice excessive static and gar
bled sound headset',
'very good quality though',
'design is very odd as ear clip is not very comfortable at all',
'highly recommend for any one who has blue tooth phone',
'advise everyone do not be fooled',
'so far so good',
'works great',
'clicks into place in way that makes you wonder how long that mechanism w
ould last']

In [32]:

from sklearn.feature_extraction.text import CountVectorizer


from sklearn.feature_extraction.text import TfidfTransformer

In [33]:

# Bag-of-n-grams counts (unigrams up to 5-grams), capped at 6000 features.
vectorizer = CountVectorizer(
    analyzer="word",
    preprocessor=None,
    stop_words='english',
    max_features=6000,
    ngram_range=(1, 5),
)
data_features = vectorizer.fit_transform(sents_processed)

# Re-weight the raw counts with TF-IDF, then densify for NumPy row indexing.
tfidf_transformer = TfidfTransformer()
data_features_tfidf = tfidf_transformer.fit_transform(data_features)
data_mat = data_features_tfidf.toarray()

jupiternb.gunadarma.ac.id:8989/nbconvert/html/3IA07/Wisnu Trenggono Wirayuda/Pertemuan 5.ipynb?download=false 3/9


1/16/2021 Pertemuan 5

In [34]:

# Hold out a balanced test set: 250 negative (y == -1) and 250 positive
# (y == 1) sentences, each drawn without replacement. Every other row is
# used for training.
# The first draw must come from the negative class. Sampling y == 1 twice
# would leave the test set without negatives and could repeat an index.
np.random.seed(0)  # fixed seed so the split is reproducible
test_index = np.append(
    np.random.choice(np.where(y == -1)[0], 250, replace=False),
    np.random.choice(np.where(y == 1)[0], 250, replace=False))
train_index = list(set(range(len(labels))) - set(test_index))
train_data = data_mat[train_index,]
train_labels = y[train_index]
test_data = data_mat[test_index,]
test_labels = y[test_index]

In [35]:

from keras.models import Sequential


from keras.layers import Dense
from keras.layers import LSTM
from keras.layers import SpatialDropout1D
from keras.layers.embeddings import Embedding
from keras.preprocessing import sequence
from keras.preprocessing.text import Tokenizer
from keras.callbacks import EarlyStopping
# Length every encoded sentence is padded/truncated to in later cells.
max_review_length = 200
# `filters` is written as a raw string: the pattern contains `\]`, which is
# an invalid escape sequence in a normal string literal and triggers a
# warning. The resulting characters are unchanged.
tokenizer = Tokenizer(
    num_words=10000,
    filters=r'!"#$%&()*+,-./:;<=>?@[\]^_`{|}~',
    lower=True,
)
# Build the word index from the pre-processed sentences.
tokenizer.fit_on_texts(sents_processed)

In [36]:

# Encode each sentence as a list of word indices, then pad/truncate them all
# to max_review_length so they stack into one 2-D array.
token_ids = tokenizer.texts_to_sequences(sents_processed)
X = sequence.pad_sequences(token_ids, maxlen=max_review_length)
print('Shape of data tensor:', X.shape)
Shape of data tensor: (3000, 200)

In [37]:

import pandas as pd

# One-hot encode the -1/+1 labels: column 0 marks -1, column 1 marks +1
# (see the displayed array, where the first row, a '0'-labelled sentence,
# is [1, 0]).
one_hot = pd.get_dummies(y)
Y = one_hot.values
Y
Out[37]:

array([[1, 0],
[0, 1],
[0, 1],
...,
[1, 0],
[1, 0],
[1, 0]], dtype=uint8)

jupiternb.gunadarma.ac.id:8989/nbconvert/html/3IA07/Wisnu Trenggono Wirayuda/Pertemuan 5.ipynb?download=false 4/9


1/16/2021 Pertemuan 5

In [38]:

# Balanced hold-out split for the sequence model: 250 negatives and 250
# positives go to the test set, every remaining row is used for training.
np.random.seed(0)
negative_rows = np.where(y == -1)[0]
positive_rows = np.where(y == 1)[0]
test_inds = np.append(
    np.random.choice(negative_rows, 250, replace=False),
    np.random.choice(positive_rows, 250, replace=False))
train_inds = list(set(range(len(labels))) - set(test_inds))

# Inputs are the padded index sequences; targets are the one-hot labels.
train_data, train_labels = X[train_inds,], Y[train_inds]
test_data, test_labels = X[test_inds,], Y[test_inds]

In [39]:

# Two stacked LSTM layers on top of a learned embedding, ending in a 2-way
# softmax over the one-hot sentiment classes.
EMBEDDING_DIM = 200
model = Sequential()
# The vocabulary size (10000) matches the Tokenizer's num_words; the input
# length is the padded sequence length.
model.add(Embedding(10000, EMBEDDING_DIM, input_length=X.shape[1]))
model.add(SpatialDropout1D(0.2))
# return_sequences=True so the second LSTM receives the full sequence.
model.add(LSTM(250, dropout=0.2, return_sequences=True))
model.add(LSTM(100, dropout=0.2, recurrent_dropout=0.2))
model.add(Dense(2, activation='softmax'))
model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])
# summary() prints the table itself and returns None, so wrapping it in
# print() only adds a stray "None" line after the table.
model.summary()
Model: "sequential_2"

Layer (type) Output Shape Param #


=================================================================
embedding_2 (Embedding) (None, 200, 200) 2000000

spatial_dropout1d_2 (Spatial (None, 200, 200) 0

lstm_3 (LSTM) (None, 200, 250) 451000

lstm_4 (LSTM) (None, 100) 140400

dense_2 (Dense) (None, 2) 202


=================================================================
Total params: 2,591,602
Trainable params: 2,591,602
Non-trainable params: 0

None

jupiternb.gunadarma.ac.id:8989/nbconvert/html/3IA07/Wisnu Trenggono Wirayuda/Pertemuan 5.ipynb?download=false 5/9


1/16/2021 Pertemuan 5

In [40]:

# Training hyper-parameters; batch_size is reused when evaluating later.
epochs = 2
batch_size = 40
# 10% of the training samples are set aside for validation during fitting.
model.fit(
    x=train_data,
    y=train_labels,
    batch_size=batch_size,
    epochs=epochs,
    validation_split=0.1,
)

jupiternb.gunadarma.ac.id:8989/nbconvert/html/3IA07/Wisnu Trenggono Wirayuda/Pertemuan 5.ipynb?download=false 6/9


1/16/2021 Pertemuan 5

-
InternalError Traceback (most recent call las
t)
<ipython-input-40-0b400d79b3a7> in <module>
4 epochs=epochs,
5 batch_size=batch_size,
----> 6 validation_split=0.1)

/usr/local/lib/python3.5/dist-packages/keras/engine/training.py in fit(sel
f, x, y, batch_size, epochs, verbose, callbacks, validation_split, validat
ion_data, shuffle, class_weight, sample_weight, initial_epoch, steps_per_e
poch, validation_steps, validation_freq, max_queue_size, workers, use_mult
iprocessing, **kwargs)
1211 else:
1212 fit_inputs = x + y + sample_weights
-> 1213 self._make_train_function()
1214 fit_function = self.train_function
1215

/usr/local/lib/python3.5/dist-packages/keras/engine/training.py in _make_t
rain_function(self)
331 updates=updates + metrics_updates,
332 name='train_function',
--> 333 **self._function_kwargs)
334
335 def _make_test_function(self):

/usr/local/lib/python3.5/dist-packages/keras/backend/tensorflow_backend.py
in function(inputs, outputs, updates, **kwargs)
3004 def function(inputs, outputs, updates=None, **kwargs):
3005 if _is_tf_1():
-> 3006 v1_variable_initialization()
3007 return tf_keras_backend.function(inputs, outputs,
3008 updates=updates,

/usr/local/lib/python3.5/dist-packages/keras/backend/tensorflow_backend.py
in v1_variable_initialization()
418
419 def v1_variable_initialization():
--> 420 session = get_session()
421 with session.graph.as_default():
422 variables = tf.global_variables()

/usr/local/lib/python3.5/dist-packages/keras/backend/tensorflow_backend.py
in get_session()
383 '`get_session` is not available when '
384 'TensorFlow is executing eagerly.')
--> 385 return tf_keras_backend.get_session()
386
387

/usr/local/lib/python3.5/dist-packages/tensorflow/python/keras/backend.py
in get_session()
477 A TensorFlow session.
478 """
--> 479 session = _get_session()
480 if not _MANUAL_VAR_INIT:
481 with session.graph.as_default():

/usr/local/lib/python3.5/dist-packages/tensorflow/python/keras/backend.py
jupiternb.gunadarma.ac.id:8989/nbconvert/html/3IA07/Wisnu Trenggono Wirayuda/Pertemuan 5.ipynb?download=false 7/9
1/16/2021 Pertemuan 5
in _get_session()
455 if getattr(_SESSION, 'session', None) is None:
456 _SESSION.session = session_module.Session(
--> 457 config=get_default_session_config())
458 session = _SESSION.session
459 return session

/usr/local/lib/python3.5/dist-packages/tensorflow/python/client/session.py
in init (self, target, graph, config)
1549
1550 """
-> 1551 super(Session, self). init (target, graph, config=config)
1552 # NOTE(mrry): Create these on first ` enter ` to avoid a ref
erence cycle.
1553 self._default_graph_context_manager = None

/usr/local/lib/python3.5/dist-packages/tensorflow/python/client/session.py
in init (self, target, graph, config)
674 try:
675 # pylint: disable=protected-access
--> 676 self._session = tf_session.TF_NewSessionRef(self._graph._c_g
raph, opts)
677 # pylint: enable=protected-access
678 finally:

InternalError: cudaGetDevice() failed. Status: CUDA driver version is insufficient for CUDA runtime version

In [ ]:

# Score the trained model on the held-out test split. This is test data,
# not the validation_split used during fit, so the metric is labelled as
# test accuracy.
loss, acc = model.evaluate(test_data, test_labels, verbose=2,
                           batch_size=batch_size)
print("loss:", loss)
print("Test accuracy:", acc)

In [ ]:

# Index i names the class in output column i (0 -> Negative, 1 -> Positive).
outcome_labels = ['Negative', 'Positive']

new = ["I would not recommend this movie"]

# Encode the text exactly like the training data: word indices, then padding.
seq = tokenizer.texts_to_sequences(new)
padded = sequence.pad_sequences(seq, maxlen=max_review_length)
pred = model.predict(padded)

print("Probability distribution: ", pred)
print("Is this a Positive or Negative review? ")
# Only one sentence is scored, so the flat argmax is the class index.
best_class = np.argmax(pred)
print(outcome_labels[best_class])

In [ ]:

new = ["It is not what i am looking for"]

# Same encoding pipeline as for the training sentences.
seq = tokenizer.texts_to_sequences(new)
padded = sequence.pad_sequences(seq, maxlen=max_review_length)
pred = model.predict(padded)

print("Probability distribution: ", pred)
print("Is this a Positive or Negative review? ")
# Map the highest-probability column back to its label.
best_class = np.argmax(pred)
print(outcome_labels[best_class])

jupiternb.gunadarma.ac.id:8989/nbconvert/html/3IA07/Wisnu Trenggono Wirayuda/Pertemuan 5.ipynb?download=false 8/9


1/16/2021 Pertemuan 5

In [ ]:

new = ["This isn't what i am looking for"]

# Same encoding pipeline as for the training sentences.
seq = tokenizer.texts_to_sequences(new)
padded = sequence.pad_sequences(seq, maxlen=max_review_length)
pred = model.predict(padded)

print("Probability distribution: ", pred)
print("Is this a Positive or Negative review? ")
# Map the highest-probability column back to its label.
best_class = np.argmax(pred)
print(outcome_labels[best_class])

In [ ]:

jupiternb.gunadarma.ac.id:8989/nbconvert/html/3IA07/Wisnu Trenggono Wirayuda/Pertemuan 5.ipynb?download=false 9/9

You might also like