
# Load Libraries - Make sure to run this cell!

import pandas as pd
import numpy as np
import re, os
from string import printable
from sklearn import model_selection
#import gensim
import tensorflow as tf
from keras.models import Sequential, Model, model_from_json
from keras import regularizers
from keras.layers import Dense, Dropout, Activation, Lambda, Flatten
from keras.layers import Input, ELU, LSTM, Embedding, Convolution2D, MaxPooling2D, \
    BatchNormalization, Convolution1D, MaxPooling1D, concatenate
from keras.preprocessing import sequence
from keras.optimizers import SGD, Adam, RMSprop
from keras import backend as K
from pathlib import Path
import json
import warnings

warnings.filterwarnings("ignore")

DATA_HOME = 'data/'
df = pd.read_csv(DATA_HOME + 'url_data_mega_deep_learning.csv')
df.sample(n=25).head(25)
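# A quick sanity check on the loaded data is useful here. This is a small sketch
# (not in the original notebook); it assumes the CSV has the two columns used
# below, 'url' and 'isMalicious':
print(df.columns.tolist())
print(df.isMalicious.value_counts())  # class balance: benign (0) vs. malicious (1)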

# Convert each URL to a sequence of integer tokens: every printable character maps
# to its index in string.printable, shifted by 1 so that 0 is reserved for padding
url_int_tokens = [[printable.index(x) + 1 for x in url if x in printable] for url in df.url]

# Pad (or truncate) every sequence to a fixed length of 75 tokens
max_len = 75
X = sequence.pad_sequences(url_int_tokens, maxlen=max_len)

target = np.array(df.isMalicious)

print('Matrix dimensions of X: ', X.shape, 'Vector dimension of target: ', target.shape)
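# A tiny worked example of the encoding (illustrative only, not part of the
# original notebook): string.printable starts with the digits, then the lowercase
# letters, so '0' sits at index 0 (token 1) and 'a' at index 10 (token 11)
demo_tokens = [[printable.index(x) + 1 for x in 'a0.com']]
print(demo_tokens)  # [[11, 1, 76, 13, 25, 23]]
print(sequence.pad_sequences(demo_tokens, maxlen=10))  # zero-padded on the left to length 10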

X_train, X_test, target_train, target_test = model_selection.train_test_split(
    X, target, test_size=0.25, random_state=33)
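# Optional variant (an assumption on my part, not in the original notebook): with
# imbalanced classes it can help to stratify the split so train and test keep the
# same malicious/benign ratio:
#
# X_train, X_test, target_train, target_test = model_selection.train_test_split(
#     X, target, test_size=0.25, random_state=33, stratify=target)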

def print_layers_dims(model):
    l_layers = model.layers
    # Note None is ALWAYS batch_size
    for i in range(len(l_layers)):
        print(l_layers[i])
        print('Input Shape: ', l_layers[i].input_shape, 'Output Shape: ', l_layers[i].output_shape)

def save_model(model, fileModelJSON, fileWeights):
    #print("Saving model to disk: ", fileModelJSON, "and", fileWeights)
    # Requires h5py for the weights file; overwrite any existing files
    if Path(fileModelJSON).is_file():
        os.remove(fileModelJSON)
    json_string = model.to_json()
    with open(fileModelJSON, 'w') as f:
        json.dump(json_string, f)
    if Path(fileWeights).is_file():
        os.remove(fileWeights)
    model.save_weights(fileWeights)

def load_model(fileModelJSON, fileWeights):
    #print("Loading model from disk: ", fileModelJSON, "and", fileWeights)
    # This helper replaces keras.models.load_model for this notebook: it rebuilds
    # the architecture from JSON and then loads the weights from HDF5
    with open(fileModelJSON, 'r') as f:
        model_json = json.load(f)
    model = model_from_json(model_json)
    model.load_weights(fileWeights)
    return model
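# As an aside, newer Keras versions can do this round trip in one call per
# direction; a minimal sketch, assuming a Keras 2.x or later install:
#
#   model.save(filepath)                        # architecture + weights + optimizer state
#   model = keras.models.load_model(filepath)   # restore everything in one step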

def conv_fully(max_len=75, emb_dim=32, max_vocab_len=101, W_reg=regularizers.l2(1e-4)):
    # max_vocab_len is 101: tokens 1..100 cover the 100 characters of
    # string.printable, and 0 is the padding token

    # Input
    main_input = Input(shape=(max_len,), dtype='int32', name='main_input')

    # Embedding layer
    emb = Embedding(input_dim=max_vocab_len, output_dim=emb_dim, input_length=max_len,
                    embeddings_regularizer=W_reg)(main_input)
    emb = Dropout(0.25)(emb)

    def sum_1d(X):
        # Sum-pool over the sequence axis: (batch, time, filters) -> (batch, filters)
        return K.sum(X, axis=1)

    def get_conv_layer(emb, kernel_size=5, filters=256):
        conv = Convolution1D(kernel_size=kernel_size, filters=filters,
                             padding='same')(emb)
        conv = ELU()(conv)
        conv = Lambda(sum_1d, output_shape=(filters,))(conv)
        #conv = BatchNormalization()(conv)
        conv = Dropout(0.5)(conv)
        return conv

    # Four parallel convolutional branches over character n-grams of width 2-5
    conv1 = get_conv_layer(emb, kernel_size=2, filters=256)
    conv2 = get_conv_layer(emb, kernel_size=3, filters=256)
    conv3 = get_conv_layer(emb, kernel_size=4, filters=256)
    conv4 = get_conv_layer(emb, kernel_size=5, filters=256)
    merged = concatenate([conv1, conv2, conv3, conv4], axis=1)

    # Fully connected layers
    hidden1 = Dense(1024)(merged)
    hidden1 = ELU()(hidden1)
    hidden1 = BatchNormalization()(hidden1)
    hidden1 = Dropout(0.5)(hidden1)

    hidden2 = Dense(1024)(hidden1)
    hidden2 = ELU()(hidden2)
    hidden2 = BatchNormalization()(hidden2)
    hidden2 = Dropout(0.5)(hidden2)

    # Output layer: sigmoid probability that the URL is malicious
    output = Dense(1, activation='sigmoid', name='output')(hidden2)

    model = Model(inputs=[main_input], outputs=[output])
    adam = Adam(lr=1e-4, beta_1=0.9, beta_2=0.999, epsilon=1e-08, decay=0.0)
    model.compile(optimizer=adam, loss='binary_crossentropy', metrics=['accuracy'])

    return model
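# The Lambda(sum_1d) step in get_conv_layer above is sum-pooling. A tiny numpy
# illustration of the semantics (not from the original notebook): summing over
# axis 1 collapses the time dimension, leaving one value per filter.
demo_act = np.ones((1, 75, 4))     # (batch, time steps, filters), as after Conv1D
print(demo_act.sum(axis=1).shape)  # (1, 4): one pooled value per filter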
epochs = 5
batch_size = 32

model = conv_fully()
model.fit(X_train, target_train, epochs=epochs, batch_size=batch_size)
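# During development it can also help to monitor a held-out slice of the training
# data each epoch; a sketch using Keras's standard validation_split argument (this
# call is not in the original notebook):
#
# model.fit(X_train, target_train, epochs=epochs, batch_size=batch_size,
#           validation_split=0.1)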

loss, accuracy = model.evaluate(X_test, target_test, verbose=1)
print('\nFinal held-out test accuracy:', accuracy, '\n')
print_layers_dims(model)

# Predicted probability of being malicious for the first ten test URLs
target_proba = model.predict(X_test, batch_size=1)
target_proba[0:10]
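# To turn these probabilities into hard labels, threshold them. 0.5 is a common
# default, but the choice of threshold here is an assumption, not from the original:
target_pred = (target_proba > 0.5).astype(int)
target_pred[0:10]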

model_name = "deeplearning_1DConv"
save_model(model, DATA_HOME + model_name + ".json", DATA_HOME + model_name + ".h5")
model = load_model(DATA_HOME + model_name + ".json", DATA_HOME + model_name + ".h5")

l_layers = model.layers
weights = l_layers[1].get_weights()  # layer 1 is the Embedding layer
weights[0].shape  # (max_vocab_len, emb_dim) = (101, 32)
test_url_mal = "naureen.net/etisalat.ae/index2.php"
test_url_benign = "sixt.com/php/reservation?language=en_US"

# Encode a single URL exactly as the training data was encoded
url = test_url_benign
url_int_tokens = [[printable.index(x) + 1 for x in url if x in printable]]
max_len = 75
X = sequence.pad_sequences(url_int_tokens, maxlen=max_len)
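# Scoring the encoded URL follows the same pattern as the test-set prediction
# above; a minimal sketch, assuming the trained `model` is still in scope:
proba = model.predict(X)
print(url, '-> probability of being malicious:', proba[0][0])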
