1 files changed, 471 insertions, 0 deletions
diff --git a/models/failure_prediction/python/attention_lstm.py b/models/failure_prediction/python/attention_lstm.py
new file mode 100644
index 0000000..612ff84
--- /dev/null
+++ b/models/failure_prediction/python/attention_lstm.py
@@ -0,0 +1,471 @@
+# pylint: disable=C0103, C0116, W0621, E0401, W0104, W0105, R0913, E1136, W0612, E0102
+# -*- coding: utf-8 -*-
+"""Attention_LSTM.ipynb
+
+Automatically generated by Colaboratory.
+
+Original file is located at
+    https://colab.research.google.com/drive/1fXlRsp5_7EmuJBdayJTd2ChTWs8CTXIp
+
+Contributors: **Rohit Singh Rathaur, Girish L.**
+
+Copyright [2021](2021) [*Rohit Singh Rathaur, BIT Mesra and Girish L., CIT GUBBI, Karnataka*]
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+"""
+
+from keras.utils.vis_utils import plot_model
+from keras import backend as K
+import seaborn as sns
+import numpy as np
+import pandas as pd
+import matplotlib.pyplot as plt
+import tensorflow as tf
+from google.colab import drive
+drive.mount('/content/drive')
+
+# Load the Ellis dataset CSV from the mounted Google Drive folder
+
+df_Ellis = pd.read_csv(
+    "/content/drive/MyDrive/LFN Anuket/Analysis/data/Final/Ellis_FinalTwoConditionwithOR.csv")
+df_Ellis
+# (the bare `df_Ellis` above is a notebook display cell; it has no effect in a script)
+df_Ellis.plot()
+
+"""We showed here the histograms of Ellis data"""
+
+# Histograms of the DataFrame's columns, using 100 bins per histogram
+df_Ellis.hist(bins=100, figsize=(20, 15))
+# save_fig("attribute_histogram_plots")
+plt.show()
+
+# 12-sample rolling means of the CPU system share and the 1-minute load average.
+cpu_system_perc = df_Ellis[['ellis-cpu.system_perc']]
+load_avg_1_min = df_Ellis[['ellis-load.avg_1_min']]
+
+# Both frames stay module-level names because the combined trend plot reuses them.
+for metric_frame in (cpu_system_perc, load_avg_1_min):
+    smoothed = metric_frame.rolling(12).mean()
+    smoothed.plot(figsize=(20, 10), linewidth=5, fontsize=20)
+    plt.xlabel('Timestamp', fontsize=30)
+
+"""## Identifying trends in Time Series data
+There are several ways to think about identifying trends in time series. One popular way
+is by taking a rolling average, which means that, for each time point, we take the average
+of that point and the points just before it. Note that the number of points is specified by
+a window size, which we need to choose.
+
+Because we take the average, the result tends to smooth out noise and seasonality.
+We will see that below. Check out this rolling average of `'ellis-cpu.wait_perc'`
+using the built-in `pandas` methods.
+
+When it comes to determining the window size, the code here uses a window of twelve
+samples (`rolling(12)`); unlike monthly data, this does not correspond to a yearly cycle.
+
+Note that in the code chunk below we use two sets of square brackets to extract the
+`'ellis-cpu.wait_perc'` column as a DataFrame; if we had used one set, like
+`df_Ellis['ellis-cpu.wait_perc']`, we would have created a pandas Series.
+
+In the code chunk below, we also chain methods: we call methods on an object one after
+another. Method chaining is pretty popular and pandas is one of the packages that really
+allows you to use that style of programming to the max!
+"""
+
+# 12-sample rolling mean of the CPU wait share (double brackets keep a DataFrame)
+cpu_wait_perc = df_Ellis[['ellis-cpu.wait_perc']]
+cpu_wait_perc.rolling(12).mean().plot(figsize=(20, 10), linewidth=5, fontsize=20)
+plt.xlabel('Timestamp', fontsize=30)  # was 'Year'; same label as the sibling plots
+
+"""We have successfully removed the seasonality and we saw an upward trend for
+`ellis-cpu.wait_perc`! But how do these metrics compare?
+
+We can figure this out by plotting the trends of `'ellis-cpu.wait_perc'`,
+`'ellis-cpu.system_perc'` and `'ellis-load.avg_1_min'` on a single figure:
+"""
+
+df_dg = pd.concat([frame.rolling(12).mean() for frame in (
+    cpu_system_perc, load_avg_1_min, cpu_wait_perc)], axis=1)  # one column per metric
+df_dg.plot(figsize=(20, 10), linewidth=5, fontsize=20)
+plt.xlabel('Timestamp', fontsize=20)  # was 'Year'; match the single-metric plots
+
+"""We established the correlation matrix for Ellis data.
+Seaborn has five built-in themes to style its plots: `darkgrid`, `whitegrid`, `dark`,
+`white`, and `ticks`. Seaborn defaults to using the darkgrid theme for its plots, but
+we can change this styling to better suit our presentation needs.
+
+To use any of the preset themes pass the name of it to `sns.set_style()`.
+"""
+
+# Correlation matrix of the Ellis columns, drawn as an annotated heatmap
+# NOTE(review): `color` is not used again in the visible part of this file.
+color = sns.color_palette()
+sns.set_style('darkgrid')
+
+correaltionMatrice = df_Ellis.corr()
+f, ax = plt.subplots(figsize=(20, 10))
+
+# The colour scale is pinned to [0, 1] and every cell is annotated with its value.
+heatmap_options = {
+    'cbar': True, 'vmin': 0, 'vmax': 1,
+    'square': True, 'annot': True}
+sns.heatmap(correaltionMatrice, **heatmap_options)
+plt.show()
+
+"""Compute the pairwise correlation of every column with `ellis-load.avg_1_min`."""
+
+df_Ellis.corrwith(df_Ellis['ellis-load.avg_1_min'])
+
+# Multivariate model inputs: three metrics plus the `Label` column. Column 1
+# (`ellis-load.avg_1_min`) is the one later passed as the prediction target.
+features_3 = [
+    'ellis-cpu.wait_perc', 'ellis-load.avg_1_min',
+    'ellis-net.in_bytes_sec', 'Label']
+
+# Index the rows by the `Timestamp` column before plotting one subplot per column.
+features = df_Ellis[features_3]
+features.index = df_Ellis['Timestamp']
+features.head()
+
+features.plot(subplots=True)
+
+# From here on `features` is a plain NumPy array with one row per sample.
+features = features.values
+
+"""train test split for simple time series moving window average"""
+
+train_split = 141600  # rows before this index form the training split
+tf.random.set_seed(13)
+
+# Standardize each column with training-split statistics. axis=0 yields one mean/std
+# per feature; without it NumPy pools all columns into a single scalar mean/std.
+features_mean = features[:train_split].mean(axis=0)
+features_std = features[:train_split].std(axis=0)
+features_std[features_std == 0] = 1.0  # keep constant columns finite
+features = (features - features_mean) / features_std
+print(type(features))
+print(features.shape)
+
+"""Defined a `mutlivariate_data` function to generate multivariate data"""
+
+# create multivariate data
+
+
+def mutlivariate_data(features, target, start_idx, end_idx, history_size,
+                      target_size, step, single_step=False):
+    """Cut sliding history windows and their labels out of `features` / `target`.
+
+    For each anchor `i` in `[start_idx + history_size, end_idx)` the sample is
+    the rows `i - history_size, ..., i - 1` of `features`, keeping every `step`-th
+    row. The label is `target[i + target_size]` when `single_step` is true, else
+    `target[i:i + target_size]`. `end_idx=None` means `len(features) - target_size`.
+
+    Returns a `(samples, labels)` pair of NumPy arrays.
+    """
+    first_anchor = start_idx + history_size
+    last_anchor = len(features) - target_size if end_idx is None else end_idx
+
+    def label_at(anchor):
+        if single_step:
+            return target[anchor + target_size]
+        return target[anchor:anchor + target_size]
+
+    anchors = range(first_anchor, last_anchor)
+    windows = [features[range(i - history_size, i, step)] for i in anchors]
+    targets = [label_at(i) for i in anchors]
+    return np.array(windows), np.array(targets)
+
+
+"""Generate multivariate data using the function `mutlivariate_data` defined above"""
+
+# generate multivariate data
+
+history = 720  # rows of past data covered by each window
+future_target = 72  # offset of the single-step label past the window end
+STEP = 6  # only every 6th row of a window is kept
+
+x_train_ss, y_train_ss = mutlivariate_data(
+    features=features, target=features[:, 1], start_idx=0, end_idx=train_split,
+    history_size=history, target_size=future_target, step=STEP, single_step=True)
+x_val_ss, y_val_ss = mutlivariate_data(
+    features=features, target=features[:, 1], start_idx=train_split, end_idx=None,
+    history_size=history, target_size=future_target, step=STEP, single_step=True)
+print(x_train_ss.shape, y_train_ss.shape)
+print(x_val_ss.shape, y_val_ss.shape)
+
+"""
+The `tf.data.Dataset` API supports writing descriptive and efficient input pipelines.
+Dataset usage follows a common pattern:
+- Create a source dataset from our input data.
+- Apply dataset transformations to preprocess the data.
+- Iterate over the dataset and process the elements.
+Note: Iteration happens in a streaming fashion, so the full dataset does not need to
+fit into memory.
+Once we have a dataset, we can apply transformations to prepare the data for our model:
+"""
+
+# Cached, shuffled, batched and endlessly repeating tf.data pipelines (single step)
+batch_size = 256
+buffer_size = 10000
+
+train_ss = (tf.data.Dataset.from_tensor_slices((x_train_ss, y_train_ss))
+            .cache().shuffle(buffer_size).batch(batch_size).repeat())
+
+val_ss = (tf.data.Dataset.from_tensor_slices((x_val_ss, y_val_ss))
+          .cache().shuffle(buffer_size).batch(batch_size).repeat())
+
+print(train_ss)
+print(val_ss)
+
+x_train_ss.shape[-2:]
+
+"""A custom RMSE function (the models below use `mae` loss plus a built-in RMSE metric):"""
+
+
+def root_mean_squared_error(y_true, y_pred):
+    return K.sqrt(K.mean(K.square(y_true - y_pred)))  # RMSE reduced over every element
+
+
+# Single-step model: Bi-LSTM(32) -> LSTM(100) -> ReLU -> Dense(1)
+steps = 50
+EPOCHS = 20
+
+# The layers are listed in call order; the Dropout layers of earlier experiments
+# remain switched off.
+single_step_model = tf.keras.models.Sequential([
+    tf.keras.layers.Bidirectional(tf.keras.layers.LSTM(
+        32, return_sequences=True, input_shape=x_train_ss.shape[-2:])),
+    # tf.keras.layers.Dropout(0.3),
+    tf.keras.layers.LSTM(units=100, return_sequences=False),
+    # tf.keras.layers.Dropout(0.2),
+    tf.keras.layers.Activation("relu"),
+    tf.keras.layers.Dense(1),
+])
+
+# MAE is the training loss; RMSE is tracked as an extra metric named 'rmse'.
+single_step_model.compile(
+    optimizer=tf.keras.optimizers.Adam(),
+    loss='mae',
+    metrics=[tf.keras.metrics.RootMeanSquaredError(name='rmse')])
+
+# Both datasets repeat indefinitely, so every epoch is bounded by explicit step counts.
+single_step_model_history = single_step_model.fit(
+    train_ss,
+    epochs=EPOCHS,
+    steps_per_epoch=steps,
+    validation_data=val_ss,
+    validation_steps=50)
+
+# Save a diagram of the layer graph next to the data on Drive, then print the
+# per-layer summary table.
+plot_model(
+    single_step_model, show_shapes=True, show_layer_names=True,
+    to_file='/content/drive/MyDrive/LFN Anuket/Analysis/data/Final/Bi-LSTM.png')
+single_step_model.summary()
+
+
+# plot train test loss
+
+def plot_loss(history, title):
+    """Plot the per-epoch training and validation loss stored in a Keras History."""
+    # Both series are read up front, before any figure is created.
+    train_curve, val_curve = (history.history[key] for key in ('loss', 'val_loss'))
+    epoch_axis = range(len(train_curve))
+    plt.figure()
+    plt.plot(epoch_axis, train_curve, 'b', label='Train Loss')
+    plt.plot(epoch_axis, val_curve, 'r', label='Validation Loss')
+    plt.title(title)
+    plt.legend()
+    plt.grid()
+    plt.show()
+
+
+plot_loss(history=single_step_model_history,
+          title='Single Step Training and validation loss')
+
+# plot train test loss
+
+
+def plot_loss(history, title, metric='rmse'):
+    """Plot the per-epoch train/validation curves of `metric` from a Keras History.
+
+    This definition replaces the earlier `plot_loss`; `metric='loss'` draws loss curves.
+    """
+    train_curve, val_curve = history.history[metric], history.history['val_' + metric]
+    plt.figure()
+    plt.plot(range(len(train_curve)), train_curve, 'b', label='Train ' + metric.upper())
+    plt.plot(range(len(train_curve)), val_curve, 'r', label='Validation ' + metric.upper())
+    plt.title(title)
+    plt.legend()
+    plt.grid()
+    plt.show()
+
+
+plot_loss(single_step_model_history, 'Single Step Training and validation RMSE')
+
+# Build the x-axis of a history window: the integer offsets -length, ..., -1.
+
+
+def create_time_steps(length):
+    return [*range(-length, 0)]
+
+# function to plot time series data
+
+
+def plot_time_series(plot_data, delta, title):
+    """Plot a history window together with its true and predicted future value.
+
+    Args:
+        plot_data: sequence indexed as (history, true future, model prediction).
+            Entry 0 is drawn as a curve over negative time steps; every later
+            entry is drawn as a marker at x = `delta`.
+        delta: x position of the future markers; a falsy value means 0.
+        title: title of the plot.
+
+    Returns:
+        The `matplotlib.pyplot` module, so the caller can call `.show()` on it.
+    """
+    labels = ["History", 'True Future', 'Model Predcited']
+    marker = ['.-', 'rx', 'go']
+    time_steps = create_time_steps(plot_data[0].shape[0])
+    future = delta if delta else 0
+
+    plt.title(title)
+    plt.plot(time_steps, plot_data[0].flatten(), marker[0], label=labels[0])
+    for position in range(1, len(plot_data)):
+        plt.plot(future, plot_data[position], marker[position],
+                 markersize=10, label=labels[position])
+
+    plt.legend()
+    # The x-axis runs from the oldest history step to (future + 5) * 2.
+    plt.xlim([time_steps[0], (future + 5) * 2])
+    plt.xlabel('Time_Step')
+    return plt
+
+# Moving window average: the mean of all values in `history` (via `np.mean`).
+
+
+def MWA(history):
+    return np.mean(history, axis=None)
+
+# For five validation batches: first sample's feature-1 history, true label, prediction
+for x, y in val_ss.take(5):
+    history_curve = x[0][:, 1].numpy()
+    true_value = y[0].numpy()
+    predicted_value = single_step_model.predict(x)[0]
+    plot = plot_time_series([history_curve, true_value, predicted_value], 12,
+                            'Single Step Prediction')
+    plot.show()
+
+"""# **MultiStep Forecasting**"""
+
+future_target = 72  # 72 future values
+# Same windows as the single-step case, but each label is the next 72 target values.
+x_train_multi, y_train_multi = mutlivariate_data(
+    features=features, target=features[:, 1], start_idx=0, end_idx=train_split,
+    history_size=history, target_size=future_target, step=STEP)
+x_val_multi, y_val_multi = mutlivariate_data(
+    features=features, target=features[:, 1], start_idx=train_split, end_idx=None,
+    history_size=history, target_size=future_target, step=STEP)
+
+print(x_train_multi.shape)
+print(y_train_multi.shape)
+
+#  TF DATASET
+# Only the training pipeline is cached and shuffled; validation is just batched.
+
+train_data_multi = (tf.data.Dataset.from_tensor_slices((x_train_multi, y_train_multi))
+                    .cache().shuffle(buffer_size).batch(batch_size).repeat())
+
+val_data_multi = (tf.data.Dataset.from_tensor_slices((x_val_multi, y_val_multi))
+                  .batch(batch_size).repeat())
+
+print(train_data_multi)
+print(val_data_multi)
+
+# plotting function
+
+
+def multi_step_plot(history, true_future, prediction):
+    """Plot feature 1 of `history`, the true future and, if any is non-zero, `prediction`."""
+    plt.figure(figsize=(12, 6))
+    num_in = create_time_steps(len(history))
+    # Future points are spaced 1 / STEP apart on the x-axis.
+    future_axis = np.arange(len(true_future)) / STEP
+    plt.grid()
+    plt.plot(num_in, np.array(history[:, 1]), label='History')
+    plt.plot(future_axis, np.array(true_future), 'bo', label='True Future')
+    if prediction.any():
+        plt.plot(future_axis, np.array(prediction), 'ro', label='Predicted Future')
+    plt.legend(loc='upper left')
+    plt.show()
+
+
+for x, y in train_data_multi.take(1):
+    multi_step_plot(x[0], y[0], np.array([0]))
+
+"""Bi-directional LSTM:
+On some sequence prediction problems, it can be beneficial to allow the LSTM model to
+learn the input sequence both forward and backwards and concatenate both interpretations.
+This is known as bidirectional.
+
+Here, `tf.keras.layers.Bidirectional` is a bidirectional wrapper for RNNs which inherits
+from `Wrapper`, `Layer`, and `Module`
+"""
+
+multi_step_model = tf.keras.models.Sequential([
+    tf.keras.layers.Bidirectional(tf.keras.layers.LSTM(
+        32, return_sequences=True, input_shape=x_train_multi.shape[-2:])),
+    tf.keras.layers.Dropout(0.2),
+    tf.keras.layers.LSTM(units=100, return_sequences=False),
+    tf.keras.layers.Dropout(0.2),
+    tf.keras.layers.Activation("relu"),
+    # One output unit per forecast step; tied to `future_target` (72) so the head
+    # always matches the label width produced by `mutlivariate_data`.
+    tf.keras.layers.Dense(future_target),
+])
+
+# RMSprop with per-weight gradient clipping at 1.0; MAE loss, RMSE tracked as 'rmse'.
+multi_step_model.compile(
+    optimizer=tf.keras.optimizers.RMSprop(clipvalue=1.0),
+    loss='mae',
+    metrics=[tf.keras.metrics.RootMeanSquaredError(name='rmse')])
+
+MULTI_STEP_HISTORY = multi_step_model.fit(
+    train_data_multi, epochs=EPOCHS, steps_per_epoch=steps,
+    validation_data=val_data_multi, validation_steps=50)
+
+plot_loss(MULTI_STEP_HISTORY, 'Multi-Step Training and validation loss')
+# NOTE(review): `plot_loss` resolves to its later definition, which plots the 'rmse' curves.
+for x, y in val_data_multi.take(5):
+    multi_step_plot(x[0], y[0], multi_step_model.predict(x)[0])
+
+# evaluate() returns [loss, rmse]; the compiled loss is MAE, so MAE is entry 0.
+scores = multi_step_model.evaluate(
+    x_train_multi, y_train_multi, verbose=1, batch_size=200)
+print('MAE: {}'.format(scores[0]))
+
+# Scores on the validation windows. The original printed the training `scores`
+# a second time here, so the validation result was never reported.
+SCORES_TEST = multi_step_model.evaluate(
+    x_val_multi, y_val_multi, verbose=1, batch_size=200)
+print('MAE: {}'.format(SCORES_TEST[0]))
+
+Y_PRED_TEST = multi_step_model.predict(x_val_multi, verbose=0)
+# NOTE(review): no line below is given a label, so plt.legend() has no entries to show.
+plt.figure(figsize=(10, 5))
+plt.plot(Y_PRED_TEST)
+plt.plot(y_val_multi)
+plt.ylabel("Value")
+plt.xlabel("Timestap")
+plt.legend(loc='upper left')
+plt.show()