# File: models/failure_prediction/python/featurecreation.py

# pylint: disable=C0103, C0116, W0621, E0401, W0104, W0105, R0913, E1136, W0612, E0102, C0301, W0611, C0411, W0311, C0326, C0330, W0106, C0412
# -*- coding: utf-8 -*-
"""FeatureCreation.ipynb

Automatically generated by Colaboratory.

Original file is located at
    https://colab.research.google.com/drive/1UQzgn71tYU7WHgr-CL1CRNM9q9Ajr2Kx

Contributors: **Rohit Singh Rathaur, Girish L.**

Copyright [2021](2021) [*Rohit Singh Rathaur, BIT Mesra and Girish L., CIT GUBBI, Karnataka*]

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
"""

# Commented out IPython magic to ensure Python compatibility.
# Import libraries used for visualization and analysis
import pandas as pd
import numpy as np

# %matplotlib inline
import matplotlib
import matplotlib.pyplot as plt

from pandas import Series, DataFrame
import seaborn as sns
from sklearn.preprocessing import scale
from sklearn.decomposition import PCA
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis
from scipy import stats
from IPython.display import display, HTML

from google.colab import drive
# Mount Google Drive at /gdrive so the dataset paths below resolve.
# `drive` comes from `google.colab`, so this step needs a Colab runtime.
drive.mount('/gdrive')

"""# **Loading the Data**"""

# Load Final.csv into `df_Ellis`; every threshold flag and label further down
# is derived from columns of this frame.
df_Ellis = pd.read_csv(
    "/gdrive/MyDrive/LFN Anuket/Analysis/data/Final/Final.csv")
# Disabled loaders for the other CSVs under data/matrices/, kept for reference.
# NOTE: `error_bad_lines` was deprecated in pandas 1.3 and removed in 2.0;
# use `on_bad_lines='skip'` instead if any of these are re-enabled.
#df_Bono  = pd.read_csv("/gdrive/MyDrive/LFN Anuket/Analysis/data/matrices/df_Bono.csv", error_bad_lines=False)
#df_Sprout  = pd.read_csv("/gdrive/MyDrive/LFN Anuket/Analysis/data/matrices/df_Sprout.csv", error_bad_lines=False)
#df_Homer  = pd.read_csv("/gdrive/MyDrive/LFN Anuket/Analysis/data/matrices/df_Homer.csv", error_bad_lines=False)
#df_Homestead  = pd.read_csv("/gdrive/MyDrive/LFN Anuket/Analysis/data/matrices/df_Homestead.csv", error_bad_lines=False)
#df_Ralf  = pd.read_csv("/gdrive/MyDrive/LFN Anuket/Analysis/data/matrices/df_Ralf.csv", error_bad_lines=False)

# Notebook previews: the returned frames are discarded when this file is run
# as a plain script (nothing is printed).
df_Ellis.head()

df_Ellis.describe()

#df_Ellis['SLO1'] = 0
#print('Column names are: ',list(df_Ellis.columns))

# ---- Per-column threshold flags ---------------------------------------------
# Each flag is a boolean Series that is True where the named df_Ellis column
# is greater than its threshold. Every flag is exported to its own CSV; the
# `.head(50)` calls are notebook previews whose result is discarded when this
# file is run as a script.
# NOTE(review): several export file names do not describe the exported data
# (e.g. "lessthan0198" for `> 2.45`, "in_bytes_sec" for the out_packets_sec
# column). The names are kept unchanged -- confirm whether to rename them.

# Same row window as the literal slice [0:176999].
# NOTE(review): positions 176999 and beyond are excluded from the combined
# labels below -- confirm that this hard-coded cut-off is intended.
_LABEL_ROWS = slice(0, 176999)

# Flag: "ellis-load.avg_1_min" greater than 2.45
df4 = df_Ellis["ellis-load.avg_1_min"].gt(2.45)
df4.to_csv(
    '/gdrive/MyDrive/LFN Anuket/Analysis/data/Final/EllisLoadAvgLabel_lessthan0198.csv')
df4.head(50)

# Flag: "ellis-cpu.wait_perc" greater than 5
df3 = df_Ellis["ellis-cpu.wait_perc"].gt(5)
df3.to_csv('/gdrive/MyDrive/LFN Anuket/Analysis/data/Final/ellis-cpu>5.csv')
df3.head(50)

# Flag: "ellis-net.out_packets_sec" greater than 1000
df5 = df_Ellis["ellis-net.out_packets_sec"].gt(1000)
df5.to_csv(
    '/gdrive/MyDrive/LFN Anuket/Analysis/data/Final/ellis-net.in_bytes_sec21139.csv')
df5.head(50)

# ---- Combined labels --------------------------------------------------------
# df6: element-wise OR of the load-average flag (df4) and the cpu-wait flag
# (df3), restricted to the row window above.
df6 = df4[_LABEL_ROWS] | df3[_LABEL_ROWS]
df6.to_csv('/gdrive/MyDrive/LFN Anuket/Analysis/data/Final/OR_TwoCondition(2).csv')
df6.head(50)

# df7: df6 OR the packet-rate flag (df5), i.e. True when any of the three
# thresholds is exceeded.
df7 = df6[_LABEL_ROWS] | df5[_LABEL_ROWS]
df7.to_csv('/gdrive/MyDrive/LFN Anuket/Analysis/data/Final/FinalORLabel8.5.csv')
df7.head(50)

# Add the combined label as a new "Label" column at position 7. `df7` is the
# logical OR of the three threshold flags (df4, df3 and df5), so a row is
# labelled True when any of the three thresholds is exceeded.
# NOTE(review): pandas aligns `df7` to the frame by index, so any df_Ellis row
# without a matching entry in `df7` receives NaN rather than False -- confirm
# that this is acceptable for downstream use.
df_Ellis.insert(loc=7, column="Label", value=df7)

#df_Ellis.insert (8, "Label", df7)

# Export the labelled frame.
# NOTE(review): the file name says "TwoCondition" although the label combines
# three conditions; the name is kept unchanged.
df_Ellis.to_csv(
    '/gdrive/MyDrive/LFN Anuket/Analysis/data/Final/Ellis_FinalTwoConditionwithOR.csv')

# Notebook preview; the result is discarded when run as a script.
df_Ellis.head(100)

# Count the occurrences of each distinct value in the "Label" column
# (also a notebook preview -- the result is not stored or printed).
df_Ellis['Label'].value_counts()

#final.to_csv('/gdrive/MyDrive/LFN Anuket/Analysis/data/New/FinalLabel.csv')

#df_Ellis.loc[(df_Ellis["ellis-cpu.wait_perc"] > 5) & (df_Ellis["ellis-load.avg_1_min"] > 2)]

"""# **Creating New Features**"""