diff options
author | Sridhar K. N. Rao <sridhar.rao@spirent.com> | 2021-10-22 11:52:49 +0530 |
---|---|---|
committer | Sridhar K. N. Rao <sridhar.rao@spirent.com> | 2021-10-22 11:53:54 +0530 |
commit | 9dd2c03e48832922f0f8f21415473f8960f05b42 (patch) | |
tree | 7c6835cfcadf073840432dd9ac4c80837fc1680d /tools/modelselector/modelselector.py | |
parent | c973b36225fb2f3269c677a10f2f317b0b23c393 (diff) |
Tool: Model Selector.
This patch adds model selector tool.
Signed-off-by: Sridhar K. N. Rao <sridhar.rao@spirent.com>
Change-Id: I73eb64406180d531705dc51c80e7605eefdeae8c
Diffstat (limited to 'tools/modelselector/modelselector.py')
-rw-r--r-- | tools/modelselector/modelselector.py | 834 |
1 files changed, 834 insertions, 0 deletions
diff --git a/tools/modelselector/modelselector.py b/tools/modelselector/modelselector.py new file mode 100644 index 0000000..90b289c --- /dev/null +++ b/tools/modelselector/modelselector.py @@ -0,0 +1,834 @@ +# Copyright 2021 Spirent Communications. +# sridhar.rao@spirent.com +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +""" +Tool to suggest which ML approach is more applicable for +a particular data and usecase. +TODO: +1. Minimize code. +a. Reduce returns. +b. Optimize loops. + +2. Add Informative data to the user. +""" + +from __future__ import print_function +import signal +import sys +from pypsi import wizard as wiz +from pypsi.shell import Shell + +# pylint: disable=line-too-long,too-few-public-methods,too-many-instance-attributes, too-many-nested-blocks, too-many-return-statements, too-many-branches + +class Bcolors: + """ + For Coloring + """ + HEADER = '\033[95m' + OKBLUE = '\033[94m' + OKGREEN = '\033[92m' + WARNING = '\033[93m' + FAIL = '\033[91m' + ENDC = '\033[0m' + BOLD = '\033[1m' + UNDERLINE = '\033[4m' + +class AlgoSelectorWizard(): + """ + Class to create wizards + """ + def __init__(self): + """ + Perform Initialization. + """ + self.shell = Shell() + self.main_values = {} + self.main_l1_values = {} + self.main_l2a_values = {} + self.main_l2b_values = {} + self.main_l3_values = {} + self.main_l4_values = {} + self.unsup_values = {} + self.ri_values = {} + self.gen_values = {} + self.wiz_main = None + self.wiz_main_l1 = None + self.wiz_main_l2_a = None + self.wiz_main_l2_b = None + self.wiz_main_l3 = None + self.wiz_main_l4 = None + self.wiz_generic = None + self.wiz_unsupervised = None + self.wiz_reinforcement = None + self.ml_needed = False + self.supervised = False + self.unsupervised = False + self.reinforcement = False + self.data_size = 'high' + self.interpretability = False + self.faster = False + self.ftod_ratio = 'low' + self.reproducibility = False + + + ############# All the Wizards ################################## + + ### GENERIC Wizards - Need for ML ############################## + def main_wizard_l1(self): + """ + The Main Wizard L1 + """ + self.wiz_main_l1 = wiz.PromptWizard( + name=Bcolors.OKBLUE+"Do you Need ML - Data Availability"+Bcolors.ENDC, + description="", + steps=( + # The list of input prompts to ask the user. + wiz.WizardStep( + # ID where the value will be stored + id="data_availability", + # Display name + name=Bcolors.HEADER+"Do you have access to data about different situations, or that describes a lot of examples of situations"+Bcolors.ENDC, + # Help message + help="Y/N/U - Yes/No/Unknown", + validators=(wiz.required_validator), + default='Y', + ), + ) + ) + + def main_wizard_l2_a(self): + """ + The Main Wizard L2-A + """ + self.wiz_main_l2_a = wiz.PromptWizard( + name=Bcolors.OKBLUE+"Do you Need ML - Data Creation"+Bcolors.ENDC, + description="", + steps=( + # The list of input prompts to ask the user. + wiz.WizardStep( + # ID where the value will be stored + id="data_creativity", + # Display name + name=Bcolors.HEADER+"Will a system be able to gather a lot of data by trying sequences of actions in many different situations and seeing the results"+Bcolors.ENDC, + # Help message + help="Y/N/U - Yes/No/Unknown", + validators=(wiz.required_validator), + default='Y', + ), + ) + ) + + def main_wizard_l2_b(self): + """ + The Main Wizard L2-B + """ + label = """ One or more meaningful and informative 'tag' to provide context so that a machine learning model can learn from it. For example, labels might indicate whether a photo contains a bird or car, which words were uttered in an audio recording, or if an x-ray contains a tumor. Data labeling is required for a variety of use cases including computer vision, natural language processing, and speech recognition.""" + self.wiz_main_l2_b = wiz.PromptWizard( + name=Bcolors.OKBLUE+"Do you Need ML - Data Programmability"+Bcolors.ENDC, + description="", + steps=( + # The list of input prompts to ask the user. + wiz.WizardStep( + # ID where the value will be stored + id="data_label", + # Display name + name=Bcolors.HEADER+" Do you have Labelled data? (Type Y/N/U - Yes/No/Unknown). Type help for description of label. "+Bcolors.ENDC, + # Help message + help=label, + validators=(wiz.required_validator), + default='Y', + ), + wiz.WizardStep( + # ID where the value will be stored + id="data_programmability", + # Display name + name=Bcolors.HEADER+"Can a program or set of rules decide what actions to take based on the data you have about the situations"+Bcolors.ENDC, + # Help message + help="Y/N/U - Yes/No/Unknown", + validators=(wiz.required_validator), + default='Y', + ), + ) + ) + + + def main_wizard_l3(self): + """ + The Main Wizard L3 + """ + self.wiz_main_l3 = wiz.PromptWizard( + name=Bcolors.OKBLUE+"Do you Need ML - Data Knowledge"+Bcolors.ENDC, + description="", + steps=( + # The list of input prompts to ask the user. + wiz.WizardStep( + # ID where the value will be stored + id="data_knowledge", + # Display name + name=Bcolors.HEADER+"Could a knowledgeable human decide what actions to take based on the data you have about the situations"+Bcolors.ENDC, + # Help message + help="Y/N/U - Yes/No/Unknown", + validators=(wiz.required_validator), + default='Y', + ), + ) + ) + + def main_wizard_l4(self): + """ + The Main Wizard - L4 + """ + self.wiz_main_l4 = wiz.PromptWizard( + name=Bcolors.OKBLUE+"Do you Need ML - Data Pattern"+Bcolors.ENDC, + description="", + steps=( + # The list of input prompts to ask the user. + wiz.WizardStep( + # ID where the value will be stored + id="data_pattern", + # Display name + name=Bcolors.HEADER+"Could there be patterns in these situations that the humans haven't recognized before"+Bcolors.ENDC, + # Help message + help="Y/N/U - Yes/No/Unknown", + validators=(wiz.required_validator), + default='Y' + ), + ) + ) + ### GENERIC Wizards - GOAL, METRICS, DATA ############################## + def gen_wizard(self): + """ + Generic Wizard - Goal, metrics, data + """ + self.wiz_generic = wiz.PromptWizard( + name=Bcolors.OKBLUE+"Understanding Goal, Metrics, Data and Output Type"+Bcolors.ENDC, + description="", + steps=( + # The list of input prompts to ask the user. + wiz.WizardStep( + # ID where the value will be stored + id="data_goal", + # Display name + name=Bcolors.HEADER+" What is your goal with the data? Predict, Describe or Explore"+Bcolors.ENDC, + # Help message + help="Enter one of Predict/Describe/Explore", + validators=(wiz.required_validator), + default='Explore' + ), + wiz.WizardStep( + # ID where the value will be stored + id="metric_accuracy", + # Display name + name=Bcolors.HEADER+" How important the metric 'Accuracy' is for you? 1-5: 1- Least important 5- Most Important"+Bcolors.ENDC, + # Help message + help="Enter 1-5: 1 being least important, and 5 being most important", + validators=(wiz.required_validator), + default='1' + ), + wiz.WizardStep( + # ID where the value will be stored + id="metric_speed", + # Display name + name=Bcolors.HEADER+" How important the metric 'Speed' is for you? 1-5: 1- Least important 5- Most Important"+Bcolors.ENDC, + # Help message + help="Enter 1-5: 1 being least important, and 5 being most important", + validators=(wiz.required_validator), + default='1' + ), + wiz.WizardStep( + # ID where the value will be stored + id="metric_interpretability", + # Display name + name=Bcolors.HEADER+" How important the metric 'Interpretability' is for you? 1-5: 1- Least important 5- Most Important"+Bcolors.ENDC, + # Help message + help="Enter 1-5: 1 being least important, and 5 being most important", + validators=(wiz.required_validator), + default='1' + ), + wiz.WizardStep( + # ID where the value will be stored + id="metric_reproducibility", + # Display name + name=Bcolors.HEADER+" How important the metric 'Reproducibility' is for you? 1-5: 1- Least important 5- Most Important"+Bcolors.ENDC, + # Help message + help="Enter 1-5: 1 being least important, and 5 being most important", + validators=(wiz.required_validator), + default='1' + ), + wiz.WizardStep( + # ID where the value will be stored + id="metric_implementation", + # Display name + name=Bcolors.HEADER+" How important the metric 'Ease of Implementation and Maintenance' is for you? 1-5: 1- Least important 5- Most Important"+Bcolors.ENDC, + # Help message + help="Enter 1-5: 1 being least important, and 5 being most important", + validators=(wiz.required_validator), + default='1' + ), + wiz.WizardStep( + # ID where the value will be stored + id="data_column", + # Display name + name=Bcolors.HEADER+" What does the data (columns) represent? well defined 'Features', 'signals' (Timeseries, pixels, etc) or Text - (Please type the associated number)"+Bcolors.ENDC, + # Help message + help="1. Well Defined Features\n 2. Signals\n 3. Text - Unstructured\n 4. None of the above\n", + validators=(wiz.required_validator), + default='Features' + ), + wiz.WizardStep( + # ID where the value will be stored + id="data_signal_type", + # Display name + name=Bcolors.HEADER+" If Signals, can you choose any one from the below list? "+Bcolors.ENDC, + # Help message + help="1. Image\n 2. Audio\n 3. Timeseries\n 4. None of the above\n 5. Not Applicable\n ", + validators=(wiz.required_validator), + default='3' + ), + wiz.WizardStep( + # ID where the value will be stored + id="data_text_type", + # Display name + name=Bcolors.HEADER+" If Text, can you choose any one from the below list? "+Bcolors.ENDC, + # Help message + help="1. Webpages\n 2. Emails\n 3. Social-Media Posts\n 4. Books\n 5. Formal Articles\n 6. Speech converted to text\n 7. None of the above\n 8. Not Applicable\n ", + validators=(wiz.required_validator), + default='3' + ), + wiz.WizardStep( + # ID where the value will be stored + id="data_features", + # Display name + name=Bcolors.HEADER+" If features, are they well defined? i.e., are all the variables well understood? "+Bcolors.ENDC, + # Help message + help="Y/N/NA", + validators=(wiz.required_validator), + default='Y' + ), + wiz.WizardStep( + # ID where the value will be stored + id="data_features_count", + # Display name + name=Bcolors.HEADER+" If features, How many are there? "+Bcolors.ENDC, + # Help message + help="Number or NA", + validators=(wiz.required_validator), + default='10' + ), + wiz.WizardStep( + # ID where the value will be stored + id="data_distribution", + # Display name + name=Bcolors.HEADER+" Are you aware of any 'Distribution' that is inherent to the data, we can take advantage of?"+Bcolors.ENDC, + # Help message + help="Y/N/U", + validators=(wiz.required_validator), + default='Y' + ), + wiz.WizardStep( + # ID where the value will be stored + id="data_io_relation", + # Display name + name=Bcolors.HEADER+" Is the probability of 'Linear Relation' between input and the output is high?"+Bcolors.ENDC, + # Help message + help="Y/N/U", + validators=(wiz.required_validator), + default='Y' + ), + wiz.WizardStep( + # ID where the value will be stored + id="data_correlation", + # Display name + name=Bcolors.HEADER+" Are you confident that there is NO high correlation among the independent variables in your day?"+Bcolors.ENDC, + # Help message + help="Y/N/U. Change in one ", + validators=(wiz.required_validator), + default='Y' + ), + wiz.WizardStep( + # ID where the value will be stored + id="data_cond_indep", + # Display name + name=Bcolors.HEADER+" Are you confident that the variables are conditionally independent?"+Bcolors.ENDC, + # Help message + help="Y/N/U. If probability that it rains given lightining and thunder is same as probability that it rains given lightining, then rain and thunder are conditionally independent", + validators=(wiz.required_validator), + default='Y' + ), + wiz.WizardStep( + # ID where the value will be stored + id="data_missing", + # Display name + name=Bcolors.HEADER+" Are there any missing values in the data? "+Bcolors.ENDC, + # Help message + help="Y/N/U", + validators=(wiz.required_validator), + default='N' + ), + wiz.WizardStep( + # ID where the value will be stored + id="data_size_bytes", + # Display name + name=Bcolors.HEADER+" How big is the data in terms of size? (Use K/M/G Bytes unit) "+Bcolors.ENDC, + # Help message + help="Number(integer) and unit: K for Kilo, M for Mega and G for Giga. Ex: 10G for 10 Giga bytes", + validators=(wiz.required_validator), + default='1G' + ), + wiz.WizardStep( + # ID where the value will be stored + id="data_size_samples", + # Display name + name=Bcolors.HEADER+" How big is the data in terms of samples? (Use T/M/B Samples) "+Bcolors.ENDC, + # Help message + help="Number(integer) and unit: T for Thousand, M for Million and B for Billion. Ex: 1M for 1 Million Samples", + validators=(wiz.required_validator), + default='1M' + ), + wiz.WizardStep( + # ID where the value will be stored + id="data_type_output", + # Display name + name=Bcolors.HEADER+" What is the expected output data type ? (Please type number associated with type in 'help') "+Bcolors.ENDC, + # Help message + help=" 1:Numerical-Discrete\n 2:Numerical-Continuous\n 3:Ordinal\n 4:Categorical-Binary\n 5:Categorical-Multiclass", + validators=(wiz.required_validator), + default='1' + ), + wiz.WizardStep( + # ID where the value will be stored + id="data_output_prob", + # Display name + name=Bcolors.HEADER+" Is the expected output data a probability value ? "+Bcolors.ENDC, + # Help message + help="Y/N", + validators=(wiz.required_validator), + default='N' + ), + ) + ) + + + def unsupervised_wizard(self): + """ + The Un-Supervized Learning Wizard + """ + self.wiz_generic = wiz.PromptWizard( + name=Bcolors.OKBLUE+"Understanding Goal, Metrics, Data and Output Type"+Bcolors.ENDC, + description="", + steps=( + # The list of input prompts to ask the user. + wiz.WizardStep( + # ID where the value will be stored + id="unsup_goal", + # Display name + name=Bcolors.HEADER+" What is the main goal? (Please type number associated with type in 'help')"+Bcolors.ENDC, + # Help message + help="1: Explore Similar Groups (clustering) \n 2: Perform Dimensionality Reduction\n 3: Others\n", + validators=(wiz.required_validator), + default='1' + ), + wiz.WizardStep( + # ID where the value will be stored + id="unsup_dr_topic_mod", + # Display name + name=Bcolors.HEADER+" If dimensionality reduction, do you prefer topic modelling ? (Please type NA is you are not sure)"+Bcolors.ENDC, + # Help message + help="Y/N/NA", + validators=(wiz.required_validator), + default='NA' + ), + wiz.WizardStep( + # ID where the value will be stored + id="unsup_clus_dv", + # Display name + name=Bcolors.HEADER+" Are you aware of density variations in your data ? (Please type NA is you are not sure)"+Bcolors.ENDC, + # Help message + help="Y/N/NA", + validators=(wiz.required_validator), + default='NA' + ), + wiz.WizardStep( + # ID where the value will be stored + id="unsup_clus_outliers", + # Display name + name=Bcolors.HEADER+" Are there too many outliers in your data ? (Please type NA is you are not sure)"+Bcolors.ENDC, + # Help message + help="Y/N/NA", + validators=(wiz.required_validator), + default='NA' + ), + wiz.WizardStep( + # ID where the value will be stored + id="unsup_clus_groups", + # Display name + name=Bcolors.HEADER+" If clustering, do you know how many groups to form? (Please type NA is you are not sure)"+Bcolors.ENDC, + # Help message + help="Y/N/NA", + validators=(wiz.required_validator), + default='NA' + ), + + ) + ) + + def reinforcement_wizard(self): + """ + The Reinforced Learning Wizard + """ + message = """ + Reward |--------| + |-------| Agent | Action + | |-----| |-------| + | | |--------| | + | |state | + | | | + | | |-----------| | + | |----|Environment| | + |------| |-----| + |-----------| + """ + self.wiz_reinforcement = wiz.PromptWizard( + name=Bcolors.OKBLUE+"Reinforcement Specific"+Bcolors.ENDC, + description="", + steps=( + # The list of input prompts to ask the user. + wiz.WizardStep( + # ID where the value will be stored + id="ri_info", + # Display name + name=Bcolors.HEADER+" Type help for reference diagram for reinforcement-learning"+Bcolors.ENDC, + # Help message + help=message, + validators=(wiz.required_validator), + default='Type Help or Press Enter' + ), + wiz.WizardStep( + # ID where the value will be stored + id="ri_model_preference", + # Display name + name=Bcolors.HEADER+" Do you prefer model-based approach? (Type NA if you are not sure) "+Bcolors.ENDC, + # Help message + help="Y/N/NA", + validators=(wiz.required_validator), + default='Y' + ), + wiz.WizardStep( + # ID where the value will be stored + id="ri_model_availability", + # Display name + name=Bcolors.HEADER+" Do you have a model for model-based approach? (Type NA if not applicable) "+Bcolors.ENDC, + # Help message + help="Y/N/NA", + validators=(wiz.required_validator), + default='Y' + ), + wiz.WizardStep( + # ID where the value will be stored + id="ri_modelfree_value", + # Display name + name=Bcolors.HEADER+" In Model-Free approach, do you prefer value-based approach? (Type NA if not applicable) "+Bcolors.ENDC, + # Help message + help="Y/N/NA", + validators=(wiz.required_validator), + default='Y' + ), + wiz.WizardStep( + # ID where the value will be stored + id="ri_modelfree_value_state", + # Display name + name=Bcolors.HEADER+" In Model-Free Value-Based approach, do you prefer state-only model? (Type NA if not applicable) "+Bcolors.ENDC, + # Help message + help="Y/N/NA", + validators=(wiz.required_validator), + default='Y' + ), + wiz.WizardStep( + # ID where the value will be stored + id="ri_app_domain", + # Display name + name=Bcolors.HEADER+" What is the application domain ? (Please type number associated with type in 'help') "+Bcolors.ENDC, + # Help message + help=" 1:Computer Resource Mgmt.\n 2:Robotics\n 3:Traffic-Control\n 4:Reccommenders\n 5:Autonomous Vehicles\n 6:Games\n 7:Chemistry\n 8:Others\n", + validators=(wiz.required_validator), + default='1' + ), + ) + ) + + ############### All the Run Operations ###################### + def run_mainwiz(self): + """ + Run the Main Wizard + """ + self.main_wizard_l1() + self.main_l1_values = self.wiz_main_l1.run(self.shell) + if self.main_l1_values['data_availability'].lower() == 'y': + self.main_wizard_l2_b() + self.main_l2b_values = self.wiz_main_l2_b.run(self.shell) + if self.main_l2b_values['data_labe'].lower() == 'y': + self.supervised = True + else: + self.unsupervised = True + if self.main_l2b_values['data_programmability'].lower() == 'y': + print(Bcolors.FAIL+"ML is not required - Please consider alternate approaches\n"+Bcolors.ENDC) + else: + self.main_wizard_l3() + self.main_l3_values = self.wiz_main_l3.run(self.shell) + if self.main_l3_values['data_knowledge'].lower() == 'y': + print(Bcolors.OKGREEN+"Looks like you need ML, let's continue"+Bcolors.ENDC) + self.ml_needed = True + else: + self.main_wizard_l4() + self.main_l4_values = self.wiz_main_l4.run(self.shell) + if self.main_l4_values['data_pattern'].lower() == 'y': + print(Bcolors.OKGREEN+"Looks like you need ML, let's continue"+Bcolors.ENDC) + self.ml_needed = True + else: + print(Bcolors.FAIL+"ML is not required - Please consider alternate approaches\n"+Bcolors.ENDC) + else: + self.main_wizard_l2_a() + self.main_l2a_values = self.wiz_main_l2_a.run(self.shell) + if self.main_l2a_values['data_creativity'].lower() == 'y': + print(Bcolors.OKGREEN+"Looks like you need ML, let's continue"+Bcolors.ENDC) + self.ml_needed = True + self.reinforcement = True + else: + print(Bcolors.FAIL+"ML is not required - Please consider alternate approaches\n"+Bcolors.ENDC) + + def run_generic_wizard(self): + """ + Run Generic Wizard + """ + self.gen_wizard() + self.gen_values = self.wiz_generic.run(self.shell) + + def run_unsupervised_wizard(self): + """ + Run UnSupervised Learning Wizard. + """ + self.unsupervised_wizard() + self.unsup_values = self.wiz_unsupervised.run(self.shell) + + def run_reinforcement_wizard(self): + """ + Run Reinforced Learning Wizard + """ + self.reinforcement_wizard() + self.ri_values = self.wiz_reinforcement.run(self.shell) + + def decide_unsupervised(self): + """ + Decide which Unsupervised-learning to use + """ + repro = False + clus_prob = False + if int(self.unsup_values['unsup_goal']) == 1: + # Clustering + if 'high' in self.data_size: + if not self.reproducibility: + clus_prob = True + else: + repro = True + else: + if 'y' in self.unsup_values['unsup_clus_dv'].tolower(): + if 'y' in self.unsup_values['unsup_clus_groups'].tolower(): + clus_prob = True + else: + print("Unsupervised Learning model to consider: Hierarchical Clustering") + return + else: + repro = True + if repro: + if 'y' in self.unsup_values['unsup_clus_outliers'].tolower(): + print("Unsupervised Learning model to consider: Hierarchical Clustering") + else: + print("Unsupervised Learning model to consider: DBSCAN") + return + if clus_prob: + if 'y' in self.gen_values['data_output_prob'].tolower(): + print("Unsupervised Learning model to consider: Gaussian Mixture") + else: + print("Unsupervised Learning model to consider: KMeans") + return + elif int(self.unsup_values['unsup_goal']) == 2: + # Dimensionality Reduction + if 'y' in self.unsup_values['unsup_dr_topic_mod'].tolower(): + if 'y' in self.gen_values['data_output_prob'].tolower(): + print("Unsupervised Learning model to consider: SVD") + else: + print("Unsupervised Learning model to consider: LDA") + else: + print("Unsupervised Learning model to consider: PCA") + else: + print("Sorry. We need to discuss, please connect with Anuket Thoth Project <sridhar.rao@spirent.com>") + + def decide_reinforcement(self): + """ + Decide which reinforement learning to use. + """ + if (int(self.gen_values['data_type_output']) == 2 or + 'y' in self.ri_values['ri_model_preference'].tolower()): + # Model Bsaed + if 'y' in self.ri_values['ri_model_availability'].tolower(): + print("Reinforcement Learning model to consider - AlphaZero") + else: + print("Reinforcement Learning models to consider - World Models, I2A, MBMF, and MBVE") + elif 'n' in self.ri_values['ri_model_preference'].tolower(): + # Model-Free based approach. + if 'y' not in self.ri_values['ri_modelfree_value'].tolower(): + print("Reinforcement Learning models to consider: Policy Gradient and Actor Critic") + else: + if 'y' in self.ri_values['ri_modelfree_value_state'].tolower(): + print("Reinforcement Learning models to consider - Monte Carlo, TD(0), and TD(Lambda)") + else: + print("Reinforcement Learning models to consider - SARSA, QLearning, Deep Queue Nets") + else: + # Default + print("Sorry. We need to discuss, please connect with Anuket Thoth Project <sridhar.rao@spirent.com>") + + def perform_inference(self): + """ + Perform Inferences. Used across all 3 types. + """ + # Decide whether data is Low or High + self.data_size = 'unknown' + if ('k' in self.gen_values['data_size_bytes'].lower() or + 't' in self.gen_values['data_size_samples']): + self.data_size = 'low' + + if int(self.gen_values['metric_interpretability']) >= 3 : + self.interpretability = True + if int(self.gen_values['metric_speed']) >= 3 : + self.faster = True + if int(self.gen_values['metric_reproducibility']) >= 3 : + self.reproducibility = True + + # Decide Features relative to Data (ftod_ratio) - high/low + if ('k' in self.gen_values['data_size_bytes'].lower() or + 't' in self.gen_values['data_size_samples']): + if int(self.gen_values['data_features_count']) > 50: + self.ftod_ratio = 'high' + elif ('m' in self.gen_values['data_size_bytes'].lower() or + 'm' in self.gen_values['data_size_samples']): + if int(self.gen_values['data_features_count']) > 5000: + self.ftod_ratio = 'high' + else: + if int(self.gen_values['data_features_count']) > 500000: + self.ftod_ratio = 'high' + + + def decide_supervised(self): + """ + Decide which Supervised learning to use. + """ + if 'high' in self.data_size: + # Cover: DT, RF, RNN, CNN, ANN and Naive Bayes + if self.interpretability: + if self.faster: + print("Supervised Learning model to consider - Decision Tree") + else: + print("Supervised Learning model to consider - Random Forest") + else: + if int(self.gen_values['data_column']) == 3: + print("Supervised Learning model to consider - RNN") + elif (int(self.gen_values['data_column']) == 2 and + int(self.gen_values['data_signal_type']) == 1): + print("Supervised Learning model to consider - CNN") + elif (int(self.gen_values['data_column']) == 2 and + (int(self.gen_values['data_signal_type']) == 2 or + int(self.gen_values['data_signal_type']) == 3)): + if 'y' in self.gen_values['data_output_prob'].tolower(): + print("Supervised Learning model to consider - Naive Bayes") + else: + print("Supervised Learning model to consider - ANN") + else: + print("Supervised model to consider Learning - ANN") + elif 'low' in self.data_size: + from_b = False + # Cover: Regressions + if 'high' in self.ftod_ratio: + from_b = True + else: + print("Supervised Learning model to consider - SVN with Gaussian Kernel") + return + if int(self.gen_values['data_type_output']) != 2: + from_b = True + else: + if 'y' in self.gen_values['data_io_relation'].tolower(): + print("Supervised Learning model to consider - Linear Regression or Linear SVM") + else: + print("Supervised Learning model to consider - Polynomial Regression or nonLinear SVM") + return + if from_b: + if int(self.gen_values['data_output_type']) == 4: + if 'y' in self.gen_values['data_output_prob'].tolower(): + if 'y' in self.gen_values['data_cond_indep'].tolower(): + print("Supervised Learning model to consider - Naive Bayes") + else: + if 'y' in self.gen_values['data_correlation'].tolower(): + print("Supervised Learning model to consider - LASSO or Ridge Regression") + else: + print("Supervised Learning model to consider - Logistic Regression") + else: + print("Supervised Learning model to consider - Polynomial Regression or nonLinear SVM") + + else: + print("Supervised Learning model to consider - KNN") + else: + # Default + print("Sorry. We need to discuss, please connect with Anuket Thoth Project <sridhar.rao@spirent.com>") + + def ask_and_decide(self): + """ + THe Main Engine + """ + self.run_mainwiz() + if self.ml_needed: + self.run_generic_wizard() + if self.supervised: + self.decide_supervised() + elif self.unsupervised: + self.run_unsupervised_wizard() + self.decide_unsupervised() + elif self.reinforcement: + self.run_reinforcement_wizard() + self.decide_reinforcement() + + +def signal_handler(signum, frame): + """ + Signal Handler + """ + print("\n You interrupted, No Suggestion will be provided!") + print(signum, frame) + sys.exit(0) + +def main(): + """ + The Main Function + """ + try: + algowiz = AlgoSelectorWizard() + algowiz.ask_and_decide() + except(KeyboardInterrupt, MemoryError): + print("Some Error Occured - No Suggestion can be provided") + + print("Thanks for using the Algoselector-Wizard, " + + "Hope our suggestion will be useful") + +if __name__ == "__main__": + signal.signal(signal.SIGINT, signal_handler) + main() |