summary | refs | log | tree | commit | diff | stats
path: root/tools
diff options
context:
space:
mode:
author: Sridhar K. N. Rao <srao@linuxfoundation.org> 2022-05-03 05:58:12 +0530
committer: Sridhar K. N. Rao <srao@linuxfoundation.org> 2022-05-03 05:59:24 +0530
commit: 77c3e230fb707d56044234a6e928760c96c95e54 (patch)
tree: 3f25dce2446ce5c156000d6614ebf7396f60aa97 /tools
parent: 331e4102e201a564ec06a5a71cdc0ab539564c12 (diff)
[TOOL] Model Selector update.

This patch adds stability updates to Model Selector.

Signed-off-by: Sridhar K. N. Rao <srao@linuxfoundation.org>
Change-Id: I74b4e9a1b1837d85dac7f0d091019a23b1abd7b5
Diffstat (limited to 'tools')
-rw-r--r-- tools/modelselector/modelselector.py 503
1 file changed, 365 insertions, 138 deletions
diff --git a/tools/modelselector/modelselector.py b/tools/modelselector/modelselector.py
index 90b289c..a3a0b0b 100644
--- a/tools/modelselector/modelselector.py
+++ b/tools/modelselector/modelselector.py
@@ -1,5 +1,5 @@
-# Copyright 2021 Spirent Communications.
-# sridhar.rao@spirent.com
+# Copyright 2022 Linux Foundation.
+# srao@linuxfoundation.org
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
@@ -18,10 +18,8 @@ Tool to suggest which ML approach is more applicable for
a particular data and usecase.
TODO:
1. Minimize code.
-a. Reduce returns.
-b. Optimize loops.
-
2. Add Informative data to the user.
+3. Check for Size Entry - 1G/K ..
"""
from __future__ import print_function
@@ -54,6 +52,7 @@ class AlgoSelectorWizard():
Perform Initialization.
"""
self.shell = Shell()
+ # Set of all values from the user
self.main_values = {}
self.main_l1_values = {}
self.main_l2a_values = {}
@@ -63,6 +62,16 @@ class AlgoSelectorWizard():
self.unsup_values = {}
self.ri_values = {}
self.gen_values = {}
+ self.gen_choice_values = {}
+ self.gen_metrics_values = {}
+ self.gen_data_main_values = {}
+ self.gen_data_text_values = {}
+ self.gen_data_features_values = {}
+ self.gen_data_signal_values = {}
+ self.gen_about_data_basic_values = {}
+ self.gen_about_data_adv_values = {}
+ self.gen_about_data_output_values = {}
+ # Set of Wizards.
self.wiz_main = None
self.wiz_main_l1 = None
self.wiz_main_l2_a = None
@@ -70,8 +79,18 @@ class AlgoSelectorWizard():
self.wiz_main_l3 = None
self.wiz_main_l4 = None
self.wiz_generic = None
+ self.wiz_generic_choice = None
+ self.wiz_geneirc_metric = None
+ self.wiz_generic_data_main = None
+ self.wiz_generic_data_signal = None
+ self.wiz_generic_data_features = None
+ self.wiz_generic_data_text = None
+ self.wiz_generic_data_basic = None
+ self.wiz_generic_data_adv = None
+ self.wiz_generic_data_output = None
self.wiz_unsupervised = None
self.wiz_reinforcement = None
+ # Some Inferences
self.ml_needed = False
self.supervised = False
self.unsupervised = False
@@ -101,8 +120,8 @@ class AlgoSelectorWizard():
# Display name
name=Bcolors.HEADER+"Do you have access to data about different situations, or that describes a lot of examples of situations"+Bcolors.ENDC,
# Help message
- help="Y/N/U - Yes/No/Unknown",
- validators=(wiz.required_validator),
+ help="Y/N - Yes/No",
+ validators=(wiz.required_validator, wiz.boolean_validator),
default='Y',
),
)
@@ -123,8 +142,8 @@ class AlgoSelectorWizard():
# Display name
name=Bcolors.HEADER+"Will a system be able to gather a lot of data by trying sequences of actions in many different situations and seeing the results"+Bcolors.ENDC,
# Help message
- help="Y/N/U - Yes/No/Unknown",
- validators=(wiz.required_validator),
+ help="Y/N - Yes/No",
+ validators=(wiz.required_validator, wiz.boolean_validator),
default='Y',
),
)
@@ -144,10 +163,10 @@ class AlgoSelectorWizard():
# ID where the value will be stored
id="data_label",
# Display name
- name=Bcolors.HEADER+" Do you have Labelled data? (Type Y/N/U - Yes/No/Unknown). Type help for description of label. "+Bcolors.ENDC,
+ name=Bcolors.HEADER+" Do you have Labelled data? (Type Y/N - Yes/No). Type help for description of label. "+Bcolors.ENDC,
# Help message
help=label,
- validators=(wiz.required_validator),
+ validators=(wiz.required_validator, wiz.boolean_validator),
default='Y',
),
wiz.WizardStep(
@@ -156,9 +175,9 @@ class AlgoSelectorWizard():
# Display name
name=Bcolors.HEADER+"Can a program or set of rules decide what actions to take based on the data you have about the situations"+Bcolors.ENDC,
# Help message
- help="Y/N/U - Yes/No/Unknown",
- validators=(wiz.required_validator),
- default='Y',
+ help="Y/N - Yes/No",
+ validators=(wiz.required_validator, wiz.boolean_validator),
+ default='N',
),
)
)
@@ -179,8 +198,8 @@ class AlgoSelectorWizard():
# Display name
name=Bcolors.HEADER+"Could a knowledgeable human decide what actions to take based on the data you have about the situations"+Bcolors.ENDC,
# Help message
- help="Y/N/U - Yes/No/Unknown",
- validators=(wiz.required_validator),
+ help="Y/N - Yes/No",
+ validators=(wiz.required_validator, wiz.boolean_validator),
default='Y',
),
)
@@ -201,19 +220,19 @@ class AlgoSelectorWizard():
# Display name
name=Bcolors.HEADER+"Could there be patterns in these situations that the humans haven't recognized before"+Bcolors.ENDC,
# Help message
- help="Y/N/U - Yes/No/Unknown",
- validators=(wiz.required_validator),
+ help="Y/N - Yes/No.",
+ validators=(wiz.required_validator, wiz.boolean_validator),
default='Y'
),
)
)
### GENERIC Wizards - GOAL, METRICS, DATA ##############################
- def gen_wizard(self):
+ def gen_choice_wizard(self):
"""
Generic Wizard - Goal, metrics, data
"""
- self.wiz_generic = wiz.PromptWizard(
- name=Bcolors.OKBLUE+"Understanding Goal, Metrics, Data and Output Type"+Bcolors.ENDC,
+ self.wiz_generic_choice = wiz.PromptWizard(
+ name=Bcolors.OKBLUE+"Understanding Goal, and Preferences"+Bcolors.ENDC,
description="",
steps=(
# The list of input prompts to ask the user.
@@ -224,17 +243,84 @@ class AlgoSelectorWizard():
name=Bcolors.HEADER+" What is your goal with the data? Predict, Describe or Explore"+Bcolors.ENDC,
# Help message
help="Enter one of Predict/Describe/Explore",
- validators=(wiz.required_validator),
+ validators=(wiz.required_validator, wiz.choice_validator(['Predict',
+ 'predict',
+ 'Describe',
+ 'describe',
+ 'Explore',
+ 'explore'])),
default='Explore'
),
wiz.WizardStep(
# ID where the value will be stored
+ id="data_metrics_pref",
+ # Display name
+ name=Bcolors.HEADER+" Do you know which metrics (speed, accuracy, etc.) are more important for you? "+Bcolors.ENDC,
+ # Help message
+ help="Y/N - Yes/No",
+ validators=(wiz.required_validator, wiz.boolean_validator),
+ default='Y'
+ ),
+ wiz.WizardStep(
+ # ID where the value will be stored
+ id="data_main",
+ # Display name
+ name=Bcolors.HEADER+" Do you know about the input data type (If its signal/features/text) ? "+Bcolors.ENDC,
+ # Help message
+ help="Y/N - Yes/No",
+ validators=(wiz.required_validator, wiz.boolean_validator),
+ default='Y'
+ ),
+ wiz.WizardStep(
+ # ID where the value will be stored
+ id="data_databasic_pref",
+ # Display name
+ name=Bcolors.HEADER+" Do you have basic information (size, count, etc.) about the input data? "+Bcolors.ENDC,
+ # Help message
+ help="Y/N - Yes/No",
+ validators=(wiz.required_validator, wiz.boolean_validator),
+ default='Y'
+ ),
+ wiz.WizardStep(
+ # ID where the value will be stored
+ id="data_dataadv_pref",
+ # Display name
+ name=Bcolors.HEADER+" Do you have advanced information (distribution, relation, independency, etc.) about the input data? "+Bcolors.ENDC,
+ # Help message
+ help="Y/N - Yes/No",
+ validators=(wiz.required_validator, wiz.boolean_validator),
+ default='Y'
+ ),
+ wiz.WizardStep(
+ # ID where the value will be stored
+ id="data_dataoutput_pref",
+ # Display name
+ name=Bcolors.HEADER+" Do you have basic information (size, count, etc.) about the output? "+Bcolors.ENDC,
+ # Help message
+ help="Y/N - Yes/No",
+ validators=(wiz.required_validator, wiz.boolean_validator),
+ default='Y'
+ ),
+ )
+ )
+
+ def gen_metrics_wizard(self):
+ """
+ Generic Wizard - Goal, metrics, data
+ """
+ self.wiz_generic_metrics = wiz.PromptWizard(
+ name=Bcolors.OKBLUE+"Understanding Goal, Metrics, Data and Output Type"+Bcolors.ENDC,
+ description="",
+ steps=(
+ # The list of input prompts to ask the user.
+ wiz.WizardStep(
+ # ID where the value will be stored
id="metric_accuracy",
# Display name
name=Bcolors.HEADER+" How important the metric 'Accuracy' is for you? 1-5: 1- Least important 5- Most Important"+Bcolors.ENDC,
# Help message
help="Enter 1-5: 1 being least important, and 5 being most important",
- validators=(wiz.required_validator),
+ validators=(wiz.required_validator, wiz.int_validator(1, 5)),
default='1'
),
wiz.WizardStep(
@@ -244,7 +330,7 @@ class AlgoSelectorWizard():
name=Bcolors.HEADER+" How important the metric 'Speed' is for you? 1-5: 1- Least important 5- Most Important"+Bcolors.ENDC,
# Help message
help="Enter 1-5: 1 being least important, and 5 being most important",
- validators=(wiz.required_validator),
+ validators=(wiz.required_validator, wiz.int_validator(1, 5)),
default='1'
),
wiz.WizardStep(
@@ -254,7 +340,7 @@ class AlgoSelectorWizard():
name=Bcolors.HEADER+" How important the metric 'Interpretability' is for you? 1-5: 1- Least important 5- Most Important"+Bcolors.ENDC,
# Help message
help="Enter 1-5: 1 being least important, and 5 being most important",
- validators=(wiz.required_validator),
+ validators=(wiz.required_validator, wiz.int_validator(1, 5)),
default='1'
),
wiz.WizardStep(
@@ -264,7 +350,7 @@ class AlgoSelectorWizard():
name=Bcolors.HEADER+" How important the metric 'Reproducibility' is for you? 1-5: 1- Least important 5- Most Important"+Bcolors.ENDC,
# Help message
help="Enter 1-5: 1 being least important, and 5 being most important",
- validators=(wiz.required_validator),
+ validators=(wiz.required_validator, wiz.int_validator(1, 5)),
default='1'
),
wiz.WizardStep(
@@ -274,47 +360,73 @@ class AlgoSelectorWizard():
name=Bcolors.HEADER+" How important the metric 'Ease of Implementation and Maintenance' is for you? 1-5: 1- Least important 5- Most Important"+Bcolors.ENDC,
# Help message
help="Enter 1-5: 1 being least important, and 5 being most important",
- validators=(wiz.required_validator),
+ validators=(wiz.required_validator, wiz.int_validator(1, 5)),
default='1'
),
+ )
+ )
+
+ def gen_data_main_wizard(self):
+ """
+ Generic Wizard - Goal, metrics, data
+ """
+ self.wiz_generic_data_main = wiz.PromptWizard(
+ name=Bcolors.OKBLUE+"Understanding Goal, and Preferences"+Bcolors.ENDC,
+ description="",
+ steps=(
+ # The list of input prompts to ask the user.
wiz.WizardStep(
# ID where the value will be stored
id="data_column",
# Display name
- name=Bcolors.HEADER+" What does the data (columns) represent? well defined 'Features', 'signals' (Timeseries, pixels, etc) or Text - (Please type the associated number)"+Bcolors.ENDC,
+ name=Bcolors.HEADER+" What does the data (columns) represent? Please type help and select the associated number"+Bcolors.ENDC,
# Help message
- help="1. Well Defined Features\n 2. Signals\n 3. Text - Unstructured\n 4. None of the above\n",
- validators=(wiz.required_validator),
- default='Features'
+ help="1. Well Defined Features\n 2. Signals - Timeseries, pixels, etc\n 3. Text - Unstructured\n 4. None of the above\n",
+ validators=(wiz.required_validator, wiz.int_validator(1, 4)),
+ default='1'
),
+ )
+ )
+
+ def gen_data_signal_wizard(self):
+ """
+ Generic Wizard - Goal, metrics, data
+ """
+ self.wiz_generic_data_signal = wiz.PromptWizard(
+ name=Bcolors.OKBLUE+"Understanding Goal, and Preferences"+Bcolors.ENDC,
+ description="",
+ steps=(
+ # The list of input prompts to ask the user.
wiz.WizardStep(
# ID where the value will be stored
id="data_signal_type",
# Display name
- name=Bcolors.HEADER+" If Signals, can you choose any one from the below list? "+Bcolors.ENDC,
+ name=Bcolors.HEADER+" If Signals, can you choose any one from the below list? Please type help for list "+Bcolors.ENDC,
# Help message
help="1. Image\n 2. Audio\n 3. Timeseries\n 4. None of the above\n 5. Not Applicable\n ",
- validators=(wiz.required_validator),
- default='3'
- ),
- wiz.WizardStep(
- # ID where the value will be stored
- id="data_text_type",
- # Display name
- name=Bcolors.HEADER+" If Text, can you choose any one from the below list? "+Bcolors.ENDC,
- # Help message
- help="1. Webpages\n 2. Emails\n 3. Social-Media Posts\n 4. Books\n 5. Formal Articles\n 6. Speech converted to text\n 7. None of the above\n 8. Not Applicable\n ",
- validators=(wiz.required_validator),
+ validators=(wiz.required_validator, wiz.int_validator(1, 5)),
default='3'
),
+ )
+ )
+
+ def gen_data_features_wizard(self):
+ """
+ Generic Wizard - Goal, metrics, data
+ """
+ self.wiz_generic_data_features = wiz.PromptWizard(
+ name=Bcolors.OKBLUE+"Understanding Goal, and Preferences"+Bcolors.ENDC,
+ description="",
+ steps=(
+ # The list of input prompts to ask the user.
wiz.WizardStep(
# ID where the value will be stored
id="data_features",
# Display name
name=Bcolors.HEADER+" If features, are they well defined? i.e., are all the variables well understood? "+Bcolors.ENDC,
# Help message
- help="Y/N/NA",
- validators=(wiz.required_validator),
+ help="Y/N",
+ validators=(wiz.required_validator, wiz.boolean_validator),
default='Y'
),
wiz.WizardStep(
@@ -323,80 +435,139 @@ class AlgoSelectorWizard():
# Display name
name=Bcolors.HEADER+" If features, How many are there? "+Bcolors.ENDC,
# Help message
- help="Number or NA",
- validators=(wiz.required_validator),
+ help="Number only - Approximate should be OK.",
+ validators=(wiz.required_validator, wiz.int_validator(1, 100000)),
default='10'
),
+ )
+ )
+
+ def gen_data_text_wizard(self):
+ """
+ Generic Wizard - Goal, metrics, data
+ """
+ self.wiz_generic_data_text = wiz.PromptWizard(
+ name=Bcolors.OKBLUE+"Understanding Goal, and Preferences"+Bcolors.ENDC,
+ description="",
+ steps=(
+ # The list of input prompts to ask the user.
wiz.WizardStep(
# ID where the value will be stored
- id="data_distribution",
+ id="data_text_type",
# Display name
- name=Bcolors.HEADER+" Are you aware of any 'Distribution' that is inherent to the data, we can take advantage of?"+Bcolors.ENDC,
+ name=Bcolors.HEADER+" If Text, can you choose any one from the below list? Please type help for list"+Bcolors.ENDC,
# Help message
- help="Y/N/U",
- validators=(wiz.required_validator),
- default='Y'
+ help="1. Webpages\n 2. Emails\n 3. Social-Media Posts\n 4. Books\n 5. Formal Articles\n 6. Speech converted to text\n 7. None of the above\n 8. Not Applicable\n ",
+ validators=(wiz.required_validator, wiz.int_validator(1, 8)),
+ default='3'
),
+
+ )
+ )
+
+ def gen_about_data_basic_wizard(self):
+ """
+ Generic Wizard - Goal, metrics, data
+ """
+ self.wiz_generic_data_basic = wiz.PromptWizard(
+ name=Bcolors.OKBLUE+"Understanding Basic Input Data Information"+Bcolors.ENDC,
+ description="",
+ steps=(
+ # The list of input prompts to ask the user.
wiz.WizardStep(
# ID where the value will be stored
- id="data_io_relation",
+ id="data_missing",
# Display name
- name=Bcolors.HEADER+" Is the probability of 'Linear Relation' between input and the output is high?"+Bcolors.ENDC,
+ name=Bcolors.HEADER+" Are there any missing values in the data? "+Bcolors.ENDC,
# Help message
- help="Y/N/U",
- validators=(wiz.required_validator),
- default='Y'
+ help="Y/N",
+ validators=(wiz.required_validator, wiz.boolean_validator),
+ default='N'
),
wiz.WizardStep(
# ID where the value will be stored
- id="data_correlation",
+ id="data_size_bytes",
# Display name
- name=Bcolors.HEADER+" Are you confident that there is NO high correlation among the independent variables in your day?"+Bcolors.ENDC,
+ name=Bcolors.HEADER+" How big is the data in terms of size? (Use K/M/G Bytes unit) "+Bcolors.ENDC,
# Help message
- help="Y/N/U. Change in one ",
+ help="Number(integer) and unit: K for Kilo, M for Mega and G for Giga. Ex: 10G for 10 Giga bytes",
validators=(wiz.required_validator),
- default='Y'
+ default='1G'
),
wiz.WizardStep(
# ID where the value will be stored
- id="data_cond_indep",
+ id="data_size_samples",
# Display name
- name=Bcolors.HEADER+" Are you confident that the variables are conditionally independent?"+Bcolors.ENDC,
+ name=Bcolors.HEADER+" How big is the data in terms of samples? (Use T/M/B Samples) "+Bcolors.ENDC,
# Help message
- help="Y/N/U. If probability that it rains given lightining and thunder is same as probability that it rains given lightining, then rain and thunder are conditionally independent",
+ help="Number(integer) and unit: T for Thousand, M for Million and B for Billion. Ex: 1M for 1 Million Samples",
validators=(wiz.required_validator),
+ default='1M'
+ ),
+ )
+ )
+
+ def gen_about_data_advanced_wizard(self):
+ """
+ Generic Wizard - Goal, metrics, data
+ """
+ self.wiz_generic_data_adv = wiz.PromptWizard(
+ name=Bcolors.OKBLUE+"Understanding Advanced Input Data Information"+Bcolors.ENDC,
+ description="",
+ steps=(
+ # The list of input prompts to ask the user.
+ wiz.WizardStep(
+ # ID where the value will be stored
+ id="data_distribution",
+ # Display name
+ name=Bcolors.HEADER+" Are you aware of any 'Distribution' that is inherent to the data, we can take advantage of?"+Bcolors.ENDC,
+ # Help message
+ help="Y/N - Yes",
+ validators=(wiz.required_validator, wiz.boolean_validator),
default='Y'
),
wiz.WizardStep(
# ID where the value will be stored
- id="data_missing",
+ id="data_io_relation",
# Display name
- name=Bcolors.HEADER+" Are there any missing values in the data? "+Bcolors.ENDC,
+ name=Bcolors.HEADER+" Is the probability of 'Linear Relation' between input and the output is high?"+Bcolors.ENDC,
# Help message
- help="Y/N/U",
- validators=(wiz.required_validator),
- default='N'
+ help="Y/N - Yes/No",
+ validators=(wiz.required_validator, wiz.boolean_validator),
+ default='Y'
),
wiz.WizardStep(
# ID where the value will be stored
- id="data_size_bytes",
+ id="data_correlation",
# Display name
- name=Bcolors.HEADER+" How big is the data in terms of size? (Use K/M/G Bytes unit) "+Bcolors.ENDC,
+ name=Bcolors.HEADER+" Are you confident that there is NO high correlation among the independent variables in your day?"+Bcolors.ENDC,
# Help message
- help="Number(integer) and unit: K for Kilo, M for Mega and G for Giga. Ex: 10G for 10 Giga bytes",
- validators=(wiz.required_validator),
- default='1G'
+ help="Y/N/ - Yes/No ",
+ validators=(wiz.required_validator, wiz.boolean_validator),
+ default='Y'
),
wiz.WizardStep(
# ID where the value will be stored
- id="data_size_samples",
+ id="data_cond_indep",
# Display name
- name=Bcolors.HEADER+" How big is the data in terms of samples? (Use T/M/B Samples) "+Bcolors.ENDC,
+ name=Bcolors.HEADER+" Are you confident that the variables are conditionally independent?"+Bcolors.ENDC,
# Help message
- help="Number(integer) and unit: T for Thousand, M for Million and B for Billion. Ex: 1M for 1 Million Samples",
- validators=(wiz.required_validator),
- default='1M'
+ help="Y/N/. If probability that it rains given lightining and thunder is same as probability that it rains given lightining, then rain and thunder are conditionally independent",
+ validators=(wiz.required_validator, wiz.boolean_validator),
+ default='Y'
),
+ )
+ )
+
+ def gen_about_output_wizard(self):
+ """
+ Generic Wizard - Goal, metrics, data
+ """
+ self.wiz_generic_data_output = wiz.PromptWizard(
+ name=Bcolors.OKBLUE+"Understanding Data Output"+Bcolors.ENDC,
+ description="",
+ steps=(
+ # The list of input prompts to ask the user.
wiz.WizardStep(
# ID where the value will be stored
id="data_type_output",
@@ -404,7 +575,7 @@ class AlgoSelectorWizard():
name=Bcolors.HEADER+" What is the expected output data type ? (Please type number associated with type in 'help') "+Bcolors.ENDC,
# Help message
help=" 1:Numerical-Discrete\n 2:Numerical-Continuous\n 3:Ordinal\n 4:Categorical-Binary\n 5:Categorical-Multiclass",
- validators=(wiz.required_validator),
+ validators=(wiz.required_validator, wiz.int_validator(1, 5)),
default='1'
),
wiz.WizardStep(
@@ -414,7 +585,7 @@ class AlgoSelectorWizard():
name=Bcolors.HEADER+" Is the expected output data a probability value ? "+Bcolors.ENDC,
# Help message
help="Y/N",
- validators=(wiz.required_validator),
+ validators=(wiz.required_validator, wiz.boolean_validator),
default='N'
),
)
@@ -425,7 +596,7 @@ class AlgoSelectorWizard():
"""
The Un-Supervized Learning Wizard
"""
- self.wiz_generic = wiz.PromptWizard(
+ self.wiz_unsupervised = wiz.PromptWizard(
name=Bcolors.OKBLUE+"Understanding Goal, Metrics, Data and Output Type"+Bcolors.ENDC,
description="",
steps=(
@@ -437,7 +608,7 @@ class AlgoSelectorWizard():
name=Bcolors.HEADER+" What is the main goal? (Please type number associated with type in 'help')"+Bcolors.ENDC,
# Help message
help="1: Explore Similar Groups (clustering) \n 2: Perform Dimensionality Reduction\n 3: Others\n",
- validators=(wiz.required_validator),
+ validators=(wiz.required_validator, wiz.int_validator(1, 3)),
default='1'
),
wiz.WizardStep(
@@ -447,7 +618,8 @@ class AlgoSelectorWizard():
name=Bcolors.HEADER+" If dimensionality reduction, do you prefer topic modelling ? (Please type NA is you are not sure)"+Bcolors.ENDC,
# Help message
help="Y/N/NA",
- validators=(wiz.required_validator),
+ validators=(wiz.required_validator, wiz.choice_validator(['Y','N','NA','Na',
+ 'y','n','na','nA'])),
default='NA'
),
wiz.WizardStep(
@@ -457,7 +629,8 @@ class AlgoSelectorWizard():
name=Bcolors.HEADER+" Are you aware of density variations in your data ? (Please type NA is you are not sure)"+Bcolors.ENDC,
# Help message
help="Y/N/NA",
- validators=(wiz.required_validator),
+ validators=(wiz.required_validator, wiz.choice_validator(['Y','N','NA','Na',
+ 'y','n','na','nA'])),
default='NA'
),
wiz.WizardStep(
@@ -467,7 +640,8 @@ class AlgoSelectorWizard():
name=Bcolors.HEADER+" Are there too many outliers in your data ? (Please type NA is you are not sure)"+Bcolors.ENDC,
# Help message
help="Y/N/NA",
- validators=(wiz.required_validator),
+ validators=(wiz.required_validator, wiz.choice_validator(['Y','N','NA','Na',
+ 'y','n','na','nA'])),
default='NA'
),
wiz.WizardStep(
@@ -477,7 +651,8 @@ class AlgoSelectorWizard():
name=Bcolors.HEADER+" If clustering, do you know how many groups to form? (Please type NA is you are not sure)"+Bcolors.ENDC,
# Help message
help="Y/N/NA",
- validators=(wiz.required_validator),
+ validators=(wiz.required_validator, wiz.choice_validator(['Y','N','NA','Na',
+ 'y','n','na','nA'])),
default='NA'
),
@@ -522,7 +697,8 @@ class AlgoSelectorWizard():
name=Bcolors.HEADER+" Do you prefer model-based approach? (Type NA if you are not sure) "+Bcolors.ENDC,
# Help message
help="Y/N/NA",
- validators=(wiz.required_validator),
+ validators=(wiz.required_validator, wiz.choice_validator(['Y','N','NA','Na',
+ 'y','n','na','nA'])),
default='Y'
),
wiz.WizardStep(
@@ -532,7 +708,8 @@ class AlgoSelectorWizard():
name=Bcolors.HEADER+" Do you have a model for model-based approach? (Type NA if not applicable) "+Bcolors.ENDC,
# Help message
help="Y/N/NA",
- validators=(wiz.required_validator),
+ validators=(wiz.required_validator, wiz.choice_validator(['Y','N','NA','Na',
+ 'y','n','na','nA'])),
default='Y'
),
wiz.WizardStep(
@@ -542,7 +719,8 @@ class AlgoSelectorWizard():
name=Bcolors.HEADER+" In Model-Free approach, do you prefer value-based approach? (Type NA if not applicable) "+Bcolors.ENDC,
# Help message
help="Y/N/NA",
- validators=(wiz.required_validator),
+ validators=(wiz.required_validator, wiz.choice_validator(['Y','N','NA','Na',
+ 'y','n','na','nA'])),
default='Y'
),
wiz.WizardStep(
@@ -552,7 +730,8 @@ class AlgoSelectorWizard():
name=Bcolors.HEADER+" In Model-Free Value-Based approach, do you prefer state-only model? (Type NA if not applicable) "+Bcolors.ENDC,
# Help message
help="Y/N/NA",
- validators=(wiz.required_validator),
+ validators=(wiz.required_validator, wiz.choice_validator(['Y','N','NA','Na',
+ 'y','n','na','nA'])),
default='Y'
),
wiz.WizardStep(
@@ -562,7 +741,7 @@ class AlgoSelectorWizard():
name=Bcolors.HEADER+" What is the application domain ? (Please type number associated with type in 'help') "+Bcolors.ENDC,
# Help message
help=" 1:Computer Resource Mgmt.\n 2:Robotics\n 3:Traffic-Control\n 4:Reccommenders\n 5:Autonomous Vehicles\n 6:Games\n 7:Chemistry\n 8:Others\n",
- validators=(wiz.required_validator),
+ validators=(wiz.required_validator, wiz.int_validator(1, 8)),
default='1'
),
)
@@ -575,25 +754,26 @@ class AlgoSelectorWizard():
"""
self.main_wizard_l1()
self.main_l1_values = self.wiz_main_l1.run(self.shell)
- if self.main_l1_values['data_availability'].lower() == 'y':
+ if self.main_l1_values['data_availability']:
+ print("OK-1")
self.main_wizard_l2_b()
self.main_l2b_values = self.wiz_main_l2_b.run(self.shell)
- if self.main_l2b_values['data_labe'].lower() == 'y':
+ if self.main_l2b_values['data_label']:
self.supervised = True
else:
self.unsupervised = True
- if self.main_l2b_values['data_programmability'].lower() == 'y':
+ if self.main_l2b_values['data_programmability']:
print(Bcolors.FAIL+"ML is not required - Please consider alternate approaches\n"+Bcolors.ENDC)
else:
self.main_wizard_l3()
self.main_l3_values = self.wiz_main_l3.run(self.shell)
- if self.main_l3_values['data_knowledge'].lower() == 'y':
+ if self.main_l3_values['data_knowledge']:
print(Bcolors.OKGREEN+"Looks like you need ML, let's continue"+Bcolors.ENDC)
self.ml_needed = True
else:
self.main_wizard_l4()
self.main_l4_values = self.wiz_main_l4.run(self.shell)
- if self.main_l4_values['data_pattern'].lower() == 'y':
+ if self.main_l4_values['data_pattern']:
print(Bcolors.OKGREEN+"Looks like you need ML, let's continue"+Bcolors.ENDC)
self.ml_needed = True
else:
@@ -601,7 +781,7 @@ class AlgoSelectorWizard():
else:
self.main_wizard_l2_a()
self.main_l2a_values = self.wiz_main_l2_a.run(self.shell)
- if self.main_l2a_values['data_creativity'].lower() == 'y':
+ if self.main_l2a_values['data_creativity']:
print(Bcolors.OKGREEN+"Looks like you need ML, let's continue"+Bcolors.ENDC)
self.ml_needed = True
self.reinforcement = True
@@ -612,8 +792,55 @@ class AlgoSelectorWizard():
"""
Run Generic Wizard
"""
- self.gen_wizard()
- self.gen_values = self.wiz_generic.run(self.shell)
+ self.gen_choice_wizard()
+ self.gen_choice_values = self.wiz_generic_choice.run(self.shell)
+ if self.gen_choice_values['data_metrics_pref']:
+ self.gen_metrics_wizard()
+ self.gen_metrics_values = self.wiz_generic_metrics.run(self.shell)
+ if self.gen_choice_values['data_main']:
+ self.gen_data_main_wizard()
+ self.gen_data_main_values = self.wiz_generic_data_main.run(self.shell)
+ if int(self.gen_data_main_values['data_column']) == 3:
+ self.gen_data_text_wizard()
+ self.gen_data_text_values = self.wiz_generic_data_text.run(self.shell)
+ else:
+ self.gen_data_text_values = {'data_text_type': '3'}
+ if int(self.gen_data_main_values['data_column']) == 1:
+ self.gen_data_features_wizard()
+ self.gen_data_features_values = self.wiz_generic_data_features.run(self.shell)
+ else:
+ self.gen_data_features_values = {'data_features': 'Y',
+ 'data_features_count': '10'}
+ if int(self.gen_data_main_values['data_column']) == 2:
+ self.gen_data_signal_wizard()
+ self.gen_data_signal_values = self.wiz_generic_data_signal.run(self.shell)
+ else:
+ self.gen_data_signal_values = {'data_signal_type': '1'}
+ else:
+ self.gen_data_main_values = {'data_column': '1'}
+ print("Unknown Data Type")
+ if self.gen_choice_values['data_databasic_pref']:
+ self.gen_about_data_basic_wizard()
+ self.gen_about_data_basic_values = self.wiz_generic_data_basic.run(self.shell)
+ else:
+ self.gen_about_data_basic_values = {'data_missing':'N',
+ 'data_size_bytes': '1G',
+ 'data_size_samples': '1M'}
+ if self.gen_choice_values['data_dataadv_pref']:
+ self.gen_about_data_advanced_wizard()
+ self.gen_about_data_adv_values = self.wiz_generic_data_adv.run(self.shell)
+ else:
+ self.gen_about_data_adv_values = {'data_distribution': 'N',
+ 'data_io_relation': 'N',
+ 'data_correlation': 'N',
+ 'data_cond_indep': 'N'}
+ if self.gen_choice_values['data_dataoutput_pref']:
+ self.gen_about_output_wizard()
+ self.gen_about_data_output_values = self.wiz_generic_data_output.run(self.shell)
+ else:
+ self.gen_about_data_output_values = {'data_type_output': '1',
+ 'data_output_prob': 'N'}
+
def run_unsupervised_wizard(self):
"""
@@ -643,8 +870,8 @@ class AlgoSelectorWizard():
else:
repro = True
else:
- if 'y' in self.unsup_values['unsup_clus_dv'].tolower():
- if 'y' in self.unsup_values['unsup_clus_groups'].tolower():
+ if 'y' in self.unsup_values['unsup_clus_dv'].lower():
+ if 'y' in self.unsup_values['unsup_clus_groups'].lower():
clus_prob = True
else:
print("Unsupervised Learning model to consider: Hierarchical Clustering")
@@ -652,21 +879,21 @@ class AlgoSelectorWizard():
else:
repro = True
if repro:
- if 'y' in self.unsup_values['unsup_clus_outliers'].tolower():
+ if 'y' in self.unsup_values['unsup_clus_outliers'].lower():
print("Unsupervised Learning model to consider: Hierarchical Clustering")
else:
print("Unsupervised Learning model to consider: DBSCAN")
return
if clus_prob:
- if 'y' in self.gen_values['data_output_prob'].tolower():
+ if 'y' in self.gen_about_data_output_values['data_output_prob'].lower():
print("Unsupervised Learning model to consider: Gaussian Mixture")
else:
print("Unsupervised Learning model to consider: KMeans")
return
elif int(self.unsup_values['unsup_goal']) == 2:
# Dimensionality Reduction
- if 'y' in self.unsup_values['unsup_dr_topic_mod'].tolower():
- if 'y' in self.gen_values['data_output_prob'].tolower():
+ if 'y' in self.unsup_values['unsup_dr_topic_mod'].lower():
+ if 'y' in self.gen_about_data_output_values['data_output_prob'].lower():
print("Unsupervised Learning model to consider: SVD")
else:
print("Unsupervised Learning model to consider: LDA")
@@ -679,25 +906,25 @@ class AlgoSelectorWizard():
"""
Decide which reinforement learning to use.
"""
- if (int(self.gen_values['data_type_output']) == 2 or
- 'y' in self.ri_values['ri_model_preference'].tolower()):
+ if (int(self.gen_about_data_output_values['data_type_output']) == 2 or
+ 'y' in self.ri_values['ri_model_preference'].lower()):
# Model Bsaed
- if 'y' in self.ri_values['ri_model_availability'].tolower():
+ if 'y' in self.ri_values['ri_model_availability'].lower():
print("Reinforcement Learning model to consider - AlphaZero")
else:
print("Reinforcement Learning models to consider - World Models, I2A, MBMF, and MBVE")
- elif 'n' in self.ri_values['ri_model_preference'].tolower():
+ elif 'n' in self.ri_values['ri_model_preference'].lower():
# Model-Free based approach.
- if 'y' not in self.ri_values['ri_modelfree_value'].tolower():
+ if 'y' not in self.ri_values['ri_modelfree_value'].lower():
print("Reinforcement Learning models to consider: Policy Gradient and Actor Critic")
else:
- if 'y' in self.ri_values['ri_modelfree_value_state'].tolower():
+ if 'y' in self.ri_values['ri_modelfree_value_state'].lower():
print("Reinforcement Learning models to consider - Monte Carlo, TD(0), and TD(Lambda)")
else:
print("Reinforcement Learning models to consider - SARSA, QLearning, Deep Queue Nets")
else:
# Default
- print("Sorry. We need to discuss, please connect with Anuket Thoth Project <sridhar.rao@spirent.com>")
+ print("Sorry. We need to discuss, please connect with Anuket Thoth Project <srao@linuxfoundation.org>")
def perform_inference(self):
"""
@@ -705,28 +932,28 @@ class AlgoSelectorWizard():
"""
# Decide whether data is Low or High
self.data_size = 'unknown'
- if ('k' in self.gen_values['data_size_bytes'].lower() or
- 't' in self.gen_values['data_size_samples']):
+ if ('k' in self.gen_about_data_basic_values['data_size_bytes'].lower() or
+ 't' in self.gen_about_data_basic_values['data_size_samples']):
self.data_size = 'low'
- if int(self.gen_values['metric_interpretability']) >= 3 :
+ if int(self.gen_metrics_values['metric_interpretability']) >= 3 :
self.interpretability = True
- if int(self.gen_values['metric_speed']) >= 3 :
+ if int(self.gen_metrics_values['metric_speed']) >= 3 :
self.faster = True
- if int(self.gen_values['metric_reproducibility']) >= 3 :
+ if int(self.gen_metrics_values['metric_reproducibility']) >= 3 :
self.reproducibility = True
# Decide Features relative to Data (ftod_ratio) - high/low
- if ('k' in self.gen_values['data_size_bytes'].lower() or
- 't' in self.gen_values['data_size_samples']):
- if int(self.gen_values['data_features_count']) > 50:
+ if ('k' in self.gen_about_data_basic_values['data_size_bytes'].lower() or
+ 't' in self.gen_about_data_basic_values['data_size_samples']):
+ if int(self.gen_data_features_values['data_features_count']) > 50:
self.ftod_ratio = 'high'
- elif ('m' in self.gen_values['data_size_bytes'].lower() or
- 'm' in self.gen_values['data_size_samples']):
- if int(self.gen_values['data_features_count']) > 5000:
+ elif ('m' in self.gen_about_data_basic_values['data_size_bytes'].lower() or
+ 'm' in self.gen_about_data_basic_values['data_size_samples']):
+ if int(self.gen_data_features_values['data_features_count']) > 5000:
self.ftod_ratio = 'high'
else:
- if int(self.gen_values['data_features_count']) > 500000:
+ if int(self.gen_data_features_values['data_features_count']) > 500000:
self.ftod_ratio = 'high'
@@ -742,15 +969,15 @@ class AlgoSelectorWizard():
else:
print("Supervised Learning model to consider - Random Forest")
else:
- if int(self.gen_values['data_column']) == 3:
+ if int(self.gen_data_main_values['data_column']) == 3:
print("Supervised Learning model to consider - RNN")
- elif (int(self.gen_values['data_column']) == 2 and
- int(self.gen_values['data_signal_type']) == 1):
+ elif (int(self.gen_data_main_values['data_column']) == 2 and
+ int(self.gen_data_signal_values['data_signal_type']) == 1):
print("Supervised Learning model to consider - CNN")
- elif (int(self.gen_values['data_column']) == 2 and
- (int(self.gen_values['data_signal_type']) == 2 or
- int(self.gen_values['data_signal_type']) == 3)):
- if 'y' in self.gen_values['data_output_prob'].tolower():
+ elif (int(self.gen_data_main_values['data_column']) == 2 and
+ (int(self.gen_data_signal_values['data_signal_type']) == 2 or
+ int(self.gen_data_signal_values['data_signal_type']) == 3)):
+ if 'y' in self.gen_about_data_output_values['data_output_prob'].lower():
print("Supervised Learning model to consider - Naive Bayes")
else:
print("Supervised Learning model to consider - ANN")
@@ -764,21 +991,21 @@ class AlgoSelectorWizard():
else:
print("Supervised Learning model to consider - SVN with Gaussian Kernel")
return
- if int(self.gen_values['data_type_output']) != 2:
+ if int(self.gen_about_data_output_values['data_type_output']) != 2:
from_b = True
else:
- if 'y' in self.gen_values['data_io_relation'].tolower():
+ if 'y' in self.gen_about_data_adv_values['data_io_relation'].lower():
print("Supervised Learning model to consider - Linear Regression or Linear SVM")
else:
print("Supervised Learning model to consider - Polynomial Regression or nonLinear SVM")
return
if from_b:
- if int(self.gen_values['data_output_type']) == 4:
- if 'y' in self.gen_values['data_output_prob'].tolower():
- if 'y' in self.gen_values['data_cond_indep'].tolower():
+ if int(self.gen_about_data_output_values['data_output_type']) == 4:
+ if 'y' in self.gen_about_data_output_values['data_output_prob'].lower():
+ if 'y' in self.gen_about_data_adv_values['data_cond_indep'].lower():
print("Supervised Learning model to consider - Naive Bayes")
else:
- if 'y' in self.gen_values['data_correlation'].tolower():
+ if 'y' in self.gen_about_data_adv_values['data_correlation'].lower():
print("Supervised Learning model to consider - LASSO or Ridge Regression")
else:
print("Supervised Learning model to consider - Logistic Regression")