From e1d2da8d6635f15672906a4c97f766ab14c774b4 Mon Sep 17 00:00:00 2001
From: PyCaret <moez@pycaret.org>
Date: Wed, 10 Jun 2020 00:14:57 -0400
Subject: [PATCH] updated classification.py and uploaded package on
 pycaret-nightly

---
 .gitignore                |   6 +-
 pycaret/classification.py | 117 +++++++++++++++++++++++++++++++++-----
 pycaret/utils.py          |   2 +-
 setup.py                  |   7 ++-
 4 files changed, 112 insertions(+), 20 deletions(-)

diff --git a/.gitignore b/.gitignore
index 971b8bd..ad370b4 100644
--- a/.gitignore
+++ b/.gitignore
@@ -10,4 +10,8 @@ catboost_info
 /app.py
 /logo.png
 /mlruns
-/Results.html
\ No newline at end of file
+/Results.html
+/build
+/pycaret_nightly.egg-info
+/pycaret.egg-info
+/dist
diff --git a/pycaret/classification.py b/pycaret/classification.py
index 75e1b37..bcab27c 100644
--- a/pycaret/classification.py
+++ b/pycaret/classification.py
@@ -2049,7 +2049,43 @@ def create_model(estimator = None,
         
     else:
         model = estimator
-        full_name = str(model).split("(")[0]
+
+        def get_model_name(e):
+            return str(e).split("(")[0]
+
+        if y.value_counts().count() > 2:
+            # The multiclass path names the model held on `.estimator`; fall back to
+            # the object itself so an estimator without that attribute does not raise.
+            mn = get_model_name(getattr(estimator, 'estimator', estimator))
+            if 'catboost' in mn:
+                mn = 'CatBoostClassifier'
+
+            model_dict_logging = {'ExtraTreesClassifier' : 'Extra Trees Classifier',
+                                'GradientBoostingClassifier' : 'Gradient Boosting Classifier', 
+                                'RandomForestClassifier' : 'Random Forest Classifier',
+                                'LGBMClassifier' : 'Light Gradient Boosting Machine',
+                                'XGBClassifier' : 'Extreme Gradient Boosting',
+                                'AdaBoostClassifier' : 'Ada Boost Classifier', 
+                                'DecisionTreeClassifier' : 'Decision Tree Classifier', 
+                                'RidgeClassifier' : 'Ridge Classifier',
+                                'LogisticRegression' : 'Logistic Regression',
+                                'KNeighborsClassifier' : 'K Neighbors Classifier',
+                                'GaussianNB' : 'Naive Bayes',
+                                'SGDClassifier' : 'SVM - Linear Kernel',
+                                'SVC' : 'SVM - Radial Kernel',
+                                'GaussianProcessClassifier' : 'Gaussian Process Classifier',
+                                'MLPClassifier' : 'MLP Classifier',
+                                'QuadraticDiscriminantAnalysis' : 'Quadratic Discriminant Analysis',
+                                'LinearDiscriminantAnalysis' : 'Linear Discriminant Analysis',
+                                'CatBoostClassifier' : 'CatBoost Classifier',
+                                'BaggingClassifier' : 'Bagging Classifier',
+                                'VotingClassifier' : 'Voting Classifier'} 
+
+            # Estimators missing from the table keep their class name instead of None.
+            full_name = model_dict_logging.get(mn, mn)
+        else:
+
+            full_name = get_model_name(estimator)
     
     progress.value += 1
     
@@ -2305,6 +2341,12 @@ def create_model(estimator = None,
 
             # Log model parameters
             params = model.get_params()
+
+            for i in list(params):
+                v = params.get(i)
+                if len(str(v)) > 250:
+                    params.pop(i)
+
             mlflow.log_params(params)
             
             # Log metrics
@@ -2608,7 +2650,10 @@ def ensemble_model(estimator,
     def get_model_name(e):
         return str(e).split("(")[0]
 
-    mn = get_model_name(estimator)
+    # Multiclass models are named after the estimator held on `.estimator`; an
+    # object without that attribute is named directly instead of raising.
+    mn = get_model_name(getattr(estimator, 'estimator', estimator)
+                        if y.value_counts().count() > 2 else estimator)
 
     if 'catboost' in str(estimator):
         mn = 'CatBoostClassifier'
@@ -2983,10 +3028,11 @@ def ensemble_model(estimator,
 
         with mlflow.start_run(run_name=full_name) as run:        
             params = model.get_params()
-            try:
-                params.pop('base_estimator')
-            except:
-                pass
+
+            for i in list(params):
+                v = params.get(i)
+                if len(str(v)) > 250:
+                    params.pop(i)
 
             mlflow.log_params(params)
             mlflow.log_metrics({"Accuracy": avgs_acc[0], "AUC": avgs_auc[0], "Recall": avgs_recall[0], "Precision" : avgs_precision[0],
@@ -4370,6 +4416,11 @@ def compare_models(blacklist = None,
             else:
                 params = model_store_final.get_params()
 
+            for i in list(params):
+                v = params.get(i)
+                if len(str(v)) > 250:
+                    params.pop(i)
+                    
             mlflow.log_params(params)
 
             #set tag of compare_models
@@ -4682,7 +4733,10 @@ def tune_model(estimator = None,
     def get_model_name(e):
         return str(e).split("(")[0]
 
-    mn = get_model_name(estimator)
+    # With more than two fitted classes the model is named after the estimator held
+    # on `.estimator`; an object without that attribute is named directly instead.
+    mn = get_model_name(getattr(estimator, 'estimator', estimator)
+                        if len(estimator.classes_) > 2 else estimator)
 
     if 'catboost' in mn:
         mn = 'CatBoostClassifier'
@@ -4996,7 +5050,12 @@ def tune_model(estimator = None,
                         'algorithm' : ["SAMME", "SAMME.R"]
                         }    
 
-        model_grid = RandomizedSearchCV(estimator=AdaBoostClassifier(base_estimator = _estimator_.base_estimator, random_state=seed), 
+        # Multiclass: read base_estimator from the model on `.estimator`, or from the object itself if absent.
+        base_estimator_input = (getattr(_estimator_, 'estimator', _estimator_)
+                                if y.value_counts().count() > 2
+                                else _estimator_).base_estimator
+
+        model_grid = RandomizedSearchCV(estimator=AdaBoostClassifier(base_estimator = base_estimator_input, random_state=seed), 
                                         param_distributions=param_grid, scoring=optimize, n_iter=n_iter, 
                                         cv=cv, random_state=seed, n_jobs=n_jobs_param)
 
@@ -5500,9 +5559,19 @@ def tune_model(estimator = None,
 
         with mlflow.start_run(run_name=full_name) as run:        
             params = best_model.get_params()
+
+            # Log the parameters taken from best_model just above; entries whose
+            # string form is longer than 250 characters are dropped first.
+
+            for i in list(params):
+                v = params.get(i)
+                if len(str(v)) > 250:
+                    params.pop(i)
+
+            mlflow.log_params(params)
+
             mlflow.log_metrics({"Accuracy": avgs_acc[0], "AUC": avgs_auc[0], "Recall": avgs_recall[0], "Precision" : avgs_precision[0],
                                 "F1": avgs_f1[0], "Kappa": avgs_kappa[0], "MCC": avgs_mcc[0]})
-            mlflow.log_params(params)
 
             # Log internal parameters
             mlflow.log_param("tune_model_fold", fold)
@@ -6174,6 +6243,7 @@ def blend_models(estimator_list = 'All',
     
     #refitting the model on complete X_train, y_train
     monitor.iloc[1,1:] = 'Finalizing Model'
+    monitor.iloc[2,1:] = 'Almost Finished'
     
     if verbose:
         if html_param:
@@ -6947,9 +7017,17 @@ def stack_models(estimator_list,
 
         with mlflow.start_run(run_name='Stacking Classifier') as run:        
             params = meta_model.get_params()
+
+            for i in list(params):
+                v = params.get(i)
+                if len(str(v)) > 250:
+                    params.pop(i)
+            
+            mlflow.log_params(params)
+            
             mlflow.log_metrics({"Accuracy": avgs_acc[0], "AUC": avgs_auc[0], "Recall": avgs_recall[0], "Precision" : avgs_precision[0],
                                 "F1": avgs_f1[0], "Kappa": avgs_kappa[0], "MCC": avgs_mcc[0]})
-            mlflow.log_params(params)
+
 
             # Log internal parameters
             mlflow.log_param("stack_models_estimator_list", estimator_list)
@@ -7712,9 +7790,16 @@ def create_stacknet(estimator_list,
 
         with mlflow.start_run(run_name='Stacking Classifier (Multi-layer)') as run:        
             params = meta_model.get_params()
+
+            for i in list(params):
+                v = params.get(i)
+                if len(str(v)) > 250:
+                    params.pop(i)
+    
+            mlflow.log_params(params)
+            
             mlflow.log_metrics({"Accuracy": avgs_acc[0], "AUC": avgs_auc[0], "Recall": avgs_recall[0], "Precision" : avgs_precision[0],
                                 "F1": avgs_f1[0], "Kappa": avgs_kappa[0], "MCC": avgs_mcc[0]})
-            mlflow.log_params(params)
 
             # Log other parameter of create_model function (internal to pycaret)
             mlflow.log_param("create_stacknet_estimator_list", estimator_list)
@@ -8433,10 +8518,12 @@ def calibrate_model(estimator,
 
             # Log model parameters
             params = model.get_params()
-            try:
-                params.pop('base_estimator')
-            except:
-                pass
+            
+            for i in list(params):
+                v = params.get(i)
+                # 'base_estimator' is logged separately below, so it must not be logged here too
+                if i == 'base_estimator' or len(str(v)) > 250:
+                    params.pop(i)
             mlflow.log_params(params)
             mlflow.log_param('base_estimator', full_name)
             
diff --git a/pycaret/utils.py b/pycaret/utils.py
index d7d0126..79459b9 100644
--- a/pycaret/utils.py
+++ b/pycaret/utils.py
@@ -3,7 +3,7 @@
 # License: MIT
 
 def version():
-    print("1.0.1")
+    print("pycaret-nightly-0.3")
 
 
 def check_metric(actual, prediction, metric, round=4):
diff --git a/setup.py b/setup.py
index 5e9ca2b..be599af 100644
--- a/setup.py
+++ b/setup.py
@@ -12,9 +12,9 @@ with open('requirements.txt') as f:
     required = f.read().splitlines()
 
 setup(
-    name="pycaret",
-    version="1.0.1",
-    description="An open source, low-code machine learning library in Python.",
+    name="pycaret-nightly",
+    version="0.3",
+    description="Nightly build of PyCaret - An open source, low-code machine learning library in Python.",
     long_description=readme(),
     long_description_content_type="text/markdown",
     url="https://github.com/pycaret/pycaret",
@@ -27,6 +27,7 @@ setup(
         "Programming Language :: Python :: 3.7",
         "Programming Language :: Python :: 3.8",
     ],
+    packages=["pycaret"],
     include_package_data=True,
     install_requires=required
 )
-- 
GitLab