updated classification.py and uploaded package on pycaret-nightly

e1d2da8d · PyCaret · 1bf89196 · e1d2da8d · e1d2da8d · e1d2da8d
Hide whitespace changes
Inline · Side-by-side

Showing 4 changed files with 112 additions and 20 deletions

.gitignore .gitignore +5 -1

pycaret/classification.py pycaret/classification.py +102 -15

pycaret/utils.py pycaret/utils.py +1 -1

setup.py setup.py +4 -3

No files found.
--- a/.gitignore
+++ b/.gitignore
@@ -10,4 +10,8 @@ catboost_info
 /app.py
 /logo.png
 /mlruns
-/Results.html
\ No newline at end of file
+/Results.html
+/build
+/pycaret_nightly.egg-info
+/pycaret.egg-info
+/dist
--- a/pycaret/classification.py
+++ b/pycaret/classification.py
@@ -2049,7 +2049,43 @@ def create_model(estimator = None,
        
    else:
        model = estimator
-        full_name = str(model).split("(")[0]
+
+        def get_model_name(e):
+            return str(e).split("(")[0]
+
+        if y.value_counts().count() > 2:
+
+            mn = get_model_name(estimator.estimator)
+
+            if 'catboost' in mn:
+                mn = 'CatBoostClassifier'
+
+            model_dict_logging = {'ExtraTreesClassifier' : 'Extra Trees Classifier',
+                                'GradientBoostingClassifier' : 'Gradient Boosting Classifier', 
+                                'RandomForestClassifier' : 'Random Forest Classifier',
+                                'LGBMClassifier' : 'Light Gradient Boosting Machine',
+                                'XGBClassifier' : 'Extreme Gradient Boosting',
+                                'AdaBoostClassifier' : 'Ada Boost Classifier', 
+                                'DecisionTreeClassifier' : 'Decision Tree Classifier', 
+                                'RidgeClassifier' : 'Ridge Classifier',
+                                'LogisticRegression' : 'Logistic Regression',
+                                'KNeighborsClassifier' : 'K Neighbors Classifier',
+                                'GaussianNB' : 'Naive Bayes',
+                                'SGDClassifier' : 'SVM - Linear Kernel',
+                                'SVC' : 'SVM - Radial Kernel',
+                                'GaussianProcessClassifier' : 'Gaussian Process Classifier',
+                                'MLPClassifier' : 'MLP Classifier',
+                                'QuadraticDiscriminantAnalysis' : 'Quadratic Discriminant Analysis',
+                                'LinearDiscriminantAnalysis' : 'Linear Discriminant Analysis',
+                                'CatBoostClassifier' : 'CatBoost Classifier',
+                                'BaggingClassifier' : 'Bagging Classifier',
+                                'VotingClassifier' : 'Voting Classifier'} 
+
+            full_name = model_dict_logging.get(mn)
+        
+        else:
+
+            full_name = get_model_name(estimator)
    
    progress.value += 1
    
@@ -2305,6 +2341,12 @@ def create_model(estimator = None,

            # Log model parameters
            params = model.get_params()
+
+            for i in list(params):
+                v = params.get(i)
+                if len(str(v)) > 250:
+                    params.pop(i)
+
            mlflow.log_params(params)
            
            # Log metrics
@@ -2608,7 +2650,10 @@ def ensemble_model(estimator,
    def get_model_name(e):
        return str(e).split("(")[0]

-    mn = get_model_name(estimator)
+    if y.value_counts().count() > 2:
+        mn = get_model_name(estimator.estimator)
+    else:
+        mn = get_model_name(estimator)

    if 'catboost' in str(estimator):
        mn = 'CatBoostClassifier'
@@ -2983,10 +3028,11 @@ def ensemble_model(estimator,

        with mlflow.start_run(run_name=full_name) as run:        
            params = model.get_params()
-            try:
-                params.pop('base_estimator')
-            except:
-                pass
+
+            for i in list(params):
+                v = params.get(i)
+                if len(str(v)) > 250:
+                    params.pop(i)

            mlflow.log_params(params)
            mlflow.log_metrics({"Accuracy": avgs_acc[0], "AUC": avgs_auc[0], "Recall": avgs_recall[0], "Precision" : avgs_precision[0],
@@ -4370,6 +4416,11 @@ def compare_models(blacklist = None,
            else:
                params = model_store_final.get_params()

+            for i in list(params):
+                v = params.get(i)
+                if len(str(v)) > 250:
+                    params.pop(i)
+                    
            mlflow.log_params(params)

            #set tag of compare_models
@@ -4682,7 +4733,10 @@ def tune_model(estimator = None,
    def get_model_name(e):
        return str(e).split("(")[0]

-    mn = get_model_name(estimator)
+    if len(estimator.classes_) > 2:
+        mn = get_model_name(estimator.estimator)
+    else:
+        mn = get_model_name(estimator)

    if 'catboost' in mn:
        mn = 'CatBoostClassifier'
@@ -4996,7 +5050,12 @@ def tune_model(estimator = None,
                        'algorithm' : ["SAMME", "SAMME.R"]
                        }    

-        model_grid = RandomizedSearchCV(estimator=AdaBoostClassifier(base_estimator = _estimator_.base_estimator, random_state=seed), 
+        if y.value_counts().count() > 2:
+            base_estimator_input = _estimator_.estimator.base_estimator
+        else:
+            base_estimator_input = _estimator_.base_estimator
+
+        model_grid = RandomizedSearchCV(estimator=AdaBoostClassifier(base_estimator = base_estimator_input, random_state=seed), 
                                        param_distributions=param_grid, scoring=optimize, n_iter=n_iter, 
                                        cv=cv, random_state=seed, n_jobs=n_jobs_param)

@@ -5500,9 +5559,19 @@ def tune_model(estimator = None,

        with mlflow.start_run(run_name=full_name) as run:        
            params = best_model.get_params()
+
+            # Log model parameters
+            params = model.get_params()
+
+            for i in list(params):
+                v = params.get(i)
+                if len(str(v)) > 250:
+                    params.pop(i)
+
+            mlflow.log_params(params)
+
            mlflow.log_metrics({"Accuracy": avgs_acc[0], "AUC": avgs_auc[0], "Recall": avgs_recall[0], "Precision" : avgs_precision[0],
                                "F1": avgs_f1[0], "Kappa": avgs_kappa[0], "MCC": avgs_mcc[0]})
-            mlflow.log_params(params)

            # Log internal parameters
            mlflow.log_param("tune_model_fold", fold)
@@ -6174,6 +6243,7 @@ def blend_models(estimator_list = 'All',
    
    #refitting the model on complete X_train, y_train
    monitor.iloc[1,1:] = 'Finalizing Model'
+    monitor.iloc[2,1:] = 'Almost Finished'
    
    if verbose:
        if html_param:
@@ -6947,9 +7017,17 @@ def stack_models(estimator_list,

        with mlflow.start_run(run_name='Stacking Classifier') as run:        
            params = meta_model.get_params()
+
+            for i in list(params):
+                v = params.get(i)
+                if len(str(v)) > 250:
+                    params.pop(i)
+            
+            mlflow.log_params(params)
+            
            mlflow.log_metrics({"Accuracy": avgs_acc[0], "AUC": avgs_auc[0], "Recall": avgs_recall[0], "Precision" : avgs_precision[0],
                                "F1": avgs_f1[0], "Kappa": avgs_kappa[0], "MCC": avgs_mcc[0]})
-            mlflow.log_params(params)
+

            # Log internal parameters
            mlflow.log_param("stack_models_estimator_list", estimator_list)
@@ -7712,9 +7790,16 @@ def create_stacknet(estimator_list,

        with mlflow.start_run(run_name='Stacking Classifier (Multi-layer)') as run:        
            params = meta_model.get_params()
+
+            for i in list(params):
+                v = params.get(i)
+                if len(str(v)) > 250:
+                    params.pop(i)
+    
+            mlflow.log_params(params)
+            
            mlflow.log_metrics({"Accuracy": avgs_acc[0], "AUC": avgs_auc[0], "Recall": avgs_recall[0], "Precision" : avgs_precision[0],
                                "F1": avgs_f1[0], "Kappa": avgs_kappa[0], "MCC": avgs_mcc[0]})
-            mlflow.log_params(params)

            # Log other parameter of create_model function (internal to pycaret)
            mlflow.log_param("create_stacknet_estimator_list", estimator_list)
@@ -8433,10 +8518,12 @@ def calibrate_model(estimator,

            # Log model parameters
            params = model.get_params()
-            try:
-                params.pop('base_estimator')
-            except:
-                pass
+            
+            for i in list(params):
+                v = params.get(i)
+                if len(str(v)) > 250:
+                    params.pop(i)
+
            mlflow.log_params(params)
            mlflow.log_param('base_estimator', full_name)
            

--- a/pycaret/utils.py
+++ b/pycaret/utils.py
@@ -3,7 +3,7 @@
 # License: MIT

 def version():
-    print("1.0.1")
+    print("pycaret-nightly-0.3")


 def check_metric(actual, prediction, metric, round=4):

--- a/setup.py
+++ b/setup.py
@@ -12,9 +12,9 @@ with open('requirements.txt') as f:
    required = f.read().splitlines()

 setup(
-    name="pycaret",
-    version="1.0.1",
-    description="An open source, low-code machine learning library in Python.",
+    name="pycaret-nightly",
+    version="0.3",
+    description="Nightly build of PyCaret - An open source, low-code machine learning library in Python.",
    long_description=readme(),
    long_description_content_type="text/markdown",
    url="https://github.com/pycaret/pycaret",
@@ -27,6 +27,7 @@ setup(
        "Programming Language :: Python :: 3.7",
        "Programming Language :: Python :: 3.8",
    ],
+    packages=["pycaret"],
    include_package_data=True,
    install_requires=required
 )