diff --git a/pycaret/anomaly.py b/pycaret/anomaly.py
index b034cfc1358f78f1149304b09680f583ded5e0da..0f4b654e78cd60f9c94285dc4837ae435725539f 100644
--- a/pycaret/anomaly.py
+++ b/pycaret/anomaly.py
@@ -2,7 +2,7 @@
 # Author: Moez Ali
 # License: MIT
 # Release: PyCaret 2.0x
-# Last modified : 24/07/2020
+# Last modified : 27/07/2020
 
 def setup(data, 
         categorical_features = None,
@@ -350,182 +350,59 @@ def setup(data,
 
     from platform import python_version, platform, python_build, machine
 
-    logger.info("python_version: " + str(python_version()))
-    logger.info("python_build: " + str(python_build()))
-    logger.info("machine: " + str(machine()))
-    logger.info("platform: " + str(platform()))
-
-    import psutil
-    psvm = psutil.virtual_memory()
-    logger.info("Memory: " + str(psvm))
-    logger.info("Physical Core: " + str(psutil.cpu_count(logical=False)))
-    logger.info("Logical Core: " + str(psutil.cpu_count(logical=True)))
-
-    logger.info("Checking libraries")
-
-    try:
-        from pandas import __version__
-        logger.info("pd==" + str(__version__))
-    except:
-        logger.warning("pandas not found")
-
-    try:
-        from numpy import __version__
-        logger.info("numpy==" + str(__version__))
-    except:
-        logger.warning("numpy not found")
-
-    try:
-        from sklearn import __version__
-        logger.info("sklearn==" + str(__version__))
-    except:
-        logger.warning("sklearn not found")
-
-    try:
-        from xgboost import __version__
-        logger.info("xgboost==" + str(__version__))
-    except:
-        logger.warning("xgboost not found")
-
-    try:
-        from lightgbm import __version__
-        logger.info("lightgbm==" + str(__version__))
-    except:
-        logger.warning("lightgbm not found")
-
-    try:
-        from catboost import __version__
-        logger.info("catboost==" + str(__version__))
-    except:
-        logger.warning("catboost not found")
-
-    try:
-        from kmodes import __version__
-        logger.info("kmodes==" + str(__version__))
-    except:
-        logger.warning("kmodes not found")
-
-    try:
-        from pyod.version import __version__
-        logger.info("pyod==" + str(__version__))
-    except:
-        logger.warning("pyod not found")
-
-    try:
-        import warnings
-        warnings.filterwarnings('ignore')
-        from gensim import __version__
-        logger.info("gensim==" + str(__version__))
-    except:
-        logger.warning("gensim not found")
-
-    try:
-        from spacy import __version__
-        logger.info("spacy==" + str(__version__))
-    except:
-        logger.warning("spacy not found")
-
-    try:
-        from nltk import __version__
-        logger.info("nltk==" + str(__version__))
-    except:
-        logger.warning("nltk not found")
-
-    try:
-        from textblob import __version__
-        logger.info("textblob==" + str(__version__))
-    except:
-        logger.warning("textblob not found")
-
-    try:
-        from pyLDAvis import __version__
-        logger.info("pyLDAvis==" + str(__version__))
-    except:
-        logger.warning("pyLDAvis not found")
-
-    try:
-        from mlxtend import __version__
-        logger.info("mlxtend==" + str(__version__))
-    except:
-        logger.warning("mlxtend not found")
-
-    try:
-        from matplotlib import __version__
-        logger.info("matplotlib==" + str(__version__))
-    except:
-        logger.warning("matplotlib not found")
-
-    try:
-        from seaborn import __version__
-        logger.info("seaborn==" + str(__version__))
-    except:
-        logger.warning("seaborn not found")
-
     try:
-        from plotly import __version__
-        logger.info("plotly==" + str(__version__))
+        logger.info("python_version: " + str(python_version()))
     except:
-        logger.warning("plotly not found")
+        logger.warning("cannot find platform.python_version")
 
     try:
-        from cufflinks import __version__
-        logger.info("cufflinks==" + str(__version__))
+        logger.info("python_build: " + str(python_build()))
     except:
-        logger.warning("cufflinks not found")
+        logger.warning("cannot find platform.python_build")
 
     try:
-        from yellowbrick import __version__
-        logger.info("yellowbrick==" + str(__version__))
+        logger.info("machine: " + str(machine()))
     except:
-        logger.warning("yellowbrick not found")
+        logger.warning("cannot find platform.machine")
 
     try:
-        from shap import __version__
-        logger.info("shap==" + str(__version__))
+        logger.info("platform: " + str(platform()))
     except:
-        logger.warning("shap not found. cannot use interpret_model without shap.")
+        logger.warning("cannot find platform.platform")
 
-    try:
-        from pandas_profiling import __version__
-        logger.info("pandas_profiling==" + str(__version__))
-    except:
-        logger.warning("pandas_profiling not found")
+    import psutil
 
     try:
-        from wordcloud import __version__
-        logger.info("wordcloud==" + str(__version__))
+        psvm = psutil.virtual_memory()
+        logger.info("Memory: " + str(psvm))
     except:
-        logger.warning("wordcloud not found")
+        logger.warning("cannot find psutil.version_memory")
 
     try:
-        from umap import __version__
-        logger.info("umap==" + str(__version__))
+        logger.info("Physical Core: " + str(psutil.cpu_count(logical=False)))
+        logger.info("Logical Core: " + str(psutil.cpu_count(logical=True)))
     except:
-        logger.warning("umap not found")
+        logger.warning("cannot find psutil.cpu_count")
 
-    try:
-        from IPython import __version__
-        logger.info("IPython==" + str(__version__))
-    except:
-        logger.warning("IPython not found")
+    logger.info("Checking libraries")
 
     try:
-        from ipywidgets import __version__
-        logger.info("ipywidgets==" + str(__version__))
+        from pandas import __version__
+        logger.info("pd==" + str(__version__))
     except:
-        logger.warning("ipywidgets not found")
+        logger.warning("pandas not found")
 
     try:
-        from joblib import __version__
-        logger.info("joblib==" + str(__version__))
+        from numpy import __version__
+        logger.info("numpy==" + str(__version__))
     except:
-        logger.warning("joblib not found")
+        logger.warning("numpy not found")
 
     try:
-        from imblearn import __version__
-        logger.info("imblearn==" + str(__version__))
+        from pyod import __version__
+        logger.info("pyod==" + str(__version__))
     except:
-        logger.warning("imblearn not found")
+        logger.warning("pyod not found")
 
     try:
         from mlflow.version import VERSION
@@ -535,13 +412,6 @@ def setup(data,
     except:
         logger.warning("mlflow not found")
 
-    try:
-        from awscli import __version__
-        logger.info("awscli==" + str(__version__))
-    except:
-        logger.warning("awscli not found. 
cannot use deploy_model without awscli") - - logger.info("Checking Exceptions") #run_time diff --git a/pycaret/classification.py b/pycaret/classification.py index 28f4d1c267deb1e14a6be496983b5ce398b337fa..4f96c262b5257f88af79a4165fa29d2b57dc403c 100644 --- a/pycaret/classification.py +++ b/pycaret/classification.py @@ -2,7 +2,7 @@ # Author: Moez Ali # License: MIT # Release: PyCaret 2.0x -# Last modified : 24/07/2020 +# Last modified : 27/07/2020 def setup(data, target, @@ -508,10 +508,18 @@ def setup(data, logger.info("platform: " + str(platform())) import psutil - psvm = psutil.virtual_memory() - logger.info("Memory: " + str(psvm)) - logger.info("Physical Core: " + str(psutil.cpu_count(logical=False))) - logger.info("Logical Core: " + str(psutil.cpu_count(logical=True))) + + try: + psvm = psutil.virtual_memory() + logger.info("Memory: " + str(psvm)) + except: + logger.warning("cannot find psutil.version_memory") + + try: + logger.info("Physical Core: " + str(psutil.cpu_count(logical=False))) + logger.info("Logical Core: " + str(psutil.cpu_count(logical=True))) + except: + logger.warning("cannot find psutil.cpu_count") logger.info("Checking libraries") @@ -527,7 +535,6 @@ def setup(data, except: logger.warning("numpy not found") - try: from sklearn import __version__ logger.info("sklearn==" + str(__version__)) @@ -552,18 +559,6 @@ def setup(data, except: logger.warning("catboost not found") - try: - from shap import __version__ - logger.info("shap==" + str(__version__)) - except: - logger.warning("shap not found. cannot use interpret_model without shap.") - - try: - from pandas_profiling import __version__ - logger.info("pandas_profiling==" + str(__version__)) - except: - logger.warning("pandas_profiling not found") - try: from mlflow.version import VERSION import warnings @@ -571,72 +566,6 @@ def setup(data, logger.info("mlflow==" + str(VERSION)) except: logger.warning("mlflow not found") - - """ - - try: - from kmodes import __version__ - logger.info("kmodes==" + str(__version__)) - except: - logger.warning("kmodes not found") - - try: - from pyod.version import __version__ - logger.info("pyod==" + str(__version__)) - except: - logger.warning("pyod not found") - - try: - from matplotlib import __version__ - logger.info("matplotlib==" + str(__version__)) - except: - logger.warning("matplotlib not found") - - try: - from yellowbrick import __version__ - logger.info("yellowbrick==" + str(__version__)) - except: - logger.warning("yellowbrick not found") - - try: - from shap import __version__ - logger.info("shap==" + str(__version__)) - except: - logger.warning("shap not found. cannot use interpret_model without shap.") - - try: - from pandas_profiling import __version__ - logger.info("pandas_profiling==" + str(__version__)) - except: - logger.warning("pandas_profiling not found") - - try: - from IPython import __version__ - logger.info("IPython==" + str(__version__)) - except: - logger.warning("IPython not found") - - try: - from ipywidgets import __version__ - logger.info("ipywidgets==" + str(__version__)) - except: - logger.warning("ipywidgets not found") - - try: - from joblib import __version__ - logger.info("joblib==" + str(__version__)) - except: - logger.warning("joblib not found") - - - - try: - from awscli import __version__ - logger.info("awscli==" + str(__version__)) - except: - logger.warning("awscli not found. 
cannot use deploy_model without awscli") - - """ #run_time import datetime, time diff --git a/pycaret/clustering.py b/pycaret/clustering.py index 2b72bc39859e07790e84c716db8548e0b213e4a6..3d950aae8d09313504bddfd2175971de5c6c536c 100644 --- a/pycaret/clustering.py +++ b/pycaret/clustering.py @@ -2,7 +2,7 @@ # Author: Moez Ali # License: MIT # Release: PyCaret 2.0x -# Last modified : 24/07/2020 +# Last modified : 27/07/2020 def setup(data, categorical_features = None, @@ -357,182 +357,65 @@ def setup(data, from platform import python_version, platform, python_build, machine - logger.info("python_version: " + str(python_version())) - logger.info("python_build: " + str(python_build())) - logger.info("machine: " + str(machine())) - logger.info("platform: " + str(platform())) - - import psutil - psvm = psutil.virtual_memory() - logger.info("Memory: " + str(psvm)) - logger.info("Physical Core: " + str(psutil.cpu_count(logical=False))) - logger.info("Logical Core: " + str(psutil.cpu_count(logical=True))) - - logger.info("Checking libraries") - - try: - from pandas import __version__ - logger.info("pd==" + str(__version__)) - except: - logger.warning("pandas not found") - - try: - from numpy import __version__ - logger.info("numpy==" + str(__version__)) - except: - logger.warning("numpy not found") - - try: - from sklearn import __version__ - logger.info("sklearn==" + str(__version__)) - except: - logger.warning("sklearn not found") - - try: - from xgboost import __version__ - logger.info("xgboost==" + str(__version__)) - except: - logger.warning("xgboost not found") - - try: - from lightgbm import __version__ - logger.info("lightgbm==" + str(__version__)) - except: - logger.warning("lightgbm not found") - - try: - from catboost import __version__ - logger.info("catboost==" + str(__version__)) - except: - logger.warning("catboost not found") - - try: - from kmodes import __version__ - logger.info("kmodes==" + str(__version__)) - except: - logger.warning("kmodes not found") - - try: - from pyod.version import __version__ - logger.info("pyod==" + str(__version__)) - except: - logger.warning("pyod not found") - - try: - import warnings - warnings.filterwarnings('ignore') - from gensim import __version__ - logger.info("gensim==" + str(__version__)) - except: - logger.warning("gensim not found") - - try: - from spacy import __version__ - logger.info("spacy==" + str(__version__)) - except: - logger.warning("spacy not found") - - try: - from nltk import __version__ - logger.info("nltk==" + str(__version__)) - except: - logger.warning("nltk not found") - - try: - from textblob import __version__ - logger.info("textblob==" + str(__version__)) - except: - logger.warning("textblob not found") - - try: - from pyLDAvis import __version__ - logger.info("pyLDAvis==" + str(__version__)) - except: - logger.warning("pyLDAvis not found") - - try: - from mlxtend import __version__ - logger.info("mlxtend==" + str(__version__)) - except: - logger.warning("mlxtend not found") - - try: - from matplotlib import __version__ - logger.info("matplotlib==" + str(__version__)) - except: - logger.warning("matplotlib not found") - try: - from seaborn import __version__ - logger.info("seaborn==" + str(__version__)) + logger.info("python_version: " + str(python_version())) except: - logger.warning("seaborn not found") + logger.warning("cannot find platform.python_version") try: - from plotly import __version__ - logger.info("plotly==" + str(__version__)) + logger.info("python_build: " + str(python_build())) except: - 
logger.warning("plotly not found") + logger.warning("cannot find platform.python_build") try: - from cufflinks import __version__ - logger.info("cufflinks==" + str(__version__)) + logger.info("machine: " + str(machine())) except: - logger.warning("cufflinks not found") + logger.warning("cannot find platform.machine") try: - from yellowbrick import __version__ - logger.info("yellowbrick==" + str(__version__)) + logger.info("platform: " + str(platform())) except: - logger.warning("yellowbrick not found") + logger.warning("cannot find platform.platform") - try: - from shap import __version__ - logger.info("shap==" + str(__version__)) - except: - logger.warning("shap not found. cannot use interpret_model without shap.") + import psutil try: - from pandas_profiling import __version__ - logger.info("pandas_profiling==" + str(__version__)) + psvm = psutil.virtual_memory() + logger.info("Memory: " + str(psvm)) except: - logger.warning("pandas_profiling not found") + logger.warning("cannot find psutil.version_memory") try: - from wordcloud import __version__ - logger.info("wordcloud==" + str(__version__)) + logger.info("Physical Core: " + str(psutil.cpu_count(logical=False))) + logger.info("Logical Core: " + str(psutil.cpu_count(logical=True))) except: - logger.warning("wordcloud not found") + logger.warning("cannot find psutil.cpu_count") - try: - from umap import __version__ - logger.info("umap==" + str(__version__)) - except: - logger.warning("umap not found") + logger.info("Checking libraries") try: - from IPython import __version__ - logger.info("IPython==" + str(__version__)) + from pandas import __version__ + logger.info("pd==" + str(__version__)) except: - logger.warning("IPython not found") + logger.warning("pandas not found") try: - from ipywidgets import __version__ - logger.info("ipywidgets==" + str(__version__)) + from numpy import __version__ + logger.info("numpy==" + str(__version__)) except: - logger.warning("ipywidgets not found") + logger.warning("numpy not found") try: - from joblib import __version__ - logger.info("joblib==" + str(__version__)) + from sklearn import __version__ + logger.info("sklearn==" + str(__version__)) except: - logger.warning("joblib not found") + logger.warning("sklearn not found") try: - from imblearn import __version__ - logger.info("imblearn==" + str(__version__)) + from kmodes import __version__ + logger.info("kmodes==" + str(__version__)) except: - logger.warning("imblearn not found") + logger.warning("kmodes not found") try: from mlflow.version import VERSION @@ -542,12 +425,6 @@ def setup(data, except: logger.warning("mlflow not found") - try: - from awscli import __version__ - logger.info("awscli==" + str(__version__)) - except: - logger.warning("awscli not found. 
cannot use deploy_model without awscli") - logger.info("Checking Exceptions") diff --git a/pycaret/nlp.py b/pycaret/nlp.py index 31585f240de40eaa0ac00e549b85aa22dec8a354..9100d462b41f2fd28d2927b6805b9611a0fa293d 100644 --- a/pycaret/nlp.py +++ b/pycaret/nlp.py @@ -2,7 +2,7 @@ # Author: Moez Ali # License: MIT # Release: PyCaret 2.0x -# Last modified : 24/07/2020 +# Last modified : 27/07/2020 def setup(data, target=None, @@ -147,67 +147,54 @@ def setup(data, from platform import python_version, platform, python_build, machine - logger.info("python_version: " + str(python_version())) - logger.info("python_build: " + str(python_build())) - logger.info("machine: " + str(machine())) - logger.info("platform: " + str(platform())) - - import psutil - psvm = psutil.virtual_memory() - logger.info("Memory: " + str(psvm)) - logger.info("Physical Core: " + str(psutil.cpu_count(logical=False))) - logger.info("Logical Core: " + str(psutil.cpu_count(logical=True))) - - logger.info("Checking libraries") - try: - from pandas import __version__ - logger.info("pd==" + str(__version__)) + logger.info("python_version: " + str(python_version())) except: - logger.warning("pandas not found") + logger.warning("cannot find platform.python_version") try: - from numpy import __version__ - logger.info("numpy==" + str(__version__)) + logger.info("python_build: " + str(python_build())) except: - logger.warning("numpy not found") + logger.warning("cannot find platform.python_build") try: - from sklearn import __version__ - logger.info("sklearn==" + str(__version__)) + logger.info("machine: " + str(machine())) except: - logger.warning("sklearn not found") + logger.warning("cannot find platform.machine") try: - from xgboost import __version__ - logger.info("xgboost==" + str(__version__)) + logger.info("platform: " + str(platform())) except: - logger.warning("xgboost not found") + logger.warning("cannot find platform.platform") + + import psutil try: - from lightgbm import __version__ - logger.info("lightgbm==" + str(__version__)) + psvm = psutil.virtual_memory() + logger.info("Memory: " + str(psvm)) except: - logger.warning("lightgbm not found") + logger.warning("cannot find psutil.version_memory") try: - from catboost import __version__ - logger.info("catboost==" + str(__version__)) + logger.info("Physical Core: " + str(psutil.cpu_count(logical=False))) + logger.info("Logical Core: " + str(psutil.cpu_count(logical=True))) except: - logger.warning("catboost not found") + logger.warning("cannot find psutil.cpu_count") + + logger.info("Checking libraries") try: - from kmodes import __version__ - logger.info("kmodes==" + str(__version__)) + from pandas import __version__ + logger.info("pd==" + str(__version__)) except: - logger.warning("kmodes not found") - + logger.warning("pandas not found") + try: - from pyod.version import __version__ - logger.info("pyod==" + str(__version__)) + from numpy import __version__ + logger.info("numpy==" + str(__version__)) except: - logger.warning("pyod not found") - + logger.warning("numpy not found") + try: import warnings warnings.filterwarnings('ignore') @@ -240,90 +227,12 @@ def setup(data, except: logger.warning("pyLDAvis not found") - try: - from mlxtend import __version__ - logger.info("mlxtend==" + str(__version__)) - except: - logger.warning("mlxtend not found") - - try: - from matplotlib import __version__ - logger.info("matplotlib==" + str(__version__)) - except: - logger.warning("matplotlib not found") - - try: - from seaborn import __version__ - logger.info("seaborn==" + 
str(__version__)) - except: - logger.warning("seaborn not found") - - try: - from plotly import __version__ - logger.info("plotly==" + str(__version__)) - except: - logger.warning("plotly not found") - - try: - from cufflinks import __version__ - logger.info("cufflinks==" + str(__version__)) - except: - logger.warning("cufflinks not found") - - try: - from yellowbrick import __version__ - logger.info("yellowbrick==" + str(__version__)) - except: - logger.warning("yellowbrick not found") - - try: - from shap import __version__ - logger.info("shap==" + str(__version__)) - except: - logger.warning("shap not found. cannot use interpret_model without shap.") - - try: - from pandas_profiling import __version__ - logger.info("pandas_profiling==" + str(__version__)) - except: - logger.warning("pandas_profiling not found") - try: from wordcloud import __version__ logger.info("wordcloud==" + str(__version__)) except: logger.warning("wordcloud not found") - try: - from umap import __version__ - logger.info("umap==" + str(__version__)) - except: - logger.warning("umap not found") - - try: - from IPython import __version__ - logger.info("IPython==" + str(__version__)) - except: - logger.warning("IPython not found") - - try: - from ipywidgets import __version__ - logger.info("ipywidgets==" + str(__version__)) - except: - logger.warning("ipywidgets not found") - - try: - from joblib import __version__ - logger.info("joblib==" + str(__version__)) - except: - logger.warning("joblib not found") - - try: - from imblearn import __version__ - logger.info("imblearn==" + str(__version__)) - except: - logger.warning("imblearn not found") - try: from mlflow.version import VERSION import warnings @@ -332,11 +241,6 @@ def setup(data, except: logger.warning("mlflow not found") - try: - from awscli import __version__ - logger.info("awscli==" + str(__version__)) - except: - logger.warning("awscli not found. cannot use deploy_model without awscli") logger.info("Checking Exceptions") @@ -348,7 +252,6 @@ def setup(data, import warnings warnings.filterwarnings('ignore') - """ error handling starts here """ @@ -1764,6 +1667,11 @@ def plot_model(model = None, #exception checking import sys + import logging + logger.info("Initializing plot_model()") + logger.info("""plot_model(model={}, plot={}, topic_num={}, save={}, system={})""".\ + format(str(model), str(plot), str(topic_num), str(save), str(system))) + #ignore warnings import warnings warnings.filterwarnings('ignore') @@ -1771,7 +1679,9 @@ def plot_model(model = None, #setting default of topic_num if model is not None and topic_num is None: topic_num = 'Topic 0' - + logger.info("Topic selected. 
topic_num : " + str(topic_num)) + + """ exception handling starts here """ @@ -1801,6 +1711,8 @@ def plot_model(model = None, mod_type = 'rp' + logger.info("Checking exceptions") + #plot checking allowed_plots = ['frequency', 'distribution', 'bigram', 'trigram', 'sentiment', 'pos', 'tsne', 'topic_model', 'topic_distribution', 'wordcloud', 'umap'] @@ -1826,7 +1738,7 @@ def plot_model(model = None, """ - + logger.info("Importing libraries") #import dependencies import pandas as pd import numpy @@ -1842,6 +1754,10 @@ def plot_model(model = None, save_param = True else: save_param = False + + logger.info("save_param set to " + str(save_param)) + + logger.info("plot type: " + str(plot)) if plot == 'frequency': @@ -1851,14 +1767,17 @@ def plot_model(model = None, def get_top_n_words(corpus, n=None): vec = CountVectorizer() + logger.info("Fitting CountVectorizer()") bag_of_words = vec.fit_transform(corpus) sum_words = bag_of_words.sum(axis=0) words_freq = [(word, sum_words[0, idx]) for word, idx in vec.vocabulary_.items()] words_freq =sorted(words_freq, key = lambda x: x[1], reverse=True) return words_freq[:n] - if topic_num is None: + logger.info("Rendering Visual") + if topic_num is None: + logger.warning("topic_num set to None. Plot generated at corpus level.") common_words = get_top_n_words(data_[target_], n=100) df2 = pd.DataFrame(common_words, columns = ['Text' , 'count']) df3 = df2.groupby('Text').sum()['count'].sort_values(ascending=False).iplot( @@ -1866,22 +1785,24 @@ def plot_model(model = None, asFigure=save_param) else: - title = str(topic_num) + ': ' + 'Top 100 words after removing stop words' - + logger.info("SubProcess assign_model() called ==================================") assigned_df = assign_model(model, verbose = False) + logger.info("SubProcess assign_model() end ==================================") filtered_df = assigned_df.loc[assigned_df['Dominant_Topic'] == topic_num] - common_words = get_top_n_words(filtered_df[target_], n=100) df2 = pd.DataFrame(common_words, columns = ['Text' , 'count']) df3 = df2.groupby('Text').sum()['count'].sort_values(ascending=False).iplot( kind='bar', yTitle='Count', linecolor='black', title=title, asFigure=save_param) + logger.info("Visual Rendered Successfully") + if save: df3.write_html('Word Frequency.html') + logger.info("Saving 'Word Frequency.html' in current active directory") except: - + logger.warning("Invalid topic_num param or empty Vocab. Try changing Topic Number.") sys.exit('(Value Error): Invalid topic_num param or empty Vocab. Try changing Topic Number.') @@ -1890,9 +1811,10 @@ def plot_model(model = None, try: if topic_num is None: - + logger.warning("topic_num set to None. 
Plot generated at corpus level.") b = data_[target_].apply(lambda x: len(str(x).split())) b = pd.DataFrame(b) + logger.info("Rendering Visual") b = b[target_].iplot( kind='hist', bins=100, @@ -1903,13 +1825,15 @@ def plot_model(model = None, asFigure=save_param) else: - title = str(topic_num) + ': ' + 'Word Count Distribution' + logger.info("SubProcess assign_model() called ==================================") assigned_df = assign_model(model, verbose = False) + logger.info("SubProcess assign_model() end ==================================") filtered_df = assigned_df.loc[assigned_df['Dominant_Topic'] == topic_num] b = filtered_df[target_].apply(lambda x: len(str(x).split())) b = pd.DataFrame(b) + logger.info("Rendering Visual") b = b[target_].iplot( kind='hist', bins=100, @@ -1919,11 +1843,14 @@ def plot_model(model = None, title= title, asFigure=save_param) + logger.info("Visual Rendered Successfully") + if save: b.write_html('Distribution.html') + logger.info("Saving 'Distribution.html' in current active directory") except: - + logger.warning("Invalid topic_num param or empty Vocab. Try changing Topic Number.") sys.exit('(Value Error): Invalid topic_num param or empty Vocab. Try changing Topic Number.') @@ -1934,6 +1861,7 @@ def plot_model(model = None, from sklearn.feature_extraction.text import CountVectorizer def get_top_n_bigram(corpus, n=None): + logger.info("Fitting CountVectorizer()") vec = CountVectorizer(ngram_range=(2, 2)).fit(corpus) bag_of_words = vec.transform(corpus) sum_words = bag_of_words.sum(axis=0) @@ -1942,28 +1870,33 @@ def plot_model(model = None, return words_freq[:n] if topic_num is None: - + logger.warning("topic_num set to None. Plot generated at corpus level.") common_words = get_top_n_bigram(data_[target_], 100) df3 = pd.DataFrame(common_words, columns = ['Text' , 'count']) + logger.info("Rendering Visual") df3 = df3.groupby('Text').sum()['count'].sort_values(ascending=False).iplot( kind='bar', yTitle='Count', linecolor='black', title='Top 100 bigrams after removing stop words', asFigure=save_param) else: - title = str(topic_num) + ': ' + 'Top 100 bigrams after removing stop words' + logger.info("SubProcess assign_model() called ==================================") assigned_df = assign_model(model, verbose = False) + logger.info("SubProcess assign_model() end ==================================") filtered_df = assigned_df.loc[assigned_df['Dominant_Topic'] == topic_num] - common_words = get_top_n_bigram(filtered_df[target_], 100) df3 = pd.DataFrame(common_words, columns = ['Text' , 'count']) + logger.info("Rendering Visual") df3 = df3.groupby('Text').sum()['count'].sort_values(ascending=False).iplot( kind='bar', yTitle='Count', linecolor='black', title=title, asFigure=save_param) + logger.info("Visual Rendered Successfully") + if save: - df3.write_html('Bigram.html') + df3.write_html('Bigram.html') + logger.info("Saving 'Bigram.html' in current active directory") except: - + logger.warning("Invalid topic_num param or empty Vocab. Try changing Topic Number.") sys.exit('(Value Error): Invalid topic_num param or empty Vocab. 
Try changing Topic Number.') elif plot == 'trigram': @@ -1974,6 +1907,7 @@ def plot_model(model = None, def get_top_n_trigram(corpus, n=None): vec = CountVectorizer(ngram_range=(3, 3)).fit(corpus) + logger.info("Fitting CountVectorizer()") bag_of_words = vec.transform(corpus) sum_words = bag_of_words.sum(axis=0) words_freq = [(word, sum_words[0, idx]) for word, idx in vec.vocabulary_.items()] @@ -1981,27 +1915,33 @@ def plot_model(model = None, return words_freq[:n] if topic_num is None: - + logger.warning("topic_num set to None. Plot generated at corpus level.") common_words = get_top_n_trigram(data_[target_], 100) df3 = pd.DataFrame(common_words, columns = ['Text' , 'count']) + logger.info("Rendering Visual") df3 = df3.groupby('Text').sum()['count'].sort_values(ascending=False).iplot( kind='bar', yTitle='Count', linecolor='black', title='Top 100 trigrams after removing stop words', asFigure=save_param) else: - title = str(topic_num) + ': ' + 'Top 100 trigrams after removing stop words' + logger.info("SubProcess assign_model() called ==================================") assigned_df = assign_model(model, verbose = False) + logger.info("SubProcess assign_model() end ==================================") filtered_df = assigned_df.loc[assigned_df['Dominant_Topic'] == topic_num] common_words = get_top_n_trigram(filtered_df[target_], 100) df3 = pd.DataFrame(common_words, columns = ['Text' , 'count']) + logger.info("Rendering Visual") df3 = df3.groupby('Text').sum()['count'].sort_values(ascending=False).iplot( kind='bar', yTitle='Count', linecolor='black', title=title, asFigure=save_param) + logger.info("Visual Rendered Successfully") + if save: df3.write_html('Trigram.html') + logger.info("Saving 'Trigram.html' in current active directory") except: - + logger.warning("Invalid topic_num param or empty Vocab. Try changing Topic Number.") sys.exit('(Value Error): Invalid topic_num param or empty Vocab. Try changing Topic Number.') @@ -2014,9 +1954,10 @@ def plot_model(model = None, from textblob import TextBlob if topic_num is None: - + logger.warning("topic_num set to None. Plot generated at corpus level.") sentiments = data_[target_].map(lambda text: TextBlob(text).sentiment.polarity) sentiments = pd.DataFrame(sentiments) + logger.info("Rendering Visual") sentiments = sentiments[target_].iplot( kind='hist', bins=50, @@ -2028,10 +1969,13 @@ def plot_model(model = None, else: title = str(topic_num) + ': ' + 'Sentiment Polarity Distribution' + logger.info("SubProcess assign_model() called ==================================") assigned_df = assign_model(model, verbose = False) + logger.info("SubProcess assign_model() end ==================================") filtered_df = assigned_df.loc[assigned_df['Dominant_Topic'] == topic_num] sentiments = filtered_df[target_].map(lambda text: TextBlob(text).sentiment.polarity) sentiments = pd.DataFrame(sentiments) + logger.info("Rendering Visual") sentiments = sentiments[target_].iplot( kind='hist', bins=50, @@ -2041,11 +1985,14 @@ def plot_model(model = None, title=title, asFigure=save_param) + logger.info("Visual Rendered Successfully") + if save: - sentiments.write_html('Sentiments.html') + sentiments.write_html('Sentiments.html') + logger.info("Saving 'Sentiments.html' in current active directory") except: - + logger.warning("Invalid topic_num param or empty Vocab. Try changing Topic Number.") sys.exit('(Value Error): Invalid topic_num param or empty Vocab. 
Try changing Topic Number.') @@ -2054,10 +2001,12 @@ def plot_model(model = None, from textblob import TextBlob b = list(id2word.token2id.keys()) + logger.info("Fitting TextBlob()") blob = TextBlob(str(b)) pos_df = pd.DataFrame(blob.tags, columns = ['word' , 'pos']) pos_df = pos_df.loc[pos_df['pos'] != 'POS'] pos_df = pos_df.pos.value_counts()[:20] + logger.info("Rendering Visual") pos_df = pos_df.iplot( kind='bar', xTitle='POS', @@ -2065,13 +2014,18 @@ def plot_model(model = None, title='Top 20 Part-of-speech tagging for review corpus', asFigure=save_param) + logger.info("Visual Rendered Sucessfully") + if save: pos_df.write_html('POS.html') + logger.info("Saving 'POS.html' in current active directory") elif plot == 'tsne': + logger.info("SubProcess assign_model() called ==================================") b = assign_model(model, verbose = False) + logger.info("SubProcess assign_model() end ==================================") b.dropna(axis=0, inplace=True) #droping rows where Dominant_Topic is blank c = [] @@ -2083,13 +2037,16 @@ def plot_model(model = None, bb = b[c] from sklearn.manifold import TSNE + logger.info("Fitting TSNE()") X_embedded = TSNE(n_components=3).fit_transform(bb) + logger.info("Sorting Dataframe") X = pd.DataFrame(X_embedded) X['Dominant_Topic'] = b['Dominant_Topic'] X.sort_values(by='Dominant_Topic', inplace=True) X.dropna(inplace=True) + logger.info("Rendering Visual") import plotly.express as px df = X fig = px.scatter_3d(df, x=0, y=1, z=2, @@ -2098,8 +2055,11 @@ def plot_model(model = None, if system: fig.show() + logger.info("Visual Rendered Successfully") + if save: - fig.write_html("TSNE.html") + fig.write_html("TSNE.html") + logger.info("Saving 'TSNE.html' in current active directory") elif plot == 'topic_model': @@ -2110,8 +2070,10 @@ def plot_model(model = None, import warnings warnings.filterwarnings('ignore') pyLDAvis.enable_notebook() + logger.info("Preparing pyLDAvis visual") vis = pyLDAvis.gensim.prepare(model, corpus, id2word, mds='mmds') display(vis) + logger.info("Visual Rendered Successfully") elif plot == 'topic_distribution': @@ -2157,7 +2119,9 @@ def plot_model(model = None, kw_df = pd.DataFrame({'Topic': topic_name, 'Keyword' : keyword}).set_index('Topic') + logger.info("SubProcess assign_model() called ==================================") ass_df = assign_model(model, verbose = False) + logger.info("SubProcess assign_model() end ==================================") ass_df_pivot = ass_df.pivot_table(index='Dominant_Topic', values='Topic_0', aggfunc='count') df2 = ass_df_pivot.join(kw_df) df2 = df2.reset_index() @@ -2168,6 +2132,8 @@ def plot_model(model = None, """ + logger.info("Sorting Dataframe") + topic_list = list(df2['Topic']) s = [] @@ -2192,14 +2158,19 @@ def plot_model(model = None, sorting column ends """ + logger.info("Rendering Visual") + import plotly.express as px fig = px.bar(df2, x='Topic', y='Documents', hover_data = ['Keyword'], title='Document Distribution by Topics') if system: fig.show() + logger.info("Visual Rendered Successfully") + if save: fig.write_html("Topic Distribution.html") + logger.info("Saving 'Topic Distribution.html' in current active directory") elif plot == 'wordcloud': @@ -2211,15 +2182,18 @@ def plot_model(model = None, stopwords = set(STOPWORDS) if topic_num is None: - + logger.warning("topic_num set to None. 
Plot generated at corpus level.") atext = " ".join(review for review in data_[target_]) else: + logger.info("SubProcess assign_model() called ==================================") assigned_df = assign_model(model, verbose = False) + logger.info("SubProcess assign_model() end ==================================") filtered_df = assigned_df.loc[assigned_df['Dominant_Topic'] == topic_num] atext = " ".join(review for review in filtered_df[target_]) + logger.info("Fitting WordCloud()") wordcloud = WordCloud(width = 800, height = 800, background_color ='white', stopwords = stopwords, @@ -2231,16 +2205,24 @@ def plot_model(model = None, plt.axis("off") plt.tight_layout(pad = 0) + logger.info("Rendering Visual") + if save or log_plots_param: if system: plt.savefig("Wordcloud.png") else: plt.savefig("Wordcloud.png") plt.close() + + logger.info("Saving 'Wordcloud.png' in current active directory") + else: plt.show() - + + logger.info("Visual Rendered Successfully") + except: + logger.warning("Invalid topic_num param or empty Vocab. Try changing Topic Number.") sys.exit('(Value Error): Invalid topic_num param or empty Vocab. Try changing Topic Number.') elif plot == 'umap': @@ -2256,24 +2238,36 @@ def plot_model(model = None, import matplotlib.pyplot as plt tfidf = TfidfVectorizer() + logger.info("Fitting TfidfVectorizer()") docs = tfidf.fit_transform(data_[target_]) # Instantiate the clustering model clusters = KMeans(n_clusters=5, random_state=seed) + logger.info("Fitting KMeans()") clusters.fit(docs) plt.figure(figsize=(10,6)) umap = UMAPVisualizer(random_state=seed) + logger.info("Fitting UMAP()") umap.fit(docs, ["c{}".format(c) for c in clusters.labels_]) + logger.info("Rendering Visual") + if save or log_plots_param: if system: umap.show(outpath="UMAP.png") else: umap.show(outpath="UMAP.png", clear_figure=True) + + logger.info("Saving 'UMAP.png' in current active directory") + else: umap.show() + + logger.info("Visual Rendered Successfully") + + logger.info("plot_model() succesfully completed......................................") def tune_model(model=None, multi_core=False, diff --git a/pycaret/regression.py b/pycaret/regression.py index df4cc9bc952df73fa2de42b3d8678f2c6098464b..d461fa77e416faee80fffb3b22047c51c858de04 100644 --- a/pycaret/regression.py +++ b/pycaret/regression.py @@ -2,7 +2,7 @@ # Author: Moez Ali # License: MIT # Release: PyCaret 2.0x -# Last modified : 24/07/2020 +# Last modified : 27/07/2020 def setup(data, target, @@ -497,16 +497,39 @@ def setup(data, from platform import python_version, platform, python_build, machine - logger.info("python_version: " + str(python_version())) - logger.info("python_build: " + str(python_build())) - logger.info("machine: " + str(machine())) - logger.info("platform: " + str(platform())) + try: + logger.info("python_version: " + str(python_version())) + except: + logger.warning("cannot find platform.python_version") + + try: + logger.info("python_build: " + str(python_build())) + except: + logger.warning("cannot find platform.python_build") + + try: + logger.info("machine: " + str(machine())) + except: + logger.warning("cannot find platform.machine") + + try: + logger.info("platform: " + str(platform())) + except: + logger.warning("cannot find platform.platform") import psutil - psvm = psutil.virtual_memory() - logger.info("Memory: " + str(psvm)) - logger.info("Physical Core: " + str(psutil.cpu_count(logical=False))) - logger.info("Logical Core: " + str(psutil.cpu_count(logical=True))) + + try: + psvm = psutil.virtual_memory() + 
logger.info("Memory: " + str(psvm)) + except: + logger.warning("cannot find psutil.version_memory") + + try: + logger.info("Physical Core: " + str(psutil.cpu_count(logical=False))) + logger.info("Logical Core: " + str(psutil.cpu_count(logical=True))) + except: + logger.warning("cannot find psutil.cpu_count") logger.info("Checking libraries") @@ -546,134 +569,6 @@ def setup(data, except: logger.warning("catboost not found") - try: - from kmodes import __version__ - logger.info("kmodes==" + str(__version__)) - except: - logger.warning("kmodes not found") - - try: - from pyod.version import __version__ - logger.info("pyod==" + str(__version__)) - except: - logger.warning("pyod not found") - - try: - import warnings - warnings.filterwarnings('ignore') - from gensim import __version__ - logger.info("gensim==" + str(__version__)) - except: - logger.warning("gensim not found") - - try: - from spacy import __version__ - logger.info("spacy==" + str(__version__)) - except: - logger.warning("spacy not found") - - try: - from nltk import __version__ - logger.info("nltk==" + str(__version__)) - except: - logger.warning("nltk not found") - - try: - from textblob import __version__ - logger.info("textblob==" + str(__version__)) - except: - logger.warning("textblob not found") - - try: - from pyLDAvis import __version__ - logger.info("pyLDAvis==" + str(__version__)) - except: - logger.warning("pyLDAvis not found") - - try: - from mlxtend import __version__ - logger.info("mlxtend==" + str(__version__)) - except: - logger.warning("mlxtend not found") - - try: - from matplotlib import __version__ - logger.info("matplotlib==" + str(__version__)) - except: - logger.warning("matplotlib not found") - - try: - from seaborn import __version__ - logger.info("seaborn==" + str(__version__)) - except: - logger.warning("seaborn not found") - - try: - from plotly import __version__ - logger.info("plotly==" + str(__version__)) - except: - logger.warning("plotly not found") - - try: - from cufflinks import __version__ - logger.info("cufflinks==" + str(__version__)) - except: - logger.warning("cufflinks not found") - - try: - from yellowbrick import __version__ - logger.info("yellowbrick==" + str(__version__)) - except: - logger.warning("yellowbrick not found") - - try: - from shap import __version__ - logger.info("shap==" + str(__version__)) - except: - logger.warning("shap not found. 
cannot use interpret_model without shap.") - - try: - from pandas_profiling import __version__ - logger.info("pandas_profiling==" + str(__version__)) - except: - logger.warning("pandas_profiling not found") - - try: - from wordcloud import __version__ - logger.info("wordcloud==" + str(__version__)) - except: - logger.warning("wordcloud not found") - - try: - from umap import __version__ - logger.info("umap==" + str(__version__)) - except: - logger.warning("umap not found") - - try: - from IPython import __version__ - logger.info("IPython==" + str(__version__)) - except: - logger.warning("IPython not found") - - try: - from ipywidgets import __version__ - logger.info("ipywidgets==" + str(__version__)) - except: - logger.warning("ipywidgets not found") - - try: - from joblib import __version__ - logger.info("joblib==" + str(__version__)) - except: - logger.warning("joblib not found") - - try: - from imblearn import __version__ - logger.info("imblearn==" + str(__version__)) - except: - logger.warning("imblearn not found") - try: from mlflow.version import VERSION import warnings @@ -682,12 +577,6 @@ def setup(data, except: logger.warning("mlflow not found") - try: - from awscli import __version__ - logger.info("awscli==" + str(__version__)) - except: - logger.warning("awscli not found. cannot use deploy_model without awscli") - #run_time import datetime, time runtime_start = time.time()