diff --git a/nlp.py b/nlp.py
index 7922039057b184f7aae8b4d05abc05e31710e54d..56df48bcce23559be0bfeaf5805da3d43e9abb20 100644
--- a/nlp.py
+++ b/nlp.py
@@ -63,9 +63,21 @@ def setup(data,
       status bar. To switch off the warnings, you may consider the following code 
       in your anaconda terminal. 
     
-      jupyter notebook --NotebookApp.iopub_data_rate_limit=1.0e10
-    
+         jupyter notebook --NotebookApp.iopub_data_rate_limit=1.0e10
+      
+      
+    - Some functionalities in pycaret.nlp require you to have an English language
+      model. The language models are not downloaded automatically when you install
+      pycaret. You will have to download two models using your Anaconda Prompt or
+      Python command line interface. To download the models, please type the
+      following in your command line:
+      
+         python -m spacy download en_core_web_sm
+         python -m textblob.download_corpora
     
+      Once downloaded, please restart your kernel and re-run the setup.
+        
+          
     
     """
     
@@ -106,7 +118,14 @@ def setup(data,
         if type(session_id) is not int:
             sys.exit('(Type Error): session_id parameter must be an integer.')  
             
-            
+    #check that spacy and its english model can be loaded
+    try:
+        import spacy
+        sp = spacy.load('en_core_web_sm', disable=['parser', 'ner'])
+    except:
+        sys.exit('(Type Error): spacy english model is not yet downloaded. See the documentation of setup to see installation guide.')
+    
+    
     """
     error handling ends here
     """
@@ -203,6 +222,12 @@ def setup(data,
     """
     DEFINE STOPWORDS
     """
+    try:
+        import nltk
+        nltk.download('stopwords')
+    except:
+        pass
+
     from nltk.corpus import stopwords
     stop_words = stopwords.words('english')
     
@@ -486,6 +511,7 @@ def setup(data,
     return text, data_, corpus, id2word, seed, target_, experiment__
 
 
+
 def create_model(model=None,
                  multi_core=False,
                  num_topics = None,
@@ -1066,6 +1092,7 @@ def assign_model(model,
     
     return bb_
 
+
 def plot_model(model = None,
                plot = 'frequency',
                topic_num = None):
@@ -1614,6 +1641,7 @@ def plot_model(model = None,
         umap.fit(docs, ["c{}".format(c) for c in clusters.labels_])
         umap.show()
 
+
 def tune_model(model=None,
                multi_core=False,
                supervised_target=None,
@@ -1621,10 +1649,10 @@ def tune_model(model=None,
                optimize=None,
                auto_fe = True,
                fold=10):
-    
-    
+
+
     """
-        
+
     Description:
     ------------
     This function is only applicable for topic models created using create_model().
@@ -1634,13 +1662,13 @@ def tune_model(model=None,
     is considered as objective function to maximize. This function allows to select
     estimator from a large library available in pycaret (see below). By default 
     supervised estimator is Linear. 
-    
+
     This function returns the topic model with K number of topics that are considered
     best using optimize param.
-    
+
     setup() function must be called prior to using this function.
-    
-    
+
+
         Example
         -------
         tuned_lda = tune_model('lda', supervised_target = 'status', optimize='AUC') 
@@ -1650,12 +1678,12 @@ def tune_model(model=None,
         default optimize param is 'Accuracy' for classification tasks and 'R2' for
         regression tasks. Task is determined automatically based on supervised_target
         param.
-        
-        
+
+
         Alternatively, 
-        
+
         tuned_lda_us = tune_model('lda')
-        
+
         This will return trained Latent Dirichlet Allocation model with k number of 
         topics that is optimized to improve coherence value of model, since no 
         supervised_target param is passed.
@@ -1667,7 +1695,7 @@ def tune_model(model=None,
     model : string, default = None
 
     Enter abbreviated name of the model. List of available models supported: 
-    
+
     Model                              Abbreviated String   Original Implementation 
     ---------                          ------------------   -----------------------
     Latent Dirichlet Allocation        'lda'                gensim/models/ldamodel.html
@@ -1679,11 +1707,11 @@ def tune_model(model=None,
     multi_core: Boolean, default = False
     True would utilize all CPU cores to parallelize and speed up model training. Only
     available for 'lda'. For all other models, multi_core parameter is ignored.
-    
+
     supervised_target: string, default = None
     Name of target column for supervised learning. If None model coherence value is used
     as objective function.
-    
+
     estimator: string, default = None
 
     Estimator                     Abbreviated String     Task 
@@ -1705,6 +1733,7 @@ def tune_model(model=None,
     Extra Trees Classifier        'et'                   Classification
     Extreme Gradient Boosting     'xgboost'              Classification
     Light Gradient Boosting       'lightgbm'             Classification
+    CatBoost Classifier           'catboost'             Classification
     Linear Regression             'lr'                   Regression
     Lasso Regression              'lasso'                Regression
     Ridge Regression              'ridge'                Regression
@@ -1729,81 +1758,82 @@ def tune_model(model=None,
     Multi Level Perceptron        'mlp'                  Regression
     Extreme Gradient Boosting     'xgboost'              Regression
     Light Gradient Boosting       'lightgbm'             Regression
-    
+    CatBoost Regressor            'catboost'             Regression
+
     If set to None, by default Linear model is used for both classification
     and regression tasks.
-    
+
     optimize: string, default = None
-    
+
     For Classification tasks:
     Accuracy, AUC, Recall, Precision, F1, Kappa
-    
+
     For Regression tasks:
     MAE, MSE, RMSE, R2, ME
-    
+
     If set to None, default is 'Accuracy' for classification and 'R2' for 
     regression tasks.
-    
+
     auto_fe: boolean, default = True
     Automatic text feature engineering. Only used when supervised_target is
     passed. When set to true, it will generate text based features such as 
     polarity, subjectivity, wordcounts to be used in supervised learning.
     Ignored when supervised_target is set to None.
-    
+
     fold: integer, default = 10
     Number of folds to be used in Kfold CV. Must be at least 2. 
 
-    
+
     Returns:
     --------
 
     visual plot:  Visual plot with k number of topics on x-axis with metric to
     -----------   optimize on y-axis. Coherence is used when learning is 
                   unsupervised. Also, prints the best model metric.
-    
+
     model:        trained model object with best K number of topics.
     -----------
 
     Warnings:
     ---------
-    
+
     - Random Projections ('rp') and Non Negative Matrix Factorization ('nmf')
       is not available for unsupervised learning. Error is raised when 'rp' or
       'nmf' is passed without supervised_target.
-      
-      
+
+
     - Estimators using kernel based methods such as Kernel Ridge Regressor, 
       Automatic Relevance Determinant, Gaussian Process Classifier, Radial Basis
       Support Vector Machine and Multi Level Perceptron may take longer training 
       times.
-           
-         
-    
-          
+
+
+
+
     """
-    
-    
-    
+
+
+
     """
     exception handling starts here
     """
-    
+
     #ignore warnings
     import warnings
     warnings.filterwarnings('ignore') 
-    
+
     import sys
-    
+
     #checking for model parameter
     if model is None:
         sys.exit('(Value Error): Model parameter Missing. Please see docstring for list of available models.')
-        
+
     #checking for allowed models
     allowed_models = ['lda', 'lsi', 'hdp', 'rp', 'nmf']
-    
+
     if model not in allowed_models:
         sys.exit('(Value Error): Model Not Available. Please see docstring for list of available models.')
-        
+
     #checking multicore type:
     if type(multi_core) is not bool:
         sys.exit('(Type Error): multi_core parameter can only take argument as True or False.')
@@ -1815,86 +1845,86 @@ def tune_model(model=None,
         all_col.remove(target)
         if supervised_target not in all_col:
             sys.exit('(Value Error): supervised_target not recognized. It can only be one of the following: ' + str(all_col))
-    
+
     #supervised target exception handling
     if supervised_target is None:
         models_not_allowed = ['rp', 'nmf']
-        
+
         if model in models_not_allowed:
             sys.exit('(Type Error): Model not supported for unsupervised tuning. Either supervised_target param has to be passed or different model has to be used. Please see docstring for available models.')
-    
-    
-    
+
+
+
     #checking estimator:
     if estimator is not None:
-        
+
         available_estimators = ['lr', 'knn', 'nb', 'dt', 'svm', 'rbfsvm', 'gpc', 'mlp', 'ridge', 'rf', 'qda', 'ada', 
                             'gbc', 'lda', 'et', 'lasso', 'ridge', 'en', 'lar', 'llar', 'omp', 'br', 'ard', 'par', 
                             'ransac', 'tr', 'huber', 'kr', 'svm', 'knn', 'dt', 'rf', 'et', 'ada', 'gbr', 
-                            'mlp', 'xgboost', 'lightgbm']
-                
+                            'mlp', 'xgboost', 'lightgbm', 'catboost']
+
         if estimator not in available_estimators:
             sys.exit('(Value Error): Estimator Not Available. Please see docstring for list of available estimators.')
-    
-    
+
+
     #checking optimize parameter
     if optimize is not None:
-        
+
         available_optimizers = ['MAE', 'MSE', 'RMSE', 'R2', 'ME', 'Accuracy', 'AUC', 'Recall', 'Precision', 'F1', 'Kappa']
-        
+
         if optimize not in available_optimizers:
             sys.exit('(Value Error): optimize parameter Not Available. Please see docstring for list of available parameters.')
-    
+
     #checking auto_fe:
     if type(auto_fe) is not bool:
         sys.exit('(Type Error): auto_fe parameter can only take argument as True or False.')
-    
-    
+
+
     #checking fold parameter
     if type(fold) is not int:
         sys.exit('(Type Error): Fold parameter only accepts integer value.')
-    
-    
+
+
     """
     exception handling ends here
     """
-    
+
     #pre-load libraries
     import pandas as pd
     import ipywidgets as ipw
     from IPython.display import display, HTML, clear_output, update_display
     import datetime, time
-    
+
     #progress bar
     max_steps = 25
 
     progress = ipw.IntProgress(value=0, min=0, max=max_steps, step=1 , description='Processing: ')
     display(progress)
-    
+
     timestampStr = datetime.datetime.now().strftime("%H:%M:%S")
-    
+
     monitor = pd.DataFrame( [ ['Initiated' , '. . . . . . . . . . . . . . . . . .', timestampStr ], 
                              ['Status' , '. . . . . . . . . . . . . . . . . .' , 'Loading Dependencies'],
                              ['Step' , '. . . . . . . . . . . . . . . . . .',  'Initializing' ] ],
                               columns=['', ' ', '   ']).set_index('')
-    
+
     display(monitor, display_id = 'monitor')
-        
-    
+
+
     #General Dependencies
     from sklearn.linear_model import LogisticRegression
     from sklearn.model_selection import cross_val_predict
     from sklearn import metrics
     import numpy as np
     import plotly.express as px
-    
+
     #setting up cufflinks
     import cufflinks as cf
     cf.go_offline()
     cf.set_config_file(offline=False, world_readable=True)
-    
+
     progress.value += 1 
-    
+
     #define the problem
     if supervised_target is None:
         problem ='unsupervised'
@@ -1902,7 +1932,7 @@ def tune_model(model=None,
         problem = 'classification'
     else:
         problem = 'regression'
-    
+
     #define topic_model_name
     if model == 'lda':
         topic_model_name = 'Latent Dirichlet Allocation'
@@ -1914,7 +1944,7 @@ def tune_model(model=None,
         topic_model_name = 'Non-Negative Matrix Factorization'
     elif model == 'rp':
         topic_model_name = 'Random Projections'
-        
+
     #defining estimator:
     if problem == 'classification' and estimator is None:
         estimator = 'lr'
@@ -1922,7 +1952,7 @@ def tune_model(model=None,
         estimator = 'lr'        
     else:
         estimator = estimator
-    
+
     #defining optimizer:
     if optimize is None and problem == 'classification':
         optimize = 'Accuracy'
@@ -1930,42 +1960,42 @@ def tune_model(model=None,
         optimize = 'R2'
     else:
         optimize=optimize
-    
+
     progress.value += 1 
-    
+
     #creating sentiments
-                             
+
     if problem == 'classification' or problem == 'regression':
-                             
+
         if auto_fe:
-            
+
             monitor.iloc[1,1:] = 'Feature Engineering'
             update_display(monitor, display_id = 'monitor')
-                             
+
             from textblob import TextBlob
-            
+
             monitor.iloc[2,1:] = 'Extracting Polarity'
             update_display(monitor, display_id = 'monitor')
-                             
+
             polarity = data_[target_].map(lambda text: TextBlob(text).sentiment.polarity)
-                             
+
             monitor.iloc[2,1:] = 'Extracting Subjectivity'
             update_display(monitor, display_id = 'monitor')
-                             
+
             subjectivity = data_[target_].map(lambda text: TextBlob(text).sentiment.subjectivity)
-                             
+
             monitor.iloc[2,1:] = 'Extracting Wordcount'
             update_display(monitor, display_id = 'monitor')
-                             
+
             word_count = [len(i) for i in text]
-            
+
             progress.value += 1 
-            
+
     #defining tuning grid
     param_grid = [2,4,8,16,32,64,100,200,300,400] 
-    
+
     master = []; master_df = []
-    
+
     monitor.iloc[1,1:] = 'Creating Topic Model'
     update_display(monitor, display_id = 'monitor')
 
@@ -1973,7 +2003,7 @@ def tune_model(model=None,
         progress.value += 1                      
         monitor.iloc[2,1:] = 'Fitting Model With ' + str(i) + ' Topics'
         update_display(monitor, display_id = 'monitor')
-                             
+
         #create and assign the model to dataset d
         m = create_model(model=model, multi_core=multi_core, num_topics=i, verbose=False)
         d = assign_model(m, verbose=False)
@@ -1987,40 +2017,40 @@ def tune_model(model=None,
         master_df.append(d)
 
         #topic model creation end's here
-    
+
     if problem == 'unsupervised':
-                             
+
         monitor.iloc[1,1:] = 'Evaluating Topic Model'
         update_display(monitor, display_id = 'monitor')
-        
+
         from gensim.models import CoherenceModel
 
         coherence = []
         metric = []
-        
+
         counter = 0
-        
+
         for i in master:
             progress.value += 1 
             monitor.iloc[2,1:] = 'Evaluating Coherence With ' + str(param_grid[counter]) + ' Topics'
             update_display(monitor, display_id = 'monitor')
-                             
+
             model = CoherenceModel(model=i, texts=text, dictionary=id2word, coherence='c_v')
             model_coherence = model.get_coherence()
             coherence.append(model_coherence)
             metric.append('Coherence')
             counter += 1
-        
+
         monitor.iloc[1,1:] = 'Compiling Results'
         monitor.iloc[1,1:] = 'Finalizing'
         update_display(monitor, display_id = 'monitor')
-                             
+
         df = pd.DataFrame({'# Topics': param_grid, 'Score' : coherence, 'Metric': metric})
         df.columns = ['# Topics', 'Score', 'Metric']
-        
+
         sorted_df = df.sort_values(by='Score', ascending=False)
         ival = sorted_df.index[0]
-        
+
         best_model = master[ival]
         best_model_df = master_df[ival]
 
@@ -2028,28 +2058,28 @@ def tune_model(model=None,
                       title= 'Coherence Value and # of Topics', color='Metric')
 
         fig.update_layout(plot_bgcolor='rgb(245,245,245)')
-        
+
         clear_output()
-        
+
         fig.show()
-        
+
         best_k = np.array(sorted_df.head(1)['# Topics'])[0]
         best_m = round(np.array(sorted_df.head(1)['Score'])[0],4)
         p = 'Best Model: ' + topic_model_name + ' |' + ' # Topics: ' + str(best_k) + ' | ' + 'Coherence: ' + str(best_m)
         print(p)
 
-    
+
     elif problem == 'classification':
-        
+
         """
-        
+
         defining estimator
-        
+
         """
-        
+
         monitor.iloc[1,1:] = 'Evaluating Topic Model'
         update_display(monitor, display_id = 'monitor')
-                             
+
         if estimator == 'lr':
 
             from sklearn.linear_model import LogisticRegression
@@ -2139,36 +2169,40 @@ def tune_model(model=None,
             from sklearn.ensemble import ExtraTreesClassifier 
             model = ExtraTreesClassifier(random_state=seed)
             full_name = 'Extra Trees Classifier'
-            
+
         elif estimator == 'xgboost':
-            
+
             from xgboost import XGBClassifier
             model = XGBClassifier(random_state=seed, n_jobs=-1, verbosity=0)
             full_name = 'Extreme Gradient Boosting'
-            
+
         elif estimator == 'lightgbm':
-            
+
             import lightgbm as lgb
             model = lgb.LGBMClassifier(random_state=seed)
             full_name = 'Light Gradient Boosting Machine'
-        
-        
+
+        elif estimator == 'catboost':
+            from catboost import CatBoostClassifier
+            model = CatBoostClassifier(random_state=seed, silent=True) # Silent is True to suppress CatBoost iteration results 
+            full_name = 'CatBoost Classifier'
+
         progress.value += 1 
-        
+
         """
         start model building here
 
         """
-                             
+
         acc = [];  auc = []; recall = []; prec = []; kappa = []; f1 = []
 
         for i in range(0,len(master_df)):
             progress.value += 1 
             param_grid_val = param_grid[i]
-            
+
             monitor.iloc[2,1:] = 'Evaluating Classifier With ' + str(param_grid_val) + ' Topics'
             update_display(monitor, display_id = 'monitor')                
-                             
+
             #prepare the dataset for supervised problem
             d = master_df[i]
             d.dropna(axis=0, inplace=True) #droping rows where Dominant_Topic is blank
@@ -2209,14 +2243,14 @@ def tune_model(model=None,
             else:
                 auc.append(0)
 
-                             
+
         monitor.iloc[1,1:] = 'Compiling Results'
         monitor.iloc[1,1:] = 'Finalizing'
         update_display(monitor, display_id = 'monitor')
-                             
+
         df = pd.DataFrame({'# Topics': param_grid, 'Accuracy' : acc, 'AUC' : auc, 'Recall' : recall, 
                    'Precision' : prec, 'F1' : f1, 'Kappa' : kappa})
-        
+
         sorted_df = df.sort_values(by=optimize, ascending=False)
         ival = sorted_df.index[0]
 
@@ -2230,33 +2264,33 @@ def tune_model(model=None,
         fig.update_layout(plot_bgcolor='rgb(245,245,245)')
         title= str(full_name) + ' Metrics and # of Topics'
         fig.update_layout(title={'text': title, 'y':0.95,'x':0.45,'xanchor': 'center','yanchor': 'top'})
-        
+
         clear_output()
 
         fig.show()
-        
+
         best_k = np.array(sorted_df.head(1)['# Topics'])[0]
         best_m = round(np.array(sorted_df.head(1)[optimize])[0],4)
         p = 'Best Model: ' + topic_model_name + ' |' + ' # Topics: ' + str(best_k) + ' | ' + str(optimize) + ' : ' + str(best_m)
         print(p)
 
     elif problem == 'regression':
-        
+
         """
-        
+
         defining estimator
-        
+
         """
-        
+
         monitor.iloc[1,1:] = 'Evaluating Topic Model'
         update_display(monitor, display_id = 'monitor')
-                                    
+
         if estimator == 'lr':
-        
+
             from sklearn.linear_model import LinearRegression
             model = LinearRegression()
             full_name = 'Linear Regression'
-        
+
         elif estimator == 'lasso':
 
             from sklearn.linear_model import Lasso
@@ -2381,36 +2415,41 @@ def tune_model(model=None,
             from sklearn.neural_network import MLPRegressor
             model = MLPRegressor(random_state=seed)
             full_name = 'MLP Regressor'
-            
+
         elif estimator == 'xgboost':
-            
+
             from xgboost import XGBRegressor
             model = XGBRegressor(random_state=seed, n_jobs=-1, verbosity=0)
             full_name = 'Extreme Gradient Boosting Regressor'
-            
+
         elif estimator == 'lightgbm':
-            
+
             import lightgbm as lgb
             model = lgb.LGBMRegressor(random_state=seed)
             full_name = 'Light Gradient Boosting Machine'
             
+        elif estimator == 'catboost':
+            from catboost import CatBoostRegressor
+            model = CatBoostRegressor(random_state=seed, silent = True)
+            full_name = 'CatBoost Regressor'
+
         progress.value += 1 
-        
+
         """
         start model building here
 
         """
-        
+
         score = []
         metric = []
-        
+
         for i in range(0,len(master_df)):
             progress.value += 1 
             param_grid_val = param_grid[i]
-            
+
             monitor.iloc[2,1:] = 'Evaluating Regressor With ' + str(param_grid_val) + ' Topics'
             update_display(monitor, display_id = 'monitor')    
-                             
+
             #prepare the dataset for supervised problem
             d = master_df[i]
             d.dropna(axis=0, inplace=True) #droping rows where Dominant_Topic is blank
@@ -2430,7 +2469,7 @@ def tune_model(model=None,
             if optimize == 'R2':
                 r2_ = metrics.r2_score(y,pred)
                 score.append(r2_)
-                
+
             elif optimize == 'MAE':          
                 mae_ = metrics.mean_absolute_error(y,pred)
                 score.append(mae_)
@@ -2438,31 +2477,31 @@ def tune_model(model=None,
             elif optimize == 'MSE':
                 mse_ = metrics.mean_squared_error(y,pred)
                 score.append(mse_)
-                
+
             elif optimize == 'RMSE':
                 mse_ = metrics.mean_squared_error(y,pred)        
                 rmse_ = np.sqrt(mse_)
                 score.append(rmse_)
-            
+
             elif optimize == 'ME':
                 max_error_ = metrics.max_error(y,pred)
                 score.append(max_error_)
-                
+
             metric.append(str(optimize))
-        
+
         monitor.iloc[1,1:] = 'Compiling Results'
         monitor.iloc[1,1:] = 'Finalizing'
         update_display(monitor, display_id = 'monitor')                    
-         
+
         df = pd.DataFrame({'# Topics': param_grid, 'Score' : score, 'Metric': metric})
         df.columns = ['# Topics', optimize, 'Metric']
-        
+
         #sorting to return best model
         if optimize == 'R2':
             sorted_df = df.sort_values(by=optimize, ascending=False)
         else: 
             sorted_df = df.sort_values(by=optimize, ascending=True)
-            
+
         ival = sorted_df.index[0]
 
         best_model = master[ival]
@@ -2474,19 +2513,20 @@ def tune_model(model=None,
         fig.update_layout(plot_bgcolor='rgb(245,245,245)')
         progress.value += 1 
         clear_output()
-        
+
         fig.show()
         best_k = np.array(sorted_df.head(1)['# Topics'])[0]
         best_m = round(np.array(sorted_df.head(1)[optimize])[0],4)
         p = 'Best Model: ' + topic_model_name + ' |' + ' # Topics: ' + str(best_k) + ' | ' + str(optimize) + ' : ' + str(best_m)
         print(p)
-        
+
     #storing into experiment
     tup = ('Best Model',best_model)
     experiment__.append(tup)    
-        
+
     return best_model
-    
+
+
 
 def evaluate_model(model):
     
@@ -2572,6 +2612,7 @@ def evaluate_model(model):
     
     d = interact_manual(plot_model, model = fixed(model), plot = a, topic_num=b)
 
+
 def save_model(model, model_name):
     
     """
@@ -2615,6 +2656,8 @@ def save_model(model, model_name):
     joblib.dump(model, model_name)
     print('Model Succesfully Saved')
 
+
+
 def load_model(model_name):
     
     """
@@ -2656,6 +2699,7 @@ def load_model(model_name):
     print('Model Sucessfully Loaded')
     return joblib.load(model_name)
 
+
 def save_experiment(experiment_name=None):
     
         
@@ -2714,6 +2758,7 @@ def save_experiment(experiment_name=None):
     
     print('Experiment Succesfully Saved')
 
+
 def load_experiment(experiment_name):
     
     """
@@ -2782,5 +2827,4 @@ def get_topics(data, text, model=None, num_topics=4):
     s = setup(data=data, target=text)
     c = create_model(model=model, num_topics=num_topics, verbose=False)
     dataset = assign_model(c, verbose=False)
-    return dataset
-
+    return dataset
\ No newline at end of file