Commit f4761c81
Authored on June 19, 2020 by PyCaret

    pycaret-nightly 0.7

Parent: 4c588463
Showing 5 changed files with 612 additions and 26 deletions (+612, -26)
Files changed (5):

    .gitignore                  +1   -0
    pycaret/classification.py   +0   -2
    pycaret/regression.py       +609 -22
    pycaret/utils.py            +1   -1
    setup.py                    +1   -1
.gitignore

@@ -16,3 +16,4 @@ catboost_info
 /pycaret_nightly.egg-info
 /pycaret.egg-info
 /dist
+.vscode/settings.json
pycaret/classification.py

@@ -7928,8 +7928,6 @@ def create_stacknet(estimator_list,
     model_fit_end = time.time()
     model_fit_time = np.array(model_fit_end - model_fit_start).round(2)
-
-    model_fit_time = np.array(model_fit_end - model_fit_start).round(2)

     mean_acc = np.mean(score_acc)
     mean_auc = np.mean(score_auc)
     mean_recall = np.mean(score_recall)
pycaret/regression.py

@@ -1632,6 +1632,7 @@ def setup(data,
     if logging_param:

         import mlflow
+        from pathlib import Path

         if experiment_name is None:
             exp_name_ = 'clf-default-name'
@@ -1677,6 +1678,9 @@ def setup(data,
         # Log the transformation pipeline
         save_model(prep_pipe, 'Transformation Pipeline', verbose=False)
         mlflow.log_artifact('Transformation Pipeline' + '.pkl')
+        size_bytes = Path('Transformation Pipeline.pkl').stat().st_size
+        size_kb = np.round(size_bytes/1000, 2)
+        mlflow.set_tag("Size KB", size_kb)
         os.remove('Transformation Pipeline.pkl')

         # Log pandas profile
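The three added lines follow a pattern repeated throughout this commit: after logging a pickled artifact, the run is tagged with the file's size in kilobytes. A minimal standalone sketch of that pattern (the file name and payload here are illustrative, not part of the commit):

```python
import os
import pickle
from pathlib import Path

import mlflow
import numpy as np

with mlflow.start_run():
    # Persist an arbitrary object, then log it as a run artifact.
    with open('example.pkl', 'wb') as f:
        pickle.dump({'demo': True}, f)
    mlflow.log_artifact('example.pkl')

    # Tag the run with the artifact size in KB, as the commit does.
    size_kb = np.round(Path('example.pkl').stat().st_size / 1000, 2)
    mlflow.set_tag("Size KB", size_kb)

    # Clean up the local copy once MLflow has stored it.
    os.remove('example.pkl')
```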
@@ -2329,6 +2333,7 @@ def create_model(estimator = None,
     #import mlflow
     import mlflow
+    from pathlib import Path
     import os

     mlflow.set_experiment(exp_name_log)
@@ -2410,10 +2415,13 @@ def create_model(estimator = None,
         except:
             pass

         # Log model and transformation pipeline
         save_model(model, 'Trained Model', verbose=False)
         mlflow.log_artifact('Trained Model' + '.pkl')
+        size_bytes = Path('Trained Model.pkl').stat().st_size
+        size_kb = np.round(size_bytes/1000, 2)
+        mlflow.set_tag("Size KB", size_kb)
         os.remove('Trained Model.pkl')

     progress.value += 1
@@ -2446,7 +2454,7 @@ def ensemble_model(estimator,
                    fold = 10,
                    n_estimators = 10,
                    round = 4,
-                   choose_better = True, #added in pycaret==1.0.1
+                   choose_better = False, #added in pycaret==1.0.1
                    optimize = 'r2', #added in pycaret==1.0.1
                    verbose = True):

    """
@@ -2495,7 +2503,7 @@ def ensemble_model(estimator,
     round: integer, default = 4
     Number of decimal places the metrics in the score grid will be rounded to.

-    choose_better: Boolean, default = True
+    choose_better: Boolean, default = False
     When set to set to True, base estimator is returned when the metric doesn't
     improve by ensemble_model. This gurantees the returned object would perform
     atleast equivalent to base estimator created using create_model or model
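With the default flipped to False, ensemble_model now always returns the ensembled model unless the caller opts back in to the comparison. A hypothetical usage sketch, not taken from this diff (dataset, target column, and the silent flag are illustrative assumptions):

```python
import pandas as pd
from pycaret.regression import setup, create_model, ensemble_model

# Any regression dataset with a numeric target works here.
data = pd.read_csv('insurance.csv')
exp = setup(data, target='charges', silent=True)  # silent skips the dtype prompt

dt = create_model('dt')

# choose_better=True restores the old fallback: keep the base estimator
# if the optimized metric (here R2) does not improve after bagging.
bagged_dt = ensemble_model(dt, method='Bagging',
                           choose_better=True, optimize='r2')
```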
@@ -2539,6 +2547,10 @@ def ensemble_model(estimator,
     #exception checking
     import sys

+    #run_time
+    import datetime, time
+    runtime_start = time.time()
+
     #Check for allowed method
     available_method = ['Bagging', 'Boosting']
     if method not in available_method:
@@ -2666,6 +2678,33 @@ def ensemble_model(estimator,
         estimator__ = model_dict.get(mn)

+    model_dict_logging = {'ExtraTreesRegressor' : 'Extra Trees Regressor',
+                        'GradientBoostingRegressor' : 'Gradient Boosting Regressor',
+                        'RandomForestRegressor' : 'Random Forest',
+                        'LGBMRegressor' : 'Light Gradient Boosting Machine',
+                        'XGBRegressor' : 'Extreme Gradient Boosting',
+                        'AdaBoostRegressor' : 'AdaBoost Regressor',
+                        'DecisionTreeRegressor' : 'Decision Tree',
+                        'Ridge' : 'Ridge Regression',
+                        'TheilSenRegressor' : 'TheilSen Regressor',
+                        'BayesianRidge' : 'Bayesian Ridge',
+                        'LinearRegression' : 'Linear Regression',
+                        'ARDRegression' : 'Automatic Relevance Determination',
+                        'KernelRidge' : 'Kernel Ridge',
+                        'RANSACRegressor' : 'Random Sample Consensus',
+                        'HuberRegressor' : 'Huber Regressor',
+                        'Lasso' : 'Lasso Regression',
+                        'ElasticNet' : 'Elastic Net',
+                        'Lars' : 'Least Angle Regression',
+                        'OrthogonalMatchingPursuit' : 'Orthogonal Matching Pursuit',
+                        'MLPRegressor' : 'Multi Level Perceptron',
+                        'KNeighborsRegressor' : 'K Neighbors Regressor',
+                        'SVR' : 'Support Vector Machine',
+                        'LassoLars' : 'Lasso Least Angle Regression',
+                        'PassiveAggressiveRegressor' : 'Passive Aggressive Regressor',
+                        'CatBoostRegressor' : 'CatBoost Regressor',
+                        'BaggingRegressor' : 'Bagging Regressor'}
+
     '''
     MONITOR UPDATE STARTS
     '''
@@ -2893,7 +2932,12 @@ def ensemble_model(estimator,
         if html_param:
             update_display(monitor, display_id = 'monitor')

+    model_fit_start = time.time()
     model.fit(data_X, data_y)
+    model_fit_end = time.time()
+
+    model_fit_time = np.array(model_fit_end - model_fit_start).round(2)
+
     #storing results in create_model_container
     create_model_container.append(model_results.data)
@@ -2934,6 +2978,9 @@ def ensemble_model(estimator,
     else:
         model = base_model

+    #re-instate display_constainer state
+    display_container.pop(-1)
+
     #storing into experiment
     model_name = str(model).split("(")[0]
     tup = (model_name, model)
@@ -2943,6 +2990,114 @@ def ensemble_model(estimator,
     tup = (nam, model_results)
     experiment__.append(tup)

+    #end runtime
+    runtime_end = time.time()
+    runtime = np.array(runtime_end - runtime_start).round(2)
+
+    if logging_param:
+
+        #Creating Logs message monitor
+        monitor.iloc[1,1:] = 'Creating Logs'
+        monitor.iloc[2,1:] = 'Almost Finished'
+
+        if verbose:
+            if html_param:
+                update_display(monitor, display_id = 'monitor')
+
+        import mlflow
+        from pathlib import Path
+        import os
+
+        mlflow.set_experiment(exp_name_log)
+        full_name = model_dict_logging.get(mn)
+
+        with mlflow.start_run(run_name=full_name) as run:
+
+            # Get active run to log as tag
+            RunID = mlflow.active_run().info.run_id
+
+            params = model.get_params()
+
+            for i in list(params):
+                v = params.get(i)
+                if len(str(v)) > 250:
+                    params.pop(i)
+
+            mlflow.log_params(params)
+            mlflow.log_metrics({"MAE": avgs_mae[0], "MSE": avgs_mse[0], "RMSE": avgs_rmse[0],
+                                "R2" : avgs_r2[0], "RMSLE": avgs_rmsle[0], "MAPE": avgs_mape[0]})
+
+            # Log internal parameters
+            mlflow.log_param('ensemble_model_estimator', full_name)
+            mlflow.log_param('ensemble_model_method', method)
+            mlflow.log_param('ensemble_model_fold', fold)
+            mlflow.log_param('ensemble_model_n_estimators', n_estimators)
+            mlflow.log_param('ensemble_model_round', round)
+            mlflow.log_param('ensemble_model_choose_better', choose_better)
+            mlflow.log_param('ensemble_model_optimize', optimize)
+            mlflow.log_param('ensemble_model_verbose', verbose)
+
+            #set tag of compare_models
+            mlflow.set_tag("Source", "ensemble_model")
+
+            import secrets
+            URI = secrets.token_hex(nbytes=4)
+            mlflow.set_tag("URI", URI)
+            mlflow.set_tag("USI", USI)
+            mlflow.set_tag("Run Time", runtime)
+            mlflow.set_tag("Run ID", RunID)
+
+            # Log training time in seconds
+            mlflow.log_metric("TT", model_fit_time)
+
+            # Log model and transformation pipeline
+            save_model(model, 'Trained Model', verbose=False)
+            mlflow.log_artifact('Trained Model' + '.pkl')
+            size_bytes = Path('Trained Model.pkl').stat().st_size
+            size_kb = np.round(size_bytes/1000, 2)
+            mlflow.set_tag("Size KB", size_kb)
+            os.remove('Trained Model.pkl')
+
+            # Generate hold-out predictions and save as html
+            holdout = predict_model(model, verbose=False)
+            holdout_score = pull()
+            display_container.pop(-1)
+            holdout_score.to_html('Holdout.html', col_space=65, justify='left')
+            mlflow.log_artifact('Holdout.html')
+            os.remove('Holdout.html')
+
+            # Log AUC and Confusion Matrix plot
+            if log_plots_param:
+
+                try:
+                    plot_model(model, plot = 'residuals', verbose=False, save=True, system=False)
+                    mlflow.log_artifact('Residuals.png')
+                    os.remove("Residuals.png")
+                except:
+                    pass
+
+                try:
+                    plot_model(model, plot = 'error', verbose=False, save=True, system=False)
+                    mlflow.log_artifact('Prediction Error.png')
+                    os.remove("Prediction Error.png")
+                except:
+                    pass
+
+                try:
+                    plot_model(model, plot = 'feature', verbose=False, save=True, system=False)
+                    mlflow.log_artifact('Feature Importance.png')
+                    os.remove("Feature Importance.png")
+                except:
+                    pass
+
+            # Log the CV results as model_results.html artifact
+            model_results.data.to_html('Results.html', col_space=65, justify='left')
+            mlflow.log_artifact('Results.html')
+            os.remove('Results.html')
+
     if verbose:
         clear_output()
         if html_param:
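The params loop above recurs in every logging block of this commit: MLflow rejected parameter values beyond a length limit (250 characters in versions of that era), so oversized values are dropped before log_params. A standalone sketch of just that filter, using a scikit-learn estimator for illustration:

```python
import mlflow
from sklearn.ensemble import RandomForestRegressor

model = RandomForestRegressor()
params = model.get_params()

# Iterate over a list() copy so we can pop keys while iterating.
for i in list(params):
    if len(str(params.get(i))) > 250:
        params.pop(i)   # drop values MLflow would reject

with mlflow.start_run():
    mlflow.log_params(params)
```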
@@ -3153,6 +3308,9 @@ def compare_models(blacklist = None,
     else:
         n_select_num = abs(n_select)

+    if n_select_num > len_mod:
+        n_select_num = len_mod
+
     if whitelist is not None:
         wl = len(whitelist)
         bl = len_of_blacklist
@@ -3527,12 +3685,19 @@ def compare_models(blacklist = None,
         mask = actual != 0
         return (np.fabs(actual - prediction)/actual)[mask].mean()

+    #create URI (before loop)
+    import secrets
+    URI = secrets.token_hex(nbytes=4)
+
     name_counter = 0

     model_store = []

     for model in model_library:

+        #run_time
+        runtime_start = time.time()
+
         progress.value += 1

         '''
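The URI tag is a fresh hex token generated once per compare_models call, so every per-model run from the same call shares it and can presumably be filtered later (e.g., with mlflow.search_runs and a tags.URI filter). A minimal sketch:

```python
import secrets

# 4 random bytes -> 8 hex characters, e.g. '1a2b3c4d'.
# One token per compare_models call; all runs from that call share it.
URI = secrets.token_hex(nbytes=4)
print(URI)
```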
@@ -3575,6 +3740,7 @@ def compare_models(blacklist = None,
             ytrain, ytest = data_y.iloc[train_i], data_y.iloc[test_i]
             time_start = time.time()
             model_store_by_fold.append(model.fit(Xtrain,ytrain))
+            time_end = time.time()
             pred_ = model.predict(Xtest)
             try:
@@ -3586,7 +3752,6 @@ def compare_models(blacklist = None,
             except:
                 pass
-            time_end = time.time()
             mae = metrics.mean_absolute_error(ytest,pred_)
             mse = metrics.mean_squared_error(ytest,pred_)
             rmse = np.sqrt(mse)
@@ -3642,13 +3807,12 @@ def compare_models(blacklist = None,
         avgs_rmsle = np.append(avgs_rmsle, np.mean(score_rmsle))
         avgs_r2 = np.append(avgs_r2, np.mean(score_r2))
         avgs_mape = np.append(avgs_mape, np.mean(score_mape))
-        avgs_training_time = np.append(avgs_training_time, np.sum(score_training_time))
+        avgs_training_time = np.append(avgs_training_time, np.mean(score_training_time))

         compare_models_ = pd.DataFrame({'Model': model_names[name_counter], 'MAE': avgs_mae, 'MSE': avgs_mse,
                                         'RMSE': avgs_rmse, 'R2': avgs_r2, 'RMSLE': avgs_rmsle, 'MAPE': avgs_mape,
                                         'TT (Sec)': avgs_training_time})
         master_display = pd.concat([master_display, compare_models_], ignore_index=True)
         master_display = master_display.round(round)
-        master_display.loc[:,'TT (Sec)'] = master_display.loc[:,'TT (Sec)'].round(2)
+        #master_display.loc[:,'TT (Sec)'] = master_display.loc[:,'TT (Sec)'].round(2)

         if sort == 'R2':
             master_display = master_display.sort_values(by = sort, ascending = False)
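Note the behavioural change here: per-fold training times are now averaged instead of summed, so the TT (Sec) column reports mean fit time per fold. A toy illustration with made-up timings:

```python
import numpy as np

score_training_time = np.array([0.8, 1.0, 1.2])   # seconds per fold (illustrative)

print(np.sum(score_training_time))    # 3.0  -> old behaviour (total time)
print(np.mean(score_training_time))   # 1.0  -> new behaviour (mean per fold)
```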
@@ -3661,6 +3825,61 @@ def compare_models(blacklist = None,
             if html_param:
                 update_display(master_display, display_id = display_id)

+        #end runtime
+        runtime_end = time.time()
+        runtime = np.array(runtime_end - runtime_start).round(2)
+
+        """
+        MLflow logging starts here
+        """
+
+        if logging_param:
+
+            import mlflow
+            from pathlib import Path
+            import os
+
+            run_name = model_names[name_counter]
+
+            with mlflow.start_run(run_name=run_name) as run:
+
+                # Get active run to log as tag
+                RunID = mlflow.active_run().info.run_id
+
+                params = model.get_params()
+
+                for i in list(params):
+                    v = params.get(i)
+                    if len(str(v)) > 250:
+                        params.pop(i)
+
+                mlflow.log_params(params)
+
+                #set tag of compare_models
+                mlflow.set_tag("Source", "compare_models")
+                mlflow.set_tag("URI", URI)
+                mlflow.set_tag("USI", USI)
+                mlflow.set_tag("Run Time", runtime)
+                mlflow.set_tag("Run ID", RunID)
+
+                #Log top model metrics
+                mlflow.log_metric("MAE", avgs_mae[0])
+                mlflow.log_metric("MSE", avgs_mse[0])
+                mlflow.log_metric("RMSE", avgs_rmse[0])
+                mlflow.log_metric("R2", avgs_r2[0])
+                mlflow.log_metric("RMSLE", avgs_rmsle[0])
+                mlflow.log_metric("MAPE", avgs_mape[0])
+                mlflow.log_metric("TT", avgs_training_time[0])
+
+                # Log model and transformation pipeline
+                save_model(model, 'Trained Model', verbose=False)
+                mlflow.log_artifact('Trained Model' + '.pkl')
+                size_bytes = Path('Trained Model.pkl').stat().st_size
+                size_kb = np.round(size_bytes/1000, 2)
+                mlflow.set_tag("Size KB", size_kb)
+                os.remove('Trained Model.pkl')
+
         score_mae = np.empty((0,0))
         score_mse = np.empty((0,0))
         score_rmse = np.empty((0,0))
@@ -3726,7 +3945,7 @@ def compare_models(blacklist = None,
update_display
(
monitor
,
display_id
=
'monitor'
)
progress
.
value
+=
1
k
=
model_dict
.
get
(
i
)
m
=
create_model
(
estimator
=
k
,
verbose
=
False
)
m
=
create_model
(
estimator
=
k
,
verbose
=
False
,
system
=
False
)
model_store_final
.
append
(
m
)
if
len
(
model_store_final
)
==
1
:
...
...
@@ -3747,7 +3966,7 @@ def compare_models(blacklist = None,
 def blend_models(estimator_list = 'All',
                  fold = 10,
                  round = 4,
-                 choose_better = True, #added in pycaret==1.0.1
+                 choose_better = False, #added in pycaret==1.0.1
                  optimize = 'r2', #added in pycaret==1.0.1
                  turbo = True,
                  verbose = True):
@@ -3797,7 +4016,7 @@ def blend_models(estimator_list = 'All',
     round: integer, default = 4
     Number of decimal places the metrics in the score grid will be rounded to.

-    choose_better: Boolean, default = True
+    choose_better: Boolean, default = False
     When set to True, base estimator is returned when the metric doesn't
     improve by ensemble_model. This gurantees the returned object would perform
     atleast equivalent to base estimator created using create_model or model
@@ -3847,8 +4066,11 @@ def blend_models(estimator_list = 'All',
     #exception checking
     import sys

+    #run_time
+    import datetime, time
+    runtime_start = time.time()
+
     #checking error for estimator_list (string)
     if estimator_list != 'All':
         for i in estimator_list:
             if 'sklearn' not in str(type(i)) and 'CatBoostRegressor' not in str(type(i)):
@@ -4182,6 +4404,7 @@ def blend_models(estimator_list = 'All',
except
:
pass
time_end
=
time
.
time
()
mae
=
metrics
.
mean_absolute_error
(
ytest
,
pred_
)
mse
=
metrics
.
mean_squared_error
(
ytest
,
pred_
)
...
...
@@ -4308,7 +4531,11 @@ def blend_models(estimator_list = 'All',
         if html_param:
             update_display(monitor, display_id = 'monitor')

+    model_fit_start = time.time()
     model.fit(data_X, data_y)
+    model_fit_end = time.time()
+
+    model_fit_time = np.array(model_fit_end - model_fit_start).round(2)
+
     progress.value += 1
@@ -4350,11 +4577,14 @@ def blend_models(estimator_list = 'All',
         base_models_ = []
         for i in estimator_list:
-            m = create_model(i, verbose=False)
+            m = create_model(i, verbose=False, system=False)
             s = create_model_container[-1][compare_dimension][-2:][0]
             scorer.append(s)
             base_models_.append(m)

+        #re-instate display_constainer state
+        display_container.pop(-1)
+
         if optimize == 'r2':
             index_scorer = scorer.index(max(scorer))
         else:
@@ -4365,6 +4595,101 @@ def blend_models(estimator_list = 'All',
     else:
         model = base_models_[index_scorer-1]

+    #end runtime
+    runtime_end = time.time()
+    runtime = np.array(runtime_end - runtime_start).round(2)
+
+    if logging_param:
+
+        #Creating Logs message monitor
+        monitor.iloc[1,1:] = 'Creating Logs'
+        monitor.iloc[2,1:] = 'Almost Finished'
+
+        if verbose:
+            if html_param:
+                update_display(monitor, display_id = 'monitor')
+
+        import mlflow
+        from pathlib import Path
+        import os
+
+        with mlflow.start_run(run_name='Voting Regressor') as run:
+
+            # Get active run to log as tag
+            RunID = mlflow.active_run().info.run_id
+
+            mlflow.log_metrics({"MAE": avgs_mae[0], "MSE": avgs_mse[0], "RMSE": avgs_rmse[0],
+                                "R2" : avgs_r2[0], "RMSLE": avgs_rmsle[0], "MAPE": avgs_mape[0]})
+
+            # Log internal parameters
+            mlflow.log_param("blend_models_estimator_list", model_names_final)
+            mlflow.log_param("blend_models_fold", fold)
+            mlflow.log_param("blend_models_round", round)
+            mlflow.log_param("blend_models_choose_better", choose_better)
+            mlflow.log_param("blend_models_optimize", optimize)
+            mlflow.log_param("blend_models_turbo", turbo)
+            mlflow.log_param("blend_models_verbose", verbose)
+
+            # Log model and transformation pipeline
+            save_model(model, 'Trained Model', verbose=False)
+            mlflow.log_artifact('Trained Model' + '.pkl')
+            size_bytes = Path('Trained Model.pkl').stat().st_size
+            size_kb = np.round(size_bytes/1000, 2)
+            mlflow.set_tag("Size KB", size_kb)
+            os.remove('Trained Model.pkl')
+
+            # Generate hold-out predictions and save as html
+            holdout = predict_model(model, verbose=False)
+            holdout_score = pull()
+            display_container.pop(-1)
+            holdout_score.to_html('Holdout.html', col_space=65, justify='left')
+            mlflow.log_artifact('Holdout.html')
+            os.remove('Holdout.html')
+
+            #set tag of compare_models
+            mlflow.set_tag("Source", "blend_models")
+
+            import secrets
+            URI = secrets.token_hex(nbytes=4)
+            mlflow.set_tag("URI", URI)
+            mlflow.set_tag("USI", USI)
+            mlflow.set_tag("Run Time", runtime)
+            mlflow.set_tag("Run ID", RunID)
+
+            # Log training time of compare_models
+            mlflow.log_metric("TT", model_fit_time)
+
+            # Log AUC and Confusion Matrix plot
+            if log_plots_param:
+
+                try:
+                    plot_model(model, plot = 'residuals', verbose=False, save=True, system=False)
+                    mlflow.log_artifact('Residuals.png')
+                    os.remove("Residuals.png")
+                except:
+                    pass
+
+                try:
+                    plot_model(model, plot = 'error', verbose=False, save=True, system=False)
+                    mlflow.log_artifact('Prediction Error.png')
+                    os.remove("Prediction Error.png")
+                except:
+                    pass
+
+                try:
+                    plot_model(model, plot = 'feature', verbose=False, save=True, system=False)
+                    mlflow.log_artifact('Feature Importance.png')
+                    os.remove("Feature Importance.png")
+                except:
+                    pass
+
+            # Log the CV results as model_results.html artifact
+            model_results.data.to_html('Results.html', col_space=65, justify='left')
+            mlflow.log_artifact('Results.html')
+            os.remove('Results.html')
+
     if verbose:
         clear_output()
         if html_param:
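The hold-out logging added above renders a score DataFrame to HTML and attaches it to the run. A standalone sketch of that pattern with an illustrative DataFrame (the column values are made up):

```python
import os

import mlflow
import pandas as pd

# Stand-in for the hold-out score grid pulled from predict_model().
holdout_score = pd.DataFrame({'MAE': [1.23], 'RMSE': [2.34], 'R2': [0.91]})

with mlflow.start_run():
    # Same rendering options the commit uses for Holdout.html.
    holdout_score.to_html('Holdout.html', col_space=65, justify='left')
    mlflow.log_artifact('Holdout.html')
    os.remove('Holdout.html')   # local copy no longer needed
```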
@@ -4380,7 +4705,7 @@ def tune_model(estimator,
                n_iter = 10,
                custom_grid = None, #added in pycaret==1.0.1
                optimize = 'r2',
-               choose_better = True, #added in pycaret==1.0.1
+               choose_better = False, #added in pycaret==1.0.1
               verbose = True):
@@ -4460,7 +4785,7 @@ def tune_model(estimator,
     'rmsle', 'mape'. When using 'rmse' or 'rmsle' the base scorer is 'mse' and when using
     'mape' the base scorer is 'mae'.

-    choose_better: Boolean, default = True
+    choose_better: Boolean, default = False
     When set to set to True, base estimator is returned when the metric doesn't improve
     by tune_model. This gurantees the returned object would perform atleast equivalent
     to base estimator created using create_model or model returned by compare_models.
@@ -4499,6 +4824,10 @@ def tune_model(estimator,
     #exception checking
     import sys

+    #run_time
+    import datetime, time
+    runtime_start = time.time()
+
     #checking estimator if string
     if type(estimator) is str:
         sys.exit('(Type Error): The behavior of tune_model in version 1.0.1 is changed. Please pass trained model object.')
@@ -4637,6 +4966,33 @@ def tune_model(estimator,
                   'CatBoostRegressor' : 'catboost',
                   'BaggingRegressor' : 'Bagging'}

+    model_dict_logging = {'ExtraTreesRegressor' : 'Extra Trees Regressor',
+                        'GradientBoostingRegressor' : 'Gradient Boosting Regressor',
+                        'RandomForestRegressor' : 'Random Forest',
+                        'LGBMRegressor' : 'Light Gradient Boosting Machine',
+                        'XGBRegressor' : 'Extreme Gradient Boosting',
+                        'AdaBoostRegressor' : 'AdaBoost Regressor',
+                        'DecisionTreeRegressor' : 'Decision Tree',
+                        'Ridge' : 'Ridge Regression',
+                        'TheilSenRegressor' : 'TheilSen Regressor',
+                        'BayesianRidge' : 'Bayesian Ridge',
+                        'LinearRegression' : 'Linear Regression',
+                        'ARDRegression' : 'Automatic Relevance Determination',
+                        'KernelRidge' : 'Kernel Ridge',
+                        'RANSACRegressor' : 'Random Sample Consensus',
+                        'HuberRegressor' : 'Huber Regressor',
+                        'Lasso' : 'Lasso Regression',
+                        'ElasticNet' : 'Elastic Net',
+                        'Lars' : 'Least Angle Regression',
+                        'OrthogonalMatchingPursuit' : 'Orthogonal Matching Pursuit',
+                        'MLPRegressor' : 'Multi Level Perceptron',
+                        'KNeighborsRegressor' : 'K Neighbors Regressor',
+                        'SVR' : 'Support Vector Machine',
+                        'LassoLars' : 'Lasso Least Angle Regression',
+                        'PassiveAggressiveRegressor' : 'Passive Aggressive Regressor',
+                        'CatBoostRegressor' : 'CatBoost Regressor',
+                        'BaggingRegressor' : 'Bagging Regressor'}
+
     _estimator_ = estimator

     estimator = model_dict.get(mn)
@@ -4680,7 +5036,7 @@ def tune_model(estimator,
MONITOR UPDATE STARTS
'''
monitor
.
iloc
[
1
,
1
:]
=
'Searching Hyperparameters
Grid
'
monitor
.
iloc
[
1
,
1
:]
=
'Searching Hyperparameters'
if
verbose
:
if
html_param
:
update_display
(
monitor
,
display_id
=
'monitor'
)
...
...
@@ -5493,7 +5849,11 @@ def tune_model(estimator,
if
html_param
:
update_display
(
monitor
,
display_id
=
'monitor'
)
model_fit_start
=
time
.
time
()
best_model
.
fit
(
data_X
,
data_y
)
model_fit_end
=
time
.
time
()
model_fit_time
=
np
.
array
(
model_fit_end
-
model_fit_start
).
round
(
2
)
progress
.
value
+=
1
...
...
@@ -5520,7 +5880,7 @@ def tune_model(estimator,
     #creating base model for comparison
     if estimator in ['Bagging', 'ada']:
-        base_model = create_model(estimator=_estimator_, verbose = False)
+        base_model = create_model(estimator=_estimator_, verbose = False, system=False)
     else:
         base_model = create_model(estimator=estimator, verbose = False)

     base_model_results = create_model_container[-1][compare_dimension][-2:][0]
@@ -5537,6 +5897,9 @@ def tune_model(estimator,
     else:
         best_model = base_model

+    #re-instate display_constainer state
+    display_container.pop(-1)
+
     #storing into experiment
     model_name = 'Tuned ' + str(model).split("(")[0]
     tup = (model_name, best_model)
@@ -5544,6 +5907,123 @@ def tune_model(estimator,
     nam = str(model_name) + ' Score Grid'
     tup = (nam, model_results)
     experiment__.append(tup)

+    #end runtime
+    runtime_end = time.time()
+    runtime = np.array(runtime_end - runtime_start).round(2)
+
+    #mlflow logging
+    if logging_param:
+
+        #Creating Logs message monitor
+        monitor.iloc[1,1:] = 'Creating Logs'
+        monitor.iloc[2,1:] = 'Almost Finished'
+
+        if verbose:
+            if html_param:
+                update_display(monitor, display_id = 'monitor')
+
+        import mlflow
+        from pathlib import Path
+        import os
+
+        mlflow.set_experiment(exp_name_log)
+        full_name = model_dict_logging.get(mn)
+
+        with mlflow.start_run(run_name=full_name) as run:
+
+            # Get active run to log as tag
+            RunID = mlflow.active_run().info.run_id
+
+            params = best_model.get_params()
+
+            # Log model parameters
+            params = model.get_params()
+
+            for i in list(params):
+                v = params.get(i)
+                if len(str(v)) > 250:
+                    params.pop(i)
+
+            mlflow.log_params(params)
+
+            mlflow.log_metrics({"MAE": avgs_mae[0], "MSE": avgs_mse[0], "RMSE": avgs_rmse[0],
+                                "R2" : avgs_r2[0], "RMSLE": avgs_rmsle[0], "MAPE": avgs_mape[0]})
+
+            # Log internal parameters
+            mlflow.log_param("tune_model_fold", fold)
+            mlflow.log_param("tune_model_round", round)
+            mlflow.log_param("tune_model_n_iter", n_iter)
+            mlflow.log_param("tune_model_optimize", optimize)
+            mlflow.log_param("tune_model_choose_better", choose_better)
+            mlflow.log_param("tune_model_verbose", verbose)
+
+            #set tag of compare_models
+            mlflow.set_tag("Source", "tune_model")
+
+            import secrets
+            URI = secrets.token_hex(nbytes=4)
+            mlflow.set_tag("URI", URI)
+            mlflow.set_tag("USI", USI)
+            mlflow.set_tag("Run Time", runtime)
+            mlflow.set_tag("Run ID", RunID)
+
+            # Log training time in seconds
+            mlflow.log_metric("TT", model_fit_time)
+
+            # Log model and transformation pipeline
+            save_model(best_model, 'Trained Model', verbose=False)
+            mlflow.log_artifact('Trained Model' + '.pkl')
+            size_bytes = Path('Trained Model.pkl').stat().st_size
+            size_kb = np.round(size_bytes/1000, 2)
+            mlflow.set_tag("Size KB", size_kb)
+            os.remove('Trained Model.pkl')
+
+            # Log the CV results as model_results.html artifact
+            model_results.data.to_html('Results.html', col_space=65, justify='left')
+            mlflow.log_artifact('Results.html')
+            os.remove('Results.html')
+
+            # Generate hold-out predictions and save as html
+            holdout = predict_model(best_model, verbose=False)
+            holdout_score = pull()
+            display_container.pop(-1)
+            holdout_score.to_html('Holdout.html', col_space=65, justify='left')
+            mlflow.log_artifact('Holdout.html')
+            os.remove('Holdout.html')
+
+            # Log AUC and Confusion Matrix plot
+            if log_plots_param:
+
+                try:
+                    plot_model(model, plot = 'residuals', verbose=False, save=True, system=False)
+                    mlflow.log_artifact('Residuals.png')
+                    os.remove("Residuals.png")
+                except:
+                    pass
+
+                try:
+                    plot_model(model, plot = 'error', verbose=False, save=True, system=False)
+                    mlflow.log_artifact('Prediction Error.png')
+                    os.remove("Prediction Error.png")
+                except:
+                    pass
+
+                try:
+                    plot_model(model, plot = 'feature', verbose=False, save=True, system=False)
+                    mlflow.log_artifact('Feature Importance.png')
+                    os.remove("Feature Importance.png")
+                except:
+                    pass
+
+            # Log hyperparameter tuning grid
+            d1 = model_grid.cv_results_.get('params')
+            dd = pd.DataFrame.from_dict(d1)
+            dd['Score'] = model_grid.cv_results_.get('mean_test_score')
+            dd.to_html('Iterations.html', col_space=75, justify='left')
+            mlflow.log_artifact('Iterations.html')
+            os.remove('Iterations.html')
+
     if verbose:
         clear_output()
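New in tune_model is the Iterations.html artifact, which flattens the search object's cv_results_ into a table of candidate hyperparameters and their mean CV score. A self-contained sketch of the same transformation, using a scikit-learn RandomizedSearchCV for illustration (the estimator, grid, and data are assumptions, not taken from the diff):

```python
import pandas as pd
from sklearn.datasets import make_regression
from sklearn.linear_model import Ridge
from sklearn.model_selection import RandomizedSearchCV

X, y = make_regression(n_samples=100, random_state=0)
model_grid = RandomizedSearchCV(Ridge(), {'alpha': [0.1, 1.0, 10.0]},
                                n_iter=3, cv=3, random_state=0)
model_grid.fit(X, y)

# One row per candidate: its parameters plus the mean CV test score.
dd = pd.DataFrame.from_dict(model_grid.cv_results_.get('params'))
dd['Score'] = model_grid.cv_results_.get('mean_test_score')
dd.to_html('Iterations.html', col_space=75, justify='left')
```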
@@ -5562,7 +6042,7 @@ def stack_models(estimator_list,
                  round = 4,
                  restack = True,
                  plot = False,
-                 choose_better = True, #added in pycaret==1.0.1
+                 choose_better = False, #added in pycaret==1.0.1
                  optimize = 'r2', #added in pycaret==1.0.1
                  finalize = False,
                  verbose = True):
@@ -5622,7 +6102,7 @@ def stack_models(estimator_list,
     When plot is set to True, it will return the correlation plot of prediction
     from all base models provided in estimator_list.

-    choose_better: Boolean, default = True
+    choose_better: Boolean, default = False
     When set to True, base estimator is returned when the metric doesn't
     improve by ensemble_model. This gurantees the returned object would perform
     atleast equivalent to base estimator created using create_model or model
@@ -5670,6 +6150,10 @@ def stack_models(estimator_list,
     #exception checking
     import sys

+    #run_time
+    import datetime, time
+    runtime_start = time.time()
+
     #checking error for estimator_list
     for i in estimator_list:
         if 'sklearn' not in str(type(i)) and 'CatBoostRegressor' not in str(type(i)):
@@ -5824,6 +6308,8 @@ def stack_models(estimator_list,
counter
=
0
model_fit_start
=
time
.
time
()
for
model
in
estimator_list
:
'''
...
...
@@ -5897,6 +6383,7 @@ def stack_models(estimator_list,
     avgs_r2 = np.empty((0,0))
     avgs_mape = np.empty((0,0))
+    avgs_training_time = np.empty((0,0))

     def calculate_mape(actual, prediction):
         mask = actual != 0
         return (np.fabs(actual - prediction)/actual)[mask].mean()
@@ -6023,6 +6510,9 @@ def stack_models(estimator_list,
     '''

+    model_fit_end = time.time()
+    model_fit_time = np.array(model_fit_end - model_fit_start).round(2)
+
     mean_mae = np.mean(score_mae)
     mean_mse = np.mean(score_mse)
     mean_rmse = np.mean(score_rmse)
@@ -6110,6 +6600,9 @@ def stack_models(estimator_list,
s
=
create_model_container
[
-
1
][
compare_dimension
][
-
2
:][
0
]
scorer
.
append
(
s
)
#re-instate display_constainer state
display_container
.
pop
(
-
1
)
#returning better model
if
optimize
==
'r2'
:
index_scorer
=
scorer
.
index
(
max
(
scorer
))
...
...
@@ -6136,6 +6629,100 @@ def stack_models(estimator_list,
                          linewidths=1)
         ax.set_ylim(sorted(ax.get_xlim(), reverse=True))

+    #end runtime
+    runtime_end = time.time()
+    runtime = np.array(runtime_end - runtime_start).round(2)
+
+    if logging_param and not finalize:
+
+        import mlflow
+        from pathlib import Path
+        import os
+
+        #Creating Logs message monitor
+        monitor.iloc[1,1:] = 'Creating Logs'
+        monitor.iloc[2,1:] = 'Almost Finished'
+
+        if verbose:
+            if html_param:
+                update_display(monitor, display_id = 'monitor')
+
+        with mlflow.start_run(run_name='Stacking Regressor') as run:
+
+            # Get active run to log as tag
+            RunID = mlflow.active_run().info.run_id
+
+            params = meta_model.get_params()
+
+            for i in list(params):
+                v = params.get(i)
+                if len(str(v)) > 250:
+                    params.pop(i)
+
+            mlflow.log_params(params)
+
+            mlflow.log_metrics({"MAE": avgs_mae[0], "MSE": avgs_mse[0], "RMSE": avgs_rmse[0],
+                                "R2" : avgs_r2[0], "RMSLE": avgs_rmsle[0], "MAPE": avgs_mape[0]})
+
+            # Log internal parameters
+            mlflow.log_param("stack_models_estimator_list", estimator_list)
+            mlflow.log_param("stack_models_fold", fold)
+            mlflow.log_param("stack_models_round", round)
+            mlflow.log_param("stack_models_restack", restack)
+            mlflow.log_param("stack_models_plot", plot)
+            mlflow.log_param("stack_models_choose_better", choose_better)
+            mlflow.log_param("stack_models_optimize", optimize)
+            mlflow.log_param("stack_models_finalize", finalize)
+            mlflow.log_param("stack_models_verbose", verbose)
+
+            #set tag of stack_models
+            mlflow.set_tag("Source", "stack_models")
+
+            import secrets
+            URI = secrets.token_hex(nbytes=4)
+            mlflow.set_tag("URI", URI)
+            mlflow.set_tag("USI", USI)
+            mlflow.set_tag("Run Time", runtime)
+            mlflow.set_tag("Run ID", RunID)
+
+            # Log model and transformation pipeline
+            save_model(models_, 'Trained Model', verbose=False)
+            mlflow.log_artifact('Trained Model' + '.pkl')
+            size_bytes = Path('Trained Model.pkl').stat().st_size
+            size_kb = np.round(size_bytes/1000, 2)
+            mlflow.set_tag("Size KB", size_kb)
+            os.remove('Trained Model.pkl')
+
+            # Log training time of compare_models
+            mlflow.log_metric("TT", model_fit_time)
+
+            # Log the CV results as model_results.html artifact
+            model_results.data.to_html('Results.html', col_space=65, justify='left')
+            mlflow.log_artifact('Results.html')
+            os.remove('Results.html')
+
+            if log_plots_param:
+
+                plt.subplots(figsize=(15,7))
+                ax = sns.heatmap(base_prediction_cor, vmin=0.2, vmax=1, center=0, cmap='magma', square=True,
+                                 annot=True, linewidths=1)
+                ax.set_ylim(sorted(ax.get_xlim(), reverse=True))
+                plt.savefig("Stacking Heatmap.png")
+                mlflow.log_artifact('Stacking Heatmap.png')
+                os.remove('Stacking Heatmap.png')
+                plt.close()
+
+            # Generate hold-out predictions and save as html
+            holdout = predict_model(models_, verbose=False)
+            holdout_score = pull()
+            display_container.pop(-1)
+            holdout_score.to_html('Holdout.html', col_space=65, justify='left')
+            mlflow.log_artifact('Holdout.html')
+            os.remove('Holdout.html')
+
     if verbose:
         clear_output()
         if html_param:
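When log_plots_param is set, stack_models now re-draws the base-model correlation heatmap and logs it as a PNG artifact. A standalone sketch of that save-and-log sequence, with base_prediction_cor stubbed by random data (the stub is an assumption for illustration):

```python
import os

import matplotlib.pyplot as plt
import mlflow
import numpy as np
import pandas as pd
import seaborn as sns

# Stand-in for the correlation matrix of base-model predictions.
base_prediction_cor = pd.DataFrame(np.random.rand(20, 4)).corr()

plt.subplots(figsize=(15, 7))
ax = sns.heatmap(base_prediction_cor, vmin=0.2, vmax=1, center=0,
                 cmap='magma', square=True, annot=True, linewidths=1)
ax.set_ylim(sorted(ax.get_xlim(), reverse=True))  # flip y-axis as the commit does
plt.savefig("Stacking Heatmap.png")

with mlflow.start_run():
    mlflow.log_artifact('Stacking Heatmap.png')

os.remove('Stacking Heatmap.png')
plt.close()
```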
@@ -6150,7 +6737,7 @@ def create_stacknet(estimator_list,
                     fold = 10,
                     round = 4,
                     restack = True,
-                    choose_better = True, #added in pycaret==1.0.1
+                    choose_better = False, #added in pycaret==1.0.1
                     optimize = 'r2', #added in pycaret==1.0.1
                     finalize = False,
                     verbose = True):
@@ -6203,7 +6790,7 @@ def create_stacknet(estimator_list,
     the predicted label of last layer is passed to meta model when making final
     predictions.

-    choose_better: Boolean, default = True
+    choose_better: Boolean, default = False
     When set to True, base estimator is returned when the metric doesn't
     improve by ensemble_model. This gurantees the returned object would perform
     atleast equivalent to base estimator created using create_model or model
pycaret/utils.py

@@ -3,7 +3,7 @@
 # License: MIT

 def version():
-    print("pycaret-nightly-0.6")
+    print("pycaret-nightly-0.7")

 def check_metric(actual, prediction, metric, round = 4):
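With utils.py updated, the version banner reflects the new nightly build:

```python
# Assuming pycaret-nightly 0.7 is installed:
from pycaret.utils import version

version()   # prints: pycaret-nightly-0.7
```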
setup.py

@@ -13,7 +13,7 @@ with open('requirements.txt') as f:
 setup(
     name="pycaret-nightly",
-    version="0.6",
+    version="0.7",
     description="Nightly build of PyCaret - An open source, low-code machine learning library in Python.",
     long_description=readme(),
     long_description_content_type="text/markdown",