Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
OpenDocCN
pycaret
提交
2e8d9bae
pycaret
项目概览
OpenDocCN
/
pycaret
通知
2
Star
2
Fork
0
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
0
列表
看板
标记
里程碑
合并请求
0
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
pycaret
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
0
Issue
0
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
前往新版Gitcode,体验更适合开发者的 AI 搜索 >>
提交
2e8d9bae
编写于
6月 21, 2020
作者:
P
PyCaret
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
updated regresion.py
上级
f4761c81
变更
3
隐藏空白更改
内联
并排
Showing
3 changed file
with
291 addition
and
23 deletion
+291
-23
pycaret/regression.py
pycaret/regression.py
+289
-21
pycaret/utils.py
pycaret/utils.py
+1
-1
setup.py
setup.py
+1
-1
未找到文件。
pycaret/regression.py
浏览文件 @
2e8d9bae
...
...
@@ -48,10 +48,10 @@ def setup(data,
interaction_threshold
=
0.01
,
transform_target
=
False
,
transform_target_method
=
'box-cox'
,
data_split_shuffle
=
True
,
#added in pycaret==
1.0.1
folds_shuffle
=
False
,
#added in pycaret==
1.0.1
n_jobs
=
-
1
,
#added in pycaret==
1.0.1
html
=
True
,
#added in pycaret==
1.0.1
data_split_shuffle
=
True
,
#added in pycaret==
2.0.0
folds_shuffle
=
False
,
#added in pycaret==
2.0.0
n_jobs
=
-
1
,
#added in pycaret==
2.0.0
html
=
True
,
#added in pycaret==
2.0.0
session_id
=
None
,
experiment_name
=
None
,
#added in pycaret==2.0.0
logging
=
True
,
#added in pycaret==2.0.0
...
...
@@ -59,7 +59,7 @@ def setup(data,
log_profile
=
False
,
#added in pycaret==2.0.0
log_data
=
False
,
#added in pycaret==2.0.0
silent
=
False
,
verbose
=
True
,
#added in pycaret==
1.0.1
verbose
=
True
,
#added in pycaret==
2.0.0
profile
=
False
):
"""
...
...
@@ -1725,7 +1725,7 @@ def create_model(estimator = None,
round
=
4
,
verbose
=
True
,
system
=
True
,
#added in pycaret==2.0.0
**
kwargs
):
#added in pycaret==
1.0.1
**
kwargs
):
#added in pycaret==
2.0.0
"""
...
...
@@ -2454,8 +2454,8 @@ def ensemble_model(estimator,
fold
=
10
,
n_estimators
=
10
,
round
=
4
,
choose_better
=
False
,
#added in pycaret==
1.0.1
optimize
=
'r2'
,
#added in pycaret==
1.0.1
choose_better
=
False
,
#added in pycaret==
2.0.0
optimize
=
'r2'
,
#added in pycaret==
2.0.0
verbose
=
True
):
"""
...
...
@@ -3110,13 +3110,13 @@ def ensemble_model(estimator,
return
model
def
compare_models
(
blacklist
=
None
,
whitelist
=
None
,
#added in pycaret==
1.0.1
whitelist
=
None
,
#added in pycaret==
2.0.0
fold
=
10
,
round
=
4
,
sort
=
'R2'
,
n_select
=
1
,
#added in pycaret==
1.0.1
n_select
=
1
,
#added in pycaret==
2.0.0
turbo
=
True
,
verbose
=
True
):
#added in pycaret==
1.0.1
verbose
=
True
):
#added in pycaret==
2.0.0
"""
...
...
@@ -3966,8 +3966,8 @@ def compare_models(blacklist = None,
def
blend_models
(
estimator_list
=
'All'
,
fold
=
10
,
round
=
4
,
choose_better
=
False
,
#added in pycaret==
1.0.1
optimize
=
'r2'
,
#added in pycaret==
1.0.1
choose_better
=
False
,
#added in pycaret==
2.0.0
optimize
=
'r2'
,
#added in pycaret==
2.0.0
turbo
=
True
,
verbose
=
True
):
...
...
@@ -4703,9 +4703,9 @@ def tune_model(estimator,
fold
=
10
,
round
=
4
,
n_iter
=
10
,
custom_grid
=
None
,
#added in pycaret==
1.0.1
custom_grid
=
None
,
#added in pycaret==
2.0.0
optimize
=
'r2'
,
choose_better
=
False
,
#added in pycaret==
1.0.1
choose_better
=
False
,
#added in pycaret==
2.0.0
verbose
=
True
):
...
...
@@ -6042,8 +6042,8 @@ def stack_models(estimator_list,
round
=
4
,
restack
=
True
,
plot
=
False
,
choose_better
=
False
,
#added in pycaret==
1.0.1
optimize
=
'r2'
,
#added in pycaret==
1.0.1
choose_better
=
False
,
#added in pycaret==
2.0.0
optimize
=
'r2'
,
#added in pycaret==
2.0.0
finalize
=
False
,
verbose
=
True
):
...
...
@@ -6737,8 +6737,8 @@ def create_stacknet(estimator_list,
fold
=
10
,
round
=
4
,
restack
=
True
,
choose_better
=
False
,
#added in pycaret==
1.0.1
optimize
=
'r2'
,
#added in pycaret==
1.0.1
choose_better
=
False
,
#added in pycaret==
2.0.0
optimize
=
'r2'
,
#added in pycaret==
2.0.0
finalize
=
False
,
verbose
=
True
):
...
...
@@ -6838,11 +6838,15 @@ def create_stacknet(estimator_list,
'''
#for checking only
global
inter_level_names
#no active test
#exception checking
import
sys
#run_time
import
datetime
,
time
runtime_start
=
time
.
time
()
#checking estimator_list
if
type
(
estimator_list
[
0
])
is
not
list
:
sys
.
exit
(
"(Type Error): estimator_list parameter must be list of list. "
)
...
...
@@ -7024,6 +7028,8 @@ def create_stacknet(estimator_list,
base_models_
=
[]
model_fit_start
=
time
.
time
()
for
model
in
base_level
:
base_models_
.
append
(
model
.
fit
(
data_X
,
data_y
))
#changed to data_X and data_y
...
...
@@ -7261,6 +7267,9 @@ def create_stacknet(estimator_list,
'''
model_fit_end
=
time
.
time
()
model_fit_time
=
np
.
array
(
model_fit_end
-
model_fit_start
).
round
(
2
)
mean_mae
=
np
.
mean
(
score_mae
)
mean_mse
=
np
.
mean
(
score_mse
)
mean_rmse
=
np
.
mean
(
score_rmse
)
...
...
@@ -7353,6 +7362,9 @@ def create_stacknet(estimator_list,
s
=
create_model_container
[
-
1
][
compare_dimension
][
-
2
:][
0
]
scorer
.
append
(
s
)
#re-instate display_constainer state
display_container
.
pop
(
-
1
)
#returning better model
if
optimize
==
'r2'
:
index_scorer
=
scorer
.
index
(
max
(
scorer
))
...
...
@@ -7372,6 +7384,87 @@ def create_stacknet(estimator_list,
tup
=
(
nam
,
model_results
)
experiment__
.
append
(
tup
)
#end runtime
runtime_end
=
time
.
time
()
runtime
=
np
.
array
(
runtime_end
-
runtime_start
).
round
(
2
)
if
logging_param
and
not
finalize
:
import
mlflow
from
pathlib
import
Path
import
os
#Creating Logs message monitor
monitor
.
iloc
[
1
,
1
:]
=
'Creating Logs'
monitor
.
iloc
[
2
,
1
:]
=
'Almost Finished'
if
verbose
:
if
html_param
:
update_display
(
monitor
,
display_id
=
'monitor'
)
with
mlflow
.
start_run
(
run_name
=
'Stacking Regressor (Multi-layer)'
)
as
run
:
# Get active run to log as tag
RunID
=
mlflow
.
active_run
().
info
.
run_id
params
=
meta_model
.
get_params
()
for
i
in
list
(
params
):
v
=
params
.
get
(
i
)
if
len
(
str
(
v
))
>
250
:
params
.
pop
(
i
)
mlflow
.
log_params
(
params
)
mlflow
.
log_metrics
({
"MAE"
:
avgs_mae
[
0
],
"MSE"
:
avgs_mse
[
0
],
"RMSE"
:
avgs_rmse
[
0
],
"R2"
:
avgs_r2
[
0
],
"RMSLE"
:
avgs_rmsle
[
0
],
"MAPE"
:
avgs_mape
[
0
]})
# Log other parameter of create_model function (internal to pycaret)
mlflow
.
log_param
(
"create_stacknet_estimator_list"
,
estimator_list
)
mlflow
.
log_param
(
"create_stacknet_fold"
,
fold
)
mlflow
.
log_param
(
"create_stacknet_round"
,
round
)
mlflow
.
log_param
(
"create_stacknet_restack"
,
restack
)
mlflow
.
log_param
(
"create_stacknet_choose_better"
,
choose_better
)
mlflow
.
log_param
(
"create_stacknet_optimize"
,
optimize
)
mlflow
.
log_param
(
"create_stacknet_finalize"
,
finalize
)
mlflow
.
log_param
(
"create_stacknet_verbose"
,
verbose
)
#set tag of create_stacknet
mlflow
.
set_tag
(
"Source"
,
"create_stacknet"
)
import
secrets
URI
=
secrets
.
token_hex
(
nbytes
=
4
)
mlflow
.
set_tag
(
"URI"
,
URI
)
mlflow
.
set_tag
(
"USI"
,
USI
)
mlflow
.
set_tag
(
"Run Time"
,
runtime
)
mlflow
.
set_tag
(
"Run ID"
,
RunID
)
# Log model and transformation pipeline
save_model
(
models_
,
'Trained Model'
,
verbose
=
False
)
mlflow
.
log_artifact
(
'Trained Model'
+
'.pkl'
)
size_bytes
=
Path
(
'Trained Model.pkl'
).
stat
().
st_size
size_kb
=
np
.
round
(
size_bytes
/
1000
,
2
)
mlflow
.
set_tag
(
"Size KB"
,
size_kb
)
os
.
remove
(
'Trained Model.pkl'
)
# Log training time of compare_models
mlflow
.
log_metric
(
"TT"
,
model_fit_time
)
# Log the CV results as model_results.html artifact
model_results
.
data
.
to_html
(
'Results.html'
,
col_space
=
65
,
justify
=
'left'
)
mlflow
.
log_artifact
(
'Results.html'
)
os
.
remove
(
'Results.html'
)
# Generate hold-out predictions and save as html
holdout
=
predict_model
(
models_
,
verbose
=
False
)
holdout_score
=
pull
()
display_container
.
pop
(
-
1
)
holdout_score
.
to_html
(
'Holdout.html'
,
col_space
=
65
,
justify
=
'left'
)
mlflow
.
log_artifact
(
'Holdout.html'
)
os
.
remove
(
'Holdout.html'
)
if
verbose
:
clear_output
()
if
html_param
:
...
...
@@ -8013,11 +8106,69 @@ def finalize_model(estimator):
import
warnings
warnings
.
filterwarnings
(
'ignore'
)
#run_time
import
datetime
,
time
runtime_start
=
time
.
time
()
#import depedencies
from
IPython.display
import
clear_output
,
update_display
from
sklearn.base
import
clone
from
copy
import
deepcopy
import
numpy
as
np
#determine runname for logging
def
get_model_name
(
e
):
return
str
(
e
).
split
(
"("
)[
0
]
model_dict_logging
=
{
'ExtraTreesRegressor'
:
'Extra Trees Regressor'
,
'GradientBoostingRegressor'
:
'Gradient Boosting Regressor'
,
'RandomForestRegressor'
:
'Random Forest'
,
'LGBMRegressor'
:
'Light Gradient Boosting Machine'
,
'XGBRegressor'
:
'Extreme Gradient Boosting'
,
'AdaBoostRegressor'
:
'AdaBoost Regressor'
,
'DecisionTreeRegressor'
:
'Decision Tree'
,
'Ridge'
:
'Ridge Regression'
,
'TheilSenRegressor'
:
'TheilSen Regressor'
,
'BayesianRidge'
:
'Bayesian Ridge'
,
'LinearRegression'
:
'Linear Regression'
,
'ARDRegression'
:
'Automatic Relevance Determination'
,
'KernelRidge'
:
'Kernel Ridge'
,
'RANSACRegressor'
:
'Random Sample Consensus'
,
'HuberRegressor'
:
'Huber Regressor'
,
'Lasso'
:
'Lasso Regression'
,
'ElasticNet'
:
'Elastic Net'
,
'Lars'
:
'Least Angle Regression'
,
'OrthogonalMatchingPursuit'
:
'Orthogonal Matching Pursuit'
,
'MLPRegressor'
:
'Multi Level Perceptron'
,
'KNeighborsRegressor'
:
'K Neighbors Regressor'
,
'SVR'
:
'Support Vector Machine'
,
'LassoLars'
:
'Lasso Least Angle Regression'
,
'PassiveAggressiveRegressor'
:
'Passive Aggressive Regressor'
,
'CatBoostRegressor'
:
'CatBoost Regressor'
,
'BaggingRegressor'
:
'Bagging Regressor'
,
'VotingRegressor'
:
'Voting Regressor'
}
if
type
(
estimator
)
is
not
list
:
if
hasattr
(
estimator
,
'voting'
):
mn
=
'VotingRegressor'
else
:
mn
=
get_model_name
(
estimator
)
if
'BaggingRegressor'
in
mn
:
mn
=
get_model_name
(
estimator
.
base_estimator_
)
if
'catboost'
in
mn
:
mn
=
'CatBoostRegressor'
if
type
(
estimator
)
is
list
:
if
type
(
estimator
[
0
])
is
not
list
:
full_name
=
'Stacking Regresspr'
else
:
full_name
=
'Stacking Regressor (Multi-layer)'
else
:
full_name
=
model_dict_logging
.
get
(
mn
)
if
type
(
estimator
)
is
list
:
if
type
(
estimator
[
0
])
is
not
list
:
...
...
@@ -8052,6 +8203,8 @@ def finalize_model(estimator):
finalize
=
True
,
verbose
=
False
)
pull_results
=
pull
()
else
:
model_final
=
clone
(
estimator
)
clear_output
()
...
...
@@ -8063,6 +8216,121 @@ def finalize_model(estimator):
tup
=
(
model_name
,
model_final
)
experiment__
.
append
(
tup
)
#end runtime
runtime_end
=
time
.
time
()
runtime
=
np
.
array
(
runtime_end
-
runtime_start
).
round
(
2
)
#mlflow logging
if
logging_param
:
#import mlflow
import
mlflow
from
pathlib
import
Path
import
os
mlflow
.
set_experiment
(
exp_name_log
)
with
mlflow
.
start_run
(
run_name
=
full_name
)
as
run
:
# Get active run to log as tag
RunID
=
mlflow
.
active_run
().
info
.
run_id
# Log model parameters
try
:
params
=
model_final
.
get_params
()
for
i
in
list
(
params
):
v
=
params
.
get
(
i
)
if
len
(
str
(
v
))
>
250
:
params
.
pop
(
i
)
mlflow
.
log_params
(
params
)
except
:
pass
# get metrics of non-finalized model and log it
try
:
c
=
create_model
(
estimator
,
verbose
=
False
,
system
=
False
)
cr
=
pull
()
log_mae
=
cr
.
loc
[
'Mean'
][
'MAE'
]
log_mse
=
cr
.
loc
[
'Mean'
][
'MSE'
]
log_rmse
=
cr
.
loc
[
'Mean'
][
'RMSE'
]
log_r2
=
cr
.
loc
[
'Mean'
][
'R2'
]
log_rmsle
=
cr
.
loc
[
'Mean'
][
'RMSLE'
]
log_mape
=
cr
.
loc
[
'Mean'
][
'MAPE'
]
mlflow
.
log_metric
(
"MAE"
,
log_mae
)
mlflow
.
log_metric
(
"MSE"
,
log_mse
)
mlflow
.
log_metric
(
"RMSE"
,
log_rmse
)
mlflow
.
log_metric
(
"R2"
,
log_r2
)
mlflow
.
log_metric
(
"RMSLE"
,
log_rmsle
)
mlflow
.
log_metric
(
"MAPE"
,
log_mape
)
except
:
cr
=
pull_results
log_mae
=
cr
.
loc
[
'Mean'
][
'MAE'
]
log_mse
=
cr
.
loc
[
'Mean'
][
'MSE'
]
log_rmse
=
cr
.
loc
[
'Mean'
][
'RMSE'
]
log_r2
=
cr
.
loc
[
'Mean'
][
'R2'
]
log_rmsle
=
cr
.
loc
[
'Mean'
][
'RMSLE'
]
log_mape
=
cr
.
loc
[
'Mean'
][
'MAPE'
]
mlflow
.
log_metric
(
"MAE"
,
log_mae
)
mlflow
.
log_metric
(
"MSE"
,
log_mse
)
mlflow
.
log_metric
(
"RMSE"
,
log_rmse
)
mlflow
.
log_metric
(
"R2"
,
log_r2
)
mlflow
.
log_metric
(
"RMSLE"
,
log_rmsle
)
mlflow
.
log_metric
(
"MAPE"
,
log_mape
)
#set tag of compare_models
mlflow
.
set_tag
(
"Source"
,
"finalize_model"
)
#create MRI (model registration id)
mlflow
.
set_tag
(
"Final"
,
True
)
import
secrets
URI
=
secrets
.
token_hex
(
nbytes
=
4
)
mlflow
.
set_tag
(
"URI"
,
URI
)
mlflow
.
set_tag
(
"USI"
,
USI
)
mlflow
.
set_tag
(
"Run Time"
,
runtime
)
mlflow
.
set_tag
(
"Run ID"
,
RunID
)
# Log training time in seconds
mlflow
.
log_metric
(
"TT"
,
runtime
)
# Log AUC and Confusion Matrix plot
if
log_plots_param
:
try
:
plot_model
(
model
,
plot
=
'residuals'
,
verbose
=
False
,
save
=
True
,
system
=
False
)
mlflow
.
log_artifact
(
'Residuals.png'
)
os
.
remove
(
"Residuals.png"
)
except
:
pass
try
:
plot_model
(
model
,
plot
=
'error'
,
verbose
=
False
,
save
=
True
,
system
=
False
)
mlflow
.
log_artifact
(
'Prediction Error.png'
)
os
.
remove
(
"Prediction Error.png"
)
except
:
pass
try
:
plot_model
(
model
,
plot
=
'feature'
,
verbose
=
False
,
save
=
True
,
system
=
False
)
mlflow
.
log_artifact
(
'Feature Importance.png'
)
os
.
remove
(
"Feature Importance.png"
)
except
:
pass
# Log model and transformation pipeline
save_model
(
model_final
,
'Trained Model'
,
verbose
=
False
)
mlflow
.
log_artifact
(
'Trained Model'
+
'.pkl'
)
size_bytes
=
Path
(
'Trained Model.pkl'
).
stat
().
st_size
size_kb
=
np
.
round
(
size_bytes
/
1000
,
2
)
mlflow
.
set_tag
(
"Size KB"
,
size_kb
)
os
.
remove
(
'Trained Model.pkl'
)
return
model_final
def
save_model
(
model
,
model_name
,
verbose
=
True
):
...
...
@@ -8325,7 +8593,7 @@ def predict_model(estimator,
platform
=
None
,
authentication
=
None
,
round
=
4
,
verbose
=
True
):
#added in pycaret==
1.0.1
verbose
=
True
):
#added in pycaret==
2.0.0
"""
...
...
pycaret/utils.py
浏览文件 @
2e8d9bae
...
...
@@ -3,7 +3,7 @@
# License: MIT
def
version
():
print
(
"pycaret-nightly-0.
7
"
)
print
(
"pycaret-nightly-0.
8
"
)
def
check_metric
(
actual
,
prediction
,
metric
,
round
=
4
):
...
...
setup.py
浏览文件 @
2e8d9bae
...
...
@@ -13,7 +13,7 @@ with open('requirements.txt') as f:
setup
(
name
=
"pycaret-nightly"
,
version
=
"0.
7
"
,
version
=
"0.
8
"
,
description
=
"Nightly build of PyCaret - An open source, low-code machine learning library in Python."
,
long_description
=
readme
(),
long_description_content_type
=
"text/markdown"
,
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录