Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
OpenDocCN
pycaret
提交
7c19e999
pycaret
项目概览
OpenDocCN
/
pycaret
通知
2
Star
2
Fork
0
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
0
列表
看板
标记
里程碑
合并请求
0
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
pycaret
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
0
Issue
0
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
前往新版Gitcode,体验更适合开发者的 AI 搜索 >>
提交
7c19e999
编写于
7月 13, 2020
作者:
P
PyCaret
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
update pycaret-nightly==0.23 part 1
上级
97216210
变更
4
隐藏空白更改
内联
并排
Showing
4 changed files
with
114 additions
and
60 deletions
+114
-60
pycaret/classification.py
pycaret/classification.py
+57
-29
pycaret/regression.py
pycaret/regression.py
+55
-29
pycaret/utils.py
pycaret/utils.py
+1
-1
setup.py
setup.py
+1
-1
未找到文件。
pycaret/classification.py
浏览文件 @
7c19e999
...
...
@@ -1867,7 +1867,6 @@ def setup(data,
display_container
,
exp_name_log
,
logging_param
,
log_plots_param
,
USI
,
\
fix_imbalance_param
,
fix_imbalance_method_param
,
logger
def
create_model
(
estimator
=
None
,
ensemble
=
False
,
method
=
None
,
...
...
@@ -4039,7 +4038,6 @@ def plot_model(estimator,
param_df
=
pd
.
DataFrame
.
from_dict
(
estimator
.
get_params
(
estimator
),
orient
=
'index'
,
columns
=
[
'Parameters'
])
display
(
param_df
)
def
compare_models
(
blacklist
=
None
,
whitelist
=
None
,
#added in pycaret==2.0.0
fold
=
10
,
...
...
@@ -10858,6 +10856,9 @@ def optimize_threshold(estimator,
"""
import
logging
logger
.
info
(
"Initializing optimize_threshold()"
)
logger
.
info
(
"Importing libraries"
)
#import libraries
import
sys
...
...
@@ -10876,6 +10877,8 @@ def optimize_threshold(estimator,
ERROR HANDLING STARTS HERE
'''
logger
.
info
(
"Checking exceptions"
)
#exception 1 for multi-class
if
y
.
value_counts
().
count
()
>
2
:
sys
.
exit
(
"(Type Error) optimize_threshold() cannot be used when target is multi-class. "
)
...
...
@@ -10922,12 +10925,14 @@ def optimize_threshold(estimator,
actual
=
np
.
array
(
y_test
)
if
type
(
model
)
is
list
:
logger
.
info
(
"Model Type : Stacking"
)
predicted
=
predict_model
(
model
)
model_name
=
'Stacking'
clear_output
()
try
:
predicted
=
np
.
array
(
predicted
[
'Score'
])
except
:
logger
.
info
(
"Meta model doesn't support predict_proba function."
)
sys
.
exit
(
"(Type Error) Meta model doesn't support predict_proba function. Cannot be used in optimize_threshold(). "
)
else
:
...
...
@@ -10938,6 +10943,8 @@ def optimize_threshold(estimator,
internal function to calculate loss starts here
"""
logger
.
info
(
"Defining loss function"
)
def
calculate_loss
(
actual
,
predicted
,
tp_cost
=
true_positive
,
tn_cost
=
true_negative
,
fp_cost
=
false_positive
,
fn_cost
=
false_negative
):
...
...
@@ -10978,6 +10985,8 @@ def optimize_threshold(estimator,
cost
=
[]
#global optimize_results
logger
.
info
(
"Iteration starts at 0"
)
for
i
in
grid
:
pred_prob
=
(
predicted
>=
i
).
astype
(
int
)
...
...
@@ -10998,8 +11007,10 @@ def optimize_threshold(estimator,
fig
.
add_shape
(
dict
(
type
=
"line"
,
x0
=
x0
,
y0
=
y0
,
x1
=
x1
,
y1
=
y1
,
line
=
dict
(
color
=
"red"
,
width
=
2
)))
fig
.
update_layout
(
title
=
{
'text'
:
title
,
'y'
:
0.95
,
'x'
:
0.45
,
'xanchor'
:
'center'
,
'yanchor'
:
'top'
})
logger
.
info
(
"Figure ready for render"
)
fig
.
show
()
print
(
'Optimized Probability Threshold: '
+
str
(
t
)
+
' | '
+
'Optimized Cost Function: '
+
str
(
y1
))
logger
.
info
(
"optimize_threshold() succesfully completed"
)
def
automl
(
optimize
=
'Accuracy'
,
use_holdout
=
False
):
...
...
@@ -11008,6 +11019,9 @@ def automl(optimize='Accuracy', use_holdout=False):
"""
import
logging
logger
.
info
(
"Initializing automl()"
)
if
optimize
==
'Accuracy'
:
compare_dimension
=
'Accuracy'
elif
optimize
==
'AUC'
:
...
...
@@ -11026,6 +11040,7 @@ def automl(optimize='Accuracy', use_holdout=False):
scorer
=
[]
if
use_holdout
:
logger
.
info
(
"Model Selection Basis : Holdout set"
)
for
i
in
master_model_container
:
pred_holdout
=
predict_model
(
i
,
verbose
=
False
)
p
=
pull
()
...
...
@@ -11034,6 +11049,7 @@ def automl(optimize='Accuracy', use_holdout=False):
scorer
.
append
(
p
)
else
:
logger
.
info
(
"Model Selection Basis : CV Results on Training set"
)
for
i
in
create_model_container
:
r
=
i
[
compare_dimension
][
-
2
:][
0
]
scorer
.
append
(
r
)
...
...
@@ -11045,6 +11061,8 @@ def automl(optimize='Accuracy', use_holdout=False):
automl_finalized
=
finalize_model
(
automl_result
)
logger
.
info
(
"automl() succesfully completed"
)
return
automl_finalized
def
pull
():
...
...
@@ -11075,6 +11093,9 @@ def models(type=None):
"""
import
logging
logger
.
info
(
"Initializing models()"
)
import
pandas
as
pd
...
...
@@ -11139,6 +11160,8 @@ def models(type=None):
if
type
==
'ensemble'
:
df
=
df
[
df
.
index
.
isin
(
ensemble_models
)]
logger
.
info
(
"models() succesfully completed"
)
return
df
def
get_logs
(
experiment_name
=
None
,
save
=
False
):
...
...
@@ -11200,76 +11223,79 @@ def get_logs(experiment_name = None, save = False):
return
runs
def
get_config
(
variable
):
"""
get global environment variable
"""
import
logging
logger
.
info
(
"Initializing get_config()"
)
if
variable
==
'X'
:
return
X
global_var
=
X
if
variable
==
'y'
:
return
y
global_var
=
y
if
variable
==
'X_train'
:
return
X_train
global_var
=
X_train
if
variable
==
'X_test'
:
return
X_test
global_var
=
X_test
if
variable
==
'y_train'
:
return
y_train
global_var
=
y_train
if
variable
==
'y_test'
:
return
y_test
global_var
=
y_test
if
variable
==
'seed'
:
return
seed
global_var
=
seed
if
variable
==
'prep_pipe'
:
return
prep_pipe
global_var
=
prep_pipe
if
variable
==
'folds_shuffle_param'
:
return
folds_shuffle_param
global_var
=
folds_shuffle_param
if
variable
==
'n_jobs_param'
:
return
n_jobs_param
global_var
=
n_jobs_param
if
variable
==
'html_param'
:
return
html_param
global_var
=
html_param
if
variable
==
'create_model_container'
:
return
create_model_container
global_var
=
create_model_container
if
variable
==
'master_model_container'
:
return
master_model_container
global_var
=
master_model_container
if
variable
==
'display_container'
:
return
display_container
global_var
=
display_container
if
variable
==
'exp_name_log'
:
return
exp_name_log
global_var
=
exp_name_log
if
variable
==
'logging_param'
:
return
logging_param
global_var
=
logging_param
if
variable
==
'log_plots_param'
:
return
log_plots_param
global_var
=
log_plots_param
if
variable
==
'USI'
:
return
USI
global_var
=
USI
if
variable
==
'fix_imbalance_param'
:
return
fix_imbalance_param
global_var
=
fix_imbalance_param
if
variable
==
'fix_imbalance_method_param'
:
return
fix_imbalance_method_param
global_var
=
fix_imbalance_method_param
if
variable
==
'logger'
:
return
logger
logger
.
info
(
"Global variable: "
+
str
(
variable
)
+
' returned'
)
logger
.
info
(
"get_config() succesfully completed"
)
return
global_var
def
set_config
(
variable
,
value
):
...
...
@@ -11277,6 +11303,9 @@ def set_config(variable,value):
set global environment variable
"""
import
logging
logger
.
info
(
"Initializing set_config()"
)
if
variable
==
'X'
:
global
X
X
=
value
...
...
@@ -11354,9 +11383,8 @@ def set_config(variable,value):
fix_imbalance_param
=
value
if
variable
==
'fix_imbalance_method_param'
:
global
sefix_imbalance_method_paramed
global
fix_imbalance_method_param
fix_imbalance_method_param
=
value
if
variable
==
'logger'
:
global
logger
logger
=
value
logger
.
info
(
"Global variable: "
+
str
(
variable
)
+
' updated'
)
logger
.
info
(
"set_config() succesfully completed"
)
pycaret/regression.py
浏览文件 @
7c19e999
...
...
@@ -9501,6 +9501,9 @@ def automl(optimize='r2', use_holdout=False):
"""
import
logging
logger
.
info
(
"Initializing automl()"
)
if
optimize
==
'mae'
:
compare_dimension
=
'MAE'
elif
optimize
==
'mse'
:
...
...
@@ -9517,6 +9520,7 @@ def automl(optimize='r2', use_holdout=False):
scorer
=
[]
if
use_holdout
:
logger
.
info
(
"Model Selection Basis : Holdout set"
)
for
i
in
master_model_container
:
pred_holdout
=
predict_model
(
i
,
verbose
=
False
)
p
=
pull
()
...
...
@@ -9525,13 +9529,11 @@ def automl(optimize='r2', use_holdout=False):
scorer
.
append
(
p
)
else
:
logger
.
info
(
"Model Selection Basis : CV Results on Training set"
)
for
i
in
create_model_container
:
r
=
i
[
compare_dimension
][
-
2
:][
0
]
scorer
.
append
(
r
)
#for i in create_model_container:
# r = i[compare_dimension][-2:][0]
# scorer.append(r)
#returning better model
if
optimize
==
'r2'
:
...
...
@@ -9543,6 +9545,8 @@ def automl(optimize='r2', use_holdout=False):
automl_finalized
=
finalize_model
(
automl_result
)
logger
.
info
(
"automl() succesfully completed"
)
return
automl_finalized
def
pull
():
...
...
@@ -9574,6 +9578,9 @@ def models(type=None):
"""
import
logging
logger
.
info
(
"Initializing models()"
)
import
pandas
as
pd
model_id
=
[
'lr'
,
'lasso'
,
'ridge'
,
'en'
,
'lar'
,
'llar'
,
'omp'
,
'br'
,
'ard'
,
'par'
,
...
...
@@ -9654,6 +9661,8 @@ def models(type=None):
if
type
==
'ensemble'
:
df
=
df
[
df
.
index
.
isin
(
ensemble_models
)]
logger
.
info
(
"models() succesfully completed"
)
return
df
def
get_logs
(
experiment_name
=
None
,
save
=
False
):
...
...
@@ -9681,6 +9690,9 @@ def get_logs(experiment_name = None, save = False):
"""
import
logging
logger
.
info
(
"Initializing get_logs()"
)
import
sys
...
...
@@ -9692,17 +9704,24 @@ def get_logs(experiment_name = None, save = False):
import
mlflow
from
mlflow.tracking
import
MlflowClient
logger
.
info
(
"Importing MLFlow Client"
)
client
=
MlflowClient
()
if
client
.
get_experiment_by_name
(
exp_name_log_
)
is
None
:
logger
.
info
(
"No active run found."
)
sys
.
exit
(
'No active run found. Check logging parameter in setup or to get logs for inactive run pass experiment_name.'
)
exp_id
=
client
.
get_experiment_by_name
(
exp_name_log_
).
experiment_id
logger
.
info
(
"Searching runs"
)
runs
=
mlflow
.
search_runs
(
exp_id
)
if
save
:
logger
.
info
(
"Saving logs as csv"
)
file_name
=
str
(
exp_name_log_
)
+
'_logs.csv'
runs
.
to_csv
(
file_name
,
index
=
False
)
logger
.
info
(
"get_logs() succesfully completed"
)
return
runs
def
get_config
(
variable
):
...
...
@@ -9711,68 +9730,73 @@ def get_config(variable):
get global environment variable
"""
import
logging
logger
.
info
(
"Initializing get_config()"
)
if
variable
==
'X'
:
return
X
global_var
=
X
if
variable
==
'y'
:
return
y
global_var
=
y
if
variable
==
'X_train'
:
return
X_train
global_var
=
X_train
if
variable
==
'X_test'
:
return
X_test
global_var
=
X_test
if
variable
==
'y_train'
:
return
y_train
global_var
=
y_train
if
variable
==
'y_test'
:
return
y_test
global_var
=
y_test
if
variable
==
'seed'
:
return
seed
global_var
=
seed
if
variable
==
'prep_pipe'
:
return
prep_pipe
global_var
=
prep_pipe
if
variable
==
'folds_shuffle_param'
:
return
folds_shuffle_param
global_var
=
folds_shuffle_param
if
variable
==
'n_jobs_param'
:
return
n_jobs_param
global_var
=
n_jobs_param
if
variable
==
'html_param'
:
return
html_param
global_var
=
html_param
if
variable
==
'create_model_container'
:
return
create_model_container
global_var
=
create_model_container
if
variable
==
'master_model_container'
:
return
master_model_container
global_var
=
master_model_container
if
variable
==
'display_container'
:
return
display_container
global_var
=
display_container
if
variable
==
'exp_name_log'
:
return
exp_name_log
global_var
=
exp_name_log
if
variable
==
'logging_param'
:
return
logging_param
global_var
=
logging_param
if
variable
==
'log_plots_param'
:
return
log_plots_param
global_var
=
log_plots_param
if
variable
==
'USI'
:
return
USI
global_var
=
USI
if
variable
==
'fix_imbalance_param'
:
return
fix_imbalance_param
global_var
=
fix_imbalance_param
if
variable
==
'fix_imbalance_method_param'
:
return
fix_imbalance_method_param
global_var
=
fix_imbalance_method_param
if
variable
==
'logger'
:
return
logger
logger
.
info
(
"Global variable: "
+
str
(
variable
)
+
' returned'
)
logger
.
info
(
"get_config() succesfully completed"
)
return
global_var
def
set_config
(
variable
,
value
):
...
...
@@ -9780,6 +9804,9 @@ def set_config(variable,value):
set global environment variable
"""
import
logging
logger
.
info
(
"Initializing set_config()"
)
if
variable
==
'X'
:
global
X
X
=
value
...
...
@@ -9857,9 +9884,8 @@ def set_config(variable,value):
fix_imbalance_param
=
value
if
variable
==
'fix_imbalance_method_param'
:
global
sefix_imbalance_method_paramed
global
fix_imbalance_method_param
fix_imbalance_method_param
=
value
if
variable
==
'logger'
:
global
logger
logger
=
value
\ No newline at end of file
logger
.
info
(
"Global variable: "
+
str
(
variable
)
+
' updated'
)
logger
.
info
(
"set_config() succesfully completed"
)
\ No newline at end of file
pycaret/utils.py
浏览文件 @
7c19e999
...
...
@@ -2,7 +2,7 @@
# Author: Moez Ali <moez.ali@queensu.ca>
# License: MIT
version_
=
"pycaret-nightly-0.2
2
"
version_
=
"pycaret-nightly-0.2
3
"
def
version
():
print
(
version_
)
...
...
setup.py
浏览文件 @
7c19e999
...
...
@@ -13,7 +13,7 @@ with open('requirements.txt') as f:
setup
(
name
=
"pycaret-nightly"
,
version
=
"0.2
2
"
,
version
=
"0.2
3
"
,
description
=
"Nightly build of PyCaret - An open source, low-code machine learning library in Python."
,
long_description
=
readme
(),
long_description_content_type
=
"text/markdown"
,
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录