Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
OpenDocCN
pycaret
提交
71218845
pycaret
项目概览
OpenDocCN
/
pycaret
通知
2
Star
2
Fork
0
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
0
列表
看板
标记
里程碑
合并请求
0
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
pycaret
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
0
Issue
0
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
前往新版Gitcode,体验更适合开发者的 AI 搜索 >>
未验证
提交
71218845
编写于
1月 24, 2020
作者:
P
pycaret
提交者:
GitHub
1月 24, 2020
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
Add files via upload
上级
5e75f9f2
变更
2
展开全部
隐藏空白更改
内联
并排
Showing
2 changed file
with
592 addition
and
99 deletion
+592
-99
classification.py
classification.py
+77
-57
regression.py
regression.py
+515
-42
未找到文件。
classification.py
浏览文件 @
71218845
...
...
@@ -3,6 +3,7 @@
# License: MIT
def
setup
(
data
,
target
,
train_size
=
0.7
,
...
...
@@ -13,7 +14,7 @@ def setup(data,
numeric_features
=
None
,
numeric_imputation
=
'mean'
,
date_features
=
None
,
drop
_features
=
None
,
ignore
_features
=
None
,
normalize
=
False
,
normalize_method
=
'zscore'
,
transformation
=
False
,
...
...
@@ -58,12 +59,13 @@ def setup(data,
sampling: bool, default = True
When sample size exceeds 25,000 samples, pycaret will build a base estimator at
various sample levels of the original dataset. This will return a performance
plot of AUC, Accuracy and Recall values at various sample levels, that will
assist you in deciding the preferred sample size for modeling. You are then
required to enter the desired sample size that will be considered for training
and validation in the pycaret environment. 1 - sample size will be discarded and
not be used any further.
various sample sizes of the original dataset. This will return a performance
plot of AUC, Accuracy, Recall, Precision, Kappa and F1 values at various sample
levels, that will assist you in deciding the preferred sample size for modeling.
You are then required to enter the desired sample size that will be considered for
training and validation in the pycaret environment. When sample_size entered is less
than 1, the remaining dataset (1 - sample) is used in fitting the model only when
finalize_model() is called.
sample_estimator: object, default = None
If None, Logistic Regression is used by default.
...
...
@@ -98,11 +100,10 @@ def setup(data,
and date column is dropped from the dataset. In case the date column has a time
stamp, it will also extract features related to time / hours.
drop
_features: string, default = None
ignore
_features: string, default = None
If any feature has to be ignored for modeling, it can be passed in the param
drop_features. Inferred ID column and DateTime column is automatically set to
drop from the dataset. Incase ID column is not correctly detected, it is
recommended to drop ID column using drop_features.
ignore_features. ID and DateTime columns, when inferred, are automatically set
to be ignored for modeling.
normalize: bool, default = False
When set to True, transform feature space using normalize_method param defined.
...
...
@@ -207,8 +208,8 @@ def setup(data,
sys
.
exit
(
"(Value Error): transformation_method param only accepts 'yeo-johnson' or 'quantile' "
)
#cannot drop target
if
drop
_features
is
not
None
:
if
target
in
drop
_features
:
if
ignore
_features
is
not
None
:
if
target
in
ignore
_features
:
sys
.
exit
(
"(Value Error): cannot drop target column. "
)
#forced type check
...
...
@@ -234,10 +235,10 @@ def setup(data,
sys
.
exit
(
"(Value Error): Column type forced is either target column or doesn't exist in the dataset."
)
#drop features
if
drop
_features
is
not
None
:
for
i
in
drop
_features
:
if
ignore
_features
is
not
None
:
for
i
in
ignore
_features
:
if
i
not
in
all_cols
:
sys
.
exit
(
"(Value Error):
Column type forc
ed is either target column or doesn't exist in the dataset."
)
sys
.
exit
(
"(Value Error):
Feature ignor
ed is either target column or doesn't exist in the dataset."
)
#pre-load libraries
import
pandas
as
pd
...
...
@@ -315,10 +316,10 @@ def setup(data,
numeric_features_pass
=
numeric_features
#drop features
if
drop
_features
is
None
:
drop
_features_pass
=
[]
if
ignore
_features
is
None
:
ignore
_features_pass
=
[]
else
:
drop_features_pass
=
drop
_features
ignore_features_pass
=
ignore
_features
#date features
if
date_features
is
None
:
...
...
@@ -346,7 +347,7 @@ def setup(data,
categorical_features
=
cat_features_pass
,
numerical_features
=
numeric_features_pass
,
time_features
=
date_features_pass
,
features_todrop
=
drop
_features_pass
,
features_todrop
=
ignore
_features_pass
,
numeric_imputation_strategy
=
numeric_imputation
,
categorical_imputation_strategy
=
categorical_imputation_pass
,
scale_data
=
normalize
,
...
...
@@ -4396,7 +4397,6 @@ def blend_models(estimator_list = 'All',
def
stack_models
(
estimator_list
,
meta_model
=
None
,
fold
=
10
,
...
...
@@ -4559,12 +4559,28 @@ def stack_models(estimator_list,
'''
#testing
#no active test
#pre-load libraries
import
pandas
as
pd
import
ipywidgets
as
ipw
from
IPython.display
import
display
,
HTML
,
clear_output
,
update_display
import
time
,
datetime
from
copy
import
deepcopy
#copy estimator_list
estimator_list
=
deepcopy
(
estimator_list
)
#Defining meta model.
if
meta_model
==
None
:
from
sklearn.linear_model
import
LogisticRegression
meta_model
=
LogisticRegression
()
else
:
meta_model
=
deepcopy
(
meta_model
)
clear_output
()
#progress bar
max_progress
=
len
(
estimator_list
)
+
fold
+
4
progress
=
ipw
.
IntProgress
(
value
=
0
,
min
=
0
,
max
=
max_progress
,
step
=
1
,
description
=
'Processing: '
)
...
...
@@ -4594,7 +4610,7 @@ def stack_models(estimator_list,
from
sklearn.model_selection
import
StratifiedKFold
from
sklearn.model_selection
import
cross_val_predict
import
seaborn
as
sns
from
copy
import
deepcopy
progress
.
value
+=
1
...
...
@@ -4604,19 +4620,10 @@ def stack_models(estimator_list,
elif
method
==
'hard'
:
predict_method
=
'predict'
#copy estimator_list
estimator_list
=
deepcopy
(
estimator_list
)
#Defining meta model. Logistic Regression hardcoded for now
if
meta_model
==
None
:
from
sklearn.linear_model
import
LogisticRegression
meta_model
=
LogisticRegression
()
else
:
meta_model
=
deepcopy
(
meta_model
)
#defining data_X and data_y
if
finalize
:
data_X
=
X
.
copy
()
data_X
=
X
_
.
copy
()
data_y
=
y
.
copy
()
else
:
data_X
=
X_train
.
copy
()
...
...
@@ -4695,6 +4702,9 @@ def stack_models(estimator_list,
counter
+=
1
#fill nas for base_prediction
base_prediction
.
fillna
(
value
=
0
,
inplace
=
True
)
#defining column names now
target_col_name
=
np
.
array
(
base_prediction
.
columns
[
0
])
model_names
=
np
.
append
(
target_col_name
,
model_names_fixed
)
#added fixed here
...
...
@@ -5030,7 +5040,7 @@ def create_stacknet(estimator_list,
'''
#testing
#no active test
ing
#no active test
#exception checking
import
sys
...
...
@@ -5083,6 +5093,19 @@ def create_stacknet(estimator_list,
import
ipywidgets
as
ipw
from
IPython.display
import
display
,
HTML
,
clear_output
,
update_display
import
time
,
datetime
from
copy
import
deepcopy
#copy estimator_list
estimator_list
=
deepcopy
(
estimator_list
)
#copy meta_model
if
meta_model
is
None
:
from
sklearn.linear_model
import
LogisticRegression
meta_model
=
LogisticRegression
()
else
:
meta_model
=
deepcopy
(
meta_model
)
clear_output
()
#progress bar
max_progress
=
len
(
estimator_list
)
+
fold
+
4
...
...
@@ -5115,13 +5138,9 @@ def create_stacknet(estimator_list,
from
sklearn
import
metrics
from
sklearn.model_selection
import
StratifiedKFold
from
sklearn.model_selection
import
cross_val_predict
from
copy
import
deepcopy
progress
.
value
+=
1
#copy estimator_list
estimator_list
=
deepcopy
(
estimator_list
)
base_level
=
estimator_list
[
0
]
base_level_names
=
[]
...
...
@@ -5168,13 +5187,6 @@ def create_stacknet(estimator_list,
data_X
.
reset_index
(
drop
=
True
,
inplace
=
True
)
data_y
.
reset_index
(
drop
=
True
,
inplace
=
True
)
#defining meta model
if
meta_model
is
None
:
from
sklearn.linear_model
import
LogisticRegression
meta_model
=
LogisticRegression
()
else
:
meta_model
=
deepcopy
(
meta_model
)
#Capturing the method of stacking required by user. method='soft' means 'predict_proba' else 'predict'
if
method
==
'soft'
:
...
...
@@ -5222,7 +5234,8 @@ def create_stacknet(estimator_list,
base_array
=
np
.
empty
((
0
,
0
))
base_counter
+=
1
base_array_df
.
fillna
(
value
=
0
,
inplace
=
True
)
#fill na's with zero
base_array_df
.
columns
=
base_level_fixed
if
restack
:
...
...
@@ -5288,7 +5301,8 @@ def create_stacknet(estimator_list,
model_counter
+=
1
base_array_df
=
pd
.
concat
([
base_array_df
,
inter_array_df
],
axis
=
1
)
base_array_df
.
fillna
(
value
=
0
,
inplace
=
True
)
#fill na's with zero
models_
.
append
(
inter_inner
)
if
restack
==
False
:
...
...
@@ -5498,8 +5512,6 @@ def create_stacknet(estimator_list,
def
interpret_model
(
estimator
,
plot
=
'summary'
,
feature
=
None
,
...
...
@@ -6491,6 +6503,7 @@ def load_experiment(experiment_name):
def
predict_model
(
estimator
,
data
=
None
):
...
...
@@ -6549,6 +6562,10 @@ def predict_model(estimator,
import
re
from
sklearn
import
metrics
from
copy
import
deepcopy
from
IPython.display
import
clear_output
,
update_display
estimator
=
deepcopy
(
estimator
)
clear_output
()
#check if estimator is string, then load model
if
type
(
estimator
)
is
str
:
...
...
@@ -6567,11 +6584,11 @@ def predict_model(estimator,
X_test_
.
reset_index
(
drop
=
True
,
inplace
=
True
)
y_test_
.
reset_index
(
drop
=
True
,
inplace
=
True
)
model
=
deepcopy
(
estimator
)
model
=
estimator
else
:
estimator_
=
deepcopy
(
estimator
)
estimator_
=
estimator
if
type
(
estimator_
)
is
list
:
...
...
@@ -6584,19 +6601,19 @@ def predict_model(estimator,
else
:
prep_pipe_transformer
=
prep_pipe
model
=
deepcopy
(
estimator
)
estimator
=
deepcopy
(
estimator
)
model
=
estimator
estimator
=
estimator
else
:
prep_pipe_transformer
=
prep_pipe
model
=
deepcopy
(
estimator
)
estimator
=
deepcopy
(
estimator
)
model
=
estimator
estimator
=
estimator
try
:
model
=
finalize_model
(
estimator
)
except
:
model
=
deepcopy
(
estimator
)
model
=
estimator
Xtest
=
prep_pipe_transformer
.
transform
(
data
)
X_test_
=
data
.
copy
()
#original concater
...
...
@@ -6613,7 +6630,7 @@ def predict_model(estimator,
"""
#utility
stacker
=
deepcopy
(
model
)
stacker
=
model
restack
=
stacker
.
pop
()
stacker_method
=
stacker
.
pop
()
#stacker_method = stacker_method[0]
...
...
@@ -6815,7 +6832,7 @@ def predict_model(estimator,
"""
#copy
stacker
=
deepcopy
(
model
)
stacker
=
model
#restack
restack
=
stacker
.
pop
()
...
...
@@ -6879,6 +6896,9 @@ def predict_model(estimator,
#ytest = ytest #change
#meta predictions starts here
df
.
fillna
(
value
=
0
,
inplace
=
True
)
df_restack
.
fillna
(
value
=
0
,
inplace
=
True
)
#restacking check
try
:
...
...
regression.py
浏览文件 @
71218845
此差异已折叠。
点击以展开。
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录