Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
openeuler
A-Tune
提交
c2a6751e
A
A-Tune
项目概览
openeuler
/
A-Tune
通知
5
Star
0
Fork
0
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
0
列表
看板
标记
里程碑
合并请求
0
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
A
A-Tune
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
0
Issue
0
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
体验新版 GitCode,发现更多精彩内容 >>
提交
c2a6751e
编写于
8月 03, 2020
作者:
X
Xiaoguang Li
提交者:
lixiaoguang
8月 03, 2020
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
add new weighted ensemble (importance) feature selection
Signed-off-by:
N
Licheng Chen
<
chenlicheng@huawei.com
>
上级
f3f5ea9c
变更
2
显示空白变更内容
内联
并排
Showing
2 changed files
with
135 additions
and
3 deletions
+135
-3
analysis/optimizer/optimizer.py
analysis/optimizer/optimizer.py
+5
-3
analysis/optimizer/weighted_ensemble_feature_selector.py
analysis/optimizer/weighted_ensemble_feature_selector.py
+130
-0
未找到文件。
analysis/optimizer/optimizer.py
浏览文件 @
c2a6751e
...
...
@@ -26,6 +26,7 @@ from sklearn.preprocessing import StandardScaler
from
analysis.optimizer.abtest_tuning_manager
import
ABtestTuningManager
from
analysis.optimizer.knob_sampling_manager
import
KnobSamplingManager
from
analysis.optimizer.tpe_optimizer
import
TPEOptimizer
from
analysis.optimizer.weighted_ensemble_feature_selector
import
WeightedEnsembleFeatureSelector
LOGGER
=
logging
.
getLogger
(
__name__
)
...
...
@@ -103,7 +104,7 @@ class Optimizer(multiprocessing.Process):
raise
ValueError
(
"the ref value of {} is not an integer value"
.
format
(
p_nob
[
'name'
]))
if
ref_value
not
in
items
:
raise
ValueError
(
"the ref value of {} is out of range"
.
format
(
p_nob
[
'name'
])
)
items
.
append
(
ref_value
)
self
.
ref
.
append
(
ref_value
)
return
items
if
p_nob
[
'dtype'
]
==
'float'
:
...
...
@@ -125,7 +126,7 @@ class Optimizer(multiprocessing.Process):
raise
ValueError
(
"the ref value of {} is not a float value"
.
format
(
p_nob
[
'name'
]))
if
ref_value
not
in
items
:
raise
ValueError
(
"the ref value of {} is out of range"
.
format
(
p_nob
[
'name'
])
)
items
.
append
(
ref_value
)
self
.
ref
.
append
(
ref_value
)
return
items
if
p_nob
[
'dtype'
]
==
'string'
:
...
...
@@ -290,7 +291,8 @@ class Optimizer(multiprocessing.Process):
LOGGER
.
info
(
"Optimized result: %s"
,
params
)
LOGGER
.
info
(
"The optimized profile has been generated."
)
finalParam
=
{}
rank
=
self
.
feature_importance
(
options
,
performance
,
labels
)
wefs
=
WeightedEnsembleFeatureSelector
()
rank
=
wefs
.
get_ensemble_feature_importance
(
options
,
performance
,
labels
)
finalParam
[
"param"
]
=
params
finalParam
[
"rank"
]
=
rank
...
...
analysis/optimizer/weighted_ensemble_feature_selector.py
0 → 100644
浏览文件 @
c2a6751e
#!/usr/bin/python3
# -*- coding: utf-8 -*-
# Copyright (c) 2019 Huawei Technologies Co., Ltd.
# A-Tune is licensed under the Mulan PSL v2.
# You can use this software according to the terms and conditions of the Mulan PSL v2.
# You may obtain a copy of Mulan PSL v2 at:
# http://license.coscl.org.cn/MulanPSL2
# THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, EITHER EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, MERCHANTABILITY OR FIT FOR A PARTICULAR
# PURPOSE.
# See the Mulan PSL v2 for more details.
# Create: 2020-08-02
"""
This class is used to perform weighted ensemble feature selection
to get really importance tuning parameters
"""
import
numpy
as
np
from
sklearn.linear_model
import
Lasso
from
sklearn.ensemble
import
RandomForestRegressor
from
sklearn.ensemble
import
GradientBoostingRegressor
from
sklearn.ensemble
import
BaggingRegressor
,
AdaBoostRegressor
from
sklearn.tree
import
DecisionTreeRegressor
from
sklearn.linear_model
import
ElasticNet
,
Ridge
import
logging
LOGGER
=
logging
.
getLogger
(
__name__
)
class WeightedEnsembleFeatureSelector(object):
    """Weighted ensemble feature selector.

    Fits a pool of regressors (Lasso, RandomForest, GradientBoosting,
    ElasticNet, AdaBoost, Bagging) on the sampled (knob, performance)
    data, ranks the features with each model, then blends the per-model
    rankings using softmax weights derived from a Ridge meta-model fitted
    on the base models' predictions.  The result is a single combined
    importance ranking of the tuning parameters.
    """

    def __init__(self):
        """Create the base regressors and the Ridge meta-model."""
        lasso = Lasso(alpha=0.0005, max_iter=1000000)
        rf = RandomForestRegressor(n_estimators=10000, random_state=0, n_jobs=-1)
        gb = GradientBoostingRegressor(n_estimators=10000, learning_rate=0.1)
        en = ElasticNet(alpha=0.0003, max_iter=1000000, l1_ratio=0.8)
        adb = AdaBoostRegressor(DecisionTreeRegressor(max_depth=16),
                                n_estimators=10000, random_state=0)
        bag = BaggingRegressor(base_estimator=DecisionTreeRegressor(max_depth=16),
                               n_estimators=10000)
        self._regressors = [lasso, rf, gb, en, adb, bag]
        # The meta-model's fitted coefficients become the per-regressor weights.
        self._ensemble_model = Ridge(alpha=10, max_iter=1000000)
        # NOTE: the original message used a backslash continuation inside the
        # string literal, which embedded indentation whitespace into the log
        # output; implicit concatenation keeps the message clean.
        LOGGER.info('Weighted Ensemble Feature Selector using: '
                    'Lasso, RandomForest, GradientBoosting, ElasticNet, AdaBoost, Bagging')

    def get_unified_feature_importance(self, regressor):
        """Return per-feature importance scores from a fitted regressor.

        Tree ensembles expose feature_importances_; linear models expose
        coef_ (the absolute value is used); bagging-style models expose
        estimators_features_, in which case the importances of the
        individual estimators_ are averaged.  Returns None when the
        regressor exposes none of these attributes.
        """
        if hasattr(regressor, "feature_importances_"):
            return regressor.feature_importances_
        if hasattr(regressor, "coef_"):
            return np.abs(regressor.coef_)
        if hasattr(regressor, "estimators_features_"):
            return np.mean([tree.feature_importances_
                            for tree in regressor.estimators_], axis=0)
        return None

    def get_one_native_feature_importance(self, regressor,
                                          list_sample_x, list_sample_y, labels, index):
        """Fit one regressor once and return the feature indexes sorted
        by decreasing importance."""
        regressor.fit(list_sample_x, list_sample_y)
        unified_feature_importance = self.get_unified_feature_importance(regressor)
        ranked = sorted(zip(unified_feature_importance, labels, index),
                        key=lambda item: -item[0])
        return [i for _, _, i in ranked]

    def get_native_feature_importances(self, list_sample_x, list_sample_y, labels, index):
        """Return one sorted-index ranking per base regressor.

        (Original docstring said "natice"; fixed to "native".)
        """
        return [self.get_one_native_feature_importance(regressor,
                                                       list_sample_x, list_sample_y,
                                                       labels, index)
                for regressor in self._regressors]

    def get_ensemble_train_datas(self, list_sample_x):
        """Build the meta-model training matrix: one row per sample, one
        column per base regressor's prediction."""
        predictions = [regressor.predict(list_sample_x)
                       for regressor in self._regressors]
        # Transpose (regressor, sample) -> (sample, regressor).
        return [list(row) for row in zip(*predictions)]

    def get_ensemble_weights(self, list_sample_x, list_sample_y):
        """Fit the Ridge meta-model on the base models' predictions and
        return softmax-normalized weights, one per base regressor."""
        ensemble_train_datas = self.get_ensemble_train_datas(list_sample_x)
        self._ensemble_model.fit(ensemble_train_datas, list_sample_y)
        # Copy before shifting: applying "-=" to coef_ itself would
        # silently mutate the fitted meta-model's coefficients in place.
        orig_weight = np.array(self._ensemble_model.coef_, dtype=float)
        orig_weight -= np.max(orig_weight)  # shift for numerical stability
        softmax_weight = np.exp(orig_weight) / np.sum(np.exp(orig_weight))
        return softmax_weight

    def get_ensemble_feature_importance(self, list_sample_x, list_sample_y, labels):
        """Combine the per-model rankings into one weighted ranking.

        Make sure the input list_sample_x is preprocessed with StandardScaler.
        Returns (rank, sorted_index): a human-readable "label: score"
        string and the feature indexes sorted by decreasing score.
        """
        index = list(range(len(labels)))
        native_feature_importances = self.get_native_feature_importances(
            list_sample_x, list_sample_y, labels, index)
        LOGGER.info('Get feature importances for each model: %s', native_feature_importances)
        ensemble_weights = self.get_ensemble_weights(list_sample_x, list_sample_y)
        LOGGER.info('Get ensemble weights for each model: %s', ensemble_weights)
        # Borda-style scoring: a feature ranked r-th (0-based) by a model
        # contributes weight * (feature_num - r) to its total score.
        ensemble_scores = [0] * len(list_sample_x[0])
        for en_weight, native_fi in zip(ensemble_weights, native_feature_importances):
            feature_num = len(native_fi)
            for pos, feature_index in enumerate(native_fi):
                ensemble_scores[feature_index] += en_weight * (feature_num - pos)
        # the larger, the better
        ensemble_result = sorted(zip(ensemble_scores, labels, index),
                                 key=lambda item: -item[0])
        rank = ", ".join("%s: %s" % (label, round(score, 3))
                         for score, label, _ in ensemble_result)
        sorted_index = [i for _, _, i in ensemble_result]
        LOGGER.info('ensemble rank: %s', rank)
        LOGGER.info('ensemble sorted_index: %s', sorted_index)
        return rank, sorted_index
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录