Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
openeuler
A-Tune
提交
97f8a043
A
A-Tune
项目概览
openeuler
/
A-Tune
通知
5
Star
0
Fork
0
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
0
列表
看板
标记
里程碑
合并请求
0
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
A
A-Tune
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
0
Issue
0
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
体验新版 GitCode,发现更多精彩内容 >>
提交
97f8a043
编写于
8月 25, 2020
作者:
O
openeuler-ci-bot
提交者:
Gitee
8月 25, 2020
浏览文件
操作
浏览文件
下载
差异文件
!159 implement weighted ensemble feature selection with multiprocessing to get real speedup
Merge pull request !159 from bryantclc/master
上级
77fd80b4
a37837c0
变更
1
显示空白变更内容
内联
并排
Showing
1 changed file
with
42 addition
and
25 deletion
+42
-25
analysis/optimizer/weighted_ensemble_feature_selector.py
analysis/optimizer/weighted_ensemble_feature_selector.py
+42
-25
未找到文件。
analysis/optimizer/weighted_ensemble_feature_selector.py
浏览文件 @
97f8a043
...
@@ -25,22 +25,24 @@ from sklearn.ensemble import BaggingRegressor, AdaBoostRegressor
...
@@ -25,22 +25,24 @@ from sklearn.ensemble import BaggingRegressor, AdaBoostRegressor
from
sklearn.tree
import
DecisionTreeRegressor
from
sklearn.tree
import
DecisionTreeRegressor
from
sklearn.linear_model
import
ElasticNet
,
Ridge
from
sklearn.linear_model
import
ElasticNet
,
Ridge
from
sklearn.tree
import
ExtraTreeRegressor
from
sklearn.tree
import
ExtraTreeRegressor
import
threading
import
multiprocessing
LOGGER
=
logging
.
getLogger
(
__name__
)
LOGGER
=
logging
.
getLogger
(
__name__
)
class
FeatureSelector
Thread
(
threading
.
Thread
):
class
FeatureSelector
Process
(
multiprocessing
.
Process
):
"""class feature selector each with
thread
ing"""
"""class feature selector each with
multiprocess
ing"""
def
__init__
(
self
,
regressor
,
list_sample_x
,
list_sample_y
,
labels
,
index
):
def
__init__
(
self
,
regressor
,
list_sample_x
,
list_sample_y
,
labels
,
index
,
sorted_index_queue
,
prediction_queue
):
threading
.
Thread
.
__init__
(
self
)
multiprocessing
.
Process
.
__init__
(
self
)
self
.
_regressor
=
regressor
self
.
_regressor
=
regressor
self
.
_list_sample_x
=
list_sample_x
self
.
_list_sample_x
=
list_sample_x
self
.
_list_sample_y
=
list_sample_y
self
.
_list_sample_y
=
list_sample_y
self
.
_labels
=
labels
self
.
_labels
=
labels
self
.
_index
=
index
self
.
_index
=
index
self
.
_sorted_index
=
[]
self
.
_sorted_index
=
[]
self
.
_sorted_index_queue
=
sorted_index_queue
self
.
_prediction_queue
=
prediction_queue
def
get_unified_feature_importance
(
self
,
regressor
):
def
get_unified_feature_importance
(
self
,
regressor
):
"""get unified feature importance for different type regressor"""
"""get unified feature importance for different type regressor"""
...
@@ -56,6 +58,12 @@ class FeatureSelectorThread(threading.Thread):
...
@@ -56,6 +58,12 @@ class FeatureSelectorThread(threading.Thread):
return
feature_importances
return
feature_importances
return
None
return
None
def
get_ensemble_train_data
(
self
):
"""get ensemble train data"""
regressor
=
self
.
_regressor
prediction
=
regressor
.
predict
(
self
.
_list_sample_x
)
return
prediction
def
run
(
self
):
def
run
(
self
):
"""main method to train the model and get ranked feature importance"""
"""main method to train the model and get ranked feature importance"""
self
.
_regressor
.
fit
(
self
.
_list_sample_x
,
self
.
_list_sample_y
)
self
.
_regressor
.
fit
(
self
.
_list_sample_x
,
self
.
_list_sample_y
)
...
@@ -63,16 +71,12 @@ class FeatureSelectorThread(threading.Thread):
...
@@ -63,16 +71,12 @@ class FeatureSelectorThread(threading.Thread):
result
=
zip
(
unified_feature_importance
,
self
.
_labels
,
self
.
_index
)
result
=
zip
(
unified_feature_importance
,
self
.
_labels
,
self
.
_index
)
result
=
sorted
(
result
,
key
=
lambda
x
:
-
x
[
0
])
result
=
sorted
(
result
,
key
=
lambda
x
:
-
x
[
0
])
self
.
_sorted_index
=
[
i
for
coef
,
label
,
i
in
result
]
self
.
_sorted_index
=
[
i
for
coef
,
label
,
i
in
result
]
self
.
_sorted_index_queue
.
put
(
self
.
_sorted_index
)
def
get_sorted_index
(
self
):
prediction
=
self
.
get_ensemble_train_data
()
"""get sorted feature importance index"""
self
.
_prediction_queue
.
put
(
prediction
)
try
:
return
self
.
_sorted_index
except
Exception
:
return
None
class
WeightedEnsembleFeatureSelector
:
class
WeightedEnsembleFeatureSelector
(
object
)
:
"""class weighted ensemble feature selector"""
"""class weighted ensemble feature selector"""
def
__init__
(
self
):
def
__init__
(
self
):
...
@@ -92,23 +96,36 @@ class WeightedEnsembleFeatureSelector:
...
@@ -92,23 +96,36 @@ class WeightedEnsembleFeatureSelector:
"""get native feature importances in parallel with multiple threading"""
"""get native feature importances in parallel with multiple threading"""
native_feature_importances
=
[]
native_feature_importances
=
[]
fs_thread_list
=
[]
fs_thread_list
=
[]
sorted_index_queue_list
=
[]
prediction_queue_list
=
[]
for
regressor
in
self
.
_regressors
:
for
regressor
in
self
.
_regressors
:
fs_thread
=
FeatureSelectorThread
(
regressor
,
list_sample_x
,
list_sample_y
,
labels
,
index
)
sorted_index_queue
=
multiprocessing
.
Queue
()
prediction_queue
=
multiprocessing
.
Queue
()
fs_thread
=
FeatureSelectorProcess
(
regressor
,
list_sample_x
,
list_sample_y
,
labels
,
index
,
sorted_index_queue
,
prediction_queue
)
fs_thread_list
.
append
(
fs_thread
)
fs_thread_list
.
append
(
fs_thread
)
sorted_index_queue_list
.
append
(
sorted_index_queue
)
prediction_queue_list
.
append
(
prediction_queue
)
fs_thread
.
start
()
fs_thread
.
start
()
for
fs_thread
in
fs_thread_list
:
for
fs_thread
in
fs_thread_list
:
fs_thread
.
join
()
fs_thread
.
join
()
for
fs_thread
in
fs_thread_list
:
native_fi
=
fs_thread
.
get_sorted_index
()
for
sorted_index_queue
in
sorted_index_queue_list
:
native_fi
=
sorted_index_queue
.
get
()
native_feature_importances
.
append
(
native_fi
)
native_feature_importances
.
append
(
native_fi
)
return
native_feature_importances
LOGGER
.
info
(
'get sorted index queue list'
)
def
get_ensemble_train_datas
(
self
,
list_sample_x
):
"""get ensemble train datas"""
predictions
=
[]
predictions
=
[]
for
regressor
in
self
.
_regressors
:
for
prediction_queue
in
prediction_queue_list
:
prediction
=
regressor
.
predict
(
list_sample_x
)
prediction
=
prediction_queue
.
get
(
)
predictions
.
append
(
prediction
)
predictions
.
append
(
prediction
)
LOGGER
.
info
(
'get prediction queue list'
)
return
native_feature_importances
,
predictions
def
get_ensemble_train_datas
(
self
,
list_sample_x
,
predictions
):
"""get ensemble train datas"""
train_datas
=
[]
train_datas
=
[]
for
i
in
range
(
len
(
list_sample_x
)):
for
i
in
range
(
len
(
list_sample_x
)):
train_data
=
[]
train_data
=
[]
...
@@ -117,9 +134,9 @@ class WeightedEnsembleFeatureSelector:
...
@@ -117,9 +134,9 @@ class WeightedEnsembleFeatureSelector:
train_datas
.
append
(
train_data
)
train_datas
.
append
(
train_data
)
return
train_datas
return
train_datas
def
get_ensemble_weights
(
self
,
list_sample_x
,
list_sample_y
):
def
get_ensemble_weights
(
self
,
list_sample_x
,
list_sample_y
,
predictions
):
"""get ensemble weights"""
"""get ensemble weights"""
ensemble_train_datas
=
self
.
get_ensemble_train_datas
(
list_sample_x
)
ensemble_train_datas
=
self
.
get_ensemble_train_datas
(
list_sample_x
,
predictions
)
self
.
_ensemble_model
.
fit
(
ensemble_train_datas
,
list_sample_y
)
self
.
_ensemble_model
.
fit
(
ensemble_train_datas
,
list_sample_y
)
orig_weight
=
self
.
_ensemble_model
.
coef_
orig_weight
=
self
.
_ensemble_model
.
coef_
orig_weight
-=
np
.
max
(
orig_weight
)
orig_weight
-=
np
.
max
(
orig_weight
)
...
@@ -129,10 +146,10 @@ class WeightedEnsembleFeatureSelector:
...
@@ -129,10 +146,10 @@ class WeightedEnsembleFeatureSelector:
def
get_ensemble_feature_importance
(
self
,
list_sample_x
,
list_sample_y
,
labels
):
def
get_ensemble_feature_importance
(
self
,
list_sample_x
,
list_sample_y
,
labels
):
"""Make sure the input list_sample_x is preprocessed with StandardScaler"""
"""Make sure the input list_sample_x is preprocessed with StandardScaler"""
index
=
list
(
range
(
len
(
labels
)))
index
=
list
(
range
(
len
(
labels
)))
native_feature_importances
=
self
.
get_native_feature_importances_parallel
(
native_feature_importances
,
predictions
=
self
.
get_native_feature_importances_parallel
(
list_sample_x
,
list_sample_y
,
labels
,
index
)
list_sample_x
,
list_sample_y
,
labels
,
index
)
LOGGER
.
info
(
'Get feature importances for each model: %s'
,
native_feature_importances
)
LOGGER
.
info
(
'Get feature importances for each model: %s'
,
native_feature_importances
)
ensemble_weights
=
self
.
get_ensemble_weights
(
list_sample_x
,
list_sample_y
)
ensemble_weights
=
self
.
get_ensemble_weights
(
list_sample_x
,
list_sample_y
,
predictions
)
LOGGER
.
info
(
'Get ensemble weights for each model: %s'
,
ensemble_weights
)
LOGGER
.
info
(
'Get ensemble weights for each model: %s'
,
ensemble_weights
)
ensemble_scores
=
[
0
for
i
in
range
(
len
(
list_sample_x
[
0
]))]
ensemble_scores
=
[
0
for
i
in
range
(
len
(
list_sample_x
[
0
]))]
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录