#!/usr/bin/python3
# -*- coding: utf-8 -*-
# Copyright (c) 2019 Huawei Technologies Co., Ltd.
# A-Tune is licensed under the Mulan PSL v2.
# You can use this software according to the terms and conditions of the Mulan PSL v2.
# You may obtain a copy of Mulan PSL v2 at:
#     http://license.coscl.org.cn/MulanPSL2
# THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, EITHER EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, MERCHANTABILITY OR FIT FOR A PARTICULAR
# PURPOSE.
# See the Mulan PSL v2 for more details.
# Create: 2019-10-29

"""
This module is used to find optimal settings and generate an optimized profile.
"""

import logging
from multiprocessing import Process
import numpy as np
from skopt.optimizer import gp_minimize
from sklearn.linear_model import Lasso
from sklearn.preprocessing import StandardScaler

LOGGER = logging.getLogger(__name__)


class Optimizer(Process):
    """Find optimal settings and generate an optimized profile."""

    def __init__(self, name, params, child_conn, engine="bayes", max_eval=50):
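        """
        Initialize the optimizer process.

        :param name: process name
        :param params: list of knob descriptions (name, type, dtype, range, ref, ...)
        :param child_conn: pipe endpoint for exchanging candidate settings and
            evaluation results with the parent process
        :param engine: optimization engine; only gp_minimize is used in this module
        :param max_eval: maximum number of performance evaluations
        """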
        super(Optimizer, self).__init__(name=name)
        self.knobs = params
        self.child_conn = child_conn
        self.engine = engine
        self.max_eval = int(max_eval)
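        # Reference (baseline) values of each knob, collected while building the
        # search space and passed to gp_minimize as the initial point x0.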
        self.ref = []

    def build_space(self):
        """Build the parameter search space from the configured knobs."""
        objective_params_list = []
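        # Each knob contributes one dimension to the search space: a list of
        # candidate values for discrete knobs, or a (low, high) tuple for
        # continuous knobs.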
        for p_nob in self.knobs:
            if p_nob['type'] == 'discrete':
                items = self.handle_discrete_data(p_nob)
                objective_params_list.append(items)
            elif p_nob['type'] == 'continuous':
                r_range = p_nob['range']
                if r_range is None or len(r_range) != 2:
                    raise ValueError("the range of {} must contain exactly 2 values"
                                     .format(p_nob['name']))
                try:
                    ref_value = int(p_nob['ref'])
                except ValueError:
                    raise ValueError("the ref value of {} is not an integer value"
                                     .format(p_nob['name']))
                if ref_value < r_range[0] or ref_value > r_range[1]:
                    raise ValueError("the ref value of {} is out of range".format(p_nob['name']))
                self.ref.append(ref_value)
                objective_params_list.append((r_range[0], r_range[1]))
            else:
                raise ValueError("the type of {} is not supported".format(p_nob['name']))
        return objective_params_list

    def handle_discrete_data(self, p_nob):
        """handle discrete data"""
        if p_nob['dtype'] == 'int':
            items = p_nob['items']
            if items is None:
                items = []
            r_range = p_nob['range']
            step = 1
            if 'step' in p_nob:
                step = 1 if p_nob['step'] < 1 else p_nob['step']
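            # Expand range pairs [start1, end1, start2, end2, ...] into concrete
            # candidate values using the configured step, then deduplicate.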
            if r_range is not None:
                length = len(r_range) if len(r_range) % 2 == 0 else len(r_range) - 1
                for i in range(0, length, 2):
                    items.extend(list(np.arange(r_range[i], r_range[i + 1] + 1, step=step)))
            items = list(set(items))
            try:
                ref_value = int(p_nob['ref'])
            except ValueError:
                raise ValueError("the ref value of {} is not an integer value"
                                 .format(p_nob['name']))
            if ref_value not in items:
                raise ValueError("the ref value of {} is out of range".format(p_nob['name']))
            self.ref.append(ref_value)
            return items
        if p_nob['dtype'] == 'string':
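            # String options are encoded as integer indices; the index matching
            # the 'ref' option is recorded as the reference value.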
            items = p_nob['options']
            keys = []
            length = len(self.ref)
            for key, value in enumerate(items):
                keys.append(key)
                if p_nob['ref'] == value:
                    self.ref.append(key)
            if len(self.ref) == length:
                raise ValueError("the ref value of {} is out of range"
                                 .format(p_nob['name']))
            return keys
        raise ValueError("the dtype of {} is not supported".format(p_nob['name']))

    @staticmethod
    def feature_importance(options, performance, labels):
        """Rank the importance of each knob.

        Fit a Lasso regression on the standardized option vectors against the
        measured performance and report each knob's share of the total absolute
        coefficient weight.
        """
        options = StandardScaler().fit_transform(options)
        lasso = Lasso()
        lasso.fit(options, performance)
        result = zip(lasso.coef_, labels)
        total_sum = sum(map(abs, lasso.coef_))
        if total_sum == 0:
            return ", ".join("%s: 0" % label for label in labels)
        result = sorted(result, key=lambda x: -np.abs(x[0]))
        rank = ", ".join("%s: %s%%" % (label, round(coef * 100 / total_sum, 2))
                         for coef, label in result)
        return rank

    def run(self):
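        """Run the optimization loop and send the optimized parameters to the parent process."""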
        def objective(var):
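            # Map the candidate vector back to named knob values, send them to the
            # parent process for benchmarking, and return the summed evaluation
            # result (gp_minimize minimizes this value).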
            for i, knob in enumerate(self.knobs):
                if knob['dtype'] == 'string':
                    params[knob['name']] = knob['options'][var[i]]
                else:
                    params[knob['name']] = var[i]
            self.child_conn.send(params)
            result = self.child_conn.recv()
            x_num = 0.0
            eval_list = result.split(',')
            for value in eval_list:
                num = float(value)
                x_num = x_num + num
            options.append(var)
            performance.append(x_num)
            return x_num

        params = {}
        options = []
        performance = []
        labels = []
        try:
            LOGGER.info("Running performance evaluation...")
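            # Bayesian optimization with Gaussian processes; the collected
            # reference values serve as the initial evaluation point x0.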
            ret = gp_minimize(objective, self.build_space(), n_calls=self.max_eval, x0=self.ref)
            LOGGER.info("Minimization procedure has been completed.")
        except Exception as err:
            LOGGER.error('Optimization failed: %s', err)
            self.child_conn.send(err)
            return None

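        # Translate the best point found (ret.x) back into named parameter values.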
        for i, knob in enumerate(self.knobs):
            if knob['dtype'] == 'string':
                params[knob['name']] = knob['options'][ret.x[i]]
            else:
                params[knob['name']] = ret.x[i]
            labels.append(knob['name'])
        self.child_conn.send(params)
        LOGGER.info("Optimized result: %s", params)
        LOGGER.info("The optimized profile has been generated.")

        rank = self.feature_importance(options, performance, labels)
        LOGGER.info("The feature importances of current evaluation are: %s", rank)
        return params

    def stop_process(self):
        """stop process"""
        self.child_conn.close()
        self.terminate()