#!/usr/bin/python3
# -*- coding: utf-8 -*-
# Copyright (c) 2019 Huawei Technologies Co., Ltd.
# A-Tune is licensed under the Mulan PSL v2.
# You can use this software according to the terms and conditions of the Mulan PSL v2.
# You may obtain a copy of Mulan PSL v2 at:
#     http://license.coscl.org.cn/MulanPSL2
# THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, EITHER EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, MERCHANTABILITY OR FIT FOR A PARTICULAR
# PURPOSE.
# See the Mulan PSL v2 for more details.
# Create: 2019-10-29

"""
This class is used to find optimal settings and generate optimized profile.
"""

import logging
import multiprocessing
import sys

import numpy as np
from skopt.optimizer import gp_minimize, dummy_minimize
from sklearn.linear_model import Lasso
from sklearn.preprocessing import StandardScaler

LOGGER = logging.getLogger(__name__)

class Optimizer(multiprocessing.Process):
    """Find optimal settings and generate an optimized profile.

    Runs as a child process: it sends candidate parameter sets through
    ``child_conn``, receives benchmark results back, and drives a skopt
    minimizer ('bayes' or 'random' engine) for ``max_eval`` evaluations.
    """

    def __init__(self, name, params, child_conn, engine="bayes", max_eval=50,
                 x0=None, y0=None, n_random_starts=20):
        """Initialize the tuning process.

        :param name: process name
        :param params: list of knob descriptions (dicts with keys such as
                       'name', 'type', 'dtype', 'range', 'items', 'options', 'ref')
        :param child_conn: pipe endpoint used to exchange parameter sets and
                           benchmark results with the parent process
        :param engine: tuning engine, 'bayes' (gp_minimize) or 'random'
                       (dummy_minimize)
        :param max_eval: number of evaluations to run
        :param x0: previously evaluated parameter sets, each a list of
                   "name=value" strings
        :param y0: performance values matching x0
        :param n_random_starts: random evaluations before the surrogate model
                                is used; defaults to 20 when None
        """
        super(Optimizer, self).__init__(name=name)
        self.knobs = params
        self.child_conn = child_conn
        self.engine = engine
        self.max_eval = int(max_eval)
        # reference (current) value of every knob, filled in by build_space()
        self.ref = []
        self.x0 = x0
        self.y0 = y0
        # normalize like max_eval: the value may arrive as a string
        self._n_random_starts = 20 if n_random_starts is None else int(n_random_starts)

    def build_space(self):
        """Build the parameter space handed to the optimizer.

        Side effect: records each knob's reference value in ``self.ref``.

        :returns: one entry per knob — a list of candidate values for a
                  discrete knob, a (low, high) tuple for a continuous one
        :raises ValueError: on a malformed or unsupported knob description
        """
        objective_params_list = []
        for p_nob in self.knobs:
            if p_nob['type'] == 'discrete':
                items = self.handle_discrete_data(p_nob)
                objective_params_list.append(items)
            elif p_nob['type'] == 'continuous':
                r_range = p_nob['range']
                if r_range is None or len(r_range) != 2:
                    raise ValueError("the item of the scope value of {} must be 2"
                                     .format(p_nob['name']))
                try:
                    ref_value = int(p_nob['ref'])
                except ValueError as err:
                    raise ValueError("the ref value of {} is not an integer value"
                                     .format(p_nob['name'])) from err
                if ref_value < r_range[0] or ref_value > r_range[1]:
                    raise ValueError("the ref value of {} is out of range".format(p_nob['name']))
                self.ref.append(ref_value)
                objective_params_list.append((r_range[0], r_range[1]))
            else:
                raise ValueError("the type of {} is not supported".format(p_nob['name']))
        return objective_params_list

    def handle_discrete_data(self, p_nob):
        """Collect the candidate values of one discrete knob.

        For an 'int' knob the explicit 'items' are merged with every
        (start, end) pair expanded from 'range' using 'step'; for a 'string'
        knob the indices of 'options' are returned.  The knob's reference
        value is appended to ``self.ref``.

        :param p_nob: one knob description dict
        :returns: list of candidate values (ints) or option indices
        :raises ValueError: if the ref value is invalid or dtype unsupported
        """
        if p_nob['dtype'] == 'int':
            # copy so the caller's knob description is never mutated
            items = [] if p_nob['items'] is None else list(p_nob['items'])
            r_range = p_nob['range']
            step = 1
            if 'step' in p_nob.keys():
                step = 1 if p_nob['step'] < 1 else p_nob['step']
            if r_range is not None:
                # ranges come as flat (start, end) pairs; ignore a trailing
                # unpaired bound
                length = len(r_range) if len(r_range) % 2 == 0 else len(r_range) - 1
                for i in range(0, length, 2):
                    items.extend(list(np.arange(r_range[i], r_range[i + 1] + 1, step=step)))
            items = list(set(items))
            try:
                ref_value = int(p_nob['ref'])
            except ValueError as err:
                raise ValueError("the ref value of {} is not an integer value"
                                 .format(p_nob['name'])) from err
            if ref_value not in items:
                raise ValueError("the ref value of {} is out of range".format(p_nob['name']))
            self.ref.append(ref_value)
            return items
        if p_nob['dtype'] == 'string':
            items = p_nob['options']
            keys = []
            length = len(self.ref)
            for key, value in enumerate(items):
                keys.append(key)
                if p_nob['ref'] == value:
                    self.ref.append(key)
            # self.ref unchanged means no option matched the ref value
            if len(self.ref) == length:
                raise ValueError("the ref value of {} is out of range"
                                 .format(p_nob['name']))
            return keys
        raise ValueError("the dtype of {} is not supported".format(p_nob['name']))

    @staticmethod
    def feature_importance(options, performance, labels):
        """Rank knob importance with a Lasso regression.

        :param options: evaluated parameter vectors
        :param performance: measured objective value for each vector
        :param labels: knob names aligned with the vector columns
        :returns: "name: pct%, ..." string sorted by absolute coefficient
        """
        options = StandardScaler().fit_transform(options)
        lasso = Lasso()
        lasso.fit(options, performance)
        result = zip(lasso.coef_, labels)
        total_sum = sum(map(abs, lasso.coef_))
        # all coefficients zero: report every knob as 0 instead of dividing by 0
        if total_sum == 0:
            return ", ".join("%s: 0" % label for label in labels)
        result = sorted(result, key=lambda x: -np.abs(x[0]))
        rank = ", ".join("%s: %s%%" % (label, round(coef * 100 / total_sum, 2))
                         for coef, label in result)
        return rank

    def _get_intvalue_from_knobs(self, kv):
        """Get the int value from knobs if dtype is string.

        :param kv: mapping of knob name -> value string for one x0 entry
        :returns: list of int values, one per knob in ``self.knobs`` order
        :raises ValueError: if a knob is missing from kv
        """
        x_each = []
        for p_nob in self.knobs:
            if p_nob['name'] not in kv.keys():
                raise ValueError("the param {} is not in the x0 ref".format(p_nob['name']))
            if p_nob['dtype'] != 'string':
                x_each.append(int(kv[p_nob['name']]))
                continue
            # string knob: encode the option as its index
            options = p_nob['options']
            for key, value in enumerate(options):
                if value != kv[p_nob['name']]:
                    continue
                x_each.append(key)
        return x_each

    def transfer(self):
        """Transfer ref x0 to int, y0 to float.

        :returns: (list of int parameter vectors, list of float performances);
                  both empty when no history was supplied
        :raises ValueError: if x0 entries are malformed or mismatched
        """
        list_ref_x = []
        list_ref_y = []
        if self.x0 is None or self.y0 is None:
            return (list_ref_x, list_ref_y)

        for x_value in self.x0:
            kv = {}
            if len(x_value) != len(self.knobs):
                raise ValueError("x0 is not the same length with knobs")

            for i, val in enumerate(x_value):
                params = val.split("=")
                if len(params) != 2:
                    raise ValueError("the param format of {} is not correct".format(params))
                kv[params[0]] = params[1]

            ref_x = self._get_intvalue_from_knobs(kv)
            if len(ref_x) != len(self.knobs):
                raise ValueError("tuning parameter is not the same length with knobs")
            list_ref_x.append(ref_x)
        list_ref_y = [float(y) for y in self.y0]
        return (list_ref_x, list_ref_y)

    def run(self):
        """Start the tuning process (multiprocessing entry point).

        Sends the final result — or any error — through ``self.child_conn``.

        :returns: dict of optimized parameters, or None on error
        """
        def objective(var):
            """objective method receive the benchmark result and send the next parameters"""
            iter_result = {}
            for i, knob in enumerate(self.knobs):
                if knob['dtype'] == 'string':
                    params[knob['name']] = knob['options'][var[i]]
                else:
                    params[knob['name']] = var[i]
            iter_result["param"] = params
            self.child_conn.send(iter_result)
            result = self.child_conn.recv()
            # the benchmark reply is a comma-separated list of numbers; their
            # sum is the value being minimized
            x_num = 0.0
            eval_list = result.split(',')
            for value in eval_list:
                num = float(value)
                x_num = x_num + num
            options.append(var)
            performance.append(x_num)
            return x_num

        params = {}
        options = []
        performance = []
        labels = []
        try:
            params_space = self.build_space()
            ref_x, ref_y = self.transfer()
            if len(ref_x) == 0:
                # no history supplied: start from the knobs' reference values
                ref_x = self.ref
                ref_y = None
            if not isinstance(ref_x[0], (list, tuple)):
                ref_x = [ref_x]

            LOGGER.info('x0: %s', ref_x)
            LOGGER.info('y0: %s', ref_y)

            if ref_x is not None and isinstance(ref_x[0], (list, tuple)):
                # supplied history points count towards the random-start budget
                self._n_random_starts = 0 if len(ref_x) >= self._n_random_starts \
                        else self._n_random_starts - len(ref_x) + 1

            LOGGER.info('n_random_starts parameter is: %d', self._n_random_starts)
            LOGGER.info("Running performance evaluation.......")
            if self.engine == 'random':
                ret = dummy_minimize(objective, params_space, n_calls=self.max_eval)
            elif self.engine == 'bayes':
                ret = gp_minimize(objective, params_space, n_calls=self.max_eval,
                                  n_random_starts=self._n_random_starts, x0=ref_x, y0=ref_y)
            else:
                # BUGFIX: an unknown engine previously fell through and crashed
                # later on an unbound 'ret'; report it through the error channel
                raise ValueError("the engine {} is not supported".format(self.engine))
            LOGGER.info("Minimization procedure has been completed.")
        except ValueError as value_error:
            LOGGER.error('Value Error: %s', repr(value_error))
            self.child_conn.send(value_error)
            return None
        except RuntimeError as runtime_error:
            LOGGER.error('Runtime Error: %s', repr(runtime_error))
            self.child_conn.send(runtime_error)
            return None
        except Exception as err:
            LOGGER.error('Unexpected Error: %s', repr(err))
            self.child_conn.send(Exception("Unexpected Error:", repr(err)))
            return None

        # decode the optimizer's best point back into named knob values
        for i, knob in enumerate(self.knobs):
            if knob['dtype'] == 'string':
                params[knob['name']] = knob['options'][ret.x[i]]
            else:
                params[knob['name']] = ret.x[i]
            labels.append(knob['name'])

        LOGGER.info("Optimized result: %s", params)
        LOGGER.info("The optimized profile has been generated.")
        final_param = {}
        rank = self.feature_importance(options, performance, labels)
        final_param["param"] = params
        final_param["rank"] = rank
        final_param["finished"] = True
        self.child_conn.send(final_param)
        LOGGER.info("The feature importances of current evaluation are: %s", rank)
        return params

    def stop_process(self):
        """Close the pipe endpoint and terminate the tuning process."""
        self.child_conn.close()
        self.terminate()