#!/usr/bin/python3
# -*- coding: utf-8 -*-
# Copyright (c) 2019 Huawei Technologies Co., Ltd.
# A-Tune is licensed under the Mulan PSL v2.
# You can use this software according to the terms and conditions of the Mulan PSL v2.
# You may obtain a copy of Mulan PSL v2 at:
#     http://license.coscl.org.cn/MulanPSL2
# THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, EITHER EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, MERCHANTABILITY OR FIT FOR A PARTICULAR
# PURPOSE.
# See the Mulan PSL v2 for more details.
# Create: 2019-10-29

"""
This class is used to find optimal settings and generate optimized profile.
"""

import logging
import multiprocessing
import numpy as np
from skopt.optimizer import gp_minimize, dummy_minimize
from sklearn.linear_model import Lasso
from sklearn.preprocessing import StandardScaler

from analysis.optimizer.abtest_tuning_manager import ABtestTuningManager

LOGGER = logging.getLogger(__name__)


class Optimizer(multiprocessing.Process):
    """find optimal settings and generate optimized profile"""

    def __init__(self, name, params, child_conn, engine="bayes", max_eval=50,
                 x0=None, y0=None, n_random_starts=20):
        super(Optimizer, self).__init__(name=name)
        self.knobs = params
        self.child_conn = child_conn
        self.engine = engine
        self.max_eval = int(max_eval)
        self.split_count = 5  # should be set by YAML client
        self.ref = []
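        # optional warm-start data: x0 holds previously evaluated parameter
        # sets and y0 the matching benchmark scores (converted in transfer())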
        self.x0 = x0
        self.y0 = y0
        self._n_random_starts = 20 if n_random_starts is None else n_random_starts

    def build_space(self):
        """build the parameter space from the tuning knobs"""
        objective_params_list = []
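        # each knob becomes one skopt dimension: a list of candidate values
        # for discrete knobs, or a (low, high) tuple for continuous ones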
        for p_nob in self.knobs:
            if p_nob['type'] == 'discrete':
                items = self.handle_discrete_data(p_nob)
                objective_params_list.append(items)
            elif p_nob['type'] == 'continuous':
                r_range = p_nob['range']
                if r_range is None or len(r_range) != 2:
                    raise ValueError("the range of {} must have exactly 2 items"
                                     .format(p_nob['name']))
                try:
                    ref_value = int(p_nob['ref'])
                except ValueError:
                    raise ValueError("the ref value of {} is not an integer value"
                                     .format(p_nob['name']))
                if ref_value < r_range[0] or ref_value > r_range[1]:
                    raise ValueError("the ref value of {} is out of range".format(p_nob['name']))
                self.ref.append(ref_value)
                objective_params_list.append((r_range[0], r_range[1]))
            else:
                raise ValueError("the type of {} is not supported".format(p_nob['name']))
        return objective_params_list

    def handle_discrete_data(self, p_nob):
        """handle discrete data"""
        if p_nob['dtype'] == 'int':
            items = p_nob['items']
            if items is None:
                items = []
            r_range = p_nob['range']
            step = 1
            if 'step' in p_nob:
                step = 1 if p_nob['step'] < 1 else p_nob['step']
            if r_range is not None:
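                # 'range' is a flat list of inclusive [low, high] pairs; each
                # pair is expanded into candidate values at the given step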
                length = len(r_range) if len(r_range) % 2 == 0 else len(r_range) - 1
                for i in range(0, length, 2):
                    items.extend(list(np.arange(r_range[i], r_range[i + 1] + 1, step=step)))
            items = sorted(set(items))  # deduplicate and keep a deterministic order
            try:
                ref_value = int(p_nob['ref'])
            except ValueError:
                raise ValueError("the ref value of {} is not an integer value"
                                 .format(p_nob['name']))
            if ref_value not in items:
                raise ValueError("the ref value of {} is out of range".format(p_nob['name']))
            self.ref.append(ref_value)
            return items
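        # string options are encoded as integer indexes; the ref value is
        # matched by its option text and stored as the corresponding index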
        if p_nob['dtype'] == 'string':
            items = p_nob['options']
            keys = []
            length = len(self.ref)
            for key, value in enumerate(items):
                keys.append(key)
                if p_nob['ref'] == value:
                    self.ref.append(key)
            if len(self.ref) == length:
                raise ValueError("the ref value of {} is out of range"
                                 .format(p_nob['name']))
            return keys
        raise ValueError("the dtype of {} is not supported".format(p_nob['name']))

    @staticmethod
    def feature_importance(options, performance, labels):
        """feature importance"""
        options = StandardScaler().fit_transform(options)
        lasso = Lasso()
        lasso.fit(options, performance)
        result = zip(lasso.coef_, labels)
        total_sum = sum(map(abs, lasso.coef_))
        if total_sum == 0:
            return ", ".join("%s: 0" % label for label in labels)
        result = sorted(result, key=lambda x: -np.abs(x[0]))
        rank = ", ".join("%s: %s%%" % (label, round(coef * 100 / total_sum, 2))
                         for coef, label in result)
        return rank

    def _get_intvalue_from_knobs(self, kv):
        """get the int value from knobs if dtype if string"""
        x_each = []
        for p_nob in self.knobs:
            if p_nob['name'] not in kv.keys():
                raise ValueError("the param {} is not in the x0 ref".format(p_nob['name']))
            if p_nob['dtype'] != 'string':
                x_each.append(int(kv[p_nob['name']]))
                continue
            options = p_nob['options']
            for key, value in enumerate(options):
                if value != kv[p_nob['name']]:
                    continue
                x_each.append(key)
        return x_each

    def transfer(self):
        """transfer ref x0 to int, y0 to float"""
        list_ref_x = []
        list_ref_y = []
        if self.x0 is None or self.y0 is None:
            return (list_ref_x, list_ref_y)

        for x_value in self.x0:
            kv = {}
            if len(x_value) != len(self.knobs):
                raise ValueError("x0 is not the same length as knobs")

            for val in x_value:
                params = val.split("=")
                if len(params) != 2:
                    raise ValueError("the param format of {} is not correct".format(params))
                kv[params[0]] = params[1]

            ref_x = self._get_intvalue_from_knobs(kv)
            if len(ref_x) != len(self.knobs):
                raise ValueError("tuning parameter is not the same length as knobs")
            list_ref_x.append(ref_x)
        list_ref_y = [float(y) for y in self.y0]
        return (list_ref_x, list_ref_y)

    def run(self):
        """start the tuning process"""
        def objective(var):
            """objective method receives the benchmark result and sends the next parameters"""
            iter_result = {}
            for i, knob in enumerate(self.knobs):
                if knob['dtype'] == 'string':
                    params[knob['name']] = knob['options'][var[i]]
                else:
                    params[knob['name']] = var[i]

            iter_result["param"] = params
            self.child_conn.send(iter_result)
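            # block until the client reports the benchmark result: a string of
            # comma-separated numbers that are summed into one objective value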
            result = self.child_conn.recv()
            x_num = 0.0
            eval_list = result.split(',')
            for value in eval_list:
                num = float(value)
                x_num += num
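            # record every evaluated point and its score for the later
            # feature-importance ranking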
            options.append(var)
            performance.append(x_num)
            return x_num

        params = {}
        options = []
        performance = []
        labels = []
        try:
            params_space = self.build_space()
            ref_x, ref_y = self.transfer()
            if len(ref_x) == 0:
                ref_x = self.ref
                ref_y = None
            if not isinstance(ref_x[0], (list, tuple)):
                ref_x = [ref_x]

            LOGGER.info('x0: %s', ref_x)
            LOGGER.info('y0: %s', ref_y)

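            # when warm-start points are supplied, reduce the number of random
            # exploration calls gp_minimize will perform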
            if ref_x is not None and isinstance(ref_x[0], (list, tuple)):
                self._n_random_starts = 0 if len(ref_x) >= self._n_random_starts \
                        else self._n_random_starts - len(ref_x) + 1

            LOGGER.info('n_random_starts parameter is: %d', self._n_random_starts)
            LOGGER.info("Running performance evaluation...")
            if self.engine == 'random':
                ret = dummy_minimize(objective, params_space, n_calls=self.max_eval)
            elif self.engine == 'bayes':
                ret = gp_minimize(objective, params_space, n_calls=self.max_eval,
                                  n_random_starts=self._n_random_starts, x0=ref_x, y0=ref_y)
            elif self.engine == 'abtest':
                abtuning_manager = ABtestTuningManager(self.knobs, self.child_conn,
                                                       self.split_count)
                options, performance = abtuning_manager.do_abtest_tuning_abtest()
                params = abtuning_manager.get_best_params()
                # convert string options into indexes for the importance ranking
                options = abtuning_manager.get_options_index(options)
            else:
                raise ValueError("the tuning engine {} is not supported".format(self.engine))
            LOGGER.info("Minimization procedure has been completed.")
        except ValueError as value_error:
            LOGGER.error('Value Error: %s', repr(value_error))
            self.child_conn.send(value_error)
            return None
        except RuntimeError as runtime_error:
            LOGGER.error('Runtime Error: %s', repr(runtime_error))
            self.child_conn.send(runtime_error)
            return None
        except Exception as e:
            LOGGER.error('Unexpected Error: %s', repr(e))
            self.child_conn.send(Exception("Unexpected Error:", repr(e)))
            return None

        for i, knob in enumerate(self.knobs):
            if self.engine != 'abtest':
                # map the best point found by the minimizer back to knob values;
                # the abtest engine already filled params via get_best_params()
                if knob['dtype'] == 'string':
                    params[knob['name']] = knob['options'][ret.x[i]]
                else:
                    params[knob['name']] = ret.x[i]
            labels.append(knob['name'])

        LOGGER.info("Optimized result: %s", params)
        LOGGER.info("The optimized profile has been generated.")
        final_param = {}
        rank = self.feature_importance(options, performance, labels)

        final_param["param"] = params
        final_param["rank"] = rank
        final_param["finished"] = True
        self.child_conn.send(final_param)
        LOGGER.info("The feature importances of current evaluation are: %s", rank)
        return params

    def stop_process(self):
        """stop process"""
        self.child_conn.close()
        self.terminate()
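

# A minimal driver sketch (an illustrative assumption, not part of A-Tune's
# public API; run_benchmark is a hypothetical helper). The parent process
# receives candidate parameters over the pipe, applies them, runs a benchmark,
# and sends the score back until a message with "finished" set to True arrives:
#
#     from multiprocessing import Pipe
#
#     knobs = [{'name': 'vm.swappiness', 'type': 'discrete', 'dtype': 'int',
#               'items': None, 'range': [0, 100], 'step': 10, 'ref': 60}]
#     parent_conn, child_conn = Pipe()
#     opt = Optimizer("optimizer", knobs, child_conn, engine="bayes", max_eval=30)
#     opt.start()
#     while True:
#         msg = parent_conn.recv()
#         if isinstance(msg, Exception):
#             raise msg
#         if msg.get("finished"):
#             break
#         score = run_benchmark(msg["param"])  # hypothetical benchmark helper
#         parent_conn.send(str(score))         # e.g. "0.95"
#     opt.join()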