#!/usr/bin/python3
# -*- coding: utf-8 -*-
# Copyright (c) 2019 Huawei Technologies Co., Ltd.
# A-Tune is licensed under the Mulan PSL v2.
# You can use this software according to the terms and conditions of the Mulan PSL v2.
# You may obtain a copy of Mulan PSL v2 at:
#     http://license.coscl.org.cn/MulanPSL2
# THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, EITHER EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, MERCHANTABILITY OR FIT FOR A PARTICULAR
# PURPOSE.
# See the Mulan PSL v2 for more details.
# Create: 2019-10-29

"""
This class is used to find optimal settings and generate optimized profile.
"""

import logging
import multiprocessing
import numpy as np
import sys
from skopt.optimizer import gp_minimize, dummy_minimize
from sklearn.linear_model import Lasso
from sklearn.preprocessing import StandardScaler

from analysis.optimizer.abtest_tuning_manager import ABtestTuningManager
from analysis.optimizer.knob_sampling_manager import KnobSamplingManager
from analysis.optimizer.tpe_optimizer import TPEOptimizer
from analysis.optimizer.weighted_ensemble_feature_selector import WeightedEnsembleFeatureSelector

LOGGER = logging.getLogger(__name__)


class Optimizer(multiprocessing.Process):
    """find optimal settings and generate optimized profile"""

    def __init__(self, name, params, child_conn, engine="bayes", max_eval=50, x0=None, y0=None, n_random_starts=20):
D
dogsheng 已提交
38 39 40 41 42
        super(Optimizer, self).__init__(name=name)
        self.knobs = params
        self.child_conn = child_conn
        self.engine = engine
        self.max_eval = int(max_eval)
43
        self.split_count = 5 #should be set by YAML client
44
        self.ref = []
45 46 47
        self.x0 = x0
        self.y0 = y0
        self._n_random_starts = 20 if n_random_starts is None else n_random_starts
D
dogsheng 已提交
48 49

    def build_space(self):
Z
Zhipeng Xie 已提交
50
        """build space"""
D
dogsheng 已提交
51
        objective_params_list = []
Z
Zhipeng Xie 已提交
52 53
        for p_nob in self.knobs:
            if p_nob['type'] == 'discrete':
54 55
                items = self.handle_discrete_data(p_nob)
                objective_params_list.append(items)
Z
Zhipeng Xie 已提交
56 57
            elif p_nob['type'] == 'continuous':
                r_range = p_nob['range']
58 59
                if r_range is None or len(r_range) != 2:
                    raise ValueError("the item of the scope value of {} must be 2"
60
                                     .format(p_nob['name']))
61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77
                if p_nob['dtype'] == 'int':
                    try:
                        ref_value = int(p_nob['ref'])
                        r_range[0] = int(r_range[0])
                        r_range[1] = int(r_range[1])
                    except ValueError:
                        raise ValueError("the ref value of {} is not an integer value"
                                 .format(p_nob['name']))
                elif p_nob['dtype'] == 'float':
                    try:
                        ref_value = float(p_nob['ref'])
                        r_range[0] = float(r_range[0])
                        r_range[1] = float(r_range[1])
                    except ValueError:
                        raise ValueError("the ref value of {} is not an integer value"
                                 .format(p_nob['name']))

78 79 80
                if ref_value < r_range[0] or ref_value > r_range[1]:
                    raise ValueError("the ref value of {} is out of range".format(p_nob['name']))
                self.ref.append(ref_value)
Z
Zhipeng Xie 已提交
81
                objective_params_list.append((r_range[0], r_range[1]))
82 83
            else:
                raise ValueError("the type of {} is not supported".format(p_nob['name']))
D
dogsheng 已提交
84 85
        return objective_params_list

86 87 88 89 90 91 92 93 94
    def handle_discrete_data(self, p_nob):
        """handle discrete data"""
        if p_nob['dtype'] == 'int':
            items = p_nob['items']
            if items is None:
                items = []
            r_range = p_nob['range']
            step = 1
            if 'step' in p_nob.keys():
95
                step = 1 if p_nob['step'] < 1 else p_nob['step']
96
            if r_range is not None:
97 98
                length = len(r_range) if len(r_range) % 2 == 0 else len(r_range) - 1
                for i in range(0, length, 2):
99 100
                    items.extend(list(np.arange(r_range[i], r_range[i + 1] + 1, step=step)))
            items = list(set(items))
101 102 103 104
            try:
                ref_value = int(p_nob['ref'])
            except ValueError:
                raise ValueError("the ref value of {} is not an integer value"
105
                                 .format(p_nob['name']))
106
            if ref_value not in items:
107
                items.append(ref_value)
108
            self.ref.append(ref_value)
109
            return items
110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128
        if p_nob['dtype'] == 'float':
            items = p_nob['items']
            if items is None:
                items = []
            r_range = p_nob['range']
            step = 0.1
            if 'step' in p_nob.keys():
                step = 0.1 if p_nob['step'] <= 0 else p_nob['step']
            if r_range is not None:
                length = len(r_range) if len(r_range) % 2 == 0 else len(r_range) - 1
                for i in range(0, length, 2):
                    items.extend(list(np.arange(r_range[i], r_range[i + 1], step=step)))
            items = list(set(items))
            try:
                ref_value = float(p_nob['ref'])
            except ValueError:
                raise ValueError("the ref value of {} is not a float value"
                                 .format(p_nob['name']))
            if ref_value not in items:
129
                items.append(ref_value)
130 131
            self.ref.append(ref_value)
            return items
132 133 134 135 136 137 138 139 140
        if p_nob['dtype'] == 'string':
            items = p_nob['options']
            keys = []
            length = len(self.ref)
            for key, value in enumerate(items):
                keys.append(key)
                if p_nob['ref'] == value:
                    self.ref.append(key)
            if len(self.ref) == length:
141
                raise ValueError("the ref value of {} is out of range"
142 143 144 145
                                 .format(p_nob['name']))
            return keys
        raise ValueError("the dtype of {} is not supported".format(p_nob['name']))

146 147 148 149 150 151 152
    @staticmethod
    def feature_importance(options, performance, labels):
        """Rank knobs by Lasso coefficient magnitude.

        Fits a Lasso regression of performance on the standardized option
        matrix and reports each label's share of the total absolute
        coefficient weight as a percentage string.
        """
        scaled = StandardScaler().fit_transform(options)
        model = Lasso()
        model.fit(scaled, performance)
        coefs = model.coef_
        total_sum = sum(abs(coef) for coef in coefs)
        # all-zero coefficients: every knob gets a flat 0 score
        if total_sum == 0:
            return ", ".join("%s: 0" % label for label in labels)
        ordered = sorted(zip(coefs, labels), key=lambda pair: -np.abs(pair[0]))
        return ", ".join("%s: %s%%" % (label, round(coef * 100 / total_sum, 2))
                         for coef, label in ordered)
    def _get_intvalue_from_knobs(self, kv):
        """get the int value from knobs if dtype if string"""
        x_each = []
        for p_nob in self.knobs:
            if p_nob['name'] not in kv.keys():
                raise ValueError("the param {} is not in the x0 ref".format(p_nob['name']))
            if p_nob['dtype'] != 'string':
                x_each.append(int(kv[p_nob['name']]))
                continue
            options = p_nob['options']
            for key, value in enumerate(options):
                if value != kv[p_nob['name']]:
                    continue
                x_each.append(key)
        return x_each

    def transfer(self):
        """transfer ref x0 to int, y0 to float"""
        list_ref_x = []
        list_ref_y = []
        if self.x0 is None or self.y0 is None:
            return (list_ref_x, list_ref_y)

        for xValue in self.x0:
            kv = {}
            if len(xValue) != len(self.knobs):
                raise ValueError("x0 is not the same length with knobs")

            for i, val in enumerate(xValue):
                params = val.split("=")
                if len(params) != 2:
                    raise ValueError("the param format of {} is not correct".format(params))
                kv[params[0]] = params[1]

            ref_x = self._get_intvalue_from_knobs(kv)
            if len(ref_x) != len(self.knobs):
                raise ValueError("tuning parameter is not the same length with knobs")
            list_ref_x.append(ref_x)
        list_ref_y = [float(y) for y in self.y0]
        return (list_ref_x, list_ref_y)

D
dogsheng 已提交
202
    def run(self):
        """start the tuning process"""
        def objective(var):
            """objective method receive the benchmark result and send the next parameters"""
            iterResult = {}
            # translate option indices back to their string values before
            # sending the candidate configuration to the benchmark side
            for i, knob in enumerate(self.knobs):
                if knob['dtype'] == 'string':
                    params[knob['name']] = knob['options'][var[i]]
                else:
                    params[knob['name']] = var[i]

            iterResult["param"] = params
            self.child_conn.send(iterResult)
            # block until the parent reports the benchmark result: a
            # comma-separated list of numbers summed into the single
            # objective value to minimize
            result = self.child_conn.recv()
            x_num = 0.0
            eval_list = result.split(',')
            for value in eval_list:
                num = float(value)
                x_num = x_num + num
            # remember every evaluated point for feature-importance ranking
            options.append(var)
            performance.append(x_num)
            return x_num

        params = {}
        options = []
        performance = []
        labels = []
        try:
            params_space = self.build_space()
            ref_x, ref_y = self.transfer()
            # no user-supplied history: fall back to the reference config only
            if len(ref_x) == 0:
                ref_x = self.ref
                ref_y = None
            if not isinstance(ref_x[0], (list, tuple)):
                ref_x = [ref_x]

            LOGGER.info('x0: %s', ref_x)
            LOGGER.info('y0: %s', ref_y)

            # each supplied history point replaces one random start
            # (keeping at least one random start when fully covered)
            if ref_x is not None and isinstance(ref_x[0], (list, tuple)):
                self._n_random_starts = 0 if len(ref_x) >= self._n_random_starts \
                        else self._n_random_starts - len(ref_x) + 1

            LOGGER.info('n_random_starts parameter is: %d', self._n_random_starts)
            LOGGER.info("Running performance evaluation.......")
            if self.engine == 'random':
                ret = dummy_minimize(objective, params_space, n_calls=self.max_eval)
            elif self.engine == 'bayes':
                ret = gp_minimize(objective, params_space, n_calls=self.max_eval, \
                                   n_random_starts=self._n_random_starts, x0=ref_x, y0=ref_y)
            elif self.engine == 'abtest':
                abtuning_manager = ABtestTuningManager(self.knobs, self.child_conn, self.split_count)
                options, performance = abtuning_manager.do_abtest_tuning_abtest()
                params = abtuning_manager.get_best_params()
                options = abtuning_manager.get_options_index(options) # convert string option into index
            elif self.engine == 'lhs':
                knobsampling_manager = KnobSamplingManager(self.knobs, self.child_conn, self.max_eval, self.split_count)
                options = knobsampling_manager.get_knob_samples()
                performance = knobsampling_manager.do_knob_sampling_test(options)
                params = knobsampling_manager.get_best_params(options, performance)
            elif self.engine == 'tpe':
                # TPE runs its own loop and reports the final result itself,
                # so this branch returns early without the common epilogue
                tpe_opt = TPEOptimizer(self.knobs, self.child_conn, self.max_eval)
                best_params = tpe_opt.tpe_minimize_tuning()
                finalParam = {}
                finalParam["finished"] = True
                finalParam["param"] = best_params
                self.child_conn.send(finalParam)
                return best_params
            LOGGER.info("Minimization procedure has been completed.")
        except ValueError as value_error:
            LOGGER.error('Value Error: %s', repr(value_error))
            # forward the exception object so the parent can report failure
            self.child_conn.send(value_error)
            return None
        except RuntimeError as runtime_error:
            LOGGER.error('Runtime Error: %s', repr(runtime_error))
            self.child_conn.send(runtime_error)
            return None
        except Exception as e:
            LOGGER.error('Unexpected Error: %s', repr(e))
            self.child_conn.send(Exception("Unexpected Error:", repr(e)))
            return None

        # NOTE(review): 'ret' is assigned only on the 'random'/'bayes' paths,
        # yet 'abtest'/'lhs' also fall through to this loop -- confirm those
        # engines cannot hit an unbound 'ret' (NameError) here.
        for i, knob in enumerate(self.knobs):
            if knob['dtype'] == 'string':
                params[knob['name']] = knob['options'][ret.x[i]]
            else:
                params[knob['name']] = ret.x[i]
            labels.append(knob['name'])

        LOGGER.info("Optimized result: %s", params)
        LOGGER.info("The optimized profile has been generated.")
        finalParam = {}
        # rank knob importance over all evaluated (option, performance) pairs
        wefs = WeightedEnsembleFeatureSelector()
        rank = wefs.get_ensemble_feature_importance(options, performance, labels)

        finalParam["param"] = params
        finalParam["rank"] = rank
        finalParam["finished"] = True
        self.child_conn.send(finalParam)
        LOGGER.info("The feature importances of current evaluation are: %s", rank)
        return params
    def stop_process(self):
        """stop process"""
        # close our end of the pipe first so the parent sees EOF rather
        # than blocking on a recv() from a dead process
        self.child_conn.close()
        # forcefully stop the tuning process (multiprocessing.Process.terminate)
        self.terminate()