# build.py
import yaml
import copy
import paddle.fluid as fluid
from paddle.fluid.incubate.fleet.parameter_server.pslib import fleet

from paddlerec.core.model import Model
from paddlerec.core.utils import table


def create(config):
    """
    Create a model instance described by ``config``.

    Args:
        config(dict): describes the model type ('mode') and net
    Return:
        a built Model instance, or None when the mode is not 'fluid'
    """
    # Only the fluid mode is supported; anything else yields no model.
    if config['mode'] != 'fluid':
        return None
    instance = YamlModel(config)
    instance.train_net()
    return instance


class YamlModel(Model):
    """Model assembled from a YAML layer-graph description.

    The layer graph is read from ``config['layer_file']`` and compiled into
    fluid train/startup programs by :meth:`train_net`.
    """

    def __init__(self, config):
        """Load the layer graph and prepare build bookkeeping.

        Args:
            config(dict): must carry 'name' and 'layer_file' keys.
        """
        Model.__init__(self, config)
        self._config = config
        self._name = config['name']
        # Context manager closes the layer file even on error
        # (the previous code leaked the file handle).
        with open(config['layer_file'], 'r') as f:
            self._build_nodes = yaml.safe_load(f.read())
        # Build phases run in this fixed order.
        self._build_phase = ['input', 'param', 'summary', 'layer']
        self._build_param = {'layer': {}, 'inner_layer': {}, 'layer_extend': {}, 'model': {}}
        self._inference_meta = {'dependency': {}, 'params': {}}

    def train_net(self):
        """
        Build a fluid model from the YAML layer graph.

        Populates:
            self._build_param['model'] with train_program / startup_program
            self._build_param['table'] with table-meta sent to the ps-server
            self._cost / self._data_var / self._metrics from layer outputs
        """
        # Index layers by name for dependency resolution later.
        for layer in self._build_nodes['layer']:
            self._build_param['inner_layer'][layer['name']] = layer

        self._build_param['table'] = {}
        self._build_param['model']['train_program'] = fluid.Program()
        self._build_param['model']['startup_program'] = fluid.Program()
        with fluid.program_guard(self._build_param['model']['train_program'], \
                                 self._build_param['model']['startup_program']):
            with fluid.unique_name.guard():
                for phase in self._build_phase:
                    if self._build_nodes[phase] is None:
                        continue
                    for node in self._build_nodes[phase]:
                        # NOTE(review): exec() instantiates the layer class named by
                        # the YAML node, so the config file must be trusted.  Left
                        # untouched because the object providing `layer.<class>` is
                        # resolved dynamically here -- confirm before refactoring
                        # this to getattr().
                        exec("""layer=layer.{}(node)""".format(node['class']))
                        layer_output, extend_output = layer.generate(self._config['mode'], self._build_param)
                        self._build_param['layer'][node['name']] = layer_output
                        self._build_param['layer_extend'][node['name']] = extend_output
                        if extend_output is None:
                            continue
                        # Accumulate per-layer losses into one training cost.
                        if 'loss' in extend_output:
                            if self._cost is None:
                                self._cost = extend_output['loss']
                            else:
                                self._cost += extend_output['loss']
                        if 'data_var' in extend_output:
                            self._data_var += extend_output['data_var']
                        if 'metric_label' in extend_output and extend_output['metric_label'] is not None:
                            self._metrics[extend_output['metric_label']] = extend_output['metric_dict']

                        if 'inference_param' in extend_output:
                            # Group inference params per table name; allocate the
                            # table meta once, on first sight of a param name.
                            inference_param = extend_output['inference_param']
                            param_name = inference_param['name']
                            if param_name not in self._build_param['table']:
                                self._build_param['table'][param_name] = {'params': []}
                                table_meta = table.TableMeta.alloc_new_table(inference_param['table_id'])
                                self._build_param['table'][param_name]['_meta'] = table_meta
                            self._build_param['table'][param_name]['params'] += inference_param['params']

    @classmethod
    def build_optimizer(cls, params):
        """
        Create a fleet-distributed optimizer from params['optimizer_conf'].

        Args:
            params(dict): carries 'optimizer_conf' and 'metrics'.
        Return:
            a fleet distributed optimizer instance.
        """
        optimizer_conf = params['optimizer_conf']
        strategy = None
        if 'strategy' in optimizer_conf:
            strategy = optimizer_conf['strategy']
            # Collect the metric stat var names so the ps strategy can track them.
            stat_var_names = []
            metrics = params['metrics']
            for name in metrics:
                model_metrics = metrics[name]
                stat_var_names += [model_metrics[metric]['var'].name for metric in model_metrics]
            strategy['stat_var_names'] = list(set(stat_var_names))
        # Resolve the optimizer class directly instead of composing and
        # exec'ing source text: exec cannot bind a function local under
        # Python 3, and getattr avoids evaluating config-provided code.
        optimizer_class = getattr(fluid.optimizer, optimizer_conf['class'])
        optimizer = optimizer_class(learning_rate=optimizer_conf['learning_rate'])
        optimizer = fleet.distributed_optimizer(optimizer, strategy=strategy)
        return optimizer

    def dump_model_program(self, path):
        """
        Dump the train and startup programs in pbtxt form under ``path``.

        Args:
            path(str): output directory.
        """
        main_file = path + '/' + self._name + '_main_program.pbtxt'
        startup_file = path + '/' + self._name + '_startup_program.pbtxt'
        # str(Program) yields the protobuf text format; write() works on both
        # Python 2 and 3, unlike the former py2-only `print >> fout` statement.
        with open(main_file, "w") as fout:
            fout.write("%s\n" % self._build_param['model']['train_program'])
        with open(startup_file, "w") as fout:
            fout.write("%s\n" % self._build_param['model']['startup_program'])

    def shrink(self, params):
        """
        Apply decay to every dense table registered with the ps-server.

        Args:
            params(dict): carries 'scope' and 'decay'.
        """
        scope = params['scope']
        decay = params['decay']
        for table_info in self._build_param['table'].values():
            table_id = table_info['_meta']._table_id
            fleet.shrink_dense_table(decay, scope=scope, table_id=table_id)

    def dump_inference_program(self, inference_layer, path):
        """Not implemented yet; kept for interface compatibility."""
        pass

    def dump_inference_param(self, params):
        """
        Pull dense params from the ps-server and save inference vars to files.

        Args:
            params(dict): carries 'scope', 'executor', 'inference_list' and
                the 'save_combine' flag.
        """
        scope = params['scope']
        executor = params['executor']
        program = self._build_param['model']['train_program']
        # `table_info` (was `table`) avoids shadowing the imported table module.
        for table_name, table_info in self._build_param['table'].items():
            fleet._fleet_ptr.pull_dense(scope, table_info['_meta']._table_id, table_info['params'])
        for inference_item in params['inference_list']:
            params_name_list = self.inference_params(inference_item['layer_name'])
            params_var_list = [program.global_block().var(i) for i in params_name_list]
            params_file_name = inference_item['save_file_name']
            with fluid.scope_guard(scope):
                if params['save_combine']:
                    # Combined save: all vars into a single file.
                    fluid.io.save_vars(executor, "./", \
                                       program, vars=params_var_list, filename=params_file_name)
                else:
                    fluid.io.save_vars(executor, params_file_name, program, vars=params_var_list)

    def inference_params(self, inference_layer):
        """
        Get params names needed to run ``inference_layer``.

        Args:
            inference_layer(str): layer for inference
        Return:
            params(list): params name list for the inference layer
        """
        layer = inference_layer
        # Memoized from a previous call.
        if layer in self._inference_meta['params']:
            return self._inference_meta['params'][layer]

        self._inference_meta['params'][layer] = []
        self._inference_meta['dependency'][layer] = self.get_dependency(self._build_param['inner_layer'], layer)
        for node in self._build_nodes['layer']:
            if node['name'] not in self._inference_meta['dependency'][layer]:
                continue
            if 'inference_param' in self._build_param['layer_extend'][node['name']]:
                self._inference_meta['params'][layer] += \
                    self._build_param['layer_extend'][node['name']]['inference_param']['params']
        return self._inference_meta['params'][layer]

    def get_dependency(self, layer_graph, dest_layer):
        """
        Get all layers ``dest_layer`` transitively depends on.

        Args:
            layer_graph(dict): all layers in the graph, keyed by name
            dest_layer(str): target layer name
        Return:
            depend_layers(list): unique layer names dest_layer depends on
        """
        dependency_list = []
        if dest_layer in layer_graph:
            dependencys = copy.deepcopy(layer_graph[dest_layer]['input'])
            dependency_list = copy.deepcopy(dependencys)
            # Recurse into each direct input to collect transitive deps.
            for dependency in dependencys:
                dependency_list = dependency_list + self.get_dependency(layer_graph, dependency)
        return list(set(dependency_list))