Commit 1ebde3bd, authored by: W wuzewu

optimize the method of serializing param attributes

Parent bfedfda8
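Summary: the dedicated ParamAttr protobuf message is dropped; parameter attributes are now serialized into the module descriptor's generic extra_info field as a recursive FlexibleData tree, written by from_param_to_flexible_data and read back by from_flexible_data_to_param. A minimal sketch of the new layout, assuming the regenerated module_desc_pb2 module; the parameter name 'fc_0.w_0' is a placeholder:

from paddle_hub import module_desc_pb2

# Writers nest all parameter attributes under extra_info.
desc = module_desc_pb2.ModuleDesc()
desc.extra_info.type = module_desc_pb2.MAP
param_attrs = desc.extra_info.map.data['param_attrs']
param_attrs.type = module_desc_pb2.MAP

# One FlexibleData subtree per parameter ('fc_0.w_0' is hypothetical).
attr = param_attrs.map.data['fc_0.w_0']
attr.type = module_desc_pb2.MAP
attr.map.data['trainable'].type = module_desc_pb2.BOOLEAN
attr.map.data['trainable'].b = True

# Readers walk the same path, as Module._process_parameter now does.
for name, a in desc.extra_info.map.data['param_attrs'].map.data.items():
    print(name, a.map.data['trainable'].b)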
paddle_hub/module.py
@@ -30,7 +30,8 @@ from paddle_hub.downloader import download_and_uncompress
 from paddle_hub import module_desc_pb2
 from paddle_hub.logger import logger
 from paddle_hub.signature import Signature
-from paddle_hub.utils import to_list, get_variable_info, mkdir
+from paddle_hub.utils import to_list, mkdir
+from paddle_hub.paddle_helper import from_param_to_flexible_data, get_variable_info, from_flexible_data_to_param
 from paddle_hub.version import __version__
 __all__ = ["Module", "ModuleConfig", "ModuleUtils"]
@@ -73,48 +74,10 @@ class Module(object):
     def _process_parameter(self):
         global_block = self.inference_program.global_block()
-        param_attrs = self.config.desc.param_attrs
-        for key, param_attr in param_attrs.items():
-            param = {}
+        param_attrs = self.config.desc.extra_info.map.data['param_attrs']
+        for key, param_attr in param_attrs.map.data.items():
+            param = from_flexible_data_to_param(param_attr)
             param['name'] = HUB_VAR_PREFIX + key
-            param['trainable'] = param_attr.trainable
-            param['do_model_average'] = param_attr.do_model_average
-            param['optimize_attr'] = {}
-            param['optimize_attr'][
-                'learning_rate'] = param_attr.optimize_attr.m['learning_rate'].f
-            # TODO(wuzewu): recover the param attr with a more reliable way
-            if param_attr.regularizer.type == "L2DecayRegularizer":
-                regularizer = fluid.regularizer.L2DecayRegularizer(
-                    regularization_coeff=param_attr.regularizer.
-                    regularization_coeff)
-            elif param_attr.regularizer.type == "L1DecayRegularizer":
-                regularizer = fluid.regularizer.L1DecayRegularizer(
-                    regularization_coeff=param_attr.regularizer.
-                    regularization_coeff)
-            else:
-                regularizer = None
-            param['regularizer'] = regularizer
-            if param_attr.gradient_clip_attr.type == "ErrorClipByValue":
-                clip = fluid.clip.ErrorClipByValue(
-                    max=param_attr.gradient_clip_attr.max,
-                    min=param_attr.gradient_clip_attr.min)
-            elif param_attr.gradient_clip_attr.type == "GradientClipByValue":
-                clip = fluid.clip.GradientClipByValue(
-                    max=param_attr.gradient_clip_attr.max,
-                    min=param_attr.gradient_clip_attr.min)
-            elif param_attr.gradient_clip_attr.type == "GradientClipByNorm":
-                clip = fluid.clip.GradientClipByNorm(
-                    clip_norm=param_attr.gradient_clip_attr.clip_norm)
-            elif param_attr.gradient_clip_attr.type == "GradientClipByGlobalNorm":
-                clip = fluid.clip.GradientClipByGlobalNorm(
-                    clip_norm=param_attr.gradient_clip_attr.clip_norm,
-                    group_name=param_attr.gradient_clip_attr.group_name)
-            else:
-                clip = None
-            param['gradient_clip_attr'] = clip
             if (param['name'] not in global_block.vars):
                 continue
             var = global_block.var(param['name'])
@@ -341,46 +304,13 @@ def create_module(sign_arr, module_dir=None, word_dict=None, place=None):
             fo.write("{}\t{}\n".format(w, w_id))
     # save fluid Parameter
-    param_attrs = module_desc.param_attrs
+    extra_info = module_desc.extra_info
+    extra_info.type = module_desc_pb2.MAP
+    param_attrs = extra_info.map.data['param_attrs']
+    param_attrs.type = module_desc_pb2.MAP
     for param in program.global_block().iter_parameters():
-        param_attr = param_attrs[param.name]
-        param_attr.trainable = param.trainable
-        if param.do_model_average:
-            param_attr.do_model_average = param.do_model_average
-        # TODO(wuzewu): add a func to transfer python dict to fexiable data
-        param_attr.optimize_attr.type = module_desc_pb2.MAP
-        param_attr.optimize_attr.m['learning_rate'].type = module_desc_pb2.FLOAT
-        param_attr.optimize_attr.m['learning_rate'].f = param.optimize_attr[
-            'learning_rate']
-        if param.regularizer:
-            if isinstance(param.regularizer,
-                          fluid.regularizer.L2DecayRegularizer):
-                param_attr.regularizer.type = "L2DecayRegularizer"
-            if isinstance(param.regularizer,
-                          fluid.regularizer.L1DecayRegularizer):
-                param_attr.regularizer.type = "L1DecayRegularizer"
-            param_attr.regularizer.regularization_coeff = param.regularizer.regularization_coeff
-        if param.gradient_clip_attr:
-            if isinstance(param.gradient_clip_attr,
-                          fluid.clip.ErrorClipByValue):
-                param_attr.gradient_clip_attr.max = param.gradient_clip_attr.max
-                param_attr.gradient_clip_attr.min = param.gradient_clip_attr.min
-                param_attr.gradient_clip_attr.type = "ErrorClipByValue"
-            if isinstance(param.gradient_clip_attr,
-                          fluid.clip.GradientClipByValue):
-                param_attr.gradient_clip_attr.max = param.gradient_clip_attr.max
-                param_attr.gradient_clip_attr.min = param.gradient_clip_attr.min
-                param_attr.gradient_clip_attr.type = "GradientClipByValue"
-            if isinstance(param.gradient_clip_attr,
-                          fluid.clip.GradientClipByNorm):
-                param_attr.gradient_clip_attr.clip_norm = param.gradient_clip_attr.clip_norm
-                param_attr.gradient_clip_attr.type = "GradientClipByNorm"
-            if isinstance(param.gradient_clip_attr,
-                          fluid.clip.GradientClipByGlobalNorm):
-                param_attr.gradient_clip_attr.clip_norm = param.gradient_clip_attr.clip_norm
-                param_attr.gradient_clip_attr.group_name = param.gradient_clip_attr.group_name
-                param_attr.gradient_clip_attr.type = "GradientClipByGlobalNorm"
+        param_attr = param_attrs.map.data[param.name]
+        from_param_to_flexible_data(param, param_attr)
     # save signature info
     sign_map = module_desc.sign2var
...
paddle_hub/module_desc.proto
@@ -19,23 +19,34 @@ option optimize_for = LITE_RUNTIME;
 package paddle_hub;

 enum DataType {
-  INT = 0;
-  FLOAT = 1;
-  STRING = 2;
-  BOOLEAN = 3;
-  LIST = 4;
-  MAP = 5;
+  NONE = 0;
+  INT = 1;
+  FLOAT = 2;
+  STRING = 3;
+  BOOLEAN = 4;
+  LIST = 5;
+  MAP = 6;
+  SET = 7;
+  OBJECT = 8;
+}
+
+message KVData {
+  map<string, DataType> keyType = 1;
+  map<string, FlexibleData> data = 2;
 }

 message FlexibleData {
   DataType type = 1;
   string name = 2;
-  int32 i = 3;
+  int64 i = 3;
   float f = 4;
   bool b = 5;
   string s = 6;
-  map<string, FlexibleData> m = 7;
-  map<int32, FlexibleData> l = 8;
+  KVData map = 7;
+  KVData list = 8;
+  KVData set = 9;
+  KVData object = 10;
+  string info = 11;
 }

 // Feed Variable Description
@@ -61,27 +72,6 @@ message AuthInfo {
   string hub_version = 2;
 }

-message ParamAttr {
-  message Regularizer {
-    string type = 1;
-    float regularization_coeff = 2;
-  }
-  message GradientClipAttr {
-    string type = 1;
-    float min = 2;
-    float max = 3;
-    float clip_norm = 4;
-    string group_name = 5;
-  }
-  Regularizer regularizer = 1;
-  GradientClipAttr gradient_clip_attr = 2;
-  FlexibleData optimize_attr = 3;
-  bool trainable = 4;
-  bool do_model_average = 5;
-}
-
 // A Hub Module is stored in a directory with a file 'paddlehub.pb'
 // containing a serialized protocol message of this type. The further contents
 // of the directory depend on the storage format described by the message.
@@ -98,6 +88,6 @@ message ModuleDesc {
   AuthInfo auth_info = 5;
-  map<string, ParamAttr> param_attrs = 6;
+  FlexibleData extra_info = 6;
 };
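The encoding is recursive: every Python value becomes one FlexibleData node, and containers (LIST, MAP, SET, OBJECT) store their children in a KVData whose keyType map remembers each key's original type. A round-trip sketch using the helpers added to utils.py further down; output ordering of the restored dict may vary:

from paddle_hub import module_desc_pb2
from paddle_hub.utils import from_pyobj_to_flexible_data, from_flexible_data_to_pyobj

# Serialize a nested Python value into a single FlexibleData tree.
fd = module_desc_pb2.FlexibleData()
from_pyobj_to_flexible_data({'learning_rate': 1.0, 'tags': ['a', 'b']}, fd)

assert fd.type == module_desc_pb2.MAP
assert fd.map.data['tags'].type == module_desc_pb2.LIST
assert fd.map.data['tags'].list.data['0'].s == 'a'

# ...and decode it back to a plain Python object.
print(from_flexible_data_to_pyobj(fd))  # {'learning_rate': 1.0, 'tags': ['a', 'b']}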
This diff is collapsed.
paddle_hub/paddle_helper.py (new file)

# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
from paddle_hub import module_desc_pb2
from paddle_hub.utils import from_pyobj_to_flexible_data, from_flexible_data_to_pyobj
import paddle
import paddle.fluid as fluid
def get_variable_info(var):
assert isinstance(
var,
fluid.framework.Variable), "var should be a fluid.framework.Variable"
var_info = {
'type': var.type,
'name': var.name,
'dtype': var.dtype,
'lod_level': var.lod_level,
'shape': var.shape,
'stop_gradient': var.stop_gradient,
'is_data': var.is_data,
'error_clip': var.error_clip
}
if isinstance(var, fluid.framework.Parameter):
var_info['trainable'] = var.trainable
var_info['optimize_attr'] = var.optimize_attr
var_info['regularizer'] = var.regularizer
var_info['gradient_clip_attr'] = var.gradient_clip_attr
var_info['do_model_average'] = var.do_model_average
else:
var_info['persistable'] = var.persistable
return var_info
def from_param_to_flexible_data(param, flexible_data):
flexible_data.type = module_desc_pb2.MAP
from_pyobj_to_flexible_data(param.trainable,
flexible_data.map.data['trainable'])
from_pyobj_to_flexible_data(param.do_model_average,
flexible_data.map.data['do_model_average'])
from_pyobj_to_flexible_data(param.optimize_attr,
flexible_data.map.data['optimize_attr'])
from_pyobj_to_flexible_data(param.regularizer,
flexible_data.map.data['regularizer'])
from_pyobj_to_flexible_data(param.gradient_clip_attr,
flexible_data.map.data['gradient_clip_attr'])
def from_flexible_data_to_param(flexible_data):
param = {'gradient_clip_attr': None, 'regularizer': None}
param['trainable'] = from_flexible_data_to_pyobj(
flexible_data.map.data['trainable'])
param['do_model_average'] = from_flexible_data_to_pyobj(
flexible_data.map.data['do_model_average'])
param['optimize_attr'] = from_flexible_data_to_pyobj(
flexible_data.map.data['optimize_attr'])
if flexible_data.map.data['regularizer'].type != module_desc_pb2.NONE:
regularizer_type = flexible_data.map.data['regularizer'].name
regularization_coeff = flexible_data.map.data[
            'regularizer'].object.data['_regularization_coeff'].f
param['regularizer'] = eval(
"fluid.regularizer.%s(regularization_coeff = %f)" %
(regularizer_type, regularization_coeff))
    # note: check gradient_clip_attr here, not regularizer
    if flexible_data.map.data['gradient_clip_attr'].type != module_desc_pb2.NONE:
clip_type = flexible_data.map.data['gradient_clip_attr'].name
if clip_type == "ErrorClipByValue" or clip_type == "GradientClipByValue":
            max = flexible_data.map.data[
                'gradient_clip_attr'].object.data['max'].f
            min = flexible_data.map.data[
                'gradient_clip_attr'].object.data['min'].f
param['gradient_clip_attr'] = eval(
"fluid.clip.%s(max = %f, min = %f)" % (clip_type, max, min))
if clip_type == "GradientClipByNorm":
clip_norm = flexible_data.map.data[
'gradient_clip_attr'].object.data['clip_norm'].f
param['gradient_clip_attr'] = eval(
"fluid.clip.%s(clip_norm = %f)" % (clip_type, clip_norm))
if clip_type == "GradientClipByGlobalNorm":
clip_norm = flexible_data.map.data[
'gradient_clip_attr'].object.data['clip_norm'].f
            group_name = flexible_data.map.data[
                'gradient_clip_attr'].object.data['group_name'].s
            param['gradient_clip_attr'] = eval(
                'fluid.clip.%s(clip_norm = %f, group_name = "%s")' %
                (clip_type, clip_norm, group_name))
return param
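A usage sketch for the two helpers above, assuming the Paddle 1.x fluid API this repo targets; the fc layer and L2 regularizer are illustrative choices, not part of the commit:

import paddle.fluid as fluid
from paddle_hub import module_desc_pb2
from paddle_hub.paddle_helper import from_param_to_flexible_data, from_flexible_data_to_param

# Build a small program so the global block holds a real fluid Parameter.
program = fluid.Program()
with fluid.program_guard(program):
    x = fluid.layers.data(name='x', shape=[4], dtype='float32')
    fluid.layers.fc(
        input=x,
        size=2,
        param_attr=fluid.ParamAttr(
            regularizer=fluid.regularizer.L2DecayRegularizer(
                regularization_coeff=0.1)))

param = next(iter(program.global_block().iter_parameters()))

# Serialize the parameter's attributes, then recover them as a plain dict.
fd = module_desc_pb2.FlexibleData()
from_param_to_flexible_data(param, fd)
recovered = from_flexible_data_to_param(fd)
print(recovered['trainable'], recovered['regularizer'])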
paddle_hub/utils.py
@@ -17,6 +17,8 @@
 from __future__ import absolute_import
 from __future__ import division
 from __future__ import print_function
+from paddle_hub import module_desc_pb2
+from paddle_hub.logger import logger
 import paddle
 import paddle.fluid as fluid
 import os
@@ -30,34 +32,95 @@ def to_list(input):
     return input

-def get_variable_info(var):
-    assert isinstance(
-        var,
-        fluid.framework.Variable), "var should be a fluid.framework.Variable"
-    var_info = {
-        'type': var.type,
-        'name': var.name,
-        'dtype': var.dtype,
-        'lod_level': var.lod_level,
-        'shape': var.shape,
-        'stop_gradient': var.stop_gradient,
-        'is_data': var.is_data,
-        'error_clip': var.error_clip
-    }
-    if isinstance(var, fluid.framework.Parameter):
-        var_info['trainable'] = var.trainable
-        var_info['optimize_attr'] = var.optimize_attr
-        var_info['regularizer'] = var.regularizer
-        var_info['gradient_clip_attr'] = var.gradient_clip_attr
-        var_info['do_model_average'] = var.do_model_average
-    else:
-        var_info['persistable'] = var.persistable
-    return var_info

 def mkdir(path):
     """ the same as the shell command mkdir -p
     """
     if not os.path.exists(path):
         os.makedirs(path)
def get_keyed_type_of_pyobj(pyobj):
if isinstance(pyobj, bool):
return module_desc_pb2.BOOLEAN
elif isinstance(pyobj, int):
return module_desc_pb2.INT
elif isinstance(pyobj, str):
return module_desc_pb2.STRING
elif isinstance(pyobj, float):
return module_desc_pb2.FLOAT
return module_desc_pb2.STRING
def from_pyobj_to_flexible_data(pyobj, flexible_data):
if isinstance(pyobj, bool):
flexible_data.type = module_desc_pb2.BOOLEAN
flexible_data.b = pyobj
elif isinstance(pyobj, int):
flexible_data.type = module_desc_pb2.INT
flexible_data.i = pyobj
elif isinstance(pyobj, str):
flexible_data.type = module_desc_pb2.STRING
flexible_data.s = pyobj
elif isinstance(pyobj, float):
flexible_data.type = module_desc_pb2.FLOAT
flexible_data.f = pyobj
elif isinstance(pyobj, list) or isinstance(pyobj, tuple):
flexible_data.type = module_desc_pb2.LIST
for index, obj in enumerate(pyobj):
from_pyobj_to_flexible_data(obj,
flexible_data.list.data[str(index)])
elif isinstance(pyobj, set):
flexible_data.type = module_desc_pb2.SET
for index, obj in enumerate(list(pyobj)):
from_pyobj_to_flexible_data(obj, flexible_data.set.data[str(index)])
elif isinstance(pyobj, dict):
flexible_data.type = module_desc_pb2.MAP
for key, value in pyobj.items():
from_pyobj_to_flexible_data(value, flexible_data.map.data[str(key)])
flexible_data.map.keyType[str(key)] = get_keyed_type_of_pyobj(key)
elif isinstance(pyobj, type(None)):
flexible_data.type = module_desc_pb2.NONE
else:
flexible_data.type = module_desc_pb2.OBJECT
flexible_data.name = str(pyobj.__class__.__name__)
for key, value in pyobj.__dict__.items():
from_pyobj_to_flexible_data(value,
flexible_data.object.data[str(key)])
flexible_data.object.keyType[str(key)] = get_keyed_type_of_pyobj(
key)
def from_flexible_data_to_pyobj(flexible_data):
if flexible_data.type == module_desc_pb2.BOOLEAN:
result = flexible_data.b
elif flexible_data.type == module_desc_pb2.INT:
result = flexible_data.i
elif flexible_data.type == module_desc_pb2.STRING:
result = flexible_data.s
elif flexible_data.type == module_desc_pb2.FLOAT:
result = flexible_data.f
elif flexible_data.type == module_desc_pb2.LIST:
result = []
for index in range(len(flexible_data.list.data)):
result.append(
                from_flexible_data_to_pyobj(flexible_data.list.data[str(index)]))
elif flexible_data.type == module_desc_pb2.SET:
result = set()
for index in range(len(flexible_data.set.data)):
result.add(
                from_flexible_data_to_pyobj(flexible_data.set.data[str(index)]))
elif flexible_data.type == module_desc_pb2.MAP:
result = {}
        for key, value in flexible_data.map.data.items():
            # keys were stringified on write; keyType records the original
            # key type so non-string keys can be restored here
            key_type = flexible_data.map.keyType[key]
            if key_type == module_desc_pb2.INT:
                key = int(key)
            elif key_type == module_desc_pb2.FLOAT:
                key = float(key)
            elif key_type == module_desc_pb2.BOOLEAN:
                key = (key == "True")
            result[key] = from_flexible_data_to_pyobj(value)
elif flexible_data.type == module_desc_pb2.NONE:
result = None
elif flexible_data.type == module_desc_pb2.OBJECT:
result = None
logger.warning("can't tran flexible_data to python object")
else:
result = None
logger.warning("unknown type of flexible_data")
return result
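Since protobuf map keys must be strings, keyType is what lets non-string dict keys survive a round trip. A small check, assuming the key-type recovery shown above; printed key order may vary:

from paddle_hub import module_desc_pb2
from paddle_hub.utils import from_pyobj_to_flexible_data, from_flexible_data_to_pyobj

# Integer and float keys are stringified on write and restored on read.
fd = module_desc_pb2.FlexibleData()
from_pyobj_to_flexible_data({1: 'one', 2.5: 'half past two'}, fd)

assert fd.map.keyType['1'] == module_desc_pb2.INT
assert fd.map.keyType['2.5'] == module_desc_pb2.FLOAT
print(from_flexible_data_to_pyobj(fd))  # {1: 'one', 2.5: 'half past two'}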