diff --git a/image_classification/tf2paddle/README.md b/image_classification/tf2paddle/README.md new file mode 100644 index 0000000000000000000000000000000000000000..821e039ea3938ea4273003afa21088a21dfd012e --- /dev/null +++ b/image_classification/tf2paddle/README.md @@ -0,0 +1,54 @@ +## 使用说明 + +`tf2paddle.py`脚本中的工具类`TFModelConverter`实现了将TensorFlow训练好的模型文件转换为PaddlePaddle可加载的模型文件。目前能够支持图像领域常用的:卷积(`Convolution`)层、`Batch Normalization`层和全连接(`Full Connection`)层。图像领域常用的 `ResNet` `VGG` 网络都以这些层为基础,使用TensorFlow训练的`ResNet`和`VGG`模型能够被转换为PaddlePaddle可加载的模型,进一步用于预训练或是预测服务的开发等。 + +模型转换的基本流程是: +1. 将TensorFlow模型等价地使用PaddlePaddle Python API接口进行改写。 +1. 在TensorFlow中可学习参数用 `Variable` 表示,基于TensorFlow的Python API获取网络中的 Variable。 +1. 确定TensorFlow模型中`Variable`与PaddlePaddle中`paddle.layer`的可学习参数的对应关系。 +1. 对TensorFlow中的`Variable`进行一定的适配(详见下文),转化为PaddlePaddle中的参数存储格式并进行序列化保存。 + +### 需要遵守的约定 + +为使TensorFlow模型中的`Variable`能够正确对应到`paddle.layer`中的可学习参数,目前版本在使用时有如下约束需要遵守: + +1. 目前仅支持将TensorFlow中 `conv2d`,`batchnorm`,`fc`这三种带有可学习`Variable`的Operator训练出的参数向PaddlePaddle模型参数转换。 +1. TensorFlow网络配置中同一Operator内的`Variable`属于相同的scope,以此为依据将`Variable`划分到不同的`paddle.layer`。 +1. `conv2d`、`batchnorm`、`fc`的scope需分别包含`conv`、`bn`、`fc`,以此获取对应`paddle.layer`的类型。也可以通过为`TFModelConverter`传入`layer_type_map`的`dict`,将scope映射到对应的`paddle.layer`的type来规避此项约束。 +1. `conv2d`、`fc`中`Variable`的顺序为:先可学习`Weight`后`Bias`;`batchnorm`中`Variable`的顺序为:`scale`、`shift`、`mean`、`var`,请注意参数存储的顺序将`Variable`对应到`paddle.layer.batch_norm`相应位置的参数。 +1. TensorFlow网络拓扑顺序需和PaddlePaddle网络拓扑顺序一致,尤其注意网络包含分支结构时分支定义的先后顺序,如ResNet的bottleneck模块中两分支定义的先后顺序。这是针对模型转换和PaddlePaddle网络配置均使用PaddlePaddle默认参数命名的情况,此时将根据拓扑顺序进行参数命名。 +1. 若PaddlePaddle网络配置中需要通过调用`param_attr=paddle.attr.Param(name="XX")`显式地设置可学习参数名字,这时可通过为`TFModelConverter`传入`layer_name_map`或`param_name_map`字典(类型为Python `dict`),在模型转换时将`Variable`的名字映射为所对应的`paddle.layer.XX`中可学习参数的名字。 +1. 
要求提供`build_model`接口以从此构建TensorFlow网络,加载模型并返回session。可参照如下示例进行编写: + + ```python + def build_model(): + build_graph() + sess = tf.Session(config=tf.ConfigProto(allow_soft_placement=True)) + sess.run(tf.tables_initializer()) + saver = tf.train.Saver() + saver.restore(sess, 'model/model.ckpt') + return sess + ``` + +### 使用说明 + +按照以上原则操作后,`tf2paddle.py` 脚本的`main`函数提供了一个调用示例,将TensorFlow训练的`ResNet50`模型转换为PaddlePaddle可加载模型。若要对其它各种自定义的模型进行转换,只需修改相关变量的值,在终端执行`python tf2paddle.py`即可。 + +下面是一个简单的调用示例: + +```python +# 定义相关变量 +tf_net = "TF_ResNet50" # 提供build_model的module名 +paddle_tar_name = "Paddle_ResNet50.tar.gz" # 输出的Paddle模型的文件名 + +# 初始化并加载模型 +converter = TFModelConverter(tf_net=tf_net, + paddle_tar_name=paddle_tar_name) +# 进行模型转换 +converter.convert() +``` + +### 注意事项 + +1. 由于TensorFlow中的padding机制较为特殊,在编写PaddlePaddle网络配置时,对`paddle.layer.conv`这种需要padding的层可能需要推算size后在`paddle.layer.conv`外使用`paddle.layer.pad`进行padding。 +1. 与TensorFlow图像输入多使用NHWC的数据组织格式有所不同,PaddlePaddle按照NCHW的格式组织图像输入数据。 diff --git a/image_classification/tf2paddle/tf2paddle.py b/image_classification/tf2paddle/tf2paddle.py new file mode 100644 index 0000000000000000000000000000000000000000..20b6cade17bf0c62ad86ebdf9da981a54bb63d32 --- /dev/null +++ b/image_classification/tf2paddle/tf2paddle.py @@ -0,0 +1,177 @@ +import os +import re +import collections +import struct +import gzip +import tarfile +import cStringIO +import numpy as np + +import tensorflow as tf + +from paddle.proto.ParameterConfig_pb2 import ParameterConfig +from paddle.trainer_config_helpers.default_decorators import wrap_name_default + + +class ModelConverter(object): + def __init__(self, + paddle_tar_name, + param_name_map=None, + layer_name_map=None, + layer_type_map=None): + self.tar_name = paddle_tar_name + self.param_name_map = param_name_map + self.layer_name_map = layer_name_map + self.layer_type_map = layer_type_map + self.params = dict() + + def convert(self): + layers_params = self.arrange_layer_params() + for layer_name in 
layers_params.keys(): + layer_params, layer_params_names, layer_type = layers_params[ + layer_name] + if len(layer_params) > 0: + if not layer_type: + assert layer_type_map and ( + layer_type_map.get(layer_name) in ["conv", "bn", "fc"]) + layer_type = layer_type_map[layer_name] + self.pre_layer_name = getattr( + self, "convert_" + layer_type + "_layer")( + layer_params, + params_names=[ + self.param_name_map.get(name) + if self.param_name_map else None + for name in layer_params_names + ], + name=None if self.layer_name_map == None else + self.layer_name_map.get(layer_name)) + with gzip.open(self.tar_name, 'w') as f: + self.to_tar(f) + return + + def to_tar(self, f): + tar = tarfile.TarFile(fileobj=f, mode='w') + for param_name in self.params.keys(): + param_conf, param_data = self.params[param_name] + + confStr = param_conf.SerializeToString() + tarinfo = tarfile.TarInfo(name="%s.protobuf" % param_name) + tarinfo.size = len(confStr) + buf = cStringIO.StringIO(confStr) + buf.seek(0) + tar.addfile(tarinfo, fileobj=buf) + + buf = cStringIO.StringIO() + self.serialize(param_data, buf) + tarinfo = tarfile.TarInfo(name=param_name) + buf.seek(0) + tarinfo.size = len(buf.getvalue()) + tar.addfile(tarinfo, buf) + + @staticmethod + def serialize(data, f): + f.write(struct.pack("IIQ", 0, 4, data.size)) + f.write(data.tobytes()) + + +class TFModelConverter(ModelConverter): + def __init__(self, + tf_net, + paddle_tar_name, + param_name_map=None, + layer_name_map=None, + layer_type_map=None): + super(TFModelConverter, self).__init__(paddle_tar_name, param_name_map, + layer_name_map, layer_type_map) + self.sess = __import__(tf_net).build_model() + + def arrange_layer_params(self): + all_vars = tf.global_variables() + layers_params = collections.OrderedDict() + for var in all_vars: + var_name = var.name + scope_pos = var_name.rfind('/') + if scope_pos != -1: + layer_scope = var_name[:scope_pos] + if layers_params.has_key(layer_scope): + layer_params, layer_params_names, 
layer_type = layers_params[ + layer_scope] + layer_params.append(var.eval(self.sess)) + layer_params_names.append(var_name) + else: + layer_type = re.search('conv|bn|fc', layer_scope) + layers_params[layer_scope] = ([var.eval(self.sess)], + [var_name], layer_type.group() + if layer_type else None) + return layers_params + + @wrap_name_default("conv") + def convert_conv_layer(self, params, params_names=None, name=None): + for i in range(len(params)): + data = np.transpose(params[i], ( + 3, 2, 0, 1)) if len(params[i].shape) == 4 else params[i] + if len(params) == 2: + suffix = "0" if i == 0 else "bias" + file_name = "_%s.w%s" % (name, suffix) if not ( + params_names and params_names[i]) else params_names[i] + else: + file_name = "_%s.w%s" % (name, str(i)) if not ( + params_names and params_names[i]) else params_names[i] + param_conf = ParameterConfig() + param_conf.name = file_name + dims = list(data.shape) + if len(dims) == 1: + dims.insert(1, 1) + param_conf.dims.extend(dims) + param_conf.size = reduce(lambda a, b: a * b, data.shape) + self.params[file_name] = (param_conf, data.flatten()) + + @wrap_name_default("fc_layer") + def convert_fc_layer(self, params, params_names=None, name=None): + for i in range(len(params)): + data = params[i] + if len(params) == 2: + suffix = "0" if i == 0 else "bias" + file_name = "_%s.w%s" % (name, suffix) if not ( + params_names and params_names[i]) else params_names[i] + else: + file_name = "_%s.w%s" % (name, str(i)) if not ( + params_names and params_names[i]) else params_names[i] + param_conf = ParameterConfig() + param_conf.name = file_name + dims = list(data.shape) + if len(dims) < 2: + dims.insert(0, 1) + param_conf.size = reduce(lambda a, b: a * b, dims) + param_conf.dims.extend(dims) + self.params[file_name] = (param_conf, data.flatten()) + return name + + @wrap_name_default("batch_norm") + def convert_bn_layer(self, params, params_names=None, name=None): + params = [params[i] for i in (0, 2, 3, 1)] + params_names = 
[params_names[i] + for i in (0, 2, 3, 1)] if params_names else params_names + for i in range(len(params)): + data = params[i] + file_name = "_%s.w%s" % (name, str(i)) if i < 3 else "_%s.w%s" % ( + name, "bias") + file_name = file_name if not (params_names and + params_names[i]) else params_names[i] + param_conf = ParameterConfig() + param_conf.name = file_name + dims = list(data.shape) + assert len(dims) == 1 + dims.insert(0, 1) + param_conf.size = reduce(lambda a, b: a * b, dims) + param_conf.dims.extend(dims) + self.params[file_name] = (param_conf, data.flatten()) + return name + + +if __name__ == "__main__": + tf_net = "TF_ResNet" + paddle_tar_name = "Paddle_ResNet50.tar.gz" + + converter = TFModelConverter(tf_net=tf_net, paddle_tar_name=paddle_tar_name) + converter.convert()