Commit 83070db8 authored by guosheng

Add tf2paddle to convert models

Parent ede5a045
## Usage
`tf2paddle.py` provides `TFModelConverter`, an interface for converting models trained with TensorFlow into models usable by PaddlePaddle. It wraps conversion functions for layers commonly used in computer vision, such as Convolution and BatchNorm, and can convert common models such as VGG and ResNet. The basic conversion process is: obtain the variables through TensorFlow's Python API, map each variable to the corresponding layer parameter in PaddlePaddle, then adapt and serialize them into a model file that PaddlePaddle's Python API can load directly.
To ensure that the variables in a TensorFlow model map correctly to the layer parameters in the PaddlePaddle model and the conversion completes correctly, the conversion imposes the following constraints:
- Only the parameters of conv2d, batchnorm, and fc, the three TensorFlow Operators that carry trainable variables, are supported.
- In the TensorFlow configuration, the variables of one Operator must belong to the same scope; this is how variables are grouped into layers.
- The scopes of conv2d, batchnorm, and fc must contain `conv`, `bn`, and `fc` respectively, from which the layer type is inferred. Alternatively, this constraint can be bypassed by passing `TFModelConverter` a `layer_type_map` dict that maps each scope to its layer type.
- Within conv2d and fc the variables must be ordered weight first, then bias; within batchnorm the order must be scale, shift, mean, var. This ordering determines which parameter of the layer each variable maps to.
- The topological order of the TensorFlow network must match that of the PaddlePaddle network; pay particular attention to the order of the left and right branches where the network forks. This applies when both the conversion and the PaddlePaddle network configuration use PaddlePaddle's default parameter naming, in which case parameters are named by topological order. If the PaddlePaddle configuration assigns custom param names, you can pass `TFModelConverter` a `layer_name_map` or `param_name_map` dict to map variable names to the param names used in the PaddlePaddle configuration during conversion. A minimal scope-naming sketch that satisfies these constraints is shown after this list.
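The sketch below is a minimal, hypothetical TensorFlow graph (TF 1.x style, matching the API used in this file) whose scope names and variable creation order satisfy the constraints above; all scope, variable, and shape choices are illustrative only:

```python
import tensorflow as tf

def build_graph():
    # Hypothetical toy network: one conv, one batchnorm, one fc.
    images = tf.placeholder(tf.float32, [None, 224, 224, 3], name="images")
    with tf.variable_scope("conv1"):      # scope name contains "conv"
        w = tf.get_variable("weight", [3, 3, 3, 16])   # weight created first
        b = tf.get_variable("bias", [16])               # bias created second
        conv = tf.nn.conv2d(images, w, [1, 1, 1, 1], "SAME") + b
    with tf.variable_scope("bn1"):        # scope name contains "bn"
        scale = tf.get_variable("scale", [16])           # order: scale, shift, mean, var
        shift = tf.get_variable("shift", [16])
        mean = tf.get_variable("mean", [16], trainable=False)
        var = tf.get_variable("var", [16], trainable=False)
        bn = tf.nn.batch_normalization(conv, mean, var, shift, scale, 1e-5)
    with tf.variable_scope("fc1"):        # scope name contains "fc"
        flat = tf.reshape(bn, [-1, 224 * 224 * 16])
        w_fc = tf.get_variable("weight", [224 * 224 * 16, 10])
        b_fc = tf.get_variable("bias", [10])
        logits = tf.matmul(flat, w_fc) + b_fc
    return logits
```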
In addition, you must provide a `build_model` function that builds the TensorFlow network, loads the model, and returns the session, for example:
```python
def build_model():
    build_graph()
    sess = tf.Session(config=tf.ConfigProto(allow_soft_placement=True))
    sess.run(tf.tables_initializer())
    saver = tf.train.Saver()
    saver.restore(sess, 'model/model.ckpt')
    return sess
```
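The module named by `tf_net` in the next step (e.g. `TF_ResNet50`) only needs to expose this `build_model` function: `TFModelConverter` imports the module by name and calls `build_model()` to obtain the session holding the restored variables.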
Once that is in place, `TFModelConverter` is used as follows:
```python
# Define the relevant variables
tf_net = "TF_ResNet50"                      # name of the module that provides build_model
paddle_tar_name = "Paddle_ResNet50.tar.gz"  # file name of the output Paddle model
# Initialize the converter and load the model
converter = TFModelConverter(tf_net=tf_net,
                             paddle_tar_name=paddle_tar_name)
# Run the conversion
converter.convert()
```
`tf2paddle.py` already implements the steps above; edit the relevant variable values in it and run `python tf2paddle.py` to perform the conversion.
Additionally, note the following when using the converted model:
- Because TensorFlow's padding mechanism is somewhat special, when writing the PaddlePaddle network configuration you may need to compute the required size for layers that need padding (such as conv) and apply the padding outside the conv with a pad_layer.
- Unlike TensorFlow, which mostly uses the NHWC data_format, PaddlePaddle expects input data in NCHW layout; see the loading sketch below.
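As a minimal sketch of using the converted model, the snippet below assumes the legacy PaddlePaddle v2 Python API and a hypothetical HWC image array; it loads the converted tar file and transposes the input into CHW layout. The file name and image array are placeholders:

```python
import gzip

import numpy as np
import paddle.v2 as paddle

# Load the parameters produced by converter.convert().
with gzip.open("Paddle_ResNet50.tar.gz", 'r') as f:
    parameters = paddle.parameters.Parameters.from_tar(f)

# TensorFlow inputs are usually NHWC; PaddlePaddle expects NCHW,
# so transpose a single HWC image to CHW before feeding it.
img_hwc = np.random.rand(224, 224, 3).astype('float32')  # placeholder image
img_chw = img_hwc.transpose((2, 0, 1)).flatten()
```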
import os
import re
import collections
import struct
import gzip
import tarfile
import cStringIO
import numpy as np
from paddle.proto.ParameterConfig_pb2 import ParameterConfig
from paddle.trainer_config_helpers.default_decorators import wrap_name_default
import tensorflow as tf

class ModelConverter(object):
    def __init__(self,
                 paddle_tar_name,
                 param_name_map=None,
                 layer_name_map=None,
                 layer_type_map=None):
        self.tar_name = paddle_tar_name
        self.param_name_map = param_name_map
        self.layer_name_map = layer_name_map
        self.layer_type_map = layer_type_map
        self.params = dict()

    def convert(self):
        layers_params = self.arrange_layer_params()
        for layer_name in layers_params.keys():
            layer_params, layer_params_names, layer_type = layers_params[
                layer_name]
            if len(layer_params) > 0:
                if not layer_type:
                    # Fall back to the user-provided scope -> layer type mapping.
                    assert self.layer_type_map and (
                        self.layer_type_map.get(layer_name) in
                        ["conv", "bn", "fc"])
                    layer_type = self.layer_type_map[layer_name]
                self.pre_layer_name = getattr(
                    self, "convert_" + layer_type + "_layer")(
                        layer_params,
                        params_names=[
                            self.param_name_map.get(name)
                            if self.param_name_map else None
                            for name in layer_params_names
                        ],
                        name=None if self.layer_name_map is None else
                        self.layer_name_map.get(layer_name))
        with gzip.open(self.tar_name, 'w') as f:
            self.to_tar(f)
        return

    def to_tar(self, f):
        tar = tarfile.TarFile(fileobj=f, mode='w')
        for param_name in self.params.keys():
            param_conf, param_data = self.params[param_name]
            # Write the serialized ParameterConfig as <param_name>.protobuf.
            confStr = param_conf.SerializeToString()
            tarinfo = tarfile.TarInfo(name="%s.protobuf" % param_name)
            tarinfo.size = len(confStr)
            buf = cStringIO.StringIO(confStr)
            buf.seek(0)
            tar.addfile(tarinfo, fileobj=buf)
            # Write the parameter values themselves as <param_name>.
            buf = cStringIO.StringIO()
            self.serialize(param_data, buf)
            tarinfo = tarfile.TarInfo(name=param_name)
            buf.seek(0)
            tarinfo.size = len(buf.getvalue())
            tar.addfile(tarinfo, buf)

    @staticmethod
    def serialize(data, f):
        # Header: version (0), bytes per value (4 for float32), value count,
        # followed by the raw parameter data.
        f.write(struct.pack("IIQ", 0, 4, data.size))
        f.write(data.tobytes())

class TFModelConverter(ModelConverter):
    def __init__(self,
                 tf_net,
                 paddle_tar_name,
                 param_name_map=None,
                 layer_name_map=None,
                 layer_type_map=None):
        super(TFModelConverter, self).__init__(paddle_tar_name, param_name_map,
                                               layer_name_map, layer_type_map)
        # Import the module providing build_model() and obtain the session
        # with the restored TensorFlow model.
        self.sess = __import__(tf_net).build_model()

    def arrange_layer_params(self):
        all_vars = tf.global_variables()
        layers_params = collections.OrderedDict()
        for var in all_vars:
            var_name = var.name
            # Group variables by their enclosing scope (everything before the
            # last '/'); each scope corresponds to one layer.
            scope_pos = var_name.rfind('/')
            if scope_pos != -1:
                layer_scope = var_name[:scope_pos]
                if layers_params.has_key(layer_scope):
                    layer_params, layer_params_names, layer_type = layers_params[
                        layer_scope]
                    layer_params.append(var.eval(self.sess))
                    layer_params_names.append(var_name)
                else:
                    # Infer the layer type from the scope name.
                    layer_type = re.search('conv|bn|fc', layer_scope)
                    layers_params[layer_scope] = ([var.eval(self.sess)],
                                                  [var_name], layer_type.group()
                                                  if layer_type else None)
        return layers_params
@wrap_name_default("conv")
def convert_conv_layer(self, params, params_names=None, name=None):
for i in range(len(params)):
data = np.transpose(params[i], (3, 2, 0, 1))
if len(params) == 2:
suffix = "0" if i == 0 else "bias"
file_name = "_%s.w%s" % (name, suffix) if not (
params_names and params_names[i]) else params_names[i]
else:
file_name = "_%s.w%s" % (name, str(i)) if not (
params_names and params_names[i]) else params_names[i]
param_conf = ParameterConfig()
param_conf.name = file_name
dims = list(data.shape)
if len(dims) == 1:
dims.insert(1, 1)
param_conf.dims.extend(dims)
param_conf.size = reduce(lambda a, b: a * b, data.shape)
self.params[file_name] = (param_conf, data.flatten())
@wrap_name_default("fc_layer")
def convert_fc_layer(self, params, params_names=None, name=None):
for i in range(len(params)):
data = params[i]
if len(params) == 2:
suffix = "0" if i == 0 else "bias"
file_name = "_%s.w%s" % (name, suffix) if not (
params_names and params_names[i]) else params_names[i]
else:
file_name = "_%s.w%s" % (name, str(i)) if not (
params_names and params_names[i]) else params_names[i]
param_conf = ParameterConfig()
param_conf.name = file_name
dims = list(data.shape)
if len(dims) < 2:
dims.insert(0, 1)
param_conf.size = reduce(lambda a, b: a * b, dims)
param_conf.dims.extend(dims)
self.params[file_name] = (param_conf, data.flatten())
return name
@wrap_name_default("batch_norm")
def convert_bn_layer(self, params, params_names=None, name=None):
params = [params[i] for i in (0, 2, 3, 1)]
params_names = [params_names[i]
for i in (0, 2, 3, 1)] if params_names else params_names
for i in range(len(params)):
data = params[i]
file_name = "_%s.w%s" % (name, str(i)) if i < 3 else "_%s.w%s" % (
name, "bias")
file_name = file_name if not (params_names and
params_names[i]) else params_names[i]
param_conf = ParameterConfig()
param_conf.name = file_name
dims = list(data.shape)
assert len(dims) == 1
dims.insert(0, 1)
param_conf.size = reduce(lambda a, b: a * b, dims)
param_conf.dims.extend(dims)
self.params[file_name] = (param_conf, data.flatten())
return name

if __name__ == "__main__":
    tf_net = "TF_ResNet"
    paddle_tar_name = "Paddle_ResNet50.tar.gz"
    converter = TFModelConverter(tf_net=tf_net, paddle_tar_name=paddle_tar_name)
    converter.convert()