# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import argparse
import ast

import numpy as np
import torch
import paddle
from paddle import fluid

parser = argparse.ArgumentParser()
parser.add_argument(
    "--pytorch-model",
    dest='pytorch_model',
    type=str,
    help="The source pytorch mode.")
parser.add_argument(
    "--paddle-model",
    dest='paddle_model',
    type=str,
    help="The directory to save paddle model, now saves model as a folder.")
parser.add_argument(
    "--name-map",
    dest="name_map",
    type=str,
    help="name mapping for the source model and the target model.")


def read_name_map(fname):
    """Load the parameter-name mapping from a tab-separated text file.

    Each non-empty line must hold exactly three tab-separated columns:

    1. the parameter name in the pytorch model's state dict;
    2. the parameter name in the paddle model's state dict;
    3. the shape of that parameter in the paddle model, written as a
       Python literal such as ``[64, 3, 3, 3]`` or ``(64,)``.

    Args:
        fname: path of the mapping file.

    Returns:
        A dict mapping each pytorch parameter name to a
        ``(paddle_name, paddle_shape)`` tuple.

    Raises:
        ValueError: if a line does not have three columns or its shape
            column is not a valid Python literal.
    """
    name_map = {}
    with open(fname, 'rt') as f:
        for lineno, raw_line in enumerate(f, 1):
            line = raw_line.strip()
            if not line:
                # Tolerate blank lines (e.g. a trailing newline at EOF).
                continue
            fields = line.split('\t')
            if len(fields) != 3:
                raise ValueError(
                    "{}:{}: expected 3 tab-separated columns, got {}".format(
                        fname, lineno, len(fields)))
            src_key, tgt_key, shape_text = fields
            # NOTE: this deliberately uses ast.literal_eval instead of
            # eval(): the column is only ever meant to be a shape literal,
            # and eval() would execute arbitrary code found in the file.
            try:
                tgt_shape = ast.literal_eval(shape_text)
            except (ValueError, SyntaxError) as err:
                raise ValueError(
                    "{}:{}: cannot parse shape {!r}: {}".format(
                        fname, lineno, shape_text, err))
            name_map[src_key] = (tgt_key, tgt_shape)
    return name_map


def torch2paddle(state_dict, name_map, dirname):
    """Write a pytorch state dict out as paddle parameters.

    Args:
        state_dict: pytorch model's state dict (parameter name -> tensor).
        name_map: dict mapping each pytorch parameter name to a
            ``(paddle_name, paddle_shape)`` tuple, as returned by
            ``read_name_map``.
        dirname: path of the paddle model to save.

    Raises:
        KeyError: if some key of ``state_dict`` has no entry in
            ``name_map``.
    """
    # Fail early, before anything is created, and report every missing
    # key at once instead of only the first one encountered.
    missing = [k for k in state_dict if k not in name_map]
    if missing:
        raise KeyError(
            "no name mapping for pytorch parameter(s): {}".format(
                ", ".join(str(k) for k in missing)))

    program = fluid.Program()
    global_block = program.global_block()
    for k in state_dict:
        # shape=[1] is only a placeholder; the real values (reshaped per
        # the name map) are written into the tensor further below.
        global_block.create_parameter(
            name=name_map[k][0],
            shape=[1],
            dtype='float32',
            initializer=fluid.initializer.Constant(value=0.0))

    place = fluid.core.CPUPlace()
    exe = fluid.Executor(place)
    exe.run(fluid.default_startup_program())
    exe.run(program)

    # NOTE: transpose the pytorch model's parameter if necessary.
    # We do not transpose here because we used conv instead of FC layer to
    # replace Linear in pytorch, which does not need us to transpose the
    # parameters. But when you use a FC layer corresponding to a torch
    # Linear module, be sure to transpose the weight.
    # Other transformations are not concerned, but users should check the
    # data shape to ensure that the transformations are what's expected.
    scope = fluid.global_scope()
    for k, v in state_dict.items():
        tgt_name, tgt_shape = name_map[k]
        # detach() so tensors that still require grad can be converted.
        values = v.detach().cpu().numpy().reshape(tgt_shape)
        scope.find_var(tgt_name).get_tensor().set(values, place)

    fluid.io.save_params(exe, dirname, main_program=program)


if __name__ == "__main__":
    args, _ = parser.parse_known_args()
    # map_location="cpu" lets checkpoints saved from a GPU be loaded on a
    # machine without CUDA; every tensor is moved to CPU below anyway.
    # NOTE(review): torch.load unpickles the file, so only run this on
    # checkpoints from a trusted source.
    result = torch.load(args.pytorch_model, map_location="cpu")
    state_dict = result["state_dict"]
    name_map = read_name_map(args.name_map)
    torch2paddle(state_dict, name_map, args.paddle_model)