提交 3be4c183 编写于 作者: B BigFishMaster

Merge branch 'develop' of https://github.com/PaddlePaddle/models into model_update

......@@ -218,8 +218,6 @@ class AsyncDataReader(object):
self._sample_proc_num = self._proc_num - 2
self._verbose = verbose
self._force_exit = ForceExitWrapper(self._manager.Value('b', False))
self._pool_manager = SharedMemoryPoolManager(self._batch_buffer_size *
3, self._manager)
def generate_bucket_list(self, is_shuffle):
if self._block_info_list is None:
......@@ -424,6 +422,9 @@ class AsyncDataReader(object):
sample_queue = self._start_async_processing()
batch_queue = self._manager.Queue(self._batch_buffer_size)
self._pool_manager = SharedMemoryPoolManager(self._batch_buffer_size *
3, self._manager)
assembling_proc = DaemonProcessGroup(
proc_num=1,
target=batch_assembling_task,
......@@ -439,3 +440,6 @@ class AsyncDataReader(object):
if isinstance(batch_data, EpochEndSignal):
break
yield batch_data
# clean the shared memory
del self._pool_manager
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import sys
import sys, time
from six import reraise
from tblib import Traceback
from multiprocessing import Manager, Process
......@@ -161,9 +161,10 @@ class SharedMemoryPoolManager(object):
def __init__(self, pool_size, manager, name_prefix='/deep_asr'):
self._names = []
self._dict = manager.dict()
self._time_prefix = time.strftime('%Y%m%d%H%M%S')
for i in xrange(pool_size):
name = name_prefix + '_' + str(i)
name = name_prefix + '_' + self._time_prefix + '_' + str(i)
self._dict[name] = SharedNDArray(name)
self._names.append(name)
......
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "decoder.h"
std::string decode(std::vector<std::vector<float>> probs_mat) {
// Add decoding logic here
return "example decoding result";
}
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "post_decode_faster.h"
typedef kaldi::int32 int32;
using fst::SymbolTable;
using fst::VectorFst;
using fst::StdArc;
Decoder::Decoder(std::string word_syms_filename,
std::string fst_in_filename,
std::string logprior_rxfilename) {
const char* usage =
"Decode, reading log-likelihoods (of transition-ids or whatever symbol "
"is on the graph) as matrices.";
kaldi::ParseOptions po(usage);
binary = true;
acoustic_scale = 1.5;
allow_partial = true;
kaldi::FasterDecoderOptions decoder_opts;
decoder_opts.Register(&po, true); // true == include obscure settings.
po.Register("binary", &binary, "Write output in binary mode");
po.Register("allow-partial",
&allow_partial,
"Produce output even when final state was not reached");
po.Register("acoustic-scale",
&acoustic_scale,
"Scaling factor for acoustic likelihoods");
word_syms = NULL;
if (word_syms_filename != "") {
word_syms = fst::SymbolTable::ReadText(word_syms_filename);
if (!word_syms)
KALDI_ERR << "Could not read symbol table from file "
<< word_syms_filename;
}
std::ifstream is_logprior(logprior_rxfilename);
logprior.Read(is_logprior, false);
// It's important that we initialize decode_fst after loglikes_reader, as it
// can prevent crashes on systems installed without enough virtual memory.
// It has to do with what happens on UNIX systems if you call fork() on a
// large process: the page-table entries are duplicated, which requires a
// lot of virtual memory.
decode_fst = fst::ReadFstKaldi(fst_in_filename);
decoder = new kaldi::FasterDecoder(*decode_fst, decoder_opts);
}
Decoder::~Decoder() {
if (!word_syms) delete word_syms;
delete decode_fst;
delete decoder;
}
std::string Decoder::decode(
std::string key,
const std::vector<std::vector<kaldi::BaseFloat>>& log_probs) {
size_t num_frames = log_probs.size();
size_t dim_label = log_probs[0].size();
kaldi::Matrix<kaldi::BaseFloat> loglikes(
num_frames, dim_label, kaldi::kSetZero, kaldi::kStrideEqualNumCols);
for (size_t i = 0; i < num_frames; ++i) {
memcpy(loglikes.Data() + i * dim_label,
log_probs[i].data(),
sizeof(kaldi::BaseFloat) * dim_label);
}
return decode(key, loglikes);
}
std::vector<std::string> Decoder::decode(std::string posterior_rspecifier) {
kaldi::SequentialBaseFloatMatrixReader posterior_reader(posterior_rspecifier);
std::vector<std::string> decoding_results;
for (; !posterior_reader.Done(); posterior_reader.Next()) {
std::string key = posterior_reader.Key();
kaldi::Matrix<kaldi::BaseFloat> loglikes(posterior_reader.Value());
decoding_results.push_back(decode(key, loglikes));
}
return decoding_results;
}
std::string Decoder::decode(std::string key,
kaldi::Matrix<kaldi::BaseFloat>& loglikes) {
std::string decoding_result;
if (loglikes.NumRows() == 0) {
KALDI_WARN << "Zero-length utterance: " << key;
}
KALDI_ASSERT(loglikes.NumCols() == logprior.Dim());
loglikes.ApplyLog();
loglikes.AddVecToRows(-1.0, logprior);
kaldi::DecodableMatrixScaled decodable(loglikes, acoustic_scale);
decoder->Decode(&decodable);
VectorFst<kaldi::LatticeArc> decoded; // linear FST.
if ((allow_partial || decoder->ReachedFinal()) &&
decoder->GetBestPath(&decoded)) {
if (!decoder->ReachedFinal())
KALDI_WARN << "Decoder did not reach end-state, outputting partial "
"traceback.";
std::vector<int32> alignment;
std::vector<int32> words;
kaldi::LatticeWeight weight;
GetLinearSymbolSequence(decoded, &alignment, &words, &weight);
if (word_syms != NULL) {
for (size_t i = 0; i < words.size(); i++) {
std::string s = word_syms->Find(words[i]);
decoding_result += s;
if (s == "")
KALDI_ERR << "Word-id " << words[i] << " not in symbol table.";
}
}
}
return decoding_result;
}
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved.
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
......@@ -14,5 +14,44 @@ limitations under the License. */
#include <string>
#include <vector>
#include "base/kaldi-common.h"
#include "base/timer.h"
#include "decoder/decodable-matrix.h"
#include "decoder/faster-decoder.h"
#include "fstext/fstext-lib.h"
#include "hmm/transition-model.h"
#include "lat/kaldi-lattice.h" // for {Compact}LatticeArc
#include "tree/context-dep.h"
#include "util/common-utils.h"
std::string decode(std::vector<std::vector<float>> probs_mat);
class Decoder {
public:
Decoder(std::string word_syms_filename,
std::string fst_in_filename,
std::string logprior_rxfilename);
~Decoder();
// Interface to accept the scores read from specifier and return
// the batch decoding results
std::vector<std::string> decode(std::string posterior_rspecifier);
// Accept the scores of one utterance and return the decoding result
std::string decode(
std::string key,
const std::vector<std::vector<kaldi::BaseFloat>> &log_probs);
private:
// For decoding one utterance
std::string decode(std::string key,
kaldi::Matrix<kaldi::BaseFloat> &loglikes);
fst::SymbolTable *word_syms;
fst::VectorFst<fst::StdArc> *decode_fst;
kaldi::FasterDecoder *decoder;
kaldi::Vector<kaldi::BaseFloat> logprior;
bool binary;
kaldi::BaseFloat acoustic_scale;
bool allow_partial;
};
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved.
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
......@@ -15,15 +15,25 @@ limitations under the License. */
#include <pybind11/pybind11.h>
#include <pybind11/stl.h>
#include "decoder.h"
#include "post_decode_faster.h"
namespace py = pybind11;
PYBIND11_MODULE(decoder, m) {
m.doc() = "Decode function for Deep ASR model";
m.def("decode",
&decode,
"Decode one input probability matrix "
"and return the transcription");
PYBIND11_MODULE(post_decode_faster, m) {
m.doc() = "Decoder for Deep ASR model";
py::class_<Decoder>(m, "Decoder")
.def(py::init<std::string, std::string, std::string>())
.def("decode",
(std::vector<std::string> (Decoder::*)(std::string)) &
Decoder::decode,
"Decode for the probability matrices in specifier "
"and return the transcriptions.")
.def(
"decode",
(std::string (Decoder::*)(
std::string, const std::vector<std::vector<kaldi::BaseFloat>>&)) &
Decoder::decode,
"Decode one input probability matrix "
"and return the transcription.");
}
# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved.
# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
......@@ -13,27 +13,57 @@
# limitations under the License.
import os
import glob
from distutils.core import setup, Extension
from distutils.sysconfig import get_config_vars
args = ['-std=c++11']
try:
kaldi_root = os.environ['KALDI_ROOT']
except:
raise ValueError("Enviroment variable 'KALDI_ROOT' is not defined. Please "
"install kaldi and export KALDI_ROOT=<kaldi's root dir> .")
args = [
'-std=c++11', '-Wno-sign-compare', '-Wno-unused-variable',
'-Wno-unused-local-typedefs', '-Wno-unused-but-set-variable',
'-Wno-deprecated-declarations', '-Wno-unused-function'
]
# remove warning about -Wstrict-prototypes
(opt, ) = get_config_vars('OPT')
os.environ['OPT'] = " ".join(flag for flag in opt.split()
if flag != '-Wstrict-prototypes')
os.environ['CC'] = 'g++'
LIBS = [
'fst', 'kaldi-base', 'kaldi-util', 'kaldi-matrix', 'kaldi-tree',
'kaldi-hmm', 'kaldi-fstext', 'kaldi-decoder', 'kaldi-lat'
]
LIB_DIRS = [
'tools/openfst/lib', 'src/base', 'src/matrix', 'src/util', 'src/tree',
'src/hmm', 'src/fstext', 'src/decoder', 'src/lat'
]
LIB_DIRS = [os.path.join(kaldi_root, path) for path in LIB_DIRS]
LIB_DIRS = [os.path.abspath(path) for path in LIB_DIRS]
ext_modules = [
Extension(
'decoder',
['pybind.cc', 'decoder.cc'],
include_dirs=['pybind11/include', '.'],
'post_decode_faster',
['pybind.cc', 'post_decode_faster.cc'],
include_dirs=[
'pybind11/include', '.', os.path.join(kaldi_root, 'src'),
os.path.join(kaldi_root, 'tools/openfst/src/include')
],
language='c++',
libraries=LIBS,
library_dirs=LIB_DIRS,
runtime_library_dirs=LIB_DIRS,
extra_compile_args=args, ),
]
setup(
name='decoder',
name='post_decode_faster',
version='0.0.1',
author='Paddle',
author_email='',
......
set -e
if [ ! -d pybind11 ]; then
git clone https://github.com/pybind/pybind11.git
......
......@@ -13,7 +13,7 @@ import data_utils.augmentor.trans_mean_variance_norm as trans_mean_variance_norm
import data_utils.augmentor.trans_add_delta as trans_add_delta
import data_utils.augmentor.trans_splice as trans_splice
import data_utils.async_data_reader as reader
import decoder.decoder as decoder
from decoder.post_decode_faster import Decoder
from data_utils.util import lodtensor_to_ndarray
from model_utils.model import stacked_lstmp_model
from data_utils.util import split_infer_result
......@@ -32,6 +32,11 @@ def parse_args():
default=1,
help='The minimum sequence number of a batch data. '
'(default: %(default)d)')
parser.add_argument(
'--frame_dim',
type=int,
default=120 * 11,
help='Frame dimension of feature data. (default: %(default)d)')
parser.add_argument(
'--stacked_num',
type=int,
......@@ -47,6 +52,11 @@ def parse_args():
type=int,
default=1024,
help='Hidden size of lstmp unit. (default: %(default)d)')
parser.add_argument(
'--class_num',
type=int,
default=1749,
help='Number of classes in label. (default: %(default)d)')
parser.add_argument(
'--learning_rate',
type=float,
......@@ -81,6 +91,21 @@ def parse_args():
type=str,
default='./checkpoint',
help="The checkpoint path to init model. (default: %(default)s)")
parser.add_argument(
'--vocabulary',
type=str,
default='./decoder/graph/words.txt',
help="The path to vocabulary. (default: %(default)s)")
parser.add_argument(
'--graphs',
type=str,
default='./decoder/graph/TLG.fst',
help="The path to TLG graphs for decoding. (default: %(default)s)")
parser.add_argument(
'--log_prior',
type=str,
default="./decoder/logprior",
help="The log prior probs for training data. (default: %(default)s)")
args = parser.parse_args()
return args
......@@ -99,10 +124,11 @@ def infer_from_ckpt(args):
raise IOError("Invalid checkpoint!")
prediction, avg_cost, accuracy = stacked_lstmp_model(
frame_dim=args.frame_dim,
hidden_dim=args.hidden_dim,
proj_dim=args.proj_dim,
stacked_num=args.stacked_num,
class_num=1749,
class_num=args.class_num,
parallel=args.parallel)
infer_program = fluid.default_main_program().clone()
......@@ -154,8 +180,8 @@ def infer_from_ckpt(args):
probs, lod = lodtensor_to_ndarray(results[0])
infer_batch = split_infer_result(probs, lod)
for index, sample in enumerate(infer_batch):
print("Decoding %d: " % (batch_id * args.batch_size + index),
decoder.decode(sample))
key = "utter#%d" % (batch_id * args.batch_size + index)
print(key, ": ", decoder.decode(key, sample), "\n")
print(np.mean(infer_costs), np.mean(infer_accs))
......
......@@ -6,7 +6,8 @@ import paddle.v2 as paddle
import paddle.fluid as fluid
def stacked_lstmp_model(hidden_dim,
def stacked_lstmp_model(frame_dim,
hidden_dim,
proj_dim,
stacked_num,
class_num,
......@@ -20,12 +21,13 @@ def stacked_lstmp_model(hidden_dim,
label data respectively. And in inference, only `feature` is needed.
Args:
hidden_dim(int): The hidden state's dimension of the LSTMP layer.
proj_dim(int): The projection size of the LSTMP layer.
stacked_num(int): The number of stacked LSTMP layers.
parallel(bool): Run in parallel or not, default `False`.
is_train(bool): Run in training phase or not, default `True`.
class_dim(int): The number of output classes.
frame_dim(int): The frame dimension of feature data.
hidden_dim(int): The hidden state's dimension of the LSTMP layer.
proj_dim(int): The projection size of the LSTMP layer.
stacked_num(int): The number of stacked LSTMP layers.
parallel(bool): Run in parallel or not, default `False`.
is_train(bool): Run in training phase or not, default `True`.
class_dim(int): The number of output classes.
"""
# network configuration
......@@ -78,7 +80,7 @@ def stacked_lstmp_model(hidden_dim,
# data feeder
feature = fluid.layers.data(
name="feature", shape=[-1, 120 * 11], dtype="float32", lod_level=1)
name="feature", shape=[-1, frame_dim], dtype="float32", lod_level=1)
label = fluid.layers.data(
name="label", shape=[-1, 1], dtype="int64", lod_level=1)
......@@ -92,11 +94,12 @@ def stacked_lstmp_model(hidden_dim,
feat_ = pd.read_input(feature)
label_ = pd.read_input(label)
prediction, avg_cost, acc = _net_conf(feat_, label_)
for out in [avg_cost, acc]:
for out in [prediction, avg_cost, acc]:
pd.write_output(out)
# get mean loss and acc through every devices.
avg_cost, acc = pd()
prediction, avg_cost, acc = pd()
prediction.stop_gradient = True
avg_cost = fluid.layers.mean(x=avg_cost)
acc = fluid.layers.mean(x=acc)
else:
......
......@@ -31,6 +31,11 @@ def parse_args():
default=1,
help='The minimum sequence number of a batch data. '
'(default: %(default)d)')
parser.add_argument(
'--frame_dim',
type=int,
default=120 * 11,
help='Frame dimension of feature data. (default: %(default)d)')
parser.add_argument(
'--stacked_num',
type=int,
......@@ -46,6 +51,11 @@ def parse_args():
type=int,
default=1024,
help='Hidden size of lstmp unit. (default: %(default)d)')
parser.add_argument(
'--class_num',
type=int,
default=1749,
help='Number of classes in label. (default: %(default)d)')
parser.add_argument(
'--learning_rate',
type=float,
......@@ -119,10 +129,11 @@ def profile(args):
"arg 'first_batches_to_skip' must not be smaller than 0.")
_, avg_cost, accuracy = stacked_lstmp_model(
frame_dim=args.frame_dim,
hidden_dim=args.hidden_dim,
proj_dim=args.proj_dim,
stacked_num=args.stacked_num,
class_num=1749,
class_num=args.class_num,
parallel=args.parallel)
optimizer = fluid.optimizer.Adam(learning_rate=args.learning_rate)
......
......@@ -30,6 +30,11 @@ def parse_args():
default=1,
help='The minimum sequence number of a batch data. '
'(default: %(default)d)')
parser.add_argument(
'--frame_dim',
type=int,
default=120 * 11,
help='Frame dimension of feature data. (default: %(default)d)')
parser.add_argument(
'--stacked_num',
type=int,
......@@ -45,6 +50,11 @@ def parse_args():
type=int,
default=1024,
help='Hidden size of lstmp unit. (default: %(default)d)')
parser.add_argument(
'--class_num',
type=int,
default=1749,
help='Number of classes in label. (default: %(default)d)')
parser.add_argument(
'--pass_num',
type=int,
......@@ -137,10 +147,11 @@ def train(args):
os.mkdir(args.infer_models)
prediction, avg_cost, accuracy = stacked_lstmp_model(
frame_dim=args.frame_dim,
hidden_dim=args.hidden_dim,
proj_dim=args.proj_dim,
stacked_num=args.stacked_num,
class_num=1749,
class_num=args.class_num,
parallel=args.parallel)
# program for test
......
......@@ -2,7 +2,8 @@
This tool is used to convert a Caffe model to Fluid model
### Howto
1, Prepare caffepb.py in ./proto, two options provided
1, Prepare caffepb.py in ./proto if your python has no 'pycaffe' module, two options provided here:
1) generate it from caffe.proto using protoc
bash ./proto/compile.sh
......@@ -12,14 +13,24 @@ This tool is used to convert a Caffe model to Fluid model
2, Convert the caffe model using 'convert.py' which will generate a python script and a weight(in .npy) file
3, Use the converted model to predict
see more detail info in 'tests/lenet/README.md'
see more detail info in 'examples/xxx'
### Supported models
### Tested models
- Lenet on mnist dataset
- ResNets:(ResNet-50, ResNet-101, ResNet-152)
model addrs:(https://onedrive.live.com/?authkey=%21AAFW2-FVoxeVRck&id=4006CBB8476FF777%2117887&cid=4006CBB8476FF777)
model addr: `https://onedrive.live.com/?authkey=%21AAFW2-FVoxeVRck&id=4006CBB8476FF777%2117887&cid=4006CBB8476FF777`_
- GoogleNet:
model addr: `https://gist.github.com/jimmie33/7ea9f8ac0da259866b854460f4526034`_
- VGG:
model addr: `https://gist.github.com/ksimonyan/211839e770f7b538e2d8`_
- AlexNet:
model addr: `https://github.com/BVLC/caffe/tree/master/models/bvlc_alexnet`_
### Notes
Some of this code come from here: https://github.com/ethereon/caffe-tensorflow
......@@ -4,8 +4,8 @@ import os
import sys
import numpy as np
import argparse
from kaffe import KaffeError, print_stderr
from kaffe import KaffeError, print_stderr
from kaffe.paddle import Transformer
......@@ -47,6 +47,8 @@ def convert(def_path, caffemodel_path, data_output_path, code_output_path,
except KaffeError as err:
fatal_error('Error encountered: {}'.format(err))
return 0
def main():
""" main
......@@ -64,9 +66,10 @@ def main():
help='The phase to convert: test (default) or train')
args = parser.parse_args()
validate_arguments(args)
convert(args.def_path, args.caffemodel, args.data_output_path,
args.code_output_path, args.phase)
return convert(args.def_path, args.caffemodel, args.data_output_path,
args.code_output_path, args.phase)
if __name__ == '__main__':
main()
ret = main()
sys.exit(ret)
a demo to show converting caffe models on 'imagenet' using caffe2fluid
---
# How to use
1. prepare python environment
2. download caffe model to "models.caffe/xxx" which contains "xxx.caffemodel" and "xxx.prototxt"
3. run the tool
eg: bash ./run.sh resnet50 ./models.caffe/resnet50 ./models/resnet50
#!/bin/env python
#function:
# a demo to show how to use the converted model genereated by caffe2fluid
#
#notes:
# only support imagenet data
import os
import sys
import inspect
import numpy as np
import paddle.v2 as paddle
import paddle.v2.fluid as fluid
def load_data(imgfile, shape):
h, w = shape[1:]
from PIL import Image
im = Image.open(imgfile)
# The storage order of the loaded image is W(widht),
# H(height), C(channel). PaddlePaddle requires
# the CHW order, so transpose them.
im = im.resize((w, h), Image.ANTIALIAS)
im = np.array(im).astype(np.float32)
im = im.transpose((2, 0, 1)) # CHW
im = im[(2, 1, 0), :, :] # BGR
# The mean to be subtracted from each image.
# By default, the per-channel ImageNet mean.
mean = np.array([104., 117., 124.], dtype=np.float32)
mean = mean.reshape([3, 1, 1])
im = im - mean
return im.reshape([1] + shape)
def build_model(net_file, net_name):
print('build model with net_file[%s] and net_name[%s]' %
(net_file, net_name))
net_path = os.path.dirname(net_file)
module_name = os.path.basename(net_file).rstrip('.py')
if net_path not in sys.path:
sys.path.insert(0, net_path)
try:
m = __import__(module_name, fromlist=[net_name])
MyNet = getattr(m, net_name)
except Exception as e:
print('failed to load module[%s]' % (module_name))
print(e)
return None
input_name = 'data'
input_shape = MyNet.input_shapes()[input_name]
images = fluid.layers.data(name='image', shape=input_shape, dtype='float32')
#label = fluid.layers.data(name='label', shape=[1], dtype='int64')
net = MyNet({input_name: images})
input_shape = MyNet.input_shapes()[input_name]
return net, input_shape
def dump_results(results, names, root):
if os.path.exists(root) is False:
os.path.mkdir(root)
for i in range(len(names)):
n = names[i]
res = results[i]
filename = os.path.join(root, n)
np.save(filename + '.npy', res)
def infer(net_file, net_name, model_file, imgfile, debug=False):
""" do inference using a model which consist 'xxx.py' and 'xxx.npy'
"""
#1, build model
net, input_shape = build_model(net_file, net_name)
prediction = net.get_output()
#2, load weights for this model
place = fluid.CPUPlace()
exe = fluid.Executor(place)
startup_program = fluid.default_startup_program()
exe.run(startup_program)
if model_file.find('.npy') > 0:
net.load(data_path=model_file, exe=exe, place=place)
else:
net.load(data_path=model_file, exe=exe)
#3, test this model
test_program = fluid.default_main_program().clone()
fetch_list_var = []
fetch_list_name = []
if debug is False:
fetch_list_var.append(prediction)
else:
for k, v in net.layers.items():
fetch_list_var.append(v)
fetch_list_name.append(k)
np_images = load_data(imgfile, input_shape)
results = exe.run(program=test_program,
feed={'image': np_images},
fetch_list=fetch_list_var)
if debug is True:
dump_path = 'results.layers'
dump_results(results, fetch_list_name, dump_path)
print('all results dumped to [%s]' % (dump_path))
else:
result = results[0]
print('predicted class:', np.argmax(result))
if __name__ == "__main__":
""" maybe more convenient to use 'run.sh' to call this tool
"""
net_file = 'models/resnet50/resnet50.py'
weight_file = 'models/resnet50/resnet50.npy'
imgfile = 'data/65.jpeg'
net_name = 'ResNet50'
argc = len(sys.argv)
if argc == 5:
net_file = sys.argv[1]
weight_file = sys.argv[2]
imgfile = sys.argv[3]
net_name = sys.argv[4]
elif argc > 1:
print('usage:')
print('\tpython %s [net_file] [weight_file] [imgfile] [net_name]' %
(sys.argv[0]))
print('\teg:python %s %s %s %s %s' % (sys.argv[0], net_file,
weight_file, imgfile, net_name))
sys.exit(1)
infer(net_file, net_name, weight_file, imgfile)
#!/bin/bash
#function:
# a tool used to:
# 1, convert a caffe model
# 2, do inference using this model
#
#usage:
# bash run.sh resnet50 ./models.caffe/resnet50 ./models/resnet50
#
#set -x
if [[ $# -lt 3 ]];then
echo "usage:"
echo " bash $0 [model_name] [cf_model_path] [pd_model_path] [only_convert]"
echo " eg: bash $0 resnet50 ./models.caffe/resnet50 ./models/resnet50"
exit 1
else
model_name=$1
cf_model_path=$2
pd_model_path=$3
only_convert=$4
fi
proto_file=$cf_model_path/${model_name}.prototxt
caffemodel_file=$cf_model_path/${model_name}.caffemodel
weight_file=$pd_model_path/${model_name}.npy
net_file=$pd_model_path/${model_name}.py
if [[ ! -e $proto_file ]];then
echo "not found prototxt[$proto_file]"
exit 1
fi
if [[ ! -e $caffemodel_file ]];then
echo "not found caffemodel[$caffemodel_file]"
exit 1
fi
if [[ ! -e $pd_model_path ]];then
mkdir $pd_model_path
fi
PYTHON=`which cfpython`
if [[ -z $PYTHON ]];then
PYTHON=`which python`
fi
$PYTHON ../../convert.py \
$proto_file \
--caffemodel $caffemodel_file \
--data-output-path $weight_file\
--code-output-path $net_file
ret=$?
if [[ $ret -ne 0 ]];then
echo "failed to convert caffe model[$cf_model_path]"
exit $ret
else
echo "succeed to convert caffe model[$cf_model_path] to fluid model[$pd_model_path]"
fi
if [[ -z $only_convert ]];then
PYTHON=`which pdpython`
if [[ -z $PYTHON ]];then
PYTHON=`which python`
fi
imgfile="data/65.jpeg"
net_name=`grep "name" $proto_file | head -n1 | perl -ne 'if(/\"([^\"]+)\"/){ print $1."\n";}'`
$PYTHON ./infer.py $net_file $weight_file $imgfile $net_name
ret=$?
fi
exit $ret
a demo to show converting caffe model on 'mnist' using caffe2fluid
---
# How to use
1. prepare python environment
2. download caffe model to "models.caffe/lenet" which contains "lenet.caffemodel" and "lenet.prototxt"
3. run the tool
eg: bash ./run.sh lenet ./models.caffe/lenet ./models/lenet
......@@ -4,12 +4,12 @@
# demo to show how to use converted model using caffe2fluid
#
import sys
import os
import numpy as np
import paddle.v2 as paddle
import paddle.v2.fluid as fluid
from lenet import LeNet as MyNet
def test_model(exe, test_program, fetch_list, test_reader, feeder):
acc_set = []
......@@ -24,10 +24,15 @@ def test_model(exe, test_program, fetch_list, test_reader, feeder):
return float(acc_val)
def main(model_path):
def evaluate(net_file, model_file):
""" main
"""
print('load fluid model in %s' % (model_path))
#1, build model
net_path = os.path.dirname(net_file)
if net_path not in sys.path:
sys.path.insert(0, net_path)
from lenet import LeNet as MyNet
with_gpu = False
paddle.init(use_gpu=with_gpu)
......@@ -45,10 +50,10 @@ def main(model_path):
exe.run(fluid.default_startup_program())
#2, load weights
if model_path.find('.npy') > 0:
net.load(data_path=model_path, exe=exe, place=place)
if model_file.find('.npy') > 0:
net.load(data_path=model_file, exe=exe, place=place)
else:
net.load(data_path=model_path, exe=exe)
net.load(data_path=model_file, exe=exe)
#3, test this model
test_program = fluid.default_main_program().clone()
......@@ -65,10 +70,17 @@ def main(model_path):
if __name__ == "__main__":
import sys
if len(sys.argv) == 2:
fluid_model_path = sys.argv[1]
else:
fluid_model_path = './model.fluid'
main(fluid_model_path)
net_file = 'models/lenet/lenet.py'
weight_file = 'models/lenet/lenet.npy'
argc = len(sys.argv)
if argc == 3:
net_file = sys.argv[1]
weight_file = sys.argv[2]
elif argc > 1:
print('usage:')
print('\tpython %s [net_file] [weight_file]' % (sys.argv[0]))
print('\teg:python %s %s %s %s' % (sys.argv[0], net_file, weight_file))
sys.exit(1)
evaluate(net_file, weight_file)
#!/bin/bash
#function:
# a tool used to:
# 1, convert a caffe model
# 2, do inference using this model
#
#usage:
# bash run.sh lenet ./models.caffe/lenet ./models/lenet
#
#set -x
if [[ $# -lt 3 ]];then
echo "usage:"
echo " bash $0 [model_name] [cf_model_path] [pd_model_path] [only_convert]"
echo " eg: bash $0 lenet ./models.caffe/lenet ./models/lenet"
exit 1
else
model_name=$1
cf_model_path=$2
pd_model_path=$3
no_eval=$4
fi
proto_file=$cf_model_path/${model_name}.prototxt
caffemodel_file=$cf_model_path/${model_name}.caffemodel
weight_file=$pd_model_path/${model_name}.npy
net_file=$pd_model_path/${model_name}.py
if [[ ! -e $proto_file ]];then
echo "not found prototxt[$proto_file]"
exit 1
fi
if [[ ! -e $caffemodel_file ]];then
echo "not found caffemodel[$caffemodel_file]"
exit 1
fi
if [[ ! -e $pd_model_path ]];then
mkdir $pd_model_path
fi
PYTHON=`which cfpython`
if [[ -z $PYTHON ]];then
PYTHON=`which python`
fi
$PYTHON ../../convert.py \
$proto_file \
--caffemodel $caffemodel_file \
--data-output-path $weight_file\
--code-output-path $net_file
ret=$?
if [[ $ret -ne 0 ]];then
echo "failed to convert caffe model[$cf_model_path]"
exit $ret
else
echo "succeed to convert caffe model[$cf_model_path] to fluid model[$pd_model_path]"
fi
if [[ -z $only_convert ]];then
PYTHON=`which pdpython`
if [[ -z $PYTHON ]];then
PYTHON=`which python`
fi
net_name=`grep "name" $proto_file | head -n1 | perl -ne 'if(/\"([^\"]+)\"/){ print $1."\n";}'`
if [[ $net_name != "LeNet" ]];then
echo "only support LeNet"
exit 1
fi
$PYTHON ./evaluate.py $net_file $weight_file
ret=$?
fi
exit $ret
......@@ -54,7 +54,6 @@ def show_fallback_warning():
WARNING: PyCaffe not found!
Falling back to a pure protocol buffer implementation.
* Conversions will be drastically slower.
* This backend is UNTESTED!
------------------------------------------------------------
'''
......
......@@ -175,6 +175,7 @@ class GraphBuilder(object):
kind = NodeKind.map_raw_kind(layer.type)
if kind is None:
raise KaffeError('Unknown layer type encountered: %s' % layer.type)
# We want to use the layer's top names (the "output" names), rather than the
# name attribute, which is more of readability thing than a functional one.
# Other layers will refer to a node by its "top name".
......@@ -235,6 +236,7 @@ class GraphBuilder(object):
node.add_parent(parent_node)
if len(layer.top) > 1:
raise KaffeError('Multiple top nodes are not supported.')
for output_name in layer.top:
if output_name == layer.name:
# Output is named the same as the node. No further action required.
......
......@@ -51,17 +51,79 @@ LAYER_DESCRIPTORS = {
'Threshold': shape_identity,
}
LAYER_TYPES = LAYER_DESCRIPTORS.keys()
# layer types in 'V1LayerParameter'
# (v1layertype name, enum value, mapped to layer type)
v1_layertypes = [
('ABSVAL', 35),
('ACCURACY', 1),
('ARGMAX', 30),
('BNLL', 2),
('CONCAT', 3),
('CONVOLUTION', 4),
('DATA', 5),
('DECONVOLUTION', 39),
('DROPOUT', 6),
('ELTWISE', 25),
('EXP', 38),
('FLATTEN', 8),
('IM2COL', 11),
('INNERPRODUCT', 14),
('LRN', 15),
('MEMORYDATA', 29),
('MULTINOMIALLOGISTICLOSS', 16),
('MVN', 34),
('POOLING', 17),
('POWER', 26),
('RELU', 18),
('SIGMOID', 19),
('SIGMOIDCROSSENTROPYLOSS', 27),
('SILENCE', 36),
('SOFTMAX', 20),
('SPLIT', 22),
('SLICE', 33),
('TANH', 23),
('WINDOWDATA', 24),
('THRESHOLD', 31),
]
LAYER_TYPES = LAYER_DESCRIPTORS.keys()
LayerType = type('LayerType', (), {t: t for t in LAYER_TYPES})
#map the layer name in V1 to standard name
V1_LAYER_MAP = {'_not_init_': True}
def get_v1_layer_map():
global V1_LAYER_MAP
if '_not_init_' not in V1_LAYER_MAP:
return V1_LAYER_MAP
else:
del V1_LAYER_MAP['_not_init_']
name2layer = {}
for n in LAYER_TYPES:
name2layer[n.upper()] = n
for l in v1_layertypes:
n, v = l
if n in name2layer and v not in V1_LAYER_MAP:
V1_LAYER_MAP[v] = name2layer[n]
else:
raise KaffeError('not found v1 layer type %s' % n)
return V1_LAYER_MAP
class NodeKind(LayerType):
@staticmethod
def map_raw_kind(kind):
if kind in LAYER_TYPES:
return kind
return None
v1_layers = get_v1_layer_map()
if kind in v1_layers:
return v1_layers[kind]
else:
return None
@staticmethod
def compute_output_shape(node):
......
......@@ -27,6 +27,9 @@ def layer(op):
self.layers[name] = layer_output
# This output is now the input for the next layer.
self.feed(layer_output)
#print('output shape of %s:' % (name))
#print layer_output.shape
# Return self for chained calls.
return self
......@@ -158,41 +161,64 @@ class Network(object):
output = fluid.layers.relu(x=input)
return output
@layer
def max_pool(self, input, k_h, k_w, s_h, s_w, name, padding=None):
if padding is None:
padding = [0, 0]
def _adjust_pad_if_needed(self, i_hw, k_hw, s_hw, p_hw):
#adjust the padding if needed
i_h, i_w = i_hw
k_h, k_w = k_hw
s_h, s_w = s_hw
p_h, p_w = p_hw
def is_consistent(i, k, s, p):
o = i + 2 * p - k
if o % s == 0:
return True
else:
return False
real_p_h = 0
real_p_w = 0
if is_consistent(i_h, k_h, s_h, p_h) is False:
real_p_h = int(k_h / 2)
if is_consistent(i_w, k_w, s_w, p_w) is False:
real_p_w = int(k_w / 2)
return [real_p_h, real_p_w]
def pool(self, pool_type, input, k_h, k_w, s_h, s_w, name, padding):
# Get the number of channels in the input
h_i, w_i = input.shape[2:]
fluid = import_fluid()
output = fluid.layers.pool2d(
input=input,
pool_size=[k_h, k_w],
pool_stride=[s_h, s_w],
pool_padding=padding,
pool_type='max')
return output
in_hw = input.shape[2:]
k_hw = [k_h, k_w]
s_hw = [s_h, s_w]
@layer
def avg_pool(self, input, k_h, k_w, s_h, s_w, name, padding=None):
if padding is None:
padding = [0, 0]
#fix bug about the difference between conv and pool
#more info: https://github.com/BVLC/caffe/issues/1318
padding = self._adjust_pad_if_needed(in_hw, k_hw, s_hw, [0, 0])
# Get the number of channels in the input
h_i, w_i = input.shape[2:]
fluid = import_fluid()
output = fluid.layers.pool2d(
input=input,
pool_size=[k_h, k_w],
pool_stride=[s_h, s_w],
pool_size=k_hw,
pool_stride=s_hw,
pool_padding=padding,
pool_type='avg')
pool_type=pool_type)
return output
@layer
def max_pool(self, input, k_h, k_w, s_h, s_w, name, padding=None):
return self.pool('max', input, k_h, k_w, s_h, s_w, name, padding)
@layer
def avg_pool(self, input, k_h, k_w, s_h, s_w, name, padding=None):
return self.pool('avg', input, k_h, k_w, s_h, s_w, name, padding)
@layer
def lrn(self, input, radius, alpha, beta, name, bias=1.0):
raise Exception('lrn() not implemented yet')
fluid = import_fluid()
output = fluid.layers.lrn(input=input, \
n=radius, k=bias, alpha=alpha, beta=beta, name=name)
return output
@layer
def concat(self, inputs, axis, name):
......@@ -228,7 +254,7 @@ class Network(object):
@layer
def softmax(self, input, name):
fluid = import_fluid()
output = fluid.layers.softmax(x=input, name=name)
output = fluid.layers.softmax(input)
return output
@layer
......@@ -256,5 +282,8 @@ class Network(object):
return output
@layer
def dropout(self, input, keep_prob, name):
raise Exception('dropout() not implemented yet')
def dropout(self, input, drop_prob, name, is_test=True):
fluid = import_fluid()
output = fluid.layers.dropout(
input, dropout_prob=drop_prob, is_test=is_test, name=name)
return output
......@@ -132,8 +132,7 @@ class TensorFlowMapper(NodeMapper):
# just scales by alpha (as does Krizhevsky's paper).
# We'll account for that here.
alpha = params.alpha / float(params.local_size)
return TensorFlowNode('lrn',
int(params.local_size / 2), alpha, params.beta)
return TensorFlowNode('lrn', params.local_size, alpha, params.beta)
def map_concat(self, node):
return TensorFlowNode('concat', node.parameters.axis)
......@@ -191,22 +190,33 @@ class TensorFlowEmitter(object):
def emit_setup_def(self):
return self.statement('def setup(self):')
def emit_convert_def(self, input_nodes):
def data_layer_def(name, shape, dtype=None):
if dtype is None:
dtype = 'float32'
def emit_shape_def(self, input_nodes):
self.outdent()
func_def = self.statement('@classmethod')
func_def += self.statement('def input_shapes(cls):')
self.indent()
layer_var = name + '_layer'
shape = [str(s) for s in shape[1:]]
layer_def = '%s = fluid.layers.data(name="%s", shape=[%s], dtype="%s")'\
% (layer_var, name, ','.join(shape), dtype)
return layer_var, layer_def
input_shapes = {}
for n in input_nodes:
name = n.name
output_shape = n.output_shape
shape = [str(s) for s in output_shape[1:]]
input_shapes[name] = ', '.join(shape)
input_shapes = ['"%s": [%s]' % (n, l) for n, l in input_shapes.items()]
shape_str = ','.join(input_shapes)
func_def += self.statement('return {%s}' % (shape_str))
return '\n\n' + func_def
def emit_convert_def(self, input_nodes):
codes = []
inputs = {}
codes.append('shapes = cls.input_shapes()')
for n in input_nodes:
name = n.name
layer_var, layer_def = data_layer_def(n.name, n.output_shape)
layer_var = name + '_layer'
layer_def = '%s = fluid.layers.data(name="%s", shape=shapes["%s"],'\
' dtype="float32")' % (layer_var, name, name)
#layer_var, layer_def = data_layer_def(n.name, n.output_shape)
codes.append(layer_def)
inputs[name] = layer_var
......@@ -229,7 +239,7 @@ class TensorFlowEmitter(object):
func_def += self.statement('import paddle.v2.fluid as fluid')
for l in codes:
func_def += self.statement(l)
return '\n\n' + func_def
return '\n' + func_def
def emit_main_def(self, name):
if name is None:
......@@ -273,6 +283,7 @@ class TensorFlowEmitter(object):
b += self.emit_node(node)
blocks.append(b[:-1])
s = s + '\n\n'.join(blocks)
s += self.emit_shape_def(input_nodes)
s += self.emit_convert_def(input_nodes)
s += self.emit_main_def(name)
return s
......
### Convert lenet model from caffe format into paddle format(fluid api)
### Howto
1, Prepare your caffepb.py
2, Download a lenet caffe-model
lenet_iter_10000.caffemodel
download address: https://github.com/ethereon/caffe-tensorflow/raw/master/examples/mnist/lenet_iter_10000.caffemodel
md5: cbec75c1c374b6c1981c4a1eb024ae01
lenet.prototxt
download address: https://raw.githubusercontent.com/BVLC/caffe/master/examples/mnist/lenet.prototxt
md5: 27384af843338ab90b00c8d1c81de7d5
2, Convert this model(make sure caffepb.py is ready in ../../proto)
convert to npy format
bash ./convert.sh lenet.prototxt lenet.caffemodel lenet.py lenet.npy
save to fluid format(optional)
bash ./convert.sh lenet.prototxt lenet.caffemodel lenet.py lenet.npy && python ./lenet.py ./lenet.npy ./fluid.model
4, Use this new model(paddle installed in this python)
use fluid format
python ./predict.py ./fluid.model
use npy format
python ./predict.py ./lenet.npy
#!/bin/bash
#function:
# convert a caffe model
# eg:
# bash ./convert.sh ./model.caffe/lenet.prototxt ./model.caffe/lenet.caffemodel lenet.py lenet.npy
if [[ $# -ne 4 ]];then
echo "usage:"
echo " bash $0 [PROTOTXT] [CAFFEMODEL] [PY_NAME] [WEIGHT_NAME]"
echo " eg: bash $0 lenet.prototxt lenet.caffemodel lenet.py lenet.npy"
exit 1
fi
WORK_ROOT=$(dirname `readlink -f ${BASH_SOURCE[0]}`)
if [[ -z $PYTHON ]];then
PYTHON=`which python`
fi
PROTOTXT=$1
CAFFEMODEL=$2
PY_NAME=$3
WEIGHT_NAME=$4
CONVERTER_PY="$WORK_ROOT/../../convert.py"
$PYTHON $CONVERTER_PY $PROTOTXT --caffemodel $CAFFEMODEL --code-output-path=$PY_NAME --data-output-path=$WEIGHT_NAME
ret=$?
if [[ $ret -eq 0 ]];then
echo "succeed to convert caffe model[$CAFFEMODEL, $PROTOTXT] to paddle model[$PY_NAME, $WEIGHT_NAME]"
else
echo "failed to convert caffe model[$CAFFEMODEL, $PROTOTXT]"
fi
exit $ret
### generated by caffe2fluid, your net is in class "LeNet" ###
import math
import os
import numpy as np
def import_fluid():
import paddle.v2.fluid as fluid
return fluid
def layer(op):
'''Decorator for composable network layers.'''
def layer_decorated(self, *args, **kwargs):
# Automatically set a name if not provided.
name = kwargs.setdefault('name', self.get_unique_name(op.__name__))
# Figure out the layer inputs.
if len(self.terminals) == 0:
raise RuntimeError('No input variables found for layer %s.' % name)
elif len(self.terminals) == 1:
layer_input = self.terminals[0]
else:
layer_input = list(self.terminals)
# Perform the operation and get the output.
layer_output = op(self, layer_input, *args, **kwargs)
# Add to layer LUT.
self.layers[name] = layer_output
# This output is now the input for the next layer.
self.feed(layer_output)
# Return self for chained calls.
return self
return layer_decorated
class Network(object):
def __init__(self, inputs, trainable=True):
# The input nodes for this network
self.inputs = inputs
# The current list of terminal nodes
self.terminals = []
# Mapping from layer names to layers
self.layers = dict(inputs)
# If true, the resulting variables are set as trainable
self.trainable = trainable
# Switch variable for dropout
self.paddle_env = None
self.setup()
def setup(self):
'''Construct the network. '''
raise NotImplementedError('Must be implemented by the subclass.')
def load(self, data_path, exe=None, place=None, ignore_missing=False):
'''Load network weights.
data_path: The path to the numpy-serialized network weights
ignore_missing: If true, serialized weights for missing layers are ignored.
'''
fluid = import_fluid()
#load fluid mode directly
if os.path.isdir(data_path):
assert (exe is not None), \
'must provide a executor to load fluid model'
fluid.io.load_persistables_if_exist(executor=exe, dirname=data_path)
return True
#load model from a npy file
if exe is None or place is None:
if self.paddle_env is None:
place = fluid.CPUPlace()
exe = fluid.Executor(place)
self.paddle_env = {'place': place, 'exe': exe}
exe = exe.run(fluid.default_startup_program())
else:
place = self.paddle_env['place']
exe = self.paddle_env['exe']
data_dict = np.load(data_path).item()
for op_name in data_dict:
layer = self.layers[op_name]
for param_name, data in data_dict[op_name].iteritems():
try:
name = '%s_%s' % (op_name, param_name)
v = fluid.global_scope().find_var(name)
w = v.get_tensor()
w.set(data, place)
except ValueError:
if not ignore_missing:
raise
return True
def feed(self, *args):
'''Set the input(s) for the next operation by replacing the terminal nodes.
The arguments can be either layer names or the actual layers.
'''
assert len(args) != 0
self.terminals = []
for fed_layer in args:
if isinstance(fed_layer, basestring):
try:
fed_layer = self.layers[fed_layer]
except KeyError:
raise KeyError('Unknown layer name fed: %s' % fed_layer)
self.terminals.append(fed_layer)
return self
def get_output(self):
'''Returns the current network output.'''
return self.terminals[-1]
def get_unique_name(self, prefix):
'''Returns an index-suffixed unique name for the given prefix.
This is used for auto-generating layer names based on the type-prefix.
'''
ident = sum(t.startswith(prefix) for t, _ in self.layers.items()) + 1
return '%s_%d' % (prefix, ident)
@layer
def conv(self,
input,
k_h,
k_w,
c_o,
s_h,
s_w,
name,
relu=True,
padding=None,
group=1,
biased=True):
if padding is None:
padding = [0, 0]
# Get the number of channels in the input
c_i, h_i, w_i = input.shape[1:]
# Verify that the grouping parameter is valid
assert c_i % group == 0
assert c_o % group == 0
fluid = import_fluid()
prefix = name + '_'
output = fluid.layers.conv2d(
input=input,
filter_size=[k_h, k_w],
num_filters=c_o,
stride=[s_h, s_w],
padding=padding,
groups=group,
param_attr=fluid.ParamAttr(name=prefix + "weights"),
bias_attr=fluid.ParamAttr(name=prefix + "biases"),
act="relu" if relu is True else None)
return output
@layer
def relu(self, input, name):
fluid = import_fluid()
output = fluid.layers.relu(x=input)
return output
@layer
def max_pool(self, input, k_h, k_w, s_h, s_w, name, padding=None):
if padding is None:
padding = [0, 0]
# Get the number of channels in the input
h_i, w_i = input.shape[2:]
fluid = import_fluid()
output = fluid.layers.pool2d(
input=input,
pool_size=[k_h, k_w],
pool_stride=[s_h, s_w],
pool_padding=padding,
pool_type='max')
return output
@layer
def avg_pool(self, input, k_h, k_w, s_h, s_w, name, padding=None):
if padding is None:
padding = [0, 0]
# Get the number of channels in the input
h_i, w_i = input.shape[2:]
fluid = import_fluid()
output = fluid.layers.pool2d(
input=input,
pool_size=[k_h, k_w],
pool_stride=[s_h, s_w],
pool_padding=padding,
pool_type='avg')
return output
@layer
def lrn(self, input, radius, alpha, beta, name, bias=1.0):
raise Exception('lrn() not implemented yet')
@layer
def concat(self, inputs, axis, name):
fluid = import_fluid()
output = fluid.layers.concat(input=inputs, axis=axis)
return output
@layer
def add(self, inputs, name):
fluid = import_fluid()
output = inputs[0]
for i in inputs[1:]:
output = fluid.layers.elementwise_add(x=output, y=i)
return output
@layer
def fc(self, input, num_out, name, relu=True, act=None):
fluid = import_fluid()
if act is None:
act = 'relu' if relu is True else None
prefix = name + '_'
output = fluid.layers.fc(
name=name,
input=input,
size=num_out,
act=act,
param_attr=fluid.ParamAttr(name=prefix + 'weights'),
bias_attr=fluid.ParamAttr(name=prefix + 'biases'))
return output
@layer
def softmax(self, input, name):
fluid = import_fluid()
output = fluid.layers.softmax(x=input, name=name)
return output
@layer
def batch_normalization(self, input, name, scale_offset=True, relu=False):
# NOTE: Currently, only inference is supported
fluid = import_fluid()
prefix = name + '_'
param_attr = None if scale_offset is False else fluid.ParamAttr(
name=prefix + 'scale')
bias_attr = None if scale_offset is False else fluid.ParamAttr(
name=prefix + 'offset')
mean_name = prefix + 'mean'
variance_name = prefix + 'variance'
output = fluid.layers.batch_norm(
name=name,
input=input,
is_test=True,
param_attr=param_attr,
bias_attr=bias_attr,
moving_mean_name=mean_name,
moving_variance_name=variance_name,
epsilon=1e-5,
act='relu' if relu is True else None)
return output
@layer
def dropout(self, input, keep_prob, name):
raise Exception('dropout() not implemented yet')
class LeNet(Network):
def setup(self):
self.feed('data')
self.conv(5, 5, 20, 1, 1, relu=False, name='conv1')
self.max_pool(2, 2, 2, 2, name='pool1')
self.conv(5, 5, 50, 1, 1, relu=False, name='conv2')
self.max_pool(2, 2, 2, 2, name='pool2')
self.fc(500, name='ip1')
self.fc(10, relu=False, name='ip2')
self.softmax(name='prob')
@classmethod
def convert(cls, npy_model, fluid_path):
import paddle.v2.fluid as fluid
data_layer = fluid.layers.data(
name="data", shape=[1, 28, 28], dtype="float32")
feed_data = {"data": data_layer}
net = cls(feed_data)
place = fluid.CPUPlace()
exe = fluid.Executor(place)
exe.run(fluid.default_startup_program())
net.load(data_path=npy_model, exe=exe, place=place)
fluid.io.save_persistables(executor=exe, dirname=fluid_path)
if __name__ == "__main__":
#usage: python xxxnet.py xxx.npy ./model
import sys
npy_weight = sys.argv[1]
fluid_model = sys.argv[2]
LeNet.convert(npy_weight, fluid_model)
exit(0)
......@@ -194,8 +194,19 @@ def ctc_train_net(images, label, args, num_classes):
optimizer = fluid.optimizer.Momentum(
learning_rate=args.learning_rate, momentum=args.momentum)
_, params_grads = optimizer.minimize(sum_cost)
return sum_cost, error_evaluator, inference_program
model_average = None
if args.model_average:
model_average = fluid.optimizer.ModelAverage(
params_grads,
args.average_window,
min_average_window=args.min_average_window,
max_average_window=args.max_average_window)
decoded_out = fluid.layers.ctc_greedy_decoder(
input=fc_out, blank=num_classes)
casted_label = fluid.layers.cast(x=label, dtype='int64')
error_evaluator = fluid.evaluator.EditDistance(
input=decoded_out, label=casted_label)
return sum_cost, error_evaluator, inference_program, model_average
def ctc_infer(images, num_classes):
......
......@@ -23,6 +23,10 @@ add_arg('momentum', float, 0.9, "Momentum.")
add_arg('rnn_hidden_size',int, 200, "Hidden size of rnn layers.")
add_arg('device', int, 0, "Device id.'-1' means running on CPU"
"while '0' means GPU-0.")
add_arg('model_average', bool, True, "Whether to aevrage model for evaluation.")
add_arg('min_average_window', int, 10000, "Min average window.")
add_arg('max_average_window', int, 15625, "Max average window.")
add_arg('average_window', float, 0.15, "Average window.")
add_arg('parallel', bool, True, "Whether use parallel training.")
# yapf: disable
......@@ -40,7 +44,7 @@ def train(args, data_reader=dummy_reader):
# define network
images = fluid.layers.data(name='pixel', shape=data_shape, dtype='float32')
label = fluid.layers.data(name='label', shape=[1], dtype='int32', lod_level=1)
sum_cost, error_evaluator, inference_program = ctc_train_net(images, label, args, num_classes)
sum_cost, error_evaluator, inference_program, model_average = ctc_train_net(images, label, args, num_classes)
# data reader
train_reader = data_reader.train(args.batch_size)
......@@ -75,12 +79,16 @@ def train(args, data_reader=dummy_reader):
sys.stdout.flush()
batch_id += 1
error_evaluator.reset(exe)
for data in test_reader():
exe.run(inference_program, feed=get_feeder_data(data, place))
_, test_seq_error = error_evaluator.eval(exe)
print "\nEnd pass[%d]; Test seq error: %s.\n" % (
pass_id, str(test_seq_error[0]))
with model_average.apply(exe):
error_evaluator.reset(exe)
for data in test_reader():
exe.run(inference_program, feed=get_feeder_data(data, place))
_, test_seq_error = error_evaluator.eval(exe)
if model_average != None:
model_average.restore(exe)
print "\nEnd pass[%d]; Test seq error: %s.\n" % (
pass_id, str(test_seq_error[0]))
def main():
args = parser.parse_args()
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册