提交 26ba6680 编写于 作者: W wanghaoshuang

Merge branch 'develop' of https://github.com/PaddlePaddle/models into fix_params_grad

......@@ -17,7 +17,7 @@ addons:
- python-pip
- python2.7-dev
- clang-format-3.8
ssh_known_hosts: 52.76.173.135
ssh_known_hosts: 13.229.163.131
before_install:
- if [[ "$JOB" == "PRE_COMMIT" ]]; then sudo ln -s /usr/bin/clang-format-3.8 /usr/bin/clang-format; fi
- sudo pip install -U virtualenv pre-commit pip
......
......@@ -15,9 +15,7 @@ from multiprocessing import Manager, Process
import data_utils.augmentor.trans_mean_variance_norm as trans_mean_variance_norm
import data_utils.augmentor.trans_add_delta as trans_add_delta
from data_utils.util import suppress_complaints, suppress_signal
from data_utils.util import SharedNDArray, SharedMemoryPoolManager
from data_utils.util import DaemonProcessGroup, batch_to_ndarray
from data_utils.util import CriticalException, ForceExitWrapper, EpochEndSignal
from data_utils.util import CriticalException, ForceExitWrapper
class SampleInfo(object):
......@@ -32,11 +30,12 @@ class SampleInfo(object):
label_bin_path (str): File containing the label data.
label_size (int): Byte count of the sample's label data.
label_frame_num (int): Label number of the sample.
sample_name (str): Key of the sample
"""
def __init__(self, feature_bin_path, feature_start, feature_size,
feature_frame_num, feature_dim, label_bin_path, label_start,
label_size, label_frame_num):
label_size, label_frame_num, sample_name):
self.feature_bin_path = feature_bin_path
self.feature_start = feature_start
self.feature_size = feature_size
......@@ -47,6 +46,7 @@ class SampleInfo(object):
self.label_start = label_start
self.label_size = label_size
self.label_frame_num = label_frame_num
self.sample_name = sample_name
class SampleInfoBucket(object):
......@@ -69,8 +69,8 @@ class SampleInfoBucket(object):
split_sentence_threshold(int): Sentence whose length larger than
the value will trigger split operation.
split_sub_sentence_len(int): sub-sentence length is equal to
(split_sub_sentence_len + \
rand() % split_perturb).
(split_sub_sentence_len
+ rand() % split_perturb).
"""
def __init__(self,
......@@ -104,24 +104,33 @@ class SampleInfoBucket(object):
feature_bin_path = self._feature_bin_paths[block_idx]
feature_desc_path = self._feature_desc_paths[block_idx]
label_desc_lines = open(label_desc_path).readlines()
feature_desc_lines = open(feature_desc_path).readlines()
sample_num = int(label_desc_lines[0].split()[1])
assert sample_num == int(feature_desc_lines[0].split()[1])
label_desc_lines = []
if label_desc_path != "":
label_desc_lines = open(label_desc_path).readlines()
sample_num = int(feature_desc_lines[0].split()[1])
if label_desc_path != "":
assert sample_num == int(label_desc_lines[0].split()[1])
for i in xrange(sample_num):
feature_desc_split = feature_desc_lines[i + 1].split()
sample_name = feature_desc_split[0]
feature_start = int(feature_desc_split[2])
feature_size = int(feature_desc_split[3])
feature_frame_num = int(feature_desc_split[4])
feature_dim = int(feature_desc_split[5])
label_desc_split = label_desc_lines[i + 1].split()
label_start = int(label_desc_split[2])
label_size = int(label_desc_split[3])
label_frame_num = int(label_desc_split[4])
assert feature_frame_num == label_frame_num
label_start = -1
label_size = -1
label_frame_num = feature_frame_num
if label_desc_path != "":
label_desc_split = label_desc_lines[i + 1].split()
label_start = int(label_desc_split[2])
label_size = int(label_desc_split[3])
label_frame_num = int(label_desc_split[4])
assert feature_frame_num == label_frame_num
if self._split_sentence_threshold == -1 or \
self._split_perturb == -1 or \
......@@ -131,7 +140,7 @@ class SampleInfoBucket(object):
SampleInfo(feature_bin_path, feature_start,
feature_size, feature_frame_num, feature_dim,
label_bin_path, label_start, label_size,
label_frame_num))
label_frame_num, sample_name))
#split sentence
else:
cur_frame_pos = 0
......@@ -152,16 +161,19 @@ class SampleInfoBucket(object):
* feature_dim * 4, cur_frame_len * feature_dim *
4, cur_frame_len, feature_dim, label_bin_path,
label_start + cur_frame_pos * 4, cur_frame_len *
4, cur_frame_len))
4, cur_frame_len, sample_name))
remain_frame_num -= cur_frame_len
cur_frame_pos += cur_frame_len
if remain_frame_num <= 0:
break
return sample_info_list
class EpochEndSignal():
pass
class AsyncDataReader(object):
"""DataReader provides basic audio sample preprocessing pipeline including
data loading and data augmentation.
......@@ -190,7 +202,7 @@ class AsyncDataReader(object):
def __init__(self,
feature_file_list,
label_file_list,
label_file_list="",
drop_frame_len=512,
proc_num=10,
sample_buffer_size=1024,
......@@ -213,25 +225,30 @@ class AsyncDataReader(object):
self._sample_info_buffer_size = sample_info_buffer_size
self._batch_buffer_size = batch_buffer_size
self._proc_num = proc_num
if self._proc_num <= 2:
raise ValueError("Value of `proc_num` should be greater than 2.")
self._sample_proc_num = self._proc_num - 2
self._verbose = verbose
self._force_exit = ForceExitWrapper(self._manager.Value('b', False))
def generate_bucket_list(self, is_shuffle):
if self._block_info_list is None:
block_feature_info_lines = open(self._feature_file_list).readlines()
block_label_info_lines = open(self._label_file_list).readlines()
assert len(block_feature_info_lines) == len(block_label_info_lines)
self._block_info_list = []
for i in xrange(0, len(block_feature_info_lines), 2):
block_info = (block_feature_info_lines[i],
block_feature_info_lines[i + 1],
block_label_info_lines[i],
block_label_info_lines[i + 1])
self._block_info_list.append(
map(lambda line: line.strip(), block_info))
if self._label_file_list != "":
block_label_info_lines = open(self._label_file_list).readlines()
assert len(block_feature_info_lines) == len(
block_label_info_lines)
for i in xrange(0, len(block_feature_info_lines), 2):
block_info = (block_feature_info_lines[i],
block_feature_info_lines[i + 1],
block_label_info_lines[i],
block_label_info_lines[i + 1])
self._block_info_list.append(
map(lambda line: line.strip(), block_info))
else:
for i in xrange(0, len(block_feature_info_lines), 2):
block_info = (block_feature_info_lines[i],
block_feature_info_lines[i + 1], "", "")
self._block_info_list.append(
map(lambda line: line.strip(), block_info))
if is_shuffle:
self._rng.shuffle(self._block_info_list)
......@@ -251,23 +268,13 @@ class AsyncDataReader(object):
def set_transformers(self, transformers):
self._transformers = transformers
def recycle(self, *args):
for shared_ndarray in args:
if not isinstance(shared_ndarray, SharedNDArray):
raise Value("Only support recycle SharedNDArray object.")
shared_ndarray.recycle(self._pool_manager.pool)
def _start_async_processing(self):
def _sample_generator(self):
sample_info_queue = self._manager.Queue(self._sample_info_buffer_size)
sample_queue = self._manager.Queue(self._sample_buffer_size)
self._order_id = 0
@suppress_complaints(verbose=self._verbose, notify=self._force_exit)
def ordered_feeding_task(sample_info_queue):
if self._verbose == 0:
signal.signal(signal.SIGTERM, suppress_signal)
signal.signal(signal.SIGINT, suppress_signal)
for sample_info_bucket in self._bucket_list:
try:
sample_info_list = \
......@@ -280,12 +287,13 @@ class AsyncDataReader(object):
sample_info_queue.put((sample_info, self._order_id))
self._order_id += 1
for i in xrange(self._sample_proc_num):
for i in xrange(self._proc_num):
sample_info_queue.put(EpochEndSignal())
feeding_proc = DaemonProcessGroup(
proc_num=1, target=ordered_feeding_task, args=(sample_info_queue, ))
feeding_proc.start_all()
feeding_thread = Thread(
target=ordered_feeding_task, args=(sample_info_queue, ))
feeding_thread.daemon = True
feeding_thread.start()
@suppress_complaints(verbose=self._verbose, notify=self._force_exit)
def ordered_processing_task(sample_info_queue, sample_queue, out_order):
......@@ -313,25 +321,32 @@ class AsyncDataReader(object):
sample_info.feature_size)
assert sample_info.feature_frame_num \
* sample_info.feature_dim * 4 == len(feature_bytes), \
(sample_info.feature_bin_path,
sample_info.feature_frame_num,
sample_info.feature_dim,
len(feature_bytes))
label_bytes = read_bytes(sample_info.label_bin_path,
sample_info.label_start,
sample_info.label_size)
assert sample_info.label_frame_num * 4 == len(label_bytes), (
sample_info.label_bin_path, sample_info.label_array,
len(label_bytes))
label_array = struct.unpack('I' * sample_info.label_frame_num,
label_bytes)
label_data = np.array(
label_array, dtype='int64').reshape(
(sample_info.label_frame_num, 1))
* sample_info.feature_dim * 4 \
== len(feature_bytes), \
(sample_info.feature_bin_path,
sample_info.feature_frame_num,
sample_info.feature_dim,
len(feature_bytes))
label_data = None
if sample_info.label_bin_path != "":
label_bytes = read_bytes(sample_info.label_bin_path,
sample_info.label_start,
sample_info.label_size)
assert sample_info.label_frame_num * 4 == len(
label_bytes), (sample_info.label_bin_path,
sample_info.label_array,
len(label_bytes))
label_array = struct.unpack(
'I' * sample_info.label_frame_num, label_bytes)
label_data = np.array(
label_array, dtype='int64').reshape(
(sample_info.label_frame_num, 1))
else:
label_data = np.zeros(
(sample_info.label_frame_num, 1), dtype='int64')
feature_frame_num = sample_info.feature_frame_num
feature_dim = sample_info.feature_dim
......@@ -341,12 +356,11 @@ class AsyncDataReader(object):
feature_data = np.array(
feature_array, dtype='float32').reshape((
sample_info.feature_frame_num, sample_info.feature_dim))
sample_data = (feature_data, label_data)
sample_data = (feature_data, label_data,
sample_info.sample_name)
for transformer in self._transformers:
# @TODO(pkuyym) to make transfomer only accept feature_data
sample_data = transformer.perform_trans(sample_data)
while order_id != out_order[0]:
time.sleep(0.001)
......@@ -362,74 +376,77 @@ class AsyncDataReader(object):
out_order = self._manager.list([0])
args = (sample_info_queue, sample_queue, out_order)
sample_proc = DaemonProcessGroup(
proc_num=self._sample_proc_num,
target=ordered_processing_task,
args=args)
sample_proc.start_all()
workers = [
Process(
target=ordered_processing_task, args=args)
for _ in xrange(self._proc_num)
]
return sample_queue
for w in workers:
w.daemon = True
w.start()
def batch_iterator(self, batch_size, minimum_batch_size):
@suppress_complaints(verbose=self._verbose, notify=self._force_exit)
def batch_assembling_task(sample_queue, batch_queue, pool):
def conv_to_shared(ndarray):
while self._force_exit == False:
try:
(name, shared_ndarray) = pool.popitem()
except Exception as e:
time.sleep(0.001)
finished_proc_num = 0
while self._force_exit == False:
try:
sample = sample_queue.get_nowait()
except Queue.Empty:
time.sleep(0.001)
else:
if isinstance(sample, EpochEndSignal):
finished_proc_num += 1
if finished_proc_num >= self._proc_num:
break
else:
shared_ndarray.copy(ndarray)
return shared_ndarray
continue
if self._verbose == 0:
signal.signal(signal.SIGTERM, suppress_signal)
signal.signal(signal.SIGINT, suppress_signal)
yield sample
def batch_iterator(self, batch_size, minimum_batch_size):
def batch_to_ndarray(batch_samples, lod):
assert len(batch_samples)
frame_dim = batch_samples[0][0].shape[1]
batch_feature = np.zeros((lod[-1], frame_dim), dtype="float32")
batch_label = np.zeros((lod[-1], 1), dtype="int64")
start = 0
name_lst = []
for sample in batch_samples:
frame_num = sample[0].shape[0]
batch_feature[start:start + frame_num, :] = sample[0]
batch_label[start:start + frame_num, :] = sample[1]
start += frame_num
name_lst.append(sample[2])
return (batch_feature, batch_label, name_lst)
@suppress_complaints(verbose=self._verbose, notify=self._force_exit)
def batch_assembling_task(sample_generator, batch_queue):
batch_samples = []
lod = [0]
done_num = 0
while done_num < self._sample_proc_num:
sample = sample_queue.get()
if isinstance(sample, EpochEndSignal):
done_num += 1
else:
batch_samples.append(sample)
lod.append(lod[-1] + sample[0].shape[0])
if len(batch_samples) == batch_size:
feature, label = batch_to_ndarray(batch_samples, lod)
feature = conv_to_shared(feature)
label = conv_to_shared(label)
lod = conv_to_shared(np.array(lod).astype('int64'))
batch_queue.put((feature, label, lod))
batch_samples = []
lod = [0]
for sample in sample_generator():
batch_samples.append(sample)
lod.append(lod[-1] + sample[0].shape[0])
if len(batch_samples) == batch_size:
(batch_feature, batch_label, name_lst) = batch_to_ndarray(
batch_samples, lod)
batch_queue.put((batch_feature, batch_label, lod, name_lst))
batch_samples = []
lod = [0]
if len(batch_samples) >= minimum_batch_size:
(feature, label) = batch_to_ndarray(batch_samples, lod)
feature = conv_to_shared(feature)
label = conv_to_shared(label)
lod = conv_to_shared(np.array(lod).astype('int64'))
batch_queue.put((feature, label, lod))
(batch_feature, batch_label, name_lst) = batch_to_ndarray(
batch_samples, lod)
batch_queue.put((batch_feature, batch_label, lod, name_lst))
batch_queue.put(EpochEndSignal())
sample_queue = self._start_async_processing()
batch_queue = self._manager.Queue(self._batch_buffer_size)
batch_queue = Queue.Queue(self._batch_buffer_size)
self._pool_manager = SharedMemoryPoolManager(self._batch_buffer_size *
3, self._manager)
assembling_proc = DaemonProcessGroup(
proc_num=1,
assembling_thread = Thread(
target=batch_assembling_task,
args=(sample_queue, batch_queue, self._pool_manager.pool))
assembling_proc.start_all()
args=(self._sample_generator, batch_queue))
assembling_thread.daemon = True
assembling_thread.start()
while self._force_exit == False:
try:
......@@ -440,6 +457,3 @@ class AsyncDataReader(object):
if isinstance(batch_data, EpochEndSignal):
break
yield batch_data
# clean the shared memory
del self._pool_manager
......@@ -22,7 +22,7 @@ class TestTransMeanVarianceNorm(unittest.TestCase):
feature = np.zeros((2, 120), dtype="float32")
feature.fill(1)
trans = trans_mean_variance_norm.TransMeanVarianceNorm(self._file_path)
(feature1, label1) = trans.perform_trans((feature, None))
(feature1, label1, name) = trans.perform_trans((feature, None, None))
(mean, var) = trans.get_mean_var()
feature_flat1 = feature1.flatten()
feature_flat = feature.flatten()
......@@ -70,7 +70,7 @@ class TestTransAddDelta(unittest.TestCase):
feature[2, 0:40].fill(3)
feature[3, 0:40].fill(4)
trans = trans_add_delta.TransAddDelta()
(feature, label) = trans.perform_trans((feature, None))
(feature, label, name) = trans.perform_trans((feature, None, None))
self.assertAlmostEqual(feature.shape[0], 4)
self.assertAlmostEqual(feature.shape[1], 120)
self.assertAlmostEqual(1.0, feature[0][0])
......@@ -93,7 +93,7 @@ class TestTransSplict(unittest.TestCase):
feature[i, :].fill(i)
trans = trans_splice.TransSplice()
(feature, label) = trans.perform_trans((feature, None))
(feature, label, name) = trans.perform_trans((feature, None, None))
self.assertEqual(feature.shape[1], 110)
for i in xrange(8):
......
......@@ -32,9 +32,9 @@ class TransAddDelta(object):
Args:
sample(object,tuple): contain feature numpy and label numpy
Returns:
(feature, label)
(feature, label, name)
"""
(feature, label) = sample
(feature, label, name) = sample
frame_dim = feature.shape[1]
d_frame_dim = frame_dim * 3
head_filled = 5
......@@ -64,7 +64,7 @@ class TransAddDelta(object):
start * d_frame_dim + 2 * frame_dim, frame_dim, nframe,
d_frame_dim)
mat.shape = tmp_shape
return (mat[head_filled:mat.shape[0] - tail_filled, :], label)
return (mat[head_filled:mat.shape[0] - tail_filled, :], label, name)
def _regress(self, data_in, start_in, data_out, start_out, size, n, step):
""" regress
......
......@@ -53,9 +53,9 @@ class TransMeanVarianceNorm(object):
Args:
sample(object):input sample, contain feature numpy and label numpy
Returns:
(feature, label)
(feature, label, name)
"""
(feature, label) = sample
(feature, label, name) = sample
shape = feature.shape
assert len(shape) == 2
nfeature_len = shape[0] * shape[1]
......@@ -68,4 +68,4 @@ class TransMeanVarianceNorm(object):
feature[ncur_idx:ncur_idx + self._nLen] = block
ncur_idx += self._nLen
feature = feature.reshape(shape)
return (feature, label)
return (feature, label, name)
......@@ -30,9 +30,9 @@ class TransSplice(object):
Args:
sample(object): input sample(feature, label)
Return:
(feature, label)
(feature, label, name)
"""
(feature, label) = sample
(feature, label, name) = sample
nframe_num = feature.shape[0]
nframe_dim = feature.shape[1]
nnew_frame_dim = nframe_dim * (
......@@ -61,4 +61,4 @@ class TransSplice(object):
np.copyto(ret[i * nnew_frame_dim:(i + 1) * nnew_frame_dim],
mat[i * nframe_dim:i * nframe_dim + nnew_frame_dim])
ret = ret.reshape((nframe_num, nnew_frame_dim))
return (ret, label)
return (ret, label, name)
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import sys, time
import sys
from six import reraise
from tblib import Traceback
from multiprocessing import Manager, Process
import posix_ipc, mmap
import numpy as np
......@@ -37,19 +35,6 @@ def lodtensor_to_ndarray(lod_tensor):
return ret, lod_tensor.lod()
def batch_to_ndarray(batch_samples, lod):
frame_dim = batch_samples[0][0].shape[1]
batch_feature = np.zeros((lod[-1], frame_dim), dtype="float32")
batch_label = np.zeros((lod[-1], 1), dtype="int64")
start = 0
for sample in batch_samples:
frame_num = sample[0].shape[0]
batch_feature[start:start + frame_num, :] = sample[0]
batch_label[start:start + frame_num, :] = sample[1]
start += frame_num
return (batch_feature, batch_label)
def split_infer_result(infer_seq, lod):
infer_batch = []
for i in xrange(0, len(lod[0]) - 1):
......@@ -57,127 +42,10 @@ def split_infer_result(infer_seq, lod):
return infer_batch
class DaemonProcessGroup(object):
def __init__(self, proc_num, target, args):
self._proc_num = proc_num
self._workers = [
Process(
target=target, args=args) for _ in xrange(self._proc_num)
]
def start_all(self):
for w in self._workers:
w.daemon = True
w.start()
@property
def proc_num(self):
return self._proc_num
class EpochEndSignal(object):
pass
class CriticalException(Exception):
pass
class SharedNDArray(object):
"""SharedNDArray utilizes shared memory to avoid data serialization when
data object shared among different processes. We can reconstruct the
`ndarray` when memory address, shape and dtype provided.
Args:
name (str): Address name of shared memory.
whether_verify (bool): Whether to validate the writing operation.
"""
def __init__(self, name, whether_verify=False):
self._name = name
self._shm = None
self._buf = None
self._array = np.zeros(1, dtype=np.float32)
self._inited = False
self._whether_verify = whether_verify
def zeros_like(self, shape, dtype):
size = int(np.prod(shape)) * np.dtype(dtype).itemsize
if self._inited:
self._shm = posix_ipc.SharedMemory(self._name)
else:
self._shm = posix_ipc.SharedMemory(
self._name, posix_ipc.O_CREAT, size=size)
self._buf = mmap.mmap(self._shm.fd, size)
self._array = np.ndarray(shape, dtype, self._buf, order='C')
def copy(self, ndarray):
size = int(np.prod(ndarray.shape)) * np.dtype(ndarray.dtype).itemsize
self.zeros_like(ndarray.shape, ndarray.dtype)
self._array[:] = ndarray
self._buf.flush()
self._inited = True
if self._whether_verify:
shm = posix_ipc.SharedMemory(self._name)
buf = mmap.mmap(shm.fd, size)
array = np.ndarray(ndarray.shape, ndarray.dtype, buf, order='C')
np.testing.assert_array_equal(array, ndarray)
@property
def ndarray(self):
return self._array
def recycle(self, pool):
self._buf.close()
self._shm.close_fd()
self._inited = False
pool[self._name] = self
def __getstate__(self):
return (self._name, self._array.shape, self._array.dtype, self._inited,
self._whether_verify)
def __setstate__(self, state):
self._name = state[0]
self._inited = state[3]
self.zeros_like(state[1], state[2])
self._whether_verify = state[4]
class SharedMemoryPoolManager(object):
"""SharedMemoryPoolManager maintains a multiprocessing.Manager.dict object.
All available addresses are allocated once and will be reused. Though this
class is not process-safe, the pool can be shared between processes. All
shared memory should be unlinked before the main process exited.
Args:
pool_size (int): Size of shared memory pool.
manager (dict): A multiprocessing.Manager object, the pool is
maintained by the proxy process.
name_prefix (str): Address prefix of shared memory.
"""
def __init__(self, pool_size, manager, name_prefix='/deep_asr'):
self._names = []
self._dict = manager.dict()
self._time_prefix = time.strftime('%Y%m%d%H%M%S')
for i in xrange(pool_size):
name = name_prefix + '_' + self._time_prefix + '_' + str(i)
self._dict[name] = SharedNDArray(name)
self._names.append(name)
@property
def pool(self):
return self._dict
def __del__(self):
for name in self._names:
# have to unlink the shared memory
posix_ipc.unlink_shared_memory(name)
def suppress_signal(signo, stack_frame):
pass
......
......@@ -21,14 +21,15 @@ using fst::StdArc;
Decoder::Decoder(std::string word_syms_filename,
std::string fst_in_filename,
std::string logprior_rxfilename) {
std::string logprior_rxfilename,
kaldi::BaseFloat acoustic_scale) {
const char* usage =
"Decode, reading log-likelihoods (of transition-ids or whatever symbol "
"is on the graph) as matrices.";
kaldi::ParseOptions po(usage);
binary = true;
acoustic_scale = 1.5;
this->acoustic_scale = acoustic_scale;
allow_partial = true;
kaldi::FasterDecoderOptions decoder_opts;
decoder_opts.Register(&po, true); // true == include obscure settings.
......
......@@ -29,7 +29,8 @@ class Decoder {
public:
Decoder(std::string word_syms_filename,
std::string fst_in_filename,
std::string logprior_rxfilename);
std::string logprior_rxfilename,
kaldi::BaseFloat acoustic_scale);
~Decoder();
// Interface to accept the scores read from specifier and return
......
......@@ -23,7 +23,7 @@ PYBIND11_MODULE(post_decode_faster, m) {
m.doc() = "Decoder for Deep ASR model";
py::class_<Decoder>(m, "Decoder")
.def(py::init<std::string, std::string, std::string>())
.def(py::init<std::string, std::string, std::string, kaldi::BaseFloat>())
.def("decode",
(std::vector<std::string> (Decoder::*)(std::string)) &
Decoder::decode,
......
data_dir=~/.cache/paddle/dataset/speech/deep_asr_data/aishell
data_url='http://deep-asr-data.gz.bcebos.com/aishell_data.tar.gz'
lst_url='http://deep-asr-data.gz.bcebos.com/aishell_lst.tar.gz'
md5=e017d858d9e509c8a84b73f673f08b9a
if [ ! -e $data_dir ]; then
mkdir -p $data_dir
fi
if [ ! -e $data_dir/aishell_data.tar.gz ]; then
echo "Download $data_dir/aishell_data.tar.gz ..."
wget -c -P $data_dir $data_url
else
echo "Skip downloading for $data_dir/aishell_data.tar.gz has already existed!"
fi
echo "Checking md5 sum ..."
md5sum_tmp=`md5sum $data_dir/aishell_data.tar.gz | cut -d ' ' -f1`
if [ $md5sum_tmp != $md5 ]; then
echo "Md5sum check failed, please remove and redownload "
"$data_dir/aishell_data.tar.gz"
exit 1
fi
echo "Untar aishell_data.tar.gz ..."
tar xzf $data_dir/aishell_data.tar.gz -C $data_dir
if [ ! -e data ]; then
mkdir data
fi
echo "Download and untar lst files ..."
wget -c -P data $lst_url
tar xvf data/aishell_lst.tar.gz -C data
ln -s $data_dir data/aishell
export CUDA_VISIBLE_DEVICES=2,3,4,5
python -u ../../train.py --train_feature_lst data/train_feature.lst \
--train_label_lst data/train_label.lst \
--val_feature_lst data/val_feature.lst \
--val_label_lst data/val_label.lst \
--mean_var data/aishell/global_mean_var \
--checkpoints checkpoints \
--frame_dim 2640 \
--class_num 101 \
--infer_models '' \
--batch_size 128 \
--learning_rate 0.00016 \
--parallel
......@@ -8,7 +8,7 @@ import paddle.fluid as fluid
import data_utils.augmentor.trans_mean_variance_norm as trans_mean_variance_norm
import data_utils.augmentor.trans_add_delta as trans_add_delta
import data_utils.augmentor.trans_splice as trans_splice
import data_utils.data_reader as reader
import data_utils.async_data_reader as reader
from data_utils.util import lodtensor_to_ndarray
from data_utils.util import split_infer_result
......@@ -79,12 +79,13 @@ def infer(args):
trans_splice.TransSplice()
]
infer_data_reader = reader.DataReader(args.infer_feature_lst,
args.infer_label_lst)
infer_data_reader = reader.AsyncDataReader(args.infer_feature_lst,
args.infer_label_lst)
infer_data_reader.set_transformers(ltrans)
feature_t = fluid.LoDTensor()
one_batch = infer_data_reader.batch_iterator(args.batch_size, 1).next()
(features, labels, lod) = one_batch
feature_t.set(features, place)
feature_t.set_lod([lod])
......
......@@ -17,6 +17,7 @@ from decoder.post_decode_faster import Decoder
from data_utils.util import lodtensor_to_ndarray
from model_utils.model import stacked_lstmp_model
from data_utils.util import split_infer_result
from tools.error_rate import char_errors
def parse_args():
......@@ -86,6 +87,11 @@ def parse_args():
type=str,
default='data/infer_label.lst',
help='The label list path for inference. (default: %(default)s)')
parser.add_argument(
'--ref_txt',
type=str,
default='data/text.test',
help='The reference text for decoding. (default: %(default)s)')
parser.add_argument(
'--checkpoint',
type=str,
......@@ -106,6 +112,16 @@ def parse_args():
type=str,
default="./decoder/logprior",
help="The log prior probs for training data. (default: %(default)s)")
parser.add_argument(
'--acoustic_scale',
type=float,
default=0.2,
help="Scaling factor for acoustic likelihoods. (default: %(default)f)")
parser.add_argument(
'--target_trans',
type=str,
default="./decoder/target_trans.txt",
help="The path to target transcription. (default: %(default)s)")
args = parser.parse_args()
return args
......@@ -117,6 +133,18 @@ def print_arguments(args):
print('------------------------------------------------')
def get_trg_trans(args):
trans_dict = {}
with open(args.target_trans) as trg_trans:
line = trg_trans.readline()
while line:
items = line.strip().split()
key = items[0]
trans_dict[key] = ''.join(items[1:])
line = trg_trans.readline()
return trans_dict
def infer_from_ckpt(args):
"""Inference by using checkpoint."""
......@@ -140,9 +168,14 @@ def infer_from_ckpt(args):
exe = fluid.Executor(place)
exe.run(fluid.default_startup_program())
trg_trans = get_trg_trans(args)
# load checkpoint.
fluid.io.load_persistables(exe, args.checkpoint)
# init decoder
decoder = Decoder(args.vocabulary, args.graphs, args.log_prior,
args.acoustic_scale)
ltrans = [
trans_add_delta.TransAddDelta(2, 2),
trans_mean_variance_norm.TransMeanVarianceNorm(args.mean_var),
......@@ -157,17 +190,16 @@ def infer_from_ckpt(args):
args.infer_label_lst)
infer_data_reader.set_transformers(ltrans)
infer_costs, infer_accs = [], []
total_edit_dist, total_ref_len = 0.0, 0
for batch_id, batch_data in enumerate(
infer_data_reader.batch_iterator(args.batch_size,
args.minimum_batch_size)):
# load_data
(features, labels, lod) = batch_data
feature_t.set(features.ndarray, place)
feature_t.set_lod([lod.ndarray])
label_t.set(labels.ndarray, place)
label_t.set_lod([lod.ndarray])
infer_data_reader.recycle(features, labels, lod)
(features, labels, lod, name_lst) = batch_data
feature_t.set(features, place)
feature_t.set_lod([lod])
label_t.set(labels, place)
label_t.set_lod([lod])
results = exe.run(infer_program,
feed={"feature": feature_t,
......@@ -179,11 +211,19 @@ def infer_from_ckpt(args):
probs, lod = lodtensor_to_ndarray(results[0])
infer_batch = split_infer_result(probs, lod)
for index, sample in enumerate(infer_batch):
key = "utter#%d" % (batch_id * args.batch_size + index)
print(key, ": ", decoder.decode(key, sample), "\n")
print(np.mean(infer_costs), np.mean(infer_accs))
for index, sample in enumerate(infer_batch):
key = name_lst[index]
ref = trg_trans[key]
hyp = decoder.decode(key, sample)
edit_dist, ref_len = char_errors(ref.decode("utf8"), hyp)
total_edit_dist += edit_dist
total_ref_len += ref_len
print(key + "|Ref:", ref)
print(key + "|Hyp:", hyp.encode("utf8"))
print("Instance CER: ", edit_dist / ref_len)
print("Total CER = %f" % (total_edit_dist / total_ref_len))
if __name__ == '__main__':
......
# -*- coding: utf-8 -*-
"""This module provides functions to calculate error rate in different level.
e.g. wer for word-level, cer for char-level.
"""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import numpy as np
def _levenshtein_distance(ref, hyp):
"""Levenshtein distance is a string metric for measuring the difference
between two sequences. Informally, the levenshtein disctance is defined as
the minimum number of single-character edits (substitutions, insertions or
deletions) required to change one word into the other. We can naturally
extend the edits to word level when calculate levenshtein disctance for
two sentences.
"""
m = len(ref)
n = len(hyp)
# special case
if ref == hyp:
return 0
if m == 0:
return n
if n == 0:
return m
if m < n:
ref, hyp = hyp, ref
m, n = n, m
# use O(min(m, n)) space
distance = np.zeros((2, n + 1), dtype=np.int32)
# initialize distance matrix
for j in xrange(n + 1):
distance[0][j] = j
# calculate levenshtein distance
for i in xrange(1, m + 1):
prev_row_idx = (i - 1) % 2
cur_row_idx = i % 2
distance[cur_row_idx][0] = i
for j in xrange(1, n + 1):
if ref[i - 1] == hyp[j - 1]:
distance[cur_row_idx][j] = distance[prev_row_idx][j - 1]
else:
s_num = distance[prev_row_idx][j - 1] + 1
i_num = distance[cur_row_idx][j - 1] + 1
d_num = distance[prev_row_idx][j] + 1
distance[cur_row_idx][j] = min(s_num, i_num, d_num)
return distance[m % 2][n]
def word_errors(reference, hypothesis, ignore_case=False, delimiter=' '):
"""Compute the levenshtein distance between reference sequence and
hypothesis sequence in word-level.
:param reference: The reference sentence.
:type reference: basestring
:param hypothesis: The hypothesis sentence.
:type hypothesis: basestring
:param ignore_case: Whether case-sensitive or not.
:type ignore_case: bool
:param delimiter: Delimiter of input sentences.
:type delimiter: char
:return: Levenshtein distance and word number of reference sentence.
:rtype: list
"""
if ignore_case == True:
reference = reference.lower()
hypothesis = hypothesis.lower()
ref_words = filter(None, reference.split(delimiter))
hyp_words = filter(None, hypothesis.split(delimiter))
edit_distance = _levenshtein_distance(ref_words, hyp_words)
return float(edit_distance), len(ref_words)
def char_errors(reference, hypothesis, ignore_case=False, remove_space=False):
"""Compute the levenshtein distance between reference sequence and
hypothesis sequence in char-level.
:param reference: The reference sentence.
:type reference: basestring
:param hypothesis: The hypothesis sentence.
:type hypothesis: basestring
:param ignore_case: Whether case-sensitive or not.
:type ignore_case: bool
:param remove_space: Whether remove internal space characters
:type remove_space: bool
:return: Levenshtein distance and length of reference sentence.
:rtype: list
"""
if ignore_case == True:
reference = reference.lower()
hypothesis = hypothesis.lower()
join_char = ' '
if remove_space == True:
join_char = ''
reference = join_char.join(filter(None, reference.split(' ')))
hypothesis = join_char.join(filter(None, hypothesis.split(' ')))
edit_distance = _levenshtein_distance(reference, hypothesis)
return float(edit_distance), len(reference)
def wer(reference, hypothesis, ignore_case=False, delimiter=' '):
"""Calculate word error rate (WER). WER compares reference text and
hypothesis text in word-level. WER is defined as:
.. math::
WER = (Sw + Dw + Iw) / Nw
where
.. code-block:: text
Sw is the number of words subsituted,
Dw is the number of words deleted,
Iw is the number of words inserted,
Nw is the number of words in the reference
We can use levenshtein distance to calculate WER. Please draw an attention
that empty items will be removed when splitting sentences by delimiter.
:param reference: The reference sentence.
:type reference: basestring
:param hypothesis: The hypothesis sentence.
:type hypothesis: basestring
:param ignore_case: Whether case-sensitive or not.
:type ignore_case: bool
:param delimiter: Delimiter of input sentences.
:type delimiter: char
:return: Word error rate.
:rtype: float
:raises ValueError: If word number of reference is zero.
"""
edit_distance, ref_len = word_errors(reference, hypothesis, ignore_case,
delimiter)
if ref_len == 0:
raise ValueError("Reference's word number should be greater than 0.")
wer = float(edit_distance) / ref_len
return wer
def cer(reference, hypothesis, ignore_case=False, remove_space=False):
"""Calculate charactor error rate (CER). CER compares reference text and
hypothesis text in char-level. CER is defined as:
.. math::
CER = (Sc + Dc + Ic) / Nc
where
.. code-block:: text
Sc is the number of characters substituted,
Dc is the number of characters deleted,
Ic is the number of characters inserted
Nc is the number of characters in the reference
We can use levenshtein distance to calculate CER. Chinese input should be
encoded to unicode. Please draw an attention that the leading and tailing
space characters will be truncated and multiple consecutive space
characters in a sentence will be replaced by one space character.
:param reference: The reference sentence.
:type reference: basestring
:param hypothesis: The hypothesis sentence.
:type hypothesis: basestring
:param ignore_case: Whether case-sensitive or not.
:type ignore_case: bool
:param remove_space: Whether remove internal space characters
:type remove_space: bool
:return: Character error rate.
:rtype: float
:raises ValueError: If the reference length is zero.
"""
edit_distance, ref_len = char_errors(reference, hypothesis, ignore_case,
remove_space)
if ref_len == 0:
raise ValueError("Length of reference should be greater than 0.")
cer = float(edit_distance) / ref_len
return cer
......@@ -168,15 +168,13 @@ def profile(args):
start_time = time.time()
frames_seen = 0
# load_data
(features, labels, lod) = batch_data
feature_t.set(features.ndarray, place)
feature_t.set_lod([lod.ndarray])
label_t.set(labels.ndarray, place)
label_t.set_lod([lod.ndarray])
(features, labels, lod, _) = batch_data
feature_t.set(features, place)
feature_t.set_lod([lod])
label_t.set(labels, place)
label_t.set_lod([lod])
frames_seen += lod.ndarray[-1]
data_reader.recycle(features, labels, lod)
frames_seen += lod[-1]
outs = exe.run(fluid.default_main_program(),
feed={"feature": feature_t,
......
......@@ -192,13 +192,11 @@ def train(args):
test_data_reader.batch_iterator(args.batch_size,
args.minimum_batch_size)):
# load_data
(features, labels, lod) = batch_data
feature_t.set(features.ndarray, place)
feature_t.set_lod([lod.ndarray])
label_t.set(labels.ndarray, place)
label_t.set_lod([lod.ndarray])
test_data_reader.recycle(features, labels, lod)
(features, labels, lod, _) = batch_data
feature_t.set(features, place)
feature_t.set_lod([lod])
label_t.set(labels, place)
label_t.set_lod([lod])
cost, acc = exe.run(test_program,
feed={"feature": feature_t,
......@@ -212,6 +210,7 @@ def train(args):
# train data reader
train_data_reader = reader.AsyncDataReader(args.train_feature_lst,
args.train_label_lst, -1)
train_data_reader.set_transformers(ltrans)
# train
for pass_id in xrange(args.pass_num):
......@@ -220,13 +219,11 @@ def train(args):
train_data_reader.batch_iterator(args.batch_size,
args.minimum_batch_size)):
# load_data
(features, labels, lod) = batch_data
feature_t.set(features.ndarray, place)
feature_t.set_lod([lod.ndarray])
label_t.set(labels.ndarray, place)
label_t.set_lod([lod.ndarray])
train_data_reader.recycle(features, labels, lod)
(features, labels, lod, name_lst) = batch_data
feature_t.set(features, place)
feature_t.set_lod([lod])
label_t.set(labels, place)
label_t.set_lod([lod])
to_print = batch_id > 0 and (batch_id % args.print_per_batches == 0)
outs = exe.run(fluid.default_main_program(),
......
......@@ -4,10 +4,109 @@ The minimum PaddlePaddle version needed for the code sample in this directory is
# Advbox
Advbox is a Python toolbox to create adversarial examples that fool neural networks. It requires Python and paddle.
Advbox is a toolbox to generate adversarial examples that fool neural networks and Advbox can benchmark the robustness of machine learning models.
## How to use
The Advbox is based on [PaddlePaddle](https://github.com/PaddlePaddle/Paddle) Fluid and is under continual development, always welcoming contributions of the latest method of adversarial attacks and defenses.
1. train a model and save it's parameters. (like fluid_mnist.py)
2. load the parameters which is trained in step1, then reconstruct the model.(like mnist_tutorial_fgsm.py)
3. use advbox to generate the adversarial sample.
## Overview
[Szegedy et al.](https://arxiv.org/abs/1312.6199) discovered an intriguing properties of deep neural networks in the context of image classification for the first time. They showed that despite the state-of-the-art deep networks are surprisingly susceptible to adversarial attacks in the form of small perturbations to images that remain (almost) imperceptible to human vision system. These perturbations are found by optimizing the input to maximize the prediction error and the images modified by these perturbations are called as `adversarial examples`. The profound implications of these results triggered a wide interest of researchers in adversarial attacks and their defenses for deep learning in general.
Advbox is similar to [Foolbox](https://github.com/bethgelab/foolbox) and [CleverHans](https://github.com/tensorflow/cleverhans). CleverHans only supports TensorFlow framework while foolbox interfaces with many popular machine learning frameworks such as PyTorch, Keras, TensorFlow, Theano, Lasagne and MXNet. However, these two great libraries don't support PaddlePaddle, an easy-to-use, efficient, flexible and scalable deep learning platform which is originally developed by Baidu scientists and engineers for the purpose of applying deep learning to many products at Baidu.
## Usage
Advbox provides many stable reference implementations of modern methods to generate adversarial examples such as FGSM, DeepFool, JSMA. When you want to benchmark the robustness of your neural networks , you can use the advbox to generate some adversarial examples and benchmark the networks. Some tips of using Advbox:
1. Train a model and save the parameters.
2. Load the parameters which has been trained,then reconstruct the model.
3. Use advbox to generate the adversarial samples.
#### Dependencies
* PaddlePaddle: [the lastest develop branch](http://www.paddlepaddle.org/docs/develop/documentation/en/build_and_install/pip_install_en.html)
* Python 2.x
#### Structure
Network models, attack method's implements and the criterion that defines adversarial examples are three essential elements to generate adversarial examples. Misclassification is adopted as the adversarial criterion for briefness in Advbox.
The structure of Advbox module are as follows:
.
├── advbox
| ├── __init__.py
| ├── attack
| ├── __init__.py
| ├── base.py
| ├── deepfool.py
| ├── gradient_method.py
| ├── lbfgs.py
| └── saliency.py
| ├── models
| ├── __init__.py
| ├── base.py
| └── paddle.py
| └── adversary.py
├── tutorials
| ├── __init__.py
| ├── mnist_model.py
| ├── mnist_tutorial_lbfgs.py
| ├── mnist_tutorial_fgsm.py
| ├── mnist_tutorial_bim.py
| ├── mnist_tutorial_ilcm.py
| ├── mnist_tutorial_mifgsm.py
| ├── mnist_tutorial_jsma.py
| └── mnist_tutorial_deepfool.py
└── README.md
**advbox.attack**
Advbox implements several popular adversarial attacks which search adversarial examples. Each attack method uses a distance measure(L1, L2, etc.) to quantify the size of adversarial perturbations. Advbox is easy to craft adversarial example as some attack methods could perform internal hyperparameter tuning to find the minimum perturbation.
**advbox.model**
Advbox implements interfaces to PaddlePaddle. Additionally, other deep learning framworks such as TensorFlow can also be defined and employed. The module is use to compute predictions and gradients for given inputs in a specific framework.
**advbox.adversary**
Adversary contains the original object, the target and the adversarial examples. It provides the misclassification as the criterion to accept a adversarial example.
## Tutorials
The `./tutorials/` folder provides some tutorials to generate adversarial examples on the MNIST dataset. You can slightly modify the code to apply to other dataset. These attack methods are supported in Advbox:
* [L-BFGS](https://arxiv.org/abs/1312.6199)
* [FGSM](https://arxiv.org/abs/1412.6572)
* [BIM](https://arxiv.org/abs/1607.02533)
* [ILCM](https://arxiv.org/abs/1607.02533)
* [MI-FGSM](https://arxiv.org/pdf/1710.06081.pdf)
* [JSMA](https://arxiv.org/pdf/1511.07528)
* [DeepFool](https://arxiv.org/abs/1511.04599)
## Testing
Benchmarks on a vanilla CNN model.
> MNIST
| adversarial attacks | fooling rate (non-targeted) | fooling rate (targeted) | max_epsilon | iterations | Strength |
|:-----:| :----: | :---: | :----: | :----: | :----: |
|L-BFGS| --- | 89.2% | --- | One shot | *** |
|FGSM| 57.8% | 26.55% | 0.3 | One shot| *** |
|BIM| 97.4% | --- | 0.1 | 100 | **** |
|ILCM| --- | 100.0% | 0.1 | 100 | **** |
|MI-FGSM| 94.4% | 100.0% | 0.1 | 100 | **** |
|JSMA| 96.8% | 90.4%| 0.1 | 2000 | *** |
|DeepFool| 97.7% | 51.3% | --- | 100 | **** |
* The strength (higher for more asterisks) is based on the impression from the reviewed literature.
---
## References
* [Intriguing properties of neural networks](https://arxiv.org/abs/1312.6199), C. Szegedy et al., arxiv 2014
* [Explaining and Harnessing Adversarial Examples](https://arxiv.org/abs/1412.6572), I. Goodfellow et al., ICLR 2015
* [Adversarial Examples In The Physical World](https://arxiv.org/pdf/1607.02533v3.pdf), A. Kurakin et al., ICLR workshop 2017
* [Boosting Adversarial Attacks with Momentum](https://arxiv.org/abs/1710.06081), Yinpeng Dong et al., arxiv 2018
* [The Limitations of Deep Learning in Adversarial Settings](https://arxiv.org/abs/1511.07528), N. Papernot et al., ESSP 2016
* [DeepFool: a simple and accurate method to fool deep neural networks](https://arxiv.org/abs/1511.04599), S. Moosavi-Dezfooli et al., CVPR 2016
* [Foolbox: A Python toolbox to benchmark the robustness of machine learning models](https://arxiv.org/abs/1707.04131), Jonas Rauber et al., arxiv 2018
* [CleverHans: An adversarial example library for constructing attacks, building defenses, and benchmarking both](https://github.com/tensorflow/cleverhans#setting-up-cleverhans)
* [Threat of Adversarial Attacks on Deep Learning in Computer Vision: A Survey](https://arxiv.org/abs/1801.00553), Naveed Akhtar, Ajmal Mian, arxiv 2018
......@@ -14,7 +14,8 @@ __all__ = [
'GradientMethodAttack', 'FastGradientSignMethodAttack', 'FGSM',
'FastGradientSignMethodTargetedAttack', 'FGSMT',
'BasicIterativeMethodAttack', 'BIM',
'IterativeLeastLikelyClassMethodAttack', 'ILCM'
'IterativeLeastLikelyClassMethodAttack', 'ILCM', 'MomentumIteratorAttack',
'MIFGSM'
]
......@@ -32,7 +33,12 @@ class GradientMethodAttack(Attack):
super(GradientMethodAttack, self).__init__(model)
self.support_targeted = support_targeted
def _apply(self, adversary, norm_ord=np.inf, epsilons=0.01, steps=100):
def _apply(self,
adversary,
norm_ord=np.inf,
epsilons=0.01,
steps=1,
epsilon_steps=100):
"""
Apply the gradient attack method.
:param adversary(Adversary):
......@@ -41,8 +47,11 @@ class GradientMethodAttack(Attack):
Order of the norm, such as np.inf, 1, 2, etc. It can't be 0.
:param epsilons(list|tuple|int):
Attack step size (input variation).
Largest step size if epsilons is not iterable.
:param steps:
The number of iterator steps.
The number of attack iteration.
:param epsilon_steps:
The number of Epsilons' iteration for each attack iteration.
:return:
adversary(Adversary): The Adversary object.
"""
......@@ -55,7 +64,7 @@ class GradientMethodAttack(Attack):
"This attack method doesn't support targeted attack!")
if not isinstance(epsilons, Iterable):
epsilons = np.linspace(epsilons, epsilons + 1e-10, num=steps)
epsilons = np.linspace(0, epsilons, num=epsilon_steps)
pre_label = adversary.original_label
min_, max_ = self.model.bounds()
......@@ -65,30 +74,33 @@ class GradientMethodAttack(Attack):
self.model.channel_axis() == adversary.original.shape[0] or
self.model.channel_axis() == adversary.original.shape[-1])
step = 1
adv_img = adversary.original
for epsilon in epsilons[:steps]:
for epsilon in epsilons[:]:
step = 1
adv_img = adversary.original
if epsilon == 0.0:
continue
if adversary.is_targeted_attack:
gradient = -self.model.gradient(adv_img, adversary.target_label)
else:
gradient = self.model.gradient(adv_img,
adversary.original_label)
if norm_ord == np.inf:
gradient_norm = np.sign(gradient)
else:
gradient_norm = gradient / self._norm(gradient, ord=norm_ord)
adv_img = adv_img + epsilon * gradient_norm * (max_ - min_)
adv_img = np.clip(adv_img, min_, max_)
adv_label = np.argmax(self.model.predict(adv_img))
logging.info('step={}, epsilon = {:.5f}, pre_label = {}, '
'adv_label={}'.format(step, epsilon, pre_label,
adv_label))
if adversary.try_accept_the_example(adv_img, adv_label):
return adversary
step += 1
for i in range(steps):
if adversary.is_targeted_attack:
gradient = -self.model.gradient(adv_img,
adversary.target_label)
else:
gradient = self.model.gradient(adv_img,
adversary.original_label)
if norm_ord == np.inf:
gradient_norm = np.sign(gradient)
else:
gradient_norm = gradient / self._norm(
gradient, ord=norm_ord)
adv_img = adv_img + epsilon * gradient_norm * (max_ - min_)
adv_img = np.clip(adv_img, min_, max_)
adv_label = np.argmax(self.model.predict(adv_img))
logging.info('step={}, epsilon = {:.5f}, pre_label = {}, '
'adv_label={}'.format(step, epsilon, pre_label,
adv_label))
if adversary.try_accept_the_example(adv_img, adv_label):
return adversary
step += 1
return adversary
@staticmethod
......@@ -113,7 +125,7 @@ class FastGradientSignMethodTargetedAttack(GradientMethodAttack):
Paper link: https://arxiv.org/abs/1412.6572
"""
def _apply(self, adversary, epsilons=0.03):
def _apply(self, adversary, epsilons=0.01):
return GradientMethodAttack._apply(
self,
adversary=adversary,
......@@ -144,7 +156,7 @@ class IterativeLeastLikelyClassMethodAttack(GradientMethodAttack):
Paper link: https://arxiv.org/abs/1607.02533
"""
def _apply(self, adversary, epsilons=0.001, steps=1000):
def _apply(self, adversary, epsilons=0.01, steps=1000):
return GradientMethodAttack._apply(
self,
adversary=adversary,
......@@ -164,7 +176,103 @@ class BasicIterativeMethodAttack(IterativeLeastLikelyClassMethodAttack):
super(BasicIterativeMethodAttack, self).__init__(model, False)
class MomentumIteratorAttack(GradientMethodAttack):
"""
The Momentum Iterative Fast Gradient Sign Method (Dong et al. 2017).
This method won the first places in NIPS 2017 Non-targeted Adversarial
Attacks and Targeted Adversarial Attacks. The original paper used
hard labels for this attack; no label smoothing. inf norm.
Paper link: https://arxiv.org/pdf/1710.06081.pdf
"""
def __init__(self, model, support_targeted=True):
"""
:param model(model): The model to be attacked.
:param support_targeted(bool): Does this attack method support targeted.
"""
super(MomentumIteratorAttack, self).__init__(model)
self.support_targeted = support_targeted
def _apply(self,
adversary,
norm_ord=np.inf,
epsilons=0.1,
steps=100,
epsilon_steps=100,
decay_factor=1):
"""
Apply the momentum iterative gradient attack method.
:param adversary(Adversary):
The Adversary object.
:param norm_ord(int):
Order of the norm, such as np.inf, 1, 2, etc. It can't be 0.
:param epsilons(list|tuple|float):
Attack step size (input variation).
Largest step size if epsilons is not iterable.
:param epsilon_steps:
The number of Epsilons' iteration for each attack iteration.
:param steps:
The number of attack iteration.
:param decay_factor:
The decay factor for the momentum term.
:return:
adversary(Adversary): The Adversary object.
"""
if norm_ord == 0:
raise ValueError("L0 norm is not supported!")
if not self.support_targeted:
if adversary.is_targeted_attack:
raise ValueError(
"This attack method doesn't support targeted attack!")
assert self.model.channel_axis() == adversary.original.ndim
assert (self.model.channel_axis() == 1 or
self.model.channel_axis() == adversary.original.shape[0] or
self.model.channel_axis() == adversary.original.shape[-1])
if not isinstance(epsilons, Iterable):
epsilons = np.linspace(0, epsilons, num=epsilon_steps)
min_, max_ = self.model.bounds()
pre_label = adversary.original_label
for epsilon in epsilons[:]:
if epsilon == 0.0:
continue
step = 1
adv_img = adversary.original
momentum = 0
for i in range(steps):
if adversary.is_targeted_attack:
gradient = -self.model.gradient(adv_img,
adversary.target_label)
else:
gradient = self.model.gradient(adv_img, pre_label)
# normalize gradient
velocity = gradient / self._norm(gradient, ord=1)
momentum = decay_factor * momentum + velocity
if norm_ord == np.inf:
normalized_grad = np.sign(momentum)
else:
normalized_grad = self._norm(momentum, ord=norm_ord)
perturbation = epsilon * normalized_grad
adv_img = adv_img + perturbation
adv_img = np.clip(adv_img, min_, max_)
adv_label = np.argmax(self.model.predict(adv_img))
logging.info(
'step={}, epsilon = {:.5f}, pre_label = {}, adv_label={}'
.format(step, epsilon, pre_label, adv_label))
if adversary.try_accept_the_example(adv_img, adv_label):
return adversary
step += 1
return adversary
FGSM = FastGradientSignMethodAttack
FGSMT = FastGradientSignMethodTargetedAttack
BIM = BasicIterativeMethodAttack
ILCM = IterativeLeastLikelyClassMethodAttack
MIFGSM = MomentumIteratorAttack
"""
FGSM demos on mnist using advbox tool.
"""
import matplotlib.pyplot as plt
import paddle.v2 as paddle
import paddle.fluid as fluid
from advbox.adversary import Adversary
from advbox.attacks.gradient_method import FGSM
from advbox.models.paddle import PaddleModel
def cnn_model(img):
"""
Mnist cnn model
Args:
img(Varaible): the input image to be recognized
Returns:
Variable: the label prediction
"""
# conv1 = fluid.nets.conv2d()
conv_pool_1 = fluid.nets.simple_img_conv_pool(
input=img,
num_filters=20,
filter_size=5,
pool_size=2,
pool_stride=2,
act='relu')
conv_pool_2 = fluid.nets.simple_img_conv_pool(
input=conv_pool_1,
num_filters=50,
filter_size=5,
pool_size=2,
pool_stride=2,
act='relu')
logits = fluid.layers.fc(input=conv_pool_2, size=10, act='softmax')
return logits
def main():
"""
Advbox demo which demonstrate how to use advbox.
"""
IMG_NAME = 'img'
LABEL_NAME = 'label'
img = fluid.layers.data(name=IMG_NAME, shape=[1, 28, 28], dtype='float32')
# gradient should flow
img.stop_gradient = False
label = fluid.layers.data(name=LABEL_NAME, shape=[1], dtype='int64')
logits = cnn_model(img)
cost = fluid.layers.cross_entropy(input=logits, label=label)
avg_cost = fluid.layers.mean(x=cost)
place = fluid.CPUPlace()
exe = fluid.Executor(place)
BATCH_SIZE = 1
train_reader = paddle.batch(
paddle.reader.shuffle(
paddle.dataset.mnist.train(), buf_size=500),
batch_size=BATCH_SIZE)
feeder = fluid.DataFeeder(
feed_list=[IMG_NAME, LABEL_NAME],
place=place,
program=fluid.default_main_program())
fluid.io.load_params(
exe, "./mnist/", main_program=fluid.default_main_program())
# advbox demo
m = PaddleModel(fluid.default_main_program(), IMG_NAME, LABEL_NAME,
logits.name, avg_cost.name, (-1, 1))
att = FGSM(m)
for data in train_reader():
# fgsm attack
adversary = att(Adversary(data[0][0], data[0][1]))
if adversary.is_successful():
plt.imshow(adversary.target, cmap='Greys_r')
plt.show()
# np.save('adv_img', adversary.target)
break
if __name__ == '__main__':
main()
"""
FGSM demos on mnist using advbox tool.
"""
import matplotlib.pyplot as plt
import paddle.v2 as paddle
import paddle.fluid as fluid
import numpy as np
from advbox import Adversary
from advbox.attacks.saliency import SaliencyMapAttack
from advbox.models.paddle import PaddleModel
def cnn_model(img):
"""
Mnist cnn model
Args:
img(Varaible): the input image to be recognized
Returns:
Variable: the label prediction
"""
# conv1 = fluid.nets.conv2d()
conv_pool_1 = fluid.nets.simple_img_conv_pool(
input=img,
num_filters=20,
filter_size=5,
pool_size=2,
pool_stride=2,
act='relu')
conv_pool_2 = fluid.nets.simple_img_conv_pool(
input=conv_pool_1,
num_filters=50,
filter_size=5,
pool_size=2,
pool_stride=2,
act='relu')
logits = fluid.layers.fc(input=conv_pool_2, size=10, act='softmax')
return logits
def main():
"""
Advbox demo which demonstrate how to use advbox.
"""
IMG_NAME = 'img'
LABEL_NAME = 'label'
img = fluid.layers.data(name=IMG_NAME, shape=[1, 28, 28], dtype='float32')
# gradient should flow
img.stop_gradient = False
label = fluid.layers.data(name=LABEL_NAME, shape=[1], dtype='int64')
logits = cnn_model(img)
cost = fluid.layers.cross_entropy(input=logits, label=label)
avg_cost = fluid.layers.mean(x=cost)
place = fluid.CPUPlace()
exe = fluid.Executor(place)
BATCH_SIZE = 1
train_reader = paddle.batch(
paddle.reader.shuffle(
paddle.dataset.mnist.train(), buf_size=500),
batch_size=BATCH_SIZE)
feeder = fluid.DataFeeder(
feed_list=[IMG_NAME, LABEL_NAME],
place=place,
program=fluid.default_main_program())
fluid.io.load_params(
exe, "./mnist/", main_program=fluid.default_main_program())
# advbox demo
m = PaddleModel(fluid.default_main_program(), IMG_NAME, LABEL_NAME,
logits.name, avg_cost.name, (-1, 1))
attack = SaliencyMapAttack(m)
total_num = 0
success_num = 0
for data in train_reader():
total_num += 1
# adversary.set_target(True, target_label=target_label)
jsma_attack = attack(Adversary(data[0][0], data[0][1]))
if jsma_attack is not None and jsma_attack.is_successful():
# plt.imshow(jsma_attack.target, cmap='Greys_r')
# plt.show()
success_num += 1
print('original_label=%d, adversary examples label =%d' %
(data[0][1], jsma_attack.adversarial_label))
# np.save('adv_img', jsma_attack.adversarial_example)
print('total num = %d, success num = %d ' % (total_num, success_num))
if total_num == 100:
break
if __name__ == '__main__':
main()
"""
A set of tutorials for generating adversarial examples with advbox.
"""
\ No newline at end of file
......@@ -30,8 +30,9 @@ def mnist_cnn_model(img):
pool_size=2,
pool_stride=2,
act='relu')
fc = fluid.layers.fc(input=conv_pool_2, size=50, act='relu')
logits = fluid.layers.fc(input=conv_pool_2, size=10, act='softmax')
logits = fluid.layers.fc(input=fc, size=10, act='softmax')
return logits
......@@ -60,7 +61,10 @@ def main():
paddle.dataset.mnist.train(), buf_size=500),
batch_size=BATCH_SIZE)
# use CPU
place = fluid.CPUPlace()
# use GPU
# place = fluid.CUDAPlace(0)
exe = fluid.Executor(place)
feeder = fluid.DataFeeder(feed_list=[img, label], place=place)
exe.run(fluid.default_startup_program())
......@@ -74,9 +78,11 @@ def main():
feed=feeder.feed(data),
fetch_list=[avg_cost, batch_acc, batch_size])
pass_acc.add(value=acc, weight=b_size)
pass_acc_val = pass_acc.eval()[0]
print("pass_id=" + str(pass_id) + " acc=" + str(acc[0]) +
" pass_acc=" + str(pass_acc.eval()[0]))
if loss < LOSS_THRESHOLD and pass_acc > ACC_THRESHOLD:
" pass_acc=" + str(pass_acc_val))
if loss < LOSS_THRESHOLD and pass_acc_val > ACC_THRESHOLD:
# early stop
break
print("pass_id=" + str(pass_id) + " pass_acc=" + str(pass_acc.eval()[
......
"""
BIM tutorial on mnist using advbox tool.
BIM method iteratively take multiple small steps while adjusting the direction after each step.
It only supports non-targeted attack.
"""
import sys
sys.path.append("..")
import matplotlib.pyplot as plt
import paddle.fluid as fluid
import paddle.v2 as paddle
from advbox.adversary import Adversary
from advbox.attacks.gradient_method import BIM
from advbox.models.paddle import PaddleModel
from tutorials.mnist_model import mnist_cnn_model
def main():
"""
Advbox demo which demonstrate how to use advbox.
"""
TOTAL_NUM = 500
IMG_NAME = 'img'
LABEL_NAME = 'label'
img = fluid.layers.data(name=IMG_NAME, shape=[1, 28, 28], dtype='float32')
# gradient should flow
img.stop_gradient = False
label = fluid.layers.data(name=LABEL_NAME, shape=[1], dtype='int64')
logits = mnist_cnn_model(img)
cost = fluid.layers.cross_entropy(input=logits, label=label)
avg_cost = fluid.layers.mean(x=cost)
# use CPU
place = fluid.CPUPlace()
# use GPU
# place = fluid.CUDAPlace(0)
exe = fluid.Executor(place)
BATCH_SIZE = 1
train_reader = paddle.batch(
paddle.reader.shuffle(
paddle.dataset.mnist.train(), buf_size=128 * 10),
batch_size=BATCH_SIZE)
test_reader = paddle.batch(
paddle.reader.shuffle(
paddle.dataset.mnist.test(), buf_size=128 * 10),
batch_size=BATCH_SIZE)
fluid.io.load_params(
exe, "./mnist/", main_program=fluid.default_main_program())
# advbox demo
m = PaddleModel(
fluid.default_main_program(),
IMG_NAME,
LABEL_NAME,
logits.name,
avg_cost.name, (-1, 1),
channel_axis=1)
attack = BIM(m)
attack_config = {"epsilons": 0.1, "steps": 100}
# use train data to generate adversarial examples
total_count = 0
fooling_count = 0
for data in train_reader():
total_count += 1
adversary = Adversary(data[0][0], data[0][1])
# BIM non-targeted attack
adversary = attack(adversary, **attack_config)
if adversary.is_successful():
fooling_count += 1
print(
'attack success, original_label=%d, adversarial_label=%d, count=%d'
% (data[0][1], adversary.adversarial_label, total_count))
# plt.imshow(adversary.target, cmap='Greys_r')
# plt.show()
# np.save('adv_img', adversary.target)
else:
print('attack failed, original_label=%d, count=%d' %
(data[0][1], total_count))
if total_count >= TOTAL_NUM:
print(
"[TRAIN_DATASET]: fooling_count=%d, total_count=%d, fooling_rate=%f"
% (fooling_count, total_count,
float(fooling_count) / total_count))
break
# use test data to generate adversarial examples
total_count = 0
fooling_count = 0
for data in test_reader():
total_count += 1
adversary = Adversary(data[0][0], data[0][1])
# BIM non-targeted attack
adversary = attack(adversary, **attack_config)
if adversary.is_successful():
fooling_count += 1
print(
'attack success, original_label=%d, adversarial_label=%d, count=%d'
% (data[0][1], adversary.adversarial_label, total_count))
# plt.imshow(adversary.target, cmap='Greys_r')
# plt.show()
# np.save('adv_img', adversary.target)
else:
print('attack failed, original_label=%d, count=%d' %
(data[0][1], total_count))
if total_count >= TOTAL_NUM:
print(
"[TEST_DATASET]: fooling_count=%d, total_count=%d, fooling_rate=%f"
% (fooling_count, total_count,
float(fooling_count) / total_count))
break
print("bim attack done")
if __name__ == '__main__':
main()
"""
DeepFool tutorial on mnist using advbox tool.
Deepfool is a simple and accurate adversarial attack method.
It supports both targeted attack and non-targeted attack.
"""
import sys
sys.path.append("..")
import matplotlib.pyplot as plt
import paddle.fluid as fluid
import paddle.v2 as paddle
from advbox.adversary import Adversary
from advbox.attacks.deepfool import DeepFoolAttack
from advbox.models.paddle import PaddleModel
from tutorials.mnist_model import mnist_cnn_model
def main():
"""
Advbox demo which demonstrate how to use advbox.
"""
TOTAL_NUM = 500
IMG_NAME = 'img'
LABEL_NAME = 'label'
img = fluid.layers.data(name=IMG_NAME, shape=[1, 28, 28], dtype='float32')
# gradient should flow
img.stop_gradient = False
label = fluid.layers.data(name=LABEL_NAME, shape=[1], dtype='int64')
logits = mnist_cnn_model(img)
cost = fluid.layers.cross_entropy(input=logits, label=label)
avg_cost = fluid.layers.mean(x=cost)
# use CPU
place = fluid.CPUPlace()
# use GPU
# place = fluid.CUDAPlace(0)
exe = fluid.Executor(place)
BATCH_SIZE = 1
train_reader = paddle.batch(
paddle.reader.shuffle(
paddle.dataset.mnist.train(), buf_size=128 * 10),
batch_size=BATCH_SIZE)
test_reader = paddle.batch(
paddle.reader.shuffle(
paddle.dataset.mnist.test(), buf_size=128 * 10),
batch_size=BATCH_SIZE)
fluid.io.load_params(
exe, "./mnist/", main_program=fluid.default_main_program())
# advbox demo
m = PaddleModel(
fluid.default_main_program(),
IMG_NAME,
LABEL_NAME,
logits.name,
avg_cost.name, (-1, 1),
channel_axis=1)
attack = DeepFoolAttack(m)
attack_config = {"iterations": 100, "overshoot": 9}
# use train data to generate adversarial examples
total_count = 0
fooling_count = 0
for data in train_reader():
total_count += 1
adversary = Adversary(data[0][0], data[0][1])
# DeepFool non-targeted attack
adversary = attack(adversary, **attack_config)
# DeepFool targeted attack
# tlabel = 0
# adversary.set_target(is_targeted_attack=True, target_label=tlabel)
# adversary = attack(adversary, **attack_config)
if adversary.is_successful():
fooling_count += 1
print(
'attack success, original_label=%d, adversarial_label=%d, count=%d'
% (data[0][1], adversary.adversarial_label, total_count))
# plt.imshow(adversary.target, cmap='Greys_r')
# plt.show()
# np.save('adv_img', adversary.target)
else:
print('attack failed, original_label=%d, count=%d' %
(data[0][1], total_count))
if total_count >= TOTAL_NUM:
print(
"[TRAIN_DATASET]: fooling_count=%d, total_count=%d, fooling_rate=%f"
% (fooling_count, total_count,
float(fooling_count) / total_count))
break
# use test data to generate adversarial examples
total_count = 0
fooling_count = 0
for data in test_reader():
total_count += 1
adversary = Adversary(data[0][0], data[0][1])
# DeepFool non-targeted attack
adversary = attack(adversary, **attack_config)
# DeepFool targeted attack
# tlabel = 0
# adversary.set_target(is_targeted_attack=True, target_label=tlabel)
# adversary = attack(adversary, **attack_config)
if adversary.is_successful():
fooling_count += 1
print(
'attack success, original_label=%d, adversarial_label=%d, count=%d'
% (data[0][1], adversary.adversarial_label, total_count))
# plt.imshow(adversary.target, cmap='Greys_r')
# plt.show()
# np.save('adv_img', adversary.target)
else:
print('attack failed, original_label=%d, count=%d' %
(data[0][1], total_count))
if total_count >= TOTAL_NUM:
print(
"[TEST_DATASET]: fooling_count=%d, total_count=%d, fooling_rate=%f"
% (fooling_count, total_count,
float(fooling_count) / total_count))
break
print("deelfool attack done")
if __name__ == '__main__':
main()
"""
FGSM tutorial on mnist using advbox tool.
FGSM method is non-targeted attack while FGSMT is targeted attack.
"""
import sys
sys.path.append("..")
import matplotlib.pyplot as plt
import numpy as np
import paddle.fluid as fluid
import paddle.v2 as paddle
from advbox.adversary import Adversary
from advbox.attacks.gradient_method import FGSM
from advbox.attacks.gradient_method import FGSMT
from advbox.models.paddle import PaddleModel
from tutorials.mnist_model import mnist_cnn_model
def main():
"""
Advbox demo which demonstrate how to use advbox.
"""
TOTAL_NUM = 500
IMG_NAME = 'img'
LABEL_NAME = 'label'
img = fluid.layers.data(name=IMG_NAME, shape=[1, 28, 28], dtype='float32')
# gradient should flow
img.stop_gradient = False
label = fluid.layers.data(name=LABEL_NAME, shape=[1], dtype='int64')
logits = mnist_cnn_model(img)
cost = fluid.layers.cross_entropy(input=logits, label=label)
avg_cost = fluid.layers.mean(x=cost)
# use CPU
place = fluid.CPUPlace()
# use GPU
# place = fluid.CUDAPlace(0)
exe = fluid.Executor(place)
BATCH_SIZE = 1
train_reader = paddle.batch(
paddle.reader.shuffle(
paddle.dataset.mnist.train(), buf_size=128 * 10),
batch_size=BATCH_SIZE)
test_reader = paddle.batch(
paddle.reader.shuffle(
paddle.dataset.mnist.test(), buf_size=128 * 10),
batch_size=BATCH_SIZE)
fluid.io.load_params(
exe, "./mnist/", main_program=fluid.default_main_program())
# advbox demo
m = PaddleModel(
fluid.default_main_program(),
IMG_NAME,
LABEL_NAME,
logits.name,
avg_cost.name, (-1, 1),
channel_axis=1)
attack = FGSM(m)
# attack = FGSMT(m)
attack_config = {"epsilons": 0.3}
# use train data to generate adversarial examples
total_count = 0
fooling_count = 0
for data in train_reader():
total_count += 1
adversary = Adversary(data[0][0], data[0][1])
# FGSM non-targeted attack
adversary = attack(adversary, **attack_config)
# FGSMT targeted attack
# tlabel = 0
# adversary.set_target(is_targeted_attack=True, target_label=tlabel)
# adversary = attack(adversary, **attack_config)
if adversary.is_successful():
fooling_count += 1
print(
'attack success, original_label=%d, adversarial_label=%d, count=%d'
% (data[0][1], adversary.adversarial_label, total_count))
# plt.imshow(adversary.target, cmap='Greys_r')
# plt.show()
# np.save('adv_img', adversary.target)
else:
print('attack failed, original_label=%d, count=%d' %
(data[0][1], total_count))
if total_count >= TOTAL_NUM:
print(
"[TRAIN_DATASET]: fooling_count=%d, total_count=%d, fooling_rate=%f"
% (fooling_count, total_count,
float(fooling_count) / total_count))
break
# use test data to generate adversarial examples
total_count = 0
fooling_count = 0
for data in test_reader():
total_count += 1
adversary = Adversary(data[0][0], data[0][1])
# FGSM non-targeted attack
adversary = attack(adversary, **attack_config)
# FGSMT targeted attack
# tlabel = 0
# adversary.set_target(is_targeted_attack=True, target_label=tlabel)
# adversary = attack(adversary, **attack_config)
if adversary.is_successful():
fooling_count += 1
print(
'attack success, original_label=%d, adversarial_label=%d, count=%d'
% (data[0][1], adversary.adversarial_label, total_count))
# plt.imshow(adversary.target, cmap='Greys_r')
# plt.show()
# np.save('adv_img', adversary.target)
else:
print('attack failed, original_label=%d, count=%d' %
(data[0][1], total_count))
if total_count >= TOTAL_NUM:
print(
"[TEST_DATASET]: fooling_count=%d, total_count=%d, fooling_rate=%f"
% (fooling_count, total_count,
float(fooling_count) / total_count))
break
print("fgsm attack done")
if __name__ == '__main__':
main()
"""
ILCM tutorial on mnist using advbox tool.
ILCM method extends "BIM" to support targeted attack.
"""
import sys
sys.path.append("..")
import matplotlib.pyplot as plt
import paddle.fluid as fluid
import paddle.v2 as paddle
from advbox.adversary import Adversary
from advbox.attacks.gradient_method import ILCM
from advbox.models.paddle import PaddleModel
from tutorials.mnist_model import mnist_cnn_model
def main():
"""
Advbox demo which demonstrate how to use advbox.
"""
TOTAL_NUM = 500
IMG_NAME = 'img'
LABEL_NAME = 'label'
img = fluid.layers.data(name=IMG_NAME, shape=[1, 28, 28], dtype='float32')
# gradient should flow
img.stop_gradient = False
label = fluid.layers.data(name=LABEL_NAME, shape=[1], dtype='int64')
logits = mnist_cnn_model(img)
cost = fluid.layers.cross_entropy(input=logits, label=label)
avg_cost = fluid.layers.mean(x=cost)
# use CPU
place = fluid.CPUPlace()
# use GPU
# place = fluid.CUDAPlace(0)
exe = fluid.Executor(place)
BATCH_SIZE = 1
train_reader = paddle.batch(
paddle.reader.shuffle(
paddle.dataset.mnist.train(), buf_size=128 * 10),
batch_size=BATCH_SIZE)
test_reader = paddle.batch(
paddle.reader.shuffle(
paddle.dataset.mnist.test(), buf_size=128 * 10),
batch_size=BATCH_SIZE)
fluid.io.load_params(
exe, "./mnist/", main_program=fluid.default_main_program())
# advbox demo
m = PaddleModel(
fluid.default_main_program(),
IMG_NAME,
LABEL_NAME,
logits.name,
avg_cost.name, (-1, 1),
channel_axis=1)
attack = ILCM(m)
attack_config = {"epsilons": 0.1, "steps": 100}
# use train data to generate adversarial examples
total_count = 0
fooling_count = 0
for data in train_reader():
total_count += 1
adversary = Adversary(data[0][0], data[0][1])
tlabel = 0
adversary.set_target(is_targeted_attack=True, target_label=tlabel)
# ILCM targeted attack
adversary = attack(adversary, **attack_config)
if adversary.is_successful():
fooling_count += 1
print(
'attack success, original_label=%d, adversarial_label=%d, count=%d'
% (data[0][1], adversary.adversarial_label, total_count))
# plt.imshow(adversary.target, cmap='Greys_r')
# plt.show()
# np.save('adv_img', adversary.target)
else:
print('attack failed, original_label=%d, count=%d' %
(data[0][1], total_count))
if total_count >= TOTAL_NUM:
print(
"[TRAIN_DATASET]: fooling_count=%d, total_count=%d, fooling_rate=%f"
% (fooling_count, total_count,
float(fooling_count) / total_count))
break
# use test data to generate adversarial examples
total_count = 0
fooling_count = 0
for data in test_reader():
total_count += 1
adversary = Adversary(data[0][0], data[0][1])
tlabel = 0
adversary.set_target(is_targeted_attack=True, target_label=tlabel)
# ILCM targeted attack
adversary = attack(adversary, **attack_config)
if adversary.is_successful():
fooling_count += 1
print(
'attack success, original_label=%d, adversarial_label=%d, count=%d'
% (data[0][1], adversary.adversarial_label, total_count))
# plt.imshow(adversary.target, cmap='Greys_r')
# plt.show()
# np.save('adv_img', adversary.target)
else:
print('attack failed, original_label=%d, count=%d' %
(data[0][1], total_count))
if total_count >= TOTAL_NUM:
print(
"[TEST_DATASET]: fooling_count=%d, total_count=%d, fooling_rate=%f"
% (fooling_count, total_count,
float(fooling_count) / total_count))
break
print("ilcm attack done")
if __name__ == '__main__':
main()
"""
JSMA tutorial on mnist using advbox tool.
JSMA method supports both targeted attack and non-targeted attack.
"""
import sys
sys.path.append("..")
import matplotlib.pyplot as plt
import paddle.fluid as fluid
import paddle.v2 as paddle
from advbox.adversary import Adversary
from advbox.attacks.saliency import JSMA
from advbox.models.paddle import PaddleModel
from tutorials.mnist_model import mnist_cnn_model
def main():
"""
Advbox demo which demonstrate how to use advbox.
"""
TOTAL_NUM = 500
IMG_NAME = 'img'
LABEL_NAME = 'label'
img = fluid.layers.data(name=IMG_NAME, shape=[1, 28, 28], dtype='float32')
# gradient should flow
img.stop_gradient = False
label = fluid.layers.data(name=LABEL_NAME, shape=[1], dtype='int64')
logits = mnist_cnn_model(img)
cost = fluid.layers.cross_entropy(input=logits, label=label)
avg_cost = fluid.layers.mean(x=cost)
# use CPU
place = fluid.CPUPlace()
# use GPU
# place = fluid.CUDAPlace(0)
exe = fluid.Executor(place)
BATCH_SIZE = 1
train_reader = paddle.batch(
paddle.reader.shuffle(
paddle.dataset.mnist.train(), buf_size=128 * 10),
batch_size=BATCH_SIZE)
test_reader = paddle.batch(
paddle.reader.shuffle(
paddle.dataset.mnist.test(), buf_size=128 * 10),
batch_size=BATCH_SIZE)
fluid.io.load_params(
exe, "./mnist/", main_program=fluid.default_main_program())
# advbox demo
m = PaddleModel(
fluid.default_main_program(),
IMG_NAME,
LABEL_NAME,
logits.name,
avg_cost.name, (-1, 1),
channel_axis=1)
attack = JSMA(m)
attack_config = {
"max_iter": 2000,
"theta": 0.1,
"max_perturbations_per_pixel": 7
}
# use train data to generate adversarial examples
total_count = 0
fooling_count = 0
for data in train_reader():
total_count += 1
adversary = Adversary(data[0][0], data[0][1])
# JSMA non-targeted attack
adversary = attack(adversary, **attack_config)
# JSMA targeted attack
# tlabel = 0
# adversary.set_target(is_targeted_attack=True, target_label=tlabel)
# adversary = attack(adversary, **attack_config)
# JSMA may return None
if adversary is not None and adversary.is_successful():
fooling_count += 1
print(
'attack success, original_label=%d, adversarial_label=%d, count=%d'
% (data[0][1], adversary.adversarial_label, total_count))
# plt.imshow(adversary.target, cmap='Greys_r')
# plt.show()
# np.save('adv_img', adversary.target)
else:
print('attack failed, original_label=%d, count=%d' %
(data[0][1], total_count))
if total_count >= TOTAL_NUM:
print(
"[TRAIN_DATASET]: fooling_count=%d, total_count=%d, fooling_rate=%f"
% (fooling_count, total_count,
float(fooling_count) / total_count))
break
# use test data to generate adversarial examples
total_count = 0
fooling_count = 0
for data in test_reader():
total_count += 1
adversary = Adversary(data[0][0], data[0][1])
# JSMA non-targeted attack
adversary = attack(adversary, **attack_config)
# JSMA targeted attack
# tlabel = 0
# adversary.set_target(is_targeted_attack=True, target_label=tlabel)
# adversary = attack(adversary, **attack_config)
# JSMA may return None
if adversary is not None and adversary.is_successful():
fooling_count += 1
print(
'attack success, original_label=%d, adversarial_label=%d, count=%d'
% (data[0][1], adversary.adversarial_label, total_count))
# plt.imshow(adversary.target, cmap='Greys_r')
# plt.show()
# np.save('adv_img', adversary.target)
else:
print('attack failed, original_label=%d, count=%d' %
(data[0][1], total_count))
if total_count >= TOTAL_NUM:
print(
"[TEST_DATASET]: fooling_count=%d, total_count=%d, fooling_rate=%f"
% (fooling_count, total_count,
float(fooling_count) / total_count))
break
print("jsma attack done")
if __name__ == '__main__':
main()
"""
LBFGS tutorial on mnist using advbox tool.
LBFGS method only supports targeted attack.
"""
import sys
sys.path.append("..")
import matplotlib.pyplot as plt
import paddle.fluid as fluid
import paddle.v2 as paddle
from advbox.adversary import Adversary
from advbox.attacks.lbfgs import LBFGS
from advbox.models.paddle import PaddleModel
from tutorials.mnist_model import mnist_cnn_model
def main():
"""
Advbox demo which demonstrate how to use advbox.
"""
TOTAL_NUM = 500
IMG_NAME = 'img'
LABEL_NAME = 'label'
img = fluid.layers.data(name=IMG_NAME, shape=[1, 28, 28], dtype='float32')
# gradient should flow
img.stop_gradient = False
label = fluid.layers.data(name=LABEL_NAME, shape=[1], dtype='int64')
logits = mnist_cnn_model(img)
cost = fluid.layers.cross_entropy(input=logits, label=label)
avg_cost = fluid.layers.mean(x=cost)
# use CPU
place = fluid.CPUPlace()
# use GPU
# place = fluid.CUDAPlace(0)
exe = fluid.Executor(place)
BATCH_SIZE = 1
train_reader = paddle.batch(
paddle.reader.shuffle(
paddle.dataset.mnist.train(), buf_size=128 * 10),
batch_size=BATCH_SIZE)
test_reader = paddle.batch(
paddle.reader.shuffle(
paddle.dataset.mnist.test(), buf_size=128 * 10),
batch_size=BATCH_SIZE)
fluid.io.load_params(
exe, "./mnist/", main_program=fluid.default_main_program())
# advbox demo
m = PaddleModel(
fluid.default_main_program(),
IMG_NAME,
LABEL_NAME,
logits.name,
avg_cost.name, (-1, 1),
channel_axis=1)
attack = LBFGS(m)
attack_config = {"epsilon": 0.001, }
# use train data to generate adversarial examples
total_count = 0
fooling_count = 0
for data in train_reader():
total_count += 1
adversary = Adversary(data[0][0], data[0][1])
# LBFGS targeted attack
tlabel = 0
adversary.set_target(is_targeted_attack=True, target_label=tlabel)
adversary = attack(adversary, **attack_config)
if adversary.is_successful():
fooling_count += 1
print(
'attack success, original_label=%d, adversarial_label=%d, count=%d'
% (data[0][1], adversary.adversarial_label, total_count))
# plt.imshow(adversary.target, cmap='Greys_r')
# plt.show()
# np.save('adv_img', adversary.target)
else:
print('attack failed, original_label=%d, count=%d' %
(data[0][1], total_count))
if total_count >= TOTAL_NUM:
print(
"[TRAIN_DATASET]: fooling_count=%d, total_count=%d, fooling_rate=%f"
% (fooling_count, total_count,
float(fooling_count) / total_count))
break
# use test data to generate adversarial examples
total_count = 0
fooling_count = 0
for data in test_reader():
total_count += 1
adversary = Adversary(data[0][0], data[0][1])
# LBFGS targeted attack
tlabel = 0
adversary.set_target(is_targeted_attack=True, target_label=tlabel)
adversary = attack(adversary, **attack_config)
if adversary.is_successful():
fooling_count += 1
print(
'attack success, original_label=%d, adversarial_label=%d, count=%d'
% (data[0][1], adversary.adversarial_label, total_count))
# plt.imshow(adversary.target, cmap='Greys_r')
# plt.show()
# np.save('adv_img', adversary.target)
else:
print('attack failed, original_label=%d, count=%d' %
(data[0][1], total_count))
if total_count >= TOTAL_NUM:
print(
"[TEST_DATASET]: fooling_count=%d, total_count=%d, fooling_rate=%f"
% (fooling_count, total_count,
float(fooling_count) / total_count))
break
print("lbfgs attack done")
if __name__ == '__main__':
main()
"""
MIFGSM tutorial on mnist using advbox tool.
MIFGSM is a broad class of momentum iterative gradient-based methods based on FSGM.
It supports non-targeted attack and targeted attack.
"""
import sys
sys.path.append("..")
import matplotlib.pyplot as plt
import numpy as np
import paddle.fluid as fluid
import paddle.v2 as paddle
from advbox.adversary import Adversary
from advbox.attacks.gradient_method import MIFGSM
from advbox.models.paddle import PaddleModel
from tutorials.mnist_model import mnist_cnn_model
def main():
"""
Advbox demo which demonstrate how to use advbox.
"""
TOTAL_NUM = 500
IMG_NAME = 'img'
LABEL_NAME = 'label'
img = fluid.layers.data(name=IMG_NAME, shape=[1, 28, 28], dtype='float32')
# gradient should flow
img.stop_gradient = False
label = fluid.layers.data(name=LABEL_NAME, shape=[1], dtype='int64')
logits = mnist_cnn_model(img)
cost = fluid.layers.cross_entropy(input=logits, label=label)
avg_cost = fluid.layers.mean(x=cost)
# use CPU
place = fluid.CPUPlace()
# use GPU
# place = fluid.CUDAPlace(0)
exe = fluid.Executor(place)
BATCH_SIZE = 1
train_reader = paddle.batch(
paddle.reader.shuffle(
paddle.dataset.mnist.train(), buf_size=128 * 10),
batch_size=BATCH_SIZE)
test_reader = paddle.batch(
paddle.reader.shuffle(
paddle.dataset.mnist.test(), buf_size=128 * 10),
batch_size=BATCH_SIZE)
fluid.io.load_params(
exe, "./mnist/", main_program=fluid.default_main_program())
# advbox demo
m = PaddleModel(
fluid.default_main_program(),
IMG_NAME,
LABEL_NAME,
logits.name,
avg_cost.name, (-1, 1),
channel_axis=1)
attack = MIFGSM(m)
attack_config = {
"norm_ord": np.inf,
"epsilons": 0.1,
"steps": 100,
"decay_factor": 1
}
# use train data to generate adversarial examples
total_count = 0
fooling_count = 0
for data in train_reader():
total_count += 1
adversary = Adversary(data[0][0], data[0][1])
# MIFGSM non-targeted attack
adversary = attack(adversary, **attack_config)
# MIFGSM targeted attack
# tlabel = 0
# adversary.set_target(is_targeted_attack=True, target_label=tlabel)
# adversary = attack(adversary, **attack_config)
if adversary.is_successful():
fooling_count += 1
print(
'attack success, original_label=%d, adversarial_label=%d, count=%d'
% (data[0][1], adversary.adversarial_label, total_count))
# plt.imshow(adversary.target, cmap='Greys_r')
# plt.show()
# np.save('adv_img', adversary.target)
else:
print('attack failed, original_label=%d, count=%d' %
(data[0][1], total_count))
if total_count >= TOTAL_NUM:
print(
"[TRAIN_DATASET]: fooling_count=%d, total_count=%d, fooling_rate=%f"
% (fooling_count, total_count,
float(fooling_count) / total_count))
break
# use test data to generate adversarial examples
total_count = 0
fooling_count = 0
for data in test_reader():
total_count += 1
adversary = Adversary(data[0][0], data[0][1])
# MIFGSM non-targeted attack
adversary = attack(adversary, **attack_config)
# MIFGSM targeted attack
# tlabel = 0
# adversary.set_target(is_targeted_attack=True, target_label=tlabel)
# adversary = attack(adversary, **attack_config)
if adversary.is_successful():
fooling_count += 1
print(
'attack success, original_label=%d, adversarial_label=%d, count=%d'
% (data[0][1], adversary.adversarial_label, total_count))
# plt.imshow(adversary.target, cmap='Greys_r')
# plt.show()
# np.save('adv_img', adversary.target)
else:
print('attack failed, original_label=%d, count=%d' %
(data[0][1], total_count))
if total_count >= TOTAL_NUM:
print(
"[TEST_DATASET]: fooling_count=%d, total_count=%d, fooling_rate=%f"
% (fooling_count, total_count,
float(fooling_count) / total_count))
break
print("mifgsm attack done")
if __name__ == '__main__':
main()
### Caffe2Fluid
This tool is used to convert a Caffe model to Fluid model
This tool is used to convert a Caffe model to a Fluid model
### Howto
1, Prepare caffepb.py in ./proto if your python has no 'pycaffe' module, two options provided here:
1) generate it from caffe.proto using protoc
### HowTo
1. Prepare caffepb.py in ./proto if your python has no 'pycaffe' module, two options provided here:
- Generate pycaffe from caffe.proto
```
bash ./proto/compile.sh
```
2) download one from github directly
- Download one from github directly
```
cd proto/ && wget https://github.com/ethereon/caffe-tensorflow/blob/master/kaffe/caffe/caffepb.py
```
2. Convert the Caffe model to Fluid model
- Generate fluid code and weight file
```
python convert.py alexnet.prototxt \
--caffemodel alexnet.caffemodel \
--data-output-path alexnet.npy \
--code-output-path alexnet.py
```
- Save weights as fluid model file
```
python alexnet.py alexnet.npy ./fluid #only infer the last layer's result
python alexnet.py alexnet.npy ./fluid fc8,prob #infer these 2 layer's result
```
3. Use the converted model to infer
- See more details in '*examples/imagenet/run.sh*'
4. Compare the inference results with caffe
- See more details in '*examples/imagenet/diff.sh*'
### How to convert custom layer
1. Implement your custom layer in a file under '*kaffe/custom_layers*', eg: mylayer.py
- Implement ```shape_func(input_shape, [other_caffe_params])``` to calculate the output shape
- Implement ```layer_func(inputs, name, [other_caffe_params])``` to construct a fluid layer
- Register these two functions ```register(kind='MyType', shape=shape_func, layer=layer_func)```
- Notes: more examples can be found in '*kaffe/custom_layers*'
2. Add ```import mylayer``` to '*kaffe/custom_layers/\_\_init__.py*'
2, Convert the caffe model using 'convert.py' which will generate a python script and a weight(in .npy) file
3. Prepare your pycaffe as your customized version(same as previous env prepare)
- (option1) replace 'proto/caffe.proto' with your own caffe.proto and compile it
- (option2) change your pycaffe to the customized version
3, Use the converted model to predict
4. Convert the Caffe model to Fluid model
see more detail info in 'examples/xxx'
5. Set env $CAFFE2FLUID_CUSTOM_LAYERS to the parent directory of 'custom_layers'
```
export CAFFE2FLUID_CUSTOM_LAYERS=/path/to/caffe2fluid/kaffe
```
6. Use the converted model when loading model in 'xxxnet.py' and 'xxxnet.npy'(no need if model is already in 'fluid/model' and 'fluid/params')
### Tested models
- Lenet on mnist dataset
- Lenet:
[model addr](https://github.com/ethereon/caffe-tensorflow/blob/master/examples/mnist)
- ResNets:(ResNet-50, ResNet-101, ResNet-152)
model addr: `https://onedrive.live.com/?authkey=%21AAFW2-FVoxeVRck&id=4006CBB8476FF777%2117887&cid=4006CBB8476FF777`_
[model addr](https://onedrive.live.com/?authkey=%21AAFW2-FVoxeVRck&id=4006CBB8476FF777%2117887&cid=4006CBB8476FF777)
- GoogleNet:
model addr: `https://gist.github.com/jimmie33/7ea9f8ac0da259866b854460f4526034`_
[model addr](https://gist.github.com/jimmie33/7ea9f8ac0da259866b854460f4526034)
- VGG:
model addr: `https://gist.github.com/ksimonyan/211839e770f7b538e2d8`_
[model addr](https://gist.github.com/ksimonyan/211839e770f7b538e2d8)
- AlexNet:
model addr: `https://github.com/BVLC/caffe/tree/master/models/bvlc_alexnet`_
[model addr](https://github.com/BVLC/caffe/tree/master/models/bvlc_alexnet)
### Notes
Some of this code come from here: https://github.com/ethereon/caffe-tensorflow
Some of this code come from here: [caffe-tensorflow](https://github.com/ethereon/caffe-tensorflow)
......@@ -43,11 +43,17 @@ def convert(def_path, caffemodel_path, data_output_path, code_output_path,
print_stderr('Saving source...')
with open(code_output_path, 'wb') as src_out:
src_out.write(transformer.transform_source())
print_stderr('set env variable before using converted model '\
'if used custom_layers:')
custom_pk_path = os.path.dirname(os.path.abspath(__file__))
custom_pk_path = os.path.join(custom_pk_path, 'kaffe')
print_stderr('export CAFFE2FLUID_CUSTOM_LAYERS=%s' % (custom_pk_path))
print_stderr('Done.')
return 0
except KaffeError as err:
fatal_error('Error encountered: {}'.format(err))
return 0
return 1
def main():
......
a demo to show converting caffe models on 'imagenet' using caffe2fluid
A demo to show converting caffe models on 'imagenet' using caffe2fluid
---
# How to use
1. prepare python environment
2. download caffe model to "models.caffe/xxx" which contains "xxx.caffemodel" and "xxx.prototxt"
3. run the tool
eg: bash ./run.sh resnet50 ./models.caffe/resnet50 ./models/resnet50
1. Prepare python environment
2. Download caffe model to "models.caffe/xxx" which contains "xxx.caffemodel" and "xxx.prototxt"
3. Convert the Caffe model to Fluid model
- generate fluid code and weight file
<pre><code>python convert.py alexnet.prototxt \
--caffemodel alexnet.caffemodel \
--data-output-path alexnet.npy \
--code-output-path alexnet.py
</code></pre>
- save weights as fluid model file
<pre><code>python alexnet.py alexnet.npy ./fluid_model
</code></pre>
4. Do inference
<pre><code>python infer.py infer ./fluid_mode data/65.jpeg
</code></pre>
5. convert model and do inference together
<pre><code>bash ./run.sh alexnet ./models.caffe/alexnet ./models/alexnet
</code></pre>
The Caffe model is stored in './models.caffe/alexnet/alexnet.prototxt|caffemodel'
and the Fluid model will be save in './models/alexnet/alexnet.py|npy'
6. test the difference with caffe's results(need pycaffe installed)
<pre><code>bash ./diff.sh resnet
</code></pre>
Make sure your caffemodel stored in './models.caffe/resnet'.
The results will be stored in './results/resnet.paddle|caffe'
#!/usr/bin/python
#
#a tool to compare tensors in two files or two directories
#
import sys
import os
def walk_dir(rootdir):
for subdir, dirs, files in os.walk(rootdir):
for file in files:
yield file
def calc_diff(f1, f2):
import numpy as np
d1 = np.load(f1).flatten()
d2 = np.load(f2).flatten()
d1_num = reduce(lambda x, y: x * y, d1.shape)
d2_num = reduce(lambda x, y: x * y, d2.shape)
if d1_num != d2_num:
print d1.shape
print d2.shape
assert (d1_num == d2_num), "their shape is not consistent"
try:
df = np.abs(d1 - d2)
max_df = np.max(df)
sq_df = np.mean(df * df)
return max_df, sq_df
except Exception as e:
return -1.0, -1.0
def compare(path1, path2):
def diff(f1, f2):
max_df, sq_df = calc_diff(f1, f2)
print('compare %s <=> %s with result[max_df:%.4e, sq_df:%.4e]' %
(f1, f2, max_df, sq_df))
assert (max_df < 1e-5), \
'max_df is too large with value[%.6e]' % (max_df)
assert (sq_df < 1e-10), \
'sq_df is too large with value[%.6e]' % (sq_df)
if os.path.exists(path1) is False:
print('not found %s' % (path1))
return 1
elif os.path.exists(path2) is False:
print('not found %s' % (path2))
return 1
if path1.find('.npy') > 0 and path2.find('.npy') > 0:
diff(path1, path2)
return
for f in walk_dir(path2):
if f.find('.npy') < 0:
continue
f1 = os.path.join(path1, f)
f2 = os.path.join(path2, f)
diff(f1, f2)
print('all checking succeed to pass')
return 0
if __name__ == "__main__":
if len(sys.argv) == 1:
path1 = 'lenet.tf/results'
path2 = 'lenet.paddle/results'
elif len(sys.argv) == 3:
path1 = sys.argv[1]
path2 = sys.argv[2]
else:
print('usage:')
print(' %s [path1] [path2]' % (sys.argv[0]))
exit(1)
print('compare inner result in %s %s' % (path1, path2))
exit(compare(path1, path2))
#!/bin/bash
#
#function:
# a tool used to check the difference of models' results generated by caffe model and paddle model
#
#howto:
# bash diff.sh resnet50 #when this has been finished, you can get the difference in precision
#
#notes:
# 0, in order to infer using caffe, we need pycaffe installed
# 1, prepare your caffe model in 'models.caffe/', eg: 'model.caffe/resnet101/resnet101.[prototxt|caffemodel]'
# 2, converted paddle model will be in 'models'
# 3, results of layers will be stored in 'results/${model_name}.[paddle|caffe]'
# 4, only the last layer will be checked by default
model_name="resnet50"
results_root="results/"
if [[ -n $1 ]];then
if [ $1 = "-h" ];then
echo "usage:"
echo " bash $0 [model_name]"
echo " eg:bash $0 resnet50"
exit 0
fi
model_name=$1
fi
mkdir -p $results_root
model_prototxt="models.caffe/$model_name/${model_name}.prototxt"
model_caffemodel="models.caffe/${model_name}/${model_name}.caffemodel"
#1, dump layers' results from paddle
paddle_results="$results_root/${model_name}.paddle"
rm -rf $paddle_results
rm -rf "results.paddle"
bash run.sh $model_name ./models.caffe/$model_name ./models/$model_name
if [[ $? -ne 0 ]] || [[ ! -e "results.paddle" ]];then
echo "not found paddle's results, maybe failed to convert"
exit 1
fi
mv results.paddle $paddle_results
#2, dump layers' results from caffe
caffe_results="$results_root/${model_name}.caffe"
rm -rf $caffe_results
rm -rf "results.caffe"
cfpython ./infer.py caffe $model_prototxt $model_caffemodel $paddle_results/data.npy
if [[ $? -ne 0 ]] || [[ ! -e "results.caffe" ]];then
echo "not found caffe's results, maybe failed to do inference with caffe"
exit 1
fi
mv results.caffe $caffe_results
#3, extract layer names
cat $model_prototxt | grep name | perl -ne 'if(/^\s*name:\s+\"([^\"]+)/){ print $1."\n";}' >.layer_names
#4, compare one by one
for i in $(cat ".layer_names" | tail -n1);do
echo "process $i"
python compare.py $caffe_results/${i}.npy $paddle_results/${i}.npy
done
......@@ -10,8 +10,11 @@ import os
import sys
import inspect
import numpy as np
import paddle.v2 as paddle
import paddle.v2.fluid as fluid
def import_fluid():
import paddle.fluid as fluid
return fluid
def load_data(imgfile, shape):
......@@ -40,7 +43,7 @@ def build_model(net_file, net_name):
(net_file, net_name))
net_path = os.path.dirname(net_file)
module_name = os.path.basename(net_file).rstrip('.py')
module_name = os.path.splitext(os.path.basename(net_file))[0]
if net_path not in sys.path:
sys.path.insert(0, net_path)
......@@ -48,23 +51,25 @@ def build_model(net_file, net_name):
m = __import__(module_name, fromlist=[net_name])
MyNet = getattr(m, net_name)
except Exception as e:
print('failed to load module[%s]' % (module_name))
print('failed to load module[%s.%s]' % (module_name, net_name))
print(e)
return None
input_name = 'data'
input_shape = MyNet.input_shapes()[input_name]
images = fluid.layers.data(name='image', shape=input_shape, dtype='float32')
fluid = import_fluid()
inputs_dict = MyNet.input_shapes()
input_name = inputs_dict.keys()[0]
input_shape = inputs_dict[input_name]
images = fluid.layers.data(
name=input_name, shape=input_shape, dtype='float32')
#label = fluid.layers.data(name='label', shape=[1], dtype='int64')
net = MyNet({input_name: images})
input_shape = MyNet.input_shapes()[input_name]
return net, input_shape
return net, inputs_dict
def dump_results(results, names, root):
if os.path.exists(root) is False:
os.path.mkdir(root)
os.mkdir(root)
for i in range(len(names)):
n = names[i]
......@@ -73,23 +78,27 @@ def dump_results(results, names, root):
np.save(filename + '.npy', res)
def infer(net_file, net_name, model_file, imgfile, debug=False):
""" do inference using a model which consist 'xxx.py' and 'xxx.npy'
def load_model(exe, place, net_file, net_name, net_weight, debug):
""" load model using xxxnet.py and xxxnet.npy
"""
fluid = import_fluid()
#1, build model
net, input_shape = build_model(net_file, net_name)
net, input_map = build_model(net_file, net_name)
feed_names = input_map.keys()
feed_shapes = [v for k, v in input_map.items()]
prediction = net.get_output()
#2, load weights for this model
place = fluid.CPUPlace()
exe = fluid.Executor(place)
startup_program = fluid.default_startup_program()
exe.run(startup_program)
if model_file.find('.npy') > 0:
net.load(data_path=model_file, exe=exe, place=place)
#place = fluid.CPUPlace()
if net_weight.find('.npy') > 0:
net.load(data_path=net_weight, exe=exe, place=place)
else:
net.load(data_path=model_file, exe=exe)
raise ValueError('not found weight file')
#3, test this model
test_program = fluid.default_main_program().clone()
......@@ -103,18 +112,116 @@ def infer(net_file, net_name, model_file, imgfile, debug=False):
fetch_list_var.append(v)
fetch_list_name.append(k)
return {
'program': test_program,
'feed_names': feed_names,
'fetch_vars': fetch_list_var,
'fetch_names': fetch_list_name,
'feed_shapes': feed_shapes
}
def get_shape(fluid, program, name):
for var in program.list_vars():
if var.name == 'data':
return list(var.shape[1:])
raise ValueError('not found shape for input layer[%s], '
'you can specify by yourself' % (name))
def load_inference_model(dirname, exe):
""" load fluid's inference model
"""
fluid = import_fluid()
model_fn = 'model'
params_fn = 'params'
if os.path.exists(os.path.join(dirname, model_fn)) \
and os.path.exists(os.path.join(dirname, params_fn)):
program, feed_names, fetch_targets = fluid.io.load_inference_model(\
dirname, exe, model_fn, params_fn)
else:
raise ValueError('not found model files in direcotry[%s]' % (dirname))
#print fluid.global_scope().find_var(feed_names[0])
input_shape = get_shape(fluid, program, feed_names[0])
feed_shapes = [input_shape]
return program, feed_names, fetch_targets, feed_shapes
def infer(model_path, imgfile, net_file=None, net_name=None, debug=True):
""" do inference using a model which consist 'xxx.py' and 'xxx.npy'
"""
fluid = import_fluid()
place = fluid.CPUPlace()
exe = fluid.Executor(place)
try:
ret = load_inference_model(model_path, exe)
program, feed_names, fetch_targets, feed_shapes = ret
debug = False
print('found a inference model for fluid')
except ValueError as e:
print('try to load model using net file and weight file')
net_weight = model_path
ret = load_model(exe, place, net_file, net_name, net_weight, debug)
program = ret['program']
feed_names = ret['feed_names']
fetch_targets = ret['fetch_vars']
fetch_list_name = ret['fetch_names']
feed_shapes = ret['feed_shapes']
input_name = feed_names[0]
input_shape = feed_shapes[0]
np_images = load_data(imgfile, input_shape)
results = exe.run(program=test_program,
feed={'image': np_images},
fetch_list=fetch_list_var)
results = exe.run(program=program,
feed={input_name: np_images},
fetch_list=fetch_targets)
if debug is True:
dump_path = 'results.layers'
dump_path = 'results.paddle'
dump_results(results, fetch_list_name, dump_path)
print('all results dumped to [%s]' % (dump_path))
print('all result of layers dumped to [%s]' % (dump_path))
else:
result = results[0]
print('predicted class:', np.argmax(result))
print('succeed infer with results[class:%d]' % (np.argmax(result)))
return 0
def caffe_infer(prototxt, caffemodel, datafile):
""" do inference using pycaffe for debug,
all intermediate results will be dumpped to 'results.caffe'
"""
import caffe
net = caffe.Net(prototxt, caffemodel, caffe.TEST)
input_layer = net.blobs.keys()[0]
print('got name of input layer is:%s' % (input_layer))
input_shape = list(net.blobs[input_layer].data.shape[1:])
if '.npy' in datafile:
np_images = np.load(datafile)
else:
np_images = load_data(datafile, input_shape)
inputs = {input_layer: np_images}
net.forward_all(**inputs)
results = []
names = []
for k, v in net.blobs.items():
k = k.rstrip('_output')
k = k.replace('/', '_')
names.append(k)
results.append(v.data.copy())
dump_path = 'results.caffe'
dump_results(results, names, dump_path)
print('all result of layers dumped to [%s]' % (dump_path))
return 0
if __name__ == "__main__":
......@@ -122,21 +229,50 @@ if __name__ == "__main__":
"""
net_file = 'models/resnet50/resnet50.py'
weight_file = 'models/resnet50/resnet50.npy'
imgfile = 'data/65.jpeg'
datafile = 'data/65.jpeg'
net_name = 'ResNet50'
model_file = 'models/resnet50/fluid'
ret = None
if len(sys.argv) <= 2:
pass
elif sys.argv[1] == 'caffe':
if len(sys.argv) != 5:
print('usage:')
print('\tpython %s caffe [prototxt] [caffemodel] [datafile]' %
(sys.argv[0]))
sys.exit(1)
prototxt = sys.argv[2]
caffemodel = sys.argv[3]
datafile = sys.argv[4]
ret = caffe_infer(prototxt, caffemodel, datafile)
elif sys.argv[1] == 'infer':
if len(sys.argv) != 4:
print('usage:')
print('\tpython %s infer [fluid_model] [datafile]' % (sys.argv[0]))
sys.exit(1)
model_path = sys.argv[2]
datafile = sys.argv[3]
ret = infer(model_path, datafile)
elif sys.argv[1] == 'dump':
if len(sys.argv) != 6:
print('usage:')
print('\tpython %s dump [net_file] [weight_file] [datafile] [net_name]' \
% (sys.argv[0]))
print('\teg:python dump %s %s %s %s %s' % (sys.argv[0],\
net_file, weight_file, datafile, net_name))
sys.exit(1)
net_file = sys.argv[2]
weight_file = sys.argv[3]
datafile = sys.argv[4]
net_name = sys.argv[5]
ret = infer(weight_file, datafile, net_file, net_name)
argc = len(sys.argv)
if argc == 5:
net_file = sys.argv[1]
weight_file = sys.argv[2]
imgfile = sys.argv[3]
net_name = sys.argv[4]
elif argc > 1:
if ret is None:
print('usage:')
print('\tpython %s [net_file] [weight_file] [imgfile] [net_name]' %
(sys.argv[0]))
print('\teg:python %s %s %s %s %s' % (sys.argv[0], net_file,
weight_file, imgfile, net_name))
print(' python %s [infer] [fluid_model] [imgfile]' % (sys.argv[0]))
print(' eg:python %s infer %s %s' % (sys.argv[0], model_file, datafile))
sys.exit(1)
infer(net_file, net_name, weight_file, imgfile)
sys.exit(ret)
......@@ -3,7 +3,7 @@
#function:
# a tool used to:
# 1, convert a caffe model
# 2, do inference using this model
# 2, do inference(only in fluid) using this model
#
#usage:
# bash run.sh resnet50 ./models.caffe/resnet50 ./models/resnet50
......@@ -65,8 +65,13 @@ if [[ -z $only_convert ]];then
PYTHON=`which python`
fi
imgfile="data/65.jpeg"
net_name=`grep "name" $proto_file | head -n1 | perl -ne 'if(/\"([^\"]+)\"/){ print $1."\n";}'`
$PYTHON ./infer.py $net_file $weight_file $imgfile $net_name
#FIX ME:
# only look the first line in prototxt file for the name of this network, maybe not correct
net_name=`grep "name" $proto_file | head -n1 | perl -ne 'if(/^name\s*:\s*\"([^\"]+)\"/){ print $1."\n";}'`
if [[ -z $net_name ]];then
net_name="MyNet"
fi
$PYTHON ./infer.py dump $net_file $weight_file $imgfile $net_name
ret=$?
fi
exit $ret
......@@ -7,8 +7,8 @@
import sys
import os
import numpy as np
import paddle.fluid as fluid
import paddle.v2 as paddle
import paddle.v2.fluid as fluid
def test_model(exe, test_program, fetch_list, test_reader, feeder):
......@@ -34,9 +34,6 @@ def evaluate(net_file, model_file):
from lenet import LeNet as MyNet
with_gpu = False
paddle.init(use_gpu=with_gpu)
#1, define network topology
images = fluid.layers.data(name='image', shape=[1, 28, 28], dtype='float32')
label = fluid.layers.data(name='label', shape=[1], dtype='int64')
......@@ -45,7 +42,7 @@ def evaluate(net_file, model_file):
prediction = net.layers['prob']
acc = fluid.layers.accuracy(input=prediction, label=label)
place = fluid.CUDAPlace(0) if with_gpu is True else fluid.CPUPlace()
place = fluid.CPUPlace()
exe = fluid.Executor(place)
exe.run(fluid.default_startup_program())
......
文件模式从 100644 更改为 100755
"""
"""
from .register import get_registered_layers
#custom layer import begins
import axpy
import flatten
import argmax
#custom layer import ends
custom_layers = get_registered_layers()
def set_args(f, params):
""" set args for function 'f' using the parameters in node.layer.parameters
Args:
f (function): a python function object
params (object): a object contains attributes needed by f's arguments
Returns:
arg_names (list): a list of argument names
kwargs (dict): a dict contains needed arguments
"""
argc = f.__code__.co_argcount
arg_list = f.__code__.co_varnames[0:argc]
kwargs = {}
for arg_name in arg_list:
try:
v = getattr(params, arg_name, None)
except Exception as e:
#maybe failed to extract caffe's parameters
v = None
if v is not None:
kwargs[arg_name] = v
return arg_list, kwargs
def has_layer(kind):
""" test whether this layer exists in custom layer
"""
return kind in custom_layers
def compute_output_shape(kind, node):
assert kind in custom_layers, "layer[%s] not exist in custom layers" % (
kind)
shape_func = custom_layers[kind]['shape']
parents = node.parents
inputs = [list(p.output_shape) for p in parents]
arg_names, kwargs = set_args(shape_func, node.layer.parameters)
if len(inputs) == 1:
inputs = inputs[0]
return shape_func(inputs, **kwargs)
def make_node(template, kind, node):
""" make a TensorFlowNode for custom layer which means construct
a piece of code to define a layer implemented in 'custom_layers'
Args:
@template (TensorFlowNode): a factory to new a instance of TensorFLowNode
@kind (str): type of custom layer
@node (graph.Node): a layer in the net
Returns:
instance of TensorFlowNode
"""
assert kind in custom_layers, "layer[%s] not exist in custom layers" % (
kind)
layer_func = custom_layers[kind]['layer']
#construct arguments needed by custom layer function from node's parameters
arg_names, kwargs = set_args(layer_func, node.layer.parameters)
return template('custom_layer', kind, **kwargs)
def make_custom_layer(kind, inputs, name, *args, **kwargs):
""" execute a custom layer which is implemented by users
Args:
@kind (str): type name of this layer
@inputs (vars): variable list created by fluid
@namme (str): name for this layer
@args (tuple): other positional arguments
@kwargs (dict): other kv arguments
Returns:
output (var): output variable for this layer
"""
assert kind in custom_layers, "layer[%s] not exist in custom layers" % (
kind)
layer_func = custom_layers[kind]['layer']
return layer_func(inputs, name, *args, **kwargs)
""" a custom layer for 'argmax', maybe we should implement this in standard way.
more info can be found here: http://caffe.berkeleyvision.org/tutorial/layers/argmax.html
"""
from .register import register
def import_fluid():
import paddle.fluid as fluid
return fluid
def argmax_shape(input_shape, out_max_val=False, top_k=1, axis=-1):
""" calculate the output shape of this layer using input shape
Args:
@input_shape (list of num): a list of number which represents the input shape
@out_max_val (bool): parameter from caffe's ArgMax layer
@top_k (int): parameter from caffe's ArgMax layer
@axis (int): parameter from caffe's ArgMax layer
Returns:
@output_shape (list of num): a list of numbers represent the output shape
"""
input_shape = list(input_shape)
if axis < 0:
axis += len(input_shape)
assert (axis + 1 == len(input_shape)
), 'only can be applied on the last dimension[axis:%d, %s] now,'\
'make sure you have set axis param in xxx.prototxt file' \
% (axis, str(input_shape))
output_shape = input_shape
output_shape[-1] = top_k
if out_max_val is True:
output_shape[-1] *= 2
return output_shape
def argmax_layer(input, name, out_max_val=False, top_k=1, axis=-1):
""" build a layer of type 'ArgMax' using fluid
Args:
@input (variable): input fluid variable for this layer
@name (str): name for this layer
@out_max_val (bool): parameter from caffe's ArgMax layer
@top_k (int): parameter from caffe's ArgMax layer
@axis (int): parameter from caffe's ArgMax layer
Returns:
output (variable): output variable for this layer
"""
fluid = import_fluid()
if axis < 0:
axis += len(input.shape)
topk_var, index_var = fluid.layers.topk(input=input, k=top_k)
if out_max_val is True:
index_var = fluid.layers.cast(index_var, dtype=topk_var.dtype)
output = fluid.layers.concat([index_var, topk_var], axis=axis)
else:
output = index_var
return output
register(kind='ArgMax', shape=argmax_shape, layer=argmax_layer)
""" A custom layer for 'axpy' which receives 3 tensors and output 1 tensor.
the function performed is:(the mupltiplication and add are elementewise)
output = inputs[0] * inputs[1] + inputs[2]
"""
from .register import register
def axpy_shape(input_shapes):
""" calculate the output shape of this layer using input shapes
Args:
@input_shapes (list of tuples): a list of input shapes
Returns:
@output_shape (list of num): a list of numbers represent the output shape
"""
assert len(input_shapes) == 3, "not valid input shape for axpy layer"
assert len(input_shapes[0]) == len(input_shapes[1]), 'should have same dims'
output_shape = input_shapes[1]
assert (input_shapes[2] == output_shape),\
"shape not consistent for axpy[%s <--> %s]" \
% (str(output_shape), str(input_shapes[2]))
return output_shape
def axpy_layer(inputs, name):
""" build a layer of type 'Axpy' using fluid
Args:
@inputs (list of variables): input fluid variables for this layer
@name (str): name for this layer
Returns:
output (variable): output variable for this layer
"""
import paddle.fluid as fluid
assert len(inputs) == 3, "invalid inputs for axpy[%s]" % (name)
alpha = inputs[0]
x = inputs[1]
y = inputs[2]
output = fluid.layers.elementwise_mul(x, alpha, axis=0)
output = fluid.layers.elementwise_add(output, y)
return output
register(kind='Axpy', shape=axpy_shape, layer=axpy_layer)
""" a custom layer for 'flatten', maybe we should implement this in standard way.
more info can be found here: http://caffe.berkeleyvision.org/tutorial/layers/flatten.html
"""
from .register import register
def import_fluid():
import paddle.fluid as fluid
return fluid
def flatten_shape(input_shape, axis=1, end_axis=-1):
""" calculate the output shape of this layer using input shape
Args:
@input_shape (list of num): a list of number which represents the input shape
@axis (int): parameter from caffe's Flatten layer
@end_axis (int): parameter from caffe's Flatten layer
Returns:
@output_shape (list of num): a list of numbers represent the output shape
"""
start_axis = axis
end_axis = end_axis
input_shape = list(input_shape)
if start_axis < 0:
start_axis += len(input_shape)
if end_axis < 0:
end_axis += len(input_shape)
assert start_axis <= end_axis, 'invalid axis[%d] or end_axis[%d] params'\
% (start_axis, end_axis)
output_shape = input_shape[0:start_axis]
flat_sz = reduce(lambda a, b: a * b, input_shape[start_axis:end_axis])
output_shape += [flat_sz]
output_shape += input_shape[end_axis:-1]
return output_shape
def flatten_layer(input, name, axis=1, end_axis=-1):
""" build a layer of type 'Flatten' using fluid
Args:
@input (variable): input fluid variable for this layer
@name (str): name for this layer
@axis (int): parameter from caffe's Flatten layer
@end_axis (int): parameter from caffe's Flatten layer
Returns:
output (variable): output variable for this layer
"""
fluid = import_fluid()
input_shape = list(input.shape)
dims = len(input_shape)
start_axis = axis if axis >= 0 else axis + dims
end_axis = end_axis if end_axis >= 0 else end_axis + dims
assert start_axis <= end_axis, 'invalid axis or end_axis params'
output_shape = input_shape[0:start_axis]
flat_sz = reduce(lambda a, b: a * b, input_shape[start_axis:end_axis])
output_shape += [flat_sz]
output_shape += input_shape[end_axis:-1]
output = fluid.layers.reshape(input, shape=output_shape, name=name)
return output
register(kind='Flatten', shape=flatten_shape, layer=flatten_layer)
""" this module provides 'register' for registering customized layers
"""
g_custom_layers = {}
def register(kind, shape, layer):
""" register a custom layer or a list of custom layers
Args:
@kind (str or list): type name of the layer
@shape (function): a function to generate the shape of layer's output
@layer (function): a function to generate the shape of layer's output
Returns:
None
"""
assert type(shape).__name__ == 'function', 'shape should be a function'
assert type(layer).__name__ == 'function', 'layer should be a function'
if type(kind) is str:
kind = [kind]
else:
assert type(
kind) is list, 'invalid param "kind" for register, not a list or str'
for k in kind:
assert type(
k) is str, 'invalid param "kind" for register, not a list of str'
assert k not in g_custom_layers, 'this type[%s] has already been registered' % (
k)
print('register layer[%s]' % (k))
g_custom_layers[k] = {'shape': shape, 'layer': layer}
def get_registered_layers():
return g_custom_layers
......@@ -3,7 +3,7 @@ from google.protobuf import text_format
from .caffe import get_caffe_resolver
from .errors import KaffeError, print_stderr
from .layers import LayerAdapter, LayerType, NodeKind, NodeDispatch
from .shapes import TensorShape
from .shapes import make_tensor
class Node(object):
......@@ -52,7 +52,10 @@ class Graph(object):
def __init__(self, nodes=None, name=None):
self.nodes = nodes or []
self.node_lut = {node.name: node for node in self.nodes}
self.name = name
if name is None or name == '':
self.name = 'MyNet'
else:
self.name = name
def add_node(self, node):
self.nodes.append(node)
......@@ -95,7 +98,7 @@ class Graph(object):
def compute_output_shapes(self):
sorted_nodes = self.topologically_sorted()
for node in sorted_nodes:
node.output_shape = TensorShape(
node.output_shape = make_tensor(
*NodeKind.compute_output_shape(node))
def replaced(self, new_nodes):
......@@ -108,6 +111,7 @@ class Graph(object):
if graph is None:
raise KaffeError('Transformer failed: {}'.format(transformer))
assert isinstance(graph, Graph)
return graph
def __contains__(self, key):
......@@ -120,10 +124,18 @@ class Graph(object):
for node in self.topologically_sorted():
# If the node has learned parameters, display the first one's shape.
# In case of convolutions, this corresponds to the weights.
data_shape = node.data[0].shape if node.data else '--'
out_shape = node.output_shape or '--'
s.append('{:<20} {:<30} {:>20} {:>20}'.format(
node.kind, node.name, data_shape, tuple(out_shape)))
if node.data is None:
data_shape = '--'
out_shape = node.output_shape or '--'
s.append('{:<20} {:<30} {:>20} {:>20}'.format(
node.kind, node.name, data_shape, tuple(out_shape)))
else:
for d in node.data:
#data_shape = node.data[0].shape if node.data else '--'
data_shape = d.shape
out_shape = node.output_shape or '--'
s.append('{:<20} {:<30} {:>20} {:>20}'.format(
node.kind, node.name, data_shape, tuple(out_shape)))
return '\n'.join(s)
......@@ -234,6 +246,7 @@ class GraphBuilder(object):
if (parent_node is None) or (parent_node == node):
parent_node = graph.get_node(input_name)
node.add_parent(parent_node)
if len(layer.top) > 1:
raise KaffeError('Multiple top nodes are not supported.')
......
......@@ -2,6 +2,7 @@ import re
import numbers
from collections import namedtuple
import custom_layers
from .shapes import *
LAYER_DESCRIPTORS = {
......@@ -116,6 +117,9 @@ def get_v1_layer_map():
class NodeKind(LayerType):
@staticmethod
def map_raw_kind(kind):
if custom_layers.has_layer(kind):
return kind
if kind in LAYER_TYPES:
return kind
......@@ -127,6 +131,9 @@ class NodeKind(LayerType):
@staticmethod
def compute_output_shape(node):
if custom_layers.has_layer(node.kind):
return custom_layers.compute_output_shape(node.kind, node)
try:
val = LAYER_DESCRIPTORS[node.kind](node)
return val
......@@ -137,14 +144,13 @@ class NodeKind(LayerType):
class NodeDispatchError(KaffeError):
pass
class NodeDispatch(object):
@staticmethod
def get_handler_name(node_kind):
if len(node_kind) <= 4:
if len(node_kind) <= 6:
# A catch-all for things like ReLU and tanh
return node_kind.lower()
# Convert from CamelCase to under_scored
......@@ -152,6 +158,9 @@ class NodeDispatch(object):
return re.sub('([a-z0-9])([A-Z])', r'\1_\2', name).lower()
def get_handler(self, node_kind, prefix):
if custom_layers.has_layer(node_kind):
return getattr(self, 'map_custom')
name = self.get_handler_name(node_kind)
name = '_'.join((prefix, name))
try:
......@@ -174,8 +183,10 @@ class LayerAdapter(object):
try:
return getattr(self.layer, name)
except AttributeError:
print(dir(self.layer))
raise NodeDispatchError(
'Caffe parameters not found for layer kind: %s' % (self.kind))
'Caffe parameters not found attr[%s] for layer kind[%s]' %
(name, self.kind))
@staticmethod
def get_kernel_value(scalar, repeated, idx, default=None):
......
""" this module is used as a template for generating sub class of Network
"""
class MyNet(object):
### automatically generated by caffe2fluid ###
inputs_info = "INPUTS_INFO"
custom_layers_path = "CAFFE2FLUID_CUSTOM_LAYERS"
def custom_layer_factory(self):
import os
pk_paths = []
default = os.path.dirname(os.path.abspath(__file__))
location = os.environ.get('CAFFE2FLUID_CUSTOM_LAYERS', default)
pk_name = 'custom_layers'
pk_dir = os.path.join(location, pk_name)
pk_paths.append((location, pk_dir))
location = MyNet.custom_layers_path
pk_dir = os.path.join(MyNet.custom_layers_path, pk_name)
pk_paths.append((location, pk_dir))
for loc, pk_dir in pk_paths:
if os.path.exists(pk_dir):
if loc not in sys.path:
sys.path.insert(0, loc)
break
try:
from custom_layers import make_custom_layer
return make_custom_layer
except Exception as e:
print('maybe you should set $CAFFE2FLUID_CUSTOM_LAYERS first')
raise e
@classmethod
def input_shapes(cls):
return cls.inputs_info
@classmethod
def convert(cls, npy_model, fluid_path, outputs=None):
fluid = import_fluid()
shapes = cls.input_shapes()
input_name = shapes.keys()[0]
feed_data = {}
for name, shape in shapes.items():
data_layer = fluid.layers.data(
name=name, shape=shape, dtype="float32")
feed_data[name] = data_layer
net = cls(feed_data)
place = fluid.CPUPlace()
exe = fluid.Executor(place)
exe.run(fluid.default_startup_program())
net.load(data_path=npy_model, exe=exe, place=place)
output_vars = []
if outputs is None:
output_vars.append(net.get_output())
else:
if type(outputs) is list:
for n in outputs:
assert n in net.layers, 'not found layer with this name[%s]' % (
n)
output_vars.append(net.layers[n])
fluid.io.save_inference_model(
fluid_path, [input_name],
output_vars,
exe,
main_program=None,
model_filename='model',
params_filename='params')
return 0
def main():
""" a tool used to convert caffe model to fluid
"""
import sys
import os
filename = os.path.splitext(os.path.basename(sys.argv[0]))[0]
if len(sys.argv) < 3:
print('usage:')
print(' python %s %s.npy [save_dir] [layer names seperated by comma]' \
% (sys.argv[0], filename))
print(' eg: python %s %s.npy ./fluid' % (sys.argv[0], filename))
print(' eg: python %s %s.npy ./fluid layer_name1,layer_name2' \
% (sys.argv[0], filename))
return 1
npy_weight = sys.argv[1]
fluid_model = sys.argv[2]
outputs = None
if len(sys.argv) >= 4:
outputs = sys.argv[3].split(',')
ret = MyNet.convert(npy_weight, fluid_model, outputs)
if ret == 0:
outputs = 'last output layer' if outputs is None else outputs
print('succeed to convert to fluid format with output layers[%s]'
' in directory[%s]' % (outputs, fluid_model))
else:
print('failed to convert model to fluid format')
return ret
def generate_net_code(net_name, inputs_info):
""" generate framework of a custom net code which represent a subclass of Network
Args:
@net_name (str): class name for this net
@inputs_info (str): a str which represents a dict, eg: '{"data": [3, 32, 32]}'
Returns:
net_codes (str): codes for this subclass
"""
import os
import inspect
net_codes = str(inspect.getsource(MyNet))
net_codes = net_codes.replace('MyNet(object)', '%s(Network)' % net_name)
net_codes = net_codes.replace('"INPUTS_INFO"', inputs_info)
custom_layer_dir = os.path.dirname(os.path.abspath(__file__))
net_codes = net_codes.replace('CAFFE2FLUID_CUSTOM_LAYERS', custom_layer_dir)
return net_codes
def generate_main_code(net_name):
""" generate a piece of code for 'main' function
Args:
@net_name (str): class name for this net
Returns:
main_codes (str): codes for this main function
"""
import inspect
main_codes = str(inspect.getsource(main))
main_codes = main_codes.replace('MyNet', net_name)
return main_codes
if __name__ == "__main__":
""" just for testing
"""
print generate_net_code('Attribute', "{'data': [3, 277, 277]}")
print generate_main_code('Attribute')
import math
import sys
import os
import math
import numpy as np
def import_fluid():
import paddle.v2.fluid as fluid
import paddle.fluid as fluid
return fluid
......@@ -64,7 +65,7 @@ class Network(object):
if os.path.isdir(data_path):
assert (exe is not None), \
'must provide a executor to load fluid model'
fluid.io.load_persistables_if_exist(executor=exe, dirname=data_path)
fluid.io.load_persistables(executor=exe, dirname=data_path)
return True
#load model from a npy file
......@@ -161,57 +162,53 @@ class Network(object):
output = fluid.layers.relu(x=input)
return output
def _adjust_pad_if_needed(self, i_hw, k_hw, s_hw, p_hw):
#adjust the padding if needed
i_h, i_w = i_hw
k_h, k_w = k_hw
s_h, s_w = s_hw
p_h, p_w = p_hw
def is_consistent(i, k, s, p):
o = i + 2 * p - k
if o % s == 0:
return True
else:
return False
real_p_h = 0
real_p_w = 0
if is_consistent(i_h, k_h, s_h, p_h) is False:
real_p_h = int(k_h / 2)
if is_consistent(i_w, k_w, s_w, p_w) is False:
real_p_w = int(k_w / 2)
return [real_p_h, real_p_w]
def pool(self, pool_type, input, k_h, k_w, s_h, s_w, name, padding):
def pool(self, pool_type, input, k_h, k_w, s_h, s_w, ceil_mode, padding,
name):
# Get the number of channels in the input
in_hw = input.shape[2:]
k_hw = [k_h, k_w]
s_hw = [s_h, s_w]
if padding is None:
#fix bug about the difference between conv and pool
#more info: https://github.com/BVLC/caffe/issues/1318
padding = self._adjust_pad_if_needed(in_hw, k_hw, s_hw, [0, 0])
fluid = import_fluid()
output = fluid.layers.pool2d(
input=input,
pool_size=k_hw,
pool_stride=s_hw,
pool_padding=padding,
ceil_mode=ceil_mode,
pool_type=pool_type)
return output
@layer
def max_pool(self, input, k_h, k_w, s_h, s_w, name, padding=None):
return self.pool('max', input, k_h, k_w, s_h, s_w, name, padding)
def max_pool(self,
input,
k_h,
k_w,
s_h,
s_w,
ceil_mode,
padding=[0, 0],
name=None):
return self.pool('max', input, k_h, k_w, s_h, s_w, ceil_mode, padding,
name)
@layer
def avg_pool(self,
input,
k_h,
k_w,
s_h,
s_w,
ceil_mode,
padding=[0, 0],
name=None):
return self.pool('avg', input, k_h, k_w, s_h, s_w, ceil_mode, padding,
name)
@layer
def avg_pool(self, input, k_h, k_w, s_h, s_w, name, padding=None):
return self.pool('avg', input, k_h, k_w, s_h, s_w, name, padding)
def sigmoid(self, input, name):
fluid = import_fluid()
return fluid.layers.sigmoid(input)
@layer
def lrn(self, input, radius, alpha, beta, name, bias=1.0):
......@@ -258,7 +255,12 @@ class Network(object):
return output
@layer
def batch_normalization(self, input, name, scale_offset=True, relu=False):
def batch_normalization(self,
input,
name,
scale_offset=True,
eps=1e-5,
relu=False):
# NOTE: Currently, only inference is supported
fluid = import_fluid()
prefix = name + '_'
......@@ -276,7 +278,7 @@ class Network(object):
bias_attr=bias_attr,
moving_mean_name=mean_name,
moving_variance_name=variance_name,
epsilon=1e-5,
epsilon=eps,
act='relu' if relu is True else None)
return output
......@@ -287,3 +289,16 @@ class Network(object):
output = fluid.layers.dropout(
input, dropout_prob=drop_prob, is_test=is_test, name=name)
return output
def custom_layer_factory(self):
""" get a custom layer maker provided by subclass
"""
raise NotImplementedError(
'[custom_layer_factory] must be implemented by the subclass.')
@layer
def custom_layer(self, inputs, kind, name, *args, **kwargs):
""" make custom layer
"""
layer_factory = self.custom_layer_factory()
return layer_factory(kind, inputs, name, *args, **kwargs)
......@@ -109,9 +109,17 @@ class TensorFlowMapper(NodeMapper):
# Stochastic pooling, for instance.
raise KaffeError('Unsupported pooling type.')
(kernel_params, padding) = self.get_kernel_params(node)
ceil_mode = getattr(node.layer.parameters, 'ceil_mode', True)
return TensorFlowNode(pool_op, kernel_params.kernel_h,
kernel_params.kernel_w, kernel_params.stride_h,
kernel_params.stride_w, **padding)
kernel_params.stride_w, ceil_mode, **padding)
def map_sigmoid(self, node):
return TensorFlowNode('sigmoid')
def map_custom(self, node):
from .. import custom_layers
return custom_layers.make_node(TensorFlowNode, node.kind, node)
def map_inner_product(self, node):
#TODO: Axis
......@@ -142,7 +150,13 @@ class TensorFlowMapper(NodeMapper):
def map_batch_norm(self, node):
scale_offset = len(node.data) == 4
kwargs = {} if scale_offset else {'scale_offset': False}
#this default value comes from caffe's param in batch_norm
default_eps = 1e-5
kwargs = {'scale_offset': scale_offset}
if node.parameters.eps != default_eps:
kwargs['eps'] = node.parameters.eps
return MaybeActivated(
node, default=False)('batch_normalization', **kwargs)
......@@ -184,18 +198,10 @@ class TensorFlowEmitter(object):
codes.append(network_source + '\n')
return self.statement('\n'.join(codes))
def emit_class_def(self, name):
return self.statement('class %s(Network):' % (name))
def emit_setup_def(self):
return self.statement('def setup(self):')
def emit_shape_def(self, input_nodes):
self.outdent()
func_def = self.statement('@classmethod')
func_def += self.statement('def input_shapes(cls):')
self.indent()
def get_inputs_info(self, input_nodes):
input_shapes = {}
for n in input_nodes:
name = n.name
......@@ -204,42 +210,7 @@ class TensorFlowEmitter(object):
input_shapes[name] = ', '.join(shape)
input_shapes = ['"%s": [%s]' % (n, l) for n, l in input_shapes.items()]
shape_str = ','.join(input_shapes)
func_def += self.statement('return {%s}' % (shape_str))
return '\n\n' + func_def
def emit_convert_def(self, input_nodes):
codes = []
inputs = {}
codes.append('shapes = cls.input_shapes()')
for n in input_nodes:
name = n.name
layer_var = name + '_layer'
layer_def = '%s = fluid.layers.data(name="%s", shape=shapes["%s"],'\
' dtype="float32")' % (layer_var, name, name)
#layer_var, layer_def = data_layer_def(n.name, n.output_shape)
codes.append(layer_def)
inputs[name] = layer_var
input_dict = ','.join(['"%s": %s' % (n, l) for n, l in inputs.items()])
codes.append('feed_data = {' + input_dict + '}')
codes.append('net = cls(feed_data)')
codes.append("place = fluid.CPUPlace()")
codes.append("exe = fluid.Executor(place)")
codes.append("exe.run(fluid.default_startup_program())")
codes.append("net.load(data_path=npy_model, exe=exe, place=place)")
codes.append(
"fluid.io.save_persistables(executor=exe, dirname=fluid_path)")
self.outdent()
func_def = self.statement('@classmethod')
func_def += self.statement('def convert(cls, npy_model, fluid_path):')
self.indent()
func_def += self.statement('import paddle.v2.fluid as fluid')
for l in codes:
func_def += self.statement(l)
return '\n' + func_def
return '{%s}' % (shape_str)
def emit_main_def(self, name):
if name is None:
......@@ -248,13 +219,7 @@ class TensorFlowEmitter(object):
self.prefix = ''
main_def = self.statement('if __name__ == "__main__":')
self.indent()
main_def += self.statement("#usage: python xxxnet.py xxx.npy ./model\n")
main_def += self.statement("import sys")
main_def += self.statement("npy_weight = sys.argv[1]")
main_def += self.statement("fluid_model = sys.argv[2]")
main_def += self.statement("%s.convert(npy_weight, fluid_model)" %
(name))
main_def += self.statement("exit(0)")
main_def += self.statement('exit(main())')
return '\n\n' + main_def
def emit_parents(self, chain):
......@@ -269,10 +234,17 @@ class TensorFlowEmitter(object):
return self.statement('self.' + node.emit())
def emit(self, name, chains, input_nodes=None):
from ..net_template import generate_net_code
from ..net_template import generate_main_code
self.net_name = name
inputs_info = self.get_inputs_info(input_nodes)
s = self.emit_imports()
s += self.emit_class_def(name)
s += generate_net_code(name, inputs_info) + '\n'
self.indent()
# define the net using api
s += self.emit_setup_def()
self.indent()
blocks = []
......@@ -283,8 +255,9 @@ class TensorFlowEmitter(object):
b += self.emit_node(node)
blocks.append(b[:-1])
s = s + '\n\n'.join(blocks)
s += self.emit_shape_def(input_nodes)
s += self.emit_convert_def(input_nodes)
# define the main function
s += '\n\n\n' + generate_main_code(name)
s += self.emit_main_def(name)
return s
......@@ -323,6 +296,7 @@ class Transformer(object):
# (Caffe's GoogLeNet implementation uses slashes)
NodeRenamer(lambda node: node.name.replace('/', '_'))
]
self.graph = graph.transformed(transformers)
# Display the graph
......@@ -334,9 +308,6 @@ class Transformer(object):
transformers = [
# Reshape the parameters to TensorFlow's ordering
DataReshaper({
# (c_o, c_i, h, w) -> (h, w, c_i, c_o) for TF
NodeKind.Convolution: (0, 1, 2, 3),
# (c_o, c_i) -> (c_i, c_o)
NodeKind.InnerProduct: (1, 0)
}),
......
......@@ -3,8 +3,24 @@ from collections import namedtuple
from .errors import KaffeError
TensorShape = namedtuple('TensorShape',
['batch_size', 'channels', 'height', 'width'])
Tensor4DShape = namedtuple('Tensor4DShape',
['batch_size', 'channels', 'height', 'width'])
Tensor2DShape = namedtuple('Tensor2DShape', ['batch_size', 'data'])
ScalarShape = namedtuple('ScalarShape', ['batch_size'])
def make_tensor(batch_size, d1=None, d2=None, d3=None):
if d3 is not None:
return Tensor4DShape(batch_size, d1, d2, d3)
elif d1 is not None and d2 is None:
return Tensor2DShape(batch_size, d1)
elif d1 is None and d2 is None and d3 is None:
return ScalarShape(batch_size)
else:
raise NotImplementedError('invalid params for make_tensor %s' \
% (str((batch_size, d1, d2, d3))))
def get_filter_output_shape(i_h, i_w, params, round_func):
......@@ -23,7 +39,7 @@ def get_strided_kernel_output_shape(node, round_func):
params = node.layer.parameters
has_c_o = hasattr(params, 'num_output')
c = params.num_output if has_c_o else input_shape.channels
return TensorShape(input_shape.batch_size, c, o_h, o_w)
return make_tensor(input_shape.batch_size, c, o_h, o_w)
def shape_not_implemented(node):
......@@ -36,7 +52,7 @@ def shape_identity(node):
def shape_scalar(node):
return TensorShape(1, 1, 1, 1)
return make_tensor(1, 1, 1, 1)
def shape_data(node):
......@@ -59,7 +75,7 @@ def shape_data(node):
def shape_mem_data(node):
params = node.parameters
return TensorShape(params.batch_size, params.channels, params.height,
return make_tensor(params.batch_size, params.channels, params.height,
params.width)
......@@ -79,10 +95,15 @@ def shape_convolution(node):
def shape_pool(node):
return get_strided_kernel_output_shape(node, math.ceil)
ceil_mode = getattr(node.layer.parameters, 'ceil_mode', True)
if ceil_mode is True:
method = math.ceil
else:
method = math.floor
return get_strided_kernel_output_shape(node, method)
def shape_inner_product(node):
input_shape = node.get_only_parent().output_shape
return TensorShape(input_shape.batch_size, node.layer.parameters.num_output,
1, 1)
return make_tensor(input_shape.batch_size, node.layer.parameters.num_output)
......@@ -66,12 +66,14 @@ class DataInjector(object):
def adjust_parameters(self, node, data):
if not self.did_use_pb:
return data
# When using the protobuf-backend, each parameter initially has four dimensions.
# In certain cases (like FC layers), we want to eliminate the singleton dimensions.
# This implementation takes care of the common cases. However, it does leave the
# potential for future issues.
# The Caffe-backend does not suffer from this problem.
data = list(data)
squeeze_indices = [1] # Squeeze biases.
if node.kind == NodeKind.InnerProduct:
squeeze_indices.append(0) # Squeeze FC.
......@@ -80,8 +82,22 @@ class DataInjector(object):
if idx >= len(data):
continue
shape_old = data[idx].shape
data[idx] = np.squeeze(data[idx])
d = data[idx]
assert len(
d.shape
) == 4, 'invalid shape[%s] from caffe when adjust_parameters' % (
str(d.shape))
shape_old = d.shape
sq_axis = None
if idx == 0:
sq_axis = (0, 1)
elif idx == 1:
sq_axis = (0, 1, 2)
else:
continue
data[idx] = np.squeeze(d, axis=sq_axis)
shape_new = data[idx].shape
if len(shape_old) != shape_new:
debug('squeeze idx:%d, with kind:%s,name:%s' % \
......@@ -113,7 +129,10 @@ class DataReshaper(object):
try:
parent = node.get_only_parent()
s = parent.output_shape
return s.height > 1 or s.width > 1
if len(s) == 4:
return s.height > 1 or s.width > 1
else:
return False
except KaffeError:
return False
......@@ -121,25 +140,26 @@ class DataReshaper(object):
try:
return self.mapping[node_kind]
except KeyError:
raise
#raise KaffeError('Ordering not found for node kind: {}'.format(node_kind))
raise KaffeError('Ordering not found for node kind: {}'.format(
node_kind))
def __call__(self, graph):
for node in graph.nodes:
if node.data is None:
continue
if node.kind not in self.reshaped_node_types:
# Check for 2+ dimensional data
if any(len(tensor.shape) > 1 for tensor in node.data):
notice('parmaters not reshaped for node: {}'.format(node))
continue
transpose_order = self.map(node.kind)
weights = node.data[0]
if (node.kind == NodeKind.InnerProduct
) and self.has_spatial_parent(node):
if node.kind == NodeKind.InnerProduct:
# The FC layer connected to the spatial layer needs to be
# re-wired to match the new spatial ordering.
in_shape = node.get_only_parent().output_shape
#in_shape = node.get_only_parent().output_shape
fc_shape = weights.shape
output_channels = fc_shape[0]
weights = weights.reshape((output_channels, -1))
......@@ -178,7 +198,8 @@ class SubNodeFuser(object):
continue
# Rewrite the fused node's children to its parent.
for child in node.children:
child.parents.remove(node)
pos = child.parents.index(node)
child.parents[pos] = parent
parent.add_child(child)
# Disconnect the fused node from the graph.
parent.children.remove(node)
......
文件模式从 100644 更改为 100755
import os
import paddle.v2 as paddle
import paddle.fluid as fluid
import reader
def conv_bn_layer(input, num_filters, filter_size, stride=1, groups=1,
......@@ -65,20 +63,44 @@ def bottleneck_block(input, num_filters, stride, cardinality, reduction_ratio):
return fluid.layers.elementwise_add(x=short, y=scale, act='relu')
def SE_ResNeXt(input, class_dim, infer=False):
cardinality = 64
reduction_ratio = 16
depth = [3, 8, 36, 3]
num_filters = [128, 256, 512, 1024]
conv = conv_bn_layer(
input=input, num_filters=64, filter_size=3, stride=2, act='relu')
conv = conv_bn_layer(
input=conv, num_filters=64, filter_size=3, stride=1, act='relu')
conv = conv_bn_layer(
input=conv, num_filters=128, filter_size=3, stride=1, act='relu')
conv = fluid.layers.pool2d(
input=conv, pool_size=3, pool_stride=2, pool_padding=1, pool_type='max')
def SE_ResNeXt(input, class_dim, infer=False, layers=50):
supported_layers = [50, 152]
if layers not in supported_layers:
print("supported layers are", supported_layers, "but input layer is",
layers)
exit()
if layers == 50:
cardinality = 32
reduction_ratio = 16
depth = [3, 4, 6, 3]
num_filters = [128, 256, 512, 1024]
conv = conv_bn_layer(
input=input, num_filters=64, filter_size=7, stride=2, act='relu')
conv = fluid.layers.pool2d(
input=conv,
pool_size=3,
pool_stride=2,
pool_padding=1,
pool_type='max')
elif layers == 152:
cardinality = 64
reduction_ratio = 16
depth = [3, 8, 36, 3]
num_filters = [128, 256, 512, 1024]
conv = conv_bn_layer(
input=input, num_filters=64, filter_size=3, stride=2, act='relu')
conv = conv_bn_layer(
input=conv, num_filters=64, filter_size=3, stride=1, act='relu')
conv = conv_bn_layer(
input=conv, num_filters=128, filter_size=3, stride=1, act='relu')
conv = fluid.layers.pool2d(
input=conv,
pool_size=3,
pool_stride=2,
pool_padding=1,
pool_type='max')
for block in range(len(depth)):
for i in range(depth[block]):
......@@ -97,93 +119,3 @@ def SE_ResNeXt(input, class_dim, infer=False):
drop = pool
out = fluid.layers.fc(input=drop, size=class_dim, act='softmax')
return out
def train(learning_rate,
batch_size,
num_passes,
init_model=None,
model_save_dir='model',
parallel=True):
class_dim = 1000
image_shape = [3, 224, 224]
image = fluid.layers.data(name='image', shape=image_shape, dtype='float32')
label = fluid.layers.data(name='label', shape=[1], dtype='int64')
if parallel:
places = fluid.layers.get_places()
pd = fluid.layers.ParallelDo(places)
with pd.do():
image_ = pd.read_input(image)
label_ = pd.read_input(label)
out = SE_ResNeXt(input=image_, class_dim=class_dim)
cost = fluid.layers.cross_entropy(input=out, label=label_)
avg_cost = fluid.layers.mean(x=cost)
accuracy = fluid.layers.accuracy(input=out, label=label_)
pd.write_output(avg_cost)
pd.write_output(accuracy)
avg_cost, accuracy = pd()
avg_cost = fluid.layers.mean(x=avg_cost)
accuracy = fluid.layers.mean(x=accuracy)
else:
out = SE_ResNeXt(input=image, class_dim=class_dim)
cost = fluid.layers.cross_entropy(input=out, label=label)
avg_cost = fluid.layers.mean(x=cost)
accuracy = fluid.layers.accuracy(input=out, label=label)
optimizer = fluid.optimizer.Momentum(
learning_rate=learning_rate,
momentum=0.9,
regularization=fluid.regularizer.L2Decay(1e-4))
opts = optimizer.minimize(avg_cost)
inference_program = fluid.default_main_program().clone()
with fluid.program_guard(inference_program):
inference_program = fluid.io.get_inference_program([avg_cost, accuracy])
place = fluid.CUDAPlace(0)
exe = fluid.Executor(place)
exe.run(fluid.default_startup_program())
if init_model is not None:
fluid.io.load_persistables(exe, init_model)
train_reader = paddle.batch(reader.train(), batch_size=batch_size)
test_reader = paddle.batch(reader.test(), batch_size=batch_size)
feeder = fluid.DataFeeder(place=place, feed_list=[image, label])
for pass_id in range(num_passes):
for batch_id, data in enumerate(train_reader()):
loss = exe.run(fluid.default_main_program(),
feed=feeder.feed(data),
fetch_list=[avg_cost])
print("Pass {0}, batch {1}, loss {2}".format(pass_id, batch_id,
float(loss[0])))
total_loss = 0.0
total_acc = 0.0
total_batch = 0
for data in test_reader():
loss, acc = exe.run(inference_program,
feed=feeder.feed(data),
fetch_list=[avg_cost, accuracy])
total_loss += float(loss)
total_acc += float(acc)
total_batch += 1
print("End pass {0}, test_loss {1}, test_acc {2}".format(
pass_id, total_loss / total_batch, total_acc / total_batch))
model_path = os.path.join(model_save_dir, str(pass_id))
fluid.io.save_inference_model(model_path, ['image'], [out], exe)
if __name__ == '__main__':
train(
learning_rate=0.1,
batch_size=8,
num_passes=100,
init_model=None,
parallel=False)
import os
import numpy as np
import time
import sys
import paddle.v2 as paddle
import paddle.fluid as fluid
from se_resnext import SE_ResNeXt
import reader
import argparse
import functools
from utility import add_arguments, print_arguments
parser = argparse.ArgumentParser(description=__doc__)
add_arg = functools.partial(add_arguments, argparser=parser)
# yapf: disable
add_arg('batch_size', int, 256, "Minibatch size.")
add_arg('num_layers', int, 50, "How many layers for SE-ResNeXt model.")
add_arg('with_mem_opt', bool, True, "Whether to use memory optimization or not.")
add_arg('parallel_exe', bool, True, "Whether to use ParallelExecutor to train or not.")
# yapf: enable
def train_parallel_do(args,
learning_rate,
batch_size,
num_passes,
init_model=None,
model_save_dir='model',
parallel=True,
use_nccl=True,
lr_strategy=None,
layers=50):
class_dim = 1000
image_shape = [3, 224, 224]
image = fluid.layers.data(name='image', shape=image_shape, dtype='float32')
label = fluid.layers.data(name='label', shape=[1], dtype='int64')
if parallel:
places = fluid.layers.get_places()
pd = fluid.layers.ParallelDo(places, use_nccl=use_nccl)
with pd.do():
image_ = pd.read_input(image)
label_ = pd.read_input(label)
out = SE_ResNeXt(input=image_, class_dim=class_dim, layers=layers)
cost = fluid.layers.cross_entropy(input=out, label=label_)
avg_cost = fluid.layers.mean(x=cost)
acc_top1 = fluid.layers.accuracy(input=out, label=label_, k=1)
acc_top5 = fluid.layers.accuracy(input=out, label=label_, k=5)
pd.write_output(avg_cost)
pd.write_output(acc_top1)
pd.write_output(acc_top5)
avg_cost, acc_top1, acc_top5 = pd()
avg_cost = fluid.layers.mean(x=avg_cost)
acc_top1 = fluid.layers.mean(x=acc_top1)
acc_top5 = fluid.layers.mean(x=acc_top5)
else:
out = SE_ResNeXt(input=image, class_dim=class_dim, layers=layers)
cost = fluid.layers.cross_entropy(input=out, label=label)
avg_cost = fluid.layers.mean(x=cost)
acc_top1 = fluid.layers.accuracy(input=out, label=label, k=1)
acc_top5 = fluid.layers.accuracy(input=out, label=label, k=5)
inference_program = fluid.default_main_program().clone(for_test=True)
if lr_strategy is None:
optimizer = fluid.optimizer.Momentum(
learning_rate=learning_rate,
momentum=0.9,
regularization=fluid.regularizer.L2Decay(1e-4))
else:
bd = lr_strategy["bd"]
lr = lr_strategy["lr"]
optimizer = fluid.optimizer.Momentum(
learning_rate=fluid.layers.piecewise_decay(
boundaries=bd, values=lr),
momentum=0.9,
regularization=fluid.regularizer.L2Decay(1e-4))
opts = optimizer.minimize(avg_cost)
if args.with_mem_opt:
fluid.memory_optimize(fluid.default_main_program())
place = fluid.CUDAPlace(0)
exe = fluid.Executor(place)
exe.run(fluid.default_startup_program())
if init_model is not None:
fluid.io.load_persistables(exe, init_model)
train_reader = paddle.batch(reader.train(), batch_size=batch_size)
test_reader = paddle.batch(reader.test(), batch_size=batch_size)
feeder = fluid.DataFeeder(place=place, feed_list=[image, label])
for pass_id in range(num_passes):
train_info = [[], [], []]
test_info = [[], [], []]
for batch_id, data in enumerate(train_reader()):
t1 = time.time()
loss, acc1, acc5 = exe.run(
fluid.default_main_program(),
feed=feeder.feed(data),
fetch_list=[avg_cost, acc_top1, acc_top5])
t2 = time.time()
period = t2 - t1
train_info[0].append(loss[0])
train_info[1].append(acc1[0])
train_info[2].append(acc5[0])
if batch_id % 10 == 0:
print("Pass {0}, trainbatch {1}, loss {2}, \
acc1 {3}, acc5 {4} time {5}"
.format(pass_id, \
batch_id, loss[0], acc1[0], acc5[0], \
"%2.2f sec" % period))
sys.stdout.flush()
train_loss = np.array(train_info[0]).mean()
train_acc1 = np.array(train_info[1]).mean()
train_acc5 = np.array(train_info[2]).mean()
for data in test_reader():
t1 = time.time()
loss, acc1, acc5 = exe.run(
inference_program,
feed=feeder.feed(data),
fetch_list=[avg_cost, acc_top1, acc_top5])
t2 = time.time()
period = t2 - t1
test_info[0].append(loss[0])
test_info[1].append(acc1[0])
test_info[2].append(acc5[0])
if batch_id % 10 == 0:
print("Pass {0},testbatch {1},loss {2}, \
acc1 {3},acc5 {4},time {5}"
.format(pass_id, \
batch_id, loss[0], acc1[0], acc5[0], \
"%2.2f sec" % period))
sys.stdout.flush()
test_loss = np.array(test_info[0]).mean()
test_acc1 = np.array(test_info[1]).mean()
test_acc5 = np.array(test_info[2]).mean()
print("End pass {0}, train_loss {1}, train_acc1 {2}, train_acc5 {3}, \
test_loss {4}, test_acc1 {5}, test_acc5 {6}"
.format(pass_id, \
train_loss, train_acc1, train_acc5, test_loss, test_acc1, \
test_acc5))
sys.stdout.flush()
model_path = os.path.join(model_save_dir, str(pass_id))
if not os.path.isdir(model_path):
os.makedirs(model_path)
fluid.io.save_persistables(exe, model_path)
def train_parallel_exe(args,
learning_rate,
batch_size,
num_passes,
init_model=None,
model_save_dir='model',
parallel=True,
use_nccl=True,
lr_strategy=None,
layers=50):
class_dim = 1000
image_shape = [3, 224, 224]
image = fluid.layers.data(name='image', shape=image_shape, dtype='float32')
label = fluid.layers.data(name='label', shape=[1], dtype='int64')
out = SE_ResNeXt(input=image, class_dim=class_dim, layers=layers)
cost = fluid.layers.cross_entropy(input=out, label=label)
acc_top1 = fluid.layers.accuracy(input=out, label=label, k=1)
acc_top5 = fluid.layers.accuracy(input=out, label=label, k=5)
avg_cost = fluid.layers.mean(x=cost)
test_program = fluid.default_main_program().clone(for_test=True)
if lr_strategy is None:
optimizer = fluid.optimizer.Momentum(
learning_rate=learning_rate,
momentum=0.9,
regularization=fluid.regularizer.L2Decay(1e-4))
else:
bd = lr_strategy["bd"]
lr = lr_strategy["lr"]
optimizer = fluid.optimizer.Momentum(
learning_rate=fluid.layers.piecewise_decay(
boundaries=bd, values=lr),
momentum=0.9,
regularization=fluid.regularizer.L2Decay(1e-4))
opts = optimizer.minimize(avg_cost)
if args.with_mem_opt:
fluid.memory_optimize(fluid.default_main_program())
place = fluid.CUDAPlace(0)
exe = fluid.Executor(place)
exe.run(fluid.default_startup_program())
if init_model is not None:
fluid.io.load_persistables(exe, init_model)
train_reader = paddle.batch(reader.train(), batch_size=batch_size)
test_reader = paddle.batch(reader.test(), batch_size=batch_size)
feeder = fluid.DataFeeder(place=place, feed_list=[image, label])
train_exe = fluid.ParallelExecutor(use_cuda=True, loss_name=avg_cost.name)
test_exe = fluid.ParallelExecutor(
use_cuda=True, main_program=test_program, share_vars_from=train_exe)
fetch_list = [avg_cost.name, acc_top1.name, acc_top5.name]
for pass_id in range(num_passes):
train_info = [[], [], []]
test_info = [[], [], []]
for batch_id, data in enumerate(train_reader()):
t1 = time.time()
loss, acc1, acc5 = train_exe.run(fetch_list,
feed_dict=feeder.feed(data))
t2 = time.time()
period = t2 - t1
loss = np.mean(np.array(loss))
acc1 = np.mean(np.array(acc1))
acc5 = np.mean(np.array(acc5))
train_info[0].append(loss)
train_info[1].append(acc1)
train_info[2].append(acc5)
if batch_id % 10 == 0:
print("Pass {0}, trainbatch {1}, loss {2}, \
acc1 {3}, acc5 {4} time {5}"
.format(pass_id, \
batch_id, loss, acc1, acc5, \
"%2.2f sec" % period))
sys.stdout.flush()
train_loss = np.array(train_info[0]).mean()
train_acc1 = np.array(train_info[1]).mean()
train_acc5 = np.array(train_info[2]).mean()
for data in test_reader():
t1 = time.time()
loss, acc1, acc5 = test_exe.run(fetch_list,
feed_dict=feeder.feed(data))
t2 = time.time()
period = t2 - t1
loss = np.mean(np.array(loss))
acc1 = np.mean(np.array(acc1))
acc5 = np.mean(np.array(acc5))
test_info[0].append(loss)
test_info[1].append(acc1)
test_info[2].append(acc5)
if batch_id % 10 == 0:
print("Pass {0},testbatch {1},loss {2}, \
acc1 {3},acc5 {4},time {5}"
.format(pass_id, \
batch_id, loss, acc1, acc5, \
"%2.2f sec" % period))
sys.stdout.flush()
test_loss = np.array(test_info[0]).mean()
test_acc1 = np.array(test_info[1]).mean()
test_acc5 = np.array(test_info[2]).mean()
print("End pass {0}, train_loss {1}, train_acc1 {2}, train_acc5 {3}, \
test_loss {4}, test_acc1 {5}, test_acc5 {6}"
.format(pass_id, \
train_loss, train_acc1, train_acc5, test_loss, test_acc1, \
test_acc5))
sys.stdout.flush()
model_path = os.path.join(model_save_dir, str(pass_id))
if not os.path.isdir(model_path):
os.makedirs(model_path)
fluid.io.save_persistables(exe, model_path)
if __name__ == '__main__':
args = parser.parse_args()
print_arguments(args)
epoch_points = [30, 60, 90]
total_images = 1281167
batch_size = args.batch_size
step = int(total_images / batch_size + 1)
bd = [e * step for e in epoch_points]
lr = [0.1, 0.01, 0.001, 0.0001]
lr_strategy = {"bd": bd, "lr": lr}
use_nccl = True
# layers: 50, 152
layers = args.num_layers
method = train_parallel_exe if args.parallel_exe else train_parallel_do
method(
args,
learning_rate=0.1,
batch_size=batch_size,
num_passes=120,
init_model=None,
parallel=True,
use_nccl=True,
lr_strategy=lr_strategy,
layers=layers)
"""A dummy reader for test."""
"""Contains common utility functions."""
# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve.
#
#Licensed under the Apache License, Version 2.0 (the "License");
......@@ -13,40 +13,50 @@
#See the License for the specific language governing permissions and
#limitations under the License.
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import distutils.util
import numpy as np
import paddle.v2 as paddle
from paddle.fluid import core
DATA_SHAPE = [1, 512, 512]
NUM_CLASSES = 20
def print_arguments(args):
"""Print argparse's arguments.
def _read_creater(num_sample=1024, min_seq_len=1, max_seq_len=10):
def reader():
for i in range(num_sample):
sequence_len = np.random.randint(min_seq_len, max_seq_len)
x = np.random.uniform(0.1, 1, DATA_SHAPE).astype("float32")
y = np.random.randint(0, NUM_CLASSES + 1,
[sequence_len]).astype("int32")
yield x, y
Usage:
return reader
.. code-block:: python
parser = argparse.ArgumentParser()
parser.add_argument("name", default="Jonh", type=str, help="User name.")
args = parser.parse_args()
print_arguments(args)
def train(batch_size, num_sample=128):
"""Get train dataset reader."""
return paddle.batch(_read_creater(num_sample=num_sample), batch_size)
:param args: Input argparse.Namespace for printing.
:type args: argparse.Namespace
"""
print("----------- Configuration Arguments -----------")
for arg, value in sorted(vars(args).iteritems()):
print("%s: %s" % (arg, value))
print("------------------------------------------------")
def test(batch_size=1, num_sample=16):
"""Get test dataset reader."""
return paddle.batch(_read_creater(num_sample=num_sample), batch_size)
def add_arguments(argname, type, default, help, argparser, **kwargs):
"""Add argparse's argument.
Usage:
def data_shape():
"""Get image shape in CHW order."""
return DATA_SHAPE
.. code-block:: python
def num_classes():
"""Get number of total classes."""
return NUM_CLASSES
parser = argparse.ArgumentParser()
add_argument("name", str, "Jonh", "User name.", parser)
args = parser.parse_args()
"""
type = distutils.util.strtobool if type == bool else type
argparser.add_argument(
"--" + argname,
default=default,
type=type,
help=help + ' Default: %(default)s.',
**kwargs)
# 语言模型
以下是本例的简要目录结构及说明:
```text
.
├── README.md # 文档
├── train.py # 训练脚本
├── infer.py # 预测脚本
└── utils.py # 通用函数
```
## 简介
循环神经网络语言模型的介绍可以参阅论文[Recurrent Neural Network Regularization](https://arxiv.org/abs/1409.2329),在本例中,我们实现了GRU-RNN语言模型。
## 训练
运行命令 `python train.py` 开始训练模型。
```python
python train.py
```
当前支持的参数可参见[train.py](./train.py) `train_net` 函数
```python
vocab, train_reader, test_reader = utils.prepare_data(
batch_size=20, # batch size
buffer_size=1000, # buffer size, default value is OK
word_freq_threshold=0) # vocabulary related parameter, and words with frequency below this value will be filtered
train(train_reader=train_reader,
vocab=vocab,
network=network,
hid_size=200, # embedding and hidden size
base_lr=1.0, # base learning rate
batch_size=20, # batch size, the same as that in prepare_data
pass_num=12, # the number of passes for training
use_cuda=True, # whether to use GPU card
parallel=False, # whether to be parallel
model_dir="model", # directory to save model
init_low_bound=-0.1, # uniform parameter initialization lower bound
init_high_bound=0.1) # uniform parameter initialization upper bound
```
## 自定义网络结构
可在[train.py](./train.py) `network` 函数中调整网络结构,当前的网络结构如下:
```python
emb = fluid.layers.embedding(input=src, size=[vocab_size, hid_size],
param_attr=fluid.ParamAttr(
initializer=fluid.initializer.Uniform(low=init_low_bound, high=init_high_bound),
learning_rate=emb_lr_x),
is_sparse=True)
fc0 = fluid.layers.fc(input=emb, size=hid_size * 3,
param_attr=fluid.ParamAttr(
initializer=fluid.initializer.Uniform(low=init_low_bound, high=init_high_bound),
learning_rate=gru_lr_x))
gru_h0 = fluid.layers.dynamic_gru(input=fc0, size=hid_size,
param_attr=fluid.ParamAttr(
initializer=fluid.initializer.Uniform(low=init_low_bound, high=init_high_bound),
learning_rate=gru_lr_x))
fc = fluid.layers.fc(input=gru_h0, size=vocab_size, act='softmax',
param_attr=fluid.ParamAttr(
initializer=fluid.initializer.Uniform(low=init_low_bound, high=init_high_bound),
learning_rate=fc_lr_x))
cost = fluid.layers.cross_entropy(input=fc, label=dst)
```
## 训练结果示例
我们在Tesla K40m单GPU卡上训练的日志如下所示
```text
epoch_1 start
step:100 ppl:771.053
step:200 ppl:449.597
step:300 ppl:642.654
step:400 ppl:458.128
step:500 ppl:510.912
step:600 ppl:451.545
step:700 ppl:364.404
step:800 ppl:324.272
step:900 ppl:360.797
step:1000 ppl:275.761
step:1100 ppl:294.599
step:1200 ppl:335.877
step:1300 ppl:185.262
step:1400 ppl:241.744
step:1500 ppl:211.507
step:1600 ppl:233.431
step:1700 ppl:298.767
step:1800 ppl:203.403
step:1900 ppl:158.828
step:2000 ppl:171.148
step:2100 ppl:280.884
epoch:1 num_steps:2104 time_cost(s):47.478780
model saved in model/epoch_1
epoch_2 start
step:100 ppl:238.099
step:200 ppl:136.527
step:300 ppl:204.184
step:400 ppl:252.886
step:500 ppl:177.377
step:600 ppl:197.688
step:700 ppl:131.650
step:800 ppl:223.906
step:900 ppl:144.785
step:1000 ppl:176.286
step:1100 ppl:148.158
step:1200 ppl:203.581
step:1300 ppl:168.208
step:1400 ppl:159.412
step:1500 ppl:114.032
step:1600 ppl:157.985
step:1700 ppl:147.743
step:1800 ppl:88.676
step:1900 ppl:141.962
step:2000 ppl:106.087
step:2100 ppl:122.709
epoch:2 num_steps:2104 time_cost(s):47.583789
model saved in model/epoch_2
...
```
## 预测
运行命令 `python infer.py model_dir start_epoch last_epoch(inclusive)` 开始预测,其中,start_epoch指定开始预测的轮次,last_epoch指定结束的轮次,例如
```python
python infer.py model 1 12 # prediction from epoch 1 to epoch 12
```
## 预测结果示例
```text
model:model/epoch_1 ppl:254.540 time_cost(s):3.29
model:model/epoch_2 ppl:177.671 time_cost(s):3.27
model:model/epoch_3 ppl:156.251 time_cost(s):3.27
model:model/epoch_4 ppl:139.036 time_cost(s):3.27
model:model/epoch_5 ppl:132.661 time_cost(s):3.27
model:model/epoch_6 ppl:130.092 time_cost(s):3.28
model:model/epoch_7 ppl:128.751 time_cost(s):3.27
model:model/epoch_8 ppl:125.411 time_cost(s):3.27
model:model/epoch_9 ppl:124.604 time_cost(s):3.28
model:model/epoch_10 ppl:124.754 time_cost(s):3.29
model:model/epoch_11 ppl:125.421 time_cost(s):3.27
model:model/epoch_12 ppl:125.676 time_cost(s):3.27
```
import sys
import time
import math
import unittest
import contextlib
import numpy as np
import paddle.fluid as fluid
import paddle.v2 as paddle
import utils
def infer(test_reader, use_cuda, model_path):
""" inference function """
place = fluid.CUDAPlace(0) if use_cuda else fluid.CPUPlace()
exe = fluid.Executor(place)
with fluid.scope_guard(fluid.core.Scope()):
infer_program, feed_target_names, fetch_vars = fluid.io.load_inference_model(
model_path, exe)
accum_cost = 0.0
accum_words = 0
t0 = time.time()
for data in test_reader():
src_wordseq = utils.to_lodtensor(map(lambda x: x[0], data), place)
dst_wordseq = utils.to_lodtensor(map(lambda x: x[1], data), place)
avg_cost = exe.run(
infer_program,
feed={"src_wordseq": src_wordseq,
"dst_wordseq": dst_wordseq},
fetch_list=fetch_vars)
nwords = src_wordseq.lod()[0][-1]
cost = np.array(avg_cost) * nwords
accum_cost += cost
accum_words += nwords
ppl = math.exp(accum_cost / accum_words)
t1 = time.time()
print("model:%s ppl:%.3f time_cost(s):%.2f" %
(model_path, ppl, t1 - t0))
if __name__ == "__main__":
if len(sys.argv) != 4:
print("Usage: %s model_dir start_epoch last_epoch(inclusive)")
exit(0)
model_dir = sys.argv[1]
try:
start_index = int(sys.argv[2])
last_index = int(sys.argv[3])
except:
print("Usage: %s model_dir start_epoch last_epoch(inclusive)")
exit(-1)
vocab, train_reader, test_reader = utils.prepare_data(
batch_size=20, buffer_size=1000, word_freq_threshold=0)
for epoch in xrange(start_index, last_index + 1):
epoch_path = model_dir + "/epoch_" + str(epoch)
infer(test_reader=test_reader, use_cuda=True, model_path=epoch_path)
import sys
import time
import numpy as np
import math
import paddle.fluid as fluid
import paddle.v2 as paddle
import utils
def network(src, dst, vocab_size, hid_size, init_low_bound, init_high_bound):
""" network definition """
emb_lr_x = 10.0
gru_lr_x = 1.0
fc_lr_x = 1.0
emb = fluid.layers.embedding(
input=src,
size=[vocab_size, hid_size],
param_attr=fluid.ParamAttr(
initializer=fluid.initializer.Uniform(
low=init_low_bound, high=init_high_bound),
learning_rate=emb_lr_x),
is_sparse=True)
fc0 = fluid.layers.fc(input=emb,
size=hid_size * 3,
param_attr=fluid.ParamAttr(
initializer=fluid.initializer.Uniform(
low=init_low_bound, high=init_high_bound),
learning_rate=gru_lr_x))
gru_h0 = fluid.layers.dynamic_gru(
input=fc0,
size=hid_size,
param_attr=fluid.ParamAttr(
initializer=fluid.initializer.Uniform(
low=init_low_bound, high=init_high_bound),
learning_rate=gru_lr_x))
fc = fluid.layers.fc(input=gru_h0,
size=vocab_size,
act='softmax',
param_attr=fluid.ParamAttr(
initializer=fluid.initializer.Uniform(
low=init_low_bound, high=init_high_bound),
learning_rate=fc_lr_x))
cost = fluid.layers.cross_entropy(input=fc, label=dst)
return cost
def train(train_reader,
vocab,
network,
hid_size,
base_lr,
batch_size,
pass_num,
use_cuda,
parallel,
model_dir,
init_low_bound=-0.04,
init_high_bound=0.04):
""" train network """
vocab_size = len(vocab)
src_wordseq = fluid.layers.data(
name="src_wordseq", shape=[1], dtype="int64", lod_level=1)
dst_wordseq = fluid.layers.data(
name="dst_wordseq", shape=[1], dtype="int64", lod_level=1)
avg_cost = None
if not parallel:
cost = network(src_wordseq, dst_wordseq, vocab_size, hid_size,
init_low_bound, init_high_bound)
avg_cost = fluid.layers.mean(x=cost)
else:
places = fluid.layers.get_places()
pd = fluid.layers.ParallelDo(places)
with pd.do():
cost = network(
pd.read_input(src_wordseq),
pd.read_input(dst_wordseq), vocab_size, hid_size,
init_low_bound, init_high_bound)
pd.write_output(cost)
cost = pd()
avg_cost = fluid.layers.mean(x=cost)
sgd_optimizer = fluid.optimizer.SGD(
learning_rate=fluid.layers.exponential_decay(
learning_rate=base_lr,
decay_steps=2100 * 4,
decay_rate=0.5,
staircase=True))
sgd_optimizer.minimize(avg_cost)
place = fluid.CUDAPlace(0) if use_cuda else fluid.CPUPlace()
exe = fluid.Executor(place)
exe.run(fluid.default_startup_program())
total_time = 0.0
for pass_idx in xrange(pass_num):
epoch_idx = pass_idx + 1
print "epoch_%d start" % epoch_idx
t0 = time.time()
i = 0
for data in train_reader():
i += 1
lod_src_wordseq = utils.to_lodtensor(
map(lambda x: x[0], data), place)
lod_dst_wordseq = utils.to_lodtensor(
map(lambda x: x[1], data), place)
ret_avg_cost = exe.run(fluid.default_main_program(),
feed={
"src_wordseq": lod_src_wordseq,
"dst_wordseq": lod_dst_wordseq
},
fetch_list=[avg_cost],
use_program_cache=True)
avg_ppl = math.exp(ret_avg_cost[0])
if i % 100 == 0:
print "step:%d ppl:%.3f" % (i, avg_ppl)
t1 = time.time()
total_time += t1 - t0
print "epoch:%d num_steps:%d time_cost(s):%f" % (epoch_idx, i,
total_time / epoch_idx)
save_dir = "%s/epoch_%d" % (model_dir, epoch_idx)
feed_var_names = ["src_wordseq", "dst_wordseq"]
fetch_vars = [avg_cost]
fluid.io.save_inference_model(save_dir, feed_var_names, fetch_vars, exe)
print("model saved in %s" % save_dir)
print("finish training")
def train_net():
""" do training """
batch_size = 20
vocab, train_reader, test_reader = utils.prepare_data(
batch_size=batch_size, buffer_size=1000, word_freq_threshold=0)
train(
train_reader=train_reader,
vocab=vocab,
network=network,
hid_size=200,
base_lr=1.0,
batch_size=batch_size,
pass_num=12,
use_cuda=True,
parallel=False,
model_dir="model",
init_low_bound=-0.1,
init_high_bound=0.1)
if __name__ == "__main__":
train_net()
import sys
import time
import numpy as np
import paddle.fluid as fluid
import paddle.v2 as paddle
def to_lodtensor(data, place):
""" convert to LODtensor """
seq_lens = [len(seq) for seq in data]
cur_len = 0
lod = [cur_len]
for l in seq_lens:
cur_len += l
lod.append(cur_len)
flattened_data = np.concatenate(data, axis=0).astype("int64")
flattened_data = flattened_data.reshape([len(flattened_data), 1])
res = fluid.LoDTensor()
res.set(flattened_data, place)
res.set_lod([lod])
return res
def prepare_data(batch_size, buffer_size=1000, word_freq_threshold=0):
""" prepare the English Pann Treebank (PTB) data """
vocab = paddle.dataset.imikolov.build_dict(word_freq_threshold)
train_reader = paddle.batch(
paddle.reader.shuffle(
paddle.dataset.imikolov.train(
vocab,
buffer_size,
data_type=paddle.dataset.imikolov.DataType.SEQ),
buf_size=buffer_size),
batch_size)
test_reader = paddle.batch(
paddle.dataset.imikolov.test(
vocab, buffer_size, data_type=paddle.dataset.imikolov.DataType.SEQ),
batch_size)
return vocab, train_reader, test_reader
......@@ -15,6 +15,9 @@ class TrainTaskConfig(object):
# the parameters for learning rate scheduling.
warmup_steps = 4000
# the flag indicating to use average loss or sum loss when training.
use_avg_cost = False
# the directory for saving trained models.
model_dir = "trained_models"
......@@ -22,8 +25,7 @@ class TrainTaskConfig(object):
class InferTaskConfig(object):
use_gpu = False
# the number of examples in one run for sequence generation.
# currently the batch size can only be set to 1.
batch_size = 1
batch_size = 10
# the parameters for beam search.
beam_size = 5
......@@ -31,37 +33,38 @@ class InferTaskConfig(object):
# the number of decoded sentences to output.
n_best = 1
# the flags indicating whether to output the special tokens.
output_bos = False
output_eos = False
output_unk = False
# the directory for loading the trained model.
model_path = "trained_models/pass_1.infer.model"
class ModelHyperParams(object):
# Dictionary size for source and target language. This model directly uses
# paddle.dataset.wmt16 in which <bos>, <eos> and <unk> token has
# alreay been added, but the <pad> token is not added. Transformer requires
# sequences in a mini-batch are padded to have the same length. A <pad> token is
# added into the original dictionary in paddle.dateset.wmt16.
# This model directly uses paddle.dataset.wmt16 in which <bos>, <eos> and
# <unk> token has alreay been added. As for the <pad> token, any token
# included in dict can be used to pad, since the paddings' loss will be
# masked out and make no effect on parameter gradients.
# size of source word dictionary.
src_vocab_size = 10000
# index for <pad> token in source language.
src_pad_idx = src_vocab_size
# size of target word dictionay
trg_vocab_size = 10000
# index for <pad> token in target language.
trg_pad_idx = trg_vocab_size
# index for <bos> token
bos_idx = 0
# index for <eos> token
eos_idx = 1
# index for <unk> token
unk_idx = 2
# position value corresponding to the <pad> token.
pos_pad_idx = 0
# max length of sequences. It should plus 1 to include position
# padding token for position encoding.
# max length of sequences.
# The size of position encoding table should at least plus 1, since the
# sinusoid position encoding starts from 1 and 0 can be used as the padding
# token for position encoding.
max_length = 50
# the dimension for word embeddings, which is also the last dimension of
......@@ -92,7 +95,10 @@ pos_enc_param_names = (
encoder_input_data_names = (
"src_word",
"src_pos",
"src_slf_attn_bias", )
"src_slf_attn_bias",
"src_data_shape",
"src_slf_attn_pre_softmax_shape",
"src_slf_attn_post_softmax_shape", )
# Names of all data layers in decoder listed in order.
decoder_input_data_names = (
......@@ -100,6 +106,11 @@ decoder_input_data_names = (
"trg_pos",
"trg_slf_attn_bias",
"trg_src_attn_bias",
"trg_data_shape",
"trg_slf_attn_pre_softmax_shape",
"trg_slf_attn_post_softmax_shape",
"trg_src_attn_pre_softmax_shape",
"trg_src_attn_post_softmax_shape",
"enc_output", )
# Names of label related data layers listed in order.
......
import numpy as np
import paddle.v2 as paddle
import paddle
import paddle.fluid as fluid
import model
......@@ -11,10 +11,26 @@ from config import InferTaskConfig, ModelHyperParams, \
from train import pad_batch_data
def translate_batch(exe, src_words, encoder, enc_in_names, enc_out_names,
decoder, dec_in_names, dec_out_names, beam_size, max_length,
n_best, batch_size, n_head, src_pad_idx, trg_pad_idx,
bos_idx, eos_idx):
def translate_batch(exe,
src_words,
encoder,
enc_in_names,
enc_out_names,
decoder,
dec_in_names,
dec_out_names,
beam_size,
max_length,
n_best,
batch_size,
n_head,
d_model,
src_pad_idx,
trg_pad_idx,
bos_idx,
eos_idx,
unk_idx,
output_unk=True):
"""
Run the encoder program once and run the decoder program multiple times to
implement beam search externally.
......@@ -25,9 +41,21 @@ def translate_batch(exe, src_words, encoder, enc_in_names, enc_out_names,
src_pad_idx,
n_head,
is_target=False,
return_pos=True,
is_label=False,
return_attn_bias=True,
return_max_len=True)
return_max_len=False)
# Append the data shape input to reshape the output of embedding layer.
enc_in_data = enc_in_data + [
np.array(
[-1, enc_in_data[2].shape[-1], d_model], dtype="int32")
]
# Append the shape inputs to reshape before and after softmax in encoder
# self attention.
enc_in_data = enc_in_data + [
np.array(
[-1, enc_in_data[2].shape[-1]], dtype="int32"), np.array(
enc_in_data[2].shape, dtype="int32")
]
enc_output = exe.run(encoder,
feed=dict(zip(enc_in_names, enc_in_data)),
fetch_list=enc_out_names)[0]
......@@ -35,13 +63,18 @@ def translate_batch(exe, src_words, encoder, enc_in_names, enc_out_names,
# Beam Search.
# To store the beam info.
scores = np.zeros((batch_size, beam_size), dtype="float32")
prev_branchs = [[]] * batch_size
next_ids = [[]] * batch_size
# Use beam_map to map the instance idx in batch to beam idx, since the
prev_branchs = [[] for i in range(batch_size)]
next_ids = [[] for i in range(batch_size)]
# Use beam_inst_map to map beam idx to the instance idx in batch, since the
# size of feeded batch is changing.
beam_map = range(batch_size)
beam_inst_map = {
beam_idx: inst_idx
for inst_idx, beam_idx in enumerate(range(batch_size))
}
# Use active_beams to recode the alive.
active_beams = range(batch_size)
def beam_backtrace(prev_branchs, next_ids, n_best=beam_size, add_bos=True):
def beam_backtrace(prev_branchs, next_ids, n_best=beam_size):
"""
Decode and select n_best sequences for one instance by backtrace.
"""
......@@ -53,7 +86,8 @@ def translate_batch(exe, src_words, encoder, enc_in_names, enc_out_names,
seq.append(next_ids[j][k])
k = prev_branchs[j][k]
seq = seq[::-1]
seq = [bos_idx] + seq if add_bos else seq
# Add the <bos>, since next_ids don't include the <bos>.
seq = [bos_idx] + seq
seqs.append(seq)
return seqs
......@@ -64,8 +98,8 @@ def translate_batch(exe, src_words, encoder, enc_in_names, enc_out_names,
trg_words = np.array(
[[bos_idx]] * batch_size * beam_size, dtype="int64")
trg_pos = np.array([[1]] * batch_size * beam_size, dtype="int64")
src_max_length, src_slf_attn_bias, trg_max_len = enc_in_data[
-1], enc_in_data[-2], 1
src_max_length, src_slf_attn_bias, trg_max_len = enc_in_data[2].shape[
-1], enc_in_data[2], 1
# This is used to remove attention on subsequent words.
trg_slf_attn_bias = np.ones((batch_size * beam_size, trg_max_len,
trg_max_len))
......@@ -75,22 +109,47 @@ def translate_batch(exe, src_words, encoder, enc_in_names, enc_out_names,
[-1e9]).astype("float32")
# This is used to remove attention on the paddings of source sequences.
trg_src_attn_bias = np.tile(
src_slf_attn_bias[:, :, ::src_max_length, :],
[beam_size, 1, trg_max_len, 1])
enc_output = np.tile(enc_output, [beam_size, 1, 1])
return trg_words, trg_pos, trg_slf_attn_bias, trg_src_attn_bias, enc_output
src_slf_attn_bias[:, :, ::src_max_length, :][:, np.newaxis],
[1, beam_size, 1, trg_max_len, 1]).reshape([
-1, src_slf_attn_bias.shape[1], trg_max_len,
src_slf_attn_bias.shape[-1]
])
# Append the shape input to reshape the output of embedding layer.
trg_data_shape = np.array(
[batch_size * beam_size, trg_max_len, d_model], dtype="int32")
# Append the shape inputs to reshape before and after softmax in
# decoder self attention.
trg_slf_attn_pre_softmax_shape = np.array(
[-1, trg_slf_attn_bias.shape[-1]], dtype="int32")
trg_slf_attn_post_softmax_shape = np.array(
trg_slf_attn_bias.shape, dtype="int32")
# Append the shape inputs to reshape before and after softmax in
# encoder-decoder attention.
trg_src_attn_pre_softmax_shape = np.array(
[-1, trg_src_attn_bias.shape[-1]], dtype="int32")
trg_src_attn_post_softmax_shape = np.array(
trg_src_attn_bias.shape, dtype="int32")
enc_output = np.tile(
enc_output[:, np.newaxis], [1, beam_size, 1, 1]).reshape(
[-1, enc_output.shape[-2], enc_output.shape[-1]])
return trg_words, trg_pos, trg_slf_attn_bias, trg_src_attn_bias, \
trg_data_shape, trg_slf_attn_pre_softmax_shape, \
trg_slf_attn_post_softmax_shape, trg_src_attn_pre_softmax_shape, \
trg_src_attn_post_softmax_shape, enc_output
def update_dec_in_data(dec_in_data, next_ids, active_beams):
def update_dec_in_data(dec_in_data, next_ids, active_beams, beam_inst_map):
"""
Update the input data of decoder mainly by slicing from the previous
input data and dropping the finished instance beams.
"""
trg_words, trg_pos, trg_slf_attn_bias, trg_src_attn_bias, enc_output = dec_in_data
trg_cur_len = len(next_ids[0]) + 1 # include the <bos>
trg_words, trg_pos, trg_slf_attn_bias, trg_src_attn_bias, \
trg_data_shape, trg_slf_attn_pre_softmax_shape, \
trg_slf_attn_post_softmax_shape, trg_src_attn_pre_softmax_shape, \
trg_src_attn_post_softmax_shape, enc_output = dec_in_data
trg_cur_len = trg_slf_attn_bias.shape[-1] + 1
trg_words = np.array(
[
beam_backtrace(
prev_branchs[beam_idx], next_ids[beam_idx], add_bos=True)
beam_backtrace(prev_branchs[beam_idx], next_ids[beam_idx])
for beam_idx in active_beams
],
dtype="int64")
......@@ -98,6 +157,7 @@ def translate_batch(exe, src_words, encoder, enc_in_names, enc_out_names,
trg_pos = np.array(
[range(1, trg_cur_len + 1)] * len(active_beams) * beam_size,
dtype="int64").reshape([-1, 1])
active_beams = [beam_inst_map[beam_idx] for beam_idx in active_beams]
active_beams_indice = (
(np.array(active_beams) * beam_size)[:, np.newaxis] +
np.array(range(beam_size))[np.newaxis, :]).flatten()
......@@ -112,8 +172,27 @@ def translate_batch(exe, src_words, encoder, enc_in_names, enc_out_names,
trg_src_attn_bias = np.tile(trg_src_attn_bias[
active_beams_indice, :, ::trg_src_attn_bias.shape[2], :],
[1, 1, trg_cur_len, 1])
# Append the shape input to reshape the output of embedding layer.
trg_data_shape = np.array(
[len(active_beams) * beam_size, trg_cur_len, d_model],
dtype="int32")
# Append the shape inputs to reshape before and after softmax in
# decoder self attention.
trg_slf_attn_pre_softmax_shape = np.array(
[-1, trg_slf_attn_bias.shape[-1]], dtype="int32")
trg_slf_attn_post_softmax_shape = np.array(
trg_slf_attn_bias.shape, dtype="int32")
# Append the shape inputs to reshape before and after softmax in
# encoder-decoder attention.
trg_src_attn_pre_softmax_shape = np.array(
[-1, trg_src_attn_bias.shape[-1]], dtype="int32")
trg_src_attn_post_softmax_shape = np.array(
trg_src_attn_bias.shape, dtype="int32")
enc_output = enc_output[active_beams_indice, :, :]
return trg_words, trg_pos, trg_slf_attn_bias, trg_src_attn_bias, enc_output
return trg_words, trg_pos, trg_slf_attn_bias, trg_src_attn_bias, \
trg_data_shape, trg_slf_attn_pre_softmax_shape, \
trg_slf_attn_post_softmax_shape, trg_src_attn_pre_softmax_shape, \
trg_src_attn_post_softmax_shape, enc_output
dec_in_data = init_dec_in_data(batch_size, beam_size, enc_in_data,
enc_output)
......@@ -122,13 +201,18 @@ def translate_batch(exe, src_words, encoder, enc_in_names, enc_out_names,
feed=dict(zip(dec_in_names, dec_in_data)),
fetch_list=dec_out_names)[0]
predict_all = np.log(
predict_all.reshape([len(beam_map) * beam_size, i + 1, -1])[:,
-1, :])
predict_all = (predict_all + scores[beam_map].reshape(
[len(beam_map) * beam_size, -1])).reshape(
[len(beam_map), beam_size, -1])
predict_all.reshape([len(beam_inst_map) * beam_size, i + 1, -1])
[:, -1, :])
predict_all = (predict_all + scores[active_beams].reshape(
[len(beam_inst_map) * beam_size, -1])).reshape(
[len(beam_inst_map), beam_size, -1])
if not output_unk: # To exclude the <unk> token.
predict_all[:, :, unk_idx] = -1e9
active_beams = []
for inst_idx, beam_idx in enumerate(beam_map):
for beam_idx in range(batch_size):
if not beam_inst_map.has_key(beam_idx):
continue
inst_idx = beam_inst_map[beam_idx]
predict = (predict_all[inst_idx, :, :]
if i != 0 else predict_all[inst_idx, 0, :]).flatten()
top_k_indice = np.argpartition(predict, -beam_size)[-beam_size:]
......@@ -141,13 +225,20 @@ def translate_batch(exe, src_words, encoder, enc_in_names, enc_out_names,
next_ids[beam_idx].append(top_scores_ids % predict_all.shape[-1])
if next_ids[beam_idx][-1][0] != eos_idx:
active_beams.append(beam_idx)
beam_map = active_beams
if len(beam_map) == 0:
if len(active_beams) == 0:
break
dec_in_data = update_dec_in_data(dec_in_data, next_ids, active_beams)
dec_in_data = update_dec_in_data(dec_in_data, next_ids, active_beams,
beam_inst_map)
beam_inst_map = {
beam_idx: inst_idx
for inst_idx, beam_idx in enumerate(active_beams)
}
# Decode beams and select n_best sequences for each instance by backtrace.
seqs = [beam_backtrace(prev_branchs[beam_idx], next_ids[beam_idx], n_best)]
seqs = [
beam_backtrace(prev_branchs[beam_idx], next_ids[beam_idx], n_best)
for beam_idx in range(batch_size)
]
return seqs, scores[:, :n_best].tolist()
......@@ -155,29 +246,24 @@ def translate_batch(exe, src_words, encoder, enc_in_names, enc_out_names,
def main():
place = fluid.CUDAPlace(0) if InferTaskConfig.use_gpu else fluid.CPUPlace()
exe = fluid.Executor(place)
# The current program desc is coupled with batch_size and the only
# supported batch size is 1 currently.
encoder_program = fluid.Program()
model.batch_size = InferTaskConfig.batch_size
with fluid.program_guard(main_program=encoder_program):
enc_output = encoder(
ModelHyperParams.src_vocab_size + 1,
ModelHyperParams.max_length + 1, ModelHyperParams.n_layer,
ModelHyperParams.n_head, ModelHyperParams.d_key,
ModelHyperParams.d_value, ModelHyperParams.d_model,
ModelHyperParams.d_inner_hid, ModelHyperParams.dropout,
ModelHyperParams.src_pad_idx, ModelHyperParams.pos_pad_idx)
model.batch_size = InferTaskConfig.batch_size * InferTaskConfig.beam_size
ModelHyperParams.src_vocab_size, ModelHyperParams.max_length + 1,
ModelHyperParams.n_layer, ModelHyperParams.n_head,
ModelHyperParams.d_key, ModelHyperParams.d_value,
ModelHyperParams.d_model, ModelHyperParams.d_inner_hid,
ModelHyperParams.dropout)
decoder_program = fluid.Program()
with fluid.program_guard(main_program=decoder_program):
predict = decoder(
ModelHyperParams.trg_vocab_size + 1,
ModelHyperParams.max_length + 1, ModelHyperParams.n_layer,
ModelHyperParams.n_head, ModelHyperParams.d_key,
ModelHyperParams.d_value, ModelHyperParams.d_model,
ModelHyperParams.d_inner_hid, ModelHyperParams.dropout,
ModelHyperParams.trg_pad_idx, ModelHyperParams.pos_pad_idx)
ModelHyperParams.trg_vocab_size, ModelHyperParams.max_length + 1,
ModelHyperParams.n_layer, ModelHyperParams.n_head,
ModelHyperParams.d_key, ModelHyperParams.d_value,
ModelHyperParams.d_model, ModelHyperParams.d_inner_hid,
ModelHyperParams.dropout)
# Load model parameters of encoder and decoder separately from the saved
# transformer model.
......@@ -214,17 +300,51 @@ def main():
trg_idx2word = paddle.dataset.wmt16.get_dict(
"de", dict_size=ModelHyperParams.trg_vocab_size, reverse=True)
def post_process_seq(seq,
bos_idx=ModelHyperParams.bos_idx,
eos_idx=ModelHyperParams.eos_idx,
output_bos=InferTaskConfig.output_bos,
output_eos=InferTaskConfig.output_eos):
"""
Post-process the beam-search decoded sequence. Truncate from the first
<eos> and remove the <bos> and <eos> tokens currently.
"""
eos_pos = len(seq) - 1
for i, idx in enumerate(seq):
if idx == eos_idx:
eos_pos = i
break
seq = seq[:eos_pos + 1]
return filter(
lambda idx: (output_bos or idx != bos_idx) and \
(output_eos or idx != eos_idx),
seq)
for batch_id, data in enumerate(test_data()):
batch_seqs, batch_scores = translate_batch(
exe, [item[0] for item in data], encoder_program,
encoder_input_data_names, [enc_output.name], decoder_program,
decoder_input_data_names, [predict.name], InferTaskConfig.beam_size,
InferTaskConfig.max_length, InferTaskConfig.n_best,
len(data), ModelHyperParams.n_head, ModelHyperParams.src_pad_idx,
ModelHyperParams.trg_pad_idx, ModelHyperParams.bos_idx,
ModelHyperParams.eos_idx)
exe,
[item[0] for item in data],
encoder_program,
encoder_input_data_names,
[enc_output.name],
decoder_program,
decoder_input_data_names,
[predict.name],
InferTaskConfig.beam_size,
InferTaskConfig.max_length,
InferTaskConfig.n_best,
len(data),
ModelHyperParams.n_head,
ModelHyperParams.d_model,
ModelHyperParams.eos_idx, # Use eos_idx to pad.
ModelHyperParams.eos_idx, # Use eos_idx to pad.
ModelHyperParams.bos_idx,
ModelHyperParams.eos_idx,
ModelHyperParams.unk_idx,
output_unk=InferTaskConfig.output_unk)
for i in range(len(batch_seqs)):
seqs = batch_seqs[i]
# Post-process the beam-search decoded sequences.
seqs = map(post_process_seq, batch_seqs[i])
scores = batch_scores[i]
for seq in seqs:
print(" ".join([trg_idx2word[idx] for idx in seq]))
......
......@@ -7,9 +7,6 @@ import paddle.fluid.layers as layers
from config import TrainTaskConfig, pos_enc_param_names, \
encoder_input_data_names, decoder_input_data_names, label_data_names
# FIXME(guosheng): Remove out the batch_size from the model.
batch_size = TrainTaskConfig.batch_size
def position_encoding_init(n_position, d_pos_vec):
"""
......@@ -32,7 +29,9 @@ def multi_head_attention(queries,
d_value,
d_model,
n_head=1,
dropout_rate=0.):
dropout_rate=0.,
pre_softmax_shape=None,
post_softmax_shape=None):
"""
Multi-Head Attention. Note that attn_bias is added to the logit before
computing softmax activiation to mask certain selected positions so that
......@@ -83,9 +82,10 @@ def multi_head_attention(queries,
return x
hidden_size = x.shape[-1]
# FIXME(guosheng): Decouple the program desc with batch_size.
# The value 0 in shape attr means copying the corresponding dimension
# size of the input as the output dimension size.
reshaped = layers.reshape(
x=x, shape=[batch_size, -1, n_head, hidden_size // n_head])
x=x, shape=[0, -1, n_head, hidden_size // n_head])
# permuate the dimensions into:
# [batch_size, n_head, max_sequence_len, hidden_size_per_head]
......@@ -101,36 +101,26 @@ def multi_head_attention(queries,
raise ValueError("Input(x) should be a 4-D Tensor.")
trans_x = layers.transpose(x, perm=[0, 2, 1, 3])
# FIXME(guosheng): Decouple the program desc with batch_size.
# The value 0 in shape attr means copying the corresponding dimension
# size of the input as the output dimension size.
return layers.reshape(
x=trans_x,
shape=map(int,
[batch_size, -1, trans_x.shape[2] * trans_x.shape[3]]))
shape=map(int, [0, -1, trans_x.shape[2] * trans_x.shape[3]]))
def scaled_dot_product_attention(q, k, v, attn_bias, d_model, dropout_rate):
"""
Scaled Dot-Product Attention
"""
# FIXME(guosheng): Optimize the shape in reshape_op or softmax_op.
# The current implementation of softmax_op only supports 2D tensor,
# consequently it cannot be directly used here.
# If to use the reshape_op, Besides, the shape of product inferred in
# compile-time is not the actual shape in run-time. It cann't be used
# to set the attribute of reshape_op.
# So, here define the softmax for temporary solution.
def __softmax(x, eps=1e-9):
exp_out = layers.exp(x=x)
sum_out = layers.reduce_sum(exp_out, dim=-1, keep_dim=False)
return layers.elementwise_div(x=exp_out, y=sum_out, axis=0)
scaled_q = layers.scale(x=q, scale=d_model**-0.5)
product = layers.matmul(x=scaled_q, y=k, transpose_y=True)
weights = __softmax(
layers.elementwise_add(
x=product, y=attn_bias) if attn_bias else product)
weights = layers.reshape(
x=layers.elementwise_add(
x=product, y=attn_bias) if attn_bias else product,
shape=[-1, product.shape[-1]],
actual_shape=pre_softmax_shape,
act="softmax")
weights = layers.reshape(
x=weights, shape=product.shape, actual_shape=post_softmax_shape)
if dropout_rate:
weights = layers.dropout(
weights, dropout_prob=dropout_rate, is_test=False)
......@@ -177,7 +167,7 @@ def positionwise_feed_forward(x, d_inner_hid, d_hid):
return out
def pre_post_process_layer(prev_out, out, process_cmd, dropout=0.):
def pre_post_process_layer(prev_out, out, process_cmd, dropout_rate=0.):
"""
Add residual connection, layer normalization and droput to the out tensor
optionally according to the value of process_cmd.
......@@ -195,8 +185,9 @@ def pre_post_process_layer(prev_out, out, process_cmd, dropout=0.):
param_attr=fluid.initializer.Constant(1.),
bias_attr=fluid.initializer.Constant(0.))
elif cmd == "d": # add dropout
if dropout:
out = layers.dropout(out, dropout_prob=dropout, is_test=False)
if dropout_rate:
out = layers.dropout(
out, dropout_prob=dropout_rate, is_test=False)
return out
......@@ -208,10 +199,9 @@ def prepare_encoder(src_word,
src_pos,
src_vocab_size,
src_emb_dim,
src_pad_idx,
src_max_len,
dropout=0.,
pos_pad_idx=0,
dropout_rate=0.,
src_data_shape=None,
pos_enc_param_name=None):
"""Add word embeddings and position encodings.
The output tensor has a shape of:
......@@ -222,21 +212,20 @@ def prepare_encoder(src_word,
src_word_emb = layers.embedding(
src_word,
size=[src_vocab_size, src_emb_dim],
padding_idx=src_pad_idx,
param_attr=fluid.initializer.Normal(0., 1.))
src_pos_enc = layers.embedding(
src_pos,
size=[src_max_len, src_emb_dim],
padding_idx=pos_pad_idx,
param_attr=fluid.ParamAttr(
name=pos_enc_param_name, trainable=False))
enc_input = src_word_emb + src_pos_enc
# FIXME(guosheng): Decouple the program desc with batch_size.
enc_input = layers.reshape(x=enc_input, shape=[batch_size, -1, src_emb_dim])
enc_input = layers.reshape(
x=enc_input,
shape=[-1, src_max_len, src_emb_dim],
actual_shape=src_data_shape)
return layers.dropout(
enc_input, dropout_prob=dropout,
is_test=False) if dropout else enc_input
enc_input, dropout_prob=dropout_rate,
is_test=False) if dropout_rate else enc_input
prepare_encoder = partial(
......@@ -252,7 +241,9 @@ def encoder_layer(enc_input,
d_value,
d_model,
d_inner_hid,
dropout_rate=0.):
dropout_rate=0.,
pre_softmax_shape=None,
post_softmax_shape=None):
"""The encoder layers that can be stacked to form a deep encoder.
This module consits of a multi-head (self) attention followed by
......@@ -260,9 +251,9 @@ def encoder_layer(enc_input,
with the post_process_layer to add residual connection, layer normalization
and droput.
"""
attn_output = multi_head_attention(enc_input, enc_input, enc_input,
attn_bias, d_key, d_value, d_model,
n_head, dropout_rate)
attn_output = multi_head_attention(
enc_input, enc_input, enc_input, attn_bias, d_key, d_value, d_model,
n_head, dropout_rate, pre_softmax_shape, post_softmax_shape)
attn_output = post_process_layer(enc_input, attn_output, "dan",
dropout_rate)
ffd_output = positionwise_feed_forward(attn_output, d_inner_hid, d_model)
......@@ -277,7 +268,9 @@ def encoder(enc_input,
d_value,
d_model,
d_inner_hid,
dropout_rate=0.):
dropout_rate=0.,
pre_softmax_shape=None,
post_softmax_shape=None):
"""
The encoder is composed of a stack of identical layers returned by calling
encoder_layer.
......@@ -291,7 +284,9 @@ def encoder(enc_input,
d_value,
d_model,
d_inner_hid,
dropout_rate, )
dropout_rate,
pre_softmax_shape,
post_softmax_shape, )
enc_input = enc_output
return enc_output
......@@ -305,7 +300,11 @@ def decoder_layer(dec_input,
d_value,
d_model,
d_inner_hid,
dropout_rate=0.):
dropout_rate=0.,
slf_attn_pre_softmax_shape=None,
slf_attn_post_softmax_shape=None,
src_attn_pre_softmax_shape=None,
src_attn_post_softmax_shape=None):
""" The layer to be stacked in decoder part.
The structure of this module is similar to that in the encoder part except
......@@ -320,7 +319,9 @@ def decoder_layer(dec_input,
d_value,
d_model,
n_head,
dropout_rate, )
dropout_rate,
slf_attn_pre_softmax_shape,
slf_attn_post_softmax_shape, )
slf_attn_output = post_process_layer(
dec_input,
slf_attn_output,
......@@ -335,7 +336,9 @@ def decoder_layer(dec_input,
d_value,
d_model,
n_head,
dropout_rate, )
dropout_rate,
src_attn_pre_softmax_shape,
src_attn_post_softmax_shape, )
enc_attn_output = post_process_layer(
slf_attn_output,
enc_attn_output,
......@@ -363,7 +366,11 @@ def decoder(dec_input,
d_value,
d_model,
d_inner_hid,
dropout_rate=0.):
dropout_rate=0.,
slf_attn_pre_softmax_shape=None,
slf_attn_post_softmax_shape=None,
src_attn_pre_softmax_shape=None,
src_attn_post_softmax_shape=None):
"""
The decoder is composed of a stack of identical decoder_layer layers.
"""
......@@ -378,7 +385,11 @@ def decoder(dec_input,
d_value,
d_model,
d_inner_hid,
dropout_rate, )
dropout_rate,
slf_attn_pre_softmax_shape,
slf_attn_post_softmax_shape,
src_attn_pre_softmax_shape,
src_attn_post_softmax_shape, )
dec_input = dec_output
return dec_output
......@@ -386,18 +397,23 @@ def decoder(dec_input,
def make_inputs(input_data_names,
n_head,
d_model,
batch_size,
max_length,
is_pos,
slf_attn_bias_flag,
src_attn_bias_flag,
enc_output_flag=False):
enc_output_flag=False,
data_shape_flag=True,
slf_attn_shape_flag=True,
src_attn_shape_flag=True):
"""
Define the input data layers for the transformer model.
"""
input_layers = []
# The shapes here act as placeholder.
# The shapes set here is to pass the infer-shape in compile time.
batch_size = 1 # Only for the infer-shape in compile time.
# The shapes here act as placeholder and are set to pass the infer-shape in
# compile time.
# The actual data shape of word is:
# [batch_size * max_len_in_batch, 1]
word = layers.data(
name=input_data_names[len(input_layers)],
shape=[batch_size * max_length, 1],
......@@ -405,6 +421,8 @@ def make_inputs(input_data_names,
append_batch_size=False)
input_layers += [word]
# This is used for position data or label weight.
# The actual data shape of pos is:
# [batch_size * max_len_in_batch, 1]
pos = layers.data(
name=input_data_names[len(input_layers)],
shape=[batch_size * max_length, 1],
......@@ -415,6 +433,8 @@ def make_inputs(input_data_names,
# This input is used to remove attention weights on paddings for the
# encoder and to remove attention weights on subsequent words for the
# decoder.
# The actual data shape of slf_attn_bias_flag is:
# [batch_size, n_head, max_len_in_batch, max_len_in_batch]
slf_attn_bias = layers.data(
name=input_data_names[len(input_layers)],
shape=[batch_size, n_head, max_length, max_length],
......@@ -422,20 +442,67 @@ def make_inputs(input_data_names,
append_batch_size=False)
input_layers += [slf_attn_bias]
if src_attn_bias_flag:
# This input is used to remove attention weights on paddings.
# This input is used to remove attention weights on paddings. It's used
# in encoder-decoder attention.
# The actual data shape of slf_attn_bias_flag is:
# [batch_size, n_head, trg_max_len_in_batch, src_max_len_in_batch]
src_attn_bias = layers.data(
name=input_data_names[len(input_layers)],
shape=[batch_size, n_head, max_length, max_length],
dtype="float32",
append_batch_size=False)
input_layers += [src_attn_bias]
if data_shape_flag:
# This input is used to reshape the output of embedding layer.
data_shape = layers.data(
name=input_data_names[len(input_layers)],
shape=[3],
dtype="int32",
append_batch_size=False)
input_layers += [data_shape]
if slf_attn_shape_flag:
# This shape input is used to reshape before softmax in self attention.
slf_attn_pre_softmax_shape = layers.data(
name=input_data_names[len(input_layers)],
shape=[2],
dtype="int32",
append_batch_size=False)
input_layers += [slf_attn_pre_softmax_shape]
# This shape input is used to reshape after softmax in self attention.
slf_attn_post_softmax_shape = layers.data(
name=input_data_names[len(input_layers)],
shape=[4],
dtype="int32",
append_batch_size=False)
input_layers += [slf_attn_post_softmax_shape]
if src_attn_shape_flag:
# This shape input is used to reshape before softmax in encoder-decoder
# attention.
src_attn_pre_softmax_shape = layers.data(
name=input_data_names[len(input_layers)],
shape=[2],
dtype="int32",
append_batch_size=False)
input_layers += [src_attn_pre_softmax_shape]
# This shape input is used to reshape after softmax in encoder-decoder
# attention.
src_attn_post_softmax_shape = layers.data(
name=input_data_names[len(input_layers)],
shape=[4],
dtype="int32",
append_batch_size=False)
input_layers += [src_attn_post_softmax_shape]
if enc_output_flag:
# This input is used in independent decoder program for inference.
# The actual data shape of slf_attn_bias_flag is:
# [batch_size, max_len_in_batch, d_model]
enc_output = layers.data(
name=input_data_names[len(input_layers)],
shape=[batch_size, max_length, d_model],
dtype="float32",
append_batch_size=False)
input_layers += [enc_output]
return input_layers
......@@ -449,12 +516,19 @@ def transformer(
d_value,
d_model,
d_inner_hid,
dropout_rate,
src_pad_idx,
trg_pad_idx,
pos_pad_idx, ):
enc_input_layers = make_inputs(encoder_input_data_names, n_head, d_model,
batch_size, max_length, True, True, False)
dropout_rate, ):
enc_inputs = make_inputs(
encoder_input_data_names,
n_head,
d_model,
max_length,
is_pos=True,
slf_attn_bias_flag=True,
src_attn_bias_flag=False,
enc_output_flag=False,
data_shape_flag=True,
slf_attn_shape_flag=True,
src_attn_shape_flag=False)
enc_output = wrap_encoder(
src_vocab_size,
......@@ -466,12 +540,20 @@ def transformer(
d_model,
d_inner_hid,
dropout_rate,
src_pad_idx,
pos_pad_idx,
enc_input_layers, )
enc_inputs, )
dec_input_layers = make_inputs(decoder_input_data_names, n_head, d_model,
batch_size, max_length, True, True, True)
dec_inputs = make_inputs(
decoder_input_data_names,
n_head,
d_model,
max_length,
is_pos=True,
slf_attn_bias_flag=True,
src_attn_bias_flag=True,
enc_output_flag=False,
data_shape_flag=True,
slf_attn_shape_flag=True,
src_attn_shape_flag=True)
predict = wrap_decoder(
trg_vocab_size,
......@@ -483,18 +565,29 @@ def transformer(
d_model,
d_inner_hid,
dropout_rate,
trg_pad_idx,
pos_pad_idx,
dec_input_layers,
dec_inputs,
enc_output, )
# Padding index do not contribute to the total loss. The weights is used to
# cancel padding index in calculating the loss.
gold, weights = make_inputs(label_data_names, n_head, d_model, batch_size,
max_length, False, False, False)
cost = layers.cross_entropy(input=predict, label=gold)
gold, weights = make_inputs(
label_data_names,
n_head,
d_model,
max_length,
is_pos=False,
slf_attn_bias_flag=False,
src_attn_bias_flag=False,
enc_output_flag=False,
data_shape_flag=False,
slf_attn_shape_flag=False,
src_attn_shape_flag=False)
cost = layers.softmax_with_cross_entropy(logits=predict, label=gold)
weighted_cost = cost * weights
return layers.reduce_sum(weighted_cost), predict
sum_cost = layers.reduce_sum(weighted_cost)
token_num = layers.reduce_sum(weights)
avg_cost = sum_cost / token_num
return sum_cost, avg_cost, predict, token_num
def wrap_encoder(src_vocab_size,
......@@ -506,27 +599,38 @@ def wrap_encoder(src_vocab_size,
d_model,
d_inner_hid,
dropout_rate,
src_pad_idx,
pos_pad_idx,
enc_input_layers=None):
enc_inputs=None):
"""
The wrapper assembles together all needed layers for the encoder.
"""
if enc_input_layers is None:
if enc_inputs is None:
# This is used to implement independent encoder program in inference.
src_word, src_pos, src_slf_attn_bias = make_inputs(
encoder_input_data_names, n_head, d_model, batch_size, max_length,
True, True, False)
src_word, src_pos, src_slf_attn_bias, src_data_shape, \
slf_attn_pre_softmax_shape, slf_attn_post_softmax_shape = \
make_inputs(
encoder_input_data_names,
n_head,
d_model,
max_length,
is_pos=True,
slf_attn_bias_flag=True,
src_attn_bias_flag=False,
enc_output_flag=False,
data_shape_flag=True,
slf_attn_shape_flag=True,
src_attn_shape_flag=False)
else:
src_word, src_pos, src_slf_attn_bias = enc_input_layers
src_word, src_pos, src_slf_attn_bias, src_data_shape, \
slf_attn_pre_softmax_shape, slf_attn_post_softmax_shape = \
enc_inputs
enc_input = prepare_encoder(
src_word,
src_pos,
src_vocab_size,
d_model,
src_pad_idx,
max_length,
dropout_rate, )
dropout_rate,
src_data_shape, )
enc_output = encoder(
enc_input,
src_slf_attn_bias,
......@@ -536,7 +640,9 @@ def wrap_encoder(src_vocab_size,
d_value,
d_model,
d_inner_hid,
dropout_rate, )
dropout_rate,
slf_attn_pre_softmax_shape,
slf_attn_post_softmax_shape, )
return enc_output
......@@ -549,29 +655,42 @@ def wrap_decoder(trg_vocab_size,
d_model,
d_inner_hid,
dropout_rate,
trg_pad_idx,
pos_pad_idx,
dec_input_layers=None,
dec_inputs=None,
enc_output=None):
"""
The wrapper assembles together all needed layers for the decoder.
"""
if dec_input_layers is None:
if dec_inputs is None:
# This is used to implement independent decoder program in inference.
trg_word, trg_pos, trg_slf_attn_bias, trg_src_attn_bias, enc_output = make_inputs(
decoder_input_data_names, n_head, d_model, batch_size, max_length,
True, True, True, True)
trg_word, trg_pos, trg_slf_attn_bias, trg_src_attn_bias, \
trg_data_shape, slf_attn_pre_softmax_shape, \
slf_attn_post_softmax_shape, src_attn_pre_softmax_shape, \
src_attn_post_softmax_shape, enc_output = make_inputs(
decoder_input_data_names,
n_head,
d_model,
max_length,
is_pos=True,
slf_attn_bias_flag=True,
src_attn_bias_flag=True,
enc_output_flag=True,
data_shape_flag=True,
slf_attn_shape_flag=True,
src_attn_shape_flag=True)
else:
trg_word, trg_pos, trg_slf_attn_bias, trg_src_attn_bias = dec_input_layers
trg_word, trg_pos, trg_slf_attn_bias, trg_src_attn_bias, \
trg_data_shape, slf_attn_pre_softmax_shape, \
slf_attn_post_softmax_shape, src_attn_pre_softmax_shape, \
src_attn_post_softmax_shape = dec_inputs
dec_input = prepare_decoder(
trg_word,
trg_pos,
trg_vocab_size,
d_model,
trg_pad_idx,
max_length,
dropout_rate, )
dropout_rate,
trg_data_shape, )
dec_output = decoder(
dec_input,
enc_output,
......@@ -583,13 +702,17 @@ def wrap_decoder(trg_vocab_size,
d_value,
d_model,
d_inner_hid,
dropout_rate, )
dropout_rate,
slf_attn_pre_softmax_shape,
slf_attn_post_softmax_shape,
src_attn_pre_softmax_shape,
src_attn_post_softmax_shape, )
# Return logits for training and probs for inference.
predict = layers.reshape(
x=layers.fc(input=dec_output,
size=trg_vocab_size,
bias_attr=False,
num_flatten_dims=2),
shape=[-1, trg_vocab_size],
act="softmax")
act="softmax" if dec_inputs is None else None)
return predict
import os
import time
import numpy as np
import paddle.v2 as paddle
import paddle
import paddle.fluid as fluid
from model import transformer, position_encoding_init
......@@ -14,7 +15,7 @@ def pad_batch_data(insts,
pad_idx,
n_head,
is_target=False,
return_pos=True,
is_label=False,
return_attn_bias=True,
return_max_len=True):
"""
......@@ -23,14 +24,20 @@ def pad_batch_data(insts,
"""
return_list = []
max_len = max(len(inst) for inst in insts)
# Any token included in dict can be used to pad, since the paddings' loss
# will be masked out by weights and make no effect on parameter gradients.
inst_data = np.array(
[inst + [pad_idx] * (max_len - len(inst)) for inst in insts])
return_list += [inst_data.astype("int64").reshape([-1, 1])]
if return_pos:
inst_pos = np.array([[
pos_i + 1 if w_i != pad_idx else 0 for pos_i, w_i in enumerate(inst)
] for inst in inst_data])
if is_label: # label weight
inst_weight = np.array(
[[1.] * len(inst) + [0.] * (max_len - len(inst)) for inst in insts])
return_list += [inst_weight.astype("float32").reshape([-1, 1])]
else: # position data
inst_pos = np.array([
range(1, len(inst) + 1) + [0] * (max_len - len(inst))
for inst in insts
])
return_list += [inst_pos.astype("int64").reshape([-1, 1])]
if return_attn_bias:
if is_target:
......@@ -56,7 +63,7 @@ def pad_batch_data(insts,
def prepare_batch_input(insts, input_data_names, src_pad_idx, trg_pad_idx,
max_length, n_head):
n_head, d_model):
"""
Put all padded data needed by training into a dict.
"""
......@@ -66,13 +73,40 @@ def prepare_batch_input(insts, input_data_names, src_pad_idx, trg_pad_idx,
[inst[1] for inst in insts], trg_pad_idx, n_head, is_target=True)
trg_src_attn_bias = np.tile(src_slf_attn_bias[:, :, ::src_max_len, :],
[1, 1, trg_max_len, 1]).astype("float32")
lbl_word = pad_batch_data([inst[2] for inst in insts], trg_pad_idx, n_head,
False, False, False, False)
lbl_weight = (lbl_word != trg_pad_idx).astype("float32").reshape([-1, 1])
# These shape tensors are used in reshape_op.
src_data_shape = np.array([len(insts), src_max_len, d_model], dtype="int32")
trg_data_shape = np.array([len(insts), trg_max_len, d_model], dtype="int32")
src_slf_attn_pre_softmax_shape = np.array(
[-1, src_slf_attn_bias.shape[-1]], dtype="int32")
src_slf_attn_post_softmax_shape = np.array(
src_slf_attn_bias.shape, dtype="int32")
trg_slf_attn_pre_softmax_shape = np.array(
[-1, trg_slf_attn_bias.shape[-1]], dtype="int32")
trg_slf_attn_post_softmax_shape = np.array(
trg_slf_attn_bias.shape, dtype="int32")
trg_src_attn_pre_softmax_shape = np.array(
[-1, trg_src_attn_bias.shape[-1]], dtype="int32")
trg_src_attn_post_softmax_shape = np.array(
trg_src_attn_bias.shape, dtype="int32")
lbl_word, lbl_weight = pad_batch_data(
[inst[2] for inst in insts],
trg_pad_idx,
n_head,
is_target=False,
is_label=True,
return_attn_bias=False,
return_max_len=False)
input_dict = dict(
zip(input_data_names, [
src_word, src_pos, src_slf_attn_bias, trg_word, trg_pos,
trg_slf_attn_bias, trg_src_attn_bias, lbl_word, lbl_weight
src_word, src_pos, src_slf_attn_bias, src_data_shape,
src_slf_attn_pre_softmax_shape, src_slf_attn_post_softmax_shape,
trg_word, trg_pos, trg_slf_attn_bias, trg_src_attn_bias,
trg_data_shape, trg_slf_attn_pre_softmax_shape,
trg_slf_attn_post_softmax_shape, trg_src_attn_pre_softmax_shape,
trg_src_attn_post_softmax_shape, lbl_word, lbl_weight
]))
return input_dict
......@@ -81,14 +115,12 @@ def main():
place = fluid.CUDAPlace(0) if TrainTaskConfig.use_gpu else fluid.CPUPlace()
exe = fluid.Executor(place)
cost, predict = transformer(
ModelHyperParams.src_vocab_size + 1,
ModelHyperParams.trg_vocab_size + 1, ModelHyperParams.max_length + 1,
ModelHyperParams.n_layer, ModelHyperParams.n_head,
ModelHyperParams.d_key, ModelHyperParams.d_value,
ModelHyperParams.d_model, ModelHyperParams.d_inner_hid,
ModelHyperParams.dropout, ModelHyperParams.src_pad_idx,
ModelHyperParams.trg_pad_idx, ModelHyperParams.pos_pad_idx)
sum_cost, avg_cost, predict, token_num = transformer(
ModelHyperParams.src_vocab_size, ModelHyperParams.trg_vocab_size,
ModelHyperParams.max_length + 1, ModelHyperParams.n_layer,
ModelHyperParams.n_head, ModelHyperParams.d_key,
ModelHyperParams.d_value, ModelHyperParams.d_model,
ModelHyperParams.d_inner_hid, ModelHyperParams.dropout)
lr_scheduler = LearningRateScheduler(ModelHyperParams.d_model,
TrainTaskConfig.warmup_steps, place,
......@@ -98,7 +130,7 @@ def main():
beta1=TrainTaskConfig.beta1,
beta2=TrainTaskConfig.beta2,
epsilon=TrainTaskConfig.eps)
optimizer.minimize(cost)
optimizer.minimize(avg_cost if TrainTaskConfig.use_avg_cost else sum_cost)
train_data = paddle.batch(
paddle.reader.shuffle(
......@@ -110,27 +142,31 @@ def main():
# Program to do validation.
test_program = fluid.default_main_program().clone()
with fluid.program_guard(test_program):
test_program = fluid.io.get_inference_program([cost])
test_program = fluid.io.get_inference_program([avg_cost])
val_data = paddle.batch(
paddle.dataset.wmt16.validation(ModelHyperParams.src_vocab_size,
ModelHyperParams.trg_vocab_size),
batch_size=TrainTaskConfig.batch_size)
def test(exe):
test_costs = []
test_total_cost = 0
test_total_token = 0
for batch_id, data in enumerate(val_data()):
if len(data) != TrainTaskConfig.batch_size:
continue
data_input = prepare_batch_input(
data, encoder_input_data_names + decoder_input_data_names[:-1] +
label_data_names, ModelHyperParams.src_pad_idx,
ModelHyperParams.trg_pad_idx, ModelHyperParams.max_length,
ModelHyperParams.n_head)
test_cost = exe.run(test_program,
feed=data_input,
fetch_list=[cost])[0]
test_costs.append(test_cost)
return np.mean(test_costs)
label_data_names, ModelHyperParams.eos_idx,
ModelHyperParams.eos_idx, ModelHyperParams.n_head,
ModelHyperParams.d_model)
test_sum_cost, test_token_num = exe.run(
test_program,
feed=data_input,
fetch_list=[sum_cost, token_num],
use_program_cache=True)
test_total_cost += test_sum_cost
test_total_token += test_token_num
test_avg_cost = test_total_cost / test_total_token
test_ppl = np.exp([min(test_avg_cost, 100)])
return test_avg_cost, test_ppl
# Initialize the parameters.
exe.run(fluid.framework.default_startup_program())
......@@ -142,27 +178,30 @@ def main():
ModelHyperParams.d_model), place)
for pass_id in xrange(TrainTaskConfig.pass_num):
pass_start_time = time.time()
for batch_id, data in enumerate(train_data()):
# The current program desc is coupled with batch_size, thus all
# mini-batches must have the same number of instances currently.
if len(data) != TrainTaskConfig.batch_size:
continue
data_input = prepare_batch_input(
data, encoder_input_data_names + decoder_input_data_names[:-1] +
label_data_names, ModelHyperParams.src_pad_idx,
ModelHyperParams.trg_pad_idx, ModelHyperParams.max_length,
ModelHyperParams.n_head)
label_data_names, ModelHyperParams.eos_idx,
ModelHyperParams.eos_idx, ModelHyperParams.n_head,
ModelHyperParams.d_model)
lr_scheduler.update_learning_rate(data_input)
outs = exe.run(fluid.framework.default_main_program(),
feed=data_input,
fetch_list=[cost],
fetch_list=[sum_cost, avg_cost],
use_program_cache=True)
cost_val = np.array(outs[0])
print("pass_id = " + str(pass_id) + " batch = " + str(batch_id) +
" cost = " + str(cost_val))
sum_cost_val, avg_cost_val = np.array(outs[0]), np.array(outs[1])
print("epoch: %d, batch: %d, sum loss: %f, avg loss: %f, ppl: %f" %
(pass_id, batch_id, sum_cost_val, avg_cost_val,
np.exp([min(avg_cost_val[0], 100)])))
# Validate and save the model for inference.
val_cost = test(exe)
print("pass_id = " + str(pass_id) + " val_cost = " + str(val_cost))
val_avg_cost, val_ppl = test(exe)
pass_end_time = time.time()
time_consumed = pass_end_time - pass_start_time
print("epoch: %d, val avg loss: %f, val ppl: %f, "
"consumed %fs" % (pass_id, val_avg_cost, val_ppl, time_consumed))
fluid.io.save_inference_model(
os.path.join(TrainTaskConfig.model_dir,
"pass_" + str(pass_id) + ".infer.model"),
......
./data/pascalvoc/VOCdevkit/
data/pascalvoc/test.txt
data/pascalvoc/trainval.txt
pretrained/ssd_mobilenet_v1_coco.tar.gz
pretrained/ssd_mobilenet_v1_coco
pretrained/mobilenet_v1_imagenet.tar.gz
pretrained/mobilenet_v1_imagenet
log*
*.log
......@@ -2,7 +2,99 @@ The minimum PaddlePaddle version needed for the code sample in this directory is
---
# MobileNet-SSD
## SSD Object Detection
This model built with paddle fluid is still under active development and is not
the final version. We welcome feedbacks.
### Introduction
[Single Shot MultiBox Detector (SSD)](https://arxiv.org/abs/1512.02325) framework for object detection is based on a feed-forward convolutional network. The early network is a standard convolutional architecture for image classification, such as VGG, ResNet, or MobileNet, which is als called base network. In this tutorial we used [MobileNet](https://arxiv.org/abs/1704.04861).
### Data Preparation
You can use [PASCAL VOC dataset](http://host.robots.ox.ac.uk/pascal/VOC/) or [MS-COCO dataset](http://cocodataset.org/#download).
#### PASCAL VOC Dataset
If you want to train model on PASCAL VOC dataset, please download datset at first, skip this step if you already have one.
```bash
cd data/pascalvoc
./download.sh
```
The command `download.sh` also will create training and testing file lists.
#### MS-COCO Dataset
If you want to train model on MS-COCO dataset, please download datset at first, skip this step if you already have one.
```
cd data/coco
./download.sh
```
### Train
#### Download the Pre-trained Model.
We provide two pre-trained models. The one is MobileNet-v1 SSD trained on COCO dataset, but removed the convolutional predictors for COCO dataset. This model can be used to initialize the models when training other dataset, like PASCAL VOC. Then other pre-trained model is MobileNet v1 trained on ImageNet 2012 dataset, but removed the last weights and bias in Fully-Connected layer.
Declaration: the MobileNet-v1 SSD model is converted by [TensorFlow model](https://github.com/tensorflow/models/blob/f87a58cd96d45de73c9a8330a06b2ab56749a7fa/research/object_detection/g3doc/detection_model_zoo.md). The MobileNet v1 model is converted [Caffe](https://github.com/shicai/MobileNet-Caffe).
- Download MobileNet-v1 SSD:
```
./pretrained/download_coco.sh
```
- Download MobileNet-v1:
```
./pretrained/download_imagenet.sh
```
#### Train on PASCAL VOC
- Train on one device (/GPU).
```python
env CUDA_VISIABLE_DEVICES=0 python -u train.py --parallel=False --data='pascalvoc' --pretrained_model='pretrained/ssd_mobilenet_v1_coco/'
```
- Train on multi devices (/GPUs).
```python
env CUDA_VISIABLE_DEVICES=0,1 python -u train.py --batch_size=64 --data='pascalvoc' --pretrained_model='pretrained/ssd_mobilenet_v1_coco/'
```
#### Train on MS-COCO
- Train on one device (/GPU).
```python
env CUDA_VISIABLE_DEVICES=0 python -u train.py --parallel=False --data='coco' --pretrained_model='pretrained/mobilenet_imagenet/'
```
- Train on multi devices (/GPUs).
```python
env CUDA_VISIABLE_DEVICES=0,1 python -u train.py --batch_size=64 --data='coco' --pretrained_model='pretrained/mobilenet_imagenet/'
```
TBD
### Evaluate
```python
env CUDA_VISIABLE_DEVICES=0 python eval.py --model='model/90' --test_list=''
```
TBD
### Infer and Visualize
```python
env CUDA_VISIABLE_DEVICES=0 python infer.py --batch_size=2 --model='model/90' --test_list=''
```
TBD
### Released Model
| Model | Pre-trained Model | Training data | Test data | mAP |
|:------------------------:|:------------------:|:----------------:|:------------:|:----:|
|MobileNet-v1-SSD 300x300 | COCO MobileNet SSD | VOC07+12 trainval| VOC07 test | xx% |
|MobileNet-v1-SSD 300x300 | ImageNet MobileNet | VOC07+12 trainval| VOC07 test | xx% |
|MobileNet-v1-SSD 300x300 | ImageNet MobileNet | MS-COCO trainval | MS-COCO test | xx% |
TBD
......@@ -60,4 +60,5 @@ def prepare_filelist(devkit_dir, years, output_dir):
ftest.write(item[0] + ' ' + item[1] + '\n')
prepare_filelist(devkit_dir, years, '.')
if __name__ == '__main__':
prepare_filelist(devkit_dir, years, '.')
DIR="$( cd "$(dirname "$0")" ; pwd -P )"
cd "$DIR"
# Download the data.
echo "Downloading..."
wget http://host.robots.ox.ac.uk/pascal/VOC/voc2012/VOCtrainval_11-May-2012.tar
wget http://host.robots.ox.ac.uk/pascal/VOC/voc2007/VOCtrainval_06-Nov-2007.tar
wget http://host.robots.ox.ac.uk/pascal/VOC/voc2007/VOCtest_06-Nov-2007.tar
# Extract the data.
echo "Extractint..."
tar -xf VOCtrainval_11-May-2012.tar
tar -xf VOCtrainval_06-Nov-2007.tar
tar -xf VOCtest_06-Nov-2007.tar
echo "Creating data lists..."
python create_list.py
import os
import time
import numpy as np
import argparse
import functools
import paddle
import paddle.fluid as fluid
import reader
from mobilenet_ssd import mobile_net
from utility import add_arguments, print_arguments
parser = argparse.ArgumentParser(description=__doc__)
add_arg = functools.partial(add_arguments, argparser=parser)
# yapf: disable
add_arg('dataset', str, 'pascalvoc', "coco or pascalvoc.")
add_arg('batch_size', int, 32, "Minibatch size.")
add_arg('use_gpu', bool, True, "Whether to use GPU or not.")
add_arg('data_dir', str, '', "The data root path.")
add_arg('test_list', str, '', "The testing data lists.")
add_arg('label_file', str, '', "The label file, which save the real name and is only used for Pascal VOC.")
add_arg('model_dir', str, '', "The model path.")
add_arg('ap_version', str, '11point', "11point or integral")
add_arg('resize_h', int, 300, "The resized image height.")
add_arg('resize_w', int, 300, "The resized image width.")
add_arg('mean_value_B', float, 127.5, "mean value for B channel which will be subtracted") #123.68
add_arg('mean_value_G', float, 127.5, "mean value for G channel which will be subtracted") #116.78
add_arg('mean_value_R', float, 127.5, "mean value for R channel which will be subtracted") #103.94
# yapf: enable
def eval(args, data_args, test_list, batch_size, model_dir=None):
image_shape = [3, data_args.resize_h, data_args.resize_w]
if data_args.dataset == 'coco':
num_classes = 81
elif data_args.dataset == 'pascalvoc':
num_classes = 21
image = fluid.layers.data(name='image', shape=image_shape, dtype='float32')
gt_box = fluid.layers.data(
name='gt_box', shape=[4], dtype='float32', lod_level=1)
gt_label = fluid.layers.data(
name='gt_label', shape=[1], dtype='int32', lod_level=1)
difficult = fluid.layers.data(
name='gt_difficult', shape=[1], dtype='int32', lod_level=1)
locs, confs, box, box_var = mobile_net(num_classes, image, image_shape)
nmsed_out = fluid.layers.detection_output(
locs, confs, box, box_var, nms_threshold=0.45)
loss = fluid.layers.ssd_loss(locs, confs, gt_box, gt_label, box, box_var)
loss = fluid.layers.reduce_sum(loss)
test_program = fluid.default_main_program().clone(for_test=True)
with fluid.program_guard(test_program):
map_eval = fluid.evaluator.DetectionMAP(
nmsed_out,
gt_label,
gt_box,
difficult,
num_classes,
overlap_threshold=0.5,
evaluate_difficult=False,
ap_version=args.ap_version)
place = fluid.CUDAPlace(0) if args.use_gpu else fluid.CPUPlace()
exe = fluid.Executor(place)
if model_dir:
def if_exist(var):
return os.path.exists(os.path.join(model_dir, var.name))
fluid.io.load_vars(exe, model_dir, predicate=if_exist)
test_reader = paddle.batch(
reader.test(data_args, test_list), batch_size=batch_size)
feeder = fluid.DataFeeder(
place=place, feed_list=[image, gt_box, gt_label, difficult])
_, accum_map = map_eval.get_map_var()
map_eval.reset(exe)
for idx, data in enumerate(test_reader()):
test_map = exe.run(test_program,
feed=feeder.feed(data),
fetch_list=[accum_map])
if idx % 50 == 0:
print("Batch {0}, map {1}".format(idx, test_map[0]))
print("Test model {0}, map {1}".format(model_dir, test_map[0]))
if __name__ == '__main__':
args = parser.parse_args()
print_arguments(args)
data_args = reader.Settings(
dataset=args.dataset,
data_dir=args.data_dir,
label_file=args.label_file,
resize_h=args.resize_h,
resize_w=args.resize_w,
mean_value=[args.mean_value_B, args.mean_value_G, args.mean_value_R])
eval(
args,
test_list=args.test_list,
data_args=data_args,
batch_size=args.batch_size,
model_dir=args.model_dir)
......@@ -85,8 +85,7 @@ def satisfy_sample_constraint(sampler, sample_bbox, bbox_labels):
return False
def generate_batch_samples(batch_sampler, bbox_labels, image_width,
image_height):
def generate_batch_samples(batch_sampler, bbox_labels):
sampled_bbox = []
index = []
c = 0
......@@ -216,9 +215,9 @@ def distort_image(img, settings):
def expand_image(img, bbox_labels, img_width, img_height, settings):
prob = random.uniform(0, 1)
if prob < settings._hue_prob:
expand_ratio = random.uniform(1, settings._expand_max_ratio)
if expand_ratio - 1 >= 0.01:
if prob < settings._expand_prob:
if settings._expand_max_ratio - 1 >= 0.01:
expand_ratio = random.uniform(1, settings._expand_max_ratio)
height = int(img_height * expand_ratio)
width = int(img_width * expand_ratio)
h_off = math.floor(random.uniform(0, height - img_height))
......@@ -231,5 +230,5 @@ def expand_image(img, bbox_labels, img_width, img_height, settings):
expand_img = Image.fromarray(expand_img)
expand_img.paste(img, (int(w_off), int(h_off)))
bbox_labels = transform_labels(bbox_labels, expand_bbox)
return expand_img, bbox_labels
return img, bbox_labels
return expand_img, bbox_labels, width, height
return img, bbox_labels, img_width, img_height
import paddle.v2 as paddle
import paddle.fluid as fluid
import numpy as np
# From npy
def load_vars():
vars = {}
name_map = {}
with open('./ssd_mobilenet_v1_coco/names.map', 'r') as map_file:
for param in map_file:
fd_name, tf_name = param.strip().split('\t')
name_map[fd_name] = tf_name
tf_vars = np.load(
'./ssd_mobilenet_v1_coco/ssd_mobilenet_v1_coco_2017_11_17.npy').item()
for fd_name in name_map:
tf_name = name_map[fd_name]
tf_var = tf_vars[tf_name]
if len(tf_var.shape) == 4 and 'depthwise' in tf_name:
vars[fd_name] = np.transpose(tf_var, (2, 3, 0, 1))
elif len(tf_var.shape) == 4:
vars[fd_name] = np.transpose(tf_var, (3, 2, 0, 1))
else:
vars[fd_name] = tf_var
return vars
def load_and_set_vars(place):
vars = load_vars()
for k, v in vars.items():
t = fluid.global_scope().find_var(k).get_tensor()
#print(np.array(t).shape, v.shape, k)
assert np.array(t).shape == v.shape
t.set(v, place)
# From Paddle V1
def load_paddlev1_vars(place):
vars = {}
name_map = {}
with open('./caffe2paddle/names.map', 'r') as map_file:
for param in map_file:
fd_name, tf_name = param.strip().split('\t')
name_map[fd_name] = tf_name
from operator import mul
def load(file_name, shape):
with open(file_name, 'rb') as f:
f.read(16)
arr = np.fromfile(f, dtype=np.float32)
#print(arr.size, reduce(mul, shape), file_name)
assert arr.size == reduce(mul, shape)
return arr.reshape(shape)
for fd_name in name_map:
v1_name = name_map[fd_name]
t = fluid.global_scope().find_var(fd_name).get_tensor()
shape = np.array(t).shape
v1_var = load('./caffe2paddle/' + v1_name, shape)
t.set(v1_var, place)
if __name__ == "__main__":
load_vars()
......@@ -27,12 +27,7 @@ def conv_bn(input,
bias_attr=False)
parameter_attr = ParamAttr(learning_rate=0.1, initializer=MSRA())
bias_attr = ParamAttr(learning_rate=0.2)
return fluid.layers.batch_norm(
input=conv,
act=act,
epsilon=0.00001,
param_attr=parameter_attr,
bias_attr=bias_attr)
return fluid.layers.batch_norm(input=conv, act=act)
def depthwise_separable(input, num_filters1, num_filters2, num_groups, stride,
......@@ -76,7 +71,7 @@ def extra_block(input, num_filters1, num_filters2, num_groups, stride, scale):
return normal_conv
def mobile_net(img, img_shape, scale=1.0):
def mobile_net(num_classes, img, img_shape, scale=1.0):
# 300x300
tmp = conv_bn(img, 3, int(32 * scale), 2, 1, 3)
# 150x150
......@@ -104,10 +99,11 @@ def mobile_net(img, img_shape, scale=1.0):
module16 = extra_block(module15, 128, 256, 1, 2, scale)
# 2x2
module17 = extra_block(module16, 64, 128, 1, 2, scale)
mbox_locs, mbox_confs, box, box_var = fluid.layers.multi_box_head(
inputs=[module11, module13, module14, module15, module16, module17],
image=img,
num_classes=21,
num_classes=num_classes,
min_ratio=20,
max_ratio=90,
min_sizes=[60.0, 105.0, 150.0, 195.0, 240.0, 285.0],
......
DIR="$( cd "$(dirname "$0")" ; pwd -P )"
cd "$DIR"
# Download the data.
echo "Downloading..."
wget http://paddlemodels.bj.bcebos.com/ssd_mobilenet_v1_coco.tar.gz
echo "Extractint..."
tar -xf ssd_mobilenet_v1_coco.tar.gz
DIR="$( cd "$(dirname "$0")" ; pwd -P )"
cd "$DIR"
# Download the data.
echo "Downloading..."
wget http://paddlemodels.bj.bcebos.com/mobilenet_v1_imagenet.tar.gz
echo "Extractint..."
tar -xf mobilenet_v1_imagenet.tar.gz
......@@ -16,19 +16,33 @@ import image_util
from paddle.utils.image_util import *
import random
from PIL import Image
from PIL import ImageDraw
import numpy as np
import xml.etree.ElementTree
import os
import time
import copy
class Settings(object):
def __init__(self, data_dir, label_file, resize_h, resize_w, mean_value,
apply_distort, apply_expand):
def __init__(self,
dataset=None,
data_dir=None,
label_file=None,
resize_h=300,
resize_w=300,
mean_value=[127.5, 127.5, 127.5],
apply_distort=True,
apply_expand=True,
toy=0):
self._dataset = dataset
self._toy = toy
self._data_dir = data_dir
self._label_list = []
label_fpath = os.path.join(data_dir, label_file)
for line in open(label_fpath):
self._label_list.append(line.strip())
if dataset == "pascalvoc":
self._label_list = []
label_fpath = os.path.join(data_dir, label_file)
for line in open(label_fpath):
self._label_list.append(line.strip())
self._apply_distort = apply_distort
self._apply_expand = apply_expand
......@@ -47,6 +61,14 @@ class Settings(object):
self._brightness_prob = 0.5
self._brightness_delta = 0.125
@property
def dataset(self):
return self._dataset
@property
def toy(self):
return self._toy
@property
def apply_distort(self):
return self._apply_expand
......@@ -59,6 +81,10 @@ class Settings(object):
def data_dir(self):
return self._data_dir
@data_dir.setter
def data_dir(self, data_dir):
self._data_dir = data_dir
@property
def label_list(self):
return self._label_list
......@@ -76,133 +102,249 @@ class Settings(object):
return self._img_mean
def _reader_creator(settings, file_list, mode, shuffle):
def preprocess(img, bbox_labels, mode, settings):
img_width, img_height = img.size
sampled_labels = bbox_labels
if mode == 'train':
if settings._apply_distort:
img = image_util.distort_image(img, settings)
if settings._apply_expand:
img, bbox_labels, img_width, img_height = image_util.expand_image(
img, bbox_labels, img_width, img_height, settings)
# sampling
batch_sampler = []
# hard-code here
batch_sampler.append(
image_util.sampler(1, 1, 1.0, 1.0, 1.0, 1.0, 0.0, 0.0))
batch_sampler.append(
image_util.sampler(1, 50, 0.3, 1.0, 0.5, 2.0, 0.1, 0.0))
batch_sampler.append(
image_util.sampler(1, 50, 0.3, 1.0, 0.5, 2.0, 0.3, 0.0))
batch_sampler.append(
image_util.sampler(1, 50, 0.3, 1.0, 0.5, 2.0, 0.5, 0.0))
batch_sampler.append(
image_util.sampler(1, 50, 0.3, 1.0, 0.5, 2.0, 0.7, 0.0))
batch_sampler.append(
image_util.sampler(1, 50, 0.3, 1.0, 0.5, 2.0, 0.9, 0.0))
batch_sampler.append(
image_util.sampler(1, 50, 0.3, 1.0, 0.5, 2.0, 0.0, 1.0))
sampled_bbox = image_util.generate_batch_samples(batch_sampler,
bbox_labels)
img = np.array(img)
if len(sampled_bbox) > 0:
idx = int(random.uniform(0, len(sampled_bbox)))
img, sampled_labels = image_util.crop_image(
img, bbox_labels, sampled_bbox[idx], img_width, img_height)
img = Image.fromarray(img)
img = img.resize((settings.resize_w, settings.resize_h), Image.ANTIALIAS)
img = np.array(img)
if mode == 'train':
mirror = int(random.uniform(0, 2))
if mirror == 1:
img = img[:, ::-1, :]
for i in xrange(len(sampled_labels)):
tmp = sampled_labels[i][1]
sampled_labels[i][1] = 1 - sampled_labels[i][3]
sampled_labels[i][3] = 1 - tmp
# HWC to CHW
if len(img.shape) == 3:
img = np.swapaxes(img, 1, 2)
img = np.swapaxes(img, 1, 0)
# RBG to BGR
img = img[[2, 1, 0], :, :]
img = img.astype('float32')
img -= settings.img_mean
img = img * 0.007843
return img, sampled_labels
def coco(settings, file_list, mode, shuffle):
# cocoapi
from pycocotools.coco import COCO
from pycocotools.cocoeval import COCOeval
coco = COCO(file_list)
image_ids = coco.getImgIds()
images = coco.loadImgs(image_ids)
category_ids = coco.getCatIds()
category_names = [item['name'] for item in coco.loadCats(category_ids)]
if not settings.toy == 0:
images = images[:settings.toy] if len(images) > settings.toy else images
print("{} on {} with {} images".format(mode, settings.dataset, len(images)))
def reader():
with open(file_list) as flist:
lines = [line.strip() for line in flist]
if shuffle:
random.shuffle(lines)
for line in lines:
if mode == 'train' or mode == 'test':
img_path, label_path = line.split()
img_path = os.path.join(settings.data_dir, img_path)
label_path = os.path.join(settings.data_dir, label_path)
elif mode == 'infer':
img_path = os.path.join(settings.data_dir, line)
img = Image.open(img_path)
img_width, img_height = img.size
# layout: label | xmin | ymin | xmax | ymax | difficult
if mode == 'train' or mode == 'test':
bbox_labels = []
root = xml.etree.ElementTree.parse(label_path).getroot()
for object in root.findall('object'):
bbox_sample = []
# start from 1
bbox_sample.append(
float(
settings.label_list.index(
object.find('name').text)))
bbox = object.find('bndbox')
difficult = float(object.find('difficult').text)
bbox_sample.append(
float(bbox.find('xmin').text) / img_width)
bbox_sample.append(
float(bbox.find('ymin').text) / img_height)
bbox_sample.append(
float(bbox.find('xmax').text) / img_width)
bbox_sample.append(
float(bbox.find('ymax').text) / img_height)
bbox_sample.append(difficult)
bbox_labels.append(bbox_sample)
sample_labels = bbox_labels
if mode == 'train':
if settings._apply_distort:
img = image_util.distort_image(img, settings)
if settings._apply_expand:
img, bbox_labels = image_util.expand_image(
img, bbox_labels, img_width, img_height,
settings)
batch_sampler = []
# hard-code here
batch_sampler.append(
image_util.sampler(1, 1, 1.0, 1.0, 1.0, 1.0, 0.0,
0.0))
batch_sampler.append(
image_util.sampler(1, 50, 0.3, 1.0, 0.5, 2.0, 0.1,
0.0))
batch_sampler.append(
image_util.sampler(1, 50, 0.3, 1.0, 0.5, 2.0, 0.3,
0.0))
batch_sampler.append(
image_util.sampler(1, 50, 0.3, 1.0, 0.5, 2.0, 0.5,
0.0))
batch_sampler.append(
image_util.sampler(1, 50, 0.3, 1.0, 0.5, 2.0, 0.7,
0.0))
batch_sampler.append(
image_util.sampler(1, 50, 0.3, 1.0, 0.5, 2.0, 0.9,
0.0))
batch_sampler.append(
image_util.sampler(1, 50, 0.3, 1.0, 0.5, 2.0, 0.0,
1.0))
""" random crop """
sampled_bbox = image_util.generate_batch_samples(
batch_sampler, bbox_labels, img_width, img_height)
img = np.array(img)
if len(sampled_bbox) > 0:
idx = int(random.uniform(0, len(sampled_bbox)))
img, sample_labels = image_util.crop_image(
img, bbox_labels, sampled_bbox[idx], img_width,
img_height)
img = Image.fromarray(img)
img = img.resize((settings.resize_w, settings.resize_h),
Image.ANTIALIAS)
img = np.array(img)
if mode == 'train':
mirror = int(random.uniform(0, 2))
if mirror == 1:
img = img[:, ::-1, :]
for i in xrange(len(sample_labels)):
tmp = sample_labels[i][1]
sample_labels[i][1] = 1 - sample_labels[i][3]
sample_labels[i][3] = 1 - tmp
if len(img.shape) == 3:
img = np.swapaxes(img, 1, 2)
img = np.swapaxes(img, 1, 0)
img = img[[2, 1, 0], :, :]
img = img.astype('float32')
img -= settings.img_mean
img = img.flatten()
img = img * 0.007843
sample_labels = np.array(sample_labels)
if mode == 'train' or mode == 'test':
if mode == 'train' and len(sample_labels) == 0: continue
yield img.astype(
'float32'
), sample_labels[:, 1:5], sample_labels[:, 0].astype(
'int32'), sample_labels[:, -1].astype('int32')
elif mode == 'infer':
yield img.astype('float32')
if mode == 'train' and shuffle:
random.shuffle(images)
for image in images:
image_name = image['file_name']
image_path = os.path.join(settings.data_dir, image_name)
im = Image.open(image_path)
if im.mode == 'L':
im = im.convert('RGB')
im_width, im_height = im.size
# layout: category_id | xmin | ymin | xmax | ymax | iscrowd |
# origin_coco_bbox | segmentation | area | image_id | annotation_id
bbox_labels = []
annIds = coco.getAnnIds(imgIds=image['id'])
anns = coco.loadAnns(annIds)
for ann in anns:
bbox_sample = []
# start from 1, leave 0 to background
bbox_sample.append(
float(category_ids.index(ann['category_id'])) + 1)
bbox = ann['bbox']
xmin, ymin, w, h = bbox
xmax = xmin + w
ymax = ymin + h
bbox_sample.append(float(xmin) / im_width)
bbox_sample.append(float(ymin) / im_height)
bbox_sample.append(float(xmax) / im_width)
bbox_sample.append(float(ymax) / im_height)
bbox_sample.append(float(ann['iscrowd']))
bbox_labels.append(bbox_sample)
im, sample_labels = preprocess(im, bbox_labels, mode, settings)
sample_labels = np.array(sample_labels)
if len(sample_labels) == 0: continue
im = im.astype('float32')
boxes = sample_labels[:, 1:5]
lbls = sample_labels[:, 0].astype('int32')
difficults = sample_labels[:, -1].astype('int32')
yield im, boxes, lbls, difficults
return reader
def pascalvoc(settings, file_list, mode, shuffle):
flist = open(file_list)
images = [line.strip() for line in flist]
if not settings.toy == 0:
images = images[:settings.toy] if len(images) > settings.toy else images
print("{} on {} with {} images".format(mode, settings.dataset, len(images)))
def reader():
if mode == 'train' and shuffle:
random.shuffle(images)
for image in images:
image_path, label_path = image.split()
image_path = os.path.join(settings.data_dir, image_path)
label_path = os.path.join(settings.data_dir, label_path)
im = Image.open(image_path)
if im.mode == 'L':
im = im.convert('RGB')
im_width, im_height = im.size
# layout: label | xmin | ymin | xmax | ymax | difficult
bbox_labels = []
root = xml.etree.ElementTree.parse(label_path).getroot()
for object in root.findall('object'):
bbox_sample = []
# start from 1
bbox_sample.append(
float(settings.label_list.index(object.find('name').text)))
bbox = object.find('bndbox')
difficult = float(object.find('difficult').text)
bbox_sample.append(float(bbox.find('xmin').text) / im_width)
bbox_sample.append(float(bbox.find('ymin').text) / im_height)
bbox_sample.append(float(bbox.find('xmax').text) / im_width)
bbox_sample.append(float(bbox.find('ymax').text) / im_height)
bbox_sample.append(difficult)
bbox_labels.append(bbox_sample)
im, sample_labels = preprocess(im, bbox_labels, mode, settings)
sample_labels = np.array(sample_labels)
if len(sample_labels) == 0: continue
im = im.astype('float32')
boxes = sample_labels[:, 1:5]
lbls = sample_labels[:, 0].astype('int32')
difficults = sample_labels[:, -1].astype('int32')
yield im, boxes, lbls, difficults
return reader
def draw_bounding_box_on_image(image,
sample_labels,
image_name,
category_names,
color='red',
thickness=4,
with_text=True,
normalized=True):
image = Image.fromarray(image)
draw = ImageDraw.Draw(image)
im_width, im_height = image.size
if not normalized:
im_width, im_height = 1, 1
for item in sample_labels:
label = item[0]
category_name = category_names[int(label)]
bbox = item[1:5]
xmin, ymin, xmax, ymax = bbox
(left, right, top, bottom) = (xmin * im_width, xmax * im_width,
ymin * im_height, ymax * im_height)
draw.line(
[(left, top), (left, bottom), (right, bottom), (right, top),
(left, top)],
width=thickness,
fill=color)
if with_text:
if image.mode == 'RGB':
draw.text((left, top), category_name, (255, 255, 0))
image.save(image_name)
def train(settings, file_list, shuffle=True):
return _reader_creator(settings, file_list, 'train', shuffle)
file_list = os.path.join(settings.data_dir, file_list)
if settings.dataset == 'coco':
train_settings = copy.copy(settings)
if '2014' in file_list:
sub_dir = "train2014"
elif '2017' in file_list:
sub_dir = "train2017"
train_settings.data_dir = os.path.join(settings.data_dir, sub_dir)
return coco(train_settings, file_list, 'train', shuffle)
else:
return pascalvoc(settings, file_list, 'train', shuffle)
def test(settings, file_list):
return _reader_creator(settings, file_list, 'test', False)
file_list = os.path.join(settings.data_dir, file_list)
if settings.dataset == 'coco':
test_settings = copy.copy(settings)
if '2014' in file_list:
sub_dir = "val2014"
elif '2017' in file_list:
sub_dir = "val2017"
test_settings.data_dir = os.path.join(settings.data_dir, sub_dir)
return coco(test_settings, file_list, 'test', False)
else:
return pascalvoc(settings, file_list, 'test', False)
def infer(settings, file_list):
return _reader_creator(settings, file_list, 'infer', False)
def infer(settings, image_path):
def reader():
im = Image.open(image_path)
if im.mode == 'L':
im = im.convert('RGB')
im_width, im_height = im.size
img = img.resize((settings.resize_w, settings.resize_h),
Image.ANTIALIAS)
img = np.array(img)
# HWC to CHW
if len(img.shape) == 3:
img = np.swapaxes(img, 1, 2)
img = np.swapaxes(img, 1, 0)
# RBG to BGR
img = img[[2, 1, 0], :, :]
img = img.astype('float32')
img -= settings.img_mean
img = img * 0.007843
yield img
return reader
import paddle.v2 as paddle
import paddle.fluid as fluid
import reader
import load_model as load_model
from mobilenet_ssd import mobile_net
from utility import add_arguments, print_arguments
import os
import time
import numpy as np
import argparse
import functools
import shutil
import paddle
import paddle.fluid as fluid
import reader
from mobilenet_ssd import mobile_net
from utility import add_arguments, print_arguments
parser = argparse.ArgumentParser(description=__doc__)
add_arg = functools.partial(add_arguments, argparser=parser)
# yapf: disable
add_arg('parallel', bool, True, "Whether use parallel training.")
add_arg('use_gpu', bool, True, "Whether use GPU.")
# yapf: disable
add_arg('learning_rate', float, 0.001, "Learning rate.")
add_arg('batch_size', int, 32, "Minibatch size.")
add_arg('num_passes', int, 120, "Epoch number.")
add_arg('parallel', bool, True, "Whether use parallel training.")
add_arg('use_gpu', bool, True, "Whether to use GPU or not.")
add_arg('use_nccl', bool, False, "Whether to use NCCL or not.")
add_arg('dataset', str, 'pascalvoc', "coco or pascalvoc.")
add_arg('model_save_dir', str, 'model', "The path to save model.")
add_arg('pretrained_model', str, 'pretrained/ssd_mobilenet_v1_coco/', "The init model path.")
add_arg('apply_distort', bool, True, "Whether apply distort")
add_arg('apply_expand', bool, True, "Whether appley expand")
add_arg('ap_version', str, '11point', "11point or integral")
add_arg('resize_h', int, 300, "The resized image height.")
add_arg('resize_w', int, 300, "The resized image width.")
add_arg('mean_value_B', float, 127.5, "mean value for B channel which will be subtracted") #123.68
add_arg('mean_value_G', float, 127.5, "mean value for G channel which will be subtracted") #116.78
add_arg('mean_value_R', float, 127.5, "mean value for R channel which will be subtracted") #103.94
add_arg('is_toy', int, 0, "Toy for quick debug, 0 means using all data, while n means using only n sample")
# yapf: enable
def train(args,
train_file_list,
val_file_list,
data_args,
learning_rate,
batch_size,
num_passes,
model_save_dir='model',
init_model_path=None):
def parallel_do(args,
train_file_list,
val_file_list,
data_args,
learning_rate,
batch_size,
num_passes,
model_save_dir,
pretrained_model=None):
image_shape = [3, data_args.resize_h, data_args.resize_w]
if data_args.dataset == 'coco':
num_classes = 81
elif data_args.dataset == 'pascalvoc':
num_classes = 21
image = fluid.layers.data(name='image', shape=image_shape, dtype='float32')
gt_box = fluid.layers.data(
......@@ -38,47 +60,53 @@ def train(args,
if args.parallel:
places = fluid.layers.get_places()
pd = fluid.layers.ParallelDo(places)
pd = fluid.layers.ParallelDo(places, use_nccl=args.use_nccl)
with pd.do():
image_ = pd.read_input(image)
gt_box_ = pd.read_input(gt_box)
gt_label_ = pd.read_input(gt_label)
difficult_ = pd.read_input(difficult)
locs, confs, box, box_var = mobile_net(image_, image_shape)
loss = fluid.layers.ssd_loss(locs, confs, gt_box_, gt_label_,
box, box_var)
locs, confs, box, box_var = mobile_net(num_classes, image_,
image_shape)
loss = fluid.layers.ssd_loss(locs, confs, gt_box_, gt_label_, box,
box_var)
nmsed_out = fluid.layers.detection_output(
locs, confs, box, box_var, nms_threshold=0.45)
loss = fluid.layers.reduce_sum(loss)
pd.write_output(loss)
pd.write_output(locs)
pd.write_output(confs)
pd.write_output(box)
pd.write_output(box_var)
pd.write_output(nmsed_out)
loss, locs, confs, box, box_var = pd()
loss = fluid.layers.reduce_sum(loss)
loss, nmsed_out = pd()
loss = fluid.layers.mean(loss)
else:
locs, confs, box, box_var = mobile_net(image, image_shape)
locs, confs, box, box_var = mobile_net(num_classes, image, image_shape)
nmsed_out = fluid.layers.detection_output(
locs, mbox_confs, box, box_var, nms_threshold=0.45)
loss = fluid.layers.ssd_loss(locs, mbox_confs, gt_box, gt_label,
box, box_var)
locs, confs, box, box_var, nms_threshold=0.45)
loss = fluid.layers.ssd_loss(locs, confs, gt_box, gt_label, box,
box_var)
loss = fluid.layers.reduce_sum(loss)
test_program = fluid.default_main_program().clone(for_test=True)
with fluid.program_guard(test_program):
nmsed_out = fluid.layers.detection_output(
locs, confs, box, box_var, nms_threshold=0.45)
map_eval = fluid.evaluator.DetectionMAP(
nmsed_out,
gt_label,
gt_box,
difficult,
21,
num_classes,
overlap_threshold=0.5,
evaluate_difficult=False,
ap_version='11point')
ap_version=args.ap_version)
boundaries = [40000, 60000]
values = [0.001, 0.0005, 0.00025]
if data_args.dataset == 'coco':
# learning rate decay in 12, 19 pass, respectively
if '2014' in train_file_list:
boundaries = [82783 / batch_size * 12, 82783 / batch_size * 19]
elif '2017' in train_file_list:
boundaries = [118287 / batch_size * 12, 118287 / batch_size * 19]
elif data_args.dataset == 'pascalvoc':
boundaries = [40000, 60000]
values = [learning_rate, learning_rate * 0.5, learning_rate * 0.25]
optimizer = fluid.optimizer.RMSProp(
learning_rate=fluid.layers.piecewise_decay(boundaries, values),
regularization=fluid.regularizer.L2Decay(0.00005), )
......@@ -89,8 +117,13 @@ def train(args,
exe = fluid.Executor(place)
exe.run(fluid.default_startup_program())
load_model.load_and_set_vars(place)
#load_model.load_paddlev1_vars(place)
if pretrained_model:
def if_exist(var):
return os.path.exists(os.path.join(pretrained_model, var.name))
fluid.io.load_vars(exe, pretrained_model, predicate=if_exist)
train_reader = paddle.batch(
reader.train(data_args, train_file_list), batch_size=batch_size)
test_reader = paddle.batch(
......@@ -98,50 +131,207 @@ def train(args,
feeder = fluid.DataFeeder(
place=place, feed_list=[image, gt_box, gt_label, difficult])
#print 'test_program ', test_program
def test(pass_id):
_, accum_map = map_eval.get_map_var()
map_eval.reset(exe)
test_map = None
for _, data in enumerate(test_reader()):
for data in test_reader():
test_map = exe.run(test_program,
feed=feeder.feed(data),
fetch_list=[accum_map])
print("Test {0}, map {1}".format(pass_id, test_map[0]))
#print 'main_program ', fluid.default_main_program()
for pass_id in range(num_passes):
start_time = time.time()
prev_start_time = start_time
end_time = 0
for batch_id, data in enumerate(train_reader()):
prev_start_time = start_time
start_time = time.time()
loss_v = exe.run(fluid.default_main_program(),
feed=feeder.feed(data),
fetch_list=[loss])
end_time = time.time()
if batch_id % 20 == 0:
print("Pass {0}, batch {1}, loss {2}"
.format(pass_id, batch_id, loss_v[0]))
print("Pass {0}, batch {1}, loss {2}, time {3}".format(
pass_id, batch_id, loss_v[0], start_time - prev_start_time))
test(pass_id)
if pass_id % 10 == 0:
if pass_id % 10 == 0 or pass_id == num_passes - 1:
model_path = os.path.join(model_save_dir, str(pass_id))
print 'save models to %s' % (model_path)
fluid.io.save_inference_model(model_path, ['image'], [nmsed_out],
exe)
fluid.io.save_persistables(exe, model_path)
def parallel_exe(args,
train_file_list,
val_file_list,
data_args,
learning_rate,
batch_size,
num_passes,
model_save_dir='model',
pretrained_model=None):
image_shape = [3, data_args.resize_h, data_args.resize_w]
if data_args.dataset == 'coco':
num_classes = 81
elif data_args.dataset == 'pascalvoc':
num_classes = 21
devices = os.getenv("CUDA_VISIBLE_DEVICES") or ""
devices_num = len(devices.split(","))
image = fluid.layers.data(name='image', shape=image_shape, dtype='float32')
gt_box = fluid.layers.data(
name='gt_box', shape=[4], dtype='float32', lod_level=1)
gt_label = fluid.layers.data(
name='gt_label', shape=[1], dtype='int32', lod_level=1)
difficult = fluid.layers.data(
name='gt_difficult', shape=[1], dtype='int32', lod_level=1)
locs, confs, box, box_var = mobile_net(num_classes, image, image_shape)
nmsed_out = fluid.layers.detection_output(
locs, confs, box, box_var, nms_threshold=0.45)
loss = fluid.layers.ssd_loss(locs, confs, gt_box, gt_label, box, box_var)
loss = fluid.layers.reduce_sum(loss)
test_program = fluid.default_main_program().clone(for_test=True)
with fluid.program_guard(test_program):
map_eval = fluid.evaluator.DetectionMAP(
nmsed_out,
gt_label,
gt_box,
difficult,
num_classes,
overlap_threshold=0.5,
evaluate_difficult=False,
ap_version=args.ap_version)
if data_args.dataset == 'coco':
# learning rate decay in 12, 19 pass, respectively
if '2014' in train_file_list:
epocs = 82783 / batch_size
boundaries = [epocs * 12, epocs * 19]
elif '2017' in train_file_list:
epocs = 118287 / batch_size
boundaries = [epcos * 12, epocs * 19]
elif data_args.dataset == 'pascalvoc':
epocs = 19200 / batch_size
boundaries = [epocs * 40, epocs * 60, epocs * 80, epocs * 100]
values = [
learning_rate, learning_rate * 0.5, learning_rate * 0.25,
learning_rate * 0.1, learning_rate * 0.01
]
optimizer = fluid.optimizer.RMSProp(
learning_rate=fluid.layers.piecewise_decay(boundaries, values),
regularization=fluid.regularizer.L2Decay(0.00005), )
optimizer.minimize(loss)
place = fluid.CUDAPlace(0) if args.use_gpu else fluid.CPUPlace()
exe = fluid.Executor(place)
exe.run(fluid.default_startup_program())
if pretrained_model:
def if_exist(var):
return os.path.exists(os.path.join(pretrained_model, var.name))
fluid.io.load_vars(exe, pretrained_model, predicate=if_exist)
if args.parallel:
train_exe = fluid.ParallelExecutor(
use_cuda=args.use_gpu, loss_name=loss.name)
train_reader = paddle.batch(
reader.train(data_args, train_file_list), batch_size=batch_size)
test_reader = paddle.batch(
reader.test(data_args, val_file_list), batch_size=batch_size)
feeder = fluid.DataFeeder(
place=place, feed_list=[image, gt_box, gt_label, difficult])
def save_model(postfix):
model_path = os.path.join(model_save_dir, postfix)
if os.path.isdir(model_path):
shutil.rmtree(model_path)
print 'save models to %s' % (model_path)
fluid.io.save_persistables(exe, model_path)
best_map = 0.
def test(pass_id, best_map):
_, accum_map = map_eval.get_map_var()
map_eval.reset(exe)
test_map = None
for data in test_reader():
test_map = exe.run(test_program,
feed=feeder.feed(data),
fetch_list=[accum_map])
if test_map[0] > best_map:
best_map = test_map[0]
save_model('best_model')
print("Test {0}, map {1}".format(pass_id, test_map[0]))
for pass_id in range(num_passes):
start_time = time.time()
prev_start_time = start_time
end_time = 0
for batch_id, data in enumerate(train_reader()):
prev_start_time = start_time
start_time = time.time()
if len(data) < devices_num: continue
if args.parallel:
loss_v, = train_exe.run(fetch_list=[loss.name],
feed_dict=feeder.feed(data))
else:
loss_v, = exe.run(fluid.default_main_program(),
feed=feeder.feed(data),
fetch_list=[loss])
end_time = time.time()
loss_v = np.mean(np.array(loss_v))
if batch_id % 20 == 0:
print("Pass {0}, batch {1}, loss {2}, time {3}".format(
pass_id, batch_id, loss_v, start_time - prev_start_time))
test(pass_id, best_map)
if pass_id % 10 == 0 or pass_id == num_passes - 1:
save_model(str(pass_id))
print("Best test map {0}".format(best_map))
if __name__ == '__main__':
args = parser.parse_args()
print_arguments(args)
data_dir = 'data/pascalvoc'
train_file_list = 'trainval.txt'
val_file_list = 'test.txt'
label_file = 'label_list'
model_save_dir = args.model_save_dir
if args.dataset == 'coco':
data_dir = './data/COCO17'
train_file_list = 'annotations/instances_train2017.json'
val_file_list = 'annotations/instances_val2017.json'
label_file = 'label_list'
data_args = reader.Settings(
data_dir='./data',
label_file='label_list',
apply_distort=True,
apply_expand=True,
resize_h=300,
resize_w=300,
mean_value=[127.5, 127.5, 127.5])
train(args,
train_file_list='./data/trainval.txt',
val_file_list='./data/test.txt',
data_args=data_args,
learning_rate=0.001,
batch_size=32,
num_passes=300)
dataset=args.dataset,
data_dir=data_dir,
label_file=label_file,
apply_distort=args.apply_distort,
apply_expand=args.apply_expand,
resize_h=args.resize_h,
resize_w=args.resize_w,
mean_value=[args.mean_value_B, args.mean_value_G, args.mean_value_R],
toy=args.is_toy)
#method = parallel_do
method = parallel_exe
method(
args,
train_file_list=train_file_list,
val_file_list=val_file_list,
data_args=data_args,
learning_rate=args.learning_rate,
batch_size=args.batch_size,
num_passes=args.num_passes,
model_save_dir=model_save_dir,
pretrained_model=args.pretrained_model)
# OCR Model

[toc]
This model built with paddle fluid is still under active development and is not
the final version. We welcome feedbacks.
运行本目录下的程序示例需要使用PaddlePaddle develop最新版本。如果您的PaddlePaddle安装版本低于此要求,请按照安装文档中的说明更新PaddlePaddle安装版本。
# Optical Character Recognition
这里将介绍如何在PaddlePaddle fluid下使用CRNN-CTC 和 CRNN-Attention模型对图片中的文字内容进行识别。
## 1. CRNN-CTC
本章的任务是识别含有单行汉语字符图片,首先采用卷积将图片转为`features map`, 然后使用`im2sequence op``features map`转为`sequence`,经过`双向GRU RNN`得到每个step的汉语字符的概率分布。训练过程选用的损失函数为CTC loss,最终的评估指标为`instance error rate`
本路径下各个文件的作用如下:
- **ctc_reader.py :** 下载、读取、处理数据。提供方法`train()``test()` 分别产生训练集和测试集的数据迭代器。
- **crnn_ctc_model.py :** 在该脚本中定义了训练网络、预测网络和evaluate网络。
- **ctc_train.py :** 用于模型的训练,可通过命令`python train.py --help` 获得使用方法。
- **inference.py :** 加载训练好的模型文件,对新数据进行预测。可通过命令`python inference.py --help` 获得使用方法。
- **eval.py :** 评估模型在指定数据集上的效果。可通过命令`python inference.py --help` 获得使用方法。
- **utility.py :** 实现的一些通用方法,包括参数配置、tensor的构造等。
### 1.1 数据
数据的下载和简单预处理都在`ctc_reader.py`中实现。
#### 1.1.1 数据格式
我们使用的训练和测试数据如`图1`所示,每张图片包含单行不定长的中文字符串,这些图片都是经过检测算法进行预框选处理的。
<p align="center">
<img src="images/demo.jpg" width="620" hspace='10'/> <br/>
<strong>图 1</strong>
</p>
在训练集中,每张图片对应的label是由若干数字组成的sequence。 Sequence中的每个数字表示一个字符在字典中的index。 `图1` 对应的label如下所示:
```
3835,8371,7191,2369,6876,4162,1938,168,1517,4590,3793
```
在上边这个label中,`3835` 表示字符‘两’的index,`4590` 表示中文字符逗号的index。
#### 1.1.2 数据准备
**A. 训练集**
我们需要把所有参与训练的图片放入同一个文件夹,暂且记为`train_images`。然后用一个list文件存放每张图片的信息,包括图片大小、图片名称和对应的label,这里暂记该list文件为`train_list`,其格式如下所示:
```
185 48 00508_0215.jpg 7740,5332,2369,3201,4162
48 48 00197_1893.jpg 6569
338 48 00007_0219.jpg 4590,4788,3015,1994,3402,999,4553
150 48 00107_4517.jpg 5936,3382,1437,3382
...
157 48 00387_0622.jpg 2397,1707,5919,1278
```
<center>文件train_list</center>
上述文件中的每一行表示一张图片,每行被空格分为四列,前两列分别表示图片的宽和高,第三列表示图片的名称,第四列表示该图片对应的sequence label。
最终我们应有以下类似文件结构:
```
|-train_data
|- train_list
|- train_imags
|- 00508_0215.jpg
|- 00197_1893.jpg
|- 00007_0219.jpg
| ...
```
在训练时,我们通过选项`--train_images``--train_list` 分别设置准备好的`train_images``train_list`
>**注:** 如果`--train_images` 和 `--train_list`都未设置或设置为None, ctc_reader.py会自动下载使用[示例数据](http://cloud.dlnel.org/filepub/?uuid=df937251-3c0b-480d-9a7b-0080dfeee65c),并将其缓存到`$HOME/.cache/paddle/dataset/ctc_data/data/` 路径下。
**B. 测试集和评估集**
测试集、评估集的准备方式与训练集相同。
在训练阶段,测试集的路径通过train.py的选项`--test_images``--test_list` 来设置。
在评估时,评估集的路径通过eval.py的选项`--input_images_dir``--input_images_list` 来设置。
**C. 待预测数据集**
预测支持三种形式的输入:
第一种:设置`--input_images_dir``--input_images_list`, 与训练集类似, 只不过list文件中的最后一列可以放任意占位字符或字符串,如下所示:
```
185 48 00508_0215.jpg s
48 48 00197_1893.jpg s
338 48 00007_0219.jpg s
...
```
第二种:仅设置`--input_images_list`, 其中list文件中只需放图片的完整路径,如下所示:
```
data/test_images/00000.jpg
data/test_images/00001.jpg
data/test_images/00003.jpg
```
第三种:从stdin读入一张图片的path,然后进行一次inference.
#### 1.2 训练
使用默认数据在GPU单卡上训练:
```
env CUDA_VISIABLE_DEVICES=0 python ctc_train.py
```
使用默认数据在GPU多卡上训练:
```
env CUDA_VISIABLE_DEVICES=0,1,2,3 python ctc_train.py --parallel=True
```
执行`python ctc_train.py --help`可查看更多使用方式和参数详细说明。
图2为使用默认参数和默认数据集训练的收敛曲线,其中横坐标轴为训练pass数,纵轴为在测试集上的sequence_error.
<p align="center">
<img src="images/train.jpg" width="620" hspace='10'/> <br/>
<strong>图 2</strong>
</p>
### 1.3 评估
通过以下命令调用评估脚本用指定数据集对模型进行评估:
```
env CUDA_VISIBLE_DEVICE=0 python eval.py \
--model_path="./models/model_0" \
--input_images_dir="./eval_data/images/" \
--input_images_list="./eval_data/eval_list\" \
```
执行`python ctc_train.py --help`可查看参数详细说明。
### 1.4 预测
从标准输入读取一张图片的路径,并对齐进行预测:
```
env CUDA_VISIBLE_DEVICE=0 python inference.py \
--model_path="models/model_00044_15000"
```
执行上述命令进行预测的效果如下:
```
----------- Configuration Arguments -----------
use_gpu: True
input_images_dir: None
input_images_list: None
model_path: /home/work/models/fluid/ocr_recognition/models/model_00052_15000
------------------------------------------------
Init model from: /home/work/models/fluid/ocr_recognition/models/model_00052_15000.
Please input the path of image: /home/work/models/fluid/ocr_recognition/data/test_images/00001_0060.jpg
result: [3298 2371 4233 6514 2378 3298 2363]
Please input the path of image: /home/work/models/fluid/ocr_recognition/data/test_images/00001_0429.jpg
result: [2067 2067 8187 8477 5027 7191 2431 1462]
```
从文件中批量读取图片路径,并对其进行预测:
```
env CUDA_VISIBLE_DEVICE=0 python inference.py \
--model_path="models/model_00044_15000" \
--input_images_list="data/test.list"
```
......@@ -143,7 +143,7 @@ def ctc_train_net(images, label, args, num_classes):
gradient_clip = None
if args.parallel:
places = fluid.layers.get_places()
pd = fluid.layers.ParallelDo(places)
pd = fluid.layers.ParallelDo(places, use_nccl=True)
with pd.do():
images_ = pd.read_input(images)
label_ = pd.read_input(label)
......
......@@ -30,10 +30,10 @@ class DataGenerator(object):
Reader interface for training.
:param img_root_dir: The root path of the image for training.
:type file_list: str
:type img_root_dir: str
:param img_label_list: The path of the <image_name, label> file for training.
:type file_list: str
:type img_label_list: str
'''
......@@ -91,10 +91,10 @@ class DataGenerator(object):
Reader interface for inference.
:param img_root_dir: The root path of the images for training.
:type file_list: str
:type img_root_dir: str
:param img_label_list: The path of the <image_name, label> file for testing.
:type file_list: list
:type img_label_list: str
'''
def reader():
......@@ -111,6 +111,42 @@ class DataGenerator(object):
return reader
def infer_reader(self, img_root_dir=None, img_label_list=None):
'''A reader interface for inference.
:param img_root_dir: The root path of the images for training.
:type img_root_dir: str
:param img_label_list: The path of the <image_name, label> file for
inference. It should be the path of <image_path> file if img_root_dir
was None. If img_label_list was set to None, it will read image path
from stdin.
:type img_root_dir: str
'''
def reader():
if img_label_list is not None:
for line in open(img_label_list):
if img_root_dir is not None:
# h, w, img_name, labels
img_name = line.split(' ')[2]
img_path = os.path.join(img_root_dir, img_name)
else:
img_path = line.strip("\t\n\r")
img = Image.open(img_path).convert('L')
img = np.array(img) - 127.5
img = img[np.newaxis, ...]
yield img, label
else:
while True:
img_path = raw_input("Please input the path of image: ")
img = Image.open(img_path).convert('L')
img = np.array(img) - 127.5
img = img[np.newaxis, ...]
yield img, [[0]]
return reader
def num_classes():
'''Get classes number of this dataset.
......@@ -124,21 +160,31 @@ def data_shape():
return DATA_SHAPE
def train(batch_size):
def train(batch_size, train_images_dir=None, train_list_file=None):
generator = DataGenerator()
data_dir = download_data()
return generator.train_reader(
path.join(data_dir, TRAIN_DATA_DIR_NAME),
path.join(data_dir, TRAIN_LIST_FILE_NAME), batch_size)
if train_images_dir is None:
data_dir = download_data()
train_images_dir = path.join(data_dir, TRAIN_DATA_DIR_NAME)
if train_list_file is None:
train_list_file = path.join(data_dir, TRAIN_LIST_FILE_NAME)
return generator.train_reader(train_images_dir, train_list_file, batch_size)
def test(batch_size=1, test_images_dir=None, test_list_file=None):
generator = DataGenerator()
if test_images_dir is None:
data_dir = download_data()
test_images_dir = path.join(data_dir, TEST_DATA_DIR_NAME)
if test_list_file is None:
test_list_file = path.join(data_dir, TEST_LIST_FILE_NAME)
return paddle.batch(
generator.test_reader(test_images_dir, test_list_file), batch_size)
def test(batch_size=1):
def inference(infer_images_dir=None, infer_list_file=None):
generator = DataGenerator()
data_dir = download_data()
return paddle.batch(
generator.test_reader(
path.join(data_dir, TRAIN_DATA_DIR_NAME),
path.join(data_dir, TRAIN_LIST_FILE_NAME)), batch_size)
generator.infer_reader(infer_images_dir, infer_list_file), 1)
def download_data():
......
"""Trainer for OCR CTC model."""
import paddle.fluid as fluid
import dummy_reader
from utility import add_arguments, print_arguments, to_lodtensor, get_feeder_data
from crnn_ctc_model import ctc_train_net
import ctc_reader
import argparse
from load_model import load_param
import functools
import sys
from utility import add_arguments, print_arguments, to_lodtensor, get_feeder_data
from crnn_ctc_model import ctc_train_net
import time
import os
parser = argparse.ArgumentParser(description=__doc__)
add_arg = functools.partial(add_arguments, argparser=parser)
# yapf: disable
add_arg('batch_size', int, 32, "Minibatch size.")
add_arg('pass_num', int, 100, "# of training epochs.")
add_arg('log_period', int, 1000, "Log period.")
add_arg('learning_rate', float, 1.0e-3, "Learning rate.")
add_arg('l2', float, 0.0004, "L2 regularizer.")
add_arg('max_clip', float, 10.0, "Max clip threshold.")
add_arg('min_clip', float, -10.0, "Min clip threshold.")
add_arg('momentum', float, 0.9, "Momentum.")
add_arg('rnn_hidden_size',int, 200, "Hidden size of rnn layers.")
add_arg('device', int, 0, "Device id.'-1' means running on CPU"
"while '0' means GPU-0.")
add_arg('min_average_window', int, 10000, "Min average window.")
add_arg('max_average_window', int, 15625, "Max average window.")
add_arg('average_window', float, 0.15, "Average window.")
add_arg('parallel', bool, False, "Whether use parallel training.")
# yapf: disable
def load_parameter(place):
params = load_param('./name.map', './data/model/results_without_avg_window/pass-00000/')
for name in params:
t = fluid.global_scope().find_var(name).get_tensor()
t.set(params[name], place)
add_arg('batch_size', int, 32, "Minibatch size.")
add_arg('pass_num', int, 100, "Number of training epochs.")
add_arg('log_period', int, 1000, "Log period.")
add_arg('save_model_period', int, 15000, "Save model period. '-1' means never saving the model.")
add_arg('eval_period', int, 15000, "Evaluate period. '-1' means never evaluating the model.")
add_arg('save_model_dir', str, "./models", "The directory the model to be saved to.")
add_arg('init_model', str, None, "The init model file of directory.")
add_arg('learning_rate', float, 1.0e-3, "Learning rate.")
add_arg('l2', float, 0.0004, "L2 regularizer.")
add_arg('momentum', float, 0.9, "Momentum.")
add_arg('rnn_hidden_size', int, 200, "Hidden size of rnn layers.")
add_arg('use_gpu', bool, True, "Whether use GPU to train.")
add_arg('min_average_window',int, 10000, "Min average window.")
add_arg('max_average_window',int, 15625, "Max average window. It is proposed to be set as the number of minibatch in a pass.")
add_arg('average_window', float, 0.15, "Average window.")
add_arg('parallel', bool, False, "Whether use parallel training.")
add_arg('train_images', str, None, "The directory of training images."
"None means using the default training images of reader.")
add_arg('train_list', str, None, "The list file of training images."
"None means using the default train_list file of reader.")
add_arg('test_images', str, None, "The directory of training images."
"None means using the default test images of reader.")
add_arg('test_list', str, None, "The list file of training images."
"None means using the default test_list file of reader.")
add_arg('num_classes', int, None, "The number of classes."
"None means using the default num_classes from reader.")
# yapf: enable
def train(args, data_reader=dummy_reader):
def train(args, data_reader=ctc_reader):
"""OCR CTC training"""
num_classes = data_reader.num_classes()
num_classes = data_reader.num_classes(
) if args.num_classes is None else args.num_classes
data_shape = data_reader.data_shape()
# define network
images = fluid.layers.data(name='pixel', shape=data_shape, dtype='float32')
label = fluid.layers.data(name='label', shape=[1], dtype='int32', lod_level=1)
sum_cost, error_evaluator, inference_program, model_average = ctc_train_net(images, label, args, num_classes)
label = fluid.layers.data(
name='label', shape=[1], dtype='int32', lod_level=1)
sum_cost, error_evaluator, inference_program, model_average = ctc_train_net(
images, label, args, num_classes)
# data reader
train_reader = data_reader.train(args.batch_size)
test_reader = data_reader.test()
train_reader = data_reader.train(
args.batch_size,
train_images_dir=args.train_images,
train_list_file=args.train_list)
test_reader = data_reader.test(
test_images_dir=args.test_images, test_list_file=args.test_list)
# prepare environment
place = fluid.CPUPlace()
if args.device >= 0:
place = fluid.CUDAPlace(args.device)
if args.use_gpu:
place = fluid.CUDAPlace(0)
exe = fluid.Executor(place)
exe.run(fluid.default_startup_program())
#load_parameter(place)
# load init model
if args.init_model is not None:
model_dir = args.init_model
model_file_name = None
if not os.path.isdir(args.init_model):
model_dir = os.path.dirname(args.init_model)
model_file_name = os.path.basename(args.init_model)
fluid.io.load_params(exe, dirname=model_dir, filename=model_file_name)
print "Init model from: %s." % args.init_model
for pass_id in range(args.pass_num):
error_evaluator.reset(exe)
......@@ -70,29 +91,41 @@ def train(args, data_reader=dummy_reader):
fetch_list=[sum_cost] + error_evaluator.metrics)
total_loss += batch_loss[0]
total_seq_error += batch_seq_error[0]
if batch_id % 100 == 1:
print '.',
sys.stdout.flush()
if batch_id % args.log_period == 1:
# training log
if batch_id % args.log_period == 0:
print "\nTime: %s; Pass[%d]-batch[%d]; Avg Warp-CTC loss: %s; Avg seq error: %s." % (
time.time(),
pass_id, batch_id, total_loss / (batch_id * args.batch_size), total_seq_error / (batch_id * args.batch_size))
time.time(), pass_id, batch_id,
total_loss / (batch_id * args.batch_size),
total_seq_error / (batch_id * args.batch_size))
sys.stdout.flush()
batch_id += 1
# evaluate
if batch_id % args.eval_period == 0:
with model_average.apply(exe):
error_evaluator.reset(exe)
for data in test_reader():
exe.run(inference_program,
feed=get_feeder_data(data, place))
_, test_seq_error = error_evaluator.eval(exe)
with model_average.apply(exe):
error_evaluator.reset(exe)
for data in test_reader():
exe.run(inference_program, feed=get_feeder_data(data, place))
_, test_seq_error = error_evaluator.eval(exe)
print "\nTime: %s; Pass[%d]-batch[%d]; Test seq error: %s.\n" % (
time.time(), pass_id, batch_id, str(test_seq_error[0]))
# save model
if batch_id % args.save_model_period == 0:
with model_average.apply(exe):
filename = "model_%05d_%d" % (pass_id, batch_id)
fluid.io.save_params(
exe, dirname=args.save_model_dir, filename=filename)
print "Saved model to: %s/%s." % (args.save_model_dir,
filename)
batch_id += 1
print "\nEnd pass[%d]; Test seq error: %s.\n" % (
pass_id, str(test_seq_error[0]))
def main():
args = parser.parse_args()
print_arguments(args)
train(args, data_reader=ctc_reader)
if __name__ == "__main__":
main()
import paddle.v2 as paddle
import paddle.fluid as fluid
from load_model import load_param
from utility import get_feeder_data
from utility import add_arguments, print_arguments, to_lodtensor, get_feeder_data
from crnn_ctc_model import ctc_infer
from crnn_ctc_model import ctc_eval
import ctc_reader
import dummy_reader
import argparse
import functools
import os
parser = argparse.ArgumentParser(description=__doc__)
add_arg = functools.partial(add_arguments, argparser=parser)
# yapf: disable
add_arg('model_path', str, None, "The model path to be used for inference.")
add_arg('input_images_dir', str, None, "The directory of images.")
add_arg('input_images_list', str, None, "The list file of images.")
add_arg('use_gpu', bool, True, "Whether use GPU to eval.")
# yapf: enable
def load_parameter(place):
params = load_param('./name.map', './data/model/results/pass-00062/')
for name in params:
print "param: %s" % name
t = fluid.global_scope().find_var(name).get_tensor()
t.set(params[name], place)
def evaluate(eval=ctc_eval, data_reader=dummy_reader):
def evaluate(args, eval=ctc_eval, data_reader=ctc_reader):
"""OCR inference"""
num_classes = data_reader.num_classes()
data_shape = data_reader.data_shape()
......@@ -26,29 +29,41 @@ def evaluate(eval=ctc_eval, data_reader=dummy_reader):
evaluator, cost = eval(images, label, num_classes)
# data reader
test_reader = data_reader.test()
test_reader = data_reader.test(
test_images_dir=args.input_images_dir,
test_list_file=args.input_images_list)
# prepare environment
place = fluid.CUDAPlace(0)
#place = fluid.CPUPlace()
place = fluid.CPUPlace()
if use_gpu:
place = fluid.CUDAPlace(0)
exe = fluid.Executor(place)
exe.run(fluid.default_startup_program())
print fluid.default_main_program()
load_parameter(place)
# load init model
model_dir = args.model_path
model_file_name = None
if not os.path.isdir(args.model_path):
model_dir = os.path.dirname(args.model_path)
model_file_name = os.path.basename(args.model_path)
fluid.io.load_params(exe, dirname=model_dir, filename=model_file_name)
print "Init model from: %s." % args.model_path
evaluator.reset(exe)
count = 0
for data in test_reader():
count += 1
print 'Process samples: %d\r' % (count, ),
result, avg_distance, avg_seq_error = exe.run(
fluid.default_main_program(),
feed=get_feeder_data(data, place),
fetch_list=[cost] + evaluator.metrics)
exe.run(fluid.default_main_program(), feed=get_feeder_data(data, place))
avg_distance, avg_seq_error = evaluator.eval(exe)
print "avg_distance: %s; avg_seq_error: %s" % (avg_distance, avg_seq_error)
print "Read %d samples; avg_distance: %s; avg_seq_error: %s" % (
count, avg_distance, avg_seq_error)
def main():
evaluate(data_reader=ctc_reader)
args = parser.parse_args()
print_arguments(args)
evaluate(args, data_reader=ctc_reader)
if __name__ == "__main__":
......
import paddle.v2 as paddle
import paddle.v2.fluid as fluid
from load_model import load_param
from utility import get_feeder_data
import paddle.fluid as fluid
from utility import add_arguments, print_arguments, to_lodtensor, get_feeder_data
from crnn_ctc_model import ctc_infer
import numpy as np
import ctc_reader
import dummy_reader
import argparse
import functools
import os
parser = argparse.ArgumentParser(description=__doc__)
add_arg = functools.partial(add_arguments, argparser=parser)
# yapf: disable
add_arg('model_path', str, None, "The model path to be used for inference.")
add_arg('input_images_dir', str, None, "The directory of images.")
add_arg('input_images_list', str, None, "The list file of images.")
add_arg('use_gpu', bool, True, "Whether use GPU to infer.")
# yapf: enable
def load_parameter(place):
params = load_param('./name.map', './data/model/results/pass-00062/')
for name in params:
print "param: %s" % name
t = fluid.global_scope().find_var(name).get_tensor()
t.set(params[name], place)
def inference(infer=ctc_infer, data_reader=dummy_reader):
def inference(args, infer=ctc_infer, data_reader=ctc_reader):
"""OCR inference"""
num_classes = data_reader.num_classes()
data_shape = data_reader.data_shape()
# define network
images = fluid.layers.data(name='pixel', shape=data_shape, dtype='float32')
sequence, tmp = infer(images, num_classes)
fluid.layers.Print(tmp)
sequence = infer(images, num_classes)
# data reader
test_reader = data_reader.test()
infer_reader = data_reader.inference(
infer_images_dir=args.input_images_dir,
infer_list_file=args.input_images_list)
# prepare environment
place = fluid.CUDAPlace(0)
place = fluid.CPUPlace()
if use_gpu:
place = fluid.CUDAPlace(0)
exe = fluid.Executor(place)
exe.run(fluid.default_startup_program())
load_parameter(place)
# load init model
model_dir = args.model_path
model_file_name = None
if not os.path.isdir(args.model_path):
model_dir = os.path.dirname(args.model_path)
model_file_name = os.path.basename(args.model_path)
fluid.io.load_params(exe, dirname=model_dir, filename=model_file_name)
print "Init model from: %s." % args.model_path
for data in test_reader():
for data in infer_reader():
result = exe.run(fluid.default_main_program(),
feed=get_feeder_data(
data, place, need_label=False),
fetch_list=[tmp])
print "result: %s" % (list(result[0].flatten()), )
fetch_list=[sequence],
return_numpy=False)
print "result: %s" % (np.array(result[0]).flatten(), )
def main():
inference(data_reader=ctc_reader)
args = parser.parse_args()
print_arguments(args)
inference(args, data_reader=ctc_reader)
if __name__ == "__main__":
......
import sys
import numpy as np
import ast
def load_parameter(file_name):
with open(file_name, 'rb') as f:
f.read(16) # skip header.
return np.fromfile(f, dtype=np.float32)
def load_param(name_map_file, old_param_dir):
result = {}
name_map = {}
shape_map = {}
with open(name_map_file, 'r') as map_file:
for param in map_file:
old_name, new_name, shape = param.strip().split('=')
name_map[new_name] = old_name
shape_map[new_name] = ast.literal_eval(shape)
for new_name in name_map:
result[new_name] = load_parameter("/".join(
[old_param_dir, name_map[new_name]])).reshape(shape_map[new_name])
return result
if __name__ == "__main__":
name_map_file = "./name.map"
old_param_dir = "./data/model/results/pass-00062/"
result = load_param(name_map_file, old_param_dir)
for p in result:
print "name: %s; param.shape: %s" % (p, result[p].shape)
......@@ -30,32 +30,28 @@ class PolicyGradient:
acts = fluid.layers.data(name='acts', shape=[1], dtype='int64')
vt = fluid.layers.data(name='vt', shape=[1], dtype='float32')
# fc1
fc1 = fluid.layers.fc(
input=obs,
size=10,
act="tanh" # tanh activation
)
fc1 = fluid.layers.fc(input=obs, size=10, act="tanh") # tanh activation
# fc2
self.all_act_prob = fluid.layers.fc(input=fc1,
size=self.n_actions,
act="softmax")
all_act_prob = fluid.layers.fc(input=fc1,
size=self.n_actions,
act="softmax")
self.inferece_program = fluid.defaul_main_program().clone()
# to maximize total reward (log_p * R) is to minimize -(log_p * R)
neg_log_prob = fluid.layers.cross_entropy(
input=self.all_act_prob,
label=acts) # this is negative log of chosen action
neg_log_prob_weight = fluid.layers.elementwise_mul(x=neg_log_prob, y=vt)
loss = fluid.layers.reduce_mean(
x=neg_log_prob_weight) # reward guided loss
neg_log_prob_weight) # reward guided loss
sgd_optimizer = fluid.optimizer.SGD(self.lr)
sgd_optimizer.minimize(loss)
self.exe.run(fluid.default_startup_program())
def choose_action(self, observation):
prob_weights = self.exe.run(
fluid.default_main_program().prune(self.all_act_prob),
feed={"obs": observation[np.newaxis, :]},
fetch_list=[self.all_act_prob])
prob_weights = self.exe.run(self.inferece_program,
feed={"obs": observation[np.newaxis, :]},
fetch_list=[self.all_act_prob])
prob_weights = np.array(prob_weights[0])
action = np.random.choice(
range(prob_weights.shape[1]),
......
The minimum PaddlePaddle version needed for the code sample in this directory is the lastest develop branch. If you are on a version of PaddlePaddle earlier than this, [please update your installation](http://www.paddlepaddle.org/docs/develop/documentation/en/build_and_install/pip_install_en.html).
# 文本分类
---
以下是本例的简要目录结构及说明:
# Text Classification
## Data Preparation
```
wget http://ai.stanford.edu/%7Eamaas/data/sentiment/aclImdb_v1.tar.gz
tar zxf aclImdb_v1.tar.gz
```text
.
├── nets.py # 模型定义
├── README.md # 文档
├── train.py # 训练脚本
├── infer.py # 预测脚本
└── utils.py # 定义通用函数,从外部获取
```
## Training
## 简介,模型详解
在PaddlePaddle v2版本[文本分类](https://github.com/PaddlePaddle/models/blob/develop/text/README.md)中对于文本分类任务有较详细的介绍,在本例中不再重复介绍。
在模型上,我们采用了bow, cnn, lstm, gru四种常见的文本分类模型。
## 训练
1. 运行命令 `python train.py bow` 开始训练模型。
```python
python train.py bow # bow指定网络结构,可替换成cnn, lstm, gru
```
2. (可选)想自定义网络结构,需在[nets.py](./nets.py)中自行添加,并设置[train.py](./train.py)中的相应参数。
```python
def train(train_reader, # 训练数据
word_dict, # 数据字典
network, # 模型配置
use_cuda, # 是否用GPU
parallel, # 是否并行
save_dirname, # 保存模型路径
lr=0.2, # 学习率大小
batch_size=128, # 每个batch的样本数
pass_num=30): # 训练的轮数
```
## 训练结果示例
```text
pass_id: 0, avg_acc: 0.848040, avg_cost: 0.354073
pass_id: 1, avg_acc: 0.914200, avg_cost: 0.217945
pass_id: 2, avg_acc: 0.929800, avg_cost: 0.184302
pass_id: 3, avg_acc: 0.938680, avg_cost: 0.164240
pass_id: 4, avg_acc: 0.945120, avg_cost: 0.149150
pass_id: 5, avg_acc: 0.951280, avg_cost: 0.137117
pass_id: 6, avg_acc: 0.955360, avg_cost: 0.126434
pass_id: 7, avg_acc: 0.961400, avg_cost: 0.117405
pass_id: 8, avg_acc: 0.963560, avg_cost: 0.110070
pass_id: 9, avg_acc: 0.965840, avg_cost: 0.103273
pass_id: 10, avg_acc: 0.969800, avg_cost: 0.096314
pass_id: 11, avg_acc: 0.971720, avg_cost: 0.090206
pass_id: 12, avg_acc: 0.974800, avg_cost: 0.084970
pass_id: 13, avg_acc: 0.977400, avg_cost: 0.078981
pass_id: 14, avg_acc: 0.980000, avg_cost: 0.073685
pass_id: 15, avg_acc: 0.981080, avg_cost: 0.069898
pass_id: 16, avg_acc: 0.982080, avg_cost: 0.064923
pass_id: 17, avg_acc: 0.984680, avg_cost: 0.060861
pass_id: 18, avg_acc: 0.985840, avg_cost: 0.057095
pass_id: 19, avg_acc: 0.988080, avg_cost: 0.052424
pass_id: 20, avg_acc: 0.989160, avg_cost: 0.049059
pass_id: 21, avg_acc: 0.990120, avg_cost: 0.045882
pass_id: 22, avg_acc: 0.992080, avg_cost: 0.042140
pass_id: 23, avg_acc: 0.992280, avg_cost: 0.039722
pass_id: 24, avg_acc: 0.992840, avg_cost: 0.036607
pass_id: 25, avg_acc: 0.994440, avg_cost: 0.034040
pass_id: 26, avg_acc: 0.995000, avg_cost: 0.031501
pass_id: 27, avg_acc: 0.995440, avg_cost: 0.028988
pass_id: 28, avg_acc: 0.996240, avg_cost: 0.026639
pass_id: 29, avg_acc: 0.996960, avg_cost: 0.024186
```
python train.py --dict_path 'aclImdb/imdb.vocab'
## 预测
1. 运行命令 `python infer.py bow_model`, 开始预测。
```python
python infer.py bow_model # bow_model指定需要导入的模型
## 预测结果示例
```text
model_path: bow_model/epoch0, avg_acc: 0.882800
model_path: bow_model/epoch1, avg_acc: 0.882360
model_path: bow_model/epoch2, avg_acc: 0.881400
model_path: bow_model/epoch3, avg_acc: 0.877800
model_path: bow_model/epoch4, avg_acc: 0.872920
model_path: bow_model/epoch5, avg_acc: 0.872640
model_path: bow_model/epoch6, avg_acc: 0.869960
model_path: bow_model/epoch7, avg_acc: 0.865160
model_path: bow_model/epoch8, avg_acc: 0.863680
model_path: bow_model/epoch9, avg_acc: 0.861200
model_path: bow_model/epoch10, avg_acc: 0.853520
model_path: bow_model/epoch11, avg_acc: 0.850400
model_path: bow_model/epoch12, avg_acc: 0.855960
model_path: bow_model/epoch13, avg_acc: 0.853480
model_path: bow_model/epoch14, avg_acc: 0.855960
model_path: bow_model/epoch15, avg_acc: 0.854120
model_path: bow_model/epoch16, avg_acc: 0.854160
model_path: bow_model/epoch17, avg_acc: 0.852240
model_path: bow_model/epoch18, avg_acc: 0.852320
model_path: bow_model/epoch19, avg_acc: 0.850280
model_path: bow_model/epoch20, avg_acc: 0.849760
model_path: bow_model/epoch21, avg_acc: 0.850160
model_path: bow_model/epoch22, avg_acc: 0.846800
model_path: bow_model/epoch23, avg_acc: 0.845440
model_path: bow_model/epoch24, avg_acc: 0.845640
model_path: bow_model/epoch25, avg_acc: 0.846200
model_path: bow_model/epoch26, avg_acc: 0.845880
model_path: bow_model/epoch27, avg_acc: 0.844880
model_path: bow_model/epoch28, avg_acc: 0.844680
model_path: bow_model/epoch29, avg_acc: 0.844960
```
注:过拟合导致acc持续下降,请忽略
class TrainConfig(object):
# Whether to use GPU in training or not.
use_gpu = False
# The training batch size.
batch_size = 4
# The epoch number.
num_passes = 30
# The global learning rate.
learning_rate = 0.01
# Training log will be printed every log_period.
log_period = 100
import sys
import time
import unittest
import contextlib
import numpy as np
import paddle.fluid as fluid
import paddle.v2 as paddle
import utils
def infer(test_reader, use_cuda, model_path=None):
"""
inference function
"""
if model_path is None:
print(str(model_path) + " cannot be found")
return
place = fluid.CUDAPlace(0) if use_cuda else fluid.CPUPlace()
exe = fluid.Executor(place)
inference_scope = fluid.core.Scope()
with fluid.scope_guard(inference_scope):
[inference_program, feed_target_names,
fetch_targets] = fluid.io.load_inference_model(model_path, exe)
total_acc = 0.0
total_count = 0
for data in test_reader():
acc = exe.run(inference_program,
feed=utils.data2tensor(data, place),
fetch_list=fetch_targets,
return_numpy=True)
total_acc += acc[0] * len(data)
total_count += len(data)
avg_acc = total_acc / total_count
print("model_path: %s, avg_acc: %f" % (model_path, avg_acc))
if __name__ == "__main__":
word_dict, train_reader, test_reader = utils.prepare_data(
"imdb", self_dict=False, batch_size=128, buf_size=50000)
model_path = sys.argv[1]
for i in range(30):
epoch_path = model_path + "/" + "epoch" + str(i)
infer(test_reader, use_cuda=False, model_path=epoch_path)
import sys
import time
import numpy as np
import paddle.fluid as fluid
import paddle.v2 as paddle
def bow_net(data,
label,
dict_dim,
emb_dim=128,
hid_dim=128,
hid_dim2=96,
class_dim=2):
"""
bow net
"""
emb = fluid.layers.embedding(input=data, size=[dict_dim, emb_dim])
bow = fluid.layers.sequence_pool(input=emb, pool_type='sum')
bow_tanh = fluid.layers.tanh(bow)
fc_1 = fluid.layers.fc(input=bow_tanh, size=hid_dim, act="tanh")
fc_2 = fluid.layers.fc(input=fc_1, size=hid_dim2, act="tanh")
prediction = fluid.layers.fc(input=[fc_2], size=class_dim, act="softmax")
cost = fluid.layers.cross_entropy(input=prediction, label=label)
avg_cost = fluid.layers.mean(x=cost)
acc = fluid.layers.accuracy(input=prediction, label=label)
return avg_cost, acc, prediction
def cnn_net(data,
label,
dict_dim,
emb_dim=128,
hid_dim=128,
hid_dim2=96,
class_dim=2,
win_size=3):
"""
conv net
"""
emb = fluid.layers.embedding(input=data, size=[dict_dim, emb_dim])
conv_3 = fluid.nets.sequence_conv_pool(
input=emb,
num_filters=hid_dim,
filter_size=win_size,
act="tanh",
pool_type="max")
fc_1 = fluid.layers.fc(input=[conv_3], size=hid_dim2)
prediction = fluid.layers.fc(input=[fc_1], size=class_dim, act="softmax")
cost = fluid.layers.cross_entropy(input=prediction, label=label)
avg_cost = fluid.layers.mean(x=cost)
acc = fluid.layers.accuracy(input=prediction, label=label)
return avg_cost, acc, prediction
def lstm_net(data,
label,
dict_dim,
emb_dim=128,
hid_dim=128,
hid_dim2=96,
class_dim=2,
emb_lr=30.0):
"""
lstm net
"""
emb = fluid.layers.embedding(
input=data,
size=[dict_dim, emb_dim],
param_attr=fluid.ParamAttr(learning_rate=emb_lr))
fc0 = fluid.layers.fc(input=emb, size=hid_dim * 4, act='tanh')
lstm_h, c = fluid.layers.dynamic_lstm(
input=fc0, size=hid_dim * 4, is_reverse=False)
lstm_max = fluid.layers.sequence_pool(input=lstm_h, pool_type='max')
lstm_max_tanh = fluid.layers.tanh(lstm_max)
fc1 = fluid.layers.fc(input=lstm_max_tanh, size=hid_dim2, act='tanh')
prediction = fluid.layers.fc(input=fc1, size=class_dim, act='softmax')
cost = fluid.layers.cross_entropy(input=prediction, label=label)
avg_cost = fluid.layers.mean(x=cost)
acc = fluid.layers.accuracy(input=prediction, label=label)
return avg_cost, acc, prediction
def gru_net(data,
label,
dict_dim,
emb_dim=128,
hid_dim=128,
hid_dim2=96,
class_dim=2,
emb_lr=400.0):
"""
gru net
"""
emb = fluid.layers.embedding(
input=data,
size=[dict_dim, emb_dim],
param_attr=fluid.ParamAttr(learning_rate=emb_lr))
fc0 = fluid.layers.fc(input=emb, size=hid_dim * 3)
gru_h = fluid.layers.dynamic_gru(input=fc0, size=hid_dim, is_reverse=False)
gru_max = fluid.layers.sequence_pool(input=gru_h, pool_type='max')
gru_max_tanh = fluid.layers.tanh(gru_max)
fc1 = fluid.layers.fc(input=gru_max_tanh, size=hid_dim2, act='tanh')
prediction = fluid.layers.fc(input=fc1, size=class_dim, act='softmax')
cost = fluid.layers.cross_entropy(input=prediction, label=label)
avg_cost = fluid.layers.mean(x=cost)
acc = fluid.layers.accuracy(input=prediction, label=label)
return avg_cost, acc, prediction
import numpy as np
import sys
import os
import argparse
import time
import unittest
import contextlib
import paddle.v2 as paddle
import paddle.fluid as fluid
import paddle.v2 as paddle
from config import TrainConfig as conf
def parse_args():
parser = argparse.ArgumentParser()
parser.add_argument(
'--dict_path',
type=str,
required=True,
help="Path of the word dictionary.")
return parser.parse_args()
# Define to_lodtensor function to process the sequential data.
def to_lodtensor(data, place):
seq_lens = [len(seq) for seq in data]
cur_len = 0
lod = [cur_len]
for l in seq_lens:
cur_len += l
lod.append(cur_len)
flattened_data = np.concatenate(data, axis=0).astype("int64")
flattened_data = flattened_data.reshape([len(flattened_data), 1])
res = fluid.LoDTensor()
res.set(flattened_data, place)
res.set_lod([lod])
return res
# Load the dictionary.
def load_vocab(filename):
vocab = {}
with open(filename) as f:
for idx, line in enumerate(f):
vocab[line.strip()] = idx
return vocab
# Define the convolution model.
def conv_net(dict_dim,
window_size=3,
emb_dim=128,
num_filters=128,
fc0_dim=96,
class_dim=2):
import utils
from nets import bow_net
from nets import cnn_net
from nets import lstm_net
from nets import gru_net
def train(train_reader,
word_dict,
network,
use_cuda,
parallel,
save_dirname,
lr=0.2,
batch_size=128,
pass_num=30):
"""
train network
"""
data = fluid.layers.data(
name="words", shape=[1], dtype="int64", lod_level=1)
label = fluid.layers.data(name="label", shape=[1], dtype="int64")
emb = fluid.layers.embedding(input=data, size=[dict_dim, emb_dim])
conv_3 = fluid.nets.sequence_conv_pool(
input=emb,
num_filters=num_filters,
filter_size=window_size,
act="tanh",
pool_type="max")
fc_0 = fluid.layers.fc(input=[conv_3], size=fc0_dim)
prediction = fluid.layers.fc(input=[fc_0], size=class_dim, act="softmax")
cost = fluid.layers.cross_entropy(input=prediction, label=label)
avg_cost = fluid.layers.mean(x=cost)
return data, label, prediction, avg_cost
def main(dict_path):
word_dict = load_vocab(dict_path)
word_dict["<unk>"] = len(word_dict)
dict_dim = len(word_dict)
print("The dictionary size is : %d" % dict_dim)
data, label, prediction, avg_cost = conv_net(dict_dim)
sgd_optimizer = fluid.optimizer.SGD(learning_rate=conf.learning_rate)
sgd_optimizer.minimize(avg_cost)
batch_size_var = fluid.layers.create_tensor(dtype='int64')
batch_acc_var = fluid.layers.accuracy(
input=prediction, label=label, total=batch_size_var)
inference_program = fluid.default_main_program().clone()
with fluid.program_guard(inference_program):
inference_program = fluid.io.get_inference_program(
target_vars=[batch_acc_var, batch_size_var])
if not parallel:
cost, acc, prediction = network(data, label, len(word_dict))
else:
places = fluid.layers.get_places(device_count=2)
pd = fluid.layers.ParallelDo(places)
with pd.do():
cost, acc, prediction = network(
pd.read_input(data), pd.read_input(label), len(word_dict))
# The training data set.
train_reader = paddle.batch(
paddle.reader.shuffle(
paddle.dataset.imdb.train(word_dict), buf_size=51200),
batch_size=conf.batch_size)
pd.write_output(cost)
pd.write_output(acc)
# The testing data set.
test_reader = paddle.batch(
paddle.reader.shuffle(
paddle.dataset.imdb.test(word_dict), buf_size=51200),
batch_size=conf.batch_size)
cost, acc = pd()
cost = fluid.layers.mean(cost)
acc = fluid.layers.mean(acc)
if conf.use_gpu:
place = fluid.CUDAPlace(0)
else:
place = fluid.CPUPlace()
sgd_optimizer = fluid.optimizer.Adagrad(learning_rate=lr)
sgd_optimizer.minimize(cost)
place = fluid.CUDAPlace(0) if use_cuda else fluid.CPUPlace()
exe = fluid.Executor(place)
feeder = fluid.DataFeeder(feed_list=[data, label], place=place)
exe.run(fluid.default_startup_program())
for pass_id in xrange(pass_num):
data_size, data_count, total_acc, total_cost = 0, 0, 0.0, 0.0
for data in train_reader():
avg_cost_np, avg_acc_np = exe.run(fluid.default_main_program(),
feed=feeder.feed(data),
fetch_list=[cost, acc])
data_size = len(data)
total_acc += data_size * avg_acc_np
total_cost += data_size * avg_cost_np
data_count += data_size
avg_cost = total_cost / data_count
avg_acc = total_acc / data_count
print("pass_id: %d, avg_acc: %f, avg_cost: %f" %
(pass_id, avg_acc, avg_cost))
epoch_model = save_dirname + "/" + "epoch" + str(pass_id)
fluid.io.save_inference_model(epoch_model, ["words", "label"], acc, exe)
def train_net():
word_dict, train_reader, test_reader = utils.prepare_data(
"imdb", self_dict=False, batch_size=128, buf_size=50000)
if sys.argv[1] == "bow":
train(
train_reader,
word_dict,
bow_net,
use_cuda=False,
parallel=False,
save_dirname="bow_model",
lr=0.002,
pass_num=30,
batch_size=128)
elif sys.argv[1] == "cnn":
train(
train_reader,
word_dict,
cnn_net,
use_cuda=True,
parallel=False,
save_dirname="cnn_model",
lr=0.01,
pass_num=30,
batch_size=4)
elif sys.argv[1] == "lstm":
train(
train_reader,
word_dict,
lstm_net,
use_cuda=True,
parallel=False,
save_dirname="lstm_model",
lr=0.05,
pass_num=30,
batch_size=4)
elif sys.argv[1] == "gru":
train(
train_reader,
word_dict,
lstm_net,
use_cuda=True,
parallel=False,
save_dirname="gru_model",
lr=0.05,
pass_num=30,
batch_size=128)
else:
print("network name cannot be found!")
sys.exit(1)
train_pass_acc_evaluator = fluid.average.WeightedAverage()
test_pass_acc_evaluator = fluid.average.WeightedAverage()
def test(exe):
test_pass_acc_evaluator.reset()
for batch_id, data in enumerate(test_reader()):
input_seq = to_lodtensor(map(lambda x: x[0], data), place)
y_data = np.array(map(lambda x: x[1], data)).astype("int64")
y_data = y_data.reshape([-1, 1])
b_acc, b_size = exe.run(inference_program,
feed={"words": input_seq,
"label": y_data},
fetch_list=[batch_acc_var, batch_size_var])
test_pass_acc_evaluator.add(value=b_acc, weight=b_size)
test_acc = test_pass_acc_evaluator.eval()
return test_acc
total_time = 0.
for pass_id in xrange(conf.num_passes):
train_pass_acc_evaluator.reset()
start_time = time.time()
for batch_id, data in enumerate(train_reader()):
cost_val, acc_val, size_val = exe.run(
fluid.default_main_program(),
feed=feeder.feed(data),
fetch_list=[avg_cost, batch_acc_var, batch_size_var])
train_pass_acc_evaluator.add(value=acc_val, weight=size_val)
if batch_id and batch_id % conf.log_period == 0:
print("Pass id: %d, batch id: %d, cost: %f, pass_acc: %f" %
(pass_id, batch_id, cost_val,
train_pass_acc_evaluator.eval()))
end_time = time.time()
total_time += (end_time - start_time)
pass_test_acc = test(exe)
print("Pass id: %d, test_acc: %f" % (pass_id, pass_test_acc))
print("Total train time: %f" % (total_time))
if __name__ == '__main__':
args = parse_args()
main(args.dict_path)
if __name__ == "__main__":
train_net()
import sys
import time
import numpy as np
import paddle.fluid as fluid
import paddle.v2 as paddle
import light_imdb
import tiny_imdb
def to_lodtensor(data, place):
"""
convert to LODtensor
"""
seq_lens = [len(seq) for seq in data]
cur_len = 0
lod = [cur_len]
for l in seq_lens:
cur_len += l
lod.append(cur_len)
flattened_data = np.concatenate(data, axis=0).astype("int64")
flattened_data = flattened_data.reshape([len(flattened_data), 1])
res = fluid.LoDTensor()
res.set(flattened_data, place)
res.set_lod([lod])
return res
def load_vocab(filename):
"""
load imdb vocabulary
"""
vocab = {}
with open(filename) as f:
wid = 0
for line in f:
vocab[line.strip()] = wid
wid += 1
vocab["<unk>"] = len(vocab)
return vocab
def data2tensor(data, place):
"""
data2tensor
"""
input_seq = to_lodtensor(map(lambda x: x[0], data), place)
y_data = np.array(map(lambda x: x[1], data)).astype("int64")
y_data = y_data.reshape([-1, 1])
return {"words": input_seq, "label": y_data}
def prepare_data(data_type="imdb",
self_dict=False,
batch_size=128,
buf_size=50000):
"""
prepare data
"""
if self_dict:
word_dict = load_vocab(data_type + ".vocab")
else:
if data_type == "imdb":
word_dict = paddle.dataset.imdb.word_dict()
elif data_type == "light_imdb":
word_dict = light_imdb.word_dict()
elif data_type == "tiny_imdb":
word_dict = tiny_imdb.word_dict()
else:
raise RuntimeError("No such dataset")
if data_type == "imdb":
train_reader = paddle.batch(
paddle.reader.shuffle(
paddle.dataset.imdb.train(word_dict), buf_size=buf_size),
batch_size=batch_size)
test_reader = paddle.batch(
paddle.reader.shuffle(
paddle.dataset.imdb.test(word_dict), buf_size=buf_size),
batch_size=batch_size)
elif data_type == "light_imdb":
train_reader = paddle.batch(
paddle.reader.shuffle(
light_imdb.train(word_dict), buf_size=buf_size),
batch_size=batch_size)
test_reader = paddle.batch(
paddle.reader.shuffle(
light_imdb.test(word_dict), buf_size=buf_size),
batch_size=batch_size)
elif data_type == "tiny_imdb":
train_reader = paddle.batch(
paddle.reader.shuffle(
tiny_imdb.train(word_dict), buf_size=buf_size),
batch_size=batch_size)
test_reader = paddle.batch(
paddle.reader.shuffle(
tiny_imdb.test(word_dict), buf_size=buf_size),
batch_size=batch_size)
else:
raise RuntimeError("no such dataset")
return word_dict, train_reader, test_reader
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册