Commit 744908f0 authored by: W wanghaoshuang

Merge branch 'develop' of https://github.com/PaddlePaddle/models into ocr_ctc

@@ -24,11 +24,22 @@ unittest(){
trap 'abort' 0
set -e
for proj in */ ; do
for proj in * ; do
if [ -d $proj ]; then
unittest $proj
if [ $? != 0 ]; then
exit 1
if [ "$proj" = "fluid" ]; then
for proj in fluid/* ; do
if [ -d $proj ]; then
unittest $proj
if [ $? != 0 ]; then
exit 1
fi
fi
done
else
unittest $proj
if [ $? != 0 ]; then
exit 1
fi
fi
fi
done
16.2845556399 11.6891798673
17.21509949 12.3788567902
18.1143704548 14.9912618017
19.2335963752 18.5419556172
19.9266772451 21.2768220522
19.8245737202 21.2347210705
19.5432940972 20.2784036567
19.4631271754 20.2934452329
19.3929919324 20.457971868
19.2924788362 20.3626439234
18.9207244502 19.9196569759
18.7202605641 19.5920276899
18.4844279398 19.2068349019
18.2670948624 18.8716893824
18.0929628855 18.5439666541
17.8428896026 18.0255891747
17.6646850635 17.473764296
17.4955705896 16.8966859471
17.3706720293 16.4294027467
17.2530867792 16.0514717623
17.1304341172 15.7234699057
17.0038353287 15.4344471514
16.902550309 15.1603287337
16.8375590047 14.9304337826
16.816287853 14.9119310513
16.828838265 15.0930023024
16.8602209498 15.3771992423
16.9101763812 15.6897991789
16.9466065143 15.9364556489
16.9486061956 16.0699417826
16.9041374104 16.0796970272
16.8410093699 16.0111444599
16.7045718836 15.7991985601
16.51128489 15.5208920129
16.3253910608 15.2603181921
16.1297317333 14.9499965958
15.903428372 14.5958280409
15.6131718105 14.2709618
15.1395035533 13.9993939893
14.4298229999 13.3841189151
0.0034970565424 0.246184766149
0.00501284154705 0.238484972472
0.00605942680019 0.269064381708
0.00687266156243 0.319479238011
0.00734065019253 0.371947383205
0.00718807218417 0.384426479694
0.00652195540212 0.384676838281
0.00660416525951 0.395543910317
0.00680202057642 0.400803979681
0.00659144183007 0.393228973031
0.00605294530423 0.385021118038
0.00590452969394 0.361763039625
0.00612315374687 0.346777773373
0.00582354093973 0.335802403976
0.00574556002554 0.320733728218
0.00612254485891 0.310153103033
0.00626733043219 0.299854747445
0.00567398408041 0.293353685493
0.00519236700706 0.287668810947
0.00529581474367 0.281479660772
0.00479019484082 0.27451415777
0.00486381039428 0.266294391154
0.00491126372868 0.258105116126
0.00452105305011 0.252926328298
0.00531483334271 0.250910887373
0.00546572110469 0.253302256977
0.00479544857908 0.258484183394
0.00422106426297 0.264582900173
0.00401824135188 0.268467945623
0.0041705465252 0.269699480291
0.00405239564143 0.270406162975
0.0040059737566 0.270407601782
0.00406426729317 0.267951582656
0.00416613791013 0.264543833042
0.00427847607653 0.26247798891
0.00428050903034 0.259635263243
0.00454842971786 0.255829377617
0.00393747552387 0.253802307025
0.00374143688909 0.251011478787
0.00335475310258 0.236543650856
0.000373194755312 0.0419494800709
0.000230909648678 0.0394102370205
0.000150840015851 0.0414956922398
8.44401840771e-05 0.0460502231327
-6.24759314572e-06 0.0528049937739
-8.82957758148e-05 0.055711244886
1.16795791952e-05 0.0563188428833
-1.68716267856e-05 0.0575232763711
-0.000112625308645 0.057979929947
-0.000122619090002 0.0564126233493
1.73569637319e-05 0.05522573909
6.49872782342e-05 0.0507353361334
4.17746389178e-05 0.0479568131253
5.13884475653e-05 0.0461253238047
1.8860115143e-05 0.0436860476919
-5.64317701105e-05 0.042516381059
-0.000136859948115 0.0413574820205
-7.00847019726e-05 0.0409516370727
-5.39392223336e-05 0.040441504085
-9.24897162815e-05 0.0397800398173
4.7104970622e-05 0.039046286243
6.24805896165e-06 0.0380185986602
-2.35272813418e-05 0.036851063786
5.88344154127e-05 0.0361640489242
-8.39162076993e-05 0.0357639427311
-0.000108702805776 0.0358774639538
3.22013961834e-06 0.0363644530435
9.43501518394e-05 0.0370309934774
0.000134406229423 0.0374972993343
3.84007008533e-05 0.037676222515
3.05989328157e-05 0.0379111939182
9.52201629091e-05 0.0380927209106
0.000102126083729 0.0379925358499
6.98628072264e-05 0.0377276252241
4.55782256339e-05 0.0375165468654
4.76370987786e-05 0.0371482526345
-2.24128832709e-05 0.0366810742947
0.000125621306953 0.036628355271
0.000134568666093 0.0364860461759
0.000159858844464 0.0345583593149
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import sys
import unittest
import numpy as np
import data_utils.augmentor.trans_mean_variance_norm as trans_mean_variance_norm
import data_utils.augmentor.trans_add_delta as trans_add_delta
import data_utils.augmentor.trans_splice as trans_splice
class TestTransMeanVarianceNorm(unittest.TestCase):
"""unit test for TransMeanVarianceNorm
"""
def setUp(self):
self._file_path = "./data_utils/augmentor/tests/data/" \
"global_mean_var_search26kHr"
def test(self):
feature = np.zeros((2, 120), dtype="float32")
feature.fill(1)
trans = trans_mean_variance_norm.TransMeanVarianceNorm(self._file_path)
(feature1, label1) = trans.perform_trans((feature, None))
(mean, var) = trans.get_mean_var()
feature_flat1 = feature1.flatten()
feature_flat = feature.flatten()
one = np.ones((1), dtype="float32")
for idx, val in enumerate(feature_flat1):
cur_idx = idx % 120
self.assertAlmostEqual(val, (one[0] - mean[cur_idx]) * var[cur_idx])
class TestTransAddDelta(unittest.TestCase):
"""unit test TestTransAddDelta
"""
def test_regress(self):
"""test regress
"""
feature = np.zeros((14, 120), dtype="float32")
feature[0:5, 0:40].fill(1)
feature[0 + 5, 0:40].fill(1)
feature[1 + 5, 0:40].fill(2)
feature[2 + 5, 0:40].fill(3)
feature[3 + 5, 0:40].fill(4)
feature[8:14, 0:40].fill(4)
trans = trans_add_delta.TransAddDelta()
feature = feature.reshape((14 * 120))
trans._regress(feature, 5 * 120, feature, 5 * 120 + 40, 40, 4, 120)
trans._regress(feature, 5 * 120 + 40, feature, 5 * 120 + 80, 40, 4, 120)
feature = feature.reshape((14, 120))
tmp_feature = feature[5:5 + 4, :]
self.assertAlmostEqual(1.0, tmp_feature[0][0])
self.assertAlmostEqual(0.24, tmp_feature[0][119])
self.assertAlmostEqual(2.0, tmp_feature[1][0])
self.assertAlmostEqual(0.13, tmp_feature[1][119])
self.assertAlmostEqual(3.0, tmp_feature[2][0])
self.assertAlmostEqual(-0.13, tmp_feature[2][119])
self.assertAlmostEqual(4.0, tmp_feature[3][0])
self.assertAlmostEqual(-0.24, tmp_feature[3][119])
def test_perform(self):
"""test perform
"""
feature = np.zeros((4, 40), dtype="float32")
feature[0, 0:40].fill(1)
feature[1, 0:40].fill(2)
feature[2, 0:40].fill(3)
feature[3, 0:40].fill(4)
trans = trans_add_delta.TransAddDelta()
(feature, label) = trans.perform_trans((feature, None))
self.assertAlmostEqual(feature.shape[0], 4)
self.assertAlmostEqual(feature.shape[1], 120)
self.assertAlmostEqual(1.0, feature[0][0])
self.assertAlmostEqual(0.24, feature[0][119])
self.assertAlmostEqual(2.0, feature[1][0])
self.assertAlmostEqual(0.13, feature[1][119])
self.assertAlmostEqual(3.0, feature[2][0])
self.assertAlmostEqual(-0.13, feature[2][119])
self.assertAlmostEqual(4.0, feature[3][0])
self.assertAlmostEqual(-0.24, feature[3][119])
class TestTransSplice(unittest.TestCase):
"""unit test for TransSplice
"""
def test_perform(self):
feature = np.zeros((8, 10), dtype="float32")
for i in xrange(feature.shape[0]):
feature[i, :].fill(i)
trans = trans_splice.TransSplice()
(feature, label) = trans.perform_trans((feature, None))
self.assertEqual(feature.shape[1], 110)
for i in xrange(8):
nzero_num = 5 - i
cur_val = 0.0
if nzero_num < 0:
cur_val = i - 5 - 1
for j in xrange(11):
if j <= nzero_num:
for k in xrange(10):
self.assertAlmostEqual(feature[i][j * 10 + k], cur_val)
else:
if cur_val < 7:
cur_val += 1.0
for k in xrange(10):
self.assertAlmostEqual(feature[i][j * 10 + k], cur_val)
if __name__ == '__main__':
unittest.main()
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import numpy as np
import math
import copy
class TransAddDelta(object):
""" add delta of feature data
trans feature for shape(a, b) to shape(a, b * 3)
Attributes:
_norder(int):
_window(int):
"""
def __init__(self, norder=2, nwindow=2):
""" init construction
Args:
norder: default 2
nwindow: default 2
"""
self._norder = norder
self._nwindow = nwindow
def perform_trans(self, sample):
""" add delta for feature
transform feature shape from (a, b) to (a, b * 3)
Args:
sample(object,tuple): contain feature numpy and label numpy
Returns:
(feature, label)
"""
(feature, label) = sample
frame_dim = feature.shape[1]
d_frame_dim = frame_dim * 3
head_filled = 5
tail_filled = 5
mat = np.zeros(
(feature.shape[0] + head_filled + tail_filled, d_frame_dim),
dtype="float32")
#copy first frame
for i in xrange(head_filled):
np.copyto(mat[i, 0:frame_dim], feature[0, :])
np.copyto(mat[head_filled:head_filled + feature.shape[0], 0:frame_dim],
feature[:, :])
# copy last frame
for i in xrange(head_filled + feature.shape[0], mat.shape[0], 1):
np.copyto(mat[i, 0:frame_dim], feature[feature.shape[0] - 1, :])
nframe = feature.shape[0]
start = head_filled
tmp_shape = mat.shape
mat = mat.reshape((tmp_shape[0] * tmp_shape[1]))
self._regress(mat, start * d_frame_dim, mat,
start * d_frame_dim + frame_dim, frame_dim, nframe,
d_frame_dim)
self._regress(mat, start * d_frame_dim + frame_dim, mat,
start * d_frame_dim + 2 * frame_dim, frame_dim, nframe,
d_frame_dim)
mat.shape = tmp_shape
return (mat[head_filled:mat.shape[0] - tail_filled, :], label)
def _regress(self, data_in, start_in, data_out, start_out, size, n, step):
""" regress
Args:
data_in: in data
start_in: start index of data_in
data_out: out data
start_out: start index of data_out
size: frame dimension
n: frame num
step: 3 * (frame dim), the stride between consecutive frames in the flattened buffer
Returns:
None
"""
sigma_t2 = 0.0
delta_window = self._nwindow
for t in xrange(1, delta_window + 1):
sigma_t2 += t * t
sigma_t2 *= 2.0
for i in xrange(n):
fp1 = start_in
fp2 = start_out
for j in xrange(size):
back = fp1
forw = fp1
sum = 0.0
for t in xrange(1, delta_window + 1):
back -= step
forw += step
sum += t * (data_in[forw] - data_in[back])
data_out[fp2] = sum / sigma_t2
fp1 += 1
fp2 += 1
start_in += step
start_out += step
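The _regress routine above implements the usual delta-feature regression: for window size W, each output value is sum over t=1..W of t * (x[i+t] - x[i-t]), divided by 2 * sum of t^2. A minimal self-contained NumPy sketch of the same formula (the helper name is hypothetical; the edge padding mirrors what perform_trans does with head_filled/tail_filled):

import numpy as np

def delta(feat, window=2):
    """First-order deltas of a (frames, dims) array; edges are padded by
    repeating the first/last frame, as TransAddDelta.perform_trans does."""
    denom = 2.0 * sum(t * t for t in range(1, window + 1))
    padded = np.pad(feat, ((window, window), (0, 0)), mode="edge")
    out = np.zeros_like(feat)
    for t in range(1, window + 1):
        out += t * (padded[window + t:window + t + len(feat)] -
                    padded[window - t:window - t + len(feat)])
    return out / denom

x = np.ones((4, 3), dtype="float32")
print(delta(x))  # constant features -> all-zero deltas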
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import numpy as np
import math
class TransMeanVarianceNorm(object):
""" normalization of mean variance for feature data
Attributes:
_mean(numpy.array): the feature mean vector
_var(numpy.array): the feature variance
"""
def __init__(self, snorm_path):
"""init construction
Args:
snorm_path: the path of mean and variance
"""
self._mean = None
self._var = None
self._load_norm(snorm_path)
def _load_norm(self, snorm_path):
""" load mean var file
Args:
snorm_path(str):the file path
"""
lLines = open(snorm_path).readlines()
nLen = len(lLines)
self._mean = np.zeros((nLen), dtype="float32")
self._var = np.zeros((nLen), dtype="float32")
self._nLen = nLen
for nidx, l in enumerate(lLines):
s = l.split()
assert len(s) == 2
self._mean[nidx] = float(s[0])
self._var[nidx] = 1.0 / math.sqrt(float(s[1]))
if self._var[nidx] > 100000.0:
self._var[nidx] = 100000.0
def get_mean_var(self):
""" get mean and var
Args:
Returns:
(mean, var)
"""
return (self._mean, self._var)
def perform_trans(self, sample):
""" feature = (feature - mean) * var
Args:
sample(object):input sample, contain feature numpy and label numpy
Returns:
(feature, label)
"""
(feature, label) = sample
shape = feature.shape
assert len(shape) == 2
nfeature_len = shape[0] * shape[1]
assert nfeature_len % self._nLen == 0
ncur_idx = 0
feature = feature.reshape((nfeature_len))
while ncur_idx < nfeature_len:
block = feature[ncur_idx:ncur_idx + self._nLen]
block = (block - self._mean) * self._var
feature[ncur_idx:ncur_idx + self._nLen] = block
ncur_idx += self._nLen
feature = feature.reshape(shape)
return (feature, label)
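A quick usage sketch of the transform above, assuming it is run from the DeepASR project root so that the data_utils package and the bundled mean/var file (one "mean variance" pair per feature dimension, as _load_norm expects) are available:

import numpy as np
import data_utils.augmentor.trans_mean_variance_norm as trans_mean_variance_norm

norm = trans_mean_variance_norm.TransMeanVarianceNorm(
    "./data_utils/augmentor/tests/data/global_mean_var_search26kHr")
feature = np.ones((2, 120), dtype="float32")
feature, label = norm.perform_trans((feature, None))
# Every value is now (1.0 - mean[d]) / sqrt(var[d]) for its dimension d,
# which is exactly what TestTransMeanVarianceNorm above asserts.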
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import numpy as np
import math
class TransSplice(object):
""" copy feature context to construct new feature
expand feature data from shape (frame_num, frame_dim)
to shape (frame_num, frame_dim * (nleft_context + nright_context + 1)),
i.e. (frame_num, frame_dim * 11) with the default contexts
Attributes:
_nleft_context(int): copy left context number
_nright_context(int): copy right context number
"""
def __init__(self, nleft_context=5, nright_context=5):
""" init construction
Args:
nleft_context(int): number of left context frames to copy, default 5
nright_context(int): number of right context frames to copy, default 5
"""
self._nleft_context = nleft_context
self._nright_context = nright_context
def perform_trans(self, sample):
""" copy feature context
Args:
sample(object): input sample(feature, label)
Return:
(feature, label)
"""
(feature, label) = sample
nframe_num = feature.shape[0]
nframe_dim = feature.shape[1]
nnew_frame_dim = nframe_dim * (
self._nleft_context + self._nright_context + 1)
mat = np.zeros(
(nframe_num + self._nleft_context + self._nright_context,
nframe_dim),
dtype="float32")
ret = np.zeros((nframe_num, nnew_frame_dim), dtype="float32")
#copy left
for i in xrange(self._nleft_context):
mat[i, :] = feature[0, :]
#copy middle
mat[self._nleft_context:self._nleft_context +
nframe_num, :] = feature[:, :]
#copy right
for i in xrange(self._nright_context):
mat[i + self._nleft_context + nframe_num, :] = feature[-1, :]
mat = mat.reshape(mat.shape[0] * mat.shape[1])
ret = ret.reshape(ret.shape[0] * ret.shape[1])
for i in xrange(nframe_num):
np.copyto(ret[i * nnew_frame_dim:(i + 1) * nnew_frame_dim],
mat[i * nframe_dim:i * nframe_dim + nnew_frame_dim])
ret = ret.reshape((nframe_num, nnew_frame_dim))
return (ret, label)
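A small usage sketch of the splicing transform (self-contained apart from the data_utils import; the defaults give 5 frames of left context and 5 of right context):

import numpy as np
import data_utils.augmentor.trans_splice as trans_splice

feature = np.arange(8 * 10, dtype="float32").reshape((8, 10))
splice = trans_splice.TransSplice(nleft_context=5, nright_context=5)
spliced, _ = splice.perform_trans((feature, None))
print(spliced.shape)  # (8, 110): each output frame concatenates 11 input frames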
"""This module contains data processing related logic.
"""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import random
import struct
import Queue
import time
import numpy as np
from threading import Thread
import signal
from multiprocessing import Manager, Process
import data_utils.augmentor.trans_mean_variance_norm as trans_mean_variance_norm
import data_utils.augmentor.trans_add_delta as trans_add_delta
from data_utils.util import suppress_complaints, suppress_signal
class SampleInfo(object):
"""SampleInfo holds the necessary information to load a sample from disk.
Args:
feature_bin_path (str): File containing the feature data.
feature_start (int): Start position of the sample's feature data.
feature_size (int): Byte count of the sample's feature data.
feature_frame_num (int): Time length of the sample.
feature_dim (int): Feature dimension of one frame.
label_bin_path (str): File containing the label data.
label_start (int): Start position of the sample's label data.
label_size (int): Byte count of the sample's label data.
label_frame_num (int): Label number of the sample.
"""
def __init__(self, feature_bin_path, feature_start, feature_size,
feature_frame_num, feature_dim, label_bin_path, label_start,
label_size, label_frame_num):
self.feature_bin_path = feature_bin_path
self.feature_start = feature_start
self.feature_size = feature_size
self.feature_frame_num = feature_frame_num
self.feature_dim = feature_dim
self.label_bin_path = label_bin_path
self.label_start = label_start
self.label_size = label_size
self.label_frame_num = label_frame_num
class SampleInfoBucket(object):
"""SampleInfoBucket contains paths of several description files. Feature
description file contains necessary information (including path of binary
data, sample start position, sample byte number etc.) to access samples'
feature data, and likewise for the label description file. SampleInfoBucket
is the minimum unit to do shuffle.
Args:
feature_bin_paths (list|tuple): Files containing the binary feature
data.
feature_desc_paths (list|tuple): Files containing the description of
samples' feature data.
label_bin_paths (list|tuple): Files containing the binary label data.
label_desc_paths (list|tuple): Files containing the description of
samples' label data.
"""
def __init__(self, feature_bin_paths, feature_desc_paths, label_bin_paths,
label_desc_paths):
block_num = len(label_bin_paths)
assert len(label_desc_paths) == block_num
assert len(feature_bin_paths) == block_num
assert len(feature_desc_paths) == block_num
self._block_num = block_num
self._feature_bin_paths = feature_bin_paths
self._feature_desc_paths = feature_desc_paths
self._label_bin_paths = label_bin_paths
self._label_desc_paths = label_desc_paths
def generate_sample_info_list(self):
sample_info_list = []
for block_idx in xrange(self._block_num):
label_bin_path = self._label_bin_paths[block_idx]
label_desc_path = self._label_desc_paths[block_idx]
feature_bin_path = self._feature_bin_paths[block_idx]
feature_desc_path = self._feature_desc_paths[block_idx]
label_desc_lines = open(label_desc_path).readlines()
feature_desc_lines = open(feature_desc_path).readlines()
sample_num = int(label_desc_lines[0].split()[1])
assert sample_num == int(feature_desc_lines[0].split()[1])
for i in xrange(sample_num):
feature_desc_split = feature_desc_lines[i + 1].split()
feature_start = int(feature_desc_split[2])
feature_size = int(feature_desc_split[3])
feature_frame_num = int(feature_desc_split[4])
feature_dim = int(feature_desc_split[5])
label_desc_split = label_desc_lines[i + 1].split()
label_start = int(label_desc_split[2])
label_size = int(label_desc_split[3])
label_frame_num = int(label_desc_split[4])
sample_info_list.append(
SampleInfo(feature_bin_path, feature_start, feature_size,
feature_frame_num, feature_dim, label_bin_path,
label_start, label_size, label_frame_num))
return sample_info_list
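For reference, generate_sample_info_list only consumes a few whitespace-separated columns of each description file: the second field of the first line is the sample count, and fields 2-5 of every following line give start offset, byte size, frame number and (for features) dimension. A hedged sketch of parsing one such line (the concrete values and the ignored leading fields are made up for illustration):

# Hypothetical feature description line; only fields 2-5 are read above.
feature_desc_line = "0 utt_000 0 412800 860 120"
fields = feature_desc_line.split()
feature_start, feature_size = int(fields[2]), int(fields[3])
feature_frame_num, feature_dim = int(fields[4]), int(fields[5])
# float32 features: byte size = frames * dims * 4 (checked later in DataReader)
assert feature_size == feature_frame_num * feature_dim * 4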
class EpochEndSignal():
pass
class DataReader(object):
"""DataReader provides basic audio sample preprocessing pipeline including
data loading and data augmentation.
Args:
feature_file_list (str): File containing paths of feature data file and
corresponding description file.
label_file_list (str): File containing paths of label data file and
corresponding description file.
drop_frame_len (int): Samples whose frame number exceeds this value will
be dropped.
process_num (int): Number of processes for processing data.
sample_buffer_size (int): Buffer size to indicate the maximum samples
cached.
sample_info_buffer_size (int): Buffer size to indicate the maximum
sample information cached.
batch_buffer_size (int): Buffer size to indicate the maximum batch
cached.
shuffle_block_num (int): Number of blocks grouped into the minimum unit
for shuffling.
random_seed (int): Random seed.
verbose (int): If set to 0, complaints including exceptions and signal
traceback from sub-process will be suppressed. If set
to 1, all complaints will be printed.
"""
def __init__(self,
feature_file_list,
label_file_list,
drop_frame_len=512,
process_num=10,
sample_buffer_size=1024,
sample_info_buffer_size=1024,
batch_buffer_size=1024,
shuffle_block_num=1,
random_seed=0,
verbose=0):
self._feature_file_list = feature_file_list
self._label_file_list = label_file_list
self._drop_frame_len = drop_frame_len
self._shuffle_block_num = shuffle_block_num
self._block_info_list = None
self._rng = random.Random(random_seed)
self._bucket_list = None
self.generate_bucket_list(True)
self._order_id = 0
self._manager = Manager()
self._sample_buffer_size = sample_buffer_size
self._sample_info_buffer_size = sample_info_buffer_size
self._batch_buffer_size = batch_buffer_size
self._process_num = process_num
self._verbose = verbose
def generate_bucket_list(self, is_shuffle):
if self._block_info_list is None:
block_feature_info_lines = open(self._feature_file_list).readlines()
block_label_info_lines = open(self._label_file_list).readlines()
assert len(block_feature_info_lines) == len(block_label_info_lines)
self._block_info_list = []
for i in xrange(0, len(block_feature_info_lines), 2):
block_info = (block_feature_info_lines[i],
block_feature_info_lines[i + 1],
block_label_info_lines[i],
block_label_info_lines[i + 1])
self._block_info_list.append(
map(lambda line: line.strip(), block_info))
if is_shuffle:
self._rng.shuffle(self._block_info_list)
self._bucket_list = []
for i in xrange(0, len(self._block_info_list), self._shuffle_block_num):
bucket_block_info = self._block_info_list[i:i +
self._shuffle_block_num]
self._bucket_list.append(
SampleInfoBucket(
map(lambda info: info[0], bucket_block_info),
map(lambda info: info[1], bucket_block_info),
map(lambda info: info[2], bucket_block_info),
map(lambda info: info[3], bucket_block_info)))
# @TODO make this configurable
def set_transformers(self, transformers):
self._transformers = transformers
def _sample_generator(self):
sample_info_queue = self._manager.Queue(self._sample_info_buffer_size)
sample_queue = self._manager.Queue(self._sample_buffer_size)
self._order_id = 0
@suppress_complaints(verbose=self._verbose)
def ordered_feeding_task(sample_info_queue):
for sample_info_bucket in self._bucket_list:
sample_info_list = sample_info_bucket.generate_sample_info_list(
)
self._rng.shuffle(sample_info_list) # do shuffle here
for sample_info in sample_info_list:
sample_info_queue.put((sample_info, self._order_id))
self._order_id += 1
for i in xrange(self._process_num):
sample_info_queue.put(EpochEndSignal())
feeding_thread = Thread(
target=ordered_feeding_task, args=(sample_info_queue, ))
feeding_thread.daemon = True
feeding_thread.start()
@suppress_complaints(verbose=self._verbose)
def ordered_processing_task(sample_info_queue, sample_queue, out_order):
if self._verbose == 0:
signal.signal(signal.SIGTERM, suppress_signal)
signal.signal(signal.SIGINT, suppress_signal)
def read_bytes(fpath, start, size):
f = open(fpath, 'rb')
f.seek(start, 0)
binary_bytes = f.read(size)
f.close()
return binary_bytes
ins = sample_info_queue.get()
while not isinstance(ins, EpochEndSignal):
sample_info, order_id = ins
feature_bytes = read_bytes(sample_info.feature_bin_path,
sample_info.feature_start,
sample_info.feature_size)
label_bytes = read_bytes(sample_info.label_bin_path,
sample_info.label_start,
sample_info.label_size)
assert sample_info.label_frame_num * 4 == len(label_bytes)
label_array = struct.unpack('I' * sample_info.label_frame_num,
label_bytes)
label_data = np.array(
label_array, dtype='int64').reshape(
(sample_info.label_frame_num, 1))
feature_frame_num = sample_info.feature_frame_num
feature_dim = sample_info.feature_dim
assert feature_frame_num * feature_dim * 4 == len(feature_bytes)
feature_array = struct.unpack('f' * feature_frame_num *
feature_dim, feature_bytes)
feature_data = np.array(
feature_array, dtype='float32').reshape((
sample_info.feature_frame_num, sample_info.feature_dim))
sample_data = (feature_data, label_data)
for transformer in self._transformers:
# @TODO(pkuyym) to make transformer only accept feature_data
sample_data = transformer.perform_trans(sample_data)
while order_id != out_order[0]:
time.sleep(0.001)
# drop long sentence
if self._drop_frame_len >= sample_data[0].shape[0]:
sample_queue.put(sample_data)
out_order[0] += 1
ins = sample_info_queue.get()
sample_queue.put(EpochEndSignal())
out_order = self._manager.list([0])
args = (sample_info_queue, sample_queue, out_order)
workers = [
Process(
target=ordered_processing_task, args=args)
for _ in xrange(self._process_num)
]
for w in workers:
w.daemon = True
w.start()
finished_process_num = 0
while finished_process_num < self._process_num:
sample = sample_queue.get()
if isinstance(sample, EpochEndSignal):
finished_process_num += 1
continue
yield sample
feeding_thread.join()
for w in workers:
w.join()
def batch_iterator(self, batch_size, minimum_batch_size):
def batch_to_ndarray(batch_samples, lod):
assert len(batch_samples)
frame_dim = batch_samples[0][0].shape[1]
batch_feature = np.zeros((lod[-1], frame_dim), dtype="float32")
batch_label = np.zeros((lod[-1], 1), dtype="int64")
start = 0
for sample in batch_samples:
frame_num = sample[0].shape[0]
batch_feature[start:start + frame_num, :] = sample[0]
batch_label[start:start + frame_num, :] = sample[1]
start += frame_num
return (batch_feature, batch_label)
@suppress_complaints(verbose=self._verbose)
def batch_assembling_task(sample_generator, batch_queue):
batch_samples = []
lod = [0]
for sample in sample_generator():
batch_samples.append(sample)
lod.append(lod[-1] + sample[0].shape[0])
if len(batch_samples) == batch_size:
(batch_feature, batch_label) = batch_to_ndarray(
batch_samples, lod)
batch_queue.put((batch_feature, batch_label, lod))
batch_samples = []
lod = [0]
if len(batch_samples) >= minimum_batch_size:
(batch_feature, batch_label) = batch_to_ndarray(batch_samples,
lod)
batch_queue.put((batch_feature, batch_label, lod))
batch_queue.put(EpochEndSignal())
batch_queue = Queue.Queue(self._batch_buffer_size)
assembling_thread = Thread(
target=batch_assembling_task,
args=(self._sample_generator, batch_queue))
assembling_thread.daemon = True
assembling_thread.start()
while True:
try:
batch_data = batch_queue.get_nowait()
except Queue.Empty:
time.sleep(0.001)
else:
if isinstance(batch_data, EpochEndSignal):
break
yield batch_data
assembling_thread.join()
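A usage sketch tying the pieces together (the list and mean/var paths follow the defaults used in profile.py and train.py below and are assumed to exist; run from the project root):

import data_utils.data_reader as reader
import data_utils.augmentor.trans_add_delta as trans_add_delta
import data_utils.augmentor.trans_mean_variance_norm as trans_mean_variance_norm
import data_utils.augmentor.trans_splice as trans_splice

data_reader = reader.DataReader('data/feature.lst', 'data/label.lst')
data_reader.set_transformers([
    trans_add_delta.TransAddDelta(2, 2),
    trans_mean_variance_norm.TransMeanVarianceNorm(
        'data/global_mean_var_search26kHr'),
    trans_splice.TransSplice(),
])
for batch_feature, batch_label, lod in data_reader.batch_iterator(
        batch_size=32, minimum_batch_size=1):
    # batch_feature: (total_frames, 120 * 11) float32
    # batch_label:   (total_frames, 1) int64
    # lod: cumulative frame counts marking sample boundaries in the batch
    pass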
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import sys
from six import reraise
from tblib import Traceback
import numpy as np
import paddle.v2.fluid as fluid
def to_lodtensor(data, place):
"""convert tensor to lodtensor
"""
seq_lens = [len(seq) for seq in data]
cur_len = 0
lod = [cur_len]
for l in seq_lens:
cur_len += l
lod.append(cur_len)
flattened_data = np.concatenate(data, axis=0).astype("int64")
flattened_data = flattened_data.reshape([len(flattened_data), 1])
res = fluid.LoDTensor()
res.set(flattened_data, place)
res.set_lod([lod])
return res
def lodtensor_to_ndarray(lod_tensor):
"""conver lodtensor to ndarray
"""
dims = lod_tensor.get_dims()
ret = np.zeros(shape=dims).astype('float32')
for i in xrange(np.product(dims)):
ret.ravel()[i] = lod_tensor.get_float_element(i)
return ret, lod_tensor.lod()
def suppress_signal(signo, stack_frame):
pass
def suppress_complaints(verbose):
def decorator_maker(func):
def suppress_warpper(*args, **kwargs):
try:
func(*args, **kwargs)
except:
et, ev, tb = sys.exc_info()
tb = Traceback(tb)
if verbose == 1:
reraise(et, ev, tb.as_traceback())
return suppress_warpper
return decorator_maker
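A short sketch of how the decorator above behaves (the worker function is hypothetical): with verbose=0 any exception raised inside the wrapped function is swallowed, while with verbose=1 it is re-raised through tblib so the original traceback survives across process boundaries.

from data_utils.util import suppress_complaints

@suppress_complaints(verbose=0)
def worker():
    raise RuntimeError("never propagates when verbose == 0")

worker()  # returns None silently; with verbose=1 the exception is re-raised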
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import paddle.v2 as paddle
import paddle.v2.fluid as fluid
def stacked_lstmp_model(hidden_dim,
proj_dim,
stacked_num,
class_num,
parallel=False,
is_train=True):
""" The model for DeepASR. The main structure is composed of stacked
identical LSTMP (LSTM with recurrent projection) layers.
In the training and validation phases, the feeding dictionary
is {'feature', 'label'}, fed by LoDTensors for the feature data and
label data respectively. In inference, only `feature` is needed.
Args:
hidden_dim(int): The hidden state's dimension of the LSTMP layer.
proj_dim(int): The projection size of the LSTMP layer.
stacked_num(int): The number of stacked LSTMP layers.
parallel(bool): Run in parallel or not, default `False`.
is_train(bool): Run in training phase or not, default `True`.
class_num(int): The number of output classes.
"""
# network configuration
def _net_conf(feature, label):
seq_conv1 = fluid.layers.sequence_conv(
input=feature,
num_filters=1024,
filter_size=3,
filter_stride=1,
bias_attr=True)
bn1 = fluid.layers.batch_norm(
input=seq_conv1,
act="sigmoid",
is_test=not is_train,
momentum=0.9,
epsilon=1e-05,
data_layout='NCHW')
stack_input = bn1
for i in range(stacked_num):
fc = fluid.layers.fc(input=stack_input,
size=hidden_dim * 4,
bias_attr=True)
proj, cell = fluid.layers.dynamic_lstmp(
input=fc,
size=hidden_dim * 4,
proj_size=proj_dim,
bias_attr=True,
use_peepholes=True,
is_reverse=False,
cell_activation="tanh",
proj_activation="tanh")
bn = fluid.layers.batch_norm(
input=proj,
act="sigmoid",
is_test=not is_train,
momentum=0.9,
epsilon=1e-05,
data_layout='NCHW')
stack_input = bn
prediction = fluid.layers.fc(input=stack_input,
size=class_num,
act='softmax')
cost = fluid.layers.cross_entropy(input=prediction, label=label)
avg_cost = fluid.layers.mean(x=cost)
acc = fluid.layers.accuracy(input=prediction, label=label)
return prediction, avg_cost, acc
# data feeder
feature = fluid.layers.data(
name="feature", shape=[-1, 120 * 11], dtype="float32", lod_level=1)
label = fluid.layers.data(
name="label", shape=[-1, 1], dtype="int64", lod_level=1)
if parallel:
# When the execution place is specified to CUDAPlace, the program will
# run on all $CUDA_VISIBLE_DEVICES GPUs. Otherwise the program will
# run on all CPU devices.
places = fluid.layers.get_places()
pd = fluid.layers.ParallelDo(places)
with pd.do():
feat_ = pd.read_input(feature)
label_ = pd.read_input(label)
prediction, avg_cost, acc = _net_conf(feat_, label_)
for out in [avg_cost, acc]:
pd.write_output(out)
# get mean loss and acc over all devices.
avg_cost, acc = pd()
avg_cost = fluid.layers.mean(x=avg_cost)
acc = fluid.layers.mean(x=acc)
else:
prediction, avg_cost, acc = _net_conf(feature, label)
return prediction, avg_cost, acc
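A minimal sketch of building and optimizing the network (hyper-parameter values follow the defaults of the training/profiling scripts below; feeding happens later through the "feature" and "label" LoDTensors):

import paddle.v2.fluid as fluid
from model_utils.model import stacked_lstmp_model

prediction, avg_cost, acc = stacked_lstmp_model(
    hidden_dim=1024, proj_dim=512, stacked_num=5, class_num=1749,
    parallel=False, is_train=True)
optimizer = fluid.optimizer.Adam(learning_rate=0.002)
optimizer.minimize(avg_cost)
# exe.run(...) then feeds {'feature': ..., 'label': ...}, as in train.py below.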
"""Add the parent directory to $PYTHONPATH"""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import os.path
import sys
def add_path(path):
if path not in sys.path:
sys.path.insert(0, path)
this_dir = os.path.dirname(__file__)
# Add project path to PYTHONPATH
proj_path = os.path.join(this_dir, '..')
add_path(proj_path)
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import sys
import numpy as np
import argparse
import time
import paddle.v2.fluid as fluid
import paddle.v2.fluid.profiler as profiler
import _init_paths
import data_utils.augmentor.trans_mean_variance_norm as trans_mean_variance_norm
import data_utils.augmentor.trans_add_delta as trans_add_delta
import data_utils.augmentor.trans_splice as trans_splice
import data_utils.data_reader as reader
from model_utils.model import stacked_lstmp_model
from data_utils.util import lodtensor_to_ndarray
def parse_args():
parser = argparse.ArgumentParser("Profiling for the stacked LSTMP model.")
parser.add_argument(
'--batch_size',
type=int,
default=32,
help='The number of sequences in a batch. (default: %(default)d)')
parser.add_argument(
'--minimum_batch_size',
type=int,
default=1,
help='The minimum number of sequences in a batch. '
'(default: %(default)d)')
parser.add_argument(
'--stacked_num',
type=int,
default=5,
help='Number of lstmp layers to stack. (default: %(default)d)')
parser.add_argument(
'--proj_dim',
type=int,
default=512,
help='Projection size of the lstmp unit. (default: %(default)d)')
parser.add_argument(
'--hidden_dim',
type=int,
default=1024,
help='Hidden size of lstmp unit. (default: %(default)d)')
parser.add_argument(
'--learning_rate',
type=float,
default=0.002,
help='Learning rate used to train. (default: %(default)f)')
parser.add_argument(
'--device',
type=str,
default='GPU',
choices=['CPU', 'GPU'],
help='The device type. (default: %(default)s)')
parser.add_argument(
'--parallel', action='store_true', help='If set, run in parallel.')
parser.add_argument(
'--mean_var',
type=str,
default='data/global_mean_var_search26kHr',
help='mean var path')
parser.add_argument(
'--feature_lst',
type=str,
default='data/feature.lst',
help='feature list path.')
parser.add_argument(
'--label_lst',
type=str,
default='data/label.lst',
help='label list path.')
parser.add_argument(
'--max_batch_num',
type=int,
default=10,
help='Maximum number of batches for profiling. (default: %(default)d)')
parser.add_argument(
'--first_batches_to_skip',
type=int,
default=1,
help='Number of first batches to skip for profiling. '
'(default: %(default)d)')
parser.add_argument(
'--print_train_acc',
action='store_true',
help='If set, output training accuracy.')
parser.add_argument(
'--sorted_key',
type=str,
default='total',
choices=['None', 'total', 'calls', 'min', 'max', 'ave'],
help='Different types of time to sort the profiling report. '
'(default: %(default)s)')
args = parser.parse_args()
return args
def print_arguments(args):
print('----------- Configuration Arguments -----------')
for arg, value in sorted(vars(args).iteritems()):
print('%s: %s' % (arg, value))
print('------------------------------------------------')
def profile(args):
"""profile the training process.
"""
if not args.first_batches_to_skip < args.max_batch_num:
raise ValueError("arg 'first_batches_to_skip' must be smaller than "
"'max_batch_num'.")
if not args.first_batches_to_skip >= 0:
raise ValueError(
"arg 'first_batches_to_skip' must not be smaller than 0.")
_, avg_cost, accuracy = stacked_lstmp_model(
hidden_dim=args.hidden_dim,
proj_dim=args.proj_dim,
stacked_num=args.stacked_num,
class_num=1749,
parallel=args.parallel)
adam_optimizer = fluid.optimizer.Adam(learning_rate=args.learning_rate)
adam_optimizer.minimize(avg_cost)
place = fluid.CPUPlace() if args.device == 'CPU' else fluid.CUDAPlace(0)
exe = fluid.Executor(place)
exe.run(fluid.default_startup_program())
ltrans = [
trans_add_delta.TransAddDelta(2, 2),
trans_mean_variance_norm.TransMeanVarianceNorm(args.mean_var),
trans_splice.TransSplice()
]
data_reader = reader.DataReader(args.feature_lst, args.label_lst)
data_reader.set_transformers(ltrans)
feature_t = fluid.LoDTensor()
label_t = fluid.LoDTensor()
sorted_key = None if args.sorted_key == 'None' else args.sorted_key
with profiler.profiler(args.device, sorted_key) as prof:
frames_seen, start_time = 0, 0.0
for batch_id, batch_data in enumerate(
data_reader.batch_iterator(args.batch_size,
args.minimum_batch_size)):
if batch_id >= args.max_batch_num:
break
if args.first_batches_to_skip == batch_id:
profiler.reset_profiler()
start_time = time.time()
frames_seen = 0
# load_data
(features, labels, lod) = batch_data
feature_t.set(features, place)
feature_t.set_lod([lod])
label_t.set(labels, place)
label_t.set_lod([lod])
frames_seen += lod[-1]
outs = exe.run(fluid.default_main_program(),
feed={"feature": feature_t,
"label": label_t},
fetch_list=[avg_cost, accuracy],
return_numpy=False)
if args.print_train_acc:
print("Batch %d acc: %f" %
(batch_id, lodtensor_to_ndarray(outs[1])[0]))
else:
sys.stdout.write('.')
sys.stdout.flush()
time_consumed = time.time() - start_time
frames_per_sec = frames_seen / time_consumed
print("\nTime consumed: %f s, performance: %f frames/s." %
(time_consumed, frames_per_sec))
if __name__ == '__main__':
args = parse_args()
print_arguments(args)
profile(args)
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import sys
import os
import numpy as np
import argparse
import time
import paddle.v2.fluid as fluid
import data_utils.augmentor.trans_mean_variance_norm as trans_mean_variance_norm
import data_utils.augmentor.trans_add_delta as trans_add_delta
import data_utils.augmentor.trans_splice as trans_splice
import data_utils.data_reader as reader
from data_utils.util import lodtensor_to_ndarray
from model_utils.model import stacked_lstmp_model
def parse_args():
parser = argparse.ArgumentParser("Training for stacked LSTMP model.")
parser.add_argument(
'--batch_size',
type=int,
default=32,
help='The number of sequences in a batch. (default: %(default)d)')
parser.add_argument(
'--minimum_batch_size',
type=int,
default=1,
help='The minimum number of sequences in a batch. '
'(default: %(default)d)')
parser.add_argument(
'--stacked_num',
type=int,
default=5,
help='Number of lstm layers to stack. (default: %(default)d)')
parser.add_argument(
'--proj_dim',
type=int,
default=512,
help='Projection size of the lstm unit. (default: %(default)d)')
parser.add_argument(
'--hidden_dim',
type=int,
default=1024,
help='Hidden size of lstm unit. (default: %(default)d)')
parser.add_argument(
'--pass_num',
type=int,
default=100,
help='Epoch number to train. (default: %(default)d)')
parser.add_argument(
'--print_per_batches',
type=int,
default=100,
help='Interval to print training accuracy. (default: %(default)d)')
parser.add_argument(
'--learning_rate',
type=float,
default=0.002,
help='Learning rate used to train. (default: %(default)f)')
parser.add_argument(
'--device',
type=str,
default='GPU',
choices=['CPU', 'GPU'],
help='The device type. (default: %(default)s)')
parser.add_argument(
'--parallel', action='store_true', help='If set, run in parallel.')
parser.add_argument(
'--mean_var',
type=str,
default='data/global_mean_var_search26kHr',
help='mean var path')
parser.add_argument(
'--train_feature_lst',
type=str,
default='data/feature.lst',
help='feature list path for training.')
parser.add_argument(
'--train_label_lst',
type=str,
default='data/label.lst',
help='label list path for training.')
parser.add_argument(
'--val_feature_lst',
type=str,
default='data/val_feature.lst',
help='feature list path for validation.')
parser.add_argument(
'--val_label_lst',
type=str,
default='data/val_label.lst',
help='label list path for validation.')
parser.add_argument(
'--model_save_dir',
type=str,
default='./checkpoints',
help="directory to save model. Do not save the model if set to ''.")
args = parser.parse_args()
return args
def print_arguments(args):
print('----------- Configuration Arguments -----------')
for arg, value in sorted(vars(args).iteritems()):
print('%s: %s' % (arg, value))
print('------------------------------------------------')
def train(args):
"""train in loop.
"""
# prediction, avg_cost, accuracy = stacked_lstmp_model(args.hidden_dim,
# args.proj_dim, args.stacked_num, class_num=1749, args.parallel)
prediction, avg_cost, accuracy = stacked_lstmp_model(
hidden_dim=args.hidden_dim,
proj_dim=args.proj_dim,
stacked_num=args.stacked_num,
class_num=1749,
parallel=args.parallel)
adam_optimizer = fluid.optimizer.Adam(learning_rate=args.learning_rate)
adam_optimizer.minimize(avg_cost)
# program for test
test_program = fluid.default_main_program().clone()
with fluid.program_guard(test_program):
test_program = fluid.io.get_inference_program([avg_cost, accuracy])
place = fluid.CPUPlace() if args.device == 'CPU' else fluid.CUDAPlace(0)
exe = fluid.Executor(place)
exe.run(fluid.default_startup_program())
ltrans = [
trans_add_delta.TransAddDelta(2, 2),
trans_mean_variance_norm.TransMeanVarianceNorm(args.mean_var),
trans_splice.TransSplice()
]
feature_t = fluid.LoDTensor()
label_t = fluid.LoDTensor()
# validation
def test(exe):
# If test data not found, return invalid cost and accuracy
if not (os.path.exists(args.val_feature_lst) and
os.path.exists(args.val_label_lst)):
return -1.0, -1.0
# test data reader
test_data_reader = reader.DataReader(args.val_feature_lst,
args.val_label_lst)
test_data_reader.set_transformers(ltrans)
test_costs, test_accs = [], []
for batch_id, batch_data in enumerate(
test_data_reader.batch_iterator(args.batch_size,
args.minimum_batch_size)):
# load_data
(features, labels, lod) = batch_data
feature_t.set(features, place)
feature_t.set_lod([lod])
label_t.set(labels, place)
label_t.set_lod([lod])
cost, acc = exe.run(test_program,
feed={"feature": feature_t,
"label": label_t},
fetch_list=[avg_cost, accuracy],
return_numpy=False)
test_costs.append(lodtensor_to_ndarray(cost)[0])
test_accs.append(lodtensor_to_ndarray(acc)[0])
return np.mean(test_costs), np.mean(test_accs)
# train data reader
train_data_reader = reader.DataReader(args.train_feature_lst,
args.train_label_lst)
train_data_reader.set_transformers(ltrans)
# train
for pass_id in xrange(args.pass_num):
pass_start_time = time.time()
for batch_id, batch_data in enumerate(
train_data_reader.batch_iterator(args.batch_size,
args.minimum_batch_size)):
# load_data
(features, labels, lod) = batch_data
feature_t.set(features, place)
feature_t.set_lod([lod])
label_t.set(labels, place)
label_t.set_lod([lod])
cost, acc = exe.run(fluid.default_main_program(),
feed={"feature": feature_t,
"label": label_t},
fetch_list=[avg_cost, accuracy],
return_numpy=False)
if batch_id > 0 and (batch_id % args.print_per_batches == 0):
print("\nBatch %d, train cost: %f, train acc: %f" %
(batch_id, lodtensor_to_ndarray(cost)[0],
lodtensor_to_ndarray(acc)[0]))
else:
sys.stdout.write('.')
sys.stdout.flush()
# run test
val_cost, val_acc = test(exe)
# save model
if args.model_save_dir != '':
model_path = os.path.join(
args.model_save_dir, "deep_asr.pass_" + str(pass_id) + ".model")
fluid.io.save_inference_model(model_path, ["feature"],
[prediction], exe)
# cal pass time
pass_end_time = time.time()
time_consumed = pass_end_time - pass_start_time
# print info at pass end
print("\nPass %d, time consumed: %f s, val cost: %f, val acc: %f\n" %
(pass_id, time_consumed, val_cost, val_acc))
if __name__ == '__main__':
args = parse_args()
print_arguments(args)
if args.model_save_dir != '' and not os.path.exists(args.model_save_dir):
os.mkdir(args.model_save_dir)
train(args)
"""
A set of tools for generating adversarial examples on the PaddlePaddle platform
"""
from . import attacks
from . import models
from .adversary import Adversary
"""
Defines a class that contains the original object, the target and the
adversarial example.
"""
class Adversary(object):
"""
Adversary contains the original object, the target and the adversarial
example.
"""
def __init__(self, original, original_label=None):
"""
:param original: The original instance, such as an image.
:param original_label: The original instance's label.
"""
assert original is not None
self.__original = original
self.__original_label = original_label
self.__target_label = None
self.__target = None
self.__is_targeted_attack = False
self.__adversarial_example = None
self.__adversarial_label = None
def set_target(self, is_targeted_attack, target=None, target_label=None):
"""
Set the attack to be targeted or untargeted.
:param is_targeted_attack: bool
:param target: The target.
:param target_label: If is_targeted_attack is true and target_label is
None, self.target_label will be set by the Attack class.
If is_targeted_attack is false, target_label must be None.
"""
assert (target_label is None) or is_targeted_attack
self.__is_targeted_attack = is_targeted_attack
self.__target_label = target_label
self.__target = target
if not is_targeted_attack:
self.__target_label = None
self.__target = None
def set_original(self, original, original_label=None):
"""
Reset the original.
:param original: Original instance.
:param original_label: Original instance's label.
"""
if original != self.__original:
self.__original = original
self.__original_label = original_label
self.__adversarial_example = None
if original is None:
self.__original_label = None
def _is_successful(self, adversarial_label):
"""
Check whether adversarial_label is the expected adversarial label.
:param adversarial_label: adversarial label.
:return: bool
"""
if self.__target_label is not None:
return adversarial_label == self.__target_label
else:
return (adversarial_label is not None) and \
(adversarial_label != self.__original_label)
def is_successful(self):
"""
Whether an adversarial example has been found.
:return: bool
"""
return self._is_successful(self.__adversarial_label)
def try_accept_the_example(self, adversarial_example, adversarial_label):
"""
If adversarial_label is the label we are looking for,
the adversarial_example and adversarial_label will be accepted and
True will be returned.
:return: bool
"""
assert adversarial_example is not None
assert self.__original.shape == adversarial_example.shape
ok = self._is_successful(adversarial_label)
if ok:
self.__adversarial_example = adversarial_example
self.__adversarial_label = adversarial_label
return ok
def perturbation(self, multiplying_factor=1.0):
"""
The perturbation that was added to the original to produce the adversarial_example.
:param multiplying_factor: float.
:return: The perturbation that is multiplied by multiplying_factor.
"""
assert self.__original is not None
assert self.__adversarial_example is not None
return multiplying_factor * (
self.__adversarial_example - self.__original)
@property
def is_targeted_attack(self):
"""
:property: is_targeted_attack
"""
return self.__is_targeted_attack
@property
def target_label(self):
"""
:property: target_label
"""
return self.__target_label
@target_label.setter
def target_label(self, label):
"""
:property: target_label
"""
self.__target_label = label
@property
def target(self):
"""
:property: target
"""
return self.__target
@property
def original(self):
"""
:property: original
"""
return self.__original
@property
def original_label(self):
"""
:property: original_label
"""
return self.__original_label
@original_label.setter
def original_label(self, label):
"""
original_label setter
"""
self.__original_label = label
@property
def adversarial_example(self):
"""
:property: adversarial_example
"""
return self.__adversarial_example
@adversarial_example.setter
def adversarial_example(self, example):
"""
adversarial_example setter
"""
self.__adversarial_example = example
@property
def adversarial_label(self):
"""
:property: adversarial_label
"""
return self.__adversarial_label
@adversarial_label.setter
def adversarial_label(self, label):
"""
adversarial_label setter
"""
self.__adversarial_label = label
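A brief usage sketch of the Adversary container (the image shape and labels are placeholders): it only stores the original instance, the optional target, and whatever example an Attack later accepts.

import numpy as np
from advbox import Adversary

original = np.zeros((1, 28, 28), dtype="float32")  # e.g. an MNIST image
adversary = Adversary(original, original_label=7)
# Untargeted by default; to request a specific misclassification instead:
adversary.set_target(is_targeted_attack=True, target_label=1)
# An Attack subclass later calls try_accept_the_example(...), and
# is_successful() reports whether the accepted label matches this goal.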
"""
Attack methods
"""
from .base import Attack
from .deepfool import DeepFoolAttack
from .gradientsign import FGSM
from .gradientsign import GradientSignAttack
from .iterator_gradientsign import IFGSM
from .iterator_gradientsign import IteratorGradientSignAttack
"""
The base class for attack methods.
"""
from abc import ABCMeta, abstractmethod
import logging
from abc import ABCMeta
from abc import abstractmethod
import numpy as np
class Attack(object):
"""
Abstract base class for adversarial attacks. `Attack` represent an adversarial attack
which search an adversarial example. subclass should implement the _apply() method.
Abstract base class for adversarial attacks. `Attack` represents an
adversarial attack which searches for an adversarial example. A subclass
should implement the _apply() method.
Args:
model(Model): an instance of the class advbox.base.Model.
@@ -18,22 +23,50 @@ class Attack(object):
def __init__(self, model):
self.model = model
def __call__(self, image_label):
def __call__(self, adversary, **kwargs):
"""
Generate the adversarial sample.
Args:
image_label(list): The image and label tuple list with one element.
adversary(object): The adversary object.
**kwargs: Other named arguments.
"""
adv_img = self._apply(image_label)
return adv_img
self._preprocess(adversary)
return self._apply(adversary, **kwargs)
@abstractmethod
def _apply(self, image_label):
def _apply(self, adversary, **kwargs):
"""
Search an adversarial example.
Args:
image_batch(list): The image and label tuple list with one element.
adversary(object): The adversary object.
**kwargs: Other named arguments.
"""
raise NotImplementedError
def _preprocess(self, adversary):
"""
Preprocess the adversary object.
:param adversary: adversary
:return: None
"""
if adversary.original_label is None:
adversary.original_label = np.argmax(
self.model.predict(adversary.original))
if adversary.is_targeted_attack and adversary.target_label is None:
if adversary.target is None:
raise ValueError(
'When adversary.is_targeted_attack is True, '
'adversary.target_label or adversary.target must be set.')
else:
adversary.target_label = np.argmax(
self.model.predict(adversary.target))
logging.info('adversary:\noriginal_label: {}'
'\n target_label: {}'
'\n is_targeted_attack: {}'
''.format(adversary.original_label, adversary.target_label,
adversary.is_targeted_attack))
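To illustrate the contract the base class defines, here is a toy subclass sketch (assuming it lives next to base.py inside advbox/attacks): __call__ first runs _preprocess to fill in missing labels, then hands the adversary to _apply.

import numpy as np

from .base import Attack


class NoOpAttack(Attack):
    """Toy attack: proposes the unmodified original as the candidate example,
    so it can only succeed for a targeted attack that already matches."""

    def _apply(self, adversary, **kwargs):
        candidate = np.copy(adversary.original)
        label = np.argmax(self.model.predict(candidate))
        adversary.try_accept_the_example(candidate, label)
        return adversary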
"""
This module provides the DeepFool attack method. DeepFool is a simple and
accurate adversarial attack.
"""
from __future__ import division
import logging
import numpy as np
from .base import Attack
class DeepFoolAttack(Attack):
"""
"DeepFool: a simple and accurate method to fool deep neural networks",
Seyed-Mohsen Moosavi-Dezfooli, Alhussein Fawzi, Pascal Frossard,
https://arxiv.org/abs/1511.04599
"""
def _apply(self, adversary, iterations=100, overshoot=0.02):
"""
Apply the deep fool attack.
Args:
adversary(Adversary): The Adversary object.
iterations(int): The maximum number of iterations.
overshoot(float): We add (1+overshoot)*pert every iteration.
Return:
adversary: The Adversary object.
"""
assert adversary is not None
pre_label = adversary.original_label
min_, max_ = self.model.bounds()
f = self.model.predict(adversary.original)
if adversary.is_targeted_attack:
labels = [adversary.target_label]
else:
max_class_count = 10
class_count = self.model.num_classes()
if class_count > max_class_count:
labels = np.argsort(f)[-(max_class_count + 1):-1]
else:
labels = np.arange(class_count)
gradient = self.model.gradient(adversary.original, pre_label)
x = adversary.original
for iteration in xrange(iterations):
w = np.inf
w_norm = np.inf
pert = np.inf
for k in labels:
if k == pre_label:
continue
gradient_k = self.model.gradient(x, k)
w_k = gradient_k - gradient
f_k = f[k] - f[pre_label]
w_k_norm = np.linalg.norm(w_k) + 1e-8
pert_k = (np.abs(f_k) + 1e-8) / w_k_norm
if pert_k < pert:
pert = pert_k
w = w_k
w_norm = w_k_norm
r_i = -w * pert / w_norm # The gradient is -gradient in the paper.
x = x + (1 + overshoot) * r_i
x = np.clip(x, min_, max_)
f = self.model.predict(x)
gradient = self.model.gradient(x, pre_label)
adv_label = np.argmax(f)
logging.info('iteration = {}, f = {}, pre_label = {}'
', adv_label={}'.format(iteration, f[pre_label],
pre_label, adv_label))
if adversary.try_accept_the_example(x, adv_label):
return adversary
return adversary
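A usage sketch for the attack above (model, image and label are placeholders for a wrapped, trained classifier such as the PaddleModel defined later, an input array and its ground-truth label):

from advbox import Adversary
from advbox.attacks.deepfool import DeepFoolAttack

attack = DeepFoolAttack(model)
adversary = Adversary(image, original_label=label)
adversary = attack(adversary, iterations=100, overshoot=0.02)
if adversary.is_successful():
    perturbed = adversary.adversarial_example
    print('fooled: label %d -> %d' % (label, adversary.adversarial_label))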
@@ -2,37 +2,59 @@
This module provides the implementation of the FGSM attack method.
"""
from __future__ import division
import numpy as np
import logging
from collections import Iterable
import numpy as np
from .base import Attack
class GradientSignAttack(Attack):
"""
This attack was originally implemented by Goodfellow et al. (2015) with the
infinity norm (and is known as the "Fast Gradient Sign Method"). This is therefore called
the Fast Gradient Method.
infinity norm (and is known as the "Fast Gradient Sign Method").
This is therefore called the Fast Gradient Method.
Paper link: https://arxiv.org/abs/1412.6572
"""
def _apply(self, image_label, epsilons=1000):
assert len(image_label) == 1
pre_label = np.argmax(self.model.predict(image_label))
def _apply(self, adversary, epsilons=1000):
"""
Apply the gradient sign attack.
Args:
adversary(Adversary): The Adversary object.
epsilons(list|tuple|int): The epsilon (input variation parameter).
Return:
adversary: The Adversary object.
"""
assert adversary is not None
if not isinstance(epsilons, Iterable):
epsilons = np.linspace(0, 1, num=epsilons + 1)[1:]
pre_label = adversary.original_label
min_, max_ = self.model.bounds()
gradient = self.model.gradient(image_label)
gradient_sign = np.sign(gradient) * (max_ - min_)
if not isinstance(epsilons, Iterable):
epsilons = np.linspace(0, 1, num=epsilons + 1)
if adversary.is_targeted_attack:
gradient = self.model.gradient(adversary.original,
adversary.target_label)
gradient_sign = -np.sign(gradient) * (max_ - min_)
else:
gradient = self.model.gradient(adversary.original,
adversary.original_label)
gradient_sign = np.sign(gradient) * (max_ - min_)
for epsilon in epsilons:
adv_img = image_label[0][0].reshape(
gradient_sign.shape) + epsilon * gradient_sign
adv_img = adversary.original + epsilon * gradient_sign
adv_img = np.clip(adv_img, min_, max_)
adv_label = np.argmax(self.model.predict([(adv_img, 0)]))
if pre_label != adv_label:
return adv_img
adv_label = np.argmax(self.model.predict(adv_img))
logging.info('epsilon = {:.3f}, pre_label = {}, adv_label={}'.
format(epsilon, pre_label, adv_label))
if adversary.try_accept_the_example(adv_img, adv_label):
return adversary
return adversary
FGSM = GradientSignAttack
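And the corresponding usage sketch for FGSM (again with placeholder model/image/label; passing epsilons=100 scans 100 evenly spaced values in (0, 1], per the linspace above):

from advbox import Adversary
from advbox.attacks.gradientsign import FGSM

attack = FGSM(model)  # `model` is a wrapped classifier, e.g. a PaddleModel
adversary = Adversary(image, original_label=label)
adversary = attack(adversary, epsilons=100)
if adversary.is_successful():
    print('adversarial label: %d' % adversary.adversarial_label)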
@@ -2,8 +2,12 @@
This module provides the implementation of the iterative FGSM attack method.
"""
from __future__ import division
import numpy as np
import logging
from collections import Iterable
import numpy as np
from .base import Attack
@@ -13,31 +17,43 @@ class IteratorGradientSignAttack(Attack):
Paper link: https://arxiv.org/pdf/1607.02533.pdf
"""
def _apply(self, image_label, epsilons=100, steps=10):
def _apply(self, adversary, epsilons=100, steps=10):
"""
Apply the iterative gradient sign attack.
Args:
image_label(list): The image and label tuple list of one element.
adversary(Adversary): The Adversary object.
epsilons(list|tuple|int): The epsilon (input variation parameter).
steps(int): The number of iteration steps.
Return:
numpy.ndarray: The adversarail sample generated by the algorithm.
adversary(Adversary): The Adversary object.
"""
assert len(image_label) == 1
pre_label = np.argmax(self.model.predict(image_label))
gradient = self.model.gradient(image_label)
min_, max_ = self.model.bounds()
if not isinstance(epsilons, Iterable):
epsilons = np.linspace(0, 1, num=epsilons + 1)
epsilons = np.linspace(0, 1 / steps, num=epsilons + 1)[1:]
pre_label = adversary.original_label
min_, max_ = self.model.bounds()
for epsilon in epsilons:
adv_img = image_label[0][0].reshape(gradient.shape)
adv_img = adversary.original
for _ in range(steps):
gradient = self.model.gradient([(adv_img, image_label[0][1])])
gradient_sign = np.sign(gradient) * (max_ - min_)
adv_img = adv_img + epsilon * gradient_sign
if adversary.is_targeted_attack:
gradient = self.model.gradient(adversary.original,
adversary.target_label)
gradient_sign = -np.sign(gradient) * (max_ - min_)
else:
gradient = self.model.gradient(adversary.original,
adversary.original_label)
gradient_sign = np.sign(gradient) * (max_ - min_)
adv_img = adv_img + gradient_sign * epsilon
adv_img = np.clip(adv_img, min_, max_)
adv_label = np.argmax(self.model.predict([(adv_img, 0)]))
if pre_label != adv_label:
return adv_img
adv_label = np.argmax(self.model.predict(adv_img))
logging.info('epsilon = {:.3f}, pre_label = {}, adv_label={}'.
format(epsilon, pre_label, adv_label))
if adversary.try_accept_the_example(adv_img, adv_label):
return adversary
return adversary
IFGSM = IteratorGradientSignAttack
"""
Paddle model for the target of attack
"""
from .base import Model
from .paddle import PaddleModel
@@ -2,21 +2,21 @@
The base class of models.
"""
from abc import ABCMeta
import abc
from abc import abstractmethod
abstractmethod = abc.abstractmethod
import numpy as np
class Model(object):
"""
Base class of model to provide attack.
Args:
bounds(tuple): The lower and upper bound for the image pixel.
channel_axis(int): The index of the axis that represents the color channel.
preprocess(tuple): Two element tuple used to preprocess the input. First
substract the first element, then divide the second element.
channel_axis(int): The index of the axis that represents the color
channel.
preprocess(tuple): Two element tuple used to preprocess the input.
First subtract the first element, then divide by the second element.
"""
__metaclass__ = ABCMeta
@@ -43,25 +43,32 @@ class Model(object):
return self._channel_axis
def _process_input(self, input_):
res = input_
res = None
sub, div = self._preprocess
if sub != 0:
if np.any(sub != 0):
res = input_ - sub
assert div != 0
if div != 1:
res /= div
assert np.any(div != 0)
if np.any(div != 1):
if res is None: # "res = input_ - sub" is not executed!
res = input_ / div
else:
res /= div
if res is None: # "res = (input_ - sub)/ div" is not executed!
return input_
return res
@abstractmethod
def predict(self, image_batch):
def predict(self, data):
"""
Calculate the prediction of the image batch.
Calculate the prediction of the data.
Args:
image_batch(numpy.ndarray): image batch of shape (batch_size, height, width, channels).
data(numpy.ndarray): input data with shape (size,
height, width, channels).
Return:
numpy.ndarray: predictions of the images with shape (batch_size, num_of_classes).
numpy.ndarray: predictions of the data with shape (batch_size,
num_of_classes).
"""
raise NotImplementedError
@@ -76,15 +83,17 @@ class Model(object):
raise NotImplementedError
@abstractmethod
def gradient(self, image_batch):
def gradient(self, data, label):
"""
Calculate the gradient of the cross-entropy loss w.r.t the image.
Args:
image_batch(list): The image and label tuple list.
data(numpy.ndarray): input data with shape (size, height, width,
channels).
label(int): Label used to calculate the gradient.
Return:
numpy.ndarray: gradient of the cross-entropy loss w.r.t the image with
the shape (height, width, channel).
numpy.ndarray: gradient of the cross-entropy loss w.r.t the image
with the shape (height, width, channel).
"""
raise NotImplementedError
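The preprocess tuple therefore acts as a plain (subtract, divide) normalization applied before prediction and gradient computation; a self-contained sketch of the equivalent arithmetic (the function name and sample values are hypothetical):

import numpy as np

def process_input(x, preprocess=(0, 1)):
    # Same arithmetic as Model._process_input: subtract `sub`, then divide by
    # `div`, skipping whichever step is a no-op.
    sub, div = preprocess
    return (x - sub) / div

x = np.array([0.0, 127.5, 255.0], dtype="float32")
print(process_input(x, preprocess=(127.5, 127.5)))  # -> [-1., 0., 1.]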
"""
Paddle model
"""
from __future__ import absolute_import
import numpy as np
import paddle.v2 as paddle
import paddle.v2.fluid as fluid
from paddle.v2.fluid.framework import program_guard
from .base import Model
@@ -11,10 +12,12 @@ from .base import Model
class PaddleModel(Model):
"""
Create a PaddleModel instance.
When you need to generate a adversarial sample, you should construct an instance of PaddleModel.
When you need to generate an adversarial sample, you should construct an
instance of PaddleModel.
Args:
program(paddle.v2.fluid.framework.Program): The program of the model which generate the adversarial sample.
program(paddle.v2.fluid.framework.Program): The program of the model
which generates the adversarial sample.
input_name(string): The name of the input.
logits_name(string): The name of the logits.
predict_name(string): The name of the predict.
......@@ -30,12 +33,12 @@ class PaddleModel(Model):
bounds,
channel_axis=3,
preprocess=None):
super(PaddleModel, self).__init__(
bounds=bounds, channel_axis=channel_axis, preprocess=preprocess)
if preprocess is None:
preprocess = (0, 1)
super(PaddleModel, self).__init__(
bounds=bounds, channel_axis=channel_axis, preprocess=preprocess)
self._program = program
self._place = fluid.CPUPlace()
self._exe = fluid.Executor(self._place)
......@@ -49,30 +52,36 @@ class PaddleModel(Model):
loss = self._program.block(0).var(self._cost_name)
param_grads = fluid.backward.append_backward(
loss, parameter_list=[self._input_name])
self._gradient = dict(param_grads)[self._input_name]
self._gradient = filter(lambda p: p[0].name == self._input_name,
param_grads)[0][1]
def predict(self, image_batch):
def predict(self, data):
"""
Predict the label of the image_batch.
Calculate the prediction of the data.
Args:
image_batch(list): The image and label tuple list.
Return:
numpy.ndarray: predictions of the images with shape (batch_size, num_of_classes).
Args:
data(numpy.ndarray): input data with shape (size,
height, width, channels).
Return:
numpy.ndarray: predictions of the data with shape (batch_size,
num_of_classes).
"""
scaled_data = self._process_input(data)
feeder = fluid.DataFeeder(
feed_list=[self._input_name, self._logits_name],
place=self._place,
program=self._program)
predict_var = self._program.block(0).var(self._predict_name)
predict = self._exe.run(self._program,
feed=feeder.feed(image_batch),
feed=feeder.feed([(scaled_data, 0)]),
fetch_list=[predict_var])
predict = np.squeeze(predict, axis=0)
return predict
def num_classes(self):
"""
Calculate the number of classes of the output label.
Return:
int: the number of classes
......@@ -81,21 +90,27 @@ class PaddleModel(Model):
assert len(predict_var.shape) == 2
return predict_var.shape[1]
def gradient(self, image_batch):
def gradient(self, data, label):
"""
Calculate the gradient of the loss w.r.t the input.
Calculate the gradient of the cross-entropy loss w.r.t the image.
Args:
image_batch(list): The image and label tuple list.
data(numpy.ndarray): input data with shape (size, height, width,
channels).
label(int): Label used to calculate the gradient.
Return:
list: The list of the gradient of the image.
numpy.ndarray: gradient of the cross-entropy loss w.r.t the image
with the shape (height, width, channel).
"""
scaled_data = self._process_input(data)
feeder = fluid.DataFeeder(
feed_list=[self._input_name, self._logits_name],
place=self._place,
program=self._program)
grad, = self._exe.run(self._program,
feed=feeder.feed(image_batch),
feed=feeder.feed([(scaled_data, label)]),
fetch_list=[self._gradient])
return grad
return grad.reshape(data.shape)
"""
FGSM demos on mnist using advbox tool.
"""
import matplotlib.pyplot as plt
import paddle.v2 as paddle
import paddle.v2.fluid as fluid
import matplotlib.pyplot as plt
import numpy as np
from advbox.models.paddle import PaddleModel
from advbox import Adversary
from advbox.attacks.gradientsign import GradientSignAttack
from advbox.models.paddle import PaddleModel
def cnn_model(img):
......@@ -18,7 +18,7 @@ def cnn_model(img):
Returns:
Variable: the label prediction
"""
#conv1 = fluid.nets.conv2d()
# conv1 = fluid.nets.conv2d()
conv_pool_1 = fluid.nets.simple_img_conv_pool(
input=img,
num_filters=20,
......@@ -76,10 +76,11 @@ def main():
att = GradientSignAttack(m)
for data in train_reader():
# fgsm attack
adv_img = att(data)
plt.imshow(n[0][0], cmap='Greys_r')
plt.show()
#np.save('adv_img', adv_img)
adversary = att(Adversary(data[0][0], data[0][1]))
if adversary.is_successful():
plt.imshow(adversary.target, cmap='Greys_r')
plt.show()
# np.save('adv_img', adversary.target)
break
......
# SE-ResNeXt for image classification
This model, built with PaddlePaddle Fluid, is still under active development and
is not the final version. We welcome feedback.
import os
import math
import random
import functools
import numpy as np
......@@ -7,10 +8,6 @@ from PIL import Image, ImageEnhance
random.seed(0)
_R_MEAN = 123.0
_G_MEAN = 117.0
_B_MEAN = 104.0
DATA_DIM = 224
THREAD = 8
......@@ -20,7 +17,8 @@ DATA_DIR = 'ILSVRC2012'
TRAIN_LIST = 'ILSVRC2012/train_list.txt'
TEST_LIST = 'ILSVRC2012/test_list.txt'
img_mean = np.array([_R_MEAN, _G_MEAN, _B_MEAN]).reshape((3, 1, 1))
img_mean = np.array([0.485, 0.456, 0.406]).reshape((3, 1, 1))
img_std = np.array([0.229, 0.224, 0.225]).reshape((3, 1, 1))
def resize_short(img, target_size):
......@@ -46,6 +44,36 @@ def crop_image(img, target_size, center):
return img
def random_crop(img, size, scale=[0.08, 1.0], ratio=[3. / 4., 4. / 3.]):
aspect_ratio = math.sqrt(random.uniform(*ratio))
w = 1. * aspect_ratio
h = 1. / aspect_ratio
bound = min((float(img.size[0]) / img.size[1]) / (w**2),
(float(img.size[1]) / img.size[0]) / (h**2))
scale_max = min(scale[1], bound)
scale_min = min(scale[0], bound)
target_area = img.size[0] * img.size[1] * random.uniform(scale_min,
scale_max)
target_size = math.sqrt(target_area)
w = int(target_size * w)
h = int(target_size * h)
i = random.randint(0, img.size[0] - w)
j = random.randint(0, img.size[1] - h)
img = img.crop((i, j, i + w, j + h))
img = img.resize((size, size), Image.LANCZOS)
return img
def rotate_image(img):
angle = random.randint(-10, 10)
img = img.rotate(angle)
return img
def distort_color(img):
def random_brightness(img, lower=0.5, upper=1.5):
e = random.uniform(lower, upper)
......@@ -69,25 +97,28 @@ def distort_color(img):
return img
def process_image(sample, mode):
def process_image(sample, mode, color_jitter, rotate):
img_path = sample[0]
img = Image.open(img_path)
if mode == 'train':
img = resize_short(img, DATA_DIM + 32)
if rotate: img = rotate_image(img)
img = random_crop(img, DATA_DIM)
else:
img = resize_short(img, DATA_DIM)
img = crop_image(img, target_size=DATA_DIM, center=(mode != 'train'))
img = crop_image(img, target_size=DATA_DIM, center=True)
if mode == 'train':
img = distort_color(img)
if color_jitter:
img = distort_color(img)
if random.randint(0, 1) == 1:
img = img.transpose(Image.FLIP_LEFT_RIGHT)
if img.mode != 'RGB':
img = img.convert('RGB')
img = np.array(img).astype('float32').transpose((2, 0, 1))
img = np.array(img).astype('float32').transpose((2, 0, 1)) / 255
img -= img_mean
img /= img_std
if mode == 'train' or mode == 'test':
return img, sample[1]
......@@ -95,7 +126,11 @@ def process_image(sample, mode):
return img
def _reader_creator(file_list, mode, shuffle=False):
def _reader_creator(file_list,
mode,
shuffle=False,
color_jitter=False,
rotate=False):
def reader():
with open(file_list) as flist:
lines = [line.strip() for line in flist]
......@@ -110,13 +145,15 @@ def _reader_creator(file_list, mode, shuffle=False):
img_path = os.path.join(DATA_DIR, line)
yield [img_path]
mapper = functools.partial(process_image, mode=mode)
mapper = functools.partial(
process_image, mode=mode, color_jitter=color_jitter, rotate=rotate)
return paddle.reader.xmap_readers(mapper, reader, THREAD, BUF_SIZE)
def train():
return _reader_creator(TRAIN_LIST, 'train', shuffle=True)
return _reader_creator(
TRAIN_LIST, 'train', shuffle=True, color_jitter=True, rotate=True)
def test():
......
import os
import paddle.v2 as paddle
import paddle.v2.fluid as fluid
import reader
......@@ -35,7 +34,11 @@ def squeeze_excitation(input, num_channels, reduction_ratio):
def shortcut(input, ch_out, stride):
ch_in = input.shape[1]
if ch_in != ch_out:
return conv_bn_layer(input, ch_out, 3, stride)
if stride == 1:
filter_size = 1
else:
filter_size = 3
return conv_bn_layer(input, ch_out, filter_size, stride)
else:
return input
......@@ -75,7 +78,7 @@ def SE_ResNeXt(input, class_dim, infer=False):
conv = conv_bn_layer(
input=conv, num_filters=128, filter_size=3, stride=1, act='relu')
conv = fluid.layers.pool2d(
input=conv, pool_size=3, pool_stride=2, pool_type='max')
input=conv, pool_size=3, pool_stride=2, pool_padding=1, pool_type='max')
for block in range(len(depth)):
for i in range(depth[block]):
......@@ -96,7 +99,11 @@ def SE_ResNeXt(input, class_dim, infer=False):
return out
def train(learning_rate, batch_size, num_passes, model_save_dir='model'):
def train(learning_rate,
batch_size,
num_passes,
init_model=None,
model_save_dir='model'):
class_dim = 1000
image_shape = [3, 224, 224]
......@@ -109,9 +116,9 @@ def train(learning_rate, batch_size, num_passes, model_save_dir='model'):
avg_cost = fluid.layers.mean(x=cost)
optimizer = fluid.optimizer.Momentum(
learning_rate=learning_rate / batch_size,
learning_rate=learning_rate,
momentum=0.9,
regularization=fluid.regularizer.L2Decay(1e-4 * batch_size))
regularization=fluid.regularizer.L2Decay(1e-4))
opts = optimizer.minimize(avg_cost)
accuracy = fluid.evaluator.Accuracy(input=out, label=label)
......@@ -125,6 +132,9 @@ def train(learning_rate, batch_size, num_passes, model_save_dir='model'):
exe = fluid.Executor(place)
exe.run(fluid.default_startup_program())
if init_model is not None:
fluid.io.load_persistables_if_exist(exe, init_model)
train_reader = paddle.batch(reader.train(), batch_size=batch_size)
test_reader = paddle.batch(reader.test(), batch_size=batch_size)
feeder = fluid.DataFeeder(place=place, feed_list=[image, label])
......@@ -141,16 +151,18 @@ def train(learning_rate, batch_size, num_passes, model_save_dir='model'):
test_accuracy.reset(exe)
for data in test_reader():
out, acc = exe.run(inference_program,
feed=feeder.feed(data),
fetch_list=[avg_cost] + test_accuracy.metrics)
loss, acc = exe.run(inference_program,
feed=feeder.feed(data),
fetch_list=[avg_cost] + test_accuracy.metrics)
test_pass_acc = test_accuracy.eval(exe)
print("End pass {0}, train_acc {1}, test_acc {2}".format(
pass_id, pass_acc, test_pass_acc))
model_path = os.path.join(model_save_dir, str(pass_id))
fluid.io.save_inference_model(model_path, ['image'], [out], exe)
if not os.path.isdir(model_path):
os.makedirs(model_path)
fluid.io.save_persistables(exe, model_path)
if __name__ == '__main__':
train(learning_rate=0.1, batch_size=7, num_passes=100)
train(learning_rate=0.1, batch_size=8, num_passes=100, init_model=None)
import os
import cv2
import numpy as np
from PIL import Image
from paddle.v2.image import load_image
class DataGenerator(object):
def __init__(self):
pass
def train_reader(self, img_root_dir, img_label_list, batchsize):
'''
Reader interface for training.
:param img_root_dir: The root path of the images for training.
:type img_root_dir: str
:param img_label_list: The path of the <image_name, label> file for training.
:type img_label_list: str
'''
img_label_lines = []
if batchsize == 1:
to_file = "tmp.txt"
cmd = "cat " + img_label_list + " | awk '{print $1,$2,$3,$4;}' | shuf > " + to_file
print "cmd: " + cmd
os.system(cmd)
print "finish batch shuffle"
img_label_lines = open(to_file, 'r').readlines()
else:
to_file = "tmp.txt"
#cmd1: partial shuffle
cmd = "cat " + img_label_list + " | awk '{printf(\"%04d%.4f %s\\n\", $1, rand(), $0)}' | sort | sed 1,$((1 + RANDOM % 100))d | "
#cmd2: batch merge and shuffle
cmd += "awk '{printf $2\" \"$3\" \"$4\" \"$5\" \"; if(NR % " + str(
batchsize) + " == 0) print \"\";}' | shuf | "
#cmd3: batch split
cmd += "awk '{if(NF == " + str(
batchsize
) + " * 4) {for(i = 0; i < " + str(
batchsize
) + "; i++) print $(4*i+1)\" \"$(4*i+2)\" \"$(4*i+3)\" \"$(4*i+4);}}' > " + to_file
print "cmd: " + cmd
os.system(cmd)
print "finish batch shuffle"
img_label_lines = open(to_file, 'r').readlines()
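# Note on the shell pipeline above ("batch shuffle"):
#   cmd1 prefixes every line with a key built from the image height and a
#        random number, sorts on that key (so images with similar heights end
#        up next to each other) and drops a random number of leading lines so
#        batch boundaries differ between runs;
#   cmd2 strips the key, packs every `batchsize` consecutive samples into one
#        long line and shuffles those packed lines;
#   cmd3 unpacks each packed line back into `batchsize` individual samples.
# As a result, samples within a batch have similar heights (the reader below
# resizes every image in a batch to the size of the first one), while the
# order of the batches is random.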
def reader():
sizes = len(img_label_lines) / batchsize
for i in range(sizes):
result = []
sz = [0, 0]
for j in range(batchsize):
line = img_label_lines[i * batchsize + j]
# h, w, img_name, labels
items = line.split(' ')
label = [int(c) for c in items[-1].split(',')]
img = Image.open(os.path.join(img_root_dir, items[
2])).convert('L')  # convert to grayscale
if j == 0:
sz = img.size
img = img.resize((sz[0], sz[1]))
img = np.array(img) - 127.5
img = img[np.newaxis, ...]
result.append([img, label])
yield result
return reader
def test_reader(self, img_root_dir, img_label_list):
'''
Reader interface for inference.
:param img_root_dir: The root path of the images for testing.
:type img_root_dir: str
:param img_label_list: The path of the <image_name, label> file for testing.
:type img_label_list: str
'''
def reader():
for line in open(img_label_list):
# h, w, img_name, labels
items = line.split(' ')
label = [int(c) for c in items[-1].split(',')]
img = Image.open(os.path.join(img_root_dir, items[2])).convert(
'L')
img = np.array(img) - 127.5
img = img[np.newaxis, ...]
yield img, label
return reader
# Text Classification
## Data Preparation
```
wget http://ai.stanford.edu/%7Eamaas/data/sentiment/aclImdb_v1.tar.gz
tar zxf aclImdb_v1.tar.gz
```
## Training
```
python train.py --dict_path 'aclImdb/imdb.vocab'
```
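The `--dict_path` argument points to a plain-text vocabulary with one token per line; the training script maps each token to its line index and reserves one extra id for out-of-vocabulary words (see `load_vocab` and `main` in `train.py` below). A minimal sketch of that behaviour, assuming the `aclImdb/imdb.vocab` path from the command above:
```
vocab = {}
with open('aclImdb/imdb.vocab') as f:
    for idx, line in enumerate(f):
        vocab[line.strip()] = idx   # the line number becomes the word id
vocab['<unk>'] = len(vocab)         # extra id for out-of-vocabulary words
print('The dictionary size is : %d' % len(vocab))
```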
class TrainConfig(object):
# Whether to use GPU in training or not.
use_gpu = False
# The training batch size.
batch_size = 4
# The epoch number.
num_passes = 30
# The global learning rate.
learning_rate = 0.01
# Training log will be printed every log_period.
log_period = 100
import numpy as np
import sys
import os
import argparse
import time
import paddle.v2 as paddle
import paddle.v2.fluid as fluid
from config import TrainConfig as conf
def parse_args():
parser = argparse.ArgumentParser()
parser.add_argument(
'--dict_path',
type=str,
required=True,
help="Path of the word dictionary.")
return parser.parse_args()
# Define to_lodtensor function to process the sequential data.
def to_lodtensor(data, place):
seq_lens = [len(seq) for seq in data]
cur_len = 0
lod = [cur_len]
for l in seq_lens:
cur_len += l
lod.append(cur_len)
flattened_data = np.concatenate(data, axis=0).astype("int64")
flattened_data = flattened_data.reshape([len(flattened_data), 1])
res = fluid.LoDTensor()
res.set(flattened_data, place)
res.set_lod([lod])
return res
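# For example, a batch of three sequences with lengths 3, 1 and 2 produces
# lod = [0, 3, 4, 6] (cumulative offsets) and a flattened int64 tensor of
# shape (6, 1); res.set_lod([lod]) attaches the offsets as the level-0 LoD.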
# Load the dictionary.
def load_vocab(filename):
vocab = {}
with open(filename) as f:
for idx, line in enumerate(f):
vocab[line.strip()] = idx
return vocab
# Define the convolution model.
def conv_net(dict_dim,
window_size=3,
emb_dim=128,
num_filters=128,
fc0_dim=96,
class_dim=2):
data = fluid.layers.data(
name="words", shape=[1], dtype="int64", lod_level=1)
label = fluid.layers.data(name="label", shape=[1], dtype="int64")
emb = fluid.layers.embedding(input=data, size=[dict_dim, emb_dim])
conv_3 = fluid.nets.sequence_conv_pool(
input=emb,
num_filters=num_filters,
filter_size=window_size,
act="tanh",
pool_type="max")
fc_0 = fluid.layers.fc(input=[conv_3], size=fc0_dim)
prediction = fluid.layers.fc(input=[fc_0], size=class_dim, act="softmax")
cost = fluid.layers.cross_entropy(input=prediction, label=label)
avg_cost = fluid.layers.mean(x=cost)
return data, label, prediction, avg_cost
def main(dict_path):
word_dict = load_vocab(dict_path)
word_dict["<unk>"] = len(word_dict)
dict_dim = len(word_dict)
print("The dictionary size is : %d" % dict_dim)
data, label, prediction, avg_cost = conv_net(dict_dim)
sgd_optimizer = fluid.optimizer.SGD(learning_rate=conf.learning_rate)
sgd_optimizer.minimize(avg_cost)
accuracy = fluid.evaluator.Accuracy(input=prediction, label=label)
inference_program = fluid.default_main_program().clone()
with fluid.program_guard(inference_program):
test_target = accuracy.metrics + accuracy.states
inference_program = fluid.io.get_inference_program(test_target)
# The training data set.
train_reader = paddle.batch(
paddle.reader.shuffle(
paddle.dataset.imdb.train(word_dict), buf_size=51200),
batch_size=conf.batch_size)
# The testing data set.
test_reader = paddle.batch(
paddle.reader.shuffle(
paddle.dataset.imdb.test(word_dict), buf_size=51200),
batch_size=conf.batch_size)
if conf.use_gpu:
place = fluid.CUDAPlace(0)
else:
place = fluid.CPUPlace()
exe = fluid.Executor(place)
feeder = fluid.DataFeeder(feed_list=[data, label], place=place)
exe.run(fluid.default_startup_program())
def test(exe):
accuracy.reset(exe)
for batch_id, data in enumerate(test_reader()):
input_seq = to_lodtensor(map(lambda x: x[0], data), place)
y_data = np.array(map(lambda x: x[1], data)).astype("int64")
y_data = y_data.reshape([-1, 1])
acc = exe.run(inference_program,
feed={"words": input_seq,
"label": y_data})
test_acc = accuracy.eval(exe)
return test_acc
total_time = 0.
for pass_id in xrange(conf.num_passes):
accuracy.reset(exe)
start_time = time.time()
for batch_id, data in enumerate(train_reader()):
cost_val, acc_val = exe.run(
fluid.default_main_program(),
feed=feeder.feed(data),
fetch_list=[avg_cost, accuracy.metrics[0]])
pass_acc = accuracy.eval(exe)
if batch_id and batch_id % conf.log_period == 0:
print("Pass id: %d, batch id: %d, cost: %f, pass_acc %f" %
(pass_id, batch_id, cost_val, pass_acc))
end_time = time.time()
total_time += (end_time - start_time)
pass_test_acc = test(exe)
print("Pass id: %d, test_acc: %f" % (pass_id, pass_test_acc))
print("Total train time: %f" % (total_time))
if __name__ == '__main__':
args = parse_args()
main(args.dict_path)
......@@ -106,7 +106,6 @@ Some important parameters of the NCE layer are explained below:
| param\_attr / bias\_attr | Used to set the parameter names. | Makes it convenient to load the parameters at prediction time; this is covered in the Prediction section. |
| num\_neg\_samples | Number of negative samples. | Controls the ratio of positive to negative samples; the valid range is [1, dictionary size - 1]. More negative samples make training slower but increase model accuracy. |
| neg\_distribution | Distribution used to generate negative labels; uniform by default. | Lets you control the per-class sampling weights when drawing negative samples. For example, if the positive label is "sunny" and the negative label "flood" should be distinguished more carefully during training, you can increase the sampling weight of the "flood" class. |
| act | Which activation function to use. | According to the principle of NCE, the sigmoid function should be used here. |
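A rough sketch of how these arguments are typically passed to `paddle.layer.nce` (the layer names and sizes below are illustrative placeholders, not taken from the model in this example):
```
import paddle.v2 as paddle

dict_size = 1000  # hypothetical vocabulary size

word = paddle.layer.data(
    name="word", type=paddle.data_type.integer_value(dict_size))
label = paddle.layer.data(
    name="label", type=paddle.data_type.integer_value(dict_size))
emb = paddle.layer.embedding(input=word, size=32)
hidden = paddle.layer.fc(input=emb, size=64, act=paddle.activation.Tanh())

cost = paddle.layer.nce(
    input=hidden,
    label=label,
    num_classes=dict_size,
    param_attr=paddle.attr.Param(name="nce_w"),
    bias_attr=paddle.attr.Param(name="nce_b"),
    num_neg_samples=5,                # number of negatives drawn per positive
    neg_distribution=None,            # None means a uniform distribution
    act=paddle.activation.Sigmoid())  # sigmoid, as NCE requires
```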
## Prediction
1. Run the following from the command line:
......
......@@ -82,7 +82,7 @@ SSD uses a convolutional neural network for "end-to-end" detection: the input is the original
The file has two fields: the first is the relative path of an image file, and the second is the relative path of the corresponding annotation file.
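For illustration only, a file list in this format might look like the following (the paths and extensions are hypothetical):
```
images/000001.jpg annotations/000001.xml
images/000002.jpg annotations/000002.xml
```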
### Preparing the pre-trained model
Download the pre-trained VGG-16 model. We provide a converted model; after downloading it, place it at ```vgg/vgg_model.tar.gz```.
Download the pre-trained VGG-16 model. We provide a converted model; download it from [http://paddlepaddle.bj.bcebos.com/model_zoo/detection/ssd_model/vgg_model.tar.gz](http://paddlepaddle.bj.bcebos.com/model_zoo/detection/ssd_model/vgg_model.tar.gz) and place it at ```vgg/vgg_model.tar.gz```.
### Model training
Run ```python train.py``` to start training. Note that this example only supports a CUDA GPU environment and cannot run on CPU, mainly because training on CPU is very slow; in practice GPUs are generally used for image tasks. This implementation hard-codes the use of cuDNN and does not provide a CPU version. Some key execution logic of ```train.py```:
......
......@@ -46,7 +46,7 @@ def train(topology,
word_dict = paddle.dataset.imdb.word_dict()
train_reader = paddle.batch(
paddle.reader.shuffle(
lambda: paddle.dataset.imdb.train(word_dict)(), buf_size=1000),
lambda: paddle.dataset.imdb.train(word_dict)(), buf_size=51200),
batch_size=100)
test_reader = paddle.batch(
lambda: paddle.dataset.imdb.test(word_dict)(), batch_size=100)
......@@ -83,16 +83,14 @@ def train(topology,
train_reader = paddle.batch(
paddle.reader.shuffle(
reader.train_reader(train_data_dir, word_dict, lbl_dict),
buf_size=1000),
buf_size=51200),
batch_size=batch_size)
if test_data_dir is not None:
# here, because training and testing data share a same format,
# we still use the reader.train_reader to read the testing data.
test_reader = paddle.batch(
paddle.reader.shuffle(
reader.train_reader(test_data_dir, word_dict, lbl_dict),
buf_size=1000),
reader.train_reader(test_data_dir, word_dict, lbl_dict),
batch_size=batch_size)
else:
test_reader = None
......
#!/usr/bin/env python
# -*- coding: utf-8 -*-
import sys
import argparse
import os
import cPickle
from utils import logger
"""
This script will output 2 files:
1. feature_dict.pkl
2. item_freq.pkl
"""
class FeatureGenerator(object):
"""
Encode feature values with low-frequency filtering.
"""
def __init__(self, feat_appear_limit=20):
"""
@feat_appear_limit: int
"""
self._dic = None # feature value --> id
self._count = None # numbers of appearances of feature values
self._feat_appear_limit = feat_appear_limit
def add_feat_val(self, feat_val):
"""
Add feature values and count numbers of its appearance.
"""
if self._count is None:
self._count = {'<unk>': 0}
if feat_val == "NULL":
feat_val = '<unk>'
if feat_val not in self._count:
self._count[feat_val] = 1
else:
self._count[feat_val] += 1
self._count['<unk>'] += 1
def _filter_feat(self):
"""
Filter low-frequency feature values.
"""
self._items = filter(lambda x: x[1] > self._feat_appear_limit,
self._count.items())
self._items.sort(key=lambda x: x[1], reverse=True)
def _build_dict(self):
"""
Build feature values --> ids dict.
"""
self._dic = {}
self._filter_feat()
for i in xrange(len(self._items)):
self._dic[self._items[i][0]] = i
self.dim = len(self._dic)
def get_feat_id(self, feat_val):
"""
Get id of feature value after encoding.
"""
# build dict
if self._dic is None:
self._build_dict()
# find id
if feat_val in self._dic:
return self._dic[feat_val]
else:
return self._dic['<unk>']
def get_dim(self):
"""
Get dim.
"""
# build dict
if self._dic is None:
self._build_dict()
return len(self._dic)
def get_dict(self):
"""
Get dict.
"""
# build dict
if self._dic is None:
self._build_dict()
return self._dic
def get_total_count(self):
"""
Compute total num of count.
"""
total_count = 0
for i in xrange(len(self._items)):
feat_val = self._items[i][0]
c = self._items[i][1]
total_count += c
return total_count
def count_iterator(self):
"""
Iterate feature values and its num of appearance.
"""
for i in xrange(len(self._items)):
yield self._items[i][0], self._items[i][1]
def __repr__(self):
"""
"""
return '<FeatureGenerator %d>' % self._dim
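# Usage sketch (values are hypothetical): a FeatureGenerator first accumulates
# counts with add_feat_val() and lazily builds its value -> id dict on the
# first lookup, keeping only values that appear more than feat_appear_limit
# times:
#
#   g = FeatureGenerator(feat_appear_limit=20)
#   for v in raw_city_values:
#       g.add_feat_val(v)
#   city_id = g.get_feat_id("beijing")   # rare/unseen values map to '<unk>'
#   dim = g.get_dim()                    # number of kept feature values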
def scan_build_dict(data_path, features_dict):
"""
Scan the raw data and add all feature values.
"""
logger.info('scan data set')
with open(data_path, 'r') as f:
for (line_id, line) in enumerate(f):
fields = line.strip('\n').split('\t')
user_id = fields[0]
province = fields[1]
features_dict['province'].add_feat_val(province)
city = fields[2]
features_dict['city'].add_feat_val(city)
item_infos = fields[3]
phone = fields[4]
features_dict['phone'].add_feat_val(phone)
for item_info in item_infos.split(";"):
item_info_array = item_info.split(":")
item = item_info_array[0]
features_dict['history_clicked_items'].add_feat_val(item)
features_dict['user_id'].add_feat_val(user_id)
category = item_info_array[1]
features_dict['history_clicked_categories'].add_feat_val(
category)
tags = item_info_array[2]
for tag in tags.split("_"):
features_dict['history_clicked_tags'].add_feat_val(tag)
def parse_args():
"""
parse arguments
"""
parser = argparse.ArgumentParser(
description="PaddlePaddle Youtube Recall Model Example")
parser.add_argument(
'--train_set_path',
type=str,
required=True,
help="path of the train set")
parser.add_argument(
'--output_dir', type=str, required=True, help="directory to output")
parser.add_argument(
'--feat_appear_limit',
type=int,
default=20,
help="the minimum number of feature values appears (default: 20)")
return parser.parse_args()
if __name__ == '__main__':
args = parse_args()
# check argument
assert os.path.exists(
args.train_set_path), 'The train set path does not exist.'
# features used
features = [
'user_id', 'province', 'city', 'phone', 'history_clicked_items',
'history_clicked_tags', 'history_clicked_categories'
]
# init feature generators
features_dict = {}
for feature in features:
features_dict[feature] = FeatureGenerator(
feat_appear_limit=args.feat_appear_limit)
# scan data for building dict
scan_build_dict(args.train_set_path, features_dict)
# generate feature_dict.pkl
feature_encoding_dict = {}
for feature in features:
d = features_dict[feature].get_dict()
feature_encoding_dict[feature] = d
logger.info('Feature:%s, dimension is %d' % (feature, len(d)))
output_dict_path = os.path.join(args.output_dir, 'feature_dict.pkl')
with open(output_dict_path, "w") as f:
cPickle.dump(feature_encoding_dict, f, -1)
# generate item_freq.pkl
item_freq_list = []
g = features_dict['history_clicked_items']
total_count = g.get_total_count()
for feat_val, feat_count in g.count_iterator():
item_freq_list.append(float(feat_count) / total_count)
logger.info('item_freq, dimension is %d' % (len(item_freq_list)))
output_item_freq_path = os.path.join(args.output_dir, 'item_freq.pkl')
with open(output_item_freq_path, "w") as f:
cPickle.dump(item_freq_list, f, -1)
logger.info('Complete!')
#!/usr/bin/env python
# -*- coding: utf-8 -*-
import os
import gzip
import paddle.v2 as paddle
import argparse
import cPickle
from reader import Reader
from network_conf import DNNmodel
from utils import logger
def parse_args():
"""
parse arguments
:return:
"""
parser = argparse.ArgumentParser(
description="PaddlePaddle Youtube Recall Model Example")
parser.add_argument(
'--infer_set_path',
type=str,
required=True,
help="path of the infer set")
parser.add_argument(
'--model_path', type=str, required=True, help="path of the model")
parser.add_argument(
'--feature_dict',
type=str,
required=True,
help="path of feature_dict.pkl")
parser.add_argument(
'--batch_size',
type=int,
default=50,
help="size of mini-batch (default:50)")
return parser.parse_args()
def infer():
"""
infer
"""
args = parse_args()
# check argument
assert os.path.exists(
args.infer_set_path), 'The infer_set_path path does not exist.'
assert os.path.exists(
args.model_path), 'The model_path path does not exist.'
assert os.path.exists(
args.feature_dict), 'The feature_dict path does not exist.'
paddle.init(use_gpu=False, trainer_count=1)
with open(args.feature_dict) as f:
feature_dict = cPickle.load(f)
nid_dict = feature_dict['history_clicked_items']
nid_to_word = dict((v, k) for k, v in nid_dict.items())
# load the trained model.
with gzip.open(args.model_path) as f:
parameters = paddle.parameters.Parameters.from_tar(f)
# build model
prediction_layer, fc = DNNmodel(
dnn_layer_dims=[256, 31], feature_dict=feature_dict,
is_infer=True).model_cost
inferer = paddle.inference.Inference(
output_layer=[prediction_layer, fc], parameters=parameters)
reader = Reader(feature_dict)
test_batch = []
for idx, item in enumerate(reader.infer(args.infer_set_path)):
test_batch.append(item)
if len(test_batch) == args.batch_size:
infer_a_batch(inferer, test_batch, nid_to_word)
test_batch = []
if len(test_batch):
infer_a_batch(inferer, test_batch, nid_to_word)
def infer_a_batch(inferer, test_batch, nid_to_word):
"""
input a batch of data and infer
"""
feeding = {
'user_id': 0,
'province': 1,
'city': 2,
'history_clicked_items': 3,
'history_clicked_categories': 4,
'history_clicked_tags': 5,
'phone': 6
}
probs = inferer.infer(
input=test_batch,
feeding=feeding,
field=["value"],
flatten_result=False)
for i, res in enumerate(zip(test_batch, probs[0], probs[1])):
softmax_output = res[1]
sort_nid = res[1].argsort()
# print the top 30 recommended items
ret = ""
for j in range(1, 31):
item_id = sort_nid[-1 * j]
item_id_to_word = nid_to_word[item_id]
ret += "%s:%.6f," \
% (item_id_to_word, softmax_output[item_id])
print ret.rstrip(",")
if __name__ == "__main__":
infer()
#!/usr/bin/env python
# -*- coding: utf-8 -*-
import os
import gzip
import paddle.v2 as paddle
import argparse
import cPickle
from reader import Reader
from network_conf import DNNmodel
from utils import logger
import numpy as np
def parse_args():
"""
parse arguments
:return:
"""
parser = argparse.ArgumentParser(
description="PaddlePaddle Youtube Recall Model Example")
parser.add_argument(
'--model_path', type=str, required=True, help="path of the model")
parser.add_argument(
'--feature_dict',
type=str,
required=True,
help="path of feature_dict.pkl")
return parser.parse_args()
def infer_user():
"""
infer_user
"""
args = parse_args()
# check argument
assert os.path.exists(
args.model_path), 'The model_path path does not exist.'
assert os.path.exists(
args.feature_dict), 'The feature_dict path does not exist.'
paddle.init(use_gpu=False, trainer_count=1)
with open(args.feature_dict) as f:
feature_dict = cPickle.load(f)
nid_dict = feature_dict['history_clicked_items']
nid_to_word = dict((v, k) for k, v in nid_dict.items())
# load the trained model.
with gzip.open(args.model_path) as f:
parameters = paddle.parameters.Parameters.from_tar(f)
parameters.set('_proj_province', \
np.zeros(shape=parameters.get('_proj_province').shape))
parameters.set('_proj_city', \
np.zeros(shape=parameters.get('_proj_city').shape))
parameters.set('_proj_phone', \
np.zeros(shape=parameters.get('_proj_phone').shape))
parameters.set('_proj_history_clicked_items', \
np.zeros(shape= parameters.get('_proj_history_clicked_items').shape))
parameters.set('_proj_history_clicked_categories', \
np.zeros(shape= parameters.get('_proj_history_clicked_categories').shape))
parameters.set('_proj_history_clicked_tags', \
np.zeros(shape= parameters.get('_proj_history_clicked_tags').shape))
# build model
prediction_layer, fc = DNNmodel(
dnn_layer_dims=[256, 31], feature_dict=feature_dict,
is_infer=True).model_cost
inferer = paddle.inference.Inference(
output_layer=[prediction_layer, fc], parameters=parameters)
reader = Reader(feature_dict)
test_batch = []
for idx, item in enumerate(
reader.infer_user(['USER_ID_0', 'USER_ID_981', 'USER_ID_310806'])):
test_batch.append(item)
infer_a_batch(inferer, test_batch, nid_to_word)
def infer_a_batch(inferer, test_batch, nid_to_word):
"""
input a batch of data and infer
"""
feeding = {
'user_id': 0,
'province': 1,
'city': 2,
'history_clicked_items': 3,
'history_clicked_categories': 4,
'history_clicked_tags': 5,
'phone': 6
}
probs = inferer.infer(
input=test_batch,
feeding=feeding,
field=["value"],
flatten_result=False)
for i, res in enumerate(zip(test_batch, probs[0], probs[1])):
softmax_output = res[1]
sort_nid = res[1].argsort()
# print the top 30 recommended items
ret = ""
for j in range(1, 31):
item_id = sort_nid[-1 * j]
item_id_to_word = nid_to_word[item_id]
ret += "%s:%.6f," \
% (item_id_to_word, softmax_output[item_id])
print ret.rstrip(",")
if __name__ == "__main__":
infer_user()
#!/usr/bin/env python
# -*- coding: utf-8 -*-
import os
import gzip
import paddle.v2 as paddle
import argparse
import cPickle
from reader import Reader
from network_conf import DNNmodel
from utils import logger
import numpy as np
import math
def parse_args():
"""
parse arguments
:return:
"""
parser = argparse.ArgumentParser(
description="PaddlePaddle Youtube Recall Model Example")
parser.add_argument(
'--model_path', type=str, required=True, help="path of the model")
parser.add_argument(
'--feature_dict',
type=str,
required=True,
help="path of feature_dict.pkl")
return parser.parse_args()
def get_item_vec_from_softmax(nce_w, nce_b):
"""
get item vectors from softmax parameter
"""
if nce_w is None or nce_b is None:
return None
vector = []
total_items_num = nce_w.shape[0]
if total_items_num != nce_b.shape[1]:
return None
dim_vector = nce_w.shape[1] + 1
for i in range(0, total_items_num):
vector.append([])
vector[i].append(nce_b[0][i])
for j in range(1, dim_vector):
vector[i].append(nce_w[i][j - 1])
return vector
def convt_simple_lsh(vector):
"""
do simple lsh conversion
"""
max_norm = 0
num_of_vec = len(vector)
for i in range(0, num_of_vec):
norm = np.linalg.norm(vector[i])
if norm > max_norm:
max_norm = norm
for i in range(0, num_of_vec):
vector[i].append(
math.sqrt(
math.pow(max_norm, 2) - math.pow(np.linalg.norm(vector[i]), 2)))
return vector
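# Note: convt_simple_lsh() appends sqrt(max_norm^2 - ||v||^2) to every item
# vector so that all augmented vectors share the same norm (max_norm). With
# equal norms, ranking by inner product against the original vectors is
# equivalent to a nearest-neighbour / cosine search over the augmented
# vectors, which is the usual reduction used for LSH-style retrieval.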
def item_vector():
"""
get item vectors
"""
args = parse_args()
# check argument
assert os.path.exists(
args.model_path), 'The model_path path does not exist.'
assert os.path.exists(
args.feature_dict), 'The feature_dict path does not exist.'
paddle.init(use_gpu=False, trainer_count=1)
with open(args.feature_dict) as f:
feature_dict = cPickle.load(f)
# load the trained model.
with gzip.open(args.model_path) as f:
parameters = paddle.parameters.Parameters.from_tar(f)
nid_dict = feature_dict['history_clicked_items']
nid_to_word = dict((v, k) for k, v in nid_dict.items())
nce_w = parameters.get("nce_w")
nce_b = parameters.get("nce_b")
item_vector = convt_simple_lsh(get_item_vec_from_softmax(nce_w, nce_b))
for i in range(0, len(item_vector)):
itemid = nid_to_word[i]
print itemid + "\t" + ",".join(map(str, item_vector[i]))
if __name__ == "__main__":
item_vector()
#!/usr/bin/env python
# -*- coding: utf-8 -*-
import paddle.v2 as paddle
import cPickle
class DNNmodel(object):
"""
Deep Neural Networks for YouTube candidate generation
"""
def __init__(self,
dnn_layer_dims=None,
feature_dict=None,
item_freq=None,
is_infer=False):
"""
initialize model
@dnn_layer_dims: dimension of each hidden layer
@feature_dict: dictionary of encoded feature
@item_freq: list of item frequencies, used as the NCE negative sampling distribution
@is_infer: if infer mode
"""
self._dnn_layer_dims = dnn_layer_dims
self._feature_dict = feature_dict
self._item_freq = item_freq
self._is_infer = is_infer
# build model
self._build_input_layer()
self._build_embedding_layer()
self.model_cost = self._build_dnn_model()
def _build_input_layer(self):
"""
build input layer
"""
self._history_clicked_items = paddle.layer.data(
name="history_clicked_items",
type=paddle.data_type.integer_value_sequence(
len(self._feature_dict['history_clicked_items'])))
self._history_clicked_categories = paddle.layer.data(
name="history_clicked_categories",
type=paddle.data_type.integer_value_sequence(
len(self._feature_dict['history_clicked_categories'])))
self._history_clicked_tags = paddle.layer.data(
name="history_clicked_tags",
type=paddle.data_type.integer_value_sequence(
len(self._feature_dict['history_clicked_tags'])))
self._user_id = paddle.layer.data(
name="user_id",
type=paddle.data_type.integer_value(
len(self._feature_dict['user_id'])))
self._province = paddle.layer.data(
name="province",
type=paddle.data_type.integer_value(
len(self._feature_dict['province'])))
self._city = paddle.layer.data(
name="city",
type=paddle.data_type.integer_value(
len(self._feature_dict['city'])))
self._phone = paddle.layer.data(
name="phone",
type=paddle.data_type.integer_value(
len(self._feature_dict['phone'])))
self._target_item = paddle.layer.data(
name="target_item",
type=paddle.data_type.integer_value(
len(self._feature_dict['history_clicked_items'])))
def _create_emb_attr(self, name):
"""
create embedding parameter
"""
return paddle.attr.Param(
name=name,
initial_std=0.001,
learning_rate=1,
l2_rate=0,
sparse_update=False)
def _build_embedding_layer(self):
"""
build embedding layer
"""
self._user_id_emb = paddle.layer.embedding(
input=self._user_id,
size=64,
param_attr=self._create_emb_attr('_proj_user_id'))
self._province_emb = paddle.layer.embedding(
input=self._province,
size=8,
param_attr=self._create_emb_attr('_proj_province'))
self._city_emb = paddle.layer.embedding(
input=self._city,
size=16,
param_attr=self._create_emb_attr('_proj_city'))
self._phone_emb = paddle.layer.embedding(
input=self._phone,
size=16,
param_attr=self._create_emb_attr('_proj_phone'))
self._history_clicked_items_emb = paddle.layer.embedding(
input=self._history_clicked_items,
size=64,
param_attr=self._create_emb_attr('_proj_history_clicked_items'))
self._history_clicked_categories_emb = paddle.layer.embedding(
input=self._history_clicked_categories,
size=8,
param_attr=self._create_emb_attr(
'_proj_history_clicked_categories'))
self._history_clicked_tags_emb = paddle.layer.embedding(
input=self._history_clicked_tags,
size=64,
param_attr=self._create_emb_attr('_proj_history_clicked_tags'))
def _build_dnn_model(self):
"""
build dnn model
"""
self._rnn_cell = paddle.networks.simple_lstm(
input=self._history_clicked_items_emb, size=64)
self._lstm_last = paddle.layer.pooling(
input=self._rnn_cell, pooling_type=paddle.pooling.Max())
self._avg_emb_cats = paddle.layer.pooling(
input=self._history_clicked_categories_emb,
pooling_type=paddle.pooling.Avg())
self._avg_emb_tags = paddle.layer.pooling(
input=self._history_clicked_tags_emb,
pooling_type=paddle.pooling.Avg())
self._fc_0 = paddle.layer.fc(
name="Relu1",
input=[
self._lstm_last, self._user_id_emb, self._province_emb,
self._city_emb, self._avg_emb_cats, self._avg_emb_tags,
self._phone_emb
],
size=self._dnn_layer_dims[0],
act=paddle.activation.Relu())
self._fc_1 = paddle.layer.fc(name="Relu2",
input=self._fc_0,
size=self._dnn_layer_dims[1],
act=paddle.activation.Relu())
if not self._is_infer:
return paddle.layer.nce(
input=self._fc_1,
label=self._target_item,
num_classes=len(self._feature_dict['history_clicked_items']),
param_attr=paddle.attr.Param(name="nce_w"),
bias_attr=paddle.attr.Param(name="nce_b"),
num_neg_samples=5,
neg_distribution=self._item_freq)
else:
self.prediction_layer = paddle.layer.mixed(
size=len(self._feature_dict['history_clicked_items']),
input=paddle.layer.trans_full_matrix_projection(
self._fc_1, param_attr=paddle.attr.Param(name="nce_w")),
act=paddle.activation.Softmax(),
bias_attr=paddle.attr.Param(name="nce_b"))
return self.prediction_layer, self._fc_1
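# Note: in infer mode the parameters learned with NCE ("nce_w", "nce_b") are
# reused to build a full softmax over all items through
# trans_full_matrix_projection, so training-time negative sampling and
# inference-time scoring share the same weights.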
if __name__ == "__main__":
# this is to test and debug the network topology definition.
# please set the hyper-parameters as needed.
item_freq_path = "./output/item_freq.pkl"
with open(item_freq_path) as f:
item_freq = cPickle.load(f)
feature_dict_path = "./output/feature_dict.pkl"
with open(feature_dict_path) as f:
feature_dict = cPickle.load(f)
a = DNNmodel(
dnn_layer_dims=[256, 31],
feature_dict=feature_dict,
item_freq=item_freq,
is_infer=False)
#!/usr/bin/env python
# -*- coding: utf-8 -*-
import sys
from utils import logger
from utils import TaskMode
class Reader(object):
"""
Reader
"""
def __init__(self, feature_dict=None, window_size=20):
"""
init
@window_size: window_size
"""
self._feature_dict = feature_dict
self._window_size = window_size
def train(self, path):
"""
load train set
@path: train set path
"""
logger.info("start train reader from %s" % path)
mode = TaskMode.create_train()
return self._reader(path, mode)
def test(self, path):
"""
load test set
@path: test set path
"""
logger.info("start test reader from %s" % path)
mode = TaskMode.create_test()
return self._reader(path, mode)
def infer(self, path):
"""
load infer set
@path: infer set path
"""
logger.info("start infer reader from %s" % path)
mode = TaskMode.create_infer()
return self._reader(path, mode)
def infer_user(self, user_list):
"""
load user set to infer
@user_list: user list
"""
return self._reader_user(user_list)
def _reader(self, path, mode):
"""
parse data set
"""
USER_ID_UNK = self._feature_dict['user_id'].get('<unk>')
PROVINCE_UNK = self._feature_dict['province'].get('<unk>')
CITY_UNK = self._feature_dict['city'].get('<unk>')
ITEM_UNK = self._feature_dict['history_clicked_items'].get('<unk>')
CATEGORY_UNK = self._feature_dict['history_clicked_categories'].get(
'<unk>')
TAG_UNK = self._feature_dict['history_clicked_tags'].get('<unk>')
PHONE_UNK = self._feature_dict['phone'].get('<unk>')
with open(path) as f:
for line in f:
fields = line.strip('\n').split('\t')
user_id = self._feature_dict['user_id'].get(fields[0],
USER_ID_UNK)
province = self._feature_dict['province'].get(fields[1],
PROVINCE_UNK)
city = self._feature_dict['city'].get(fields[2], CITY_UNK)
item_infos = fields[3]
phone = self._feature_dict['phone'].get(fields[4], PHONE_UNK)
history_clicked_items_all = []
history_clicked_tags_all = []
history_clicked_categories_all = []
for item_info in item_infos.split(';'):
item_info_array = item_info.split(':')
item = item_info_array[0]
item_encoded_id = self._feature_dict['history_clicked_items'].get(\
item, ITEM_UNK)
if item_encoded_id != ITEM_UNK:
history_clicked_items_all.append(item_encoded_id)
category = item_info_array[1]
history_clicked_categories_all.append(
self._feature_dict['history_clicked_categories'].get(\
category, CATEGORY_UNK))
tags = item_info_array[2]
tag_split = map(str, [self._feature_dict['history_clicked_tags'].get(\
tag, TAG_UNK) \
for tag in tags.strip().split("_")])
history_clicked_tags_all.append("_".join(tag_split))
if not mode.is_infer():
history_clicked_items_all.insert(0, 0)
history_clicked_tags_all.insert(0, "0")
history_clicked_categories_all.insert(0, 0)
for i in range(1, len(history_clicked_items_all)):
start = max(0, i - self._window_size)
history_clicked_items = history_clicked_items_all[start:
i]
history_clicked_categories = history_clicked_categories_all[
start:i]
history_clicked_tags_str = history_clicked_tags_all[
start:i]
history_clicked_tags = []
for tags_a in history_clicked_tags_str:
for tag in tags_a.split("_"):
history_clicked_tags.append(int(tag))
target_item = history_clicked_items_all[i]
yield user_id, province, city, \
history_clicked_items, history_clicked_categories, \
history_clicked_tags, phone, target_item
else:
history_clicked_items = history_clicked_items_all
history_clicked_categories = history_clicked_categories_all
history_clicked_tags_str = history_clicked_tags_all
history_clicked_tags = []
for tags_a in history_clicked_tags_str:
for tag in tags_a.split("_"):
history_clicked_tags.append(int(tag))
yield user_id, province, city, \
history_clicked_items, history_clicked_categories, \
history_clicked_tags, phone
def _reader_user(self, user_list):
"""
parse user list
"""
USER_ID_UNK = self._feature_dict['user_id'].get('<unk>')
for user in user_list:
user_id = self._feature_dict['user_id'].get(user, USER_ID_UNK)
yield user_id, 0, 0, [0], [0], [0], 0
if __name__ == "__main__":
# this is to test and debug the reader function
train_data = sys.argv[1]
feature_dict = sys.argv[2]
window_size = int(sys.argv[3])
import cPickle
with open(feature_dict) as f:
feature_dict = cPickle.load(f)
r = Reader(feature_dict, window_size)
for dat in r.train(train_data):
print dat
#!/usr/bin/env python
# -*- coding: utf-8 -*-
import os
import gzip
import paddle.v2 as paddle
import argparse
import cPickle
from reader import Reader
from network_conf import DNNmodel
from utils import logger
def parse_args():
"""
parse arguments
"""
parser = argparse.ArgumentParser(
description="PaddlePaddle Youtube Recall Model Example")
parser.add_argument(
'--train_set_path',
type=str,
required=True,
help="path of the train set")
parser.add_argument(
'--test_set_path', type=str, required=True, help="path of the test set")
parser.add_argument(
'--model_output_dir',
type=str,
required=True,
help="directory to output")
parser.add_argument(
'--feature_dict',
type=str,
required=True,
help="path of feature_dict.pkl")
parser.add_argument(
'--item_freq', type=str, required=True, help="path of item_freq.pkl ")
parser.add_argument(
'--window_size', type=int, default=20, help="window size(default: 20)")
parser.add_argument(
'--num_passes', type=int, default=1, help="number of passes to train")
parser.add_argument(
'--batch_size',
type=int,
default=50,
help="size of mini-batch (default:50)")
return parser.parse_args()
def train():
"""
train
"""
args = parse_args()
# check argument
assert os.path.exists(
args.train_set_path), 'The train_set_path path does not exist.'
assert os.path.exists(
args.test_set_path), 'The test_set_path path does not exist.'
assert os.path.exists(
args.feature_dict), 'The feature_dict path does not exist.'
assert os.path.exists(args.item_freq), 'The item_freq path does not exist.'
assert os.path.exists(
args.model_output_dir), 'The model_output_dir path does not exist.'
paddle.init(use_gpu=False, trainer_count=1)
with open(args.feature_dict) as f:
feature_dict = cPickle.load(f)
with open(args.item_freq) as f:
item_freq = cPickle.load(f)
feeding = {
'user_id': 0,
'province': 1,
'city': 2,
'history_clicked_items': 3,
'history_clicked_categories': 4,
'history_clicked_tags': 5,
'phone': 6,
'target_item': 7
}
optimizer = paddle.optimizer.AdaGrad(
learning_rate=1e-1,
regularization=paddle.optimizer.L2Regularization(rate=1e-3))
cost = DNNmodel(
dnn_layer_dims=[256, 31],
feature_dict=feature_dict,
item_freq=item_freq,
is_infer=False).model_cost
parameters = paddle.parameters.create(cost)
trainer = paddle.trainer.SGD(cost, parameters, optimizer)
def event_handler(event):
"""
event handler
"""
if isinstance(event, paddle.event.EndIteration):
if event.batch_id and not event.batch_id % 10:
logger.info("Pass %d, Batch %d, Cost %f" %
(event.pass_id, event.batch_id, event.cost))
elif isinstance(event, paddle.event.EndPass):
save_path = os.path.join(args.model_output_dir,
"model_pass_%05d.tar.gz" % event.pass_id)
logger.info("Save model into %s ..." % save_path)
with gzip.open(save_path, "w") as f:
trainer.save_parameter_to_tar(f)
reader = Reader(feature_dict, args.window_size)
trainer.train(
paddle.batch(
paddle.reader.shuffle(
lambda: reader.train(args.train_set_path), buf_size=7000),
args.batch_size),
num_passes=args.num_passes,
feeding=feeding,
event_handler=event_handler)
if __name__ == "__main__":
train()
#!/usr/bin/env python
# -*- coding: utf-8 -*-
import os
import gzip
import paddle.v2 as paddle
import argparse
import cPickle
from reader import Reader
from network_conf import DNNmodel
from utils import logger
import numpy as np
def parse_args():
"""
parse arguments
"""
parser = argparse.ArgumentParser(
description="PaddlePaddle Youtube Recall Model Example")
parser.add_argument(
'--infer_set_path',
type=str,
required=True,
help="path of the infer set")
parser.add_argument(
'--model_path', type=str, required=True, help="path of the model")
parser.add_argument(
'--feature_dict',
type=str,
required=True,
help="path of feature_dict.pkl")
parser.add_argument(
'--batch_size',
type=int,
default=50,
help="size of mini-batch (default:50)")
return parser.parse_args()
def user_vector():
"""
get user vectors
"""
args = parse_args()
# check argument
assert os.path.exists(
args.infer_set_path), 'The infer_set_path path does not exist.'
assert os.path.exists(
args.model_path), 'The model_path path does not exist.'
assert os.path.exists(
args.feature_dict), 'The feature_dict path does not exist.'
paddle.init(use_gpu=False, trainer_count=1)
with open(args.feature_dict) as f:
feature_dict = cPickle.load(f)
# load the trained model.
with gzip.open(args.model_path) as f:
parameters = paddle.parameters.Parameters.from_tar(f)
# build model
prediction_layer, fc = DNNmodel(
dnn_layer_dims=[256, 31], feature_dict=feature_dict,
is_infer=True).model_cost
inferer = paddle.inference.Inference(
output_layer=[prediction_layer, fc], parameters=parameters)
reader = Reader(feature_dict)
test_batch = []
for idx, item in enumerate(reader.infer(args.infer_set_path)):
test_batch.append(item)
if len(test_batch) == args.batch_size:
get_a_batch_user_vector(inferer, test_batch)
test_batch = []
if len(test_batch):
get_a_batch_user_vector(inferer, test_batch)
def get_a_batch_user_vector(inferer, test_batch):
"""
input a batch of data and get user vectors
"""
feeding = {
'user_id': 0,
'province': 1,
'city': 2,
'history_clicked_items': 3,
'history_clicked_categories': 4,
'history_clicked_tags': 5,
'phone': 6
}
probs = inferer.infer(
input=test_batch,
feeding=feeding,
field=["value"],
flatten_result=False)
for i, res in enumerate(zip(probs[1])):
# do simple lsh conversion
user_vector = [1.000]
for i in res[0]:
user_vector.append(i)
user_vector.append(0.000)
norm = np.linalg.norm(user_vector)
user_vector_norm = [str(_ / norm) for _ in user_vector]
print ",".join(user_vector_norm)
if __name__ == "__main__":
user_vector()
#!/usr/bin/env python
# -*- coding: utf-8 -*-
import logging
logging.basicConfig()
logger = logging.getLogger("paddle")
logger.setLevel(logging.INFO)
class TaskMode(object):
"""
TaskMode
"""
TRAIN_MODE = 0
TEST_MODE = 1
INFER_MODE = 2
def __init__(self, mode):
"""
:param mode:
"""
self.mode = mode
def is_train(self):
"""
:return:
"""
return self.mode == self.TRAIN_MODE
def is_test(self):
"""
:return:
"""
return self.mode == self.TEST_MODE
def is_infer(self):
"""
:return:
"""
return self.mode == self.INFER_MODE
@staticmethod
def create_train():
"""
:return:
"""
return TaskMode(TaskMode.TRAIN_MODE)
@staticmethod
def create_test():
"""
:return:
"""
return TaskMode(TaskMode.TEST_MODE)
@staticmethod
def create_infer():
"""
:return:
"""
return TaskMode(TaskMode.INFER_MODE)