Commit 440ab2f3 authored by Z zhxfl

Merge remote-tracking branch 'upstream/develop' into fix-812

data_dir=~/.cache/paddle/dataset/speech/deep_asr_data/aishell
data_url='http://deep-asr-data.gz.bcebos.com/aishell_data.tar.gz'
lst_url='http://deep-asr-data.gz.bcebos.com/aishell_lst.tar.gz'
md5=e017d858d9e509c8a84b73f673f08b9a
if [ ! -e $data_dir ]; then
mkdir -p $data_dir
fi
if [ ! -e $data_dir/aishell_data.tar.gz ]; then
echo "Download $data_dir/aishell_data.tar.gz ..."
wget -c -P $data_dir $data_url
else
echo "Skip downloading for $data_dir/aishell_data.tar.gz has already existed!"
fi
echo "Checking md5 sum ..."
md5sum_tmp=`md5sum $data_dir/aishell_data.tar.gz | cut -d ' ' -f1`
if [ "$md5sum_tmp" != "$md5" ]; then
echo "Md5sum check failed, please remove and redownload $data_dir/aishell_data.tar.gz"
exit 1
fi
echo "Untar aishell_data.tar.gz ..."
tar xzf $data_dir/aishell_data.tar.gz -C $data_dir
if [ ! -e data ]; then
mkdir data
fi
echo "Download and untar lst files ..."
wget -c -P data $lst_url
tar xvf data/aishell_lst.tar.gz -C data
ln -s $data_dir data/aishell
export CUDA_VISIBLE_DEVICES=2,3,4,5
python -u ../../tools/profile.py --feature_lst data/train_feature.lst \
--label_lst data/train_label.lst \
--mean_var data/aishell/global_mean_var \
--parallel \
--frame_dim 2640 \
--class_num 101
export CUDA_VISIBLE_DEVICES=2,3,4,5
python -u ../../train.py --train_feature_lst data/train_feature.lst \
--train_label_lst data/train_label.lst \
--val_feature_lst data/val_feature.lst \
--val_label_lst data/val_label.lst \
--mean_var data/aishell/global_mean_var \
--checkpoints checkpoints \
--frame_dim 2640 \
--class_num 101 \
--infer_models '' \
--batch_size 128 \
--learning_rate 0.00016 \
--parallel
@@ -18,6 +18,7 @@ from decoder.post_decode_faster import Decoder
from data_utils.util import lodtensor_to_ndarray
from model_utils.model import stacked_lstmp_model
from data_utils.util import split_infer_result
from tools.error_rate import char_errors
def parse_args():
@@ -87,6 +88,11 @@ def parse_args():
type=str,
default='data/infer_label.lst',
help='The label list path for inference. (default: %(default)s)')
parser.add_argument(
'--ref_txt',
type=str,
default='data/text.test',
help='The reference text for decoding. (default: %(default)s)')
parser.add_argument(
'--checkpoint',
type=str,
@@ -112,6 +118,11 @@ def parse_args():
type=float,
default=0.2,
help="Scaling factor for acoustic likelihoods. (default: %(default)f)")
parser.add_argument(
'--target_trans',
type=str,
default="./decoder/target_trans.txt",
help="The path to target transcription. (default: %(default)s)")
args = parser.parse_args()
return args
@@ -123,6 +134,18 @@ def print_arguments(args):
print('------------------------------------------------')
def get_trg_trans(args):
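"""Load the target transcriptions into a dict keyed by utterance name.
Each line of args.target_trans is expected to look like
'<utterance_key> <token> <token> ...'; the tokens are concatenated into
one reference string (format inferred from the parsing below)."""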
trans_dict = {}
with open(args.target_trans) as trg_trans:
line = trg_trans.readline()
while line:
items = line.strip().split()
key = items[0]
trans_dict[key] = ''.join(items[1:])
line = trg_trans.readline()
return trans_dict
def infer_from_ckpt(args):
"""Inference by using checkpoint."""
@@ -146,6 +169,7 @@ def infer_from_ckpt(args):
exe = fluid.Executor(place)
exe.run(fluid.default_startup_program())
trg_trans = get_trg_trans(args)
# load checkpoint.
fluid.io.load_persistables(exe, args.checkpoint)
@@ -167,11 +191,12 @@ def infer_from_ckpt(args):
args.infer_label_lst)
infer_data_reader.set_transformers(ltrans)
infer_costs, infer_accs = [], []
total_edit_dist, total_ref_len = 0.0, 0
for batch_id, batch_data in enumerate(
infer_data_reader.batch_iterator(args.batch_size,
args.minimum_batch_size)):
# load_data
(features, labels, lod, name_lst) = batch_data
feature_t.set(features, place)
feature_t.set_lod([lod])
label_t.set(labels, place)
@@ -187,11 +212,19 @@ def infer_from_ckpt(args):
probs, lod = lodtensor_to_ndarray(results[0])
infer_batch = split_infer_result(probs, lod)
for index, sample in enumerate(infer_batch):
key = name_lst[index]
ref = trg_trans[key]
hyp = decoder.decode(key, sample)
edit_dist, ref_len = char_errors(ref.decode("utf8"), hyp)
total_edit_dist += edit_dist
total_ref_len += ref_len
print(key + "|Ref:", ref)
print(key + "|Hyp:", hyp.encode("utf8"))
print("Instance CER: ", edit_dist / ref_len)
print("Total CER = %f" % (total_edit_dist / total_ref_len))
if __name__ == '__main__':
......
# -*- coding: utf-8 -*-
"""This module provides functions to calculate error rate in different level.
e.g. wer for word-level, cer for char-level.
"""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import numpy as np
def _levenshtein_distance(ref, hyp):
"""Levenshtein distance is a string metric for measuring the difference
between two sequences. Informally, the levenshtein disctance is defined as
the minimum number of single-character edits (substitutions, insertions or
deletions) required to change one word into the other. We can naturally
extend the edits to word level when calculate levenshtein disctance for
two sentences.
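For example, the distance between 'kitten' and 'sitting' is 3
(two substitutions and one insertion).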
"""
m = len(ref)
n = len(hyp)
# special case
if ref == hyp:
return 0
if m == 0:
return n
if n == 0:
return m
if m < n:
ref, hyp = hyp, ref
m, n = n, m
# use O(min(m, n)) space
distance = np.zeros((2, n + 1), dtype=np.int32)
# initialize distance matrix
for j in xrange(n + 1):
distance[0][j] = j
# calculate levenshtein distance
for i in xrange(1, m + 1):
prev_row_idx = (i - 1) % 2
cur_row_idx = i % 2
distance[cur_row_idx][0] = i
for j in xrange(1, n + 1):
if ref[i - 1] == hyp[j - 1]:
distance[cur_row_idx][j] = distance[prev_row_idx][j - 1]
else:
s_num = distance[prev_row_idx][j - 1] + 1
i_num = distance[cur_row_idx][j - 1] + 1
d_num = distance[prev_row_idx][j] + 1
distance[cur_row_idx][j] = min(s_num, i_num, d_num)
return distance[m % 2][n]
def word_errors(reference, hypothesis, ignore_case=False, delimiter=' '):
"""Compute the levenshtein distance between reference sequence and
hypothesis sequence in word-level.
:param reference: The reference sentence.
:type reference: basestring
:param hypothesis: The hypothesis sentence.
:type hypothesis: basestring
:param ignore_case: Whether case-sensitive or not.
:type ignore_case: bool
:param delimiter: Delimiter of input sentences.
:type delimiter: char
:return: Levenshtein distance and word number of reference sentence.
:rtype: list
"""
if ignore_case:
reference = reference.lower()
hypothesis = hypothesis.lower()
ref_words = filter(None, reference.split(delimiter))
hyp_words = filter(None, hypothesis.split(delimiter))
edit_distance = _levenshtein_distance(ref_words, hyp_words)
return float(edit_distance), len(ref_words)
def char_errors(reference, hypothesis, ignore_case=False, remove_space=False):
"""Compute the levenshtein distance between reference sequence and
hypothesis sequence in char-level.
:param reference: The reference sentence.
:type reference: basestring
:param hypothesis: The hypothesis sentence.
:type hypothesis: basestring
:param ignore_case: Whether case-sensitive or not.
:type ignore_case: bool
:param remove_space: Whether remove internal space characters
:type remove_space: bool
:return: Levenshtein distance and length of reference sentence.
:rtype: list
"""
if ignore_case:
reference = reference.lower()
hypothesis = hypothesis.lower()
join_char = ' '
if remove_space:
join_char = ''
reference = join_char.join(filter(None, reference.split(' ')))
hypothesis = join_char.join(filter(None, hypothesis.split(' ')))
edit_distance = _levenshtein_distance(reference, hypothesis)
return float(edit_distance), len(reference)
def wer(reference, hypothesis, ignore_case=False, delimiter=' '):
"""Calculate word error rate (WER). WER compares reference text and
hypothesis text in word-level. WER is defined as:
.. math::
WER = (Sw + Dw + Iw) / Nw
where
.. code-block:: text
Sw is the number of words substituted,
Dw is the number of words deleted,
Iw is the number of words inserted,
Nw is the number of words in the reference
We can use the Levenshtein distance to calculate WER. Note that empty
items are removed when splitting sentences by the delimiter.
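For example, reference 'the cat sat' vs. hypothesis 'the cat sit' gives
WER = 1 / 3 (one substitution over three reference words).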
:param reference: The reference sentence.
:type reference: basestring
:param hypothesis: The hypothesis sentence.
:type hypothesis: basestring
:param ignore_case: Whether case-sensitive or not.
:type ignore_case: bool
:param delimiter: Delimiter of input sentences.
:type delimiter: char
:return: Word error rate.
:rtype: float
:raises ValueError: If word number of reference is zero.
"""
edit_distance, ref_len = word_errors(reference, hypothesis, ignore_case,
delimiter)
if ref_len == 0:
raise ValueError("Reference's word number should be greater than 0.")
wer = float(edit_distance) / ref_len
return wer
def cer(reference, hypothesis, ignore_case=False, remove_space=False):
"""Calculate charactor error rate (CER). CER compares reference text and
hypothesis text in char-level. CER is defined as:
.. math::
CER = (Sc + Dc + Ic) / Nc
where
.. code-block:: text
Sc is the number of characters substituted,
Dc is the number of characters deleted,
Ic is the number of characters inserted
Nc is the number of characters in the reference
We can use the Levenshtein distance to calculate CER. Chinese input should
be encoded as unicode. Note that leading and trailing space characters are
stripped, and multiple consecutive space characters within a sentence are
replaced by a single space character.
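For example, reference 'abcd' vs. hypothesis 'abd' gives CER = 1 / 4
(one deletion over four reference characters).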
:param reference: The reference sentence.
:type reference: basestring
:param hypothesis: The hypothesis sentence.
:type hypothesis: basestring
:param ignore_case: Whether case-sensitive or not.
:type ignore_case: bool
:param remove_space: Whether remove internal space characters
:type remove_space: bool
:return: Character error rate.
:rtype: float
:raises ValueError: If the reference length is zero.
"""
edit_distance, ref_len = char_errors(reference, hypothesis, ignore_case,
remove_space)
if ref_len == 0:
raise ValueError("Length of reference should be greater than 0.")
cer = float(edit_distance) / ref_len
return cer
#-*- coding: utf-8 -*-
#File: DQN.py
from agent import Model
import gym
import argparse
from tqdm import tqdm
from expreplay import ReplayMemory, Experience
import numpy as np
import os
UPDATE_FREQ = 4
MEMORY_WARMUP_SIZE = 1000
def run_episode(agent, env, exp, train_or_test):
assert train_or_test in ['train', 'test'], train_or_test
total_reward = 0
state = env.reset()
for step in range(200):
action = agent.act(state, train_or_test)
next_state, reward, isOver, _ = env.step(action)
if train_or_test == 'train':
exp.append(Experience(state, action, reward, isOver))
# train model
# start training
if len(exp) > MEMORY_WARMUP_SIZE:
batch_idx = np.random.randint(
len(exp) - 1, size=(args.batch_size))
if step % UPDATE_FREQ == 0:
batch_state, batch_action, batch_reward, \
batch_next_state, batch_isOver = exp.sample(batch_idx)
agent.train(batch_state, batch_action, batch_reward, \
batch_next_state, batch_isOver)
total_reward += reward
state = next_state
if isOver:
break
return total_reward
def train_agent():
env = gym.make(args.env)
state_shape = env.observation_space.shape
exp = ReplayMemory(args.mem_size, state_shape)
action_dim = env.action_space.n
agent = Model(state_shape[0], action_dim, gamma=0.99)
while len(exp) < MEMORY_WARMUP_SIZE:
run_episode(agent, env, exp, train_or_test='train')
max_episode = 4000
# train
total_episode = 0
pbar = tqdm(total=max_episode)
recent_100_reward = []
for episode in xrange(max_episode):
# start epoch
total_reward = run_episode(agent, env, exp, train_or_test='train')
pbar.set_description('[train]exploration:{}'.format(agent.exploration))
pbar.update()
# recent 100 reward
total_reward = run_episode(agent, env, exp, train_or_test='test')
recent_100_reward.append(total_reward)
if len(recent_100_reward) > 100:
recent_100_reward = recent_100_reward[1:]
pbar.write("episode:{} test_reward:{}".format(\
episode, np.mean(recent_100_reward)))
pbar.close()
if __name__ == '__main__':
parser = argparse.ArgumentParser()
parser.add_argument('--env', type=str, default='MountainCar-v0', \
help='environment to train the DQN model, e.g. CartPole-v0')
parser.add_argument('--gamma', type=float, default=0.99, \
help='discount factor for accumulated reward computation')
parser.add_argument('--mem_size', type=int, default=500000, \
help='memory size for experience replay')
parser.add_argument('--batch_size', type=int, default=192, \
help='batch size for training')
args = parser.parse_args()
train_agent()
<img src="mountain_car.gif" width="300" height="200">
# Reproduce DQN model
+ DQN in:
[Human-level Control Through Deep Reinforcement Learning](http://www.nature.com/nature/journal/v518/n7540/full/nature14236.html)
# MountainCar benchmark & performance
[MountainCar-v0](https://gym.openai.com/envs/MountainCar-v0/)
A car is on a one-dimensional track, positioned between two "mountains". The goal is to drive up the mountain on the right; however, the car's engine is not strong enough to scale the mountain in a single pass. Therefore, the only way to succeed is to drive back and forth to build up momentum.
<img src="curve.png" >
# How to use
+ Dependencies:
+ python2.7
+ gym
+ tqdm
+ paddle-fluid
+ Start Training:
```
# use the MountainCar environment by default
python DQN.py
# use another environment
python DQN.py --env CartPole-v0
```
#-*- coding: utf-8 -*-
#File: agent.py
import paddle.fluid as fluid
from paddle.fluid.param_attr import ParamAttr
import numpy as np
from tqdm import tqdm
import math
UPDATE_TARGET_STEPS = 200
class Model(object):
def __init__(self, state_dim, action_dim, gamma):
self.global_step = 0
self.state_dim = state_dim
self.action_dim = action_dim
self.gamma = gamma
self.exploration = 1.0
self._build_net()
def _get_inputs(self):
return [fluid.layers.data(\
name='state', shape=[self.state_dim], dtype='float32'),
fluid.layers.data(\
name='action', shape=[1], dtype='int32'),
fluid.layers.data(\
name='reward', shape=[], dtype='float32'),
fluid.layers.data(\
name='next_s', shape=[self.state_dim], dtype='float32'),
fluid.layers.data(\
name='isOver', shape=[], dtype='bool')]
def _build_net(self):
state, action, reward, next_s, isOver = self._get_inputs()
self.pred_value = self.get_DQN_prediction(state)
self.predict_program = fluid.default_main_program().clone()
action_onehot = fluid.layers.one_hot(action, self.action_dim)
action_onehot = fluid.layers.cast(action_onehot, dtype='float32')
pred_action_value = fluid.layers.reduce_sum(\
fluid.layers.elementwise_mul(action_onehot, self.pred_value), dim=1)
targetQ_predict_value = self.get_DQN_prediction(next_s, target=True)
best_v = fluid.layers.reduce_max(targetQ_predict_value, dim=1)
best_v.stop_gradient = True
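# Bellman target: reward + gamma * max_a' Q_target(next_s, a'), with the
# bootstrap term zeroed on terminal transitions via (1.0 - isOver).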
target = reward + (1.0 - fluid.layers.cast(\
isOver, dtype='float32')) * self.gamma * best_v
cost = fluid.layers.square_error_cost(\
input=pred_action_value, label=target)
cost = fluid.layers.reduce_mean(cost)
self._sync_program = self._build_sync_target_network()
optimizer = fluid.optimizer.Adam(1e-3)
optimizer.minimize(cost)
# define program
self.train_program = fluid.default_main_program()
# fluid exe
place = fluid.CUDAPlace(0)
self.exe = fluid.Executor(place)
self.exe.run(fluid.default_startup_program())
def get_DQN_prediction(self, state, target=False):
variable_field = 'target' if target else 'policy'
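# All parameter names below carry the 'policy_'/'target_' prefix, so the
# online and target networks keep separate weights that are matched up
# later in _build_sync_target_network.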
# layer fc1
param_attr = ParamAttr(name='{}_fc1'.format(variable_field))
bias_attr = ParamAttr(name='{}_fc1_b'.format(variable_field))
fc1 = fluid.layers.fc(input=state,
size=256,
act='relu',
param_attr=param_attr,
bias_attr=bias_attr)
param_attr = ParamAttr(name='{}_fc2'.format(variable_field))
bias_attr = ParamAttr(name='{}_fc2_b'.format(variable_field))
fc2 = fluid.layers.fc(input=fc1,
size=128,
act='tanh',
param_attr=param_attr,
bias_attr=bias_attr)
param_attr = ParamAttr(name='{}_fc3'.format(variable_field))
bias_attr = ParamAttr(name='{}_fc3_b'.format(variable_field))
value = fluid.layers.fc(input=fc2,
size=self.action_dim,
param_attr=param_attr,
bias_attr=bias_attr)
return value
def _build_sync_target_network(self):
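# Build a program that copies every 'policy_*' parameter into its
# 'target_*' counterpart; running it refreshes the target network.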
vars = fluid.default_main_program().list_vars()
policy_vars = []
target_vars = []
for var in vars:
if 'GRAD' in var.name: continue
if 'policy' in var.name:
policy_vars.append(var)
elif 'target' in var.name:
target_vars.append(var)
policy_vars.sort(key=lambda x: x.name.split('policy_')[1])
target_vars.sort(key=lambda x: x.name.split('target_')[1])
sync_program = fluid.default_main_program().clone()
with fluid.program_guard(sync_program):
sync_ops = []
for i, var in enumerate(policy_vars):
sync_op = fluid.layers.assign(policy_vars[i], target_vars[i])
sync_ops.append(sync_op)
sync_program = sync_program.prune(sync_ops)
return sync_program
def act(self, state, train_or_test):
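# Epsilon-greedy policy: during training, act randomly with probability
# self.exploration, which decays toward 0.1 below.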
sample = np.random.random()
if train_or_test == 'train' and sample < self.exploration:
act = np.random.randint(self.action_dim)
else:
state = np.expand_dims(state, axis=0)
pred_Q = self.exe.run(self.predict_program,
feed={'state': state.astype('float32')},
fetch_list=[self.pred_value])[0]
pred_Q = np.squeeze(pred_Q, axis=0)
act = np.argmax(pred_Q)
self.exploration = max(0.1, self.exploration - 1e-6)
return act
def train(self, state, action, reward, next_state, isOver):
if self.global_step % UPDATE_TARGET_STEPS == 0:
self.sync_target_network()
self.global_step += 1
action = np.expand_dims(action, -1)
self.exe.run(self.train_program, \
feed={'state': state, \
'action': action, \
'reward': reward, \
'next_s': next_state, \
'isOver': isOver})
def sync_target_network(self):
self.exe.run(self._sync_program)
#-*- coding: utf-8 -*-
#File: expreplay.py
from collections import namedtuple
import numpy as np
Experience = namedtuple('Experience', ['state', 'action', 'reward', 'isOver'])
class ReplayMemory(object):
def __init__(self, max_size, state_shape):
self.max_size = int(max_size)
self.state_shape = state_shape
self.state = np.zeros((self.max_size, ) + state_shape, dtype='float32')
self.action = np.zeros((self.max_size, ), dtype='int32')
self.reward = np.zeros((self.max_size, ), dtype='float32')
self.isOver = np.zeros((self.max_size, ), dtype='bool')
self._curr_size = 0
self._curr_pos = 0
def append(self, exp):
if self._curr_size < self.max_size:
self._assign(self._curr_pos, exp)
self._curr_size += 1
else:
self._assign(self._curr_pos, exp)
self._curr_pos = (self._curr_pos + 1) % self.max_size
def _assign(self, pos, exp):
self.state[pos] = exp.state
self.action[pos] = exp.action
self.reward[pos] = exp.reward
self.isOver[pos] = exp.isOver
def __len__(self):
return self._curr_size
def sample(self, batch_idx):
# map indices relative to the current write position to avoid sampling
# the latest state (whose next state is not stored yet)
batch_idx = (self._curr_pos + batch_idx) % self._curr_size
next_idx = (batch_idx + 1) % self._curr_size
state = self.state[batch_idx]
reward = self.reward[batch_idx]
action = self.action[batch_idx]
next_state = self.state[next_idx]
isOver = self.isOver[batch_idx]
return (state, action, reward, next_state, isOver)
@@ -54,6 +54,7 @@ The structure of the Advbox module is as follows:
| ├── mnist_tutorial_fgsm.py
| ├── mnist_tutorial_bim.py
| ├── mnist_tutorial_ilcm.py
| ├── mnist_tutorial_mifgsm.py
| ├── mnist_tutorial_jsma.py
| └── mnist_tutorial_deepfool.py
└── README.md
@@ -77,6 +78,7 @@ The `./tutorials/` folder provides some tutorials to generate adversarial examples
* [FGSM](https://arxiv.org/abs/1412.6572)
* [BIM](https://arxiv.org/abs/1607.02533)
* [ILCM](https://arxiv.org/abs/1607.02533)
* [MI-FGSM](https://arxiv.org/pdf/1710.06081.pdf)
* [JSMA](https://arxiv.org/pdf/1511.07528)
* [DeepFool](https://arxiv.org/abs/1511.04599)
@@ -91,6 +93,7 @@ Benchmarks on a vanilla CNN model.
|FGSM| 57.8% | 26.55% | 0.3 | One shot| *** |
|BIM| 97.4% | --- | 0.1 | 100 | **** |
|ILCM| --- | 100.0% | 0.1 | 100 | **** |
|MI-FGSM| 94.4% | 100.0% | 0.1 | 100 | **** |
|JSMA| 96.8% | 90.4%| 0.1 | 2000 | *** |
|DeepFool| 97.7% | 51.3% | --- | 100 | **** |
@@ -101,8 +104,9 @@ Benchmarks on a vanilla CNN model.
* [Intriguing properties of neural networks](https://arxiv.org/abs/1312.6199), C. Szegedy et al., arxiv 2014
* [Explaining and Harnessing Adversarial Examples](https://arxiv.org/abs/1412.6572), I. Goodfellow et al., ICLR 2015
* [Adversarial Examples In The Physical World](https://arxiv.org/pdf/1607.02533v3.pdf), A. Kurakin et al., ICLR workshop 2017
* [Boosting Adversarial Attacks with Momentum](https://arxiv.org/abs/1710.06081), Yinpeng Dong et al., arxiv 2018
* [The Limitations of Deep Learning in Adversarial Settings](https://arxiv.org/abs/1511.07528), N. Papernot et al., ESSP 2016
* [DeepFool: a simple and accurate method to fool deep neural networks](https://arxiv.org/abs/1511.04599), S. Moosavi-Dezfooli et al., CVPR 2016
* [Foolbox: A Python toolbox to benchmark the robustness of machine learning models](https://arxiv.org/abs/1707.04131), Jonas Rauber et al., arxiv 2018
* [CleverHans: An adversarial example library for constructing attacks, building defenses, and benchmarking both](https://github.com/tensorflow/cleverhans#setting-up-cleverhans)
* [Threat of Adversarial Attacks on Deep Learning in Computer Vision: A Survey](https://arxiv.org/abs/1801.00553), Naveed Akhtar, Ajmal Mian, arxiv 2018
@@ -14,7 +14,8 @@ __all__ = [
'GradientMethodAttack', 'FastGradientSignMethodAttack', 'FGSM',
'FastGradientSignMethodTargetedAttack', 'FGSMT',
'BasicIterativeMethodAttack', 'BIM',
'IterativeLeastLikelyClassMethodAttack', 'ILCM', 'MomentumIteratorAttack',
'MIFGSM'
]
@@ -76,9 +77,9 @@ class GradientMethodAttack(Attack):
for epsilon in epsilons[:]:
step = 1
adv_img = adversary.original
if epsilon == 0.0:
continue
for i in range(steps):
if adversary.is_targeted_attack:
gradient = -self.model.gradient(adv_img,
adversary.target_label)
@@ -175,7 +176,103 @@ class BasicIterativeMethodAttack(IterativeLeastLikelyClassMethodAttack):
super(BasicIterativeMethodAttack, self).__init__(model, False)
class MomentumIteratorAttack(GradientMethodAttack):
"""
The Momentum Iterative Fast Gradient Sign Method (Dong et al. 2017).
This method won first place in both the NIPS 2017 Non-targeted and
Targeted Adversarial Attack competitions. The original paper uses hard
labels (no label smoothing) and the L-inf norm for this attack.
Paper link: https://arxiv.org/pdf/1710.06081.pdf
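As implemented in _apply below, each iteration follows the update rule
g = decay_factor * g + gradient / ||gradient||_1
x = clip(x + epsilon * sign(g))
(for a finite norm_ord, sign(g) is replaced by a norm of the momentum).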
"""
def __init__(self, model, support_targeted=True):
"""
:param model(model): The model to be attacked.
:param support_targeted(bool): Whether this attack method supports targeted attacks.
"""
super(MomentumIteratorAttack, self).__init__(model)
self.support_targeted = support_targeted
def _apply(self,
adversary,
norm_ord=np.inf,
epsilons=0.1,
steps=100,
epsilon_steps=100,
decay_factor=1):
"""
Apply the momentum iterative gradient attack method.
:param adversary(Adversary):
The Adversary object.
:param norm_ord(int):
Order of the norm, such as np.inf, 1, 2, etc. It can't be 0.
:param epsilons(list|tuple|float):
Attack step size (input variation).
Largest step size if epsilons is not iterable.
:param epsilon_steps:
The number of epsilons to try when epsilons is given as a scalar.
:param steps:
The number of attack iterations.
:param decay_factor:
The decay factor for the momentum term.
:return:
adversary(Adversary): The Adversary object.
"""
if norm_ord == 0:
raise ValueError("L0 norm is not supported!")
if not self.support_targeted:
if adversary.is_targeted_attack:
raise ValueError(
"This attack method doesn't support targeted attack!")
assert self.model.channel_axis() == adversary.original.ndim
assert (self.model.channel_axis() == 1 or
self.model.channel_axis() == adversary.original.shape[0] or
self.model.channel_axis() == adversary.original.shape[-1])
if not isinstance(epsilons, Iterable):
epsilons = np.linspace(0, epsilons, num=epsilon_steps)
min_, max_ = self.model.bounds()
pre_label = adversary.original_label
for epsilon in epsilons[:]:
if epsilon == 0.0:
continue
step = 1
adv_img = adversary.original
momentum = 0
for i in range(steps):
if adversary.is_targeted_attack:
gradient = -self.model.gradient(adv_img,
adversary.target_label)
else:
gradient = self.model.gradient(adv_img, pre_label)
# normalize gradient
velocity = gradient / self._norm(gradient, ord=1)
momentum = decay_factor * momentum + velocity
if norm_ord == np.inf:
normalized_grad = np.sign(momentum)
else:
normalized_grad = self._norm(momentum, ord=norm_ord)
perturbation = epsilon * normalized_grad
adv_img = adv_img + perturbation
adv_img = np.clip(adv_img, min_, max_)
adv_label = np.argmax(self.model.predict(adv_img))
logging.info(
'step={}, epsilon = {:.5f}, pre_label = {}, adv_label={}'
.format(step, epsilon, pre_label, adv_label))
if adversary.try_accept_the_example(adv_img, adv_label):
return adversary
step += 1
return adversary
FGSM = FastGradientSignMethodAttack
FGSMT = FastGradientSignMethodTargetedAttack
BIM = BasicIterativeMethodAttack
ILCM = IterativeLeastLikelyClassMethodAttack
MIFGSM = MomentumIteratorAttack
"""
MIFGSM tutorial on mnist using advbox tool.
MIFGSM is a broad class of momentum iterative gradient-based methods built on FGSM.
It supports both non-targeted and targeted attacks.
"""
import sys
sys.path.append("..")
import matplotlib.pyplot as plt
import numpy as np
import paddle.fluid as fluid
import paddle.v2 as paddle
from advbox.adversary import Adversary
from advbox.attacks.gradient_method import MIFGSM
from advbox.models.paddle import PaddleModel
from tutorials.mnist_model import mnist_cnn_model
def main():
"""
Advbox demo which demonstrates how to use advbox.
"""
TOTAL_NUM = 500
IMG_NAME = 'img'
LABEL_NAME = 'label'
img = fluid.layers.data(name=IMG_NAME, shape=[1, 28, 28], dtype='float32')
# gradient should flow
img.stop_gradient = False
label = fluid.layers.data(name=LABEL_NAME, shape=[1], dtype='int64')
logits = mnist_cnn_model(img)
cost = fluid.layers.cross_entropy(input=logits, label=label)
avg_cost = fluid.layers.mean(x=cost)
# use CPU
place = fluid.CPUPlace()
# use GPU
# place = fluid.CUDAPlace(0)
exe = fluid.Executor(place)
BATCH_SIZE = 1
train_reader = paddle.batch(
paddle.reader.shuffle(
paddle.dataset.mnist.train(), buf_size=128 * 10),
batch_size=BATCH_SIZE)
test_reader = paddle.batch(
paddle.reader.shuffle(
paddle.dataset.mnist.test(), buf_size=128 * 10),
batch_size=BATCH_SIZE)
fluid.io.load_params(
exe, "./mnist/", main_program=fluid.default_main_program())
# advbox demo
m = PaddleModel(
fluid.default_main_program(),
IMG_NAME,
LABEL_NAME,
logits.name,
avg_cost.name, (-1, 1),
channel_axis=1)
attack = MIFGSM(m)
attack_config = {
"norm_ord": np.inf,
"epsilons": 0.1,
"steps": 100,
"decay_factor": 1
}
# use train data to generate adversarial examples
total_count = 0
fooling_count = 0
for data in train_reader():
total_count += 1
adversary = Adversary(data[0][0], data[0][1])
# MIFGSM non-targeted attack
adversary = attack(adversary, **attack_config)
# MIFGSM targeted attack
# tlabel = 0
# adversary.set_target(is_targeted_attack=True, target_label=tlabel)
# adversary = attack(adversary, **attack_config)
if adversary.is_successful():
fooling_count += 1
print(
'attack success, original_label=%d, adversarial_label=%d, count=%d'
% (data[0][1], adversary.adversarial_label, total_count))
# plt.imshow(adversary.target, cmap='Greys_r')
# plt.show()
# np.save('adv_img', adversary.target)
else:
print('attack failed, original_label=%d, count=%d' %
(data[0][1], total_count))
if total_count >= TOTAL_NUM:
print(
"[TRAIN_DATASET]: fooling_count=%d, total_count=%d, fooling_rate=%f"
% (fooling_count, total_count,
float(fooling_count) / total_count))
break
# use test data to generate adversarial examples
total_count = 0
fooling_count = 0
for data in test_reader():
total_count += 1
adversary = Adversary(data[0][0], data[0][1])
# MIFGSM non-targeted attack
adversary = attack(adversary, **attack_config)
# MIFGSM targeted attack
# tlabel = 0
# adversary.set_target(is_targeted_attack=True, target_label=tlabel)
# adversary = attack(adversary, **attack_config)
if adversary.is_successful():
fooling_count += 1
print(
'attack success, original_label=%d, adversarial_label=%d, count=%d'
% (data[0][1], adversary.adversarial_label, total_count))
# plt.imshow(adversary.target, cmap='Greys_r')
# plt.show()
# np.save('adv_img', adversary.target)
else:
print('attack failed, original_label=%d, count=%d' %
(data[0][1], total_count))
if total_count >= TOTAL_NUM:
print(
"[TEST_DATASET]: fooling_count=%d, total_count=%d, fooling_rate=%f"
% (fooling_count, total_count,
float(fooling_count) / total_count))
break
print("mifgsm attack done")
if __name__ == '__main__':
main()
# Chinese Named Entity Recognition Example Using ParallelExecutor
Below is a brief directory structure and description for this example:
```text
.
├── data # data this example depends on, obtained externally
├── reader.py # data-reading interface, obtained externally
├── README.md # this document
├── train.py # training script
├── infer.py # inference script
```
## Data
The data directory contains two folders: train_files holds the training data and test_files holds the test data. As an example we placed two files in each; for actual training, put your data into the corresponding directory as needed and adapt the data-reading function in reader.py to your data format.
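As a reference for the expected format: `file_reader` in reader.py parses each line as semicolon-separated fields, where field 1 holds space-separated word ids, field 2 the label ids, and field 3 the mention ids. Adjust that parsing to match your own data.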
## Training
Modify the `main` function of [train.py](./train.py) to specify your data path, then run `python train.py` to start training.
Training logs look like:
```txt
pass_id:0, time_cost:4.92960214615s
[Train] precision:0.000862136531076, recall:0.0059880239521, f1:0.00150726226363
[Test] precision:0.000796178343949, recall:0.00335758254057, f1:0.00128713933283
pass_id:1, time_cost:0.715255975723s
[Train] precision:0.00474094141551, recall:0.00762112139358, f1:0.00584551148225
[Test] precision:0.0228873239437, recall:0.00727476217124, f1:0.0110403397028
pass_id:2, time_cost:0.740842103958s
[Train] precision:0.0120967741935, recall:0.00163309744148, f1:0.00287769784173
[Test] precision:0, recall:0.0, f1:0
```
## Inference
Modify the `infer` function of [infer.py](./infer.py) to specify the path of the model to test, the test data, and the label dictionary file, then run `python infer.py` to start inference.
The inference output looks like:
```txt
152804 O O
130048 O O
38862 10-B O
784 O O
1540 O O
4145 O O
2255 O O
0 O O
1279 O O
7793 O O
373 O O
1621 O O
815 O O
2 O O
247 24-B O
401 24-I O
```
The output consists of three columns separated by "\t": the first column is the index of the input word, the second column the gold label, and the third column the predicted label. Multiple input sequences are separated by blank lines.
24-B
24-I
27-B
27-I
20-B
20-I
21-B
21-I
22-B
22-I
23-B
23-I
28-B
28-I
29-B
29-I
12-B
12-I
11-B
11-I
10-B
10-I
13-B
13-I
38-B
38-I
14-B
14-I
16-B
16-I
33-B
33-I
18-B
18-I
31-B
31-I
30-B
30-I
37-B
37-I
36-B
36-I
35-B
35-I
19-B
19-I
32-B
32-I
O
import numpy as np
import paddle.fluid as fluid
import paddle
import reader
def load_reverse_dict(dict_path):
return dict((idx, line.strip().split("\t")[0])
for idx, line in enumerate(open(dict_path, "r").readlines()))
def infer(model_path, batch_size, test_data_file, target_file):
word = fluid.layers.data(name='word', shape=[1], dtype='int64', lod_level=1)
mention = fluid.layers.data(
name='mention', shape=[1], dtype='int64', lod_level=1)
target = fluid.layers.data(
name='target', shape=[1], dtype='int64', lod_level=1)
label_reverse_dict = load_reverse_dict(target_file)
test_data = paddle.batch(
reader.file_reader(test_data_file), batch_size=batch_size)
place = fluid.CPUPlace()
feeder = fluid.DataFeeder(feed_list=[word, mention, target], place=place)
exe = fluid.Executor(place)
inference_scope = fluid.core.Scope()
with fluid.scope_guard(inference_scope):
[inference_program, feed_target_names,
fetch_targets] = fluid.io.load_inference_model(model_path, exe)
for data in test_data():
crf_decode = exe.run(inference_program,
feed=feeder.feed(data),
fetch_list=fetch_targets,
return_numpy=False)
lod_info = (crf_decode[0].lod())[0]
np_data = np.array(crf_decode[0])
assert len(data) == len(lod_info) - 1
for sen_index in xrange(len(data)):
assert len(data[sen_index][0]) == lod_info[
sen_index + 1] - lod_info[sen_index]
word_index = 0
for tag_index in xrange(lod_info[sen_index],
lod_info[sen_index + 1]):
word = str(data[sen_index][0][word_index])
gold_tag = label_reverse_dict[data[sen_index][2][
word_index]]
tag = label_reverse_dict[np_data[tag_index][0]]
print word + "\t" + gold_tag + "\t" + tag
word_index += 1
print ""
if __name__ == "__main__":
infer(
model_path="output/params_pass_0",
batch_size=6,
test_data_file="data/test_files",
target_file="data/label_dict")
import os
def file_reader(file_dir):
def reader():
files = os.listdir(file_dir)
for fi in files:
for line in open(file_dir + '/' + fi, 'r'):
line = line.strip()
features = line.split(";")
word_idx = []
for item in features[1].strip().split(" "):
word_idx.append(int(item))
target_idx = []
for item in features[2].strip().split(" "):
label_index = int(item)
if label_index == 0:
label_index = 48
else:
label_index -= 1
target_idx.append(label_index)
mention_idx = []
for item in features[3].strip().split(" "):
mention_idx.append(int(item))
yield word_idx, mention_idx, target_idx,
return reader
import os
import math
import time
import numpy as np
import paddle
import paddle.fluid as fluid
from paddle.fluid.initializer import NormalInitializer
import reader
def load_reverse_dict(dict_path):
return dict((idx, line.strip().split("\t")[0])
for idx, line in enumerate(open(dict_path, "r").readlines()))
def to_lodtensor(data, place):
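# Pack a batch of variable-length sequences into a single LoDTensor:
# the data is flattened and the LoD records cumulative sequence offsets,
# e.g. seq_lens [3, 2] -> lod [0, 3, 5].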
seq_lens = [len(seq) for seq in data]
cur_len = 0
lod = [cur_len]
for l in seq_lens:
cur_len += l
lod.append(cur_len)
flattened_data = np.concatenate(data, axis=0).astype("int64")
flattened_data = flattened_data.reshape([len(flattened_data), 1])
res = fluid.LoDTensor()
res.set(flattened_data, place)
res.set_lod([lod])
return res
def ner_net(word_dict_len, label_dict_len):
IS_SPARSE = False
word_dim = 32
mention_dict_len = 57
mention_dim = 20
grnn_hidden = 36
emb_lr = 5
init_bound = 0.1
def _net_conf(word, mention, target):
word_embedding = fluid.layers.embedding(
input=word,
size=[word_dict_len, word_dim],
dtype='float32',
is_sparse=IS_SPARSE,
param_attr=fluid.ParamAttr(
learning_rate=emb_lr,
name="word_emb",
initializer=fluid.initializer.Uniform(
low=-init_bound, high=init_bound)))
mention_embedding = fluid.layers.embedding(
input=mention,
size=[mention_dict_len, mention_dim],
dtype='float32',
is_sparse=IS_SPARSE,
param_attr=fluid.ParamAttr(
learning_rate=emb_lr,
name="mention_emb",
initializer=fluid.initializer.Uniform(
low=-init_bound, high=init_bound)))
word_embedding_r = fluid.layers.embedding(
input=word,
size=[word_dict_len, word_dim],
dtype='float32',
is_sparse=IS_SPARSE,
param_attr=fluid.ParamAttr(
learning_rate=emb_lr,
name="word_emb_r",
initializer=fluid.initializer.Uniform(
low=-init_bound, high=init_bound)))
mention_embedding_r = fluid.layers.embedding(
input=mention,
size=[mention_dict_len, mention_dim],
dtype='float32',
is_sparse=IS_SPARSE,
param_attr=fluid.ParamAttr(
learning_rate=emb_lr,
name="mention_emb_r",
initializer=fluid.initializer.Uniform(
low=-init_bound, high=init_bound)))
word_mention_vector = fluid.layers.concat(
input=[word_embedding, mention_embedding], axis=1)
word_mention_vector_r = fluid.layers.concat(
input=[word_embedding_r, mention_embedding_r], axis=1)
pre_gru = fluid.layers.fc(
input=word_mention_vector,
size=grnn_hidden * 3,
param_attr=fluid.ParamAttr(
initializer=fluid.initializer.Uniform(
low=-init_bound, high=init_bound),
regularizer=fluid.regularizer.L2DecayRegularizer(
regularization_coeff=1e-4)))
gru = fluid.layers.dynamic_gru(
input=pre_gru,
size=grnn_hidden,
param_attr=fluid.ParamAttr(
initializer=fluid.initializer.Uniform(
low=-init_bound, high=init_bound),
regularizer=fluid.regularizer.L2DecayRegularizer(
regularization_coeff=1e-4)))
pre_gru_r = fluid.layers.fc(
input=word_mention_vector_r,
size=grnn_hidden * 3,
param_attr=fluid.ParamAttr(
initializer=fluid.initializer.Uniform(
low=-init_bound, high=init_bound),
regularizer=fluid.regularizer.L2DecayRegularizer(
regularization_coeff=1e-4)))
gru_r = fluid.layers.dynamic_gru(
input=pre_gru_r,
size=grnn_hidden,
is_reverse=True,
param_attr=fluid.ParamAttr(
initializer=fluid.initializer.Uniform(
low=-init_bound, high=init_bound),
regularizer=fluid.regularizer.L2DecayRegularizer(
regularization_coeff=1e-4)))
gru_merged = fluid.layers.concat(input=[gru, gru_r], axis=1)
emission = fluid.layers.fc(
size=label_dict_len,
input=gru_merged,
param_attr=fluid.ParamAttr(
initializer=fluid.initializer.Uniform(
low=-init_bound, high=init_bound),
regularizer=fluid.regularizer.L2DecayRegularizer(
regularization_coeff=1e-4)))
crf_cost = fluid.layers.linear_chain_crf(
input=emission,
label=target,
param_attr=fluid.ParamAttr(
name='crfw',
learning_rate=0.2, ))
avg_cost = fluid.layers.mean(x=crf_cost)
return avg_cost, emission
word = fluid.layers.data(name='word', shape=[1], dtype='int64', lod_level=1)
mention = fluid.layers.data(
name='mention', shape=[1], dtype='int64', lod_level=1)
target = fluid.layers.data(
name="target", shape=[1], dtype='int64', lod_level=1)
avg_cost, emission = _net_conf(word, mention, target)
return avg_cost, emission, word, mention, target
def test2(exe, chunk_evaluator, inference_program, test_data, place,
cur_fetch_list):
chunk_evaluator.reset()
for data in test_data():
word = to_lodtensor(map(lambda x: x[0], data), place)
mention = to_lodtensor(map(lambda x: x[1], data), place)
target = to_lodtensor(map(lambda x: x[2], data), place)
result_list = exe.run(
inference_program,
feed={"word": word,
"mention": mention,
"target": target},
fetch_list=cur_fetch_list)
number_infer = np.array(result_list[0])
number_label = np.array(result_list[1])
number_correct = np.array(result_list[2])
chunk_evaluator.update(number_infer[0], number_label[0],
number_correct[0])
return chunk_evaluator.eval()
def test(test_exe, chunk_evaluator, inference_program, test_data, place,
cur_fetch_list):
chunk_evaluator.reset()
for data in test_data():
word = to_lodtensor(map(lambda x: x[0], data), place)
mention = to_lodtensor(map(lambda x: x[1], data), place)
target = to_lodtensor(map(lambda x: x[2], data), place)
result_list = test_exe.run(
fetch_list=cur_fetch_list,
feed={"word": word,
"mention": mention,
"target": target})
number_infer = np.array(result_list[0])
number_label = np.array(result_list[1])
number_correct = np.array(result_list[2])
chunk_evaluator.update(number_infer.sum(),
number_label.sum(), number_correct.sum())
return chunk_evaluator.eval()
def main(train_data_file, test_data_file, model_save_dir, num_passes):
if not os.path.exists(model_save_dir):
os.mkdir(model_save_dir)
BATCH_SIZE = 256
word_dict_len = 1942563
label_dict_len = 49
main = fluid.Program()
startup = fluid.Program()
with fluid.program_guard(main, startup):
avg_cost, feature_out, word, mention, target = ner_net(word_dict_len,
label_dict_len)
sgd_optimizer = fluid.optimizer.SGD(learning_rate=1e-3)
sgd_optimizer.minimize(avg_cost)
crf_decode = fluid.layers.crf_decoding(
input=feature_out, param_attr=fluid.ParamAttr(
name='crfw', ))
(precision, recall, f1_score, num_infer_chunks, num_label_chunks,
num_correct_chunks) = fluid.layers.chunk_eval(
input=crf_decode,
label=target,
chunk_scheme="IOB",
num_chunk_types=int(math.ceil((label_dict_len - 1) / 2.0)))
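# The (label_dict_len - 1) non-'O' labels form B/I pairs, plus one 'O'
# tag, hence ceil((label_dict_len - 1) / 2) chunk types.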
chunk_evaluator = fluid.metrics.ChunkEvaluator()
inference_program = fluid.default_main_program().clone()
with fluid.program_guard(inference_program):
inference_program = fluid.io.get_inference_program(
[num_infer_chunks, num_label_chunks, num_correct_chunks])
train_reader = paddle.batch(
paddle.reader.shuffle(
reader.file_reader(train_data_file), buf_size=2000000),
batch_size=BATCH_SIZE)
test_reader = paddle.batch(
paddle.reader.shuffle(
reader.file_reader(test_data_file), buf_size=2000000),
batch_size=BATCH_SIZE)
place = fluid.CUDAPlace(0)
feeder = fluid.DataFeeder(
feed_list=[word, mention, target], place=place)
exe = fluid.Executor(place)
exe.run(startup)
train_exe = fluid.ParallelExecutor(
loss_name=avg_cost.name, use_cuda=True)
test_exe = fluid.ParallelExecutor(
use_cuda=True,
main_program=inference_program,
share_vars_from=train_exe)
batch_id = 0
for pass_id in xrange(num_passes):
chunk_evaluator.reset()
train_reader_iter = train_reader()
start_time = time.time()
while True:
try:
cur_batch = next(train_reader_iter)
cost, nums_infer, nums_label, nums_correct = train_exe.run(
fetch_list=[
avg_cost.name, num_infer_chunks.name,
num_label_chunks.name, num_correct_chunks.name
],
feed=feeder.feed(cur_batch))
chunk_evaluator.update(
np.array(nums_infer).sum(),
np.array(nums_label).sum(),
np.array(nums_correct).sum())
cost_list = np.array(cost)
batch_id += 1
except StopIteration:
break
end_time = time.time()
print("pass_id:" + str(pass_id) + ", time_cost:" + str(
end_time - start_time) + "s")
precision, recall, f1_score = chunk_evaluator.eval()
print("[Train] precision:" + str(precision) + ", recall:" + str(
recall) + ", f1:" + str(f1_score))
p, r, f1 = test2(
exe, chunk_evaluator, inference_program, test_reader, place,
[num_infer_chunks, num_label_chunks, num_correct_chunks])
print("[Test] precision:" + str(p) + ", recall:" + str(r) + ", f1:"
+ str(f1))
save_dirname = os.path.join(model_save_dir,
"params_pass_%d" % pass_id)
fluid.io.save_inference_model(
save_dirname, ['word', 'mention', 'target'], [crf_decode], exe)
if __name__ == "__main__":
main(
train_data_file="./data/train_files",
test_data_file="./data/test_files",
model_save_dir="./output",
num_passes=1000)
from PIL import Image, ImageEnhance, ImageDraw
from PIL import ImageFile
import numpy as np
import random
import math
ImageFile.LOAD_TRUNCATED_IMAGES = True # otherwise an IOError is raised if the image file is truncated
class sampler():
def __init__(self, max_sample, max_trial, min_scale, max_scale,
min_aspect_ratio, max_aspect_ratio, min_jaccard_overlap,
max_jaccard_overlap):
self.max_sample = max_sample
self.max_trial = max_trial
self.min_scale = min_scale
self.max_scale = max_scale
self.min_aspect_ratio = min_aspect_ratio
self.max_aspect_ratio = max_aspect_ratio
self.min_jaccard_overlap = min_jaccard_overlap
self.max_jaccard_overlap = max_jaccard_overlap
class bbox():
def __init__(self, xmin, ymin, xmax, ymax):
self.xmin = xmin
self.ymin = ymin
self.xmax = xmax
self.ymax = ymax
def bbox_area(src_bbox):
width = src_bbox.xmax - src_bbox.xmin
height = src_bbox.ymax - src_bbox.ymin
return width * height
def generate_sample(sampler):
scale = random.uniform(sampler.min_scale, sampler.max_scale)
min_aspect_ratio = max(sampler.min_aspect_ratio, (scale**2.0))
max_aspect_ratio = min(sampler.max_aspect_ratio, 1 / (scale**2.0))
aspect_ratio = random.uniform(min_aspect_ratio, max_aspect_ratio)
bbox_width = scale * (aspect_ratio**0.5)
bbox_height = scale / (aspect_ratio**0.5)
xmin_bound = 1 - bbox_width
ymin_bound = 1 - bbox_height
xmin = random.uniform(0, xmin_bound)
ymin = random.uniform(0, ymin_bound)
xmax = xmin + bbox_width
ymax = ymin + bbox_height
sampled_bbox = bbox(xmin, ymin, xmax, ymax)
return sampled_bbox
def jaccard_overlap(sample_bbox, object_bbox):
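# Jaccard overlap (IoU): intersection area divided by union area;
# 0 when the boxes do not intersect.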
if sample_bbox.xmin >= object_bbox.xmax or \
sample_bbox.xmax <= object_bbox.xmin or \
sample_bbox.ymin >= object_bbox.ymax or \
sample_bbox.ymax <= object_bbox.ymin:
return 0
intersect_xmin = max(sample_bbox.xmin, object_bbox.xmin)
intersect_ymin = max(sample_bbox.ymin, object_bbox.ymin)
intersect_xmax = min(sample_bbox.xmax, object_bbox.xmax)
intersect_ymax = min(sample_bbox.ymax, object_bbox.ymax)
intersect_size = (intersect_xmax - intersect_xmin) * (
intersect_ymax - intersect_ymin)
sample_bbox_size = bbox_area(sample_bbox)
object_bbox_size = bbox_area(object_bbox)
overlap = intersect_size / (
sample_bbox_size + object_bbox_size - intersect_size)
return overlap
def satisfy_sample_constraint(sampler, sample_bbox, bbox_labels):
if sampler.min_jaccard_overlap == 0 and sampler.max_jaccard_overlap == 0:
return True
for i in range(len(bbox_labels)):
object_bbox = bbox(
bbox_labels[i][0],
bbox_labels[i][1], # tangxu @ 2018-05-17
bbox_labels[i][2],
bbox_labels[i][3])
overlap = jaccard_overlap(sample_bbox, object_bbox)
if sampler.min_jaccard_overlap != 0 and \
overlap < sampler.min_jaccard_overlap:
continue
if sampler.max_jaccard_overlap != 0 and \
overlap > sampler.max_jaccard_overlap:
continue
return True
return False
def generate_batch_samples(batch_sampler, bbox_labels):
sampled_bbox = []
index = []
c = 0
for sampler in batch_sampler:
found = 0
for i in range(sampler.max_trial):
if found >= sampler.max_sample:
break
sample_bbox = generate_sample(sampler)
if satisfy_sample_constraint(sampler, sample_bbox, bbox_labels):
sampled_bbox.append(sample_bbox)
found = found + 1
index.append(c)
c = c + 1
return sampled_bbox
def clip_bbox(src_bbox):
src_bbox.xmin = max(min(src_bbox.xmin, 1.0), 0.0)
src_bbox.ymin = max(min(src_bbox.ymin, 1.0), 0.0)
src_bbox.xmax = max(min(src_bbox.xmax, 1.0), 0.0)
src_bbox.ymax = max(min(src_bbox.ymax, 1.0), 0.0)
return src_bbox
def meet_emit_constraint(src_bbox, sample_bbox):
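# A ground-truth box is kept only if its center falls inside the sampled crop.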
center_x = (src_bbox.xmax + src_bbox.xmin) / 2
center_y = (src_bbox.ymax + src_bbox.ymin) / 2
if center_x >= sample_bbox.xmin and \
center_x <= sample_bbox.xmax and \
center_y >= sample_bbox.ymin and \
center_y <= sample_bbox.ymax:
return True
return False
def transform_labels(bbox_labels, sample_bbox):
proj_bbox = bbox(0, 0, 0, 0)
sample_labels = []
for i in range(len(bbox_labels)):
sample_label = []
object_bbox = bbox(bbox_labels[i][0], bbox_labels[i][1],
bbox_labels[i][2], bbox_labels[i][3])
if not meet_emit_constraint(object_bbox, sample_bbox):
continue
sample_width = sample_bbox.xmax - sample_bbox.xmin
sample_height = sample_bbox.ymax - sample_bbox.ymin
proj_bbox.xmin = (object_bbox.xmin - sample_bbox.xmin) / sample_width
proj_bbox.ymin = (object_bbox.ymin - sample_bbox.ymin) / sample_height
proj_bbox.xmax = (object_bbox.xmax - sample_bbox.xmin) / sample_width
proj_bbox.ymax = (object_bbox.ymax - sample_bbox.ymin) / sample_height
proj_bbox = clip_bbox(proj_bbox)
if bbox_area(proj_bbox) > 0:
sample_label.append(bbox_labels[i][0])
sample_label.append(float(proj_bbox.xmin))
sample_label.append(float(proj_bbox.ymin))
sample_label.append(float(proj_bbox.xmax))
sample_label.append(float(proj_bbox.ymax))
#sample_label.append(bbox_labels[i][5])
sample_label = sample_label + bbox_labels[i][5:]
sample_labels.append(sample_label)
return sample_labels
def crop_image(img, bbox_labels, sample_bbox, image_width, image_height):
sample_bbox = clip_bbox(sample_bbox)
xmin = int(sample_bbox.xmin * image_width)
xmax = int(sample_bbox.xmax * image_width)
ymin = int(sample_bbox.ymin * image_height)
ymax = int(sample_bbox.ymax * image_height)
sample_img = img[ymin:ymax, xmin:xmax]
sample_labels = transform_labels(bbox_labels, sample_bbox)
return sample_img, sample_labels
def random_brightness(img, settings):
prob = random.uniform(0, 1)
if prob < settings._brightness_prob:
delta = random.uniform(-settings._brightness_delta,
settings._brightness_delta) + 1
img = ImageEnhance.Brightness(img).enhance(delta)
return img
def random_contrast(img, settings):
prob = random.uniform(0, 1)
if prob < settings._contrast_prob:
delta = random.uniform(-settings._contrast_delta,
settings._contrast_delta) + 1
img = ImageEnhance.Contrast(img).enhance(delta)
return img
def random_saturation(img, settings):
prob = random.uniform(0, 1)
if prob < settings._saturation_prob:
delta = random.uniform(-settings._saturation_delta,
settings._saturation_delta) + 1
img = ImageEnhance.Color(img).enhance(delta)
return img
def random_hue(img, settings):
prob = random.uniform(0, 1)
if prob < settings._hue_prob:
delta = random.uniform(-settings._hue_delta, settings._hue_delta)
img_hsv = np.array(img.convert('HSV'))
img_hsv[:, :, 0] = img_hsv[:, :, 0] + delta
img = Image.fromarray(img_hsv, mode='HSV').convert('RGB')
return img
def distort_image(img, settings):
prob = random.uniform(0, 1)
# Apply different distort order
if prob > 0.5:
img = random_brightness(img, settings)
img = random_contrast(img, settings)
img = random_saturation(img, settings)
img = random_hue(img, settings)
else:
img = random_brightness(img, settings)
img = random_saturation(img, settings)
img = random_hue(img, settings)
img = random_contrast(img, settings)
return img
def expand_image(img, bbox_labels, img_width, img_height, settings):
prob = random.uniform(0, 1)
if prob < settings._expand_prob:
if settings._expand_max_ratio - 1 >= 0.01:
expand_ratio = random.uniform(1, settings._expand_max_ratio)
height = int(img_height * expand_ratio)
width = int(img_width * expand_ratio)
h_off = math.floor(random.uniform(0, height - img_height))
w_off = math.floor(random.uniform(0, width - img_width))
expand_bbox = bbox(-w_off / img_width, -h_off / img_height,
(width - w_off) / img_width,
(height - h_off) / img_height)
expand_img = np.ones((height, width, 3))
expand_img = np.uint8(expand_img * np.squeeze(settings._img_mean))
expand_img = Image.fromarray(expand_img)
expand_img.paste(img, (int(w_off), int(h_off)))
bbox_labels = transform_labels(bbox_labels, expand_bbox)
return expand_img, bbox_labels, width, height
return img, bbox_labels, img_width, img_height
import numpy as np
import paddle.fluid as fluid
from paddle.fluid.param_attr import ParamAttr
from paddle.fluid.initializer import Xavier
from paddle.fluid.initializer import Constant
from paddle.fluid.regularizer import L2Decay
def conv_bn(input, filter, ksize, stride, padding, act='relu', bias_attr=False):
conv = fluid.layers.conv2d(
input=input,
filter_size=ksize,
num_filters=filter,
stride=stride,
padding=padding,
act=None,
bias_attr=bias_attr)
return fluid.layers.batch_norm(input=conv, act=act)
def conv_block(input, groups, filters, ksizes, strides=None, with_pool=True):
assert len(filters) == groups
assert len(ksizes) == groups
strides = [1] * groups if strides is None else strides
w_attr = ParamAttr(learning_rate=1., initializer=Xavier())
b_attr = ParamAttr(learning_rate=2., regularizer=L2Decay(0.))
conv = input
for i in xrange(groups):
conv = fluid.layers.conv2d(
input=conv,
num_filters=filters[i],
filter_size=ksizes[i],
stride=strides[i],
padding=(ksizes[i] - 1) / 2,
param_attr=w_attr,
bias_attr=b_attr,
act='relu')
if with_pool:
pool = fluid.layers.pool2d(
input=conv, pool_size=2, pool_type='max', pool_stride=2)
return pool
else:
return conv
class PyramidBox(object):
def __init__(self, data_shape, is_infer=False, sub_network=False):
self.data_shape = data_shape
self.min_sizes = [16., 32., 64., 128., 256., 512.]
self.steps = [4., 8., 16., 32., 64., 128.]
self.is_infer = is_infer
# the base network is VGG with atrous layers
self._input()
self._vgg()
if sub_network:
self._low_level_fpn()
self._cpm_module()
self._pyramidbox()
def _input(self):
self.image = fluid.layers.data(
name='image', shape=self.data_shape, dtype='float32')
if not self.is_infer:
self.gt_box = fluid.layers.data(
name='gt_box', shape=[4], dtype='float32', lod_level=1)
self.gt_label = fluid.layers.data(
name='gt_label', shape=[1], dtype='int32', lod_level=1)
self.difficult = fluid.layers.data(
name='gt_difficult', shape=[1], dtype='int32', lod_level=1)
def _vgg(self):
self.conv1 = conv_block(self.image, 2, [64] * 2, [3] * 2)
self.conv2 = conv_block(self.conv1, 2, [128] * 2, [3] * 2)
#priorbox min_size is 16
self.conv3 = conv_block(self.conv2, 3, [256] * 3, [3] * 3)
#priorbox min_size is 32
self.conv4 = conv_block(self.conv3, 3, [512] * 3, [3] * 3)
#priorbox min_size is 64
self.conv5 = conv_block(self.conv4, 3, [512] * 3, [3] * 3)
# fc6 and fc7 in paper, priorbox min_size is 128
self.conv6 = conv_block(
self.conv5, 2, [1024, 1024], [3, 1], with_pool=False)
# conv6_1 and conv6_2 in paper, priorbox min_size is 256
self.conv7 = conv_block(
self.conv6, 2, [256, 512], [1, 3], [1, 2], with_pool=False)
# conv7_1 and conv7_2 in paper, priorbox min_size is 512
self.conv8 = conv_block(
self.conv7, 2, [128, 256], [1, 3], [1, 2], with_pool=False)
def _low_level_fpn(self):
"""
Low-level feature pyramid network.
"""
def fpn(up_from, up_to):
ch = up_to.shape[1]
b_attr = ParamAttr(learning_rate=2., regularizer=L2Decay(0.))
conv1 = fluid.layers.conv2d(
up_from, ch, 1, act='relu', bias_attr=b_attr)
# TODO: add group
conv_trans = fluid.layers.conv2d_transpose(
conv1, ch, None, 4, 1, 2, bias_attr=False)
b_attr = ParamAttr(learning_rate=2., regularizer=L2Decay(0.))
conv2 = fluid.layers.conv2d(
up_to, ch, 1, act='relu', bias_attr=b_attr)
# eltwise mul
conv_fuse = conv_trans * conv2
return conv_fuse
self.lfpn2_on_conv5 = fpn(self.conv6, self.conv5)
self.lfpn1_on_conv4 = fpn(self.lfpn2_on_conv5, self.conv4)
self.lfpn0_on_conv3 = fpn(self.lfpn1_on_conv4, self.conv3)
def _cpm_module(self):
"""
Context-sensitive Prediction Module
"""
def cpm(input):
# residual
branch1 = conv_bn(input, 1024, 1, 1, 0, None)
branch2a = conv_bn(input, 256, 1, 1, 0, act='relu')
branch2b = conv_bn(branch2a, 256, 3, 1, 1, act='relu')
branch2c = conv_bn(branch2b, 1024, 1, 1, 0, None)
sum = branch1 + branch2c
rescomb = fluid.layers.relu(x=sum)
# ssh
b_attr = ParamAttr(learning_rate=2., regularizer=L2Decay(0.))
ssh_1 = fluid.layers.conv2d(rescomb, 256, 3, 1, 1, bias_attr=b_attr)
ssh_dimred = fluid.layers.conv2d(
rescomb, 128, 3, 1, 1, act='relu', bias_attr=b_attr)
ssh_2 = fluid.layers.conv2d(
ssh_dimred, 128, 3, 1, 1, bias_attr=b_attr)
ssh_3a = fluid.layers.conv2d(
ssh_dimred, 128, 3, 1, 1, act='relu', bias_attr=b_attr)
ssh_3b = fluid.layers.conv2d(ssh_3a, 128, 3, 1, 1, bias_attr=b_attr)
ssh_concat = fluid.layers.concat([ssh_1, ssh_2, ssh_3b], axis=1)
ssh_out = fluid.layers.relu(x=ssh_concat)
return ssh_out
self.ssh_conv3 = cpm(self.lfpn0_on_conv3)
self.ssh_conv4 = cpm(self.lfpn1_on_conv4)
self.ssh_conv5 = cpm(self.lfpn2_on_conv5)
self.ssh_conv6 = cpm(self.conv6)
self.ssh_conv7 = cpm(self.conv7)
self.ssh_conv8 = cpm(self.conv8)
def _l2_norm_scale(self, input, init_scale=1.0, channel_shared=False):
from paddle.fluid.layer_helper import LayerHelper
helper = LayerHelper("Scale")
l2_norm = fluid.layers.l2_normalize(
input, axis=1) # l2 norm along channel
shape = [1] if channel_shared else [input.shape[1]]
scale = helper.create_parameter(
attr=helper.param_attr,
shape=shape,
dtype=input.dtype,
default_initializer=Constant(init_scale))
out = fluid.layers.elementwise_mul(
x=l2_norm, y=scale, axis=-1 if channel_shared else 1)
return out
def _pyramidbox(self):
"""
Get prior-boxes and pyramid-box
"""
self.ssh_conv3_norm = self._l2_norm_scale(self.ssh_conv3)
self.ssh_conv4_norm = self._l2_norm_scale(self.ssh_conv4)
self.ssh_conv5_norm = self._l2_norm_scale(self.ssh_conv5)
def permute_and_reshape(input, last_dim):
trans = fluid.layers.transpose(input, perm=[0, 2, 3, 1])
new_shape = [
trans.shape[0], np.prod(trans.shape[1:]) / last_dim, last_dim
]
return fluid.layers.reshape(trans, shape=new_shape)
face_locs, face_confs = [], []
head_locs, head_confs = [], []
boxes, vars = [], []
inputs = [
self.ssh_conv3_norm, self.ssh_conv4_norm, self.ssh_conv5_norm,
self.ssh_conv6, self.ssh_conv7, self.ssh_conv8
]
b_attr = ParamAttr(learning_rate=2., regularizer=L2Decay(0.))
for i, input in enumerate(inputs):
mbox_loc = fluid.layers.conv2d(input, 8, 3, 1, 1, bias_attr=b_attr)
face_loc, head_loc = fluid.layers.split(
mbox_loc, num_or_sections=2, dim=1)
face_loc = permute_and_reshape(face_loc, 4)
head_loc = permute_and_reshape(head_loc, 4)
mbox_conf = fluid.layers.conv2d(input, 6, 3, 1, 1, bias_attr=b_attr)
face_conf1, face_conf3, head_conf = fluid.layers.split(
mbox_conf, num_or_sections=[1, 3, 2], dim=1)
face_conf3_maxin = fluid.layers.reduce_max(
face_conf3, dim=1, keep_dim=True)
face_conf = fluid.layers.concat(
[face_conf1, face_conf3_maxin], axis=1)
face_conf = permute_and_reshape(face_conf, 2)
head_conf = permute_and_reshape(head_conf, 2)
face_locs.append(face_loc)
face_confs.append(face_conf)
head_locs.append(head_loc)
head_confs.append(head_conf)
box, var = fluid.layers.prior_box(
input,
self.image,
min_sizes=[self.min_sizes[1]],
steps=[self.steps[i]] * 2,
aspect_ratios=[1.],
offset=0.5)
box = fluid.layers.reshape(box, shape=[-1, 4])
var = fluid.layers.reshape(var, shape=[-1, 4])
boxes.append(box)
vars.append(var)
self.face_mbox_loc = fluid.layers.concat(face_locs, axis=1)
self.face_mbox_conf = fluid.layers.concat(face_confs, axis=1)
self.head_mbox_loc = fluid.layers.concat(head_locs, axis=1)
self.head_mbox_conf = fluid.layers.concat(head_confs, axis=1)
self.prior_boxes = fluid.layers.concat(boxes)
self.box_vars = fluid.layers.concat(vars)
def vgg_ssd(self, num_classes, image_shape): # tangxu
self.conv3_norm = self._l2_norm_scale(self.conv3)
self.conv4_norm = self._l2_norm_scale(self.conv4)
self.conv5_norm = self._l2_norm_scale(self.conv5)
mbox_locs, mbox_confs, box, box_var = fluid.layers.multi_box_head(
inputs=[
self.conv3_norm, self.conv4_norm, self.conv5_norm, self.conv6,
self.conv7, self.conv8
],
image=self.image,
num_classes=num_classes,
# min_ratio=20,
# max_ratio=90,
min_sizes=[16.0, 32.0, 64.0, 128.0, 256.0, 512.0],
max_sizes=[[], [], [], [], [], []],
# max_sizes=[[], 150.0, 195.0, 240.0, 285.0, 300.0],
aspect_ratios=[[1.], [1.], [1.], [1.], [1.], [1.]],
steps=[4.0, 8.0, 16.0, 32.0, 64.0, 128.0],
base_size=image_shape[2],
offset=0.5,
flip=False)
loss = fluid.layers.ssd_loss(mbox_locs, mbox_confs, self.gt_box,
self.gt_label, box, box_var)
loss = fluid.layers.reduce_sum(loss)
return loss
def train(self):
face_loss = fluid.layers.ssd_loss(
self.face_mbox_loc, self.face_mbox_conf, self.gt_box, self.gt_label,
self.prior_boxes, self.box_vars)
head_loss = fluid.layers.ssd_loss(
self.head_mbox_loc, self.head_mbox_conf, self.gt_box, self.gt_label,
self.prior_boxes, self.box_vars)
return face_loss, head_loss
def test(self):
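        # Clone the main program in inference mode, then append NMS outputs and
        # mAP evaluators to the clone so the training graph stays untouched.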
test_program = fluid.default_main_program().clone(for_test=True)
with fluid.program_guard(test_program):
face_nmsed_out = fluid.layers.detection_output(
self.face_mbox_loc,
self.face_mbox_conf,
self.prior_boxes,
self.box_vars,
nms_threshold=0.45)
head_nmsed_out = fluid.layers.detection_output(
self.head_mbox_loc,
self.head_mbox_conf,
self.prior_boxes,
self.box_vars,
nms_threshold=0.45)
face_map_eval = fluid.evaluator.DetectionMAP(
face_nmsed_out,
self.gt_label,
self.gt_box,
class_num=2,
overlap_threshold=0.5,
ap_version='11point')
head_map_eval = fluid.evaluator.DetectionMAP(
head_nmsed_out,
self.gt_label,
self.gt_box,
class_num=2,
overlap_threshold=0.5,
ap_version='11point')
return test_program, face_map_eval, head_map_eval
# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import image_util
from paddle.utils.image_util import *
import random
from PIL import Image
from PIL import ImageDraw
import numpy as np
import xml.etree.ElementTree
import os
import time
import copy
class Settings(object):
def __init__(self,
dataset=None,
data_dir=None,
label_file=None,
resize_h=300,
resize_w=300,
mean_value=[127.5, 127.5, 127.5],
apply_distort=True,
apply_expand=True,
ap_version='11point',
toy=0):
self._dataset = dataset
self._ap_version = ap_version
self._toy = toy
self._data_dir = data_dir
self._apply_distort = apply_distort
self._apply_expand = apply_expand
self._resize_height = resize_h
self._resize_width = resize_w
self._img_mean = np.array(mean_value)[:, np.newaxis, np.newaxis].astype(
'float32')
self._expand_prob = 0.5
self._expand_max_ratio = 4
self._hue_prob = 0.5
self._hue_delta = 18
self._contrast_prob = 0.5
self._contrast_delta = 0.5
self._saturation_prob = 0.5
self._saturation_delta = 0.5
self._brightness_prob = 0.5
self._brightness_delta = 0.125
@property
def dataset(self):
return self._dataset
@property
def ap_version(self):
return self._ap_version
@property
def toy(self):
return self._toy
    @property
    def apply_expand(self):
        return self._apply_expand
@property
def apply_distort(self):
return self._apply_distort
@property
def data_dir(self):
return self._data_dir
@data_dir.setter
def data_dir(self, data_dir):
self._data_dir = data_dir
@property
def label_list(self):
return self._label_list
@property
def resize_h(self):
return self._resize_height
@property
def resize_w(self):
return self._resize_width
@property
def img_mean(self):
return self._img_mean
def preprocess(img, bbox_labels, mode, settings):
img_width, img_height = img.size
sampled_labels = bbox_labels
if mode == 'train':
if settings._apply_distort:
img = image_util.distort_image(img, settings)
if settings._apply_expand:
img, bbox_labels, img_width, img_height = image_util.expand_image(
img, bbox_labels, img_width, img_height, settings)
# sampling
batch_sampler = []
# hard-code here
batch_sampler.append(
image_util.sampler(1, 50, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0))
batch_sampler.append(
image_util.sampler(1, 50, 0.3, 1.0, 1.0, 1.0, 1.0, 1.0))
batch_sampler.append(
image_util.sampler(1, 50, 0.3, 1.0, 1.0, 1.0, 1.0, 1.0))
batch_sampler.append(
image_util.sampler(1, 50, 0.3, 1.0, 1.0, 1.0, 1.0, 1.0))
batch_sampler.append(
image_util.sampler(1, 50, 0.3, 1.0, 1.0, 1.0, 1.0, 1.0))
sampled_bbox = image_util.generate_batch_samples(batch_sampler,
bbox_labels)
img = np.array(img)
if len(sampled_bbox) > 0:
idx = int(random.uniform(0, len(sampled_bbox)))
img, sampled_labels = image_util.crop_image(
img, bbox_labels, sampled_bbox[idx], img_width, img_height)
img = Image.fromarray(img)
img = img.resize((settings.resize_w, settings.resize_h), Image.ANTIALIAS)
img = np.array(img)
if mode == 'train':
mirror = int(random.uniform(0, 2))
if mirror == 1:
img = img[:, ::-1, :]
for i in xrange(len(sampled_labels)):
tmp = sampled_labels[i][1]
sampled_labels[i][1] = 1 - sampled_labels[i][3]
sampled_labels[i][3] = 1 - tmp
# HWC to CHW
if len(img.shape) == 3:
img = np.swapaxes(img, 1, 2)
img = np.swapaxes(img, 1, 0)
    # RGB to BGR
    img = img[[2, 1, 0], :, :]
img = img.astype('float32')
img -= settings.img_mean
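    # 0.007843 ~= 1 / 127.5: after mean subtraction this scales pixels to roughly [-1, 1]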
img = img * 0.007843
return img, sampled_labels
def put_txt_in_dict(input_txt):
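    # Parse a WIDER FACE annotation file: a line containing '--' names an image,
    # and the lines that follow hold its face-box coordinates.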
with open(input_txt, 'r') as f_dir:
lines_input_txt = f_dir.readlines()
dict_input_txt = {}
num_class = 0
for i in range(len(lines_input_txt)):
tmp_line_txt = lines_input_txt[i].strip('\n\t\r')
if '--' in tmp_line_txt:
if i != 0:
num_class += 1
dict_input_txt[num_class] = []
dict_input_txt[num_class].append(tmp_line_txt)
if '--' not in tmp_line_txt:
if len(tmp_line_txt) > 6:
                split_str = tmp_line_txt.split(' ')
                # WIDER FACE boxes are annotated as "x y w h"
                x_min = float(split_str[0])
                y_min = float(split_str[1])
                w = float(split_str[2])
                h = float(split_str[3])
                tmp_line_txt = str(x_min) + ' ' + str(y_min) + ' ' + str(
                    w) + ' ' + str(h)
dict_input_txt[num_class].append(tmp_line_txt)
else:
dict_input_txt[num_class].append(tmp_line_txt)
return dict_input_txt
def pyramidbox(settings, file_list, mode, shuffle):
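    # Build a WIDER FACE reader: each record yields a preprocessed image plus its
    # normalized ground-truth boxes, labels, and difficult flags.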
    dict_input_txt = put_txt_in_dict(file_list)
def reader():
        # dict_input_txt is keyed 0..N-1; shuffle the keys rather than the dict itself
        image_indices = list(range(len(dict_input_txt)))
        if mode == 'train' and shuffle:
            random.shuffle(image_indices)
        for index_image in image_indices:
image_name = dict_input_txt[index_image][0] + '.jpg'
image_path = os.path.join(settings.data_dir, image_name)
im = Image.open(image_path)
if im.mode == 'L':
im = im.convert('RGB')
im_width, im_height = im.size
# layout: category_id | xmin | ymin | xmax | ymax | iscrowd
bbox_labels = []
for index_box in range(len(dict_input_txt[index_image])):
if index_box >= 2:
bbox_sample = []
temp_info_box = dict_input_txt[index_image][
index_box].split(' ')
xmin = float(temp_info_box[0])
ymin = float(temp_info_box[1])
w = float(temp_info_box[2])
h = float(temp_info_box[3])
xmax = xmin + w
ymax = ymin + h
bbox_sample.append(float(xmin) / im_width)
bbox_sample.append(float(ymin) / im_height)
bbox_sample.append(float(xmax) / im_width)
bbox_sample.append(float(ymax) / im_height)
bbox_labels.append(bbox_sample)
im, sample_labels = preprocess(im, bbox_labels, mode, settings)
sample_labels = np.array(sample_labels)
if len(sample_labels) == 0: continue
im = im.astype('float32')
boxes = sample_labels[:, 0:4]
lbls = [1] * len(boxes)
difficults = [1] * len(boxes)
yield im, boxes, lbls, difficults
return reader
def train(settings, file_list, shuffle=True):
return pyramidbox(settings, file_list, 'train', shuffle)
def test(settings, file_list):
return pyramidbox(settings, file_list, 'test', False)
import os
import shutil
import numpy as np
import time
import argparse
import functools
import reader
import paddle
import paddle.fluid as fluid
from pyramidbox import PyramidBox
from utility import add_arguments, print_arguments
parser = argparse.ArgumentParser(description=__doc__)
add_arg = functools.partial(add_arguments, argparser=parser)
# yapf: disable
add_arg('parallel', bool, True, "Whether to use ParallelExecutor.")
add_arg('learning_rate', float, 0.0001, "Learning rate.")
add_arg('batch_size', int, 16, "Minibatch size.")
add_arg('num_passes', int, 120, "Epoch number.")
add_arg('use_gpu', bool, True, "Whether to use GPU.")
add_arg('dataset', str, 'WIDERFACE', "The dataset to train on.")
add_arg('model_save_dir', str, 'model', "The path to save model.")
add_arg('pretrained_model', str, './vgg_model/', "The init model path.")
add_arg('resize_h', int, 640, "The resized image height.")
add_arg('resize_w', int, 640, "The resized image width.")
# yapf: enable
def train(args, data_args, learning_rate, batch_size, pretrained_model,
num_passes):
num_classes = 2
devices = os.getenv("CUDA_VISIBLE_DEVICES") or ""
devices_num = len(devices.split(","))
image_shape = [3, data_args.resize_h, data_args.resize_w]
network = PyramidBox(image_shape)
loss = network.vgg_ssd(num_classes, image_shape)
    # 12880 = number of images in the WIDER FACE training split,
    # so this is the number of batches per pass
    batches_per_pass = 12880 / batch_size
    boundaries = [batches_per_pass * 100, batches_per_pass * 125,
                  batches_per_pass * 150]
values = [
learning_rate, learning_rate * 0.1, learning_rate * 0.01,
learning_rate * 0.001
]
optimizer = fluid.optimizer.RMSProp(
learning_rate=fluid.layers.piecewise_decay(boundaries, values),
regularization=fluid.regularizer.L2Decay(0.0005),
)
optimizer.minimize(loss)
place = fluid.CUDAPlace(0) if args.use_gpu else fluid.CPUPlace()
exe = fluid.Executor(place)
exe.run(fluid.default_startup_program())
# fluid.io.save_inference_model('./vgg_model/', ['image'], [loss], exe)
if pretrained_model:
def if_exist(var):
return os.path.exists(os.path.join(pretrained_model, var.name))
print('Load pre-trained model.')
fluid.io.load_vars(exe, pretrained_model, predicate=if_exist)
if args.parallel:
train_exe = fluid.ParallelExecutor(
use_cuda=args.use_gpu, loss_name=loss.name)
train_reader = paddle.batch(
reader.train(data_args, train_file_list), batch_size=batch_size)
feeder = fluid.DataFeeder(
place=place,
feed_list=[
network.image, network.gt_box, network.gt_label, network.difficult
])
def save_model(postfix):
model_path = os.path.join(model_save_dir, postfix)
if os.path.isdir(model_path):
shutil.rmtree(model_path)
        print('save models to %s' % (model_path))
fluid.io.save_persistables(exe, model_path)
best_map = 0.
for pass_id in range(num_passes):
start_time = time.time()
prev_start_time = start_time
end_time = 0
for batch_id, data in enumerate(train_reader()):
prev_start_time = start_time
start_time = time.time()
if len(data) < devices_num: continue
if args.parallel:
loss_v, = train_exe.run(fetch_list=[loss.name],
feed=feeder.feed(data))
else:
loss_v, = exe.run(fluid.default_main_program(),
feed=feeder.feed(data),
fetch_list=[loss])
end_time = time.time()
loss_v = np.mean(np.array(loss_v))
            if batch_id % 1 == 0:  # log every batch; raise the modulus to log less often
print("Pass {0}, batch {1}, loss {2}, time {3}".format(
pass_id, batch_id, loss_v, start_time - prev_start_time))
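        # NOTE: `test` here refers to a per-pass evaluation routine (updating
        # best_map) that is expected to be defined alongside this script; it is
        # not shown in this snippet.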
test(pass_id, best_map)
if pass_id % 10 == 0 or pass_id == num_passes - 1:
save_model(str(pass_id))
print("Best test map {0}".format(best_map))
if __name__ == '__main__':
args = parser.parse_args()
print_arguments(args)
data_dir = 'data/WIDERFACE/WIDER_train/images/'
train_file_list = 'label/train_gt_widerface.res'
val_file_list = 'label/val_gt_widerface.res'
model_save_dir = args.model_save_dir
data_args = reader.Settings(
dataset=args.dataset,
data_dir=data_dir,
resize_h=args.resize_h,
resize_w=args.resize_w,
ap_version='11point')
train(
args,
data_args=data_args,
        learning_rate=args.learning_rate,
batch_size=args.batch_size,
pretrained_model=args.pretrained_model,
num_passes=args.num_passes)
"""A dummy reader for test."""
# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve.
#
#Licensed under the Apache License, Version 2.0 (the "License");
......@@ -13,40 +12,48 @@
#See the License for the specific language governing permissions and
#limitations under the License.
import numpy as np
import paddle.v2 as paddle
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import distutils.util
DATA_SHAPE = [1, 512, 512]
NUM_CLASSES = 20
def print_arguments(args):
"""Print argparse's arguments.
def _read_creater(num_sample=1024, min_seq_len=1, max_seq_len=10):
def reader():
for i in range(num_sample):
sequence_len = np.random.randint(min_seq_len, max_seq_len)
x = np.random.uniform(0.1, 1, DATA_SHAPE).astype("float32")
y = np.random.randint(0, NUM_CLASSES + 1,
[sequence_len]).astype("int32")
yield x, y
Usage:
return reader
.. code-block:: python
parser = argparse.ArgumentParser()
parser.add_argument("name", default="Jonh", type=str, help="User name.")
args = parser.parse_args()
print_arguments(args)
def train(batch_size, num_sample=128):
"""Get train dataset reader."""
return paddle.batch(_read_creater(num_sample=num_sample), batch_size)
:param args: Input argparse.Namespace for printing.
:type args: argparse.Namespace
"""
print("----------- Configuration Arguments -----------")
for arg, value in sorted(vars(args).iteritems()):
print("%s: %s" % (arg, value))
print("------------------------------------------------")
def test(batch_size=1, num_sample=16):
"""Get test dataset reader."""
return paddle.batch(_read_creater(num_sample=num_sample), batch_size)
def add_arguments(argname, type, default, help, argparser, **kwargs):
"""Add argparse's argument.
Usage:
def data_shape():
"""Get image shape in CHW order."""
return DATA_SHAPE
.. code-block:: python
def num_classes():
"""Get number of total classes."""
return NUM_CLASSES
parser = argparse.ArgumentParser()
add_argument("name", str, "Jonh", "User name.", parser)
args = parser.parse_args()
"""
type = distutils.util.strtobool if type == bool else type
argparser.add_argument(
"--" + argname,
default=default,
type=type,
help=help + ' Default: %(default)s.',
**kwargs)
This model, built with PaddlePaddle Fluid, is still under active development and is not
the final version. We welcome feedback.
## Introduction
The current code supports training [SE-ResNeXt](https://arxiv.org/abs/1709.01507) (50/152 layers).
## Data Preparation
1. Download ImageNet-2012 dataset
```
cd data/
mkdir -p ILSVRC2012/
cd ILSVRC2012/
# get training set
wget http://www.image-net.org/challenges/LSVRC/2012/nnoupb/ILSVRC2012_img_train.tar
# get validation set
wget http://www.image-net.org/challenges/LSVRC/2012/nnoupb/ILSVRC2012_img_val.tar
# prepare directory
tar xf ILSVRC2012_img_train.tar
tar xf ILSVRC2012_img_val.tar
# untar each class's archive using unzip.sh
sh unzip.sh
```
2. Download the training and validation label files from [ImageNet2012 url](https://pan.baidu.com/s/1Y6BCo0nmxsm_FsEqmx2hKQ) (password: ```wx99```). Untar them into the workspace ```ILSVRC2012/```. The files include
**train_list.txt**: the training list for the ImageNet-2012 classification task, with each line holding an image path and its label, separated by a space.
```
train/n02483708/n02483708_2436.jpeg 369
train/n03998194/n03998194_7015.jpeg 741
train/n04523525/n04523525_38118.jpeg 884
train/n04596742/n04596742_3032.jpeg 909
train/n03208938/n03208938_7065.jpeg 535
...
```
**val_list.txt**: the validation list for the ImageNet-2012 classification task, with each line holding an image path and its label, separated by a space.
```
val/ILSVRC2012_val_00000001.jpeg 65
val/ILSVRC2012_val_00000002.jpeg 970
val/ILSVRC2012_val_00000003.jpeg 230
val/ILSVRC2012_val_00000004.jpeg 809
val/ILSVRC2012_val_00000005.jpeg 516
...
```
**synset_words.txt**: the semantic label of each class.
## Training a model
To start a training task, one can use the command line as follows:
```
python train.py --num_layers=50 --batch_size=8 --with_mem_opt=True --parallel_exe=False
```
## Finetune a model
```
python train.py --num_layers=50 --batch_size=8 --with_mem_opt=True --parallel_exe=False --pretrained_model="pretrain/96/"
```
TBD
## Inference
```
python infer.py --num_layers=50 --batch_size=8 --model='model/90' --test_list=''
```
TBD
## Results
The SE-ResNeXt-50 model is trained by starting with learning rate ```0.1``` and decaying it by a factor of ```0.1``` every ```10``` epochs. Top-1/Top-5 validation accuracy on ImageNet 2012 is listed in the table below; a sketch of the decay schedule follows the table.
| model | [original paper (Fig. 5)](https://arxiv.org/abs/1709.01507) | PyTorch | Paddle Fluid |
| - | :-: | :-: | -: |
| SE-ResNeXt-50 | 77.6%/- | 77.71%/93.63% | 77.42%/93.50% |
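The decay schedule above maps directly onto Fluid's `piecewise_decay`. Below is a minimal sketch, assuming a batch size of 256 and the standard 1,281,167-image training set (an illustration only, not this repo's exact `train.py`):
```
import paddle.fluid as fluid

base_lr = 0.1
batches_per_epoch = 1281167 // 256  # assumed: train-set size / batch size
boundaries = [batches_per_epoch * e for e in (10, 20, 30)]
values = [base_lr * 0.1**i for i in range(len(boundaries) + 1)]  # 0.1, 0.01, 0.001, 0.0001
optimizer = fluid.optimizer.Momentum(
    learning_rate=fluid.layers.piecewise_decay(boundaries=boundaries, values=values),
    momentum=0.9)
```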
## Released models
| model | Baidu Cloud |
| - | -: |
| SE-ResNeXt-50 | [url]() |
TBD
### Caffe2Fluid
This tool is used to convert a Caffe model to a Fluid model

### Key Features
1. Converts a Caffe model to a Fluid model together with the code that defines the network (useful for re-training)
2. pycaffe is not required if you only want to convert a model without doing Caffe inference
3. Caffe's customized layers can also be converted by extending this tool
4. A bunch of tools in `examples/imagenet/tools` are provided to compare the outputs of the two models

### HowTo
1. Prepare `caffepb.py` in `./proto` if your python has no `pycaffe` module, two options provided here:
- Generate pycaffe from caffe.proto
- Generate pycaffe from caffe.proto
```
bash ./proto/compile.sh
```
- Download one from github directly
```
cd proto/ && wget https://github.com/ethereon/caffe-tensorflow/blob/master/kaffe/caffe/caffepb.py
```
2. Convert the Caffe model to Fluid model
- Generate fluid code and weight file
```
python convert.py alexnet.prototxt \
--caffemodel alexnet.caffemodel \
--data-output-path alexnet.npy \
--code-output-path alexnet.py
```
- Save weights as fluid model file
```
# only infer the last layer's result
python alexnet.py alexnet.npy ./fluid
# infer these 2 layer's result
python alexnet.py alexnet.npy ./fluid fc8,prob
```
3. Use the converted model to infer
- See more details in `examples/imagenet/tools/run.sh`
4. Compare the inference results with caffe
- See more details in `examples/imagenet/tools/diff.sh`
### How to convert custom layer
1. Implement your custom layer in a file under `kaffe/custom_layers`, e.g. mylayer.py
   - Implement `shape_func(input_shape, [other_caffe_params])` to calculate the output shape
   - Implement `layer_func(inputs, name, [other_caffe_params])` to construct a fluid layer
   - Register these two functions: `register(kind='MyType', shape=shape_func, layer=layer_func)`
   - Notes: more examples can be found in `kaffe/custom_layers`; a minimal sketch follows this list
2. Add `import mylayer` to `kaffe/custom_layers/__init__.py`
3. Prepare your pycaffe as your customized version (same as the env preparation above)
   - (option1) replace `proto/caffe.proto` with your own caffe.proto and compile it
   - (option2) change your `pycaffe` to the customized version
4. Convert the Caffe model to a Fluid model
5. Set env $CAFFE2FLUID_CUSTOM_LAYERS to the parent directory of 'custom_layers'
```
export CAFFE2FLUID_CUSTOM_LAYERS=/path/to/caffe2fluid/kaffe
```
6. Use the converted model when loading model in `xxxnet.py` and `xxxnet.npy`(no need if model is already in `fluid/model` and `fluid/params`)
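Putting steps 1 and 2 together, a hypothetical `kaffe/custom_layers/mylayer.py` could look like the sketch below. The `register` import path and the toy pass-through op are assumptions for illustration, not code shipped with this repo:
```
import paddle.fluid as fluid
from .register import register  # assumed registration helper in kaffe/custom_layers


def mylayer_shape(input_shape, scale=1.0):
    # the toy layer keeps the input shape unchanged
    return input_shape


def mylayer_layer(inputs, name, scale=1.0):
    # map the custom Caffe op onto an equivalent fluid op
    return fluid.layers.scale(inputs, scale=scale, name=name)


register(kind='MyType', shape=mylayer_shape, layer=mylayer_layer)
```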
### Tested models
- Lenet: [model addr](https://github.com/ethereon/caffe-tensorflow/blob/master/examples/mnist)
- ResNets (ResNet-50, ResNet-101, ResNet-152): [model addr](https://onedrive.live.com/?authkey=%21AAFW2-FVoxeVRck&id=4006CBB8476FF777%2117887&cid=4006CBB8476FF777)
- GoogleNet: [model addr](https://gist.github.com/jimmie33/7ea9f8ac0da259866b854460f4526034)
- VGG: [model addr](https://gist.github.com/ksimonyan/211839e770f7b538e2d8)
- AlexNet: [model addr](https://github.com/BVLC/caffe/tree/master/models/bvlc_alexnet)
### Notes
Some of this code comes from [caffe-tensorflow](https://github.com/ethereon/caffe-tensorflow)
def convert(def_path, caffemodel_path, data_output_path, code_output_path,
print_stderr('Saving source...')
with open(code_output_path, 'wb') as src_out:
src_out.write(transformer.transform_source())
        print_stderr('set env variable before using the converted model '
                     'if it uses custom_layers:')
custom_pk_path = os.path.dirname(os.path.abspath(__file__))
custom_pk_path = os.path.join(custom_pk_path, 'kaffe')
print_stderr('export CAFFE2FLUID_CUSTOM_LAYERS=%s' % (custom_pk_path))
print_stderr('Done.')
return 0
except KaffeError as err:
fatal_error('Error encountered: {}'.format(err))
        return 1
def main():
......
A demo to show converting caffe models trained on 'imagenet' using caffe2fluid
---
# How to use
1. Prepare python environment
2. Download caffe model to "models.caffe/xxx" which contains "xxx.caffemodel" and "xxx.prototxt"
3. Convert the Caffe model to a Fluid model
- Generate fluid code and weight file
```
python convert.py alexnet.prototxt \
--caffemodel alexnet.caffemodel \
--data-output-path alexnet.npy \
--code-output-path alexnet.py
```
- Save weights as fluid model file
```
python alexnet.py alexnet.npy ./fluid
```
4. Do inference
```
python infer.py infer ./fluid data/65.jpeg
```
5. Convert the model and do inference together
```
bash ./tools/run.sh alexnet ./models.caffe/alexnet ./models/alexnet
```
* Assume the Caffe model is stored in '*./models.caffe/alexnet/alexnet.prototxt|caffemodel*'
* converted model will be stored as '*./models/alexnet/alexnet.py|npy*'
6. Test the difference with caffe's results (pycaffe required)
```
bash ./tools/diff.sh resnet
```
* Make sure your caffemodel is stored in '*./models.caffe/resnet*'
* The results will be stored in '*./results/resnet.paddle|caffe*'
#!/usr/bin/python
#
#a tool to compare tensors in two files or two directories
#
import sys
import os
def walk_dir(rootdir):
for subdir, dirs, files in os.walk(rootdir):
for file in files:
yield file
def calc_diff(f1, f2):
import numpy as np
d1 = np.load(f1)
d2 = np.load(f2)
    print d1.shape
    print d2.shape
    d1 = d1.flatten()
    d2 = d2.flatten()
d1_num = reduce(lambda x, y: x * y, d1.shape)
d2_num = reduce(lambda x, y: x * y, d2.shape)
if d1_num != d2_num:
print d1.shape
print d2.shape
    assert (d1_num == d2_num), "their shapes are not consistent"
try:
df = np.abs(d1 - d2)
max_df = np.max(df)
sq_df = np.mean(df * df)
return max_df, sq_df
except Exception as e:
return -1.0, -1.0
def compare(path1, path2, no_exception):
def diff(f1, f2):
max_df, sq_df = calc_diff(f1, f2)
print('[max_df:%.4e, sq_df:%.4e] when compare %s <=> %s' %
(max_df, sq_df, os.path.basename(f1), os.path.basename(f2)))
if no_exception is False:
assert (max_df < 1e-5), \
'max_df is too large with value[%.6e]' % (max_df)
assert (sq_df < 1e-10), \
'sq_df is too large with value[%.6e]' % (sq_df)
if os.path.exists(path1) is False:
print('not found %s' % (path1))
return 1
elif os.path.exists(path2) is False:
print('not found %s' % (path2))
return 1
if path1.find('.npy') > 0 and path2.find('.npy') > 0:
        diff(path1, path2)
        return 0
for f in walk_dir(path2):
if f.find('.npy') < 0:
continue
f1 = os.path.join(path1, f)
f2 = os.path.join(path2, f)
diff(f1, f2)
    print('all checks passed')
return 0
if __name__ == "__main__":
if len(sys.argv) == 1:
path1 = 'lenet.tf/results'
path2 = 'lenet.paddle/results'
elif len(sys.argv) >= 3:
path1 = sys.argv[1]
path2 = sys.argv[2]
if len(sys.argv) == 4:
no_exception = True
else:
no_exception = False
else:
print('usage:')
print(' %s [path1] [path2]' % (sys.argv[0]))
exit(1)
exit(compare(path1, path2, no_exception))
#!/bin/bash
#
#function:
# a tool used to compare the results produced by paddle and caffe
#
if [[ $# -lt 2 ]];then
echo "usage:"
echo " bash $0 [model_name] [param_name] [caffe_name]"
exit 1
fi
model_name=$1
param_name=$2
paddle_file="./results/${model_name}.paddle/${param_name}.npy"
if [[ $# -eq 3 ]];then
caffe_file="./results/${model_name}.caffe/${3}.npy"
else
caffe_file="./results/${model_name}.caffe/${2}.npy"
fi
python ./compare.py $paddle_file $caffe_file
#!/bin/bash
#function:
# a tool used to compare all layers' results
#
if [[ $# -ne 1 ]];then
echo "usage:"
echo " bash $0 [model_name]"
echo " eg:bash $0 alexnet"
exit 1
fi
model_name=$1
prototxt="models.caffe/$model_name/${model_name}.prototxt"
layers=$(cat $prototxt | perl -ne 'if(/^\s+name\s*:\s*\"([^\"]+)/){print $1."\n";}')
for i in $layers;do
cf_npy="results/${model_name}.caffe/${i}.npy"
pd_npy="results/${model_name}.paddle/${i}.npy"
if [[ ! -e $cf_npy ]];then
echo "caffe's result not exist[$cf_npy]"
continue
fi
if [[ ! -e $pd_npy ]];then
echo "paddle's result not exist[$pd_npy]"
continue
fi
python compare.py $cf_npy $pd_npy no_exception
if [[ $? -eq 0 ]];then
echo "succeed to compare layer[$i]"
else
echo "failed to compare layer[$i]"
fi
done
#!/bin/bash
#
#function:
# a tool used to check the difference of models' results generated by caffe model and paddle model
#
#howto:
# bash diff.sh resnet50 #when this has been finished, you can get the difference in precision
#
#notes:
# 0, in order to infer using caffe, we need pycaffe installed
# 1, prepare your caffe model in 'models.caffe/', eg: 'model.caffe/resnet101/resnet101.[prototxt|caffemodel]'
# 2, converted paddle model will be in 'models'
# 3, results of layers will be stored in 'results/${model_name}.[paddle|caffe]'
# 4, only the last layer will be checked by default
model_name="resnet50"
results_root="results/"
if [[ -n $1 ]];then
if [ $1 = "-h" ];then
echo "usage:"
echo " bash $0 [model_name]"
echo " eg:bash $0 resnet50"
exit 0
fi
model_name=$1
fi
mkdir -p $results_root
model_prototxt="models.caffe/$model_name/${model_name}.prototxt"
model_caffemodel="models.caffe/${model_name}/${model_name}.caffemodel"
#1, dump layers' results from paddle
paddle_results="$results_root/${model_name}.paddle"
rm -rf $paddle_results
rm -rf "results.paddle"
bash ./tools/run.sh $model_name ./models.caffe/$model_name ./models/$model_name
if [[ $? -ne 0 ]] || [[ ! -e "results.paddle" ]];then
echo "not found paddle's results, maybe failed to convert"
exit 1
fi
mv results.paddle $paddle_results
#2, dump layers' results from caffe
caffe_results="$results_root/${model_name}.caffe"
rm -rf $caffe_results
rm -rf "results.caffe"
PYTHON=`which cfpython`
if [[ -z $PYTHON ]];then
PYTHON=`which python`
fi
$PYTHON ./infer.py caffe $model_prototxt $model_caffemodel $paddle_results/data.npy
if [[ $? -ne 0 ]] || [[ ! -e "results.caffe" ]];then
echo "not found caffe's results, maybe failed to do inference with caffe"
exit 1
fi
mv results.caffe $caffe_results
#3, extract layer names
cat $model_prototxt | grep name | perl -ne 'if(/^\s*name:\s+\"([^\"]+)/){ print $1."\n";}' >.layer_names
#4, compare the last layer's result (see note 4 in the header)
for i in $(cat ".layer_names" | tail -n1);do
echo "process $i"
$PYTHON compare.py $caffe_results/${i}.npy $paddle_results/${i}.npy
done
#function:
# a tool used to:
# 1, convert a caffe model
# 2, do inference(only in fluid) using this model
#
#usage:
# cd caffe2fluid/examples/imagenet && bash run.sh resnet50 ./models.caffe/resnet50 ./models/resnet50
#
#set -x
if [[ -z $only_convert ]];then
PYTHON=`which python`
fi
imgfile="data/65.jpeg"
#FIX ME:
# only look the first line in prototxt file for the name of this network, maybe not correct
net_name=`grep "name" $proto_file | head -n1 | perl -ne 'if(/^name\s*:\s*\"([^\"]+)\"/){ print $1."\n";}'`
if [[ -z $net_name ]];then
net_name="MyNet"
fi
$PYTHON ./infer.py dump $net_file $weight_file $imgfile $net_name
ret=$?
fi
exit $ret
import sys
import os
import numpy as np
import paddle.fluid as fluid
import paddle.v2 as paddle
def test_model(exe, test_program, fetch_list, test_reader, feeder):
def evaluate(net_file, model_file):
from lenet import LeNet as MyNet
#1, define network topology
images = fluid.layers.data(name='image', shape=[1, 28, 28], dtype='float32')
label = fluid.layers.data(name='label', shape=[1], dtype='int64')
......@@ -45,7 +42,7 @@ def evaluate(net_file, model_file):
prediction = net.layers['prob']
acc = fluid.layers.accuracy(input=prediction, label=label)
place = fluid.CPUPlace()
exe = fluid.Executor(place)
exe.run(fluid.default_startup_program())
......
File mode changed from 100644 to 100755
def import_caffepb():
p = os.path.dirname(p)
p = os.path.join(p, '../../proto')
sys.path.insert(0, p)
import caffe_pb2
return caffe_pb2
class CaffeResolver(object):
......