提交 2b553441 编写于 作者: G guosheng

Merge branch 'develop' of https://github.com/PaddlePaddle/models into...

Merge branch 'develop' of https://github.com/PaddlePaddle/models into add-transformer-BeamsearchDecoder-dev
export CUDA_VISIBLE_DEVICES=2,3,4,5
python -u ../../tools/profile.py --feature_lst data/train_feature.lst \
--label_lst data/train_label.lst \
--mean_var data/aishell/global_mean_var \
--parallel \
--frame_dim 2640 \
--class_num 101 \
#-*- coding: utf-8 -*-
#File: DQN.py
from agent import Model
import gym
import argparse
from tqdm import tqdm
from expreplay import ReplayMemory, Experience
import numpy as np
import os
UPDATE_FREQ = 4
MEMORY_WARMUP_SIZE = 1000
def run_episode(agent, env, exp, train_or_test):
    """Play a single episode and return its accumulated reward.

    Args:
        agent: object exposing ``act(state, mode)`` and
            ``train(state, action, reward, next_state, isOver)``.
        env: environment exposing ``reset()`` and ``step(action)``; ``step``
            must return a 4-tuple ``(next_state, reward, isOver, info)``.
        exp: replay memory supporting ``append``, ``len`` and ``sample``.
            Only touched in ``'train'`` mode.
        train_or_test: ``'train'`` to record transitions and update the
            agent, ``'test'`` to only act.

    Returns:
        The sum of the rewards received during the episode.
    """
    assert train_or_test in ['train', 'test'], train_or_test
    is_training = train_or_test == 'train'
    total_reward = 0
    state = env.reset()
    # An episode is cut off after at most 200 steps.
    for step in range(200):
        action = agent.act(state, train_or_test)
        next_state, reward, isOver, _ = env.step(action)
        if is_training:
            exp.append(Experience(state, action, reward, isOver))
            # Train once the memory holds more than the warm-up amount, and
            # then only on every UPDATE_FREQ-th step of the episode.
            if len(exp) > MEMORY_WARMUP_SIZE and step % UPDATE_FREQ == 0:
                # Draw the indices only when they are actually consumed.
                batch_idx = np.random.randint(
                    len(exp) - 1, size=(args.batch_size))
                batch_state, batch_action, batch_reward, \
                    batch_next_state, batch_isOver = exp.sample(batch_idx)
                agent.train(batch_state, batch_action, batch_reward,
                            batch_next_state, batch_isOver)
        total_reward += reward
        state = next_state
        if isOver:
            break
    return total_reward
def train_agent():
    """Train a DQN agent on ``args.env`` and log a running test reward.

    Reads the module-level ``args`` (``env``, ``mem_size``, ``gamma``; the
    batch size is consumed by ``run_episode``). The replay memory is first
    filled with ``MEMORY_WARMUP_SIZE`` transitions, then 4000 iterations are
    run, each made of one training episode followed by one test episode. The
    mean of the most recent (at most 100) test rewards is written after every
    iteration.
    """
    env = gym.make(args.env)
    state_shape = env.observation_space.shape
    exp = ReplayMemory(args.mem_size, state_shape)
    action_dim = env.action_space.n
    # Use the --gamma command line value; it used to be ignored in favour of
    # a hard-coded 0.99 (which is still the option's default).
    agent = Model(state_shape[0], action_dim, gamma=args.gamma)

    # Fill the replay memory before the measured training loop starts.
    while len(exp) < MEMORY_WARMUP_SIZE:
        run_episode(agent, env, exp, train_or_test='train')

    max_episode = 4000
    pbar = tqdm(total=max_episode)
    recent_100_reward = []
    for episode in range(max_episode):
        # One training episode ...
        run_episode(agent, env, exp, train_or_test='train')
        pbar.set_description('[train]exploration:{}'.format(agent.exploration))
        pbar.update()

        # ... followed by one test episode whose reward feeds the running mean.
        total_reward = run_episode(agent, env, exp, train_or_test='test')
        recent_100_reward.append(total_reward)
        if len(recent_100_reward) > 100:
            recent_100_reward = recent_100_reward[1:]
        pbar.write("episode:{} test_reward:{}".format(
            episode, np.mean(recent_100_reward)))

    pbar.close()
if __name__ == '__main__':
    # Command line entry point: parse the options into the module-level
    # ``args`` read by run_episode/train_agent, then start training.
    parser = argparse.ArgumentParser()
    parser.add_argument(
        '--env',
        type=str,
        default='MountainCar-v0',
        help='environment to train DQN model, e.g. CartPole-v0')
    parser.add_argument(
        '--gamma',
        type=float,
        default=0.99,
        help='discount factor for accumulated reward computation')
    parser.add_argument(
        '--mem_size',
        type=int,
        default=500000,
        help='memory size for experience replay')
    parser.add_argument(
        '--batch_size',
        type=int,
        default=192,
        help='batch size for training')
    args = parser.parse_args()

    train_agent()
<img src="mountain_car.gif" width="300" height="200">
# Reproduce DQN model
+ DQN in:
[Human-level Control Through Deep Reinforcement Learning](http://www.nature.com/nature/journal/v518/n7540/full/nature14236.html)
# Mountain-CAR benchmark & performance
[MountainCar-v0](https://gym.openai.com/envs/MountainCar-v0/)
A car is on a one-dimensional track, positioned between two "mountains". The goal is to drive up the mountain on the right; however, the car's engine is not strong enough to scale the mountain in a single pass. Therefore, the only way to succeed is to drive back and forth to build up momentum.
<img src="curve.png" >
# How to use
+ Dependencies:
+ python2.7
+ gym
+ tqdm
+ paddle-fluid
+ Start Training:
```
# use the mountain-car environment by default
python DQN.py
# use another environment
python DQN.py --env CartPole-v0
```
#-*- coding: utf-8 -*-
#File: agent.py
import paddle.fluid as fluid
from paddle.fluid.param_attr import ParamAttr
import numpy as np
from tqdm import tqdm
import math
UPDATE_TARGET_STEPS = 200
class Model(object):
    """DQN agent made of a policy network, a target network and
    epsilon-greedy action selection.

    Constructing the object builds the fluid programs used for prediction,
    training and target synchronisation, and runs the startup program on
    ``CUDAPlace(0)``.
    """

    # Lower bound of the exploration probability and the amount by which it
    # is reduced on every call to ``act``.
    MIN_EXPLORATION = 0.1
    EXPLORATION_DECAY = 1e-6

    def __init__(self, state_dim, action_dim, gamma):
        """Store the hyper-parameters and build the networks.

        Args:
            state_dim: size of one state vector.
            action_dim: number of discrete actions.
            gamma: discount factor applied to the target network's value.
        """
        self.global_step = 0
        self.state_dim = state_dim
        self.action_dim = action_dim
        self.gamma = gamma
        # Probability of picking a random action in 'train' mode.
        self.exploration = 1.0

        self._build_net()

    def _get_inputs(self):
        """Declare the feed variables, in the order
        state, action, reward, next_s, isOver."""
        return [
            fluid.layers.data(
                name='state', shape=[self.state_dim], dtype='float32'),
            fluid.layers.data(
                name='action', shape=[1], dtype='int32'),
            fluid.layers.data(
                name='reward', shape=[], dtype='float32'),
            fluid.layers.data(
                name='next_s', shape=[self.state_dim], dtype='float32'),
            fluid.layers.data(
                name='isOver', shape=[], dtype='bool'),
        ]

    def _build_net(self):
        """Build the prediction, sync and training programs and the executor."""
        state, action, reward, next_s, isOver = self._get_inputs()
        self.pred_value = self.get_DQN_prediction(state)
        # Cloned here, while the main program only holds the policy forward
        # pass, so act() can run prediction without the training inputs.
        self.predict_program = fluid.default_main_program().clone()

        # Q value of the action that was actually taken.
        action_onehot = fluid.layers.one_hot(action, self.action_dim)
        action_onehot = fluid.layers.cast(action_onehot, dtype='float32')
        pred_action_value = fluid.layers.reduce_sum(
            fluid.layers.elementwise_mul(action_onehot, self.pred_value),
            dim=1)

        # Bootstrapped target: reward + gamma * max_a Q_target(next_s, a),
        # with the second term dropped when the episode is over.
        targetQ_predict_value = self.get_DQN_prediction(next_s, target=True)
        best_v = fluid.layers.reduce_max(targetQ_predict_value, dim=1)
        best_v.stop_gradient = True
        target = reward + (1.0 - fluid.layers.cast(
            isOver, dtype='float32')) * self.gamma * best_v

        cost = fluid.layers.square_error_cost(
            input=pred_action_value, label=target)
        cost = fluid.layers.reduce_mean(cost)

        # NOTE(review): built before minimize(), presumably so variables added
        # by the optimizer are not caught by the name matching - confirm.
        self._sync_program = self._build_sync_target_network()

        optimizer = fluid.optimizer.Adam(1e-3)
        optimizer.minimize(cost)

        # define program
        self.train_program = fluid.default_main_program()

        # fluid exe
        place = fluid.CUDAPlace(0)
        self.exe = fluid.Executor(place)
        self.exe.run(fluid.default_startup_program())

    def get_DQN_prediction(self, state, target=False):
        """Build the 256-128-action_dim fully connected Q network.

        Args:
            state: input variable holding the states.
            target: when true the parameters are named ``target_*``,
                otherwise ``policy_*``.

        Returns:
            The output variable of the last (linear) layer, one value per
            action.
        """
        variable_field = 'target' if target else 'policy'

        # layer fc1
        fc1 = fluid.layers.fc(
            input=state,
            size=256,
            act='relu',
            param_attr=ParamAttr(name='{}_fc1'.format(variable_field)),
            bias_attr=ParamAttr(name='{}_fc1_b'.format(variable_field)))

        # layer fc2
        fc2 = fluid.layers.fc(
            input=fc1,
            size=128,
            act='tanh',
            param_attr=ParamAttr(name='{}_fc2'.format(variable_field)),
            bias_attr=ParamAttr(name='{}_fc2_b'.format(variable_field)))

        # layer fc3: no activation
        value = fluid.layers.fc(
            input=fc2,
            size=self.action_dim,
            param_attr=ParamAttr(name='{}_fc3'.format(variable_field)),
            bias_attr=ParamAttr(name='{}_fc3_b'.format(variable_field)))

        return value

    def _build_sync_target_network(self):
        """Build a program that assigns every ``policy_*`` variable to the
        ``target_*`` variable with the same suffix.

        Raises:
            ValueError: if the numbers of policy and target variables differ,
                in which case pairing them by sorted position would be wrong.
        """
        program_vars = fluid.default_main_program().list_vars()
        policy_vars = []
        target_vars = []
        for var in program_vars:
            if 'GRAD' in var.name:
                continue
            if 'policy' in var.name:
                policy_vars.append(var)
            elif 'target' in var.name:
                target_vars.append(var)

        # Sort both lists by the name suffix so equal positions correspond.
        policy_vars.sort(key=lambda x: x.name.split('policy_')[1])
        target_vars.sort(key=lambda x: x.name.split('target_')[1])
        if len(policy_vars) != len(target_vars):
            raise ValueError(
                'policy/target variable count mismatch: {} vs {}'.format(
                    len(policy_vars), len(target_vars)))

        sync_program = fluid.default_main_program().clone()
        with fluid.program_guard(sync_program):
            sync_ops = [
                fluid.layers.assign(policy_var, target_var)
                for policy_var, target_var in zip(policy_vars, target_vars)
            ]
        # Keep only what the assign ops need.
        sync_program = sync_program.prune(sync_ops)
        return sync_program

    def act(self, state, train_or_test):
        """Choose an action for ``state``.

        In ``'train'`` mode a random action is returned with probability
        ``self.exploration``; otherwise the action with the highest predicted
        Q value is returned. The exploration probability is reduced on every
        call, whatever the mode, and never drops below ``MIN_EXPLORATION``.
        """
        sample = np.random.random()
        if train_or_test == 'train' and sample < self.exploration:
            act = np.random.randint(self.action_dim)
        else:
            # Add a leading batch axis of size one for the network.
            state = np.expand_dims(state, axis=0)
            pred_Q = self.exe.run(self.predict_program,
                                  feed={'state': state.astype('float32')},
                                  fetch_list=[self.pred_value])[0]
            pred_Q = np.squeeze(pred_Q, axis=0)
            act = np.argmax(pred_Q)
        self.exploration = max(self.MIN_EXPLORATION,
                               self.exploration - self.EXPLORATION_DECAY)
        return act

    def train(self, state, action, reward, next_state, isOver):
        """Run one optimisation step on a batch of transitions.

        The target network is synchronised first on every
        ``UPDATE_TARGET_STEPS``-th call, including the very first one.
        """
        if self.global_step % UPDATE_TARGET_STEPS == 0:
            self.sync_target_network()
        self.global_step += 1

        # The 'action' input is declared with shape [1]: add a trailing axis.
        action = np.expand_dims(action, -1)
        self.exe.run(self.train_program,
                     feed={'state': state,
                           'action': action,
                           'reward': reward,
                           'next_s': next_state,
                           'isOver': isOver})

    def sync_target_network(self):
        """Copy the policy network parameters into the target network."""
        self.exe.run(self._sync_program)
#-*- coding: utf-8 -*-
#File: expreplay.py
from collections import namedtuple
import numpy as np
Experience = namedtuple('Experience', ['state', 'action', 'reward', 'isOver'])
class ReplayMemory(object):
    """Fixed-capacity ring buffer of transitions kept in flat numpy arrays.

    Next states are not stored: ``sample`` takes the state held in the slot
    that follows the sampled one, so transitions must be appended in the
    order in which they were observed.
    """

    def __init__(self, max_size, state_shape):
        """Allocate the storage.

        Args:
            max_size: capacity of the buffer (converted with ``int``).
            state_shape: tuple giving the shape of one state.
        """
        self.max_size = int(max_size)
        self.state_shape = state_shape

        self.state = np.zeros((self.max_size, ) + state_shape, dtype='float32')
        self.action = np.zeros((self.max_size, ), dtype='int32')
        self.reward = np.zeros((self.max_size, ), dtype='float32')
        self.isOver = np.zeros((self.max_size, ), dtype='bool')

        # Number of valid entries and index of the slot written next.
        self._curr_size = 0
        self._curr_pos = 0

    def append(self, exp):
        """Store ``exp``, overwriting the oldest entry once the buffer is full."""
        self._assign(self._curr_pos, exp)
        if self._curr_size < self.max_size:
            self._curr_size += 1
        self._curr_pos = (self._curr_pos + 1) % self.max_size

    def _assign(self, pos, exp):
        """Write the fields of ``exp`` into slot ``pos``."""
        self.state[pos] = exp.state
        self.action[pos] = exp.action
        self.reward[pos] = exp.reward
        self.isOver[pos] = exp.isOver

    def __len__(self):
        return self._curr_size

    def sample(self, batch_idx):
        """Return the transitions selected by ``batch_idx``.

        Args:
            batch_idx: integer array of offsets in ``[0, len(self) - 1)``.
                Offsets are counted from the write position, so within that
                range the most recently written entry, which has no
                successor yet, is never selected.

        Returns:
            Tuple ``(state, action, reward, next_state, isOver)`` of arrays.

        Raises:
            ValueError: if fewer than two transitions are stored, because no
                entry then has a successor to serve as its next state.
        """
        if self._curr_size < 2:
            raise ValueError(
                'need at least 2 stored transitions to sample, got {}'.format(
                    self._curr_size))
        # index mapping to avoid sampling the latest state
        batch_idx = (self._curr_pos + batch_idx) % self._curr_size
        next_idx = (batch_idx + 1) % self._curr_size

        state = self.state[batch_idx]
        reward = self.reward[batch_idx]
        action = self.action[batch_idx]
        next_state = self.state[next_idx]
        isOver = self.isOver[batch_idx]
        return (state, action, reward, next_state, isOver)
# 使用ParallelExecutor的中文命名实体识别示例
以下是本例的简要目录结构及说明:
```text
.
├── data # 存储运行本例所依赖的数据,从外部获取
├── reader.py # 数据读取接口, 从外部获取
├── README.md # 文档
├── train.py # 训练脚本
├── infer.py # 预测脚本
```
## 数据
在data目录下,有两个文件夹,train_files中保存的是训练数据,test_files中保存的是测试数据,作为示例,在目录下我们各放置了两个文件,实际训练时,根据自己的实际需要将数据放置在对应目录,并根据数据格式,修改reader.py中的数据读取函数。
## 训练
修改 [train.py](./train.py)`main` 函数,指定数据路径,运行`python train.py`开始训练。
训练记录形如
```txt
pass_id:0, time_cost:4.92960214615s
[Train] precision:0.000862136531076, recall:0.0059880239521, f1:0.00150726226363
[Test] precision:0.000796178343949, recall:0.00335758254057, f1:0.00128713933283
pass_id:1, time_cost:0.715255975723s
[Train] precision:0.00474094141551, recall:0.00762112139358, f1:0.00584551148225
[Test] precision:0.0228873239437, recall:0.00727476217124, f1:0.0110403397028
pass_id:2, time_cost:0.740842103958s
[Train] precision:0.0120967741935, recall:0.00163309744148, f1:0.00287769784173
[Test] precision:0, recall:0.0, f1:0
```
## 预测
修改 [infer.py](./infer.py)`infer` 函数,指定:需要测试的模型的路径、测试数据、预测标记文件的路径,运行`python infer.py`开始预测。
预测结果如下
```txt
152804 O O
130048 O O
38862 10-B O
784 O O
1540 O O
4145 O O
2255 O O
0 O O
1279 O O
7793 O O
373 O O
1621 O O
815 O O
2 O O
247 24-B O
401 24-I O
```
输出分为三列,以"\t"分割,第一列是输入的词语的序号,第二列是标准结果,第三列为标记结果。多条输入序列之间以空行分隔。
24-B
24-I
27-B
27-I
20-B
20-I
21-B
21-I
22-B
22-I
23-B
23-I
28-B
28-I
29-B
29-I
12-B
12-I
11-B
11-I
10-B
10-I
13-B
13-I
38-B
38-I
14-B
14-I
16-B
16-I
33-B
33-I
18-B
18-I
31-B
31-I
30-B
30-I
37-B
37-I
36-B
36-I
35-B
35-I
19-B
19-I
32-B
32-I
O
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
import numpy as np
import paddle.fluid as fluid
import paddle
import reader
def load_reverse_dict(dict_path):
    """Map each line index of ``dict_path`` to that line's first field.

    Every line is stripped of surrounding whitespace and split on tabs; the
    first field becomes the value stored under the zero-based line number.

    Args:
        dict_path: path of the dictionary file.

    Returns:
        A dict from line index to the first tab-separated field.
    """
    # The context manager closes the file, which the previous bare open()
    # call never did.
    with open(dict_path, "r") as dict_file:
        return {
            idx: line.strip().split("\t")[0]
            for idx, line in enumerate(dict_file)
        }
def infer(model_path, batch_size, test_data_file, target_file):
    """Decode the test data with a saved inference model and print the tags.

    For every token one line ``<word id>\\t<gold tag>\\t<predicted tag>`` is
    printed; an empty line is printed after each sentence.

    Args:
        model_path: directory passed to ``fluid.io.load_inference_model``.
        batch_size: number of sentences per batch.
        test_data_file: location handed to ``reader.file_reader``.
        target_file: label dictionary read by ``load_reverse_dict``.
    """
    word = fluid.layers.data(name='word', shape=[1], dtype='int64', lod_level=1)
    mention = fluid.layers.data(
        name='mention', shape=[1], dtype='int64', lod_level=1)
    target = fluid.layers.data(
        name='target', shape=[1], dtype='int64', lod_level=1)

    label_reverse_dict = load_reverse_dict(target_file)

    test_data = paddle.batch(
        reader.file_reader(test_data_file), batch_size=batch_size)
    place = fluid.CPUPlace()
    feeder = fluid.DataFeeder(feed_list=[word, mention, target], place=place)
    exe = fluid.Executor(place)

    inference_scope = fluid.core.Scope()
    with fluid.scope_guard(inference_scope):
        [inference_program, feed_target_names,
         fetch_targets] = fluid.io.load_inference_model(model_path, exe)
        for data in test_data():
            crf_decode = exe.run(inference_program,
                                 feed=feeder.feed(data),
                                 fetch_list=fetch_targets,
                                 return_numpy=False)
            # lod_info[i]:lod_info[i + 1] is the slice of decoded rows that
            # belongs to sentence i of the batch.
            lod_info = (crf_decode[0].lod())[0]
            np_data = np.array(crf_decode[0])
            assert len(data) == len(lod_info) - 1
            for sen_index in range(len(data)):
                sentence = data[sen_index]
                begin = lod_info[sen_index]
                end = lod_info[sen_index + 1]
                assert len(sentence[0]) == end - begin
                for word_index, tag_index in enumerate(range(begin, end)):
                    # `token` rather than `word`: the latter is the input
                    # layer declared above and must not be overwritten.
                    token = str(sentence[0][word_index])
                    gold_tag = label_reverse_dict[sentence[2][word_index]]
                    tag = label_reverse_dict[np_data[tag_index][0]]
                    # Single-argument print() behaves the same on Python 2
                    # and Python 3.
                    print(token + "\t" + gold_tag + "\t" + tag)
                print("")
if __name__ == "__main__":
    # Script entry point: run inference with the example settings below.
    example_settings = dict(
        model_path="output/params_pass_0",
        batch_size=6,
        test_data_file="data/test_files",
        target_file="data/label_dict")
    infer(**example_settings)
import os
def file_reader(file_dir):
    """Create a reader over every file found in ``file_dir``.

    Each non-empty line is split on ``;``. Fields 1, 2 and 3 are
    space-separated integer lists holding the word ids, the labels and the
    mention ids; field 0 is ignored. Labels are remapped: ``0`` becomes
    ``48`` and every other value is decreased by one.

    Args:
        file_dir: directory containing the data files.

    Returns:
        A zero-argument generator function yielding
        ``(word_idx, mention_idx, target_idx)`` tuples of integer lists.
    """

    def _parse_ints(field):
        # Space-separated integers -> list of int.
        return [int(item) for item in field.strip().split(" ")]

    def reader():
        for fi in os.listdir(file_dir):
            # `with` closes each file; the handles used to be left open.
            with open(os.path.join(file_dir, fi), 'r') as data_file:
                for line in data_file:
                    line = line.strip()
                    if not line:
                        # Blank lines (e.g. at the end of a file) carry no
                        # sample and would otherwise raise an IndexError.
                        continue
                    features = line.split(";")
                    word_idx = _parse_ints(features[1])
                    target_idx = [
                        48 if label_index == 0 else label_index - 1
                        for label_index in _parse_ints(features[2])
                    ]
                    mention_idx = _parse_ints(features[3])
                    yield word_idx, mention_idx, target_idx

    return reader
import os
import math
import time
import numpy as np
import paddle
import paddle.fluid as fluid
from paddle.fluid.initializer import NormalInitializer
import reader
def load_reverse_dict(dict_path):
    """Map each line index of ``dict_path`` to that line's first field.

    Every line is stripped of surrounding whitespace and split on tabs; the
    first field becomes the value stored under the zero-based line number.

    Args:
        dict_path: path of the dictionary file.

    Returns:
        A dict from line index to the first tab-separated field.
    """
    # The context manager closes the file, which the previous bare open()
    # call never did.
    with open(dict_path, "r") as dict_file:
        return {
            idx: line.strip().split("\t")[0]
            for idx, line in enumerate(dict_file)
        }
def to_lodtensor(data, place):
    """Pack a batch of variable-length integer sequences into a LoDTensor.

    Args:
        data: iterable of sequences; they are concatenated along axis 0,
            cast to int64 and reshaped to a single column.
        place: device place handed to ``LoDTensor.set``.

    Returns:
        A ``fluid.LoDTensor`` whose single LoD level holds the cumulative
        sequence offsets ``[0, len(s0), len(s0) + len(s1), ...]``.
    """
    # `data` is traversed twice below; materialise it so that one-shot
    # iterables (such as a Python 3 `map` object) are supported as well.
    data = list(data)
    lod = [0]
    for seq in data:
        lod.append(lod[-1] + len(seq))
    flattened_data = np.concatenate(data, axis=0).astype("int64")
    flattened_data = flattened_data.reshape([len(flattened_data), 1])
    res = fluid.LoDTensor()
    res.set(flattened_data, place)
    res.set_lod([lod])
    return res
def ner_net(word_dict_len, label_dict_len):
    """Build the bidirectional GRU + CRF tagging network.

    Words and mentions are embedded twice (one pair of tables per
    direction), each concatenated pair goes through a fully connected layer
    and a GRU (the second GRU runs in reverse), and the two GRU outputs are
    concatenated and projected to ``label_dict_len`` emission scores that
    feed a linear-chain CRF cost.

    Args:
        word_dict_len: number of rows of the word embedding tables.
        label_dict_len: number of labels, i.e. the emission size.

    Returns:
        Tuple ``(avg_cost, emission, word, mention, target)`` where the last
        three are the input data layers.
    """
    IS_SPARSE = False
    word_dim = 32
    mention_dict_len = 57
    mention_dim = 20
    grnn_hidden = 36
    emb_lr = 5
    init_bound = 0.1

    def _uniform_init():
        # Fresh uniform initializer on [-init_bound, init_bound].
        return fluid.initializer.Uniform(low=-init_bound, high=init_bound)

    def _regularized_attr():
        # Uniformly initialised, L2-regularised attribute for the fc/GRU
        # parameters.
        return fluid.ParamAttr(
            initializer=_uniform_init(),
            regularizer=fluid.regularizer.L2DecayRegularizer(
                regularization_coeff=1e-4))

    def _embedding(ids, dict_len, dim, name):
        # Embedding table `name` of shape [dict_len, dim] looked up by `ids`.
        return fluid.layers.embedding(
            input=ids,
            size=[dict_len, dim],
            dtype='float32',
            is_sparse=IS_SPARSE,
            param_attr=fluid.ParamAttr(
                learning_rate=emb_lr, name=name, initializer=_uniform_init()))

    def _net_conf(word, mention, target):
        # The second parameter used to be called `mark` while the body read
        # `mention` from the enclosing scope; it is now a real parameter.
        # The layers are created in the original order so automatically
        # generated parameter names stay the same.
        word_embedding = _embedding(word, word_dict_len, word_dim, "word_emb")
        mention_embedding = _embedding(mention, mention_dict_len, mention_dim,
                                       "mention_emb")
        word_embedding_r = _embedding(word, word_dict_len, word_dim,
                                      "word_emb_r")
        mention_embedding_r = _embedding(mention, mention_dict_len,
                                         mention_dim, "mention_emb_r")

        word_mention_vector = fluid.layers.concat(
            input=[word_embedding, mention_embedding], axis=1)
        word_mention_vector_r = fluid.layers.concat(
            input=[word_embedding_r, mention_embedding_r], axis=1)

        # Forward direction; the fc output is three times the GRU size.
        pre_gru = fluid.layers.fc(
            input=word_mention_vector,
            size=grnn_hidden * 3,
            param_attr=_regularized_attr())
        gru = fluid.layers.dynamic_gru(
            input=pre_gru, size=grnn_hidden, param_attr=_regularized_attr())

        # Reverse direction.
        pre_gru_r = fluid.layers.fc(
            input=word_mention_vector_r,
            size=grnn_hidden * 3,
            param_attr=_regularized_attr())
        gru_r = fluid.layers.dynamic_gru(
            input=pre_gru_r,
            size=grnn_hidden,
            is_reverse=True,
            param_attr=_regularized_attr())

        gru_merged = fluid.layers.concat(input=[gru, gru_r], axis=1)

        emission = fluid.layers.fc(
            size=label_dict_len,
            input=gru_merged,
            param_attr=_regularized_attr())

        crf_cost = fluid.layers.linear_chain_crf(
            input=emission,
            label=target,
            param_attr=fluid.ParamAttr(
                name='crfw', learning_rate=0.2))
        avg_cost = fluid.layers.mean(x=crf_cost)
        return avg_cost, emission

    word = fluid.layers.data(name='word', shape=[1], dtype='int64', lod_level=1)
    mention = fluid.layers.data(
        name='mention', shape=[1], dtype='int64', lod_level=1)
    target = fluid.layers.data(
        name="target", shape=[1], dtype='int64', lod_level=1)

    avg_cost, emission = _net_conf(word, mention, target)

    return avg_cost, emission, word, mention, target
def test2(exe, chunk_evaluator, inference_program, test_data, place,
          cur_fetch_list):
    """Evaluate ``inference_program`` on ``test_data`` with a plain executor.

    Args:
        exe: executor used to run ``inference_program``.
        chunk_evaluator: accumulator; it is reset before the first batch.
        inference_program: program to run.
        test_data: callable returning an iterable of batches, each batch
            being a sequence of ``(word, mention, target)`` samples.
        place: device place for the input tensors.
        cur_fetch_list: the three variables holding the numbers of inferred,
            labelled and correct chunks, in that order.

    Returns:
        The result of ``chunk_evaluator.eval()``.
    """
    chunk_evaluator.reset()
    for data in test_data():
        # List comprehensions instead of map(): to_lodtensor walks its input
        # twice, which a Python 3 map iterator would not survive.
        word = to_lodtensor([sample[0] for sample in data], place)
        mention = to_lodtensor([sample[1] for sample in data], place)
        target = to_lodtensor([sample[2] for sample in data], place)
        result_list = exe.run(
            inference_program,
            feed={"word": word,
                  "mention": mention,
                  "target": target},
            fetch_list=cur_fetch_list)
        number_infer = np.array(result_list[0])
        number_label = np.array(result_list[1])
        number_correct = np.array(result_list[2])
        chunk_evaluator.update(number_infer[0], number_label[0],
                               number_correct[0])
    return chunk_evaluator.eval()
def test(test_exe, chunk_evaluator, inference_program, test_data, place,
         cur_fetch_list):
    """Evaluate on ``test_data`` through ``test_exe``.

    Unlike ``test2``, ``test_exe.run`` is called without a program argument
    and the fetched counts are summed before they are accumulated;
    ``inference_program`` is accepted but not used here.

    Args:
        test_exe: executor whose ``run(fetch_list=..., feed=...)`` is called.
        chunk_evaluator: accumulator; it is reset before the first batch.
        inference_program: unused.
        test_data: callable returning an iterable of batches, each batch
            being a sequence of ``(word, mention, target)`` samples.
        place: device place for the input tensors.
        cur_fetch_list: the three variables holding the numbers of inferred,
            labelled and correct chunks, in that order.

    Returns:
        The result of ``chunk_evaluator.eval()``.
    """
    chunk_evaluator.reset()
    for data in test_data():
        # List comprehensions instead of map(): to_lodtensor walks its input
        # twice, which a Python 3 map iterator would not survive.
        word = to_lodtensor([sample[0] for sample in data], place)
        mention = to_lodtensor([sample[1] for sample in data], place)
        target = to_lodtensor([sample[2] for sample in data], place)
        result_list = test_exe.run(
            fetch_list=cur_fetch_list,
            feed={"word": word,
                  "mention": mention,
                  "target": target})
        number_infer = np.array(result_list[0])
        number_label = np.array(result_list[1])
        number_correct = np.array(result_list[2])
        chunk_evaluator.update(number_infer.sum(),
                               number_label.sum(), number_correct.sum())
    return chunk_evaluator.eval()
def main(train_data_file, test_data_file, model_save_dir, num_passes):
    """Train the NER network with a ParallelExecutor.

    After every pass the training and test precision/recall/F1 are printed
    and an inference model is saved to
    ``<model_save_dir>/params_pass_<pass_id>``.

    Args:
        train_data_file: location of the training data for
            ``reader.file_reader``.
        test_data_file: location of the test data for ``reader.file_reader``.
        model_save_dir: output directory; created when missing.
        num_passes: number of passes over the training data.
    """
    if not os.path.exists(model_save_dir):
        # makedirs also creates missing parent directories, where mkdir
        # would fail.
        os.makedirs(model_save_dir)

    BATCH_SIZE = 256
    word_dict_len = 1942563
    label_dict_len = 49
    # Named main_program so the enclosing function `main` is not shadowed.
    main_program = fluid.Program()
    startup = fluid.Program()
    # NOTE(review): everything below stays inside the guard because the
    # ParallelExecutor and save_inference_model calls are given no explicit
    # program and presumably fall back to the default one - confirm.
    with fluid.program_guard(main_program, startup):
        avg_cost, feature_out, word, mention, target = ner_net(word_dict_len,
                                                               label_dict_len)

        sgd_optimizer = fluid.optimizer.SGD(learning_rate=1e-3)
        sgd_optimizer.minimize(avg_cost)

        # Shares the 'crfw' transition parameter with the CRF cost layer.
        crf_decode = fluid.layers.crf_decoding(
            input=feature_out, param_attr=fluid.ParamAttr(name='crfw'))

        (precision, recall, f1_score, num_infer_chunks, num_label_chunks,
         num_correct_chunks) = fluid.layers.chunk_eval(
             input=crf_decode,
             label=target,
             chunk_scheme="IOB",
             num_chunk_types=int(math.ceil((label_dict_len - 1) / 2.0)))

        chunk_evaluator = fluid.metrics.ChunkEvaluator()

        inference_program = fluid.default_main_program().clone()
        with fluid.program_guard(inference_program):
            inference_program = fluid.io.get_inference_program(
                [num_infer_chunks, num_label_chunks, num_correct_chunks])

        train_reader = paddle.batch(
            paddle.reader.shuffle(
                reader.file_reader(train_data_file), buf_size=2000000),
            batch_size=BATCH_SIZE)
        test_reader = paddle.batch(
            paddle.reader.shuffle(
                reader.file_reader(test_data_file), buf_size=2000000),
            batch_size=BATCH_SIZE)

        place = fluid.CUDAPlace(0)
        feeder = fluid.DataFeeder(
            feed_list=[word, mention, target], place=place)

        exe = fluid.Executor(place)
        exe.run(startup)

        train_exe = fluid.ParallelExecutor(
            loss_name=avg_cost.name, use_cuda=True)
        test_exe = fluid.ParallelExecutor(
            use_cuda=True,
            main_program=inference_program,
            share_vars_from=train_exe)

        for pass_id in range(num_passes):
            chunk_evaluator.reset()
            start_time = time.time()
            for cur_batch in train_reader():
                cost, nums_infer, nums_label, nums_correct = train_exe.run(
                    fetch_list=[
                        avg_cost.name, num_infer_chunks.name,
                        num_label_chunks.name, num_correct_chunks.name
                    ],
                    feed=feeder.feed(cur_batch))
                # The fetched counts are summed before being accumulated.
                chunk_evaluator.update(
                    np.array(nums_infer).sum(),
                    np.array(nums_label).sum(),
                    np.array(nums_correct).sum())
            end_time = time.time()
            print("pass_id:" + str(pass_id) + ", time_cost:" + str(
                end_time - start_time) + "s")

            precision, recall, f1_score = chunk_evaluator.eval()
            print("[Train] precision:" + str(precision) + ", recall:" + str(
                recall) + ", f1:" + str(f1_score))

            # Evaluation runs through the plain executor (test2).
            p, r, f1 = test2(
                exe, chunk_evaluator, inference_program, test_reader, place,
                [num_infer_chunks, num_label_chunks, num_correct_chunks])
            print("[Test] precision:" + str(p) + ", recall:" + str(r) + ", f1:"
                  + str(f1))

            save_dirname = os.path.join(model_save_dir,
                                        "params_pass_%d" % pass_id)
            fluid.io.save_inference_model(
                save_dirname, ['word', 'mention', 'target'], [crf_decode], exe)
if __name__ == "__main__":
    # Script entry point: train with the example paths and pass count below.
    example_settings = dict(
        train_data_file="./data/train_files",
        test_data_file="./data/test_files",
        model_save_dir="./output",
        num_passes=1000)
    main(**example_settings)
......@@ -6,3 +6,82 @@ The minimum PaddlePaddle version needed for the code sample in this directory is
This model, built with Paddle Fluid, is still under active development and is not
the final version. We welcome feedback.
## Introduction
The current code supports the training of [SE-ResNeXt](https://arxiv.org/abs/1709.01507) (50/152 layers).
## Data Preparation
1. Download ImageNet-2012 dataset
```
cd data/
mkdir -p ILSVRC2012/
cd ILSVRC2012/
# get training set
wget http://www.image-net.org/challenges/LSVRC/2012/nnoupb/ILSVRC2012_img_train.tar
# get validation set
wget http://www.image-net.org/challenges/LSVRC/2012/nnoupb/ILSVRC2012_img_val.tar
# prepare directory
tar xf ILSVRC2012_img_train.tar
tar xf ILSVRC2012_img_val.tar
# unzip all classes data using unzip.sh
sh unzip.sh
```
2. Download training and validation label files from [ImageNet2012 url](https://pan.baidu.com/s/1Y6BCo0nmxsm_FsEqmx2hKQ)(password:```wx99```). Untar it into workspace ```ILSVRC2012/```. The files include
**train_list.txt**: training list of the ImageNet 2012 classification task, with the fields of each line separated by a SPACE.
```
train/n02483708/n02483708_2436.jpeg 369
train/n03998194/n03998194_7015.jpeg 741
train/n04523525/n04523525_38118.jpeg 884
train/n04596742/n04596742_3032.jpeg 909
train/n03208938/n03208938_7065.jpeg 535
...
```
**val_list.txt**: validation list of the ImageNet 2012 classification task, with the fields of each line separated by a SPACE.
```
val/ILSVRC2012_val_00000001.jpeg 65
val/ILSVRC2012_val_00000002.jpeg 970
val/ILSVRC2012_val_00000003.jpeg 230
val/ILSVRC2012_val_00000004.jpeg 809
val/ILSVRC2012_val_00000005.jpeg 516
...
```
**synset_words.txt**: the semantic label of each class.
## Training a model
To start a training task, one can use command line as:
```
python train.py --num_layers=50 --batch_size=8 --with_mem_opt=True --parallel_exe=False
```
## Finetune a model
```
python train.py --num_layers=50 --batch_size=8 --with_mem_opt=True --parallel_exe=False --pretrained_model="pretrain/96/"
```
TBD
## Inference
```
python infer.py --num_layers=50 --batch_size=8 --model='model/90' --test_list=''
```
TBD
## Results
The SE-ResNeXt-50 model is trained starting with a learning rate of ```0.1``` and decaying it by ```0.1``` after every ```10``` epochs. The Top-1/Top-5 validation accuracy on ImageNet 2012 is listed in the table below.
|model | [original paper(Fig.5)](https://arxiv.org/abs/1709.01507) | Pytorch | Paddle fluid
|- | :-: |:-: | -:
|SE-ResNeXt-50 | 77.6%/- | 77.71%/93.63% | 77.42%/93.50%
## Released models
|model | Baidu Cloud
|- | -:
|SE-ResNeXt-50 | [url]()
TBD
### Caffe2Fluid
This tool is used to convert a Caffe model to a Fluid model
### Key Features
1. Convert caffe model to fluid model with codes of defining a network(useful for re-training)
2. Pycaffe is not necessary when you only want to convert a model without running Caffe inference
3. Conversion of Caffe's customized layers is also supported by extending this tool
4. A bunch of tools in `examples/imagenet/tools` are provided to compare the difference
### HowTo
1. Prepare caffepb.py in ./proto if your python has no 'pycaffe' module, two options provided here:
1. Prepare `caffepb.py` in `./proto` if your python has no `pycaffe` module, two options provided here:
- Generate pycaffe from caffe.proto
```
bash ./proto/compile.sh
......@@ -15,36 +24,39 @@ This tool is used to convert a Caffe model to a Fluid model
2. Convert the Caffe model to Fluid model
- Generate fluid code and weight file
```
python convert.py alexnet.prototxt \
--caffemodel alexnet.caffemodel \
--data-output-path alexnet.npy \
--code-output-path alexnet.py
```
- Save weights as fluid model file
```
python alexnet.py alexnet.npy ./fluid
```
```
python convert.py alexnet.prototxt \
--caffemodel alexnet.caffemodel \
--data-output-path alexnet.npy \
--code-output-path alexnet.py
```
- Save weights as fluid model file
```
# only infer the last layer's result
python alexnet.py alexnet.npy ./fluid
# infer these 2 layer's result
python alexnet.py alexnet.npy ./fluid fc8,prob
```
3. Use the converted model to infer
- See more details in '*examples/imagenet/run.sh*'
- See more details in `examples/imagenet/tools/run.sh`
4. Compare the inference results with caffe
- See more details in '*examples/imagenet/diff.sh*'
- See more details in `examples/imagenet/tools/diff.sh`
### How to convert custom layer
1. Implement your custom layer in a file under '*kaffe/custom_layers*', eg: mylayer.py
1. Implement your custom layer in a file under `kaffe/custom_layers`, eg: mylayer.py
- Implement ```shape_func(input_shape, [other_caffe_params])``` to calculate the output shape
- Implement ```layer_func(inputs, name, [other_caffe_params])``` to construct a fluid layer
- Register these two functions ```register(kind='MyType', shape=shape_func, layer=layer_func)```
- Notes: more examples can be found in '*kaffe/custom_layers*'
- Notes: more examples can be found in `kaffe/custom_layers`
2. Add ```import mylayer``` to '*kaffe/custom_layers/\_\_init__.py*'
2. Add ```import mylayer``` to `kaffe/custom_layers/\_\_init__.py`
3. Prepare your pycaffe as your customized version(same as previous env prepare)
- (option1) replace 'proto/caffe.proto' with your own caffe.proto and compile it
- (option2) change your pycaffe to the customized version
- (option1) replace `proto/caffe.proto` with your own caffe.proto and compile it
- (option2) change your `pycaffe` to the customized version
4. Convert the Caffe model to Fluid model
......@@ -53,7 +65,7 @@ This tool is used to convert a Caffe model to a Fluid model
export CAFFE2FLUID_CUSTOM_LAYERS=/path/to/caffe2fluid/kaffe
```
6. Use the converted model when loading model in 'xxxnet.py' and 'xxxnet.npy'(no need if model is already in 'fluid/model' and 'fluid/params')
6. Use the converted model when loading model in `xxxnet.py` and `xxxnet.npy`(no need if model is already in `fluid/model` and `fluid/params`)
### Tested models
- Lenet:
......
A demo to show converting caffe models on 'imagenet' using caffe2fluid
A demo to show converting caffe models trained on 'imagenet' using caffe2fluid
---
......@@ -10,28 +10,32 @@ A demo to show converting caffe models on 'imagenet' using caffe2fluid
3. Convert the Caffe model to Fluid model
- generate fluid code and weight file
<pre><code>python convert.py alexnet.prototxt \
```python convert.py alexnet.prototxt \
--caffemodel alexnet.caffemodel \
--data-output-path alexnet.npy \
--code-output-path alexnet.py
</code></pre>
```
- save weights as fluid model file
<pre><code>python alexnet.py alexnet.npy ./fluid_model
</code></pre>
```
python alexnet.py alexnet.npy ./fluid
```
4. Do inference
<pre><code>python infer.py infer ./fluid_mode data/65.jpeg
</code></pre>
```
python infer.py infer ./fluid data/65.jpeg
```
5. convert model and do inference together
<pre><code>bash ./run.sh alexnet ./models.caffe/alexnet ./models/alexnet
</code></pre>
The Caffe model is stored in './models.caffe/alexnet/alexnet.prototxt|caffemodel'
and the Fluid model will be save in './models/alexnet/alexnet.py|npy'
```
bash ./tools/run.sh alexnet ./models.caffe/alexnet ./models/alexnet
```
* Assume the Caffe model is stored in '*./models.caffe/alexnet/alexnet.prototxt|caffemodel*'
* converted model will be stored as '*./models/alexnet/alexnet.py|npy*'
6. test the difference with caffe's results(need pycaffe installed)
<pre><code>bash ./diff.sh resnet
</code></pre>
Make sure your caffemodel stored in './models.caffe/resnet'.
The results will be stored in './results/resnet.paddle|caffe'
```
bash ./tools/diff.sh resnet
```
* Make sure your caffemodel stored in '*./models.caffe/resnet*'
* The results will be stored in '*./results/resnet.paddle|caffe*'
......@@ -17,8 +17,21 @@ def walk_dir(rootdir):
def calc_diff(f1, f2):
import numpy as np
d1 = np.load(f1).flatten()
d2 = np.load(f2).flatten()
d1 = np.load(f1)
d2 = np.load(f2)
print d1.shape
print d2.shape
#print d1[0, 0, 0:10, 0:10]
#print d2[0, 0, 0:10, 0:10]
#d1 = d1[:, :, 1:-2, 1:-2]
#d2 = d2[:, :, 1:-2, 1:-2]
d1 = d1.flatten()
d2 = d2.flatten()
#print d1[:10]
#print d2[:10]
d1_num = reduce(lambda x, y: x * y, d1.shape)
d2_num = reduce(lambda x, y: x * y, d2.shape)
......@@ -36,15 +49,16 @@ def calc_diff(f1, f2):
return -1.0, -1.0
def compare(path1, path2):
def compare(path1, path2, no_exception):
def diff(f1, f2):
max_df, sq_df = calc_diff(f1, f2)
print('compare %s <=> %s with result[max_df:%.4e, sq_df:%.4e]' %
(f1, f2, max_df, sq_df))
assert (max_df < 1e-5), \
'max_df is too large with value[%.6e]' % (max_df)
assert (sq_df < 1e-10), \
'sq_df is too large with value[%.6e]' % (sq_df)
print('[max_df:%.4e, sq_df:%.4e] when compare %s <=> %s' %
(max_df, sq_df, os.path.basename(f1), os.path.basename(f2)))
if no_exception is False:
assert (max_df < 1e-5), \
'max_df is too large with value[%.6e]' % (max_df)
assert (sq_df < 1e-10), \
'sq_df is too large with value[%.6e]' % (sq_df)
if os.path.exists(path1) is False:
print('not found %s' % (path1))
......@@ -73,13 +87,17 @@ if __name__ == "__main__":
if len(sys.argv) == 1:
path1 = 'lenet.tf/results'
path2 = 'lenet.paddle/results'
elif len(sys.argv) == 3:
elif len(sys.argv) >= 3:
path1 = sys.argv[1]
path2 = sys.argv[2]
if len(sys.argv) == 4:
no_exception = True
else:
no_exception = False
else:
print('usage:')
print(' %s [path1] [path2]' % (sys.argv[0]))
exit(1)
print('compare inner result in %s %s' % (path1, path2))
exit(compare(path1, path2))
#print('compare inner result in %s %s' % (path1, path2))
exit(compare(path1, path2, no_exception))
......@@ -43,7 +43,7 @@ def build_model(net_file, net_name):
(net_file, net_name))
net_path = os.path.dirname(net_file)
module_name = os.path.basename(net_file).rstrip('.py')
module_name = os.path.splitext(os.path.basename(net_file))[0]
if net_path not in sys.path:
sys.path.insert(0, net_path)
......@@ -51,7 +51,7 @@ def build_model(net_file, net_name):
m = __import__(module_name, fromlist=[net_name])
MyNet = getattr(m, net_name)
except Exception as e:
print('failed to load module[%s]' % (module_name))
print('failed to load module[%s.%s]' % (module_name, net_name))
print(e)
return None
......@@ -153,7 +153,6 @@ def load_inference_model(dirname, exe):
def infer(model_path, imgfile, net_file=None, net_name=None, debug=True):
""" do inference using a model which consist 'xxx.py' and 'xxx.npy'
"""
fluid = import_fluid()
place = fluid.CPUPlace()
......@@ -214,7 +213,6 @@ def caffe_infer(prototxt, caffemodel, datafile):
results = []
names = []
for k, v in net.blobs.items():
k = k.rstrip('_output')
k = k.replace('/', '_')
names.append(k)
results.append(v.data.copy())
......@@ -260,7 +258,7 @@ if __name__ == "__main__":
print('usage:')
print('\tpython %s dump [net_file] [weight_file] [datafile] [net_name]' \
% (sys.argv[0]))
print('\teg:python dump %s %s %s %s %s' % (sys.argv[0],\
print('\teg:python %s dump %s %s %s %s' % (sys.argv[0],\
net_file, weight_file, datafile, net_name))
sys.exit(1)
......
#!/bin/bash
#
#function:
#   a tool used to compare the results produced by paddle and caffe
#
#arguments:
#   model_name: selects ./results/<model_name>.paddle and ./results/<model_name>.caffe
#   param_name: base name of the paddle '.npy' file
#   caffe_name: optional base name of the caffe '.npy' file,
#               param_name is used when it is omitted
#

if [[ $# -lt 2 ]];then
    echo "usage:"
    echo " bash $0 [model_name] [param_name] [caffe_name]"
    exit 1
fi

model_name=$1
param_name=$2
paddle_file="./results/${model_name}.paddle/${param_name}.npy"
if [[ $# -eq 3 ]];then
    caffe_file="./results/${model_name}.caffe/${3}.npy"
else
    caffe_file="./results/${model_name}.caffe/${2}.npy"
fi

# Quote the paths so that names containing spaces or glob characters are
# passed to compare.py as exactly one argument each.
python ./compare.py "$paddle_file" "$caffe_file"
#!/bin/bash

#function:
#   a tool used to compare all layers' results
#
#   Layer names are read from the model's prototxt; for every layer that has
#   both a caffe and a paddle '.npy' result, compare.py is run on the pair.
#

if [[ $# -ne 1 ]];then
    echo "usage:"
    echo " bash $0 [model_name]"
    echo " eg:bash $0 alexnet"
    exit 1
fi

model_name=$1
prototxt="models.caffe/$model_name/${model_name}.prototxt"
if [[ ! -e "$prototxt" ]];then
    # fail early instead of silently looping over an empty layer list
    echo "prototxt not exist[$prototxt]"
    exit 1
fi

# Only indented 'name: "..."' lines are matched by the pattern.
layers=$(perl -ne 'if(/^\s+name\s*:\s*\"([^\"]+)/){print $1."\n";}' "$prototxt")

# $layers is intentionally unquoted: one loop iteration per extracted name.
for i in $layers;do
    cf_npy="results/${model_name}.caffe/${i}.npy"
    pd_npy="results/${model_name}.paddle/${i}.npy"
    if [[ ! -e "$cf_npy" ]];then
        echo "caffe's result not exist[$cf_npy]"
        continue
    fi

    if [[ ! -e "$pd_npy" ]];then
        echo "paddle's result not exist[$pd_npy]"
        continue
    fi

    # the extra third argument is passed through to compare.py unchanged
    python compare.py "$cf_npy" "$pd_npy" no_exception
    if [[ $? -eq 0 ]];then
        echo "succeed to compare layer[$i]"
    else
        echo "failed to compare layer[$i]"
    fi
done
......@@ -36,7 +36,7 @@ model_caffemodel="models.caffe/${model_name}/${model_name}.caffemodel"
paddle_results="$results_root/${model_name}.paddle"
rm -rf $paddle_results
rm -rf "results.paddle"
bash run.sh $model_name ./models.caffe/$model_name ./models/$model_name
bash ./tools/run.sh $model_name ./models.caffe/$model_name ./models/$model_name
if [[ $? -ne 0 ]] || [[ ! -e "results.paddle" ]];then
echo "not found paddle's results, maybe failed to convert"
exit 1
......
......@@ -6,7 +6,7 @@
# 2, do inference(only in fluid) using this model
#
#usage:
# bash run.sh resnet50 ./models.caffe/resnet50 ./models/resnet50
# cd caffe2fluid/examples/imagenet && bash run.sh resnet50 ./models.caffe/resnet50 ./models/resnet50
#
#set -x
......@@ -67,7 +67,7 @@ if [[ -z $only_convert ]];then
imgfile="data/65.jpeg"
#FIX ME:
# only look the first line in prototxt file for the name of this network, maybe not correct
net_name=`grep "name" $proto_file | head -n1 | perl -ne 'if(/^\s*name\s*:\s*\"([^\"]+)\"/){ print $1."\n";}'`
net_name=`grep "name" $proto_file | head -n1 | perl -ne 'if(/^name\s*:\s*\"([^\"]+)\"/){ print $1."\n";}'`
if [[ -z $net_name ]];then
net_name="MyNet"
fi
......
......@@ -9,8 +9,8 @@ def import_caffepb():
p = os.path.dirname(p)
p = os.path.join(p, '../../proto')
sys.path.insert(0, p)
import caffepb
return caffepb
import caffe_pb2
return caffe_pb2
class CaffeResolver(object):
......
......@@ -7,13 +7,14 @@ from .register import get_registered_layers
import axpy
import flatten
import argmax
import reshape
#custom layer import ends
custom_layers = get_registered_layers()
def set_args(f, params):
def set_args(f, params, node=None):
""" set args for function 'f' using the parameters in node.layer.parameters
Args:
......@@ -24,18 +25,15 @@ def set_args(f, params):
arg_names (list): a list of argument names
kwargs (dict): a dict contains needed arguments
"""
from ..protobuf_to_dict import protobuf_to_dict
argc = f.__code__.co_argcount
arg_list = f.__code__.co_varnames[0:argc]
kwargs = {}
for arg_name in arg_list:
try:
v = getattr(node.layer.parameters, arg_name, None)
except Exception as e:
v = None
if v is not None:
kwargs[arg_name] = v
if arg_name in params:
kwargs[arg_name] = params[arg_name]
return arg_list, kwargs
......@@ -53,7 +51,7 @@ def compute_output_shape(kind, node):
parents = node.parents
inputs = [list(p.output_shape) for p in parents]
arg_names, kwargs = set_args(shape_func, node.layer.parameters)
arg_names, kwargs = set_args(shape_func, node.params)
if len(inputs) == 1:
inputs = inputs[0]
......@@ -62,16 +60,16 @@ def compute_output_shape(kind, node):
def make_node(template, kind, node):
""" make a TensorFlowNode for custom layer which means construct
""" make a PaddleNode for custom layer which means construct
a piece of code to define a layer implemented in 'custom_layers'
Args:
@template (TensorFlowNode): a factory to new a instance of TensorFLowNode
@template (PaddleNode): a factory to new a instance of PaddleNode
@kind (str): type of custom layer
@node (graph.Node): a layer in the net
Returns:
instance of TensorFlowNode
instance of PaddleNode
"""
assert kind in custom_layers, "layer[%s] not exist in custom layers" % (
kind)
......@@ -79,7 +77,7 @@ def make_node(template, kind, node):
layer_func = custom_layers[kind]['layer']
#construct arguments needed by custom layer function from node's parameters
arg_names, kwargs = set_args(layer_func, node.layer.parameters)
arg_names, kwargs = set_args(layer_func, node.params, node)
return template('custom_layer', kind, **kwargs)
......
......@@ -27,7 +27,9 @@ def argmax_shape(input_shape, out_max_val=False, top_k=1, axis=-1):
axis += len(input_shape)
assert (axis + 1 == len(input_shape)
), 'only can be applied on the last dimension now'
), 'only can be applied on the last dimension[axis:%d, %s] now,'\
'make sure you have set axis param in xxx.prototxt file' \
% (axis, str(input_shape))
output_shape = input_shape
output_shape[-1] = top_k
......@@ -56,14 +58,13 @@ def argmax_layer(input, name, out_max_val=False, top_k=1, axis=-1):
if axis < 0:
axis += len(input.shape)
assert (axis + 1 == len(input_shape)
), 'only can be applied on the last dimension now'
topk_var, index_var = fluid.layers.topk(input=input, k=top_k)
if out_max_val is True:
output = fluid.layers.concate([topk_var, index_var], axis=axis)
index_var = fluid.layers.cast(index_var, dtype=topk_var.dtype)
output = fluid.layers.concat([index_var, topk_var], axis=axis)
else:
output = topk_var
output = index_var
return output
......
""" a custom layer for 'reshape', maybe we should implement this in standard way.
more info can be found here: http://caffe.berkeleyvision.org/tutorial/layers/reshape.html
"""
from .register import register
def import_fluid():
    """ import paddle's fluid module lazily and return it

    The import happens at call time, so this module can be loaded without
    importing paddle first.
    """
    from paddle import fluid
    return fluid
def reshape_shape(input_sp, shape, axis=0, num_axes=-1):
    """ calculate the output shape of this layer using input shape

    The axes [axis, axis + num_axes) of the input are replaced by the dims
    listed in shape['dim']. Inside shape['dim'] a value of 0 copies the
    input dim at the same position and a single -1 is inferred from the
    remaining element count.

    Args:
        @input_sp (list of num): a list of number which represents the input shape
        @shape (object): parameter from caffe's Reshape layer, indexed as shape['dim']
        @axis (int): parameter from caffe's Reshape layer
        @num_axes(int): parameter from caffe's Reshape layer

    Returns:
        @output_shape (list of num): a list of numbers represent the output shape

    Raises:
        AssertionError: when axis/num_axes are out of range, when more than
            one -1 is given, or when the element counts can not match
    """

    def count(num_list):
        # product of all dims; an empty list counts as 1
        total = 1
        for num in num_list:
            total *= num
        return total

    input_shape = list(input_sp)
    input_count = count(input_shape)

    input_num_axes = len(input_shape)

    # a negative axis counts from the end, where -1 means "after the last axis"
    input_start_axis = axis
    start_axis = input_start_axis if input_start_axis >= 0 \
            else input_num_axes + input_start_axis + 1

    assert start_axis >= 0, "[Reshape]axis %d out of range" % (input_start_axis)
    assert start_axis <= input_num_axes, "[Reshape]axis %d out of range for %d-D input data"\
            % (input_start_axis, input_num_axes)

    assert num_axes >= -1, "[Reshape]num_axes must be >= 0, or -1 for all"

    end_axis = input_num_axes if num_axes == -1 else start_axis + num_axes
    assert end_axis <= input_num_axes, "end_axis[%d] = axis[%d] + num_axes[%d] is out of range"\
            % (end_axis, start_axis, num_axes)

    num_axes_replaced = end_axis - start_axis
    num_axes_retained = input_num_axes - num_axes_replaced
    new_dims = list(shape['dim'])
    num_new_axes = len(new_dims)

    # retained leading axes + requested dims + retained trailing axes
    output_shape = input_shape[:start_axis] + new_dims + input_shape[end_axis:]
    assert len(output_shape) == num_axes_retained + num_new_axes,\
            "[Reshape]invalid dims of output shape[%s]" % (str(output_shape))

    inferred_axis = -1
    copy_axes = []
    constant_count = 1
    for i, top_dim in enumerate(new_dims):
        if top_dim == 0:
            copy_axes.append(i)
        elif top_dim == -1:
            assert inferred_axis == -1, "[Reshape]new shape contains multiple -1 dims"
            # remember which dim has to be inferred
            inferred_axis = i
        else:
            constant_count *= top_dim

    # a dim of 0 means: copy the input dim found at the same position
    for copy_axis in copy_axes:
        src_axis = start_axis + copy_axis
        assert src_axis < input_num_axes, "[Reshape]new shape contains a 0, "\
                "but there is no corresponding axis[%d] in input to copy from" % (src_axis)
        output_shape[src_axis] = input_shape[src_axis]

    if inferred_axis >= 0:
        explicit_count = constant_count
        explicit_count *= count(input_shape[0:start_axis])
        explicit_count *= count(input_shape[end_axis:])
        for copy_axis in copy_axes:
            explicit_count *= output_shape[start_axis + copy_axis]

        assert input_count % explicit_count == 0, "[Reshape]botom count[%d] "\
                "must be divisible by product of the specified dimensions[%d] "\
                % (input_count, explicit_count)
        # the -1 dim takes whatever is left of the element count
        output_shape[start_axis + inferred_axis] = input_count // explicit_count

    output_count = count(output_shape)
    assert output_count == input_count, "[Reshape]output count[%d] must match input count[%d]" % (
        output_count, input_count)

    return output_shape
def reshape_layer(input, name, shape, axis=0, num_axes=-1):
    """ build a layer of type 'Reshape' using fluid

    Args:
        @input (variable): input fluid variable for this layer
        @name (str): name for this layer
        @shape (object): parameter from caffe's Reshape layer
        @axis (int): parameter from caffe's Reshape layer
        @num_axes(int): parameter from caffe's Reshape layer

    Returns:
        output (variable): output variable for this layer
    """
    fluid = import_fluid()

    dims = list(input.shape)
    unknown_first_dim = dims[0] == -1
    if unknown_first_dim:
        # compute the target shape with a first dim of 1, then put -1 back
        dims[0] = 1

    target_shape = reshape_shape(dims, shape, axis, num_axes)
    if unknown_first_dim:
        target_shape[0] = -1

    return fluid.layers.reshape(input, shape=target_shape, name=name)
register(kind='Reshape', shape=reshape_shape, layer=reshape_layer)
......@@ -13,8 +13,8 @@ class Node(object):
self.layer = LayerAdapter(layer, kind) if layer else None
self.parents = []
self.children = []
self.data = None
self.output_shape = None
self.data = None #parameters of this node
self.output_shape = None #output shape of this node
self.metadata = {}
def add_parent(self, parent_node):
......@@ -37,10 +37,24 @@ class Node(object):
@property
def parameters(self):
""" get parameters stored in a protobuf object
"""
if self.layer is not None:
return self.layer.parameters
return None
@property
def params(self):
""" get parameters stored in a dict
"""
from .protobuf_to_dict import protobuf_to_dict
p = self.parameters
if p is not None:
return protobuf_to_dict(p)
else:
return None
def __str__(self):
return '[%s] %s' % (self.kind, self.name)
......@@ -124,10 +138,18 @@ class Graph(object):
for node in self.topologically_sorted():
# If the node has learned parameters, display the first one's shape.
# In case of convolutions, this corresponds to the weights.
data_shape = node.data[0].shape if node.data else '--'
out_shape = node.output_shape or '--'
s.append('{:<20} {:<30} {:>20} {:>20}'.format(
node.kind, node.name, data_shape, tuple(out_shape)))
if node.data is None:
data_shape = '--'
out_shape = node.output_shape or '--'
s.append('{:<20} {:<30} {:>20} {:>20}'.format(
node.kind, node.name, data_shape, tuple(out_shape)))
else:
for d in node.data:
#data_shape = node.data[0].shape if node.data else '--'
data_shape = d.shape
out_shape = node.output_shape or '--'
s.append('{:<20} {:<30} {:>20} {:>20}'.format(
node.kind, node.name, data_shape, tuple(out_shape)))
return '\n'.join(s)
......@@ -194,15 +216,25 @@ class GraphBuilder(object):
Newer models use the "Input layer" type.
'''
nodes = [Node(name, NodeKind.Data) for name in self.params.input]
if len(nodes):
input_dim = map(int, self.params.input_dim)
if not input_dim:
if len(self.params.input_shape) > 0:
input_dim = map(int, self.params.input_shape[0].dim)
else:
raise KaffeError('Dimensions for input not specified.')
for node in nodes:
node.output_shape = tuple(input_dim)
inputs_num = len(nodes)
if inputs_num > 0:
input_dims_num = len(self.params.input_dim)
if input_dims_num > 0 and input_dims_num != inputs_num * 4:
raise KaffeError('invalid input_dim[%d] param in prototxt' %
(input_dims_num))
input_dims = [[]] * inputs_num
for i in range(input_dims_num):
dim = self.params.input_dim[i]
which = int(i / 4)
input_dims[which].append(int(dim))
for i in range(inputs_num):
if len(self.params.input_shape) == inputs_num:
input_dim = map(int, self.params.input_shape[i].dim)
input_dims[i] = input_dim
nodes[i].output_shape = tuple(input_dims[i])
return nodes
def build(self):
......
""" this module is used as a template for generating sub class of Network
"""
class MyNet(object):
    ### automatically generated by caffe2fluid ###
    # The two quoted placeholder values below are substituted textually when
    # the code of a concrete net is generated, so keep their exact spelling.
    inputs_info = "INPUTS_INFO"
    custom_layers_path = "_CAFFE2FLUID_CUSTOM_LAYERS_"

    def custom_layer_factory(self):
        """ find the 'custom_layers' package and return its layer maker

        Two locations are tried in order: the directory named by
        $CAFFE2FLUID_CUSTOM_LAYERS (default: the directory of this file) and
        the path stored in 'custom_layers_path'. The first one containing a
        'custom_layers' entry is put in front of sys.path.

        Returns:
            the function 'make_custom_layer' of package 'custom_layers'

        Raises:
            whatever the import of 'custom_layers' raises
        """
        import os
        import sys

        pk_paths = []
        default = os.path.dirname(os.path.abspath(__file__))
        location = os.environ.get('CAFFE2FLUID_CUSTOM_LAYERS', default)
        pk_name = 'custom_layers'
        pk_dir = os.path.join(location, pk_name)
        pk_paths.append((location, pk_dir))

        location = MyNet.custom_layers_path
        pk_dir = os.path.join(MyNet.custom_layers_path, pk_name)
        pk_paths.append((location, pk_dir))

        for loc, pk_dir in pk_paths:
            if os.path.exists(pk_dir):
                if loc not in sys.path:
                    sys.path.insert(0, loc)
                break

        try:
            from custom_layers import make_custom_layer
            return make_custom_layer
        except Exception:
            print('maybe you should set $CAFFE2FLUID_CUSTOM_LAYERS first')
            # bare raise keeps the traceback of the failed import
            raise

    @classmethod
    def input_shapes(cls):
        """ return the inputs description stored in 'inputs_info'
        """
        return cls.inputs_info

    @classmethod
    def convert(cls, npy_model, fluid_path, outputs=None):
        """ load weights from a '.npy' file and save an inference model

        Args:
            @npy_model (str): path of the weights, passed to 'load' as data_path
            @fluid_path (str): directory passed to fluid.io.save_inference_model
            @outputs (list of str): names of layers to be used as outputs,
                None means the last output of the net, and a first item
                'dump_all' clears both file names before saving

        Returns:
            0 when the model has been saved
        """
        fluid = import_fluid()
        shapes = cls.input_shapes()
        # list() keeps this working where keys() is not indexable
        # NOTE(review): only the first input name is passed as feed target
        input_name = list(shapes.keys())[0]
        feed_data = {}
        for name, shape in shapes.items():
            data_layer = fluid.layers.data(
                name=name, shape=shape, dtype="float32")
            feed_data[name] = data_layer

        net = cls(feed_data)
        place = fluid.CPUPlace()
        exe = fluid.Executor(place)
        exe.run(fluid.default_startup_program())
        net.load(data_path=npy_model, exe=exe, place=place)
        output_vars = []

        model_filename = 'model'
        params_filename = 'params'
        if outputs is None:
            output_vars.append(net.get_output())
        else:
            if outputs[0] == 'dump_all':
                # presumably lets fluid fall back to its default file layout
                # -- TODO confirm
                model_filename = None
                params_filename = None
                output_vars.append(net.get_output())
            else:
                if isinstance(outputs, list):
                    for n in outputs:
                        assert n in net.layers, 'not found layer with this name[%s]' % (
                            n)
                        output_vars.append(net.layers[n])

        # params go to their own file; passing the model file name here
        # would store the program and the parameters under the same name
        fluid.io.save_inference_model(
            fluid_path, [input_name],
            output_vars,
            exe,
            main_program=None,
            model_filename=model_filename,
            params_filename=params_filename)
        return 0
def main():
    """ a tool used to convert caffe model to fluid

    Reads the weight file, the target directory and an optional comma
    separated list of layer names from sys.argv.

    Returns:
        1 after printing the usage when arguments are missing, otherwise
        the value returned by the convert call
    """
    import os
    import sys

    argv = sys.argv
    script = argv[0]
    stem = os.path.splitext(os.path.basename(script))[0]

    if len(argv) < 3:
        usage = [
            'usage:',
            ' python %s %s.npy [save_dir] [layer names seperated by comma]' \
                % (script, stem),
            ' eg: python %s %s.npy ./fluid' % (script, stem),
            ' eg: python %s %s.npy ./fluid layer_name1,layer_name2' \
                % (script, stem),
        ]
        for line in usage:
            print(line)
        return 1

    npy_weight, fluid_model = argv[1], argv[2]
    outputs = argv[3].split(',') if len(argv) >= 4 else None

    ret = MyNet.convert(npy_weight, fluid_model, outputs)
    if ret != 0:
        print('failed to convert model to fluid format')
        return ret

    if outputs is None:
        outputs = 'last output layer'
    print('succeed to convert to fluid format with output layers[%s]'
          ' in directory[%s]' % (outputs, fluid_model))
    return ret
def generate_net_code(net_name, inputs_info):
    """ generate framework of a custom net code which represent a subclass of Network

    The source of the template class is taken with 'inspect' and the
    placeholders in it are replaced one after another.

    Args:
        @net_name (str): class name for this net
        @inputs_info (str): a str which represents a dict, eg: '{"data": [3, 32, 32]}'

    Returns:
        net_codes (str): codes for this subclass
    """
    import os
    import inspect

    custom_layer_dir = os.path.dirname(os.path.abspath(__file__))

    # order matters: the class header has to be rewritten before the bare
    # class name is replaced everywhere else
    substitutions = (
        ('MyNet(object)', '%s(Network)' % net_name),
        ('MyNet', net_name),
        ('"INPUTS_INFO"', inputs_info),
        ('_CAFFE2FLUID_CUSTOM_LAYERS_', custom_layer_dir),
    )

    net_codes = str(inspect.getsource(MyNet))
    for placeholder, replacement in substitutions:
        net_codes = net_codes.replace(placeholder, replacement)
    return net_codes
def generate_main_code(net_name):
    """ generate a piece of code for 'main' function

    Args:
        @net_name (str): class name for this net

    Returns:
        main_codes (str): codes for this main function
    """
    import inspect

    # the template refers to the net by its placeholder class name
    template = str(inspect.getsource(main))
    return template.replace('MyNet', net_name)
if __name__ == "__main__":
    # just for testing: print the code generated for a sample net
    # print(...) with a single argument behaves the same as the print
    # statement and matches the call style used in the rest of this file
    print(generate_net_code('Attribute', "{'data': [3, 277, 277]}"))
    print(generate_main_code('Attribute'))
......@@ -22,15 +22,13 @@ def layer(op):
layer_input = self.terminals[0]
else:
layer_input = list(self.terminals)
# Perform the operation and get the output.
layer_output = op(self, layer_input, *args, **kwargs)
# Add to layer LUT.
self.layers[name] = layer_output
# This output is now the input for the next layer.
self.feed(layer_output)
#print('output shape of %s:' % (name))
#print layer_output.shape
# Return self for chained calls.
return self
......@@ -129,6 +127,7 @@ class Network(object):
s_w,
name,
relu=True,
relu_negative_slope=0.0,
padding=None,
group=1,
biased=True):
......@@ -144,6 +143,14 @@ class Network(object):
fluid = import_fluid()
prefix = name + '_'
leaky_relu = False
act = 'relu'
if relu is False:
act = None
elif relu_negative_slope != 0.0:
leaky_relu = True
act = None
output = fluid.layers.conv2d(
input=input,
filter_size=[k_h, k_w],
......@@ -153,7 +160,11 @@ class Network(object):
groups=group,
param_attr=fluid.ParamAttr(name=prefix + "weights"),
bias_attr=fluid.ParamAttr(name=prefix + "biases"),
act="relu" if relu is True else None)
act=act)
if leaky_relu:
output = fluid.layers.leaky_relu(output, alpha=relu_negative_slope)
return output
@layer
......@@ -251,6 +262,13 @@ class Network(object):
@layer
def softmax(self, input, name):
    """ apply softmax; inputs with more than 2 dims must have size 1 in
    every dim after the second and are reshaped to 2 dims first
    """
    fluid = import_fluid()
    dims = input.shape
    if len(dims) > 2:
        assert all(sz == 1 for sz in dims[2:]), \
            "invalid input shape[%s] for softmax" % (str(dims))
        input = fluid.layers.reshape(input, dims[0:2])

    return fluid.layers.softmax(input)
......@@ -286,24 +304,43 @@ class Network(object):
@layer
def dropout(self, input, drop_prob, name, is_test=True):
fluid = import_fluid()
output = fluid.layers.dropout(
input, dropout_prob=drop_prob, is_test=is_test, name=name)
if is_test:
output = input
else:
output = fluid.layers.dropout(
input, dropout_prob=drop_prob, is_test=is_test)
return output
@layer
def scale(self, input, axis=1, num_axes=1, name=None):
    """ multiply the input by a '<name>_scale' parameter and add a
    '<name>_offset' parameter, both shaped like input.shape[axis:axis + num_axes]

    Only num_axes == 1 is accepted.
    """
    fluid = import_fluid()

    assert num_axes == 1, "layer scale not support this num_axes[%d] now" % (
        num_axes)

    prefix = name + '_'
    param_shape = input.shape[axis:axis + num_axes]

    def new_param(suffix):
        # one learnable parameter named '<prefix><suffix>'
        attr = fluid.ParamAttr(name=prefix + suffix)
        return fluid.layers.create_parameter(
            shape=param_shape, dtype=input.dtype, name=name, attr=attr)

    scale_param = new_param('scale')
    offset_param = new_param('offset')

    scaled = fluid.layers.elementwise_mul(input, scale_param, axis=axis)
    return fluid.layers.elementwise_add(scaled, offset_param, axis=axis)
def custom_layer_factory(self):
    """ get a custom layer maker provided by subclass

    Raises:
        NotImplementedError: always, the subclass has to override this
    """
    reason = '[custom_layer_factory] must be implemented by the subclass.'
    raise NotImplementedError(reason)
@layer
def custom_layer(self, inputs, kind, name, *args, **kwargs):
""" make custom layer from the package specified by '$CAFFE2FLUID_CUSTOM_LAYERS'
""" make custom layer
"""
#fluid = import_fluid()
#import custom package
default = os.path.dirname(os.path.abspath(__file__))
p = os.environ.get('CAFFE2FLUID_CUSTOM_LAYERS', default)
pk = os.path.join(p, 'custom_layers')
assert os.path.exists(pk) is True, "not found custom_layer package [%s],"\
"you need to set $CAFFE2FLUID_CUSTOM_LAYERS" % (pk)
if p not in sys.path:
sys.path.insert(0, p)
from custom_layers import make_custom_layer
return make_custom_layer(kind, inputs, name, *args, **kwargs)
layer_factory = self.custom_layer_factory()
return layer_factory(kind, inputs, name, *args, **kwargs)
......@@ -11,9 +11,9 @@ from . import network
def get_padding_type(kernel_params, input_shape, output_shape):
'''Translates Caffe's numeric padding to one of ('SAME', 'VALID').
Caffe supports arbitrary padding values, while TensorFlow only
Caffe supports arbitrary padding values, while Paddle only
supports 'SAME' and 'VALID' modes. So, not all Caffe paddings
can be translated to TensorFlow. There are some subtleties to
can be translated to Paddle. There are some subtleties to
how the padding edge-cases are handled. These are described here:
https://github.com/Yangqing/caffe2/blob/master/caffe2/proto/caffe2_legacy.proto
'''
......@@ -24,11 +24,11 @@ def get_padding_type(kernel_params, input_shape, output_shape):
return None
class TensorFlowNode(object):
'''An intermediate representation for TensorFlow operations.'''
class PaddleNode(object):
'''An intermediate representation for Paddle operations.'''
def __init__(self, op, *args, **kwargs):
# A string corresponding to the TensorFlow operation
# A string corresponding to the Paddle operation
self.op = op
# Positional arguments for the operation
self.args = args
......@@ -64,12 +64,17 @@ class MaybeActivated(object):
if node.metadata.get('relu', False) != default:
self.inject_kwargs['relu'] = not default
default_slope = 0.0
slope = node.metadata.get('relu_negative_slope', default_slope)
if slope != default_slope:
self.inject_kwargs['relu_negative_slope'] = slope
def __call__(self, *args, **kwargs):
kwargs.update(self.inject_kwargs)
return TensorFlowNode(*args, **kwargs)
return PaddleNode(*args, **kwargs)
class TensorFlowMapper(NodeMapper):
class PaddleMapper(NodeMapper):
def get_kernel_params(self, node):
kernel_params = node.layer.kernel_parameters
input_shape = node.get_only_parent().output_shape
......@@ -97,7 +102,7 @@ class TensorFlowMapper(NodeMapper):
kernel_params.stride_h, kernel_params.stride_w, **kwargs)
def map_relu(self, node):
return TensorFlowNode('relu')
return PaddleNode('relu')
def map_pooling(self, node):
pool_type = node.parameters.pool
......@@ -108,18 +113,25 @@ class TensorFlowMapper(NodeMapper):
else:
# Stochastic pooling, for instance.
raise KaffeError('Unsupported pooling type.')
(kernel_params, padding) = self.get_kernel_params(node)
ceil_mode = getattr(node.layer.parameters, 'ceil_mode', True)
return TensorFlowNode(pool_op, kernel_params.kernel_h,
global_pool = getattr(node.layer.parameters, 'global_pooling', False)
if global_pool:
input_shape = node.get_only_parent().output_shape
return PaddleNode(pool_op, input_shape.height, input_shape.width, 1,
1, ceil_mode)
else:
(kernel_params, padding) = self.get_kernel_params(node)
return PaddleNode(pool_op, kernel_params.kernel_h,
kernel_params.kernel_w, kernel_params.stride_h,
kernel_params.stride_w, ceil_mode, **padding)
def map_sigmoid(self, node):
return TensorFlowNode('sigmoid')
return PaddleNode('sigmoid')
def map_custom(self, node):
from .. import custom_layers
return custom_layers.make_node(TensorFlowNode, node.kind, node)
return custom_layers.make_node(PaddleNode, node.kind, node)
def map_inner_product(self, node):
#TODO: Axis
......@@ -129,24 +141,24 @@ class TensorFlowMapper(NodeMapper):
return MaybeActivated(node)('fc', node.parameters.num_output)
def map_softmax(self, node):
return TensorFlowNode('softmax')
return PaddleNode('softmax')
def map_lrn(self, node):
params = node.parameters
# The window size must be an odd value. For a window
# size of (2*n+1), TensorFlow defines depth_radius = n.
# size of (2*n+1), Paddle defines depth_radius = n.
assert params.local_size % 2 == 1
# Caffe scales by (alpha/(2*n+1)), whereas TensorFlow
# Caffe scales by (alpha/(2*n+1)), whereas Paddle
# just scales by alpha (as does Krizhevsky's paper).
# We'll account for that here.
alpha = params.alpha / float(params.local_size)
return TensorFlowNode('lrn', params.local_size, alpha, params.beta)
return PaddleNode('lrn', params.local_size, alpha, params.beta)
def map_concat(self, node):
return TensorFlowNode('concat', node.parameters.axis)
return PaddleNode('concat', node.parameters.axis)
def map_dropout(self, node):
return TensorFlowNode('dropout', node.parameters.dropout_ratio)
return PaddleNode('dropout', node.parameters.dropout_ratio)
def map_batch_norm(self, node):
scale_offset = len(node.data) == 4
......@@ -164,16 +176,20 @@ class TensorFlowMapper(NodeMapper):
operations = {0: 'multiply', 1: 'add', 2: 'max'}
op_code = node.parameters.operation
try:
return TensorFlowNode(operations[op_code])
return PaddleNode(operations[op_code])
except KeyError:
raise KaffeError('Unknown elementwise operation: {}'.format(
op_code))
def map_scale(self, node):
params = node.parameters
return PaddleNode('scale', axis=params.axis, num_axes=params.num_axes)
def commit(self, chains):
return chains
class TensorFlowEmitter(object):
class PaddleEmitter(object):
def __init__(self, tab=None):
self.tab = tab or ' ' * 4
self.prefix = ''
......@@ -198,18 +214,10 @@ class TensorFlowEmitter(object):
codes.append(network_source + '\n')
return self.statement('\n'.join(codes))
def emit_class_def(self, name):
return self.statement('class %s(Network):' % (name))
def emit_setup_def(self):
return self.statement('def setup(self):')
def emit_shape_def(self, input_nodes):
self.outdent()
func_def = self.statement('@classmethod')
func_def += self.statement('def input_shapes(cls):')
self.indent()
def get_inputs_info(self, input_nodes):
input_shapes = {}
for n in input_nodes:
name = n.name
......@@ -218,51 +226,7 @@ class TensorFlowEmitter(object):
input_shapes[name] = ', '.join(shape)
input_shapes = ['"%s": [%s]' % (n, l) for n, l in input_shapes.items()]
shape_str = ','.join(input_shapes)
func_def += self.statement('return {%s}' % (shape_str))
return '\n\n' + func_def
def emit_convert_def(self, input_nodes):
codes = []
inputs = {}
#codes.append('shapes = cls.input_shapes()')
codes.append('shapes = cls.input_shapes()')
codes.append('input_name = shapes.keys()[0]')
codes.append('input_shape = shapes[input_name]')
for n in input_nodes:
name = n.name
layer_var = name + '_layer'
layer_def = '%s = fluid.layers.data(name="%s", shape=shapes["%s"],'\
' dtype="float32")' % (layer_var, name, name)
#layer_var, layer_def = data_layer_def(n.name, n.output_shape)
codes.append(layer_def)
inputs[name] = layer_var
input_dict = ','.join(['"%s": %s' % (n, l) for n, l in inputs.items()])
codes.append('feed_data = {' + input_dict + '}')
codes.append('net = cls(feed_data)')
codes.append("place = fluid.CPUPlace()")
codes.append("exe = fluid.Executor(place)")
codes.append("exe.run(fluid.default_startup_program())")
codes.append("net.load(data_path=npy_model, exe=exe, place=place)")
codes.append("output_vars = [net.get_output()]")
codes.append("fluid.io.save_inference_model(" \
"fluid_path, [input_name],output_vars," \
"exe, main_program=None, model_filename='model'," \
"params_filename='params')")
codes.append(
"print('save fluid model as [model] and [params] in directory [%s]' % (fluid_path))"
)
self.outdent()
func_def = self.statement('@classmethod')
func_def += self.statement('def convert(cls, npy_model, fluid_path):')
self.indent()
func_def += self.statement('fluid = import_fluid()')
for l in codes:
func_def += self.statement(l)
return '\n' + func_def
return '{%s}' % (shape_str)
def emit_main_def(self, name):
if name is None:
......@@ -271,22 +235,7 @@ class TensorFlowEmitter(object):
self.prefix = ''
main_def = self.statement('if __name__ == "__main__":')
self.indent()
main_def += self.statement(
"#usage: save as an inference model for online service\n")
main_def += self.statement("import sys")
main_def += self.statement("if len(sys.argv) != 3:")
self.indent()
main_def += self.statement("print('usage:')")
main_def += self.statement(
"print('\tpython %s [xxxnet.npy] [save_dir]' % (sys.argv[0]))")
main_def += self.statement("exit(1)")
self.outdent()
main_def += self.statement("npy_weight = sys.argv[1]")
main_def += self.statement("fluid_model = sys.argv[2]")
main_def += self.statement("%s.convert(npy_weight, fluid_model)" %
(name))
main_def += self.statement("exit(0)")
main_def += self.statement('exit(main())')
return '\n\n' + main_def
def emit_parents(self, chain):
......@@ -301,10 +250,17 @@ class TensorFlowEmitter(object):
return self.statement('self.' + node.emit())
def emit(self, name, chains, input_nodes=None):
from ..net_template import generate_net_code
from ..net_template import generate_main_code
self.net_name = name
inputs_info = self.get_inputs_info(input_nodes)
s = self.emit_imports()
s += self.emit_class_def(name)
s += generate_net_code(name, inputs_info) + '\n'
self.indent()
# define the net using api
s += self.emit_setup_def()
self.indent()
blocks = []
......@@ -315,8 +271,9 @@ class TensorFlowEmitter(object):
b += self.emit_node(node)
blocks.append(b[:-1])
s = s + '\n\n'.join(blocks)
s += self.emit_shape_def(input_nodes)
s += self.emit_convert_def(input_nodes)
# define the main function
s += '\n\n\n' + generate_main_code(name)
s += self.emit_main_def(name)
return s
......@@ -350,7 +307,7 @@ class Transformer(object):
]),
# Rename nodes
# Slashes are used for scoping in TensorFlow. Replace slashes
# Slashes are used for scoping in Paddle. Replace slashes
# in node names with underscores.
# (Caffe's GoogLeNet implementation uses slashes)
NodeRenamer(lambda node: node.name.replace('/', '_'))
......@@ -365,11 +322,8 @@ class Transformer(object):
def transform_data(self):
if self.params is None:
transformers = [
# Reshape the parameters to TensorFlow's ordering
# Reshape the parameters to Paddle's ordering
DataReshaper({
# (c_o, c_i, h, w) -> (h, w, c_i, c_o) for TF
NodeKind.Convolution: (0, 1, 2, 3),
# (c_o, c_i) -> (c_i, c_o)
NodeKind.InnerProduct: (1, 0)
}),
......@@ -389,9 +343,9 @@ class Transformer(object):
def transform_source(self):
if self.source is None:
mapper = TensorFlowMapper(self.graph)
mapper = PaddleMapper(self.graph)
chains = mapper.map()
emitter = TensorFlowEmitter()
emitter = PaddleEmitter()
input_nodes = self.graph.get_input_nodes()
self.source = emitter.emit(self.graph.name, chains, input_nodes)
return self.source
"""a util for convert protobuf to dict
"""
from google.protobuf.message import Message
from google.protobuf.descriptor import FieldDescriptor
__all__ = [
"protobuf_to_dict", "TYPE_CALLABLE_MAP", "dict_to_protobuf",
"REVERSE_TYPE_CALLABLE_MAP"
]
EXTENSION_CONTAINER = '___X'
# Maps a protobuf field type id to the callable applied to a value of that
# type when a message is turned into a dict (looked up in
# _get_field_value_adaptor).
# NOTE: `long`, `unicode` and the "base64" str codec are Python 2 names.
TYPE_CALLABLE_MAP = {
FieldDescriptor.TYPE_DOUBLE: float,
FieldDescriptor.TYPE_FLOAT: float,
FieldDescriptor.TYPE_INT32: int,
FieldDescriptor.TYPE_INT64: long,
FieldDescriptor.TYPE_UINT32: int,
FieldDescriptor.TYPE_UINT64: long,
FieldDescriptor.TYPE_SINT32: int,
FieldDescriptor.TYPE_SINT64: long,
FieldDescriptor.TYPE_FIXED32: int,
FieldDescriptor.TYPE_FIXED64: long,
FieldDescriptor.TYPE_SFIXED32: int,
FieldDescriptor.TYPE_SFIXED64: long,
FieldDescriptor.TYPE_BOOL: bool,
FieldDescriptor.TYPE_STRING: unicode,
# bytes values are stored base64-encoded in the dict
FieldDescriptor.TYPE_BYTES: lambda b: b.encode("base64"),
FieldDescriptor.TYPE_ENUM: int,
}
def repeated(type_callable):
    """Wrap *type_callable* so that it converts every item of a list.

    Returns a function taking a list of values and returning a new list
    with *type_callable* applied to each value.
    """

    def convert_all(value_list):
        return [type_callable(value) for value in value_list]

    return convert_all
def enum_label_name(field, value):
    """Return the name of the enum value numbered *value* for *field*.

    The lookup goes through field.enum_type.values_by_number after *value*
    has been converted with int().
    """
    values_by_number = field.enum_type.values_by_number
    number = int(value)
    return values_by_number[number].name
def protobuf_to_dict(pb,
                     type_callable_map=TYPE_CALLABLE_MAP,
                     use_enum_labels=False):
    """Convert the set fields of protobuf message *pb* into a dict.

    :param pb: message whose ListFields() are converted
    :param dict type_callable_map: field type id -> converting callable
    :param bool use_enum_labels: convert enum values to their names
    :return: dict keyed by field name; values of extension fields are
        collected under EXTENSION_CONTAINER, keyed by str(field number)
    """
    fields = {}
    extension_values = {}
    for field, value in pb.ListFields():
        convert = _get_field_value_adaptor(pb, field, type_callable_map,
                                           use_enum_labels)
        if field.label == FieldDescriptor.LABEL_REPEATED:
            # repeated fields are converted item by item
            convert = repeated(convert)

        if field.is_extension:
            extension_values[str(field.number)] = convert(value)
        else:
            fields[field.name] = convert(value)

    if extension_values:
        fields[EXTENSION_CONTAINER] = extension_values
    return fields
def _get_field_value_adaptor(pb,
                             field,
                             type_callable_map=TYPE_CALLABLE_MAP,
                             use_enum_labels=False):
    """Pick the callable that converts one value of ``field``.

    :param pb: message owning the field (used only in the error message).
    :param field: descriptor of the field being converted.
    :param dict type_callable_map: mapping of field type to converter.
    :param bool use_enum_labels: convert enum values to their names.
    :return: a one-argument callable producing the converted value.
    :raises TypeError: if the field type has no entry in ``type_callable_map``.
    """
    if field.type == FieldDescriptor.TYPE_MESSAGE:
        # Nested messages are converted recursively with the same settings.
        def convert_message(sub_message):
            return protobuf_to_dict(
                sub_message,
                type_callable_map=type_callable_map,
                use_enum_labels=use_enum_labels)

        return convert_message

    if use_enum_labels and field.type == FieldDescriptor.TYPE_ENUM:

        def convert_enum(value):
            return enum_label_name(field, value)

        return convert_enum

    if field.type not in type_callable_map:
        raise TypeError("Field %s.%s has unrecognised type id %d" %
                        (pb.__class__.__name__, field.name, field.type))
    return type_callable_map[field.type]
def get_bytes(value):
    """Decode a base64-encoded value into raw bytes.

    The previous ``value.decode('base64')`` relied on the ``base64`` string
    codec, which only exists on Python 2. ``binascii.a2b_base64`` is the
    primitive that codec delegates to and is available on Python 2 and 3.

    :param value: base64 text (``bytes`` or ASCII ``str``).
    :return: the decoded bytes.
    :raises binascii.Error: if ``value`` is not valid base64.
    """
    import binascii

    return binascii.a2b_base64(value)
# Converters applied by _dict_to_protobuf to non-repeated scalar values before
# they are assigned to the message; only bytes fields need one (base64 decode).
REVERSE_TYPE_CALLABLE_MAP = {FieldDescriptor.TYPE_BYTES: get_bytes, }
def dict_to_protobuf(pb_klass_or_instance,
                     values,
                     type_callable_map=REVERSE_TYPE_CALLABLE_MAP,
                     strict=True):
    """Fill a protobuf message from a dictionary and return it.

    :param pb_klass_or_instance: either a protobuf message instance, which is
        populated in place, or a message class, which is instantiated first.
    :type pb_klass_or_instance: a type or instance of a subclass of
        google.protobuf.message.Message
    :param dict values: the data to load; repeated and nested values are
        handled.
    :param dict type_callable_map: mapping of protobuf field types to
        callables applied to values before they are set on the message.
    :param bool strict: raise when ``values`` has keys that are not fields of
        the message.
    :return: the populated message instance.
    """
    already_built = isinstance(pb_klass_or_instance, Message)
    message = pb_klass_or_instance if already_built else pb_klass_or_instance()
    return _dict_to_protobuf(message, values, type_callable_map, strict)
def _get_field_mapping(pb, dict_value, strict):
    """Pair every entry of ``dict_value`` with its field on ``pb``.

    Regular keys are resolved through ``pb.DESCRIPTOR.fields_by_name``. The
    optional ``EXTENSION_CONTAINER`` entry maps extension numbers to values
    and is resolved through ``pb._extensions_by_number``.

    :param pb: the protobuf message being populated.
    :param dict dict_value: source data for the message.
    :param bool strict: raise on unknown keys/extensions instead of skipping.
    :return: list of ``(field_descriptor, input_value, current_pb_value)``.
    :raises KeyError: in strict mode, for an unknown field or extension.
    :raises ValueError: if an extension key cannot be converted to ``int``.
    """
    field_mapping = []
    for key, value in dict_value.items():
        if key == EXTENSION_CONTAINER:
            # Extensions are handled by the second loop below.
            continue
        if key not in pb.DESCRIPTOR.fields_by_name:
            if strict:
                raise KeyError("%s does not have a field called %s" % (pb, key))
            continue
        field_mapping.append(
            (pb.DESCRIPTOR.fields_by_name[key], value, getattr(pb, key, None)))

    for ext_num, ext_val in dict_value.get(EXTENSION_CONTAINER, {}).items():
        try:
            ext_num = int(ext_num)
        except ValueError:
            raise ValueError("Extension keys must be integers.")
        if ext_num not in pb._extensions_by_number:
            if strict:
                # Report the offending extension number; the earlier code
                # formatted the leftover `key` of the first loop by mistake.
                raise KeyError(
                    "%s does not have a extension with number %s. Perhaps you forgot to import it?"
                    % (pb, ext_num))
            continue
        ext_field = pb._extensions_by_number[ext_num]
        field_mapping.append((ext_field, ext_val, pb.Extensions[ext_field]))
    return field_mapping
def _dict_to_protobuf(pb, value, type_callable_map, strict):
    """Recursively copy the entries of ``value`` onto the message ``pb``.

    :param pb: protobuf message instance to populate (modified in place).
    :param dict value: source data.
    :param dict type_callable_map: converters applied to non-repeated scalar
        values, keyed by field type.
    :param bool strict: forwarded to ``_get_field_mapping``.
    :return: ``pb``.
    """
    for field, input_value, pb_value in _get_field_mapping(pb, value, strict):
        if field.label == FieldDescriptor.LABEL_REPEATED:
            # Repeated fields: append each item, recursing into sub-messages
            # and translating enum names given as strings.
            for item in input_value:
                if field.type == FieldDescriptor.TYPE_MESSAGE:
                    _dict_to_protobuf(pb_value.add(), item, type_callable_map,
                                      strict)
                elif (field.type == FieldDescriptor.TYPE_ENUM and
                      isinstance(item, basestring)):
                    pb_value.append(_string_to_enum(field, item))
                else:
                    pb_value.append(item)
        elif field.type == FieldDescriptor.TYPE_MESSAGE:
            # Singular sub-message: fill the existing child in place.
            _dict_to_protobuf(pb_value, input_value, type_callable_map, strict)
        else:
            if field.type in type_callable_map:
                input_value = type_callable_map[field.type](input_value)

            if field.is_extension:
                pb.Extensions[field] = input_value
            else:
                if (field.type == FieldDescriptor.TYPE_ENUM and
                        isinstance(input_value, basestring)):
                    input_value = _string_to_enum(field, input_value)
                setattr(pb, field.name, input_value)
    return pb
def _string_to_enum(field, input_value):
enum_dict = field.enum_type.values_by_name
try:
input_value = enum_dict[input_value].number
except KeyError:
raise KeyError("`%s` is not a valid value for field `%s`" %
(input_value, field.name))
return input_value
......@@ -58,19 +58,22 @@ def shape_scalar(node):
def shape_data(node):
if node.output_shape:
# Old-style input specification
return node.output_shape
try:
# New-style input specification
return map(int, node.parameters.shape[0].dim)
except:
# We most likely have a data layer on our hands. The problem is,
# Caffe infers the dimensions of the data from the source (eg: LMDB).
# We want to avoid reading datasets here. Fail for now.
# This can be temporarily fixed by transforming the data layer to
# Caffe's "input" layer (as is usually used in the "deploy" version).
# TODO: Find a better solution for this.
raise KaffeError('Cannot determine dimensions of data layer.\n'
'See comments in function shape_data for more info.')
shape = node.output_shape
else:
try:
# New-style input specification
shape = map(int, node.parameters.shape[0].dim)
except:
# We most likely have a data layer on our hands. The problem is,
# Caffe infers the dimensions of the data from the source (eg: LMDB).
# We want to avoid reading datasets here. Fail for now.
# This can be temporarily fixed by transforming the data layer to
# Caffe's "input" layer (as is usually used in the "deploy" version).
# TODO: Find a better solution for this.
raise KaffeError(
'Cannot determine dimensions of data layer.\n'
'See comments in function shape_data for more info.')
return shape
def shape_mem_data(node):
......@@ -95,12 +98,16 @@ def shape_convolution(node):
def shape_pool(node):
    """Compute the output shape of a pooling node.

    With ``global_pooling`` set on the layer parameters the result keeps the
    parent's batch size and channels and has a 1x1 spatial extent. Otherwise
    the strided-kernel shape is computed, rounding up when ``ceil_mode`` is
    ``True`` (its default when the attribute is absent) and down otherwise.
    """
    if getattr(node.layer.parameters, 'global_pooling', False):
        parent_shape = node.get_only_parent().output_shape
        return make_tensor(parent_shape.batch_size, parent_shape.channels, 1,
                           1)

    ceil_mode = getattr(node.layer.parameters, 'ceil_mode', True)
    rounding = math.ceil if ceil_mode is True else math.floor
    return get_strided_kernel_output_shape(node, rounding)
......
......@@ -66,12 +66,14 @@ class DataInjector(object):
def adjust_parameters(self, node, data):
if not self.did_use_pb:
return data
# When using the protobuf-backend, each parameter initially has four dimensions.
# In certain cases (like FC layers), we want to eliminate the singleton dimensions.
# This implementation takes care of the common cases. However, it does leave the
# potential for future issues.
# The Caffe-backend does not suffer from this problem.
data = list(data)
squeeze_indices = [1] # Squeeze biases.
if node.kind == NodeKind.InnerProduct:
squeeze_indices.append(0) # Squeeze FC.
......@@ -80,8 +82,22 @@ class DataInjector(object):
if idx >= len(data):
continue
shape_old = data[idx].shape
data[idx] = np.squeeze(data[idx])
d = data[idx]
assert len(
d.shape
) == 4, 'invalid shape[%s] from caffe when adjust_parameters' % (
str(d.shape))
shape_old = d.shape
sq_axis = None
if idx == 0:
sq_axis = (0, 1)
elif idx == 1:
sq_axis = (0, 1, 2)
else:
continue
data[idx] = np.squeeze(d, axis=sq_axis)
shape_new = data[idx].shape
if len(shape_old) != shape_new:
debug('squeeze idx:%d, with kind:%s,name:%s' % \
......@@ -131,18 +147,19 @@ class DataReshaper(object):
for node in graph.nodes:
if node.data is None:
continue
if node.kind not in self.reshaped_node_types:
# Check for 2+ dimensional data
if any(len(tensor.shape) > 1 for tensor in node.data):
notice('parmaters not reshaped for node: {}'.format(node))
#if any(len(tensor.shape) > 1 for tensor in node.data):
# notice('parmaters not reshaped for node: {}'.format(node))
continue
transpose_order = self.map(node.kind)
weights = node.data[0]
if (node.kind == NodeKind.InnerProduct
) and self.has_spatial_parent(node):
if node.kind == NodeKind.InnerProduct:
# The FC layer connected to the spatial layer needs to be
# re-wired to match the new spatial ordering.
in_shape = node.get_only_parent().output_shape
#in_shape = node.get_only_parent().output_shape
fc_shape = weights.shape
output_channels = fc_shape[0]
weights = weights.reshape((output_channels, -1))
......@@ -216,8 +233,9 @@ class ReLUFuser(SubNodeFuser):
parent.kind in self.allowed_parent_types) and \
child.kind == NodeKind.ReLU)
def merge(self, parent, _):
def merge(self, parent, child):
parent.metadata['relu'] = True
parent.metadata['relu_negative_slope'] = child.parameters.negative_slope
class BatchNormScaleBiasFuser(SubNodeFuser):
......@@ -299,8 +317,11 @@ class ParameterNamer(object):
names = ('mean', 'variance')
if len(node.data) == 4:
names += ('scale', 'offset')
elif node.kind == NodeKind.Scale:
names = ('scale', 'offset')
else:
warn('Unhandled parameters: {}'.format(node.kind))
warn('Unhandled parameters when naming this it[%s]' %
(node.kind))
continue
assert len(names) == len(node.data)
node.data = dict(zip(names, node.data))
......
......@@ -11,14 +11,10 @@ if [[ -z $PROTOC ]];then
fi
WORK_ROOT=$(dirname `readlink -f "$BASH_SOURCE[0]"`)
PY_NAME="$WORK_ROOT/caffepb.py"
PY_NAME="$WORK_ROOT/caffe_pb2.py"
$PROTOC --proto_path=$WORK_ROOT --python_out=$WORK_ROOT $WORK_ROOT/caffe.proto
ret=$?
if [ $ret -eq 0 ];then
mv $WORK_ROOT/caffe_pb2.py $PY_NAME
fi
if [ -e "$PY_NAME" ];then
echo "succeed to generate [$PY_NAME]"
exit 0
......
# Unpack every *.tar archive in train/ into a sibling directory named after
# the archive (foo.tar -> train/foo/).
# Stop if train/ is missing instead of extracting into the wrong directory.
cd train || exit 1
# Glob directly instead of parsing `ls`, and quote every expansion so that
# archive names containing spaces or glob characters are handled correctly.
for x in *.tar
do
    # With no matches the pattern stays literal; skip it so nothing is done.
    [ -e "$x" ] || continue
    filename=$(basename "$x" .tar)
    # -p: re-running the script must not fail on an existing directory.
    mkdir -p "$filename"
    tar -xvf "$x" -C "./$filename"
done
import os
import sys
import numpy as np
import argparse
import functools
import paddle
import paddle.fluid as fluid
from utility import add_arguments, print_arguments
from se_resnext import SE_ResNeXt
import reader
# Command-line options of the evaluation script. `add_arg` is `add_arguments`
# (imported from utility) with `argparser` pre-bound to `parser`.
# NOTE(review): the positional values look like (name, type, default, help);
# confirm against utility.add_arguments, including how `bool` is parsed.
parser = argparse.ArgumentParser(description=__doc__)
add_arg = functools.partial(add_arguments, argparser=parser)
# yapf: disable
add_arg('batch_size', int, 32, "Minibatch size.")
add_arg('use_gpu', bool, True, "Whether to use GPU or not.")
add_arg('test_list', str, '', "The testing data lists.")
add_arg('num_layers', int, 50, "How many layers for SE-ResNeXt model.")
add_arg('model_dir', str, '', "The model path.")
# yapf: enable
def eval(args):
    """Evaluate a saved SE-ResNeXt model on a labelled image list.

    Builds the network with cross-entropy loss and top-1/top-5 accuracy,
    loads every variable that has a file in ``args.model_dir``, runs the test
    reader batch by batch and prints per-batch and averaged metrics.

    :param args: parsed options; reads ``num_layers``, ``use_gpu``,
        ``model_dir``, ``test_list`` and ``batch_size``.
    :raises ValueError: if ``model_dir`` or ``test_list`` does not exist.
    """
    num_classes = 1000
    input_shape = [3, 224, 224]

    # Network inputs, model and metrics.
    image = fluid.layers.data(name='image', shape=input_shape, dtype='float32')
    label = fluid.layers.data(name='label', shape=[1], dtype='int64')
    out = SE_ResNeXt(input=image, class_dim=num_classes, layers=args.num_layers)
    cost = fluid.layers.cross_entropy(input=out, label=label)
    acc_top1 = fluid.layers.accuracy(input=out, label=label, k=1)
    acc_top5 = fluid.layers.accuracy(input=out, label=label, k=5)
    avg_cost = fluid.layers.mean(x=cost)

    inference_program = fluid.default_main_program().clone(for_test=True)

    if args.use_gpu:
        place = fluid.CUDAPlace(0)
    else:
        place = fluid.CPUPlace()
    exe = fluid.Executor(place)

    if not os.path.exists(args.model_dir):
        raise ValueError("The model path [%s] does not exist." %
                         (args.model_dir))
    if not os.path.exists(args.test_list):
        raise ValueError("The test lists [%s] does not exist." %
                         (args.test_list))

    # Only load variables that actually have a file in the model directory.
    def has_saved_file(var):
        saved_path = os.path.join(args.model_dir, var.name)
        return os.path.exists(saved_path)

    fluid.io.load_vars(exe, args.model_dir, predicate=has_saved_file)

    test_reader = paddle.batch(
        reader.test(args.test_list), batch_size=args.batch_size)
    feeder = fluid.DataFeeder(place=place, feed_list=[image, label])
    fetch_list = [avg_cost, acc_top1, acc_top5]

    log_period = 1  # print the metrics of every batch
    losses, top1_accs, top5_accs = [], [], []
    for batch_id, data in enumerate(test_reader()):
        loss, acc1, acc5 = exe.run(inference_program,
                                   feed=feeder.feed(data),
                                   fetch_list=fetch_list)
        losses.append(loss[0])
        top1_accs.append(acc1[0])
        top5_accs.append(acc5[0])
        if batch_id % log_period == 0:
            print("Test {0}, loss {1}, acc1 {2}, acc5 {3}"
                  .format(batch_id, loss[0], acc1[0], acc5[0]))
            sys.stdout.flush()

    # Unweighted mean over batches.
    test_loss = np.array(losses).mean()
    test_acc1 = np.array(top1_accs).mean()
    test_acc5 = np.array(top5_accs).mean()

    print("Test loss {0}, acc1 {1}, acc5 {2}".format(test_loss, test_acc1,
                                                     test_acc5))
    sys.stdout.flush()
# Script entry point: parse the options, echo them, then run the evaluation.
if __name__ == '__main__':
    args = parser.parse_args()
    print_arguments(args)
    eval(args)
此差异已折叠。
import os
import sys
import numpy as np
import argparse
import functools
import paddle
import paddle.fluid as fluid
from utility import add_arguments, print_arguments
from se_resnext import SE_ResNeXt
import reader
# Command-line options of the inference script. `add_arg` is `add_arguments`
# (imported from utility) with `argparser` pre-bound to `parser`.
# NOTE(review): the positional values look like (name, type, default, help);
# confirm against utility.add_arguments, including how `bool` is parsed.
parser = argparse.ArgumentParser(description=__doc__)
add_arg = functools.partial(add_arguments, argparser=parser)
# yapf: disable
add_arg('batch_size', int, 1, "Minibatch size.")
add_arg('use_gpu', bool, True, "Whether to use GPU or not.")
add_arg('test_list', str, '', "The testing data lists.")
add_arg('num_layers', int, 50, "How many layers for SE-ResNeXt model.")
add_arg('model_dir', str, '', "The model path.")
# yapf: enable
def infer(args):
    """Run a saved SE-ResNeXt model on an image list and print predictions.

    Builds the network followed by a softmax, loads every variable that has a
    file in ``args.model_dir`` and, for each image of each batch, prints the
    score and label of the ``TOPK`` highest-scoring classes.

    :param args: parsed options; reads ``num_layers``, ``use_gpu``,
        ``model_dir``, ``test_list`` and ``batch_size``.
    :raises ValueError: if ``model_dir`` or ``test_list`` does not exist.
    """
    class_dim = 1000
    image_shape = [3, 224, 224]
    image = fluid.layers.data(name='image', shape=image_shape, dtype='float32')
    out = SE_ResNeXt(input=image, class_dim=class_dim, layers=args.num_layers)
    out = fluid.layers.softmax(input=out)

    inference_program = fluid.default_main_program().clone(for_test=True)

    place = fluid.CUDAPlace(0) if args.use_gpu else fluid.CPUPlace()
    exe = fluid.Executor(place)

    if not os.path.exists(args.model_dir):
        raise ValueError("The model path [%s] does not exist." %
                         (args.model_dir))
    if not os.path.exists(args.test_list):
        raise ValueError("The test lists [%s] does not exist." %
                         (args.test_list))

    # Only load variables that actually have a file in the model directory.
    def if_exist(var):
        return os.path.exists(os.path.join(args.model_dir, var.name))

    fluid.io.load_vars(exe, args.model_dir, predicate=if_exist)

    test_reader = paddle.batch(
        reader.infer(args.test_list), batch_size=args.batch_size)
    feeder = fluid.DataFeeder(place=place, feed_list=[image])

    fetch_list = [out]

    TOPK = 1
    for batch_id, data in enumerate(test_reader()):
        result = exe.run(inference_program,
                         feed=feeder.feed(data),
                         fetch_list=fetch_list)
        # result[0] is the fetched softmax output with one row per image.
        # The previous `np.argsort(result)[::-1][0][0]` reversed the batch
        # axis and then took the first index of an ascending sort, i.e. the
        # LEAST likely class. Rank each row on its own, highest score first.
        for scores in result[0]:
            ranked_labels = np.argsort(scores)[::-1][:TOPK]
            for pred_label in ranked_labels:
                print("Test {0}-score {1}, class {2}: "
                      .format(batch_id, scores[pred_label], pred_label))
        sys.stdout.flush()
# Script entry point: parse the options, echo them, then run inference.
if __name__ == '__main__':
    args = parser.parse_args()
    print_arguments(args)
    infer(args)
......@@ -153,72 +153,3 @@ def mobile_net(img, class_dim, scale=1.0):
act='softmax',
param_attr=parameter_attr)
return tmp
def train(learning_rate, batch_size, num_passes, model_save_dir='model'):
class_dim = 102
image_shape = [3, 224, 224]
image = fluid.layers.data(name='image', shape=image_shape, dtype='float32')
label = fluid.layers.data(name='label', shape=[1], dtype='int64')
out = mobile_net(image, class_dim=class_dim)
cost = fluid.layers.cross_entropy(input=out, label=label)
avg_cost = fluid.layers.mean(x=cost)
optimizer = fluid.optimizer.Momentum(
learning_rate=learning_rate,
momentum=0.9,
regularization=fluid.regularizer.L2Decay(5 * 1e-5))
opts = optimizer.minimize(avg_cost)
b_size_var = fluid.layers.create_tensor(dtype='int64')
b_acc_var = fluid.layers.accuracy(input=out, label=label, total=b_size_var)
inference_program = fluid.default_main_program().clone()
with fluid.program_guard(inference_program):
inference_program = fluid.io.get_inference_program(
target_vars=[b_acc_var, b_size_var])
place = fluid.CPUPlace()
exe = fluid.Executor(place)
exe.run(fluid.default_startup_program())
train_reader = paddle.batch(
paddle.dataset.flowers.train(), batch_size=batch_size)
test_reader = paddle.batch(
paddle.dataset.flowers.test(), batch_size=batch_size)
feeder = fluid.DataFeeder(place=place, feed_list=[image, label])
train_pass_acc_evaluator = fluid.average.WeightedAverage()
test_pass_acc_evaluator = fluid.average.WeightedAverage()
for pass_id in range(num_passes):
train_pass_acc_evaluator.reset()
for batch_id, data in enumerate(train_reader()):
loss, acc, size = exe.run(
fluid.default_main_program(),
feed=feeder.feed(data),
fetch_list=[avg_cost, b_acc_var, b_size_var])
train_pass_acc_evaluator.add(value=acc, weight=size)
print("Pass {0}, batch {1}, loss {2}, acc {3}".format(
pass_id, batch_id, loss[0], acc[0]))
test_pass_acc_evaluator.reset()
for data in test_reader():
loss, acc, size = exe.run(
inference_program,
feed=feeder.feed(data),
fetch_list=[avg_cost, b_acc_var, b_size_var])
test_pass_acc_evaluator.add(value=acc, weight=size)
print("End pass {0}, train_acc {1}, test_acc {2}".format(
pass_id,
train_pass_acc_evaluator.eval(), test_pass_acc_evaluator.eval()))
if pass_id % 10 == 0:
model_path = os.path.join(model_save_dir, str(pass_id))
print 'save models to %s' % (model_path)
fluid.io.save_inference_model(model_path, ['image'], [out], exe)
if __name__ == '__main__':
train(learning_rate=0.005, batch_size=40, num_passes=300)
......@@ -3,7 +3,7 @@ import math
import random
import functools
import numpy as np
import paddle.v2 as paddle
import paddle
from PIL import Image, ImageEnhance
random.seed(0)
......@@ -13,9 +13,9 @@ DATA_DIM = 224
THREAD = 8
BUF_SIZE = 1024
DATA_DIR = 'ILSVRC2012'
TRAIN_LIST = 'ILSVRC2012/train_list.txt'
TEST_LIST = 'ILSVRC2012/test_list.txt'
DATA_DIR = 'data/ILSVRC2012'
TRAIN_LIST = 'data/ILSVRC2012/train_list.txt'
TEST_LIST = 'data/ILSVRC2012/val_list.txt'
img_mean = np.array([0.485, 0.456, 0.406]).reshape((3, 1, 1))
img_std = np.array([0.229, 0.224, 0.225]).reshape((3, 1, 1))
......@@ -123,7 +123,7 @@ def process_image(sample, mode, color_jitter, rotate):
if mode == 'train' or mode == 'test':
return img, sample[1]
elif mode == 'infer':
return img
return [img]
def _reader_creator(file_list,
......@@ -151,13 +151,13 @@ def _reader_creator(file_list,
return paddle.reader.xmap_readers(mapper, reader, THREAD, BUF_SIZE)
def train():
def train(file_list=TRAIN_LIST):
return _reader_creator(
TRAIN_LIST, 'train', shuffle=True, color_jitter=True, rotate=True)
file_list, 'train', shuffle=True, color_jitter=False, rotate=False)
def test():
return _reader_creator(TEST_LIST, 'test', shuffle=False)
def test(file_list=TEST_LIST):
return _reader_creator(file_list, 'test', shuffle=False)
def infer(file_list):
......
import paddle.v2 as paddle
import os
import numpy as np
import time
import sys
import paddle
import paddle.fluid as fluid
import reader
import paddle.fluid.layers.control_flow as control_flow
import paddle.fluid.layers.nn as nn
import paddle.fluid.layers.tensor as tensor
import math
def conv_bn_layer(input, num_filters, filter_size, stride=1, groups=1,
......@@ -19,23 +28,28 @@ def conv_bn_layer(input, num_filters, filter_size, stride=1, groups=1,
def squeeze_excitation(input, num_channels, reduction_ratio):
pool = fluid.layers.pool2d(
input=input, pool_size=0, pool_type='avg', global_pooling=True)
stdv = 1.0 / math.sqrt(pool.shape[1] * 1.0)
squeeze = fluid.layers.fc(input=pool,
size=num_channels / reduction_ratio,
act='relu')
act='relu',
param_attr=fluid.param_attr.ParamAttr(
initializer=fluid.initializer.Uniform(-stdv,
stdv)))
stdv = 1.0 / math.sqrt(squeeze.shape[1] * 1.0)
excitation = fluid.layers.fc(input=squeeze,
size=num_channels,
act='sigmoid')
act='sigmoid',
param_attr=fluid.param_attr.ParamAttr(
initializer=fluid.initializer.Uniform(
-stdv, stdv)))
scale = fluid.layers.elementwise_mul(x=input, y=excitation, axis=0)
return scale
def shortcut(input, ch_out, stride):
ch_in = input.shape[1]
if ch_in != ch_out:
if stride == 1:
filter_size = 1
else:
filter_size = 3
if ch_in != ch_out or stride != 1:
filter_size = 1
return conv_bn_layer(input, ch_out, filter_size, stride)
else:
return input
......@@ -66,8 +80,8 @@ def bottleneck_block(input, num_filters, stride, cardinality, reduction_ratio):
def SE_ResNeXt(input, class_dim, infer=False, layers=50):
supported_layers = [50, 152]
if layers not in supported_layers:
print("supported layers are", supported_layers, "but input layer is",
layers)
print("supported layers are", supported_layers, \
"but input layer is ", layers)
exit()
if layers == 50:
cardinality = 32
......@@ -96,10 +110,7 @@ def SE_ResNeXt(input, class_dim, infer=False, layers=50):
conv = conv_bn_layer(
input=conv, num_filters=128, filter_size=3, stride=1, act='relu')
conv = fluid.layers.pool2d(
input=conv,
pool_size=3,
pool_stride=2,
pool_padding=1,
input=conv, pool_size=3, pool_stride=2, pool_padding=1, \
pool_type='max')
for block in range(len(depth)):
......@@ -112,10 +123,16 @@ def SE_ResNeXt(input, class_dim, infer=False, layers=50):
reduction_ratio=reduction_ratio)
pool = fluid.layers.pool2d(
input=conv, pool_size=0, pool_type='avg', global_pooling=True)
input=conv, pool_size=7, pool_type='avg', global_pooling=True)
if not infer:
drop = fluid.layers.dropout(x=pool, dropout_prob=0.2)
drop = fluid.layers.dropout(x=pool, dropout_prob=0.5)
else:
drop = pool
out = fluid.layers.fc(input=drop, size=class_dim, act='softmax')
stdv = 1.0 / math.sqrt(drop.shape[1] * 1.0)
out = fluid.layers.fc(input=drop,
size=class_dim,
act='softmax',
param_attr=fluid.param_attr.ParamAttr(
initializer=fluid.initializer.Uniform(-stdv,
stdv)))
return out
此差异已折叠。
# 语言模型
以下是本例的简要目录结构及说明:
```text
.
├── README.md # 文档
├── train.py # 训练脚本
├── infer.py # 预测脚本
└── utils.py # 通用函数
```
## 简介
循环神经网络语言模型的介绍可以参阅论文[Recurrent Neural Network Regularization](https://arxiv.org/abs/1409.2329),在本例中,我们实现了GRU-RNN语言模型。
## 训练
运行命令 `python train.py` 开始训练模型。
```bash
python train.py
```
当前支持的参数可参见 [train.py](./train.py) 中的 `train_net` 函数：
```python
vocab, train_reader, test_reader = utils.prepare_data(
batch_size=20, # batch size
buffer_size=1000, # buffer size, default value is OK
word_freq_threshold=0) # vocabulary related parameter, and words with frequency below this value will be filtered
train(train_reader=train_reader,
vocab=vocab,
network=network,
hid_size=200, # embedding and hidden size
base_lr=1.0, # base learning rate
batch_size=20, # batch size, the same as that in prepare_data
pass_num=12, # the number of passes for training
use_cuda=True, # whether to use GPU card
parallel=False, # whether to be parallel
model_dir="model", # directory to save model
init_low_bound=-0.1, # uniform parameter initialization lower bound
init_high_bound=0.1) # uniform parameter initialization upper bound
```
## 自定义网络结构
可在[train.py](./train.py) `network` 函数中调整网络结构,当前的网络结构如下:
```python
emb = fluid.layers.embedding(input=src, size=[vocab_size, hid_size],
param_attr=fluid.ParamAttr(
initializer=fluid.initializer.Uniform(low=init_low_bound, high=init_high_bound),
learning_rate=emb_lr_x),
is_sparse=True)
fc0 = fluid.layers.fc(input=emb, size=hid_size * 3,
param_attr=fluid.ParamAttr(
initializer=fluid.initializer.Uniform(low=init_low_bound, high=init_high_bound),
learning_rate=gru_lr_x))
gru_h0 = fluid.layers.dynamic_gru(input=fc0, size=hid_size,
param_attr=fluid.ParamAttr(
initializer=fluid.initializer.Uniform(low=init_low_bound, high=init_high_bound),
learning_rate=gru_lr_x))
fc = fluid.layers.fc(input=gru_h0, size=vocab_size, act='softmax',
param_attr=fluid.ParamAttr(
initializer=fluid.initializer.Uniform(low=init_low_bound, high=init_high_bound),
learning_rate=fc_lr_x))
cost = fluid.layers.cross_entropy(input=fc, label=dst)
```
## 训练结果示例
我们在Tesla K40m单GPU卡上训练的日志如下所示
```text
epoch_1 start
step:100 ppl:771.053
step:200 ppl:449.597
step:300 ppl:642.654
step:400 ppl:458.128
step:500 ppl:510.912
step:600 ppl:451.545
step:700 ppl:364.404
step:800 ppl:324.272
step:900 ppl:360.797
step:1000 ppl:275.761
step:1100 ppl:294.599
step:1200 ppl:335.877
step:1300 ppl:185.262
step:1400 ppl:241.744
step:1500 ppl:211.507
step:1600 ppl:233.431
step:1700 ppl:298.767
step:1800 ppl:203.403
step:1900 ppl:158.828
step:2000 ppl:171.148
step:2100 ppl:280.884
epoch:1 num_steps:2104 time_cost(s):47.478780
model saved in model/epoch_1
epoch_2 start
step:100 ppl:238.099
step:200 ppl:136.527
step:300 ppl:204.184
step:400 ppl:252.886
step:500 ppl:177.377
step:600 ppl:197.688
step:700 ppl:131.650
step:800 ppl:223.906
step:900 ppl:144.785
step:1000 ppl:176.286
step:1100 ppl:148.158
step:1200 ppl:203.581
step:1300 ppl:168.208
step:1400 ppl:159.412
step:1500 ppl:114.032
step:1600 ppl:157.985
step:1700 ppl:147.743
step:1800 ppl:88.676
step:1900 ppl:141.962
step:2000 ppl:106.087
step:2100 ppl:122.709
epoch:2 num_steps:2104 time_cost(s):47.583789
model saved in model/epoch_2
...
```
## 预测
运行命令 `python infer.py model_dir start_epoch last_epoch(inclusive)` 开始预测,其中,start_epoch指定开始预测的轮次,last_epoch指定结束的轮次,例如
```bash
python infer.py model 1 12 # prediction from epoch 1 to epoch 12
```
## 预测结果示例
```text
model:model/epoch_1 ppl:254.540 time_cost(s):3.29
model:model/epoch_2 ppl:177.671 time_cost(s):3.27
model:model/epoch_3 ppl:156.251 time_cost(s):3.27
model:model/epoch_4 ppl:139.036 time_cost(s):3.27
model:model/epoch_5 ppl:132.661 time_cost(s):3.27
model:model/epoch_6 ppl:130.092 time_cost(s):3.28
model:model/epoch_7 ppl:128.751 time_cost(s):3.27
model:model/epoch_8 ppl:125.411 time_cost(s):3.27
model:model/epoch_9 ppl:124.604 time_cost(s):3.28
model:model/epoch_10 ppl:124.754 time_cost(s):3.29
model:model/epoch_11 ppl:125.421 time_cost(s):3.27
model:model/epoch_12 ppl:125.676 time_cost(s):3.27
```
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
# Download and unpack the COCO 2014/2017 images and annotations into the
# directory that contains this script.
DIR="$( cd "$(dirname "$0")" ; pwd -P )"
# Abort rather than download into whatever the current directory happens to be.
cd "$DIR" || exit 1
# Download the data.
echo "Downloading..."
wget http://images.cocodataset.org/zips/train2014.zip
wget http://images.cocodataset.org/zips/val2014.zip
wget http://images.cocodataset.org/zips/train2017.zip
wget http://images.cocodataset.org/zips/val2017.zip
wget http://images.cocodataset.org/annotations/annotations_trainval2014.zip
wget http://images.cocodataset.org/annotations/annotations_trainval2017.zip
# Extract the data.
# The archives fetched above are .zip files; the earlier commands pointed
# unzip at non-existent .tar names, so nothing was ever extracted.
echo "Extracting..."
unzip train2014.zip
unzip val2014.zip
unzip train2017.zip
unzip val2017.zip
unzip annotations_trainval2014.zip
unzip annotations_trainval2017.zip
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册