Unverified commit b0239e3a, authored by Chen Weihang, committed by GitHub

change some model using data loader (#4595)

Parent edf1a872
@@ -99,11 +99,13 @@ class MNIST(fluid.dygraph.Layer):
         self.pool_2_shape = 50 * 4 * 4
         SIZE = 10
         scale = (2.0 / (self.pool_2_shape**2 * SIZE))**0.5
-        self._fc = Linear(self.pool_2_shape, 10,
-                          param_attr=fluid.param_attr.ParamAttr(
-                              initializer=fluid.initializer.NormalInitializer(
-                                  loc=0.0, scale=scale)),
-                          act="softmax")
+        self._fc = Linear(
+            self.pool_2_shape,
+            10,
+            param_attr=fluid.param_attr.ParamAttr(
+                initializer=fluid.initializer.NormalInitializer(
+                    loc=0.0, scale=scale)),
+            act="softmax")
 
     def forward(self, inputs, label=None):
         x = self._simple_img_conv_pool_1(inputs)
@@ -117,17 +119,21 @@ class MNIST(fluid.dygraph.Layer):
         return x
 
 
+def reader_decorator(reader):
+    def __reader__():
+        for item in reader():
+            img = np.array(item[0]).astype('float32').reshape(1, 28, 28)
+            label = np.array(item[1]).astype('int64').reshape(1)
+            yield img, label
+
+    return __reader__
+
+
 def test_mnist(reader, model, batch_size):
     acc_set = []
     avg_loss_set = []
     for batch_id, data in enumerate(reader()):
-        dy_x_data = np.array([x[0].reshape(1, 28, 28)
-                              for x in data]).astype('float32')
-        y_data = np.array(
-            [x[1] for x in data]).astype('int64').reshape(batch_size, 1)
-        img = to_variable(dy_x_data)
-        label = to_variable(y_data)
+        img, label = data
         label.stop_gradient = True
         prediction, acc = model(img, label)
         loss = fluid.layers.cross_entropy(input=prediction, label=label)
@@ -187,28 +193,33 @@ def train_mnist(args):
         if args.use_data_parallel:
             strategy = fluid.dygraph.parallel.prepare_context()
         mnist = MNIST()
-        adam = AdamOptimizer(learning_rate=0.001, parameter_list=mnist.parameters())
+        adam = AdamOptimizer(
+            learning_rate=0.001, parameter_list=mnist.parameters())
         if args.use_data_parallel:
             mnist = fluid.dygraph.parallel.DataParallel(mnist, strategy)
 
         train_reader = paddle.batch(
-            paddle.dataset.mnist.train(), batch_size=BATCH_SIZE, drop_last=True)
+            reader_decorator(paddle.dataset.mnist.train()),
+            batch_size=BATCH_SIZE,
+            drop_last=True)
         if args.use_data_parallel:
             train_reader = fluid.contrib.reader.distributed_batch_reader(
                 train_reader)
         test_reader = paddle.batch(
-            paddle.dataset.mnist.test(), batch_size=BATCH_SIZE, drop_last=True)
+            reader_decorator(paddle.dataset.mnist.test()),
+            batch_size=BATCH_SIZE,
+            drop_last=True)
+
+        train_loader = fluid.io.DataLoader.from_generator(capacity=10)
+        train_loader.set_sample_list_generator(train_reader, places=place)
+
+        test_loader = fluid.io.DataLoader.from_generator(capacity=10)
+        test_loader.set_sample_list_generator(test_reader, places=place)
 
         for epoch in range(epoch_num):
-            for batch_id, data in enumerate(train_reader()):
-                dy_x_data = np.array([x[0].reshape(1, 28, 28)
-                                      for x in data]).astype('float32')
-                y_data = np.array(
-                    [x[1] for x in data]).astype('int64').reshape(-1, 1)
-                img = to_variable(dy_x_data)
-                label = to_variable(y_data)
+            for batch_id, data in enumerate(train_loader()):
+                img, label = data
                 label.stop_gradient = True
 
                 cost, acc = mnist(img, label)
@@ -231,7 +242,7 @@ def train_mnist(args):
                         epoch, batch_id, avg_loss.numpy()))
 
             mnist.eval()
-            test_cost, test_acc = test_mnist(test_reader, mnist, BATCH_SIZE)
+            test_cost, test_acc = test_mnist(test_loader, mnist, BATCH_SIZE)
             mnist.train()
             if args.ce:
                 print("kpis\ttest_acc\t%s" % test_acc)
@@ -244,7 +255,7 @@ def train_mnist(args):
             fluid.dygraph.parallel.Env().local_rank == 0)
         if save_parameters:
             fluid.save_dygraph(mnist.state_dict(), "save_temp")
             print("checkpoint saved")
 
         inference_mnist()
......
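
Reviewer note: the change above is the pattern repeated across the files in this commit. The per-sample reader is wrapped so every sample is a tuple of fixed-shape numpy arrays, batched with paddle.batch, then handed to a DataLoader via set_sample_list_generator, which takes over batch-to-tensor conversion and keeps a prefetch queue off the training thread. A minimal self-contained sketch of the same wiring, using a synthetic reader as a stand-in for paddle.dataset.mnist (fluid 1.x dygraph API, as used in this commit; `fake_mnist_reader` is illustrative, not from the diff):

```python
import numpy as np
import paddle
import paddle.fluid as fluid


def fake_mnist_reader():
    # synthetic stand-in for paddle.dataset.mnist.train()
    def reader():
        for _ in range(256):
            img = np.random.rand(1, 28, 28).astype('float32')
            label = np.random.randint(0, 10, size=(1, )).astype('int64')
            yield img, label

    return reader


place = fluid.CPUPlace()
with fluid.dygraph.guard(place):
    batched = paddle.batch(fake_mnist_reader(), batch_size=64, drop_last=True)
    loader = fluid.io.DataLoader.from_generator(capacity=10)
    loader.set_sample_list_generator(batched, places=place)
    for img, label in loader():
        # the loader yields ready-made dygraph tensors on `place`, which is
        # why the old per-batch np.array(...)/to_variable(...) code disappears
        print(img.shape, label.shape)  # [64, 1, 28, 28] [64, 1]
        break
```

With capacity=10 the loader keeps up to ten batches queued ahead of the training loop, which is where the speedup over the old inline numpy conversion comes from.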
@@ -239,7 +239,7 @@ def process_image(sample, settings, mode, color_jitter, rotate):
     img /= img_std
 
     if mode == 'train' or mode == 'val':
-        return (img, sample[1])
+        return (img, [sample[1]])
    elif mode == 'test':
        return (img, )
......
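
The one-character change above (`[sample[1]]` instead of `sample[1]`) is load-bearing: the sample-list path stacks each field of a sample into a batch array, and a bare Python int has no axis to stack on, while the wrapped label arrives with shape [1] per sample, i.e. the [N, 1] int64 label input the loss expects. Roughly:

```python
import numpy as np

# per-sample labels as the modified reader now yields them: [sample[1]]
labels = [[3], [1], [4], [1]]
print(np.array(labels).shape)  # (4, 1) -- matches the net's label input

# unwrapped ints would stack to shape (4,), dropping the trailing axis
print(np.array([3, 1, 4, 1]).shape)  # (4,)
```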
@@ -116,10 +116,8 @@ def train_mobilenet():
             optimizer.set_dict(opti_dict)
 
         # 3. reader
-        train_data_loader, train_data = utility.create_data_loader(
-            is_train=True, args=args)
-        test_data_loader, test_data = utility.create_data_loader(
-            is_train=False, args=args)
+        train_data_loader = utility.create_data_loader(is_train=True, args=args)
+        test_data_loader = utility.create_data_loader(is_train=False, args=args)
         num_trainers = int(os.environ.get('PADDLE_TRAINERS_NUM', 1))
         imagenet_reader = reader.ImageNetReader(seed=0, place_num=place_num)
         train_reader = imagenet_reader.train(settings=args)
@@ -145,8 +143,6 @@ def train_mobilenet():
                 t1 = time.time()
                 if args.max_iter and total_batch_num == args.max_iter:
                     return
-                label = to_variable(label.numpy().astype('int64').reshape(
-                    int(args.batch_size // place_num), 1))
                 t_start = time.time()
 
                 # 4.1.1 call net()
......
@@ -309,32 +309,14 @@ def create_data_loader(is_train, args):
     Returns:
         data_loader and the input data of net,
     """
-    image_shape = [int(m) for m in args.image_shape.split(",")]
-
-    feed_image = fluid.data(
-        name="feed_image",
-        shape=[None] + image_shape,
-        dtype="float32",
-        lod_level=0)
-
-    feed_label = fluid.data(
-        name="feed_label", shape=[None, 1], dtype="int64", lod_level=0)
-    feed_y_a = fluid.data(
-        name="feed_y_a", shape=[None, 1], dtype="int64", lod_level=0)
-
     if is_train and args.use_mixup:
-        feed_y_b = fluid.data(
-            name="feed_y_b", shape=[None, 1], dtype="int64", lod_level=0)
-        feed_lam = fluid.data(
-            name="feed_lam", shape=[None, 1], dtype="float32", lod_level=0)
-
         data_loader = fluid.io.DataLoader.from_generator(
             capacity=64,
             use_double_buffer=True,
             iterable=True,
             return_list=True)
-        return data_loader, [feed_image, feed_y_a, feed_y_b, feed_lam]
+        return data_loader
     else:
         data_loader = fluid.io.DataLoader.from_generator(
             capacity=64,
@@ -342,7 +324,7 @@ def create_data_loader(is_train, args):
             iterable=True,
             return_list=True)
 
-        return data_loader, [feed_image, feed_label]
+        return data_loader
 
 
 def print_info(pass_id, batch_id, print_step, metrics, time_info, info_mode):
......
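
create_data_loader can drop the fluid.data feed variables entirely because, in dygraph, an iterable loader built with return_list=True yields each batch as a plain list of tensors rather than a feed dict keyed by those variables, so there is nothing left to wire feeds into. A hedged sketch of how such a loader is consumed; the generator here is a synthetic stand-in for the ImageNetReader output, and the real wiring lives elsewhere in train.py:

```python
import numpy as np
import paddle.fluid as fluid

place = fluid.CPUPlace()
with fluid.dygraph.guard(place):
    data_loader = fluid.io.DataLoader.from_generator(
        capacity=64, use_double_buffer=True, iterable=True, return_list=True)

    def batch_generator():
        # stand-in for imagenet_reader: yields already-batched numpy arrays
        for _ in range(4):
            yield (np.random.rand(8, 3, 224, 224).astype('float32'),
                   np.random.randint(0, 1000, size=(8, 1)).astype('int64'))

    data_loader.set_batch_generator(batch_generator, places=place)
    for img, label in data_loader():
        # each batch arrives as a list of tensors, not a feed dict
        print(img.shape, label.shape)  # [8, 3, 224, 224] [8, 1]
```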
 # Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
 # You may obtain a copy of the License at
 #
 #     http://www.apache.org/licenses/LICENSE-2.0
 #
 # Unless required by applicable law or agreed to in writing, software
 # distributed under the License is distributed on an "AS IS" BASIS,
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
 from __future__ import print_function
 
 import os
 import unittest
 import paddle.fluid as fluid
 import paddle.fluid.core as core
 from paddle.fluid.dygraph.nn import Embedding
 import paddle.fluid.framework as framework
 from paddle.fluid.optimizer import SGDOptimizer
 from paddle.fluid.dygraph.base import to_variable
 import numpy as np
 import six
 import multiprocessing
 
 import reader
 import model_check
 import time
 
 from args import *
 #import fluid.clip as clip
 #from fluid.clip import *
 import sys
 if sys.version[0] == '2':
     reload(sys)
     sys.setdefaultencoding("utf-8")
 
 
 class SimpleLSTMRNN(fluid.Layer):
     def __init__(self,
                  hidden_size,
                  num_steps,
                  num_layers=2,
                  init_scale=0.1,
                  dropout=None):
         super(SimpleLSTMRNN, self).__init__()
         self._hidden_size = hidden_size
         self._num_layers = num_layers
         self._init_scale = init_scale
         self._dropout = dropout
         self._num_steps = num_steps
         self.cell_array = []
         self.hidden_array = []
 
         self.weight_1_arr = []
         self.weight_2_arr = []
         self.bias_arr = []
         self.mask_array = []
 
         for i in range(self._num_layers):
             weight_1 = self.create_parameter(
                 attr=fluid.ParamAttr(
                     initializer=fluid.initializer.UniformInitializer(
                         low=-self._init_scale, high=self._init_scale)),
                 shape=[self._hidden_size * 2, self._hidden_size * 4],
                 dtype="float32",
                 default_initializer=fluid.initializer.UniformInitializer(
                     low=-self._init_scale, high=self._init_scale))
             self.weight_1_arr.append(self.add_parameter('w_%d' % i, weight_1))
             bias_1 = self.create_parameter(
                 attr=fluid.ParamAttr(
                     initializer=fluid.initializer.UniformInitializer(
                         low=-self._init_scale, high=self._init_scale)),
                 shape=[self._hidden_size * 4],
                 dtype="float32",
                 default_initializer=fluid.initializer.Constant(0.0))
             self.bias_arr.append(self.add_parameter('b_%d' % i, bias_1))
 
     def forward(self, input_embedding, init_hidden=None, init_cell=None):
         cell_array = []
         hidden_array = []
         for i in range(self._num_layers):
             hidden_array.append(init_hidden[i])
             cell_array.append(init_cell[i])
 
         res = []
         for index in range(self._num_steps):
-            step_input = input_embedding[:,index,:]
+            step_input = input_embedding[:, index, :]
             for k in range(self._num_layers):
                 pre_hidden = hidden_array[k]
                 pre_cell = cell_array[k]
                 weight_1 = self.weight_1_arr[k]
                 bias = self.bias_arr[k]
 
                 nn = fluid.layers.concat([step_input, pre_hidden], 1)
                 gate_input = fluid.layers.matmul(x=nn, y=weight_1)
 
                 gate_input = fluid.layers.elementwise_add(gate_input, bias)
                 i, j, f, o = fluid.layers.split(
                     gate_input, num_or_sections=4, dim=-1)
                 c = pre_cell * fluid.layers.sigmoid(f) + fluid.layers.sigmoid(
                     i) * fluid.layers.tanh(j)
                 m = fluid.layers.tanh(c) * fluid.layers.sigmoid(o)
                 hidden_array[k] = m
                 cell_array[k] = c
                 step_input = m
 
                 if self._dropout is not None and self._dropout > 0.0:
                     step_input = fluid.layers.dropout(
                         step_input,
                         dropout_prob=self._dropout,
                         dropout_implementation='upscale_in_train')
             res.append(step_input)
 
         real_res = fluid.layers.concat(res, 1)
-        real_res = fluid.layers.reshape(real_res, [ -1, self._num_steps, self._hidden_size])
+        real_res = fluid.layers.reshape(
+            real_res, [-1, self._num_steps, self._hidden_size])
         last_hidden = fluid.layers.concat(hidden_array, 1)
         last_hidden = fluid.layers.reshape(
             last_hidden, shape=[-1, self._num_layers, self._hidden_size])
         last_hidden = fluid.layers.transpose(x=last_hidden, perm=[1, 0, 2])
         last_cell = fluid.layers.concat(cell_array, 1)
         last_cell = fluid.layers.reshape(
             last_cell, shape=[-1, self._num_layers, self._hidden_size])
         last_cell = fluid.layers.transpose(x=last_cell, perm=[1, 0, 2])
         return real_res, last_hidden, last_cell
 
 
 class PtbModel(fluid.Layer):
     def __init__(self,
                  hidden_size,
                  vocab_size,
                  num_layers=2,
                  num_steps=20,
                  init_scale=0.1,
                  dropout=None):
         super(PtbModel, self).__init__()
         self.hidden_size = hidden_size
         self.vocab_size = vocab_size
         self.init_scale = init_scale
         self.num_layers = num_layers
         self.num_steps = num_steps
         self.dropout = dropout
         self.simple_lstm_rnn = SimpleLSTMRNN(
             hidden_size,
             num_steps,
             num_layers=num_layers,
             init_scale=init_scale,
             dropout=dropout)
         self.embedding = Embedding(
             size=[vocab_size, hidden_size],
             dtype='float32',
             is_sparse=False,
             param_attr=fluid.ParamAttr(
                 name='embedding_para',
                 initializer=fluid.initializer.UniformInitializer(
                     low=-init_scale, high=init_scale)))
         self.softmax_weight = self.create_parameter(
             attr=fluid.ParamAttr(),
             shape=[self.hidden_size, self.vocab_size],
             dtype="float32",
             default_initializer=fluid.initializer.UniformInitializer(
                 low=-self.init_scale, high=self.init_scale))
         self.softmax_bias = self.create_parameter(
             attr=fluid.ParamAttr(),
             shape=[self.vocab_size],
             dtype="float32",
             default_initializer=fluid.initializer.UniformInitializer(
                 low=-self.init_scale, high=self.init_scale))
 
     def build_once(self, input, label, init_hidden, init_cell):
         pass
 
     def forward(self, input, label, init_hidden, init_cell):
 
         init_h = fluid.layers.reshape(
             init_hidden, shape=[self.num_layers, -1, self.hidden_size])
 
         init_c = fluid.layers.reshape(
             init_cell, shape=[self.num_layers, -1, self.hidden_size])
 
         x_emb = self.embedding(input)
 
         x_emb = fluid.layers.reshape(
             x_emb, shape=[-1, self.num_steps, self.hidden_size])
         if self.dropout is not None and self.dropout > 0.0:
             x_emb = fluid.layers.dropout(
                 x_emb,
                 dropout_prob=self.dropout,
                 dropout_implementation='upscale_in_train')
         rnn_out, last_hidden, last_cell = self.simple_lstm_rnn(x_emb, init_h,
                                                                init_c)
 
         projection = fluid.layers.matmul(rnn_out, self.softmax_weight)
         projection = fluid.layers.elementwise_add(projection, self.softmax_bias)
 
         loss = fluid.layers.softmax_with_cross_entropy(
             logits=projection, label=label, soft_label=False)
         loss = fluid.layers.reshape(loss, shape=[-1, self.num_steps])
         loss = fluid.layers.reduce_mean(loss, dim=[0])
         loss = fluid.layers.reduce_sum(loss)
 
         return loss, last_hidden, last_cell
 
     def debug_emb(self):
 
         np.save("emb_grad", self.x_emb.gradient())
 
 
 def train_ptb_lm():
     args = parse_args()
 
     # check if set use_gpu=True in paddlepaddle cpu version
     model_check.check_cuda(args.use_gpu)
 
     place = core.CPUPlace()
     if args.use_gpu:
         place = fluid.CUDAPlace(0)
         dev_count = fluid.core.get_cuda_device_count()
     else:
         place = fluid.CPUPlace()
         dev_count = int(os.environ.get('CPU_NUM', multiprocessing.cpu_count()))
 
     # check if paddlepaddle version is satisfied
     model_check.check_version()
 
     model_type = args.model_type
 
     vocab_size = 10000
     if model_type == "test":
         num_layers = 1
         batch_size = 2
         hidden_size = 10
         num_steps = 3
         init_scale = 0.1
         max_grad_norm = 5.0
         epoch_start_decay = 1
         max_epoch = 1
         dropout = 0.0
         lr_decay = 0.5
         base_learning_rate = 1.0
     elif model_type == "small":
         num_layers = 2
         batch_size = 20
         hidden_size = 200
         num_steps = 20
         init_scale = 0.1
         max_grad_norm = 5.0
         epoch_start_decay = 4
         max_epoch = 13
         dropout = 0.0
         lr_decay = 0.5
         base_learning_rate = 1.0
     elif model_type == "medium":
         num_layers = 2
         batch_size = 20
         hidden_size = 650
         num_steps = 35
         init_scale = 0.05
         max_grad_norm = 5.0
         epoch_start_decay = 6
         max_epoch = 39
         dropout = 0.5
         lr_decay = 0.8
         base_learning_rate = 1.0
     elif model_type == "large":
         num_layers = 2
         batch_size = 20
         hidden_size = 1500
         num_steps = 35
         init_scale = 0.04
         max_grad_norm = 10.0
         epoch_start_decay = 14
         max_epoch = 55
         dropout = 0.65
         lr_decay = 1.0 / 1.15
         base_learning_rate = 1.0
     else:
         print("model type not support")
         return
 
     with fluid.dygraph.guard(place):
         if args.ce:
             print("ce mode")
             seed = 33
             np.random.seed(seed)
             fluid.default_startup_program().random_seed = seed
             fluid.default_main_program().random_seed = seed
             max_epoch = 1
         ptb_model = PtbModel(
             hidden_size=hidden_size,
             vocab_size=vocab_size,
             num_layers=num_layers,
             num_steps=num_steps,
             init_scale=init_scale,
             dropout=dropout)
 
         if args.init_from_pretrain_model:
             if not os.path.exists(args.init_from_pretrain_model + '.pdparams'):
                 print(args.init_from_pretrain_model)
                 raise Warning("The pretrained params do not exist.")
                 return
             fluid.load_dygraph(args.init_from_pretrain_model)
             print("finish initing model from pretrained params from %s" %
                   (args.init_from_pretrain_model))
 
         dy_param_updated = dict()
         dy_param_init = dict()
         dy_loss = None
         last_hidden = None
         last_cell = None
 
         data_path = args.data_path
         print("begin to load data")
         ptb_data = reader.get_ptb_data(data_path)
         print("finished load data")
         train_data, valid_data, test_data = ptb_data
 
         batch_len = len(train_data) // batch_size
         total_batch_size = (batch_len - 1) // num_steps
         log_interval = 200
 
         bd = []
         lr_arr = [1.0]
         for i in range(1, max_epoch):
             bd.append(total_batch_size * i)
             new_lr = base_learning_rate * (lr_decay**
                                            max(i + 1 - epoch_start_decay, 0.0))
             lr_arr.append(new_lr)
 
         grad_clip = fluid.clip.GradientClipByGlobalNorm(max_grad_norm)
         sgd = SGDOptimizer(
-            learning_rate=fluid.layers.piecewise_decay(boundaries=bd, values=lr_arr),
+            learning_rate=fluid.layers.piecewise_decay(
+                boundaries=bd, values=lr_arr),
             parameter_list=ptb_model.parameters(),
             grad_clip=grad_clip)
 
+        def reader_decorator(reader):
+            def __reader__():
+                for item in reader:
+                    x_data = item[0].reshape((-1, num_steps, 1))
+                    y_data = item[1].reshape((-1, num_steps, 1))
+                    yield x_data, y_data
+
+            return __reader__
+
         def eval(model, data):
             print("begin to eval")
             total_loss = 0.0
             iters = 0.0
             init_hidden_data = np.zeros(
                 (num_layers, batch_size, hidden_size), dtype='float32')
             init_cell_data = np.zeros(
                 (num_layers, batch_size, hidden_size), dtype='float32')
 
             model.eval()
-            train_data_iter = reader.get_data_iter(data, batch_size, num_steps)
-            for batch_id, batch in enumerate(train_data_iter):
-                x_data, y_data = batch
-                x_data = x_data.reshape((-1, num_steps, 1))
-                y_data = y_data.reshape((-1, num_steps, 1))
-                x = to_variable(x_data)
-                y = to_variable(y_data)
+            train_data_iter = reader_decorator(
+                reader.get_data_iter(data, batch_size, num_steps))
+            eval_data_loader = fluid.io.DataLoader.from_generator(capacity=200)
+            eval_data_loader.set_batch_generator(train_data_iter, places=place)
+            for batch_id, batch in enumerate(eval_data_loader):
+                x, y = batch
                 init_hidden = to_variable(init_hidden_data)
                 init_cell = to_variable(init_cell_data)
                 dy_loss, last_hidden, last_cell = ptb_model(x, y, init_hidden,
                                                             init_cell)
 
                 out_loss = dy_loss.numpy()
 
                 init_hidden_data = last_hidden.numpy()
                 init_cell_data = last_cell.numpy()
 
                 total_loss += out_loss
                 iters += num_steps
 
             print("eval finished")
             ppl = np.exp(total_loss / iters)
             print("ppl ", batch_id, ppl[0])
 
         ce_time = []
         ce_ppl = []
 
         total_batch_num = 0  #this is for benchmark
         for epoch_id in range(max_epoch):
             ptb_model.train()
             total_loss = 0.0
             iters = 0.0
             init_hidden_data = np.zeros(
                 (num_layers, batch_size, hidden_size), dtype='float32')
             init_cell_data = np.zeros(
                 (num_layers, batch_size, hidden_size), dtype='float32')
 
-            train_data_iter = reader.get_data_iter(train_data, batch_size,
-                                                   num_steps)
+            train_data_iter = reader_decorator(
+                reader.get_data_iter(train_data, batch_size, num_steps))
+
+            train_data_loader = fluid.io.DataLoader.from_generator(capacity=200)
+            train_data_loader.set_batch_generator(train_data_iter, places=place)
 
             init_hidden = to_variable(init_hidden_data)
             init_cell = to_variable(init_cell_data)
             start_time = time.time()
-            for batch_id, batch in enumerate(train_data_iter):
+            for batch_id, batch in enumerate(train_data_loader):
                 if args.max_iter and total_batch_num == args.max_iter:
                     return
                 batch_start = time.time()
-                x_data, y_data = batch
-
-                x_data = x_data.reshape((-1, num_steps, 1))
-                y_data = y_data.reshape((-1, num_steps, 1))
-
-                x = to_variable(x_data)
-                y = to_variable(y_data)
+                x, y = batch
 
                 dy_loss, last_hidden, last_cell = ptb_model(x, y, init_hidden,
                                                             init_cell)
                 init_hidden = last_hidden.detach()
                 init_cell = last_cell.detach()
                 out_loss = dy_loss.numpy()
 
                 dy_loss.backward()
                 sgd.minimize(dy_loss)
 
                 ptb_model.clear_gradients()
                 total_loss += out_loss
                 batch_end = time.time()
                 train_batch_cost = batch_end - batch_start
                 iters += num_steps
                 total_batch_num = total_batch_num + 1  #this is for benchmark
 
                 if batch_id > 0 and batch_id % log_interval == 0:
                     ppl = np.exp(total_loss / iters)
                     print("-- Epoch:[%d]; Batch:[%d]; ppl: %.5f, lr: %.5f, loss: %.5f, batch cost: %.5f" %
                           (epoch_id, batch_id, ppl[0],
                            sgd._global_learning_rate().numpy(), out_loss, train_batch_cost))
 
             print("one epoch finished", epoch_id)
             print("time cost ", time.time() - start_time)
             ppl = np.exp(total_loss / iters)
             ce_time.append(time.time() - start_time)
             ce_ppl.append(ppl[0])
             print("-- Epoch:[%d]; ppl: %.5f" % (epoch_id, ppl[0]))
 
             if batch_size <= 20 and epoch_id == 0 and ppl[0] > 1000:
                 # for bad init, after first epoch, the loss is over 1000
                 # no more need to continue
-                print("Parameters are randomly initialized and not good this time because the loss is over 1000 after the first epoch.")
+                print(
+                    "Parameters are randomly initialized and not good this time because the loss is over 1000 after the first epoch."
+                )
                 print("Abort this training process and please start again.")
                 return
 
             save_model_dir = os.path.join(args.save_model_dir,
                                           str(epoch_id), 'params')
             fluid.save_dygraph(ptb_model.state_dict(), save_model_dir)
             print("Saved model to: %s.\n" % save_model_dir)
 
             eval(ptb_model, valid_data)
 
         if args.ce:
             _ppl = 0
             _time = 0
             try:
                 _time = ce_time[-1]
                 _ppl = ce_ppl[-1]
             except:
                 print("ce info error")
             print("kpis\ttrain_duration_card%s\t%s" % (dev_count, _time))
             print("kpis\ttrain_ppl_card%s\t%f" % (dev_count, _ppl))
 
         eval(ptb_model, test_data)
 
 
 train_ptb_lm()
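
Unlike the image models, the PTB change uses set_batch_generator rather than set_sample_list_generator: reader.get_data_iter already yields whole (x, y) batches, so the new reader_decorator only reshapes each batch to the [batch, num_steps, 1] layout the embedding expects, and the loader passes batches through unchanged. A minimal sketch of that distinction with synthetic data (shapes follow the "medium" config, batch_size=20 and num_steps=35; `batch_iter` is a stand-in for the decorated get_data_iter):

```python
import numpy as np
import paddle.fluid as fluid

place = fluid.CPUPlace()
with fluid.dygraph.guard(place):
    def batch_iter():
        # stand-in for reader_decorator(reader.get_data_iter(...)):
        # yields complete, already-batched arrays
        for _ in range(3):
            x = np.random.randint(0, 10000, size=(20, 35)).astype('int64')
            y = np.random.randint(0, 10000, size=(20, 35)).astype('int64')
            yield x.reshape((-1, 35, 1)), y.reshape((-1, 35, 1))

    loader = fluid.io.DataLoader.from_generator(capacity=200)
    loader.set_batch_generator(batch_iter, places=place)
    for x, y in loader():
        # batches arrive exactly as yielded; no per-sample stacking happens
        print(x.shape, y.shape)  # [20, 35, 1] [20, 35, 1]
        break
```

set_sample_list_generator, by contrast, expects a reader that yields lists of per-sample tuples (what paddle.batch produces) and does the stacking itself.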
@@ -81,7 +81,6 @@ def optimizer_setting(parameter_list=None):
                 boundaries=bd, values=lr),
             momentum=momentum_rate,
             regularization=fluid.regularizer.L2Decay(l2_decay))
-
     return optimizer
@@ -116,11 +115,7 @@ class ConvBNLayer(fluid.dygraph.Layer):
 
 
 class BottleneckBlock(fluid.dygraph.Layer):
-    def __init__(self,
-                 num_channels,
-                 num_filters,
-                 stride,
-                 shortcut=True):
+    def __init__(self, num_channels, num_filters, stride, shortcut=True):
         super(BottleneckBlock, self).__init__()
 
         self.conv0 = ConvBNLayer(
@@ -186,16 +181,9 @@ class ResNet(fluid.dygraph.Layer):
         num_filters = [64, 128, 256, 512]
 
         self.conv = ConvBNLayer(
-            num_channels=3,
-            num_filters=64,
-            filter_size=7,
-            stride=2,
-            act='relu')
+            num_channels=3, num_filters=64, filter_size=7, stride=2, act='relu')
         self.pool2d_max = Pool2D(
-            pool_size=3,
-            pool_stride=2,
-            pool_padding=1,
-            pool_type='max')
+            pool_size=3, pool_stride=2, pool_padding=1, pool_type='max')
 
         self.bottleneck_block_list = []
         for block in range(len(depth)):
@@ -220,11 +208,12 @@ class ResNet(fluid.dygraph.Layer):
         import math
         stdv = 1.0 / math.sqrt(2048 * 1.0)
 
-        self.out = Linear(self.pool2d_avg_output,
-                          class_dim,
-                          act='softmax',
-                          param_attr=fluid.param_attr.ParamAttr(
-                              initializer=fluid.initializer.Uniform(-stdv, stdv)))
+        self.out = Linear(
+            self.pool2d_avg_output,
+            class_dim,
+            act='softmax',
+            param_attr=fluid.param_attr.ParamAttr(
+                initializer=fluid.initializer.Uniform(-stdv, stdv)))
 
     def forward(self, inputs):
         y = self.conv(inputs)
@@ -237,6 +226,16 @@ class ResNet(fluid.dygraph.Layer):
         return y
 
 
+def reader_decorator(reader):
+    def __reader__():
+        for item in reader():
+            img = np.array(item[0]).astype('float32').reshape(3, 224, 224)
+            label = np.array(item[1]).astype('int64').reshape(1)
+            yield img, label
+
+    return __reader__
+
+
 def eval(model, data):
     model.eval()
@@ -245,15 +244,8 @@ def eval(model, data):
     total_acc5 = 0.0
     total_sample = 0
     for batch_id, data in enumerate(data()):
-        dy_x_data = np.array(
-            [x[0].reshape(3, 224, 224) for x in data]).astype('float32')
-        if len(np.array([x[1] for x in data]).astype('int64')) != batch_size:
-            continue
-        y_data = np.array([x[1] for x in data]).astype('int64').reshape(
-            batch_size, 1)
-        img = to_variable(dy_x_data)
-        label = to_variable(y_data)
+        img = data[0]
+        label = data[1]
         label.stop_gradient = True
         out = model(img)
@@ -303,13 +295,24 @@ def train_resnet():
             resnet = fluid.dygraph.parallel.DataParallel(resnet, strategy)
 
         train_reader = paddle.batch(
-            paddle.dataset.flowers.train(use_xmap=False), batch_size=batch_size)
+            reader_decorator(paddle.dataset.flowers.train(use_xmap=True)),
+            batch_size=batch_size,
+            drop_last=True)
         if args.use_data_parallel:
             train_reader = fluid.contrib.reader.distributed_batch_reader(
                 train_reader)
         test_reader = paddle.batch(
-            paddle.dataset.flowers.test(use_xmap=False), batch_size=batch_size)
+            reader_decorator(paddle.dataset.flowers.test(use_xmap=True)),
+            batch_size=batch_size,
+            drop_last=True)
+
+        train_loader = fluid.io.DataLoader.from_generator(capacity=10)
+        train_loader.set_sample_list_generator(train_reader, places=place)
+
+        test_loader = fluid.io.DataLoader.from_generator(capacity=10)
+        test_loader.set_sample_list_generator(test_reader, places=place)
 
         #file_name = './model/epoch_0.npz'
         #model_data = np.load( file_name )
@@ -331,23 +334,13 @@ def train_resnet():
             print("load finished")
 
-            for batch_id, data in enumerate(train_reader()):
+            for batch_id, data in enumerate(train_loader()):
                 #NOTE: used in benchmark
                 if args.max_iter and total_batch_num == args.max_iter:
                     return
                 batch_start = time.time()
-                dy_x_data = np.array(
-                    [x[0].reshape(3, 224, 224) for x in data]).astype('float32')
-                if len(np.array([x[1]
-                                 for x in data]).astype('int64')) != batch_size:
-                    continue
-                y_data = np.array([x[1] for x in data]).astype('int64').reshape(
-                    -1, 1)
-                img = to_variable(dy_x_data)
-                label = to_variable(y_data)
+                img, label = data
                 label.stop_gradient = True
 
                 out = resnet(img)
@@ -390,16 +383,14 @@ def train_resnet():
                           (eop, batch_id, total_loss / total_sample, \
                            total_acc1 / total_sample, total_acc5 / total_sample))
             resnet.eval()
-            eval(resnet, test_reader)
+            eval(resnet, test_loader)
 
         save_parameters = (not args.use_data_parallel) or (
             args.use_data_parallel and
             fluid.dygraph.parallel.Env().local_rank == 0)
         if save_parameters:
-            fluid.save_dygraph(resnet.state_dict(),
-                               'resnet_params')
+            fluid.save_dygraph(resnet.state_dict(), 'resnet_params')
 
 
 if __name__ == '__main__':
     train_resnet()
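
One behavioral detail worth flagging in the ResNet change above: the old loops skipped ragged final batches by hand (`if len(...) != batch_size: continue`), and that guard is gone. It is subsumed by passing drop_last=True to paddle.batch, so a partial batch is never produced in the first place. An illustration with a toy ten-sample reader (names are illustrative):

```python
import numpy as np
import paddle


def ten_samples():
    for i in range(10):
        yield (np.full((3, 224, 224), i, dtype='float32'),
               np.array([i], dtype='int64'))


# drop_last=True discards the trailing 2-sample batch before it ever
# reaches the training loop, replacing the old explicit length check
batches = list(paddle.batch(ten_samples, batch_size=4, drop_last=True)())
print(len(batches))     # 2
print(len(batches[0]))  # 4 samples in each kept batch
```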
@@ -169,8 +169,7 @@ class BottleneckBlock(fluid.dygraph.Layer):
             act=None)
 
         self.scale = SqueezeExcitation(
-            num_channels=num_filters * 2,
-            reduction_ratio=reduction_ratio)
+            num_channels=num_filters * 2, reduction_ratio=reduction_ratio)
 
         if not shortcut:
             self.short = ConvBNLayer(
@@ -219,10 +218,7 @@ class SeResNeXt(fluid.dygraph.Layer):
                 stride=2,
                 act='relu')
             self.pool = Pool2D(
-                pool_size=3,
-                pool_stride=2,
-                pool_padding=1,
-                pool_type='max')
+                pool_size=3, pool_stride=2, pool_padding=1, pool_type='max')
         elif layers == 101:
             cardinality = 32
             reduction_ratio = 16
@@ -235,10 +231,7 @@ class SeResNeXt(fluid.dygraph.Layer):
                 stride=2,
                 act='relu')
             self.pool = Pool2D(
-                pool_size=3,
-                pool_stride=2,
-                pool_padding=1,
-                pool_type='max')
+                pool_size=3, pool_stride=2, pool_padding=1, pool_type='max')
         elif layers == 152:
             cardinality = 64
             reduction_ratio = 16
@@ -263,10 +256,7 @@ class SeResNeXt(fluid.dygraph.Layer):
                 stride=1,
                 act='relu')
             self.pool = Pool2D(
-                pool_size=3,
-                pool_stride=2,
-                pool_padding=1,
-                pool_type='max')
+                pool_size=3, pool_stride=2, pool_padding=1, pool_type='max')
 
         self.bottleneck_block_list = []
         num_channels = 64
@@ -294,10 +284,11 @@ class SeResNeXt(fluid.dygraph.Layer):
         self.pool2d_avg_output = num_filters[len(num_filters) - 1] * 2 * 1 * 1
 
-        self.out = Linear(self.pool2d_avg_output,
-                          class_dim,
-                          param_attr=fluid.param_attr.ParamAttr(
-                              initializer=fluid.initializer.Uniform(-stdv, stdv)))
+        self.out = Linear(
+            self.pool2d_avg_output,
+            class_dim,
+            param_attr=fluid.param_attr.ParamAttr(
+                initializer=fluid.initializer.Uniform(-stdv, stdv)))
 
     def forward(self, inputs):
         if self.layers == 50 or self.layers == 101:
@@ -318,6 +309,16 @@ class SeResNeXt(fluid.dygraph.Layer):
         return y
 
 
+def reader_decorator(reader):
+    def __reader__():
+        for item in reader():
+            img = np.array(item[0]).astype('float32').reshape(3, 224, 224)
+            label = np.array(item[1]).astype('int64').reshape(1)
+            yield img, label
+
+    return __reader__
+
+
 def eval(model, data):
     model.eval()
@@ -327,15 +328,7 @@ def eval(model, data):
     total_acc5 = 0.0
     total_sample = 0
     for batch_id, data in enumerate(data()):
-        dy_x_data = np.array(
-            [x[0].reshape(3, 224, 224) for x in data]).astype('float32')
-        if len(np.array([x[1] for x in data]).astype('int64')) != batch_size:
-            continue
-        y_data = np.array([x[1] for x in data]).astype('int64').reshape(
-            batch_size, 1)
-        img = to_variable(dy_x_data)
-        label = to_variable(y_data)
+        img, label = data
         label.stop_gradient = True
         out = model(img)
@@ -389,29 +382,29 @@ def train():
             se_resnext = fluid.dygraph.parallel.DataParallel(se_resnext,
                                                              strategy)
 
         train_reader = paddle.batch(
-            paddle.dataset.flowers.train(use_xmap=False),
+            reader_decorator(paddle.dataset.flowers.train(use_xmap=False)),
             batch_size=batch_size,
             drop_last=True)
         if args.use_data_parallel:
             train_reader = fluid.contrib.reader.distributed_batch_reader(
                 train_reader)
 
         test_reader = paddle.batch(
-            paddle.dataset.flowers.test(use_xmap=False), batch_size=32)
+            reader_decorator(paddle.dataset.flowers.test(use_xmap=False)),
+            batch_size=32)
+
+        train_loader = fluid.io.DataLoader.from_generator(capacity=10)
+        train_loader.set_sample_list_generator(train_reader, places=place)
+
+        test_loader = fluid.io.DataLoader.from_generator(capacity=10)
+        test_loader.set_sample_list_generator(test_reader, places=place)
 
         for epoch_id in range(epoch_num):
             total_loss = 0.0
             total_acc1 = 0.0
             total_acc5 = 0.0
             total_sample = 0
-            for batch_id, data in enumerate(train_reader()):
-
-                dy_x_data = np.array([x[0].reshape(3, 224, 224)
-                                      for x in data]).astype('float32')
-                y_data = np.array([x[1] for x in data]).astype('int64').reshape(
-                    batch_size, 1)
-                img = to_variable(dy_x_data)
-                label = to_variable(y_data)
+            for batch_id, data in enumerate(train_loader()):
+                img, label = data
                 label.stop_gradient = True
 
                 out = se_resnext(img)
@@ -454,7 +447,7 @@ def train():
                           (epoch_id, batch_id, total_loss / total_sample, \
                            total_acc1 / total_sample, total_acc5 / total_sample))
             se_resnext.eval()
-            eval(se_resnext, test_reader)
+            eval(se_resnext, test_loader)
             se_resnext.train()
......