Commit 203b6e19 authored by huangjun12, committed by SunGaofeng

add ETS model to release 1.6 (#3565)

* add caption model ETS for PaddleVideo

* modify dynamic rnn to static rnn, and add infer part

* modify reader and configs for evaluation and validation

* modify details of the ets net code

* modify API of data and dataloader for ets net
Parent f39c93e6
MODEL:
    name: "ETS"
    feat_size: 2048
    fc_dim: 1024
    gru_hidden_dim: 512
    decoder_size: 512
    word_emb_dim: 512
    max_length: 80
    beam_size: 3
    START: "<s>"
    END: "<e>"
    UNK: "<unk>"
    feat_path: './data/dataset/ets/data_dict'
    dict_file: './data/dataset/ets/dict.txt'

TRAIN:
    epoch: 40
    batch_size: 256
    l2_weight_decay: 1e-4
    clip_norm: 5.0
    num_threads: 8
    buffer_size: 1024
    filelist: './data/dataset/ets/train.list'
    use_gpu: True
    num_gpus: 4

VALID:
    filelist: './data/dataset/ets/val.list'
    batch_size: 256
    num_threads: 8
    buffer_size: 1024
    use_gpu: True
    num_gpus: 4

TEST:
    filelist: './data/dataset/ets/val.list'
    batch_size: 1
    num_threads: 1
    buffer_size: 1024

INFER:
    filelist: './data/dataset/ets/infer.list'
    batch_size: 1
    num_threads: 1
    buffer_size: 1024
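
# Note: the filelists above are expected to be tab-separated, one annotated
# segment per line (format inferred from ETSReader below; the video id and
# caption are illustrative):
#   v_QOlSCBRmfWY	12.5	25.0	a man is playing a guitar on stage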
@@ -92,6 +92,8 @@ def test(args):
    place = fluid.CUDAPlace(0) if args.use_gpu else fluid.CPUPlace()
    exe = fluid.Executor(place)
    exe.run(fluid.default_startup_program())

    if args.weights:
        assert os.path.exists(
            args.weights), "Given weight dir {} not exist.".format(args.weights)
@@ -111,6 +113,14 @@ def test(args):
    epoch_period = []
    for test_iter, data in enumerate(test_reader()):
        cur_time = time.time()
        if args.model_name == 'ETS':
            feat_data = [items[:3] for items in data]
            vinfo = [items[3:] for items in data]
            test_outs = exe.run(fetch_list=test_fetch_list,
                                feed=test_feeder.feed(feat_data),
                                return_numpy=False)
            test_outs += vinfo
        else:
            test_outs = exe.run(fetch_list=test_fetch_list,
                                feed=test_feeder.feed(data))
        period = time.time() - cur_time
...
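
For ETS, each batch item yielded by the test reader is a 6-tuple; only the first three fields are network inputs, and the trailing video info is forwarded untouched to the metrics. A minimal sketch of the split (the id and times are illustrative):

    item = (feat, init_ids, init_scores, 'v_QOlSCBRmfWY', 12.5, 25.0)
    feat_data = item[:3]  # (feat, init_ids, init_scores), fed to the executor
    vinfo = item[3:]      # (vid, stime, etime), appended for MetricsCalculator.accumulate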
# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
#
#Licensed under the Apache License, Version 2.0 (the "License");
#you may not use this file except in compliance with the License.
#You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
#Unless required by applicable law or agreed to in writing, software
#distributed under the License is distributed on an "AS IS" BASIS,
#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
#See the License for the specific language governing permissions and
#limitations under the License.
import numpy as np
import datetime
import logging
import json
import os
from models.ctcn.ctcn_utils import BoxCoder
logger = logging.getLogger(__name__)
class MetricsCalculator():
    def __init__(self, name='ETS', mode='train', dict_file=''):
self.name = name
self.mode = mode # 'train', 'valid', 'test', 'infer'
self.dict_file = dict_file
self.reset()
def reset(self):
logger.info('Resetting {} metrics...'.format(self.mode))
self.aggr_batch_size = 0
if (self.mode == 'train') or (self.mode == 'valid'):
self.aggr_loss = 0.0
elif (self.mode == 'test') or (self.mode == 'infer'):
self.result_dict = dict()
self.out_file = self.name + '_' + self.mode + '_res_' + '.json'
def accumulate(self, fetch_list):
if self.mode == 'valid':
loss = fetch_list[0]
self.aggr_loss += np.mean(np.array(loss))
elif (self.mode == 'test') or (self.mode == 'infer'):
seq_ids = fetch_list[0]
seq_scores = fetch_list[1]
vid = fetch_list[2][0]
stime = fetch_list[2][1]
etime = fetch_list[2][2]
            # build idx_to_word from the dictionary file
self.idx_to_word = dict()
with open(self.dict_file, 'r') as f:
for i, line in enumerate(f):
self.idx_to_word[i] = line.strip().split()[0]
for i in range(len(seq_ids.lod()[0]) - 1):
start = seq_ids.lod()[0][i]
end = seq_ids.lod()[0][i + 1]
                for j in range(end - start)[:1]:  # keep only the top-1 candidate in each beam
sub_start = seq_ids.lod()[1][start + j]
sub_end = seq_ids.lod()[1][start + j + 1]
sent = " ".join([self.idx_to_word[idx]
for idx in np.array(seq_ids)[sub_start:sub_end][1:-1]])
if vid not in self.result_dict:
self.result_dict[vid] = [{'timestamp': [stime, etime], 'sentence': sent}]
else:
self.result_dict[vid].append({'timestamp': [stime, etime], 'sentence': sent})
def accumulate_infer_results(self, fetch_list):
# the same as test
pass
def finalize_metrics(self, savedir):
self.filepath = os.path.join(savedir, self.out_file)
with open(self.filepath, 'w') as f:
f.write(json.dumps({'version': 'VERSION 1.0', 'results': self.result_dict, 'external_data': {}}, indent=2))
        logger.info('results have been saved to file: {}'.format(self.filepath))
def finalize_infer_metrics(self, savedir):
# the same as test
pass
def get_computed_metrics(self):
pass
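
For reference, finalize_metrics serializes the accumulated captions in the following shape (a sketch; the video id and sentence are illustrative, not real outputs):

    {
        "version": "VERSION 1.0",
        "results": {
            "v_QOlSCBRmfWY": [
                {"timestamp": [12.5, 25.0], "sentence": "a man is playing a guitar"}
            ]
        },
        "external_data": {}
    }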
@@ -28,6 +28,7 @@ from metrics.detections import detection_metrics as detection_metrics
from metrics.bmn_metrics import bmn_proposal_metrics as bmn_proposal_metrics
from metrics.bsn_metrics import bsn_tem_metrics as bsn_tem_metrics
from metrics.bsn_metrics import bsn_pem_metrics as bsn_pem_metrics
from metrics.ets_metrics import ets_metrics as ets_metrics

logger = logging.getLogger(__name__)

@@ -421,6 +422,44 @@ class BsnPemMetrics(Metrics):
        self.calculator.reset()
class ETSMetrics(Metrics):
def __init__(self, name, mode, cfg):
self.name = name
self.mode = mode
args = {}
args['dict_file'] = cfg.MODEL.dict_file
args['mode'] = mode
args['name'] = name
self.calculator = ets_metrics.MetricsCalculator(**args)
def calculate_and_log_out(self, fetch_list, info=''):
if (self.mode == 'train') or (self.mode == 'valid'):
loss = np.array(fetch_list[0])
logger.info(
info + '\tLoss = {}'.format('%.08f' % np.mean(loss)))
elif self.mode == "test":
translation_ids = np.array(fetch_list[0])
translation_scores = np.array(fetch_list[1])
logger.info(
info + '\ttranslation_ids = {}, \ttranslation_scores = {}'.format(
'%.01f' % np.mean(translation_ids), '%.04f' % np.mean(translation_scores)))
def accumulate(self, fetch_list):
self.calculator.accumulate(fetch_list)
def finalize_and_log_out(self, info='', savedir='./'):
if self.mode == 'valid':
logger.info(info)
        else:  # test or infer
self.calculator.finalize_metrics(savedir)
if self.mode == 'test':
logger.info(info + 'please refer to metrics/ets_metrics/README.md to get accuracy')
def reset(self):
self.calculator.reset()

class MetricsZoo(object):
    def __init__(self):
        self.metrics_zoo = {}

@@ -461,3 +500,4 @@ regist_metrics("CTCN", DetectionMetrics)
regist_metrics("BMN", BmnMetrics)
regist_metrics("BSNTEM", BsnTemMetrics)
regist_metrics("BSNPEM", BsnPemMetrics)
regist_metrics("ETS", ETSMetrics)

@@ -10,6 +10,7 @@ from .ctcn import CTCN
from .bmn import BMN
from .bsn import BsnTem
from .bsn import BsnPem
from .ets import ETS

# regist models, sort by alphabet
regist_model("AttentionCluster", AttentionCluster)
@@ -23,3 +24,4 @@ regist_model("CTCN", CTCN)
regist_model("BMN", BMN)
regist_model("BsnTem", BsnTem)
regist_model("BsnPem", BsnPem)
regist_model("ETS", ETS)
# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
#
#Licensed under the Apache License, Version 2.0 (the "License");
#you may not use this file except in compliance with the License.
#You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
#Unless required by applicable law or agreed to in writing, software
#distributed under the License is distributed on an "AS IS" BASIS,
#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
#See the License for the specific language governing permissions and
#limitations under the License.
import paddle
import paddle.fluid as fluid
from paddle.fluid import ParamAttr
import numpy as np
from ..model import ModelBase
from . import ets_net
import logging
logger = logging.getLogger(__name__)
__all__ = ["ETS"]
class ETS(ModelBase):
"""ETS model"""
def __init__(self, name, cfg, mode='train'):
super(ETS, self).__init__(name, cfg, mode=mode)
self.get_config()
def get_config(self):
self.feat_size = self.get_config_from_sec('MODEL', 'feat_size')
self.fc_dim = self.get_config_from_sec('MODEL', 'fc_dim')
self.gru_hidden_dim = self.get_config_from_sec('MODEL',
'gru_hidden_dim')
self.decoder_size = self.get_config_from_sec('MODEL', 'decoder_size')
self.word_emb_dim = self.get_config_from_sec('MODEL', 'word_emb_dim')
self.dict_file = self.get_config_from_sec('MODEL', 'dict_file')
self.max_length = self.get_config_from_sec('MODEL', 'max_length')
self.beam_size = self.get_config_from_sec('MODEL', 'beam_size')
self.num_epochs = self.get_config_from_sec('train', 'epoch')
self.l2_weight_decay = self.get_config_from_sec('train',
'l2_weight_decay')
self.clip_norm = self.get_config_from_sec('train', 'clip_norm')
def build_input(self, use_dataloader=True):
feat_shape = [None, self.feat_size]
word_shape = [None, 1]
word_next_shape = [None, 1]
# set init data to None
py_reader = None
feat = None
word = None
word_next = None
init_ids = None
init_scores = None
self.use_dataloader = use_dataloader
feat = fluid.data(
name='feat', shape=feat_shape, dtype='float32', lod_level=1)
feed_list = []
feed_list.append(feat)
if (self.mode == 'train') or (self.mode == 'valid'):
word = fluid.data(
name='word', shape=word_shape, dtype='int64', lod_level=1)
word_next = fluid.data(
name='word_next',
shape=word_next_shape,
dtype='int64',
lod_level=1)
feed_list.append(word)
feed_list.append(word_next)
elif (self.mode == 'test') or (self.mode == 'infer'):
init_ids = fluid.data(
name="init_ids", shape=[None, 1], dtype="int64", lod_level=2)
init_scores = fluid.data(
name="init_scores",
shape=[None, 1],
dtype="float32",
lod_level=2)
else:
raise NotImplementedError('mode {} not implemented'.format(
self.mode))
if use_dataloader:
            assert self.mode != 'infer', \
                'dataloader is not recommended for infer, please set use_dataloader to False.'
self.dataloader = fluid.io.DataLoader.from_generator(
feed_list=feed_list, capacity=16, iterable=True)
self.feature_input = [feat]
self.word = word
self.word_next = word_next
self.init_ids = init_ids
self.init_scores = init_scores
def create_model_args(self):
cfg = {}
cfg['feat_size'] = self.feat_size
cfg['fc_dim'] = self.fc_dim
cfg['gru_hidden_dim'] = self.gru_hidden_dim
cfg['decoder_size'] = self.decoder_size
cfg['word_emb_dim'] = self.word_emb_dim
word_dict = dict()
with open(self.dict_file, 'r') as f:
for i, line in enumerate(f):
word_dict[line.strip().split()[0]] = i
dict_size = len(word_dict)
cfg['dict_size'] = dict_size
cfg['max_length'] = self.max_length
cfg['beam_size'] = self.beam_size
return cfg
def build_model(self):
cfg = self.create_model_args()
self.videomodel = ets_net.ETSNET(
feat_size=cfg['feat_size'],
fc_dim=cfg['fc_dim'],
gru_hidden_dim=cfg['gru_hidden_dim'],
decoder_size=cfg['decoder_size'],
word_emb_dim=cfg['word_emb_dim'],
dict_size=cfg['dict_size'],
max_length=cfg['max_length'],
beam_size=cfg['beam_size'],
mode=self.mode)
if (self.mode == 'train') or (self.mode == 'valid'):
prob = self.videomodel.net(self.feature_input[0], self.word)
self.network_outputs = [prob]
elif (self.mode == 'test') or (self.mode == 'infer'):
translation_ids, translation_scores = self.videomodel.net(
self.feature_input[0], self.init_ids, self.init_scores)
self.network_outputs = [translation_ids, translation_scores]
def optimizer(self):
l2_weight_decay = self.l2_weight_decay
clip_norm = self.clip_norm
fluid.clip.set_gradient_clip(
clip=fluid.clip.GradientClipByGlobalNorm(clip_norm=clip_norm))
lr_decay = fluid.layers.learning_rate_scheduler.noam_decay(
self.gru_hidden_dim, 1000)
optimizer = fluid.optimizer.Adam(
learning_rate=lr_decay,
regularization=fluid.regularizer.L2DecayRegularizer(
regularization_coeff=l2_weight_decay))
return optimizer
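
    # The schedule above is Noam decay with d_model = gru_hidden_dim and 1000
    # warmup steps; as a sketch of what noam_decay computes per step:
    #   lr(step) = gru_hidden_dim ** -0.5 * min(step ** -0.5, step * 1000 ** -1.5)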
def loss(self):
        assert self.mode != 'infer', "invalid loss calculation in infer mode"
self.loss_ = self.videomodel.loss(self.network_outputs[0],
self.word_next)
return self.loss_
def outputs(self):
return self.network_outputs
def feeds(self):
if (self.mode == 'train') or (self.mode == 'valid'):
return self.feature_input + [self.word, self.word_next]
elif (self.mode == 'test') or (self.mode == 'infer'):
return self.feature_input + [self.init_ids, self.init_scores]
else:
raise NotImplementedError('mode {} not implemented'.format(
self.mode))
def fetches(self):
if (self.mode == 'train') or (self.mode == 'valid'):
losses = self.loss()
fetch_list = [item for item in losses]
elif (self.mode == 'test') or (self.mode == 'infer'):
preds = self.outputs()
fetch_list = [item for item in preds]
else:
raise NotImplementedError('mode {} not implemented'.format(
self.mode))
return fetch_list
def pretrain_info(self):
return (None, None)
def weights_info(self):
pass
# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
#
#Licensed under the Apache License, Version 2.0 (the "License");
#you may not use this file except in compliance with the License.
#You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
#Unless required by applicable law or agreed to in writing, software
#distributed under the License is distributed on an "AS IS" BASIS,
#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
#See the License for the specific language governing permissions and
#limitations under the License.
import paddle.fluid as fluid
from paddle.fluid import ParamAttr
import numpy as np
DATATYPE = 'float32'
class ETSNET(object):
def __init__(self,
feat_size,
fc_dim,
gru_hidden_dim,
max_length,
beam_size,
decoder_size,
word_emb_dim,
dict_size,
mode='train'):
self.feat_size = feat_size
self.fc_dim = fc_dim
self.gru_hidden_dim = gru_hidden_dim
self.decoder_size = decoder_size
self.word_emb_dim = word_emb_dim
self.dict_size = dict_size
self.max_length = max_length
self.beam_size = beam_size
self.mode = mode
def encoder(self, feat):
bias_attr = fluid.ParamAttr(
regularizer=fluid.regularizer.L2Decay(0.0),
initializer=fluid.initializer.NormalInitializer(scale=0.0))
input_fc = fluid.layers.fc(input=feat,
size=self.fc_dim,
act='tanh',
bias_attr=bias_attr)
gru_forward_fc = fluid.layers.fc(input=input_fc,
size=self.gru_hidden_dim * 3,
bias_attr=False)
gru_forward = fluid.layers.dynamic_gru(
input=gru_forward_fc, size=self.gru_hidden_dim, is_reverse=False)
gru_backward_fc = fluid.layers.fc(input=input_fc,
size=self.gru_hidden_dim * 3,
bias_attr=False)
gru_backward = fluid.layers.dynamic_gru(
input=gru_backward_fc, size=self.gru_hidden_dim, is_reverse=True)
encoded_sequence = fluid.layers.concat(
input=[gru_forward, gru_backward], axis=1)
gru_weights = fluid.layers.fc(input=encoded_sequence,
size=1,
act='sequence_softmax',
bias_attr=False)
gru_scaled = fluid.layers.elementwise_mul(
x=encoded_sequence, y=gru_weights, axis=0)
encoded_vector = fluid.layers.sequence_pool(
input=gru_scaled, pool_type='sum')
encoded_proj = fluid.layers.fc(input=encoded_sequence,
size=self.decoder_size,
bias_attr=False)
return encoded_sequence, encoded_vector, encoded_proj
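
    # Summary of the encoder above: a bi-directional GRU runs over the frame
    # features; forward and backward outputs are concatenated per time step
    # (encoded_sequence), pooled into a single vector by a learned
    # sequence_softmax weighting plus sum-pooling (encoded_vector, used to
    # boot the decoder), and projected to decoder_size (encoded_proj, reused
    # by the attention at every decoding step).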
def cell(self, x, hidden, encoder_out, encoder_out_proj):
def simple_attention(encoder_vec, encoder_proj, decoder_state):
decoder_state_proj = fluid.layers.fc(input=decoder_state,
size=self.decoder_size,
bias_attr=False)
decoder_state_expand = fluid.layers.sequence_expand(
x=decoder_state_proj, y=encoder_proj)
mixed_state = fluid.layers.elementwise_add(encoder_proj,
decoder_state_expand)
attention_weights = fluid.layers.fc(input=mixed_state,
size=1,
bias_attr=False)
attention_weights = fluid.layers.sequence_softmax(
input=attention_weights)
            weights_reshape = fluid.layers.reshape(
                x=attention_weights, shape=[-1])
            scaled = fluid.layers.elementwise_mul(
                x=encoder_vec, y=weights_reshape, axis=0)
context = fluid.layers.sequence_pool(input=scaled, pool_type='sum')
return context
context = simple_attention(encoder_out, encoder_out_proj, hidden)
out = fluid.layers.fc(input=[x, context],
size=self.decoder_size * 3,
bias_attr=False)
out = fluid.layers.gru_unit(
input=out, hidden=hidden, size=self.decoder_size * 3)[0]
        return out, out  # the GRU output also serves as the next hidden state
def train_decoder(self, word, encoded_sequence, encoded_vector,
encoded_proj):
decoder_boot = fluid.layers.fc(input=encoded_vector,
size=self.decoder_size,
act='tanh',
bias_attr=False)
word_embedding = fluid.layers.embedding(
input=word, size=[self.dict_size, self.word_emb_dim])
pad_value = fluid.layers.assign(input=np.array([0.], dtype=np.float32))
word_embedding, length = fluid.layers.sequence_pad(word_embedding,
pad_value)
word_embedding = fluid.layers.transpose(word_embedding, [1, 0, 2])
rnn = fluid.layers.StaticRNN()
with rnn.step():
x = rnn.step_input(word_embedding)
pre_state = rnn.memory(init=decoder_boot)
out, current_state = self.cell(x, pre_state, encoded_sequence,
encoded_proj)
prob = fluid.layers.fc(input=out,
size=self.dict_size,
act='softmax')
rnn.update_memory(pre_state, current_state)
rnn.step_output(prob)
rnn_out = rnn()
rnn_out = fluid.layers.transpose(rnn_out, [1, 0, 2])
length = fluid.layers.reshape(length, [-1])
rnn_out = fluid.layers.sequence_unpad(x=rnn_out, length=length)
return rnn_out
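
    # Shape walkthrough for the StaticRNN above: sequence_pad densifies the
    # word embeddings to [batch, max_len, emb]; transpose makes them
    # time-major [max_len, batch, emb] so rnn.step_input can iterate over the
    # first axis; after the RNN, the output is transposed back and
    # sequence_unpad restores the original variable-length (LoD) layout.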
def infer_decoder(self, init_ids, init_scores, encoded_sequence,
encoded_vector, encoded_proj):
decoder_boot = fluid.layers.fc(input=encoded_vector,
size=self.decoder_size,
act='tanh',
bias_attr=False)
max_len = fluid.layers.fill_constant(
shape=[1], dtype='int64', value=self.max_length)
counter = fluid.layers.zeros(shape=[1], dtype='int64', force_cpu=True)
# create and init arrays to save selected ids, scores and states for each step
ids_array = fluid.layers.array_write(init_ids, i=counter)
scores_array = fluid.layers.array_write(init_scores, i=counter)
state_array = fluid.layers.array_write(decoder_boot, i=counter)
cond = fluid.layers.less_than(x=counter, y=max_len)
while_op = fluid.layers.While(cond=cond)
with while_op.block():
pre_ids = fluid.layers.array_read(array=ids_array, i=counter)
pre_score = fluid.layers.array_read(array=scores_array, i=counter)
pre_state = fluid.layers.array_read(array=state_array, i=counter)
pre_ids_emb = fluid.layers.embedding(
input=pre_ids, size=[self.dict_size, self.word_emb_dim])
out, current_state = self.cell(pre_ids_emb, pre_state,
encoded_sequence, encoded_proj)
prob = fluid.layers.fc(input=out,
size=self.dict_size,
act='softmax')
# beam search
topk_scores, topk_indices = fluid.layers.topk(
prob, k=self.beam_size)
accu_scores = fluid.layers.elementwise_add(
x=fluid.layers.log(topk_scores),
y=fluid.layers.reshape(
pre_score, shape=[-1]),
axis=0)
accu_scores = fluid.layers.lod_reset(x=accu_scores, y=pre_ids)
selected_ids, selected_scores = fluid.layers.beam_search(
pre_ids,
pre_score,
topk_indices,
accu_scores,
self.beam_size,
end_id=1)
fluid.layers.increment(x=counter, value=1, in_place=True)
# save selected ids and corresponding scores of each step
fluid.layers.array_write(selected_ids, array=ids_array, i=counter)
fluid.layers.array_write(
selected_scores, array=scores_array, i=counter)
# update rnn state by sequence_expand acting as gather
current_state = fluid.layers.sequence_expand(current_state,
selected_scores)
fluid.layers.array_write(
current_state, array=state_array, i=counter)
current_enc_seq = fluid.layers.sequence_expand(encoded_sequence,
selected_scores)
fluid.layers.assign(current_enc_seq, encoded_sequence)
current_enc_proj = fluid.layers.sequence_expand(encoded_proj,
selected_scores)
fluid.layers.assign(current_enc_proj, encoded_proj)
# update conditional variable
length_cond = fluid.layers.less_than(x=counter, y=max_len)
finish_cond = fluid.layers.logical_not(
fluid.layers.is_empty(x=selected_ids))
fluid.layers.logical_and(x=length_cond, y=finish_cond, out=cond)
translation_ids, translation_scores = fluid.layers.beam_search_decode(
ids=ids_array,
scores=scores_array,
beam_size=self.beam_size,
end_id=1)
return translation_ids, translation_scores
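
    # Decoding conventions assumed above: every beam starts from init_ids and
    # stops at end_id=1, which presumes '<s>' and '<e>' sit on the first two
    # lines of dict.txt (consistent with init_ids=[[0]] in ETSReader). Scores
    # accumulate log-probabilities, so beam_search keeps the beam_size most
    # likely partial sentences at each step.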
def net(self, feat, *input_decoder):
encoded_sequence, encoded_vector, encoded_proj = self.encoder(feat)
if (self.mode == 'train') or (self.mode == 'valid'):
word, = input_decoder
prob = self.train_decoder(word, encoded_sequence, encoded_vector,
encoded_proj)
return prob
else:
init_ids, init_scores = input_decoder
translation_ids, translation_scores = self.infer_decoder(
init_ids, init_scores, encoded_sequence, encoded_vector,
encoded_proj)
return translation_ids, translation_scores
def loss(self, prob, word_next):
cost = fluid.layers.cross_entropy(input=prob, label=word_next)
avg_cost = fluid.layers.mean(cost)
return [avg_cost]
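
The loss above is the average per-word cross-entropy against the shifted target word_next; if a perplexity number is wanted for monitoring, it is just the exponential of that loss. A minimal sketch:

    import numpy as np

    def perplexity(avg_cost):
        # avg_cost: mean cross-entropy returned by ETSNET.loss
        return float(np.exp(avg_cost))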
@@ -109,6 +109,8 @@ def infer(args):
    place = fluid.CUDAPlace(0) if args.use_gpu else fluid.CPUPlace()
    exe = fluid.Executor(place)
    exe.run(fluid.default_startup_program())

    filelist = args.filelist or infer_config.INFER.filelist
    filepath = args.video_path or infer_config.INFER.get('filepath', '')
    if filepath != '':
@@ -137,16 +139,26 @@ def infer(args):
    periods = []
    cur_time = time.time()
    for infer_iter, data in enumerate(infer_reader()):
        if args.model_name == 'ETS':
            data_feed_in = [items[:3] for items in data]
            vinfo = [items[3:] for items in data]
            video_id = [items[0] for items in vinfo]
            infer_outs = exe.run(fetch_list=fetch_list,
                                 feed=infer_feeder.feed(data_feed_in),
                                 return_numpy=False)
            infer_result_list = infer_outs + vinfo
        else:
            data_feed_in = [items[:-1] for items in data]
            video_id = [items[-1] for items in data]
            infer_outs = exe.run(fetch_list=fetch_list,
                                 feed=infer_feeder.feed(data_feed_in))
            infer_result_list = [item for item in infer_outs] + [video_id]

        prev_time = cur_time
        cur_time = time.time()
        period = cur_time - prev_time
        periods.append(period)

        infer_metrics.accumulate(infer_result_list)

        if args.log_interval > 0 and infer_iter % args.log_interval == 0:
...
@@ -6,6 +6,7 @@ from .ctcn_reader import CTCNReader
from .bmn_reader import BMNReader
from .bsn_reader import BSNVideoReader
from .bsn_reader import BSNProposalReader
from .ets_reader import ETSReader

# regist reader, sort by alphabet
regist_reader("ATTENTIONCLUSTER", FeatureReader)
@@ -19,3 +20,4 @@ regist_reader("CTCN", CTCNReader)
regist_reader("BMN", BMNReader)
regist_reader("BSNTEM", BSNVideoReader)
regist_reader("BSNPEM", BSNProposalReader)
regist_reader("ETS", ETSReader)
# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
#
#Licensed under the Apache License, Version 2.0 (the "License");
#you may not use this file except in compliance with the License.
#You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
#Unless required by applicable law or agreed to in writing, software
#distributed under the License is distributed on an "AS IS" BASIS,
#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
#See the License for the specific language governing permissions and
#limitations under the License.
import os
import random
import sys
import functools
import logging
import pickle

import numpy as np
import paddle

from .reader_utils import DataReader

logger = logging.getLogger(__name__)

python_ver = sys.version_info
class ETSReader(DataReader):
"""
Data reader for ETS model, which was stored as features extracted by prior networks
"""
def __init__(self, name, mode, cfg):
self.name = name
self.mode = mode
self.feat_path = cfg.MODEL.feat_path
self.dict_file = cfg.MODEL.dict_file
self.START = cfg.MODEL.START
self.END = cfg.MODEL.END
self.UNK = cfg.MODEL.UNK
self.filelist = cfg[mode.upper()]['filelist']
self.batch_size = cfg[mode.upper()]['batch_size']
self.num_threads = cfg[mode.upper()]['num_threads']
self.buffer_size = cfg[mode.upper()]['buffer_size']
if (mode == 'test') or (mode == 'infer'):
self.num_threads = 1 # set num_threads as 1 for test and infer
def load_file(self):
word_dict = dict()
with open(self.dict_file, 'r') as f:
for i, line in enumerate(f):
word_dict[line.strip().split()[0]] = i
return word_dict
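
    # dict.txt is expected to contain one vocabulary entry per line, the word
    # being the first whitespace-separated token and the word id its line
    # index. An illustrative layout (the special tokens presumably occupy the
    # first lines, matching MODEL.START/END/UNK and init_ids=[[0]] below):
    #   <s>
    #   <e>
    #   <unk>
    #   a
    #   man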
def create_reader(self):
"""reader creator for ets model"""
if self.mode == 'infer':
return self.make_infer_reader()
else:
return self.make_multiprocess_reader()
def make_infer_reader(self):
"""reader for inference"""
def reader():
batch_out = []
with open(self.filelist) as f:
lines = f.readlines()
reader_list = [
line.strip() for line in lines if line.strip() != ''
]
word_dict = self.load_file()
for line in reader_list:
vid, stime, etime, sentence = line.split('\t')
stime, etime = float(stime), float(etime)
if python_ver < (3, 0):
datas = pickle.load(
open(os.path.join(self.feat_path, vid), 'rb'))
else:
datas = pickle.load(
open(os.path.join(self.feat_path, vid), 'rb'),
encoding='bytes')
                feat = datas[int(stime * 5):int(etime * 5 + 0.5), :]  # features appear to be sampled 5 per second
init_ids = np.array([[0]], dtype='int64')
init_scores = np.array([[0.]], dtype='float32')
if feat.shape[0] == 0:
continue
batch_out.append(
(feat, init_ids, init_scores, vid, stime, etime))
if len(batch_out) == self.batch_size:
yield batch_out
batch_out = []
return reader
def make_multiprocess_reader(self):
"""multiprocess reader"""
def process_data(sample):
vid, feat, stime, etime, sentence = sample
if self.mode == 'train' or self.mode == 'valid':
word_ids = [
word_dict.get(w, word_dict[self.UNK])
for w in sentence.split()
]
                word_ids_next = word_ids + [word_dict[self.END]]  # target: sentence + <e>
                word_ids = [word_dict[self.START]] + word_ids  # decoder input: <s> + sentence
                return feat, word_ids, word_ids_next
elif self.mode == 'test':
init_ids = np.array([[0]], dtype='int64')
init_scores = np.array([[0.]], dtype='float32')
return feat, init_ids, init_scores, vid, stime, etime
else:
raise NotImplementedError('mode {} not implemented'.format(
self.mode))
def make_reader():
def reader():
lines = open(self.filelist).readlines()
reader_list = [
line.strip() for line in lines if line.strip() != ''
]
if self.mode == 'train':
random.shuffle(reader_list)
for line in reader_list:
vid, stime, etime, sentence = line.split('\t')
stime, etime = float(stime), float(etime)
if python_ver < (3, 0):
datas = pickle.load(
open(os.path.join(self.feat_path, vid), 'rb'))
else:
datas = pickle.load(
open(os.path.join(self.feat_path, vid), 'rb'),
encoding='bytes')
                    feat = datas[int(stime * 5):int(etime * 5 + 0.5), :]  # features appear to be sampled 5 per second
if feat.shape[0] == 0:
continue
yield [vid, feat, stime, etime, sentence]
mapper = functools.partial(process_data)
return paddle.reader.xmap_readers(mapper, reader, self.num_threads,
self.buffer_size)
def batch_reader():
batch_out = []
for out in _reader():
batch_out.append(out)
if len(batch_out) == self.batch_size:
yield batch_out
batch_out = []
word_dict = self.load_file()
_reader = make_reader()
return batch_reader
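
A minimal usage sketch of this reader (cfg is the parsed YAML configuration shown at the top; the registration name follows reader/__init__.py above):

    reader = ETSReader('ETS', 'train', cfg).create_reader()
    for batch in reader():
        feat, word_ids, word_ids_next = batch[0]  # first sample of the batch
        break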
...@@ -173,12 +173,18 @@ def train(args): ...@@ -173,12 +173,18 @@ def train(args):
if args.model_name in ['CTCN']: if args.model_name in ['CTCN']:
build_strategy.enable_sequential_execution = True build_strategy.enable_sequential_execution = True
exec_strategy = fluid.ExecutionStrategy()
compiled_train_prog = fluid.compiler.CompiledProgram( compiled_train_prog = fluid.compiler.CompiledProgram(
train_prog).with_data_parallel( train_prog).with_data_parallel(
loss_name=train_loss.name, build_strategy=build_strategy) loss_name=train_loss.name,
build_strategy=build_strategy,
exec_strategy=exec_strategy)
compiled_valid_prog = fluid.compiler.CompiledProgram( compiled_valid_prog = fluid.compiler.CompiledProgram(
valid_prog).with_data_parallel( valid_prog).with_data_parallel(
share_vars_from=compiled_train_prog, build_strategy=build_strategy) share_vars_from=compiled_train_prog,
build_strategy=build_strategy,
exec_strategy=exec_strategy)
# get reader # get reader
bs_denominator = 1 bs_denominator = 1
......