Commit 203b6e19, authored by huangjun12, committed by SunGaofeng

add ETS model to release 1.6 (#3565)

* add caption model ETS for PaddleVideo

* modify dynamic rnn to static rnn, and add infer part

* modify reader and configs for evaluation and validation

* modify details of the ets net code

* modify API of data and dataloader for ets net
Parent f39c93e6
MODEL:
    name: "ETS"
    feat_size: 2048
    fc_dim: 1024
    gru_hidden_dim: 512
    decoder_size: 512
    word_emb_dim: 512
    max_length: 80
    beam_size: 3
    START: "<s>"
    END: "<e>"
    UNK: "<unk>"
    feat_path: './data/dataset/ets/data_dict'
    dict_file: './data/dataset/ets/dict.txt'

TRAIN:
    epoch: 40
    batch_size: 256
    l2_weight_decay: 1e-4
    clip_norm: 5.0
    num_threads: 8
    buffer_size: 1024
    filelist: './data/dataset/ets/train.list'
    use_gpu: True
    num_gpus: 4

VALID:
    filelist: './data/dataset/ets/val.list'
    batch_size: 256
    num_threads: 8
    buffer_size: 1024
    use_gpu: True
    num_gpus: 4

TEST:
    filelist: './data/dataset/ets/val.list'
    batch_size: 1
    num_threads: 1
    buffer_size: 1024

INFER:
    filelist: './data/dataset/ets/infer.list'
    batch_size: 1
    num_threads: 1
    buffer_size: 1024
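For orientation: the MODEL section holds network hyper-parameters shared by all modes, while TRAIN/VALID/TEST/INFER configure the reader per mode. A minimal sketch (not part of this commit) of reading these fields, assuming the file is saved as ./configs/ets.yaml and PyYAML is available:

import yaml

with open('./configs/ets.yaml') as f:  # assumed path to the config above
    cfg = yaml.safe_load(f)

assert cfg['MODEL']['name'] == 'ETS'
print(cfg['MODEL']['beam_size'])  # 3, consumed by the beam-search decoder
print(cfg['TEST']['batch_size'])  # 1, test and infer process one segment at a time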
@@ -92,6 +92,8 @@ def test(args):
     place = fluid.CUDAPlace(0) if args.use_gpu else fluid.CPUPlace()
     exe = fluid.Executor(place)
     exe.run(fluid.default_startup_program())
     if args.weights:
         assert os.path.exists(
             args.weights), "Given weight dir {} does not exist.".format(args.weights)
@@ -111,8 +113,16 @@ def test(args):
     epoch_period = []
     for test_iter, data in enumerate(test_reader()):
         cur_time = time.time()
-        test_outs = exe.run(fetch_list=test_fetch_list,
-                            feed=test_feeder.feed(data))
+        if args.model_name == 'ETS':
+            feat_data = [items[:3] for items in data]
+            vinfo = [items[3:] for items in data]
+            test_outs = exe.run(fetch_list=test_fetch_list,
+                                feed=test_feeder.feed(feat_data),
+                                return_numpy=False)
+            test_outs += vinfo
+        else:
+            test_outs = exe.run(fetch_list=test_fetch_list,
+                                feed=test_feeder.feed(data))
         period = time.time() - cur_time
         epoch_period.append(period)
         test_metrics.accumulate(test_outs)
...
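For context, an illustrative sketch (not part of the commit) of the per-sample tuple the ETS reader yields in test mode, which explains the items[:3]/items[3:] split above; the video id and times are made up:

import numpy as np

feat = np.random.rand(40, 2048).astype('float32')  # (segment_len, feat_size)
init_ids = np.array([[0]], dtype='int64')           # id of the start token <s>
init_scores = np.array([[0.]], dtype='float32')
sample = (feat, init_ids, init_scores, 'v_example_0001', 0.0, 8.0)

feat_data = sample[:3]  # fed to the network via test_feeder.feed(...)
vinfo = sample[3:]      # (vid, stime, etime), appended to test_outs for the metrics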
# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
#
#Licensed under the Apache License, Version 2.0 (the "License");
#you may not use this file except in compliance with the License.
#You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
#Unless required by applicable law or agreed to in writing, software
#distributed under the License is distributed on an "AS IS" BASIS,
#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
#See the License for the specific language governing permissions and
#limitations under the License.
import numpy as np
import datetime
import logging
import json
import os
from models.ctcn.ctcn_utils import BoxCoder
logger = logging.getLogger(__name__)
class MetricsCalculator():
    def __init__(self, name='ETS', mode='train', dict_file=''):
        self.name = name
        self.mode = mode  # 'train', 'valid', 'test', 'infer'
        self.dict_file = dict_file
        self.reset()

    def reset(self):
        logger.info('Resetting {} metrics...'.format(self.mode))
        self.aggr_batch_size = 0
        if (self.mode == 'train') or (self.mode == 'valid'):
            self.aggr_loss = 0.0
        elif (self.mode == 'test') or (self.mode == 'infer'):
            self.result_dict = dict()
            self.out_file = self.name + '_' + self.mode + '_res_' + '.json'

    def accumulate(self, fetch_list):
        if self.mode == 'valid':
            loss = fetch_list[0]
            self.aggr_loss += np.mean(np.array(loss))
        elif (self.mode == 'test') or (self.mode == 'infer'):
            seq_ids = fetch_list[0]
            seq_scores = fetch_list[1]
            vid = fetch_list[2][0]
            stime = fetch_list[2][1]
            etime = fetch_list[2][2]

            # build the id-to-word mapping from the dictionary file
            self.idx_to_word = dict()
            with open(self.dict_file, 'r') as f:
                for i, line in enumerate(f):
                    self.idx_to_word[i] = line.strip().split()[0]

            # seq_ids is a 2-level LoDTensor: level 0 indexes source samples,
            # level 1 indexes the candidate sentences of each sample
            for i in range(len(seq_ids.lod()[0]) - 1):
                start = seq_ids.lod()[0][i]
                end = seq_ids.lod()[0][i + 1]
                # keep only the top-scoring candidate of the beam
                for j in range(end - start)[:1]:
                    sub_start = seq_ids.lod()[1][start + j]
                    sub_end = seq_ids.lod()[1][start + j + 1]
                    # drop the leading <s> and trailing <e> tokens
                    sent = " ".join([
                        self.idx_to_word[idx]
                        for idx in np.array(seq_ids)[sub_start:sub_end][1:-1]
                    ])
                    if vid not in self.result_dict:
                        self.result_dict[vid] = [{
                            'timestamp': [stime, etime],
                            'sentence': sent
                        }]
                    else:
                        self.result_dict[vid].append({
                            'timestamp': [stime, etime],
                            'sentence': sent
                        })

    def accumulate_infer_results(self, fetch_list):
        # the same as test
        pass

    def finalize_metrics(self, savedir):
        self.filepath = os.path.join(savedir, self.out_file)
        with open(self.filepath, 'w') as f:
            f.write(
                json.dumps(
                    {
                        'version': 'VERSION 1.0',
                        'results': self.result_dict,
                        'external_data': {}
                    },
                    indent=2))
        logger.info('results have been saved into file: {}'.format(
            self.filepath))

    def finalize_infer_metrics(self, savedir):
        # the same as test
        pass

    def get_computed_metrics(self):
        pass
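To make the output format concrete, here is an illustrative sketch of the JSON that finalize_metrics writes (dense-captioning submission style); the video id and caption are invented:

import json

example = {
    "version": "VERSION 1.0",
    "results": {
        "v_example_0001": [
            {"timestamp": [0.0, 8.0],
             "sentence": "a man is playing a guitar"},
        ],
    },
    "external_data": {},
}
print(json.dumps(example, indent=2))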
@@ -28,6 +28,7 @@ from metrics.detections import detection_metrics as detection_metrics
 from metrics.bmn_metrics import bmn_proposal_metrics as bmn_proposal_metrics
 from metrics.bsn_metrics import bsn_tem_metrics as bsn_tem_metrics
 from metrics.bsn_metrics import bsn_pem_metrics as bsn_pem_metrics
+from metrics.ets_metrics import ets_metrics as ets_metrics

 logger = logging.getLogger(__name__)
@@ -421,6 +422,44 @@ class BsnPemMetrics(Metrics):
         self.calculator.reset()
+
+
+class ETSMetrics(Metrics):
+    def __init__(self, name, mode, cfg):
+        self.name = name
+        self.mode = mode
+        args = {}
+        args['dict_file'] = cfg.MODEL.dict_file
+        args['mode'] = mode
+        args['name'] = name
+        self.calculator = ets_metrics.MetricsCalculator(**args)
+
+    def calculate_and_log_out(self, fetch_list, info=''):
+        if (self.mode == 'train') or (self.mode == 'valid'):
+            loss = np.array(fetch_list[0])
+            logger.info(info + '\tLoss = {}'.format('%.08f' % np.mean(loss)))
+        elif self.mode == "test":
+            translation_ids = np.array(fetch_list[0])
+            translation_scores = np.array(fetch_list[1])
+            logger.info(info +
+                        '\ttranslation_ids = {}, \ttranslation_scores = {}'.
+                        format('%.01f' % np.mean(translation_ids),
+                               '%.04f' % np.mean(translation_scores)))
+
+    def accumulate(self, fetch_list):
+        self.calculator.accumulate(fetch_list)
+
+    def finalize_and_log_out(self, info='', savedir='./'):
+        if self.mode == 'valid':
+            logger.info(info)
+        else:  # test or infer
+            self.calculator.finalize_metrics(savedir)
+            if self.mode == 'test':
+                logger.info(info + 'please refer to metrics/ets_metrics/'
+                            'README.md to get accuracy')
+
+    def reset(self):
+        self.calculator.reset()
 class MetricsZoo(object):
     def __init__(self):
         self.metrics_zoo = {}
@@ -461,3 +500,4 @@ regist_metrics("CTCN", DetectionMetrics)
 regist_metrics("BMN", BmnMetrics)
 regist_metrics("BSNTEM", BsnTemMetrics)
 regist_metrics("BSNPEM", BsnPemMetrics)
+regist_metrics("ETS", ETSMetrics)
@@ -10,6 +10,7 @@ from .ctcn import CTCN
 from .bmn import BMN
 from .bsn import BsnTem
 from .bsn import BsnPem
+from .ets import ETS

 # regist models, sort by alphabet
 regist_model("AttentionCluster", AttentionCluster)
@@ -23,3 +24,4 @@ regist_model("CTCN", CTCN)
 regist_model("BMN", BMN)
 regist_model("BsnTem", BsnTem)
 regist_model("BsnPem", BsnPem)
+regist_model("ETS", ETS)
# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
#
#Licensed under the Apache License, Version 2.0 (the "License");
#you may not use this file except in compliance with the License.
#You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
#Unless required by applicable law or agreed to in writing, software
#distributed under the License is distributed on an "AS IS" BASIS,
#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
#See the License for the specific language governing permissions and
#limitations under the License.
import paddle
import paddle.fluid as fluid
from paddle.fluid import ParamAttr
import numpy as np
from ..model import ModelBase
from . import ets_net
import logging
logger = logging.getLogger(__name__)
__all__ = ["ETS"]
class ETS(ModelBase):
    """ETS model"""

    def __init__(self, name, cfg, mode='train'):
        super(ETS, self).__init__(name, cfg, mode=mode)
        self.get_config()

    def get_config(self):
        self.feat_size = self.get_config_from_sec('MODEL', 'feat_size')
        self.fc_dim = self.get_config_from_sec('MODEL', 'fc_dim')
        self.gru_hidden_dim = self.get_config_from_sec('MODEL',
                                                       'gru_hidden_dim')
        self.decoder_size = self.get_config_from_sec('MODEL', 'decoder_size')
        self.word_emb_dim = self.get_config_from_sec('MODEL', 'word_emb_dim')
        self.dict_file = self.get_config_from_sec('MODEL', 'dict_file')
        self.max_length = self.get_config_from_sec('MODEL', 'max_length')
        self.beam_size = self.get_config_from_sec('MODEL', 'beam_size')

        self.num_epochs = self.get_config_from_sec('train', 'epoch')
        self.l2_weight_decay = self.get_config_from_sec('train',
                                                        'l2_weight_decay')
        self.clip_norm = self.get_config_from_sec('train', 'clip_norm')

    def build_input(self, use_dataloader=True):
        feat_shape = [None, self.feat_size]
        word_shape = [None, 1]
        word_next_shape = [None, 1]

        # set init data to None
        py_reader = None
        feat = None
        word = None
        word_next = None
        init_ids = None
        init_scores = None

        self.use_dataloader = use_dataloader

        feat = fluid.data(
            name='feat', shape=feat_shape, dtype='float32', lod_level=1)
        feed_list = []
        feed_list.append(feat)

        if (self.mode == 'train') or (self.mode == 'valid'):
            word = fluid.data(
                name='word', shape=word_shape, dtype='int64', lod_level=1)
            word_next = fluid.data(
                name='word_next',
                shape=word_next_shape,
                dtype='int64',
                lod_level=1)
            feed_list.append(word)
            feed_list.append(word_next)
        elif (self.mode == 'test') or (self.mode == 'infer'):
            init_ids = fluid.data(
                name="init_ids", shape=[None, 1], dtype="int64", lod_level=2)
            init_scores = fluid.data(
                name="init_scores",
                shape=[None, 1],
                dtype="float32",
                lod_level=2)
        else:
            raise NotImplementedError('mode {} not implemented'.format(
                self.mode))

        if use_dataloader:
            assert self.mode != 'infer', \
                'dataloader is not recommended for infer, please set use_dataloader to False.'
            self.dataloader = fluid.io.DataLoader.from_generator(
                feed_list=feed_list, capacity=16, iterable=True)

        self.feature_input = [feat]
        self.word = word
        self.word_next = word_next
        self.init_ids = init_ids
        self.init_scores = init_scores

    def create_model_args(self):
        cfg = {}
        cfg['feat_size'] = self.feat_size
        cfg['fc_dim'] = self.fc_dim
        cfg['gru_hidden_dim'] = self.gru_hidden_dim
        cfg['decoder_size'] = self.decoder_size
        cfg['word_emb_dim'] = self.word_emb_dim

        # dictionary size is derived from the word-frequency file
        word_dict = dict()
        with open(self.dict_file, 'r') as f:
            for i, line in enumerate(f):
                word_dict[line.strip().split()[0]] = i
        dict_size = len(word_dict)
        cfg['dict_size'] = dict_size
        cfg['max_length'] = self.max_length
        cfg['beam_size'] = self.beam_size
        return cfg

    def build_model(self):
        cfg = self.create_model_args()
        self.videomodel = ets_net.ETSNET(
            feat_size=cfg['feat_size'],
            fc_dim=cfg['fc_dim'],
            gru_hidden_dim=cfg['gru_hidden_dim'],
            decoder_size=cfg['decoder_size'],
            word_emb_dim=cfg['word_emb_dim'],
            dict_size=cfg['dict_size'],
            max_length=cfg['max_length'],
            beam_size=cfg['beam_size'],
            mode=self.mode)
        if (self.mode == 'train') or (self.mode == 'valid'):
            prob = self.videomodel.net(self.feature_input[0], self.word)
            self.network_outputs = [prob]
        elif (self.mode == 'test') or (self.mode == 'infer'):
            translation_ids, translation_scores = self.videomodel.net(
                self.feature_input[0], self.init_ids, self.init_scores)
            self.network_outputs = [translation_ids, translation_scores]

    def optimizer(self):
        l2_weight_decay = self.l2_weight_decay
        clip_norm = self.clip_norm
        fluid.clip.set_gradient_clip(
            clip=fluid.clip.GradientClipByGlobalNorm(clip_norm=clip_norm))
        lr_decay = fluid.layers.learning_rate_scheduler.noam_decay(
            self.gru_hidden_dim, 1000)
        optimizer = fluid.optimizer.Adam(
            learning_rate=lr_decay,
            regularization=fluid.regularizer.L2DecayRegularizer(
                regularization_coeff=l2_weight_decay))
        return optimizer

    def loss(self):
        assert self.mode != 'infer', "invalid loss calculation in infer mode"
        self.loss_ = self.videomodel.loss(self.network_outputs[0],
                                          self.word_next)
        return self.loss_

    def outputs(self):
        return self.network_outputs

    def feeds(self):
        if (self.mode == 'train') or (self.mode == 'valid'):
            return self.feature_input + [self.word, self.word_next]
        elif (self.mode == 'test') or (self.mode == 'infer'):
            return self.feature_input + [self.init_ids, self.init_scores]
        else:
            raise NotImplementedError('mode {} not implemented'.format(
                self.mode))

    def fetches(self):
        if (self.mode == 'train') or (self.mode == 'valid'):
            losses = self.loss()
            fetch_list = [item for item in losses]
        elif (self.mode == 'test') or (self.mode == 'infer'):
            preds = self.outputs()
            fetch_list = [item for item in preds]
        else:
            raise NotImplementedError('mode {} not implemented'.format(
                self.mode))
        return fetch_list

    def pretrain_info(self):
        return (None, None)

    def weights_info(self):
        pass
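As a side note, the optimizer above pairs Adam with a Noam learning-rate schedule. A plain-Python sketch of that schedule, assuming fluid's noam_decay follows the standard Transformer formula with d_model = gru_hidden_dim (512) and 1000 warmup steps:

def noam_lr(step, d_model=512, warmup_steps=1000):
    # linear warmup for warmup_steps, then decay proportional to step**-0.5
    assert step >= 1
    return d_model**-0.5 * min(step**-0.5, step * warmup_steps**-1.5)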
# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
#
#Licensed under the Apache License, Version 2.0 (the "License");
#you may not use this file except in compliance with the License.
#You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
#Unless required by applicable law or agreed to in writing, software
#distributed under the License is distributed on an "AS IS" BASIS,
#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
#See the License for the specific language governing permissions and
#limitations under the License.
import paddle.fluid as fluid
from paddle.fluid import ParamAttr
import numpy as np
DATATYPE = 'float32'
class ETSNET(object):
    def __init__(self,
                 feat_size,
                 fc_dim,
                 gru_hidden_dim,
                 max_length,
                 beam_size,
                 decoder_size,
                 word_emb_dim,
                 dict_size,
                 mode='train'):
        self.feat_size = feat_size
        self.fc_dim = fc_dim
        self.gru_hidden_dim = gru_hidden_dim
        self.decoder_size = decoder_size
        self.word_emb_dim = word_emb_dim
        self.dict_size = dict_size
        self.max_length = max_length
        self.beam_size = beam_size
        self.mode = mode

    def encoder(self, feat):
        bias_attr = fluid.ParamAttr(
            regularizer=fluid.regularizer.L2Decay(0.0),
            initializer=fluid.initializer.NormalInitializer(scale=0.0))
        input_fc = fluid.layers.fc(input=feat,
                                   size=self.fc_dim,
                                   act='tanh',
                                   bias_attr=bias_attr)

        # bidirectional GRU over the feature sequence
        gru_forward_fc = fluid.layers.fc(input=input_fc,
                                         size=self.gru_hidden_dim * 3,
                                         bias_attr=False)
        gru_forward = fluid.layers.dynamic_gru(
            input=gru_forward_fc, size=self.gru_hidden_dim, is_reverse=False)
        gru_backward_fc = fluid.layers.fc(input=input_fc,
                                          size=self.gru_hidden_dim * 3,
                                          bias_attr=False)
        gru_backward = fluid.layers.dynamic_gru(
            input=gru_backward_fc, size=self.gru_hidden_dim, is_reverse=True)

        encoded_sequence = fluid.layers.concat(
            input=[gru_forward, gru_backward], axis=1)

        # temporal attention pooling over the encoded sequence
        gru_weights = fluid.layers.fc(input=encoded_sequence,
                                      size=1,
                                      act='sequence_softmax',
                                      bias_attr=False)
        gru_scaled = fluid.layers.elementwise_mul(
            x=encoded_sequence, y=gru_weights, axis=0)
        encoded_vector = fluid.layers.sequence_pool(
            input=gru_scaled, pool_type='sum')
        encoded_proj = fluid.layers.fc(input=encoded_sequence,
                                       size=self.decoder_size,
                                       bias_attr=False)
        return encoded_sequence, encoded_vector, encoded_proj

    def cell(self, x, hidden, encoder_out, encoder_out_proj):
        def simple_attention(encoder_vec, encoder_proj, decoder_state):
            decoder_state_proj = fluid.layers.fc(input=decoder_state,
                                                 size=self.decoder_size,
                                                 bias_attr=False)
            decoder_state_expand = fluid.layers.sequence_expand(
                x=decoder_state_proj, y=encoder_proj)
            mixed_state = fluid.layers.elementwise_add(encoder_proj,
                                                       decoder_state_expand)
            attention_weights = fluid.layers.fc(input=mixed_state,
                                                size=1,
                                                bias_attr=False)
            attention_weights = fluid.layers.sequence_softmax(
                input=attention_weights)
            weights_reshape = fluid.layers.reshape(
                x=attention_weights, shape=[-1])
            scaled = fluid.layers.elementwise_mul(
                x=encoder_vec, y=weights_reshape, axis=0)
            context = fluid.layers.sequence_pool(input=scaled, pool_type='sum')
            return context

        context = simple_attention(encoder_out, encoder_out_proj, hidden)
        out = fluid.layers.fc(input=[x, context],
                              size=self.decoder_size * 3,
                              bias_attr=False)
        out = fluid.layers.gru_unit(
            input=out, hidden=hidden, size=self.decoder_size * 3)[0]
        return out, out

    def train_decoder(self, word, encoded_sequence, encoded_vector,
                      encoded_proj):
        decoder_boot = fluid.layers.fc(input=encoded_vector,
                                       size=self.decoder_size,
                                       act='tanh',
                                       bias_attr=False)
        word_embedding = fluid.layers.embedding(
            input=word, size=[self.dict_size, self.word_emb_dim])
        # pad to a dense batch and switch to time-major layout for StaticRNN
        pad_value = fluid.layers.assign(input=np.array([0.], dtype=np.float32))
        word_embedding, length = fluid.layers.sequence_pad(word_embedding,
                                                           pad_value)
        word_embedding = fluid.layers.transpose(word_embedding, [1, 0, 2])

        rnn = fluid.layers.StaticRNN()
        with rnn.step():
            x = rnn.step_input(word_embedding)
            pre_state = rnn.memory(init=decoder_boot)
            out, current_state = self.cell(x, pre_state, encoded_sequence,
                                           encoded_proj)
            prob = fluid.layers.fc(input=out,
                                   size=self.dict_size,
                                   act='softmax')
            rnn.update_memory(pre_state, current_state)
            rnn.step_output(prob)

        rnn_out = rnn()
        rnn_out = fluid.layers.transpose(rnn_out, [1, 0, 2])
        length = fluid.layers.reshape(length, [-1])
        rnn_out = fluid.layers.sequence_unpad(x=rnn_out, length=length)
        return rnn_out

    def infer_decoder(self, init_ids, init_scores, encoded_sequence,
                      encoded_vector, encoded_proj):
        decoder_boot = fluid.layers.fc(input=encoded_vector,
                                       size=self.decoder_size,
                                       act='tanh',
                                       bias_attr=False)

        max_len = fluid.layers.fill_constant(
            shape=[1], dtype='int64', value=self.max_length)
        counter = fluid.layers.zeros(shape=[1], dtype='int64', force_cpu=True)

        # create and init arrays to save selected ids, scores and states for each step
        ids_array = fluid.layers.array_write(init_ids, i=counter)
        scores_array = fluid.layers.array_write(init_scores, i=counter)
        state_array = fluid.layers.array_write(decoder_boot, i=counter)

        cond = fluid.layers.less_than(x=counter, y=max_len)
        while_op = fluid.layers.While(cond=cond)
        with while_op.block():
            pre_ids = fluid.layers.array_read(array=ids_array, i=counter)
            pre_score = fluid.layers.array_read(array=scores_array, i=counter)
            pre_state = fluid.layers.array_read(array=state_array, i=counter)

            pre_ids_emb = fluid.layers.embedding(
                input=pre_ids, size=[self.dict_size, self.word_emb_dim])
            out, current_state = self.cell(pre_ids_emb, pre_state,
                                           encoded_sequence, encoded_proj)
            prob = fluid.layers.fc(input=out,
                                   size=self.dict_size,
                                   act='softmax')

            # beam search
            topk_scores, topk_indices = fluid.layers.topk(
                prob, k=self.beam_size)
            accu_scores = fluid.layers.elementwise_add(
                x=fluid.layers.log(topk_scores),
                y=fluid.layers.reshape(
                    pre_score, shape=[-1]),
                axis=0)
            accu_scores = fluid.layers.lod_reset(x=accu_scores, y=pre_ids)
            selected_ids, selected_scores = fluid.layers.beam_search(
                pre_ids,
                pre_score,
                topk_indices,
                accu_scores,
                self.beam_size,
                end_id=1)

            fluid.layers.increment(x=counter, value=1, in_place=True)
            # save selected ids and corresponding scores of each step
            fluid.layers.array_write(selected_ids, array=ids_array, i=counter)
            fluid.layers.array_write(
                selected_scores, array=scores_array, i=counter)

            # update rnn state by sequence_expand acting as gather
            current_state = fluid.layers.sequence_expand(current_state,
                                                         selected_scores)
            fluid.layers.array_write(
                current_state, array=state_array, i=counter)
            current_enc_seq = fluid.layers.sequence_expand(encoded_sequence,
                                                           selected_scores)
            fluid.layers.assign(current_enc_seq, encoded_sequence)
            current_enc_proj = fluid.layers.sequence_expand(encoded_proj,
                                                            selected_scores)
            fluid.layers.assign(current_enc_proj, encoded_proj)

            # update conditional variable
            length_cond = fluid.layers.less_than(x=counter, y=max_len)
            finish_cond = fluid.layers.logical_not(
                fluid.layers.is_empty(x=selected_ids))
            fluid.layers.logical_and(x=length_cond, y=finish_cond, out=cond)

        translation_ids, translation_scores = fluid.layers.beam_search_decode(
            ids=ids_array,
            scores=scores_array,
            beam_size=self.beam_size,
            end_id=1)

        return translation_ids, translation_scores

    def net(self, feat, *input_decoder):
        encoded_sequence, encoded_vector, encoded_proj = self.encoder(feat)
        if (self.mode == 'train') or (self.mode == 'valid'):
            word, = input_decoder
            prob = self.train_decoder(word, encoded_sequence, encoded_vector,
                                      encoded_proj)
            return prob
        else:
            init_ids, init_scores = input_decoder
            translation_ids, translation_scores = self.infer_decoder(
                init_ids, init_scores, encoded_sequence, encoded_vector,
                encoded_proj)
            return translation_ids, translation_scores

    def loss(self, prob, word_next):
        cost = fluid.layers.cross_entropy(input=prob, label=word_next)
        avg_cost = fluid.layers.mean(cost)
        return [avg_cost]
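The beam search above scores candidates by accumulated log-probability: each expansion adds log(topk_scores) to the parent score (pre_score). A toy sketch of one such step in plain Python, with made-up numbers:

import math

pre_score = -1.2              # log-prob of the parent hypothesis
topk_probs = [0.5, 0.3, 0.1]  # top-k next-word probabilities (beam_size = 3)
accu_scores = [pre_score + math.log(p) for p in topk_probs]
# the beam_search op then keeps the beam_size best of these candidates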
@@ -109,6 +109,8 @@ def infer(args):
     place = fluid.CUDAPlace(0) if args.use_gpu else fluid.CPUPlace()
     exe = fluid.Executor(place)
     exe.run(fluid.default_startup_program())
     filelist = args.filelist or infer_config.INFER.filelist
     filepath = args.video_path or infer_config.INFER.get('filepath', '')
     if filepath != '':
@@ -137,16 +139,26 @@ def infer(args):
     periods = []
     cur_time = time.time()
     for infer_iter, data in enumerate(infer_reader()):
-        data_feed_in = [items[:-1] for items in data]
-        video_id = [items[-1] for items in data]
-        infer_outs = exe.run(fetch_list=fetch_list,
-                             feed=infer_feeder.feed(data_feed_in))
+        if args.model_name == 'ETS':
+            data_feed_in = [items[:3] for items in data]
+            vinfo = [items[3:] for items in data]
+            video_id = [items[0] for items in vinfo]
+            infer_outs = exe.run(fetch_list=fetch_list,
+                                 feed=infer_feeder.feed(data_feed_in),
+                                 return_numpy=False)
+            infer_result_list = infer_outs + vinfo
+        else:
+            data_feed_in = [items[:-1] for items in data]
+            video_id = [items[-1] for items in data]
+            infer_outs = exe.run(fetch_list=fetch_list,
+                                 feed=infer_feeder.feed(data_feed_in))
+            infer_result_list = [item for item in infer_outs] + [video_id]
         prev_time = cur_time
         cur_time = time.time()
         period = cur_time - prev_time
         periods.append(period)
-        infer_result_list = [item for item in infer_outs] + [video_id]
         infer_metrics.accumulate(infer_result_list)
         if args.log_interval > 0 and infer_iter % args.log_interval == 0:
...
@@ -6,6 +6,7 @@ from .ctcn_reader import CTCNReader
 from .bmn_reader import BMNReader
 from .bsn_reader import BSNVideoReader
 from .bsn_reader import BSNProposalReader
+from .ets_reader import ETSReader

 # regist reader, sort by alphabet
 regist_reader("ATTENTIONCLUSTER", FeatureReader)
@@ -19,3 +20,4 @@ regist_reader("CTCN", CTCNReader)
 regist_reader("BMN", BMNReader)
 regist_reader("BSNTEM", BSNVideoReader)
 regist_reader("BSNPEM", BSNProposalReader)
+regist_reader("ETS", ETSReader)
# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
#
#Licensed under the Apache License, Version 2.0 (the "License");
#you may not use this file except in compliance with the License.
#You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
#Unless required by applicable law or agreed to in writing, software
#distributed under the License is distributed on an "AS IS" BASIS,
#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
#See the License for the specific language governing permissions and
#limitations under the License.
import os
import random
import sys
import numpy as np
import functools
import paddle
import logging
logger = logging.getLogger(__name__)
import pickle
from .reader_utils import DataReader
python_ver = sys.version_info
class ETSReader(DataReader):
    """
    Data reader for the ETS model. The data is stored as features
    extracted by prior networks.
    """

    def __init__(self, name, mode, cfg):
        self.name = name
        self.mode = mode

        self.feat_path = cfg.MODEL.feat_path
        self.dict_file = cfg.MODEL.dict_file
        self.START = cfg.MODEL.START
        self.END = cfg.MODEL.END
        self.UNK = cfg.MODEL.UNK
        self.filelist = cfg[mode.upper()]['filelist']
        self.batch_size = cfg[mode.upper()]['batch_size']
        self.num_threads = cfg[mode.upper()]['num_threads']
        self.buffer_size = cfg[mode.upper()]['buffer_size']
        if (mode == 'test') or (mode == 'infer'):
            self.num_threads = 1  # set num_threads as 1 for test and infer

    def load_file(self):
        word_dict = dict()
        with open(self.dict_file, 'r') as f:
            for i, line in enumerate(f):
                word_dict[line.strip().split()[0]] = i
        return word_dict

    def create_reader(self):
        """reader creator for ets model"""
        if self.mode == 'infer':
            return self.make_infer_reader()
        else:
            return self.make_multiprocess_reader()

    def make_infer_reader(self):
        """reader for inference"""

        def reader():
            batch_out = []
            with open(self.filelist) as f:
                lines = f.readlines()
                reader_list = [
                    line.strip() for line in lines if line.strip() != ''
                ]
            word_dict = self.load_file()
            for line in reader_list:
                vid, stime, etime, sentence = line.split('\t')
                stime, etime = float(stime), float(etime)

                if python_ver < (3, 0):
                    datas = pickle.load(
                        open(os.path.join(self.feat_path, vid), 'rb'))
                else:
                    datas = pickle.load(
                        open(os.path.join(self.feat_path, vid), 'rb'),
                        encoding='bytes')
                # features are stored at 5 rows per second of video
                feat = datas[int(stime * 5):int(etime * 5 + 0.5), :]
                init_ids = np.array([[0]], dtype='int64')
                init_scores = np.array([[0.]], dtype='float32')

                if feat.shape[0] == 0:
                    continue
                batch_out.append(
                    (feat, init_ids, init_scores, vid, stime, etime))
                if len(batch_out) == self.batch_size:
                    yield batch_out
                    batch_out = []

        return reader

    def make_multiprocess_reader(self):
        """multiprocess reader"""

        def process_data(sample):
            vid, feat, stime, etime, sentence = sample
            if self.mode == 'train' or self.mode == 'valid':
                word_ids = [
                    word_dict.get(w, word_dict[self.UNK])
                    for w in sentence.split()
                ]
                # shift by one: input starts with <s>, target ends with <e>
                word_ids_next = word_ids + [word_dict[self.END]]
                word_ids = [word_dict[self.START]] + word_ids
                return feat, word_ids, word_ids_next
            elif self.mode == 'test':
                init_ids = np.array([[0]], dtype='int64')
                init_scores = np.array([[0.]], dtype='float32')
                return feat, init_ids, init_scores, vid, stime, etime
            else:
                raise NotImplementedError('mode {} not implemented'.format(
                    self.mode))

        def make_reader():
            def reader():
                lines = open(self.filelist).readlines()
                reader_list = [
                    line.strip() for line in lines if line.strip() != ''
                ]
                if self.mode == 'train':
                    random.shuffle(reader_list)
                for line in reader_list:
                    vid, stime, etime, sentence = line.split('\t')
                    stime, etime = float(stime), float(etime)

                    if python_ver < (3, 0):
                        datas = pickle.load(
                            open(os.path.join(self.feat_path, vid), 'rb'))
                    else:
                        datas = pickle.load(
                            open(os.path.join(self.feat_path, vid), 'rb'),
                            encoding='bytes')
                    feat = datas[int(stime * 5):int(etime * 5 + 0.5), :]
                    if feat.shape[0] == 0:
                        continue
                    yield [vid, feat, stime, etime, sentence]

            mapper = functools.partial(process_data)
            return paddle.reader.xmap_readers(mapper, reader, self.num_threads,
                                              self.buffer_size)

        def batch_reader():
            batch_out = []
            for out in _reader():
                batch_out.append(out)
                if len(batch_out) == self.batch_size:
                    yield batch_out
                    batch_out = []

        word_dict = self.load_file()
        _reader = make_reader()
        return batch_reader
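To illustrate the filelist format this reader expects, a made-up line (tab-separated: video id, start time, end time, caption), parsed exactly as in the loops above:

line = "v_example_0001\t0.0\t8.0\ta man is playing a guitar"
vid, stime, etime, sentence = line.split('\t')
stime, etime = float(stime), float(etime)
# features are stored at 5 rows per second, so the segment maps to:
start_row, end_row = int(stime * 5), int(etime * 5 + 0.5)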
@@ -173,12 +173,18 @@ def train(args):
     if args.model_name in ['CTCN']:
         build_strategy.enable_sequential_execution = True
+    exec_strategy = fluid.ExecutionStrategy()
     compiled_train_prog = fluid.compiler.CompiledProgram(
         train_prog).with_data_parallel(
-            loss_name=train_loss.name, build_strategy=build_strategy)
+            loss_name=train_loss.name,
+            build_strategy=build_strategy,
+            exec_strategy=exec_strategy)
     compiled_valid_prog = fluid.compiler.CompiledProgram(
         valid_prog).with_data_parallel(
-            share_vars_from=compiled_train_prog, build_strategy=build_strategy)
+            share_vars_from=compiled_train_prog,
+            build_strategy=build_strategy,
+            exec_strategy=exec_strategy)
     # get reader
     bs_denominator = 1
...