提交 dbd0b9af 编写于 作者: S shippingwang

plane

上级 3a559be9
......@@ -23,7 +23,7 @@ TRAIN:
dataset : TACoS
model : TALL
VALID:
TEST:
batch_size : 1
context_num : 1
context_size : 128
......
......@@ -441,61 +441,15 @@ class TallMetrics(Metrics):
loss = np.array(fetch_list[0])
logger.info(info +'\tLoss = {}'.format('%.6f' % np.mean(loss)))
elif self.mode == "test":
elif self.mode == "test":
pass
def accumalate():
if self.mode == "test":
outs = fetch_list[0]
outputs = np.squeeze(outs)
start = fetch_list[1]
end = fetch_list[2]
k = fetch_list[3]
t = fetch_list[4]
movie_clip_sentences = fetch_list[5]
movie_clip_featmaps = fetch_lkist[6]
sentence_image_mat = np.zeros([len(movie_clip_sentences), len(movie_clip_featmaps)])
sentence_image_reg_mat = np.zeros([len(movie_clip_sentences), len(movie_clip_featmaps ), 2])
sentence_image_mat[k, t] = outputs[0]
# sentence_image_mat[k, t] = expit(outputs[0]) * conf_score
reg_end = end + outputs[2]
reg_start = start + outputs[1]
sentence_image_reg_mat[k, t, 0] = reg_start
sentence_image_reg_mat[k, t, 1] = reg_end
clips = [b[0] for b in movie_clip_featmaps]
sclips = [b[0] for b in movie_clip_sentences]
for i in range(len(sel.IoU_thresh)):
IoU = self.IoU_thresh[i]
self.current_correct_num_10 = compute_IoU_recall_top_n_forreg(10, IoU, sentence_image_mat, sentence_image_reg_mat, sclips, iclips)
self_current_correct_num_5 = compute_IoU_recall_top_n_forreg(5, IoU, sentence_image_mat, sentence_image_reg_mat, sclips, iclips)
self.current_correct_num_1 = compute_IoU_recall_top_n_forreg(1, IoU, sentence_image_mat, sentence_image_reg_mat, sclips, iclips)
#logger.info(info + " IoU=" + str(IoU) + ", R@10: " + str(correct_num_10 / len(sclips)) + "; IoU=" + str(IoU) + ", R@5: " + str(correct_num_5 / len(sclips)) + "; IoU=" + str(IoU) + ", R@1: " + str(correct_num_1 / len(sclips)))
self.all_correct_num_10[i] += correct_num_10
self.all_correct_num_5[i] += correct_num_5
self.all_correct_num_1[i] += correct_num_1
self.all_retrievd += len(sclips)
self.calculator.accumalate(self, fetch_list)
def finalize_and_log_out(self, info="", savedir="/"):
all_retrievd = self.all_retrievd
for k in range(len(self.IoU_thresh)):
print(" IoU=" + str(self.IoU_thresh[k]) + ", R@10: " + str(all_correct_num_10[k] / all_retrievd) + "; IoU=" + str(self.IoU_thresh[k]) + ", R@5: " + str(all_correct_num_5[k] / all_retrievd) + "; IoU=" + str(self.IoU_thresh[k]) + ", R@1: " + str(all_correct_num_1[k] / all_retrievd))
R1_IOU5 = self all_correct_num_1[2] / all_retrievd
R5_IOU5 = self.all_correct_num_5[2] / all_retrievd
print "{}\n".format("best_R1_IOU5: %0.3f" % R1_IOU5)
print "{}\n".format("best_R5_IOU5: %0.3f" % R5_IOU5)
self.calculator.finalize_and_log_out()
def reset(self):
    # Clear all accumulated metric state by delegating to the shared
    # MetricsCalculator instance (see its reset of the correct-count lists).
    self.calculator.reset()
......
......@@ -37,14 +37,15 @@ class MetricsCalculator():
self.all_correct_num_1 = [0.0] * 5
self.all_retrievd = 0.0
def finalize_metrics(self):
def calculate_and_log_out(self, fetch_list, info=""):
    # Base-class hook: subclasses compute and log per-batch results from
    # `fetch_list`, prefixing log lines with `info`. No-op here.
    return
def calculate_metrics(self,):
    # Base-class hook: subclasses derive final metric values from the
    # accumulated state. No-op here.
    return
def accumalate(self):
    # Base-class hook (name kept as-is, sic "accumalate", since subclasses
    # and callers use this spelling): subclasses fold one batch's results
    # into the running totals. No-op here.
    return
def finalize_and_log_out(self, info="", savedir="./"):
......
......@@ -37,9 +37,8 @@ class TALL(ModelBase):
self.sentence_embedding_size = self.get_config_from_sec("model", "sentence_embedding_size")
self.hidden_size = self.get_config_from_sec("model", "hidden_size")
self.output_size = self.get_config_from_sec("model", "output_size")
self.pretrained_model = None
#self.pretrained_model = None
#pretrained_model = "output/20/"
self.epochs = self.get_config_from_sec("train", "epoch")
self.context_size = self.get_config_from_sec("train", "context_size"
self.context_num = self.get_config_from_sec("train", "context_num"
......@@ -47,7 +46,7 @@ class TALL(ModelBase):
self.sent_vec_dim = self.get_config_from_sec("train", "sent_vec_dim"
self.off_size = self.get_config_from_sec("train", "off_size")
self.movie_length_info = self.get_config_from_sec("train", "movie_length_info")
# different params in train/test mode
self.batch_size =self.get_config_from_sec(self.mode, "batch_size"
self.clip_sentvec = self.get_config_from_sec(self.mode, "test_clip_sentvec")
self.sliding_clip_path= selyf.get_config_from_sec(self.mode, "sliding_clip_path")
......@@ -75,38 +74,34 @@ class TALL(ModelBase):
self.use_pyreader = use_pyreader
if use_pyreader:
if self.mode == "train":
py_reader = fluid.io.PyReader(feed_list=[self.images, self.sentences, self.offsets], capacity=4, iterable=True)
elif self.mode == "valid":
py_reader = fluid.io.PyReader(feed_list=[self.images, self.sentences], capacity=4, iterable=True)
feed_list = [self.images, self.sentences, slef.offsets] if self.mode == "train" else [self.images, self.sentences]
py_reader = fluid.io.PyReader(feed_list=feed_list, capacity=4, iterable=True)
self.py_reader = py_reader
def create_model_args(self):
cfg = {}
cfg["images"] = self.images
cfg["sentences"] = self.sentences
if self.mode=="train":
cfg["offsets"] = self.offsets
cfg["semantic_size"] = self.semantic_size
cfg["hidden_size"] = self.hidden_size
cfg["output_size"] = self.output_size
cfg["hidden_size"] = self.hidden_size
return cfg
def build_model(self):
cfg = self.create_model_args()
videomodel = tall_model.TALL(mode=self.mode, cfg=cfg)
outs, offs = videomodel.net()
self.network_outputs = [outs, offs]
videomodel = TALL(mode=self.mode, cfg=cfg)
outs = videomodel.net()
self.network_outputs = [outs, self.offsets] if self.mode == "train" else [outs]
def optimizer():
def optimizer(self):
    """Create the Adam optimizer used for training.

    As a side effect, installs a global-norm gradient clip (clip_norm=5.0)
    on the default fluid program before the optimizer is returned.
    """
    assert self.mode == 'train', 'optimizer only can be get in train mode.'
    # Clip gradients by global norm, then build the optimizer.
    grad_clip = fluid.clip.GradientClipByGlobalNorm(clip_norm=5.0)
    fluid.clip.set_gradient_clip(clip=grad_clip)
    return fluid.optimizer.Adam(learning_rate=1e-3)
def loss(self):
assert self.mode == 'train'
outs = self.network_outputs[0]
offs = self.network_outputs[1]
sim_score_mat = outs[0]
......@@ -148,17 +143,17 @@ class TALL(ModelBase):
return self.network_outputs
def feeds(self):
    """Return the list of feed variables for the current mode.

    Train mode feeds (images, sentences, offsets); other modes feed only
    (images, sentences), matching the PyReader feed_list built elsewhere.
    """
    # Fix: the one-liner used `... is self.mode=="train" else ...`, which is
    # a SyntaxError — the conditional-expression keyword is `if`. Also drops
    # the stale unconditional return that preceded it in the diff.
    if self.mode == "train":
        return [self.images, self.sentences, self.offsets]
    return [self.images, self.sentences]
def fetchs(self):
    """Return the fetch list: the loss tensor in train mode, otherwise the
    raw network outputs (wrapped in a list)."""
    # Fix: removed the dead unconditional assignment that was immediately
    # overwritten by the conditional one.
    if self.mode == "train":
        return [self.loss()]
    return [self.network_outputs]
def pretrain_info(self):
    """Return (name, url) identifying the pretrained weights to load.

    NOTE(review): a bare `return` previously preceded the tuple return,
    making it unreachable dead code; removed so the tuple is returned.
    """
    return ('TALL_pretrained', '')
def weights_info(self):
    """Return (filename, url) identifying the final trained weights.

    NOTE(review): a bare `return` previously preceded the tuple return,
    making it unreachable dead code; removed so the tuple is returned.
    """
    return ('TALL_final.pdparams', '')
def load_pretraine_params(self, exe, pretrain, prog, place):
def is_parameter(var):
......
......@@ -22,8 +22,6 @@ class TALL(object):
def __init__(self, mode, cfg):
self.images = cfg["images"]
self.sentences = cfg["sentences"]
if self.mode == "train":
self.offsets = cfg[offsets]
self.semantic_size = cfg["semantic_size"]
self.hidden_size = cfg["hidden_size"]
self.output_size = cfg["output_size"]
......@@ -46,9 +44,9 @@ class TALL(object):
def net(self)
# visual2semantic
transformed_clip_train = fluid.layers.fc(
transformed_clip = fluid.layers.fc(
input=self.images,
size=semantic_size,
size=self.semantic_size,
act=None,
name='v2s_lt',
param_attr=fluid.ParamAttr(
......@@ -56,11 +54,11 @@ class TALL(object):
initializer=fluid.initializer.NormalInitializer(loc=0.0, scale=1.0, seed=0)),
bias_attr=False)
#l2_normalize
transformed_clip_train = fluid.layers.l2_normalize(x=transformed_clip_train, axis=1)
transformed_clip = fluid.layers.l2_normalize(x=transformed_clip, axis=1)
# sentence2semantic
transformed_sentence_train = fluid.layers.fc(
transformed_sentence = fluid.layers.fc(
input=self.sentences,
size=semantic_size,
size=self.semantic_size,
act=None,
name='s2s_lt',
param_attr=fluid.ParamAttr(
......@@ -68,30 +66,29 @@ class TALL(object):
initializer=fluid.initializer.NormalInitializer(loc=0.0, scale=1.0, seed=0)),
bias_attr=False)
#l2_normalize
transformed_sentence_train = fluid.layers.l2_normalize(x=transformed_sentence_train, axis=1)
transformed_sentence = fluid.layers.l2_normalize(x=transformed_sentence, axis=1)
cross_modal_vec_train=_cross_modal_comb(transformed_clip_train, transformed_sentence_train)
cross_modal_vec_train=fluid.layers.unsqueeze(input=cross_modal_vec_train, axes=[0])
cross_modal_vec_train=fluid.layers.transpose(cross_modal_vec_train, perm=[0, 3, 1, 2])
cross_modal_vec=_cross_modal_comb(transformed_clip_train, transformed_sentence)
cross_modal_vec=fluid.layers.unsqueeze(input=cross_modal_vec, axes=[0])
cross_modal_vec=fluid.layers.transpose(cross_modal_vec, perm=[0, 3, 1, 2])
mid_output = fluid.layers.conv2d(
input=cross_modal_vec_train,
num_filters=hidden_size,
input=cross_modal_vec,
num_filters=self.hidden_size,
filter_size=1,
stride=1,
act="relu",
param_attr=fluid.param_attr.ParamAttr(name="mid_out_weights"),
bias_attr=False)
sim_score_mat_train = fluid.layers.conv2d(
sim_score_mat = fluid.layers.conv2d(
input=mid_output,
num_filters=output_size,
num_filters=self.output_size,
filter_size=1,
stride=1,
act=None,
param_attr=fluid.param_attr.ParamAttr(name="sim_mat_weights"),
bias_attr=False)
self.sim_score_mat_train = fluid.layers.squeeze(input=sim_score_mat_train, axes=[0])
return self.sim_score_mat_train, self.offsets
sim_score_mat = fluid.layers.squeeze(input=sim_score_mat, axes=[0])
return sim_score_mat
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册