diff --git a/dygraph/bmn/bmn.yaml b/dygraph/bmn/bmn.yaml
index 315b470d90694a1b16ac78f2b832bd6ea42cb6e9..525e6c3679f9b5934d41e26b7d842c15fdf8007d 100644
--- a/dygraph/bmn/bmn.yaml
+++ b/dygraph/bmn/bmn.yaml
@@ -12,10 +12,11 @@ MODEL:
 TRAIN:
     subset: "train"
     epoch: 9
-    batch_size: 4
-    num_threads: 8
     use_gpu: True
     num_gpus: 4
+    batch_size: 16
+    num_workers: 4
+    use_shuffle: True
     learning_rate: 0.001
     learning_rate_decay: 0.1
     lr_decay_iter: 4200
@@ -23,15 +24,14 @@ TRAIN:
 
 VALID:
     subset: "validation"
-    batch_size: 4
-    num_threads: 8
-    use_gpu: True
     num_gpus: 4
+    batch_size: 16
+    num_workers: 4
 
 TEST:
     subset: "validation"
     batch_size: 1
-    num_threads: 1
+    num_workers: 4
     snms_alpha: 0.001
     snms_t1: 0.5
     snms_t2: 0.9
@@ -41,7 +41,7 @@ TEST:
 INFER:
     subset: "test"
     batch_size: 1
-    num_threads: 1
+    num_workers: 4
    snms_alpha: 0.4
     snms_t1: 0.5
     snms_t2: 0.9
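The reader settings are renamed to match `paddle.io.DataLoader`: `num_threads` becomes `num_workers`, and `TRAIN.batch_size` is now the global batch size across the 4 GPUs (4 samples per card). A minimal sketch of how these fields feed the new loader follows; `build_train_loader` is a hypothetical helper, not part of the patch:

```python
from paddle.io import DataLoader, DistributedBatchSampler
from reader import BmnDataset  # defined later in this patch

def build_train_loader(cfg, place):
    # hypothetical helper: wires the renamed YAML fields into paddle.io
    dataset = BmnDataset(cfg, 'train')
    sampler = DistributedBatchSampler(
        dataset,
        # global TRAIN.batch_size divided over TRAIN.num_gpus processes
        batch_size=cfg.TRAIN.batch_size // cfg.TRAIN.num_gpus,
        shuffle=cfg.TRAIN.use_shuffle)
    return DataLoader(
        dataset,
        batch_sampler=sampler,
        places=place,
        num_workers=cfg.TRAIN.num_workers,  # was TRAIN.num_threads
        return_list=True)
```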
diff --git a/dygraph/bmn/eval.py b/dygraph/bmn/eval.py
index 2565fa76b07bdc50f842edbdae8de624d89ba00e..d337c6dd0c23368ebc3990472d666efda8542039 100644
--- a/dygraph/bmn/eval.py
+++ b/dygraph/bmn/eval.py
@@ -13,7 +13,7 @@
 #limitations under the License.
 
 import paddle
-import paddle.fluid as fluid
+from paddle.io import DataLoader, DistributedBatchSampler
 import numpy as np
 import argparse
 import pandas as pd
@@ -23,7 +23,7 @@ import ast
 import json
 import logging
 
-from reader import BMNReader
+from reader import BmnDataset
 from model import BMN, bmn_loss_func
 from bmn_utils import boundary_choose, bmn_post_processing
 from config_utils import *
@@ -129,90 +129,91 @@ def test_bmn(args):
         os.makedirs(test_config.TEST.result_path)
 
     if not args.use_gpu:
-        place = fluid.CPUPlace()
+        place = paddle.CPUPlace()
     else:
-        place = fluid.CUDAPlace(0)
-
-    with fluid.dygraph.guard(place):
-        bmn = BMN(test_config)
-
-        # load checkpoint
-        if args.weights:
-            assert os.path.exists(args.weights + '.pdparams'
-                                  ), "Given weight dir {} not exist.".format(
-                                      args.weights)
-
-            logger.info('load test weights from {}'.format(args.weights))
-            model_dict, _ = fluid.load_dygraph(args.weights)
-            bmn.set_dict(model_dict)
-
-        reader = BMNReader(mode="test", cfg=test_config)
-        test_reader = reader.create_reader()
-
-        aggr_loss = 0.0
-        aggr_tem_loss = 0.0
-        aggr_pem_reg_loss = 0.0
-        aggr_pem_cls_loss = 0.0
-        aggr_batch_size = 0
-        video_dict, video_list = get_dataset_dict(test_config)
-
-        bmn.eval()
-        for batch_id, data in enumerate(test_reader()):
-            video_feat = np.array([item[0] for item in data]).astype(DATATYPE)
-            gt_iou_map = np.array([item[1] for item in data]).astype(DATATYPE)
-            gt_start = np.array([item[2] for item in data]).astype(DATATYPE)
-            gt_end = np.array([item[3] for item in data]).astype(DATATYPE)
-            video_idx = [item[4] for item in data][0]  #batch_size=1 by default
-
-            x_data = fluid.dygraph.base.to_variable(video_feat)
-            gt_iou_map = fluid.dygraph.base.to_variable(gt_iou_map)
-            gt_start = fluid.dygraph.base.to_variable(gt_start)
-            gt_end = fluid.dygraph.base.to_variable(gt_end)
-            gt_iou_map.stop_gradient = True
-            gt_start.stop_gradient = True
-            gt_end.stop_gradient = True
-
-            pred_bm, pred_start, pred_end = bmn(x_data)
-            loss, tem_loss, pem_reg_loss, pem_cls_loss = bmn_loss_func(
-                pred_bm, pred_start, pred_end, gt_iou_map, gt_start, gt_end,
-                test_config)
-
-            pred_bm = pred_bm.numpy()
-            pred_start = pred_start[0].numpy()
-            pred_end = pred_end[0].numpy()
-            aggr_loss += np.mean(loss.numpy())
-            aggr_tem_loss += np.mean(tem_loss.numpy())
-            aggr_pem_reg_loss += np.mean(pem_reg_loss.numpy())
-            aggr_pem_cls_loss += np.mean(pem_cls_loss.numpy())
-            aggr_batch_size += 1
-
-            if batch_id % args.log_interval == 0:
-                logger.info("Processing................ batch {}".format(
-                    batch_id))
-
-            gen_props(
-                pred_bm,
-                pred_start,
-                pred_end,
-                video_idx,
-                video_list,
-                test_config,
-                mode='test')
-
-        avg_loss = aggr_loss / aggr_batch_size
-        avg_tem_loss = aggr_tem_loss / aggr_batch_size
-        avg_pem_reg_loss = aggr_pem_reg_loss / aggr_batch_size
-        avg_pem_cls_loss = aggr_pem_cls_loss / aggr_batch_size
-
-        logger.info('[EVAL] \tAvg_oss = {}, \tAvg_tem_loss = {}, \tAvg_pem_reg_loss = {}, \tAvg_pem_cls_loss = {}'.format(
-            '%.04f' % avg_loss, '%.04f' % avg_tem_loss, \
-            '%.04f' % avg_pem_reg_loss, '%.04f' % avg_pem_cls_loss))
-
-        logger.info("Post_processing....This may take a while")
-        bmn_post_processing(video_dict, test_config.TEST.subset,
-                            test_config.TEST.output_path,
-                            test_config.TEST.result_path)
-        logger.info("[EVAL] eval finished")
+        place = paddle.CUDAPlace(0)
+
+    paddle.disable_static(place)
+    bmn = BMN(test_config)
+
+    # load checkpoint
+    if args.weights:
+        assert os.path.exists(
+            args.weights +
+            '.pdparams'), "Given weight dir {} not exist.".format(args.weights)
+
+        logger.info('load test weights from {}'.format(args.weights))
+        model_dict, _ = paddle.load(args.weights)
+        bmn.set_dict(model_dict)
+
+    eval_dataset = BmnDataset(test_config, 'test')
+    eval_sampler = DistributedBatchSampler(
+        eval_dataset, batch_size=test_config.TEST.batch_size)
+    eval_loader = DataLoader(
+        eval_dataset,
+        batch_sampler=eval_sampler,
+        places=place,
+        num_workers=test_config.TEST.num_workers,
+        return_list=True)
+
+    aggr_loss = 0.0
+    aggr_tem_loss = 0.0
+    aggr_pem_reg_loss = 0.0
+    aggr_pem_cls_loss = 0.0
+    aggr_batch_size = 0
+    video_dict, video_list = get_dataset_dict(test_config)
+
+    bmn.eval()
+    for batch_id, data in enumerate(eval_loader):
+        x_data = paddle.to_tensor(data[0])
+        gt_iou_map = paddle.to_tensor(data[1])
+        gt_start = paddle.to_tensor(data[2])
+        gt_end = paddle.to_tensor(data[3])
+        video_idx = data[4]  #batch_size=1 by default
+        gt_iou_map.stop_gradient = True
+        gt_start.stop_gradient = True
+        gt_end.stop_gradient = True
+
+        pred_bm, pred_start, pred_end = bmn(x_data)
+        loss, tem_loss, pem_reg_loss, pem_cls_loss = bmn_loss_func(
+            pred_bm, pred_start, pred_end, gt_iou_map, gt_start, gt_end,
+            test_config)
+
+        pred_bm = pred_bm.numpy()
+        pred_start = pred_start[0].numpy()
+        pred_end = pred_end[0].numpy()
+        aggr_loss += np.mean(loss.numpy())
+        aggr_tem_loss += np.mean(tem_loss.numpy())
+        aggr_pem_reg_loss += np.mean(pem_reg_loss.numpy())
+        aggr_pem_cls_loss += np.mean(pem_cls_loss.numpy())
+        aggr_batch_size += 1
+
+        if batch_id % args.log_interval == 0:
+            logger.info("Processing................ batch {}".format(batch_id))
+
+        gen_props(
+            pred_bm,
+            pred_start,
+            pred_end,
+            video_idx,
+            video_list,
+            test_config,
+            mode='test')
+
+    avg_loss = aggr_loss / aggr_batch_size
+    avg_tem_loss = aggr_tem_loss / aggr_batch_size
+    avg_pem_reg_loss = aggr_pem_reg_loss / aggr_batch_size
+    avg_pem_cls_loss = aggr_pem_cls_loss / aggr_batch_size
+
+    logger.info('[EVAL] \tAvg_loss = {}, \tAvg_tem_loss = {}, \tAvg_pem_reg_loss = {}, \tAvg_pem_cls_loss = {}'.format(
+        '%.04f' % avg_loss, '%.04f' % avg_tem_loss, \
+        '%.04f' % avg_pem_reg_loss, '%.04f' % avg_pem_cls_loss))
+
+    logger.info("Post_processing....This may take a while")
+    bmn_post_processing(video_dict, test_config.TEST.subset,
+                        test_config.TEST.output_path,
+                        test_config.TEST.result_path)
+    logger.info("[EVAL] eval finished")
 
 
 if __name__ == '__main__':
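The `with fluid.dygraph.guard(place):` block becomes a plain `paddle.disable_static(place)` call, which is why the whole body of `test_bmn` loses one indentation level. A minimal sketch of the switch (2.0-beta style; later stable releases enable dynamic mode by default):

```python
import paddle

# choose a device, then enable imperative (dygraph) execution
place = paddle.CUDAPlace(0)       # paddle.CPUPlace() when --use_gpu is False
paddle.disable_static(place)      # replaces the fluid.dygraph.guard context

x = paddle.to_tensor([1.0, 2.0])  # replaces fluid.dygraph.base.to_variable
print(x.numpy())
```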
batch {}".format(batch_id)) + + gen_props( + pred_bm, + pred_start, + pred_end, + video_idx, + video_list, + test_config, + mode='test') + + avg_loss = aggr_loss / aggr_batch_size + avg_tem_loss = aggr_tem_loss / aggr_batch_size + avg_pem_reg_loss = aggr_pem_reg_loss / aggr_batch_size + avg_pem_cls_loss = aggr_pem_cls_loss / aggr_batch_size + + logger.info('[EVAL] \tAvg_oss = {}, \tAvg_tem_loss = {}, \tAvg_pem_reg_loss = {}, \tAvg_pem_cls_loss = {}'.format( + '%.04f' % avg_loss, '%.04f' % avg_tem_loss, \ + '%.04f' % avg_pem_reg_loss, '%.04f' % avg_pem_cls_loss)) + + logger.info("Post_processing....This may take a while") + bmn_post_processing(video_dict, test_config.TEST.subset, + test_config.TEST.output_path, + test_config.TEST.result_path) + logger.info("[EVAL] eval finished") if __name__ == '__main__': diff --git a/dygraph/bmn/model.py b/dygraph/bmn/model.py index f77e8e0e95bc4e0d397ba7247327c5bf7038c8e4..f3c056cd55c2ecdb1062a347b474404fb707d074 100644 --- a/dygraph/bmn/model.py +++ b/dygraph/bmn/model.py @@ -13,8 +13,8 @@ #limitations under the License. import paddle -import paddle.fluid as fluid -from paddle.fluid import ParamAttr +import paddle.nn.functional as F +from paddle import ParamAttr import numpy as np import math @@ -24,7 +24,7 @@ DATATYPE = 'float32' # Net -class Conv1D(fluid.dygraph.Layer): +class Conv1D(paddle.nn.Layer): def __init__(self, prefix, num_channels=256, @@ -38,32 +38,36 @@ class Conv1D(fluid.dygraph.Layer): k = 1. / math.sqrt(fan_in) param_attr = ParamAttr( name=prefix + "_w", - initializer=fluid.initializer.Uniform( + initializer=paddle.nn.initializer.Uniform( low=-k, high=k)) bias_attr = ParamAttr( name=prefix + "_b", - initializer=fluid.initializer.Uniform( + initializer=paddle.nn.initializer.Uniform( low=-k, high=k)) - self._conv2d = fluid.dygraph.Conv2D( - num_channels=num_channels, - num_filters=num_filters, - filter_size=(1, size_k), + self._conv2d = paddle.nn.Conv2d( + in_channels=num_channels, + out_channels=num_filters, + kernel_size=(1, size_k), stride=1, padding=(0, padding), groups=groups, - act=act, - param_attr=param_attr, + weight_attr=param_attr, bias_attr=bias_attr) + if act == "relu": + self._act = paddle.nn.ReLU() + elif act == "sigmoid": + self._act = paddle.nn.Sigmoid() def forward(self, x): - x = fluid.layers.unsqueeze(input=x, axes=[2]) + x = paddle.unsqueeze(x, axis=[2]) x = self._conv2d(x) - x = fluid.layers.squeeze(input=x, axes=[2]) + x = self._act(x) + x = paddle.squeeze(x, axis=[2]) return x -class BMN(fluid.dygraph.Layer): +class BMN(paddle.nn.Layer): def __init__(self, cfg): super(BMN, self).__init__() @@ -127,55 +131,58 @@ class BMN(fluid.dygraph.Layer): sample_mask = get_interp1d_mask(self.tscale, self.dscale, self.prop_boundary_ratio, self.num_sample, self.num_sample_perbin) - self.sample_mask = fluid.dygraph.base.to_variable(sample_mask) + self.sample_mask = paddle.to_tensor(sample_mask) self.sample_mask.stop_gradient = True - self.p_conv3d1 = fluid.dygraph.Conv3D( - num_channels=128, - num_filters=self.hidden_dim_3d, - filter_size=(self.num_sample, 1, 1), + self.p_conv3d1 = paddle.nn.Conv3d( + in_channels=128, + out_channels=self.hidden_dim_3d, + kernel_size=(self.num_sample, 1, 1), stride=(self.num_sample, 1, 1), padding=0, - act="relu", - param_attr=ParamAttr(name="PEM_3d1_w"), + weight_attr=ParamAttr(name="PEM_3d1_w"), bias_attr=ParamAttr(name="PEM_3d1_b")) + self.p_conv3d1_act = paddle.nn.ReLU() - self.p_conv2d1 = fluid.dygraph.Conv2D( - num_channels=512, - num_filters=self.hidden_dim_2d, - filter_size=1, + 
+        self.p_conv2d1 = paddle.nn.Conv2d(
+            in_channels=512,
+            out_channels=self.hidden_dim_2d,
+            kernel_size=1,
             stride=1,
             padding=0,
-            act="relu",
-            param_attr=ParamAttr(name="PEM_2d1_w"),
+            weight_attr=ParamAttr(name="PEM_2d1_w"),
             bias_attr=ParamAttr(name="PEM_2d1_b"))
 
-        self.p_conv2d2 = fluid.dygraph.Conv2D(
-            num_channels=128,
-            num_filters=self.hidden_dim_2d,
-            filter_size=3,
+        self.p_conv2d1_act = paddle.nn.ReLU()
+
+        self.p_conv2d2 = paddle.nn.Conv2d(
+            in_channels=128,
+            out_channels=self.hidden_dim_2d,
+            kernel_size=3,
             stride=1,
             padding=1,
-            act="relu",
-            param_attr=ParamAttr(name="PEM_2d2_w"),
+            weight_attr=ParamAttr(name="PEM_2d2_w"),
             bias_attr=ParamAttr(name="PEM_2d2_b"))
 
-        self.p_conv2d3 = fluid.dygraph.Conv2D(
-            num_channels=128,
-            num_filters=self.hidden_dim_2d,
-            filter_size=3,
+        self.p_conv2d2_act = paddle.nn.ReLU()
+
+        self.p_conv2d3 = paddle.nn.Conv2d(
+            in_channels=128,
+            out_channels=self.hidden_dim_2d,
+            kernel_size=3,
             stride=1,
             padding=1,
-            act="relu",
-            param_attr=ParamAttr(name="PEM_2d3_w"),
+            weight_attr=ParamAttr(name="PEM_2d3_w"),
             bias_attr=ParamAttr(name="PEM_2d3_b"))
 
-        self.p_conv2d4 = fluid.dygraph.Conv2D(
-            num_channels=128,
-            num_filters=2,
-            filter_size=1,
+        self.p_conv2d3_act = paddle.nn.ReLU()
+
+        self.p_conv2d4 = paddle.nn.Conv2d(
+            in_channels=128,
+            out_channels=2,
+            kernel_size=1,
             stride=1,
             padding=0,
-            act="sigmoid",
-            param_attr=ParamAttr(name="PEM_2d4_w"),
+            weight_attr=ParamAttr(name="PEM_2d4_w"),
             bias_attr=ParamAttr(name="PEM_2d4_b"))
+        self.p_conv2d4_act = paddle.nn.Sigmoid()
 
     def forward(self, x):
         #Base Module
@@ -185,24 +192,28 @@ class BMN(fluid.dygraph.Layer):
         #TEM
         xs = self.ts_conv1(x)
         xs = self.ts_conv2(xs)
-        xs = fluid.layers.squeeze(xs, axes=[1])
+        xs = paddle.squeeze(xs, axis=[1])
         xe = self.te_conv1(x)
         xe = self.te_conv2(xe)
-        xe = fluid.layers.squeeze(xe, axes=[1])
+        xe = paddle.squeeze(xe, axis=[1])
 
         #PEM
         xp = self.p_conv1(x)
         #BM layer
-        xp = fluid.layers.matmul(xp, self.sample_mask)
-        xp = fluid.layers.reshape(
-            xp, shape=[0, 0, -1, self.dscale, self.tscale])
+        xp = paddle.matmul(xp, self.sample_mask)
+        xp = paddle.reshape(xp, shape=[0, 0, -1, self.dscale, self.tscale])
 
         xp = self.p_conv3d1(xp)
-        xp = fluid.layers.squeeze(xp, axes=[2])
+        xp = self.p_conv3d1_act(xp)
+        xp = paddle.squeeze(xp, axis=[2])
         xp = self.p_conv2d1(xp)
+        xp = self.p_conv2d1_act(xp)
         xp = self.p_conv2d2(xp)
+        xp = self.p_conv2d2_act(xp)
         xp = self.p_conv2d3(xp)
+        xp = self.p_conv2d3_act(xp)
         xp = self.p_conv2d4(xp)
+        xp = self.p_conv2d4_act(xp)
         return xp, xs, xe
 
 
@@ -217,35 +228,28 @@ def bmn_loss_func(pred_bm, pred_start, pred_end, gt_iou_map, gt_start, gt_end,
             ] + [0 for i in range(idx)]
             bm_mask.append(mask_vector)
         bm_mask = np.array(bm_mask, dtype=np.float32)
-        self_bm_mask = fluid.layers.create_global_var(
-            shape=[dscale, tscale], value=0, dtype=DATATYPE, persistable=True)
-        fluid.layers.assign(bm_mask, self_bm_mask)
-        self_bm_mask.stop_gradient = True
-        return self_bm_mask
+        bm_mask = paddle.to_tensor(bm_mask)
+        bm_mask.stop_gradient = True
+        return bm_mask
 
     def tem_loss_func(pred_start, pred_end, gt_start, gt_end):
         def bi_loss(pred_score, gt_label):
-            pred_score = fluid.layers.reshape(
-                x=pred_score, shape=[-1], inplace=False)
-            gt_label = fluid.layers.reshape(
-                x=gt_label, shape=[-1], inplace=False)
+            pred_score = paddle.reshape(x=pred_score, shape=[-1])
+            gt_label = paddle.reshape(x=gt_label, shape=[-1])
             gt_label.stop_gradient = True
-            pmask = fluid.layers.cast(x=(gt_label > 0.5), dtype=DATATYPE)
-            num_entries = fluid.layers.cast(
-                fluid.layers.shape(pmask), dtype=DATATYPE)
-            num_positive = fluid.layers.cast(
-                fluid.layers.reduce_sum(pmask), dtype=DATATYPE)
+            pmask = paddle.cast(x=(gt_label > 0.5), dtype=DATATYPE)
+            num_entries = paddle.cast(paddle.shape(pmask), dtype=DATATYPE)
+            num_positive = paddle.cast(paddle.reduce_sum(pmask), dtype=DATATYPE)
             ratio = num_entries / num_positive
             coef_0 = 0.5 * ratio / (ratio - 1)
             coef_1 = 0.5 * ratio
             epsilon = 0.000001
-            temp = fluid.layers.log(pred_score + epsilon)
-            loss_pos = fluid.layers.elementwise_mul(
-                fluid.layers.log(pred_score + epsilon), pmask)
-            loss_pos = coef_1 * fluid.layers.reduce_mean(loss_pos)
-            loss_neg = fluid.layers.elementwise_mul(
-                fluid.layers.log(1.0 - pred_score + epsilon), (1.0 - pmask))
-            loss_neg = coef_0 * fluid.layers.reduce_mean(loss_neg)
+            temp = paddle.log(pred_score + epsilon)
+            loss_pos = paddle.multiply(paddle.log(pred_score + epsilon), pmask)
+            loss_pos = coef_1 * paddle.reduce_mean(loss_pos)
+            loss_neg = paddle.multiply(
+                paddle.log(1.0 - pred_score + epsilon), (1.0 - pmask))
+            loss_neg = coef_0 * paddle.reduce_mean(loss_neg)
             loss = -1 * (loss_pos + loss_neg)
             return loss
 
@@ -256,77 +260,72 @@ def bmn_loss_func(pred_bm, pred_start, pred_end, gt_iou_map, gt_start, gt_end,
 
     def pem_reg_loss_func(pred_score, gt_iou_map, mask):
 
-        gt_iou_map = fluid.layers.elementwise_mul(gt_iou_map, mask)
+        gt_iou_map = paddle.multiply(gt_iou_map, mask)
 
-        u_hmask = fluid.layers.cast(x=gt_iou_map > 0.7, dtype=DATATYPE)
-        u_mmask = fluid.layers.logical_and(gt_iou_map <= 0.7, gt_iou_map > 0.3)
-        u_mmask = fluid.layers.cast(x=u_mmask, dtype=DATATYPE)
-        u_lmask = fluid.layers.logical_and(gt_iou_map <= 0.3, gt_iou_map >= 0.)
-        u_lmask = fluid.layers.cast(x=u_lmask, dtype=DATATYPE)
-        u_lmask = fluid.layers.elementwise_mul(u_lmask, mask)
+        u_hmask = paddle.cast(x=gt_iou_map > 0.7, dtype=DATATYPE)
+        u_mmask = paddle.logical_and(gt_iou_map <= 0.7, gt_iou_map > 0.3)
+        u_mmask = paddle.cast(x=u_mmask, dtype=DATATYPE)
+        u_lmask = paddle.logical_and(gt_iou_map <= 0.3, gt_iou_map >= 0.)
+        u_lmask = paddle.cast(x=u_lmask, dtype=DATATYPE)
+        u_lmask = paddle.multiply(u_lmask, mask)
 
-        num_h = fluid.layers.cast(
-            fluid.layers.reduce_sum(u_hmask), dtype=DATATYPE)
-        num_m = fluid.layers.cast(
-            fluid.layers.reduce_sum(u_mmask), dtype=DATATYPE)
-        num_l = fluid.layers.cast(
-            fluid.layers.reduce_sum(u_lmask), dtype=DATATYPE)
+        num_h = paddle.cast(paddle.reduce_sum(u_hmask), dtype=DATATYPE)
+        num_m = paddle.cast(paddle.reduce_sum(u_mmask), dtype=DATATYPE)
+        num_l = paddle.cast(paddle.reduce_sum(u_lmask), dtype=DATATYPE)
 
         r_m = num_h / num_m
-        u_smmask = fluid.layers.uniform_random(
+        u_smmask = paddle.uniform(
             shape=[gt_iou_map.shape[1], gt_iou_map.shape[2]],
             dtype=DATATYPE,
             min=0.0,
             max=1.0)
-        u_smmask = fluid.layers.elementwise_mul(u_mmask, u_smmask)
-        u_smmask = fluid.layers.cast(x=(u_smmask > (1. - r_m)), dtype=DATATYPE)
+        u_smmask = paddle.multiply(u_mmask, u_smmask)
+        u_smmask = paddle.cast(x=(u_smmask > (1. - r_m)), dtype=DATATYPE)
 
         r_l = num_h / num_l
-        u_slmask = fluid.layers.uniform_random(
+        u_slmask = paddle.uniform(
             shape=[gt_iou_map.shape[1], gt_iou_map.shape[2]],
             dtype=DATATYPE,
             min=0.0,
             max=1.0)
-        u_slmask = fluid.layers.elementwise_mul(u_lmask, u_slmask)
-        u_slmask = fluid.layers.cast(x=(u_slmask > (1. - r_l)), dtype=DATATYPE)
+        u_slmask = paddle.multiply(u_lmask, u_slmask)
+        u_slmask = paddle.cast(x=(u_slmask > (1. - r_l)), dtype=DATATYPE)
 
         weights = u_hmask + u_smmask + u_slmask
         weights.stop_gradient = True
-        loss = fluid.layers.square_error_cost(pred_score, gt_iou_map)
-        loss = fluid.layers.elementwise_mul(loss, weights)
-        loss = 0.5 * fluid.layers.reduce_sum(loss) / fluid.layers.reduce_sum(
-            weights)
+        loss = F.square_error_cost(pred_score, gt_iou_map)
+        loss = paddle.multiply(loss, weights)
+        loss = 0.5 * paddle.reduce_sum(loss) / paddle.reduce_sum(weights)
 
         return loss
 
     def pem_cls_loss_func(pred_score, gt_iou_map, mask):
-        gt_iou_map = fluid.layers.elementwise_mul(gt_iou_map, mask)
+        gt_iou_map = paddle.multiply(gt_iou_map, mask)
         gt_iou_map.stop_gradient = True
-        pmask = fluid.layers.cast(x=(gt_iou_map > 0.9), dtype=DATATYPE)
-        nmask = fluid.layers.cast(x=(gt_iou_map <= 0.9), dtype=DATATYPE)
-        nmask = fluid.layers.elementwise_mul(nmask, mask)
+        pmask = paddle.cast(x=(gt_iou_map > 0.9), dtype=DATATYPE)
+        nmask = paddle.cast(x=(gt_iou_map <= 0.9), dtype=DATATYPE)
+        nmask = paddle.multiply(nmask, mask)
 
-        num_positive = fluid.layers.reduce_sum(pmask)
-        num_entries = num_positive + fluid.layers.reduce_sum(nmask)
+        num_positive = paddle.reduce_sum(pmask)
+        num_entries = num_positive + paddle.reduce_sum(nmask)
         ratio = num_entries / num_positive
         coef_0 = 0.5 * ratio / (ratio - 1)
         coef_1 = 0.5 * ratio
         epsilon = 0.000001
-        loss_pos = fluid.layers.elementwise_mul(
-            fluid.layers.log(pred_score + epsilon), pmask)
-        loss_pos = coef_1 * fluid.layers.reduce_sum(loss_pos)
-        loss_neg = fluid.layers.elementwise_mul(
-            fluid.layers.log(1.0 - pred_score + epsilon), nmask)
-        loss_neg = coef_0 * fluid.layers.reduce_sum(loss_neg)
+        loss_pos = paddle.multiply(paddle.log(pred_score + epsilon), pmask)
+        loss_pos = coef_1 * paddle.reduce_sum(loss_pos)
+        loss_neg = paddle.multiply(
+            paddle.log(1.0 - pred_score + epsilon), nmask)
+        loss_neg = coef_0 * paddle.reduce_sum(loss_neg)
         loss = -1 * (loss_pos + loss_neg) / num_entries
         return loss
 
-    pred_bm_reg = fluid.layers.squeeze(
-        fluid.layers.slice(
-            pred_bm, axes=[1], starts=[0], ends=[1]), axes=[1])
-    pred_bm_cls = fluid.layers.squeeze(
-        fluid.layers.slice(
-            pred_bm, axes=[1], starts=[1], ends=[2]), axes=[1])
+    pred_bm_reg = paddle.squeeze(
+        paddle.slice(
+            pred_bm, axes=[1], starts=[0], ends=[1]), axis=[1])
+    pred_bm_cls = paddle.squeeze(
+        paddle.slice(
+            pred_bm, axes=[1], starts=[1], ends=[2]), axis=[1])
 
     bm_mask = _get_mask(cfg)
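`paddle.nn` convolutions no longer take an `act` argument, so every fused `Conv2D(..., act=...)` is split into a conv layer plus an explicit activation layer, both in `Conv1D` and in the PEM branch. A hedged sketch of the pattern (the `Conv2d`/`Conv3d` spelling follows the 2.0-beta API used by this patch; later releases spell them `Conv2D`/`Conv3D`):

```python
import paddle

# one fluid Conv2D(..., act="relu") becomes a conv layer + activation layer
conv = paddle.nn.Conv2d(
    in_channels=128, out_channels=128, kernel_size=3, stride=1, padding=1)
relu = paddle.nn.ReLU()

x = paddle.ones([1, 128, 16, 16], dtype='float32')
y = relu(conv(x))  # formerly a single fused call
print(y.shape)     # [1, 128, 16, 16]
```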
diff --git a/dygraph/bmn/predict.py b/dygraph/bmn/predict.py
index 363e15b0d36c8065436531432109dbc60296b8ee..6e66afb174fbd6c4db314351044ac5a1fb9facf0 100644
--- a/dygraph/bmn/predict.py
+++ b/dygraph/bmn/predict.py
@@ -13,7 +13,7 @@
 #limitations under the License.
 
 import paddle
-import paddle.fluid as fluid
+from paddle.io import DataLoader, DistributedBatchSampler
 import numpy as np
 import argparse
 import sys
@@ -23,7 +23,7 @@ import json
 
 from model import BMN
 from eval import gen_props
-from reader import BMNReader
+from reader import BmnDataset
 from bmn_utils import bmn_post_processing
 from config_utils import *
 
@@ -93,53 +93,60 @@ def infer_bmn(args):
         os.makedirs(infer_config.INFER.output_path)
     if not os.path.isdir(infer_config.INFER.result_path):
         os.makedirs(infer_config.INFER.result_path)
-    place = fluid.CUDAPlace(0)
-    with fluid.dygraph.guard(place):
-        bmn = BMN(infer_config)
-        # load checkpoint
-        if args.weights:
-            assert os.path.exists(args.weights + ".pdparams"
-                                  ), "Given weight dir {} not exist.".format(
-                                      args.weights)
-
-            logger.info('load test weights from {}'.format(args.weights))
-            model_dict, _ = fluid.load_dygraph(args.weights)
-            bmn.set_dict(model_dict)
-
-        reader = BMNReader(mode="infer", cfg=infer_config)
-        infer_reader = reader.create_reader()
-
-        video_dict, video_list = get_dataset_dict(infer_config)
-
-        bmn.eval()
-        for batch_id, data in enumerate(infer_reader()):
-            video_feat = np.array([item[0] for item in data]).astype(DATATYPE)
-            video_idx = [item[1] for item in data][0]  #batch_size=1 by default
-
-            x_data = fluid.dygraph.base.to_variable(video_feat)
-
-            pred_bm, pred_start, pred_end = bmn(x_data)
-
-            pred_bm = pred_bm.numpy()
-            pred_start = pred_start[0].numpy()
-            pred_end = pred_end[0].numpy()
-
-            logger.info("Processing................ batch {}".format(batch_id))
-            gen_props(
-                pred_bm,
-                pred_start,
-                pred_end,
-                video_idx,
-                video_list,
-                infer_config,
-                mode='infer')
-
-        logger.info("Post_processing....This may take a while")
-        bmn_post_processing(video_dict, infer_config.INFER.subset,
-                            infer_config.INFER.output_path,
-                            infer_config.INFER.result_path)
-        logger.info("[INFER] infer finished. Results saved in {}".format(
-            args.save_dir) + "bmn_results_test.json")
+
+    place = paddle.CUDAPlace(0)
+    paddle.disable_static(place)
+
+    bmn = BMN(infer_config)
+    # load checkpoint
+    if args.weights:
+        assert os.path.exists(
+            args.weights +
+            ".pdparams"), "Given weight dir {} not exist.".format(args.weights)
+
+        logger.info('load test weights from {}'.format(args.weights))
+        model_dict, _ = paddle.load(args.weights)
+        bmn.set_dict(model_dict)
+
+    infer_dataset = BmnDataset(infer_config, 'infer')
+    infer_sampler = DistributedBatchSampler(
+        infer_dataset, batch_size=infer_config.INFER.batch_size)
+    infer_loader = DataLoader(
+        infer_dataset,
+        batch_sampler=infer_sampler,
+        places=place,
+        num_workers=infer_config.INFER.num_workers,
+        return_list=True)
+
+    video_dict, video_list = get_dataset_dict(infer_config)
+
+    bmn.eval()
+    for batch_id, data in enumerate(infer_loader):
+        x_data = paddle.to_tensor(data[0])
+        video_idx = data[1]  #batch_size=1 by default
+
+        pred_bm, pred_start, pred_end = bmn(x_data)
+
+        pred_bm = pred_bm.numpy()
+        pred_start = pred_start[0].numpy()
+        pred_end = pred_end[0].numpy()
+
+        logger.info("Processing................ batch {}".format(batch_id))
+        gen_props(
+            pred_bm,
+            pred_start,
+            pred_end,
+            video_idx,
+            video_list,
+            infer_config,
+            mode='infer')
+
+    logger.info("Post_processing....This may take a while")
+    bmn_post_processing(video_dict, infer_config.INFER.subset,
+                        infer_config.INFER.output_path,
+                        infer_config.INFER.result_path)
+    logger.info("[INFER] infer finished. Results saved in {}".format(
+        args.save_dir) + "bmn_results_test.json")
 
 
 if __name__ == '__main__':
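Because the loader is built with `return_list=True` and `batch_size=1`, each iteration yields a positional list of fields rather than a dict, which is what the indexing in the loop above relies on. The same pattern in isolation, assuming `infer_loader` and `bmn` as constructed above:

```python
for batch_id, data in enumerate(infer_loader):
    x_data = paddle.to_tensor(data[0])  # video features
    video_idx = data[1]                 # safe only because batch_size == 1
    pred_bm, pred_start, pred_end = bmn(x_data)
```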
Results saved in {}".format( + args.save_dir) + "bmn_results_test.json") if __name__ == '__main__': diff --git a/dygraph/bmn/reader.py b/dygraph/bmn/reader.py index d76ff5e578213d7736d7cdf62a56d48394b813a9..e1b5040584c955f1e7c84b3ff1f5899262924670 100644 --- a/dygraph/bmn/reader.py +++ b/dygraph/bmn/reader.py @@ -1,4 +1,4 @@ -# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserve. +# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve. # #Licensed under the Apache License, Version 2.0 (the "License"); #you may not use this file except in compliance with the License. @@ -14,37 +14,50 @@ import paddle import numpy as np -import random import json -import multiprocessing -import functools import logging -import platform import os +import sys + +from paddle.io import Dataset, DataLoader, DistributedBatchSampler logger = logging.getLogger(__name__) +from config_utils import * from bmn_utils import iou_with_anchors, ioa_with_anchors +DATATYPE = "float32" + -class BMNReader(): - def __init__(self, mode, cfg): +class BmnDataset(Dataset): + def __init__(self, cfg, mode): self.mode = mode self.tscale = cfg.MODEL.tscale # 100 self.dscale = cfg.MODEL.dscale # 100 self.anno_file = cfg.MODEL.anno_file + self.feat_path = cfg.MODEL.feat_path self.file_list = cfg.INFER.filelist self.subset = cfg[mode.upper()]['subset'] self.tgap = 1. / self.tscale - self.feat_path = cfg.MODEL.feat_path self.get_dataset_dict() self.get_match_map() - self.batch_size = cfg[mode.upper()]['batch_size'] - self.num_threads = cfg[mode.upper()]['num_threads'] - if (mode == 'test') or (mode == 'infer'): - self.num_threads = 1 # set num_threads as 1 for test and infer + def __getitem__(self, index): + video_name = self.video_list[index] + video_idx = np.array(self.video_list.index(video_name)).astype('int64') + video_feat = self.load_file(video_name) + if self.mode == 'infer': + return video_feat, video_idx + else: + gt_iou_map, gt_start, gt_end = self.get_video_label(video_name) + if self.mode == 'train' or self.mode == 'valid': + return video_feat, gt_iou_map, gt_start, gt_end + elif self.mode == 'test': + return video_feat, gt_iou_map, gt_start, gt_end, video_idx + + def __len__(self): + return len(self.video_list) def get_dataset_dict(self): assert (os.path.exists(self.feat_path)), "Input feature path not exists" @@ -128,7 +141,8 @@ class BMNReader(): gt_start = np.array(match_score_start) gt_end = np.array(match_score_end) - return gt_iou_map, gt_start, gt_end + return gt_iou_map.astype(DATATYPE), gt_start.astype( + DATATYPE), gt_end.astype(DATATYPE) def load_file(self, video_name): file_name = video_name + ".npy" @@ -137,158 +151,3 @@ class BMNReader(): video_feat = video_feat.T video_feat = video_feat.astype("float32") return video_feat - - def create_reader(self): - """reader creator for bmn model""" - if self.mode == 'infer': - return self.make_infer_reader() - if self.num_threads == 1: - return self.make_reader() - else: - sysstr = platform.system() - if sysstr == 'Windows': - return self.make_multithread_reader() - else: - return self.make_multiprocess_reader() - - def make_infer_reader(self): - """reader for inference""" - - def reader(): - batch_out = [] - for video_name in self.video_list: - video_idx = self.video_list.index(video_name) - video_feat = self.load_file(video_name) - batch_out.append((video_feat, video_idx)) - - if len(batch_out) == self.batch_size: - yield batch_out - batch_out = [] - - return reader - - def make_reader(self): - """single process reader""" - - def reader(): - video_list = 
-            video_list = self.video_list
-            if self.mode == 'train':
-                random.shuffle(video_list)
-
-            batch_out = []
-            for video_name in video_list:
-                video_idx = video_list.index(video_name)
-                video_feat = self.load_file(video_name)
-                gt_iou_map, gt_start, gt_end = self.get_video_label(video_name)
-
-                if self.mode == 'train' or self.mode == 'valid':
-                    batch_out.append((video_feat, gt_iou_map, gt_start, gt_end))
-                elif self.mode == 'test':
-                    batch_out.append(
-                        (video_feat, gt_iou_map, gt_start, gt_end, video_idx))
-                else:
-                    raise NotImplementedError('mode {} not implemented'.format(
-                        self.mode))
-                if len(batch_out) == self.batch_size:
-                    yield batch_out
-                    batch_out = []
-
-        return reader
-
-    def make_multithread_reader(self):
-        def reader():
-            if self.mode == 'train':
-                random.shuffle(self.video_list)
-            for video_name in self.video_list:
-                video_idx = self.video_list.index(video_name)
-                yield [video_name, video_idx]
-
-        def process_data(sample, mode):
-            video_name = sample[0]
-            video_idx = sample[1]
-            video_feat = self.load_file(video_name)
-            gt_iou_map, gt_start, gt_end = self.get_video_label(video_name)
-            if mode == 'train' or mode == 'valid':
-                return (video_feat, gt_iou_map, gt_start, gt_end)
-            elif mode == 'test':
-                return (video_feat, gt_iou_map, gt_start, gt_end, video_idx)
-            else:
-                raise NotImplementedError('mode {} not implemented'.format(
-                    mode))
-
-        mapper = functools.partial(process_data, mode=self.mode)
-
-        def batch_reader():
-            xreader = paddle.reader.xmap_readers(mapper, reader,
-                                                 self.num_threads, 1024)
-            batch = []
-            for item in xreader():
-                batch.append(item)
-                if len(batch) == self.batch_size:
-                    yield batch
-                    batch = []
-
-        return batch_reader
-
-    def make_multiprocess_reader(self):
-        """multiprocess reader"""
-
-        def read_into_queue(video_list, queue):
-
-            batch_out = []
-            for video_name in video_list:
-                video_idx = video_list.index(video_name)
-                video_feat = self.load_file(video_name)
-                gt_iou_map, gt_start, gt_end = self.get_video_label(video_name)
-
-                if self.mode == 'train' or self.mode == 'valid':
-                    batch_out.append((video_feat, gt_iou_map, gt_start, gt_end))
-                elif self.mode == 'test':
-                    batch_out.append(
-                        (video_feat, gt_iou_map, gt_start, gt_end, video_idx))
-                else:
-                    raise NotImplementedError('mode {} not implemented'.format(
-                        self.mode))
-
-                if len(batch_out) == self.batch_size:
-                    queue.put(batch_out)
-                    batch_out = []
-            queue.put(None)
-
-        def queue_reader():
-            video_list = self.video_list
-            if self.mode == 'train':
-                random.shuffle(video_list)
-
-            n = self.num_threads
-            queue_size = 20
-            reader_lists = [None] * n
-            file_num = int(len(video_list) // n)
-            for i in range(n):
-                if i < len(reader_lists) - 1:
-                    tmp_list = video_list[i * file_num:(i + 1) * file_num]
-                else:
-                    tmp_list = video_list[i * file_num:]
-                reader_lists[i] = tmp_list
-
-            manager = multiprocessing.Manager()
-            queue = manager.Queue(queue_size)
-            p_list = [None] * len(reader_lists)
-            for i in range(len(reader_lists)):
-                reader_list = reader_lists[i]
-                p_list[i] = multiprocessing.Process(
-                    target=read_into_queue, args=(reader_list, queue))
-                p_list[i].start()
-            reader_num = len(reader_lists)
-            finish_num = 0
-            while finish_num < reader_num:
-                sample = queue.get()
-                if sample is None:
-                    finish_num += 1
-                else:
-                    yield sample
-            for i in range(len(p_list)):
-                if p_list[i].is_alive():
-                    p_list[i].join()
-
-        return queue_reader
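The map-style `Dataset` reduces reader.py to pure per-sample logic: `__len__`/`__getitem__` replace roughly 150 lines of hand-rolled single-process, multithread, and multiprocess readers, since shuffling, batching, and worker fan-out now belong to `DataLoader`. A hypothetical smoke test, assuming `cfg` was parsed by `config_utils`:

```python
from reader import BmnDataset

dataset = BmnDataset(cfg, 'train')               # cfg: parsed config object
print(len(dataset))                              # number of videos in subset
feat, gt_iou_map, gt_start, gt_end = dataset[0]  # float32 numpy arrays
```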
diff --git a/dygraph/bmn/run.sh b/dygraph/bmn/run.sh
index b426012056b92f9524271d127595775f281f789a..3d79477cb2e5c97e41c2fc5324ccc9f7537cd935 100644
--- a/dygraph/bmn/run.sh
+++ b/dygraph/bmn/run.sh
@@ -1,5 +1,9 @@
 export CUDA_VISIBLE_DEVICES=0,1,2,3
-python -m paddle.distributed.launch \
-    --selected_gpus=0,1,2,3 \
-    --log_dir ./mylog \
-    train.py --use_data_parallel True
+
+start_time=$(date +%s)
+
+python3 train.py --use_data_parallel=1
+
+end_time=$(date +%s)
+cost_time=$((end_time - start_time))
+echo "4 card bs=16 9 epoch training time is $(($cost_time/60))min $(($cost_time%60))s"
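The external `paddle.distributed.launch` wrapper is dropped because multi-GPU startup moves into train.py itself via `paddle.distributed.spawn` (see the bottom of the train.py diff below); the shell script now only sets devices and times the run. The launch pattern in isolation, assuming `train_bmn` and `parse_args` from train.py:

```python
import paddle.distributed as dist

# each of the nprocs worker processes runs train_bmn with its own
# ParallelEnv rank/device, replacing `python -m paddle.distributed.launch`
if __name__ == "__main__":
    dist.spawn(train_bmn, args=(parse_args(),), nprocs=4)
```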
diff --git a/dygraph/bmn/train.py b/dygraph/bmn/train.py
index 0171830e2a3b7e02ee3ef3a95a410a803293ec1e..21591bb962d80b6224fd449d3b21dcf18aacb2df 100644
--- a/dygraph/bmn/train.py
+++ b/dygraph/bmn/train.py
@@ -13,7 +13,8 @@
 #limitations under the License.
 
 import paddle
-import paddle.fluid as fluid
+from paddle.io import DataLoader, DistributedBatchSampler
+import paddle.distributed as dist
 import numpy as np
 import argparse
 import ast
@@ -22,7 +23,7 @@ import sys
 import os
 
 from model import BMN, bmn_loss_func
-from reader import BMNReader
+from reader import BmnDataset
 from config_utils import *
 
 DATATYPE = 'float32'
@@ -98,29 +99,22 @@ def optimizer(cfg, parameter_list):
     lr_decay = cfg.TRAIN.learning_rate_decay
     l2_weight_decay = cfg.TRAIN.l2_weight_decay
     lr = [base_lr, base_lr * lr_decay]
-    optimizer = fluid.optimizer.Adam(
-        fluid.layers.piecewise_decay(
-            boundaries=bd, values=lr),
-        parameter_list=parameter_list,
-        regularization=fluid.regularizer.L2DecayRegularizer(
-            regularization_coeff=l2_weight_decay))
+    scheduler = paddle.optimizer.lr_scheduler.PiecewiseLR(
+        boundaries=bd, values=lr)
+    optimizer = paddle.optimizer.Adam(
+        learning_rate=scheduler,
+        parameters=parameter_list,
+        weight_decay=l2_weight_decay)
     return optimizer
 
 
 # Validation
-def val_bmn(model, config, args):
-    reader = BMNReader(mode="valid", cfg=config)
-    val_reader = reader.create_reader()
-    for batch_id, data in enumerate(val_reader()):
-        video_feat = np.array([item[0] for item in data]).astype(DATATYPE)
-        gt_iou_map = np.array([item[1] for item in data]).astype(DATATYPE)
-        gt_start = np.array([item[2] for item in data]).astype(DATATYPE)
-        gt_end = np.array([item[3] for item in data]).astype(DATATYPE)
-
-        x_data = fluid.dygraph.base.to_variable(video_feat)
-        gt_iou_map = fluid.dygraph.base.to_variable(gt_iou_map)
-        gt_start = fluid.dygraph.base.to_variable(gt_start)
-        gt_end = fluid.dygraph.base.to_variable(gt_end)
+def val_bmn(model, val_loader, config, args):
+    for batch_id, data in enumerate(val_loader):
+        x_data = paddle.to_tensor(data[0])
+        gt_iou_map = paddle.to_tensor(data[1])
+        gt_start = paddle.to_tensor(data[2])
+        gt_end = paddle.to_tensor(data[3])
         gt_iou_map.stop_gradient = True
         gt_start.stop_gradient = True
         gt_end.stop_gradient = True
@@ -129,7 +123,7 @@ def val_bmn(model, config, args):
         loss, tem_loss, pem_reg_loss, pem_cls_loss = bmn_loss_func(
             pred_bm, pred_start, pred_end, gt_iou_map, gt_start, gt_end,
             config)
-        avg_loss = fluid.layers.mean(loss)
+        avg_loss = paddle.mean(loss)
 
         if args.log_interval > 0 and (batch_id % args.log_interval == 0):
             logger.info('[VALID] iter {} '.format(batch_id)
@@ -145,99 +139,127 @@ def train_bmn(args):
     valid_config = merge_configs(config, 'valid', vars(args))
 
     if not args.use_gpu:
-        place = fluid.CPUPlace()
+        place = paddle.CPUPlace()
     elif not args.use_data_parallel:
-        place = fluid.CUDAPlace(0)
+        place = paddle.CUDAPlace(0)
     else:
-        place = fluid.CUDAPlace(fluid.dygraph.parallel.Env().dev_id)
-
-    with fluid.dygraph.guard(place):
-        if args.use_data_parallel:
-            strategy = fluid.dygraph.parallel.prepare_context()
-        bmn = BMN(train_config)
-        adam = optimizer(train_config, parameter_list=bmn.parameters())
-
-        if args.use_data_parallel:
-            bmn = fluid.dygraph.parallel.DataParallel(bmn, strategy)
-
-        if args.resume:
-            # if resume weights is given, load resume weights directly
-            assert os.path.exists(args.resume + ".pdparams"), \
-                "Given resume weight dir {} not exist.".format(args.resume)
-
-            model, _ = fluid.dygraph.load_dygraph(args.resume)
-            bmn.set_dict(model)
-
-        reader = BMNReader(mode="train", cfg=train_config)
-        train_reader = reader.create_reader()
-        if args.use_data_parallel:
-            train_reader = fluid.contrib.reader.distributed_batch_reader(
-                train_reader)
-
-        for epoch in range(args.epoch):
-            for batch_id, data in enumerate(train_reader()):
-                video_feat = np.array(
-                    [item[0] for item in data]).astype(DATATYPE)
-                gt_iou_map = np.array(
-                    [item[1] for item in data]).astype(DATATYPE)
-                gt_start = np.array([item[2] for item in data]).astype(DATATYPE)
-                gt_end = np.array([item[3] for item in data]).astype(DATATYPE)
-
-                x_data = fluid.dygraph.base.to_variable(video_feat)
-                gt_iou_map = fluid.dygraph.base.to_variable(gt_iou_map)
-                gt_start = fluid.dygraph.base.to_variable(gt_start)
-                gt_end = fluid.dygraph.base.to_variable(gt_end)
-                gt_iou_map.stop_gradient = True
-                gt_start.stop_gradient = True
-                gt_end.stop_gradient = True
-
-                pred_bm, pred_start, pred_end = bmn(x_data)
-
-                loss, tem_loss, pem_reg_loss, pem_cls_loss = bmn_loss_func(
-                    pred_bm, pred_start, pred_end, gt_iou_map, gt_start, gt_end,
-                    train_config)
-                avg_loss = fluid.layers.mean(loss)
-
-                if args.use_data_parallel:
-                    avg_loss = bmn.scale_loss(avg_loss)
-                    avg_loss.backward()
-                    bmn.apply_collective_grads()
-                else:
-                    avg_loss.backward()
-
-                adam.minimize(avg_loss)
-
-                bmn.clear_gradients()
-
-                if args.log_interval > 0 and (
-                        batch_id % args.log_interval == 0):
-                    logger.info('[TRAIN] Epoch {}, iter {} '.format(epoch, batch_id)
-                                + '\tLoss = {}, \ttem_loss = {}, \tpem_reg_loss = {}, \tpem_cls_loss = {}'.format(
-                        '%.04f' % avg_loss.numpy()[0], '%.04f' % tem_loss.numpy()[0], \
-                        '%.04f' % pem_reg_loss.numpy()[0], '%.04f' % pem_cls_loss.numpy()[0]))
-
-            logger.info('[TRAIN] Epoch {} training finished'.format(epoch))
-            if not os.path.isdir(args.save_dir):
-                os.makedirs(args.save_dir)
+        place = paddle.CUDAPlace(dist.ParallelEnv().dev_id)
+
+    paddle.disable_static(place)
+    if args.use_data_parallel:
+        dist.init_parallel_env()
+    bmn = BMN(train_config)
+    adam = optimizer(train_config, parameter_list=bmn.parameters())
+
+    if args.use_data_parallel:
+        bmn = paddle.DataParallel(bmn)
+
+    if args.resume:
+        # if resume weights is given, load resume weights directly
+        assert os.path.exists(args.resume + ".pdparams"), \
+            "Given resume weight dir {} not exist.".format(args.resume)
+
+        model, _ = paddle.load(args.resume)
+        bmn.set_dict(model)
+
+    #Reader
+    bs_denominator = 1
+    if args.use_gpu:
+        gpus = os.getenv("CUDA_VISIBLE_DEVICES", "")
+        if gpus == "":
+            pass
+        else:
+            gpus = gpus.split(",")
+            num_gpus = len(gpus)
+            assert num_gpus == train_config.TRAIN.num_gpus, \
+                "num_gpus({}) set by CUDA_VISIBLE_DEVICES " \
+                "should be the same as that " \
+                "set in {}({})".format(
+                    num_gpus, args.config, train_config.TRAIN.num_gpus)
+        bs_denominator = train_config.TRAIN.num_gpus
+
+    bs_train_single = int(train_config.TRAIN.batch_size / bs_denominator)
+    bs_val_single = int(valid_config.VALID.batch_size / bs_denominator)
+
+    train_dataset = BmnDataset(train_config, 'train')
+    val_dataset = BmnDataset(valid_config, 'valid')
+    train_sampler = DistributedBatchSampler(
+        train_dataset,
+        batch_size=bs_train_single,
+        shuffle=train_config.TRAIN.use_shuffle,
+        drop_last=True)
+    train_loader = DataLoader(
+        train_dataset,
+        batch_sampler=train_sampler,
+        places=place,
+        num_workers=train_config.TRAIN.num_workers,
+        return_list=True)
+    val_sampler = DistributedBatchSampler(val_dataset, batch_size=bs_val_single)
+    val_loader = DataLoader(
+        val_dataset,
+        batch_sampler=val_sampler,
+        places=place,
+        num_workers=valid_config.VALID.num_workers,
+        return_list=True)
+
+    for epoch in range(args.epoch):
+        for batch_id, data in enumerate(train_loader):
+            x_data = paddle.to_tensor(data[0])
+            gt_iou_map = paddle.to_tensor(data[1])
+            gt_start = paddle.to_tensor(data[2])
+            gt_end = paddle.to_tensor(data[3])
+            gt_iou_map.stop_gradient = True
+            gt_start.stop_gradient = True
+            gt_end.stop_gradient = True
+
+            pred_bm, pred_start, pred_end = bmn(x_data)
+
+            loss, tem_loss, pem_reg_loss, pem_cls_loss = bmn_loss_func(
+                pred_bm, pred_start, pred_end, gt_iou_map, gt_start, gt_end,
+                train_config)
+            avg_loss = paddle.mean(loss)
+
+            if args.use_data_parallel:
+                avg_loss = bmn.scale_loss(avg_loss)
+                avg_loss.backward()
+                bmn.apply_collective_grads()
+            else:
+                avg_loss.backward()
+
+            adam.step()
+            adam.clear_grad()
+
+            if args.log_interval > 0 and (batch_id % args.log_interval == 0):
+                logger.info('[TRAIN] Epoch {}, iter {} '.format(epoch, batch_id)
+                            + '\tLoss = {}, \ttem_loss = {}, \tpem_reg_loss = {}, \tpem_cls_loss = {}'.format(
+                    '%.04f' % avg_loss.numpy()[0], '%.04f' % tem_loss.numpy()[0], \
+                    '%.04f' % pem_reg_loss.numpy()[0], '%.04f' % pem_cls_loss.numpy()[0]))
+
+        logger.info('[TRAIN] Epoch {} training finished'.format(epoch))
+
+        #save
+        if not os.path.isdir(args.save_dir):
+            os.makedirs(args.save_dir)
+
+        if dist.get_rank() == 0:
             save_model_name = os.path.join(
                 args.save_dir, "bmn_paddle_dy" + "_epoch{}".format(epoch))
-            fluid.dygraph.save_dygraph(bmn.state_dict(), save_model_name)
+            paddle.save(bmn.state_dict(), save_model_name)
 
-            # validation
-            if args.valid_interval > 0 and (epoch + 1
-                                            ) % args.valid_interval == 0:
-                bmn.eval()
-                val_bmn(bmn, valid_config, args)
-                bmn.train()
+        # validation
+        if args.valid_interval > 0 and (epoch + 1) % args.valid_interval == 0:
+            bmn.eval()
+            val_bmn(bmn, val_loader, valid_config, args)
+            bmn.train()
 
-        #save final results
-        if fluid.dygraph.parallel.Env().local_rank == 0:
-            save_model_name = os.path.join(args.save_dir,
-                                           "bmn_paddle_dy" + "_final")
-            fluid.dygraph.save_dygraph(bmn.state_dict(), save_model_name)
-        logger.info('[TRAIN] training finished')
+    #save final results
+    if dist.get_rank() == 0:
+        save_model_name = os.path.join(args.save_dir,
+                                       "bmn_paddle_dy" + "_final")
+        paddle.save(bmn.state_dict(), save_model_name)
+    logger.info('[TRAIN] training finished')
 
 
 if __name__ == "__main__":
     args = parse_args()
-    train_bmn(args)
+    dist.spawn(train_bmn, args=(args,), nprocs=4)
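The optimizer migration follows the same shape as the rest of the patch: the `fluid.layers.piecewise_decay` expression becomes a scheduler object handed to `paddle.optimizer.Adam`, `L2DecayRegularizer` collapses into `weight_decay`, and `minimize()`/`clear_gradients()` become `step()`/`clear_grad()`. A hedged sketch of the resulting loop, assuming `bmn` and `loss` from the training code above (the `lr_scheduler.PiecewiseLR` path is the 2.0-beta API used in this patch; stable releases move it to `paddle.optimizer.lr.PiecewiseDecay`):

```python
import paddle

scheduler = paddle.optimizer.lr_scheduler.PiecewiseLR(
    boundaries=[4200], values=[0.001, 0.0001])  # mirrors lr_decay_iter above
adam = paddle.optimizer.Adam(
    learning_rate=scheduler,
    parameters=bmn.parameters(),  # bmn: the BMN model built above
    weight_decay=1e-4)            # replaces L2DecayRegularizer

avg_loss = paddle.mean(loss)      # loss from bmn_loss_func above
avg_loss.backward()
adam.step()                       # replaces adam.minimize(avg_loss)
adam.clear_grad()                 # replaces bmn.clear_gradients()
```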