Unverified commit da5c04a6, authored by Kaipeng Deng, committed by GitHub

use new API (#4606)

Parent 82532bf0
......@@ -200,8 +200,7 @@ def pointnet_fp_module(unknown, known, unknown_feats, known_feats, mlp, bn=True,
dist.stop_gradient = True
idx.stop_gradient = True
dist = fluid.layers.sqrt(dist)
-ones = fluid.layers.fill_constant_batch_size_like(dist, dist.shape, dist.dtype, 1)
-dist_recip = ones / (dist + 1e-8); # 1.0 / dist
+dist_recip = 1.0 / (dist + 1e-8); # 1.0 / dist
norm = fluid.layers.reduce_sum(dist_recip, dim=-1, keep_dim=True)
weight = dist_recip / norm
weight.stop_gradient = True
......
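For reference, the removed fill_constant_batch_size_like/elementwise-division pair is replaced by a plain scalar-over-tensor division, which works because fluid Variables overload the Python arithmetic operators. A minimal standalone sketch; the tensor name, shape, and values below are illustrative, not taken from this file:

import numpy as np
import paddle.fluid as fluid

# Illustrative sketch: scalar / Variable broadcasts the Python float,
# so no explicit tensor of ones is needed.
dist = fluid.data(name='dist', shape=[None, 16, 3], dtype='float32')
dist_recip = 1.0 / (dist + 1e-8)

exe = fluid.Executor(fluid.CPUPlace())
exe.run(fluid.default_startup_program())
out, = exe.run(feed={'dist': np.random.rand(4, 16, 3).astype('float32')},
               fetch_list=[dist_recip])
print(out.shape)  # (4, 16, 3)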
......@@ -93,8 +93,8 @@ def get_reg_loss(pred_reg, reg_label, fg_mask, point_num, loc_scope,
x_res_norm_label = x_res_label / loc_bin_size
z_res_norm_label = z_res_label / loc_bin_size
-x_bin_onehot = fluid.layers.one_hot(x_bin_label, depth=per_loc_bin_num)
-z_bin_onehot = fluid.layers.one_hot(z_bin_label, depth=per_loc_bin_num)
+x_bin_onehot = fluid.one_hot(x_bin_label[:, 0], depth=per_loc_bin_num)
+z_bin_onehot = fluid.one_hot(z_bin_label[:, 0], depth=per_loc_bin_num)
loss_x_res = fluid.layers.smooth_l1(fluid.layers.reduce_sum(pred_reg[:, x_res_l: x_res_r] * x_bin_onehot, dim=1, keep_dim=True), x_res_norm_label)
loss_x_res = fluid.layers.reduce_mean(loss_x_res * fg_mask) * fg_scale
......@@ -115,7 +115,7 @@ def get_reg_loss(pred_reg, reg_label, fg_mask, point_num, loc_scope,
y_res_label = y_shift - (fluid.layers.cast(y_bin_label, dtype=y_shift.dtype) * loc_y_bin_size + loc_y_bin_size / 2.)
y_res_norm_label = y_res_label / loc_y_bin_size
-y_bin_onehot = fluid.layers.one_hot(y_bin_label, depth=per_loc_bin_num)
+y_bin_onehot = fluid.one_hot(y_bin_label[:, 0], depth=per_loc_bin_num)
loss_y_bin = fluid.layers.cross_entropy(pred_reg[:, y_bin_l: y_bin_r], y_bin_label)
loss_y_bin = fluid.layers.reduce_mean(loss_y_bin * fg_mask) * fg_scale
......@@ -169,7 +169,7 @@ def get_reg_loss(pred_reg, reg_label, fg_mask, point_num, loc_scope,
ry_res_label = shift_angle - (fluid.layers.cast(ry_bin_label, dtype=shift_angle.dtype) * angle_per_class + angle_per_class / 2)
ry_res_norm_label = ry_res_label / (angle_per_class / 2)
-ry_bin_onehot = fluid.layers.one_hot(ry_bin_label, depth=num_head_bin)
+ry_bin_onehot = fluid.one_hot(ry_bin_label[:, 0], depth=num_head_bin)
loss_ry_bin = fluid.layers.softmax_with_cross_entropy(pred_reg[:, ry_bin_l:ry_bin_r], ry_bin_label)
loss_ry_bin = fluid.layers.reduce_mean(loss_ry_bin * fg_mask) * fg_scale
loss_ry_res = fluid.layers.smooth_l1(fluid.layers.reduce_sum(pred_reg[:, ry_res_l: ry_res_r] * ry_bin_onehot, dim=1, keep_dim=True), ry_res_norm_label)
......
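For reference, these hunks migrate from fluid.layers.one_hot, which consumes the [N, 1] bin label directly, to fluid.one_hot, which (as I understand the API) appends the depth axis to whatever shape it is given; the [:, 0] slice drops the trailing singleton dimension so the output stays [N, depth]. A minimal sketch; the variable names and depth are placeholders, not from the commit:

import numpy as np
import paddle.fluid as fluid

# Illustrative sketch of the one_hot migration.
bin_label = fluid.data(name='bin_label', shape=[None, 1], dtype='int64')

# Old API: takes the [N, 1] label directly.
old_onehot = fluid.layers.one_hot(bin_label, depth=12)

# New API: appends the depth axis, so drop the trailing singleton
# dimension first ([N, 1] -> [N] -> [N, 12]).
new_onehot = fluid.one_hot(bin_label[:, 0], depth=12)

exe = fluid.Executor(fluid.CPUPlace())
exe.run(fluid.default_startup_program())
a, b = exe.run(feed={'bin_label': np.array([[2], [5], [0]], dtype='int64')},
               fetch_list=[old_onehot, new_onehot])
print(a.shape, b.shape)  # (3, 12) (3, 12)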
......@@ -184,8 +184,7 @@ def pointnet_fp_module(unknown, known, unknown_feats, known_feats, mlp, bn=True,
dist.stop_gradient = True
idx.stop_gradient = True
dist = fluid.layers.sqrt(dist)
-ones = fluid.layers.fill_constant_batch_size_like(dist, dist.shape, dist.dtype, 1)
-dist_recip = ones / (dist + 1e-8); # 1.0 / dist
+dist_recip = 1.0 / (dist + 1e-8); # 1.0 / dist
norm = fluid.layers.reduce_sum(dist_recip, dim=-1, keep_dim=True)
weight = dist_recip / norm
weight.stop_gradient = True
......
......@@ -54,21 +54,31 @@ def cosine_warmup_decay(learning_rate, betas, warmup_factor, decay_factor,
warmup_step_var = fluid.layers.fill_constant(
shape=[1], dtype='float32', value=float(warmup_step), force_cpu=True)
-with control_flow.Switch() as switch:
-    with switch.case(global_step < warmup_step_var):
-        cur_lr = annealing_cos(warmup_start_lr, learning_rate,
-                               global_step / warmup_step_var)
-        fluid.layers.assign(cur_lr, lr)
-        cur_beta1 = annealing_cos(betas[0], betas[1],
-                                  global_step / warmup_step_var)
-        fluid.layers.assign(cur_beta1, beta1)
-    with switch.case(global_step >= warmup_step_var):
-        cur_lr = annealing_cos(learning_rate, decay_end_lr,
-                               (global_step - warmup_step_var) / (total_step - warmup_step))
-        fluid.layers.assign(cur_lr, lr)
-        cur_beta1 = annealing_cos(betas[1], betas[0],
-                                  (global_step - warmup_step_var) / (total_step - warmup_step))
-        fluid.layers.assign(cur_beta1, beta1)
+warmup_pred = global_step < warmup_step_var
+decay_pred = global_step >= warmup_step_var
+# learning rate warmup and decay
+def warmup_lr():
+    return annealing_cos(warmup_start_lr, learning_rate,
+                         global_step / warmup_step_var)
+def decay_lr():
+    return annealing_cos(learning_rate, decay_end_lr,
+                         (global_step - warmup_step_var) / (total_step - warmup_step))
+lr = fluid.layers.case(pred_fn_pairs=[(warmup_pred, warmup_lr),
+                                      (decay_pred, decay_lr)])
+# Adam beta1 warmup and decay
+def warmup_beta1():
+    return annealing_cos(betas[0], betas[1],
+                         global_step / warmup_step_var)
+def decay_beta1():
+    return annealing_cos(betas[1], betas[0],
+                         (global_step - warmup_step_var) / (total_step - warmup_step))
+beta1 = fluid.layers.case(pred_fn_pairs=[(warmup_pred, warmup_beta1),
+                                         (decay_pred, decay_beta1)])
return lr, beta1
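For reference, fluid.layers.case takes a list of (condition, callable) pairs and returns the output of the first callable whose condition is true, which is what replaces the control_flow.Switch block above. A minimal sketch with made-up values; the names below are illustrative, not from the commit:

import paddle.fluid as fluid

# Illustrative sketch of fluid.layers.case with constant inputs.
step = fluid.layers.fill_constant(shape=[1], dtype='float32', value=30.0)
warmup_steps = fluid.layers.fill_constant(shape=[1], dtype='float32', value=100.0)

def in_warmup():
    # ramp the value while still warming up
    return 0.01 * step / warmup_steps

def after_warmup():
    return fluid.layers.fill_constant(shape=[1], dtype='float32', value=0.01)

lr = fluid.layers.case(pred_fn_pairs=[(step < warmup_steps, in_warmup),
                                      (step >= warmup_steps, after_warmup)])

exe = fluid.Executor(fluid.CPUPlace())
exe.run(fluid.default_startup_program())
print(exe.run(fetch_list=[lr])[0])  # roughly [0.003]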
......@@ -96,11 +106,11 @@ def optimize(loss,
raise ValueError("Unkown learning rate scheduler, should be "
"'cosine_warmup_decay'")
+grad_clip = fluid.clip.GradientClipByGlobalNorm(clip_norm=clip_norm)
optimizer = fluid.optimizer.Adam(learning_rate=scheduled_lr,
beta1=scheduled_beta1,
-beta2=beta2)
-fluid.clip.set_gradient_clip(
-clip=fluid.clip.GradientClipByGlobalNorm(clip_norm=clip_norm))
+beta2=beta2,
+grad_clip=grad_clip)
param_list = dict()
......
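For reference, this hunk drops the global fluid.clip.set_gradient_clip call in favor of handing a GradientClipByGlobalNorm instance to the optimizer through its grad_clip argument. A minimal sketch, assuming a Paddle release whose optimizers accept grad_clip; the toy network and hyper-parameters below are illustrative:

import paddle.fluid as fluid

# Illustrative toy regression network.
x = fluid.data(name='x', shape=[None, 8], dtype='float32')
y = fluid.data(name='y', shape=[None, 1], dtype='float32')
pred = fluid.layers.fc(input=x, size=1)
loss = fluid.layers.reduce_mean(fluid.layers.square_error_cost(pred, y))

# Clipping is attached to the optimizer itself instead of being set globally.
clip = fluid.clip.GradientClipByGlobalNorm(clip_norm=1.0)
optimizer = fluid.optimizer.Adam(learning_rate=1e-3,
                                 beta1=0.9,
                                 beta2=0.999,
                                 grad_clip=clip)
optimizer.minimize(loss)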