【cherry-pick】fix decay param and overflow in match_matrix (#22107)

* fix decay param in DecayAdagrad test=develop (#22026)
* fix integer overflow in match_matrix (#22036)
* fix integer overflow in match_matrix test=develop
* fix integer overflow in match_matrix test=develop
* fix typo test=develop

【cherry-pick】fix decay param and overflow in match_matrix (#22107)
* fix decay param in DecayAdagrad test=develop (#22026)
* fix integer overflow in match_matrix (#22036)
* fix integer overflow in match_matrix test=develop
* fix integer overflow in match_matrix test=develop
* fix typo test=develop
eb6d3396 · Aurelius84 · GitHub · 9b64d636 · eb6d3396 · eb6d3396
隐藏空白更改
内联并排

Showing with 21 additions and 24 deletions

paddle/fluid/operators/match_matrix_tensor_op.cc paddle/fluid/operators/match_matrix_tensor_op.cc +19 -23

python/paddle/fluid/optimizer.py python/paddle/fluid/optimizer.py +2 -1

未找到文件。
--- a/paddle/fluid/operators/match_matrix_tensor_op.cc
+++ b/paddle/fluid/operators/match_matrix_tensor_op.cc
@@ -56,8 +56,8 @@ void MatchMatrixTensorOP::InferShape(framework::InferShapeContext* ctx) const {
  PADDLE_ENFORCE_EQ(w_dims[2], y_dims[1],
                    "W 's shape must satisfy: W[2] = Y[1]");
-  int out_dim_0 = -1;
+  int64_t out_dim_0 = -1;
-  int tmp_dim_0 = -1;
+  int64_t tmp_dim_0 = -1;
  if (ctx->IsRuntime()) {
    framework::Variable* x_var =
        boost::get<framework::Variable*>(ctx->GetInputVarPtrs("X")[0]);
@@ -86,8 +86,8 @@ void MatchMatrixTensorOP::InferShape(framework::InferShapeContext* ctx) const {
    out_dim_0 = 0;
    for (size_t i = 1; i < x_lod_0.size(); i++) {
-      int x_len = x_lod_0[i] - x_lod_0[i - 1];
+      int64_t x_len = x_lod_0[i] - x_lod_0[i - 1];
-      int y_len = y_lod_0[i] - y_lod_0[i - 1];
+      int64_t y_len = y_lod_0[i] - y_lod_0[i - 1];
      out_dim_0 += (x_len * y_len);
    }
    out_dim_0 *= dim_t;
@@ -173,17 +173,17 @@ class CPUMatchMatrixTensorOPKernel : public framework::OpKernel<T> {
    auto* tmp = ctx.Output<LoDTensor>("Tmp");
    int dim_t = ctx.Attr<int>("dim_t");
-    int dim_in = x->dims()[1];
+    int64_t dim_in = x->dims()[1];
    const auto& offset_l = x->lod()[0];
    const auto& offset_r = y->lod()[0];
    std::vector<size_t> top_offset;
-    int top_size = 0;
+    size_t top_size = 0;
    top_offset.push_back(top_size);
    for (size_t b = 0; b < x->lod()[0].size() - 1; b++) {
-      int len_l = offset_l[b + 1] - offset_l[b];
+      size_t len_l = offset_l[b + 1] - offset_l[b];
-      int len_r = offset_r[b + 1] - offset_r[b];
+      size_t len_r = offset_r[b + 1] - offset_r[b];
      top_size += dim_t * len_l * len_r;
      top_offset.push_back(top_size);
    }
@@ -204,8 +204,8 @@ class CPUMatchMatrixTensorOPKernel : public framework::OpKernel<T> {
    for (size_t b = 0; b < x->lod()[0].size() - 1; b++) {
      for (int t = 0; t < dim_t; t++) {
-        int len_l = offset_l[b + 1] - offset_l[b];
+        size_t len_l = offset_l[b + 1] - offset_l[b];
-        int len_r = offset_r[b + 1] - offset_r[b];
+        size_t len_r = offset_r[b + 1] - offset_r[b];
        auto* top_data = out_data + top_offset[b] + t * len_l * len_r;
        const auto* l_t_data =
            bottom_l_trans_data + offset_l[b] * dim_t * dim_in + t * dim_in;
@@ -234,16 +234,16 @@ class CPUMatchMatrixTensorOPGradKernel : public framework::OpKernel<T> {
    auto* tmp = ctx.Input<LoDTensor>("Tmp");
    int dim_t = ctx.Attr<int>("dim_t");
-    int dim_in = x->dims()[1];
+    int64_t dim_in = x->dims()[1];
    const auto& offset_l = x->lod()[0];
    const auto& offset_r = y->lod()[0];
-    std::vector<int> top_offset;
+    std::vector<size_t> top_offset;
-    int top_size = 0;
+    size_t top_size = 0;
    top_offset.push_back(top_size);
    for (size_t b = 0; b < x->lod()[0].size() - 1; b++) {
-      int len_l = offset_l[b + 1] - offset_l[b];
+      size_t len_l = offset_l[b + 1] - offset_l[b];
-      int len_r = offset_r[b + 1] - offset_r[b];
+      size_t len_r = offset_r[b + 1] - offset_r[b];
      top_size += dim_t * len_l * len_r;
      top_offset.push_back(top_size);
    }
@@ -270,11 +270,11 @@ class CPUMatchMatrixTensorOPGradKernel : public framework::OpKernel<T> {
    for (size_t b = 0; b < x->lod()[0].size() - 1; b++) {
      for (int t = 0; t < dim_t; t++) {
-        int len_l = offset_l[b + 1] - offset_l[b];
+        size_t len_l = offset_l[b + 1] - offset_l[b];
-        int len_r = offset_r[b + 1] - offset_r[b];
+        size_t len_r = offset_r[b + 1] - offset_r[b];
-        for (int i = 0; i < len_l; i++) {
+        for (size_t i = 0; i < len_l; i++) {
-          for (int j = 0; j < len_r; j++) {
+          for (size_t j = 0; j < len_r; j++) {
            auto diff =
                top_diff[top_offset[b] + t * len_l * len_r + i * len_r + j];
            auto* l_trans_data = bottom_l_trans_data +
@@ -324,11 +324,7 @@ REGISTER_OPERATOR(match_matrix_tensor_grad, ops::MatchMatrixTensorOpGrad);
 REGISTER_OP_CPU_KERNEL(match_matrix_tensor,
                       ops::CPUMatchMatrixTensorOPKernel<
                           paddle::platform::CPUDeviceContext, float>);
-//     ops::CPUMatchMatrixTensorOPKernel<paddle::platform::CPUDeviceContext,
-//                                       double>
 REGISTER_OP_CPU_KERNEL(match_matrix_tensor_grad,
                       ops::CPUMatchMatrixTensorOPGradKernel<
                           paddle::platform::CPUDeviceContext, float>);
-//     ops::CPUMatchMatrixTensorOPGradKernel<paddle::platform::CPUDeviceContext,
-//                                           double>
--- a/python/paddle/fluid/optimizer.py
+++ b/python/paddle/fluid/optimizer.py
@@ -2060,7 +2060,8 @@ class DecayedAdagradOptimizer(Optimizer):
            },
            outputs={"ParamOut": param_and_grad[0],
                     "MomentOut": moment_acc},
-            attrs={"epsilon": self._epsilon},
+            attrs={"epsilon": self._epsilon,
+                   "decay": self._decay},
            stop_gradient=True)
        return decayed_adagrad_op