pyramid_hash_op.cc 21.8 KB
Newer Older
A
Aurelius84 已提交
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15
/* Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */

#include <xxhash.h>
16

A
Aurelius84 已提交
17 18
#include <algorithm>
#include <cmath>
19

20
#include "paddle/fluid/framework/convert_utils.h"
A
Aurelius84 已提交
21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64
#include "paddle/fluid/framework/op_registry.h"
#include "paddle/fluid/operators/search_compute.h"

extern "C" {
#include "math/bloomfilter.h"
}

namespace paddle {
namespace operators {

using Tensor = framework::Tensor;
using LoDTensor = framework::LoDTensor;
using LoD = framework::LoD;

// Proto description of the pyramid_hash operator: registers its inputs,
// attributes (with their defaults and range checkers) and outputs.
class PyramidHashOpMaker : public framework::OpProtoAndCheckerMaker {
 public:
  void Make() override {
    // Registers an int attribute whose description is its own name, whose
    // default is 0 and whose checker requires a value >= 0.
    const auto add_non_negative_int = [this](const char* attr_name) {
      AddAttr<int>(attr_name, attr_name).SetDefault(0).EqualGreaterThan(0);
    };

    // ---- inputs ----
    AddInput("X",
             "X (Tensor, MUST be Tensor<!!!_int32_!!!>) Input variable which "
             "should contain lod information.");
    AddInput("W", "W (Tensor)");
    AddInput("WhiteList", "WhiteList (Tensor)");
    AddInput("BlackList", "BlackList (Tensor)");

    // ---- attributes (registration order is kept as-is) ----
    add_non_negative_int("num_emb");
    add_non_negative_int("space_len");
    AddAttr<int>("pyramid_layer", "pyramid_layer (must be >= 2)")
        .SetDefault(2)
        .EqualGreaterThan(2);
    add_non_negative_int("rand_len");
    AddAttr<float>("drop_out_percent", "drop_out_percent")
        .SetDefault(0)
        .EqualGreaterThan(0);
    add_non_negative_int("is_training");
    AddAttr<bool>("use_filter", "use_filter").SetDefault(true);
    add_non_negative_int("white_list_len");
    add_non_negative_int("black_list_len");
    add_non_negative_int("seed");
    AddAttr<float>("lr", "learning rate").SetDefault(0.0).EqualGreaterThan(0.0);
    AddAttr<std::string>(
        "distribute_update_vars",
        "['PyramidHash_emb_0','Filter']"
        "Decided which params should be updated in distribute training. "
        "Used in Distribute Transpiler to create a trainer/server program.")
        .SetDefault("");

    // ---- outputs ----
    AddOutput("Out", "Out (Tensor, default Tensor<float>) Output variable");
    AddOutput("DropPos", "Out (Tensor, Tensor<int>) Output variable");
    // X_Temp_Out is flagged as an intermediate output.
    AddOutput("X_Temp_Out", "Out (Tensor, Tensor<int>) Output variable")
        .AsIntermediate();

    AddComment(R"DOC(
      PyramidHash

      NOTE: only support 'float32' data type now.

    )DOC");
  }
};

// Operator definition of pyramid_hash: validates inputs/attributes and
// infers the compile-time output shapes.
class PyramidHashOP : public framework::OperatorWithKernel {
 public:
  using framework::OperatorWithKernel::OperatorWithKernel;

  // Checks that the required inputs/outputs exist, that X and W are rank-2,
  // that W is shaped [space_len + rand_len, 1], that num_emb is a multiple of
  // rand_len, and that the optional WhiteList/BlackList inputs are shaped
  // [white_list_len, 1] / [black_list_len, 1]. At compile time it also sets
  // the dims of Out and X_Temp_Out and shares X's LoD with Out.
  void InferShape(framework::InferShapeContext* ctx) const override {
    PADDLE_ENFORCE_EQ(
        ctx->HasInput("X"),
        true,
        platform::errors::NotFound("Input(X) of PyramidHashOP is not found."));
    PADDLE_ENFORCE_EQ(
        ctx->HasInput("W"),
        true,
        platform::errors::NotFound("Input(W) of PyramidHashOP is not found."));
    PADDLE_ENFORCE_EQ(ctx->HasOutput("Out"),
                      true,
                      platform::errors::NotFound(
                          "Output(Out) of PyramidHashOP is not found."));
    PADDLE_ENFORCE_EQ(ctx->HasOutput("DropPos"),
                      true,
                      platform::errors::NotFound(
                          "Output(DropPos) of PyramidHashOP is not found."));

    auto x_dims = ctx->GetInputDim("X");
    PADDLE_ENFORCE_EQ(x_dims.size(),
                      2,
                      platform::errors::InvalidArgument(
                          "The rank of Input(X) of PyramidHashOP is invalid. "
                          "It should be 2, but got %d",
                          x_dims.size()));

    auto w_dims = ctx->GetInputDim("W");
    PADDLE_ENFORCE_EQ(w_dims.size(),
                      2,
                      platform::errors::InvalidArgument(
                          "The rank of Input(W) of PyramidHashOP is invalid. "
                          "It should be 2, but got %d",
                          w_dims.size()));

    int space_len = ctx->Attrs().Get<int>("space_len");
    int rand_len = ctx->Attrs().Get<int>("rand_len");

    PADDLE_ENFORCE_EQ(
        w_dims[0],
        space_len + rand_len,
        platform::errors::InvalidArgument(
            "The first dimension of Input(W) of PyramidHashOP is invalid. "
            "It should be space_len + rand_len, but now %d != %d + %d",
            w_dims[0],
            space_len,
            rand_len));
    PADDLE_ENFORCE_EQ(
        w_dims[1],
        1,
        platform::errors::InvalidArgument(
            "The second dimension of Input(W) of PyramidHashOP is invalid."
            " It should be 1, but got %d",
            w_dims[1]));

    // rand_len defaults to 0 and its attribute checker only requires >= 0, so
    // it must be rejected here: it is the divisor of the modulo below, and an
    // integer division by zero would otherwise crash the process.
    PADDLE_ENFORCE_GT(
        rand_len,
        0,
        platform::errors::InvalidArgument(
            "The PyramidHashOP's Attr(rand_len) should be greater than 0, "
            "but got %d",
            rand_len));

    int num_emb = ctx->Attrs().Get<int>("num_emb");
    PADDLE_ENFORCE_EQ(
        num_emb % rand_len,
        0,
        platform::errors::InvalidArgument(
            "The PyramidHashOP's Attr(num_emb) should mod Attr(rand_len), "
            "but num_emb is %d, rand_len is %d",
            num_emb,
            rand_len));

    // WhiteList is only required (and shape-checked) when white_list_len > 0.
    int white_list_len = ctx->Attrs().Get<int>("white_list_len");
    if (white_list_len > 0) {
      PADDLE_ENFORCE_EQ(
          ctx->HasInput("WhiteList"),
          true,
          platform::errors::NotFound("Input(WhiteList) of PyramidHashOP is not "
                                     "found but white_list_len > 0."));
      auto wl_dims = ctx->GetInputDim("WhiteList");
      PADDLE_ENFORCE_EQ(
          wl_dims.size(),
          2,
          platform::errors::InvalidArgument(
              "The rank of Input(WhiteList) of PyramidHashOP is invalid."
              " It should be 2, but got %d",
              wl_dims.size()));
      PADDLE_ENFORCE_EQ(wl_dims[0],
                        white_list_len,
                        platform::errors::InvalidArgument(
                            "The first dimension of Input(WhiteList) of "
                            "PyramidHashOP is invalid."
                            " It should be equal to Attr(white_list_len) "
                            ", but first dimension is %d, white_list_len is %d",
                            wl_dims[0],
                            white_list_len));
      PADDLE_ENFORCE_EQ(wl_dims[1],
                        1,
                        platform::errors::InvalidArgument(
                            "The second dimension of Input(WhiteList) of "
                            "PyramidHashOP is invalid."
                            " It should be 1, but got %d",
                            wl_dims[1]));
    }

    // BlackList is only required (and shape-checked) when black_list_len > 0.
    int black_list_len = ctx->Attrs().Get<int>("black_list_len");
    if (black_list_len > 0) {
      PADDLE_ENFORCE_EQ(
          ctx->HasInput("BlackList"),
          true,
          platform::errors::NotFound("Input(BlackList) of PyramidHashOP is not "
                                     "found but black_list_len > 0."));
      auto bl_dims = ctx->GetInputDim("BlackList");
      PADDLE_ENFORCE_EQ(
          bl_dims.size(),
          2,
          platform::errors::InvalidArgument(
              "The rank of Input(BlackList) of PyramidHashOP is invalid."
              " It should be 2, but got %d",
              bl_dims.size()));
      PADDLE_ENFORCE_EQ(bl_dims[0],
                        black_list_len,
                        platform::errors::InvalidArgument(
                            "The first dimension of Input(BlackList) of "
                            "PyramidHashOP is invalid."
                            " It should be equal to Attr(black_list_len)"
                            ", but first dimension is %d, black_list_len is %d",
                            bl_dims[0],
                            black_list_len));
      PADDLE_ENFORCE_EQ(bl_dims[1],
                        1,
                        platform::errors::InvalidArgument(
                            "The second dimension of Input(BlackList) of "
                            "PyramidHashOP is invalid."
                            " It should be 1, but got %d",
                            bl_dims[1]));
    }

    if (ctx->IsRuntime()) {
      // something to do in runtime.
    } else {
      // compile time: the number of output rows is not known yet, hence -1.
      ctx->SetOutputDim("Out", phi::make_ddim({-1, num_emb}));
      ctx->SetOutputDim("X_Temp_Out", x_dims);
      ctx->ShareLoD("X", /*->*/ "Out");
    }
  }

 protected:
  // The kernel is selected by the data type of Input(W) and the place of the
  // execution context.
  framework::OpKernelType GetExpectedKernelType(
      const framework::ExecutionContext& ctx) const override {
    return framework::OpKernelType(
        OperatorWithKernel::IndicateVarDataType(ctx, "W"), ctx.GetPlace());
  }
};

// CPU forward kernel of pyramid_hash. For every sequence of Input(X) it
// enumerates the n-grams of length 2..pyramid_layer, decides per n-gram
// whether to keep it (bloom filters + optional training-time random drop),
// and fills one row of Out per kept n-gram from rows of W selected by XXH32
// hashes of the n-gram.
template <typename DeviceContext, typename T>
class CPUPyramidHashOPKernel : public framework::OpKernel<T> {
 public:
  // Returns true when the term of `len` floats starting at `word_repr`
  // passes both filters: a null filter always passes; otherwise the white
  // filter lookup must return 1 and the black filter lookup must return 0.
  bool should_use_term(math::bloomfilter* _filter,
                       math::bloomfilter* _black_filter,
                       const float* word_repr,
                       int len) const {
    return (!_filter || 1 == math::bloomfilter_get(
                                 _filter, word_repr, len * sizeof(float))) &&
           (!_black_filter ||
            0 == math::bloomfilter_get(
                     _black_filter, word_repr, len * sizeof(float)));
  }

  // Fills top_pos[0, _num_emb) in chunks of _rand_len elements. The chunk
  // starting at offset j is copied from weights[pos, pos + _rand_len), where
  // pos = XXH32(hash_id bytes, seed = j) % _space_len. Hashes are computed two
  // chunks ahead (pos1 -> pos2 -> pos3) so the next source can be prefetched.
  //
  // hash_id  : `len` floats hashed as raw bytes.
  // top_pos  : destination row, at least _num_emb elements.
  // weights  : embedding table; InferShape requires space_len + rand_len rows.
  void hash_embedding_ff(const float* hash_id,
                         int len,
                         T* top_pos,
                         const T* weights,
                         int _num_emb,
                         int _rand_len,
                         int _space_len) const {
    unsigned int pos1 = XXH32(hash_id, len * sizeof(float), 0) % _space_len;
    unsigned int pos2 =
        XXH32(hash_id, len * sizeof(float), _rand_len) % _space_len;

    for (int j = 0; j != _num_emb; j += _rand_len) {
      if (j + _rand_len < _num_emb) {
        __builtin_prefetch(weights + pos2);
        __builtin_prefetch(top_pos + j + _rand_len);
      }

      unsigned int pos3 =
          XXH32(hash_id, len * sizeof(float), j + 2 * _rand_len) % _space_len;
      // memcpy takes a const source, so no const_cast is needed here.
      memcpy(top_pos + j, weights + pos1, _rand_len * sizeof(T));
      pos1 = pos2;
      pos2 = pos3;
    }
  }

  // Runs the forward computation in two passes over the sequences:
  //   pass 1 writes one 0/1 flag per candidate n-gram into DropPos and
  //          derives the LoD of Out and DropPos from the number of kept ones;
  //   pass 2 replays the flags and writes the embedding row of each kept
  //          n-gram (or a single zero row for sequences with nothing kept).
  void Compute(const framework::ExecutionContext& ctx) const override {
    auto* bottom = ctx.Input<LoDTensor>("X");
    auto* _blobs_0 = ctx.Input<Tensor>("W");
    auto* _blobs_1 = ctx.Input<Tensor>("WhiteList");
    auto* _blobs_2 = ctx.Input<Tensor>("BlackList");
    auto* top = ctx.Output<LoDTensor>("Out");
    auto* drop_pos = ctx.Output<LoDTensor>("DropPos");

    int _num_emb = ctx.Attr<int>("num_emb");
    bool use_filter = ctx.Attr<bool>("use_filter");
    int white_list_len = ctx.Attr<int>("white_list_len");
    int black_list_len = ctx.Attr<int>("black_list_len");
    int _pyramid_layer = ctx.Attr<int>("pyramid_layer");
    int _is_training = ctx.Attr<int>("is_training");
    int seed = ctx.Attr<int>("seed");
    unsigned int _seed = (unsigned int)seed;
    int _rand_len = ctx.Attr<int>("rand_len");
    int _space_len = ctx.Attr<int>("space_len");
    float _drop_out_percent = ctx.Attr<float>("drop_out_percent");

    // Everything below indexes lod()[0] and its first element; without this
    // check an input lacking LoD would be an out-of-bounds access.
    const auto& bottom_lod = bottom->lod();
    PADDLE_ENFORCE_EQ(
        bottom_lod.size() > 0 && bottom_lod[0].size() > 0,
        true,
        platform::errors::InvalidArgument(
            "Input(X) of PyramidHashOP should contain non-empty level-0 lod "
            "information."));

    const auto& offset = bottom_lod[0];
    const auto* bottom_data_ori = bottom->data<int32_t>();
    auto* buff = ctx.Output<LoDTensor>("X_Temp_Out");
    buff->Resize(phi::make_ddim({bottom->dims()[0], bottom->dims()[1]}));
    // X_Temp_Out holds the int32 ids of X converted to float; the n-grams are
    // hashed from this float buffer.
    float* bottom_data = buff->mutable_data<float>(ctx.GetPlace());
    for (int i = 0; i < bottom->dims()[0]; i++) {
      bottom_data[i] = bottom_data_ori[i];
    }

    const auto* weights = _blobs_0->data<T>();

    std::vector<size_t> top_offset;
    top_offset.resize(offset.size());
    top_offset[0] = 0;

    // The WhiteList/BlackList tensors are reinterpreted as serialized bloom
    // filters; a null pointer means "no filter of that kind".
    math::bloomfilter* _filter = nullptr;
    math::bloomfilter* _black_filter = nullptr;
    if (use_filter) {
      if (white_list_len != 0) {
        _filter = reinterpret_cast<math::bloomfilter*>(
            const_cast<float*>(_blobs_1->data<float>()));
        PADDLE_ENFORCE_EQ(
            math::bloomfilter_check(_filter),
            1,
            platform::errors::PreconditionNotMet(
                "The white filter is not loaded successfully, please make sure "
                "'white_list_len': %d is valid for Input(WhiteList).",
                white_list_len));
      }
      if (black_list_len != 0) {
        _black_filter = reinterpret_cast<math::bloomfilter*>(
            const_cast<float*>(_blobs_2->data<float>()));
        PADDLE_ENFORCE_EQ(
            math::bloomfilter_check(_black_filter),
            1,
            platform::errors::PreconditionNotMet(
                "The black filter is not loaded successfully, please make sure "
                "'black_list_len': %d is valid for Input(BlackList).",
                black_list_len));
      }
    }

    drop_pos->Resize(phi::make_ddim(
        {bottom->dims()[0] * bottom->dims()[1] * _pyramid_layer, 1}));
    std::vector<size_t> drop_pos_offset;
    drop_pos_offset.resize(offset.size());
    drop_pos_offset[0] = 0;
    // [iter, iter_end) is the range of flags written for the current
    // sequence; iter_end always points one past the last flag written so far.
    int* iter = drop_pos->mutable_data<int>(ctx.GetPlace());
    int* iter_end = iter;

    // ---- pass 1: decide keep/drop for every candidate n-gram ----
    for (size_t i = 0; i < top_offset.size() - 1; ++i) {
      int w = offset[i + 1] - offset[i];
      int nsentense_with_pyramid = 0;
      if (w < 2) {
        nsentense_with_pyramid = 0;
      } else {
        for (int ilayer = 1; ilayer < _pyramid_layer && ilayer < w; ++ilayer) {
          for (int l = 0; l < w - ilayer; ++l) {
            if (should_use_term(_filter,
                                _black_filter,
                                bottom_data + offset[i] + l,
                                ilayer + 1)) {
              if (_is_training != 0) {
                // Training: drop the term when the random rate falls below
                // drop_out_percent.
                unsigned int rand_val = rand_r(&_seed);
                float rate = static_cast<float>(rand_val) / (RAND_MAX);
                *(iter_end++) = (rate < _drop_out_percent ? 0 : 1);
              } else {
                *(iter_end++) = 1;
              }
            } else {
              *(iter_end++) = 0;
            }
          }
        }
        nsentense_with_pyramid = std::count(iter, iter_end, 1);
        iter = iter_end;
      }
      drop_pos_offset[i + 1] = drop_pos_offset[i] + nsentense_with_pyramid;
      // A sequence with nothing kept still occupies one (zero) row in Out.
      top_offset[i + 1] =
          top_offset[i] +
          (nsentense_with_pyramid == 0 ? 1 : nsentense_with_pyramid);
    }

    int top_l = top_offset[top_offset.size() - 1];

    framework::LoD top_lod;
    top_lod.push_back(top_offset);
    top->set_lod(top_lod);
    top->Resize(phi::make_ddim({top_l, _num_emb}));
    auto* top_data = top->mutable_data<T>(ctx.GetPlace());

    framework::LoD drop_pos_lod;
    drop_pos_lod.push_back(drop_pos_offset);
    drop_pos->set_lod(drop_pos_lod);

    // ---- pass 2: replay the flags and emit the output rows ----
    iter = drop_pos->mutable_data<int>(ctx.GetPlace());
    int top_counter = 0;
    for (size_t i = 0; i < offset.size() - 1; ++i) {
      int w_drop = drop_pos_offset[i + 1] - drop_pos_offset[i];
      int w = offset[i + 1] - offset[i];
      if (w_drop == 0) {
        // Nothing kept: skip this sequence's flags and emit one zero row.
        if (w >= 2) {
          for (int ilayer = 1; ilayer < _pyramid_layer && ilayer < w;
               ++ilayer) {
            for (int l = 0; l < w - ilayer; ++l) {
              iter++;
            }
          }
        }
        auto* top_pos = top_data + top_counter++ * _num_emb;
        memset(top_pos, 0, _num_emb * sizeof(T));
        continue;
      }
      if (w >= 2) {
        for (int ilayer = 1; ilayer < _pyramid_layer && ilayer < w; ++ilayer) {
          for (int l = 0; l < w - ilayer; ++l) {
            if (*(iter++) == 0) {
              // do nothing
            } else {
              auto* top_pos = top_data + top_counter++ * _num_emb;
              hash_embedding_ff(bottom_data + offset[i] + l,
                                ilayer + 1,
                                top_pos,
                                weights,
                                _num_emb,
                                _rand_len,
                                _space_len);
            }
          }
        }
      }
    }
    // Both passes must have walked exactly the same flags. Report a mismatch
    // through the framework's error mechanism instead of terminating the
    // whole process with exit(1).
    PADDLE_ENFORCE_EQ(
        iter == iter_end,
        true,
        platform::errors::PreconditionNotMet(
            "The number of DropPos flags consumed by PyramidHashOP does not "
            "match the number of flags it generated."));

    auto weight_type = framework::TransToProtoVarType(_blobs_0->dtype());
    if (_is_training == 0 && weight_type != framework::proto::VarType::INT8) {
      // Inference with non-INT8 weights: Out is passed through axpy_noadd
      // in place with drop_out_percent as the coefficient.
      axpy_noadd(top_data,
                 top_data,
                 top->dims()[0] * top->dims()[1],
                 _drop_out_percent);
    }
  }
};

// Operator definition of pyramid_hash_grad: only verifies that every input
// the backward kernel reads is present.
class PyramidHashOpGrad : public framework::OperatorWithKernel {
 public:
  using framework::OperatorWithKernel::OperatorWithKernel;

  // Each required input is looked up and checked in turn; the first missing
  // one raises NotFound.
  void InferShape(framework::InferShapeContext* ctx) const override {
    const bool has_x = ctx->HasInput("X");
    PADDLE_ENFORCE_EQ(has_x,
                      true,
                      platform::errors::NotFound(
                          "Input(X) of PyramidHashOpGrad is not found."));

    const bool has_w = ctx->HasInput("W");
    PADDLE_ENFORCE_EQ(has_w,
                      true,
                      platform::errors::NotFound(
                          "Input(W) of PyramidHashOpGrad is not found."));

    const bool has_drop_pos = ctx->HasInput("DropPos");
    PADDLE_ENFORCE_EQ(has_drop_pos,
                      true,
                      platform::errors::NotFound(
                          "Input(DropPos) of PyramidHashOpGrad is not found."));

    const bool has_x_temp_out = ctx->HasInput("X_Temp_Out");
    PADDLE_ENFORCE_EQ(
        has_x_temp_out,
        true,
        platform::errors::NotFound(
            "Input(X_Temp_Out) of PyramidHashOpGrad is not found."));

    const bool has_out_grad = ctx->HasInput(framework::GradVarName("Out"));
    PADDLE_ENFORCE_EQ(
        has_out_grad,
        true,
        platform::errors::NotFound(
            "Input(Out@Grad) of PyramidHashOpGrad is not found."));
  }

 protected:
  // Kernel dispatch follows the data type of Input(W) and the context place.
  framework::OpKernelType GetExpectedKernelType(
      const framework::ExecutionContext& ctx) const override {
    const auto w_dtype = OperatorWithKernel::IndicateVarDataType(ctx, "W");
    return framework::OpKernelType(w_dtype, ctx.GetPlace());
  }
};

H
hong 已提交
477 478
// Builds the description of the pyramid_hash_grad op from the forward op.
template <typename T>
class PyramidHashGradOpMaker : public framework::SingleGradOpMaker<T> {
 public:
  using framework::SingleGradOpMaker<T>::SingleGradOpMaker;

 protected:
  // Wires the grad op: forward inputs X and W, forward outputs DropPos and
  // X_Temp_Out, the gradient of Out as input, the gradient of X as output,
  // and a copy of the forward attributes.
  void Apply(GradOpPtr<T> op_desc_ptr) const override {
    op_desc_ptr->SetType("pyramid_hash_grad");

    // Forward inputs are passed through under the same names.
    for (const char* fwd_input : {"X", "W"}) {
      op_desc_ptr->SetInput(fwd_input, this->Input(fwd_input));
    }
    // Forward outputs become inputs of the grad op.
    for (const char* fwd_output : {"DropPos", "X_Temp_Out"}) {
      op_desc_ptr->SetInput(fwd_output, this->Output(fwd_output));
    }

    const auto out_grad_name = framework::GradVarName("Out");
    op_desc_ptr->SetInput(out_grad_name, this->OutputGrad("Out"));

    const auto x_grad_name = framework::GradVarName("X");
    op_desc_ptr->SetOutput(x_grad_name, this->InputGrad("X"));

    op_desc_ptr->SetAttrMap(this->Attrs());
  }
};

// CPU backward kernel of pyramid_hash. It replays the keep/drop flags stored
// in DropPos and, for every kept n-gram, applies the matching row of Out@GRAD
// to W in place through axpy with coefficient -lr.
template <typename DeviceContext, typename T>
class CPUPyramidHashOPGradKernel : public framework::OpKernel<T> {
 public:
  // Applies one output-gradient row to the weights. For each chunk of
  // _rand_len elements starting at offset j, the target position in `weights`
  // is XXH32(hash_id bytes, seed = j) % _space_len, and axpy is invoked on
  // (top_pos + j, weights + pos) with length _rand_len and coefficient mlr.
  void hash_embedding_bp(const T* hash_id,
                         int len,
                         const T* top_pos,
                         T* weights,
                         T mlr,
                         int _num_emb,
                         int _rand_len,
                         int _space_len) const {
    const size_t key_bytes = len * sizeof(T);
    for (int j = 0; j != _num_emb; j += _rand_len) {
      const unsigned int hashed = XXH32(hash_id, key_bytes, j);
      const unsigned int pos = hashed % _space_len;
      axpy(top_pos + j, weights + pos, _rand_len, mlr);
    }
  }

  void Compute(const framework::ExecutionContext& ctx) const override {
    // Tensors.
    auto* bottom = ctx.Input<LoDTensor>("X");
    auto* _blobs = ctx.Input<Tensor>("W");
    auto* drop_pos = ctx.Input<LoDTensor>("DropPos");
    auto* top = ctx.Input<LoDTensor>(framework::GradVarName("Out"));
    auto* buff = ctx.Input<LoDTensor>("X_Temp_Out");

    // Attributes.
    const int _num_emb = ctx.Attr<int>("num_emb");
    const float _lr = ctx.Attr<float>("lr");
    const int _rand_len = ctx.Attr<int>("rand_len");
    const int _space_len = ctx.Attr<int>("space_len");
    const int _pyramid_layer = ctx.Attr<int>("pyramid_layer");

    auto* bottom_data = buff->data<T>();

    // Nothing to update when every sequence holds exactly one element and
    // all of those elements equal -1.
    const int _slot_len = bottom->dims()[0];
    const bool one_elem_per_seq =
        static_cast<size_t>(_slot_len) == bottom->lod()[0].size() - 1;
    if (one_elem_per_seq &&
        std::count(bottom_data, bottom_data + _slot_len, -1) == _slot_len) {
      return;
    }

    auto& offset = bottom->lod()[0];
    auto& drop_pos_offset = drop_pos->lod()[0];

    const auto* top_diff = top->data<T>();
    // W is updated in place, which is why constness is cast away.
    T* weights = const_cast<T*>(_blobs->data<T>());
    const T mlr = -1.0 * _lr;

    const int* flag = drop_pos->data<int>();
    int row = 0;  // index of the next row of Out@GRAD to consume
    const size_t seq_end = offset.size() - 1;
    for (size_t seq = 0; seq < seq_end; ++seq) {
      const int w = offset[seq + 1] - offset[seq];
      const int w_drop = drop_pos_offset[seq + 1] - drop_pos_offset[seq];

      // A sequence with no kept n-gram still owns one row of Out@GRAD, so
      // that row has to be skipped.
      if (w_drop == 0) {
        row++;
      }
      if (w <= 1) {
        continue;  // no n-gram of length >= 2 exists
      }

      for (int ilayer = 1; ilayer < _pyramid_layer && ilayer < w; ++ilayer) {
        const int ngram_count = w - ilayer;
        for (int l = 0; l < ngram_count; ++l) {
          const int kept = *(flag++);
          if (kept != 0) {
            const T* top_pos = top_diff + row++ * _num_emb;
            hash_embedding_bp(bottom_data + offset[seq] + l,
                              ilayer + 1,
                              top_pos,
                              weights,
                              mlr,
                              _num_emb,
                              _rand_len,
                              _space_len);
          }
        }
      }
    }
  }
};

}  // namespace operators
}  // namespace paddle

// Short aliases used by the registrations below.
namespace ops = paddle::operators;
namespace plt = paddle::platform;
namespace frm = paddle::framework;

// Forward operator, together with its grad-op makers for the static-graph
// (OpDesc) and imperative (OpBase) modes.
REGISTER_OPERATOR(pyramid_hash,
                  ops::PyramidHashOP,
                  ops::PyramidHashOpMaker,
                  ops::PyramidHashGradOpMaker<frm::OpDesc>,
                  ops::PyramidHashGradOpMaker<paddle::imperative::OpBase>);

// Backward operator.
REGISTER_OPERATOR(pyramid_hash_grad, ops::PyramidHashOpGrad);

// CPU forward kernels: float and int8_t.
REGISTER_OP_CPU_KERNEL(
    pyramid_hash,
    ops::CPUPyramidHashOPKernel<paddle::platform::CPUDeviceContext, float>,
    ops::CPUPyramidHashOPKernel<paddle::platform::CPUDeviceContext, int8_t>);

// CPU backward kernel: float only.
REGISTER_OP_CPU_KERNEL(
    pyramid_hash_grad,
    ops::CPUPyramidHashOPGradKernel<paddle::platform::CPUDeviceContext,
                                    float>);