Unverified commit 2616d51a, authored by Chenxiao Niu, committed by GitHub

[MLU] add rnn forward kernel. (#43894)

Parent 1efc80c6
@@ -620,6 +620,80 @@ MLUCnnlDCNDesc::~MLUCnnlDCNDesc() {
}
}
MLUSeqDataDesc::MLUSeqDataDesc(cnnlSeqDataLayout_t layout,
cnnlDataType_t dtype,
int dimNb,
const int dimSize[],
int seqLengthArraySize,
const int seqLengthArray[],
void* paddingFill) {
PADDLE_ENFORCE_MLU_SUCCESS(cnnlCreateSeqDataDescriptor(&seq_data_desc_));
PADDLE_ENFORCE_MLU_SUCCESS(cnnlSetSeqDataDescriptor(seq_data_desc_,
layout,
dtype,
dimNb,
dimSize,
seqLengthArraySize,
seqLengthArray,
paddingFill));
}
const cnnlSeqDataDescriptor_t MLUSeqDataDesc::get() const {
return seq_data_desc_;
}
MLUSeqDataDesc::~MLUSeqDataDesc() {
if (seq_data_desc_) {
PADDLE_ENFORCE_MLU_SUCCESS(cnnlDestroySeqDataDescriptor(seq_data_desc_));
}
}
MLURNNDesc::MLURNNDesc(const int hidden_size,
const int num_layers,
const cnnlRNNInputMode_t input_mode,
const cnnlDirectionMode_t direction,
const cnnlRNNMode_t rnn_mode) {
PADDLE_ENFORCE_MLU_SUCCESS(cnnlCreateRNNDescriptor(&rnn_desc_));
PADDLE_ENFORCE_MLU_SUCCESS(cnnlSetRNNDescriptor(
rnn_desc_, hidden_size, num_layers, input_mode, direction, rnn_mode));
}
MLURNNDesc::MLURNNDesc(cnnlRNNMode_t cell_mode,
cnnlRNNBiasMode_t bias_mode,
cnnlDirectionMode_t direction,
cnnlRNNInputMode_t input_mode,
cnnlDataType_t data_type,
cnnlDataType_t math_prec,
int input_size,
int hidden_size,
int proj_size,
int layer_num,
void* dropout_desc,
cnnlRNNPaddingMode_t padding_mode) {
PADDLE_ENFORCE_MLU_SUCCESS(cnnlCreateRNNDescriptor(&rnn_desc_));
PADDLE_ENFORCE_MLU_SUCCESS(cnnlSetRNNDescriptor_v2(rnn_desc_,
cell_mode,
bias_mode,
direction,
input_mode,
data_type,
math_prec,
input_size,
hidden_size,
proj_size,
layer_num,
dropout_desc,
padding_mode));
}
const cnnlRNNDescriptor_t MLURNNDesc::get() const { return rnn_desc_; }
MLURNNDesc::~MLURNNDesc() {
if (rnn_desc_) {
PADDLE_ENFORCE_MLU_SUCCESS(cnnlDestroyRNNDescriptor(rnn_desc_));
}
}
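For orientation, a minimal usage sketch of the two new RAII wrappers (illustrative only, not part of this diff; the sizes and CNNL_DTYPE_FLOAT are assumed placeholders):

// Hypothetical descriptor setup for a unidirectional float32 LSTM.
const int seq_len = 12, batch = 5, input_size = 3, hidden_size = 2;
int dims[3] = {seq_len, batch, input_size};
std::vector<int> seq_lens(batch, seq_len);  // one length per batch element
MLUSeqDataDesc x_desc(CNNL_SEQDATA_TNC, CNNL_DTYPE_FLOAT, 3, dims,
                      static_cast<int>(seq_lens.size()), seq_lens.data(),
                      nullptr /*paddingFill*/);
MLURNNDesc rnn_desc(CNNL_LSTM, CNNL_RNN_DOUBLE_BIAS, CNNL_RNN_UNIDIRECTIONAL,
                    CNNL_RNN_LINEAR_INPUT, CNNL_DTYPE_FLOAT, CNNL_DTYPE_FLOAT,
                    input_size, hidden_size, /*proj_size=*/hidden_size,
                    /*layer_num=*/1, nullptr /*dropout_desc*/,
                    CNNL_RNN_PADDED_IO_DISABLED);
// Both underlying cnnl descriptors are destroyed automatically when the
// wrappers go out of scope.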
/* static */ void MLUCnnl::Active(const ExecutionContext& ctx,
cnnlActivationDescriptor_t active_desc,
const cnnlTensorDescriptor_t input_desc,
@@ -4471,6 +4545,105 @@ MLUCnnlDCNDesc::~MLUCnnlDCNDesc() {
output));
}
/* static */ void MLUCnnl::RNNForward(const ExecutionContext& ctx,
const cnnlRNNDescriptor_t rnn_desc,
const int dev_seq_lengths[],
const void* weight_param_ptr,
size_t weightspace_size,
const cnnlSeqDataDescriptor_t x_desc,
const void* x,
const cnnlSeqDataDescriptor_t y_desc,
void* y,
const cnnlTensorDescriptor_t h_desc,
const void* hx,
void* hy,
const cnnlTensorDescriptor_t c_desc,
const void* cx,
void* cy,
void* reservespace_ptr) {
cnnlHandle_t handle = GetHandleFromCTX(ctx);
// make sure that 1) cnnlSetRNNDescriptor_v2 has been invoked and
// 2) x_desc is not NULL
PADDLE_ENFORCE_NOT_NULL(
rnn_desc,
paddle::platform::errors::Fatal(
"MLU RNNForward failed. rnn_desc initializing failed."));
PADDLE_ENFORCE_NOT_NULL(
x_desc,
paddle::platform::errors::Fatal(
"MLU RNNForward failed. x_desc initializing failed."));
auto& dev_ctx = GetDevCtxFromCTX(ctx);
size_t workspace_size, reservespace_size;
Tensor workspace;
PADDLE_ENFORCE_MLU_SUCCESS(cnnlGetRNNTempSizes(
handle, rnn_desc, x_desc, &workspace_size, &reservespace_size));
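// Note: the workspace allocated below is transient scratch for this call,
// while reservespace_ptr is caller-provided and must be kept alive for a
// subsequent backward pass.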
workspace = ctx.AllocateTmpTensor<int8_t, MLUDeviceContext>(
{static_cast<int64_t>(workspace_size)}, dev_ctx);
void* workspace_ptr = workspace.mutable_data(ctx.GetPlace());
PADDLE_ENFORCE_MLU_SUCCESS(cnnlRNNForwardTraining(handle,
rnn_desc,
dev_seq_lengths,
x_desc,
x,
y_desc,
y,
h_desc,
hx,
hy,
c_desc,
cx,
cy,
weight_param_ptr,
weightspace_size,
workspace_ptr,
workspace_size,
reservespace_ptr,
reservespace_size));
}
/* static */ void MLUCnnl::Mask(const ExecutionContext& ctx,
cnnlMaskedOp_t masked_mode,
const cnnlTensorDescriptor_t input_desc,
const void* input,
const cnnlTensorDescriptor_t masked_desc,
const void* masked,
const cnnlTensorDescriptor_t value_desc,
const void* value,
const cnnlTensorDescriptor_t output_desc,
void* output,
uint32_t* number) {
cnnlHandle_t handle = GetHandleFromCTX(ctx);
auto& dev_ctx = GetDevCtxFromCTX(ctx);
size_t workspace_size;
Tensor workspace;
PADDLE_ENFORCE_MLU_SUCCESS(cnnlGetMaskedWorkspaceSize(handle,
masked_mode,
input_desc,
masked_desc,
value_desc,
output_desc,
&workspace_size));
workspace = ctx.AllocateTmpTensor<int8_t, MLUDeviceContext>(
{static_cast<int64_t>(workspace_size)}, dev_ctx);
void* workspace_ptr = workspace.mutable_data(ctx.GetPlace());
PADDLE_ENFORCE_MLU_SUCCESS(cnnlMasked_v3(handle,
masked_mode,
input_desc,
input,
masked_desc,
masked,
value_desc,
value,
workspace_ptr,
workspace_size,
output_desc,
output,
number));
}
/* static */ void MLUCnnl::BceWithLogits(
const ExecutionContext& ctx,
cnnlBceWithLogitsReduction_t reduction,
...
@@ -495,6 +495,90 @@ class MLUCnnlDCNDesc {
cnnlDCNDescriptor_t dcn_desc_ = nullptr;
};
class MLUSeqDataDesc {
public:
MLUSeqDataDesc(const MLUSeqDataDesc& desc) = delete;
MLUSeqDataDesc& operator=(const MLUSeqDataDesc& desc) = delete;
MLUSeqDataDesc(cnnlSeqDataLayout_t layout,
cnnlDataType_t dtype,
int dimNb,
const int dimSize[],
int seqLengthArraySize,
const int seqLengthArray[],
void* paddingFill);
const cnnlSeqDataDescriptor_t get() const;
~MLUSeqDataDesc();
private:
cnnlSeqDataDescriptor_t seq_data_desc_ = nullptr;
};
class MLURNNDesc {
public:
MLURNNDesc(const MLURNNDesc& desc) = delete;
MLURNNDesc& operator=(const MLURNNDesc& desc) = delete;
MLURNNDesc(const int hidden_size,
const int num_layers,
const cnnlRNNInputMode_t input_mode,
const cnnlDirectionMode_t direction,
const cnnlRNNMode_t rnn_mode);
MLURNNDesc(cnnlRNNMode_t cell_mode,
cnnlRNNBiasMode_t bias_mode,
cnnlDirectionMode_t direction,
cnnlRNNInputMode_t input_mode,
cnnlDataType_t data_type,
cnnlDataType_t math_prec,
int input_size,
int hidden_size,
int proj_size,
int layer_num,
void* dropout_desc,
cnnlRNNPaddingMode_t padding_mode);
void SetRNNProjectionLayers(const int rec_proj_size,
const int out_proj_size) {
PADDLE_ENFORCE_MLU_SUCCESS(
cnnlSetRNNProjectionLayers(rnn_desc_, rec_proj_size, out_proj_size));
}
void SetPeepholeMode(const cnnlRNNPeepholeMode_t peephole_mode) {
PADDLE_ENFORCE_MLU_SUCCESS(
cnnlSetRNNPeepholeMode(rnn_desc_, peephole_mode));
}
void SetRNNBiasMode(const cnnlRNNBiasMode_t bias_mode) {
PADDLE_ENFORCE_MLU_SUCCESS(cnnlSetRNNBiasMode(rnn_desc_, bias_mode));
}
void SetRNNMaskMode(const cnnlRNNMaskMode_t mask_mode) {
PADDLE_ENFORCE_MLU_SUCCESS(cnnlSetRNNMaskMode(rnn_desc_, mask_mode));
}
void SetRNNClip(const cnnlRNNClipMode_t clip_mode,
const cnnlNanPropagation_t clip_nan_opt,
const double left_clip,
const double right_clip) {
PADDLE_ENFORCE_MLU_SUCCESS(cnnlSetRNNClip(
rnn_desc_, clip_mode, clip_nan_opt, left_clip, right_clip));
}
void SetRNNPaddingMode(const cnnlRNNPaddingMode_t padding_mode) {
PADDLE_ENFORCE_MLU_SUCCESS(cnnlSetRNNPaddingMode(rnn_desc_, padding_mode));
}
const cnnlRNNDescriptor_t get() const;
~MLURNNDesc();
private:
cnnlRNNDescriptor_t rnn_desc_ = nullptr;
};
class MLUCnnl {
public:
static void Active(const ExecutionContext& ctx,
@@ -1814,6 +1898,35 @@ class MLUCnnl {
const cnnlTensorDescriptor_t output_desc,
void* output);
static void RNNForward(const ExecutionContext& ctx,
const cnnlRNNDescriptor_t rnn_desc,
const int dev_seq_lengths[],
const void* weight_param_ptr,
size_t weightspace_size,
const cnnlSeqDataDescriptor_t x_desc,
const void* x,
const cnnlSeqDataDescriptor_t y_desc,
void* y,
const cnnlTensorDescriptor_t h_desc,
const void* hx,
void* hy,
const cnnlTensorDescriptor_t c_desc,
const void* cx,
void* cy,
void* reservespace_ptr);
static void Mask(const ExecutionContext& ctx,
cnnlMaskedOp_t masked_mode,
const cnnlTensorDescriptor_t input_desc,
const void* input,
const cnnlTensorDescriptor_t masked_desc,
const void* masked,
const cnnlTensorDescriptor_t value_desc,
const void* value,
const cnnlTensorDescriptor_t output_desc,
void* output,
uint32_t* number);
static void Transform(const ExecutionContext& ctx,
const void* alpha,
const void* beta,
...
/* Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "paddle/fluid/framework/op_registry.h"
#include "paddle/fluid/operators/mlu/mlu_baseop.h"
#include "paddle/fluid/operators/utils.h"
#include "paddle/fluid/platform/device/device_wrapper.h"
#include "paddle/fluid/platform/device/xpu/xpu_header.h"
#include "paddle/fluid/platform/device_context.h"
#include "paddle/phi/kernels/funcs/math_function.h"
namespace paddle {
namespace operators {
using Tensor = framework::Tensor;
using DDim = framework::DDim;
using TensorList = std::vector<framework::Tensor>;
template <typename TensorType, typename T>
void reset_parameter_vector(
const std::vector<TensorType>& raw_params_vec,
const int& num_layers,
const bool& is_bidirec,
std::vector<std::vector<std::pair<const T*, size_t>>>* params_vec) {
// The raw parameter sequence is [FWhi, FWhh, BWhi, BWhh] * num_layers
// + [FBhi, FBhh, BBhi, BBhh] * num_layers; we reorder the parameters into
// ([FWhi, FWhh, FBhi, FBhh] + [BWhi, BWhh, BBhi, BBhh]) * num_layers
// (see the index-mapping sketch after this function).
const int& direction_num = is_bidirec ? 2 : 1;
const int& layer_weight_size = 4 * direction_num;
const int& all_weight_size = num_layers * layer_weight_size;
const int& bias_start_idx = all_weight_size / 2;
for (int i = 0; i < num_layers; i++) {
params_vec->at(i).resize(layer_weight_size);
for (int j = 0; j < layer_weight_size; j++) {
int k = j % 4;
const int& section = j / 4;
int tensor_idx = i * 2 * direction_num + section * 2 + k % 2;
if (k >= 2) {
tensor_idx += bias_start_idx;
}
using remove_cv_t = typename std::remove_cv<T>::type;
params_vec->at(i)[j] = std::make_pair(
raw_params_vec[tensor_idx]->template data<remove_cv_t>(),
raw_params_vec[tensor_idx]->numel() * sizeof(T));
}
}
}
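For illustration (not part of this commit), a standalone sketch that reproduces the tensor_idx arithmetic above for the single-layer bidirectional case:

#include <cstdio>

int main() {
  const int num_layers = 1;
  const int direction_num = 2;                      // is_bidirec == true
  const int layer_weight_size = 4 * direction_num;  // 8 entries per layer
  const int bias_start_idx = num_layers * layer_weight_size / 2;  // 4
  const char* names[] = {"FWhi", "FWhh", "FBhi", "FBhh",
                         "BWhi", "BWhh", "BBhi", "BBhh"};
  for (int j = 0; j < layer_weight_size; j++) {
    int k = j % 4;
    int section = j / 4;                   // 0: forward, 1: backward
    int tensor_idx = section * 2 + k % 2;  // i == 0, so i*2*direction_num == 0
    if (k >= 2) tensor_idx += bias_start_idx;
    printf("%s <- raw_params_vec[%d]\n", names[j], tensor_idx);
  }
  // Prints: FWhi<-0, FWhh<-1, FBhi<-4, FBhh<-5,
  //         BWhi<-2, BWhh<-3, BBhi<-6, BBhh<-7
  return 0;
}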
template <typename DeviceContext, typename T>
class RNNMLUKernel : public framework::OpKernel<T> {
public:
void Compute(const framework::ExecutionContext& ctx) const override {
// Input
auto& dev_ctx = GetDevCtxFromCTX(ctx);
auto* input = ctx.Input<Tensor>("Input");
auto pre_state = ctx.MultiInput<Tensor>("PreState");
auto weight_list = ctx.MultiInput<framework::Tensor>("WeightList");
bool has_seq_length = ctx.HasInput("SequenceLength");
// Output
auto state = ctx.MultiOutput<Tensor>("State");
auto* output = ctx.Output<Tensor>("Out");
// auto* dropout_mask = ctx.Output<Tensor>("DropoutState");
auto* reserve_data = ctx.Output<Tensor>("Reserve");
// Attributes
const int& num_layers = ctx.Attr<int>("num_layers");
const bool& is_bidirec = ctx.Attr<bool>("is_bidirec");
const int& hidden_size = ctx.Attr<int>("hidden_size");
const std::string& mode = ctx.Attr<std::string>("mode");
const Tensor* sequence_length = nullptr;
if (has_seq_length) {
sequence_length = ctx.Input<Tensor>("SequenceLength");
}
// if (dropout_mask->IsInitialized()) {
// if (dropout_mask->numel() != output->numel()) dropout_mask->clear();
// }
// dropout_mask->mutable_data<uint8_t>(output->dims(), ctx.GetPlace());
// auto& dev_ctx = ctx.template device_context<DeviceContext>();
// phi::funcs::SetConstant<platform::XPUDeviceContext, uint8_t> ones;
// ones(dev_ctx, dropout_mask, static_cast<uint8_t>(1));
auto init_h = pre_state[0]; // -> hx
auto init_c = pre_state[1]; // -> cx
auto last_h = state[0];
auto last_c = state[1];
// check shape
const int in_out_dim_num = input->dims().size();
const int& seq_len = input->dims()[0]; // time_step
const int& batch_size = input->dims()[1];
const int& input_dim = input->dims()[2];
const int& direction_num = is_bidirec ? 2 : 1;
int in_dim_arr[in_out_dim_num] = {seq_len, batch_size, input_dim};
int out_dim_arr[in_out_dim_num] = {
seq_len, batch_size, direction_num * hidden_size};
int proj_size = hidden_size;
std::vector<int> seq_len_vec(batch_size, seq_len);
if (has_seq_length) {
// Replace the default full-length entries with each sample's own length.
seq_len_vec = operators::GetDataFromTensor(sequence_length);
}
cnnlDirectionMode_t direction =
is_bidirec ? CNNL_RNN_BIDIRECTIONAL : CNNL_RNN_UNIDIRECTIONAL;
PADDLE_ENFORCE_EQ(
mode,
"LSTM",
platform::errors::InvalidArgument(
"MLU only support LSTM mode now, current mode is %s", mode));
PADDLE_ENFORCE_EQ(
num_layers,
1,
platform::errors::InvalidArgument(
"MLU only support 1 num_layers, current num_layers is %s",
num_layers));
PADDLE_ENFORCE_EQ(
init_h->dims()[0],
num_layers * direction_num,
platform::errors::InvalidArgument("The num_layers of in RNN layer must"
" be the same as first dim of init "
"hidden, but received num_layers:%d,"
" dim:%d",
num_layers,
init_h->dims()[0]));
PADDLE_ENFORCE_EQ(
init_c->dims()[0],
num_layers * direction_num,
platform::errors::InvalidArgument(
"The num_layers of in RNN layer must"
" be the same as first dim of cell state hidden, but received"
" num_layers:%d, dim:%d",
num_layers,
init_c->dims()[0]));
// weightlist
std::vector<std::vector<std::pair<const T*, size_t>>> parameter_lists;
parameter_lists.resize(num_layers);
reset_parameter_vector(
weight_list, num_layers, is_bidirec, &parameter_lists);
// init the output and allocate the memory
output->mutable_data<T>(ctx.GetPlace()); // -> y in cnnl
last_h->mutable_data<T>(ctx.GetPlace()); // -> hy in cnnl
last_c->mutable_data<T>(ctx.GetPlace()); // -> cy in cnnl
MLUSeqDataDesc input_seq_data_desc(CNNL_SEQDATA_TNC,
ToCnnlDataType(input->dtype()),
in_out_dim_num,
in_dim_arr,
static_cast<int>(seq_len_vec.size()),
seq_len_vec.data(),
nullptr);
MLUSeqDataDesc out_seq_data_desc(CNNL_SEQDATA_TNC,
ToCnnlDataType(input->dtype()),
in_out_dim_num,
out_dim_arr,
static_cast<int>(seq_len_vec.size()),
seq_len_vec.data(),
nullptr);
MLUCnnlTensorDesc hx_desc(*init_h);
MLUCnnlTensorDesc cx_desc(*init_c);
MLURNNDesc rnn_desc(CNNL_LSTM,
CNNL_RNN_DOUBLE_BIAS,
direction,
CNNL_RNN_LINEAR_INPUT,
ToCnnlDataType(input->dtype()),
ToCnnlDataType(input->dtype()),
input_dim,
hidden_size,
/*projection*/ proj_size,
num_layers,
nullptr,
CNNL_RNN_PADDED_IO_DISABLED);
rnn_desc.SetRNNMaskMode(CNNL_LSTM_MASK_ENABLED);
// copy weight params
size_t weightspace_size;
framework::Tensor weightspace;
PADDLE_ENFORCE_MLU_SUCCESS(cnnlGetRNNWeightSpaceSize(
GetHandleFromCTX(ctx), rnn_desc.get(), &weightspace_size));
weightspace = ctx.AllocateTmpTensor<T, DeviceContext>(
{static_cast<int64_t>(weightspace_size)}, dev_ctx);
void* weightspace_ptr = weightspace.mutable_data(ctx.GetPlace());
auto w_x = parameter_lists[0][0];
auto w_h = parameter_lists[0][1];
auto b_x = parameter_lists[0][2];
auto b_h = parameter_lists[0][3];
auto actual_total_w_size =
w_x.second + w_h.second + b_x.second + b_h.second;
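// Expected weightspace layout, forward direction first:
// [ w_x | w_h | b_x | b_h ] (+ [ bw_x | bw_h | bb_x | bb_h ] if is_bidirec)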
void* w_x_ptr = weightspace_ptr;
void* w_h_ptr = static_cast<char*>(weightspace_ptr) + w_x.second;
void* b_x_ptr =
static_cast<char*>(weightspace_ptr) + w_x.second + w_h.second;
void* b_h_ptr = static_cast<char*>(weightspace_ptr) + w_x.second +
w_h.second + b_x.second;
memory::Copy(weightspace.place(),
w_x_ptr,
weightspace.place(),
w_x.first,
w_x.second,
nullptr);
memory::Copy(weightspace.place(),
w_h_ptr,
weightspace.place(),
w_h.first,
w_h.second,
nullptr);
memory::Copy(weightspace.place(),
b_x_ptr,
weightspace.place(),
b_x.first,
b_x.second,
nullptr);
memory::Copy(weightspace.place(),
b_h_ptr,
weightspace.place(),
b_h.first,
b_h.second,
nullptr);
if (is_bidirec) {
auto bw_x = parameter_lists[0][4];
auto bw_h = parameter_lists[0][5];
auto bb_x = parameter_lists[0][6];
auto bb_h = parameter_lists[0][7];
void* bw_x_ptr =
static_cast<char*>(weightspace_ptr) + actual_total_w_size;
void* bw_h_ptr = static_cast<char*>(weightspace_ptr) +
actual_total_w_size + bw_x.second;
void* bb_x_ptr = static_cast<char*>(weightspace_ptr) +
actual_total_w_size + bw_x.second + bw_h.second;
void* bb_h_ptr = static_cast<char*>(weightspace_ptr) +
actual_total_w_size + bw_x.second + bw_h.second +
bb_x.second;
actual_total_w_size +=
bw_x.second + bw_h.second + bb_x.second + bb_h.second;
memory::Copy(weightspace.place(),
bw_x_ptr,
weightspace.place(),
bw_x.first,
bw_x.second,
nullptr);
memory::Copy(weightspace.place(),
bw_h_ptr,
weightspace.place(),
bw_h.first,
bw_h.second,
nullptr);
memory::Copy(weightspace.place(),
bb_x_ptr,
weightspace.place(),
bb_x.first,
bb_x.second,
nullptr);
memory::Copy(weightspace.place(),
bb_h_ptr,
weightspace.place(),
bb_h.first,
bb_h.second,
nullptr);
}
PADDLE_ENFORCE_EQ(weightspace_size,
actual_total_w_size,
platform::errors::InvalidArgument(
"The weightsize doesn't match"
" weightspace_size:%d, actual_total_w_size:%d",
weightspace_size,
actual_total_w_size));
// get reservespace_ptr
int gate_num = 4;
int hidden_data_idx = (num_layers - 1);
hidden_data_idx += (gate_num + 1) * num_layers;
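// For the only supported configuration (num_layers = 1, gate_num = 4):
// hidden_data_idx = (1 - 1) + (4 + 1) * 1 = 5 blocks of size block_size.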
const int& block_size = direction_num * seq_len * batch_size * hidden_size;
reserve_data->Resize({hidden_data_idx, block_size});
reserve_data->mutable_data<T>(ctx.GetPlace());
MLUCnnl::RNNForward(ctx,
rnn_desc.get(),
seq_len_vec.data(),
weightspace_ptr,
weightspace_size,
input_seq_data_desc.get(),
GetBasePtr(input),
out_seq_data_desc.get(),
GetBasePtr(output),
hx_desc.get(),
GetBasePtr(init_h),
GetBasePtr(last_h),
cx_desc.get(),
GetBasePtr(init_c),
GetBasePtr(last_c),
GetBasePtr(reserve_data));
if (has_seq_length) {
// if has_seq_length, mask out the padded tail of the cnnlRNNForwardTraining output
auto masked_mode = CNNL_MASKED_FILL;
float off_value = 0.0f;
framework::Tensor on_value_tensor(input->dtype());
framework::Tensor masked_tensor(framework::TransToPhiDataType(VT::INT8));
framework::Tensor h_masked_tensor(
framework::TransToPhiDataType(VT::INT8));
on_value_tensor.Resize({1});
masked_tensor.Resize({seq_len, batch_size, direction_num * hidden_size});
h_masked_tensor.Resize(
{seq_len, batch_size, direction_num * hidden_size});
on_value_tensor.mutable_data<T>(ctx.GetPlace());
masked_tensor.mutable_data<int8_t>(ctx.GetPlace());
int8_t* h_masked_ptr =
h_masked_tensor.mutable_data<int8_t>(platform::CPUPlace());
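// Build the mask on the host: positions with t >= the sample's sequence
// length are padding and get filled with 0 by MLUCnnl::Mask below.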
for (int t = 0; t < seq_len; ++t) {
for (int n = 0; n < batch_size; ++n) {
for (int c = 0; c < direction_num * hidden_size; ++c) {
auto tmp_seq_len = seq_len_vec[n];
auto offset = t * batch_size * direction_num * hidden_size +
n * direction_num * hidden_size + c;
*(h_masked_ptr + offset) = t >= tmp_seq_len ? 1 : 0;
}
}
}
framework::TensorCopy(
h_masked_tensor, ctx.GetPlace(), dev_ctx, &masked_tensor);
dev_ctx.Wait();
FillMLUTensorWithHostValue(ctx, off_value, &on_value_tensor);
MLUCnnlTensorDesc on_value_desc(on_value_tensor);
MLUCnnlTensorDesc output_desc(*output);
MLUCnnlTensorDesc masked_desc(masked_tensor);
MLUCnnl::Mask(ctx,
masked_mode,
output_desc.get(),
GetBasePtr(output),
masked_desc.get(),
GetBasePtr(&masked_tensor),
on_value_desc.get(),
GetBasePtr(&on_value_tensor),
output_desc.get(),
GetBasePtr(output),
nullptr);
}
}
};
} // namespace operators
} // namespace paddle
namespace ops = paddle::operators;
REGISTER_OP_MLU_KERNEL(
rnn, ops::RNNMLUKernel<paddle::platform::MLUDeviceContext, float>);
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from __future__ import print_function
import unittest
import numpy as np
import math
import paddle.fluid.core as core
import paddle
import paddle.fluid as fluid
import paddle.fluid.layers as layers
import random
import sys
sys.path.append('..')
from op_test import OpTest
sys.path.append("../rnn")
from rnn_numpy import SimpleRNN, LSTM, GRU
from convert import get_params_for_net
random.seed(2)
np.set_printoptions(threshold=np.inf)
paddle.enable_static()
class TestRNNOp(OpTest):
def get_weight_names(self):
weight_names = []
for i in range(self.num_layers):
for j in range(0, 2 * self.direction_num):
weight_names.append("{}.weight_{}".format(i, j))
for i in range(self.num_layers):
for j in range(0, 2 * self.direction_num):
weight_names.append("{}.bias_{}".format(i, j))
return weight_names
def setUp(self):
self.__class__.use_mlu = True
self.place = paddle.device.MLUPlace(0)
self.in_type = np.float32
self.init_dtype()
self.init_size()
self.op_type = "rnn"
self.sequence_length = np.array([12, 11, 10, 9, 8], dtype=np.int32)
self.num_layers = 1
self.is_bidirec = False
self.mode = "LSTM"
self.is_test = False
self.dropout = 0.0
self.set_attrs()
self.direction_num = 2 if self.is_bidirec else 1
direction = "bidirectional" if self.is_bidirec else "forward"
input = np.random.uniform(low=-0.1,
high=0.1,
size=(self.seq_length, self.batch_size,
self.input_size)).astype(self.dtype)
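        # Zero the inputs beyond each sample's length in sequence_length
        # ([12, 11, 10, 9, 8]) so the padded timesteps carry no signal.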
input[11][1:][:] = 0
input[10][2:][:] = 0
input[9][3:][:] = 0
input[8][4:][:] = 0
rnn1 = LSTM(self.input_size,
self.hidden_size,
num_layers=self.num_layers,
time_major=True,
direction=direction,
dropout=self.dropout,
dtype=self.dtype)
flat_w = get_params_for_net(rnn1)
output, (last_hidden,
last_cell) = rnn1(input, sequence_length=self.sequence_length)
init_h = np.zeros(
(self.num_layers * self.direction_num, self.batch_size,
self.hidden_size)).astype(self.dtype)
init_c = np.zeros(
(self.num_layers * self.direction_num, self.batch_size,
self.hidden_size)).astype(self.dtype)
state_out = np.ndarray((300)).astype("uint8")
self.inputs = {
'Input': input,
'WeightList': flat_w,
'PreState': [('init_h', init_h), ('init_c', init_c)],
'SequenceLength': self.sequence_length
}
if self.sequence_length is None:
self.inputs = {
'Input': input,
'WeightList': flat_w,
'PreState': [('init_h', init_h), ('init_c', init_c)],
}
self.attrs = {
'dropout_prob': self.dropout,
'is_bidirec': self.is_bidirec,
'input_size': self.input_size,
'hidden_size': self.hidden_size,
'num_layers': self.num_layers,
'mode': self.mode,
'is_test': self.is_test
}
self.outputs = {
'Out': output,
"State": [('last_hidden', last_hidden), ('last_cell', last_cell)],
'Reserve': np.ndarray((400)).astype("uint8"),
'DropoutState': state_out
}
def init_dtype(self):
self.dtype = self.in_type
def init_size(self):
self.seq_length = 12
self.batch_size = 5
self.input_size = 3
self.hidden_size = 2
def test_output(self):
self.check_output_with_place(
self.place, no_check_set=['Reserve', 'DropoutState', 'State'])
def set_attrs(self):
pass
# def test_grad(self):
# if not self.is_test:
# var_name_list = self.get_weight_names()
# grad_check_list = ['Input', 'init_h', 'init_c']
# grad_check_list.extend(var_name_list)
# self.check_grad_with_place(self.place, set(grad_check_list),
# ['Out', 'last_hidden', 'last_cell'])
# class TestRNNOp1(TestRNNOp):
# def set_attrs(self):
# self.sequence_length = None
# class TestRNNOp2(TestRNNOp):
# def set_attrs(self):
# self.sequence_length = None
# self.is_bidirec = True
# class TestRNNOp3(TestRNNOp):
# def set_attrs(self):
# self.is_test = True
# self.sequence_length = None
# class TestRNNOp4(TestRNNOp):
# def set_attrs(self):
# self.is_test = True
# self.sequence_length = None
# self.is_bidirec = True
# TODO(chenxiao): cnnl doesn't support the num_layers > 1 case
# class TestRNNOp5(TestRNNOp):
# def set_attrs(self):
# self.num_layers = 2
# class TestRNNOp6(TestRNNOp):
# def set_attrs(self):
# self.num_layers = 2
# self.is_bidirec = True
# class TestRNNOp7(TestRNNOp):
# def set_attrs(self):
# self.num_layers = 2
# self.is_bidirec = True
# self.is_test = True
# class TestRNNOp8(TestRNNOp):
# def set_attrs(self):
# self.num_layers = 2
# self.is_bidirec = True
# self.sequence_length = None
# class TestRNNOp9(TestRNNOp):
# def set_attrs(self):
# self.num_layers = 3
if __name__ == '__main__':
unittest.main()