Unverified commit 188871e6, authored by Zhang Jun, committed by GitHub


[inference][cherrypick] Implement layer_norm op using INormalizationLayer, and let conv_fusion support a bias whose rank equals the input's rank (#54590)

* [inference] conv_fusion supports a bias whose rank equals the input's rank (#54477)

* Support a bias whose rank equals the input's rank

* [inference][trt] layer_norm op with dynamic shape supports INormalizationLayer in TRT 8.6 (#54379)

* layer_norm op with dynamic shape supports INormalizationLayer in TRT 8.6

* Use TRT layers to build the layer_norm op when the TRT version is lower than 8.6
layer_norm op with dynamic shape supports INormalizationLayer in TRT 8.6

---------
Co-authored-by: bukejiyu <52310069+bukejiyu@users.noreply.github.com>
Parent ee6354d4
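
For context: both TRT paths added below compute plain layer normalization, y = (x − μ) / sqrt(Var + ε) * γ + β, over the axes starting at begin_norm_axis. A minimal CPU reference sketch of that math follows (illustrative only, not part of this commit; the helper name LayerNormRef and the flattened [outer, norm_size] layout are assumptions):

    #include <cmath>
    #include <vector>

    // Reference layer_norm: x is flattened to [outer, norm_size], where norm_size
    // is the product of the dims from begin_norm_axis onward; gamma and beta each
    // hold norm_size elements.
    std::vector<float> LayerNormRef(const std::vector<float>& x,
                                    const std::vector<float>& gamma,
                                    const std::vector<float>& beta,
                                    int outer, int norm_size, float eps = 1e-5f) {
      std::vector<float> y(x.size());
      for (int i = 0; i < outer; ++i) {
        const float* row = x.data() + i * norm_size;
        float mean = 0.f;
        for (int j = 0; j < norm_size; ++j) mean += row[j];
        mean /= norm_size;
        float var = 0.f;
        for (int j = 0; j < norm_size; ++j) var += (row[j] - mean) * (row[j] - mean);
        var /= norm_size;
        const float inv_sigma = 1.f / std::sqrt(var + eps);
        for (int j = 0; j < norm_size; ++j)
          y[i * norm_size + j] = (row[j] - mean) * inv_sigma * gamma[j] + beta[j];
      }
      return y;
    }

On TRT >= 8.6 the INormalizationLayer expresses this computation as a single layer; the fallback path in the diff rebuilds it from Reduce, ElementWise, and Unary layers.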
@@ -24,19 +24,108 @@ class LayerNormOpConverter : public OpConverter {
void operator()(const framework::proto::OpDesc& op,
const framework::Scope& scope,
bool test_mode) override {
VLOG(4) << "convert a layer_norm op to tensorrt layer_norm plugin";
VLOG(4) << "convert a layer_norm op with dynamic shape to Normalization "
"layer or Static shape tensorrt layer_norm plugin";
framework::OpDesc op_desc(op, nullptr);
auto* X = engine_->GetITensor(op_desc.Input("X").front());
auto* Bias_v = scope.FindVar(op_desc.Input("Bias").front());
auto* Scale_v = scope.FindVar(op_desc.Input("Scale").front());
const int begin_norm_axis =
op_desc.HasAttr("begin_norm_axis")
? PADDLE_GET_CONST(int, op_desc.GetAttr("begin_norm_axis"))
: 1;
auto* X = engine_->GetITensor(op_desc.Input("X")[0]);
auto rank = X->getDimensions().nbDims;
std::string output_name = op_desc.Output("Y")[0];
const float eps = op_desc.HasAttr("epsilon")
? PADDLE_GET_CONST(float, op_desc.GetAttr("epsilon"))
: 1e-5f;
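    // With dynamic shape, build layer_norm from native TRT layers
    // (INormalizationLayer on TRT >= 8.6, otherwise Reduce/ElementWise/Unary
    // layers); with static shape, fall back to the layer_norm plugin below.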
if (engine_->with_dynamic_shape()) {
auto* Scale = engine_->GetITensor(op_desc.Input("Scale")[0]);
auto* Bias = engine_->GetITensor(op_desc.Input("Bias")[0]);
int32_t begin_axis =
op_desc.HasAttr("begin_norm_axis")
? PADDLE_GET_CONST(int, op_desc.GetAttr("begin_norm_axis"))
: 1;
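      // Mark every axis in [begin_axis, rank) as a normalization axis.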
uint32_t axisMask{0};
for (int32_t i = begin_axis; i < rank; i++) {
axisMask |= 1 << i;
}
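      // Scale and Bias hold only the normalized dims; build a shape tensor that
      // prepends 1s so both can be reshaped to X's rank and broadcast against X.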
std::vector<int32_t> indice_dim_vec(rank);
std::iota(indice_dim_vec.begin(), indice_dim_vec.end(), 0);
auto p = std::remove_if(indice_dim_vec.begin(),
indice_dim_vec.end(),
[begin_axis](int x) { return x < begin_axis; });
indice_dim_vec.resize(p - indice_dim_vec.begin());
auto newDims = Gather(Shape(X), indice_dim_vec);
auto newrank = indice_dim_vec.size();
auto* one_rank_tensor =
Add1DConstantLayer(std::vector<int32_t>(rank - newrank, 1));
std::vector<nvinfer1::ITensor*> itensors;
itensors.push_back(one_rank_tensor);
itensors.push_back(newDims);
nvinfer1::ITensor* concat_shape_tensor = Concat(itensors);
auto Bias_reshape = Reshape(
Bias,
concat_shape_tensor,
("layer_norm Bias: reshape: (Output(" + output_name + ")").c_str());
auto Scale_reshape = Reshape(
Scale,
concat_shape_tensor,
("layer_norm Scale: reshape: (Output(" + output_name + ")").c_str());
#if IS_TRT_VERSION_GE(8600)
auto layer = TRT_ENGINE_ADD_LAYER(
engine_, Normalization, *X, *Scale_reshape, *Bias_reshape, axisMask);
layer->setEpsilon(eps);
RreplenishLayerAndOutput(layer, "layer_norm", {output_name}, test_mode);
#else
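      // TRT < 8.6: compose layer_norm from basic layers as
      //   y = (x - mean) / sqrt(var + eps) * scale + bias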
      // mean: μ = reduce_mean(x) over the normalization axes
auto miu_layer = TRT_ENGINE_ADD_LAYER(
engine_, Reduce, *X, nvinfer1::ReduceOperation::kAVG, axisMask, true);
miu_layer->setName((output_name + "_miu").c_str());
auto miu_output = miu_layer->getOutput(0);
// x−μ
auto xsubmiu_output = Sub(X, miu_output);
      // variance numerator: (x−μ)^2
auto pow_tensor = Add1DConstantLayer(static_cast<float>(2));
auto xsubmiu_pow_out = Pow(
xsubmiu_output,
BroadcastTensors(xsubmiu_output,
pow_tensor,
("layer_norm_pow: reshape_for_broadcast: (Output(" +
output_name + ")")
.c_str()));
      // Var = reduce_mean((x−μ)^2)
auto mean_var_layer =
TRT_ENGINE_ADD_LAYER(engine_,
Reduce,
*xsubmiu_pow_out,
nvinfer1::ReduceOperation::kAVG,
axisMask,
true);
mean_var_layer->setName((output_name + "_sigma").c_str());
auto mean_var_out = mean_var_layer->getOutput(0);
      // σ = sqrt(Var + ε)
auto eps_tensor = Add1DConstantLayer(eps);
auto sum_out = Sum(
mean_var_out,
BroadcastTensors(mean_var_out,
eps_tensor,
("layer_norm_eps: reshape_for_broadcast: (Output(" +
output_name + ")")
.c_str()));
auto sigma_layer = TRT_ENGINE_ADD_LAYER(
engine_, Unary, *sum_out, nvinfer1::UnaryOperation::kSQRT);
auto sigma_output = sigma_layer->getOutput(0);
      // (x−μ)/σ
auto div_out = Div(xsubmiu_output, sigma_output);
      // ((x−μ)/σ)*scale + bias
auto scale_out = Prod(div_out, Scale_reshape);
auto layer = TRT_ENGINE_ADD_LAYER(engine_,
ElementWise,
*scale_out,
*Bias_reshape,
nvinfer1::ElementWiseOperation::kSUM);
RreplenishLayerAndOutput(layer, "layer_norm", {output_name}, test_mode);
#endif
} else {
auto* Bias_v = scope.FindVar(op_desc.Input("Bias")[0]);
auto* Scale_v = scope.FindVar(op_desc.Input("Scale")[0]);
PADDLE_ENFORCE_NOT_NULL(
Bias_v,
platform::errors::InvalidArgument(
@@ -45,7 +134,6 @@ class LayerNormOpConverter : public OpConverter {
Scale_v,
platform::errors::InvalidArgument(
"Input(Scale) of layer_norm should not be null."));
auto* Bias_t = Bias_v->GetMutable<phi::DenseTensor>();
auto* Scale_t = Scale_v->GetMutable<phi::DenseTensor>();
@@ -54,27 +142,11 @@ class LayerNormOpConverter : public OpConverter {
auto scale_weight =
engine_->GetFp32TrtWeight(op_desc.Input("Scale").front(), *Scale_t);
nvinfer1::ILayer* layernorm_layer = nullptr;
if (engine_->with_dynamic_shape()) {
        // For dynamic shape,
        // the shape of mean and variance will be determined in configurePlugin.
std::vector<int64_t> mean_shape{1};
std::vector<int64_t> variance_shape{1};
bool with_fp16 =
engine_->WithFp16() && !engine_->disable_trt_plugin_fp16();
plugin::LayerNormPluginDynamic* plugin =
new plugin::LayerNormPluginDynamic(
static_cast<const float*>(bias_weight.get().values),
bias_weight.get().count,
static_cast<const float*>(scale_weight.get().values),
scale_weight.get().count,
begin_norm_axis,
eps,
mean_shape,
variance_shape,
with_fp16);
layernorm_layer = engine_->AddDynamicPlugin(&X, 1, plugin);
} else {
const int begin_norm_axis =
op_desc.HasAttr("begin_norm_axis")
? PADDLE_GET_CONST(int, op_desc.GetAttr("begin_norm_axis"))
: 1;
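        // The static-shape plugin keeps one mean/variance statistic per slice
        // of the dims in [1, begin_norm_axis), hence the product below.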
int statis_num = 1;
for (int i = 1; i < begin_norm_axis; i++) {
statis_num *= X->getDimensions().d[i];
@@ -93,14 +165,12 @@ class LayerNormOpConverter : public OpConverter {
mean_shape,
variance_shape,
with_fp16);
layernorm_layer = engine_->AddPlugin(
auto* layernorm_layer = engine_->AddPlugin(
&X, 1, reinterpret_cast<plugin::PluginTensorRT*>(plugin));
}
auto output_name = op_desc.Output("Y").front();
RreplenishLayerAndOutput(
layernorm_layer, "layer_norm", {output_name}, test_mode);
}
}
};
} // namespace tensorrt
......
@@ -519,6 +519,14 @@ class OpConverter {
return c;
}
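  // Helper added for the layer_norm converter above: elementwise power a^b,
  // implemented with an ElementWise kPOW layer; a and b must be broadcast-compatible.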
nvinfer1::ITensor* Pow(nvinfer1::ITensor* a, nvinfer1::ITensor* b) {
nvinfer1::ITensor* c =
TRT_ENGINE_ADD_LAYER(
engine_, ElementWise, *a, *b, nvinfer1::ElementWiseOperation::kPOW)
->getOutput(0);
return c;
}
nvinfer1::ITensor* Act(nvinfer1::ITensor* a,
nvinfer1::ActivationType act_type) {
nvinfer1::ITensor* c =
......
@@ -413,15 +413,15 @@ void ConvFusionKernel(const Context& ctx,
compute_format);
DenseTensor transformed_input;
const int input_rank = input.dims().size();
auto unsys_pad_process = [&](const std::vector<int>& new_input_shape_vec,
const std::vector<int>& input_pad) {
DDim new_input_shape(make_ddim(new_input_shape_vec));
transformed_input.Resize(new_input_shape);
ctx.template Alloc<T>(&transformed_input);
const int rank = input.dims().size();
T pad_value(0.0);
switch (rank) {
switch (input_rank) {
case 4: {
funcs::PadFunction<Context, T, 4>(
ctx, input_pad, input, pad_value, &transformed_input);
@@ -442,11 +442,16 @@ void ConvFusionKernel(const Context& ctx,
conv_attr_cache->input_pad);
}
std::vector<int> b_dims(input.dims().size(), 1);
std::vector<int> b_dims(input_rank, 1);
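  // b_dims is the broadcast shape for the bias. In NCHW, a bias that already has
  // the input's rank keeps its channel count at dim 1, while a 1-D bias supplies
  // its length as the channel count; in the NHWC branch the channel dim is last.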
if (compute_format == CUDNN_TENSOR_NCHW) {
auto bias_rank = bias.dims().size();
if (input_rank == bias_rank) {
b_dims[1] = static_cast<int>(bias.dims()[1]);
} else {
b_dims[1] = static_cast<int>(bias.dims()[0]);
}
} else {
b_dims[input.dims().size() - 1] = static_cast<int>(bias.dims()[0]);
b_dims[input_rank - 1] = static_cast<int>(bias.dims()[0]);
}
auto search_func = [&](cudnnConvolutionFwdAlgo_t* cudnn_algo,
......
@@ -43,7 +43,7 @@ class TrtConvertLayerNormTest(TrtLayerAutoScanTest):
def sample_program_configs(self):
def generate_input1(attrs: List[Dict[str, Any]], shape_input):
return np.ones(shape_input).astype(np.float32)
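            # Random inputs (instead of all ones) so the normalization math is
            # actually exercised by the accuracy check.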
return np.random.random(shape_input).astype(np.float32)
def generate_input2(attrs: List[Dict[str, Any]], shape_input):
begin = attrs[0]["begin_norm_axis"]
......