未验证 提交 e6e2e537 编写于 作者: S Shang Zhizhou 提交者: GitHub

Optimize error report (#27254)

* optimize errror report

* add test case for pad op converter

* fix some spelling mistake commented by peiyang
上级 ee1ed42c
...@@ -34,8 +34,11 @@ class ConcatOpConverter : public OpConverter { ...@@ -34,8 +34,11 @@ class ConcatOpConverter : public OpConverter {
itensors.push_back(engine_->GetITensor(input_name)); itensors.push_back(engine_->GetITensor(input_name));
} }
int axis = BOOST_GET_CONST(int, op_desc.GetAttr("axis")); int axis = BOOST_GET_CONST(int, op_desc.GetAttr("axis"));
PADDLE_ENFORCE(axis > 0, PADDLE_ENFORCE_GT(axis, 0, platform::errors::InvalidArgument(
"The axis attr of Concat op should be large than 0 for trt"); "The axis attr of Concat"
" op should be larger than 0 for trt. "
"But received %d.",
axis));
auto* layer = TRT_ENGINE_ADD_LAYER(engine_, Concatenation, itensors.data(), auto* layer = TRT_ENGINE_ADD_LAYER(engine_, Concatenation, itensors.data(),
itensors.size()); itensors.size());
......
...@@ -100,7 +100,9 @@ void ConvertConv2d(TensorRTEngine* engine, const framework::proto::OpDesc& op, ...@@ -100,7 +100,9 @@ void ConvertConv2d(TensorRTEngine* engine, const framework::proto::OpDesc& op,
TensorRTEngine::Weight bias{nvinfer1::DataType::kFLOAT, nullptr, 0}; TensorRTEngine::Weight bias{nvinfer1::DataType::kFLOAT, nullptr, 0};
auto* layer = fadd_layer(const_cast<nvinfer1::ITensor*>(X), n_output, n_input, auto* layer = fadd_layer(const_cast<nvinfer1::ITensor*>(X), n_output, n_input,
nv_ksize, weight, bias); nv_ksize, weight, bias);
PADDLE_ENFORCE(layer != nullptr); PADDLE_ENFORCE_NOT_NULL(layer,
platform::errors::Fatal("TensorRT create conv2d"
" layer error."));
layer->setStride(nv_strides); layer->setStride(nv_strides);
layer->setPadding(nv_paddings); layer->setPadding(nv_paddings);
layer->setNbGroups(groups); layer->setNbGroups(groups);
......
...@@ -43,13 +43,30 @@ class ElementwiseWeightOpConverter : public OpConverter { ...@@ -43,13 +43,30 @@ class ElementwiseWeightOpConverter : public OpConverter {
framework::OpDesc op_desc(op, nullptr); framework::OpDesc op_desc(op, nullptr);
VLOG(3) << "Convert a fluid elementwise op to TensorRT IScaleLayer"; VLOG(3) << "Convert a fluid elementwise op to TensorRT IScaleLayer";
PADDLE_ENFORCE_EQ(op_desc.Input("X").size(), 1); PADDLE_ENFORCE_EQ(
PADDLE_ENFORCE_EQ(op_desc.Input("Y").size(), 1); // Y is a weight op_desc.Input("X").size(), 1,
PADDLE_ENFORCE_EQ(op_desc.Output("Out").size(), 1); platform::errors::InvalidArgument(
"The input op's Input(\"X\").size() "
"should equal to 1, but received Input(\"X\").size() = %u.",
op_desc.Input("X").size()));
PADDLE_ENFORCE_EQ(
op_desc.Input("Y").size(), 1,
platform::errors::InvalidArgument(
"The input op's Input(\"Y\").size() "
"should equal to 1, but received Input(\"Y\").size() = %u.",
op_desc.Input("Y").size())); // Y is a weight
PADDLE_ENFORCE_EQ(
op_desc.Output("Out").size(), 1,
platform::errors::InvalidArgument(
"The input op's Output(\"Out\").size() "
"should equal to 1, but received Output(\"Out\").size() = %u.",
op_desc.Output("Out").size()));
auto* X = engine_->GetITensor(op_desc.Input("X").front()); auto* X = engine_->GetITensor(op_desc.Input("X").front());
auto* Y_v = scope.FindVar(op_desc.Input("Y").front()); auto* Y_v = scope.FindVar(op_desc.Input("Y").front());
PADDLE_ENFORCE_NOT_NULL(Y_v); PADDLE_ENFORCE_NOT_NULL(
Y_v, platform::errors::NotFound("Variable %s not found in scope.",
op_desc.Input("Y").front().c_str()));
auto* Y_t = Y_v->GetMutable<framework::LoDTensor>(); auto* Y_t = Y_v->GetMutable<framework::LoDTensor>();
float* weight_data = nullptr; float* weight_data = nullptr;
weight_data = weight_data =
...@@ -176,9 +193,24 @@ class ElementwiseTensorOpConverter : public OpConverter { ...@@ -176,9 +193,24 @@ class ElementwiseTensorOpConverter : public OpConverter {
framework::OpDesc op_desc(op, nullptr); framework::OpDesc op_desc(op, nullptr);
nvinfer1::ILayer* layer = nullptr; nvinfer1::ILayer* layer = nullptr;
PADDLE_ENFORCE_EQ(op_desc.Input("X").size(), 1); PADDLE_ENFORCE_EQ(
PADDLE_ENFORCE_EQ(op_desc.Input("Y").size(), 1); // Y is a weight op_desc.Input("X").size(), 1,
PADDLE_ENFORCE_EQ(op_desc.Output("Out").size(), 1); platform::errors::InvalidArgument(
"The input op's Input(\"X\").size() "
"should equal to 1, but received Input(\"X\").size() = %u.",
op_desc.Input("X").size()));
PADDLE_ENFORCE_EQ(
op_desc.Input("Y").size(), 1,
platform::errors::InvalidArgument(
"The input op's Input(\"Y\").size() "
"should equal to 1, but received Input(\"Y\").size() = %u.",
op_desc.Input("Y").size())); // Y is a weight
PADDLE_ENFORCE_EQ(
op_desc.Output("Out").size(), 1,
platform::errors::InvalidArgument(
"The input op's Output(\"Out\").size() "
"should equal to 1, but received Output(\"Out\").size() = %u.",
op_desc.Output("Out").size()));
auto* X = engine_->GetITensor(op_desc.Input("X").front()); auto* X = engine_->GetITensor(op_desc.Input("X").front());
auto* Y = engine_->GetITensor(op_desc.Input("Y").front()); auto* Y = engine_->GetITensor(op_desc.Input("Y").front());
......
...@@ -29,38 +29,67 @@ class DefaultIOConverter : public EngineIOConverter { ...@@ -29,38 +29,67 @@ class DefaultIOConverter : public EngineIOConverter {
// NOTE out is GPU memory. // NOTE out is GPU memory.
virtual void operator()(const LoDTensor& in, void* out, virtual void operator()(const LoDTensor& in, void* out,
size_t max_size) override { size_t max_size) override {
PADDLE_ENFORCE(out != nullptr); PADDLE_ENFORCE_NOT_NULL(out,
PADDLE_ENFORCE(stream_ != nullptr); platform::errors::InvalidArgument(
"The input param 'out' must not be nullptr."));
PADDLE_ENFORCE_NOT_NULL(stream_,
platform::errors::PreconditionNotMet(
"You should set up stream_ by SetStream() "
"before you call the operator()."));
const auto& place = in.place(); const auto& place = in.place();
size_t size = in.memory_size(); size_t size = in.memory_size();
PADDLE_ENFORCE_LE(size, max_size); PADDLE_ENFORCE_LE(
size, max_size,
platform::errors::InvalidArgument(
"The input Tensor in's memory_size should be less than or equal to "
"the input max_size. But in's memory_size = %u, max_size = %u.",
size, max_size));
if (is_cpu_place(place)) { if (is_cpu_place(place)) {
PADDLE_ENFORCE_EQ(0, cudaMemcpyAsync(out, in.data<float>(), size, PADDLE_ENFORCE_CUDA_SUCCESS(cudaMemcpyAsync(
cudaMemcpyHostToDevice, *stream_)); out, in.data<float>(), size, cudaMemcpyHostToDevice, *stream_));
} else if (is_gpu_place(place)) { } else if (is_gpu_place(place)) {
PADDLE_ENFORCE_EQ(0, cudaMemcpyAsync(out, in.data<float>(), size, PADDLE_ENFORCE_EQ(
cudaMemcpyDeviceToDevice, *stream_)); 0, cudaMemcpyAsync(out, in.data<float>(), size,
cudaMemcpyDeviceToDevice, *stream_),
platform::errors::External(
"cudaMemcpyAsync(cudaMemcpyDeviceToDevice) error."));
} else { } else {
PADDLE_THROW("Unknown device for converter"); PADDLE_THROW(platform::errors::NotFound("Unknown device for converter"));
} }
cudaStreamSynchronize(*stream_); cudaStreamSynchronize(*stream_);
} }
// NOTE in is GPU memory. // NOTE in is GPU memory.
virtual void operator()(const void* in, LoDTensor* out, virtual void operator()(const void* in, LoDTensor* out,
size_t max_size) override { size_t max_size) override {
PADDLE_ENFORCE(in != nullptr); PADDLE_ENFORCE_NOT_NULL(in,
PADDLE_ENFORCE(stream_ != nullptr); platform::errors::InvalidArgument(
"The input param 'in' must not be nullptr."));
PADDLE_ENFORCE_NOT_NULL(stream_,
platform::errors::PreconditionNotMet(
"You should set up stream_ by SetStream() "
"before you call the operator()."));
const auto& place = out->place(); const auto& place = out->place();
size_t size = out->memory_size(); size_t size = out->memory_size();
PADDLE_ENFORCE_LE(size, max_size); PADDLE_ENFORCE_LE(
size, max_size,
platform::errors::InvalidArgument(
"The input Tensor out's memory_size should be less than or equal "
"to the input max_size. "
"But out's memory_size = %u, max_size = %u.",
size, max_size));
if (is_cpu_place(place)) { if (is_cpu_place(place)) {
PADDLE_ENFORCE_EQ(0, cudaMemcpyAsync(out->data<float>(), in, size, PADDLE_ENFORCE_EQ(0, cudaMemcpyAsync(out->data<float>(), in, size,
cudaMemcpyDeviceToHost, *stream_)); cudaMemcpyDeviceToHost, *stream_),
platform::errors::External(
"cudaMemcpyAsync(cudaMemcpyDeviceToHost) error."));
} else if (is_gpu_place(place)) { } else if (is_gpu_place(place)) {
PADDLE_ENFORCE_EQ(0, cudaMemcpyAsync(out->data<float>(), in, size, PADDLE_ENFORCE_EQ(
cudaMemcpyDeviceToDevice, *stream_)); 0, cudaMemcpyAsync(out->data<float>(), in, size,
cudaMemcpyDeviceToDevice, *stream_),
platform::errors::External(
"cudaMemcpyAsync(cudaMemcpyDeviceToDevice) error."));
} else { } else {
PADDLE_THROW("Unknown device for converter"); PADDLE_THROW(platform::errors::NotFound("Unknown device for converter"));
} }
cudaStreamSynchronize(*stream_); cudaStreamSynchronize(*stream_);
} }
......
...@@ -44,10 +44,14 @@ class EngineIOConverter { ...@@ -44,10 +44,14 @@ class EngineIOConverter {
static void ConvertInput(const std::string& op_type, const LoDTensor& in, static void ConvertInput(const std::string& op_type, const LoDTensor& in,
void* out, size_t max_size, cudaStream_t* stream) { void* out, size_t max_size, cudaStream_t* stream) {
PADDLE_ENFORCE(stream != nullptr); PADDLE_ENFORCE_NOT_NULL(stream,
platform::errors::InvalidArgument(
"The input stream must not be nullptr."));
auto* converter = Registry<EngineIOConverter>::Global().Lookup( auto* converter = Registry<EngineIOConverter>::Global().Lookup(
op_type, "default" /* default_type */); op_type, "default" /* default_type */);
PADDLE_ENFORCE_NOT_NULL(converter); PADDLE_ENFORCE_NOT_NULL(
converter, platform::errors::Unimplemented(
"The %s in is not supported yet.", op_type.c_str()));
converter->SetStream(stream); converter->SetStream(stream);
(*converter)(in, out, max_size); (*converter)(in, out, max_size);
} }
...@@ -55,10 +59,14 @@ class EngineIOConverter { ...@@ -55,10 +59,14 @@ class EngineIOConverter {
static void ConvertOutput(const std::string& op_type, const void* in, static void ConvertOutput(const std::string& op_type, const void* in,
LoDTensor* out, size_t max_size, LoDTensor* out, size_t max_size,
cudaStream_t* stream) { cudaStream_t* stream) {
PADDLE_ENFORCE(stream != nullptr); PADDLE_ENFORCE_NOT_NULL(stream,
platform::errors::InvalidArgument(
"The input stream must not be nullptr."));
auto* converter = Registry<EngineIOConverter>::Global().Lookup( auto* converter = Registry<EngineIOConverter>::Global().Lookup(
op_type, "default" /* default_type */); op_type, "default" /* default_type */);
PADDLE_ENFORCE_NOT_NULL(converter); PADDLE_ENFORCE_NOT_NULL(
converter, platform::errors::Unimplemented(
"The %s is not supported yet.", op_type.c_str()));
converter->SetStream(stream); converter->SetStream(stream);
(*converter)(in, out, max_size); (*converter)(in, out, max_size);
} }
......
...@@ -53,7 +53,12 @@ class OpConverter { ...@@ -53,7 +53,12 @@ class OpConverter {
OpConverter* it{nullptr}; OpConverter* it{nullptr};
if (op_desc.Type() == "mul") { if (op_desc.Type() == "mul") {
PADDLE_ENFORCE_EQ(op_desc.Input("Y").size(), 1UL); PADDLE_ENFORCE_EQ(op_desc.Input("Y").size(), 1UL,
platform::errors::InvalidArgument(
"The input op mul's Input(\"Y\")."
"size() should equal to 1, but received "
"Input(\"Y\").size() = %u.",
op_desc.Input("Y").size()));
std::string Y = op_desc.Input("Y")[0]; std::string Y = op_desc.Input("Y")[0];
if (parameters.count(Y)) { if (parameters.count(Y)) {
it = Registry<OpConverter>::Global().Lookup("fc"); it = Registry<OpConverter>::Global().Lookup("fc");
...@@ -66,38 +71,51 @@ class OpConverter { ...@@ -66,38 +71,51 @@ class OpConverter {
// static std::unordered_set<std::string> add_weight_op_set {"add", "mul", // static std::unordered_set<std::string> add_weight_op_set {"add", "mul",
// "sub", "div"}; // "sub", "div"};
static std::unordered_set<std::string> add_weight_op_set{"add", "mul"}; static std::unordered_set<std::string> add_weight_op_set{"add", "mul"};
PADDLE_ENFORCE_EQ(op_desc.Input("Y").size(), 1UL); PADDLE_ENFORCE_EQ(op_desc.Input("Y").size(), 1UL,
platform::errors::InvalidArgument(
"The input op's Input(\"Y\")."
"size() should equal to 1, but received "
"Input(\"Y\").size() = %u.",
op_desc.Input("Y").size()));
int op_type_len = op_desc.Type().size(); int op_type_len = op_desc.Type().size();
std::string op_type = op_desc.Type().substr(op_type_len - 3, op_type_len); std::string op_type = op_desc.Type().substr(op_type_len - 3, op_type_len);
std::string Y = op_desc.Input("Y")[0]; std::string Y = op_desc.Input("Y")[0];
if (parameters.count(Y)) { if (parameters.count(Y)) {
PADDLE_ENFORCE(add_weight_op_set.count(op_type) > 0, PADDLE_ENFORCE_GT(
"Unsupported elementwise type" + op_type); add_weight_op_set.count(op_type), 0,
platform::errors::Unimplemented("Unsupported elementwise type %s",
op_type.c_str()));
it = Registry<OpConverter>::Global().Lookup("elementwise_" + op_type + it = Registry<OpConverter>::Global().Lookup("elementwise_" + op_type +
"_weight"); "_weight");
PADDLE_ENFORCE_NOT_NULL(it, "no OpConverter for optype [%s]", PADDLE_ENFORCE_NOT_NULL(
op_desc.Type()); it, platform::errors::Unimplemented(
"no OpConverter for optype [%s]", op_desc.Type()));
} else { } else {
PADDLE_ENFORCE(add_tensor_op_set.count(op_type) > 0, PADDLE_ENFORCE_GT(
"Unsupported elementwise type" + op_type); add_tensor_op_set.count(op_type), 0,
platform::errors::Unimplemented("Unsupported elementwise type %s",
op_type.c_str()));
it = Registry<OpConverter>::Global().Lookup("elementwise_" + op_type + it = Registry<OpConverter>::Global().Lookup("elementwise_" + op_type +
"_tensor"); "_tensor");
} }
PADDLE_ENFORCE_NOT_NULL(it, "no OpConverter for optype [%s]", PADDLE_ENFORCE_NOT_NULL(
op_desc.Type()); it, platform::errors::Unimplemented("no OpConverter for optype [%s]",
op_desc.Type()));
} }
if (op_desc.Type() == "depthwise_conv2d") { if (op_desc.Type() == "depthwise_conv2d") {
it = Registry<OpConverter>::Global().Lookup("conv2d"); it = Registry<OpConverter>::Global().Lookup("conv2d");
PADDLE_ENFORCE_NOT_NULL(it, "no OpConverter for optype [%s]", PADDLE_ENFORCE_NOT_NULL(
op_desc.Type()); it, platform::errors::Unimplemented("no OpConverter for optype [%s]",
op_desc.Type()));
} }
if (!it) { if (!it) {
it = Registry<OpConverter>::Global().Lookup(op_desc.Type()); it = Registry<OpConverter>::Global().Lookup(op_desc.Type());
} }
PADDLE_ENFORCE_NOT_NULL(it, "no OpConverter for optype [%s]", PADDLE_ENFORCE_NOT_NULL(
op_desc.Type()); it, platform::errors::Unimplemented("no OpConverter for optype [%s]",
op_desc.Type()));
it->SetEngine(engine); it->SetEngine(engine);
(*it)(op, scope, test_mode); (*it)(op, scope, test_mode);
...@@ -149,9 +167,13 @@ class OpConverter { ...@@ -149,9 +167,13 @@ class OpConverter {
for (auto& input : inputs) { for (auto& input : inputs) {
if (parameters.count(input)) continue; if (parameters.count(input)) continue;
auto* var = block_desc->FindVar(input); auto* var = block_desc->FindVar(input);
PADDLE_ENFORCE(var, "no variable called %s", input); PADDLE_ENFORCE_NOT_NULL(
PADDLE_ENFORCE_EQ(var->GetType(), FluidDT::VarType_Type_LOD_TENSOR, var, platform::errors::NotFound("no variable called %s in block.",
"TensorRT engine only takes LoDTensor as input"); input.c_str()));
PADDLE_ENFORCE_EQ(
var->GetType(), FluidDT::VarType_Type_LOD_TENSOR,
platform::errors::InvalidArgument("TensorRT engine only takes "
"LoDTensor as input"));
auto var_shape = var->GetShape(); auto var_shape = var->GetShape();
if (engine->with_dynamic_shape()) { if (engine->with_dynamic_shape()) {
#if IS_TRT_VERSION_GE(6000) #if IS_TRT_VERSION_GE(6000)
......
...@@ -39,9 +39,22 @@ class PadOpConverter : public OpConverter { ...@@ -39,9 +39,22 @@ class PadOpConverter : public OpConverter {
nvinfer1::Dims input_shape = input->getDimensions(); nvinfer1::Dims input_shape = input->getDimensions();
int nbDims = input_shape.nbDims; int nbDims = input_shape.nbDims;
int pad_size = static_cast<int>(paddings.size()); int pad_size = static_cast<int>(paddings.size());
PADDLE_ENFORCE_GE(nbDims, 2); PADDLE_ENFORCE_GE(
PADDLE_ENFORCE_EQ((nbDims + 1) * 2, pad_size); nbDims, 2,
PADDLE_ENFORCE(pad_value == 0.0, "The pad layer of TRT only support zero."); platform::errors::InvalidArgument(
"Input X[0]'s dimension should greater than or equal to 2. "
"But received %d.",
nbDims));
PADDLE_ENFORCE_EQ(
(nbDims + 1) * 2, pad_size,
platform::errors::InvalidArgument("Input X[0]'s dimension(nbDims for "
"short) should meet the condition:"
"(nbDims + 1) * 2 == pad_size. But "
"received nbDims:%d, pad_size:%d.",
nbDims, pad_size));
PADDLE_ENFORCE_EQ(pad_value, 0.0,
platform::errors::InvalidArgument(
"The pad layer of TRT only support zero."));
nvinfer1::DimsHW pre_pad(paddings[pad_size - 4], paddings[pad_size - 2]); nvinfer1::DimsHW pre_pad(paddings[pad_size - 4], paddings[pad_size - 2]);
nvinfer1::DimsHW post_pad(paddings[pad_size - 3], paddings[pad_size - 1]); nvinfer1::DimsHW post_pad(paddings[pad_size - 3], paddings[pad_size - 1]);
...@@ -50,7 +63,9 @@ class PadOpConverter : public OpConverter { ...@@ -50,7 +63,9 @@ class PadOpConverter : public OpConverter {
*const_cast<nvinfer1::ITensor*>(input), *const_cast<nvinfer1::ITensor*>(input),
pre_pad, post_pad); pre_pad, post_pad);
PADDLE_ENFORCE(layer != nullptr); PADDLE_ENFORCE_NOT_NULL(layer,
platform::errors::External(
"add padding layer to tensorrt engine error"));
auto output_name = op_desc.Output("Out")[0]; auto output_name = op_desc.Output("Out")[0];
RreplenishLayerAndOutput(layer, "pad", {output_name}, test_mode); RreplenishLayerAndOutput(layer, "pad", {output_name}, test_mode);
} }
......
...@@ -28,11 +28,20 @@ class SwishOpConverter : public OpConverter { ...@@ -28,11 +28,20 @@ class SwishOpConverter : public OpConverter {
framework::OpDesc op_desc(op, nullptr); framework::OpDesc op_desc(op, nullptr);
// Declare inputs // Declare inputs
int input_num = op_desc.Input("X").size(); int input_num = op_desc.Input("X").size();
PADDLE_ENFORCE(input_num == 1); PADDLE_ENFORCE_EQ(input_num, 1,
platform::errors::InvalidArgument(
"The input X's size must equal to 1 in TRT swish op."
" But received X's size %d.",
input_num));
auto* input = engine_->GetITensor(op_desc.Input("X")[0]); auto* input = engine_->GetITensor(op_desc.Input("X")[0]);
// Get output // Get output
size_t output_num = op_desc.Output("Out").size(); size_t output_num = op_desc.Output("Out").size();
PADDLE_ENFORCE(output_num == 1); PADDLE_ENFORCE_EQ(
output_num, 1UL,
platform::errors::InvalidArgument(
"The output Out's size must equal to 1 in TRT swish op. "
"But received Out's size %u.",
output_num));
// Get attrs // Get attrs
float beta = BOOST_GET_CONST(float, op_desc.GetAttr("beta")); float beta = BOOST_GET_CONST(float, op_desc.GetAttr("beta"));
......
...@@ -49,7 +49,10 @@ void RandomizeTensor(framework::LoDTensor* tensor, const platform::Place& place, ...@@ -49,7 +49,10 @@ void RandomizeTensor(framework::LoDTensor* tensor, const platform::Place& place,
const platform::DeviceContext& ctx) { const platform::DeviceContext& ctx) {
auto dims = tensor->dims(); auto dims = tensor->dims();
size_t num_elements = analysis::AccuDims(dims, dims.size()); size_t num_elements = analysis::AccuDims(dims, dims.size());
PADDLE_ENFORCE_GT(num_elements, 0); PADDLE_ENFORCE_GT(
num_elements, 0UL,
platform::errors::PermissionDenied("RandomizeTensor only can be used for "
"tensor which dims is not zero."));
platform::CPUPlace cpu_place; platform::CPUPlace cpu_place;
framework::LoDTensor temp_tensor; framework::LoDTensor temp_tensor;
...@@ -79,7 +82,8 @@ class TRTConvertValidation { ...@@ -79,7 +82,8 @@ class TRTConvertValidation {
scope_(scope), scope_(scope),
if_add_batch_(if_add_batch), if_add_batch_(if_add_batch),
max_batch_size_(max_batch_size) { max_batch_size_(max_batch_size) {
PADDLE_ENFORCE_EQ(cudaStreamCreate(&stream_), 0); PADDLE_ENFORCE_EQ(cudaStreamCreate(&stream_), 0,
platform::errors::External("cudaStreamCreate error."));
engine_.reset(new TensorRTEngine(max_batch_size, workspace_size)); engine_.reset(new TensorRTEngine(max_batch_size, workspace_size));
engine_->InitNetwork(); engine_->InitNetwork();
} }
...@@ -154,7 +158,12 @@ class TRTConvertValidation { ...@@ -154,7 +158,12 @@ class TRTConvertValidation {
void Execute(int batch_size, void Execute(int batch_size,
std::unordered_set<std::string> neglected_output = {}) { std::unordered_set<std::string> neglected_output = {}) {
// Execute Fluid Op // Execute Fluid Op
PADDLE_ENFORCE_LE(batch_size, max_batch_size_); PADDLE_ENFORCE_LE(batch_size, max_batch_size_,
platform::errors::InvalidArgument(
"Runtime batch_size should be less than or equal to "
"max_batch_size_. "
"But received batch_size:%d, max_batch_size_:%d",
batch_size, max_batch_size_));
platform::CUDADeviceContext ctx(place_); platform::CUDADeviceContext ctx(place_);
op_->Run(scope_, place_); op_->Run(scope_, place_);
cudaStreamSynchronize(stream_); cudaStreamSynchronize(stream_);
......
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from __future__ import print_function
import unittest
import numpy as np
from inference_pass_test import InferencePassTest
import paddle.fluid as fluid
import paddle.fluid.core as core
from paddle.fluid.core import AnalysisConfig
class PadOpTRTTest(InferencePassTest):
def setUp(self):
with fluid.program_guard(self.main_program, self.startup_program):
data = fluid.data(
name="data", shape=[1, 3, 128, 128], dtype="float32")
pad_out = fluid.layers.pad(x=data,
paddings=[0, 0, 0, 0, 0, 1, 1, 2],
pad_value=0.0)
out = fluid.layers.batch_norm(pad_out, is_test=True)
self.feeds = {
"data": np.random.random((1, 3, 128, 128)).astype("float32")
}
self.enable_trt = True
self.trt_parameters = PadOpTRTTest.TensorRTParam(
1 << 30, 32, 1, AnalysisConfig.Precision.Float32, False, False)
self.fetch_list = [out]
def test_check_output(self):
use_gpu = [False]
if core.is_compiled_with_cuda():
use_gpu.append(True)
for i in range(len(use_gpu)):
self.check_output_with_option(use_gpu[i])
# Allow running this module directly as a standalone test script.
if __name__ == "__main__":
    unittest.main()
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册