提交 49bafc05 编写于 作者: N nhzlx

Fix comments and set names for TRT layers and ITensors

上级 df161e08
...@@ -85,6 +85,14 @@ struct BriefNode { ...@@ -85,6 +85,14 @@ struct BriefNode {
std::vector<BriefNode *> outlinks; std::vector<BriefNode *> outlinks;
}; };
// Union two adjacent BriefNodes.
// Suppose we have two adjacent nodes, src and dst.
// We will perform the following operations:
// 1. add all inputs (except src) of dst to src's inlinks.
// 2. add all outputs of dst to src's outlinks.
// 3. redirect the corresponding inlinks and outlinks of all of
//    dst's inputs and outputs to the src node.
// 4. delete all of dst's inlinks and outlinks.
void UnionContractedNodes(const std::unordered_map<int, BriefNode *> &node_map, void UnionContractedNodes(const std::unordered_map<int, BriefNode *> &node_map,
int src_id, int dst_id) { int src_id, int dst_id) {
// merge the two adjacent nodes into one node. // merge the two adjacent nodes into one node.
...@@ -224,8 +232,8 @@ std::vector<std::vector<Node *>> SubGraphSplitter::ExtractSubGraphs() { ...@@ -224,8 +232,8 @@ std::vector<std::vector<Node *>> SubGraphSplitter::ExtractSubGraphs() {
// Our algorithm must guarantee that: // Our algorithm must guarantee that:
// 1. The graph is always directed acyclic graph(DAG). // 1. The graph is always directed acyclic graph(DAG).
// 2. If there is a path in the subgraph from X to Y (X and Y are both // 2. If there is a path in the subgraph from X to Y (X and Y are both
// nodes // nodes in the subgraph), then all paths from X to Y are in the
// in the subgraph), then all paths from X to Y are in the subgraph. // subgraph.
// //
// In order to achieve the above guarantee. // In order to achieve the above guarantee.
// For adjacent nodes src -> dst. // For adjacent nodes src -> dst.
......
...@@ -35,6 +35,8 @@ class ReluOpConverter : public OpConverter { ...@@ -35,6 +35,8 @@ class ReluOpConverter : public OpConverter {
engine_, Activation, *const_cast<nvinfer1::ITensor*>(input_tensor), engine_, Activation, *const_cast<nvinfer1::ITensor*>(input_tensor),
nvinfer1::ActivationType::kRELU); nvinfer1::ActivationType::kRELU);
auto output_name = op_desc.Output("Out")[0]; auto output_name = op_desc.Output("Out")[0];
layer->setName(("relu (Output: " + output_name + ")").c_str());
layer->getOutput(0)->setName(output_name.c_str());
engine_->SetITensor(output_name, layer->getOutput(0)); engine_->SetITensor(output_name, layer->getOutput(0));
if (test_mode) { // the test framework can not determine which is the if (test_mode) { // the test framework can not determine which is the
// output, so place the declaration inside. // output, so place the declaration inside.
......
...@@ -116,6 +116,8 @@ class BatchNormOpConverter : public OpConverter { ...@@ -116,6 +116,8 @@ class BatchNormOpConverter : public OpConverter {
scale_weights.get(), power_weights.get()); scale_weights.get(), power_weights.get());
auto output_name = op_desc.Output("Y").front(); auto output_name = op_desc.Output("Y").front();
layer->setName(("batch_norm (Output: " + output_name + ")").c_str());
layer->getOutput(0)->setName(output_name.c_str());
engine_->weight_map[op_desc.Input("Bias").front()] = engine_->weight_map[op_desc.Input("Bias").front()] =
std::move(combile_bias_tensor); std::move(combile_bias_tensor);
engine_->weight_map[op_desc.Input("Scale").front()] = engine_->weight_map[op_desc.Input("Scale").front()] =
......
...@@ -30,7 +30,9 @@ class ConcatOpConverter : public OpConverter { ...@@ -30,7 +30,9 @@ class ConcatOpConverter : public OpConverter {
framework::OpDesc op_desc(op, nullptr); framework::OpDesc op_desc(op, nullptr);
// Declare inputs // Declare inputs
std::vector<nvinfer1::ITensor*> itensors; std::vector<nvinfer1::ITensor*> itensors;
std::cout << "Concat op: " << std::endl;
for (auto& input_name : op_desc.Input("X")) { for (auto& input_name : op_desc.Input("X")) {
std::cout << input_name << std::endl;
itensors.push_back(engine_->GetITensor(input_name)); itensors.push_back(engine_->GetITensor(input_name));
} }
int axis = boost::get<int>(op_desc.GetAttr("axis")); int axis = boost::get<int>(op_desc.GetAttr("axis"));
...@@ -42,6 +44,8 @@ class ConcatOpConverter : public OpConverter { ...@@ -42,6 +44,8 @@ class ConcatOpConverter : public OpConverter {
axis = axis - 1; // Remove batch dim axis = axis - 1; // Remove batch dim
layer->setAxis(axis); layer->setAxis(axis);
auto output_name = op_desc.Output("Out")[0]; auto output_name = op_desc.Output("Out")[0];
layer->setName(("concat (Output: " + output_name + ")").c_str());
layer->getOutput(0)->setName(output_name.c_str());
engine_->SetITensor(output_name, layer->getOutput(0)); engine_->SetITensor(output_name, layer->getOutput(0));
if (test_mode) { // the test framework can not determine which is the if (test_mode) { // the test framework can not determine which is the
// output, so place the declaration inside. // output, so place the declaration inside.
......
...@@ -26,6 +26,9 @@ class Conv2dOpConverter : public OpConverter { ...@@ -26,6 +26,9 @@ class Conv2dOpConverter : public OpConverter {
<< "convert a fluid conv2d op to tensorrt conv layer without bias"; << "convert a fluid conv2d op to tensorrt conv layer without bias";
framework::OpDesc op_desc(op, nullptr); framework::OpDesc op_desc(op, nullptr);
std::cout << "Conv op: " << std::endl;
std::cout << op_desc.Input("Input").front() << std::endl;
std::cout << op_desc.Output("Output").front() << std::endl;
PADDLE_ENFORCE_EQ(op_desc.Input("Input").size(), 1); PADDLE_ENFORCE_EQ(op_desc.Input("Input").size(), 1);
PADDLE_ENFORCE_EQ(op_desc.Input("Filter").size(), 1); // Y is a weight PADDLE_ENFORCE_EQ(op_desc.Input("Filter").size(), 1); // Y is a weight
PADDLE_ENFORCE_EQ(op_desc.Output("Output").size(), 1); PADDLE_ENFORCE_EQ(op_desc.Output("Output").size(), 1);
...@@ -78,8 +81,10 @@ class Conv2dOpConverter : public OpConverter { ...@@ -78,8 +81,10 @@ class Conv2dOpConverter : public OpConverter {
layer->setNbGroups(groups); layer->setNbGroups(groups);
auto output_name = op_desc.Output("Output").front(); auto output_name = op_desc.Output("Output").front();
layer->setName(("conv2d (Output: " + output_name + ")").c_str());
engine_->weight_map[op_desc.Input("Filter").front()] = engine_->weight_map[op_desc.Input("Filter").front()] =
std::move(weight_tensor); std::move(weight_tensor);
layer->getOutput(0)->setName(output_name.c_str());
engine_->SetITensor(output_name, layer->getOutput(0)); engine_->SetITensor(output_name, layer->getOutput(0));
if (test_mode) { if (test_mode) {
engine_->DeclareOutput(output_name); engine_->DeclareOutput(output_name);
......
...@@ -89,6 +89,8 @@ class ElementwiseWeightOpConverter : public OpConverter { ...@@ -89,6 +89,8 @@ class ElementwiseWeightOpConverter : public OpConverter {
shift_weights.get(), scale_weights.get(), power_weights.get()); shift_weights.get(), scale_weights.get(), power_weights.get());
auto output_name = op_desc.Output("Out")[0]; auto output_name = op_desc.Output("Out")[0];
layer->setName(("elementwise_add (Output: " + output_name + ")").c_str());
layer->getOutput(0)->setName(output_name.c_str());
engine_->weight_map[op_desc.Input("Y").front()] = std::move(weight_tensor); engine_->weight_map[op_desc.Input("Y").front()] = std::move(weight_tensor);
engine_->SetITensor(output_name, layer->getOutput(0)); engine_->SetITensor(output_name, layer->getOutput(0));
if (test_mode) { // the test framework can not determine which is the if (test_mode) { // the test framework can not determine which is the
...@@ -137,6 +139,8 @@ class ElementwiseTensorOpConverter : public OpConverter { ...@@ -137,6 +139,8 @@ class ElementwiseTensorOpConverter : public OpConverter {
*const_cast<nvinfer1::ITensor*>(Y), op_pair->second); *const_cast<nvinfer1::ITensor*>(Y), op_pair->second);
auto output_name = op_desc.Output("Out")[0]; auto output_name = op_desc.Output("Out")[0];
layer->setName(("elementwise (Output: " + output_name + ")").c_str());
layer->getOutput(0)->setName(output_name.c_str());
engine_->SetITensor(output_name, layer->getOutput(0)); engine_->SetITensor(output_name, layer->getOutput(0));
if (test_mode) { // the test framework can not determine which is the if (test_mode) { // the test framework can not determine which is the
// output, so place the declaration inside. // output, so place the declaration inside.
......
...@@ -107,6 +107,8 @@ class FcOpConverter : public OpConverter { ...@@ -107,6 +107,8 @@ class FcOpConverter : public OpConverter {
n_output, tmp_weight.get(), bias.get()); n_output, tmp_weight.get(), bias.get());
auto output_name = op_desc.Output("Out").front(); auto output_name = op_desc.Output("Out").front();
layer->setName(("fc (Output: " + output_name + ")").c_str());
layer->getOutput(0)->setName(output_name.c_str());
engine_->SetITensor(output_name, layer->getOutput(0)); engine_->SetITensor(output_name, layer->getOutput(0));
engine_->weight_map[op_desc.Input("Y").front()] = std::move(tmp); engine_->weight_map[op_desc.Input("Y").front()] = std::move(tmp);
if (test_mode) { if (test_mode) {
......
...@@ -72,6 +72,8 @@ class Pool2dOpConverter : public OpConverter { ...@@ -72,6 +72,8 @@ class Pool2dOpConverter : public OpConverter {
layer->setPadding(nv_paddings); layer->setPadding(nv_paddings);
auto output_name = op_desc.Output("Out")[0]; auto output_name = op_desc.Output("Out")[0];
layer->setName(("pool2d (Output: " + output_name + ")").c_str());
layer->getOutput(0)->setName(output_name.c_str());
engine_->SetITensor(output_name, layer->getOutput(0)); engine_->SetITensor(output_name, layer->getOutput(0));
if (test_mode) { if (test_mode) {
engine_->DeclareOutput(output_name); engine_->DeclareOutput(output_name);
......
...@@ -161,20 +161,6 @@ class TensorRTEngineKernel : public framework::OpKernel<T> { ...@@ -161,20 +161,6 @@ class TensorRTEngineKernel : public framework::OpKernel<T> {
boost::get<platform::CUDAPlace>(context.GetPlace()).device)), boost::get<platform::CUDAPlace>(context.GetPlace()).device)),
size * sizeof(float)); size * sizeof(float));
// TODO(zhaolong): delete this debug code at some point
/* THIS CODE JUST FOR TEST
std::cout << output_maps[output_index] << std::endl;
platform::CPUPlace cpu_place;
framework::LoDTensor temp_tensor;
temp_tensor.Resize(framework::make_ddim(ddim));
auto* temp_data = temp_tensor.mutable_data<float>(cpu_place);
TensorCopySync(*fluid_t, cpu_place ,&temp_tensor);
for(int i = 0; i < size; i++) {
std::cout << temp_data[i] << " " ;
}
std::cout << std::endl;
*/
output_index += 1; output_index += 1;
} }
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册