Commit 49bafc05 authored by N nhzlx

fix comments and set name for trt layer and ITensor

Parent df161e08
......@@ -85,6 +85,14 @@ struct BriefNode {
std::vector<BriefNode *> outlinks;
};
// Union two adjacent BriefNodes.
// Suppose we have two adjacent nodes src and dst.
// We will perform the following operations:
// 1. add all inputs (except src) of dst to src's inlinks.
// 2. add all outputs of dst to src's outlinks.
// 3. in every input and output node of dst, redirect the
//    corresponding outlinks and inlinks from dst to src.
// 4. delete all of dst's inlinks and outlinks.
void UnionContractedNodes(const std::unordered_map<int, BriefNode *> &node_map,
int src_id, int dst_id) {
// merge the two adjacent nodes into one node.
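The four numbered steps in the comment above amount to a standard edge contraction on the dependency graph. Below is a minimal standalone sketch of the same operation; `DemoNode` and `Contract` are illustrative stand-ins, not code from this commit:

```cpp
#include <algorithm>
#include <vector>

// Hypothetical stand-in for BriefNode, just to illustrate the contraction.
struct DemoNode {
  int id;
  std::vector<DemoNode*> inlinks;
  std::vector<DemoNode*> outlinks;
};

// Contract dst into src, mirroring the four steps in the comment above.
void Contract(DemoNode* src, DemoNode* dst) {
  // 1. Inputs of dst (except src) become inputs of src.
  for (auto* in : dst->inlinks) {
    if (in != src) src->inlinks.push_back(in);
  }
  // 2. Outputs of dst become outputs of src.
  for (auto* out : dst->outlinks) src->outlinks.push_back(out);
  // 3. Neighbors of dst now point at src instead of dst.
  for (auto* in : dst->inlinks) {
    if (in == src) continue;
    std::replace(in->outlinks.begin(), in->outlinks.end(), dst, src);
  }
  for (auto* out : dst->outlinks) {
    std::replace(out->inlinks.begin(), out->inlinks.end(), dst, src);
  }
  // 4. dst keeps no edges of its own.
  dst->inlinks.clear();
  dst->outlinks.clear();
  // Also drop the contracted edge src -> dst from src's outlinks.
  src->outlinks.erase(
      std::remove(src->outlinks.begin(), src->outlinks.end(), dst),
      src->outlinks.end());
}
```

A real implementation would also deduplicate links; the sketch keeps only the contraction logic itself.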
......@@ -224,8 +232,8 @@ std::vector<std::vector<Node *>> SubGraphSplitter::ExtractSubGraphs() {
// Our algorithm must guarantee that:
// 1. The graph is always directed acyclic graph(DAG).
// 2. If there is a path in the subgraph from X to Y (X and Y are both
// nodes
// in the subgraph), then all paths from X to Y are in the subgraph.
// nodes in the subgraph), then all paths from X to Y are in the
// subgraph.
//
// To achieve the above guarantee, for adjacent nodes src -> dst:
......
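Guarantee 2 above (path closure) matters because a path that leaves the subgraph and re-enters it would, after the subgraph is fused into a single TensorRT node, create a cycle and violate guarantee 1. A hypothetical checker for the invariant, reusing the `DemoNode` stand-in from the sketch above:

```cpp
#include <unordered_set>

// True if, starting from `cur` (a node already outside `sub`), some
// directed path reaches a node of `sub` again.
bool ReachesSubgraph(DemoNode* cur, const std::unordered_set<DemoNode*>& sub,
                     std::unordered_set<DemoNode*>* visited) {
  if (!visited->insert(cur).second) return false;  // already explored
  for (auto* next : cur->outlinks) {
    if (sub.count(next)) return true;  // left the subgraph and came back
    if (ReachesSubgraph(next, sub, visited)) return true;
  }
  return false;
}

// Guarantee 2 holds iff no path leaves `sub` and re-enters it.
bool PathClosed(const std::unordered_set<DemoNode*>& sub) {
  std::unordered_set<DemoNode*> visited;
  for (auto* node : sub) {
    for (auto* next : node->outlinks) {
      if (sub.count(next) == 0 && ReachesSubgraph(next, sub, &visited)) {
        return false;
      }
    }
  }
  return true;
}
```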
......@@ -35,6 +35,8 @@ class ReluOpConverter : public OpConverter {
engine_, Activation, *const_cast<nvinfer1::ITensor*>(input_tensor),
nvinfer1::ActivationType::kRELU);
auto output_name = op_desc.Output("Out")[0];
layer->setName(("relu (Output: " + output_name + ")").c_str());
layer->getOutput(0)->setName(output_name.c_str());
engine_->SetITensor(output_name, layer->getOutput(0));
if (test_mode) { // the test framework can not determine which is the
// output, so place the declaration inside.
......
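This two-line pattern — naming the layer after its op type and naming its first output ITensor after the fluid output variable — recurs in every converter touched by this commit (batch_norm, concat, conv2d, elementwise, fc, and pool2d below). A hypothetical helper that captures the pattern; `SetLayerAndOutputName` is an illustration, not a function in this commit:

```cpp
#include <string>
#include "NvInfer.h"

// Hypothetical helper: name a TensorRT layer after its op type and name its
// first output ITensor after the fluid variable it produces. TensorRT copies
// the string passed to setName, so the temporaries here are safe.
void SetLayerAndOutputName(nvinfer1::ILayer* layer, const std::string& op_type,
                           const std::string& output_name) {
  layer->setName((op_type + " (Output: " + output_name + ")").c_str());
  layer->getOutput(0)->setName(output_name.c_str());
}
```

With such a helper, the hunk above would reduce to `SetLayerAndOutputName(layer, "relu", output_name);`. Readable names pay off because `engine_->SetITensor` keys tensors by variable name, and TensorRT reports layers by their `setName` string in logs and profiles.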
......@@ -116,6 +116,8 @@ class BatchNormOpConverter : public OpConverter {
scale_weights.get(), power_weights.get());
auto output_name = op_desc.Output("Y").front();
layer->setName(("batch_norm (Output: " + output_name + ")").c_str());
layer->getOutput(0)->setName(output_name.c_str());
engine_->weight_map[op_desc.Input("Bias").front()] =
std::move(combile_bias_tensor);
engine_->weight_map[op_desc.Input("Scale").front()] =
......
......@@ -30,7 +30,9 @@ class ConcatOpConverter : public OpConverter {
framework::OpDesc op_desc(op, nullptr);
// Declare inputs
std::vector<nvinfer1::ITensor*> itensors;
std::cout << "Concat op: " << std::endl;
for (auto& input_name : op_desc.Input("X")) {
std::cout << input_name << std::endl;
itensors.push_back(engine_->GetITensor(input_name));
}
int axis = boost::get<int>(op_desc.GetAttr("axis"));
......@@ -42,6 +44,8 @@ class ConcatOpConverter : public OpConverter {
axis = axis - 1; // Remove batch dim
layer->setAxis(axis);
auto output_name = op_desc.Output("Out")[0];
layer->setName(("concat (Output: " + output_name + ")").c_str());
layer->getOutput(0)->setName(output_name.c_str());
engine_->SetITensor(output_name, layer->getOutput(0));
if (test_mode) { // the test framework can not determine which is the
// output, so place the declaration inside.
......
......@@ -26,6 +26,9 @@ class Conv2dOpConverter : public OpConverter {
<< "convert a fluid conv2d op to tensorrt conv layer without bias";
framework::OpDesc op_desc(op, nullptr);
std::cout << "Conv op: " << std::endl;
std::cout << op_desc.Input("Input").front() << std::endl;
std::cout << op_desc.Output("Output").front() << std::endl;
PADDLE_ENFORCE_EQ(op_desc.Input("Input").size(), 1);
PADDLE_ENFORCE_EQ(op_desc.Input("Filter").size(), 1); // Y is a weight
PADDLE_ENFORCE_EQ(op_desc.Output("Output").size(), 1);
......@@ -78,8 +81,10 @@ class Conv2dOpConverter : public OpConverter {
layer->setNbGroups(groups);
auto output_name = op_desc.Output("Output").front();
layer->setName(("conv2d (Output: " + output_name + ")").c_str());
engine_->weight_map[op_desc.Input("Filter").front()] =
std::move(weight_tensor);
layer->getOutput(0)->setName(output_name.c_str());
engine_->SetITensor(output_name, layer->getOutput(0));
if (test_mode) {
engine_->DeclareOutput(output_name);
......
......@@ -89,6 +89,8 @@ class ElementwiseWeightOpConverter : public OpConverter {
shift_weights.get(), scale_weights.get(), power_weights.get());
auto output_name = op_desc.Output("Out")[0];
layer->setName(("elementwise_add (Output: " + output_name + ")").c_str());
layer->getOutput(0)->setName(output_name.c_str());
engine_->weight_map[op_desc.Input("Y").front()] = std::move(weight_tensor);
engine_->SetITensor(output_name, layer->getOutput(0));
if (test_mode) { // the test framework can not determine which is the
......@@ -137,6 +139,8 @@ class ElementwiseTensorOpConverter : public OpConverter {
*const_cast<nvinfer1::ITensor*>(Y), op_pair->second);
auto output_name = op_desc.Output("Out")[0];
layer->setName(("elementwise (Output: " + output_name + ")").c_str());
layer->getOutput(0)->setName(output_name.c_str());
engine_->SetITensor(output_name, layer->getOutput(0));
if (test_mode) { // the test framework can not determine which is the
// output, so place the declaration inside.
......
......@@ -107,6 +107,8 @@ class FcOpConverter : public OpConverter {
n_output, tmp_weight.get(), bias.get());
auto output_name = op_desc.Output("Out").front();
layer->setName(("fc (Output: " + output_name + ")").c_str());
layer->getOutput(0)->setName(output_name.c_str());
engine_->SetITensor(output_name, layer->getOutput(0));
engine_->weight_map[op_desc.Input("Y").front()] = std::move(tmp);
if (test_mode) {
......
......@@ -72,6 +72,8 @@ class Pool2dOpConverter : public OpConverter {
layer->setPadding(nv_paddings);
auto output_name = op_desc.Output("Out")[0];
layer->setName(("pool2d (Output: " + output_name + ")").c_str());
layer->getOutput(0)->setName(output_name.c_str());
engine_->SetITensor(output_name, layer->getOutput(0));
if (test_mode) {
engine_->DeclareOutput(output_name);
......
......@@ -161,20 +161,6 @@ class TensorRTEngineKernel : public framework::OpKernel<T> {
boost::get<platform::CUDAPlace>(context.GetPlace()).device)),
size * sizeof(float));
// TODO(zhaolong) : delete it sometimes
/* THIS CODE JUST FOR TEST
std::cout << output_maps[output_index] << std::endl;
platform::CPUPlace cpu_place;
framework::LoDTensor temp_tensor;
temp_tensor.Resize(framework::make_ddim(ddim));
auto* temp_data = temp_tensor.mutable_data<float>(cpu_place);
TensorCopySync(*fluid_t, cpu_place ,&temp_tensor);
for(int i = 0; i < size; i++) {
std::cout << temp_data[i] << " " ;
}
std::cout << std::endl;
*/
output_index += 1;
}
......
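The block deleted in this last hunk was an ad-hoc inspection of the engine output: copy the GPU tensor to the CPU and print every element. If that kind of dump is needed again, a self-contained version might look like the following sketch (assuming the usual paddle::framework headers; `DumpTensor` is hypothetical):

```cpp
#include <iostream>
#include "paddle/fluid/framework/lod_tensor.h"
#include "paddle/fluid/framework/tensor_util.h"
#include "paddle/fluid/platform/place.h"

// Hypothetical debugging helper: synchronously copy a (possibly
// GPU-resident) tensor to the CPU and print its float values.
void DumpTensor(const paddle::framework::LoDTensor& src) {
  paddle::platform::CPUPlace cpu_place;
  paddle::framework::LoDTensor cpu_tensor;
  // TensorCopySync resizes and allocates the destination itself.
  paddle::framework::TensorCopySync(src, cpu_place, &cpu_tensor);
  const float* data = cpu_tensor.data<float>();
  for (int64_t i = 0; i < cpu_tensor.numel(); ++i) {
    std::cout << data[i] << " ";
  }
  std::cout << std::endl;
}
```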