Commit 03acac2b, authored by zlsh80826

merge stack op

@@ -45,6 +45,26 @@ inline void InitVarsInScope(const std::vector<VarInfo> &var_infos, Scope *scope,
 // get CommContext and remote send and recv op
 void ProcessGraph(std::vector<ir::Graph *> graphs, Scope *scope) {
 #ifdef PADDLE_WITH_DISTRIBUTE
+  bool need_communicator = false;
+  for (auto &node : graphs[0]->Nodes()) {
+    VLOG(3) << "node name " << node->Name();
+    if (node && node->IsOp()) {
+      if (node->Name() == "send") {
+        auto send_varnames =
+            BOOST_GET_CONST(std::vector<std::string>,
+                            node->Op()->GetNullableAttr("send_varnames"));
+        if (send_varnames.size() > 0) {
+          need_communicator = true;
+          break;
+        }
+      }
+    }
+  }
+  if (need_communicator) {
     // init communicator here
     auto *instance = operators::distributed::Communicator::GetInstance();
     auto initialized = instance ? true : false;
@@ -53,6 +73,7 @@ void ProcessGraph(std::vector<ir::Graph *> graphs, Scope *scope) {
         "Communicator is not Initialized, you may use "
         "FleetAPI(https://github.com/PaddlePaddle/Fleet/tree/"
        "develop/markdown_doc/transpiler)"));
+  }
 #endif
 }
...
@@ -19,6 +19,6 @@ else()
   cc_library(gloo_wrapper SRCS gloo_wrapper.cc DEPS framework_proto variable_helper scope)
 endif(WITH_GLOO)
-cc_library(heter_wrapper SRCS heter_wrapper.cc DEPS framework_proto)
+cc_library(heter_wrapper SRCS heter_wrapper.cc DEPS framework_proto device_context)
 cc_test(test_fleet SRCS test_fleet.cc DEPS fleet_wrapper gloo_wrapper fs shell)
@@ -19,6 +19,7 @@
 #include <vector>
 #include "paddle/fluid/framework/eigen.h"
 #include "paddle/fluid/platform/errors.h"
+#include "paddle/fluid/platform/mkldnn_helper.h"
 #include "paddle/fluid/string/pretty_log.h"
 namespace paddle {
@@ -54,7 +55,7 @@ void LogQuantizationDisabled(Node* op) {
   std::stringstream msg_ss;
   VLOG(4) << "Quantization skipped for operator " << op->Name()
           << " (type: " << op->Op()->Type() << ", id: " << op->id()
-          << "). Attribute use_quantizer = false.";
+          << "). Attribute mkldnn_data_type != \"int8\".";
 }
 }  // namespace
@@ -228,12 +229,12 @@ double CPUQuantizePass::GetScaleValueForNode(const Node* node,
 bool CPUQuantizePass::IsOpDequantized(const Node* node) const {
   return node->Op()->Type() == "dequantize" ||
-         node->Op()->GetAttrIfExists<bool>("use_quantizer");
+         platform::HasOpINT8DataType(node->Op());
 }
 bool CPUQuantizePass::IsOpQuantized(const Node* node) const {
   return node->Op()->Type() == "quantize" ||
-         node->Op()->GetAttrIfExists<bool>("use_quantizer");
+         platform::HasOpINT8DataType(node->Op());
 }
 void CPUQuantizePass::QuantizeConv(Graph* graph,
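The predicate platform::HasOpINT8DataType used throughout this pass is not defined anywhere in this commit view. Judging from the updated log message above, which reports Attribute mkldnn_data_type != "int8", a minimal sketch of what the helper presumably does is:

    // Hypothetical sketch only -- the real helper lives in
    // paddle/fluid/platform/mkldnn_helper.h and is not shown in this diff.
    // It is assumed to read the new string attribute introduced by this
    // commit and compare it against "int8".
    inline bool HasOpINT8DataType(const paddle::framework::OpDesc* op) {
      return op->GetAttrIfExists<std::string>("mkldnn_data_type") == "int8";
    }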
@@ -248,10 +249,9 @@ void CPUQuantizePass::QuantizeConv(Graph* graph,
                     Graph* g) {
     VLOG(4) << "Quantize conv2d op";
     GET_IR_NODE_FROM_SUBGRAPH(conv_op, conv_op, conv_pattern);
-    auto* conv_op_desc = conv_op->Op();
     // skip if should not be quantized
-    if (!conv_op_desc->GetAttrIfExists<bool>("use_quantizer")) {
+    if (!platform::HasOpINT8DataType(conv_op->Op())) {
       LogQuantizationDisabled(conv_op);
       return;
     }
@@ -353,14 +353,13 @@ void CPUQuantizePass::QuantizeFc(Graph* graph) const {
                     Graph* g) {
     VLOG(4) << "Quantize fc op";
     GET_IR_NODE_FROM_SUBGRAPH(fc, fc, fc_pattern);
-    auto* fc_op_desc = fc->Op();
     // skip if should not be quantized
-    if (!fc_op_desc->GetAttrIfExists<bool>("use_quantizer")) {
+    if (!platform::HasOpINT8DataType(fc->Op())) {
       LogQuantizationDisabled(fc);
       return;
     }
-    if (!fc_op_desc->GetAttrIfExists<bool>("use_mkldnn")) {
+    if (!fc->Op()->GetAttrIfExists<bool>("use_mkldnn")) {
       return;
     }
@@ -420,10 +419,9 @@ void CPUQuantizePass::QuantizePool(Graph* graph) const {
                     Graph* g) {
     VLOG(4) << "Quantize pool2d op";
     GET_IR_NODE_FROM_SUBGRAPH(pool_op, pool_op, pool_pattern);
-    auto* pool_op_desc = pool_op->Op();
     // skip if should not be quantized
-    if (!pool_op_desc->GetAttrIfExists<bool>("use_quantizer")) {
+    if (!platform::HasOpINT8DataType(pool_op->Op())) {
       LogQuantizationDisabled(pool_op);
       return;
     }
@@ -465,10 +463,9 @@ void CPUQuantizePass::QuantizeConcat(Graph* graph) const {
                     Graph* g) {
     VLOG(4) << "Quantize concat op";
     GET_IR_NODE_FROM_SUBGRAPH(concat_op, concat_op, concat_pattern);
-    auto* concat_op_desc = concat_op->Op();
     // skip if should not be quantized
-    if (!concat_op_desc->GetAttrIfExists<bool>("use_quantizer")) {
+    if (!platform::HasOpINT8DataType(concat_op->Op())) {
       LogQuantizationDisabled(concat_op);
       return;
     }
@@ -511,10 +508,9 @@ void CPUQuantizePass::QuantizePriorBox(Graph* graph) const {
                     Graph* g) {
     VLOG(4) << "Quantize prior_box op";
     GET_IR_NODE_FROM_SUBGRAPH(prior_box_op, prior_box_op, prior_box_pattern);
-    auto* prior_box_op_desc = prior_box_op->Op();
     // skip if should not be quantized
-    if (!prior_box_op_desc->GetAttrIfExists<bool>("use_quantizer")) {
+    if (!platform::HasOpINT8DataType(prior_box_op->Op())) {
       LogQuantizationDisabled(prior_box_op);
       return;
     }
@@ -554,10 +550,9 @@ void CPUQuantizePass::QuantizeTranspose(Graph* graph) const {
                     Graph* g) {
     VLOG(4) << "Quantize transpose op";
     GET_IR_NODE_FROM_SUBGRAPH(transpose_op, transpose_op, transpose_pattern);
-    auto* transpose_op_desc = transpose_op->Op();
     // skip if should not be quantized
-    if (!transpose_op_desc->GetAttrIfExists<bool>("use_quantizer")) {
+    if (!platform::HasOpINT8DataType(transpose_op->Op())) {
       LogQuantizationDisabled(transpose_op);
       return;
     }
@@ -609,10 +604,9 @@ void CPUQuantizePass::QuantizeReshape(Graph* graph) const {
                     Graph* g) {
     VLOG(4) << "Quantize reshape op";
     GET_IR_NODE_FROM_SUBGRAPH(reshape_op, reshape_op, reshape_pattern);
-    auto* reshape_op_desc = reshape_op->Op();
     // skip if should not be quantized
-    if (!reshape_op_desc->GetAttrIfExists<bool>("use_quantizer")) {
+    if (!platform::HasOpINT8DataType(reshape_op->Op())) {
       LogQuantizationDisabled(reshape_op);
       return;
     }
@@ -662,10 +656,9 @@ void CPUQuantizePass::QuantizeMatmul(Graph* graph) const {
                     Graph* g) {
     VLOG(4) << "Quantize matmul op";
     GET_IR_NODE_FROM_SUBGRAPH(matmul_op, matmul_op, matmul_pattern);
-    auto* matmul_op_desc = matmul_op->Op();
     // skip if should not be quantized
-    if (!matmul_op_desc->GetAttrIfExists<bool>("use_quantizer")) {
+    if (!platform::HasOpINT8DataType(matmul_op->Op())) {
       LogQuantizationDisabled(matmul_op);
       return;
     }
@@ -732,10 +725,9 @@ void CPUQuantizePass::QuantizeElementwiseAdd(Graph* graph) const {
     VLOG(4) << "Quantize elementwise_add op";
     GET_IR_NODE_FROM_SUBGRAPH(elementwise_add_op, elementwise_add_op,
                               elementwise_add_pattern);
-    auto* elementwise_add_op_desc = elementwise_add_op->Op();
     // skip if should not be quantized
-    if (!elementwise_add_op_desc->GetAttrIfExists<bool>("use_quantizer")) {
+    if (!platform::HasOpINT8DataType(elementwise_add_op->Op())) {
       LogQuantizationDisabled(elementwise_add_op);
       return;
     }
...
@@ -26,7 +26,7 @@ namespace ir {
 void SetOp(ProgramDesc* prog, const std::string& type, const std::string& name,
            const std::vector<std::string>& inputs,
            const std::vector<std::string>& outputs, bool use_mkldnn,
-           bool use_quantizer = false) {
+           const std::string& mkldnn_data_type = "float32") {
   auto* op = prog->MutableBlock(0)->AppendOp();
   op->SetType(type);
   op->SetAttr("use_mkldnn", use_mkldnn);
@@ -47,14 +47,14 @@ void SetOp(ProgramDesc* prog, const std::string& type, const std::string& name,
       op->SetAttr("fuse_residual_connection", false);
     }
     op->SetOutput("Output", {outputs[0]});
-    op->SetAttr("use_quantizer", use_quantizer);
+    op->SetAttr("mkldnn_data_type", mkldnn_data_type);
     op->SetAttr("Scale_in", 1.0f);
     op->SetAttr("Scale_out", 1.0f);
     op->SetAttr("Scale_weights", std::vector<float>{1.0f});
   } else if (type == "pool2d" || type == "transpose2" || type == "reshape2") {
     op->SetInput("X", {inputs[0]});
     op->SetOutput("Out", {outputs[0]});
-    op->SetAttr("use_quantizer", use_quantizer);
+    op->SetAttr("mkldnn_data_type", mkldnn_data_type);
   } else if (type == "dropout") {
     op->SetInput("X", {inputs[0]});
     op->SetOutput("Out", {outputs[0]});
@@ -63,14 +63,14 @@ void SetOp(ProgramDesc* prog, const std::string& type, const std::string& name,
     if (inputs.size() > 1) op->SetInput("W", {inputs[1]});
     if (inputs.size() > 2) op->SetInput("Bias", {inputs[2]});
     op->SetOutput("Out", {outputs[0]});
-    op->SetAttr("use_quantizer", use_quantizer);
+    op->SetAttr("mkldnn_data_type", mkldnn_data_type);
     op->SetAttr("Scale_in", 1.0f);
     op->SetAttr("Scale_out", 1.0f);
     op->SetAttr("Scale_weights", std::vector<float>{1.0f});
   } else if (type == "concat") {
     op->SetInput("X", inputs);
     op->SetOutput("Out", outputs);
-    op->SetAttr("use_quantizer", use_quantizer);
+    op->SetAttr("mkldnn_data_type", mkldnn_data_type);
   } else if (type == "dequantize") {
     op->SetInput("Input", {inputs[0]});
     op->SetOutput("Output", {outputs[0]});
@@ -79,7 +79,7 @@ void SetOp(ProgramDesc* prog, const std::string& type, const std::string& name,
     op->SetInput("X", {inputs[0]});
     if (inputs.size() > 1) op->SetInput("Y", {inputs[1]});
     op->SetOutput("Out", {outputs[0]});
-    op->SetAttr("use_quantizer", use_quantizer);
+    op->SetAttr("mkldnn_data_type", mkldnn_data_type);
     op->SetAttr("Scale_x", 1.0f);
     op->SetAttr("Scale_y", 1.0f);
     op->SetAttr("Scale_out", 1.0f);
@@ -87,7 +87,7 @@ void SetOp(ProgramDesc* prog, const std::string& type, const std::string& name,
     op->SetInput("X", {inputs[0]});
     if (inputs.size() > 1) op->SetInput("Y", {inputs[1]});
     op->SetOutput("Out", {outputs[0]});
-    op->SetAttr("use_quantizer", use_quantizer);
+    op->SetAttr("mkldnn_data_type", mkldnn_data_type);
     op->SetAttr("Scale_x", 1.0f);
     op->SetAttr("Scale_y", 1.0f);
     op->SetAttr("Scale_out", 1.0f);
@@ -142,7 +142,8 @@ static const std::initializer_list<std::string> variable_names{
 // d->Dropout1->g and (g, w5, b3)->Fc1->h and (h,w3,b1,i)->Conv3->j
 //
 // (d,w4, b2)->Conv4->i
-ProgramDesc BuildProgramDesc(bool use_mkldnn, bool use_quantizer) {
+ProgramDesc BuildProgramDesc(bool use_mkldnn,
+                             const std::string& mkldnn_data_type) {
   ProgramDesc prog;
   for (auto& v : variable_names) {
     auto* var = prog.MutableBlock(0)->Var(v);
@@ -152,21 +153,21 @@ ProgramDesc BuildProgramDesc(bool use_mkldnn, bool use_quantizer) {
   }
   SetOp(&prog, "conv2d", "Conv1", {"a", "w1"}, {"c"}, use_mkldnn,
-        use_quantizer);
-  SetOp(&prog, "pool2d", "Pool1", {"c"}, {"d"}, use_mkldnn, use_quantizer);
+        mkldnn_data_type);
+  SetOp(&prog, "pool2d", "Pool1", {"c"}, {"d"}, use_mkldnn, mkldnn_data_type);
   SetOp(&prog, "conv2d", "Conv2", {"d", "w2"}, {"e"}, use_mkldnn,
-        use_quantizer);
-  SetOp(&prog, "pool2d", "Pool2", {"e"}, {"f"}, use_mkldnn, use_quantizer);
+        mkldnn_data_type);
+  SetOp(&prog, "pool2d", "Pool2", {"e"}, {"f"}, use_mkldnn, mkldnn_data_type);
   SetOp(&prog, "dropout", "Dropout1", {"d"}, {"g"}, use_mkldnn);
   SetOp(&prog, "fc", "Fc1", {"g", "w5", "b3"}, {"h"}, use_mkldnn,
-        use_quantizer);
+        mkldnn_data_type);
   SetOp(&prog, "conv2d", "Conv3", {"h", "w3", "b1", "i"}, {"j"}, use_mkldnn,
-        use_quantizer);
+        mkldnn_data_type);
   SetOp(&prog, "conv2d", "Conv4", {"c", "w4", "b2"}, {"i"}, use_mkldnn,
-        use_quantizer);
+        mkldnn_data_type);
   return prog;
 }
@@ -215,7 +216,7 @@ void MainTest(const ProgramDesc& prog, int conv_count, int pool_count,
 TEST(CpuQuantizePass, quantize) {
   bool use_mkldnn = true;
-  bool use_quantizer = true;
+  std::string mkldnn_data_type = "int8";
   // (a->QUANT1->IN1,w1)->Conv1->OUT1->DEQUANT1->c and
   // c->QUANT2->IN2->Pool1->OUT2->DEQUANT2->d
   //
@@ -228,16 +229,16 @@ TEST(CpuQuantizePass, quantize) {
   // (d->QUANT7->IN7,w4, b2)->Conv4->DEQUANT6->OUT6->i
   // Insert nodes: 8 Quant + 8 IN + 7 OUT + 7 DEQUANT
   int added_nodes = 8 + 8 + 7 + 7;
-  MainTest(BuildProgramDesc(use_mkldnn, use_quantizer), 4, 2, 8, 7, added_nodes,
-           2.0f * 127);
+  MainTest(BuildProgramDesc(use_mkldnn, mkldnn_data_type), 4, 2, 8, 7,
+           added_nodes, 2.0f * 127);
 }
 TEST(CpuQuantizePass, do_not_quantize) {
   bool use_mkldnn = true;
-  bool use_quantizer = false;
+  std::string mkldnn_data_type = "float32";
   int added_nodes = 0;
-  MainTest(BuildProgramDesc(use_mkldnn, use_quantizer), 4, 2, 0, 0, added_nodes,
-           1.0f);
+  MainTest(BuildProgramDesc(use_mkldnn, mkldnn_data_type), 4, 2, 0, 0,
+           added_nodes, 1.0f);
 }
 static const std::initializer_list<std::string> variable_names_concat = {
@@ -250,10 +251,10 @@ static const std::initializer_list<std::string> variable_names_concat = {
 ProgramDesc BuildProgramDescConcat() {
   ProgramDesc prog;
-  SetOp(&prog, "pool2d", "Pool1", {"a1"}, {"b1"}, true, false);
-  SetOp(&prog, "pool2d", "Pool2", {"a2"}, {"b2"}, true, false);
-  SetOp(&prog, "concat", "Concat", {"b1", "b2"}, {"c"}, true, true);
-  SetOp(&prog, "pool2d", "Pool3", {"c"}, {"d"}, true, false);
+  SetOp(&prog, "pool2d", "Pool1", {"a1"}, {"b1"}, true, "float32");
+  SetOp(&prog, "pool2d", "Pool2", {"a2"}, {"b2"}, true, "float32");
+  SetOp(&prog, "concat", "Concat", {"b1", "b2"}, {"c"}, true, "int8");
+  SetOp(&prog, "pool2d", "Pool3", {"c"}, {"d"}, true, "float32");
   return prog;
 }
@@ -321,11 +322,11 @@ ProgramDesc BuildProgramDescTranspose() {
     }
   }
-  SetOp(&prog, "conv2d", "Conv1", {"a", "w1"}, {"b"}, true, true);
-  SetOp(&prog, "transpose2", "Transpose1", {"b"}, {"c"}, true, true);
-  SetOp(&prog, "conv2d", "Conv1", {"c", "w2"}, {"d"}, true, true);
-  SetOp(&prog, "transpose2", "Transpose2", {"d"}, {"e"}, true, true);
-  SetOp(&prog, "dropout", "Dropout", {"e"}, {"f"}, true, false);
+  SetOp(&prog, "conv2d", "Conv1", {"a", "w1"}, {"b"}, true, "int8");
+  SetOp(&prog, "transpose2", "Transpose1", {"b"}, {"c"}, true, "int8");
+  SetOp(&prog, "conv2d", "Conv1", {"c", "w2"}, {"d"}, true, "int8");
+  SetOp(&prog, "transpose2", "Transpose2", {"d"}, {"e"}, true, "int8");
+  SetOp(&prog, "dropout", "Dropout", {"e"}, {"f"}, true, "float32");
   return prog;
 }
@@ -400,8 +401,8 @@ ProgramDesc BuildProgramDescReshape() {
     prog.MutableBlock(0)->Var(v);
   }
   SetOp(&prog, "dequantize", "Dequantize1", {"a"}, {"b"}, true);
-  SetOp(&prog, "reshape2", "Reshape2", {"b"}, {"c"}, true, true);
-  SetOp(&prog, "dropout", "Dropout", {"c"}, {"d"}, true, false);
+  SetOp(&prog, "reshape2", "Reshape2", {"b"}, {"c"}, true, "int8");
+  SetOp(&prog, "dropout", "Dropout", {"c"}, {"d"}, true, "float32");
   return prog;
 }
@@ -415,9 +416,9 @@ ProgramDesc BuildProgramDescReshapeBetweenNonQuantizedOp() {
     prog.MutableBlock(0)->Var(v);
   }
-  SetOp(&prog, "transpose2", "Transpose2", {"a"}, {"b"}, true, false);
-  SetOp(&prog, "reshape2", "Reshape2", {"b"}, {"c"}, true, true);
-  SetOp(&prog, "dropout", "Dropout", {"c"}, {"d"}, true, false);
+  SetOp(&prog, "transpose2", "Transpose2", {"a"}, {"b"}, true, "float32");
+  SetOp(&prog, "reshape2", "Reshape2", {"b"}, {"c"}, true, "int8");
+  SetOp(&prog, "dropout", "Dropout", {"c"}, {"d"}, true, "float32");
   return prog;
 }
@@ -505,8 +506,8 @@ ProgramDesc BuildProgramDescMatmul() {
   }
   SetOp(&prog, "dequantize", "Dequantize1", {"a"}, {"b"}, true);
   SetOp(&prog, "dequantize", "Dequantize2", {"c"}, {"d"}, true);
-  SetOp(&prog, "matmul", "Matmul", {"b", "d"}, {"e"}, true, true);
-  SetOp(&prog, "dropout", "Dropout", {"e"}, {"f"}, true, false);
+  SetOp(&prog, "matmul", "Matmul", {"b", "d"}, {"e"}, true, "int8");
+  SetOp(&prog, "dropout", "Dropout", {"e"}, {"f"}, true, "float32");
   return prog;
 }
@@ -518,8 +519,8 @@ ProgramDesc BuildProgramDescMatmulNotQuantized() {
   }
   SetOp(&prog, "dropout", "Dropout", {"a"}, {"b"}, false);
   SetOp(&prog, "dequantize", "Dequantize", {"c"}, {"d"}, true);
-  SetOp(&prog, "matmul", "Matmul", {"b", "d"}, {"e"}, true, true);
-  SetOp(&prog, "dropout", "Dropout", {"e"}, {"f"}, true, false);
+  SetOp(&prog, "matmul", "Matmul", {"b", "d"}, {"e"}, true, "int8");
+  SetOp(&prog, "dropout", "Dropout", {"e"}, {"f"}, true, "float32");
   return prog;
 }
@@ -590,8 +591,8 @@ ProgramDesc BuildProgramDescElementwiseAdd() {
   SetOp(&prog, "dequantize", "Dequantize1", {"a"}, {"b"}, true);
   SetOp(&prog, "dequantize", "Dequantize2", {"c"}, {"d"}, true);
   SetOp(&prog, "elementwise_add", "ElementwiseAdd", {"b", "d"}, {"e"}, true,
-        true);
-  SetOp(&prog, "dropout", "Dropout", {"e"}, {"f"}, true, false);
+        "int8");
+  SetOp(&prog, "dropout", "Dropout", {"e"}, {"f"}, true, "float32");
   return prog;
 }
...
@@ -32,11 +32,19 @@ void CPUQuantizePlacementPass::ApplyImpl(ir::Graph* graph) const {
                     n->id()) != excluded_ids_list.end())
         continue;
       auto* op = n->Op();
-      if (op->HasAttr("use_quantizer") || op->HasProtoAttr("use_quantizer")) {
+      if (op->HasAttr("mkldnn_data_type") ||
+          op->HasProtoAttr("mkldnn_data_type")) {
+        // use_quantizer is no longer used
+        // assign value for compatibility
+        if (op->GetAttrIfExists<bool>("use_quantizer")) {
+          op->SetAttr("mkldnn_data_type", std::string("int8"));
+        }
         if (op_types_list.empty()) {
+          op->SetAttr("mkldnn_data_type", std::string("int8"));
           op->SetAttr("use_quantizer", true);
         } else if (std::find(op_types_list.begin(), op_types_list.end(),
                              op->Type()) != op_types_list.end()) {
+          op->SetAttr("mkldnn_data_type", std::string("int8"));
           op->SetAttr("use_quantizer", true);
         }
       }
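The hunk above keeps the legacy boolean while introducing the string attribute: an op that still carries use_quantizer = true gets mkldnn_data_type = "int8" assigned for compatibility, and ops selected by the pass receive both attributes. A small hypothetical sketch of the observable effect:

    // Hypothetical sketch: an op whose maker declares both attributes,
    // built with only the legacy flag set.
    op->SetAttr("use_quantizer", true);
    // ... after cpu_quantize_placement_pass runs ...
    // op->GetAttrIfExists<std::string>("mkldnn_data_type") == "int8"  (true)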
...
@@ -15,7 +15,7 @@
 #include "paddle/fluid/framework/ir/mkldnn/cpu_quantize_placement_pass.h"
 #include <gtest/gtest.h>
-#include <boost/logic/tribool.hpp>
+#include "paddle/fluid/platform/mkldnn_helper.h"
 namespace paddle {
 namespace framework {
@@ -24,13 +24,11 @@ namespace ir {
 void SetOp(ProgramDesc* prog, const std::string& type, const std::string& name,
            const std::vector<std::string>& inputs,
            const std::vector<std::string>& outputs,
-           boost::tribool use_quantizer) {
+           const std::string& mkldnn_data_type = "float32") {
   auto* op = prog->MutableBlock(0)->AppendOp();
   op->SetType(type);
-  if (!boost::indeterminate(use_quantizer))
-    op->SetAttr("use_quantizer", use_quantizer);
+  op->SetAttr("mkldnn_data_type", mkldnn_data_type);
   if (type == "conv2d") {
     op->SetAttr("name", name);
@@ -50,7 +48,7 @@ void SetOp(ProgramDesc* prog, const std::string& type, const std::string& name,
     op->SetOutput("Out", {outputs[0]});
   }
-// operator                      use_quantizer
+// operator                      mkldnn_data_type
 // ---------------------------------------
 // (a,b)->concat->c              none
 // (c,weights,bias)->conv->f     false
@@ -71,19 +69,19 @@ ProgramDesc BuildProgramDesc() {
     }
   }
-  SetOp(&prog, "concat", "concat1", {"a", "b"}, {"c"}, boost::indeterminate);
-  SetOp(&prog, "conv2d", "conv1", {"c", "weights", "bias"}, {"f"}, false);
-  SetOp(&prog, "relu", "relu1", {"f"}, {"g"}, boost::indeterminate);
-  SetOp(&prog, "pool2d", "pool1", {"g"}, {"h"}, false);
-  SetOp(&prog, "conv2d", "conv2", {"h", "weights2", "bias2"}, {"k"}, false);
-  SetOp(&prog, "pool2d", "pool2", {"k"}, {"l"}, false);
+  SetOp(&prog, "concat", "concat1", {"a", "b"}, {"c"}, "float32");
+  SetOp(&prog, "conv2d", "conv1", {"c", "weights", "bias"}, {"f"}, "float32");
+  SetOp(&prog, "relu", "relu1", {"f"}, {"g"}, "float32");
+  SetOp(&prog, "pool2d", "pool1", {"g"}, {"h"}, "float32");
+  SetOp(&prog, "conv2d", "conv2", {"h", "weights2", "bias2"}, {"k"}, "float32");
+  SetOp(&prog, "pool2d", "pool2", {"k"}, {"l"}, "float32");
   return prog;
 }
 void MainTest(std::initializer_list<std::string> quantize_enabled_op_types,
               std::initializer_list<int> quantize_excluded_op_ids,
-              unsigned expected_use_quantizer_true_count) {
+              unsigned expected_int8_data_type_count) {
   auto prog = BuildProgramDesc();
   std::unique_ptr<ir::Graph> graph(new ir::Graph(prog));
@@ -96,38 +94,34 @@ void MainTest(std::initializer_list<std::string> quantize_enabled_op_types,
   graph.reset(pass->Apply(graph.release()));
-  unsigned use_quantizer_true_count = 0;
+  unsigned int8_data_type_count = 0;
   for (auto* node : graph->Nodes()) {
     if (node->IsOp()) {
-      auto* op = node->Op();
-      if (op->HasAttr("use_quantizer") &&
-          BOOST_GET_CONST(bool, op->GetAttr("use_quantizer"))) {
-        ++use_quantizer_true_count;
+      if (platform::HasOpINT8DataType(node->Op())) {
+        ++int8_data_type_count;
       }
     }
   }
-  EXPECT_EQ(use_quantizer_true_count, expected_use_quantizer_true_count);
+  EXPECT_EQ(int8_data_type_count, expected_int8_data_type_count);
 }
-void DefaultAttrTest(unsigned expected_use_quantizer_true_count) {
+void DefaultAttrTest(unsigned expected_int8_data_type_count) {
   auto prog = BuildProgramDesc();
   std::unique_ptr<ir::Graph> graph(new ir::Graph(prog));
   auto pass = PassRegistry::Instance().Get("cpu_quantize_placement_pass");
   graph.reset(pass->Apply(graph.release()));
-  unsigned use_quantizer_true_count = 0;
+  unsigned int8_data_type_count = 0;
   for (auto* node : graph->Nodes()) {
     if (node->IsOp()) {
-      auto* op = node->Op();
-      if (op->HasAttr("use_quantizer") &&
-          BOOST_GET_CONST(bool, op->GetAttr("use_quantizer"))) {
-        ++use_quantizer_true_count;
+      if (platform::HasOpINT8DataType(node->Op())) {
+        ++int8_data_type_count;
      }
     }
   }
-  EXPECT_EQ(use_quantizer_true_count, expected_use_quantizer_true_count);
+  EXPECT_EQ(int8_data_type_count, expected_int8_data_type_count);
 }
 TEST(QuantizerPlacementPass, enabled_pool) { MainTest({"pool2d"}, {}, 2); }
@@ -137,13 +131,13 @@ TEST(QuantizerPlacementPass, enabled_conv_excluded_one) {
 }
 TEST(QuantizerPlacementPass, excluded_none) {
-  // 2 conv + 2 pool
-  MainTest({}, {}, 4);
+  // all operators quantized
+  MainTest({}, {}, 6);
 }
 TEST(QuantizerPlacementPass, default_attr_value) {
-  // 2 conv + 2 pool
-  DefaultAttrTest(4);
+  // all operators quantized
+  DefaultAttrTest(6);
 }
 }  // namespace ir
...
@@ -1057,4 +1057,5 @@ USE_TRT_CONVERTER(fused_embedding_eltwise_layernorm);
 USE_TRT_CONVERTER(skip_layernorm);
 USE_TRT_CONVERTER(slice);
 USE_TRT_CONVERTER(scale);
+USE_TRT_CONVERTER(stack);
 #endif
@@ -27,6 +27,7 @@
 #include "paddle/fluid/framework/type_defs.h"
 #include "paddle/fluid/inference/analysis/analyzer.h"
 #include "paddle/fluid/inference/api/analysis_predictor.h"
+#include "paddle/fluid/platform/mkldnn_helper.h"
 #include "paddle/fluid/platform/place.h"
 #include "paddle/fluid/string/pretty_log.h"
@@ -50,8 +51,7 @@ bool AnalysisPredictor::MkldnnQuantizer::CalculateScales() {
   using VariableNameMap = std::map<std::string, std::vector<std::string>>;
   std::map<std::string, std::map<std::string, LoDTensor>> gathered_data;
   for (const auto* op : predictor_.inference_program_->Block(0).AllOps()) {
-    if (op->HasAttr("use_quantizer") &&
-        BOOST_GET_CONST(bool, op->GetAttr("use_quantizer"))) {
+    if (platform::HasOpINT8DataType(op)) {
       const VariableNameMap& connections_in = op->Inputs();
       const VariableNameMap& connections_out = op->Outputs();
...
@@ -3,8 +3,8 @@ nv_library(tensorrt_converter
   SRCS mul_op.cc conv2d_op.cc fc_op.cc pool2d_op.cc elementwise_op.cc
   batch_norm_op.cc activation_op.cc softmax_op.cc concat_op.cc dropout_op.cc
   pad_op.cc split_op.cc prelu_op.cc leaky_relu_op.cc gelu_op.cc layer_norm_op.cc multihead_matmul_op.cc
-  shuffle_channel_op.cc swish_op.cc instance_norm_op.cc
+  shuffle_channel_op.cc swish_op.cc instance_norm_op.cc stack_op.cc
   emb_eltwise_layernorm.cc skip_layernorm.cc scale_op.cc slice_op.cc hard_sigmoid_op.cc hard_swish_op.cc
   DEPS tensorrt_engine tensorrt_plugin operator scope framework_proto op_registry)
 nv_test(test_op_converter SRCS test_op_converter.cc DEPS
...
@@ -58,6 +58,24 @@ class ScaleOpConverter : public OpConverter {
     TensorRTEngine::Weight power_weights{nvinfer1::DataType::kFLOAT, nullptr,
                                          0};
     nvinfer1::ILayer* layer = nullptr;
+    auto input_dim = input->getDimensions();
+    PADDLE_ENFORCE_GE(input_dim.nbDims, 3,
+                      platform::errors::Fatal(
+                          "Paddle-TRT scale mode only supports dimension >= 3"));
+    nvinfer1::IShuffleLayer* expand_layer = nullptr;
+    nvinfer1::IShuffleLayer* squeeze_layer = nullptr;
+    if (input_dim.nbDims == 3) {
+      // TensorRT scale layer is not supporting input dims < 4 when using
+      // explicit batch
+      expand_layer = TRT_ENGINE_ADD_LAYER(engine_, Shuffle, *input);
+      nvinfer1::Dims4 target_shape(0, 0, 0, 1);  // expand 1 dim
+      expand_layer->setReshapeDimensions(target_shape);
+      input = expand_layer->getOutput(0);
+    }
     if (bias_after_scale) {
       layer = TRT_ENGINE_ADD_LAYER(
           engine_, Scale, *input, nvinfer1::ScaleMode::kUNIFORM,
@@ -73,6 +91,18 @@ class ScaleOpConverter : public OpConverter {
           power_weights.get(), scale_weights.get(), power_weights.get());
     }
+    PADDLE_ENFORCE_EQ(layer != nullptr, true,
+                      platform::errors::Fatal("Create scale layer failed."));
+    if (input_dim.nbDims == 3) {
+      // TensorRT scale layer is not supporting input dims < 4 when using
+      // explicit batch
+      squeeze_layer =
+          TRT_ENGINE_ADD_LAYER(engine_, Shuffle, *(layer->getOutput(0)));
+      nvinfer1::Dims3 target_shape(0, 0, 0);  // squeeze 1 dim
+      squeeze_layer->setReshapeDimensions(target_shape);
+      layer = static_cast<nvinfer1::ILayer*>(squeeze_layer);
+    }
     RreplenishLayerAndOutput(layer, "scale", {out_name}, test_mode);
   }
 };
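The expand/squeeze pair added above leans on TensorRT's IShuffleLayer reshape convention, where a dimension value of 0 in setReshapeDimensions means "copy this dimension from the input". Assuming a 3-D input of shape (d0, d1, d2), the effective shapes are:

    // Dims4(0, 0, 0, 1): (d0, d1, d2)    -> (d0, d1, d2, 1)   before IScaleLayer
    // Dims3(0, 0, 0):    (d0, d1, d2, 1) -> (d0, d1, d2)      after IScaleLayer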
...
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "paddle/fluid/inference/tensorrt/convert/op_converter.h"
#include "paddle/fluid/inference/tensorrt/plugin/stack_op_plugin.h"
namespace paddle {
namespace inference {
namespace tensorrt {
/*
* Stack converter from fluid to tensorRT.
*/
class StackOpConverter : public OpConverter {
public:
void operator()(const framework::proto::OpDesc& op,
const framework::Scope& scope, bool test_mode) override {
VLOG(4) << "convert fluid stack op to tensorrt stack layer";
framework::OpDesc op_desc(op, nullptr);
auto input = op_desc.Input("X");
int input_num = input.size();
nvinfer1::ITensor** inputs =
(nvinfer1::ITensor**)malloc(input_num * sizeof(nvinfer1::ITensor*));
for (int i = 0; i < input_num; ++i) {
inputs[i] = engine_->GetITensor(input[i]);
}
int axis = BOOST_GET_CONST(int, op_desc.GetAttr("axis"));
if (axis < 0) {
axis = axis + inputs[0]->getDimensions().nbDims + 1;
}
nvinfer1::ILayer* layer = nullptr;
if (engine_->with_dynamic_shape()) {
#if IS_TRT_VERSION_GE(6000)
plugin::StackPluginDynamic* plugin =
new plugin::StackPluginDynamic(axis, input_num);
layer = engine_->AddPluginV2(inputs, input_num, plugin);
assert(layer != nullptr);
#else
PADDLE_THROW(platform::errors::Fatal(
"You are running the TRT Dynamic Shape mode, need to confirm that "
"your TRT version is no less than 6.0"));
#endif
} else {
PADDLE_THROW(platform::errors::Fatal(
"You are running the Ernie(Bert) model in static"
"shape mode, which is not supported for the time being.\n"
"You can use the config.SetTRTDynamicShapeInfo(...) interface"
" to set the shape information to run the dynamic shape mode."));
}
auto output_name = op_desc.Output("Y").front();
RreplenishLayerAndOutput(layer, "stack", {output_name}, test_mode);
free(inputs);
}
};
} // namespace tensorrt
} // namespace inference
} // namespace paddle
REGISTER_TRT_OP_CONVERTER(stack, StackOpConverter);
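Note the axis normalization in the converter above: stack inserts a new dimension, so a negative axis is normalized against the output rank, i.e. the input rank plus one. For example, stacking inputs of shape (batch, seq, seq) with axis = -3 gives axis = -3 + 3 + 1 = 1 and an output of shape (batch, num_inputs, seq, seq), the (batch, head_number, seq, seq) attention-mask layout that the ERNIE tests later in this commit rely on.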
...@@ -86,6 +86,7 @@ struct SimpleOpTypeSetTeller : public Teller { ...@@ -86,6 +86,7 @@ struct SimpleOpTypeSetTeller : public Teller {
"layer_norm", "layer_norm",
"scale", "scale",
"slice", "slice",
"stack",
}; };
}; };
......
...@@ -2,7 +2,7 @@ nv_library(tensorrt_plugin ...@@ -2,7 +2,7 @@ nv_library(tensorrt_plugin
SRCS trt_plugin.cc split_op_plugin.cu elementwise_op_plugin.cu SRCS trt_plugin.cc split_op_plugin.cu elementwise_op_plugin.cu
prelu_op_plugin.cu trt_plugin_factory.cc gelu_op_plugin.cu prelu_op_plugin.cu trt_plugin_factory.cc gelu_op_plugin.cu
pool_op_plugin.cu swish_op_plugin.cu layer_norm_op_plugin.cu pool_op_plugin.cu swish_op_plugin.cu layer_norm_op_plugin.cu
cast_int_plugin.cu cast_int_plugin.cu stack_op_plugin.cu
instance_norm_op_plugin.cu emb_eltwise_layernorm_plugin.cu instance_norm_op_plugin.cu emb_eltwise_layernorm_plugin.cu
qkv_to_context_plugin.cu skip_layernorm_op_plugin.cu slice_op_plugin.cu hard_swish_op_plugin.cu qkv_to_context_plugin.cu skip_layernorm_op_plugin.cu slice_op_plugin.cu hard_swish_op_plugin.cu
DEPS enforce tensorrt_engine prelu tensor bert_encoder_functor) DEPS enforce tensorrt_engine prelu tensor bert_encoder_functor)
// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include <cassert>
#include <cstring>
#include <vector>
#include "paddle/fluid/inference/tensorrt/plugin/stack_op_plugin.h"
#include "paddle/fluid/inference/tensorrt/plugin/trt_plugin_factory.h"
namespace paddle {
namespace inference {
namespace tensorrt {
namespace plugin {
#if IS_TRT_VERSION_GE(6000)
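// Shape note for getOutputDimensions below: stack inserts one dimension at
// axis_, so the output rank is the input rank plus one; dimensions at or
// after axis_ shift one slot to the right, and the new dimension equals the
// number of stacked inputs (nb_inputs).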
nvinfer1::DimsExprs StackPluginDynamic::getOutputDimensions(
int output_index, const nvinfer1::DimsExprs* inputs, int nb_inputs,
nvinfer1::IExprBuilder& expr_builder) {
nvinfer1::DimsExprs output(inputs[0]);
output.nbDims = inputs[0].nbDims + 1;
for (int i = inputs[0].nbDims; i > axis_; --i) {
output.d[i] = inputs[0].d[i - 1];
}
output.d[axis_] = expr_builder.constant(nb_inputs);
return output;
}
bool StackPluginDynamic::supportsFormatCombination(
int pos, const nvinfer1::PluginTensorDesc* in_out, int nb_inputs,
int nb_outputs) {
PADDLE_ENFORCE_NOT_NULL(
in_out, platform::errors::InvalidArgument(
"The input of stack plugin should not be nullptr."));
PADDLE_ENFORCE_LT(
pos, nb_inputs + nb_outputs,
platform::errors::InvalidArgument("The pos(%d) should be less than the "
"num(%d) of the input and the output.",
pos, nb_inputs + nb_outputs));
const nvinfer1::PluginTensorDesc& in = in_out[pos];
if (pos == 0) {
#ifdef SUPPORTS_CUDA_FP16
return (in.type == nvinfer1::DataType::kFLOAT ||
in.type == nvinfer1::DataType::kHALF) &&
(in.format == nvinfer1::TensorFormat::kLINEAR);
#else
return (in.type == nvinfer1::DataType::kFLOAT) &&
(in.format == nvinfer1::TensorFormat::kLINEAR);
#endif
}
const nvinfer1::PluginTensorDesc& prev = in_out[pos - 1];
// output
return in.type == prev.type && in.format == prev.format;
}
nvinfer1::DataType StackPluginDynamic::getOutputDataType(
int index, const nvinfer1::DataType* input_types, int nb_inputs) const {
PADDLE_ENFORCE_EQ(index, 0, platform::errors::InvalidArgument(
"The index should be equal to 0"));
return input_types[0];
}
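// Indexing note for StackKernel below: the output is addressed as a flat
// [lead_unit, num_stack, base_unit] array, where base_unit is the product of
// the dimensions after the stack axis and lead_unit the product of those
// before it (both computed in enqueue). Block (blockIdx.x, blockIdx.y) copies
// the base_unit-long slice of input blockIdx.x that belongs to lead index
// blockIdx.y; e.g. stacking 12 (batch, seq, seq) tensors at axis 1 gives
// lead_unit = batch and base_unit = seq * seq.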
template <typename T>
__global__ void StackKernel(const T* const* input, T* output, int num_stack,
int base_unit) {
int stack_id = blockIdx.x;
int lead_id = blockIdx.y;
for (int i = threadIdx.x; i < base_unit; i += blockDim.x) {
output[lead_id * num_stack * base_unit + stack_id * base_unit + i] =
input[stack_id][lead_id * base_unit + i];
}
}
int StackPluginDynamic::enqueue(const nvinfer1::PluginTensorDesc* input_desc,
const nvinfer1::PluginTensorDesc* output_desc,
const void* const* inputs, void* const* outputs,
void* workspace, cudaStream_t stream) {
auto input_dims = input_desc[0].dims; // (batch, seq, seq)
auto out_dims = output_desc[0].dims; // (batch, num_head, seq, seq)
auto out_num_dims = out_dims.nbDims;
int base_unit = 1;
for (int i = axis_ + 1; i < out_num_dims; ++i) {
PADDLE_ENFORCE_GT(out_dims.d[i], 0,
platform::errors::InvalidArgument(
"Input dimensions should be greater than 0"));
base_unit *= out_dims.d[i];
}
int lead_unit = 1;
for (int i = 0; i < axis_; ++i) {
PADDLE_ENFORCE_GT(out_dims.d[i], 0,
platform::errors::InvalidArgument(
"Input dimensions should be greater than 0"));
lead_unit *= out_dims.d[i];
}
cudaMemcpyAsync(reinterpret_cast<void*>(in_ptr_gpu_),
reinterpret_cast<const void* const>(inputs),
sizeof(void*) * out_dims.d[axis_], cudaMemcpyHostToDevice,
stream);
const int num_stacks = out_dims.d[axis_];
dim3 num_blocks(num_stacks, lead_unit);
const int num_threads = 256;
auto infer_type = input_desc[0].type;
if (infer_type == nvinfer1::DataType::kFLOAT) {
float* output = static_cast<float*>(outputs[0]);
StackKernel<float><<<num_blocks, num_threads, 0, stream>>>(
reinterpret_cast<const float* const*>(in_ptr_gpu_), output, num_stacks,
base_unit);
} else if (infer_type == nvinfer1::DataType::kHALF) {
#ifdef SUPPORTS_CUDA_FP16
__half* output = static_cast<__half*>(outputs[0]);
StackKernel<__half><<<num_blocks, num_threads, 0, stream>>>(
reinterpret_cast<const __half* const*>(in_ptr_gpu_), output, num_stacks,
base_unit);
#else
PADDLE_THROW(platform::errors::Fatal(
"The cuda archs you specific should greater than 600."));
#endif
} else {
PADDLE_THROW(
platform::errors::Fatal("The Stack TRT Plugin's input type only "
"support float or half currently."));
}
return cudaGetLastError() != cudaSuccess;
}
#endif
} // namespace plugin
} // namespace tensorrt
} // namespace inference
} // namespace paddle
// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include <stdio.h>
#include <cassert>
#include <string>
#include <vector>
#include "paddle/fluid/framework/tensor.h"
#include "paddle/fluid/inference/tensorrt/plugin/trt_plugin.h"
namespace paddle {
namespace inference {
namespace tensorrt {
namespace plugin {
#if IS_TRT_VERSION_GE(6000)
class StackPluginDynamic : public DynamicPluginTensorRT {
public:
StackPluginDynamic(int axis, int num_stack)
: axis_(axis), num_stack_(num_stack) {
init();
}
StackPluginDynamic(void const* serialData, size_t serialLength) {
DeserializeValue(&serialData, &serialLength, &axis_);
DeserializeValue(&serialData, &serialLength, &num_stack_);
init();
}
~StackPluginDynamic() {}
nvinfer1::IPluginV2DynamicExt* clone() const override {
return new StackPluginDynamic(axis_, num_stack_);
}
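// init() pre-allocates a GPU buffer with one pointer-sized slot per input;
// enqueue() later copies the host-side array of per-input device pointers
// into it (see the cudaMemcpyAsync in the .cu file), so the kernel can
// dereference input[stack_id] on the device.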
void init() {
int device_id;
cudaGetDevice(&device_id);
in_ptr_tensor_.Resize({num_stack_});
in_ptr_gpu_ =
in_ptr_tensor_.mutable_data<int64_t>(platform::CUDAPlace(device_id));
}
const char* getPluginType() const override { return "stack_plugin"; }
int getNbOutputs() const override { return 1; }
int initialize() override { return 0; }
size_t getSerializationSize() const override {
size_t serialize_size = 0;
serialize_size += SerializedSize(axis_);
serialize_size += SerializedSize(num_stack_);
return serialize_size;
}
void serialize(void* buffer) const override {
SerializeValue(&buffer, axis_);
SerializeValue(&buffer, num_stack_);
}
nvinfer1::DimsExprs getOutputDimensions(
int outputIndex, const nvinfer1::DimsExprs* inputs, int nbInputs,
nvinfer1::IExprBuilder& exprBuilder) override;
bool supportsFormatCombination(int pos,
const nvinfer1::PluginTensorDesc* inOut,
int nbInputs, int nbOutputs) override;
void configurePlugin(const nvinfer1::DynamicPluginTensorDesc* in,
int nbInputs,
const nvinfer1::DynamicPluginTensorDesc* out,
int nbOutputs) override {}
size_t getWorkspaceSize(const nvinfer1::PluginTensorDesc* inputs,
int nbInputs,
const nvinfer1::PluginTensorDesc* outputs,
int nbOutputs) const override {
return 0;
}
int enqueue(const nvinfer1::PluginTensorDesc* inputDesc,
const nvinfer1::PluginTensorDesc* outputDesc,
const void* const* inputs, void* const* outputs, void* workspace,
cudaStream_t stream) override;
nvinfer1::DataType getOutputDataType(int index,
const nvinfer1::DataType* inputTypes,
int nbInputs) const override;
void destroy() override { delete this; }
private:
int axis_;
int num_stack_;
framework::Tensor in_ptr_tensor_;
int64_t* in_ptr_gpu_;
};
class StackPluginV2Creator : public nvinfer1::IPluginCreator {
public:
StackPluginV2Creator() {}
const char* getPluginName() const override { return "stack_plugin"; }
const char* getPluginVersion() const override { return "1"; }
const nvinfer1::PluginFieldCollection* getFieldNames() override {
return &field_collection_;
}
nvinfer1::IPluginV2* createPlugin(
const char* name, const nvinfer1::PluginFieldCollection* fc) override {
int axis = -1;
int num_stack = -1;
for (int i = 0; i < fc->nbFields; ++i) {
const std::string name(fc->fields[i].name);
if (name == "axis") {
axis = static_cast<const int*>(fc->fields[i].data)[0];
} else if (name == "num_stack") {
num_stack = static_cast<const int*>(fc->fields[i].data)[0];
} else {
PADDLE_THROW(
platform::errors::Fatal("Meet an unknown plugin field '" + name +
"' when creating stack op plugin."));
}
}
return new StackPluginDynamic(axis, num_stack);
}
nvinfer1::IPluginV2* deserializePlugin(const char* name,
const void* serial_data,
size_t serial_length) override {
auto plugin = new StackPluginDynamic(serial_data, serial_length);
return plugin;
}
void setPluginNamespace(const char* lib_namespace) override {
plugin_namespace_ = lib_namespace;
}
const char* getPluginNamespace() const override {
return plugin_namespace_.c_str();
}
private:
std::string plugin_namespace_;
std::string plugin_name_;
nvinfer1::PluginFieldCollection field_collection_{0, nullptr};
std::vector<nvinfer1::PluginField> plugin_attributes_;
};
REGISTER_TRT_PLUGIN_V2(StackPluginV2Creator);
#endif
} // namespace plugin
} // namespace tensorrt
} // namespace inference
} // namespace paddle
@@ -90,7 +90,6 @@ void trt_ernie(bool with_fp16, std::vector<float> result) {
   config.SwitchUseFeedFetchOps(false);
-  int head_number = 12;
   int batch = 1;
   int min_seq_len = 1;
   int max_seq_len = 128;
@@ -104,17 +103,17 @@ void trt_ernie(bool with_fp16, std::vector<float> result) {
       {"read_file_0.tmp_0", min_shape},
       {"read_file_0.tmp_1", min_shape},
       {"read_file_0.tmp_2", min_shape},
-      {"stack_0.tmp_0", {batch, head_number, min_seq_len, min_seq_len}}};
+      {"matmul_0.tmp_0", {batch, min_seq_len, min_seq_len}}};
   std::map<std::string, std::vector<int>> max_input_shape = {
       {"read_file_0.tmp_0", max_shape},
       {"read_file_0.tmp_1", max_shape},
       {"read_file_0.tmp_2", max_shape},
-      {"stack_0.tmp_0", {batch, head_number, max_seq_len, max_seq_len}}};
+      {"matmul_0.tmp_0", {batch, max_seq_len, max_seq_len}}};
   std::map<std::string, std::vector<int>> opt_input_shape = {
       {"read_file_0.tmp_0", opt_shape},
       {"read_file_0.tmp_1", opt_shape},
       {"read_file_0.tmp_2", opt_shape},
-      {"stack_0.tmp_0", {batch, head_number, opt_seq_len, opt_seq_len}}};
+      {"matmul_0.tmp_0", {batch, opt_seq_len, opt_seq_len}}};
   auto precision = AnalysisConfig::Precision::kFloat32;
   if (with_fp16) {
...
@@ -90,7 +90,6 @@ void trt_ernie(bool with_fp16, std::vector<float> result) {
   config.SwitchUseFeedFetchOps(false);
-  int head_number = 12;
   int batch = 1;
   int min_seq_len = 1;
   int max_seq_len = 128;
@@ -104,17 +103,17 @@ void trt_ernie(bool with_fp16, std::vector<float> result) {
       {"read_file_0.tmp_0", min_shape},
       {"read_file_0.tmp_1", min_shape},
       {"read_file_0.tmp_2", min_shape},
-      {"stack_0.tmp_0", {batch, head_number, min_seq_len, min_seq_len}}};
+      {"matmul_0.tmp_0", {batch, min_seq_len, min_seq_len}}};
   std::map<std::string, std::vector<int>> max_input_shape = {
       {"read_file_0.tmp_0", max_shape},
       {"read_file_0.tmp_1", max_shape},
       {"read_file_0.tmp_2", max_shape},
-      {"stack_0.tmp_0", {batch, head_number, max_seq_len, max_seq_len}}};
+      {"matmul_0.tmp_0", {batch, max_seq_len, max_seq_len}}};
   std::map<std::string, std::vector<int>> opt_input_shape = {
       {"read_file_0.tmp_0", opt_shape},
       {"read_file_0.tmp_1", opt_shape},
       {"read_file_0.tmp_2", opt_shape},
-      {"stack_0.tmp_0", {batch, head_number, opt_seq_len, opt_seq_len}}};
+      {"matmul_0.tmp_0", {batch, opt_seq_len, opt_seq_len}}};
   auto precision = AnalysisConfig::Precision::kFloat32;
   if (with_fp16) {
...
@@ -199,7 +199,7 @@ $$out = x - \\frac{e^{x} - e^{-x}}{e^{x} + e^{-x}}$$
 UNUSED constexpr char SqrtDoc[] = R"DOC(
 Sqrt Activation Operator.
-.. math:: out=\sqrt x=x^{1/2}
+.. math:: out=\\sqrt{x}=x^{1/2}
 **Note**:
   input value must be greater than or equal to zero.
@@ -211,7 +211,7 @@ Rsqrt Activation Operator.
 Please make sure input is legal in case of numeric errors.
-$$out = \frac{1}{\sqrt{x}}$$
+$$out = \\frac{1}{\\sqrt{x}}$$
 )DOC";
...
@@ -122,12 +122,16 @@ class ConcatOpMaker : public framework::OpProtoAndCheckerMaker {
              "It has higher priority than Attr(axis). "
              "The shape of AxisTensor must be [1].")
         .AsDispensable();
-    AddAttr<bool>("use_quantizer",
+    AddAttr<bool>(
+        "use_quantizer",
         "(bool, default false) "
-        "Set to true for operators that should be quantized and use "
-        "int8 kernel. "
-        "Only used on CPU.")
+        "This parameter is no longer used. Use 'mkldnn_data_type' instead.")
         .SetDefault(false);
+    AddAttr<std::string>(
+        "mkldnn_data_type",
+        "(string, default \"float32\"). Data type of mkldnn kernel")
+        .SetDefault("float32")
+        .InEnum({"float32", "int8", "bfloat16"});
     AddComment(R"DOC(
 Concat Operator.
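The same replacement pattern, a deprecated use_quantizer bool plus a validated mkldnn_data_type string, is applied to conv2d, prior_box, elementwise, fc, and matmul below. As a hedged sketch of the InEnum check (attribute checkers are presumably enforced when the operator is created, not at SetAttr time):

    // Hypothetical sketch: a value outside the declared enum is expected to
    // be rejected by the attribute checker once the op is constructed.
    op_desc.SetAttr("mkldnn_data_type", std::string("int8"));   // accepted
    op_desc.SetAttr("mkldnn_data_type", std::string("uint8"));  // should fail
    // the InEnum({"float32", "int8", "bfloat16"}) check at op-creation time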
...
@@ -279,12 +279,16 @@ void Conv2DOpMaker::Make() {
     AddAttr<bool>("use_mkldnn",
                   "(bool, default false) Only used in mkldnn kernel")
         .SetDefault(false);
-    AddAttr<bool>("use_quantizer",
+    AddAttr<bool>(
+        "use_quantizer",
         "(bool, default false) "
-        "Set to true for operators that should be quantized and use "
-        "int8 kernel. "
-        "Only used on CPU.")
+        "This parameter is no longer used. Use 'mkldnn_data_type' instead.")
         .SetDefault(false);
+    AddAttr<std::string>(
+        "mkldnn_data_type",
+        "(string, default \"float32\"). Data type of mkldnn kernel")
+        .SetDefault("float32")
+        .InEnum({"float32", "int8", "bfloat16"});
     AddAttr<bool>("fuse_relu", "(bool, default false) Only used in mkldnn kernel")
         .SetDefault(false);
     AddAttr<bool>("fuse_brelu",
...
@@ -14,6 +14,8 @@ limitations under the License. */
 #include "paddle/fluid/operators/detection/prior_box_op.h"
+#include <string>
 #ifdef PADDLE_WITH_MKLDNN
 #include "paddle/fluid/platform/mkldnn_helper.h"
 #endif
@@ -218,12 +220,16 @@ class PriorBoxOpMaker : public framework::OpProtoAndCheckerMaker {
     AddAttr<bool>("use_mkldnn",
                   "(bool, default false) Only used in mkldnn kernel")
         .SetDefault(false);
-    AddAttr<bool>("use_quantizer",
+    AddAttr<bool>(
+        "use_quantizer",
         "(bool, default false) "
-        "Set to true for operators that should be quantized and use "
-        "int8 kernel. "
-        "Only used on CPU.")
+        "This parameter is no longer used. Use 'mkldnn_data_type' instead.")
         .SetDefault(false);
+    AddAttr<std::string>(
+        "mkldnn_data_type",
+        "(string, default \"float32\"). Data type of mkldnn kernel")
+        .SetDefault("float32")
+        .InEnum({"float32", "int8", "bfloat16"});
     AddComment(R"DOC(
 Prior box operator
 Generate prior boxes for SSD(Single Shot MultiBox Detector) algorithm.
...
...@@ -140,12 +140,17 @@ class ElementwiseOpMaker : public framework::OpProtoAndCheckerMaker { ...@@ -140,12 +140,17 @@ class ElementwiseOpMaker : public framework::OpProtoAndCheckerMaker {
.SetDefault(""); .SetDefault("");
AddAttr<std::string>("y_data_format", "This parameter is no longer used.") AddAttr<std::string>("y_data_format", "This parameter is no longer used.")
.SetDefault(""); .SetDefault("");
/* int8 parameters */ AddAttr<bool>(
AddAttr<bool>("use_quantizer", "use_quantizer",
"(bool, default false) " "(bool, default false) "
"Set to true for operators that should be quantized and use " "This parameter is no longer used. Use 'mkldnn_data_type' instead.")
"int8 kernel. Only used on CPU.")
.SetDefault(false); .SetDefault(false);
AddAttr<std::string>(
"mkldnn_data_type",
"(string, default \"float32\"). Data type of mkldnn kernel")
.SetDefault("float32")
.InEnum({"float32", "int8", "bfloat16"});
/* int8 parameters */
AddAttr<float>("Scale_x", AddAttr<float>("Scale_x",
"(float, default 1.0f), The quantize scale of X tensor") "(float, default 1.0f), The quantize scale of X tensor")
.SetDefault(1.0f); .SetDefault(1.0f);
......
...@@ -142,13 +142,17 @@ class FCOpMaker : public framework::OpProtoAndCheckerMaker { ...@@ -142,13 +142,17 @@ class FCOpMaker : public framework::OpProtoAndCheckerMaker {
AddAttr<bool>(framework::kAllKernelsMustComputeRuntimeShape, AddAttr<bool>(framework::kAllKernelsMustComputeRuntimeShape,
"Skip calling InferShape() function in the runtime.") "Skip calling InferShape() function in the runtime.")
.SetDefault(true); .SetDefault(true);
/* int8 parameters */ AddAttr<bool>(
AddAttr<bool>("use_quantizer", "use_quantizer",
"(bool, default false) " "(bool, default false) "
"Set to true for operators that should be quantized and use " "This parameter is no longer used. Use 'mkldnn_data_type' instead.")
"int8 kernel. "
"Only used on CPU.")
.SetDefault(false); .SetDefault(false);
AddAttr<std::string>(
"mkldnn_data_type",
"(string, default \"float32\"). Data type of mkldnn kernel")
.SetDefault("float32")
.InEnum({"float32", "int8", "bfloat16"});
/* int8 parameters */
AddAttr<float>("Scale_in", AddAttr<float>("Scale_in",
"(float, default 1.0f), The quantize scale of input data") "(float, default 1.0f), The quantize scale of input data")
.SetDefault(1.0f); .SetDefault(1.0f);
......
...@@ -535,13 +535,17 @@ class MatMulOpMaker : public framework::OpProtoAndCheckerMaker { ...@@ -535,13 +535,17 @@ class MatMulOpMaker : public framework::OpProtoAndCheckerMaker {
R"DOC(When MKLDNN MatMul_transpose_reshape fuse activated, " R"DOC(When MKLDNN MatMul_transpose_reshape fuse activated, "
"it's a axis atribute of fused transpose for `Out` output.)DOC") "it's a axis atribute of fused transpose for `Out` output.)DOC")
.SetDefault({}); .SetDefault({});
/* int8 parameters */ AddAttr<bool>(
AddAttr<bool>("use_quantizer", "use_quantizer",
"(bool, default false) " "(bool, default false) "
"Set to true for operators that should be quantized and use " "This parameter is no longer used. Use 'mkldnn_data_type' instead.")
"int8 kernel. "
"Only used on CPU.")
.SetDefault(false); .SetDefault(false);
AddAttr<std::string>(
"mkldnn_data_type",
"(string, default \"float32\"). Data type of mkldnn kernel")
.SetDefault("float32")
.InEnum({"float32", "int8", "bfloat16"});
/* int8 parameters */
AddAttr<float>("Scale_x", AddAttr<float>("Scale_x",
"(float, default 1.0f), The quantize scale of X tensor") "(float, default 1.0f), The quantize scale of X tensor")
.SetDefault(1.0f); .SetDefault(1.0f);
......
...@@ -306,12 +306,16 @@ void Pool2dOpMaker::Make() { ...@@ -306,12 +306,16 @@ void Pool2dOpMaker::Make() {
AddAttr<bool>("use_mkldnn", AddAttr<bool>("use_mkldnn",
"(bool) Only used in mkldnn kernel. Default False") "(bool) Only used in mkldnn kernel. Default False")
.SetDefault(false); .SetDefault(false);
AddAttr<bool>("use_quantizer", AddAttr<bool>(
"(bool) " "use_quantizer",
"Set to true for operators that should be quantized and use " "(bool, default false) "
"int8 kernel. " "This parameter is no longer used. Use 'mkldnn_data_type' instead.")
"Only used on CPU. Default False")
.SetDefault(false); .SetDefault(false);
AddAttr<std::string>(
"mkldnn_data_type",
"(string, default \"float32\"). Data type of mkldnn kernel")
.SetDefault("float32")
.InEnum({"float32", "int8", "bfloat16"});
AddAttr<std::string>( AddAttr<std::string>(
"data_format", "data_format",
"(string, default NCHW) Only used in " "(string, default NCHW) Only used in "
......
...@@ -431,13 +431,16 @@ class Reshape2OpMaker : public ReshapeOpMaker { ...@@ -431,13 +431,16 @@ class Reshape2OpMaker : public ReshapeOpMaker {
"XShape is just used to store the shape and lod of X, which will " "XShape is just used to store the shape and lod of X, which will "
"be used in FlattenGradOp.") "be used in FlattenGradOp.")
.AsIntermediate(); .AsIntermediate();
/* int8 parameters */ AddAttr<bool>(
AddAttr<bool>("use_quantizer", "use_quantizer",
"(bool, default false) " "(bool, default false) "
"Set to true for operators that should be quantized and use " "This parameter is no longer used. Use 'mkldnn_data_type' instead.")
"int8 kernel. "
"Used only on CPU.")
.SetDefault(false); .SetDefault(false);
AddAttr<std::string>(
"mkldnn_data_type",
"(string, default \"float32\"). Data type of mkldnn kernel")
.SetDefault("float32")
.InEnum({"float32", "int8", "bfloat16"});
} }
}; };
......
...@@ -304,6 +304,7 @@ REGISTER_OPERATOR(squeeze2_grad, ops::Squeeze2GradOp, ...@@ -304,6 +304,7 @@ REGISTER_OPERATOR(squeeze2_grad, ops::Squeeze2GradOp,
REGISTER_OP_CPU_KERNEL( REGISTER_OP_CPU_KERNEL(
squeeze, ops::SqueezeKernel<paddle::platform::CPUDeviceContext, float>, squeeze, ops::SqueezeKernel<paddle::platform::CPUDeviceContext, float>,
ops::SqueezeKernel<paddle::platform::CPUDeviceContext, double>, ops::SqueezeKernel<paddle::platform::CPUDeviceContext, double>,
ops::SqueezeKernel<paddle::platform::CPUDeviceContext, bool>,
ops::SqueezeKernel<paddle::platform::CPUDeviceContext, int>, ops::SqueezeKernel<paddle::platform::CPUDeviceContext, int>,
ops::SqueezeKernel<paddle::platform::CPUDeviceContext, int8_t>, ops::SqueezeKernel<paddle::platform::CPUDeviceContext, int8_t>,
ops::SqueezeKernel<paddle::platform::CPUDeviceContext, int64_t>); ops::SqueezeKernel<paddle::platform::CPUDeviceContext, int64_t>);
...@@ -311,12 +312,14 @@ REGISTER_OP_CPU_KERNEL( ...@@ -311,12 +312,14 @@ REGISTER_OP_CPU_KERNEL(
squeeze_grad, squeeze_grad,
ops::SqueezeGradKernel<paddle::platform::CPUDeviceContext, float>, ops::SqueezeGradKernel<paddle::platform::CPUDeviceContext, float>,
ops::SqueezeGradKernel<paddle::platform::CPUDeviceContext, double>, ops::SqueezeGradKernel<paddle::platform::CPUDeviceContext, double>,
ops::SqueezeGradKernel<paddle::platform::CPUDeviceContext, bool>,
ops::SqueezeGradKernel<paddle::platform::CPUDeviceContext, int>, ops::SqueezeGradKernel<paddle::platform::CPUDeviceContext, int>,
ops::SqueezeGradKernel<paddle::platform::CPUDeviceContext, int8_t>, ops::SqueezeGradKernel<paddle::platform::CPUDeviceContext, int8_t>,
ops::SqueezeGradKernel<paddle::platform::CPUDeviceContext, int64_t>); ops::SqueezeGradKernel<paddle::platform::CPUDeviceContext, int64_t>);
REGISTER_OP_CPU_KERNEL( REGISTER_OP_CPU_KERNEL(
squeeze2, ops::Squeeze2Kernel<paddle::platform::CPUDeviceContext, float>, squeeze2, ops::Squeeze2Kernel<paddle::platform::CPUDeviceContext, float>,
ops::Squeeze2Kernel<paddle::platform::CPUDeviceContext, double>, ops::Squeeze2Kernel<paddle::platform::CPUDeviceContext, double>,
ops::Squeeze2Kernel<paddle::platform::CPUDeviceContext, bool>,
ops::Squeeze2Kernel<paddle::platform::CPUDeviceContext, int>, ops::Squeeze2Kernel<paddle::platform::CPUDeviceContext, int>,
ops::Squeeze2Kernel<paddle::platform::CPUDeviceContext, int8_t>, ops::Squeeze2Kernel<paddle::platform::CPUDeviceContext, int8_t>,
ops::Squeeze2Kernel<paddle::platform::CPUDeviceContext, int64_t>); ops::Squeeze2Kernel<paddle::platform::CPUDeviceContext, int64_t>);
...@@ -324,6 +327,7 @@ REGISTER_OP_CPU_KERNEL( ...@@ -324,6 +327,7 @@ REGISTER_OP_CPU_KERNEL(
squeeze2_grad, squeeze2_grad,
ops::Squeeze2GradKernel<paddle::platform::CPUDeviceContext, float>, ops::Squeeze2GradKernel<paddle::platform::CPUDeviceContext, float>,
ops::Squeeze2GradKernel<paddle::platform::CPUDeviceContext, double>, ops::Squeeze2GradKernel<paddle::platform::CPUDeviceContext, double>,
ops::Squeeze2GradKernel<paddle::platform::CPUDeviceContext, bool>,
ops::Squeeze2GradKernel<paddle::platform::CPUDeviceContext, int>, ops::Squeeze2GradKernel<paddle::platform::CPUDeviceContext, int>,
ops::Squeeze2GradKernel<paddle::platform::CPUDeviceContext, int8_t>, ops::Squeeze2GradKernel<paddle::platform::CPUDeviceContext, int8_t>,
ops::Squeeze2GradKernel<paddle::platform::CPUDeviceContext, int64_t>); ops::Squeeze2GradKernel<paddle::platform::CPUDeviceContext, int64_t>);
...@@ -21,6 +21,7 @@ REGISTER_OP_CUDA_KERNEL( ...@@ -21,6 +21,7 @@ REGISTER_OP_CUDA_KERNEL(
squeeze, ops::SqueezeKernel<paddle::platform::CUDADeviceContext, float>, squeeze, ops::SqueezeKernel<paddle::platform::CUDADeviceContext, float>,
ops::SqueezeKernel<paddle::platform::CUDADeviceContext, double>, ops::SqueezeKernel<paddle::platform::CUDADeviceContext, double>,
ops::SqueezeKernel<paddle::platform::CUDADeviceContext, plat::float16>, ops::SqueezeKernel<paddle::platform::CUDADeviceContext, plat::float16>,
ops::SqueezeKernel<paddle::platform::CUDADeviceContext, bool>,
ops::SqueezeKernel<paddle::platform::CUDADeviceContext, int>, ops::SqueezeKernel<paddle::platform::CUDADeviceContext, int>,
ops::SqueezeKernel<paddle::platform::CUDADeviceContext, int8_t>, ops::SqueezeKernel<paddle::platform::CUDADeviceContext, int8_t>,
ops::SqueezeKernel<paddle::platform::CUDADeviceContext, int64_t>); ops::SqueezeKernel<paddle::platform::CUDADeviceContext, int64_t>);
...@@ -29,6 +30,7 @@ REGISTER_OP_CUDA_KERNEL( ...@@ -29,6 +30,7 @@ REGISTER_OP_CUDA_KERNEL(
ops::SqueezeGradKernel<paddle::platform::CUDADeviceContext, float>, ops::SqueezeGradKernel<paddle::platform::CUDADeviceContext, float>,
ops::SqueezeGradKernel<paddle::platform::CUDADeviceContext, double>, ops::SqueezeGradKernel<paddle::platform::CUDADeviceContext, double>,
ops::SqueezeGradKernel<paddle::platform::CUDADeviceContext, plat::float16>, ops::SqueezeGradKernel<paddle::platform::CUDADeviceContext, plat::float16>,
ops::SqueezeGradKernel<paddle::platform::CUDADeviceContext, bool>,
ops::SqueezeGradKernel<paddle::platform::CUDADeviceContext, int>, ops::SqueezeGradKernel<paddle::platform::CUDADeviceContext, int>,
ops::SqueezeGradKernel<paddle::platform::CUDADeviceContext, int8_t>, ops::SqueezeGradKernel<paddle::platform::CUDADeviceContext, int8_t>,
ops::SqueezeGradKernel<paddle::platform::CUDADeviceContext, int64_t>); ops::SqueezeGradKernel<paddle::platform::CUDADeviceContext, int64_t>);
...@@ -36,6 +38,7 @@ REGISTER_OP_CUDA_KERNEL( ...@@ -36,6 +38,7 @@ REGISTER_OP_CUDA_KERNEL(
squeeze2, ops::Squeeze2Kernel<paddle::platform::CUDADeviceContext, float>, squeeze2, ops::Squeeze2Kernel<paddle::platform::CUDADeviceContext, float>,
ops::Squeeze2Kernel<paddle::platform::CUDADeviceContext, double>, ops::Squeeze2Kernel<paddle::platform::CUDADeviceContext, double>,
ops::Squeeze2Kernel<paddle::platform::CUDADeviceContext, plat::float16>, ops::Squeeze2Kernel<paddle::platform::CUDADeviceContext, plat::float16>,
ops::Squeeze2Kernel<paddle::platform::CUDADeviceContext, bool>,
ops::Squeeze2Kernel<paddle::platform::CUDADeviceContext, int>, ops::Squeeze2Kernel<paddle::platform::CUDADeviceContext, int>,
ops::Squeeze2Kernel<paddle::platform::CUDADeviceContext, int8_t>, ops::Squeeze2Kernel<paddle::platform::CUDADeviceContext, int8_t>,
ops::Squeeze2Kernel<paddle::platform::CUDADeviceContext, int64_t>); ops::Squeeze2Kernel<paddle::platform::CUDADeviceContext, int64_t>);
...@@ -44,6 +47,7 @@ REGISTER_OP_CUDA_KERNEL( ...@@ -44,6 +47,7 @@ REGISTER_OP_CUDA_KERNEL(
ops::Squeeze2GradKernel<paddle::platform::CUDADeviceContext, float>, ops::Squeeze2GradKernel<paddle::platform::CUDADeviceContext, float>,
ops::Squeeze2GradKernel<paddle::platform::CUDADeviceContext, double>, ops::Squeeze2GradKernel<paddle::platform::CUDADeviceContext, double>,
ops::Squeeze2GradKernel<paddle::platform::CUDADeviceContext, plat::float16>, ops::Squeeze2GradKernel<paddle::platform::CUDADeviceContext, plat::float16>,
ops::Squeeze2GradKernel<paddle::platform::CUDADeviceContext, bool>,
ops::Squeeze2GradKernel<paddle::platform::CUDADeviceContext, int>, ops::Squeeze2GradKernel<paddle::platform::CUDADeviceContext, int>,
ops::Squeeze2GradKernel<paddle::platform::CUDADeviceContext, int8_t>, ops::Squeeze2GradKernel<paddle::platform::CUDADeviceContext, int8_t>,
ops::Squeeze2GradKernel<paddle::platform::CUDADeviceContext, int64_t>); ops::Squeeze2GradKernel<paddle::platform::CUDADeviceContext, int64_t>);
...@@ -108,13 +108,17 @@ class TransposeOpMaker : public framework::OpProtoAndCheckerMaker { ...@@ -108,13 +108,17 @@ class TransposeOpMaker : public framework::OpProtoAndCheckerMaker {
"Defaults to \"NHWC\". Specify the data format of the output data, " "Defaults to \"NHWC\". Specify the data format of the output data, "
"the input will be transformed automatically. ") "the input will be transformed automatically. ")
.SetDefault("AnyLayout"); .SetDefault("AnyLayout");
/* int8 parameters */ AddAttr<bool>(
AddAttr<bool>("use_quantizer", "use_quantizer",
"(bool, default false) " "(bool, default false) "
"Set to true for operators that should be quantized and use " "This parameter is no longer used. Use 'mkldnn_data_type' instead.")
"int8 kernel. "
"Only used on CPU.")
.SetDefault(false); .SetDefault(false);
AddAttr<std::string>(
"mkldnn_data_type",
"(string, default \"float32\"). Data type of mkldnn kernel")
.SetDefault("float32")
.InEnum({"float32", "int8", "bfloat16"});
/* int8 parameters */
AddComment(R"DOC( AddComment(R"DOC(
Transpose Operator. Transpose Operator.
......
...@@ -33,6 +33,7 @@ limitations under the License. */ ...@@ -33,6 +33,7 @@ limitations under the License. */
#include <curand.h> #include <curand.h>
#include <thrust/system/cuda/error.h> #include <thrust/system/cuda/error.h>
#include <thrust/system_error.h> #include <thrust/system_error.h>
#include "paddle/fluid/platform/cuda_error.pb.h" #include "paddle/fluid/platform/cuda_error.pb.h"
#endif // PADDLE_WITH_CUDA #endif // PADDLE_WITH_CUDA
...@@ -69,6 +70,8 @@ limitations under the License. */ ...@@ -69,6 +70,8 @@ limitations under the License. */
#include "paddle/fluid/framework/type_defs.h" #include "paddle/fluid/framework/type_defs.h"
#include "paddle/fluid/imperative/type_defs.h" #include "paddle/fluid/imperative/type_defs.h"
DECLARE_int32(call_stack_level);
namespace paddle { namespace paddle {
namespace platform { namespace platform {
...@@ -226,9 +229,7 @@ inline std::string SimplifyDemangleStr(std::string str) { ...@@ -226,9 +229,7 @@ inline std::string SimplifyDemangleStr(std::string str) {
return str; return str;
} }
template <typename StrType> inline std::string GetCurrentTraceBackString() {
inline std::string GetTraceBackString(StrType&& what, const char* file,
int line) {
static constexpr int TRACE_STACK_LIMIT = 100; static constexpr int TRACE_STACK_LIMIT = 100;
std::ostringstream sout; std::ostringstream sout;
...@@ -256,6 +257,13 @@ inline std::string GetTraceBackString(StrType&& what, const char* file, ...@@ -256,6 +257,13 @@ inline std::string GetTraceBackString(StrType&& what, const char* file,
#else #else
sout << "Windows not support stack backtrace yet.\n"; sout << "Windows not support stack backtrace yet.\n";
#endif #endif
return sout.str();
}
template <typename StrType>
inline std::string GetErrorSumaryString(StrType&& what, const char* file,
int line) {
std::ostringstream sout;
sout << "\n----------------------\nError Message " sout << "\n----------------------\nError Message "
"Summary:\n----------------------\n"; "Summary:\n----------------------\n";
sout << string::Sprintf("%s at (%s:%d)", std::forward<StrType>(what), file, sout << string::Sprintf("%s at (%s:%d)", std::forward<StrType>(what), file,
...@@ -264,6 +272,17 @@ inline std::string GetTraceBackString(StrType&& what, const char* file, ...@@ -264,6 +272,17 @@ inline std::string GetTraceBackString(StrType&& what, const char* file,
return sout.str(); return sout.str();
} }
template <typename StrType>
inline std::string GetTraceBackString(StrType&& what, const char* file,
int line) {
if (FLAGS_call_stack_level > 1) {
// FLAGS_call_stack_level > 1 means the C++ call stack is shown
return GetCurrentTraceBackString() + GetErrorSumaryString(what, file, line);
} else {
return GetErrorSumaryString(what, file, line);
}
}
inline bool is_error(bool stat) { return !stat; } inline bool is_error(bool stat) { return !stat; }
inline void throw_on_error(bool stat, const std::string& msg) { inline void throw_on_error(bool stat, const std::string& msg) {
...@@ -427,7 +446,7 @@ struct EnforceNotMet : public std::exception { ...@@ -427,7 +446,7 @@ struct EnforceNotMet : public std::exception {
* *
* Examples: * Examples:
* GET_DATA_SAFELY(ctx.Input<LoDTensor>("X"), "Input", "X", "Mul"); * GET_DATA_SAFELY(ctx.Input<LoDTensor>("X"), "Input", "X", "Mul");
*/ */
#define GET_DATA_SAFELY(__PTR, __ROLE, __NAME, __OP_TYPE) \ #define GET_DATA_SAFELY(__PTR, __ROLE, __NAME, __OP_TYPE) \
(([&]() -> std::add_lvalue_reference<decltype(*(__PTR))>::type { \ (([&]() -> std::add_lvalue_reference<decltype(*(__PTR))>::type { \
auto* __ptr = (__PTR); \ auto* __ptr = (__PTR); \
...@@ -463,7 +482,7 @@ struct EnforceNotMet : public std::exception { ...@@ -463,7 +482,7 @@ struct EnforceNotMet : public std::exception {
* *
* Examples: * Examples:
* OP_INOUT_CHECK(ctx->HasInput("X"), "Input", "X", "Mul"); * OP_INOUT_CHECK(ctx->HasInput("X"), "Input", "X", "Mul");
*/ */
#define OP_INOUT_CHECK(__EXPR, __ROLE, __NAME, __OP_TYPE) \ #define OP_INOUT_CHECK(__EXPR, __ROLE, __NAME, __OP_TYPE) \
do { \ do { \
PADDLE_ENFORCE_EQ(__EXPR, true, paddle::platform::errors::NotFound( \ PADDLE_ENFORCE_EQ(__EXPR, true, paddle::platform::errors::NotFound( \
...@@ -491,7 +510,7 @@ struct EnforceNotMet : public std::exception { ...@@ -491,7 +510,7 @@ struct EnforceNotMet : public std::exception {
* Note: GCC 4.8 cannot select right overloaded function here, so need * Note: GCC 4.8 cannot select right overloaded function here, so need
* to define different functions and macros here, after we upgrade * to define different functions and macros here, after we upgrade
* CI gcc version, we can only define one BOOST_GET macro. * CI gcc version, we can only define one BOOST_GET macro.
*/ */
namespace details { namespace details {
#define DEFINE_SAFE_BOOST_GET(__InputType, __OutputType, __OutputTypePtr, \ #define DEFINE_SAFE_BOOST_GET(__InputType, __OutputType, __OutputTypePtr, \
......
...@@ -483,3 +483,28 @@ DEFINE_double(local_exe_sub_scope_limit, 256.0, // MBytes ...@@ -483,3 +483,28 @@ DEFINE_double(local_exe_sub_scope_limit, 256.0, // MBytes
* Note: * Note:
*/ */
DEFINE_bool(use_mkldnn, false, "Use MKLDNN to run"); DEFINE_bool(use_mkldnn, false, "Use MKLDNN to run");
/**
* Debug related FLAG
* Name: FLAGS_call_stack_level
* Since Version: 2.0.0
* Value Range: int, default=2
* Example:
* Note: Used for debugging. Determines the call stack to print when an error
* or exception happens.
* If FLAGS_call_stack_level == 0, only the error message summary will be shown.
* If FLAGS_call_stack_level == 1, the Python stack and error message summary
* will be shown.
* If FLAGS_call_stack_level == 2, the Python stack, C++ stack, and error
* message summary will be shown.
*/
DEFINE_int32(
call_stack_level, 2,
"Determine the call stack to print when error or exeception happens."
// TODO(zhiqiu): implement logic of FLAGS_call_stack_level==0
// "If FLAGS_call_stack_level == 0, only the error message summary will be "
// "shown. "
"If FLAGS_call_stack_level == 1, the python stack and error message "
"summary will be shown."
"If FLAGS_call_stack_level == 2, the python stack, c++ stack, and "
"error message summary will be shown.");
...@@ -422,6 +422,11 @@ inline std::vector<std::vector<int64_t>> ToMkldnnPadding( ...@@ -422,6 +422,11 @@ inline std::vector<std::vector<int64_t>> ToMkldnnPadding(
} }
} }
inline bool HasOpINT8DataType(const paddle::framework::OpDesc* op) {
return (op->GetAttrIfExists<std::string>("mkldnn_data_type") == "int8" ||
op->GetAttrIfExists<bool>("use_quantizer"));
}
enum class RNNReorderType { PP_NTC, PP_TNC, NTC_PP, TNC_PP }; enum class RNNReorderType { PP_NTC, PP_TNC, NTC_PP, TNC_PP };
} // namespace platform } // namespace platform
......
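A hedged Python-side analogue of HasOpINT8DataType above, for checking an op in a Program; the helper name is hypothetical, the attribute names come from this diff:

    def op_is_int8(op):
        # the new string attribute takes effect alongside the legacy bool switch
        dtype = op.attr("mkldnn_data_type") if op.has_attr("mkldnn_data_type") else "float32"
        legacy = op.attr("use_quantizer") if op.has_attr("use_quantizer") else False
        return dtype == "int8" or legacy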
...@@ -13,6 +13,7 @@ ...@@ -13,6 +13,7 @@
// limitations under the License. // limitations under the License.
#include "paddle/fluid/pybind/global_value_getter_setter.h" #include "paddle/fluid/pybind/global_value_getter_setter.h"
#include <cctype> #include <cctype>
#include <functional> #include <functional>
#include <string> #include <string>
...@@ -20,6 +21,7 @@ ...@@ -20,6 +21,7 @@
#include <unordered_set> #include <unordered_set>
#include <utility> #include <utility>
#include <vector> #include <vector>
#include "gflags/gflags.h" #include "gflags/gflags.h"
#include "paddle/fluid/framework/python_headers.h" #include "paddle/fluid/framework/python_headers.h"
#include "paddle/fluid/platform/enforce.h" #include "paddle/fluid/platform/enforce.h"
...@@ -35,6 +37,7 @@ DECLARE_bool(cpu_deterministic); ...@@ -35,6 +37,7 @@ DECLARE_bool(cpu_deterministic);
DECLARE_bool(enable_rpc_profiler); DECLARE_bool(enable_rpc_profiler);
DECLARE_int32(multiple_of_cupti_buffer_size); DECLARE_int32(multiple_of_cupti_buffer_size);
DECLARE_bool(reader_queue_speed_test_mode); DECLARE_bool(reader_queue_speed_test_mode);
DECLARE_int32(call_stack_level);
// device management // device management
DECLARE_int32(paddle_num_threads); DECLARE_int32(paddle_num_threads);
// executor // executor
...@@ -337,14 +340,15 @@ static void RegisterGlobalVarGetterSetter() { ...@@ -337,14 +340,15 @@ static void RegisterGlobalVarGetterSetter() {
REGISTER_PUBLIC_GLOBAL_VAR( REGISTER_PUBLIC_GLOBAL_VAR(
FLAGS_eager_delete_tensor_gb, FLAGS_enable_parallel_graph, FLAGS_eager_delete_tensor_gb, FLAGS_enable_parallel_graph,
FLAGS_allocator_strategy, FLAGS_use_system_allocator, FLAGS_check_nan_inf, FLAGS_allocator_strategy, FLAGS_use_system_allocator, FLAGS_check_nan_inf,
FLAGS_cpu_deterministic, FLAGS_enable_rpc_profiler, FLAGS_call_stack_level, FLAGS_cpu_deterministic,
FLAGS_multiple_of_cupti_buffer_size, FLAGS_reader_queue_speed_test_mode, FLAGS_enable_rpc_profiler, FLAGS_multiple_of_cupti_buffer_size,
FLAGS_pe_profile_fname, FLAGS_print_sub_graph_dir, FLAGS_reader_queue_speed_test_mode, FLAGS_pe_profile_fname,
FLAGS_fraction_of_cpu_memory_to_use, FLAGS_fuse_parameter_groups_size, FLAGS_print_sub_graph_dir, FLAGS_fraction_of_cpu_memory_to_use,
FLAGS_fuse_parameter_memory_size, FLAGS_init_allocated_mem, FLAGS_fuse_parameter_groups_size, FLAGS_fuse_parameter_memory_size,
FLAGS_initial_cpu_memory_in_mb, FLAGS_memory_fraction_of_eager_deletion, FLAGS_init_allocated_mem, FLAGS_initial_cpu_memory_in_mb,
FLAGS_use_pinned_memory, FLAGS_benchmark, FLAGS_inner_op_parallelism, FLAGS_memory_fraction_of_eager_deletion, FLAGS_use_pinned_memory,
FLAGS_tracer_profile_fname, FLAGS_paddle_num_threads); FLAGS_benchmark, FLAGS_inner_op_parallelism, FLAGS_tracer_profile_fname,
FLAGS_paddle_num_threads);
#ifdef PADDLE_WITH_CUDA #ifdef PADDLE_WITH_CUDA
REGISTER_PUBLIC_GLOBAL_VAR( REGISTER_PUBLIC_GLOBAL_VAR(
......
...@@ -166,17 +166,34 @@ def __bootstrap__(): ...@@ -166,17 +166,34 @@ def __bootstrap__():
os.environ['OMP_NUM_THREADS'] = str(num_threads) os.environ['OMP_NUM_THREADS'] = str(num_threads)
sysstr = platform.system() sysstr = platform.system()
read_env_flags = [ read_env_flags = [
'check_nan_inf', 'fast_check_nan_inf', 'benchmark', 'check_nan_inf',
'eager_delete_scope', 'fraction_of_cpu_memory_to_use', 'fast_check_nan_inf',
'initial_cpu_memory_in_mb', 'init_allocated_mem', 'paddle_num_threads', 'benchmark',
'dist_threadpool_size', 'eager_delete_tensor_gb', 'eager_delete_scope',
'fast_eager_deletion_mode', 'memory_fraction_of_eager_deletion', 'fraction_of_cpu_memory_to_use',
'allocator_strategy', 'reader_queue_speed_test_mode', 'initial_cpu_memory_in_mb',
'print_sub_graph_dir', 'pe_profile_fname', 'inner_op_parallelism', 'init_allocated_mem',
'enable_parallel_graph', 'fuse_parameter_groups_size', 'paddle_num_threads',
'multiple_of_cupti_buffer_size', 'fuse_parameter_memory_size', 'dist_threadpool_size',
'tracer_profile_fname', 'dygraph_debug', 'use_system_allocator', 'eager_delete_tensor_gb',
'enable_unused_var_check', 'free_idle_chunk', 'free_when_no_cache_hit' 'fast_eager_deletion_mode',
'memory_fraction_of_eager_deletion',
'allocator_strategy',
'reader_queue_speed_test_mode',
'print_sub_graph_dir',
'pe_profile_fname',
'inner_op_parallelism',
'enable_parallel_graph',
'fuse_parameter_groups_size',
'multiple_of_cupti_buffer_size',
'fuse_parameter_memory_size',
'tracer_profile_fname',
'dygraph_debug',
'use_system_allocator',
'enable_unused_var_check',
'free_idle_chunk',
'free_when_no_cache_hit',
'call_stack_level',
] ]
if 'Darwin' not in sysstr: if 'Darwin' not in sysstr:
read_env_flags.append('use_pinned_memory') read_env_flags.append('use_pinned_memory')
...@@ -208,12 +225,19 @@ def __bootstrap__(): ...@@ -208,12 +225,19 @@ def __bootstrap__():
if core.is_compiled_with_cuda(): if core.is_compiled_with_cuda():
read_env_flags += [ read_env_flags += [
'fraction_of_gpu_memory_to_use', 'initial_gpu_memory_in_mb', 'fraction_of_gpu_memory_to_use',
'reallocate_gpu_memory_in_mb', 'cudnn_deterministic', 'initial_gpu_memory_in_mb',
'enable_cublas_tensor_op_math', 'conv_workspace_size_limit', 'reallocate_gpu_memory_in_mb',
'cudnn_exhaustive_search', 'selected_gpus', 'sync_nccl_allreduce', 'cudnn_deterministic',
'cudnn_batchnorm_spatial_persistent', 'gpu_allocator_retry_time', 'enable_cublas_tensor_op_math',
'local_exe_sub_scope_limit', 'gpu_memory_limit_mb' 'conv_workspace_size_limit',
'cudnn_exhaustive_search',
'selected_gpus',
'sync_nccl_allreduce',
'cudnn_batchnorm_spatial_persistent',
'gpu_allocator_retry_time',
'local_exe_sub_scope_limit',
'gpu_memory_limit_mb',
] ]
core.init_gflags(["--tryfromenv=" + ",".join(read_env_flags)]) core.init_gflags(["--tryfromenv=" + ",".join(read_env_flags)])
core.init_glog(sys.argv[0]) core.init_glog(sys.argv[0])
......
...@@ -20,13 +20,13 @@ from paddle.fluid.dygraph.dygraph_to_static.origin_info import Location, OriginI ...@@ -20,13 +20,13 @@ from paddle.fluid.dygraph.dygraph_to_static.origin_info import Location, OriginI
ERROR_DATA = "Error data about original source code information and traceback." ERROR_DATA = "Error data about original source code information and traceback."
def attach_error_data(error): def attach_error_data(error, in_runtime=False):
""" """
Attaches error data about original source code information and traceback to an error. Attaches error data about original source code information and traceback to an error.
Args: Args:
error(Exception): A native error. error(Exception): A native error.
in_runtime(bool): True if `error` is raised at runtime; otherwise it is raised at compile time.
Returns: Returns:
An error with attached data about original source code information and traceback. An error with attached data about original source code information and traceback.
""" """
...@@ -34,6 +34,8 @@ def attach_error_data(error): ...@@ -34,6 +34,8 @@ def attach_error_data(error):
tb = traceback.extract_tb(e_traceback)[1:] tb = traceback.extract_tb(e_traceback)[1:]
error_data = ErrorData(e_type, e_value, tb, global_origin_info_map) error_data = ErrorData(e_type, e_value, tb, global_origin_info_map)
error_data.in_runtime = in_runtime
setattr(error, ERROR_DATA, error_data) setattr(error, ERROR_DATA, error_data)
return error return error
...@@ -53,8 +55,6 @@ class TraceBackFrame(OriginInfo): ...@@ -53,8 +55,6 @@ class TraceBackFrame(OriginInfo):
class ErrorData(object): class ErrorData(object):
""" """
Error data attached to an exception which is raised in un-transformed code. Error data attached to an exception which is raised in un-transformed code.
TODO(liym27): Consider the case that op_callstack when error raised from c++ code
""" """
def __init__(self, error_type, error_value, origin_traceback, def __init__(self, error_type, error_value, origin_traceback,
...@@ -63,6 +63,7 @@ class ErrorData(object): ...@@ -63,6 +63,7 @@ class ErrorData(object):
self.error_value = error_value self.error_value = error_value
self.origin_traceback = origin_traceback self.origin_traceback = origin_traceback
self.origin_info_map = origin_info_map self.origin_info_map = origin_info_map
self.in_runtime = False
def create_exception(self): def create_exception(self):
message = self.create_message() message = self.create_message()
...@@ -81,6 +82,12 @@ class ErrorData(object): ...@@ -81,6 +82,12 @@ class ErrorData(object):
message_lines.append(header_message) message_lines.append(header_message)
message_lines.append("") message_lines.append("")
# Simplify the error value to improve readability if the error is raised at runtime
if self.in_runtime:
self._simplify_error_value()
message_lines.append(str(self.error_value))
return '\n'.join(message_lines)
# Step2: Optimizes stack information with source code information of dygraph from user. # Step2: Optimizes stack information with source code information of dygraph from user.
for filepath, lineno, funcname, code in self.origin_traceback: for filepath, lineno, funcname, code in self.origin_traceback:
loc = Location(filepath, lineno) loc = Location(filepath, lineno)
...@@ -102,3 +109,25 @@ class ErrorData(object): ...@@ -102,3 +109,25 @@ class ErrorData(object):
message_lines.append(error_message) message_lines.append(error_message)
return '\n'.join(message_lines) return '\n'.join(message_lines)
def _simplify_error_value(self):
"""
Simplifies the error value to improve readability if the error is raised at runtime.
NOTE(liym27): The op callstack information about transformed static code has been replaced with original dygraph code.
TODO(liym27):
1. Need a more robust way because the code of start_trace may change.
2. Set the switch to determine whether to simplify error_value
"""
assert self.in_runtime is True
error_value_lines = str(self.error_value).split("\n")
error_value_lines_strip = [mes.lstrip(" ") for mes in error_value_lines]
start_trace = "outputs = static_func(*inputs)"
start_idx = error_value_lines_strip.index(start_trace)
error_value_lines = error_value_lines[start_idx + 1:]
error_value_str = '\n'.join(error_value_lines)
self.error_value = self.error_type(error_value_str)
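A hedged sketch of the runtime path these changes enable, using names from this diff; the failing `static_func` is a stand-in for the transformed static program:

    from paddle.fluid.dygraph.dygraph_to_static.error import attach_error_data, ERROR_DATA

    def static_func():                        # stand-in for the transformed static program
        raise RuntimeError("low-level error text")

    try:
        static_func()
    except Exception as e:
        attach_error_data(e, in_runtime=True)     # marks the error as a runtime one
        error_data = getattr(e, ERROR_DATA)
        assert error_data.in_runtime              # create_message() will now simplify the value
        raise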
...@@ -19,6 +19,9 @@ import inspect ...@@ -19,6 +19,9 @@ import inspect
import gast import gast
from paddle.fluid import core
from paddle.fluid.framework import Program
# NOTE(liym27): Please use `getattr(ast_node, ORIGI_INFO)` instead of . operation to get the original information of ast node. # NOTE(liym27): Please use `getattr(ast_node, ORIGI_INFO)` instead of . operation to get the original information of ast node.
ORIGI_INFO = "Original information of source code for ast node." ORIGI_INFO = "Original information of source code for ast node."
ORIGI_INFO_MAP = "Original information map of source code." ORIGI_INFO_MAP = "Original information map of source code."
...@@ -70,6 +73,10 @@ class OriginInfo(object): ...@@ -70,6 +73,10 @@ class OriginInfo(object):
self.location.filepath, self.location.lineno, self.function_name, self.location.filepath, self.location.lineno, self.function_name,
self.source_code.lstrip()) self.source_code.lstrip())
def as_frame(self):
return (self.location.filepath, self.location.lineno,
self.function_name, self.source_code.lstrip())
class OriginInfoAttacher(gast.NodeTransformer): class OriginInfoAttacher(gast.NodeTransformer):
""" """
...@@ -249,3 +256,63 @@ def ast_walk(transformed_node, static_node): ...@@ -249,3 +256,63 @@ def ast_walk(transformed_node, static_node):
if isinstance(d_item, gast.AST): if isinstance(d_item, gast.AST):
transformed_node_list.append(d_item) transformed_node_list.append(d_item)
static_node_list.append(s_item) static_node_list.append(s_item)
def update_op_callstack_with_origin_info(program):
"""
Replaces op callstack information about transformed static code with original dygraph code.
"""
assert isinstance(program, Program)
def get_new_op_callstack(callstack):
"""
An example of callstack:
File "path1/to/file.py", line 10, in func_1
y = fluid.layers.fill_constant(x, shape=[1], dtype="int32")
File "path2/to/file.py", line 740, in fill_constant
stop_gradient=True)
File "path3/to/file.py", line 43, in append_op
return self.main_program.current_block().append_op(*args, **kwargs)
File "path4/to/file.py", line 2811, in append_op
attrs=kwargs.get("attrs", None))
File "path5/to/file.py", line 1919, in __init__
for frame in traceback.extract_stack():
"""
assert len(callstack) % 2 == 0
for i in range(0, len(callstack), 2):
file_line = callstack[i].lstrip(" ").split(",")
filepath = file_line[0][6:-1]
lineno = int(file_line[1][6:])
funcname = file_line[2][4:]
code = callstack[i + 1].lstrip(" ")
loc = Location(filepath, lineno)
dygraph_func_info = global_origin_info_map.get(loc.line_location)
if dygraph_func_info:
filepath, lineno, funcname, code = \
dygraph_func_info.as_frame()
callstack[i] = ' File "{}", line {}, in {}'.format(
filepath, lineno, funcname)
callstack[i + 1] = ' {}'.format(code)
return callstack
op_maker = core.op_proto_and_checker_maker
callstack_var_name = op_maker.kOpCreationCallstackAttrName()
for block in program.blocks:
for i, op in enumerate(block.ops):
if op.has_attr(callstack_var_name):
callstack = op.attr(callstack_var_name)
callstack = get_new_op_callstack(callstack)
op._set_attr(callstack_var_name, callstack)
return program
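A worked example of the frame parsing in get_new_op_callstack above, on an illustrative input line:

    frame = '  File "path1/to/file.py", line 10, in func_1'
    parts = frame.lstrip(" ").split(",")
    # parts == ['File "path1/to/file.py"', ' line 10', ' in func_1']
    filepath = parts[0][6:-1]      # 'path1/to/file.py'
    lineno = int(parts[1][6:])     # 10
    funcname = parts[2][4:]        # 'func_1'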
...@@ -130,8 +130,6 @@ class PartialProgramLayer(layers.Layer): ...@@ -130,8 +130,6 @@ class PartialProgramLayer(layers.Layer):
self._check_params_all_inited(main_program) self._check_params_all_inited(main_program)
# 2. Prune the parameters not used anywhere in the program. # 2. Prune the parameters not used anywhere in the program.
self._prune_unused_params(main_program) self._prune_unused_params(main_program)
# 3. Remove op's python call stack with redundant low-level error messages.
main_program = self._remove_op_call_stack(main_program)
return main_program return main_program
......
...@@ -37,6 +37,7 @@ from paddle.fluid.dygraph.base import param_guard ...@@ -37,6 +37,7 @@ from paddle.fluid.dygraph.base import param_guard
from paddle.fluid.data_feeder import check_type from paddle.fluid.data_feeder import check_type
from paddle.fluid.dygraph.dygraph_to_static.partial_program import partial_program_from from paddle.fluid.dygraph.dygraph_to_static.partial_program import partial_program_from
from paddle.fluid.dygraph.dygraph_to_static.origin_info import attach_origin_info, create_and_update_origin_info_map from paddle.fluid.dygraph.dygraph_to_static.origin_info import attach_origin_info, create_and_update_origin_info_map
from paddle.fluid.dygraph.dygraph_to_static.origin_info import update_op_callstack_with_origin_info
from paddle.fluid.dygraph.dygraph_to_static.error import attach_error_data, ERROR_DATA from paddle.fluid.dygraph.dygraph_to_static.error import attach_error_data, ERROR_DATA
__all__ = ['ProgramTranslator', 'convert_to_static'] __all__ = ['ProgramTranslator', 'convert_to_static']
...@@ -304,6 +305,8 @@ class ConcreteProgram(object): ...@@ -304,6 +305,8 @@ class ConcreteProgram(object):
(tuple, list)) and outputs is not None: (tuple, list)) and outputs is not None:
outputs = [outputs] outputs = [outputs]
main_program = update_op_callstack_with_origin_info(main_program)
return ConcreteProgram( return ConcreteProgram(
inputs=inputs, inputs=inputs,
outputs=outputs, outputs=outputs,
...@@ -516,7 +519,7 @@ class ProgramTranslator(object): ...@@ -516,7 +519,7 @@ class ProgramTranslator(object):
# 2. If e raised in runtime, e should be attached to ERROR_DATA here. # 2. If e raised in runtime, e should be attached to ERROR_DATA here.
if not hasattr(e, ERROR_DATA): if not hasattr(e, ERROR_DATA):
# runtime error # runtime error
attach_error_data(e) attach_error_data(e, in_runtime=True)
raise raise
def get_func(self, dygraph_func): def get_func(self, dygraph_func):
......
...@@ -176,6 +176,16 @@ def _declarative_(dygraph_func): ...@@ -176,6 +176,16 @@ def _declarative_(dygraph_func):
error_data = getattr(e, ERROR_DATA, None) error_data = getattr(e, ERROR_DATA, None)
if error_data: if error_data:
new_exception = error_data.create_exception() new_exception = error_data.create_exception()
if six.PY3:
# NOTE(liym27):
# 1. Why `raise new_exception from None`?
# In Python 3, by default, a new exception is raised with the trace information of the caught exception.
# This only raises new_exception and hides unwanted implementation details from tracebacks of the
# caught exception.
# 2. Use exec to bypass syntax error checking in Python 2.
six.exec_("raise new_exception from None")
else:
raise new_exception raise new_exception
else: else:
raise raise
......
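A minimal, standalone illustration of the `raise ... from None` behaviour the note above relies on (Python 3 only; the exception types are illustrative):

    def simplified():
        try:
            1 / 0
        except ZeroDivisionError:
            # `from None` suppresses the chained-exception context in the traceback
            raise ValueError("user-facing message only") from None

    simplified()  # the traceback shows only the ValueError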
...@@ -25,11 +25,13 @@ import six ...@@ -25,11 +25,13 @@ import six
from .data_feeder import convert_dtype from .data_feeder import convert_dtype
from .framework import Program, default_main_program, Variable, Operator, convert_np_dtype_to_dtype_ from .framework import Program, default_main_program, Variable, Operator, convert_np_dtype_to_dtype_
from . import core from . import core
from . import unique_name
from . import compiler from . import compiler
from .. import compat as cpt from .. import compat as cpt
from .trainer_factory import TrainerFactory from .trainer_factory import TrainerFactory
from .trainer_factory import FetchHandlerMonitor from .trainer_factory import FetchHandlerMonitor
import copy import copy
from .incubate.checkpoint import auto_checkpoint as acp
__all__ = ['Executor', 'global_scope', 'scope_guard'] __all__ = ['Executor', 'global_scope', 'scope_guard']
...@@ -559,6 +561,9 @@ class Executor(object): ...@@ -559,6 +561,9 @@ class Executor(object):
self._closed = False self._closed = False
self.pruned_program_scope_caches = dict() self.pruned_program_scope_caches = dict()
self._auto_checkpoint_name = unique_name.generate(
"__auto_checkpoint_executor__")
def _get_scope_cache(self, program_cache_key): def _get_scope_cache(self, program_cache_key):
return self.scope_caches.get(program_cache_key, None) return self.scope_caches.get(program_cache_key, None)
...@@ -1152,6 +1157,8 @@ class Executor(object): ...@@ -1152,6 +1157,8 @@ class Executor(object):
compiled = isinstance(program, compiler.CompiledProgram) compiled = isinstance(program, compiler.CompiledProgram)
acp._auto_checkpoint(self, program)
# For backward compatibility, run directly. # For backward compatibility, run directly.
if not compiled: if not compiled:
# In distributed training, the compiled program is saved in Program._graph # In distributed training, the compiled program is saved in Program._graph
......
...@@ -2385,10 +2385,27 @@ class Operator(object): ...@@ -2385,10 +2385,27 @@ class Operator(object):
def _is_optimize_op(self): def _is_optimize_op(self):
op_maker = core.op_proto_and_checker_maker op_maker = core.op_proto_and_checker_maker
OPTIMIZE = core.op_proto_and_checker_maker.OpRole.Optimize OPTIMIZE = core.op_proto_and_checker_maker.OpRole.Optimize
if not self.desc.has_attr(op_maker.kOpRoleAttrName()):
return False
op_role = self.desc.attr(op_maker.kOpRoleAttrName()) op_role = self.desc.attr(op_maker.kOpRoleAttrName())
if op_role & int(OPTIMIZE): if op_role & int(OPTIMIZE):
return True return True
else:
return False
def _is_backward_op(self):
op_maker = core.op_proto_and_checker_maker
BACKWARD = core.op_proto_and_checker_maker.OpRole.Backward
if not self.desc.has_attr(op_maker.kOpRoleAttrName()):
return False
op_role = self.desc.attr(op_maker.kOpRoleAttrName())
if op_role & int(BACKWARD):
return True
return False return False
...@@ -3942,6 +3959,10 @@ class Program(object): ...@@ -3942,6 +3959,10 @@ class Program(object):
# appending gradients times # appending gradients times
self._appending_grad_times = 0 self._appending_grad_times = 0
# identifier for auto checkpoint
self._auto_checkpoint_name = unique_name.generate(
"__auto_checkpoint_program__")
# compiled program, i.e. Graph # compiled program, i.e. Graph
self._graph = None self._graph = None
......
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from ..fleet.utils.fs import FS, LocalFS
from ..fleet.utils.hdfs import HDFSClient
from ...compiler import CompiledProgram
class SerializableBase(object):
def serialize(self, path):
raise NotImplementedError
def deserialize(self, path):
raise NotImplementedError
class PaddleModel(SerializableBase):
def __init__(self, exe, program):
self._exe = exe
self._origin_program = program
self._program = program
if isinstance(program, CompiledProgram):
self._program = program._program
self._file_name = "_paddle_fleet_param__"
def serialize(self, path):
from ...io import save_persistables
save_persistables(
executor=self._exe,
dirname=path,
main_program=self._program,
filename=self._file_name)
def deserialize(self, path):
from ...io import load_persistables
load_persistables(
executor=self._exe,
dirname=path,
main_program=self._program,
filename=self._file_name)
class CheckpointSaver(object):
def __init__(self, fs):
self._fs = fs
self._checkpoint_prefix = "__paddle_checkpoint__"
def save_checkpoint(self,
path,
slists,
trainer_id=None,
local_cache_path=".cache"):
"""
Serialize the objects in slists to path.
Return the real saved path and the checkpoint number.
"""
if not self._fs.is_exist(path):
self._fs.mkdirs(path)
else:
assert self._fs.is_dir(path), "path:{} must be a directory".format(
path)
max_no = self._get_last_checkpoint_no(path)
if max_no < 0:
max_no = -1
max_no += 1
real_path = "{}/{}.{}".format(path, self._checkpoint_prefix, max_no)
tmp_path = "{}.tmp".format(real_path)
saved_path = tmp_path
local_fs = LocalFS()
cache_path = None
if self._fs.need_upload_download():
cache_path = "{}/{}.{}.saved_cache".format(
local_cache_path, self._checkpoint_prefix, max_no)
if trainer_id is not None:
cache_path = "{}.{}".format(cache_path, trainer_id)
if not local_fs.is_exist(cache_path):
local_fs.mkdirs(cache_path)
else:
assert local_fs.is_dir(cache_path), \
"cache path:{} must be a directory".format(cache_path)
saved_path = cache_path
for s in slists:
s.serialize(saved_path)
if self._fs.need_upload_download():
self._fs.delete(tmp_path)
self._fs.upload(cache_path, tmp_path)
local_fs.delete(cache_path)
self._fs.mv(tmp_path, real_path)
return real_path, max_no
def load_checkpoint(self,
path,
slists,
trainer_id,
local_cache_path=".cache",
checkpoint_no=None,
ignore_empty=True):
"""
Deserialize the objects in slists from path.
Return the real load path.
"""
if checkpoint_no is None:
max_no = self._get_last_checkpoint_no(path)
if not ignore_empty:
assert max_no >= 0, "Can't find checkpoint"
if max_no < 0:
return None
checkpoint_no = max_no
else:
assert isinstance(checkpoint_no, int)
assert checkpoint_no >= 0
local_fs = LocalFS()
if self._fs.need_upload_download():
cache_path = "{}/{}.{}.load_cache".format(
local_cache_path, self._checkpoint_prefix, checkpoint_no)
if trainer_id is not None:
cache_path = "{}.{}".format(cache_path, trainer_id)
if not local_fs.is_exist(local_cache_path):
local_fs.mkdirs(local_cache_path)
if local_fs.is_exist(cache_path):
local_fs.delete(cache_path)
real_path = "{}/{}.{}".format(path, self._checkpoint_prefix,
checkpoint_no)
load_path = real_path
if self._fs.need_upload_download():
self._fs.download(real_path, cache_path)
load_path = cache_path
for s in slists:
s.deserialize(load_path)
if self._fs.need_upload_download() and cache_path:
local_fs.delete(cache_path)
return real_path
def get_checkpoint_no(self, root_path):
a = []
dirs = self._fs.list_dirs(root_path)
for d in dirs:
g = d.split(".")
if len(g) != 2:
continue
if g[0] != self._checkpoint_prefix:
continue
try:
n = int(g[1])
a.append(n)
except ValueError:
continue
a.sort()
return a
def _get_last_checkpoint_no(self, root_path):
"""
Only scan the first directory level.
"""
a = self.get_checkpoint_no(root_path)
if len(a) > 0:
return a[-1]
return -1
def clean_redundant_checkpoints(self, root_path, reserved=[]):
max_no = self._get_last_checkpoint_no(root_path)
if max_no < 0:
return
s = set(reserved)
if len(s) == 0:
s.add(max_no)
dirs = self._fs.list_dirs(root_path)
for d in dirs:
g = d.split(".")
if len(g) != 2:
continue
if g[0] != self._checkpoint_prefix:
continue
try:
n = int(g[1])
if n not in s:
path = "{}/{}.{}".format(root_path, self._checkpoint_prefix,
n)
self._fs.delete(path)
except Exception as e:
print(e)
continue
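A hedged usage sketch of the CheckpointSaver introduced above; `exe` and `main_program` stand for an Executor and Program built elsewhere, and the paths are placeholders:

    from paddle.fluid.incubate.fleet.utils.fs import LocalFS
    from paddle.fluid.incubate.checkpoint.checkpoint_saver import PaddleModel, CheckpointSaver

    saver = CheckpointSaver(LocalFS())
    model = PaddleModel(exe, main_program)
    real_path, ckpt_no = saver.save_checkpoint("./checkpoints", [model])
    saver.load_checkpoint("./checkpoints", [model], trainer_id=None)
    saver.clean_redundant_checkpoints("./checkpoints")  # keeps only the newest by default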
...@@ -27,6 +27,7 @@ from paddle.fluid.incubate.fleet.base.fleet_base import DistributedOptimizer ...@@ -27,6 +27,7 @@ from paddle.fluid.incubate.fleet.base.fleet_base import DistributedOptimizer
from paddle.fluid import compiler from paddle.fluid import compiler
from paddle.fluid.incubate.fleet.utils.fs import LocalFS from paddle.fluid.incubate.fleet.utils.fs import LocalFS
from paddle.fluid.incubate.checkpoint.checkpoint_saver import PaddleModel, CheckpointSaver
import os import os
import sys import sys
...@@ -46,21 +47,6 @@ class DistFCConfig(object): ...@@ -46,21 +47,6 @@ class DistFCConfig(object):
pass pass
class TrainStatus(object):
def __init__(self, epoch_no=-1):
# completed epoch
self._epoch_no = epoch_no
def next(self):
return self._epoch_no + 1
def __eq__(self, t):
return self._epoch_no == t._epoch_no
def __ne__(self, t):
return not self == t
class Collective(Fleet): class Collective(Fleet):
def __init__(self): def __init__(self):
super(Collective, self).__init__(Mode.COLLECTIVE) super(Collective, self).__init__(Mode.COLLECTIVE)
...@@ -152,90 +138,10 @@ class Collective(Fleet): ...@@ -152,90 +138,10 @@ class Collective(Fleet):
io.save_persistables(executor, dirname, main_program, filename=filename) io.save_persistables(executor, dirname, main_program, filename=filename)
def _save_train_status(self, path, train_status):
d = {}
d["epoch_no"] = train_status._epoch_no
file_name = "{}/fleet_train_status".format(path)
with open(file_name, 'w') as f:
json.dump(d, f)
def _load_train_status(self, path):
file_name = "{}/fleet_train_status".format(path)
r = TrainStatus()
if not os.path.isfile(file_name):
return r
d = {}
with open(file_name, 'r') as f:
d = json.load(f)
assert "epoch_no" in d, "Can't find epoch_no in dict from train_status file:{}".format(
d)
r._epoch_no = d["epoch_no"]
assert r._epoch_no >= 0, "Data in checkpoint file is not valid:{}".format(
d)
return r
def _get_last_checkpoint_no(self, root_path, fs):
"""
only get the first depth
"""
max_no = -1
d = {}
dirs = fs.list_dirs(root_path)
for d in dirs:
g = d.split(".")
if len(g) != 2:
continue
if g[0] != "__paddle_fleet_checkpoint__":
continue
try:
n = int(g[1])
if n > max_no:
max_no = n
except:
continue
return max_no
def clean_redundant_checkpoints(self,
root_path,
fs=LocalFS(),
checkpoint_num=1):
max_no = self._get_last_checkpoint_no(root_path, fs)
if max_no < 0:
return
if checkpoint_num < 1:
checkpoint_num = 1
dirs = fs.list_dirs(root_path)
for d in dirs:
g = d.split(".")
if len(g) != 2:
continue
if g[0] != self._checkpoint_prefix:
continue
try:
n = int(g[1])
if n <= max_no - checkpoint_num:
path = "{}/{}.{}".format(root_path, self._checkpoint_prefix,
n)
fs.delete(path)
except Exception as e:
print(e)
continue
def save_checkpoint(self, def save_checkpoint(self,
executor, executor,
path, path,
trainer_id,
train_status, train_status,
main_program=None, main_program=None,
fs=LocalFS(), fs=LocalFS(),
...@@ -248,53 +154,25 @@ class Collective(Fleet): ...@@ -248,53 +154,25 @@ class Collective(Fleet):
if main_program == None: if main_program == None:
main_program = self._transpiled_program main_program = self._transpiled_program
if not fs.is_exist(path): m = PaddleModel(executor, main_program)
fs.mkdirs(path) t = train_status
else: c = CheckpointSaver(fs)
assert fs.is_dir(path), "path:%s must be a directory".format(path) real_path, checkpoint_no = c.save_checkpoint(
path=path,
max_no = self._get_last_checkpoint_no(path, fs=fs) slists=[m, t],
if max_no < 0: trainer_id=trainer_id,
max_no = -1 local_cache_path=local_cache_path)
real_path = "{}/{}.{}".format(path, self._checkpoint_prefix, max_no + 1)
tmp_path = "{}.tmp".format(real_path)
saved_path = tmp_path
local_fs = LocalFS()
cache_path = None
if fs.need_upload_download():
cache_path = "{}/{}.{}.saved_cache".format(
local_cache_path, self._checkpoint_prefix, max_no + 1)
if not local_fs.is_exist(cache_path):
local_fs.mkdirs(cache_path)
else:
assert fs.is_dir(
path), "cache path:{} must be a directory".format(
cache_path)
saved_path = cache_path
self.save_persistables(
executor=executor,
dirname=saved_path,
main_program=main_program,
filename=self._param_file_name)
self._save_train_status(path=saved_path, train_status=train_status)
if fs.need_upload_download():
fs.delete(tmp_path)
fs.upload(cache_path, tmp_path)
fs.mv(tmp_path, real_path)
if not remain_all_checkpoint: if not remain_all_checkpoint:
self.clean_redundant_checkpoints(path) c.clean_redundant_checkpoints(path)
return real_path, checkpoint_no
def load_checkpoint(self, def load_checkpoint(self,
executor, executor,
path, path,
trainer_id, trainer_id,
train_status,
main_program=None, main_program=None,
fs=LocalFS(), fs=LocalFS(),
local_cache_path=".cache", local_cache_path=".cache",
...@@ -302,39 +180,17 @@ class Collective(Fleet): ...@@ -302,39 +180,17 @@ class Collective(Fleet):
""" """
This function load persistables and current epoch num from path. This function load persistables and current epoch num from path.
""" """
max_no = self._get_last_checkpoint_no(path, fs)
if not ignore_empty:
assert max_no >= 0, "Can't find checkpoint"
if max_no < 0:
return None
local_fs = LocalFS()
if fs.need_upload_download():
cache_path = "{}/{}.{}.load_cache.{}".format(
local_cache_path, self._checkpoint_prefix, max_no, trainer_id)
if not local_fs.is_exist(local_cache_path):
local_fs.mkdirs(local_cache_path)
if local_fs.is_exist(cache_path):
local_fs.delete(cache_path)
real_path = "{}/{}.{}".format(path, self._checkpoint_prefix, max_no)
load_path = real_path
if fs.need_upload_download():
fs.download(real_path, cache_path)
load_path = cache_path
if main_program == None: if main_program == None:
main_program = self._transpiled_program main_program = self._transpiled_program
io.load_persistables( m = PaddleModel(executor, main_program)
executor=executor, c = CheckpointSaver(fs)
dirname=load_path, return c.load_checkpoint(
main_program=main_program, path, [m, train_status],
filename=self._param_file_name) trainer_id=trainer_id,
ignore_empty=ignore_empty,
return self._load_train_status(load_path) local_cache_path=local_cache_path)
fleet = Collective() fleet = Collective()
......
...@@ -579,7 +579,7 @@ class FleetTranspiler(Fleet): ...@@ -579,7 +579,7 @@ class FleetTranspiler(Fleet):
block.append_op( block.append_op(
type='recv_save', type='recv_save',
attrs={ attrs={
"trainer_id": self._role_maker.worker_id(), "trainer_id": self._role_maker.worker_index(),
"shape": var.shape, "shape": var.shape,
"slice_shapes": "slice_shapes":
[",".join([str(i) for i in var.shape])], [",".join([str(i) for i in var.shape])],
......
...@@ -329,7 +329,7 @@ class CompileTimeStrategy(object): ...@@ -329,7 +329,7 @@ class CompileTimeStrategy(object):
is_distributed = True if param_name in distibuted_varnames else False is_distributed = True if param_name in distibuted_varnames else False
ctx = self.build_ctx(grad, self.grad_var_mapping, True, False, ctx = self.build_ctx(grad, self.grad_var_mapping, True, True,
True, is_distributed) True, is_distributed)
send_ctx[ctx.var_name()] = ctx send_ctx[ctx.var_name()] = ctx
......
...@@ -45,6 +45,10 @@ class FSTimeOut(Exception): ...@@ -45,6 +45,10 @@ class FSTimeOut(Exception):
pass pass
class FSShellCmdAborted(ExecuteError):
pass
class FS(object): class FS(object):
@abc.abstractmethod @abc.abstractmethod
def ls_dir(self, fs_path): def ls_dir(self, fs_path):
...@@ -87,7 +91,7 @@ class FS(object): ...@@ -87,7 +91,7 @@ class FS(object):
raise NotImplementedError raise NotImplementedError
@abc.abstractmethod @abc.abstractmethod
def mv(self, fs_src_path, fs_dst_path): def mv(self, fs_src_path, fs_dst_path, overwrite=False, test_exists=False):
raise NotImplementedError raise NotImplementedError
@abc.abstractmethod @abc.abstractmethod
...@@ -98,6 +102,10 @@ class FS(object): ...@@ -98,6 +102,10 @@ class FS(object):
def list_dirs(self, fs_path): def list_dirs(self, fs_path):
raise NotImplementedError raise NotImplementedError
@abc.abstractmethod
def touch(self, fs_path, exist_ok=True):
raise NotImplementedError
class LocalFS(FS): class LocalFS(FS):
def ls_dir(self, fs_path): def ls_dir(self, fs_path):
...@@ -138,13 +146,21 @@ class LocalFS(FS): ...@@ -138,13 +146,21 @@ class LocalFS(FS):
def is_exist(self, fs_path): def is_exist(self, fs_path):
return os.path.exists(fs_path) return os.path.exists(fs_path)
def touch(self, fs_path): def touch(self, fs_path, exist_ok=True):
return Path(fs_path).touch() if self.is_exist(fs_path):
if exist_ok:
return
raise FSFileExistsError
def mv(self, src_path, dst_path): return Path(fs_path).touch(exist_ok=True)
def mv(self, src_path, dst_path, overwrite=False, test_exists=False):
if not self.is_exist(src_path): if not self.is_exist(src_path):
raise FSFileNotExistsError raise FSFileNotExistsError
if overwrite and self.is_exist(dst_path):
self.delete(dst_path)
if self.is_exist(dst_path): if self.is_exist(dst_path):
raise FSFileExistsError raise FSFileExistsError
......
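A hedged sketch of the new LocalFS semantics above (import path as used elsewhere in this diff; the /tmp paths are placeholders):

    from paddle.fluid.incubate.fleet.utils.fs import LocalFS

    fs = LocalFS()
    fs.touch("/tmp/ckpt_a")   # exist_ok=True by default, so re-touching is a no-op
    fs.touch("/tmp/ckpt_b")
    # without overwrite=True the next call raises FSFileExistsError:
    fs.mv("/tmp/ckpt_a", "/tmp/ckpt_b", overwrite=True)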
...@@ -26,8 +26,8 @@ import time ...@@ -26,8 +26,8 @@ import time
import logging import logging
import six import six
from . import fs from . import fs
from .fs import FS, LocalFS, FSFileExistsError, FSFileNotExistsError, ExecuteError, FSTimeOut from .fs import FS, LocalFS, FSFileExistsError, FSFileNotExistsError, ExecuteError, FSTimeOut, FSShellCmdAborted
import paddle.fluid as fluid from paddle.fluid import core
import functools import functools
from pathlib import PurePosixPath, Path from pathlib import PurePosixPath, Path
...@@ -36,21 +36,39 @@ import shutil ...@@ -36,21 +36,39 @@ import shutil
__all__ = ["HDFSClient"] __all__ = ["HDFSClient"]
def _handle_errors(f): def _handle_errors(max_time_out=None):
def decorator(f):
@functools.wraps(f)
def handler(*args, **kwargs): def handler(*args, **kwargs):
o = args[0]
time_out = max_time_out
if time_out is None:
time_out = float(o._time_out) / 1000.0
else:
time_out /= 1000.0
inter = float(o._sleep_inter) / 1000.0
start = time.time() start = time.time()
last_print_time = start
while True: while True:
try: try:
return f(*args, **kwargs) return f(*args, **kwargs)
# important: only ExecuteError needs to be retried

except ExecuteError as e: except ExecuteError as e:
o = args[0]
time_out = float(o._time_out) / 1000.0
inter = float(o._sleep_inter) / 1000.0
if time.time() - start >= time_out: if time.time() - start >= time_out:
raise FSTimeOut raise FSTimeOut("args:{} timeout:{}".format(
args, time.time() - start))
time.sleep(inter) time.sleep(inter)
return functools.wraps(f)(handler) if time.time() - last_print_time > 30:
print("hadoop operator timeout:args:{} timeout:{}".format(
args, time.time() - start))
last_print_time = time.time()
return handler
return decorator
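The reworked _handle_errors is now a decorator factory: @_handle_errors() takes the retry window from the instance's _time_out (milliseconds), @_handle_errors(max_time_out=...) overrides it per method, and only ExecuteError triggers a retry. A toy sketch of the resulting behavior, assuming it runs in this module so _handle_errors and ExecuteError are in scope:

import time

class ToyClient(object):
    def __init__(self):
        self._time_out = 2 * 1000    # ms, default retry window
        self._sleep_inter = 100      # ms between retries
        self._ready_at = time.time() + 0.5

    @_handle_errors()                # window taken from self._time_out
    def flaky(self):
        if time.time() < self._ready_at:
            raise ExecuteError("not ready yet")
        return "ok"

    @_handle_errors(max_time_out=10 * 1000)  # explicit 10s window overrides _time_out
    def slow(self):
        return "ok"

print(ToyClient().flaky())  # retried every 100ms, succeeds after ~0.5s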
class HDFSClient(FS): class HDFSClient(FS):
...@@ -72,6 +90,7 @@ class HDFSClient(FS): ...@@ -72,6 +90,7 @@ class HDFSClient(FS):
if configs: if configs:
for k, v in six.iteritems(configs): for k, v in six.iteritems(configs):
config_command = '-D%s=%s' % (k, v) config_command = '-D%s=%s' % (k, v)
self.pre_commands.append(config_command)
self._time_out = time_out self._time_out = time_out
self._sleep_inter = sleep_inter self._sleep_inter = sleep_inter
...@@ -80,17 +99,22 @@ class HDFSClient(FS): ...@@ -80,17 +99,22 @@ class HDFSClient(FS):
r'\s?responseErrorMsg\s?\:.*, errorCode\:\s?[0-9]+, path\:') r'\s?responseErrorMsg\s?\:.*, errorCode\:\s?[0-9]+, path\:')
def _run_cmd(self, cmd, redirect_stderr=False): def _run_cmd(self, cmd, redirect_stderr=False):
ret, output = fluid.core.shell_execute_cmd(cmd, 0, 0, redirect_stderr) exe_cmd = "{} -{}".format(self._base_cmd, cmd)
return int(ret), output.splitlines() ret, output = core.shell_execute_cmd(exe_cmd, 0, 0, redirect_stderr)
ret = int(ret)
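# exit status 134 is 128 + SIGABRT(6): the shell command was killed by abort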
if ret == 134:
raise FSShellCmdAborted(cmd)
return ret, output.splitlines()
@_handle_errors()
def list_dirs(self, fs_path): def list_dirs(self, fs_path):
if not self.is_exist(fs_path): if not self.is_exist(fs_path):
return [] return []
dirs, _ = self.ls_dir(fs_path) dirs, files = self._ls_dir(fs_path)
return dirs return dirs
@_handle_errors @_handle_errors()
def ls_dir(self, fs_path): def ls_dir(self, fs_path):
""" """
list the directory under fs_path, returning only the pure entry names without the fs_path prefix list the directory under fs_path, returning only the pure entry names without the fs_path prefix
...@@ -98,11 +122,14 @@ class HDFSClient(FS): ...@@ -98,11 +122,14 @@ class HDFSClient(FS):
if not self.is_exist(fs_path): if not self.is_exist(fs_path):
return [], [] return [], []
cmd = "{} -ls {}".format(self._base_cmd, fs_path) return self._ls_dir(fs_path)
def _ls_dir(self, fs_path):
cmd = "ls {}".format(fs_path)
ret, lines = self._run_cmd(cmd) ret, lines = self._run_cmd(cmd)
if ret != 0: if ret != 0:
raise ExecuteError raise ExecuteError(cmd)
dirs = [] dirs = []
files = [] files = []
...@@ -111,9 +138,6 @@ class HDFSClient(FS): ...@@ -111,9 +138,6 @@ class HDFSClient(FS):
if len(arr) != 8: if len(arr) != 8:
continue continue
if fs_path not in arr[7]:
continue
p = PurePosixPath(arr[7]) p = PurePosixPath(arr[7])
if arr[0][0] == 'd': if arr[0][0] == 'd':
dirs.append(p.name) dirs.append(p.name)
...@@ -130,18 +154,20 @@ class HDFSClient(FS): ...@@ -130,18 +154,20 @@ class HDFSClient(FS):
return None return None
@_handle_errors @_handle_errors()
def is_dir(self, fs_path): def is_dir(self, fs_path):
if not self.is_exist(fs_path): if not self.is_exist(fs_path):
return False return False
cmd = "{} -test -d {}".format( return self._is_dir(fs_path)
self._base_cmd, fs_path, redirect_stderr=True)
def _is_dir(self, fs_path):
cmd = "test -d {}".format(fs_path, redirect_stderr=True)
ret, lines = self._run_cmd(cmd) ret, lines = self._run_cmd(cmd)
if ret: if ret:
# other error # other error
if self._test_match(lines) != None: if self._test_match(lines):
raise ExecuteError raise ExecuteError(cmd)
return False return False
...@@ -151,94 +177,155 @@ class HDFSClient(FS): ...@@ -151,94 +177,155 @@ class HDFSClient(FS):
if not self.is_exist(fs_path): if not self.is_exist(fs_path):
return False return False
return not self.is_dir(fs_path) return not self._is_dir(fs_path)
@_handle_errors @_handle_errors()
def is_exist(self, fs_path): def is_exist(self, fs_path):
cmd = "{} -ls {} ".format(self._base_cmd, fs_path) cmd = "ls {} ".format(fs_path)
ret, out = self._run_cmd(cmd, redirect_stderr=True) ret, out = self._run_cmd(cmd, redirect_stderr=True)
if ret != 0: if ret != 0:
for l in out: for l in out:
if "No such file or directory" in l: if "No such file or directory" in l:
return False return False
raise ExecuteError raise ExecuteError(cmd)
return True return True
@_handle_errors # can't retry
def upload(self, local_path, fs_path): def upload(self, local_path, fs_path):
if self.is_exist(fs_path): if self.is_exist(fs_path):
raise FSFileExistsError raise FSFileExistsError("{} exists".format(fs_path))
local = LocalFS() local = LocalFS()
if not local.is_exist(local_path): if not local.is_exist(local_path):
raise FSFileNotExistsError raise FSFileNotExistsError("{} not exists".format(local_path))
cmd = "{} -put {} {}".format(self._base_cmd, local_path, fs_path) return self._try_upload(local_path, fs_path)
@_handle_errors()
def _try_upload(self, local_path, fs_path):
cmd = "put {} {}".format(local_path, fs_path)
ret = 0
try:
ret, lines = self._run_cmd(cmd) ret, lines = self._run_cmd(cmd)
if ret != 0: if ret != 0:
raise ExecuteError raise ExecuteError(cmd)
except Exception as e:
self.delete(fs_path)
raise e
@_handle_errors # can't retry
def download(self, fs_path, local_path): def download(self, fs_path, local_path):
if self.is_exist(local_path): if self.is_exist(local_path):
raise FSFileExistsError raise FSFileExistsError("{} exists".format(local_path))
if not self.is_exist(fs_path): if not self.is_exist(fs_path):
raise FSFileNotExistsError raise FSFileNotExistsError("{} not exits".format(fs_path))
return self._try_download(fs_path, local_path)
cmd = "{} -get {} {}".format(self._base_cmd, fs_path, local_path) @_handle_errors()
def _try_download(self, fs_path, local_path):
cmd = "get {} {}".format(fs_path, local_path)
ret = 0
try:
ret, lines = self._run_cmd(cmd) ret, lines = self._run_cmd(cmd)
if ret != 0: if ret != 0:
raise ExecuteError raise ExecuteError(cmd)
except Exception as e:
local_fs = LocalFS()
local_fs.delete(local_path)
raise e
@_handle_errors @_handle_errors()
def mkdirs(self, fs_path): def mkdirs(self, fs_path):
if self.is_exist(fs_path): if self.is_exist(fs_path):
return return
cmd = "{} -mkdir {}".format(self._base_cmd, fs_path) out_hdfs = False
cmd = "mkdir {} ".format(fs_path)
ret, out = self._run_cmd(cmd, redirect_stderr=True)
if ret != 0:
for l in out:
if "No such file or directory" in l:
out_hdfs = True
break
if not out_hdfs:
raise ExecuteError(cmd)
if out_hdfs and not self.is_exist(fs_path):
cmd = "mkdir -p {}".format(fs_path)
ret, lines = self._run_cmd(cmd) ret, lines = self._run_cmd(cmd)
if ret != 0: if ret != 0:
raise ExecuteError raise ExecuteError(cmd)
def mv(self, fs_src_path, fs_dst_path, overwrite=False, test_exists=True):
if overwrite and self.is_exist(fs_dst_path):
self.delete(fs_dst_path)
@_handle_errors
def mv(self, fs_src_path, fs_dst_path, test_exists=True):
if test_exists: if test_exists:
if not self.is_exist(fs_src_path): if not self.is_exist(fs_src_path):
raise FSFileNotExistsError raise FSFileNotExistsError("{} is not exists".format(
fs_src_path))
if self.is_exist(fs_dst_path): if self.is_exist(fs_dst_path):
raise FSFileExistsError raise FSFileExistsError("{} exists already".format(
fs_src_path, fs_dst_path, fs_dst_path))
cmd = "{} -mv {} {}".format(self._base_cmd, fs_src_path, fs_dst_path) return self._try_mv(fs_src_path, fs_dst_path)
@_handle_errors()
def _try_mv(self, fs_src_path, fs_dst_path):
cmd = "mv {} {}".format(fs_src_path, fs_dst_path)
ret = 0
try:
ret, _ = self._run_cmd(cmd) ret, _ = self._run_cmd(cmd)
if ret != 0: if ret != 0:
raise ExecuteError raise ExecuteError(cmd)
except Exception as e:
if not self.is_exist(fs_src_path) and \
self.is_exist(fs_dst_path):
return
raise e
@_handle_errors
def _rmr(self, fs_path): def _rmr(self, fs_path):
cmd = "{} -rmr {}".format(self._base_cmd, fs_path) cmd = "rmr {}".format(fs_path)
ret, _ = self._run_cmd(cmd) ret, _ = self._run_cmd(cmd)
if ret != 0: if ret != 0:
raise ExecuteError raise ExecuteError(cmd)
@_handle_errors
def _rm(self, fs_path): def _rm(self, fs_path):
cmd = "{} -rm {}".format(self._base_cmd, fs_path) cmd = "rm {}".format(fs_path)
ret, _ = self._run_cmd(cmd) ret, _ = self._run_cmd(cmd)
if ret != 0: if ret != 0:
raise ExecuteError raise ExecuteError(cmd)
@_handle_errors()
def delete(self, fs_path): def delete(self, fs_path):
if not self.is_exist(fs_path): if not self.is_exist(fs_path):
return return
is_dir = self.is_dir(fs_path) is_dir = self._is_dir(fs_path)
if is_dir: if is_dir:
return self._rmr(fs_path) return self._rmr(fs_path)
return self._rm(fs_path) return self._rm(fs_path)
def touch(self, fs_path, exist_ok=True):
if self.is_exist(fs_path):
if exist_ok:
return
raise FSFileExistsError
return self._touchz(fs_path)
@_handle_errors()
def _touchz(self, fs_path):
cmd = "touchz {}".format(fs_path)
ret, _ = self._run_cmd(cmd)
if ret != 0:
raise ExecuteError(cmd)
def need_upload_download(self): def need_upload_download(self):
return True return True
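Taken together, the client now validates arguments up front in the public methods (deliberately not retried, per the "can't retry" comments) and delegates the actual shell call to retried _try_* helpers, so user errors fail fast while transient hadoop failures are retried. A minimal usage sketch, with the hadoop home taken from the tests at the end of this diff and hypothetical paths:

from paddle.fluid.incubate.fleet.utils.hdfs import HDFSClient

fs = HDFSClient("/usr/local/hadoop-2.7.7", None)  # pass configs={...} to emit extra -D options
fs.mkdirs("demo_dir/sub")             # falls back to "mkdir -p" when parents are missing
fs.touch("demo_dir/sub/_SUCCESS")
dirs, files = fs.ls_dir("demo_dir")   # pure entry names, without the fs_path prefix
fs.delete("demo_dir")                 # rmr for directories, rm for files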
...@@ -25,8 +25,7 @@ from ..layer_helper import LayerHelper ...@@ -25,8 +25,7 @@ from ..layer_helper import LayerHelper
from ..data_feeder import check_variable_and_dtype from ..data_feeder import check_variable_and_dtype
__all__ = [ __all__ = [
'deprecated', 'generate_layer_fn', 'generate_activation_fn', 'autodoc', 'generate_layer_fn', 'generate_activation_fn', 'autodoc', 'templatedoc'
'templatedoc'
] ]
...@@ -82,8 +81,9 @@ def _generate_doc_string_(op_proto, ...@@ -82,8 +81,9 @@ def _generate_doc_string_(op_proto,
buf.write(escape_math(op_proto.comment)) buf.write(escape_math(op_proto.comment))
buf.write('\nArgs:\n') buf.write('\nArgs:\n')
for each_input in op_proto.inputs: for each_input in op_proto.inputs:
line_begin = ' {0}: '.format(_convert_(each_input.name)) line_begin = ' {0}'.format(_convert_(each_input.name))
buf.write(line_begin) buf.write(line_begin)
buf.write(" (Tensor): ")
buf.write(escape_math(each_input.comment)) buf.write(escape_math(each_input.comment))
if each_input.duplicable: if each_input.duplicable:
buf.write(" Duplicatable.") buf.write(" Duplicatable.")
...@@ -125,6 +125,8 @@ def _generate_doc_string_(op_proto, ...@@ -125,6 +125,8 @@ def _generate_doc_string_(op_proto,
for each_opt in op_proto.outputs: for each_opt in op_proto.outputs:
if not each_opt.intermediate: if not each_opt.intermediate:
break break
buf.write(_convert_(each_opt.name))
buf.write(' (Tensor): ')
buf.write(escape_math(each_opt.comment)) buf.write(escape_math(each_opt.comment))
return buf.getvalue() return buf.getvalue()
...@@ -275,50 +277,11 @@ def generate_activation_fn(op_type): ...@@ -275,50 +277,11 @@ def generate_activation_fn(op_type):
func.__doc__ = _generate_doc_string_( func.__doc__ = _generate_doc_string_(
op_proto, op_proto,
additional_args_lines=[ additional_args_lines=[
"name(str, optional): The default value is None. Normally there is no need for user to set this property. For more information, please refer to :ref:`api_guide_Name` ." "name (str, optional): Name for the operation (optional, default is None). For more information, please refer to :ref:`api_guide_Name`."
]) ])
func.__doc__ = func.__doc__ + """
Return type
Variable
Examples:
.. code-block:: python
import paddle
import numpy as np
paddle.enable_imperative()
x_data = np.array([1, 2, 3, 4]).astype(np.float32)
x = paddle.imperative.to_variable(x_data)
res = paddle.%s(x)
print(res.numpy())
""" % op_type
return func return func
def deprecated(func_or_class):
"""
Deprecated warning decorator. It will result a warning message.
Should be used before class or function, member function
"""
@functools.wraps(func)
def func_wrapper(*args, **kwargs):
"""
Wrap func with deprecated warning
"""
warnings.simplefilter('always', DeprecationWarning) # turn off filter
warnings.warn(
"Call to deprecated function {}.".format(func.__name__),
category=DeprecationWarning,
stacklevel=2)
warnings.simplefilter('default', DeprecationWarning) # reset filter
return func(*args, **kwargs)
return func_wrapper
def autodoc(comment=""): def autodoc(comment=""):
def __impl__(func): def __impl__(func):
func.__doc__ = _generate_doc_string_(OpProtoHolder.instance( func.__doc__ = _generate_doc_string_(OpProtoHolder.instance(
...@@ -384,3 +347,14 @@ def templatedoc(op_type=None): ...@@ -384,3 +347,14 @@ def templatedoc(op_type=None):
return func return func
return __impl__ return __impl__
def add_sample_code(func, sample_code):
"""
Append sample code for dynamically generated functions.
Args:
func: The function to append the sample code to.
sample_code: sample code section in rst format.
"""
func.__doc__ = func.__doc__ + sample_code
...@@ -6200,7 +6200,7 @@ def squeeze(input, axes, name=None): ...@@ -6200,7 +6200,7 @@ def squeeze(input, axes, name=None):
Out.shape = [1,3,5] Out.shape = [1,3,5]
Args: Args:
input (Variable): The input Tensor. Support data type: float16, float32, float64, int8, int32, int64. input (Variable): The input Tensor. Supported data type: float32, float64, bool, int8, int32, int64.
axes (list): One integer or List of integers, indicating the dimensions to be squeezed. axes (list): One integer or List of integers, indicating the dimensions to be squeezed.
Axes range is :math:`[-rank(input), rank(input))`. Axes range is :math:`[-rank(input), rank(input))`.
If axes is negative, :math:`axes=axes+rank(input)`. If axes is negative, :math:`axes=axes+rank(input)`.
...@@ -6226,8 +6226,9 @@ def squeeze(input, axes, name=None): ...@@ -6226,8 +6226,9 @@ def squeeze(input, axes, name=None):
helper = LayerHelper("squeeze", **locals()) helper = LayerHelper("squeeze", **locals())
check_variable_and_dtype( check_variable_and_dtype(
input, 'input', input, 'input',
['float16', 'float32', 'float64', 'int8', 'int32', 'int64'], 'squeeze') ['float16', 'float32', 'float64', 'bool', 'int8', 'int32', 'int64'],
check_type(axes, 'axes', (list, tuple), 'squeeze') 'squeeze')
check_type(axes, 'axis/axes', (list, tuple), 'squeeze')
out = helper.create_variable_for_type_inference(dtype=input.dtype) out = helper.create_variable_for_type_inference(dtype=input.dtype)
x_shape = helper.create_variable_for_type_inference(dtype=input.dtype) x_shape = helper.create_variable_for_type_inference(dtype=input.dtype)
helper.append_op( helper.append_op(
...@@ -6254,12 +6255,12 @@ def unsqueeze(input, axes, name=None): ...@@ -6254,12 +6255,12 @@ def unsqueeze(input, axes, name=None):
then Unsqueezed tensor with axes=[0, 4] has shape [1, 3, 4, 5, 1]. then Unsqueezed tensor with axes=[0, 4] has shape [1, 3, 4, 5, 1].
Args: Args:
input (Variable): The input Tensor to be unsqueezed. It is a N-D Tensor of data types float32, float64, int32. input (Variable): The input Tensor to be unsqueezed. Supported data type: float32, float64, bool, int8, int32, int64.
axes (int|list|tuple|Variable): Indicates the dimensions to be inserted. The data type is ``int32`` . If ``axes`` is a list or tuple, the elements of it should be integers or Tensors with shape [1]. If ``axes`` is an Variable, it should be an 1-D Tensor . axes (int|list|tuple|Variable): Indicates the dimensions to be inserted. The data type is ``int32`` . If ``axes`` is a list or tuple, the elements of it should be integers or Tensors with shape [1]. If ``axes`` is an Variable, it should be an 1-D Tensor .
name (str|None): Name for this layer. name (str|None): Name for this layer.
Returns: Returns:
Variable: Output unsqueezed Tensor, with data type being float32, float64, int32, int64. Variable: Unsqueezed Tensor, with the same data type as input.
Examples: Examples:
.. code-block:: python .. code-block:: python
...@@ -6269,10 +6270,15 @@ def unsqueeze(input, axes, name=None): ...@@ -6269,10 +6270,15 @@ def unsqueeze(input, axes, name=None):
y = fluid.layers.unsqueeze(input=x, axes=[1]) y = fluid.layers.unsqueeze(input=x, axes=[1])
""" """
if not isinstance(axes, (int, list, tuple, Variable)): if in_dygraph_mode():
raise TypeError( out, _ = core.ops.unsqueeze2(input, 'axes', axes)
"The type of 'axes' in unsqueeze must be int, list, tuple or Variable, but " return out
"received %s." % (type(axes)))
check_type(axes, 'axis/axes', (int, list, tuple, Variable), 'unsqueeze')
check_variable_and_dtype(
input, 'input',
['float16', 'float32', 'float64', 'bool', 'int8', 'int32', 'int64'],
'unsqueeze')
helper = LayerHelper("unsqueeze2", **locals()) helper = LayerHelper("unsqueeze2", **locals())
inputs = {"X": input} inputs = {"X": input}
attrs = {} attrs = {}
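With the new fast path, unsqueeze under dygraph dispatches straight to core.ops.unsqueeze2 and skips the static-graph plumbing. A small illustrative sketch:

import numpy as np
import paddle.fluid as fluid

with fluid.dygraph.guard():
    x = fluid.dygraph.to_variable(np.random.rand(5, 10).astype('float32'))
    y = fluid.layers.unsqueeze(x, axes=[1])  # hits the core.ops.unsqueeze2 branch
    print(y.shape)                           # [5, 1, 10]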
...@@ -9966,7 +9972,7 @@ def stack(x, axis=0, name=None): ...@@ -9966,7 +9972,7 @@ def stack(x, axis=0, name=None):
must be the same. Supposing input is N dims must be the same. Supposing input is N dims
Tensors :math:`[d_0, d_1, ..., d_{n-1}]`, the output is N+1 dims Tensors :math:`[d_0, d_1, ..., d_{n-1}]`, the output is N+1 dims
Tensor :math:`[d_0, d_1, d_{axis-1}, len(x), d_{axis}, ..., d_{n-1}]`. Tensor :math:`[d_0, d_1, d_{axis-1}, len(x), d_{axis}, ..., d_{n-1}]`.
Support data types: float32, float64, int32, int64. Supported data types: float32, float64, int32, int64.
axis (int, optional): The axis along which all inputs are stacked. ``axis`` range is :math:`[-(R+1), R+1)`. axis (int, optional): The axis along which all inputs are stacked. ``axis`` range is :math:`[-(R+1), R+1)`.
R is the rank of the first tensor of inputs. If ``axis`` < 0, :math:`axis=axis+rank(x[0])+1`. R is the rank of the first tensor of inputs. If ``axis`` < 0, :math:`axis=axis+rank(x[0])+1`.
The default value of axis is 0. The default value of axis is 0.
...@@ -11963,7 +11969,7 @@ for func in [ ...@@ -11963,7 +11969,7 @@ for func in [
], ],
skip_attrs_set={ skip_attrs_set={
"x_data_format", "y_data_format", "axis", "use_quantizer", "x_data_format", "y_data_format", "axis", "use_quantizer",
"Scale_x", "Scale_y", "Scale_out" "mkldnn_data_type", "Scale_x", "Scale_y", "Scale_out"
}) + """\n""" + str(func.__doc__) }) + """\n""" + str(func.__doc__)
for func in []: for func in []:
......
...@@ -14,7 +14,7 @@ ...@@ -14,7 +14,7 @@
from __future__ import print_function from __future__ import print_function
import os import os
from .layer_function_generator import generate_layer_fn, generate_activation_fn from .layer_function_generator import generate_layer_fn, generate_activation_fn, add_sample_code
from .. import core from .. import core
from ..framework import convert_np_dtype_to_dtype_, Variable from ..framework import convert_np_dtype_to_dtype_, Variable
from ..data_feeder import convert_dtype, check_variable_and_dtype, check_type, check_dtype from ..data_feeder import convert_dtype, check_variable_and_dtype, check_type, check_dtype
...@@ -61,6 +61,363 @@ __all__ += __activations_noattr__ ...@@ -61,6 +61,363 @@ __all__ += __activations_noattr__
for _OP in set(__activations_noattr__): for _OP in set(__activations_noattr__):
globals()[_OP] = generate_activation_fn(_OP) globals()[_OP] = generate_activation_fn(_OP)
add_sample_code(globals()["sigmoid"], r"""
Examples:
.. code-block:: python
import numpy as np
import paddle
import paddle.nn.functional as F
paddle.enable_imperative()
x_data = np.array([-0.4, -0.2, 0.1, 0.3])
x = paddle.imperative.to_variable(x_data)
out = F.sigmoid(x)
print(out.numpy())
# [0.40131234 0.450166 0.52497919 0.57444252]
""")
add_sample_code(globals()["logsigmoid"], r"""
Examples:
.. code-block:: python
import numpy as np
import paddle
import paddle.nn.functional as F
paddle.enable_imperative()
x_data = np.array([-0.4, -0.2, 0.1, 0.3])
x = paddle.imperative.to_variable(x_data)
out = F.logsigmoid(x)
print(out.numpy())
# [-0.91301525 -0.79813887 -0.64439666 -0.55435524]
""")
add_sample_code(globals()["exp"], r"""
Examples:
.. code-block:: python
import numpy as np
import paddle
paddle.enable_imperative()
x_data = np.array([-0.4, -0.2, 0.1, 0.3])
x = paddle.imperative.to_variable(x_data)
out = paddle.exp(x)
print(out.numpy())
# [0.67032005 0.81873075 1.10517092 1.34985881]
""")
add_sample_code(globals()["tanh"], r"""
Examples:
.. code-block:: python
import numpy as np
import paddle
paddle.enable_imperative()
x_data = np.array([-0.4, -0.2, 0.1, 0.3])
x = paddle.imperative.to_variable(x_data)
out = paddle.tanh(x)
print(out.numpy())
# [-0.37994896 -0.19737532 0.09966799 0.29131261]
""")
add_sample_code(globals()["atan"], r"""
Examples:
.. code-block:: python
import numpy as np
import paddle
paddle.enable_imperative()
x_data = np.array([-0.4, -0.2, 0.1, 0.3])
x = paddle.imperative.to_variable(x_data)
out = paddle.atan(x)
print(out.numpy())
# [-0.38050638 -0.19739556 0.09966865 0.29145679]
""")
add_sample_code(globals()["tanh_shrink"], r"""
Examples:
.. code-block:: python
import numpy as np
import paddle
import paddle.nn.functional as F
paddle.enable_imperative()
x_data = np.array([-0.4, -0.2, 0.1, 0.3])
x = paddle.imperative.to_variable(x_data)
out = F.tanh_shrink(x)
print(out.numpy())
# [-0.02005104 -0.00262468 0.00033201 0.00868739]
""")
add_sample_code(globals()["sqrt"], r"""
Examples:
.. code-block:: python
import numpy as np
import paddle
paddle.enable_imperative()
x_data = np.array([0.1, 0.2, 0.3, 0.4])
x = paddle.imperative.to_variable(x_data)
out = paddle.sqrt(x)
print(out.numpy())
# [0.31622777 0.4472136 0.54772256 0.63245553]
""")
add_sample_code(globals()["rsqrt"], r"""
Examples:
.. code-block:: python
import numpy as np
import paddle
paddle.enable_imperative()
x_data = np.array([0.1, 0.2, 0.3, 0.4])
x = paddle.imperative.to_variable(x_data)
out = paddle.rsqrt(x)
print(out.numpy())
# [3.16227766 2.23606798 1.82574186 1.58113883]
""")
add_sample_code(globals()["abs"], r"""
Examples:
.. code-block:: python
import numpy as np
import paddle
paddle.enable_imperative()
x_data = np.array([-0.4, -0.2, 0.1, 0.3])
x = paddle.imperative.to_variable(x_data)
out = paddle.abs(x)
print(out.numpy())
# [0.4 0.2 0.1 0.3]
""")
add_sample_code(globals()["ceil"], r"""
Examples:
.. code-block:: python
import numpy as np
import paddle
paddle.enable_imperative()
x_data = np.array([-0.4, -0.2, 0.1, 0.3])
x = paddle.imperative.to_variable(x_data)
out = paddle.ceil(x)
print(out.numpy())
# [-0. -0. 1. 1.]
""")
add_sample_code(globals()["floor"], r"""
Examples:
.. code-block:: python
import numpy as np
import paddle
paddle.enable_imperative()
x_data = np.array([-0.4, -0.2, 0.1, 0.3])
x = paddle.imperative.to_variable(x_data)
out = paddle.floor(x)
print(out.numpy())
# [-1. -1. 0. 0.]
""")
add_sample_code(globals()["cos"], r"""
Examples:
.. code-block:: python
import numpy as np
import paddle
paddle.enable_imperative()
x_data = np.array([-0.4, -0.2, 0.1, 0.3])
x = paddle.imperative.to_variable(x_data)
out = paddle.cos(x)
print(out.numpy())
# [0.92106099 0.98006658 0.99500417 0.95533649]
""")
add_sample_code(globals()["acos"], r"""
Examples:
.. code-block:: python
import numpy as np
import paddle
paddle.enable_imperative()
x_data = np.array([-0.4, -0.2, 0.1, 0.3])
x = paddle.imperative.to_variable(x_data)
out = paddle.acos(x)
print(out.numpy())
# [1.98231317 1.77215425 1.47062891 1.26610367]
""")
add_sample_code(globals()["sin"], r"""
Examples:
.. code-block:: python
import numpy as np
import paddle
paddle.enable_imperative()
x_data = np.array([-0.4, -0.2, 0.1, 0.3])
x = paddle.imperative.to_variable(x_data)
out = paddle.sin(x)
print(out.numpy())
# [-0.38941834 -0.19866933 0.09983342 0.29552021]
""")
add_sample_code(globals()["asin"], r"""
Examples:
.. code-block:: python
import numpy as np
import paddle
paddle.enable_imperative()
x_data = np.array([-0.4, -0.2, 0.1, 0.3])
x = paddle.imperative.to_variable(x_data)
out = paddle.asin(x)
print(out.numpy())
# [-0.41151685 -0.20135792 0.10016742 0.30469265]
""")
add_sample_code(globals()["cosh"], r"""
Examples:
.. code-block:: python
import numpy as np
import paddle
paddle.enable_imperative()
x_data = np.array([-0.4, -0.2, 0.1, 0.3])
x = paddle.imperative.to_variable(x_data)
out = paddle.cosh(x)
print(out.numpy())
# [1.08107237 1.02006676 1.00500417 1.04533851]
""")
add_sample_code(globals()["sinh"], r"""
Examples:
.. code-block:: python
import numpy as np
import paddle
paddle.enable_imperative()
x_data = np.array([-0.4, -0.2, 0.1, 0.3])
x = paddle.imperative.to_variable(x_data)
out = paddle.sinh(x)
print(out.numpy())
# [-0.41075233 -0.201336 0.10016675 0.30452029]
""")
add_sample_code(globals()["round"], r"""
Examples:
.. code-block:: python
import numpy as np
import paddle
paddle.enable_imperative()
x_data = np.array([-0.5, -0.2, 0.6, 1.5])
x = paddle.imperative.to_variable(x_data)
out = paddle.round(x)
print(out.numpy())
# [-1. -0. 1. 2.]
""")
add_sample_code(globals()["reciprocal"], r"""
Examples:
.. code-block:: python
import numpy as np
import paddle
paddle.enable_imperative()
x_data = np.array([-0.4, -0.2, 0.1, 0.3])
x = paddle.imperative.to_variable(x_data)
out = paddle.reciprocal(x)
print(out.numpy())
# [-2.5 -5. 10. 3.33333333]
""")
add_sample_code(globals()["square"], r"""
Examples:
.. code-block:: python
import numpy as np
import paddle
paddle.enable_imperative()
x_data = np.array([-0.4, -0.2, 0.1, 0.3])
x = paddle.imperative.to_variable(x_data)
out = paddle.square(x)
print(out.numpy())
# [0.16 0.04 0.01 0.09]
""")
add_sample_code(globals()["softplus"], r"""
Examples:
.. code-block:: python
import numpy as np
import paddle
import paddle.nn.functional as F
paddle.enable_imperative()
x_data = np.array([-0.4, -0.2, 0.1, 0.3])
x = paddle.imperative.to_variable(x_data)
out = F.softplus(x)
print(out.numpy())
# [0.51301525 0.59813887 0.74439666 0.85435524]
""")
add_sample_code(globals()["softsign"], r"""
Examples:
.. code-block:: python
import numpy as np
import paddle
import paddle.nn.functional as F
paddle.enable_imperative()
x_data = np.array([-0.4, -0.2, 0.1, 0.3])
x = paddle.imperative.to_variable(x_data)
out = F.softsign(x)
print(out.numpy())
# [-0.28571429 -0.16666667 0.09090909 0.23076923]
""")
__all__ += ['softshrink'] __all__ += ['softshrink']
_softshrink_ = generate_layer_fn('softshrink') _softshrink_ = generate_layer_fn('softshrink')
......
...@@ -685,8 +685,9 @@ def fill_constant(shape, dtype, value, force_cpu=False, out=None, name=None): ...@@ -685,8 +685,9 @@ def fill_constant(shape, dtype, value, force_cpu=False, out=None, name=None):
""" """
attrs = {'force_cpu': force_cpu} attrs = {'force_cpu': force_cpu}
dtype = convert_dtype(dtype)
if not isinstance(value, Variable): if not isinstance(value, Variable):
if convert_dtype(dtype) in ['int64', 'int32']: if dtype in ['int64', 'int32']:
attrs['str_value'] = str(int(value)) attrs['str_value'] = str(int(value))
else: else:
attrs['str_value'] = str(float(value)) attrs['str_value'] = str(float(value))
...@@ -697,7 +698,7 @@ def fill_constant(shape, dtype, value, force_cpu=False, out=None, name=None): ...@@ -697,7 +698,7 @@ def fill_constant(shape, dtype, value, force_cpu=False, out=None, name=None):
out = _varbase_creator(dtype=dtype) out = _varbase_creator(dtype=dtype)
if isinstance(value, Variable): if isinstance(value, Variable):
if convert_dtype(dtype) in ['int64', 'int32']: if dtype in ['int64', 'int32']:
attrs['str_value'] = str(int(value.numpy())) attrs['str_value'] = str(int(value.numpy()))
else: else:
attrs['str_value'] = str(float(value.numpy())) attrs['str_value'] = str(float(value.numpy()))
...@@ -712,6 +713,8 @@ def fill_constant(shape, dtype, value, force_cpu=False, out=None, name=None): ...@@ -712,6 +713,8 @@ def fill_constant(shape, dtype, value, force_cpu=False, out=None, name=None):
helper = LayerHelper("fill_constant", **locals()) helper = LayerHelper("fill_constant", **locals())
inputs = {} inputs = {}
if isinstance(value, Variable): if isinstance(value, Variable):
if convert_dtype(value.dtype) != dtype:
value = cast(value, dtype)
inputs['ValueTensor'] = value inputs['ValueTensor'] = value
check_dtype(dtype, 'dtype', check_dtype(dtype, 'dtype',
......
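The net effect of the fill_constant change: dtype is normalized once via convert_dtype, and when value is a Variable whose dtype differs from the requested one it is now cast before being wired in as ValueTensor. An illustrative static-graph sketch:

import paddle.fluid as fluid

val = fluid.layers.fill_constant(shape=[1], dtype='float32', value=2.0)
# val is float32 but int64 is requested: the new code inserts a cast instead of
# feeding a mismatched ValueTensor
out = fluid.layers.fill_constant(shape=[2, 3], dtype='int64', value=val)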
...@@ -86,6 +86,10 @@ if(WIN32) ...@@ -86,6 +86,10 @@ if(WIN32)
LIST(REMOVE_ITEM TEST_OPS test_ref_by_trainer_id_op) LIST(REMOVE_ITEM TEST_OPS test_ref_by_trainer_id_op)
endif() endif()
LIST(REMOVE_ITEM TEST_OPS test_auto_checkpoint)
LIST(REMOVE_ITEM TEST_OPS test_auto_checkpoint2)
LIST(REMOVE_ITEM TEST_OPS test_checkpoint_saver)
if(APPLE OR WIN32) if(APPLE OR WIN32)
LIST(REMOVE_ITEM TEST_OPS test_hdfs) LIST(REMOVE_ITEM TEST_OPS test_hdfs)
LIST(REMOVE_ITEM TEST_OPS test_fs_interface) LIST(REMOVE_ITEM TEST_OPS test_fs_interface)
...@@ -190,10 +194,11 @@ function(bash_test_modules TARGET_NAME) ...@@ -190,10 +194,11 @@ function(bash_test_modules TARGET_NAME)
endif() endif()
set(options SERIAL) set(options SERIAL)
set(oneValueArgs "") set(oneValueArgs TIMEOUT START_BASH)
set(multiValueArgs MODULES DEPS ENVS LABELS) set(multiValueArgs DEPS ENVS LABELS)
cmake_parse_arguments(bash_test_modules "${options}" "${oneValueArgs}" "${multiValueArgs}" ${ARGN}) cmake_parse_arguments(bash_test_modules "${options}" "${oneValueArgs}" "${multiValueArgs}" ${ARGN})
set(timeout 350) set(timeout 350)
if(${bash_test_modules_TIMEOUT}) if(${bash_test_modules_TIMEOUT})
set(timeout ${bash_test_modules_TIMEOUT}) set(timeout ${bash_test_modules_TIMEOUT})
...@@ -204,13 +209,13 @@ function(bash_test_modules TARGET_NAME) ...@@ -204,13 +209,13 @@ function(bash_test_modules TARGET_NAME)
COMMAND ${CMAKE_COMMAND} -E env PYTHONPATH=${PADDLE_BINARY_DIR}/python COMMAND ${CMAKE_COMMAND} -E env PYTHONPATH=${PADDLE_BINARY_DIR}/python
TEST_TARGET_NAME=${TARGET_NAME} TEST_TIMEOUT=${timeout} ${bash_test_modules_ENVS} TEST_TARGET_NAME=${TARGET_NAME} TEST_TIMEOUT=${timeout} ${bash_test_modules_ENVS}
WITH_COVERAGE=ON COVERAGE_FILE=${PADDLE_BINARY_DIR}/python-coverage.data WITH_COVERAGE=ON COVERAGE_FILE=${PADDLE_BINARY_DIR}/python-coverage.data
bash ${CMAKE_CURRENT_BINARY_DIR}/${bash_test_modules_MODULES} bash ${CMAKE_CURRENT_BINARY_DIR}/${bash_test_modules_START_BASH}
WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}) WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR})
else() else()
add_test(NAME ${TARGET_NAME} add_test(NAME ${TARGET_NAME}
COMMAND ${CMAKE_COMMAND} -E env PYTHONPATH=${PADDLE_BINARY_DIR}/python COMMAND ${CMAKE_COMMAND} -E env PYTHONPATH=${PADDLE_BINARY_DIR}/python
TEST_TARGET_NAME=${TARGET_NAME} TEST_TIMEOUT=${timeout} ${bash_test_modules_ENVS} TEST_TARGET_NAME=${TARGET_NAME} TEST_TIMEOUT=${timeout} ${bash_test_modules_ENVS}
bash ${CMAKE_CURRENT_BINARY_DIR}/${bash_test_modules_MODULES} bash ${CMAKE_CURRENT_BINARY_DIR}/${bash_test_modules_START_BASH}
WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}) WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR})
endif() endif()
...@@ -397,15 +402,16 @@ if(WITH_DISTRIBUTE) ...@@ -397,15 +402,16 @@ if(WITH_DISTRIBUTE)
if(NOT APPLE) if(NOT APPLE)
if(WITH_GPU) if(WITH_GPU)
# NOTE. test_launch only works in gpu collective mode # NOTE. test_launch only works in gpu collective mode
bash_test_modules(test_launch MODULES test_launch.sh ENVS PADDLE_BINARY_DIR=${PADDLE_BINARY_DIR}) bash_test_modules(test_launch START_BASH test_launch.sh ENVS PADDLE_BINARY_DIR=${PADDLE_BINARY_DIR})
py_test_modules(test_fleet_checkpoint MODULES test_fleet_checkpoint) py_test_modules(test_fleet_checkpoint MODULES test_fleet_checkpoint)
endif() endif()
bash_test_modules(test_launch_ps MODULES test_launch_ps.sh ENVS PADDLE_BINARY_DIR=${PADDLE_BINARY_DIR})
bash_test_modules(test_fleet_launch MODULES test_fleet_launch.sh ENVS PADDLE_BINARY_DIR=${PADDLE_BINARY_DIR}) bash_test_modules(test_launch_ps START_BASH test_launch_ps.sh ENVS PADDLE_BINARY_DIR=${PADDLE_BINARY_DIR})
bash_test_modules(test_fleet_launch START_BASH test_fleet_launch.sh ENVS PADDLE_BINARY_DIR=${PADDLE_BINARY_DIR})
set(dist_ut_port 20001) set(dist_ut_port 20001)
foreach(TEST_OP ${DIST_TEST_OPS}) foreach(TEST_OP ${DIST_TEST_OPS})
bash_test_modules(${TEST_OP} MODULES dist_test.sh SERIAL LABELS "RUN_TYPE=EXCLUSIVE" ENVS "PADDLE_DIST_UT_PORT=${dist_ut_port}") bash_test_modules(${TEST_OP} START_BASH dist_test.sh SERIAL LABELS "RUN_TYPE=EXCLUSIVE" ENVS "PADDLE_DIST_UT_PORT=${dist_ut_port}")
MATH(EXPR dist_ut_port "${dist_ut_port}+50") MATH(EXPR dist_ut_port "${dist_ut_port}+50")
endforeach(TEST_OP) endforeach(TEST_OP)
endif(NOT APPLE) endif(NOT APPLE)
...@@ -441,6 +447,12 @@ if(NOT WIN32) ...@@ -441,6 +447,12 @@ if(NOT WIN32)
set_tests_properties(test_parallel_executor_fetch_feed PROPERTIES TIMEOUT 450) set_tests_properties(test_parallel_executor_fetch_feed PROPERTIES TIMEOUT 450)
endif() endif()
if(NOT APPLE AND NOT WIN32)
bash_test_modules(test_auto_checkpoint START_BASH dist_test.sh TIMEOUT 600)
bash_test_modules(test_auto_checkpoint2 START_BASH dist_test.sh TIMEOUT 600)
bash_test_modules(test_checkpoint_saver START_BASH dist_test.sh TIMEOUT 600)
endif()
add_subdirectory(sequence) add_subdirectory(sequence)
add_subdirectory(dygraph_to_static) add_subdirectory(dygraph_to_static)
......
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import unittest
import paddle
import paddle.fluid as fluid
import paddle.fluid.incubate.fleet.base.role_maker as role_maker
from paddle.fluid.incubate.fleet.collective import CollectiveOptimizer, fleet
import os
import sys
from paddle.fluid.incubate.fleet.utils.fs import LocalFS
from paddle.fluid.incubate.fleet.utils.hdfs import HDFSClient
import paddle.fluid.incubate.checkpoint.auto_checkpoint as acp
from paddle.fluid.incubate.checkpoint.checkpoint_saver import PaddleModel
from paddle.fluid.framework import program_guard
from paddle.fluid import unique_name
import numpy as np
from paddle.io import Dataset, BatchSampler, DataLoader
BATCH_NUM = 20
BATCH_SIZE = 16
#IMAGE_SIZE = 128
CLASS_NUM = 10
USE_GPU = False  # whether to use GPU to run the model
places = fluid.cuda_places() if USE_GPU else fluid.cpu_places()
logger = None
def get_logger():
global logger
logger = acp._get_logger(20)
return logger
def get_random_images_and_labels(image_shape, label_shape):
image = np.random.random(size=image_shape).astype('float32')
label = np.random.random(size=label_shape).astype('int64')
return image, label
def sample_list_generator_creator():
def __reader__():
for _ in range(BATCH_NUM):
sample_list = []
for _ in range(BATCH_SIZE):
image, label = get_random_images_and_labels([16, 16], [1])
sample_list.append([image, label])
yield sample_list
return __reader__
class AutoCheckpointBase(unittest.TestCase):
def _init_env(self,
exe,
main_prog,
startup_prog,
minimize=True,
iterable=True):
def simple_net():
image = fluid.data(
name='image', shape=[-1, 16, 16], dtype='float32')
label = fluid.data(name='label', shape=[-1, 1], dtype='int64')
fc_tmp = fluid.layers.fc(image, size=CLASS_NUM)
cross_entropy = fluid.layers.softmax_with_cross_entropy(fc_tmp,
label)
loss = fluid.layers.reduce_mean(cross_entropy)
sgd = fluid.optimizer.SGD(learning_rate=1e-3)
if minimize:
sgd.minimize(loss)
return sgd, loss, image, label
with program_guard(main_prog, startup_prog):
sgd, loss, image, label = simple_net()
if minimize:
compiled = fluid.CompiledProgram(main_prog).with_data_parallel(
loss_name=loss.name)
else:
compiled = None
loader = fluid.io.DataLoader.from_generator(
feed_list=[image, label],
capacity=64,
use_double_buffer=True,
iterable=iterable)
loader.set_sample_list_generator(sample_list_generator_creator(),
places[0])
if minimize:
exe.run(startup_prog)
return compiled, loader, sgd, loss, image, label
def _generate(self):
main_prog = fluid.Program()
startup_prog = fluid.Program()
exe = fluid.Executor(places[0])
return exe, main_prog, startup_prog
def _reset_generator(self):
unique_name.generator = fluid.unique_name.UniqueNameGenerator()
acp.generator = fluid.unique_name.UniqueNameGenerator()
acp.g_acp_type = None
acp.g_checker = acp.AutoCheckpointChecker()
acp.g_program_attr = {}
def _clear_envs(self):
os.environ.pop("PADDLE_RUNNING_ENV", None)
def _readd_envs(self):
os.environ["PADDLE_RUNNING_ENV"] = "PADDLE_EDL_AUTO_CHECKPOINT"
...@@ -51,13 +51,9 @@ def func_error_in_compile_time_2(x): ...@@ -51,13 +51,9 @@ def func_error_in_compile_time_2(x):
@declarative @declarative
def func_error_in_runtime(x, iter_num=3): def func_error_in_runtime(x, iter_num=3):
x = fluid.dygraph.to_variable(x) x = fluid.dygraph.to_variable(x)
a = [] two = fluid.layers.fill_constant(shape=[1], value=2, dtype="int32")
iter_num = fluid.layers.fill_constant( x = fluid.layers.reshape(x, shape=[1, two])
shape=[1], value=iter_num, dtype="int32") return x
for i in range(iter_num):
a.append(b)
a = fluid.layers.concat(a, axis=0)
return a
class TestErrorInCompileTime(unittest.TestCase): class TestErrorInCompileTime(unittest.TestCase):
...@@ -118,7 +114,6 @@ class TestErrorInCompileTime2(TestErrorInCompileTime): ...@@ -118,7 +114,6 @@ class TestErrorInCompileTime2(TestErrorInCompileTime):
] ]
# TODO(liym27): Consider the case that op_callstack when error raised from c++ code
class TestErrorInRuntime(TestErrorInCompileTime): class TestErrorInRuntime(TestErrorInCompileTime):
def set_func(self): def set_func(self):
self.func = func_error_in_runtime self.func = func_error_in_runtime
...@@ -126,10 +121,26 @@ class TestErrorInRuntime(TestErrorInCompileTime): ...@@ -126,10 +121,26 @@ class TestErrorInRuntime(TestErrorInCompileTime):
def set_exception_type(self): def set_exception_type(self):
self.exception_type = EnforceNotMet self.exception_type = EnforceNotMet
def test(self): def set_message(self):
with fluid.dygraph.guard(): self.expected_message = \
with self.assertRaises(self.exception_type) as cm: [
self.func(self.input) 'File "{}", line 55, in func_error_in_runtime'.format(self.filepath),
'x = fluid.layers.reshape(x, shape=[1, two])'
]
def _test_create_message(self, error_data):
self.filepath = inspect.getfile(unwrap(self.func))
self.set_message()
with self.assertRaises(ValueError):
error_data.create_message()
error_data.in_runtime = False
error_message = error_data.create_message()
self.assertIn('In user code:', error_message)
for m in self.expected_message:
self.assertIn(m, error_message)
if __name__ == '__main__': if __name__ == '__main__':
......
...@@ -30,16 +30,8 @@ class TestMKLDNNReluDim2(TestRelu): ...@@ -30,16 +30,8 @@ class TestMKLDNNReluDim2(TestRelu):
self.attrs = {"use_mkldnn": True} self.attrs = {"use_mkldnn": True}
def test_check_output(self): def init_dtype(self):
# TODO(wangzhongpu): support mkldnn op in dygraph mode self.dtype = np.float32
self.check_output(check_dygraph=False)
def test_check_grad(self):
if self.dtype == np.float16:
return
# TODO(wangzhongpu): support mkldnn op in dygraph mode
self.check_grad(
['X'], 'Out', max_relative_error=0.007, check_dygraph=False)
class TestMKLDNNLeakyReluDim2(TestLeakyRelu): class TestMKLDNNLeakyReluDim2(TestLeakyRelu):
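The MKL-DNN activation-test hunks here and below all apply the same mechanical change: drop the per-class test_check_output/test_check_grad overrides (which forced check_dygraph=False) and pin the dtype via init_dtype, inheriting the checks from the base op tests. Each class ends up shaped like this (relu shown; the base-class check behavior is assumed):

class TestMKLDNNReluDim2(TestRelu):
    def setUp(self):
        super(TestMKLDNNReluDim2, self).setUp()
        self.attrs = {"use_mkldnn": True}

    def init_dtype(self):
        self.dtype = np.float32  # MKL-DNN path is validated in fp32 only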
...@@ -48,16 +40,8 @@ class TestMKLDNNLeakyReluDim2(TestLeakyRelu): ...@@ -48,16 +40,8 @@ class TestMKLDNNLeakyReluDim2(TestLeakyRelu):
self.attrs = {"use_mkldnn": True} self.attrs = {"use_mkldnn": True}
def test_check_output(self): def init_dtype(self):
# TODO(wangzhongpu): support mkldnn op in dygraph mode self.dtype = np.float32
self.check_output(check_dygraph=False)
def test_check_grad(self):
if self.dtype == np.float16:
return
# TODO(wangzhongpu): support mkldnn op in dygraph mode
self.check_grad(
['X'], 'Out', max_relative_error=0.007, check_dygraph=False)
class TestMKLDNNGeluDim2(TestActivation): class TestMKLDNNGeluDim2(TestActivation):
...@@ -92,16 +76,8 @@ class TestMKLDNNTanhDim2(TestTanh): ...@@ -92,16 +76,8 @@ class TestMKLDNNTanhDim2(TestTanh):
self.attrs = {"use_mkldnn": True} self.attrs = {"use_mkldnn": True}
def test_check_output(self): def init_dtype(self):
# TODO(wangzhongpu): support mkldnn op in dygraph mode self.dtype = np.float32
self.check_output(check_dygraph=False)
def test_check_grad(self):
if self.dtype == np.float16:
return
# TODO(wangzhongpu): support mkldnn op in dygraph mode
self.check_grad(
['X'], 'Out', max_relative_error=0.007, check_dygraph=False)
class TestMKLDNNSqrtDim2(TestSqrt): class TestMKLDNNSqrtDim2(TestSqrt):
...@@ -110,16 +86,8 @@ class TestMKLDNNSqrtDim2(TestSqrt): ...@@ -110,16 +86,8 @@ class TestMKLDNNSqrtDim2(TestSqrt):
self.attrs = {"use_mkldnn": True} self.attrs = {"use_mkldnn": True}
def test_check_output(self): def init_dtype(self):
# TODO(wangzhongpu): support mkldnn op in dygraph mode self.dtype = np.float32
self.check_output(check_dygraph=False)
def test_check_grad(self):
if self.dtype == np.float16:
return
# TODO(wangzhongpu): support mkldnn op in dygraph mode
self.check_grad(
['X'], 'Out', max_relative_error=0.007, check_dygraph=False)
class TestMKLDNNAbsDim2(TestAbs): class TestMKLDNNAbsDim2(TestAbs):
...@@ -127,16 +95,8 @@ class TestMKLDNNAbsDim2(TestAbs): ...@@ -127,16 +95,8 @@ class TestMKLDNNAbsDim2(TestAbs):
super(TestMKLDNNAbsDim2, self).setUp() super(TestMKLDNNAbsDim2, self).setUp()
self.attrs = {"use_mkldnn": True} self.attrs = {"use_mkldnn": True}
def test_check_output(self): def init_dtype(self):
# TODO(wangzhongpu): support mkldnn op in dygraph mode self.dtype = np.float32
self.check_output(check_dygraph=False)
def test_check_grad(self):
if self.dtype == np.float16:
return
# TODO(wangzhongpu): support mkldnn op in dygraph mode
self.check_grad(
['X'], 'Out', max_relative_error=0.007, check_dygraph=False)
class TestMKLDNNSwishDim2(TestSwish): class TestMKLDNNSwishDim2(TestSwish):
...@@ -151,15 +111,8 @@ class TestMKLDNNSwishDim2(TestSwish): ...@@ -151,15 +111,8 @@ class TestMKLDNNSwishDim2(TestSwish):
self.outputs = {'Out': out} self.outputs = {'Out': out}
self.attrs = {"use_mkldnn": True, "beta": beta} self.attrs = {"use_mkldnn": True, "beta": beta}
def test_check_output(self): def init_dtype(self):
# TODO(wangzhongpu): support mkldnn op in dygraph mode self.dtype = np.float32
self.check_output()
def test_check_grad(self):
if self.dtype == np.float16:
return
# TODO(wangzhongpu): support mkldnn op in dygraph mode
self.check_grad(['X'], 'Out')
class TestMKLDNNSigmoidDim2(TestSigmoid): class TestMKLDNNSigmoidDim2(TestSigmoid):
...@@ -181,16 +134,8 @@ class TestMKLDNNReluDim4(TestRelu): ...@@ -181,16 +134,8 @@ class TestMKLDNNReluDim4(TestRelu):
self.outputs = {'Out': out} self.outputs = {'Out': out}
self.attrs = {"use_mkldnn": True} self.attrs = {"use_mkldnn": True}
def test_check_output(self): def init_dtype(self):
# TODO(wangzhongpu): support mkldnn op in dygraph mode self.dtype = np.float32
self.check_output(check_dygraph=False)
def test_check_grad(self):
if self.dtype == np.float16:
return
# TODO(wangzhongpu): support mkldnn op in dygraph mode
self.check_grad(
['X'], 'Out', max_relative_error=0.007, check_dygraph=False)
class TestMKLDNNLeakyReluDim4(TestLeakyRelu): class TestMKLDNNLeakyReluDim4(TestLeakyRelu):
...@@ -206,16 +151,8 @@ class TestMKLDNNLeakyReluDim4(TestLeakyRelu): ...@@ -206,16 +151,8 @@ class TestMKLDNNLeakyReluDim4(TestLeakyRelu):
self.outputs = {'Out': out} self.outputs = {'Out': out}
self.attrs = {"use_mkldnn": True} self.attrs = {"use_mkldnn": True}
def test_check_output(self): def init_dtype(self):
# TODO(wangzhongpu): support mkldnn op in dygraph mode self.dtype = np.float32
self.check_output(check_dygraph=False)
def test_check_grad(self):
if self.dtype == np.float16:
return
# TODO(wangzhongpu): support mkldnn op in dygraph mode
self.check_grad(
['X'], 'Out', max_relative_error=0.007, check_dygraph=False)
class TestMKLDNNGeluDim4(TestActivation): class TestMKLDNNGeluDim4(TestActivation):
...@@ -254,17 +191,6 @@ class TestMKLDNNTanhDim4(TestTanh): ...@@ -254,17 +191,6 @@ class TestMKLDNNTanhDim4(TestTanh):
self.outputs = {'Out': np.tanh(self.inputs['X'])} self.outputs = {'Out': np.tanh(self.inputs['X'])}
self.attrs = {"use_mkldnn": True} self.attrs = {"use_mkldnn": True}
def test_check_output(self):
# TODO(wangzhongpu): support mkldnn op in dygraph mode
self.check_output(check_dygraph=False)
def test_check_grad(self):
if self.dtype == np.float16:
return
# TODO(wangzhongpu): support mkldnn op in dygraph mode
self.check_grad(
['X'], 'Out', max_relative_error=0.007, check_dygraph=False)
class TestMKLDNNSqrtDim4(TestSqrt): class TestMKLDNNSqrtDim4(TestSqrt):
def setUp(self): def setUp(self):
...@@ -276,17 +202,6 @@ class TestMKLDNNSqrtDim4(TestSqrt): ...@@ -276,17 +202,6 @@ class TestMKLDNNSqrtDim4(TestSqrt):
self.outputs = {'Out': np.sqrt(self.inputs['X'])} self.outputs = {'Out': np.sqrt(self.inputs['X'])}
self.attrs = {"use_mkldnn": True} self.attrs = {"use_mkldnn": True}
def test_check_output(self):
# TODO(wangzhongpu): support mkldnn op in dygraph mode
self.check_output(check_dygraph=False)
def test_check_grad(self):
if self.dtype == np.float16:
return
# TODO(wangzhongpu): support mkldnn op in dygraph mode
self.check_grad(
['X'], 'Out', max_relative_error=0.007, check_dygraph=False)
class TestMKLDNNAbsDim4(TestAbs): class TestMKLDNNAbsDim4(TestAbs):
def setUp(self): def setUp(self):
...@@ -299,23 +214,15 @@ class TestMKLDNNAbsDim4(TestAbs): ...@@ -299,23 +214,15 @@ class TestMKLDNNAbsDim4(TestAbs):
self.outputs = {'Out': np.abs(self.inputs['X'])} self.outputs = {'Out': np.abs(self.inputs['X'])}
self.attrs = {"use_mkldnn": True} self.attrs = {"use_mkldnn": True}
def test_check_output(self): def init_dtype(self):
# TODO(wangzhongpu): support mkldnn op in dygraph mode self.dtype = np.float32
self.check_output(check_dygraph=False)
def test_check_grad(self):
if self.dtype == np.float16:
return
# TODO(wangzhongpu): support mkldnn op in dygraph mode
self.check_grad(
['X'], 'Out', max_relative_error=0.007, check_dygraph=False)
class TestMKLDNNSwishDim4(TestSwish): class TestMKLDNNSwishDim4(TestSwish):
def setUp(self): def setUp(self):
super(TestMKLDNNSwishDim4, self).setUp() super(TestMKLDNNSwishDim4, self).setUp()
x = np.random.uniform(0.1, 1, [2, 4, 3, 5]).astype("float32") x = np.random.uniform(0.1, 1, [2, 4, 3, 5]).astype(self.dtype)
beta = 2.3 beta = 2.3
out = x * expit(beta * x) out = x * expit(beta * x)
...@@ -323,15 +230,8 @@ class TestMKLDNNSwishDim4(TestSwish): ...@@ -323,15 +230,8 @@ class TestMKLDNNSwishDim4(TestSwish):
self.outputs = {'Out': out} self.outputs = {'Out': out}
self.attrs = {"use_mkldnn": True, "beta": beta} self.attrs = {"use_mkldnn": True, "beta": beta}
def test_check_output(self): def init_dtype(self):
# TODO(wangzhongpu): support mkldnn op in dygraph mode self.dtype = np.float32
self.check_output()
def test_check_grad(self):
if self.dtype == np.float16:
return
# TODO(wangzhongpu): support mkldnn op in dygraph mode
self.check_grad(['X'], 'Out')
class TestMKLDNNSigmoidDim4(TestSigmoid): class TestMKLDNNSigmoidDim4(TestSigmoid):
......
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import unittest
import paddle
import paddle.fluid as fluid
import paddle.fluid.incubate.fleet.base.role_maker as role_maker
from paddle.fluid.incubate.fleet.collective import CollectiveOptimizer, fleet
import os
import sys
from paddle.fluid.incubate.fleet.utils.fs import LocalFS
from paddle.fluid.incubate.fleet.utils.hdfs import HDFSClient
import paddle.fluid.incubate.checkpoint.auto_checkpoint as acp
from paddle.fluid.incubate.checkpoint.checkpoint_saver import PaddleModel
from paddle.fluid.framework import program_guard
from paddle.fluid import unique_name
import numpy as np
from paddle.io import Dataset, BatchSampler, DataLoader
from paddle.fluid.tests.unittests.auto_checkpoint_utils import AutoCheckpointBase, get_logger
logger = get_logger()
class AutoCheckPointACLBase(AutoCheckpointBase):
def setUp(self):
get_logger()
logger.info("enter tests")
self._old_environ = dict(os.environ)
proc_env = {
"PADDLE_RUNNING_ENV": "PADDLE_EDL_AUTO_CHECKPOINT",
"PADDLE_TRAINER_ID": "0",
"PADDLE_RUNNING_PLATFORM": "PADDLE_CLOUD",
"PADDLE_JOB_ID": "test_job_auto",
"PADDLE_EDL_HDFS_HOME": "/usr/local/hadoop-2.7.7",
"PADDLE_EDL_HDFS_NAME": "",
"PADDLE_EDL_HDFS_UGI": "",
"PADDLE_EDL_HDFS_CHECKPOINT_PATH": "auto_checkpoint",
"PADDLE_EDL_ONLY_FOR_CE_TEST": "1",
"PADDLE_EDL_FS_CACHE": ".auto_checkpoint_test",
"PADDLE_EDL_SAVE_CHECKPOINT_INTER": "0"
}
os.environ.update(proc_env)
def tearDown(self):
os.environ.clear()
os.environ.update(self._old_environ)
def _run_normal(self):
exe, main_prog, startup_prog = self._generate()
save_dir = "./run_save_model"
fs = LocalFS()
fs.delete(save_dir)
logger.info("begin _run_normal")
compiled, data_loader, optimizer, loss, image, label = self._init_env(
exe, main_prog, startup_prog)
for i in range(3):
self.assertEqual(acp._get_train_epoch_range(), None)
self.assertEqual(acp.g_acp_type, None)
for data in data_loader():
self.assertEqual(acp.g_acp_type, None)
self.assertEqual(acp._get_train_epoch_range(), None)
fetch = exe.run(compiled, feed=data, fetch_list=[loss])
self.assertEqual(acp.g_acp_type, None)
self.assertEqual(acp._get_train_epoch_range(), None)
m1 = PaddleModel(exe, compiled)
m1.serialize(save_dir)
m2 = PaddleModel(exe, compiled)
m2.deserialize(save_dir)
logger.info("end _run_normal")
fs.delete(save_dir)
def _not_use_train(self):
logger.info("begin _not_use_train")
exe, main_prog, startup_prog = self._generate()
compiled, data_loader, optimizer, loss, image, label = \
self._init_env(exe, main_prog, startup_prog)
epochs = []
for i in acp.train_epoch_range(3, 0):
epochs.append(i)
for data in data_loader():
fetch = exe.run(compiled, feed=data, fetch_list=[loss])
self.assertEqual(epochs, [0, 1, 2])
logger.info("end _not_use_train")
def _run_save_0(self, break_epoch_no=None):
logger.info("begin _run_save_0")
fs = LocalFS()
save_dir = "./run_save_0"
fs.delete(save_dir)
exe, main_prog, startup_prog = self._generate()
compiled, data_loader, optimizer, loss, image, label = \
self._init_env(exe, main_prog, startup_prog)
o = None
i = 0
name = None
for i in acp.train_epoch_range(3, 0):
o = acp._get_train_epoch_range()
name = o.name
for data in data_loader():
fetch = exe.run(compiled, feed=data, fetch_list=[loss])
self.assertEqual(len(o._exe_status), 1)
if break_epoch_no is not None:
if i == break_epoch_no:
break
o = acp._get_train_epoch_range()
assert o == None, "the train epoch range must not exist now"
if break_epoch_no is None:
self.assertEqual(i, 2)
else:
self.assertEqual(i, break_epoch_no)
fs.delete(save_dir)
logger.info("end _run_save_0")
def _run_load_0(self, break_epoch_no=None):
logger.info("begin _run_load_0")
exe, main_prog, startup_prog = self._generate()
fs = LocalFS()
save_dir = "./run_load_0"
fs.delete(save_dir)
compiled, data_loader, optimizer, loss, image, label = self._init_env(
exe, main_prog, startup_prog)
o = None
i = 0
check = False
epochs = []
for i in acp.train_epoch_range(3, 0):
epochs.append(i)
for data in data_loader():
fetch = exe.run(compiled, feed=data, fetch_list=[loss])
o = acp._get_train_epoch_range()
self.assertTrue(o == None, "the train epoch range must not exist now")
self.assertEqual(i, 2)
if break_epoch_no is not None:
if break_epoch_no == 0:
self.assertEqual(epochs, [0, 1, 2])
elif break_epoch_no == 1:
self.assertEqual(epochs, [1, 2])
elif break_epoch_no == 2:
self.assertEqual(epochs, [2])
else:
self.assertEqual(epochs, [2])
fs.delete(save_dir)
logger.info("begin _run_load_0")
class AutoCheckpointTest(AutoCheckPointACLBase):
def setUp(self):
get_logger()
logger.info("enter tests")
self._old_environ = dict(os.environ)
proc_env = {
"PADDLE_RUNNING_ENV": "PADDLE_EDL_AUTO_CHECKPOINT",
"PADDLE_TRAINER_ID": "0",
"PADDLE_RUNNING_PLATFORM": "PADDLE_CLOUD",
"PADDLE_JOB_ID": "test_job_auto_1",
"PADDLE_EDL_HDFS_HOME": "/usr/local/hadoop-2.7.7",
"PADDLE_EDL_HDFS_NAME": "",
"PADDLE_EDL_HDFS_UGI": "",
"PADDLE_EDL_HDFS_CHECKPOINT_PATH": "auto_checkpoint_1",
"PADDLE_EDL_ONLY_FOR_CE_TEST": "1",
"PADDLE_EDL_FS_CACHE": ".auto_checkpoint_test_1",
"PADDLE_EDL_SAVE_CHECKPOINT_INTER": "0"
}
os.environ.update(proc_env)
def test_normal(self):
logger.info("begin test_normal")
checker = acp._get_checker()
fs = HDFSClient(checker.hdfs_home, None)
fs.delete(checker.hdfs_checkpoint_path)
self._clear_envs()
self._reset_generator()
self._run_normal()
self._readd_envs()
logger.info("end test_normal")
def test_basic(self):
logger.info("begin test_basic")
checker = acp._get_checker()
self.assertEqual(checker.run_env, "PADDLE_EDL_AUTO_CHECKPOINT")
self.assertEqual(checker.platform, "PADDLE_CLOUD")
self.assertEqual(checker.save_checkpoint_inter, 0)
print(checker)
fs = HDFSClient(checker.hdfs_home, None)
fs.delete(checker.hdfs_checkpoint_path)
self._reset_generator()
self._run_save_0()
self._reset_generator()
self._run_load_0()
logger.info("end test_basic")
def test_not_use(self):
logger.info("begin test_not_use")
self._clear_envs()
self._reset_generator()
self._not_use_train()
self._readd_envs()
logger.info("end test_not_use")
def test_multiple(self):
checker = acp._get_checker()
fs = HDFSClient(checker.hdfs_home, None)
fs.delete(checker.hdfs_checkpoint_path)
self._reset_generator()
logger.info("begin test_multiple")
fs = LocalFS()
save_dir = "./run_save_0"
fs.delete(save_dir)
exe, main_prog1, startup_prog1 = self._generate()
_, main_prog2, startup_prog2 = self._generate()
compiled1, data_loader1, optimizer1, loss1, image1, label1 = \
self._init_env(exe, main_prog1, startup_prog1)
compiled2, data_loader2, optimizer2, loss2, image2, label2 = \
self._init_env(exe, main_prog2, startup_prog2)
o = None
epochs = []
for i in acp.train_epoch_range(3, 0):
for data in data_loader1():
fetch = exe.run(compiled1, feed=data, fetch_list=[loss1])
for data in data_loader2():
fetch = exe.run(compiled2, feed=data, fetch_list=[loss2])
o = acp._get_train_epoch_range()
self.assertEqual(len(o._exe_status), 2)
print(o._exe_status)
epochs.append(i)
o = acp._get_train_epoch_range()
self.assertTrue(o == None, "the train epoch range must not exist now")
self.assertEqual(i, 2)
self.assertEqual(epochs, [0, 1, 2])
fs.delete(save_dir)
logger.info("end test_multiple")
def test_distributed_basic(self):
checker = acp._get_checker()
fs = HDFSClient(checker.hdfs_home, None)
fs.delete(checker.hdfs_checkpoint_path)
self._reset_generator()
logger.info("begin test_distributed_basic")
fs = LocalFS()
save_dir = "./run_save_0"
fs.delete(save_dir)
#basic
exe, main_prog, startup_prog = self._generate()
compiled, data_loader, optimizer, loss, image, label = \
self._init_env(exe, main_prog, startup_prog, minimize=False)
#fleet
os.environ["TRAINING_ROLE"] = "TRAINER"
os.environ["PADDLE_TRAINER_ID"] = "0"
os.environ["PADDLE_TRAINER_ENDPOINTS"] = "127.0.0.1:6070"
role = role_maker.PaddleCloudRoleMaker(is_collective=True)
fleet.init(role)
with fluid.program_guard(main_prog, startup_prog):
dist_optimizer = fleet.distributed_optimizer(optimizer)
dist_optimizer.minimize(loss)
exe.run(startup_prog)
o = None
i = 0
name = None
for i in acp.train_epoch_range(3, 0):
o = acp._get_train_epoch_range()
name = o.name
logger.info("_run_save_0 name:{} epoch_no:{}".format(o.name, i))
for data in data_loader():
fetch = exe.run(fleet.main_program,
feed=data,
fetch_list=[loss])
self.assertEqual(len(o._exe_status), 1)
o = acp._get_train_epoch_range()
assert o == None, "the train epoch range must not exist now"
self.assertEqual(i, 2)
fs.delete(save_dir)
logger.info("end test_distributed_basic")
def test_checker(self):
os.environ.pop("PADDLE_JOB_ID", None)
try:
checker = acp.AutoCheckpointChecker()
self.assertFalse(True)
except Exception as e:
pass
os.environ["PADDLE_JOB_ID"] = "test_job_auto_1"
if __name__ == '__main__':
unittest.main()
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import unittest
import paddle
import paddle.fluid as fluid
import paddle.fluid.incubate.fleet.base.role_maker as role_maker
from paddle.fluid.incubate.fleet.collective import CollectiveOptimizer, fleet
import os
import sys
from paddle.fluid.incubate.fleet.utils.fs import LocalFS
from paddle.fluid.incubate.fleet.utils.hdfs import HDFSClient
import paddle.fluid.incubate.checkpoint.auto_checkpoint as acp
from paddle.fluid.incubate.checkpoint.checkpoint_saver import PaddleModel
from paddle.fluid.framework import program_guard
from paddle.fluid import unique_name
import numpy as np
from paddle.io import Dataset, BatchSampler, DataLoader
from paddle.fluid.tests.unittests.auto_checkpoint_utils import AutoCheckpointBase, get_logger
from paddle.fluid.tests.unittests.test_auto_checkpoint import AutoCheckPointACLBase
logger = get_logger()
class AutoCheckpointTest2(AutoCheckPointACLBase):
def setUp(self):
get_logger()
logger.info("enter tests")
self._old_environ = dict(os.environ)
proc_env = {
"PADDLE_RUNNING_ENV": "PADDLE_EDL_AUTO_CHECKPOINT",
"PADDLE_TRAINER_ID": "0",
"PADDLE_RUNNING_PLATFORM": "PADDLE_CLOUD",
"PADDLE_JOB_ID": "test_job_auto_2",
"PADDLE_EDL_HDFS_HOME": "/usr/local/hadoop-2.7.7",
"PADDLE_EDL_HDFS_NAME": "",
"PADDLE_EDL_HDFS_UGI": "",
"PADDLE_EDL_HDFS_CHECKPOINT_PATH": "auto_checkpoint_2",
"PADDLE_EDL_ONLY_FOR_CE_TEST": "1",
"PADDLE_EDL_FS_CACHE": ".auto_checkpoint_test_2",
"PADDLE_EDL_SAVE_CHECKPOINT_INTER": "0"
}
os.environ.update(proc_env)
def test_corner_epoch_no(self):
logger.info("begin test_corener_epoch_no")
checker = acp._get_checker()
fs = HDFSClient(checker.hdfs_home, None)
for i in range(3):
fs.delete(checker.hdfs_checkpoint_path)
self._reset_generator()
self._run_save_0(break_epoch_no=i)
self._reset_generator()
self._run_load_0(break_epoch_no=i)
fs.delete(checker.hdfs_checkpoint_path)
logger.info("end test_corener_epoch_no")
if __name__ == '__main__':
unittest.main()
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import unittest
import paddle.fluid as fluid
import paddle.fluid.incubate.fleet.base.role_maker as role_maker
from paddle.fluid.incubate.fleet.collective import CollectiveOptimizer, fleet
from paddle.fluid.incubate.checkpoint.auto_checkpoint import ExeTrainStatus
from paddle.fluid.incubate.checkpoint.checkpoint_saver import CheckpointSaver
import os
import sys
from paddle.fluid.incubate.fleet.utils.fs import LocalFS
from paddle.fluid.incubate.fleet.utils.hdfs import HDFSClient
class CheckpointerSaverTest(unittest.TestCase):
def test(self):
fs = HDFSClient("/usr/local/hadoop-2.7.7", None)
dir_path = "./checkpointsaver_test"
fs.delete(dir_path)
s = CheckpointSaver(fs)
fs.mkdirs("{}/exe.exe".format(dir_path))
fs.mkdirs("{}/exe.1".format(dir_path))
fs.mkdirs("{}/exe".format(dir_path))
a = s.get_checkpoint_no(dir_path)
self.assertEqual(len(a), 0)
fs.mkdirs("{}/__paddle_checkpoint__.0".format(dir_path))
fs.mkdirs("{}/__paddle_checkpoint__.exe".format(dir_path))
a = s.get_checkpoint_no(dir_path)
self.assertEqual(len(a), 1)
s.clean_redundant_checkpoints(dir_path)
s.clean_redundant_checkpoints(dir_path)
fs.delete(dir_path)
if __name__ == '__main__':
unittest.main()
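A note on the test above: the only contract it exercises is the checkpoint directory naming scheme, where a directory counts as a checkpoint only if its name has the form __paddle_checkpoint__.<integer>. A minimal, self-contained sketch of that filtering rule follows; the helper parse_checkpoint_numbers is illustrative only, not part of CheckpointSaver.

# Illustrative sketch only: mirrors the directory-name contract that
# CheckpointerSaverTest exercises; not the CheckpointSaver implementation.
CHECKPOINT_PREFIX = "__paddle_checkpoint__"

def parse_checkpoint_numbers(dir_names):
    """Return the sorted checkpoint numbers found among directory names."""
    numbers = []
    for name in dir_names:
        prefix, _, suffix = name.partition(".")
        # "exe", "exe.1" and "__paddle_checkpoint__.exe" are all skipped:
        # the prefix must match exactly and the suffix must be an integer.
        if prefix == CHECKPOINT_PREFIX and suffix.isdigit():
            numbers.append(int(suffix))
    return sorted(numbers)

# Mirrors the assertions in the test above.
assert parse_checkpoint_numbers(["exe.exe", "exe.1", "exe"]) == []
assert parse_checkpoint_numbers(
    ["__paddle_checkpoint__.0", "__paddle_checkpoint__.exe"]) == [0]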
...@@ -170,7 +170,8 @@ def program_equal(a, b):
                        k))
                return False
            assert (len(a.blocks) == len(b.blocks))
        elif k == '_auto_checkpoint_name':
            continue
        elif (v != b.__dict__[k]):
            raise ValueError("In program_equal not equal:{0}\n".format(k))
......
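The new elif k == '_auto_checkpoint_name' branch above is needed because auto checkpoint stamps every Program with a unique name, so two structurally identical programs would otherwise never compare equal. A toy illustration of the same attribute-skipping pattern follows; the dict keys are hypothetical stand-ins for Program.__dict__, not the real fields.

# Toy illustration of attribute-skipping equality; the keys are
# hypothetical stand-ins, not the real Program fields.
IGNORED_ATTRS = {"_auto_checkpoint_name"}

def dicts_equal_ignoring(a, b, ignored=IGNORED_ATTRS):
    keys = set(a) | set(b)
    return all(a.get(k) == b.get(k) for k in keys if k not in ignored)

p1 = {"ops": ["fill_constant"], "_auto_checkpoint_name": "prog_0"}
p2 = {"ops": ["fill_constant"], "_auto_checkpoint_name": "prog_1"}
assert dicts_equal_ignoring(p1, p2)  # equal once the unique name is ignored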
...@@ -269,18 +269,26 @@ class TestFillConstantAPI(unittest.TestCase):
        out_6 = fluid.layers.fill_constant(
            shape=shape_tensor_int64, dtype=np.float32, value=1.1)

        val1 = fluid.layers.fill_constant(
            shape=[1], dtype=np.float32, value=1.1)
        val2 = fluid.layers.fill_constant(
            shape=[1], dtype=np.float64, value=1.1)
        out_7 = fluid.layers.fill_constant(
            shape=shape_tensor_int64, dtype=np.float32, value=val1)
        out_8 = fluid.layers.fill_constant(
            shape=shape_tensor_int64, dtype=np.float32, value=val2)

        exe = fluid.Executor(place=fluid.CPUPlace())
        res_1, res_2, res_3, res_4, res_5, res_6, res_7, res_8 = exe.run(
            fluid.default_main_program(),
            feed={
                "shape_tensor_int32": np.array([1, 2]).astype("int32"),
                "shape_tensor_int64": np.array([1, 2]).astype("int64"),
            },
            fetch_list=[
                out_1, out_2, out_3, out_4, out_5, out_6, out_7, out_8
            ])

        assert np.array_equal(res_1, np.full([1, 2], 1.1, dtype="float32"))
        assert np.array_equal(res_2, np.full([1, 2], 1.1, dtype="float32"))
...@@ -289,6 +297,31 @@ class TestFillConstantAPI(unittest.TestCase):
        assert np.array_equal(res_5, np.full([1, 2], 1.1, dtype="float32"))
        assert np.array_equal(res_6, np.full([1, 2], 1.1, dtype="float32"))
        assert np.array_equal(res_7, np.full([1, 2], 1.1, dtype="float32"))
        assert np.array_equal(res_8, np.full([1, 2], 1.1, dtype="float32"))
class TestFillConstantImperative(unittest.TestCase):
def test_api(self):
with fluid.dygraph.guard():
data1 = np.array([1, 2]).astype('int32')
data2 = np.array([1.1]).astype('float32')
shape = fluid.dygraph.to_variable(data1)
val = fluid.dygraph.to_variable(data2)
res1 = fluid.layers.fill_constant(
shape=[1, 2], dtype='float32', value=1.1)
res2 = fluid.layers.fill_constant(
shape=shape, dtype='float32', value=1.1)
res3 = fluid.layers.fill_constant(
shape=shape, dtype='float32', value=val)
assert np.array_equal(
res1.numpy(), np.full(
[1, 2], 1.1, dtype="float32"))
assert np.array_equal(
res2.numpy(), np.full(
[1, 2], 1.1, dtype="float32"))
assert np.array_equal(
res3.numpy(), np.full(
[1, 2], 1.1, dtype="float32"))
class TestFillConstantOpError(unittest.TestCase):
......
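Taken together, the hunks above extend the fill_constant tests so that value may be a 1-element Tensor (float32 or float64), not just a Python scalar. A minimal dygraph sketch of the usage the new tests cover:

import numpy as np
import paddle.fluid as fluid

with fluid.dygraph.guard():
    # value is passed as a 1-element Tensor instead of a Python float
    val = fluid.dygraph.to_variable(np.array([1.1]).astype('float32'))
    out = fluid.layers.fill_constant(shape=[1, 2], dtype='float32', value=val)
    assert np.array_equal(out.numpy(), np.full([1, 2], 1.1, dtype='float32'))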
...@@ -15,7 +15,7 @@
import unittest
import paddle.fluid as fluid
import paddle.fluid.incubate.fleet.base.role_maker as role_maker
from paddle.fluid.incubate.fleet.collective import CollectiveOptimizer, fleet
import os
import sys
import inspect
...@@ -38,6 +38,8 @@ class FSTest(unittest.TestCase):
                func(a)
            elif len(args) == 3:
                func(a, a)
            elif len(args) == 5:
                func(a, a, a, a)

            print("args:", args, len(args), "func:", func)
            self.assertFalse(True)
        except NotImplementedError as e:
......
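For context on the len(args) == 5 branch above: the surrounding loop inspects each FS method's signature and calls it with placeholder arguments, expecting NotImplementedError; the count is 5 because inspect includes self. A self-contained sketch of the pattern follows; the AbstractFS class is a hypothetical stand-in, not Paddle's FS interface.

import inspect

class AbstractFS(object):  # hypothetical stand-in for the FS interface under test
    def upload(self, local_path, fs_path, retries, timeout):
        raise NotImplementedError

fs = AbstractFS()
for name, method in inspect.getmembers(fs, predicate=inspect.ismethod):
    arg_names = inspect.getfullargspec(method).args  # includes 'self'
    placeholders = ["x"] * (len(arg_names) - 1)      # one per real parameter
    try:
        method(*placeholders)
        raise AssertionError("{} should raise NotImplementedError".format(name))
    except NotImplementedError:
        pass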
...@@ -248,7 +248,7 @@ def zeros(shape, dtype=None, name=None):
          # shape is a Tensor
          shape = paddle.fill_constant(shape=[2], dtype='int32', value=2)
          data3 = paddle.zeros(shape=shape, dtype='int32')
          # [[0 0]
          # [0 0]]
    """
......
This diff has been collapsed.
...@@ -178,6 +178,7 @@ packages=['paddle',
          'paddle.fluid.incubate',
          'paddle.fluid.incubate.data_generator',
          'paddle.fluid.incubate.fleet',
          'paddle.fluid.incubate.checkpoint',
          'paddle.fluid.incubate.fleet.base',
          'paddle.fluid.incubate.fleet.parameter_server',
          'paddle.fluid.incubate.fleet.parameter_server.distribute_transpiler',
......