Unverified commit 6b393e45, authored by Sławomir Siwek and committed by GitHub

convert todos to internal tasks (#51174)

Parent cf1a1c4b
@@ -36,9 +36,8 @@ static int BuildFusion(Graph* graph,
           ->assert_is_op_input("lookup_table_v2")
           ->assert_var_not_persistable();
   patterns::Embedding embedding_pattern(pattern, name_scope);
-  // TODO(jczaja): Intermediate can only be for val that are not used anywhere
-  // but lookup table output may go into other LSTM (for reverse
-  // direction)
+  // Intermediate can only be for val that are not used anywhere but
+  // lookup table output may go into other LSTM (for reverse direction)
   auto* embedding_out = embedding_pattern(x);
   patterns::FC fc_pattern(pattern, name_scope);
@@ -227,13 +226,13 @@ static int BuildFusion(Graph* graph,
     GET_IR_NODE_FROM_SUBGRAPH(w, w, fc_pattern);
     GET_IR_NODE_FROM_SUBGRAPH(mul, mul, fc_pattern);
-    // TODO(jczaja): Add support for is_sparse / is_distributed
     auto is_sparse =
         PADDLE_GET_CONST(bool, lookup_table->Op()->GetAttr("is_sparse"));
     auto is_distributed =
         PADDLE_GET_CONST(bool, lookup_table->Op()->GetAttr("is_distributed"));
-    if (is_sparse == true || is_distributed == true) {
+    if (is_sparse || is_distributed) {
+      VLOG(4) << "Only dense embedding is supported in oneDNN";
       return;
     }
@@ -252,10 +251,7 @@ static int BuildFusion(Graph* graph,
                   Cell,
                   fc_out,
                   fc_bias);
-      // Remove unneeded nodes.
-      // TODO(jczaja): Proper removing of lookup table
       std::unordered_set<const Node*> marked_nodes(
-          // {lookup_table, mul, lstm, elementwise_add, fc_bias, W});
           {mul, lstm, elementwise_add, fc_bias});
       GraphSafeRemoveNodes(graph, marked_nodes);
     } else {
@@ -271,10 +267,6 @@ static int BuildFusion(Graph* graph,
                   Cell,
                   fc_out,
                   nullptr);
-      // Remove unneeded nodes.
-      // TODO(jczaja): Proper removing of lookup table
-      // std::unordered_set<const Node*> marked_nodes({lookup_table, W, mul,
-      // lstm});
       std::unordered_set<const Node*> marked_nodes({mul, lstm});
       GraphSafeRemoveNodes(graph, marked_nodes);
     }
...
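Editor's note: for readers unfamiliar with the fuse passes above, the clean-up step amounts to collecting the operator nodes that were replaced by the fused op into a set and asking the graph to drop them, while leaving inputs that may still feed other operators (here, the lookup_table output, which can drive the reverse-direction LSTM) untouched. A toy sketch in plain standard C++, not the Paddle IR API; node names and the `fused_lstm` placeholder are illustrative only:

```cpp
#include <cstdio>
#include <string>
#include <unordered_set>
#include <vector>

int main() {
  // Stand-in "graph": just a list of node names.
  std::vector<std::string> nodes = {"lookup_table", "mul", "lstm",
                                    "elementwise_add", "fc_bias", "fused_lstm"};
  // Nodes the pass has replaced and wants removed (cf. marked_nodes above).
  std::unordered_set<std::string> marked = {"mul", "lstm", "elementwise_add",
                                            "fc_bias"};
  std::vector<std::string> remaining;
  for (const auto& n : nodes)
    if (marked.count(n) == 0) remaining.push_back(n);
  for (const auto& n : remaining) std::printf("%s ", n.c_str());
  // prints: lookup_table fused_lstm
}
```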
@@ -196,7 +196,7 @@ int FCGRUFusePass::BuildFusion(Graph* graph,
     SET_IN(WeightH, weight_h);
     SET_IN(Bias, bias);
 #undef SET_IN
-    // TODO(grygielski): Add H0 to the pass
+    // H0 is required for oneDNN and optional in PaddlePaddle
     op_desc.SetInput("H0", {});
     op_desc.SetOutput("Hidden", {hidden->Name()});
     op_desc.SetAttr("is_reverse", gru->Op()->GetAttr("is_reverse"));
...
@@ -1925,7 +1925,6 @@ struct FusionGru : public PatternBase {
 struct FusionLSTM : public PatternBase {
   FusionLSTM(PDPattern* pattern, const std::string& name_scope)
       : PatternBase(pattern, name_scope, "fusion_lstm") {}
-  // TODO(lidanqing): Is it enough to detect fusion_lstm with these things
   PDNode* operator()();
   // declare op
...
@@ -1033,7 +1033,6 @@ void CPUQuantizePass::QuantizeElementwise(
     auto input_x_scale = GetScaleValueForNode(elementwise_x, &is_x_unsigned);
     auto input_y_scale = GetScaleValueForNode(elementwise_y, &is_y_unsigned);
-    // TODO(sfraczek): add support for different signness
     if (is_x_unsigned != is_y_unsigned) {
       MarkAndLogCannotQuantizeOp(
           elementwise_op, "Elementwise inputs must be of the same type.");
...
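Editor's note: as background for the guard above, int8 kernels quantize either to signed (s8) or unsigned (u8) integers, and the two use different grids and scales, so an elementwise kernel cannot mix one input of each without an extra reorder. A minimal illustration, assuming the common convention of scale = 127/max|x| for signed and 255/max|x| for unsigned data; the real pass obtains its scales elsewhere, and the numbers here are only to show the mismatch:

```cpp
#include <cmath>
#include <cstdio>

int main() {
  const float max_abs = 2.0f;              // max |value| observed for a tensor
  const float s8_scale = 127.0f / max_abs;  // signed int8 grid: [-128, 127]
  const float u8_scale = 255.0f / max_abs;  // unsigned int8 grid: [0, 255]

  const float v = 1.5f;
  const int as_s8 = static_cast<int>(std::round(v * s8_scale));
  const int as_u8 = static_cast<int>(std::round(v * u8_scale));
  // The same float lands on different integer values under the two schemes.
  std::printf("1.5f -> s8 %d (scale %.1f), u8 %d (scale %.1f)\n",
              as_s8, s8_scale, as_u8, u8_scale);
  // prints: 1.5f -> s8 95 (scale 63.5), u8 191 (scale 127.5)
}
```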
@@ -141,8 +141,7 @@ bool CPUQuantizeSquashPass::IsDequantizeQuantizeIncompatible(
   bool is_input_signed =
       dequant_op->Op()->GetAttrIfExists<bool>("is_negative_input");
-  /* TODO(sfraczek): remove elementwise from this condition when BinaryMKLDNN
-     kernel will support two different input data types */
+  // BinaryOneDNN doesn't support two different input data types
   bool is_next_op_concat_or_elementwise =
       next_op->Op()->Type() == "concat" ||
       next_op->Op()->Type().find("elementwise") == 0;
...
@@ -645,21 +645,18 @@ if(WITH_MKLDNN)
       ${INT8_MOBILENETV2_MODEL_DIR} ${IMAGENET_DATA_PATH})
     # resnet101 int8
-    # TODO(grygielski) Enable after MKL-DNN 1.0 merge
     set(INT8_RESNET101_MODEL_DIR "${INT8_DATA_DIR}/resnet101")
     download_int8_data_without_verify(${INT8_RESNET101_MODEL_DIR}
                                       "Res101_int8_model.tar.gz")
     # inference_analysis_api_int8_test_run(test_analyzer_int8_resnet101 ${INT8_IMG_CLASS_TEST_APP} ${INT8_RESNET101_MODEL_DIR} ${IMAGENET_DATA_PATH})
     # vgg16 int8
-    # TODO(grygielski) Enable after MKL-DNN 1.0 merge
     set(INT8_VGG16_MODEL_DIR "${INT8_DATA_DIR}/vgg16")
     download_int8_data_without_verify(${INT8_VGG16_MODEL_DIR}
                                       "VGG16_int8_model.tar.gz")
     # inference_analysis_api_int8_test_run(test_analyzer_int8_vgg16 ${INT8_IMG_CLASS_TEST_APP} ${INT8_VGG16_MODEL_DIR} ${IMAGENET_DATA_PATH})
     # vgg19 int8
-    # TODO(grygielski) Enable after MKL-DNN 1.0 merge
     set(INT8_VGG19_MODEL_DIR "${INT8_DATA_DIR}/vgg19")
     download_int8_data_without_verify(${INT8_VGG19_MODEL_DIR}
                                       "VGG19_int8_model.tar.gz")
...
@@ -83,8 +83,8 @@ class ConditionalBlockOp : public ConditionalOp {
     auto &cur_scope = *scopes->front();
 #ifdef PADDLE_WITH_MKLDNN
-    // (jczaja) Executor on being destroyed clears oneDNN cache and
-    // reset registered model data layout. This is unwanted for nested
+    // Executor on being destroyed clears oneDNN cache and resets
+    // registered model data layout. This is unwanted for nested
     // Executors (executors declared inside control ops)
     platform::DontClearMKLDNNCache(dev_place);
 #endif
...
@@ -109,8 +109,8 @@ class WhileOp : public framework::OperatorBase {
                 ".\n"));
 #ifdef PADDLE_WITH_MKLDNN
-    // (jczaja) Executor on being destroyed clears oneDNN cache and
-    // resets registered model data layout. This is unwanted for nested
+    // Executor on being destroyed clears oneDNN cache and resets
+    // registered model data layout. This is unwanted for nested
     // Executors (executors declared inside control ops)
     platform::DontClearMKLDNNCache(dev_place);
 #endif
...
@@ -436,7 +436,6 @@ void ConvOpGrad::InferShape(framework::InferShapeContext* ctx) const {
 phi::KernelKey ConvOpGrad::GetExpectedKernelType(
     const framework::ExecutionContext& ctx) const {
-  // TODO(pzelazko-intel): enable MKLDNN layout when it's ready
   auto data_type = OperatorWithKernel::IndicateVarDataType(ctx, "Input");
   return phi::KernelKey(data_type, ctx.GetPlace());
 }
...
@@ -109,7 +109,7 @@ class ElementwiseOp : public framework::OperatorWithKernel {
     std::vector<int> y_dims_array(max_dim);
     std::vector<int> out_dims_array(max_dim);
 #ifdef PADDLE_WITH_MKLDNN
-    // (jczaja): Broadcasting of dims has to be done on Paddle shapes (NHWC)
+    // Broadcasting of dims has to be done on Paddle shapes (NHWC)
     // if model is using NHWC and any of shapes in at least 3D
     bool should_rotate =
         ctx->IsRunMKLDNNKernel() &&
...
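Editor's note: the arrays above are sized for a right-aligned, NumPy-style broadcast of the two input shapes; the retained comment explains that, under oneDNN with an NHWC model, both shapes are first rotated back to Paddle's order before broadcasting. A self-contained sketch of the plain broadcast rule itself (the NHWC rotation is omitted and the helper name is illustrative, not a Paddle function):

```cpp
#include <algorithm>
#include <cstdio>
#include <stdexcept>
#include <vector>

std::vector<int> BroadcastDims(std::vector<int> x, std::vector<int> y) {
  const size_t max_dim = std::max(x.size(), y.size());
  // Left-pad the shorter shape with 1s so both have max_dim entries.
  x.insert(x.begin(), max_dim - x.size(), 1);
  y.insert(y.begin(), max_dim - y.size(), 1);
  std::vector<int> out(max_dim);
  for (size_t i = 0; i < max_dim; ++i) {
    if (x[i] != y[i] && x[i] != 1 && y[i] != 1)
      throw std::runtime_error("shapes are not broadcastable");
    out[i] = std::max(x[i], y[i]);
  }
  return out;
}

int main() {
  auto out = BroadcastDims({8, 16, 1, 32}, {16, 32, 1});
  for (int d : out) std::printf("%d ", d);  // prints: 8 16 32 32
}
```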
@@ -191,9 +191,7 @@ class RNNMKLDNNHandler : public phi::funcs::OneDNNHandlerT<T, T_alg> {
     return memory_p;
   }
-  // TODO(grygielski) H0 is for now persistable
-  // TODO(jczaja) H0 should be updated each iter and of T type (Fusion pass does
-  // not support in yet)
+  // H0 is for now persistable
   template <typename U>
   std::shared_ptr<dnnl::memory> AcquireH0Memory(const phi::DenseTensor* h0) {
     const std::string h0_key = memory_key_ + "@h0";
...
@@ -328,7 +328,7 @@ class MultiGRUHandler {
     return out_mem;
   }
-  // TODO(grygielski) H0 is for now persistable
+  // H0 is for now persistable
   std::shared_ptr<dnnl::memory> AcquireH0Memory(int layer, Direction dir) {
     auto key = memory_key_;
     key.append("@h0").append(dir2str(dir)).append(std::to_string(layer));
...
@@ -583,8 +583,8 @@ class MatMulOp : public framework::OperatorWithKernel {
     auto dim_y = GetDimForInput(*context, "Y");
 #ifdef PADDLE_WITH_MKLDNN
-    // (jczaja): For NHWC execution output shape needs
-    // to be computed like instead x*y we are to do y*x
+    // For NHWC execution output shape needs to be
+    // computed like instead x*y we are to do y*x
     bool channelwise_onednn =
         context->IsRunMKLDNNKernel() &&
         (phi::OneDNNContext::tls().get_cur_paddle_data_layout() ==
...
@@ -156,7 +156,6 @@ class LayerNormMKLDNNOpKernel : public paddle::framework::OpKernel<T> {
 }  // namespace operators
 }  // namespace paddle
-// TODO(jczaja): Enable FP32 when performance is good
 namespace ops = paddle::operators;
 REGISTER_OP_KERNEL(layer_norm,
                    MKLDNN,
...
@@ -31,7 +31,7 @@ namespace operators {
 bool CanMKLDNNSupportPool(const framework::ExecutionContext& ctx) {
   if (ctx.Attr<bool>("adaptive") == false) return true;
-  // (jczaja): oneDNN is supporting only unchangable in size pool window
+  // oneDNN is supporting only unchangable in size pool window
   auto src_tz = phi::vectorize(ctx.Input<phi::DenseTensor>("X")->dims());
   if (!ctx.HasAttr("ksize")) {
     return false;
...
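Editor's note: adaptive pooling specifies the output size rather than the window, so it can be lowered to oneDNN's fixed-window pooling only when each input spatial dimension divides evenly by the requested output size, giving the same window at every position. A standalone sketch of that divisibility condition; the function name is illustrative, and the real CanMKLDNNSupportPool also handles missing attributes and other details:

```cpp
#include <cstdio>
#include <vector>

bool AdaptivePoolHasFixedWindow(const std::vector<int>& in_spatial,
                                const std::vector<int>& out_spatial) {
  if (in_spatial.size() != out_spatial.size()) return false;
  for (size_t i = 0; i < in_spatial.size(); ++i) {
    // If the input size is not a multiple of the output size, the pooling
    // window would have to vary across positions, which oneDNN cannot do.
    if (out_spatial[i] == 0 || in_spatial[i] % out_spatial[i] != 0)
      return false;
  }
  return true;
}

int main() {
  std::printf("%d\n", AdaptivePoolHasFixedWindow({32, 32}, {8, 8}));  // 1
  std::printf("%d\n", AdaptivePoolHasFixedWindow({32, 32}, {7, 7}));  // 0
}
```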
@@ -73,8 +73,7 @@ class SequencePoolKernel : public framework::OpKernel<T> {
     bool is_test =
         context.HasAttr("is_test") ? context.Attr<bool>("is_test") : false;
-    // Do not create index buffer for inference (is_test) mode
-    // TODO(jczaja): Skip index buffer creation for other devices eg. GPU
+    // Do not create index buffer for inference mode
     if (pooltype == "MAX" &&
         (is_test == false ||
          platform::is_cpu_place(context.GetPlace()) == false)) {
...
@@ -50,8 +50,7 @@ class OneDNNAXPYHandler {
  private:
   OneDNNAXPYHandler() = delete;
-  // (arogowie-intel) Private implementation idiom to hide dependency
-  // on OneDNN headers.
+  // Private implementation idiom to hide dependency on oneDNN headers.
   class Impl;
   // We need custom deleter, since the compiler is unable to parameterize
   // an allocator's default deleter due to incomple type.
...
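Editor's note: the hunk above only touches the comment on the "private implementation" (pimpl) idiom, but for reference here is a self-contained sketch of the idiom and of why a custom deleter (or an out-of-line destructor) is needed when a std::unique_ptr member holds an incomplete type. All names below are illustrative, not the real OneDNNAXPYHandler declarations.

```cpp
// ---- header-like part ----
#include <memory>

class AxpyLikeHandler {
 public:
  AxpyLikeHandler();
  ~AxpyLikeHandler();           // defined where Impl is complete
  void Run(float alpha, const float* x, float* y, int n);

 private:
  class Impl;                   // hides the oneDNN-facing details
  struct ImplDeleter {          // deleter body also defined out of line
    void operator()(Impl* p) const;
  };
  std::unique_ptr<Impl, ImplDeleter> impl_;
};

// ---- source-like part (Impl is a complete type from here on) ----
class AxpyLikeHandler::Impl {
 public:
  void Run(float alpha, const float* x, float* y, int n) {
    for (int i = 0; i < n; ++i) y[i] += alpha * x[i];  // stand-in for oneDNN
  }
};

void AxpyLikeHandler::ImplDeleter::operator()(Impl* p) const { delete p; }
AxpyLikeHandler::AxpyLikeHandler() : impl_(new Impl()) {}
AxpyLikeHandler::~AxpyLikeHandler() = default;
void AxpyLikeHandler::Run(float alpha, const float* x, float* y, int n) {
  impl_->Run(alpha, x, y, n);
}

int main() {
  float x[3] = {1.f, 2.f, 3.f}, y[3] = {0.f, 0.f, 0.f};
  AxpyLikeHandler handler;
  handler.Run(2.f, x, y, 3);    // y becomes {2, 4, 6}
}
```

Keeping the destructor and deleter definitions out of the header is what keeps the oneDNN headers out of the public interface, which is the point of the idiom here.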
@@ -91,7 +91,6 @@ class TestMKLDNNElementwiseDivOp4(TestMKLDNNElementwiseDivOp):
         self.y = np.random.uniform(1, 2, [4, 32]).astype(self.dtype)
         self.out = np.divide(self.x, self.y)
-    # TODO(piotrekobiIntel): Enable when grad is ready
     def test_check_grad_normal(self):
         pass

@@ -105,7 +104,6 @@ class TestMKLDNNElementwiseDivOp5(TestMKLDNNElementwiseDivOp):
         self.y = np.random.uniform(1, 2, [100]).astype(self.dtype)
         self.out = np.divide(self.x, self.y)
-    # TODO(piotrekobiIntel): Enable when grad is ready
     def test_check_grad_normal(self):
         pass
...
@@ -67,16 +67,6 @@ class TestSplitSectionsBF16OneDNNOp(OpTest):
         self.check_output_with_place(core.CPUPlace())
-    # TODO jakpiase enable grad check(concat op)
-    # def test_check_grad(self):
-    #     self.check_grad_with_place(
-    #         core.CPUPlace(), ["X"],
-    #         "Out",
-    #         chck_dgrph=
-    #         user_defined_grads=[self.inputs['X']],
-    #         user_defined_grad_outputs=self.out[0])

 class TestSplitNumBF16OneDNNOp(TestSplitSectionsBF16OneDNNOp):
     def init_data(self):
         self.x = np.random.random((4, 8, 5, 3)).astype("uint16")
...