diff --git a/paddle/fluid/framework/ir/embedding_fc_lstm_fuse_pass.cc b/paddle/fluid/framework/ir/embedding_fc_lstm_fuse_pass.cc
index 2e8cd7f75ea0f75ad253e3bbb371a5de0005b35a..68992abad3c89b5107e11d280b3881a816b9a366 100644
--- a/paddle/fluid/framework/ir/embedding_fc_lstm_fuse_pass.cc
+++ b/paddle/fluid/framework/ir/embedding_fc_lstm_fuse_pass.cc
@@ -36,9 +36,8 @@ static int BuildFusion(Graph* graph,
                   ->assert_is_op_input("lookup_table_v2")
                   ->assert_var_not_persistable();
   patterns::Embedding embedding_pattern(pattern, name_scope);
-  // TODO(jczaja): Intermediate can only be for val that are not used anywhere
-  //               but lookup table output may go into other LSTM (for reverse
-  //               direction)
+  // Only values that are not used anywhere else can be intermediate, but the
+  // lookup table output may go into another LSTM (for the reverse direction)
   auto* embedding_out = embedding_pattern(x);
   patterns::FC fc_pattern(pattern, name_scope);
 
@@ -227,13 +226,13 @@ static int BuildFusion(Graph* graph,
     GET_IR_NODE_FROM_SUBGRAPH(w, w, fc_pattern);
     GET_IR_NODE_FROM_SUBGRAPH(mul, mul, fc_pattern);
 
-    // TODO(jczaja): Add support for is_sparse / is_distributed
     auto is_sparse =
         PADDLE_GET_CONST(bool, lookup_table->Op()->GetAttr("is_sparse"));
     auto is_distributed =
         PADDLE_GET_CONST(bool, lookup_table->Op()->GetAttr("is_distributed"));
 
-    if (is_sparse == true || is_distributed == true) {
+    if (is_sparse || is_distributed) {
+      VLOG(4) << "Only dense embedding is supported in oneDNN";
       return;
     }
 
@@ -252,10 +251,7 @@ static int BuildFusion(Graph* graph,
                              Cell,
                              fc_out,
                              fc_bias);
-      // Remove unneeded nodes.
-      // TODO(jczaja): Proper removing of lookup table
       std::unordered_set<const Node*> marked_nodes(
-          // {lookup_table, mul, lstm, elementwise_add, fc_bias, W});
           {mul, lstm, elementwise_add, fc_bias});
       GraphSafeRemoveNodes(graph, marked_nodes);
     } else {
@@ -271,10 +267,6 @@ static int BuildFusion(Graph* graph,
                              Cell,
                              fc_out,
                              nullptr);
-      // Remove unneeded nodes.
-      // TODO(jczaja): Proper removing of lookup table
-      // std::unordered_set<const Node*> marked_nodes({lookup_table, W, mul,
-      // lstm});
       std::unordered_set<const Node*> marked_nodes({mul, lstm});
       GraphSafeRemoveNodes(graph, marked_nodes);
     }
diff --git a/paddle/fluid/framework/ir/fc_gru_fuse_pass.cc b/paddle/fluid/framework/ir/fc_gru_fuse_pass.cc
index 5f05b41bdc538366351a8142a0fc07350690efa9..583e51dc931d211a68e621d55e1fe86e528cae0e 100644
--- a/paddle/fluid/framework/ir/fc_gru_fuse_pass.cc
+++ b/paddle/fluid/framework/ir/fc_gru_fuse_pass.cc
@@ -196,7 +196,7 @@ int FCGRUFusePass::BuildFusion(Graph* graph,
     SET_IN(WeightH, weight_h);
     SET_IN(Bias, bias);
 #undef SET_IN
-    // TODO(grygielski): Add H0 to the pass
+    // H0 is required for oneDNN and optional in PaddlePaddle
     op_desc.SetInput("H0", {});
     op_desc.SetOutput("Hidden", {hidden->Name()});
     op_desc.SetAttr("is_reverse", gru->Op()->GetAttr("is_reverse"));
diff --git a/paddle/fluid/framework/ir/graph_pattern_detector.h b/paddle/fluid/framework/ir/graph_pattern_detector.h
index 34eae981fc13fb47bb5645b36508b524f5ab8a94..1be0b15b04961a158c247bb78f7de46eb365b626 100644
--- a/paddle/fluid/framework/ir/graph_pattern_detector.h
+++ b/paddle/fluid/framework/ir/graph_pattern_detector.h
@@ -1925,7 +1925,6 @@ struct FusionGru : public PatternBase {
 struct FusionLSTM : public PatternBase {
   FusionLSTM(PDPattern* pattern, const std::string& name_scope)
       : PatternBase(pattern, name_scope, "fusion_lstm") {}
-  // TODO(lidanqing): Is it enough to detect fusion_lstm with these things
   PDNode* operator()();
 
   // declare op
diff --git a/paddle/fluid/framework/ir/mkldnn/cpu_quantize_pass.cc b/paddle/fluid/framework/ir/mkldnn/cpu_quantize_pass.cc
index dd0a90fc8dd0f4b285599d9e66432d778280833f..b25b17125363f43716985b2e45fba1d8abb2e858 100644
--- a/paddle/fluid/framework/ir/mkldnn/cpu_quantize_pass.cc
+++ b/paddle/fluid/framework/ir/mkldnn/cpu_quantize_pass.cc
@@ -1033,7 +1033,6 @@ void CPUQuantizePass::QuantizeElementwise(
     auto input_x_scale = GetScaleValueForNode(elementwise_x, &is_x_unsigned);
     auto input_y_scale = GetScaleValueForNode(elementwise_y, &is_y_unsigned);
 
-    // TODO(sfraczek): add support for different signness
     if (is_x_unsigned != is_y_unsigned) {
       MarkAndLogCannotQuantizeOp(
           elementwise_op, "Elementwise inputs must be of the same type.");
diff --git a/paddle/fluid/framework/ir/mkldnn/cpu_quantize_squash_pass.cc b/paddle/fluid/framework/ir/mkldnn/cpu_quantize_squash_pass.cc
index aa232ce5409b546c70737ced75e9c8ed8465317b..052c26ba8e2681d9a755549f79901484be89bca6 100644
--- a/paddle/fluid/framework/ir/mkldnn/cpu_quantize_squash_pass.cc
+++ b/paddle/fluid/framework/ir/mkldnn/cpu_quantize_squash_pass.cc
@@ -141,8 +141,7 @@ bool CPUQuantizeSquashPass::IsDequantizeQuantizeIncompatible(
   bool is_input_signed =
       dequant_op->Op()->GetAttrIfExists<bool>("is_negative_input");
 
-  /* TODO(sfraczek): remove elementwise from this condition when BinaryMKLDNN
-   kernel will support two different input data types */
+  // BinaryOneDNN doesn't support two different input data types
   bool is_next_op_concat_or_elementwise =
       next_op->Op()->Type() == "concat" ||
       next_op->Op()->Type().find("elementwise") == 0;
diff --git a/paddle/fluid/inference/tests/api/CMakeLists.txt b/paddle/fluid/inference/tests/api/CMakeLists.txt
index 4be16d0d830972784c55cda179beae6ef1c34a08..693f0582c5d523325247ec450f763be393cadf12 100644
--- a/paddle/fluid/inference/tests/api/CMakeLists.txt
+++ b/paddle/fluid/inference/tests/api/CMakeLists.txt
@@ -645,21 +645,18 @@ if(WITH_MKLDNN)
     ${INT8_MOBILENETV2_MODEL_DIR} ${IMAGENET_DATA_PATH})
 
   # resnet101 int8
-  # TODO(grygielski) Enable after MKL-DNN 1.0 merge
   set(INT8_RESNET101_MODEL_DIR "${INT8_DATA_DIR}/resnet101")
   download_int8_data_without_verify(${INT8_RESNET101_MODEL_DIR}
                                     "Res101_int8_model.tar.gz")
   #   inference_analysis_api_int8_test_run(test_analyzer_int8_resnet101 ${INT8_IMG_CLASS_TEST_APP} ${INT8_RESNET101_MODEL_DIR} ${IMAGENET_DATA_PATH})
 
   # vgg16 int8
-  # TODO(grygielski) Enable after MKL-DNN 1.0 merge
   set(INT8_VGG16_MODEL_DIR "${INT8_DATA_DIR}/vgg16")
   download_int8_data_without_verify(${INT8_VGG16_MODEL_DIR}
                                     "VGG16_int8_model.tar.gz")
   #  inference_analysis_api_int8_test_run(test_analyzer_int8_vgg16 ${INT8_IMG_CLASS_TEST_APP} ${INT8_VGG16_MODEL_DIR} ${IMAGENET_DATA_PATH})
 
   # vgg19 int8
-  # TODO(grygielski) Enable after MKL-DNN 1.0 merge
   set(INT8_VGG19_MODEL_DIR "${INT8_DATA_DIR}/vgg19")
   download_int8_data_without_verify(${INT8_VGG19_MODEL_DIR}
                                     "VGG19_int8_model.tar.gz")
diff --git a/paddle/fluid/operators/controlflow/conditional_block_op.cc b/paddle/fluid/operators/controlflow/conditional_block_op.cc
index 436d13e999d16245c5982c32260c40abeaa0c237..4544dade327e0fc8ded23b779259d27facbb2f91 100644
--- a/paddle/fluid/operators/controlflow/conditional_block_op.cc
+++ b/paddle/fluid/operators/controlflow/conditional_block_op.cc
@@ -83,8 +83,8 @@ class ConditionalBlockOp : public ConditionalOp {
 
       auto &cur_scope = *scopes->front();
 #ifdef PADDLE_WITH_MKLDNN
-      // (jczaja) Executor on being destroyed clears oneDNN cache and
-      // reset registered model data layout. This is unwanted for nested
+      // Executor on being destroyed clears oneDNN cache and resets
+      // registered model data layout. This is unwanted for nested
       // Executors (executors declared inside control ops)
       platform::DontClearMKLDNNCache(dev_place);
 #endif
diff --git a/paddle/fluid/operators/controlflow/while_op.cc b/paddle/fluid/operators/controlflow/while_op.cc
index 413764da64d58373c530e1f98a2b12b89b5ba525..3017a1e0fc4b7900192125fcc6b4a78217f1f693 100644
--- a/paddle/fluid/operators/controlflow/while_op.cc
+++ b/paddle/fluid/operators/controlflow/while_op.cc
@@ -109,8 +109,8 @@ class WhileOp : public framework::OperatorBase {
             ".\n"));
 
 #ifdef PADDLE_WITH_MKLDNN
-    // (jczaja) Executor on being destroyed clears oneDNN cache and
-    // resets registered model data layout. This is unwanted for nested
+    // Executor on being destroyed clears oneDNN cache and resets
+    // registered model data layout. This is unwanted for nested
     // Executors (executors declared inside control ops)
     platform::DontClearMKLDNNCache(dev_place);
 #endif
diff --git a/paddle/fluid/operators/conv_op.cc b/paddle/fluid/operators/conv_op.cc
index 710f57b4280a407821e75063fd84a1f6325a6858..9b388f74d6ee21b9371b37f6aeb273dc1a565c23 100644
--- a/paddle/fluid/operators/conv_op.cc
+++ b/paddle/fluid/operators/conv_op.cc
@@ -436,7 +436,6 @@ void ConvOpGrad::InferShape(framework::InferShapeContext* ctx) const {
 
 phi::KernelKey ConvOpGrad::GetExpectedKernelType(
     const framework::ExecutionContext& ctx) const {
-  // TODO(pzelazko-intel): enable MKLDNN layout when it's ready
   auto data_type = OperatorWithKernel::IndicateVarDataType(ctx, "Input");
   return phi::KernelKey(data_type, ctx.GetPlace());
 }
diff --git a/paddle/fluid/operators/elementwise/elementwise_op.h b/paddle/fluid/operators/elementwise/elementwise_op.h
index 7048cf50293c926ebb404069495ccd7cd66227f6..4ef896ff01b87e414c3b687165a433099ab73e0c 100644
--- a/paddle/fluid/operators/elementwise/elementwise_op.h
+++ b/paddle/fluid/operators/elementwise/elementwise_op.h
@@ -109,7 +109,7 @@ class ElementwiseOp : public framework::OperatorWithKernel {
       std::vector<int> y_dims_array(max_dim);
       std::vector<int> out_dims_array(max_dim);
 #ifdef PADDLE_WITH_MKLDNN
-      // (jczaja): Broadcasting of dims has to be done on Paddle shapes (NHWC)
+      // Broadcasting of dims has to be done on Paddle shapes (NHWC)
       // if model is using NHWC and any of shapes in at least 3D
       bool should_rotate =
           ctx->IsRunMKLDNNKernel() &&
diff --git a/paddle/fluid/operators/fused/mkldnn/fusion_rnn_mkldnn.h b/paddle/fluid/operators/fused/mkldnn/fusion_rnn_mkldnn.h
index b45af1b4bc9ad6610862bfd6e9cb12dc778fac52..f8854d3d7b4898e5b2a97c8f68f28c42f555ce8f 100644
--- a/paddle/fluid/operators/fused/mkldnn/fusion_rnn_mkldnn.h
+++ b/paddle/fluid/operators/fused/mkldnn/fusion_rnn_mkldnn.h
@@ -191,9 +191,7 @@ class RNNMKLDNNHandler : public phi::funcs::OneDNNHandlerT<T, T_alg> {
     return memory_p;
   }
 
-  // TODO(grygielski) H0 is for now persistable
-  // TODO(jczaja) H0 should be updated each iter and of T type (Fusion pass does
-  // not support in yet)
+  // H0 is for now persistable
   template <typename U>
   std::shared_ptr<dnnl::memory> AcquireH0Memory(const phi::DenseTensor* h0) {
     const std::string h0_key = memory_key_ + "@h0";
diff --git a/paddle/fluid/operators/fused/mkldnn/multi_gru_mkldnn_op.cc b/paddle/fluid/operators/fused/mkldnn/multi_gru_mkldnn_op.cc
index 01b12af1a22c586c2a5b9cff1a623d9bc38c7a03..dba3087f1a01bf73de9307cae30594384ae2c755 100644
--- a/paddle/fluid/operators/fused/mkldnn/multi_gru_mkldnn_op.cc
+++ b/paddle/fluid/operators/fused/mkldnn/multi_gru_mkldnn_op.cc
@@ -328,7 +328,7 @@ class MultiGRUHandler {
     return out_mem;
   }
 
-  // TODO(grygielski) H0 is for now persistable
+  // H0 is for now persistable
   std::shared_ptr<dnnl::memory> AcquireH0Memory(int layer, Direction dir) {
     auto key = memory_key_;
     key.append("@h0").append(dir2str(dir)).append(std::to_string(layer));
diff --git a/paddle/fluid/operators/matmul_op.cc b/paddle/fluid/operators/matmul_op.cc
index 5c7474f65ee0e6bb2e20f37ec94e77be85ba2914..bc5e5aa6ea1514b3c8fb50acd542179532bc5832 100644
--- a/paddle/fluid/operators/matmul_op.cc
+++ b/paddle/fluid/operators/matmul_op.cc
@@ -583,8 +583,8 @@ class MatMulOp : public framework::OperatorWithKernel {
     auto dim_y = GetDimForInput(*context, "Y");
 
 #ifdef PADDLE_WITH_MKLDNN
-    // (jczaja): For NHWC execution output shape needs
-    // to be computed like instead x*y we are to do y*x
+    // For NHWC execution the output shape needs to be
+    // computed as if we were doing y*x instead of x*y
     bool channelwise_onednn =
         context->IsRunMKLDNNKernel() &&
         (phi::OneDNNContext::tls().get_cur_paddle_data_layout() ==
diff --git a/paddle/fluid/operators/mkldnn/layer_norm_mkldnn_op.cc b/paddle/fluid/operators/mkldnn/layer_norm_mkldnn_op.cc
index c41a2aabeef50df1816b4f3d6b7a9926a96339c5..793f7b35fad13ec5d032ac95598018c582c7146b 100644
--- a/paddle/fluid/operators/mkldnn/layer_norm_mkldnn_op.cc
+++ b/paddle/fluid/operators/mkldnn/layer_norm_mkldnn_op.cc
@@ -156,7 +156,6 @@ class LayerNormMKLDNNOpKernel : public paddle::framework::OpKernel<T> {
 }  // namespace operators
 }  // namespace paddle
 
-// TODO(jczaja): Enable FP32 when performance is good
 namespace ops = paddle::operators;
 REGISTER_OP_KERNEL(layer_norm,
                    MKLDNN,
diff --git a/paddle/fluid/operators/pool_op.cc b/paddle/fluid/operators/pool_op.cc
index b03f2954d2c2053fc8e5e52feb2c1315d51a1999..f6ba1c624b5c6e9f198dd3949a0e86062da9a5b1 100644
--- a/paddle/fluid/operators/pool_op.cc
+++ b/paddle/fluid/operators/pool_op.cc
@@ -31,7 +31,7 @@ namespace operators {
 
 bool CanMKLDNNSupportPool(const framework::ExecutionContext& ctx) {
   if (ctx.Attr<bool>("adaptive") == false) return true;
-  // (jczaja): oneDNN is supporting only unchangable in size pool window
+  // oneDNN supports only a pool window that is unchangeable in size
   auto src_tz = phi::vectorize(ctx.Input<phi::DenseTensor>("X")->dims());
   if (!ctx.HasAttr("ksize")) {
     return false;
diff --git a/paddle/fluid/operators/sequence_ops/sequence_pool_op.h b/paddle/fluid/operators/sequence_ops/sequence_pool_op.h
index 78acb4eef28a7a313bb4bb53c2ea704f60da286e..ddf0d496a77fb51580d28b2f7eb889d90eb2b12a 100644
--- a/paddle/fluid/operators/sequence_ops/sequence_pool_op.h
+++ b/paddle/fluid/operators/sequence_ops/sequence_pool_op.h
@@ -73,8 +73,7 @@ class SequencePoolKernel : public framework::OpKernel<T> {
     bool is_test =
         context.HasAttr("is_test") ? context.Attr<bool>("is_test") : false;
 
-    // Do not create index buffer for inference (is_test) mode
-    // TODO(jczaja): Skip index buffer creation for other devices eg. GPU
+    // Do not create index buffer for inference mode
     if (pooltype == "MAX" &&
         (is_test == false ||
          platform::is_cpu_place(context.GetPlace()) == false)) {
diff --git a/paddle/phi/backends/onednn/axpy_handler.h b/paddle/phi/backends/onednn/axpy_handler.h
index dd9a8108f59b05fb27fec41026961aba60dc167b..f9c21187ddb91e6f6e3934c28bbe9b7e73164092 100644
--- a/paddle/phi/backends/onednn/axpy_handler.h
+++ b/paddle/phi/backends/onednn/axpy_handler.h
@@ -50,8 +50,7 @@ class OneDNNAXPYHandler {
 
  private:
   OneDNNAXPYHandler() = delete;
-  // (arogowie-intel) Private implementation idiom to hide dependency
-  // on OneDNN headers.
+  // Private implementation idiom to hide dependency on oneDNN headers.
   class Impl;
   // We need custom deleter, since the compiler is unable to parameterize
   // an allocator's default deleter due to incomple type.
diff --git a/python/paddle/fluid/tests/unittests/mkldnn/test_elementwise_div_mkldnn_op.py b/python/paddle/fluid/tests/unittests/mkldnn/test_elementwise_div_mkldnn_op.py
index 741026d441d954482135f078a8f73688bc06dacc..0dafa35d90b8ddcef78fbc238994f4929503a65d 100644
--- a/python/paddle/fluid/tests/unittests/mkldnn/test_elementwise_div_mkldnn_op.py
+++ b/python/paddle/fluid/tests/unittests/mkldnn/test_elementwise_div_mkldnn_op.py
@@ -91,7 +91,6 @@ class TestMKLDNNElementwiseDivOp4(TestMKLDNNElementwiseDivOp):
         self.y = np.random.uniform(1, 2, [4, 32]).astype(self.dtype)
         self.out = np.divide(self.x, self.y)
 
-    # TODO(piotrekobiIntel): Enable when grad is ready
     def test_check_grad_normal(self):
         pass
 
@@ -105,7 +104,6 @@ class TestMKLDNNElementwiseDivOp5(TestMKLDNNElementwiseDivOp):
         self.y = np.random.uniform(1, 2, [100]).astype(self.dtype)
         self.out = np.divide(self.x, self.y)
 
-    # TODO(piotrekobiIntel): Enable when grad is ready
     def test_check_grad_normal(self):
         pass
 
diff --git a/python/paddle/fluid/tests/unittests/mkldnn/test_split_bf16_mkldnn_op.py b/python/paddle/fluid/tests/unittests/mkldnn/test_split_bf16_mkldnn_op.py
index 2ed0dcdc5da42e7d7bd92d16de849e419485c8ce..d5ea69e9a4b9f38b0b455a82b38fae0328c0fcd7 100644
--- a/python/paddle/fluid/tests/unittests/mkldnn/test_split_bf16_mkldnn_op.py
+++ b/python/paddle/fluid/tests/unittests/mkldnn/test_split_bf16_mkldnn_op.py
@@ -67,16 +67,6 @@ class TestSplitSectionsBF16OneDNNOp(OpTest):
         self.check_output_with_place(core.CPUPlace())
 
 
-# TODO jakpiase enable grad check(concat op)
-#    def test_check_grad(self):
-#        self.check_grad_with_place(
-#            core.CPUPlace(), ["X"],
-#            "Out",
-#            chck_dgrph=
-#            user_defined_grads=[self.inputs['X']],
-#            user_defined_grad_outputs=self.out[0])
-
-
 class TestSplitNumBF16OneDNNOp(TestSplitSectionsBF16OneDNNOp):
     def init_data(self):
         self.x = np.random.random((4, 8, 5, 3)).astype("uint16")