diff --git a/mace/core/memory_optimizer.cc b/mace/core/memory_optimizer.cc
index 004fb1a927ae9a15ad733ebcf61918c4983f99e0..0136e3f130e679a13f3e50ccc86c6c4bfda1e4f6 100644
--- a/mace/core/memory_optimizer.cc
+++ b/mace/core/memory_optimizer.cc
@@ -79,12 +79,10 @@ MemoryBlock MemoryOptimizer::CreateMemoryBlock(
               *op_def, "buffer_type", OpenCLBufferType::IN_OUT_CHANNEL));
     }
     std::vector<size_t> image_shape;
-    if (shape.size() == 1) {
-      shape = {shape[0], 1, 1, 1};
-    } else if (shape.size() == 2) {
+    if (shape.size() == 2) {
       shape = {shape[0], 1, 1, shape[1]};
     } else {
-      MACE_CHECK(shape.size() == 4) << "GPU only support 1D/2D/4D input";
+      MACE_CHECK(shape.size() == 4) << "GPU only support 2D/4D input";
     }
     OpenCLUtil::CalImage2DShape(shape, buffer_type, &image_shape);
     block.set_x(image_shape[0]);
diff --git a/mace/ops/concat.cc b/mace/ops/concat.cc
index 6b2ac58a23e3ebbcb59e72300b682cd809263cca..8d4248570f7453002cc68024cd4017208da7e284 100644
--- a/mace/ops/concat.cc
+++ b/mace/ops/concat.cc
@@ -29,22 +29,21 @@ class ConcatOpBase : public Operation {
  public:
   explicit ConcatOpBase(OpConstructContext *context)
       : Operation(context),
-        axis_(Operation::GetOptionalArg<int>("axis", 3)),
-        checked_(false) {}
+        axis_(Operation::GetOptionalArg<int>("axis", 3)) {}
 
  protected:
-  void Validate() {
+  int FormatAxis() {
     const int32_t input_dims = this->Input(0)->dim_size();
     axis_ =
         axis_ < 0 ? axis_ + input_dims : axis_;
     MACE_CHECK((0 <= axis_ && axis_ < input_dims),
                "Expected concatenating axis in the range [", -input_dims, ", ",
                input_dims, "], but got ", axis_);
+    return axis_;
   }
 
  protected:
   int axis_;
-  bool checked_;
 };
 
 template <DeviceType D, class T>
@@ -54,20 +53,17 @@ template <typename T>
 class ConcatOp<DeviceType::CPU, T> : public ConcatOpBase {
  public:
   explicit ConcatOp(OpConstructContext *context)
-      : ConcatOpBase(context) {}
+      : ConcatOpBase(context),
+        has_data_format_(Operation::GetOptionalArg<int>(
+            "has_data_format", 0) == 1) {}
 
   MaceStatus Run(OpContext *context) override {
     MACE_UNUSED(context);
-    if (!checked_) {
-      Validate();
-      auto has_df = Operation::GetOptionalArg<int>(
-          "has_data_format", 0);
-      if (has_df && this->Input(0)->dim_size() == 4) {
-        if (axis_ == 3) axis_ = 1;
-        else if (axis_ == 2) axis_ = 3;
-        else if (axis_ == 1) axis_ = 2;
-      }
-      checked_ = true;
+    int axis = FormatAxis();
+    if (has_data_format_ && this->Input(0)->dim_size() == 4) {
+      if (axis == 3) axis = 1;
+      else if (axis == 2) axis = 3;
+      else if (axis == 1) axis = 2;
     }
     const std::vector<const Tensor *> &inputs = this->Inputs();
     Tensor *output = this->Output(0);
@@ -76,7 +72,7 @@ class ConcatOp<DeviceType::CPU, T> : public ConcatOpBase {
 
     std::vector<index_t> output_shape(input0->shape());
     index_t inner_size = 1;
-    for (int i = 0; i < axis_; ++i) {
+    for (int i = 0; i < axis; ++i) {
       inner_size *= output_shape[i];
     }
     std::vector<index_t> outer_sizes(inputs_count, 0);
@@ -86,14 +82,14 @@ class ConcatOp<DeviceType::CPU, T> : public ConcatOpBase {
       MACE_CHECK(input->dim_size() == input0->dim_size(),
                  "Ranks of all input tensors must be same.");
       for (int j = 0; j < input->dim_size(); ++j) {
-        if (j == axis_) {
+        if (j == axis) {
           continue;
         }
         MACE_CHECK(input->dim(j) == input0->dim(j),
                    "Dimensions of inputs should equal except axis.");
       }
       outer_sizes[i] = input->size() / inner_size;
-      output_shape[axis_] += input->dim(axis_);
+      output_shape[axis] += input->dim(axis);
     }
     MACE_RETURN_IF_ERROR(output->Resize(output_shape));
 
@@ -119,6 +115,9 @@ class ConcatOp<DeviceType::CPU, T> : public ConcatOpBase {
 
     return MaceStatus::MACE_SUCCESS;
   }
+
+ private:
+  bool has_data_format_;
 };
 
 #ifdef MACE_ENABLE_QUANTIZE
@@ -130,7 +129,7 @@ class ConcatOp<DeviceType::CPU, uint8_t> : public ConcatOpBase {
 
   MaceStatus Run(OpContext *context) override {
     MACE_UNUSED(context);
-    Validate();
+    int axis = FormatAxis();
     const std::vector<const Tensor *> &inputs = this->Inputs();
     Tensor *output = this->Output(0);
     MACE_CHECK(output->scale() != 0);
@@ -139,7 +138,7 @@ class ConcatOp<DeviceType::CPU, uint8_t> : public ConcatOpBase {
 
     std::vector<index_t> output_shape(input0->shape());
     index_t inner_size = 1;
-    for (int i = 0; i < axis_; ++i) {
+    for (int i = 0; i < axis; ++i) {
       inner_size *= output_shape[i];
     }
     std::vector<index_t> outer_sizes(inputs_count, 0);
@@ -149,14 +148,14 @@ class ConcatOp<DeviceType::CPU, uint8_t> : public ConcatOpBase {
       MACE_CHECK(input->dim_size() == input0->dim_size(),
                  "Ranks of all input tensors must be same.");
       for (int j = 0; j < input->dim_size(); ++j) {
-        if (j == axis_) {
+        if (j == axis) {
           continue;
         }
         MACE_CHECK(input->dim(j) == input0->dim(j),
                    "Dimensions of inputs should equal except axis.");
       }
       outer_sizes[i] = input->size() / inner_size;
-      output_shape[axis_] += input->dim(axis_);
+      output_shape[axis] += input->dim(axis);
     }
     MACE_RETURN_IF_ERROR(output->Resize(output_shape));
 
@@ -200,15 +199,14 @@ class ConcatOp<DeviceType::GPU, T> : public ConcatOpBase {
   explicit ConcatOp(OpConstructContext *context)
       : ConcatOpBase(context) {
     if (context->device()->gpu_runtime()->UseImageMemory()) {
-      kernel_ = make_unique<opencl::image::ConcatKernel<T>>(axis_);
+      kernel_ = make_unique<opencl::image::ConcatKernel<T>>();
     } else {
       MACE_NOT_IMPLEMENTED;
     }
   }
   MaceStatus Run(OpContext *context) override {
-    Validate();
     Tensor *output = this->Output(0);
-    return kernel_->Compute(context, inputs_, output);
+    return kernel_->Compute(context, inputs_, FormatAxis(), output);
   }
 
  private:
diff --git a/mace/ops/one_hot.cc b/mace/ops/one_hot.cc
index 2077c6861ebffbecbf0f84572221ce0370db1a0c..1d243f202f1fa5ad65c4abd58892df2a31dd9155 100644
--- a/mace/ops/one_hot.cc
+++ b/mace/ops/one_hot.cc
@@ -16,9 +16,6 @@
 #include <memory>
 
 #include "mace/core/operator.h"
-#ifdef MACE_ENABLE_OPENCL
-#include "mace/ops/opencl/image/one_hot.h"
-#endif  // MACE_ENABLE_OPENCL
 
 namespace mace {
 namespace ops {
@@ -148,52 +145,9 @@ class OneHotOp<DeviceType::CPU, T> : public OneHotOpBase {
   }
 };
 
-#ifdef MACE_ENABLE_OPENCL
-template <typename T>
-class OneHotOp<DeviceType::GPU, T> : public OneHotOpBase {
- public:
-  explicit OneHotOp(OpConstructContext *context) : OneHotOpBase(context) {
-    if (context->device()->gpu_runtime()->UseImageMemory()) {
-      kernel_.reset(new opencl::image::OneHotKernel<T>(
-          depth_, on_value_, off_value_, axis_));
-    } else {
-      MACE_NOT_IMPLEMENTED;
-    }
-  }
-  MaceStatus Run(OpContext *context) override {
-    const Tensor *input = this->Input(0);
-    Tensor *output = this->Output(0);
-
-    return kernel_->Compute(context, input, output);
-  }
-
- private:
-  std::unique_ptr<OpenCLOneHotKernel> kernel_;
-};
-#endif  // MACE_ENABLE_OPENCL
 
 void RegisterOneHot(OpRegistryBase *op_registry) {
   MACE_REGISTER_OP(op_registry, "OneHot", OneHotOp, DeviceType::CPU, float);
-
-#ifdef MACE_ENABLE_OPENCL
-  MACE_REGISTER_OP(op_registry, "OneHot", OneHotOp, DeviceType::GPU, float);
-  MACE_REGISTER_OP(op_registry, "OneHot", OneHotOp, DeviceType::GPU, half);
-
-  MACE_REGISTER_OP_CONDITION(
-      op_registry,
-      OpConditionBuilder("OneHot")
-          .SetDevicePlacerFunc(
-              [](OpConstructContext *context) -> std::set<DeviceType> {
-                auto op = context->operator_def();
-                if (op->output_shape_size() != op->output_size()) {
-                  return { DeviceType::CPU, DeviceType::GPU };
-                }
-                if (op->output_shape(0).dims_size() != 2) {
-                  return { DeviceType::CPU };
-                }
-                return { DeviceType::CPU, DeviceType::GPU };
-              }));
-#endif  // MACE_ENABLE_OPENCL
 }
 
 }  // namespace ops
diff --git a/mace/ops/one_hot_benchmark.cc b/mace/ops/one_hot_benchmark.cc
index a536f61a3df2a71998e62dc70e2781ab4434e6f9..ecb5ca8a165315c2729dae9af9f6f80d80d4f812 100644
--- a/mace/ops/one_hot_benchmark.cc
+++ b/mace/ops/one_hot_benchmark.cc
@@ -61,9 +61,7 @@ void OneHot(int iters, int batch, int depth, int axis) {
   MACE_BENCHMARK(MACE_BM_ONE_HOT_##N##_##DEPTH##_##AXIS##_##TYPE##_##DEVICE)
 
 #define MACE_BM_ONE_HOT(N, DEPTH, AXIS)                 \
-  MACE_BM_ONE_HOT_MACRO(N, DEPTH, AXIS, float, CPU);    \
-  MACE_BM_ONE_HOT_MACRO(N, DEPTH, AXIS, float, GPU);    \
-  MACE_BM_ONE_HOT_MACRO(N, DEPTH, AXIS, half, GPU);
+  MACE_BM_ONE_HOT_MACRO(N, DEPTH, AXIS, float, CPU);
 
 MACE_BM_ONE_HOT(512, 16, 0);
 MACE_BM_ONE_HOT(512, 16, 1);
diff --git a/mace/ops/one_hot_test.cc b/mace/ops/one_hot_test.cc
index 07a9821c85d00b033d9e98336c7f68f51a4caf65..59594ceb82a552127be807a7a399c46fc13f5abc 100644
--- a/mace/ops/one_hot_test.cc
+++ b/mace/ops/one_hot_test.cc
@@ -45,7 +45,6 @@ void TestOneHot(const std::vector<index_t> &input_shape,
   .AddFloatArg("on_value", on_value)
   .AddFloatArg("off_value", off_value)
   .AddIntArg("axis", axis)
-  .AddIntArg("data_format", DataFormat::NHWC)
   .Finalize(net.NewOperatorDef());
 
   // Run
@@ -77,10 +76,6 @@ TEST_F(OneHotTest, Dim1) {
 
   TestOneHot<DeviceType::CPU, float>(input_shape, input_data, expected_shape,
                                      expected_data, 5, -1);
-  TestOneHot<DeviceType::GPU, float>(input_shape, input_data, expected_shape,
-                                     expected_data, 5, -1);
-  TestOneHot<DeviceType::GPU, half>(input_shape, input_data, expected_shape,
-                                    expected_data, 5, -1);
 
   expected_shape = {5, 10};
   expected_data  = {
@@ -93,10 +88,6 @@ TEST_F(OneHotTest, Dim1) {
 
   TestOneHot<DeviceType::CPU, float>(input_shape, input_data, expected_shape,
                                      expected_data, 5, 0);
-  TestOneHot<DeviceType::GPU, float>(input_shape, input_data, expected_shape,
-                                     expected_data, 5, 0);
-  TestOneHot<DeviceType::GPU, half>(input_shape, input_data, expected_shape,
-                                    expected_data, 5, 0);
 }
 
 TEST_F(OneHotTest, OnOffValue) {
@@ -111,10 +102,6 @@ TEST_F(OneHotTest, OnOffValue) {
 
   TestOneHot<DeviceType::CPU, float>(input_shape, input_data, expected_shape,
                                      expected_data, 6, -1, 7, 8);
-  TestOneHot<DeviceType::GPU, float>(input_shape, input_data, expected_shape,
-                                     expected_data, 6, -1, 7, 8);
-  TestOneHot<DeviceType::GPU, half>(input_shape, input_data, expected_shape,
-                                    expected_data, 6, -1, 7, 8);
 }
 
 TEST_F(OneHotTest, Dim2) {
diff --git a/mace/ops/opencl/cl/one_hot.cl b/mace/ops/opencl/cl/one_hot.cl
deleted file mode 100644
index 55645e73f6567c2bb9ecc21347f1da19975b6f7b..0000000000000000000000000000000000000000
--- a/mace/ops/opencl/cl/one_hot.cl
+++ /dev/null
@@ -1,78 +0,0 @@
-#include <common.h>
-
-__kernel void one_hot(OUT_OF_RANGE_PARAMS
-                      GLOBAL_WORK_GROUP_SIZE_DIM2
-                      __read_only image2d_t input,
-                      __write_only image2d_t output,
-#ifdef AXIS_0
-                      __private const int in_size,
-#endif
-                      __private const float on_value,
-                      __private const float off_value) {
-
-  const int channel_idx = get_global_id(0);
-  const int batch_idx   = get_global_id(1);
-
-#ifndef NON_UNIFORM_WORK_GROUP
-  if (channel_idx >= global_size_dim0 || batch_idx >= global_size_dim1) {
-    return;
-  }
-#endif
-
-  DATA_TYPE4 out = off_value;
-
-#ifdef AXIS_0
-  int in_idx = channel_idx * 4;
-  DATA_TYPE4 in = READ_IMAGET(input, SAMPLER, (int2)(0, in_idx));
-
-  if (in.s0 == batch_idx) {
-    out.s0 = on_value;
-  }
-
-  if (++in_idx < in_size) {
-    in = READ_IMAGET(input, SAMPLER, (int2)(0, in_idx));
-
-    if (in.s0 == batch_idx) {
-      out.s1 = on_value;
-    }
-
-    if (++in_idx < in_size) {
-      in = READ_IMAGET(input, SAMPLER, (int2)(0, in_idx));
-
-      if (in.s0 == batch_idx) {
-        out.s2 = on_value;
-      }
-
-      if (++in_idx < in_size) {
-        in = READ_IMAGET(input, SAMPLER, (int2)(0, in_idx));
-
-        if (in.s0 == batch_idx) {
-          out.s3 = on_value;
-        }
-      }
-    }
-  }
-#else
-  DATA_TYPE4 in = READ_IMAGET(input, SAMPLER, (int2)(0, batch_idx));
-  int i = in.s0;
-
-  if (i / 4 == channel_idx) {
-    switch (i % 4) {
-    case 0:
-      out.s0 = on_value;
-      break;
-    case 1:
-      out.s1 = on_value;
-      break;
-    case 2:
-      out.s2 = on_value;
-      break;
-    case 3:
-      out.s3 = on_value;
-      break;
-    }
-  }
-#endif
-
-  WRITE_IMAGET(output, (int2)(channel_idx, batch_idx), out);
-}
diff --git a/mace/ops/opencl/concat.h b/mace/ops/opencl/concat.h
index abeec7c62e25299ac4de95e0b0dadc61bdb35900..5570a29430663ad7cf6b144044d4f5f022d97583 100644
--- a/mace/ops/opencl/concat.h
+++ b/mace/ops/opencl/concat.h
@@ -31,6 +31,7 @@ class OpenCLConcatKernel {
   virtual MaceStatus Compute(
       OpContext *context,
       const std::vector<const Tensor *> &input_list,
+      const int32_t axis,
       Tensor *output) = 0;
   MACE_EMPTY_VIRTUAL_DESTRUCTOR(OpenCLConcatKernel);
 };
diff --git a/mace/ops/opencl/helper.cc b/mace/ops/opencl/helper.cc
index 46d4fd5b288d8463bfb44a5a879d9a93a5aebc70..912a8d8d87e549290cf5d174187d288c2462fcb1 100644
--- a/mace/ops/opencl/helper.cc
+++ b/mace/ops/opencl/helper.cc
@@ -34,8 +34,6 @@ std::vector<index_t> FormatBufferShape(
         return buffer_shape;
       } else if (buffer_shape_size == 2) {  // NC
         return {buffer_shape[0], 1, 1, buffer_shape[1]};
-      } else if (buffer_shape_size == 1) {  // N
-        return {buffer_shape[0], 1, 1, 1};
       } else {
         LOG(FATAL) << "GPU only support 2D or 4D input and output";
       }
diff --git a/mace/ops/opencl/image/concat.h b/mace/ops/opencl/image/concat.h
index e1d9b1e70d79021df775aec320bb2a0096931155..3a7af7ed15b35b6f35fb4e844f76c61b1f1a6985 100644
--- a/mace/ops/opencl/image/concat.h
+++ b/mace/ops/opencl/image/concat.h
@@ -48,14 +48,14 @@ MaceStatus ConcatN(OpContext *context,
 template <typename T>
 class ConcatKernel : public OpenCLConcatKernel {
  public:
-  explicit ConcatKernel(const int32_t axis) : axis_(axis) {}
+  ConcatKernel() {}
   MaceStatus Compute(
       OpContext *context,
       const std::vector<const Tensor *> &input_list,
+      const int32_t axis,
       Tensor *output) override;
 
  private:
-  int32_t axis_;
   cl::Kernel kernel_;
   uint32_t kwg_size_;
   std::vector<index_t> input_shape_;
@@ -65,6 +65,7 @@ template <typename T>
 MaceStatus ConcatKernel<T>::Compute(
     OpContext *context,
     const std::vector<const Tensor *> &input_list,
+    const int32_t axis,
     Tensor *output) {
   const int inputs_count = input_list.size();
 
@@ -76,13 +77,13 @@ MaceStatus ConcatKernel<T>::Compute(
     MACE_CHECK(input->dim_size() == input0->dim_size(),
                "Ranks of all input tensors must be same.");
     for (int j = 0; j < input->dim_size(); ++j) {
-      if (j == axis_) {
+      if (j == axis) {
         continue;
       }
       MACE_CHECK(input->dim(j) == input0->dim(j),
                  "Dimensions of inputs should equal except axis.");
     }
-    output_shape[axis_] += input->dim(axis_);
+    output_shape[axis] += input->dim(axis);
   }
   std::vector<size_t> image_shape;
   OpenCLUtil::CalImage2DShape(output_shape,
diff --git a/mace/ops/opencl/image/one_hot.h b/mace/ops/opencl/image/one_hot.h
deleted file mode 100644
index 5ba7ba797eb0aab3b8aa4c48664d3dbd55afc0d7..0000000000000000000000000000000000000000
--- a/mace/ops/opencl/image/one_hot.h
+++ /dev/null
@@ -1,134 +0,0 @@
-// Copyright 2018 The MACE Authors. All Rights Reserved.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//     http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-#ifndef MACE_OPS_OPENCL_IMAGE_ONE_HOT_H_
-#define MACE_OPS_OPENCL_IMAGE_ONE_HOT_H_
-
-#include "mace/ops/opencl/one_hot.h"
-
-#include <memory>
-#include <vector>
-#include <set>
-#include <string>
-
-#include "mace/core/op_context.h"
-#include "mace/core/tensor.h"
-#include "mace/ops/opencl/helper.h"
-
-
-namespace mace {
-namespace ops {
-namespace opencl {
-namespace image {
-
-template <typename T>
-class OneHotKernel : public OpenCLOneHotKernel {
- public:
-  OneHotKernel(const int depth, const float on_value,
-               const float off_value, const int axis)
-      : depth_(depth), on_value_(on_value),
-        off_value_(off_value), axis_(axis) {}
-
-  MaceStatus Compute(
-      OpContext *context,
-      const Tensor *input,
-      Tensor *output) override;
-
- private:
-  int depth_;
-  float on_value_;
-  float off_value_;
-  int axis_;
-  cl::Kernel kernel_;
-  uint32_t kwg_size_;
-  std::vector<index_t> input_shape_;
-};
-
-template <typename T>
-MaceStatus OneHotKernel<T>::Compute(
-    OpContext *context,
-    const Tensor *input,
-    Tensor *output) {
-
-  auto input_shape = input->shape();
-  index_t axis = axis_ == -1 ? input->dim_size() : axis_;
-
-  MACE_CHECK(input->dim_size() == 1, "OneHot GPU only supports 1D input");
-  MACE_CHECK(axis >= 0 && axis <= input->dim_size());
-
-  std::vector<index_t> output_shape =
-      axis == 0 ? std::vector<index_t>{depth_, input_shape[0]} :
-                  std::vector<index_t>{input_shape[0], depth_};
-  std::vector<size_t> output_image_shape{
-      static_cast<size_t>(RoundUpDiv4(output_shape[1])),
-      static_cast<size_t>(output_shape[0])};
-  MACE_RETURN_IF_ERROR(output->ResizeImage(output_shape, output_image_shape));
-
-  auto runtime = context->device()->gpu_runtime()->opencl_runtime();
-  MACE_OUT_OF_RANGE_DEFINITION;
-
-  if (kernel_.get() == nullptr) {
-    std::set<std::string> built_options;
-    MACE_OUT_OF_RANGE_CONFIG;
-    MACE_NON_UNIFORM_WG_CONFIG;
-    std::string kernel_name = MACE_OBFUSCATE_SYMBOL("one_hot");
-    built_options.emplace("-Done_hot=" + kernel_name);
-    auto dt = DataTypeToEnum<T>::value;
-    built_options.emplace("-DDATA_TYPE=" + DtToCLDt(dt));
-    built_options.emplace("-DCMD_DATA_TYPE=" + DtToCLCMDDt(dt));
-    if (axis == 0) {
-      built_options.emplace("-DAXIS_0");
-    }
-    MACE_RETURN_IF_ERROR(runtime->BuildKernel("one_hot", kernel_name,
-                                              built_options, &kernel_));
-
-    kwg_size_ =
-        static_cast<uint32_t>(runtime->GetKernelMaxWorkGroupSize(kernel_));
-  }
-
-  const uint32_t gws[2] = {
-    static_cast<uint32_t>(output_image_shape[0]),
-    static_cast<uint32_t>(output_image_shape[1])
-  };
-  MACE_OUT_OF_RANGE_INIT(kernel_);
-
-  if (!IsVecEqual(input_shape_, input->shape())) {
-    int idx = 0;
-    MACE_OUT_OF_RANGE_SET_ARGS(kernel_);
-    MACE_SET_2D_GWS_ARGS(kernel_, gws);
-    kernel_.setArg(idx++, *(input->opencl_image()));
-    kernel_.setArg(idx++, *(output->opencl_image()));
-    if (axis == 0) {
-      kernel_.setArg(idx++, static_cast<int>(input_shape[0]));
-    }
-    kernel_.setArg(idx++, on_value_);
-    kernel_.setArg(idx++, off_value_);
-
-    input_shape_ = input->shape();
-  }
-
-  const std::vector<uint32_t> lws = {kwg_size_ / 64, 64, 0};
-  std::string tuning_key = Concat("one_hot", output->dim(0), output->dim(1));
-  MACE_RETURN_IF_ERROR(TuningOrRun2DKernel(runtime, kernel_, tuning_key,
-                                           gws, lws, context->future()));
-
-  MACE_OUT_OF_RANGE_VALIDATION;
-  return MaceStatus::MACE_SUCCESS;
-}
-
-}  // namespace image
-}  // namespace opencl
-}  // namespace ops
-}  // namespace mace
-
-#endif  // MACE_OPS_OPENCL_IMAGE_ONE_HOT_H_
diff --git a/mace/ops/opencl/one_hot.h b/mace/ops/opencl/one_hot.h
deleted file mode 100644
index 6197258d948145901da52c4c649342edfa8b8a41..0000000000000000000000000000000000000000
--- a/mace/ops/opencl/one_hot.h
+++ /dev/null
@@ -1,38 +0,0 @@
-// Copyright 2018 The MACE Authors. All Rights Reserved.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//     http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-#ifndef MACE_OPS_OPENCL_ONE_HOT_H_
-#define MACE_OPS_OPENCL_ONE_HOT_H_
-
-#include "mace/public/mace.h"
-#include "mace/utils/utils.h"
-namespace mace {
-
-class OpContext;
-class Tensor;
-
-namespace ops {
-class OpenCLOneHotKernel {
- public:
-  virtual MaceStatus Compute(
-      OpContext *context,
-      const Tensor *input,
-      Tensor *output) = 0;
-  MACE_EMPTY_VIRTUAL_DESTRUCTOR(OpenCLOneHotKernel);
-};
-
-}  // namespace ops
-}  // namespace mace
-
-#endif  // MACE_OPS_OPENCL_ONE_HOT_H_
diff --git a/repository/opencl-kernel/opencl_kernel_configure.bzl b/repository/opencl-kernel/opencl_kernel_configure.bzl
index 572219b161bf496b68c0949da53c6820554f13c9..63191cda20032c191992ea3624c13c121c585121 100644
--- a/repository/opencl-kernel/opencl_kernel_configure.bzl
+++ b/repository/opencl-kernel/opencl_kernel_configure.bzl
@@ -50,7 +50,6 @@ def _opencl_encrypt_kernel_impl(repository_ctx):
         unused_var = repository_ctx.path(Label("//:mace/ops/opencl/cl/fully_connected.cl"))
         unused_var = repository_ctx.path(Label("//:mace/ops/opencl/cl/lstmcell.cl"))
         unused_var = repository_ctx.path(Label("//:mace/ops/opencl/cl/matmul.cl"))
-        unused_var = repository_ctx.path(Label("//:mace/ops/opencl/cl/one_hot.cl"))
         unused_var = repository_ctx.path(Label("//:mace/ops/opencl/cl/pad.cl"))
         unused_var = repository_ctx.path(Label("//:mace/ops/opencl/cl/pooling.cl"))
         unused_var = repository_ctx.path(Label("//:mace/ops/opencl/cl/pooling_buffer.cl"))