提交 4888bd5f 编写于 作者: 叶剑武

Merge branch 'fix-one-hot' into 'master'

Remove GPU code of the OneHot op, since it only supports 1D input.

See merge request !1039
...@@ -79,12 +79,10 @@ MemoryBlock MemoryOptimizer::CreateMemoryBlock( ...@@ -79,12 +79,10 @@ MemoryBlock MemoryOptimizer::CreateMemoryBlock(
*op_def, "buffer_type", OpenCLBufferType::IN_OUT_CHANNEL)); *op_def, "buffer_type", OpenCLBufferType::IN_OUT_CHANNEL));
} }
std::vector<size_t> image_shape; std::vector<size_t> image_shape;
if (shape.size() == 1) { if (shape.size() == 2) {
shape = {shape[0], 1, 1, 1};
} else if (shape.size() == 2) {
shape = {shape[0], 1, 1, shape[1]}; shape = {shape[0], 1, 1, shape[1]};
} else { } else {
MACE_CHECK(shape.size() == 4) << "GPU only support 1D/2D/4D input"; MACE_CHECK(shape.size() == 4) << "GPU only support 2D/4D input";
} }
OpenCLUtil::CalImage2DShape(shape, buffer_type, &image_shape); OpenCLUtil::CalImage2DShape(shape, buffer_type, &image_shape);
block.set_x(image_shape[0]); block.set_x(image_shape[0]);
......
...@@ -29,22 +29,21 @@ class ConcatOpBase : public Operation { ...@@ -29,22 +29,21 @@ class ConcatOpBase : public Operation {
public: public:
explicit ConcatOpBase(OpConstructContext *context) explicit ConcatOpBase(OpConstructContext *context)
: Operation(context), : Operation(context),
axis_(Operation::GetOptionalArg<int>("axis", 3)), axis_(Operation::GetOptionalArg<int>("axis", 3)) {}
checked_(false) {}
protected: protected:
void Validate() { int FormatAxis() {
const int32_t input_dims = this->Input(0)->dim_size(); const int32_t input_dims = this->Input(0)->dim_size();
axis_ = axis_ =
axis_ < 0 ? axis_ + input_dims : axis_; axis_ < 0 ? axis_ + input_dims : axis_;
MACE_CHECK((0 <= axis_ && axis_ < input_dims), MACE_CHECK((0 <= axis_ && axis_ < input_dims),
"Expected concatenating axis in the range [", -input_dims, ", ", "Expected concatenating axis in the range [", -input_dims, ", ",
input_dims, "], but got ", axis_); input_dims, "], but got ", axis_);
return axis_;
} }
protected: protected:
int axis_; int axis_;
bool checked_;
}; };
template <DeviceType D, class T> template <DeviceType D, class T>
...@@ -54,20 +53,17 @@ template <typename T> ...@@ -54,20 +53,17 @@ template <typename T>
class ConcatOp<DeviceType::CPU, T> : public ConcatOpBase { class ConcatOp<DeviceType::CPU, T> : public ConcatOpBase {
public: public:
explicit ConcatOp(OpConstructContext *context) explicit ConcatOp(OpConstructContext *context)
: ConcatOpBase(context) {} : ConcatOpBase(context),
has_data_format_(Operation::GetOptionalArg<int>(
"has_data_format", 0) == 1) {}
MaceStatus Run(OpContext *context) override { MaceStatus Run(OpContext *context) override {
MACE_UNUSED(context); MACE_UNUSED(context);
if (!checked_) { int axis = FormatAxis();
Validate(); if (has_data_format_ && this->Input(0)->dim_size() == 4) {
auto has_df = Operation::GetOptionalArg<int>( if (axis == 3) axis = 1;
"has_data_format", 0); else if (axis == 2) axis = 3;
if (has_df && this->Input(0)->dim_size() == 4) { else if (axis == 1) axis = 2;
if (axis_ == 3) axis_ = 1;
else if (axis_ == 2) axis_ = 3;
else if (axis_ == 1) axis_ = 2;
}
checked_ = true;
} }
const std::vector<const Tensor *> &inputs = this->Inputs(); const std::vector<const Tensor *> &inputs = this->Inputs();
Tensor *output = this->Output(0); Tensor *output = this->Output(0);
...@@ -76,7 +72,7 @@ class ConcatOp<DeviceType::CPU, T> : public ConcatOpBase { ...@@ -76,7 +72,7 @@ class ConcatOp<DeviceType::CPU, T> : public ConcatOpBase {
std::vector<index_t> output_shape(input0->shape()); std::vector<index_t> output_shape(input0->shape());
index_t inner_size = 1; index_t inner_size = 1;
for (int i = 0; i < axis_; ++i) { for (int i = 0; i < axis; ++i) {
inner_size *= output_shape[i]; inner_size *= output_shape[i];
} }
std::vector<index_t> outer_sizes(inputs_count, 0); std::vector<index_t> outer_sizes(inputs_count, 0);
...@@ -86,14 +82,14 @@ class ConcatOp<DeviceType::CPU, T> : public ConcatOpBase { ...@@ -86,14 +82,14 @@ class ConcatOp<DeviceType::CPU, T> : public ConcatOpBase {
MACE_CHECK(input->dim_size() == input0->dim_size(), MACE_CHECK(input->dim_size() == input0->dim_size(),
"Ranks of all input tensors must be same."); "Ranks of all input tensors must be same.");
for (int j = 0; j < input->dim_size(); ++j) { for (int j = 0; j < input->dim_size(); ++j) {
if (j == axis_) { if (j == axis) {
continue; continue;
} }
MACE_CHECK(input->dim(j) == input0->dim(j), MACE_CHECK(input->dim(j) == input0->dim(j),
"Dimensions of inputs should equal except axis."); "Dimensions of inputs should equal except axis.");
} }
outer_sizes[i] = input->size() / inner_size; outer_sizes[i] = input->size() / inner_size;
output_shape[axis_] += input->dim(axis_); output_shape[axis] += input->dim(axis);
} }
MACE_RETURN_IF_ERROR(output->Resize(output_shape)); MACE_RETURN_IF_ERROR(output->Resize(output_shape));
...@@ -119,6 +115,9 @@ class ConcatOp<DeviceType::CPU, T> : public ConcatOpBase { ...@@ -119,6 +115,9 @@ class ConcatOp<DeviceType::CPU, T> : public ConcatOpBase {
return MaceStatus::MACE_SUCCESS; return MaceStatus::MACE_SUCCESS;
} }
private:
bool has_data_format_;
}; };
#ifdef MACE_ENABLE_QUANTIZE #ifdef MACE_ENABLE_QUANTIZE
...@@ -130,7 +129,7 @@ class ConcatOp<DeviceType::CPU, uint8_t> : public ConcatOpBase { ...@@ -130,7 +129,7 @@ class ConcatOp<DeviceType::CPU, uint8_t> : public ConcatOpBase {
MaceStatus Run(OpContext *context) override { MaceStatus Run(OpContext *context) override {
MACE_UNUSED(context); MACE_UNUSED(context);
Validate(); int axis = FormatAxis();
const std::vector<const Tensor *> &inputs = this->Inputs(); const std::vector<const Tensor *> &inputs = this->Inputs();
Tensor *output = this->Output(0); Tensor *output = this->Output(0);
MACE_CHECK(output->scale() != 0); MACE_CHECK(output->scale() != 0);
...@@ -139,7 +138,7 @@ class ConcatOp<DeviceType::CPU, uint8_t> : public ConcatOpBase { ...@@ -139,7 +138,7 @@ class ConcatOp<DeviceType::CPU, uint8_t> : public ConcatOpBase {
std::vector<index_t> output_shape(input0->shape()); std::vector<index_t> output_shape(input0->shape());
index_t inner_size = 1; index_t inner_size = 1;
for (int i = 0; i < axis_; ++i) { for (int i = 0; i < axis; ++i) {
inner_size *= output_shape[i]; inner_size *= output_shape[i];
} }
std::vector<index_t> outer_sizes(inputs_count, 0); std::vector<index_t> outer_sizes(inputs_count, 0);
...@@ -149,14 +148,14 @@ class ConcatOp<DeviceType::CPU, uint8_t> : public ConcatOpBase { ...@@ -149,14 +148,14 @@ class ConcatOp<DeviceType::CPU, uint8_t> : public ConcatOpBase {
MACE_CHECK(input->dim_size() == input0->dim_size(), MACE_CHECK(input->dim_size() == input0->dim_size(),
"Ranks of all input tensors must be same."); "Ranks of all input tensors must be same.");
for (int j = 0; j < input->dim_size(); ++j) { for (int j = 0; j < input->dim_size(); ++j) {
if (j == axis_) { if (j == axis) {
continue; continue;
} }
MACE_CHECK(input->dim(j) == input0->dim(j), MACE_CHECK(input->dim(j) == input0->dim(j),
"Dimensions of inputs should equal except axis."); "Dimensions of inputs should equal except axis.");
} }
outer_sizes[i] = input->size() / inner_size; outer_sizes[i] = input->size() / inner_size;
output_shape[axis_] += input->dim(axis_); output_shape[axis] += input->dim(axis);
} }
MACE_RETURN_IF_ERROR(output->Resize(output_shape)); MACE_RETURN_IF_ERROR(output->Resize(output_shape));
...@@ -200,15 +199,14 @@ class ConcatOp<DeviceType::GPU, T> : public ConcatOpBase { ...@@ -200,15 +199,14 @@ class ConcatOp<DeviceType::GPU, T> : public ConcatOpBase {
explicit ConcatOp(OpConstructContext *context) explicit ConcatOp(OpConstructContext *context)
: ConcatOpBase(context) { : ConcatOpBase(context) {
if (context->device()->gpu_runtime()->UseImageMemory()) { if (context->device()->gpu_runtime()->UseImageMemory()) {
kernel_ = make_unique<opencl::image::ConcatKernel<T>>(axis_); kernel_ = make_unique<opencl::image::ConcatKernel<T>>();
} else { } else {
MACE_NOT_IMPLEMENTED; MACE_NOT_IMPLEMENTED;
} }
} }
MaceStatus Run(OpContext *context) override { MaceStatus Run(OpContext *context) override {
Validate();
Tensor *output = this->Output(0); Tensor *output = this->Output(0);
return kernel_->Compute(context, inputs_, output); return kernel_->Compute(context, inputs_, FormatAxis(), output);
} }
private: private:
......
...@@ -16,9 +16,6 @@ ...@@ -16,9 +16,6 @@
#include <memory> #include <memory>
#include "mace/core/operator.h" #include "mace/core/operator.h"
#ifdef MACE_ENABLE_OPENCL
#include "mace/ops/opencl/image/one_hot.h"
#endif // MACE_ENABLE_OPENCL
namespace mace { namespace mace {
namespace ops { namespace ops {
...@@ -148,52 +145,9 @@ class OneHotOp<DeviceType::CPU, T> : public OneHotOpBase { ...@@ -148,52 +145,9 @@ class OneHotOp<DeviceType::CPU, T> : public OneHotOpBase {
} }
}; };
#ifdef MACE_ENABLE_OPENCL
template <typename T>
class OneHotOp<DeviceType::GPU, T> : public OneHotOpBase {
public:
explicit OneHotOp(OpConstructContext *context) : OneHotOpBase(context) {
if (context->device()->gpu_runtime()->UseImageMemory()) {
kernel_.reset(new opencl::image::OneHotKernel<T>(
depth_, on_value_, off_value_, axis_));
} else {
MACE_NOT_IMPLEMENTED;
}
}
MaceStatus Run(OpContext *context) override {
const Tensor *input = this->Input(0);
Tensor *output = this->Output(0);
return kernel_->Compute(context, input, output);
}
private:
std::unique_ptr<OpenCLOneHotKernel> kernel_;
};
#endif // MACE_ENABLE_OPENCL
void RegisterOneHot(OpRegistryBase *op_registry) { void RegisterOneHot(OpRegistryBase *op_registry) {
MACE_REGISTER_OP(op_registry, "OneHot", OneHotOp, DeviceType::CPU, float); MACE_REGISTER_OP(op_registry, "OneHot", OneHotOp, DeviceType::CPU, float);
#ifdef MACE_ENABLE_OPENCL
MACE_REGISTER_OP(op_registry, "OneHot", OneHotOp, DeviceType::GPU, float);
MACE_REGISTER_OP(op_registry, "OneHot", OneHotOp, DeviceType::GPU, half);
MACE_REGISTER_OP_CONDITION(
op_registry,
OpConditionBuilder("OneHot")
.SetDevicePlacerFunc(
[](OpConstructContext *context) -> std::set<DeviceType> {
auto op = context->operator_def();
if (op->output_shape_size() != op->output_size()) {
return { DeviceType::CPU, DeviceType::GPU };
}
if (op->output_shape(0).dims_size() != 2) {
return { DeviceType::CPU };
}
return { DeviceType::CPU, DeviceType::GPU };
}));
#endif // MACE_ENABLE_OPENCL
} }
} // namespace ops } // namespace ops
......
...@@ -61,9 +61,7 @@ void OneHot(int iters, int batch, int depth, int axis) { ...@@ -61,9 +61,7 @@ void OneHot(int iters, int batch, int depth, int axis) {
MACE_BENCHMARK(MACE_BM_ONE_HOT_##N##_##DEPTH##_##AXIS##_##TYPE##_##DEVICE) MACE_BENCHMARK(MACE_BM_ONE_HOT_##N##_##DEPTH##_##AXIS##_##TYPE##_##DEVICE)
#define MACE_BM_ONE_HOT(N, DEPTH, AXIS) \ #define MACE_BM_ONE_HOT(N, DEPTH, AXIS) \
MACE_BM_ONE_HOT_MACRO(N, DEPTH, AXIS, float, CPU); \ MACE_BM_ONE_HOT_MACRO(N, DEPTH, AXIS, float, CPU);
MACE_BM_ONE_HOT_MACRO(N, DEPTH, AXIS, float, GPU); \
MACE_BM_ONE_HOT_MACRO(N, DEPTH, AXIS, half, GPU);
MACE_BM_ONE_HOT(512, 16, 0); MACE_BM_ONE_HOT(512, 16, 0);
MACE_BM_ONE_HOT(512, 16, 1); MACE_BM_ONE_HOT(512, 16, 1);
......
...@@ -45,7 +45,6 @@ void TestOneHot(const std::vector<index_t> &input_shape, ...@@ -45,7 +45,6 @@ void TestOneHot(const std::vector<index_t> &input_shape,
.AddFloatArg("on_value", on_value) .AddFloatArg("on_value", on_value)
.AddFloatArg("off_value", off_value) .AddFloatArg("off_value", off_value)
.AddIntArg("axis", axis) .AddIntArg("axis", axis)
.AddIntArg("data_format", DataFormat::NHWC)
.Finalize(net.NewOperatorDef()); .Finalize(net.NewOperatorDef());
// Run // Run
...@@ -77,10 +76,6 @@ TEST_F(OneHotTest, Dim1) { ...@@ -77,10 +76,6 @@ TEST_F(OneHotTest, Dim1) {
TestOneHot<DeviceType::CPU, float>(input_shape, input_data, expected_shape, TestOneHot<DeviceType::CPU, float>(input_shape, input_data, expected_shape,
expected_data, 5, -1); expected_data, 5, -1);
TestOneHot<DeviceType::GPU, float>(input_shape, input_data, expected_shape,
expected_data, 5, -1);
TestOneHot<DeviceType::GPU, half>(input_shape, input_data, expected_shape,
expected_data, 5, -1);
expected_shape = {5, 10}; expected_shape = {5, 10};
expected_data = { expected_data = {
...@@ -93,10 +88,6 @@ TEST_F(OneHotTest, Dim1) { ...@@ -93,10 +88,6 @@ TEST_F(OneHotTest, Dim1) {
TestOneHot<DeviceType::CPU, float>(input_shape, input_data, expected_shape, TestOneHot<DeviceType::CPU, float>(input_shape, input_data, expected_shape,
expected_data, 5, 0); expected_data, 5, 0);
TestOneHot<DeviceType::GPU, float>(input_shape, input_data, expected_shape,
expected_data, 5, 0);
TestOneHot<DeviceType::GPU, half>(input_shape, input_data, expected_shape,
expected_data, 5, 0);
} }
TEST_F(OneHotTest, OnOffValue) { TEST_F(OneHotTest, OnOffValue) {
...@@ -111,10 +102,6 @@ TEST_F(OneHotTest, OnOffValue) { ...@@ -111,10 +102,6 @@ TEST_F(OneHotTest, OnOffValue) {
TestOneHot<DeviceType::CPU, float>(input_shape, input_data, expected_shape, TestOneHot<DeviceType::CPU, float>(input_shape, input_data, expected_shape,
expected_data, 6, -1, 7, 8); expected_data, 6, -1, 7, 8);
TestOneHot<DeviceType::GPU, float>(input_shape, input_data, expected_shape,
expected_data, 6, -1, 7, 8);
TestOneHot<DeviceType::GPU, half>(input_shape, input_data, expected_shape,
expected_data, 6, -1, 7, 8);
} }
TEST_F(OneHotTest, Dim2) { TEST_F(OneHotTest, Dim2) {
......
#include <common.h>

// One-hot kernel for a 1-D input tensor stored as an OpenCL image
// (one input element per image row, value in the .s0 lane of column 0).
// Each work item writes one output pixel at (channel_idx, batch_idx);
// a pixel packs 4 consecutive output elements in lanes s0..s3.
//
// AXIS_0 variant: lanes s0..s3 of a pixel correspond to 4 consecutive
// input positions (in_idx = channel_idx * 4 ..), and each input value is
// compared against batch_idx — i.e. the one-hot axis is dimension 0.
// Default variant: the single input element at row batch_idx selects which
// packed pixel (i / 4) and lane (i % 4) receives on_value.
__kernel void one_hot(OUT_OF_RANGE_PARAMS
                      GLOBAL_WORK_GROUP_SIZE_DIM2
                      __read_only image2d_t input,
                      __write_only image2d_t output,
#ifdef AXIS_0
                      // number of input elements; bounds the up-to-4 packed reads
                      __private const int in_size,
#endif
                      __private const float on_value,
                      __private const float off_value) {
  const int channel_idx = get_global_id(0);
  const int batch_idx = get_global_id(1);

#ifndef NON_UNIFORM_WORK_GROUP
  // Guard against padded global sizes when uniform work groups are required.
  if (channel_idx >= global_size_dim0 || batch_idx >= global_size_dim1) {
    return;
  }
#endif

  // Start from the "miss" value in every lane; selectively overwrite hits.
  DATA_TYPE4 out = off_value;

#ifdef AXIS_0
  // Walk up to 4 consecutive input elements, stopping at in_size; a lane is
  // set to on_value when its input element equals this row's index.
  // NOTE(review): the comparison is float (in.s0) vs int (batch_idx) —
  // relies on the input holding exactly-representable integer values.
  int in_idx = channel_idx * 4;
  DATA_TYPE4 in = READ_IMAGET(input, SAMPLER, (int2)(0, in_idx));
  if (in.s0 == batch_idx) {
    out.s0 = on_value;
  }
  if (++in_idx < in_size) {
    in = READ_IMAGET(input, SAMPLER, (int2)(0, in_idx));
    if (in.s0 == batch_idx) {
      out.s1 = on_value;
    }
    if (++in_idx < in_size) {
      in = READ_IMAGET(input, SAMPLER, (int2)(0, in_idx));
      if (in.s0 == batch_idx) {
        out.s2 = on_value;
      }
      if (++in_idx < in_size) {
        in = READ_IMAGET(input, SAMPLER, (int2)(0, in_idx));
        if (in.s0 == batch_idx) {
          out.s3 = on_value;
        }
      }
    }
  }
#else
  // One-hot along the last dimension: the input value i (truncated from
  // float) picks the target pixel (i / 4) and its lane (i % 4).
  DATA_TYPE4 in = READ_IMAGET(input, SAMPLER, (int2)(0, batch_idx));
  int i = in.s0;
  if (i / 4 == channel_idx) {
    switch (i % 4) {
      case 0:
        out.s0 = on_value;
        break;
      case 1:
        out.s1 = on_value;
        break;
      case 2:
        out.s2 = on_value;
        break;
      case 3:
        out.s3 = on_value;
        break;
    }
  }
#endif

  WRITE_IMAGET(output, (int2)(channel_idx, batch_idx), out);
}
...@@ -31,6 +31,7 @@ class OpenCLConcatKernel { ...@@ -31,6 +31,7 @@ class OpenCLConcatKernel {
virtual MaceStatus Compute( virtual MaceStatus Compute(
OpContext *context, OpContext *context,
const std::vector<const Tensor *> &input_list, const std::vector<const Tensor *> &input_list,
const int32_t axis,
Tensor *output) = 0; Tensor *output) = 0;
MACE_EMPTY_VIRTUAL_DESTRUCTOR(OpenCLConcatKernel); MACE_EMPTY_VIRTUAL_DESTRUCTOR(OpenCLConcatKernel);
}; };
......
...@@ -34,8 +34,6 @@ std::vector<index_t> FormatBufferShape( ...@@ -34,8 +34,6 @@ std::vector<index_t> FormatBufferShape(
return buffer_shape; return buffer_shape;
} else if (buffer_shape_size == 2) { // NC } else if (buffer_shape_size == 2) { // NC
return {buffer_shape[0], 1, 1, buffer_shape[1]}; return {buffer_shape[0], 1, 1, buffer_shape[1]};
} else if (buffer_shape_size == 1) { // N
return {buffer_shape[0], 1, 1, 1};
} else { } else {
LOG(FATAL) << "GPU only support 2D or 4D input and output"; LOG(FATAL) << "GPU only support 2D or 4D input and output";
} }
......
...@@ -48,14 +48,14 @@ MaceStatus ConcatN(OpContext *context, ...@@ -48,14 +48,14 @@ MaceStatus ConcatN(OpContext *context,
template <typename T> template <typename T>
class ConcatKernel : public OpenCLConcatKernel { class ConcatKernel : public OpenCLConcatKernel {
public: public:
explicit ConcatKernel(const int32_t axis) : axis_(axis) {} ConcatKernel() {}
MaceStatus Compute( MaceStatus Compute(
OpContext *context, OpContext *context,
const std::vector<const Tensor *> &input_list, const std::vector<const Tensor *> &input_list,
const int32_t axis,
Tensor *output) override; Tensor *output) override;
private: private:
int32_t axis_;
cl::Kernel kernel_; cl::Kernel kernel_;
uint32_t kwg_size_; uint32_t kwg_size_;
std::vector<index_t> input_shape_; std::vector<index_t> input_shape_;
...@@ -65,6 +65,7 @@ template <typename T> ...@@ -65,6 +65,7 @@ template <typename T>
MaceStatus ConcatKernel<T>::Compute( MaceStatus ConcatKernel<T>::Compute(
OpContext *context, OpContext *context,
const std::vector<const Tensor *> &input_list, const std::vector<const Tensor *> &input_list,
const int32_t axis,
Tensor *output) { Tensor *output) {
const int inputs_count = input_list.size(); const int inputs_count = input_list.size();
...@@ -76,13 +77,13 @@ MaceStatus ConcatKernel<T>::Compute( ...@@ -76,13 +77,13 @@ MaceStatus ConcatKernel<T>::Compute(
MACE_CHECK(input->dim_size() == input0->dim_size(), MACE_CHECK(input->dim_size() == input0->dim_size(),
"Ranks of all input tensors must be same."); "Ranks of all input tensors must be same.");
for (int j = 0; j < input->dim_size(); ++j) { for (int j = 0; j < input->dim_size(); ++j) {
if (j == axis_) { if (j == axis) {
continue; continue;
} }
MACE_CHECK(input->dim(j) == input0->dim(j), MACE_CHECK(input->dim(j) == input0->dim(j),
"Dimensions of inputs should equal except axis."); "Dimensions of inputs should equal except axis.");
} }
output_shape[axis_] += input->dim(axis_); output_shape[axis] += input->dim(axis);
} }
std::vector<size_t> image_shape; std::vector<size_t> image_shape;
OpenCLUtil::CalImage2DShape(output_shape, OpenCLUtil::CalImage2DShape(output_shape,
......
// Copyright 2018 The MACE Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#ifndef MACE_OPS_OPENCL_IMAGE_ONE_HOT_H_
#define MACE_OPS_OPENCL_IMAGE_ONE_HOT_H_
#include "mace/ops/opencl/one_hot.h"
#include <memory>
#include <vector>
#include <set>
#include <string>
#include "mace/core/op_context.h"
#include "mace/core/tensor.h"
#include "mace/ops/opencl/helper.h"
namespace mace {
namespace ops {
namespace opencl {
namespace image {
// Image-memory GPU implementation of the OneHot op.
// T selects the compute data type baked into the OpenCL build options
// (e.g. float or half).
template <typename T>
class OneHotKernel : public OpenCLOneHotKernel {
 public:
  // depth: extent of the generated one-hot dimension.
  // on_value / off_value: values written at the matching / non-matching
  // positions respectively.
  // axis: axis along which to expand; -1 means "append as the last dim"
  // (resolved in Compute()).
  OneHotKernel(const int depth, const float on_value,
               const float off_value, const int axis)
      : depth_(depth), on_value_(on_value),
        off_value_(off_value), axis_(axis) {}

  MaceStatus Compute(
      OpContext *context,
      const Tensor *input,
      Tensor *output) override;

 private:
  int depth_;
  float on_value_;
  float off_value_;
  int axis_;
  cl::Kernel kernel_;    // built lazily on first Compute(), then cached
  uint32_t kwg_size_;    // max workgroup size queried for kernel_
  // Input shape seen when kernel args were last set; used to skip
  // redundant setArg calls on repeated runs with the same shape.
  std::vector<index_t> input_shape_;
};
// Runs the one-hot kernel on the GPU. Only 1-D input is supported; the
// output is 2-D: {depth, N} when the resolved axis is 0, otherwise
// {N, depth}.
template <typename T>
MaceStatus OneHotKernel<T>::Compute(
    OpContext *context,
    const Tensor *input,
    Tensor *output) {
  auto input_shape = input->shape();
  // axis_ == -1 means "append the new dimension after the existing ones",
  // which for 1-D input resolves to axis 1.
  index_t axis = axis_ == -1 ? input->dim_size() : axis_;
  MACE_CHECK(input->dim_size() == 1, "OneHot GPU only supports 1D input");
  MACE_CHECK(axis >= 0 && axis <= input->dim_size());
  std::vector<index_t> output_shape =
      axis == 0 ? std::vector<index_t>{depth_, input_shape[0]} :
                  std::vector<index_t>{input_shape[0], depth_};
  // Image layout: x = ceil(dim1 / 4) — 4 values packed per pixel — and
  // y = dim0; this matches the lane packing done in the one_hot kernel.
  std::vector<size_t> output_image_shape{
      static_cast<size_t>(RoundUpDiv4(output_shape[1])),
      static_cast<size_t>(output_shape[0])};
  MACE_RETURN_IF_ERROR(output->ResizeImage(output_shape, output_image_shape));
  auto runtime = context->device()->gpu_runtime()->opencl_runtime();
  MACE_OUT_OF_RANGE_DEFINITION;

  // Build the kernel once and cache it. AXIS_0 selects the kernel variant
  // that one-hots along dimension 0.
  if (kernel_.get() == nullptr) {
    std::set<std::string> built_options;
    MACE_OUT_OF_RANGE_CONFIG;
    MACE_NON_UNIFORM_WG_CONFIG;
    std::string kernel_name = MACE_OBFUSCATE_SYMBOL("one_hot");
    built_options.emplace("-Done_hot=" + kernel_name);
    auto dt = DataTypeToEnum<T>::value;
    built_options.emplace("-DDATA_TYPE=" + DtToCLDt(dt));
    built_options.emplace("-DCMD_DATA_TYPE=" + DtToCLCMDDt(dt));
    if (axis == 0) {
      built_options.emplace("-DAXIS_0");
    }
    MACE_RETURN_IF_ERROR(runtime->BuildKernel("one_hot", kernel_name,
                                              built_options, &kernel_));
    kwg_size_ =
        static_cast<uint32_t>(runtime->GetKernelMaxWorkGroupSize(kernel_));
  }

  // One work item per output pixel (gws[0] = packed channels, gws[1] = rows).
  const uint32_t gws[2] = {
      static_cast<uint32_t>(output_image_shape[0]),
      static_cast<uint32_t>(output_image_shape[1])
  };
  MACE_OUT_OF_RANGE_INIT(kernel_);

  // Re-set kernel args only when the input shape changed; on_value_,
  // off_value_ and the image handles follow the cached shape. The AXIS_0
  // variant additionally needs the element count for its bounds checks.
  if (!IsVecEqual(input_shape_, input->shape())) {
    int idx = 0;
    MACE_OUT_OF_RANGE_SET_ARGS(kernel_);
    MACE_SET_2D_GWS_ARGS(kernel_, gws);
    kernel_.setArg(idx++, *(input->opencl_image()));
    kernel_.setArg(idx++, *(output->opencl_image()));
    if (axis == 0) {
      kernel_.setArg(idx++, static_cast<int>(input_shape[0]));
    }
    kernel_.setArg(idx++, on_value_);
    kernel_.setArg(idx++, off_value_);
    input_shape_ = input->shape();
  }

  const std::vector<uint32_t> lws = {kwg_size_ / 64, 64, 0};
  std::string tuning_key = Concat("one_hot", output->dim(0), output->dim(1));
  MACE_RETURN_IF_ERROR(TuningOrRun2DKernel(runtime, kernel_, tuning_key,
                                           gws, lws, context->future()));
  MACE_OUT_OF_RANGE_VALIDATION;

  return MaceStatus::MACE_SUCCESS;
}
} // namespace image
} // namespace opencl
} // namespace ops
} // namespace mace
#endif // MACE_OPS_OPENCL_IMAGE_ONE_HOT_H_
// Copyright 2018 The MACE Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#ifndef MACE_OPS_OPENCL_ONE_HOT_H_
#define MACE_OPS_OPENCL_ONE_HOT_H_
#include "mace/public/mace.h"
#include "mace/utils/utils.h"
namespace mace {
class OpContext;
class Tensor;
namespace ops {
// Abstract interface for OpenCL OneHot kernel implementations
// (e.g. the image-memory kernel); ops hold implementations through
// this base to stay agnostic of the memory type.
class OpenCLOneHotKernel {
 public:
  // Computes the one-hot expansion of `input` into `output`.
  virtual MaceStatus Compute(
      OpContext *context,
      const Tensor *input,
      Tensor *output) = 0;
  MACE_EMPTY_VIRTUAL_DESTRUCTOR(OpenCLOneHotKernel);
};
} // namespace ops
} // namespace mace
#endif // MACE_OPS_OPENCL_ONE_HOT_H_
...@@ -50,7 +50,6 @@ def _opencl_encrypt_kernel_impl(repository_ctx): ...@@ -50,7 +50,6 @@ def _opencl_encrypt_kernel_impl(repository_ctx):
unused_var = repository_ctx.path(Label("//:mace/ops/opencl/cl/fully_connected.cl")) unused_var = repository_ctx.path(Label("//:mace/ops/opencl/cl/fully_connected.cl"))
unused_var = repository_ctx.path(Label("//:mace/ops/opencl/cl/lstmcell.cl")) unused_var = repository_ctx.path(Label("//:mace/ops/opencl/cl/lstmcell.cl"))
unused_var = repository_ctx.path(Label("//:mace/ops/opencl/cl/matmul.cl")) unused_var = repository_ctx.path(Label("//:mace/ops/opencl/cl/matmul.cl"))
unused_var = repository_ctx.path(Label("//:mace/ops/opencl/cl/one_hot.cl"))
unused_var = repository_ctx.path(Label("//:mace/ops/opencl/cl/pad.cl")) unused_var = repository_ctx.path(Label("//:mace/ops/opencl/cl/pad.cl"))
unused_var = repository_ctx.path(Label("//:mace/ops/opencl/cl/pooling.cl")) unused_var = repository_ctx.path(Label("//:mace/ops/opencl/cl/pooling.cl"))
unused_var = repository_ctx.path(Label("//:mace/ops/opencl/cl/pooling_buffer.cl")) unused_var = repository_ctx.path(Label("//:mace/ops/opencl/cl/pooling_buffer.cl"))
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册