// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#pragma once
#include <cudnn.h>
#include <memory>
#include "lite/backends/cuda/cuda_utils.h"
#include "lite/backends/cuda/math/gemm.h"
#include "lite/core/kernel.h"

namespace paddle {
namespace lite {
namespace kernels {
namespace cuda {

// Divisor used by the single-argument CUDA_GET_BLOCKS overload below
// (presumably the threads-per-block for kernel launches; confirm against
// the launch sites).
const int CUDA_NUM_THREADS = 512;

// Returns ceil(N / CUDA_NUM_THREADS): the number of CUDA_NUM_THREADS-sized
// groups needed to cover N items.
//
// The sum is evaluated in `long long` so that N close to INT_MAX does not
// overflow `int` (signed overflow is undefined behaviour). For every N where
// the plain `int` expression was well defined, the result is identical.
inline int CUDA_GET_BLOCKS(const int N) {
  const long long padded = static_cast<long long>(N) + CUDA_NUM_THREADS - 1;
  return static_cast<int>(padded / CUDA_NUM_THREADS);
}
// Returns ceil(N / base): the number of `base`-sized groups needed to cover
// N items.
//
// The sum is evaluated in `long long` so that `N + base - 1` cannot overflow
// `int` (signed overflow is undefined behaviour). For every input where the
// plain `int` expression was well defined, the result is identical.
// `base` must be non-zero; as before, a zero base divides by zero.
inline int CUDA_GET_BLOCKS(const int N, const int base) {
  const long long padded = static_cast<long long>(N) + base - 1;
  return static_cast<int>(padded / base);
}

// Kernel declaration for the search_fc operator, derived from
// KernelLite<TARGET(kCUDA), PRECISION(kFloat)>.
//
// Only the interface is declared here; PrepareForRun() and Run() are not
// defined in this header.
template <typename T>
class SearchFcCompute : public KernelLite<TARGET(kCUDA), PRECISION(kFloat)> {
 public:
  // Parameter struct type associated with this kernel.
  using param_t = operators::SearchFcParam;

  // Overrides the base-class preparation hook.
  void PrepareForRun() override;

  // Overrides the base-class execution hook.
  void Run() override;

  virtual ~SearchFcCompute() = default;

 private:
  // GEMM helper owned by this kernel; starts out null.
  std::unique_ptr<lite::cuda::math::Gemm<float, float>> gemm_impl_{nullptr};

  // Presumably the GEMM problem dimensions (M x K times K x N) -- confirm
  // against the out-of-line implementation. Zero-initialised so they are never
  // read as indeterminate values.
  // NOTE(review): identifiers starting with an underscore followed by an
  // uppercase letter are reserved for the implementation; the names are kept
  // because member definitions outside this header may reference them.
  int _M{0};
  int _K{0};
  int _N{0};
};

}  // namespace cuda
}  // namespace kernels
}  // namespace lite
}  // namespace paddle