提交 e421f0c7 作者: Liangliang He

Fix cpplint warning for core/runtime/opencl

上级 cedb6e8d
...@@ -29,7 +29,7 @@ static cl_channel_type DataTypeToCLChannelType(const DataType t) { ...@@ -29,7 +29,7 @@ static cl_channel_type DataTypeToCLChannelType(const DataType t) {
return 0; return 0;
} }
} }
} } // namespace
OpenCLAllocator::OpenCLAllocator() {} OpenCLAllocator::OpenCLAllocator() {}
...@@ -93,7 +93,7 @@ void *OpenCLAllocator::Map(void *buffer, size_t offset, size_t nbytes) const { ...@@ -93,7 +93,7 @@ void *OpenCLAllocator::Map(void *buffer, size_t offset, size_t nbytes) const {
return mapped_ptr; return mapped_ptr;
} }
// TODO : there is something wrong with half type. // TODO(liuqi) there is something wrong with half type.
void *OpenCLAllocator::MapImage(void *buffer, void *OpenCLAllocator::MapImage(void *buffer,
const std::vector<size_t> &image_shape, const std::vector<size_t> &image_shape,
std::vector<size_t> *mapped_image_pitch) const { std::vector<size_t> *mapped_image_pitch) const {
......
...@@ -5,6 +5,8 @@ ...@@ -5,6 +5,8 @@
#ifndef MACE_CORE_RUNTIME_OPENCL_OPENCL_ALLOCATOR_H_ #ifndef MACE_CORE_RUNTIME_OPENCL_OPENCL_ALLOCATOR_H_
#define MACE_CORE_RUNTIME_OPENCL_OPENCL_ALLOCATOR_H_ #define MACE_CORE_RUNTIME_OPENCL_OPENCL_ALLOCATOR_H_
#include <vector>
#include "mace/core/allocator.h" #include "mace/core/allocator.h"
namespace mace { namespace mace {
......
//
// Copyright (c) 2017 XiaoMi All rights reserved.
//
#include <fstream> #include <fstream>
#include <map> #include <map>
#include <string> #include <string>
...@@ -12,8 +16,8 @@ namespace mace { ...@@ -12,8 +16,8 @@ namespace mace {
bool GetSourceOrBinaryProgram(const std::string &program_name, bool GetSourceOrBinaryProgram(const std::string &program_name,
const std::string &binary_file_name_prefix, const std::string &binary_file_name_prefix,
cl::Context &context, const cl::Context &context,
cl::Device &device, const cl::Device &device,
cl::Program *program, cl::Program *program,
bool *is_binary) { bool *is_binary) {
extern const std::map<std::string, std::vector<unsigned char>> extern const std::map<std::string, std::vector<unsigned char>>
......
...@@ -2,8 +2,8 @@ ...@@ -2,8 +2,8 @@
// Copyright (c) 2017 XiaoMi All rights reserved. // Copyright (c) 2017 XiaoMi All rights reserved.
// //
#ifndef MACE_CORE_RUNTIME_OPENCL_EXTENSION_H_ #ifndef MACE_CORE_RUNTIME_OPENCL_OPENCL_EXTENSION_H_
#define MACE_CORE_RUNTIME_OPENCL_EXTENSION_H_ #define MACE_CORE_RUNTIME_OPENCL_OPENCL_EXTENSION_H_
#include "mace/core/runtime/opencl/cl2_header.h" #include "mace/core/runtime/opencl/cl2_header.h"
...@@ -25,4 +25,4 @@ typedef cl_uint cl_priority_hint; ...@@ -25,4 +25,4 @@ typedef cl_uint cl_priority_hint;
#define CL_PRIORITY_HINT_NORMAL_QCOM 0x40CB #define CL_PRIORITY_HINT_NORMAL_QCOM 0x40CB
#define CL_PRIORITY_HINT_LOW_QCOM 0x40CC #define CL_PRIORITY_HINT_LOW_QCOM 0x40CC
#endif // MACE_CORE_RUNTIME_OPENCL_EXTENSION_H_ #endif // MACE_CORE_RUNTIME_OPENCL_OPENCL_EXTENSION_H_
//
// Copyright (c) 2017 XiaoMi All rights reserved.
//
#include <map> #include <map>
#include <string> #include <string>
#include <unordered_map> #include <unordered_map>
...@@ -10,8 +14,8 @@ namespace mace { ...@@ -10,8 +14,8 @@ namespace mace {
bool GetSourceOrBinaryProgram(const std::string &program_name, bool GetSourceOrBinaryProgram(const std::string &program_name,
const std::string &binary_file_name_prefix, const std::string &binary_file_name_prefix,
cl::Context &context, const cl::Context &context,
cl::Device &device, const cl::Device &device,
cl::Program *program, cl::Program *program,
bool *is_binary) { bool *is_binary) {
extern const std::map<std::string, std::vector<unsigned char>> extern const std::map<std::string, std::vector<unsigned char>>
......
...@@ -2,13 +2,16 @@ ...@@ -2,13 +2,16 @@
// Copyright (c) 2017 XiaoMi All rights reserved. // Copyright (c) 2017 XiaoMi All rights reserved.
// //
#include "mace/core/runtime/opencl/opencl_runtime.h"
#include <cstdlib> #include <cstdlib>
#include <fstream> #include <fstream>
#include <memory> #include <memory>
#include <mutex> #include <mutex> // NOLINT(build/c++11)
#include <string>
#include <vector>
#include "mace/core/runtime/opencl/opencl_extension.h" #include "mace/core/runtime/opencl/opencl_extension.h"
#include "mace/core/runtime/opencl/opencl_runtime.h"
#include "mace/public/mace.h" #include "mace/public/mace.h"
#include "mace/utils/tuner.h" #include "mace/utils/tuner.h"
...@@ -78,48 +81,48 @@ OpenCLRuntime *OpenCLRuntime::CreateGlobal(GPUType gpu_type, ...@@ -78,48 +81,48 @@ OpenCLRuntime *OpenCLRuntime::CreateGlobal(GPUType gpu_type,
return opencl_runtime_instance; return opencl_runtime_instance;
} }
void ParseOpenCLRuntimeConfig(cl_context_properties *properties, void ParseOpenCLRuntimeConfig(std::vector<cl_context_properties> *properties,
GPUType gpu_type, GPUType gpu_type,
GPUPerfHint gpu_perf_hint, GPUPerfHint gpu_perf_hint,
GPUPriorityHint gpu_priority_hint) { GPUPriorityHint gpu_priority_hint) {
int index = 0; MACE_CHECK_NOTNULL(properties);
if (gpu_type == GPUType::ADRENO) { if (gpu_type == GPUType::ADRENO) {
switch (gpu_perf_hint) { switch (gpu_perf_hint) {
case GPUPerfHint::PERF_LOW: case GPUPerfHint::PERF_LOW:
properties[index++] = CL_CONTEXT_PERF_HINT_QCOM; properties->push_back(CL_CONTEXT_PERF_HINT_QCOM);
properties[index++] = CL_PERF_HINT_LOW_QCOM; properties->push_back(CL_PERF_HINT_LOW_QCOM);
break; break;
case GPUPerfHint::PERF_NORMAL: case GPUPerfHint::PERF_NORMAL:
properties[index++] = CL_CONTEXT_PERF_HINT_QCOM; properties->push_back(CL_CONTEXT_PERF_HINT_QCOM);
properties[index++] = CL_PERF_HINT_NORMAL_QCOM; properties->push_back(CL_PERF_HINT_NORMAL_QCOM);
break; break;
case GPUPerfHint::PERF_HIGH: case GPUPerfHint::PERF_HIGH:
properties[index++] = CL_CONTEXT_PERF_HINT_QCOM; properties->push_back(CL_CONTEXT_PERF_HINT_QCOM);
properties[index++] = CL_PERF_HINT_HIGH_QCOM; properties->push_back(CL_PERF_HINT_HIGH_QCOM);
break; break;
default:break; default:break;
} }
switch (gpu_priority_hint) { switch (gpu_priority_hint) {
case GPUPriorityHint::PRIORITY_LOW: case GPUPriorityHint::PRIORITY_LOW:
properties[index++] = CL_CONTEXT_PRIORITY_HINT_QCOM; properties->push_back(CL_CONTEXT_PRIORITY_HINT_QCOM);
properties[index++] = CL_PRIORITY_HINT_LOW_QCOM; properties->push_back(CL_PRIORITY_HINT_LOW_QCOM);
break; break;
case GPUPriorityHint::PRIORITY_NORMAL: case GPUPriorityHint::PRIORITY_NORMAL:
properties[index++] = CL_CONTEXT_PRIORITY_HINT_QCOM; properties->push_back(CL_CONTEXT_PRIORITY_HINT_QCOM);
properties[index++] = CL_PRIORITY_HINT_NORMAL_QCOM; properties->push_back(CL_PRIORITY_HINT_NORMAL_QCOM);
break; break;
case GPUPriorityHint::PRIORITY_HIGH: case GPUPriorityHint::PRIORITY_HIGH:
properties[index++] = CL_CONTEXT_PRIORITY_HINT_QCOM; properties->push_back(CL_CONTEXT_PRIORITY_HINT_QCOM);
properties[index++] = CL_PRIORITY_HINT_HIGH_QCOM; properties->push_back(CL_PRIORITY_HINT_HIGH_QCOM);
break; break;
default:break; default:break;
} }
} else { } else {
// TODO: support Mali GPU context properties LOG(WARNING) << "GPU options are only supported by Adreno GPU";
} }
// The properties list should be terminated with 0 // The properties list should be terminated with 0
properties[index] = 0; properties->push_back(0);
} }
OpenCLRuntime::OpenCLRuntime(GPUType gpu_type, GPUPerfHint gpu_perf_hint, OpenCLRuntime::OpenCLRuntime(GPUType gpu_type, GPUPerfHint gpu_perf_hint,
...@@ -165,12 +168,12 @@ OpenCLRuntime::OpenCLRuntime(GPUType gpu_type, GPUPerfHint gpu_perf_hint, ...@@ -165,12 +168,12 @@ OpenCLRuntime::OpenCLRuntime(GPUType gpu_type, GPUPerfHint gpu_perf_hint,
properties |= CL_QUEUE_PROFILING_ENABLE; properties |= CL_QUEUE_PROFILING_ENABLE;
} }
std::unique_ptr<cl_context_properties[]> context_properties( std::vector<cl_context_properties> context_properties;
new cl_context_properties[5]); context_properties.reserve(5);
ParseOpenCLRuntimeConfig(context_properties.get(), gpu_type, gpu_perf_hint, ParseOpenCLRuntimeConfig(&context_properties, gpu_type, gpu_perf_hint,
gpu_priority_hint); gpu_priority_hint);
cl::Context context({gpu_device}, context_properties.get()); cl::Context context({gpu_device}, context_properties.data());
cl::CommandQueue command_queue(context, gpu_device, properties); cl::CommandQueue command_queue(context, gpu_device, properties);
const char *kernel_path = getenv("MACE_KERNEL_PATH"); const char *kernel_path = getenv("MACE_KERNEL_PATH");
...@@ -198,7 +201,7 @@ cl::CommandQueue &OpenCLRuntime::command_queue() { return *command_queue_; } ...@@ -198,7 +201,7 @@ cl::CommandQueue &OpenCLRuntime::command_queue() { return *command_queue_; }
std::string OpenCLRuntime::GenerateCLBinaryFilenamePrefix( std::string OpenCLRuntime::GenerateCLBinaryFilenamePrefix(
const std::string &filename_msg) { const std::string &filename_msg) {
// TODO This can be long and slow, fix it // TODO(heliangliang) This can be long and slow, fix it
std::string filename_prefix = filename_msg; std::string filename_prefix = filename_msg;
for (auto it = filename_prefix.begin(); it != filename_prefix.end(); ++it) { for (auto it = filename_prefix.begin(); it != filename_prefix.end(); ++it) {
if (*it == ' ' || *it == '-' || *it == '=') { if (*it == ' ' || *it == '-' || *it == '=') {
...@@ -210,8 +213,8 @@ std::string OpenCLRuntime::GenerateCLBinaryFilenamePrefix( ...@@ -210,8 +213,8 @@ std::string OpenCLRuntime::GenerateCLBinaryFilenamePrefix(
extern bool GetSourceOrBinaryProgram(const std::string &program_name, extern bool GetSourceOrBinaryProgram(const std::string &program_name,
const std::string &binary_file_name_prefix, const std::string &binary_file_name_prefix,
cl::Context &context, const cl::Context &context,
cl::Device &device, const cl::Device &device,
cl::Program *program, cl::Program *program,
bool *is_opencl_binary); bool *is_opencl_binary);
...@@ -317,13 +320,13 @@ void OpenCLRuntime::GetCallStats(const cl::Event &event, CallStats *stats) { ...@@ -317,13 +320,13 @@ void OpenCLRuntime::GetCallStats(const cl::Event &event, CallStats *stats) {
} }
uint32_t OpenCLRuntime::GetDeviceMaxWorkGroupSize() { uint32_t OpenCLRuntime::GetDeviceMaxWorkGroupSize() {
unsigned long long size = 0; uint64_t size = 0;
device_->getInfo(CL_DEVICE_MAX_WORK_GROUP_SIZE, &size); device_->getInfo(CL_DEVICE_MAX_WORK_GROUP_SIZE, &size);
return static_cast<uint32_t>(size); return static_cast<uint32_t>(size);
} }
uint32_t OpenCLRuntime::GetKernelMaxWorkGroupSize(const cl::Kernel &kernel) { uint32_t OpenCLRuntime::GetKernelMaxWorkGroupSize(const cl::Kernel &kernel) {
unsigned long long size = 0; uint64_t size = 0;
kernel.getWorkGroupInfo(*device_, CL_KERNEL_WORK_GROUP_SIZE, &size); kernel.getWorkGroupInfo(*device_, CL_KERNEL_WORK_GROUP_SIZE, &size);
return static_cast<uint32_t>(size); return static_cast<uint32_t>(size);
} }
......
...@@ -7,8 +7,9 @@ ...@@ -7,8 +7,9 @@
#include <map> #include <map>
#include <memory> #include <memory>
#include <mutex> #include <mutex> // NOLINT(build/c++11)
#include <set> #include <set>
#include <string>
#include "mace/core/future.h" #include "mace/core/future.h"
#include "mace/core/runtime/opencl/cl2_header.h" #include "mace/core/runtime/opencl/cl2_header.h"
...@@ -20,7 +21,7 @@ namespace mace { ...@@ -20,7 +21,7 @@ namespace mace {
class OpenCLProfilingTimer : public Timer { class OpenCLProfilingTimer : public Timer {
public: public:
explicit OpenCLProfilingTimer(const cl::Event *event) explicit OpenCLProfilingTimer(const cl::Event *event)
: event_(event), accumulated_micros_(0) {}; : event_(event), accumulated_micros_(0) {}
void StartTiming() override; void StartTiming() override;
void StopTiming() override; void StopTiming() override;
void AccumulateTiming() override; void AccumulateTiming() override;
......
...@@ -2,12 +2,14 @@ ...@@ -2,12 +2,14 @@
// Copyright (c) 2017 XiaoMi All rights reserved. // Copyright (c) 2017 XiaoMi All rights reserved.
// //
#include "CL/opencl.h"
#include "mace/core/runtime/opencl/opencl_wrapper.h" #include "mace/core/runtime/opencl/opencl_wrapper.h"
#include "mace/utils/logging.h"
#include <CL/opencl.h>
#include <dlfcn.h> #include <dlfcn.h>
#include <string>
#include <vector>
#include "mace/utils/logging.h"
/** /**
* Wrapper of OpenCL 2.0 (based on 1.2) * Wrapper of OpenCL 2.0 (based on 1.2)
...@@ -51,13 +53,13 @@ class OpenCLLibraryImpl final { ...@@ -51,13 +53,13 @@ class OpenCLLibraryImpl final {
const cl_context_properties *, const cl_context_properties *,
cl_uint, cl_uint,
const cl_device_id *, const cl_device_id *,
void(CL_CALLBACK *)(const char *, const void *, size_t, void *), void (CL_CALLBACK *)(const char *, const void *, size_t, void *), // NOLINT
void *, void *,
cl_int *); cl_int *);
using clCreateContextFromTypeFunc = cl_context (*)( using clCreateContextFromTypeFunc = cl_context (*)(
const cl_context_properties *, const cl_context_properties *,
cl_device_type, cl_device_type,
void(CL_CALLBACK *)(const char *, const void *, size_t, void *), void (CL_CALLBACK *)(const char *, const void *, size_t, void *), // NOLINT
void *, void *,
cl_int *); cl_int *);
using clReleaseContextFunc = cl_int (*)(cl_context); using clReleaseContextFunc = cl_int (*)(cl_context);
......
...@@ -54,7 +54,7 @@ __kernel void conv_2d(__read_only image2d_t input, /* [c%4 * w * c/4, h * b] */ ...@@ -54,7 +54,7 @@ __kernel void conv_2d(__read_only image2d_t input, /* [c%4 * w * c/4, h * b] */
const int in_idx = mul24(in_ch_blk, in_width); const int in_idx = mul24(in_ch_blk, in_width);
int filter_x_part0 = in_ch_blk << 2; int filter_x_part0 = in_ch_blk << 2;
for (short hb_idx = 0; hb_idx < filter_height; ++hb_idx) { for (short hb_idx = 0; hb_idx < filter_height; ++hb_idx) {
// TODO (heliangliang) optimize out these muls // TODO(heliangliang) optimize out these muls
int in_hb_value = height_idx + mul24(hb_idx, dilation_h); int in_hb_value = height_idx + mul24(hb_idx, dilation_h);
in_hb_value = select(in_hb_value + batch_idx, in_hb_value = select(in_hb_value + batch_idx,
-1, -1,
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请注册