提交 7b95cd02 编写于 作者: L liuruilong

merge opencl

cmake_minimum_required(VERSION 3.0) cmake_minimum_required(VERSION 3.6)
project(paddle-mobile)
# select the platform to build
option(CPU "armv7 with neon support" ON)
option(MALI_GPU "mali gpu support" OFF)
option(FPGA "fpga support" OFF)
option(USE_OPENMP "openmp support" OFF) option(USE_OPENMP "openmp support" ON)
option(DEBUGING "enable debug mode" ON) option(DEBUGING "enable debug mode" ON)
option(USE_EXCEPTION "use std exception" OFF) option(USE_EXCEPTION "use std exception" ON)
option(LOG_PROFILE "log profile" OFF) option(LOG_PROFILE "log profile" OFF)
# select the platform to build
option(CPU "armv7 with neon" ON)
option(GPU_MALI "mali gpu" OFF)
option(GPU_CL "opencl gpu" ON)
option(FPGA "fpga" OFF)
project(paddle-mobile)
file(GLOB_RECURSE PADDLE_MOBILE_CC src/*.cc src/*.cpp src/*.c src/*.mm) file(GLOB_RECURSE PADDLE_MOBILE_CC src/*.cc src/*.cpp src/*.c src/*.mm)
file(GLOB_RECURSE PADDLE_MOBILE_H src/*.h) file(GLOB_RECURSE PADDLE_MOBILE_H src/*.h)
...@@ -70,7 +71,27 @@ else() ...@@ -70,7 +71,27 @@ else()
endforeach() endforeach()
endif() endif()
if(MALI_GPU) if (GPU_CL)
add_definitions(-DPADDLE_MOBILE_CL)
# opencl version
add_definitions(-DCL_TARGET_OPENCL_VERSION=220)
link_libraries(${CMAKE_CURRENT_LIST_DIR}/third_party/opencl/libOpenCL.so)
include_directories(third_party/opencl/OpenCL-Headers)
else()
file(GLOB_RECURSE _tmp_list src/framework/cl/*.cpp src/operators/kernel/cl/*.cpp)
foreach(f ${_tmp_list})
list(REMOVE_ITEM PADDLE_MOBILE_CC ${f})
endforeach()
file(GLOB_RECURSE _tmp_list_h src/framework/cl/*.h)
foreach(f ${_tmp_list_h})
list(REMOVE_ITEM PADDLE_MOBILE_H ${f})
endforeach()
endif()
if (GPU_MALI)
add_definitions(-DPADDLE_MOBILE_MALI_GPU) add_definitions(-DPADDLE_MOBILE_MALI_GPU)
add_definitions(-DUSE_ACL=1) add_definitions(-DUSE_ACL=1)
add_definitions(-DUSE_OPENCL) add_definitions(-DUSE_OPENCL)
...@@ -124,17 +145,17 @@ endif() ...@@ -124,17 +145,17 @@ endif()
if(ANDROID_NDK_TOOLCHAIN_INCLUDED) if(ANDROID_NDK_TOOLCHAIN_INCLUDED)
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -llog") set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -llog")
else() else()
list(REMOVE_ITEM PADDLE_MOBILE_H ${CMAKE_CURRENT_SOURCE_DIR}/src/jni/paddle_mobile_jni.h) list(REMOVE_ITEM PADDLE_MOBILE_H ${CMAKE_CURRENT_SOURCE_DIR}/src/io/jni/paddle_mobile_jni.h)
list(REMOVE_ITEM PADDLE_MOBILE_CC ${CMAKE_CURRENT_SOURCE_DIR}/src/jni/paddle_mobile_jni.cpp) list(REMOVE_ITEM PADDLE_MOBILE_CC ${CMAKE_CURRENT_SOURCE_DIR}/src/io/jni/paddle_mobile_jni.cpp)
list(REMOVE_ITEM PADDLE_MOBILE_H ${CMAKE_CURRENT_SOURCE_DIR}/src/operators/math/math_func_neon.h) list(REMOVE_ITEM PADDLE_MOBILE_H ${CMAKE_CURRENT_SOURCE_DIR}/src/operators/math/math_func_neon.h)
endif() endif()
if(IS_IOS) if(IS_IOS)
else() else()
list(REMOVE_ITEM PADDLE_MOBILE_H ${CMAKE_CURRENT_SOURCE_DIR}/src/ios_io/PaddleMobileCPU.h) list(REMOVE_ITEM PADDLE_MOBILE_H ${CMAKE_CURRENT_SOURCE_DIR}/src/io/ios_io/PaddleMobileCPU.h)
list(REMOVE_ITEM PADDLE_MOBILE_CC ${CMAKE_CURRENT_SOURCE_DIR}/src/ios_io/PaddleMobileCPU.mm) list(REMOVE_ITEM PADDLE_MOBILE_CC ${CMAKE_CURRENT_SOURCE_DIR}/src/io/ios_io/PaddleMobileCPU.mm)
list(REMOVE_ITEM PADDLE_MOBILE_H ${CMAKE_CURRENT_SOURCE_DIR}/src/ios_io/op_symbols.h) list(REMOVE_ITEM PADDLE_MOBILE_H ${CMAKE_CURRENT_SOURCE_DIR}/src/io/ios_io/op_symbols.h)
endif() endif ()
set(CMAKE_VERBOSE_MAKEFILE ON) set(CMAKE_VERBOSE_MAKEFILE ON)
set(CMAKE_EXPORT_COMPILE_COMMANDS ON) set(CMAKE_EXPORT_COMPILE_COMMANDS ON)
......
...@@ -13,7 +13,9 @@ See the License for the specific language governing permissions and ...@@ -13,7 +13,9 @@ See the License for the specific language governing permissions and
limitations under the License. */ limitations under the License. */
#pragma once #pragma once
#include <chrono> #include <chrono> // NOLINT
namespace paddle_mobile {
using Time = decltype(std::chrono::high_resolution_clock::now()); using Time = decltype(std::chrono::high_resolution_clock::now());
...@@ -25,3 +27,5 @@ inline double time_diff(Time t1, Time t2) { ...@@ -25,3 +27,5 @@ inline double time_diff(Time t1, Time t2) {
ms counter = std::chrono::duration_cast<ms>(diff); ms counter = std::chrono::duration_cast<ms>(diff);
return counter.count() / 1000.0; return counter.count() / 1000.0;
} }
} // namespace paddle_mobile
...@@ -46,7 +46,8 @@ struct PaddleMobileException : public std::exception { ...@@ -46,7 +46,8 @@ struct PaddleMobileException : public std::exception {
std::string detail(buffer); \ std::string detail(buffer); \
throw paddle_mobile::PaddleMobileException("Custom Exception", buffer, \ throw paddle_mobile::PaddleMobileException("Custom Exception", buffer, \
__FILE__, __LINE__); \ __FILE__, __LINE__); \
} } \
exit(0);
#define PADDLE_MOBILE_ENFORCE(stat, ...) \ #define PADDLE_MOBILE_ENFORCE(stat, ...) \
{ \ { \
......
...@@ -39,7 +39,13 @@ struct PrecisionTrait<Precision::FP16> { ...@@ -39,7 +39,13 @@ struct PrecisionTrait<Precision::FP16> {
}; };
//! device type //! device type
enum DeviceTypeEnum { kINVALID = -1, kCPU = 0, kFPGA = 1, kGPU_MALI = 2 }; enum DeviceTypeEnum {
kINVALID = -1,
kCPU = 0,
kFPGA = 1,
kGPU_MALI = 2,
kGPU_CL = 3
};
template <DeviceTypeEnum T> template <DeviceTypeEnum T>
struct DeviceType {}; struct DeviceType {};
...@@ -47,6 +53,7 @@ struct DeviceType {}; ...@@ -47,6 +53,7 @@ struct DeviceType {};
typedef DeviceType<kCPU> CPU; typedef DeviceType<kCPU> CPU;
typedef DeviceType<kFPGA> FPGA; typedef DeviceType<kFPGA> FPGA;
typedef DeviceType<kGPU_MALI> GPU_MALI; typedef DeviceType<kGPU_MALI> GPU_MALI;
typedef DeviceType<kGPU_CL> GPU_CL;
//! data type //! data type
enum DataType { enum DataType {
......
...@@ -117,9 +117,9 @@ class Attribute { ...@@ -117,9 +117,9 @@ class Attribute {
template <typename Vistor> template <typename Vistor>
static typename Vistor::type_t ApplyVistor(Vistor vistor, Attribute attr) { static typename Vistor::type_t ApplyVistor(Vistor vistor, Attribute attr) {
if (attr.variant_.TypeId() == typeid(int).hash_code()) { if (attr.variant_.TypeId() == typeid(int).hash_code()) { // NOLINT
return vistor(attr.variant_.Get<int>()); return vistor(attr.variant_.Get<int>());
} else if (attr.variant_.TypeId() == typeid(float).hash_code()) { } else if (attr.variant_.TypeId() == typeid(float).hash_code()) { // NOLINT
return vistor(attr.variant_.Get<float>()); return vistor(attr.variant_.Get<float>());
} else if (attr.variant_.TypeId() == typeid(string).hash_code()) { } else if (attr.variant_.TypeId() == typeid(string).hash_code()) {
return vistor(attr.variant_.GetString()); return vistor(attr.variant_.GetString());
...@@ -129,7 +129,7 @@ class Attribute { ...@@ -129,7 +129,7 @@ class Attribute {
return vistor(attr.variant_.Get<vector<float>>()); return vistor(attr.variant_.Get<vector<float>>());
} else if (attr.variant_.TypeId() == typeid(vector<string>).hash_code()) { } else if (attr.variant_.TypeId() == typeid(vector<string>).hash_code()) {
return vistor(attr.variant_.Get<vector<string>>()); return vistor(attr.variant_.Get<vector<string>>());
} else if (attr.variant_.TypeId() == typeid(bool).hash_code()) { } else if (attr.variant_.TypeId() == typeid(bool).hash_code()) { // NOLINT
return vistor(attr.variant_.Get<bool>()); return vistor(attr.variant_.Get<bool>());
} else if (attr.variant_.TypeId() == typeid(vector<bool>).hash_code()) { } else if (attr.variant_.TypeId() == typeid(vector<bool>).hash_code()) {
return vistor(attr.variant_.Get<vector<bool>>()); return vistor(attr.variant_.Get<vector<bool>>());
...@@ -137,7 +137,6 @@ class Attribute { ...@@ -137,7 +137,6 @@ class Attribute {
return vistor(attr.variant_.Get<int64_t>()); return vistor(attr.variant_.Get<int64_t>());
} else { } else {
PADDLE_MOBILE_THROW_EXCEPTION("type not support"); PADDLE_MOBILE_THROW_EXCEPTION("type not support");
exit(0);
} }
} }
......
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#pragma once
#include "CL/cl.h"
struct CLKernelDeleter {
template <class T>
void operator()(T *clKernelObj) {
clReleaseKernel(clKernelObj);
}
};
struct CLMemDeleter {
template <class T>
void operator()(T *clMemObj) {
clReleaseMemObject(clMemObj);
}
};
struct CLEventDeleter {
template <class T>
void operator()(T *clEventObj) {
clReleaseEvent(clEventObj);
}
};
struct CLCommQueueDeleter {
template <class T>
void operator()(T *clQueueObj) {
clReleaseCommandQueue(clQueueObj);
}
};
struct CLContextDeleter {
template <class T>
void operator()(T *clContextObj) {
clReleaseContext(clContextObj);
}
};
struct CLProgramDeleter {
template <class T>
void operator()(T *clProgramObj) {
clReleaseProgram(clProgramObj);
}
};
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "framework/cl/cl_engine.h"
#include "CL/cl.h"
#include "framework/cl/cl_tool.h"
#include <cstdlib>
#include <cstring>
namespace paddle_mobile {
namespace framework {
bool CLEngine::Init() {
if (initialized_) {
return true;
}
cl_int status;
SetPlatform();
SetClDeviceId();
initialized_ = true;
return initialized_;
// setClCommandQueue();
// std::string filename = "./HelloWorld_Kernel.cl";
// loadKernelFromFile(filename.c_str());
// buildProgram();
}
CLEngine *CLEngine::Instance() {
static CLEngine cl_engine_;
cl_engine_.Init();
return &cl_engine_;
}
bool CLEngine::SetPlatform() {
platform_ = NULL; // the chosen platform
cl_uint numPlatforms; // the NO. of platforms
cl_int status = clGetPlatformIDs(0, NULL, &numPlatforms);
/**For clarity, choose the first available platform. */
if (numPlatforms > 0) {
cl_platform_id *platforms = reinterpret_cast<cl_platform_id *>(
malloc(numPlatforms * sizeof(cl_platform_id)));
status = clGetPlatformIDs(numPlatforms, platforms, NULL);
platform_ = platforms[0];
free(platforms);
return true;
} else {
return false;
}
}
bool CLEngine::SetClDeviceId() {
cl_uint numDevices = 0;
devices_ = NULL;
cl_int status =
clGetDeviceIDs(platform_, CL_DEVICE_TYPE_GPU, 0, NULL, &numDevices);
if (numDevices > 0) {
devices_ = reinterpret_cast<cl_device_id *>(
malloc(numDevices * sizeof(cl_device_id)));
status = clGetDeviceIDs(platform_, CL_DEVICE_TYPE_GPU, numDevices, devices_,
NULL);
return true;
}
return false;
}
// std::unique_ptr<_cl_kernel, clKernel_deleter> CLEngine::GSetKernel(
// const std::string &kernel_name) {
// std::unique_ptr<_cl_kernel, clKernel_deleter> kernel(
// clCreateKernel(program_.get(), kernel_name.c_str(), NULL));
// return std::move(kernel);
//}
//
// bool CLEngine::SetClCommandQueue() {
// cl_int status;
// command_queue_.reset(
// clCreateCommandQueue(context_.get(), devices_[0], 0, &status));
// return true;
//}
// bool CLEngine::SetClContext() {
// context_.reset(clCreateContext(NULL, 1, devices_, NULL, NULL, NULL));
// return true;
//}
// bool CLEngine::LoadKernelFromFile(const char *kernel_file) {
// size_t size;
// char *str;
// std::fstream f(kernel_file, (std::fstream::in | std::fstream::binary));
//
// if (!f.is_open()) {
// return false;
// }
//
// size_t fileSize;
// f.seekg(0, std::fstream::end);
// size = fileSize = (size_t)f.tellg();
// f.seekg(0, std::fstream::beg);
// str = new char[size + 1];
// if (!str) {
// f.close();
// return 0;
// }
//
// f.read(str, fileSize);
// f.close();
// str[size] = '\0';
// const char *source = str;
// size_t sourceSize[] = {strlen(source)};
// program_.reset(
// clCreateProgramWithSource(context_.get(), 1, &source, sourceSize,
// NULL));
// return true;
//}
} // namespace framework
} // namespace paddle_mobile
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#pragma once
#include <memory>
#include <string>
#include "CL/cl.h"
#include "common/enforce.h"
#include "common/log.h"
#include "framework/cl/cl_deleter.h"
#include "framework/cl/cl_tool.h"
namespace paddle_mobile {
namespace framework {
class CLEngine {
public:
static CLEngine *Instance();
bool Init();
std::unique_ptr<_cl_context, CLContextDeleter> CreateContext() {
cl_int status;
cl_context c = clCreateContext(NULL, 1, devices_, NULL, NULL, &status);
std::unique_ptr<_cl_context, CLContextDeleter> context_ptr(c);
CL_CHECK_ERRORS(status);
return std::move(context_ptr);
}
std::unique_ptr<_cl_command_queue, CLCommQueueDeleter> CreateClCommandQueue(
cl_context context) {
cl_int status;
cl_command_queue queue =
clCreateCommandQueue(context, devices_[0], 0, &status);
std::unique_ptr<_cl_command_queue, CLCommQueueDeleter> command_queue_ptr(
queue);
CL_CHECK_ERRORS(status);
return std::move(command_queue_ptr);
}
std::unique_ptr<_cl_program, CLProgramDeleter> CreateProgramWith(
cl_context context, std::string file_name) {
FILE *file = fopen(file_name.c_str(), "rb");
PADDLE_MOBILE_ENFORCE(file != nullptr, "can't open file: %s ",
file_name.c_str());
fseek(file, 0, SEEK_END);
int64_t size = ftell(file);
PADDLE_MOBILE_ENFORCE(size > 0, "size is too small");
rewind(file);
char *data = new char[size + 1];
size_t bytes_read = fread(data, 1, size, file);
data[size] = '\0';
PADDLE_MOBILE_ENFORCE(bytes_read == size,
"read binary file bytes do not match with fseek");
fclose(file);
const char *source = data;
size_t sourceSize[] = {strlen(source)};
cl_program p =
clCreateProgramWithSource(context, 1, &source, sourceSize, &status_);
DLOG << " cl kernel file name: " << file_name;
DLOG << " source size: " << sourceSize[0];
CL_CHECK_ERRORS(status_);
std::unique_ptr<_cl_program, CLProgramDeleter> program_ptr(p);
return std::move(program_ptr);
}
std::unique_ptr<_cl_event, CLEventDeleter> CreateEvent(cl_context context) {
cl_event event = clCreateUserEvent(context, &status_);
std::unique_ptr<_cl_event, CLEventDeleter> event_ptr(event);
CL_CHECK_ERRORS(status_);
return std::move(event_ptr);
}
bool BuildProgram(cl_program program) {
cl_int status;
status = clBuildProgram(program, 0, 0, "-cl-fast-relaxed-math -I cl_kernel",
0, 0);
CL_CHECK_ERRORS(status);
if (status_ == CL_BUILD_PROGRAM_FAILURE) {
size_t log_size;
clGetProgramBuildInfo(program, CLEngine::Instance()->DeviceID(),
CL_PROGRAM_BUILD_LOG, 0, NULL, &log_size);
char *log = reinterpret_cast<char *>(malloc(log_size));
clGetProgramBuildInfo(program, CLEngine::Instance()->DeviceID(),
CL_PROGRAM_BUILD_LOG, log_size, log, NULL);
DLOG << " program build error: " << log;
}
if (status == CL_SUCCESS) {
return true;
} else {
return false;
}
}
cl_device_id DeviceID(int index = 0) { return devices_[index]; }
private:
CLEngine() { initialized_ = false; }
bool SetPlatform();
bool SetClDeviceId();
bool initialized_;
cl_platform_id platform_;
cl_device_id *devices_;
cl_int status_;
std::unique_ptr<_cl_program, CLProgramDeleter> program_;
// bool SetClContext();
// bool SetClCommandQueue();
// bool LoadKernelFromFile(const char *kernel_file);
// bool BuildProgram();
};
} // namespace framework
} // namespace paddle_mobile
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
// ftp://ftp.fox-toolkit.org/pub/fasthalffloatconversion.pdf
#include "framework/cl/cl_half.h"
namespace paddle_mobile {
namespace framework {
static const uint32_t mantissatable[2048] = {
0x00000000, 0x33800000, 0x34000000, 0x34400000, 0x34800000, 0x34a00000,
0x34c00000, 0x34e00000, 0x35000000, 0x35100000, 0x35200000, 0x35300000,
0x35400000, 0x35500000, 0x35600000, 0x35700000, 0x35800000, 0x35880000,
0x35900000, 0x35980000, 0x35a00000, 0x35a80000, 0x35b00000, 0x35b80000,
0x35c00000, 0x35c80000, 0x35d00000, 0x35d80000, 0x35e00000, 0x35e80000,
0x35f00000, 0x35f80000, 0x36000000, 0x36040000, 0x36080000, 0x360c0000,
0x36100000, 0x36140000, 0x36180000, 0x361c0000, 0x36200000, 0x36240000,
0x36280000, 0x362c0000, 0x36300000, 0x36340000, 0x36380000, 0x363c0000,
0x36400000, 0x36440000, 0x36480000, 0x364c0000, 0x36500000, 0x36540000,
0x36580000, 0x365c0000, 0x36600000, 0x36640000, 0x36680000, 0x366c0000,
0x36700000, 0x36740000, 0x36780000, 0x367c0000, 0x36800000, 0x36820000,
0x36840000, 0x36860000, 0x36880000, 0x368a0000, 0x368c0000, 0x368e0000,
0x36900000, 0x36920000, 0x36940000, 0x36960000, 0x36980000, 0x369a0000,
0x369c0000, 0x369e0000, 0x36a00000, 0x36a20000, 0x36a40000, 0x36a60000,
0x36a80000, 0x36aa0000, 0x36ac0000, 0x36ae0000, 0x36b00000, 0x36b20000,
0x36b40000, 0x36b60000, 0x36b80000, 0x36ba0000, 0x36bc0000, 0x36be0000,
0x36c00000, 0x36c20000, 0x36c40000, 0x36c60000, 0x36c80000, 0x36ca0000,
0x36cc0000, 0x36ce0000, 0x36d00000, 0x36d20000, 0x36d40000, 0x36d60000,
0x36d80000, 0x36da0000, 0x36dc0000, 0x36de0000, 0x36e00000, 0x36e20000,
0x36e40000, 0x36e60000, 0x36e80000, 0x36ea0000, 0x36ec0000, 0x36ee0000,
0x36f00000, 0x36f20000, 0x36f40000, 0x36f60000, 0x36f80000, 0x36fa0000,
0x36fc0000, 0x36fe0000, 0x37000000, 0x37010000, 0x37020000, 0x37030000,
0x37040000, 0x37050000, 0x37060000, 0x37070000, 0x37080000, 0x37090000,
0x370a0000, 0x370b0000, 0x370c0000, 0x370d0000, 0x370e0000, 0x370f0000,
0x37100000, 0x37110000, 0x37120000, 0x37130000, 0x37140000, 0x37150000,
0x37160000, 0x37170000, 0x37180000, 0x37190000, 0x371a0000, 0x371b0000,
0x371c0000, 0x371d0000, 0x371e0000, 0x371f0000, 0x37200000, 0x37210000,
0x37220000, 0x37230000, 0x37240000, 0x37250000, 0x37260000, 0x37270000,
0x37280000, 0x37290000, 0x372a0000, 0x372b0000, 0x372c0000, 0x372d0000,
0x372e0000, 0x372f0000, 0x37300000, 0x37310000, 0x37320000, 0x37330000,
0x37340000, 0x37350000, 0x37360000, 0x37370000, 0x37380000, 0x37390000,
0x373a0000, 0x373b0000, 0x373c0000, 0x373d0000, 0x373e0000, 0x373f0000,
0x37400000, 0x37410000, 0x37420000, 0x37430000, 0x37440000, 0x37450000,
0x37460000, 0x37470000, 0x37480000, 0x37490000, 0x374a0000, 0x374b0000,
0x374c0000, 0x374d0000, 0x374e0000, 0x374f0000, 0x37500000, 0x37510000,
0x37520000, 0x37530000, 0x37540000, 0x37550000, 0x37560000, 0x37570000,
0x37580000, 0x37590000, 0x375a0000, 0x375b0000, 0x375c0000, 0x375d0000,
0x375e0000, 0x375f0000, 0x37600000, 0x37610000, 0x37620000, 0x37630000,
0x37640000, 0x37650000, 0x37660000, 0x37670000, 0x37680000, 0x37690000,
0x376a0000, 0x376b0000, 0x376c0000, 0x376d0000, 0x376e0000, 0x376f0000,
0x37700000, 0x37710000, 0x37720000, 0x37730000, 0x37740000, 0x37750000,
0x37760000, 0x37770000, 0x37780000, 0x37790000, 0x377a0000, 0x377b0000,
0x377c0000, 0x377d0000, 0x377e0000, 0x377f0000, 0x37800000, 0x37808000,
0x37810000, 0x37818000, 0x37820000, 0x37828000, 0x37830000, 0x37838000,
0x37840000, 0x37848000, 0x37850000, 0x37858000, 0x37860000, 0x37868000,
0x37870000, 0x37878000, 0x37880000, 0x37888000, 0x37890000, 0x37898000,
0x378a0000, 0x378a8000, 0x378b0000, 0x378b8000, 0x378c0000, 0x378c8000,
0x378d0000, 0x378d8000, 0x378e0000, 0x378e8000, 0x378f0000, 0x378f8000,
0x37900000, 0x37908000, 0x37910000, 0x37918000, 0x37920000, 0x37928000,
0x37930000, 0x37938000, 0x37940000, 0x37948000, 0x37950000, 0x37958000,
0x37960000, 0x37968000, 0x37970000, 0x37978000, 0x37980000, 0x37988000,
0x37990000, 0x37998000, 0x379a0000, 0x379a8000, 0x379b0000, 0x379b8000,
0x379c0000, 0x379c8000, 0x379d0000, 0x379d8000, 0x379e0000, 0x379e8000,
0x379f0000, 0x379f8000, 0x37a00000, 0x37a08000, 0x37a10000, 0x37a18000,
0x37a20000, 0x37a28000, 0x37a30000, 0x37a38000, 0x37a40000, 0x37a48000,
0x37a50000, 0x37a58000, 0x37a60000, 0x37a68000, 0x37a70000, 0x37a78000,
0x37a80000, 0x37a88000, 0x37a90000, 0x37a98000, 0x37aa0000, 0x37aa8000,
0x37ab0000, 0x37ab8000, 0x37ac0000, 0x37ac8000, 0x37ad0000, 0x37ad8000,
0x37ae0000, 0x37ae8000, 0x37af0000, 0x37af8000, 0x37b00000, 0x37b08000,
0x37b10000, 0x37b18000, 0x37b20000, 0x37b28000, 0x37b30000, 0x37b38000,
0x37b40000, 0x37b48000, 0x37b50000, 0x37b58000, 0x37b60000, 0x37b68000,
0x37b70000, 0x37b78000, 0x37b80000, 0x37b88000, 0x37b90000, 0x37b98000,
0x37ba0000, 0x37ba8000, 0x37bb0000, 0x37bb8000, 0x37bc0000, 0x37bc8000,
0x37bd0000, 0x37bd8000, 0x37be0000, 0x37be8000, 0x37bf0000, 0x37bf8000,
0x37c00000, 0x37c08000, 0x37c10000, 0x37c18000, 0x37c20000, 0x37c28000,
0x37c30000, 0x37c38000, 0x37c40000, 0x37c48000, 0x37c50000, 0x37c58000,
0x37c60000, 0x37c68000, 0x37c70000, 0x37c78000, 0x37c80000, 0x37c88000,
0x37c90000, 0x37c98000, 0x37ca0000, 0x37ca8000, 0x37cb0000, 0x37cb8000,
0x37cc0000, 0x37cc8000, 0x37cd0000, 0x37cd8000, 0x37ce0000, 0x37ce8000,
0x37cf0000, 0x37cf8000, 0x37d00000, 0x37d08000, 0x37d10000, 0x37d18000,
0x37d20000, 0x37d28000, 0x37d30000, 0x37d38000, 0x37d40000, 0x37d48000,
0x37d50000, 0x37d58000, 0x37d60000, 0x37d68000, 0x37d70000, 0x37d78000,
0x37d80000, 0x37d88000, 0x37d90000, 0x37d98000, 0x37da0000, 0x37da8000,
0x37db0000, 0x37db8000, 0x37dc0000, 0x37dc8000, 0x37dd0000, 0x37dd8000,
0x37de0000, 0x37de8000, 0x37df0000, 0x37df8000, 0x37e00000, 0x37e08000,
0x37e10000, 0x37e18000, 0x37e20000, 0x37e28000, 0x37e30000, 0x37e38000,
0x37e40000, 0x37e48000, 0x37e50000, 0x37e58000, 0x37e60000, 0x37e68000,
0x37e70000, 0x37e78000, 0x37e80000, 0x37e88000, 0x37e90000, 0x37e98000,
0x37ea0000, 0x37ea8000, 0x37eb0000, 0x37eb8000, 0x37ec0000, 0x37ec8000,
0x37ed0000, 0x37ed8000, 0x37ee0000, 0x37ee8000, 0x37ef0000, 0x37ef8000,
0x37f00000, 0x37f08000, 0x37f10000, 0x37f18000, 0x37f20000, 0x37f28000,
0x37f30000, 0x37f38000, 0x37f40000, 0x37f48000, 0x37f50000, 0x37f58000,
0x37f60000, 0x37f68000, 0x37f70000, 0x37f78000, 0x37f80000, 0x37f88000,
0x37f90000, 0x37f98000, 0x37fa0000, 0x37fa8000, 0x37fb0000, 0x37fb8000,
0x37fc0000, 0x37fc8000, 0x37fd0000, 0x37fd8000, 0x37fe0000, 0x37fe8000,
0x37ff0000, 0x37ff8000, 0x38000000, 0x38004000, 0x38008000, 0x3800c000,
0x38010000, 0x38014000, 0x38018000, 0x3801c000, 0x38020000, 0x38024000,
0x38028000, 0x3802c000, 0x38030000, 0x38034000, 0x38038000, 0x3803c000,
0x38040000, 0x38044000, 0x38048000, 0x3804c000, 0x38050000, 0x38054000,
0x38058000, 0x3805c000, 0x38060000, 0x38064000, 0x38068000, 0x3806c000,
0x38070000, 0x38074000, 0x38078000, 0x3807c000, 0x38080000, 0x38084000,
0x38088000, 0x3808c000, 0x38090000, 0x38094000, 0x38098000, 0x3809c000,
0x380a0000, 0x380a4000, 0x380a8000, 0x380ac000, 0x380b0000, 0x380b4000,
0x380b8000, 0x380bc000, 0x380c0000, 0x380c4000, 0x380c8000, 0x380cc000,
0x380d0000, 0x380d4000, 0x380d8000, 0x380dc000, 0x380e0000, 0x380e4000,
0x380e8000, 0x380ec000, 0x380f0000, 0x380f4000, 0x380f8000, 0x380fc000,
0x38100000, 0x38104000, 0x38108000, 0x3810c000, 0x38110000, 0x38114000,
0x38118000, 0x3811c000, 0x38120000, 0x38124000, 0x38128000, 0x3812c000,
0x38130000, 0x38134000, 0x38138000, 0x3813c000, 0x38140000, 0x38144000,
0x38148000, 0x3814c000, 0x38150000, 0x38154000, 0x38158000, 0x3815c000,
0x38160000, 0x38164000, 0x38168000, 0x3816c000, 0x38170000, 0x38174000,
0x38178000, 0x3817c000, 0x38180000, 0x38184000, 0x38188000, 0x3818c000,
0x38190000, 0x38194000, 0x38198000, 0x3819c000, 0x381a0000, 0x381a4000,
0x381a8000, 0x381ac000, 0x381b0000, 0x381b4000, 0x381b8000, 0x381bc000,
0x381c0000, 0x381c4000, 0x381c8000, 0x381cc000, 0x381d0000, 0x381d4000,
0x381d8000, 0x381dc000, 0x381e0000, 0x381e4000, 0x381e8000, 0x381ec000,
0x381f0000, 0x381f4000, 0x381f8000, 0x381fc000, 0x38200000, 0x38204000,
0x38208000, 0x3820c000, 0x38210000, 0x38214000, 0x38218000, 0x3821c000,
0x38220000, 0x38224000, 0x38228000, 0x3822c000, 0x38230000, 0x38234000,
0x38238000, 0x3823c000, 0x38240000, 0x38244000, 0x38248000, 0x3824c000,
0x38250000, 0x38254000, 0x38258000, 0x3825c000, 0x38260000, 0x38264000,
0x38268000, 0x3826c000, 0x38270000, 0x38274000, 0x38278000, 0x3827c000,
0x38280000, 0x38284000, 0x38288000, 0x3828c000, 0x38290000, 0x38294000,
0x38298000, 0x3829c000, 0x382a0000, 0x382a4000, 0x382a8000, 0x382ac000,
0x382b0000, 0x382b4000, 0x382b8000, 0x382bc000, 0x382c0000, 0x382c4000,
0x382c8000, 0x382cc000, 0x382d0000, 0x382d4000, 0x382d8000, 0x382dc000,
0x382e0000, 0x382e4000, 0x382e8000, 0x382ec000, 0x382f0000, 0x382f4000,
0x382f8000, 0x382fc000, 0x38300000, 0x38304000, 0x38308000, 0x3830c000,
0x38310000, 0x38314000, 0x38318000, 0x3831c000, 0x38320000, 0x38324000,
0x38328000, 0x3832c000, 0x38330000, 0x38334000, 0x38338000, 0x3833c000,
0x38340000, 0x38344000, 0x38348000, 0x3834c000, 0x38350000, 0x38354000,
0x38358000, 0x3835c000, 0x38360000, 0x38364000, 0x38368000, 0x3836c000,
0x38370000, 0x38374000, 0x38378000, 0x3837c000, 0x38380000, 0x38384000,
0x38388000, 0x3838c000, 0x38390000, 0x38394000, 0x38398000, 0x3839c000,
0x383a0000, 0x383a4000, 0x383a8000, 0x383ac000, 0x383b0000, 0x383b4000,
0x383b8000, 0x383bc000, 0x383c0000, 0x383c4000, 0x383c8000, 0x383cc000,
0x383d0000, 0x383d4000, 0x383d8000, 0x383dc000, 0x383e0000, 0x383e4000,
0x383e8000, 0x383ec000, 0x383f0000, 0x383f4000, 0x383f8000, 0x383fc000,
0x38400000, 0x38404000, 0x38408000, 0x3840c000, 0x38410000, 0x38414000,
0x38418000, 0x3841c000, 0x38420000, 0x38424000, 0x38428000, 0x3842c000,
0x38430000, 0x38434000, 0x38438000, 0x3843c000, 0x38440000, 0x38444000,
0x38448000, 0x3844c000, 0x38450000, 0x38454000, 0x38458000, 0x3845c000,
0x38460000, 0x38464000, 0x38468000, 0x3846c000, 0x38470000, 0x38474000,
0x38478000, 0x3847c000, 0x38480000, 0x38484000, 0x38488000, 0x3848c000,
0x38490000, 0x38494000, 0x38498000, 0x3849c000, 0x384a0000, 0x384a4000,
0x384a8000, 0x384ac000, 0x384b0000, 0x384b4000, 0x384b8000, 0x384bc000,
0x384c0000, 0x384c4000, 0x384c8000, 0x384cc000, 0x384d0000, 0x384d4000,
0x384d8000, 0x384dc000, 0x384e0000, 0x384e4000, 0x384e8000, 0x384ec000,
0x384f0000, 0x384f4000, 0x384f8000, 0x384fc000, 0x38500000, 0x38504000,
0x38508000, 0x3850c000, 0x38510000, 0x38514000, 0x38518000, 0x3851c000,
0x38520000, 0x38524000, 0x38528000, 0x3852c000, 0x38530000, 0x38534000,
0x38538000, 0x3853c000, 0x38540000, 0x38544000, 0x38548000, 0x3854c000,
0x38550000, 0x38554000, 0x38558000, 0x3855c000, 0x38560000, 0x38564000,
0x38568000, 0x3856c000, 0x38570000, 0x38574000, 0x38578000, 0x3857c000,
0x38580000, 0x38584000, 0x38588000, 0x3858c000, 0x38590000, 0x38594000,
0x38598000, 0x3859c000, 0x385a0000, 0x385a4000, 0x385a8000, 0x385ac000,
0x385b0000, 0x385b4000, 0x385b8000, 0x385bc000, 0x385c0000, 0x385c4000,
0x385c8000, 0x385cc000, 0x385d0000, 0x385d4000, 0x385d8000, 0x385dc000,
0x385e0000, 0x385e4000, 0x385e8000, 0x385ec000, 0x385f0000, 0x385f4000,
0x385f8000, 0x385fc000, 0x38600000, 0x38604000, 0x38608000, 0x3860c000,
0x38610000, 0x38614000, 0x38618000, 0x3861c000, 0x38620000, 0x38624000,
0x38628000, 0x3862c000, 0x38630000, 0x38634000, 0x38638000, 0x3863c000,
0x38640000, 0x38644000, 0x38648000, 0x3864c000, 0x38650000, 0x38654000,
0x38658000, 0x3865c000, 0x38660000, 0x38664000, 0x38668000, 0x3866c000,
0x38670000, 0x38674000, 0x38678000, 0x3867c000, 0x38680000, 0x38684000,
0x38688000, 0x3868c000, 0x38690000, 0x38694000, 0x38698000, 0x3869c000,
0x386a0000, 0x386a4000, 0x386a8000, 0x386ac000, 0x386b0000, 0x386b4000,
0x386b8000, 0x386bc000, 0x386c0000, 0x386c4000, 0x386c8000, 0x386cc000,
0x386d0000, 0x386d4000, 0x386d8000, 0x386dc000, 0x386e0000, 0x386e4000,
0x386e8000, 0x386ec000, 0x386f0000, 0x386f4000, 0x386f8000, 0x386fc000,
0x38700000, 0x38704000, 0x38708000, 0x3870c000, 0x38710000, 0x38714000,
0x38718000, 0x3871c000, 0x38720000, 0x38724000, 0x38728000, 0x3872c000,
0x38730000, 0x38734000, 0x38738000, 0x3873c000, 0x38740000, 0x38744000,
0x38748000, 0x3874c000, 0x38750000, 0x38754000, 0x38758000, 0x3875c000,
0x38760000, 0x38764000, 0x38768000, 0x3876c000, 0x38770000, 0x38774000,
0x38778000, 0x3877c000, 0x38780000, 0x38784000, 0x38788000, 0x3878c000,
0x38790000, 0x38794000, 0x38798000, 0x3879c000, 0x387a0000, 0x387a4000,
0x387a8000, 0x387ac000, 0x387b0000, 0x387b4000, 0x387b8000, 0x387bc000,
0x387c0000, 0x387c4000, 0x387c8000, 0x387cc000, 0x387d0000, 0x387d4000,
0x387d8000, 0x387dc000, 0x387e0000, 0x387e4000, 0x387e8000, 0x387ec000,
0x387f0000, 0x387f4000, 0x387f8000, 0x387fc000, 0x38000000, 0x38002000,
0x38004000, 0x38006000, 0x38008000, 0x3800a000, 0x3800c000, 0x3800e000,
0x38010000, 0x38012000, 0x38014000, 0x38016000, 0x38018000, 0x3801a000,
0x3801c000, 0x3801e000, 0x38020000, 0x38022000, 0x38024000, 0x38026000,
0x38028000, 0x3802a000, 0x3802c000, 0x3802e000, 0x38030000, 0x38032000,
0x38034000, 0x38036000, 0x38038000, 0x3803a000, 0x3803c000, 0x3803e000,
0x38040000, 0x38042000, 0x38044000, 0x38046000, 0x38048000, 0x3804a000,
0x3804c000, 0x3804e000, 0x38050000, 0x38052000, 0x38054000, 0x38056000,
0x38058000, 0x3805a000, 0x3805c000, 0x3805e000, 0x38060000, 0x38062000,
0x38064000, 0x38066000, 0x38068000, 0x3806a000, 0x3806c000, 0x3806e000,
0x38070000, 0x38072000, 0x38074000, 0x38076000, 0x38078000, 0x3807a000,
0x3807c000, 0x3807e000, 0x38080000, 0x38082000, 0x38084000, 0x38086000,
0x38088000, 0x3808a000, 0x3808c000, 0x3808e000, 0x38090000, 0x38092000,
0x38094000, 0x38096000, 0x38098000, 0x3809a000, 0x3809c000, 0x3809e000,
0x380a0000, 0x380a2000, 0x380a4000, 0x380a6000, 0x380a8000, 0x380aa000,
0x380ac000, 0x380ae000, 0x380b0000, 0x380b2000, 0x380b4000, 0x380b6000,
0x380b8000, 0x380ba000, 0x380bc000, 0x380be000, 0x380c0000, 0x380c2000,
0x380c4000, 0x380c6000, 0x380c8000, 0x380ca000, 0x380cc000, 0x380ce000,
0x380d0000, 0x380d2000, 0x380d4000, 0x380d6000, 0x380d8000, 0x380da000,
0x380dc000, 0x380de000, 0x380e0000, 0x380e2000, 0x380e4000, 0x380e6000,
0x380e8000, 0x380ea000, 0x380ec000, 0x380ee000, 0x380f0000, 0x380f2000,
0x380f4000, 0x380f6000, 0x380f8000, 0x380fa000, 0x380fc000, 0x380fe000,
0x38100000, 0x38102000, 0x38104000, 0x38106000, 0x38108000, 0x3810a000,
0x3810c000, 0x3810e000, 0x38110000, 0x38112000, 0x38114000, 0x38116000,
0x38118000, 0x3811a000, 0x3811c000, 0x3811e000, 0x38120000, 0x38122000,
0x38124000, 0x38126000, 0x38128000, 0x3812a000, 0x3812c000, 0x3812e000,
0x38130000, 0x38132000, 0x38134000, 0x38136000, 0x38138000, 0x3813a000,
0x3813c000, 0x3813e000, 0x38140000, 0x38142000, 0x38144000, 0x38146000,
0x38148000, 0x3814a000, 0x3814c000, 0x3814e000, 0x38150000, 0x38152000,
0x38154000, 0x38156000, 0x38158000, 0x3815a000, 0x3815c000, 0x3815e000,
0x38160000, 0x38162000, 0x38164000, 0x38166000, 0x38168000, 0x3816a000,
0x3816c000, 0x3816e000, 0x38170000, 0x38172000, 0x38174000, 0x38176000,
0x38178000, 0x3817a000, 0x3817c000, 0x3817e000, 0x38180000, 0x38182000,
0x38184000, 0x38186000, 0x38188000, 0x3818a000, 0x3818c000, 0x3818e000,
0x38190000, 0x38192000, 0x38194000, 0x38196000, 0x38198000, 0x3819a000,
0x3819c000, 0x3819e000, 0x381a0000, 0x381a2000, 0x381a4000, 0x381a6000,
0x381a8000, 0x381aa000, 0x381ac000, 0x381ae000, 0x381b0000, 0x381b2000,
0x381b4000, 0x381b6000, 0x381b8000, 0x381ba000, 0x381bc000, 0x381be000,
0x381c0000, 0x381c2000, 0x381c4000, 0x381c6000, 0x381c8000, 0x381ca000,
0x381cc000, 0x381ce000, 0x381d0000, 0x381d2000, 0x381d4000, 0x381d6000,
0x381d8000, 0x381da000, 0x381dc000, 0x381de000, 0x381e0000, 0x381e2000,
0x381e4000, 0x381e6000, 0x381e8000, 0x381ea000, 0x381ec000, 0x381ee000,
0x381f0000, 0x381f2000, 0x381f4000, 0x381f6000, 0x381f8000, 0x381fa000,
0x381fc000, 0x381fe000, 0x38200000, 0x38202000, 0x38204000, 0x38206000,
0x38208000, 0x3820a000, 0x3820c000, 0x3820e000, 0x38210000, 0x38212000,
0x38214000, 0x38216000, 0x38218000, 0x3821a000, 0x3821c000, 0x3821e000,
0x38220000, 0x38222000, 0x38224000, 0x38226000, 0x38228000, 0x3822a000,
0x3822c000, 0x3822e000, 0x38230000, 0x38232000, 0x38234000, 0x38236000,
0x38238000, 0x3823a000, 0x3823c000, 0x3823e000, 0x38240000, 0x38242000,
0x38244000, 0x38246000, 0x38248000, 0x3824a000, 0x3824c000, 0x3824e000,
0x38250000, 0x38252000, 0x38254000, 0x38256000, 0x38258000, 0x3825a000,
0x3825c000, 0x3825e000, 0x38260000, 0x38262000, 0x38264000, 0x38266000,
0x38268000, 0x3826a000, 0x3826c000, 0x3826e000, 0x38270000, 0x38272000,
0x38274000, 0x38276000, 0x38278000, 0x3827a000, 0x3827c000, 0x3827e000,
0x38280000, 0x38282000, 0x38284000, 0x38286000, 0x38288000, 0x3828a000,
0x3828c000, 0x3828e000, 0x38290000, 0x38292000, 0x38294000, 0x38296000,
0x38298000, 0x3829a000, 0x3829c000, 0x3829e000, 0x382a0000, 0x382a2000,
0x382a4000, 0x382a6000, 0x382a8000, 0x382aa000, 0x382ac000, 0x382ae000,
0x382b0000, 0x382b2000, 0x382b4000, 0x382b6000, 0x382b8000, 0x382ba000,
0x382bc000, 0x382be000, 0x382c0000, 0x382c2000, 0x382c4000, 0x382c6000,
0x382c8000, 0x382ca000, 0x382cc000, 0x382ce000, 0x382d0000, 0x382d2000,
0x382d4000, 0x382d6000, 0x382d8000, 0x382da000, 0x382dc000, 0x382de000,
0x382e0000, 0x382e2000, 0x382e4000, 0x382e6000, 0x382e8000, 0x382ea000,
0x382ec000, 0x382ee000, 0x382f0000, 0x382f2000, 0x382f4000, 0x382f6000,
0x382f8000, 0x382fa000, 0x382fc000, 0x382fe000, 0x38300000, 0x38302000,
0x38304000, 0x38306000, 0x38308000, 0x3830a000, 0x3830c000, 0x3830e000,
0x38310000, 0x38312000, 0x38314000, 0x38316000, 0x38318000, 0x3831a000,
0x3831c000, 0x3831e000, 0x38320000, 0x38322000, 0x38324000, 0x38326000,
0x38328000, 0x3832a000, 0x3832c000, 0x3832e000, 0x38330000, 0x38332000,
0x38334000, 0x38336000, 0x38338000, 0x3833a000, 0x3833c000, 0x3833e000,
0x38340000, 0x38342000, 0x38344000, 0x38346000, 0x38348000, 0x3834a000,
0x3834c000, 0x3834e000, 0x38350000, 0x38352000, 0x38354000, 0x38356000,
0x38358000, 0x3835a000, 0x3835c000, 0x3835e000, 0x38360000, 0x38362000,
0x38364000, 0x38366000, 0x38368000, 0x3836a000, 0x3836c000, 0x3836e000,
0x38370000, 0x38372000, 0x38374000, 0x38376000, 0x38378000, 0x3837a000,
0x3837c000, 0x3837e000, 0x38380000, 0x38382000, 0x38384000, 0x38386000,
0x38388000, 0x3838a000, 0x3838c000, 0x3838e000, 0x38390000, 0x38392000,
0x38394000, 0x38396000, 0x38398000, 0x3839a000, 0x3839c000, 0x3839e000,
0x383a0000, 0x383a2000, 0x383a4000, 0x383a6000, 0x383a8000, 0x383aa000,
0x383ac000, 0x383ae000, 0x383b0000, 0x383b2000, 0x383b4000, 0x383b6000,
0x383b8000, 0x383ba000, 0x383bc000, 0x383be000, 0x383c0000, 0x383c2000,
0x383c4000, 0x383c6000, 0x383c8000, 0x383ca000, 0x383cc000, 0x383ce000,
0x383d0000, 0x383d2000, 0x383d4000, 0x383d6000, 0x383d8000, 0x383da000,
0x383dc000, 0x383de000, 0x383e0000, 0x383e2000, 0x383e4000, 0x383e6000,
0x383e8000, 0x383ea000, 0x383ec000, 0x383ee000, 0x383f0000, 0x383f2000,
0x383f4000, 0x383f6000, 0x383f8000, 0x383fa000, 0x383fc000, 0x383fe000,
0x38400000, 0x38402000, 0x38404000, 0x38406000, 0x38408000, 0x3840a000,
0x3840c000, 0x3840e000, 0x38410000, 0x38412000, 0x38414000, 0x38416000,
0x38418000, 0x3841a000, 0x3841c000, 0x3841e000, 0x38420000, 0x38422000,
0x38424000, 0x38426000, 0x38428000, 0x3842a000, 0x3842c000, 0x3842e000,
0x38430000, 0x38432000, 0x38434000, 0x38436000, 0x38438000, 0x3843a000,
0x3843c000, 0x3843e000, 0x38440000, 0x38442000, 0x38444000, 0x38446000,
0x38448000, 0x3844a000, 0x3844c000, 0x3844e000, 0x38450000, 0x38452000,
0x38454000, 0x38456000, 0x38458000, 0x3845a000, 0x3845c000, 0x3845e000,
0x38460000, 0x38462000, 0x38464000, 0x38466000, 0x38468000, 0x3846a000,
0x3846c000, 0x3846e000, 0x38470000, 0x38472000, 0x38474000, 0x38476000,
0x38478000, 0x3847a000, 0x3847c000, 0x3847e000, 0x38480000, 0x38482000,
0x38484000, 0x38486000, 0x38488000, 0x3848a000, 0x3848c000, 0x3848e000,
0x38490000, 0x38492000, 0x38494000, 0x38496000, 0x38498000, 0x3849a000,
0x3849c000, 0x3849e000, 0x384a0000, 0x384a2000, 0x384a4000, 0x384a6000,
0x384a8000, 0x384aa000, 0x384ac000, 0x384ae000, 0x384b0000, 0x384b2000,
0x384b4000, 0x384b6000, 0x384b8000, 0x384ba000, 0x384bc000, 0x384be000,
0x384c0000, 0x384c2000, 0x384c4000, 0x384c6000, 0x384c8000, 0x384ca000,
0x384cc000, 0x384ce000, 0x384d0000, 0x384d2000, 0x384d4000, 0x384d6000,
0x384d8000, 0x384da000, 0x384dc000, 0x384de000, 0x384e0000, 0x384e2000,
0x384e4000, 0x384e6000, 0x384e8000, 0x384ea000, 0x384ec000, 0x384ee000,
0x384f0000, 0x384f2000, 0x384f4000, 0x384f6000, 0x384f8000, 0x384fa000,
0x384fc000, 0x384fe000, 0x38500000, 0x38502000, 0x38504000, 0x38506000,
0x38508000, 0x3850a000, 0x3850c000, 0x3850e000, 0x38510000, 0x38512000,
0x38514000, 0x38516000, 0x38518000, 0x3851a000, 0x3851c000, 0x3851e000,
0x38520000, 0x38522000, 0x38524000, 0x38526000, 0x38528000, 0x3852a000,
0x3852c000, 0x3852e000, 0x38530000, 0x38532000, 0x38534000, 0x38536000,
0x38538000, 0x3853a000, 0x3853c000, 0x3853e000, 0x38540000, 0x38542000,
0x38544000, 0x38546000, 0x38548000, 0x3854a000, 0x3854c000, 0x3854e000,
0x38550000, 0x38552000, 0x38554000, 0x38556000, 0x38558000, 0x3855a000,
0x3855c000, 0x3855e000, 0x38560000, 0x38562000, 0x38564000, 0x38566000,
0x38568000, 0x3856a000, 0x3856c000, 0x3856e000, 0x38570000, 0x38572000,
0x38574000, 0x38576000, 0x38578000, 0x3857a000, 0x3857c000, 0x3857e000,
0x38580000, 0x38582000, 0x38584000, 0x38586000, 0x38588000, 0x3858a000,
0x3858c000, 0x3858e000, 0x38590000, 0x38592000, 0x38594000, 0x38596000,
0x38598000, 0x3859a000, 0x3859c000, 0x3859e000, 0x385a0000, 0x385a2000,
0x385a4000, 0x385a6000, 0x385a8000, 0x385aa000, 0x385ac000, 0x385ae000,
0x385b0000, 0x385b2000, 0x385b4000, 0x385b6000, 0x385b8000, 0x385ba000,
0x385bc000, 0x385be000, 0x385c0000, 0x385c2000, 0x385c4000, 0x385c6000,
0x385c8000, 0x385ca000, 0x385cc000, 0x385ce000, 0x385d0000, 0x385d2000,
0x385d4000, 0x385d6000, 0x385d8000, 0x385da000, 0x385dc000, 0x385de000,
0x385e0000, 0x385e2000, 0x385e4000, 0x385e6000, 0x385e8000, 0x385ea000,
0x385ec000, 0x385ee000, 0x385f0000, 0x385f2000, 0x385f4000, 0x385f6000,
0x385f8000, 0x385fa000, 0x385fc000, 0x385fe000, 0x38600000, 0x38602000,
0x38604000, 0x38606000, 0x38608000, 0x3860a000, 0x3860c000, 0x3860e000,
0x38610000, 0x38612000, 0x38614000, 0x38616000, 0x38618000, 0x3861a000,
0x3861c000, 0x3861e000, 0x38620000, 0x38622000, 0x38624000, 0x38626000,
0x38628000, 0x3862a000, 0x3862c000, 0x3862e000, 0x38630000, 0x38632000,
0x38634000, 0x38636000, 0x38638000, 0x3863a000, 0x3863c000, 0x3863e000,
0x38640000, 0x38642000, 0x38644000, 0x38646000, 0x38648000, 0x3864a000,
0x3864c000, 0x3864e000, 0x38650000, 0x38652000, 0x38654000, 0x38656000,
0x38658000, 0x3865a000, 0x3865c000, 0x3865e000, 0x38660000, 0x38662000,
0x38664000, 0x38666000, 0x38668000, 0x3866a000, 0x3866c000, 0x3866e000,
0x38670000, 0x38672000, 0x38674000, 0x38676000, 0x38678000, 0x3867a000,
0x3867c000, 0x3867e000, 0x38680000, 0x38682000, 0x38684000, 0x38686000,
0x38688000, 0x3868a000, 0x3868c000, 0x3868e000, 0x38690000, 0x38692000,
0x38694000, 0x38696000, 0x38698000, 0x3869a000, 0x3869c000, 0x3869e000,
0x386a0000, 0x386a2000, 0x386a4000, 0x386a6000, 0x386a8000, 0x386aa000,
0x386ac000, 0x386ae000, 0x386b0000, 0x386b2000, 0x386b4000, 0x386b6000,
0x386b8000, 0x386ba000, 0x386bc000, 0x386be000, 0x386c0000, 0x386c2000,
0x386c4000, 0x386c6000, 0x386c8000, 0x386ca000, 0x386cc000, 0x386ce000,
0x386d0000, 0x386d2000, 0x386d4000, 0x386d6000, 0x386d8000, 0x386da000,
0x386dc000, 0x386de000, 0x386e0000, 0x386e2000, 0x386e4000, 0x386e6000,
0x386e8000, 0x386ea000, 0x386ec000, 0x386ee000, 0x386f0000, 0x386f2000,
0x386f4000, 0x386f6000, 0x386f8000, 0x386fa000, 0x386fc000, 0x386fe000,
0x38700000, 0x38702000, 0x38704000, 0x38706000, 0x38708000, 0x3870a000,
0x3870c000, 0x3870e000, 0x38710000, 0x38712000, 0x38714000, 0x38716000,
0x38718000, 0x3871a000, 0x3871c000, 0x3871e000, 0x38720000, 0x38722000,
0x38724000, 0x38726000, 0x38728000, 0x3872a000, 0x3872c000, 0x3872e000,
0x38730000, 0x38732000, 0x38734000, 0x38736000, 0x38738000, 0x3873a000,
0x3873c000, 0x3873e000, 0x38740000, 0x38742000, 0x38744000, 0x38746000,
0x38748000, 0x3874a000, 0x3874c000, 0x3874e000, 0x38750000, 0x38752000,
0x38754000, 0x38756000, 0x38758000, 0x3875a000, 0x3875c000, 0x3875e000,
0x38760000, 0x38762000, 0x38764000, 0x38766000, 0x38768000, 0x3876a000,
0x3876c000, 0x3876e000, 0x38770000, 0x38772000, 0x38774000, 0x38776000,
0x38778000, 0x3877a000, 0x3877c000, 0x3877e000, 0x38780000, 0x38782000,
0x38784000, 0x38786000, 0x38788000, 0x3878a000, 0x3878c000, 0x3878e000,
0x38790000, 0x38792000, 0x38794000, 0x38796000, 0x38798000, 0x3879a000,
0x3879c000, 0x3879e000, 0x387a0000, 0x387a2000, 0x387a4000, 0x387a6000,
0x387a8000, 0x387aa000, 0x387ac000, 0x387ae000, 0x387b0000, 0x387b2000,
0x387b4000, 0x387b6000, 0x387b8000, 0x387ba000, 0x387bc000, 0x387be000,
0x387c0000, 0x387c2000, 0x387c4000, 0x387c6000, 0x387c8000, 0x387ca000,
0x387cc000, 0x387ce000, 0x387d0000, 0x387d2000, 0x387d4000, 0x387d6000,
0x387d8000, 0x387da000, 0x387dc000, 0x387de000, 0x387e0000, 0x387e2000,
0x387e4000, 0x387e6000, 0x387e8000, 0x387ea000, 0x387ec000, 0x387ee000,
0x387f0000, 0x387f2000, 0x387f4000, 0x387f6000, 0x387f8000, 0x387fa000,
0x387fc000, 0x387fe000};
static const uint16_t offsettable[64] = {
0x0000, 0x0400, 0x0400, 0x0400, 0x0400, 0x0400, 0x0400, 0x0400,
0x0400, 0x0400, 0x0400, 0x0400, 0x0400, 0x0400, 0x0400, 0x0400,
0x0400, 0x0400, 0x0400, 0x0400, 0x0400, 0x0400, 0x0400, 0x0400,
0x0400, 0x0400, 0x0400, 0x0400, 0x0400, 0x0400, 0x0400, 0x0400,
0x0000, 0x0400, 0x0400, 0x0400, 0x0400, 0x0400, 0x0400, 0x0400,
0x0400, 0x0400, 0x0400, 0x0400, 0x0400, 0x0400, 0x0400, 0x0400,
0x0400, 0x0400, 0x0400, 0x0400, 0x0400, 0x0400, 0x0400, 0x0400,
0x0400, 0x0400, 0x0400, 0x0400, 0x0400, 0x0400, 0x0400, 0x0400};
static const uint32_t exponenttable[64] = {
0x00000000, 0x00800000, 0x01000000, 0x01800000, 0x02000000, 0x02800000,
0x03000000, 0x03800000, 0x04000000, 0x04800000, 0x05000000, 0x05800000,
0x06000000, 0x06800000, 0x07000000, 0x07800000, 0x08000000, 0x08800000,
0x09000000, 0x09800000, 0x0a000000, 0x0a800000, 0x0b000000, 0x0b800000,
0x0c000000, 0x0c800000, 0x0d000000, 0x0d800000, 0x0e000000, 0x0e800000,
0x0f000000, 0x47800000, 0x80000000, 0x80800000, 0x81000000, 0x81800000,
0x82000000, 0x82800000, 0x83000000, 0x83800000, 0x84000000, 0x84800000,
0x85000000, 0x85800000, 0x86000000, 0x86800000, 0x87000000, 0x87800000,
0x88000000, 0x88800000, 0x89000000, 0x89800000, 0x8a000000, 0x8a800000,
0x8b000000, 0x8b800000, 0x8c000000, 0x8c800000, 0x8d000000, 0x8d800000,
0x8e000000, 0x8e800000, 0x8f000000, 0xc7800000};
static const uint16_t basetable[512] = {
0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
0x0000, 0x0000, 0x0000, 0x0000, 0x0001, 0x0002, 0x0004, 0x0008, 0x0010,
0x0020, 0x0040, 0x0080, 0x0100, 0x0200, 0x0400, 0x0800, 0x0c00, 0x1000,
0x1400, 0x1800, 0x1c00, 0x2000, 0x2400, 0x2800, 0x2c00, 0x3000, 0x3400,
0x3800, 0x3c00, 0x4000, 0x4400, 0x4800, 0x4c00, 0x5000, 0x5400, 0x5800,
0x5c00, 0x6000, 0x6400, 0x6800, 0x6c00, 0x7000, 0x7400, 0x7800, 0x7c00,
0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x7c00,
0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x7c00,
0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x7c00,
0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x7c00,
0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x7c00,
0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x7c00,
0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x7c00,
0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x7c00,
0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x7c00,
0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x7c00,
0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x7c00,
0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x7c00,
0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000,
0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000,
0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000,
0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000,
0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000,
0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000,
0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000,
0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000,
0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000,
0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000,
0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000,
0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8001,
0x8002, 0x8004, 0x8008, 0x8010, 0x8020, 0x8040, 0x8080, 0x8100, 0x8200,
0x8400, 0x8800, 0x8c00, 0x9000, 0x9400, 0x9800, 0x9c00, 0xa000, 0xa400,
0xa800, 0xac00, 0xb000, 0xb400, 0xb800, 0xbc00, 0xc000, 0xc400, 0xc800,
0xcc00, 0xd000, 0xd400, 0xd800, 0xdc00, 0xe000, 0xe400, 0xe800, 0xec00,
0xf000, 0xf400, 0xf800, 0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00,
0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00,
0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00,
0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00,
0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00,
0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00,
0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00,
0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00,
0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00,
0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00,
0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00,
0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00,
0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00};
static const uint8_t shifttable[512] = {
0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18,
0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18,
0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18,
0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18,
0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18,
0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18,
0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18,
0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18,
0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x17, 0x16, 0x15, 0x14, 0x13,
0x12, 0x11, 0x10, 0x0f, 0x0e, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d,
0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d,
0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x18,
0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18,
0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18,
0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18,
0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18,
0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18,
0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18,
0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18,
0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18,
0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18,
0x18, 0x18, 0x18, 0x0d, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18,
0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18,
0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18,
0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18,
0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18,
0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18,
0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18,
0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18,
0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x17,
0x16, 0x15, 0x14, 0x13, 0x12, 0x11, 0x10, 0x0f, 0x0e, 0x0d, 0x0d, 0x0d,
0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d,
0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d,
0x0d, 0x0d, 0x0d, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18,
0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18,
0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18,
0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18,
0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18,
0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18,
0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18,
0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18,
0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18,
0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x0d};
half_t Float2Half(float f) {
uint32_t v = *reinterpret_cast<uint32_t *>(&f);
return basetable[(v >> 23) & 0x1ff] +
((v & 0x007fffff) >> shifttable[(v >> 23) & 0x1ff]);
}
float Half2Float(half_t h) {
uint32_t v = mantissatable[offsettable[h >> 10] + (h & 0x3ff)] +
exponenttable[h >> 10];
return *reinterpret_cast<float *>(&v);
}
void FloatArray2HalfArray(float *f_array, half_t *h_array, int count) {
for (int i = 0; i < count; ++i) {
h_array[i] = Float2Half(f_array[i]);
}
}
void HalfArray2FloatArray(half_t *h_array, float *f_array, int count) {
for (int i = 0; i < count; ++i) {
f_array[i] = Half2Float(h_array[i]);
}
}
} // namespace framework
} // namespace paddle_mobile
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#pragma once
#include <cstdint>
namespace paddle_mobile {
namespace framework {
typedef uint16_t half_t;
half_t Float2Half(float f);
float Half2Float(half_t h);
void FloatArray2HalfArray(float *f_array, half_t *h_array, int count);
void HalfArray2FloatArray(half_t *h_array, float *f_array, int count);
} // namespace framework
} // namespace paddle_mobile
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#pragma once
#include <string>
#include <type_traits>
#include <vector>
#include "common/log.h"
#include "framework/cl/cl_deleter.h"
#include "framework/cl/cl_image.h"
#include "framework/cl/cl_scope.h"
namespace paddle_mobile {
namespace framework {
class CLHelper {
public:
CLHelper() = default;
explicit CLHelper(CLScope *scope) : scope_(scope) {}
void AddKernel(const std::string &kernel_name, const std::string &file_name) {
DLOG << " begin add kernel ";
auto kernel = scope_->GetKernel(kernel_name, file_name);
DLOG << " add kernel ing ";
kernels.emplace_back(std::move(kernel));
}
cl_kernel KernelAt(const int index) {
DLOG << " kernel count: " << kernels.size();
return kernels[index].get();
}
cl_command_queue CLCommandQueue() { return scope_->CommandQueue(); }
cl_context CLContext() { return scope_->Context(); }
std::vector<size_t> DefaultWorkSize(const CLImage &image) {
// n c h w
auto image_dim = image.dims();
if (image_dim.size() == 4) {
auto n = image_dim[0];
auto h = image_dim[2];
auto w = image_dim[3];
auto image_width = image.ImageWidth();
auto work_size_0 = image_width / w;
auto work_size_1 = w;
auto work_size_2 = n * h;
return {work_size_0, work_size_1, work_size_2};
} else if (image_dim.size() == 2) {
return {1, image.ImageWidth(), image.ImageHeight()};
} else if (image_dim.size() == 1) {
return {1, image.ImageWidth(), 1};
}
PADDLE_MOBILE_THROW_EXCEPTION(" not support this dim, need imp ");
}
private:
CLScope *scope_;
std::vector<std::unique_ptr<_cl_kernel, CLKernelDeleter>> kernels;
};
} // namespace framework
} // namespace paddle_mobile
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "framework/cl/cl_image.h"
namespace paddle_mobile {
namespace framework {
void CLImageToTensor(CLImage *cl_image, Tensor *tensor,
cl_command_queue commandQueue) {
// TODO(yangfei): need imp
}
void TensorToCLImage(const Tensor *tensor, CLImage *cl_image,
cl_command_queue commandQueue) {
// TODO(yangfei): need imp
}
#ifdef PADDLE_MOBILE_DEBUG
Print &operator<<(Print &printer, const CLImage &cl_image) {
int width = cl_image.ImageDims()[0];
int height = cl_image.ImageDims()[1];
half_t *image_data = new half_t[height * width * 4];
cl_int err;
cl_mem image = cl_image.GetCLImage();
size_t origin[3] = {0, 0, 0};
size_t region[3] = {width, height, 1};
err = clEnqueueReadImage(cl_image.CommandQueue(), image, CL_TRUE, origin,
region, 0, 0, image_data, 0, NULL, NULL);
CL_CHECK_ERRORS(err);
float *tensor_data = new float[cl_image.numel()];
auto converter = cl_image.Converter();
converter->ImageToNCHW(image_data, tensor_data, cl_image.ImageDims(),
cl_image.dims());
int stride = cl_image.numel() / 20;
stride = stride > 0 ? stride : 1;
printer << " dims: " << cl_image.dims() << "\n";
for (int i = 0; i < cl_image.numel(); i += stride) {
printer << tensor_data[i] << " ";
}
delete[](tensor_data);
delete[](image_data);
return printer;
}
#endif
} // namespace framework
} // namespace paddle_mobile
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#pragma once
#include <vector>
#include "CL/cl.h"
#include "framework/cl/cl_deleter.h"
#include "framework/cl/cl_engine.h"
#include "framework/cl/cl_half.h"
#include "framework/cl/cl_image_converter.h"
#include "framework/cl/cl_tool.h"
#include "framework/ddim.h"
#include "framework/tensor.h"
namespace paddle_mobile {
namespace framework {
class CLImage {
public:
CLImage() = default;
~CLImage() {
if (tensor_data_ != nullptr) {
delete[](tensor_data_);
}
if (image_converter_) {
delete (image_converter_);
}
}
/*
* will not hold input tensor data, memcpy in this method
* */
void SetTensorData(float *tensorData, const DDim &dim) {
int numel = product(dim);
if (tensor_data_ != nullptr) {
delete[](tensor_data_);
tensor_data_ = nullptr;
}
tensor_data_ = new float[numel];
memcpy(tensor_data_, tensorData, numel * sizeof(float));
tensor_dims_ = dim;
}
/*
* need call SetTensorData first
*
* folder when one dim or two dim
* */
void InitCLImage(cl_context context, cl_command_queue command_queue) {
PADDLE_MOBILE_ENFORCE(tensor_data_ != nullptr,
" need call SetTensorData first");
CLImageConverterFolder *folder_converter = new CLImageConverterFolder();
InitCLImage(context, command_queue, folder_converter);
}
void InitCLImage(cl_context context, cl_command_queue command_queue,
CLImageConverterBase *converter) {
if (image_converter_ != nullptr) {
delete (image_converter_);
}
PADDLE_MOBILE_ENFORCE(tensor_data_ != nullptr,
" need call SetTensorData first");
DLOG << " begin init cl image ";
image_dims_ = converter->InitImageDimInfoWith(tensor_dims_);
half_t *image_data = new half_t[product(image_dims_) * 4];
DLOG << " convert to image";
converter->NCHWToImage(tensor_data_, image_data, tensor_dims_);
DLOG << " end convert to image";
InitCLImage(context, image_dims_[0], image_dims_[1], image_data);
delete[](image_data);
delete[](tensor_data_);
command_queue_ = command_queue;
tensor_data_ = nullptr;
image_converter_ = converter;
initialized_ = true;
DLOG << " end init cl image";
}
void InitNImage(cl_context context, cl_command_queue command_queue) {
if (tensor_data_ == nullptr) {
PADDLE_MOBILE_THROW_EXCEPTION(" need call SetTensorData first");
}
CLImageConverterNWBlock *folder_converter = new CLImageConverterNWBlock();
InitCLImage(context, command_queue, folder_converter);
PADDLE_MOBILE_ENFORCE(tensor_dims_.size() == 4, " tensor dim is not 4");
}
void InitDWImage(cl_context context, cl_command_queue command_queue) {
if (tensor_data_ == nullptr) {
PADDLE_MOBILE_THROW_EXCEPTION(" need call SetTensorData first");
}
CLImageConverterDWBlock *dw_converter = new CLImageConverterDWBlock();
InitCLImage(context, command_queue, dw_converter);
PADDLE_MOBILE_ENFORCE(tensor_dims_.size() == 4, " tensor dim is not 4");
}
void InitEmptyImage(cl_context context, cl_command_queue command_queue,
const DDim &dim) {
PADDLE_MOBILE_ENFORCE(tensor_data_ == nullptr,
" empty image tensor data shouldn't have value");
CLImageConverterFolder *folder_converter = new CLImageConverterFolder();
DLOG << " to get image dims ";
image_dims_ = folder_converter->InitImageDimInfoWith(dim);
DLOG << " end get image dims " << image_dims_;
InitCLImage(context, image_dims_[0], image_dims_[1], nullptr);
tensor_dims_ = dim;
command_queue_ = command_queue;
image_converter_ = folder_converter;
cl_event_ = CLEngine::Instance()->CreateEvent(context);
initialized_ = true;
DLOG << " end init cl image";
}
cl_mem GetCLImage() const { return cl_image_.get(); }
const DDim &ImageDims() const { return image_dims_; }
inline size_t ImageWidth() const { return image_dims_[0]; }
inline size_t ImageHeight() const { return image_dims_[1]; }
inline cl_command_queue CommandQueue() const { return command_queue_; }
/*
* resize original tensor dim
* */
inline CLImage &Resize(const DDim &dims) {
tensor_dims_ = dims;
return *this;
}
template <typename T>
T *data() const {
if (initialized_) {
PADDLE_MOBILE_THROW_EXCEPTION(
" cl image has initialized, tensor data has been deleted, can't use "
"tensor data");
}
return reinterpret_cast<T *>(tensor_data_);
}
/*
* numel of tensor dim
* */
inline int64_t numel() const { return product(tensor_dims_); }
/*
* original tensor dim
* */
const DDim &dims() const { return tensor_dims_; }
cl_event GetClEvent() const { return cl_event_.get(); }
CLImageConverterBase *Converter() const { return image_converter_; }
private:
void InitCLImage(cl_context context, int width, int height, void *data) {
cl_image_format cf = {.image_channel_order = CL_RGBA,
.image_channel_data_type = CL_HALF_FLOAT};
cl_image_desc cid = {
.image_type = CL_MEM_OBJECT_IMAGE2D,
.image_width = width,
.image_height = height,
.image_depth = 1,
.image_array_size = 1,
.image_row_pitch = 0,
.image_slice_pitch = 0,
.num_mip_levels = 0,
.num_samples = 0,
// .buffer = nullptr
};
cid.buffer = nullptr;
cl_int err;
cl_mem cl_image = clCreateImage(
context, CL_MEM_READ_WRITE | (data ? CL_MEM_COPY_HOST_PTR : 0),
&cf, // const cl_image_format *image_format
&cid, // const cl_image_desc *image_desc
data, // void *host_ptr
&err);
cl_image_.reset(cl_image);
if (err != CL_SUCCESS) {
CL_CHECK_ERRORS(err);
PADDLE_MOBILE_THROW_EXCEPTION(" create image 2d error ");
}
}
bool initialized_ = false;
std::unique_ptr<_cl_mem, CLMemDeleter> cl_image_;
std::unique_ptr<_cl_event, CLEventDeleter> cl_event_;
DDim tensor_dims_;
DDim image_dims_;
float *tensor_data_ = nullptr;
cl_context context_;
cl_command_queue command_queue_;
CLImageConverterBase *image_converter_ = nullptr;
};
void TensorToCLImage(Tensor *tensor, CLImage *image,
cl_command_queue commandQueue);
void CLImageToTensor(CLImage *image, Tensor *tensor,
cl_command_queue commandQueue);
#ifdef PADDLE_MOBILE_DEBUG
Print &operator<<(Print &printer, const CLImage &image);
#endif
} // namespace framework
} // namespace paddle_mobile
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "framework/cl/cl_image_converter.h"
namespace paddle_mobile {
namespace framework {
const DDim &CLImageConverterDefault::InitImageDimInfoWith(
const DDim &tensor_dim) {
size_t new_dims[] = {1, 1, 1, 1};
for (int j = 0; j < tensor_dim.size(); ++j) {
new_dims[4 - tensor_dim.size() + j] = tensor_dim[j];
}
size_t N, C, H, W;
N = new_dims[0];
C = new_dims[1];
H = new_dims[2];
W = new_dims[3];
size_t width = W * ((C + 3) / 4);
size_t height = H * N;
return make_ddim({width, height});
}
void CLImageConverterDefault::NCHWToImage(float *nchw, half_t *image,
const DDim &tensor_dim) {
size_t new_dims[] = {1, 1, 1, 1};
for (int j = 0; j < tensor_dim.size(); ++j) {
new_dims[4 - tensor_dim.size() + j] = tensor_dim[j];
}
size_t N, C, H, W;
N = new_dims[0];
C = new_dims[1];
H = new_dims[2];
W = new_dims[3];
DDim in_image_dim = InitImageDimInfoWith(tensor_dim);
DLOG << " tensor dim " << tensor_dim;
DLOG << " image dim " << in_image_dim;
size_t width = in_image_dim[0];
size_t height = in_image_dim[1];
int w_block = width / W;
float *p = nchw;
size_t i0 = 0;
for (int n = 0; n < N; n++) {
for (int c = 0; c < w_block * 4; c++) {
size_t i1 = i0 + (c / 4) * W;
for (int h = 0; h < H; h++) {
size_t i2 = (i1 << 2) + c % 4;
for (int w = 0; w < W; w++) {
if (c < C) {
// int x = (n * width * H + h * width + (c / 4) * W + w) * 4 +
// (c % 4);
image[i2] = Float2Half(*p);
i2 += 4;
p++;
} else {
image[i2] = 0.0;
i2 += 4;
}
}
i1 += width;
}
}
i0 += width * H;
}
}
void CLImageConverterDefault::ImageToNCHW(half_t *image, float *tensor,
const DDim &image_dim,
const DDim &tensor_dim) {
size_t new_dims[] = {1, 1, 1, 1};
for (int j = 0; j < tensor_dim.size(); ++j) {
new_dims[4 - tensor_dim.size() + j] = tensor_dim[j];
}
size_t N, C, H, W;
N = new_dims[0];
C = new_dims[1];
H = new_dims[2];
W = new_dims[3];
int width = image_dim[0];
int height = image_dim[0];
float *p = tensor;
size_t i0 = 0;
for (int n = 0; n < N; n++) {
for (int c = 0; c < C; c++) {
size_t i1 = i0 + (c / 4) * W;
for (int h = 0; h < H; h++) {
size_t i2 = (i1 << 2) + c % 4;
for (int w = 0; w < W; w++) {
*p = Half2Float(image[i2]);
i2 += 4;
p++;
}
i1 += width;
}
}
i0 += width * H;
}
}
const DDim &CLImageConverterFolder::InitImageDimInfoWith(
const DDim &tensor_dim) {
if (tensor_dim.size() <= 2) {
int tdim[2] = {1, 1};
if (tensor_dim.size() == 1) {
tdim[1] = tensor_dim[0];
} else {
tdim[0] = tensor_dim[0];
tdim[1] = tensor_dim[1];
}
int width = (tdim[1] + 3) / 4;
int height = tdim[0];
width_of_one_block_ = width;
height_of_one_block_ = height;
c_block_ = 1;
return make_ddim({width, height});
} else {
size_t new_dims[] = {1, 1, 1, 1};
for (int j = 0; j < tensor_dim.size(); ++j) {
new_dims[4 - tensor_dim.size() + j] = tensor_dim[j];
}
size_t N, C, H, W;
N = new_dims[0];
C = new_dims[1];
H = new_dims[2];
W = new_dims[3];
size_t width = W * ((C + 3) / 4);
size_t height = H * N;
width_of_one_block_ = W;
height_of_one_block_ = H;
c_block_ = width / W;
return make_ddim({width, height});
}
}
void CLImageConverterFolder::NCHWToImage(float *tensor, half_t *image,
const DDim &tensor_dim) {
PADDLE_MOBILE_ENFORCE(tensor_dim.size() <= 4 && tensor_dim.size() > 0,
"tensor dim is not support ");
if (tensor_dim.size() > 2) {
CLImageConverterDefault default_converter;
default_converter.NCHWToImage(tensor, image, tensor_dim);
} else {
int tdim[2] = {1, 1};
if (tensor_dim.size() == 1) {
tdim[1] = tensor_dim[0];
} else {
tdim[0] = tensor_dim[0];
tdim[1] = tensor_dim[1];
}
DDim image_dim = InitImageDimInfoWith(tensor_dim);
int width = image_dim[0];
for (int h = 0; h < tdim[0]; h++) {
for (int w = 0; w < tdim[1]; w++) {
image[(h * width + w / 4) * 4 + (w % 4)] =
Float2Half(tensor[h * tdim[1] + w]);
}
}
}
}
void CLImageConverterFolder::ImageToNCHW(half_t *image, float *tensor,
const DDim &image_dim,
const DDim &tensor_dim) {
if (tensor_dim.size() > 2) {
CLImageConverterDefault default_converter;
default_converter.ImageToNCHW(image, tensor, image_dim, tensor_dim);
} else {
int width = image_dim[0];
int height = image_dim[1];
int H, W;
if (tensor_dim.size() == 2) {
H = tensor_dim[0];
W = tensor_dim[1];
} else if (tensor_dim.size() == 1) {
H = 1;
W = tensor_dim[0];
}
float *p = tensor;
for (int h = 0; h < H; h++) {
for (int w = 0; w < W; w++) {
p[h * W + w] = Half2Float(image[(h * width + w / 4) * 4 + (w % 4)]);
}
}
}
}
const DDim &CLImageConverterNWBlock::InitImageDimInfoWith(
const DDim &tensor_dim) {
PADDLE_MOBILE_ENFORCE(tensor_dim.size() == 4, " tensor dim is not 4");
size_t N, C, H, W;
N = tensor_dim[0];
C = tensor_dim[1];
H = tensor_dim[2];
W = tensor_dim[3];
size_t width = W * ((N + 3) / 4);
size_t height = C * H;
return make_ddim({width, height});
}
void CLImageConverterNWBlock::NCHWToImage(float *tensor, half_t *image,
const DDim &tensor_dim) {
PADDLE_MOBILE_ENFORCE(tensor_dim.size() == 4, " tensor dim is not 4");
auto image_dim = InitImageDimInfoWith(tensor_dim);
float *p = tensor;
int N = tensor_dim[0];
int C = tensor_dim[1];
int H = tensor_dim[2];
int W = tensor_dim[3];
int width = image_dim[0];
int height = image_dim[1];
int block = image_dim[0] / tensor_dim[3];
for (int n = 0; n < block * 4; n++) {
for (int c = 0; c < C; c++) {
for (int h = 0; h < H; ++h) {
for (int w = 0; w < W; ++w) {
int index = 4 * c * (width * H) + 4 * h * width + 4 * W * (n / 4) +
w * 4 + n % 4;
if (n < N) {
image[index] = Float2Half(*p);
p++;
} else {
image[index] = 0.0;
}
if (index >= (width * height * 4)) {
DLOG << " index out of range ";
}
}
}
}
}
DLOG << " init done";
}
void CLImageConverterNWBlock::ImageToNCHW(half_t *image, float *tensor,
const DDim &image_dim,
const DDim &tensor_dim) {
PADDLE_MOBILE_ENFORCE(tensor_dim.size() == 4, " tensor dim is not 4");
float *p = tensor;
int N = tensor_dim[0];
int C = tensor_dim[1];
int H = tensor_dim[2];
int W = tensor_dim[3];
int width = image_dim[0];
int height = image_dim[1];
int block = image_dim[0] / tensor_dim[3];
for (int n = 0; n < N; n++) {
for (int c = 0; c < C; c++) {
for (int h = 0; h < H; ++h) {
for (int w = 0; w < W; ++w) {
int index = 4 * c * (width * H) + 4 * h * width + 4 * W * (n / 4) +
w * 4 + n % 4;
*p = Half2Float(image[index]);
p++;
if (index >= (width * height * 4)) {
DLOG << " index out of range ";
}
}
}
}
}
DLOG << " init done";
}
const DDim &CLImageConverterDWBlock::InitImageDimInfoWith(
const DDim &tensor_dim) {
PADDLE_MOBILE_ENFORCE(tensor_dim.size() == 4, " tensor dim is not 4");
size_t N, C, H, W;
N = tensor_dim[0];
C = tensor_dim[1];
H = tensor_dim[2];
W = tensor_dim[3];
size_t width = W * ((N + 3) / 4);
size_t height = C * H;
return make_ddim({width, height});
}
void CLImageConverterDWBlock::NCHWToImage(float *tensor, half_t *image,
const DDim &tensor_dim) {
size_t new_dims[] = {1, 1, 1, 1};
for (int j = 0; j < tensor_dim.size(); ++j) {
new_dims[4 - tensor_dim.size() + j] = tensor_dim[j];
}
size_t N, C, H, W;
N = new_dims[1];
C = new_dims[0];
H = new_dims[2];
W = new_dims[3];
DDim in_image_dim = InitImageDimInfoWith(tensor_dim);
DLOG << " tensor dim " << tensor_dim;
DLOG << " image dim " << in_image_dim;
size_t width = in_image_dim[0];
size_t height = in_image_dim[1];
int w_block = width / W;
float *p = tensor;
size_t i0 = 0;
for (int n = 0; n < N; n++) {
for (int c = 0; c < w_block * 4; c++) {
size_t i1 = i0 + (c / 4) * W;
for (int h = 0; h < H; h++) {
size_t i2 = (i1 << 2) + c % 4;
for (int w = 0; w < W; w++) {
if (c < C) {
// int x = (n * width * H + h * width + (c / 4) * W + w) * 4 +
// (c % 4);
image[i2] = Float2Half(*p);
i2 += 4;
p++;
} else {
image[i2] = 0.0;
i2 += 4;
}
}
i1 += width;
}
}
i0 += width * H;
}
}
void CLImageConverterDWBlock::ImageToNCHW(half_t *image, float *tensor,
const DDim &image_dim,
const DDim &tensor_dim) {
PADDLE_MOBILE_ENFORCE(tensor_dim.size() == 4, " tensor dim is not 4");
float *p = tensor;
int N = tensor_dim[1];
int C = tensor_dim[0];
int H = tensor_dim[2];
int W = tensor_dim[3];
int width = image_dim[0];
int height = image_dim[0];
size_t i0 = 0;
for (int n = 0; n < N; n++) {
for (int c = 0; c < C; c++) {
size_t i1 = i0 + (c / 4) * W;
for (int h = 0; h < H; h++) {
size_t i2 = (i1 << 2) + c % 4;
for (int w = 0; w < W; w++) {
*p = Half2Float(image[i2]);
i2 += 4;
p++;
}
i1 += width;
}
}
i0 += width * H;
}
}
} // namespace framework
} // namespace paddle_mobile
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#pragma once
#include "framework/cl/cl_half.h"
#include "framework/ddim.h"
namespace paddle_mobile {
namespace framework {
class CLImageConverterBase {
public:
virtual void NCHWToImage(float *nchw, half_t *image,
const DDim &tensor_dim) = 0;
virtual void ImageToNCHW(half_t *image, float *nchw, const DDim &image_dim,
const DDim &tensor_dim) = 0;
virtual const DDim &InitImageDimInfoWith(const DDim &tensor_dim) = 0;
};
class CLImageConverterDefault : public CLImageConverterBase {
public:
const DDim &InitImageDimInfoWith(const DDim &tensor_dim);
void NCHWToImage(float *nchw, half_t *image, const DDim &tensor_dim);
void ImageToNCHW(half_t *image, float *tensor, const DDim &image_dim,
const DDim &tensor_dim);
};
class CLImageConverterFolder : public CLImageConverterBase {
public:
const DDim &InitImageDimInfoWith(const DDim &tensor_dim);
void NCHWToImage(float *tensor, half_t *image, const DDim &tensor_dim);
void ImageToNCHW(half_t *image, float *tensor, const DDim &image_dim,
const DDim &tensor_dim);
/*
* width of original tensor
* */
inline size_t WidthOfOneBlock() const { return width_of_one_block_; }
/*
* height of original tensor
* */
inline size_t HeightOfOneBlock() const { return height_of_one_block_; }
int GetCBlock() const { return c_block_; }
private:
int c_block_;
int width_of_one_block_;
int height_of_one_block_;
};
class CLImageConverterNWBlock : public CLImageConverterBase {
const DDim &InitImageDimInfoWith(const DDim &tensor_dim);
void NCHWToImage(float *tensor, half_t *image, const DDim &tensor_dim);
void ImageToNCHW(half_t *image, float *tensor, const DDim &image_dim,
const DDim &tensor_dim);
};
class CLImageConverterDWBlock : public CLImageConverterBase {
const DDim &InitImageDimInfoWith(const DDim &tensor_dim);
void NCHWToImage(float *tensor, half_t *image, const DDim &tensor_dim);
void ImageToNCHW(half_t *image, float *tensor, const DDim &image_dim,
const DDim &tensor_dim);
};
} // namespace framework
} // namespace paddle_mobile
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#pragma once
#include <memory>
#include <string>
#include <unordered_map>
#include "CL/cl.h"
#include "framework/cl/cl_deleter.h"
#include "framework/cl/cl_engine.h"
#include "framework/cl/cl_tool.h"
namespace paddle_mobile {
namespace framework {
class CLScope {
public:
CLScope() {
CLEngine *engin = CLEngine::Instance();
context_ = engin->CreateContext();
command_queue_ = engin->CreateClCommandQueue(context_.get());
}
cl_command_queue CommandQueue() { return command_queue_.get(); }
std::unique_ptr<_cl_kernel, CLKernelDeleter> GetKernel(
const std::string &kernel_name, const std::string &file_name) {
DLOG << " to get program " << file_name;
auto program = Program(file_name);
DLOG << " end get program ~ ";
DLOG << " to create kernel: " << kernel_name;
std::unique_ptr<_cl_kernel, CLKernelDeleter> kernel(
clCreateKernel(program, kernel_name.c_str(), &status_));
CL_CHECK_ERRORS(status_);
DLOG << " end create kernel ~ ";
return std::move(kernel);
}
cl_context Context() { return context_.get(); }
cl_program Program(const std::string &file_name) {
auto it = programs_.find(file_name);
if (it != programs_.end()) {
return it->second.get();
}
auto program = CLEngine::Instance()->CreateProgramWith(
context_.get(), "./cl_kernel/" + file_name);
DLOG << " --- begin build program -> " << file_name << " --- ";
CLEngine::Instance()->BuildProgram(program.get());
DLOG << " --- end build program -> " << file_name << " --- ";
programs_[file_name] = std::move(program);
return programs_[file_name].get();
}
private:
cl_int status_;
std::unique_ptr<_cl_context, CLContextDeleter> context_;
std::unique_ptr<_cl_command_queue, CLCommQueueDeleter> command_queue_;
std::unordered_map<std::string,
std::unique_ptr<_cl_program, CLProgramDeleter>>
programs_;
};
} // namespace framework
} // namespace paddle_mobile
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#pragma once
#include <memory>
#include <string>
#include <vector>
#include "CL/cl.h"
#include "framework/cl/cl_deleter.h"
#include "framework/cl/cl_engine.h"
#include "framework/tensor_base.h"
namespace paddle_mobile {
namespace framework {
class CLTensor : TensorBase {
public:
CLTensor(cl_context context, cl_command_queue command_queue)
: context_(context), command_queue_(command_queue) {}
CLTensor() = default;
/*
* if init method haven't set context and command_queue, need set
* */
void SetContextAndCommandQueue(cl_context context,
cl_command_queue command_queue) {
context_ = context;
command_queue_ = command_queue;
}
/*! Resize the dimensions of the memory block. */
inline CLTensor &Resize(const DDim &dims) {
dims_ = dims;
return *this;
}
template <typename T>
inline cl_mem mutable_with_data(const T *data) {
int64_t size = numel() * sizeof(T);
holder_.reset(new PlaceholderImpl(
size, reinterpret_cast<void *>(const_cast<T *>(data)), typeid(T),
context_, command_queue_));
return reinterpret_cast<cl_mem>(holder_->ptr());
}
inline cl_mem mutable_data(std::type_index type) {
if (holder_ != nullptr) {
holder_->set_type(type);
}
PADDLE_MOBILE_ENFORCE(numel() >= 0, "the Tensor's numel must >=0.")
int64_t size = numel() * SizeOfType(type);
if (holder_ == nullptr || holder_->size() < size + offset_) {
holder_.reset(new PlaceholderImpl(size, type, context_, command_queue_));
offset_ = 0;
}
return reinterpret_cast<cl_mem>(holder_->ptr());
}
/**
* @brief Return a pointer to cl buffer.
* @note If not exist, then allocation.
*/
template <typename T>
inline cl_mem mutable_data() {
return reinterpret_cast<cl_mem>(mutable_data(typeid(T)));
}
/**
* @brief Return a pointer to cl buffer.
*
* @param[in] dims The dimensions of the memory block.
* @param[in] place The place of the memory block.
*
* @note If not exist, then allocation.
*/
template <typename T>
inline cl_mem mutable_data(DDim dims) {
Resize(dims);
return mutable_data<T>();
}
inline cl_mem CLBuffer() {
check_memory_size();
return reinterpret_cast<cl_mem>(
reinterpret_cast<uintptr_t>(holder_->ptr()));
}
template <typename T>
inline T *Data() {
if (host_ptr_) {
delete (host_ptr_);
host_ptr_ = nullptr;
}
cl_mem buffer = CLBuffer();
host_ptr_ = new char[holder_->size()];
cl_int status;
status = clEnqueueReadBuffer(command_queue_, buffer, CL_TRUE, 0,
holder_->size(), host_ptr_, 0, NULL, NULL);
CL_CHECK_ERRORS(status);
return reinterpret_cast<T *>(host_ptr_);
}
int memorySize() { return holder_->size(); }
~CLTensor() {
DLOG << "~CLTensor";
if (host_ptr_) {
DLOG << " delete host ptr ";
delete (host_ptr_);
host_ptr_ = nullptr;
}
}
private:
cl_context context_;
cl_command_queue command_queue_;
void *host_ptr_ = nullptr;
struct PlaceholderImpl : public Placeholder {
PlaceholderImpl(size_t size, void *input, std::type_index type,
cl_context context, cl_command_queue command_queue)
: ptr_(clCreateBuffer(context, CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR,
size, reinterpret_cast<void *>(input), NULL)),
size_(size),
type_(type),
command_queue_(command_queue) {}
PlaceholderImpl(size_t size, std::type_index type, cl_context context,
cl_command_queue command_queue)
: ptr_(clCreateBuffer(context, CL_MEM_READ_WRITE, size, NULL, NULL)),
size_(size),
type_(type),
command_queue_(command_queue) {}
virtual size_t size() const { return size_; }
virtual void *ptr() const { return static_cast<void *>(ptr_.get()); }
virtual std::type_index type() const { return type_; }
virtual void set_type(std::type_index type) { type_ = type; }
std::unique_ptr<_cl_mem, CLMemDeleter> ptr_;
size_t size_;
/* the current type of memory */
std::type_index type_;
cl_command_queue command_queue_;
};
};
} // namespace framework
} // namespace paddle_mobile
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "framework/cl/cl_tool.h"
namespace paddle_mobile {
namespace framework {
const char *opencl_error_to_str(cl_int error) {
#define CASE_CL_CONSTANT(NAME) \
case NAME: \
return #NAME;
// Suppose that no combinations are possible.
switch (error) {
CASE_CL_CONSTANT(CL_SUCCESS)
CASE_CL_CONSTANT(CL_DEVICE_NOT_FOUND)
CASE_CL_CONSTANT(CL_DEVICE_NOT_AVAILABLE)
CASE_CL_CONSTANT(CL_COMPILER_NOT_AVAILABLE)
CASE_CL_CONSTANT(CL_MEM_OBJECT_ALLOCATION_FAILURE)
CASE_CL_CONSTANT(CL_OUT_OF_RESOURCES)
CASE_CL_CONSTANT(CL_OUT_OF_HOST_MEMORY)
CASE_CL_CONSTANT(CL_PROFILING_INFO_NOT_AVAILABLE)
CASE_CL_CONSTANT(CL_MEM_COPY_OVERLAP)
CASE_CL_CONSTANT(CL_IMAGE_FORMAT_MISMATCH)
CASE_CL_CONSTANT(CL_IMAGE_FORMAT_NOT_SUPPORTED)
CASE_CL_CONSTANT(CL_BUILD_PROGRAM_FAILURE)
CASE_CL_CONSTANT(CL_MAP_FAILURE)
CASE_CL_CONSTANT(CL_MISALIGNED_SUB_BUFFER_OFFSET)
CASE_CL_CONSTANT(CL_EXEC_STATUS_ERROR_FOR_EVENTS_IN_WAIT_LIST)
CASE_CL_CONSTANT(CL_INVALID_VALUE)
CASE_CL_CONSTANT(CL_INVALID_DEVICE_TYPE)
CASE_CL_CONSTANT(CL_INVALID_PLATFORM)
CASE_CL_CONSTANT(CL_INVALID_DEVICE)
CASE_CL_CONSTANT(CL_INVALID_CONTEXT)
CASE_CL_CONSTANT(CL_INVALID_QUEUE_PROPERTIES)
CASE_CL_CONSTANT(CL_INVALID_COMMAND_QUEUE)
CASE_CL_CONSTANT(CL_INVALID_HOST_PTR)
CASE_CL_CONSTANT(CL_INVALID_MEM_OBJECT)
CASE_CL_CONSTANT(CL_INVALID_IMAGE_FORMAT_DESCRIPTOR)
CASE_CL_CONSTANT(CL_INVALID_IMAGE_SIZE)
CASE_CL_CONSTANT(CL_INVALID_SAMPLER)
CASE_CL_CONSTANT(CL_INVALID_BINARY)
CASE_CL_CONSTANT(CL_INVALID_BUILD_OPTIONS)
CASE_CL_CONSTANT(CL_INVALID_PROGRAM)
CASE_CL_CONSTANT(CL_INVALID_PROGRAM_EXECUTABLE)
CASE_CL_CONSTANT(CL_INVALID_KERNEL_NAME)
CASE_CL_CONSTANT(CL_INVALID_KERNEL_DEFINITION)
CASE_CL_CONSTANT(CL_INVALID_KERNEL)
CASE_CL_CONSTANT(CL_INVALID_ARG_INDEX)
CASE_CL_CONSTANT(CL_INVALID_ARG_VALUE)
CASE_CL_CONSTANT(CL_INVALID_ARG_SIZE)
CASE_CL_CONSTANT(CL_INVALID_KERNEL_ARGS)
CASE_CL_CONSTANT(CL_INVALID_WORK_DIMENSION)
CASE_CL_CONSTANT(CL_INVALID_WORK_GROUP_SIZE)
CASE_CL_CONSTANT(CL_INVALID_WORK_ITEM_SIZE)
CASE_CL_CONSTANT(CL_INVALID_GLOBAL_OFFSET)
CASE_CL_CONSTANT(CL_INVALID_EVENT_WAIT_LIST)
CASE_CL_CONSTANT(CL_INVALID_EVENT)
CASE_CL_CONSTANT(CL_INVALID_OPERATION)
CASE_CL_CONSTANT(CL_INVALID_GL_OBJECT)
CASE_CL_CONSTANT(CL_INVALID_BUFFER_SIZE)
CASE_CL_CONSTANT(CL_INVALID_MIP_LEVEL)
CASE_CL_CONSTANT(CL_INVALID_GLOBAL_WORK_SIZE)
CASE_CL_CONSTANT(CL_INVALID_PROPERTY)
default:
return "UNKNOWN ERROR CODE";
}
#undef CASE_CL_CONSTANT
}
} // namespace framework
} // namespace paddle_mobile
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#pragma once
#include "CL/cl.h"
namespace paddle_mobile {
namespace framework {
const char* opencl_error_to_str(cl_int error);
#define CL_CHECK_ERRORS(ERR) \
if (ERR != CL_SUCCESS) { \
printf( \
"OpenCL error with code %s happened in file %s at line %d. " \
"Exiting.\n", \
paddle_mobile::framework::opencl_error_to_str(ERR), __FILE__, \
__LINE__); \
}
} // namespace framework
} // namespace paddle_mobile
...@@ -41,7 +41,6 @@ inline DataLayout StringToDataLayout(const std::string &str) { ...@@ -41,7 +41,6 @@ inline DataLayout StringToDataLayout(const std::string &str) {
return DataLayout::kAnyLayout; return DataLayout::kAnyLayout;
} else { } else {
PADDLE_MOBILE_THROW_EXCEPTION("Unknown storage order string: %s", s.c_str()) PADDLE_MOBILE_THROW_EXCEPTION("Unknown storage order string: %s", s.c_str())
exit(0);
} }
} }
...@@ -55,7 +54,6 @@ inline std::string DataLayoutToString(const DataLayout &data_layout) { ...@@ -55,7 +54,6 @@ inline std::string DataLayoutToString(const DataLayout &data_layout) {
return "ANY_LAYOUT"; return "ANY_LAYOUT";
default: default:
PADDLE_MOBILE_THROW_EXCEPTION("Unknown storage order string ") PADDLE_MOBILE_THROW_EXCEPTION("Unknown storage order string ")
exit(0);
break; break;
} }
} }
......
...@@ -42,7 +42,7 @@ struct Dim { ...@@ -42,7 +42,7 @@ struct Dim {
: head(idx % size.head), tail(idx / size.head, size.tail) {} : head(idx % size.head), tail(idx / size.head, size.tail) {}
/** Construct a Dim with each dimension set to the given index */ /** Construct a Dim with each dimension set to the given index */
Dim(int64_t idx) : head(idx), tail(idx) {} explicit Dim(int64_t idx) : head(idx), tail(idx) {}
bool operator==(const Dim<i> &o) const { bool operator==(const Dim<i> &o) const {
return (head == o.head) && (tail == o.tail); return (head == o.head) && (tail == o.tail);
...@@ -65,7 +65,7 @@ template <> ...@@ -65,7 +65,7 @@ template <>
struct Dim<0> { struct Dim<0> {
static constexpr int dimensions = 0; static constexpr int dimensions = 0;
Dim(int64_t _head) {} explicit Dim(int64_t _head) {}
Dim() {} Dim() {}
...@@ -131,7 +131,6 @@ int64_t &indexer(Dim<D> &dim, int idx) { ...@@ -131,7 +131,6 @@ int64_t &indexer(Dim<D> &dim, int idx) {
template <> template <>
int64_t &indexer<0>(Dim<0> &dim, int idx) { int64_t &indexer<0>(Dim<0> &dim, int idx) {
PADDLE_MOBILE_THROW_EXCEPTION("Invalid index") PADDLE_MOBILE_THROW_EXCEPTION("Invalid index")
exit(0);
} }
template <int D> template <int D>
...@@ -148,7 +147,6 @@ int64_t indexer(const Dim<D> &dim, int idx) { ...@@ -148,7 +147,6 @@ int64_t indexer(const Dim<D> &dim, int idx) {
template <> template <>
int64_t indexer<0>(const Dim<0> &dim, int idx) { int64_t indexer<0>(const Dim<0> &dim, int idx) {
PADDLE_MOBILE_THROW_EXCEPTION("Invalid index") PADDLE_MOBILE_THROW_EXCEPTION("Invalid index")
exit(0);
} }
} // namespace } // namespace
......
...@@ -12,7 +12,7 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. ...@@ -12,7 +12,7 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and See the License for the specific language governing permissions and
limitations under the License. */ limitations under the License. */
#include "io/executor.h" #include "framework/executor.h"
#include <algorithm> #include <algorithm>
#include <utility> #include <utility>
#include <vector> #include <vector>
...@@ -26,11 +26,24 @@ limitations under the License. */ ...@@ -26,11 +26,24 @@ limitations under the License. */
#include "framework/program/var_desc.h" #include "framework/program/var_desc.h"
#include "framework/scope.h" #include "framework/scope.h"
#include "framework/tensor.h" #include "framework/tensor.h"
#include "operators/math/gemm.h"
#ifdef PADDLE_EXECUTOR_MULTITHREAD
#include <queue>
#include <utility>
#include "common/threadpool.h"
#endif
#ifdef PADDLE_MOBILE_CL
#include "framework/cl/cl_image.h"
#endif
namespace paddle_mobile { namespace paddle_mobile {
namespace framework {
using framework::Variable; using framework::Variable;
using framework::Variable;
#pragma mark - executor
template <typename Dtype, Precision P> template <typename Dtype, Precision P>
Executor<Dtype, P>::Executor(const framework::Program<Dtype> p, int batch_size, Executor<Dtype, P>::Executor(const framework::Program<Dtype> p, int batch_size,
...@@ -390,15 +403,92 @@ std::vector<typename Executor<Dtype, P>::Ptype> Executor<Dtype, P>::Predict( ...@@ -390,15 +403,92 @@ std::vector<typename Executor<Dtype, P>::Ptype> Executor<Dtype, P>::Predict(
const std::vector<Ptype> &input, const std::vector<int64_t> &dims) { const std::vector<Ptype> &input, const std::vector<int64_t> &dims) {
framework::Tensor tensor(input, framework::make_ddim(dims)); framework::Tensor tensor(input, framework::make_ddim(dims));
std::shared_ptr<framework::Tensor> output_tensor = Predict(tensor, 0); std::shared_ptr<framework::Tensor> output_tensor = Predict(tensor, 0);
Executor<Dtype, P>::Ptype *output_ptr = if (output_tensor != nullptr) {
output_tensor->data<typename Executor<Dtype, P>::Ptype>(); Executor<Dtype, P>::Ptype *output_ptr =
std::vector<typename Executor<Dtype, P>::Ptype> result_vector; output_tensor->data<typename Executor<Dtype, P>::Ptype>();
for (int j = 0; j < output_tensor->numel(); ++j) { std::vector<typename Executor<Dtype, P>::Ptype> result_vector;
result_vector.push_back(output_ptr[j]); for (int j = 0; j < output_tensor->numel(); ++j) {
result_vector.push_back(output_ptr[j]);
}
return result_vector;
} else {
DLOG << "return empty vector";
return {};
} }
return result_vector;
} }
#ifdef PADDLE_MOBILE_FPGA
template <typename Dtype, Precision P>
void Executor<Dtype, P>::InjectVariable(const framework::Tensor &t,
string var_name) {
framework::Variable *g_feed_value = program_.scope->Var(var_name);
framework::Tensor *feed_tensor =
g_feed_value->GetMutable<framework::LoDTensor>();
feed_tensor->Resize(t.dims());
feed_tensor->ShareDataWith(t);
}
template <typename Dtype, Precision P>
void Executor<Dtype, P>::FeedData(const framework::Tensor &t) {
InjectVariable(t, "feed");
}
template <typename Dtype, Precision P>
std::shared_ptr<framework::Tensor> Executor<Dtype, P>::FetchResult(int id) {
std::shared_ptr<framework::BlockDesc> to_predict_block =
to_predict_program_->Block(0);
auto &ops = ops_of_block_[*to_predict_block.get()];
PADDLE_MOBILE_ENFORCE(id < ops.size(), "Index out of range");
auto last_op = id < 0 ? ops[ops.size() - 1] : ops[id];
auto output_map = last_op->Outputs();
std::vector<std::string> out_keys = last_op->GetOutKeys();
PADDLE_MOBILE_ENFORCE(!out_keys.empty(), "the last op contains no output");
auto *output_tensor = framework::GetVarValue<framework::LoDTensor>(
out_keys[0], output_map, *(program_.scope));
return std::make_shared<framework::Tensor>(framework::Tensor(*output_tensor));
}
template <typename Dtype, Precision P>
void Executor<Dtype, P>::Predict_From_To(int start, int end) {
std::shared_ptr<framework::BlockDesc> to_predict_block =
to_predict_program_->Block(0);
auto &ops = ops_of_block_[*to_predict_block.get()];
end = end < 0 ? static_cast<int>(ops.size()) : end;
PADDLE_MOBILE_ENFORCE(start >= 0 && start < end && end <= ops.size(),
"start or end parameter is wrong");
#ifdef PADDLE_MOBILE_PROFILE
std::vector<ProfInfo> profile(ops.size());
#endif
for (int i = start; i < end; i++) {
#ifdef PADDLE_MOBILE_PROFILE
struct timespec ts;
clock_gettime(CLOCK_MONOTONIC, &ts);
profile[i].runBegin = (uint64_t)ts.tv_sec * 1e9 + ts.tv_nsec;
#endif
DLOG << "Running op: " << i << " " << ops[i]->Type();
ops[i]->Run();
#ifdef PADDLE_MOBILE_PROFILE
clock_gettime(CLOCK_MONOTONIC, &ts);
profile[i].runEnd = (uint64_t)ts.tv_sec * 1e9 + ts.tv_nsec;
#endif
}
}
template <typename Dtype, Precision P>
void Executor<Dtype, P>::Predict_From(int start) {
Predict_From_To(start);
}
template <typename Dtype, Precision P>
void Executor<Dtype, P>::Predict_To(int end) {
Predict_From_To(0, end);
}
#endif
#ifdef PADDLE_MOBILE_FPGA #ifdef PADDLE_MOBILE_FPGA
template <typename Dtype, Precision P> template <typename Dtype, Precision P>
void Executor<Dtype, P>::InjectVariable(const framework::Tensor &t, void Executor<Dtype, P>::InjectVariable(const framework::Tensor &t,
...@@ -470,8 +560,232 @@ void Executor<Dtype, P>::Predict_To(int end) { ...@@ -470,8 +560,232 @@ void Executor<Dtype, P>::Predict_To(int end) {
} }
#endif #endif
#ifdef PADDLE_MOBILE_CL
template <typename Dtype, Precision P>
void Executor<Dtype, P>::LoadMemory(const framework::VarDesc var_desc,
float *tensorInput, char **data) {}
template <>
void Executor<GPU_CL, Precision::FP32>::LoadMemory(
const framework::VarDesc var_desc, float *tensorInput, char **data) {
// 1. version
uint32_t version = *reinterpret_cast<uint32_t *>(*data);
(*data) += sizeof(uint32_t);
// 2 Lod information
uint64_t *lod_level_ptr = new uint64_t();
memcpy(lod_level_ptr, (*data), sizeof(uint64_t));
uint64_t lod_level = *lod_level_ptr;
delete lod_level_ptr;
(*data) += sizeof(uint64_t);
for (uint64_t i = 0; i < lod_level; ++i) {
uint64_t size = *reinterpret_cast<uint64_t *>(*data);
(*data) += sizeof(uint64_t);
std::vector<size_t> tmp(size / sizeof(size_t));
for (int k = 0; k < tmp.size(); ++k) {
tmp[k] = *reinterpret_cast<size_t *>(*data);
(*data) += sizeof(size_t);
}
}
// 3. tensor version
uint32_t tensor_version = *reinterpret_cast<uint32_t *>(*data);
(*data) += sizeof(uint32_t);
// 4. tensor desc
int32_t size = *reinterpret_cast<int32_t *>(*data);
(*data) += sizeof(int32_t);
std::unique_ptr<char[]> buf(new char[size]);
for (int m = 0; m < size; ++m) {
buf.get()[m] = (*data)[m];
}
(*data) += (sizeof(char) * size);
const framework::TensorDesc &desc = var_desc.Tensor_desc();
int memory_size = 1;
for (auto l : desc.Dims()) {
memory_size *= l;
}
void *memory = nullptr;
// int type_size = 0;
// switch (desc.DataType()) {
// case framework::VARTYPE_TYPE_FP16:
// type_size = 2;
// break;
// case framework::VARTYPE_TYPE_FP32:
// type_size = 4;
// memory = tensor->mutable_data<float>();
// break;
// case framework::VARTYPE_TYPE_FP64:
// type_size = 8;
// break;
// case framework::VARTYPE_TYPE_INT32:
// memory = tensor->mutable_data<int32_t>();
// type_size = 4;
// break;
// case framework::VARTYPE_TYPE_INT64:
// type_size = 8;
// break;
// case framework::VARTYPE_TYPE_BOOL:
// type_size = 1;
// break;
// default:
// break;
// }
int type_size = 4;
memory = tensorInput;
if (program_.quantification) {
float min_value;
float max_value;
memcpy(&min_value, *data, sizeof(float));
memcpy(&max_value, *data + sizeof(float), sizeof(float));
*data += 2 * sizeof(float);
const float factor = (max_value - min_value) / 255.0;
uint8_t *uint8_data = reinterpret_cast<uint8_t *>(*data);
for (int k = 0; k < memory_size; ++k) {
static_cast<float *>(memory)[k] = uint8_data[k] * factor + min_value;
}
*data += (memory_size * sizeof(uint8_t));
} else {
for (int n = 0; n < memory_size; n++) {
float value;
memcpy(&value, *data + n * type_size, type_size);
if (value < 1e-30 && value > -1e-30) {
static_cast<float *>(memory)[n] = 0.0;
} else {
static_cast<float *>(memory)[n] = value;
}
}
(*data) += (sizeof(char) * memory_size * type_size);
}
}
template <>
void Executor<GPU_CL, Precision::FP32>::InitMemory() {
for (const auto &block : to_predict_program_->Blocks()) {
for (const auto &var_desc : block->Vars()) {
auto var = program_.scope->Var(var_desc->Name());
if (var_desc->Persistable()) {
CLImage *cl_image = nullptr;
if (var_desc->Name() == "feed" || var_desc->Name() == "fetch") {
var->template GetMutable<framework::LoDTensor>();
continue;
} else {
cl_image = var->template GetMutable<framework::CLImage>();
}
char *origin_data =
ReadFileToBuff(program_.model_path + "/" + var_desc->Name());
char *data = origin_data;
cl_context context = program_.scope->GetCLScpoe()->Context();
const framework::TensorDesc &desc = var_desc->Tensor_desc();
int numel = 1;
for (auto l : desc.Dims()) {
numel *= l;
}
DLOG << var_desc->Name();
float *tensorInput = static_cast<float *>(
paddle_mobile::memory::Alloc(sizeof(float) * numel));
LoadMemory(*var_desc, tensorInput, &data);
framework::DDim ddim = framework::make_ddim(desc.Dims());
// has not init
cl_image->SetTensorData(tensorInput, ddim);
delete origin_data;
paddle_mobile::memory::Free(tensorInput);
} else {
if (var_desc->Type() == framework::VARTYPE_TYPE_LOD_TENSOR) {
auto cl_image = var->template GetMutable<framework::CLImage>();
cl_context context = program_.scope->GetCLScpoe()->Context();
cl_command_queue command_queue =
program_.scope->GetCLScpoe()->CommandQueue();
const framework::TensorDesc &desc = var_desc->Tensor_desc();
// framework::DDim ddim = framework::make_ddim(desc.Dims());
framework::DDim ddim = cl_image->dims();
DLOG << var_desc->Name();
cl_image->InitEmptyImage(context, command_queue, ddim);
}
}
}
}
}
template <>
void Executor<GPU_CL, Precision::FP32>::InitCombineMemory() {
char *origin_data;
if (program_.combined_params_buf && program_.combined_params_len) {
LOG(kLOG_INFO) << "use outter memory";
origin_data = reinterpret_cast<char *>(program_.combined_params_buf);
} else {
LOG(kLOG_INFO) << " begin init combine memory";
origin_data = ReadFileToBuff(program_.para_path);
}
PADDLE_MOBILE_ENFORCE(origin_data != nullptr, "origin_data==nullptr!!!");
float *data = reinterpret_cast<float *>(origin_data);
for (const auto &block : to_predict_program_->Blocks()) {
for (const auto &var_desc : block->Vars()) {
auto var = program_.scope->Var(var_desc->Name());
if (var_desc->Persistable()) {
CLImage *cl_image = nullptr;
if (var_desc->Name() == "feed" || var_desc->Name() == "fetch") {
var->template GetMutable<framework::LoDTensor>();
continue;
} else {
cl_image = var->template GetMutable<framework::CLImage>();
}
cl_context context = program_.scope->GetCLScpoe()->Context();
const framework::TensorDesc &desc = var_desc->Tensor_desc();
framework::DDim ddim = framework::make_ddim(desc.Dims());
int numel = 1;
for (int i = 0; i < ddim.size(); i++) {
numel = numel * ddim[i];
}
float *tensorInput = static_cast<float *>(
paddle_mobile::memory::Alloc(sizeof(float) * numel));
LoadMemory(*var_desc, tensorInput, &origin_data);
// has not init
cl_image->SetTensorData(tensorInput, ddim);
paddle_mobile::memory::Free(tensorInput);
} else {
auto cl_image = var->template GetMutable<framework::CLImage>();
cl_context context = program_.scope->GetCLScpoe()->Context();
cl_command_queue command_queue =
program_.scope->GetCLScpoe()->CommandQueue();
const framework::TensorDesc &desc = var_desc->Tensor_desc();
framework::DDim ddim = cl_image->dims();
// framework::DDim ddim = framework::make_ddim(desc.Dims());
cl_image->InitEmptyImage(context, command_queue, ddim);
}
}
}
delete origin_data;
LOG(kLOG_INFO) << " end init combine memory ";
}
#endif
template class Executor<CPU, Precision::FP32>; template class Executor<CPU, Precision::FP32>;
template class Executor<GPU_MALI, Precision::FP32>;
template class Executor<FPGA, Precision::FP32>; template class Executor<FPGA, Precision::FP32>;
template class Executor<GPU_CL, Precision::FP32>;
template class Executor<GPU_MALI, Precision::FP32>;
} // namespace framework
} // namespace paddle_mobile } // namespace paddle_mobile
...@@ -26,6 +26,7 @@ limitations under the License. */ ...@@ -26,6 +26,7 @@ limitations under the License. */
#include "framework/tensor.h" #include "framework/tensor.h"
namespace paddle_mobile { namespace paddle_mobile {
namespace framework {
template <typename Dtype = CPU, Precision P = Precision::FP32> template <typename Dtype = CPU, Precision P = Precision::FP32>
class Executor { class Executor {
...@@ -79,7 +80,10 @@ class Executor { ...@@ -79,7 +80,10 @@ class Executor {
void LoadMemory(void **data, void LoadMemory(void **data,
const std::shared_ptr<framework::VarDesc> var_desc, const std::shared_ptr<framework::VarDesc> var_desc,
framework::LoDTensor *tensor); framework::LoDTensor *tensor);
#ifdef PADDLE_MOBILE_CL
void LoadMemory(const framework::VarDesc var_desc, float *tensorInput,
char **data);
#endif
framework::Program<Dtype> program_; framework::Program<Dtype> program_;
int batch_size_ = 1; int batch_size_ = 1;
std::shared_ptr<framework::ProgramDesc> to_predict_program_; std::shared_ptr<framework::ProgramDesc> to_predict_program_;
...@@ -97,4 +101,5 @@ class Executor { ...@@ -97,4 +101,5 @@ class Executor {
bool loddable_ = false; bool loddable_ = false;
}; };
} // namespace framework
} // namespace paddle_mobile } // namespace paddle_mobile
...@@ -12,13 +12,16 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. ...@@ -12,13 +12,16 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and See the License for the specific language governing permissions and
limitations under the License. */ limitations under the License. */
#include "io/loader.h" #include "framework/loader.h"
#include "framework/lod_tensor.h" #include "framework/lod_tensor.h"
#include "framework/program/program-optimize/program_optimize.h" #include "framework/program/program-optimize/program_optimize.h"
#ifdef PADDLE_MOBILE_CL
#include "framework/cl/cl_image.h"
#endif
namespace paddle_mobile { namespace paddle_mobile {
using framework::Variable; namespace framework {
/** /**
* muteandresize tensor as originProgramDesc and scope in loadParams * muteandresize tensor as originProgramDesc and scope in loadParams
...@@ -26,23 +29,24 @@ using framework::Variable; ...@@ -26,23 +29,24 @@ using framework::Variable;
* @param originProgramDesc * @param originProgramDesc
* @param scope * @param scope
*/ */
void InitMemoryFromProgram( template <typename Dtype, Precision P>
std::shared_ptr<framework::ProgramDesc> &originProgramDesc, // NOLINT void Loader<Dtype, P>::InitMemoryFromProgram(
std::shared_ptr<framework::Scope> &scope) { // NOLINT const std::shared_ptr<ProgramDesc> &originProgramDesc,
const std::shared_ptr<Scope> &scope) {
for (const auto &block : originProgramDesc.get()->Blocks()) { for (const auto &block : originProgramDesc.get()->Blocks()) {
for (const auto &var_desc : block->Vars()) { for (const auto &var_desc : block->Vars()) {
auto var = scope.get()->Var(var_desc->Name()); auto var = scope.get()->Var(var_desc->Name());
if (var_desc->Type() == framework::VARTYPE_TYPE_LOD_TENSOR) { if (var_desc->Type() == VARTYPE_TYPE_LOD_TENSOR) {
if (var_desc->Persistable()) { if (var_desc->Persistable()) {
auto dim = var_desc->Tensor_desc().Dims(); auto dim = var_desc->Tensor_desc().Dims();
auto tensor = var->GetMutable<framework::LoDTensor>(); auto tensor = var->GetMutable<LoDTensor>();
tensor->Resize(framework::make_ddim(dim)); tensor->Resize(make_ddim(dim));
} else { } else {
auto dim = var_desc->Tensor_desc().Dims(); auto dim = var_desc->Tensor_desc().Dims();
PADDLE_MOBILE_ENFORCE(dim.size() > 0, "dim size is 0"); PADDLE_MOBILE_ENFORCE(dim.size() > 0, "dim size is 0");
dim[0] = 1; dim[0] = 1;
auto tensor = var->GetMutable<framework::LoDTensor>(); auto tensor = var->GetMutable<LoDTensor>();
tensor->Resize(framework::make_ddim(dim)); tensor->Resize(make_ddim(dim));
} }
} else { } else {
// TODO(codeWorm): some. // TODO(codeWorm): some.
...@@ -50,6 +54,36 @@ void InitMemoryFromProgram( ...@@ -50,6 +54,36 @@ void InitMemoryFromProgram(
} }
} }
} }
#ifdef PADDLE_MOBILE_CL
template <>
void Loader<GPU_CL, Precision::FP32>::InitMemoryFromProgram(
const std::shared_ptr<ProgramDesc> &originProgramDesc,
const std::shared_ptr<Scope> &scope) {
for (const auto &block : originProgramDesc.get()->Blocks()) {
for (const auto &var_desc : block->Vars()) {
auto var = scope.get()->Var(var_desc->Name());
if (var_desc->Type() == VARTYPE_TYPE_LOD_TENSOR) {
if (var_desc->Persistable()) {
auto dim = var_desc->Tensor_desc().Dims();
// auto tensor = var->GetMutable<LoDTensor>();
auto cl_image = var->GetMutable<framework::CLImage>();
cl_image->Resize(make_ddim(dim));
} else {
auto dim = var_desc->Tensor_desc().Dims();
PADDLE_MOBILE_ENFORCE(dim.size() > 0, "dim size is 0");
dim[0] = 1;
auto cl_image = var->GetMutable<framework::CLImage>();
cl_image->Resize(make_ddim(dim));
}
} else {
// TODO(codeWorm): some.
}
}
}
}
#endif
/** /**
* fusion and print someinfos * fusion and print someinfos
* @tparam Dtype * @tparam Dtype
...@@ -61,19 +95,18 @@ void InitMemoryFromProgram( ...@@ -61,19 +95,18 @@ void InitMemoryFromProgram(
*/ */
template <typename Dtype, Precision P> template <typename Dtype, Precision P>
void FusionAndPrintInfos( void FusionAndPrintInfos(
bool optimize, bool can_add_split, bool optimize, bool can_add_split, Program<Dtype, P> *program,
framework::Program<Dtype, P> &program, // NOLINT const std::shared_ptr<ProgramDesc> &originProgramDesc) {
const std::shared_ptr<framework::ProgramDesc> &originProgramDesc) {
if (optimize) { if (optimize) {
framework::ProgramOptimize program_optimize; ProgramOptimize program_optimize;
program.optimizeProgram = program->optimizeProgram =
program_optimize.FusionOptimize(originProgramDesc, can_add_split); program_optimize.FusionOptimize(originProgramDesc, can_add_split);
if (!program.optimizeProgram) { if (!program->optimizeProgram) {
program.optimizeProgram = originProgramDesc; program->optimizeProgram = originProgramDesc;
} }
} }
if (optimize) { if (optimize) {
program.optimizeProgram->Description("optimize: "); program->optimizeProgram->Description("optimize: ");
} else { } else {
originProgramDesc->Description("program: "); originProgramDesc->Description("program: ");
} }
...@@ -102,9 +135,10 @@ static size_t ReadBuffer(const char *file_name, uint8_t **out) { ...@@ -102,9 +135,10 @@ static size_t ReadBuffer(const char *file_name, uint8_t **out) {
} }
template <typename Dtype, Precision P> template <typename Dtype, Precision P>
const framework::Program<Dtype, P> Loader<Dtype, P>::Load( const Program<Dtype, P> Loader<Dtype, P>::Load(const std::string &dirname,
const std::string &dirname, bool optimize, bool quantification, bool optimize,
bool can_add_split) { bool quantification,
bool can_add_split) {
auto program = this->LoadProgram(dirname + "/__model__", optimize, auto program = this->LoadProgram(dirname + "/__model__", optimize,
quantification, can_add_split); quantification, can_add_split);
program.model_path = dirname; program.model_path = dirname;
...@@ -112,9 +146,10 @@ const framework::Program<Dtype, P> Loader<Dtype, P>::Load( ...@@ -112,9 +146,10 @@ const framework::Program<Dtype, P> Loader<Dtype, P>::Load(
} }
template <typename Dtype, Precision P> template <typename Dtype, Precision P>
const framework::Program<Dtype, P> Loader<Dtype, P>::Load( const Program<Dtype, P> Loader<Dtype, P>::Load(const std::string &model_path,
const std::string &model_path, const std::string &para_path, bool optimize, const std::string &para_path,
bool quantification) { bool optimize,
bool quantification) {
auto program = this->LoadProgram(model_path, optimize, quantification); auto program = this->LoadProgram(model_path, optimize, quantification);
program.para_path = para_path; program.para_path = para_path;
...@@ -124,7 +159,7 @@ const framework::Program<Dtype, P> Loader<Dtype, P>::Load( ...@@ -124,7 +159,7 @@ const framework::Program<Dtype, P> Loader<Dtype, P>::Load(
} }
template <typename Dtype, Precision P> template <typename Dtype, Precision P>
const framework::Program<Dtype, P> Loader<Dtype, P>::LoadProgram( const Program<Dtype, P> Loader<Dtype, P>::LoadProgram(
const std::string &model_path, bool optimize, bool quantification, const std::string &model_path, bool optimize, bool quantification,
bool can_add_split) { bool can_add_split) {
std::string model_filename = model_path; std::string model_filename = model_path;
...@@ -141,29 +176,29 @@ const framework::Program<Dtype, P> Loader<Dtype, P>::LoadProgram( ...@@ -141,29 +176,29 @@ const framework::Program<Dtype, P> Loader<Dtype, P>::LoadProgram(
// //
DLOG << "n_ops: " << (*c_program->blocks)->n_ops; DLOG << "n_ops: " << (*c_program->blocks)->n_ops;
// //
auto originProgramDesc = std::make_shared<framework::ProgramDesc>(c_program); auto originProgramDesc = std::make_shared<ProgramDesc>(c_program);
framework::Program<Dtype, P> program; Program<Dtype, P> program;
program.originProgram = originProgramDesc; program.originProgram = originProgramDesc;
program.quantification = quantification; program.quantification = quantification;
program.combined_params_len = 0; program.combined_params_len = 0;
program.combined_params_buf = nullptr; program.combined_params_buf = nullptr;
auto scope = std::make_shared<framework::Scope>(); auto scope = std::make_shared<Scope>();
program.scope = scope; program.scope = scope;
// use originProgramDesc and scope to init tensors // use originProgramDesc and scope to init tensors
InitMemoryFromProgram(originProgramDesc, scope); InitMemoryFromProgram(originProgramDesc, scope);
// perform fusion and print infos // perform fusion and print infos
FusionAndPrintInfos(optimize, can_add_split, program, originProgramDesc); FusionAndPrintInfos(optimize, can_add_split, &program, originProgramDesc);
paddle_mobile__framework__proto__program_desc__free_unpacked(c_program, NULL); paddle_mobile__framework__proto__program_desc__free_unpacked(c_program, NULL);
return program; return program;
} }
template <typename Dtype, Precision P> template <typename Dtype, Precision P>
const framework::Program<Dtype, P> Loader<Dtype, P>::LoadCombinedMemory( const Program<Dtype, P> Loader<Dtype, P>::LoadCombinedMemory(
size_t read_size, const uint8_t *buf, size_t combined_params_len, size_t read_size, const uint8_t *buf, size_t combined_params_len,
const uint8_t *combined_params_buf, bool optimize, bool quantification) { uint8_t *combined_params_buf, bool optimize, bool quantification) {
bool can_add_split = false; bool can_add_split = false;
PaddleMobile__Framework__Proto__ProgramDesc *c_program; PaddleMobile__Framework__Proto__ProgramDesc *c_program;
...@@ -177,26 +212,31 @@ const framework::Program<Dtype, P> Loader<Dtype, P>::LoadCombinedMemory( ...@@ -177,26 +212,31 @@ const framework::Program<Dtype, P> Loader<Dtype, P>::LoadCombinedMemory(
DLOG << "n_ops: " << (*c_program->blocks)->n_ops; DLOG << "n_ops: " << (*c_program->blocks)->n_ops;
// //
auto originProgramDesc = std::make_shared<framework::ProgramDesc>(c_program); auto originProgramDesc = std::make_shared<ProgramDesc>(c_program);
framework::Program<Dtype, P> program; Program<Dtype, P> program;
program.combined = true; program.combined = true;
program.originProgram = originProgramDesc; program.originProgram = originProgramDesc;
program.quantification = quantification; program.quantification = quantification;
program.combined_params_len = combined_params_len; program.combined_params_len = combined_params_len;
program.combined_params_buf = combined_params_buf; program.combined_params_buf = combined_params_buf;
auto scope = std::make_shared<framework::Scope>(); auto scope = std::make_shared<Scope>();
program.scope = scope; program.scope = scope;
InitMemoryFromProgram(originProgramDesc, scope); InitMemoryFromProgram(originProgramDesc, scope);
FusionAndPrintInfos(optimize, can_add_split, program, originProgramDesc); FusionAndPrintInfos(optimize, can_add_split, &program, originProgramDesc);
paddle_mobile__framework__proto__program_desc__free_unpacked(c_program, paddle_mobile__framework__proto__program_desc__free_unpacked(c_program,
nullptr); nullptr);
return program; return program;
} }
template class Loader<CPU, Precision::FP32>; template class Loader<CPU, Precision::FP32>;
template class Loader<FPGA, Precision::FP32>; template class Loader<FPGA, Precision::FP32>;
template class Loader<GPU_MALI, Precision::FP32>; template class Loader<GPU_MALI, Precision::FP32>;
template class Loader<GPU_CL, Precision::FP32>;
} // namespace framework
} // namespace paddle_mobile } // namespace paddle_mobile
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#pragma once
#include <string>
#include "common/types.h"
#include "framework/program/program.h"
namespace paddle_mobile {
namespace framework {
template <typename Dtype = CPU, Precision P = Precision::FP32>
class Loader {
public:
/*
* @b load separate format fluid model
* @b 加载分开形式的 fluid 模型
* */
const Program<Dtype, P> Load(const std::string &dirname,
bool optimize = false,
bool quantification = false,
bool can_add_split = false);
/*
* @b load combine format fluid mode
* @b 加载结合在一起格式的模型
* */
const Program<Dtype, P> Load(const std::string &model_path,
const std::string &para_path,
bool optimize = false,
bool quantification = false);
const Program<Dtype, P> LoadCombinedMemory(size_t model_len,
const uint8_t *model_buf,
size_t combined_params_len,
uint8_t *combined_params_buf,
bool optimize = false,
bool quantification = false);
private:
const Program<Dtype, P> LoadProgram(const std::string &model_path,
bool optimize = false,
bool quantification = false,
bool can_add_split = false);
void InitMemoryFromProgram(
const std::shared_ptr<ProgramDesc> &originProgramDesc,
const std::shared_ptr<Scope> &scope);
};
} // namespace framework
} // namespace paddle_mobile
...@@ -14,8 +14,10 @@ limitations under the License. */ ...@@ -14,8 +14,10 @@ limitations under the License. */
#pragma once #pragma once
#include <memory>
#include <string> #include <string>
#include <tuple> #include <tuple>
#include "common/log.h" #include "common/log.h"
#include "common/type_define.h" #include "common/type_define.h"
#include "framework/op_info.h" #include "framework/op_info.h"
...@@ -120,5 +122,8 @@ class OpRegistry { ...@@ -120,5 +122,8 @@ class OpRegistry {
#define REGISTER_OPERATOR_FPGA(op_type, op_class) \ #define REGISTER_OPERATOR_FPGA(op_type, op_class) \
REGISTER_OPERATOR(op_type, op_class, fpga, paddle_mobile::FPGA); REGISTER_OPERATOR(op_type, op_class, fpga, paddle_mobile::FPGA);
#define REGISTER_OPERATOR_CL(op_type, op_class) \
REGISTER_OPERATOR(op_type, op_class, cl, paddle_mobile::GPU_CL);
} // namespace framework } // namespace framework
} // namespace paddle_mobile } // namespace paddle_mobile
...@@ -56,37 +56,69 @@ template <typename Dtype> ...@@ -56,37 +56,69 @@ template <typename Dtype>
void OperatorBase<Dtype>::CheckAllInputOutputSet() const {} void OperatorBase<Dtype>::CheckAllInputOutputSet() const {}
template <typename Dtype> template <typename Dtype>
void OperatorBase<Dtype>::Run() const { void OperatorBase<Dtype>::Run() {
DLOG << " ----- Begin run impl --- " << type_ << " ----- ";
RunImpl(); RunImpl();
#ifdef PADDLE_MOBILE_DEBUG DLOG << " ----- End run impl --- " << type_ << " ----- ";
DLOG << "-------------" << type_ << "----------------------------"; //#ifdef PADDLE_MOBILE_DEBUG
vector<string> input_keys = GetInputKeys(); // DLOG << "-------------" << type_ << "----------------------------";
for (const auto key : input_keys) { // vector<string> input_keys = GetInputKeys();
auto var_vec_in = inputs_.at(key); // for (const auto key : input_keys) {
for (int i = 0; i < var_vec_in.size(); ++i) { // auto var_vec_in = inputs_.at(key);
auto vari = scope_->FindVar(var_vec_in[i]); // for (int i = 0; i < var_vec_in.size(); ++i) {
if (vari->IsInitialized()) { // auto vari = scope_->FindVar(var_vec_in[i]);
Tensor *tensor = vari->template GetMutable<framework::LoDTensor>(); // if (vari->IsInitialized()) {
if (tensor) DLOG << type_ << " input- " << key << "=" << *tensor; //#ifdef PADDLE_MOBILE_CL
} // if (type_ == "feed") {
} // Tensor *tensor = vari->template
} // GetMutable<framework::LoDTensor>(); if (tensor) DLOG << type_ << "
for (const auto key : GetOutKeys()) { // input- " << key << "=" << *tensor;
auto var_vec_out = outputs_.at(key); // } else {
for (int i = 0; i < var_vec_out.size(); ++i) { // CLImage *cl_image = vari->template
auto vari = scope_->FindVar(var_vec_out[i]); // GetMutable<framework::CLImage>(); if (cl_image) {
if (vari->IsInitialized()) { // DLOG << type_ << " input- " << key << "=" << *cl_image;
Tensor *tensor = vari->template GetMutable<framework::LoDTensor>(); // }
if (tensor) DLOG << type_ << " output- " << key << "=" << *tensor; // }
} //
} //#else
} // Tensor *tensor = vari->template GetMutable<framework::LoDTensor>();
#endif // if (tensor) DLOG << type_ << " input- " << key << "=" << *tensor;
//#endif
// }
// }
// }
// for (const auto key : GetOutKeys()) {
// auto var_vec_out = outputs_.at(key);
// for (int i = 0; i < var_vec_out.size(); ++i) {
// auto vari = scope_->FindVar(var_vec_out[i]);
// if (vari->IsInitialized()) {
//#ifdef PADDLE_MOBILE_CL
// if (type_ == "fetch") {
// Tensor *tensor = vari->template
// GetMutable<framework::LoDTensor>(); if (tensor) {
// DLOG << type_ << " output- " << key << "=" << *tensor;
// }
// } else {
// CLImage *cl_image = vari->template
// GetMutable<framework::CLImage>(); if (cl_image) {
// DLOG << type_ << " output- " << key << "=" << *cl_image;
// }
// }
//
//#else
// Tensor *tensor = vari->template GetMutable<framework::LoDTensor>();
// if (tensor) DLOG << type_ << " output- " << key << "=" << *tensor;
//#endif
// }
// }
// }
//#endif
} }
template class OperatorBase<CPU>; template class OperatorBase<CPU>;
template class OperatorBase<FPGA>; template class OperatorBase<FPGA>;
template class OperatorBase<GPU_MALI>; template class OperatorBase<GPU_MALI>;
template class OperatorBase<GPU_CL>;
} // namespace framework } // namespace framework
} // namespace paddle_mobile } // namespace paddle_mobile
...@@ -16,6 +16,7 @@ limitations under the License. */ ...@@ -16,6 +16,7 @@ limitations under the License. */
#include <map> #include <map>
#include <string> #include <string>
#include <utility>
#include <vector> #include <vector>
#include "common/enforce.h" #include "common/enforce.h"
...@@ -31,7 +32,10 @@ limitations under the License. */ ...@@ -31,7 +32,10 @@ limitations under the License. */
#include "framework/scope.h" #include "framework/scope.h"
#include "framework/tensor.h" #include "framework/tensor.h"
#include "framework/variable.h" #include "framework/variable.h"
#ifdef PADDLE_MOBILE_CL
#include "framework/cl/cl_helper.h"
#include "framework/cl/cl_scope.h"
#endif
namespace paddle_mobile { namespace paddle_mobile {
namespace framework { namespace framework {
using std::string; using std::string;
...@@ -59,10 +63,10 @@ class OperatorBase { ...@@ -59,10 +63,10 @@ class OperatorBase {
const VariableNameMap &outputs, const AttributeMap &attrs, const VariableNameMap &outputs, const AttributeMap &attrs,
std::shared_ptr<Scope> scope); std::shared_ptr<Scope> scope);
virtual ~OperatorBase() {} virtual ~OperatorBase() {}
void Run() const; void Run();
std::vector<string> GetOutKeys() const; std::vector<string> GetOutKeys() const;
std::vector<string> GetInputKeys() const; std::vector<string> GetInputKeys() const;
virtual void RunImpl() const = 0; virtual void RunImpl() = 0;
virtual void Init() = 0; virtual void Init() = 0;
/* /*
...@@ -112,9 +116,13 @@ class OperatorWithKernel : public OperatorBase<Dtype> { ...@@ -112,9 +116,13 @@ class OperatorWithKernel : public OperatorBase<Dtype> {
const VariableNameMap &outputs, const AttributeMap &attrs, const VariableNameMap &outputs, const AttributeMap &attrs,
std::shared_ptr<Scope> scope) std::shared_ptr<Scope> scope)
: OperatorBase<Dtype>(type, inputs, outputs, attrs, scope), : OperatorBase<Dtype>(type, inputs, outputs, attrs, scope),
param_(inputs, outputs, attrs, *scope) {} param_(inputs, outputs, attrs, *scope) {
#ifdef PADDLE_MOBILE_CL
kernel_.InitCLHelper(scope->GetCLScpoe());
#endif
}
virtual void RunImpl() const { this->kernel_.Compute(this->param_); } virtual void RunImpl() { this->kernel_.Compute(this->param_); }
virtual void InferShape() const = 0; virtual void InferShape() const = 0;
...@@ -123,6 +131,7 @@ class OperatorWithKernel : public OperatorBase<Dtype> { ...@@ -123,6 +131,7 @@ class OperatorWithKernel : public OperatorBase<Dtype> {
// DLOG << i.first; // DLOG << i.first;
// DLOG << i.second; // DLOG << i.second;
// } // }
PADDLE_MOBILE_ENFORCE(kernel_.Init(&param_), " %s kernel init failed", PADDLE_MOBILE_ENFORCE(kernel_.Init(&param_), " %s kernel init failed",
this->type_.c_str()); this->type_.c_str());
} }
...@@ -138,22 +147,35 @@ class OperatorWithKernel : public OperatorBase<Dtype> { ...@@ -138,22 +147,35 @@ class OperatorWithKernel : public OperatorBase<Dtype> {
template <typename Dtype, typename P> template <typename Dtype, typename P>
class OpKernelBase { class OpKernelBase {
public: public:
/* OpKernelBase() = default;
* @b 所有kernel 需实现 Compute 方法
* @p para 这个参数为 kernel 运算时所需要用到参数组成的一个结构体, #ifdef PADDLE_MOBILE_CL
* 所有结构体存在与: paddle-mobile/src/operators/op_param.h virtual void InitCLHelper(CLScope *clScope) {
* */ cl_helper_ = CLHelper(clScope);
#ifdef PADDLE_MOBILE_MALI_GPU }
#endif
/*
* @b 所有kernel 需实现 Compute 方法
* @p para 这个参数为 kernel 运算时所需要用到参数组成的一个结构体,
* 所有结构体存在与: paddle-mobile/src/operators/op_param.h
* */
#ifdef PADDLE_McOBILE_MALI_GPU
OpKernelBase() { acl_op_ = nullptr; } OpKernelBase() { acl_op_ = nullptr; }
void *GetAclOp() const { return acl_op_; } void *GetAclOp() const { return acl_op_; }
void SetAclOp(void *op, void *ob) const { void SetAclOp(void *op, void *ob) const {
reinterpret_cast<OpKernelBase<Dtype, P> *>(ob)->acl_op_ = op; reinterpret_cast<OpKernelBase<Dtype, P> *>(ob)->acl_op_ = op;
} }
#endif #endif
virtual void Compute(const P &para) const = 0; virtual void Compute(const P &para) = 0;
virtual bool Init(P *para) { return true; } virtual bool Init(P *para) { return true; }
virtual ~OpKernelBase() = default; virtual ~OpKernelBase() = default;
protected:
#ifdef PADDLE_MOBILE_CL
CLHelper cl_helper_;
#endif
private: private:
#ifdef PADDLE_MOBILE_MALI_GPU #ifdef PADDLE_MOBILE_MALI_GPU
void *acl_op_; void *acl_op_;
......
...@@ -18,6 +18,8 @@ limitations under the License. */ ...@@ -18,6 +18,8 @@ limitations under the License. */
#include "framework/program/program_desc.h" #include "framework/program/program_desc.h"
#include "framework/scope.h" #include "framework/scope.h"
#include <string>
namespace paddle_mobile { namespace paddle_mobile {
namespace framework { namespace framework {
...@@ -32,7 +34,7 @@ class Program { ...@@ -32,7 +34,7 @@ class Program {
bool combined = false; bool combined = false;
bool quantification = false; bool quantification = false;
size_t combined_params_len; size_t combined_params_len;
const uint8_t *combined_params_buf; uint8_t *combined_params_buf;
}; };
} // namespace framework } // namespace framework
......
...@@ -15,8 +15,14 @@ limitations under the License. */ ...@@ -15,8 +15,14 @@ limitations under the License. */
#pragma once #pragma once
#include <list> #include <list>
#include <string>
#include <unordered_map> #include <unordered_map>
#include "variable.h" #include <vector>
#ifdef PADDLE_MOBILE_CL
#include "framework/cl/cl_scope.h"
#endif
#include "framework/variable.h"
namespace paddle_mobile { namespace paddle_mobile {
namespace framework { namespace framework {
...@@ -33,6 +39,10 @@ class Scope { ...@@ -33,6 +39,10 @@ class Scope {
delete kid; delete kid;
} }
kids_.clear(); kids_.clear();
#ifdef PADDLE_MOBILE_CL
delete cl_scope_;
#endif
} }
Scope &NewScope() const; Scope &NewScope() const;
...@@ -72,6 +82,10 @@ class Scope { ...@@ -72,6 +82,10 @@ class Scope {
Variable *FindVarLocally(const std::string &name) const; Variable *FindVarLocally(const std::string &name) const;
#ifdef PADDLE_MOBILE_CL
CLScope *GetCLScpoe() { return cl_scope_; }
#endif
private: private:
// Call Scope::NewScope for a sub-scope. // Call Scope::NewScope for a sub-scope.
explicit Scope(Scope const *parent) : parent_(parent) {} explicit Scope(Scope const *parent) : parent_(parent) {}
...@@ -79,6 +93,10 @@ class Scope { ...@@ -79,6 +93,10 @@ class Scope {
mutable std::unordered_map<std::string, Variable *> vars_; mutable std::unordered_map<std::string, Variable *> vars_;
mutable std::list<Scope *> kids_; mutable std::list<Scope *> kids_;
Scope const *parent_{nullptr}; Scope const *parent_{nullptr};
#ifdef PADDLE_MOBILE_CL
CLScope *cl_scope_ = new CLScope();
#endif
}; };
} // namespace framework } // namespace framework
} // namespace paddle_mobile } // namespace paddle_mobile
...@@ -24,65 +24,24 @@ limitations under the License. */ ...@@ -24,65 +24,24 @@ limitations under the License. */
#include <vector> #include <vector>
#include "common/enforce.h" #include "common/enforce.h"
#include "common/types.h"
#include "framework/data_layout.h" #include "framework/data_layout.h"
#include "framework/ddim.h" #include "framework/tensor_base.h"
#include "memory/t_malloc.h" #include "memory/t_malloc.h"
namespace paddle_mobile { namespace paddle_mobile {
namespace framework { namespace framework {
template <typename... T>
struct SizeOfTypeFunctor;
template <typename T>
struct SizeOfTypeFunctor<T> {
size_t operator()(std::type_index type) const {
if (typeid(T).hash_code() == type.hash_code()) {
return sizeof(T);
} else {
return 0UL;
}
}
};
template <>
struct SizeOfTypeFunctor<> {
size_t operator()(std::type_index type) const { return 0UL; }
};
template <typename HEAD, typename... TAIL>
struct SizeOfTypeFunctor<HEAD, TAIL...> {
size_t operator()(std::type_index type) const {
SizeOfTypeFunctor<HEAD> head;
size_t head_size = head(type);
if (head_size != 0) {
return head_size;
}
SizeOfTypeFunctor<TAIL...> tail;
return tail(type);
}
};
static inline size_t SizeOfType(std::type_index type) {
SizeOfTypeFunctor<int8_t, int, half, float, double, int16_t, int64_t, bool,
size_t>
functor;
size_t size = functor(type);
PADDLE_MOBILE_ENFORCE(size != 0UL, "Cannot get size of type %s", type.name());
return size;
}
class LoDTensor; class LoDTensor;
class Tensor { class Tensor : public TensorBase {
public: public:
Tensor() : offset_(0) {} Tensor() {}
template <typename T> template <typename T>
Tensor(std::vector<T> input, DDim ddim) : offset_(0) { Tensor(std::vector<T> input, DDim ddim) {
PADDLE_MOBILE_ENFORCE( PADDLE_MOBILE_ENFORCE(
input.size() == framework::product(ddim), input.size() == framework::product(ddim),
"input vector'length should be equal to tensor's length"); "input vector'length should be equal to tensor's length");
auto input_ptr = mutable_data<T>(ddim); auto input_ptr = mutable_data<T>(ddim);
for (int i = 0; i < input.size(); ++i) { for (int i = 0; i < input.size(); ++i) {
input_ptr[i] = input[i]; input_ptr[i] = input[i];
...@@ -95,46 +54,6 @@ class Tensor { ...@@ -95,46 +54,6 @@ class Tensor {
this->offset_ = inTensor.offset_; this->offset_ = inTensor.offset_;
} }
/*! Return a pointer to mutable memory block. */
template <typename T>
inline T *data() {
check_memory_size();
PADDLE_MOBILE_ENFORCE(
(std::is_same<T, void>::value ||
holder_->type().hash_code() == typeid(T).hash_code()),
"Tensor holds the wrong type, it holds %s",
this->holder_->type().name());
return reinterpret_cast<T *>(reinterpret_cast<uintptr_t>(holder_->ptr()) +
offset_);
}
/*! Return a pointer to constant memory block. */
template <typename T>
inline const T *data() const {
check_memory_size();
PADDLE_MOBILE_ENFORCE(
(std::is_same<T, void>::value ||
holder_->type().hash_code() == typeid(T).hash_code()),
"Tensor holds the wrong type, it holds %s ,requested:%s",
this->holder_->type().name(), typeid(T).name());
return reinterpret_cast<const T *>(
reinterpret_cast<uintptr_t>(holder_->ptr()) + offset_);
}
inline bool IsInitialized() const { return holder_ != nullptr; }
/**
* @brief Return a pointer to mutable memory block.
* @note If not exist, then allocation.
*/
template <typename T>
inline T *mutable_data() {
static_assert(std::is_pod<T>::value, "T must be POD");
return reinterpret_cast<T *>(mutable_data(typeid(T)));
}
#ifdef PADDLE_MOBILE_DEBUG #ifdef PADDLE_MOBILE_DEBUG
template <typename T> template <typename T>
inline void dump(std::string filename) const { inline void dump(std::string filename) const {
...@@ -151,6 +70,21 @@ class Tensor { ...@@ -151,6 +70,21 @@ class Tensor {
} }
#endif #endif
/*! Resize the dimensions of the memory block. */
inline Tensor &Resize(const DDim &dims) {
dims_ = dims;
return *this;
}
/*! The internal of two tensors share the same memory block. */
inline Tensor &ShareDataWith(const Tensor &src) {
src.check_memory_size();
if (holder_.get() != src.holder_.get()) {
*this = src;
}
return *this;
}
inline void *mutable_data(std::type_index type) { inline void *mutable_data(std::type_index type) {
if (holder_ != nullptr) { if (holder_ != nullptr) {
holder_->set_type(type); holder_->set_type(type);
...@@ -165,6 +99,16 @@ class Tensor { ...@@ -165,6 +99,16 @@ class Tensor {
reinterpret_cast<uintptr_t>(holder_->ptr()) + offset_); reinterpret_cast<uintptr_t>(holder_->ptr()) + offset_);
} }
/**
* @brief Return a pointer to mutable memory block.
* @note If not exist, then allocation.
*/
template <typename T>
inline T *mutable_data() {
static_assert(std::is_pod<T>::value, "T must be POD");
return reinterpret_cast<T *>(mutable_data(typeid(T)));
}
/** /**
* @brief Return a pointer to mutable memory block. * @brief Return a pointer to mutable memory block.
* *
...@@ -180,27 +124,6 @@ class Tensor { ...@@ -180,27 +124,6 @@ class Tensor {
return mutable_data<T>(); return mutable_data<T>();
} }
/*! Return the dimensions of the memory block. */
inline const DDim &dims() const { return dims_; }
/*! Return the numel of the memory block. */
inline int64_t numel() const { return product(dims_); }
/*! Resize the dimensions of the memory block. */
inline Tensor &Resize(const DDim &dims) {
dims_ = dims;
return *this;
}
/*! The internal of two tensors share the same memory block. */
inline Tensor &ShareDataWith(const Tensor &src) {
src.check_memory_size();
if (holder_.get() != src.holder_.get()) {
*this = src;
}
return *this;
}
/** /**
* @brief Return a sub-tensor of the given tensor. * @brief Return a sub-tensor of the given tensor.
* *
...@@ -234,44 +157,35 @@ class Tensor { ...@@ -234,44 +157,35 @@ class Tensor {
} }
} }
std::type_index type() const { /*! Return a pointer to mutable memory block. */
template <typename T>
inline T *data() {
check_memory_size();
PADDLE_MOBILE_ENFORCE( PADDLE_MOBILE_ENFORCE(
holder_ != nullptr, (std::is_same<T, void>::value ||
"Tensor not initialized yet when Tensor::type() is called.") holder_->type().hash_code() == typeid(T).hash_code()),
return holder_->type(); "Tensor holds the wrong type, it holds %s",
} this->holder_->type().name());
// memory size returns the holding memory size in byte. return reinterpret_cast<T *>(reinterpret_cast<uintptr_t>(holder_->ptr()) +
size_t memory_size() const { offset_);
return holder_ == nullptr ? 0UL : holder_->size() - offset_;
} }
inline void check_memory_size() const { /*! Return a pointer to constant memory block. */
template <typename T>
inline const T *data() const {
check_memory_size();
PADDLE_MOBILE_ENFORCE( PADDLE_MOBILE_ENFORCE(
holder_ != nullptr, (std::is_same<T, void>::value ||
"Tensor holds no memory. Call Tensor::mutable_data first."); holder_->type().hash_code() == typeid(T).hash_code()),
PADDLE_MOBILE_ENFORCE(numel() * SizeOfType(type()) <= memory_size(), "Tensor holds the wrong type, it holds %s ,requested:%s",
"Tensor's dims_ is out of bound. "); this->holder_->type().name(), typeid(T).name());
return reinterpret_cast<const T *>(
reinterpret_cast<uintptr_t>(holder_->ptr()) + offset_);
} }
private: private:
/**
* @note Placeholder hides type T, so it doesn't appear as a
* template
* parameter of Variable.
*/
struct Placeholder {
virtual ~Placeholder() = default;
virtual void *ptr() const = 0;
virtual size_t size() const = 0;
virtual std::type_index type() const = 0;
virtual void set_type(std::type_index type) = 0;
};
struct PlaceholderImpl : public Placeholder { struct PlaceholderImpl : public Placeholder {
PlaceholderImpl(size_t size, std::type_index type) PlaceholderImpl(size_t size, std::type_index type)
: ptr_(static_cast<uint8_t *>(memory::Alloc(size)), : ptr_(static_cast<uint8_t *>(memory::Alloc(size)),
...@@ -299,27 +213,6 @@ class Tensor { ...@@ -299,27 +213,6 @@ class Tensor {
std::type_index type_; std::type_index type_;
}; };
/*! holds the memory block if allocated. */
std::shared_ptr<Placeholder> holder_;
/**
* @brief points to elements dimensions.
*
* @note dims_ do not indicate the memory block size.
*/
DDim dims_;
/**
* @brief A PlaceHolder may be shared by more than one tensor.
*
* @note Some of them may be slices of the others. So the offset_
* is introduced here to indicate the byte offset between
* PlaceHolder::ptr_ and where the tensor data really
* begins.
*/
size_t offset_;
#ifdef PADDLE_MOBILE_FPGA #ifdef PADDLE_MOBILE_FPGA
public: // NOLINT public: // NOLINT
inline void reset_data_ptr(void *p) { inline void reset_data_ptr(void *p) {
......
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#pragma once
#include <type_traits>
#include <typeindex>
#include "common/enforce.h"
#include "common/types.h"
#include "framework/ddim.h"
namespace paddle_mobile {
namespace framework {
template <typename... T>
struct SizeOfTypeFunctor;
template <typename T>
struct SizeOfTypeFunctor<T> {
size_t operator()(std::type_index type) const {
if (typeid(T).hash_code() == type.hash_code()) {
return sizeof(T);
} else {
return 0UL;
}
}
};
template <>
struct SizeOfTypeFunctor<> {
size_t operator()(std::type_index type) const { return 0UL; }
};
template <typename HEAD, typename... TAIL>
struct SizeOfTypeFunctor<HEAD, TAIL...> {
size_t operator()(std::type_index type) const {
SizeOfTypeFunctor<HEAD> head;
size_t head_size = head(type);
if (head_size != 0) {
return head_size;
}
SizeOfTypeFunctor<TAIL...> tail;
return tail(type);
}
};
static inline size_t SizeOfType(std::type_index type) {
SizeOfTypeFunctor<int8_t, int, half, float, double, int16_t, int64_t, bool,
size_t>
functor;
size_t size = functor(type);
PADDLE_MOBILE_ENFORCE(size != 0UL, "Cannot get size of type %s", type.name());
return size;
}
class TensorBase {
public:
virtual inline TensorBase &Resize(const DDim &dims) = 0;
inline bool IsInitialized() const { return holder_ != nullptr; }
/*! Return the dimensions of the memory block. */
inline const DDim &dims() const { return dims_; }
/*! Return the numel of the memory block. */
inline int64_t numel() const { return product(dims_); }
std::type_index type() const {
PADDLE_MOBILE_ENFORCE(
holder_ != nullptr,
"Tensor not initialized yet when Tensor::type() is called.")
return holder_->type();
}
// memory size returns the holding memory size in byte.
size_t memory_size() const {
return holder_ == nullptr ? 0UL : holder_->size() - offset_;
}
inline void check_memory_size() const {
PADDLE_MOBILE_ENFORCE(
holder_ != nullptr,
"Tensor holds no memory. Call Tensor::mutable_data first.");
PADDLE_MOBILE_ENFORCE(numel() * SizeOfType(type()) <= memory_size(),
"Tensor's dims_ is out of bound. ");
}
protected:
/**
* @note Placeholder hides type T, so it doesn't appear as a
* template
* parameter of Variable.
*/
struct Placeholder {
virtual ~Placeholder() = default;
virtual void *ptr() const = 0;
virtual size_t size() const = 0;
virtual std::type_index type() const = 0;
virtual void set_type(std::type_index type) = 0;
};
/**
* @brief points to elements dimensions.
*
* @note dims_ do not indicate the memory block size.
*/
DDim dims_;
/*! holds the memory block if allocated. */
std::shared_ptr<Placeholder> holder_;
/**
* @brief A PlaceHolder may be shared by more than one tensor.
*
* @note Some of them may be slices of the others. So the offset_
* is introduced here to indicate the byte offset between
* PlaceHolder::ptr_ and where the tensor data really
* begins.
*/
size_t offset_ = 0;
};
} // namespace framework
} // namespace paddle_mobile
...@@ -126,6 +126,8 @@ CreatePaddlePredictor<PaddleMobileConfig, PaddleEngineKind::kPaddleMobile>( ...@@ -126,6 +126,8 @@ CreatePaddlePredictor<PaddleMobileConfig, PaddleEngineKind::kPaddleMobile>(
x.reset(new PaddleMobilePredictor<FPGA, Precision::FP32>(config)); x.reset(new PaddleMobilePredictor<FPGA, Precision::FP32>(config));
} else if (config.device == PaddleMobileConfig::kGPU_MALI) { } else if (config.device == PaddleMobileConfig::kGPU_MALI) {
x.reset(new PaddleMobilePredictor<GPU_MALI, Precision::FP32>(config)); x.reset(new PaddleMobilePredictor<GPU_MALI, Precision::FP32>(config));
} else if (config.device == PaddleMobileConfig::kGPU_CL) {
x.reset(new PaddleMobilePredictor<GPU_CL, Precision::FP32>(config));
} else { } else {
LOG(kLOG_ERROR) << "unsupport device type!"; LOG(kLOG_ERROR) << "unsupport device type!";
return nullptr; return nullptr;
......
...@@ -44,7 +44,7 @@ class PaddleBuf { ...@@ -44,7 +44,7 @@ class PaddleBuf {
PaddleBuf(void* data, size_t length) PaddleBuf(void* data, size_t length)
: data_(data), length_(length), memory_owned_{false} {} : data_(data), length_(length), memory_owned_{false} {}
// Own memory. // Own memory.
PaddleBuf(size_t length) explicit PaddleBuf(size_t length)
: data_(new char[length]), length_(length), memory_owned_(true) {} : data_(new char[length]), length_(length), memory_owned_(true) {}
// Resize to `length` bytes. // Resize to `length` bytes.
void Resize(size_t length); void Resize(size_t length);
...@@ -121,7 +121,7 @@ struct PaddleModelMemoryPack { ...@@ -121,7 +121,7 @@ struct PaddleModelMemoryPack {
struct PaddleMobileConfig : public PaddlePredictor::Config { struct PaddleMobileConfig : public PaddlePredictor::Config {
enum Precision { FP32 = 0 }; enum Precision { FP32 = 0 };
enum Device { kCPU = 0, kFPGA = 1, kGPU_MALI = 2 }; enum Device { kCPU = 0, kFPGA = 1, kGPU_MALI = 2, kGPU_CL = 3 };
enum Precision precision; enum Precision precision;
enum Device device; enum Device device;
......
...@@ -28,13 +28,13 @@ bool PaddleMobile<Dtype, P>::Load(const std::string &dirname, bool optimize, ...@@ -28,13 +28,13 @@ bool PaddleMobile<Dtype, P>::Load(const std::string &dirname, bool optimize,
bool quantification, int batch_size, bool quantification, int batch_size,
bool loddable) { bool loddable) {
if (loader_.get() == nullptr) { if (loader_.get() == nullptr) {
loader_ = std::make_shared<Loader<Dtype, P>>(); loader_ = std::make_shared<framework::Loader<Dtype, P>>();
} else { } else {
LOG(kLOG_INFO) << "loader inited"; LOG(kLOG_INFO) << "loader inited";
} }
if (executor_.get() == nullptr) { if (executor_.get() == nullptr) {
executor_ = std::make_shared<Executor<Dtype, P>>( executor_ = std::make_shared<framework::Executor<Dtype, P>>(
loader_->Load(dirname, optimize, quantification), batch_size, optimize, loader_->Load(dirname, optimize, quantification), batch_size, optimize,
loddable); loddable);
} else { } else {
...@@ -50,13 +50,13 @@ bool PaddleMobile<Dtype, P>::Load(const std::string &model_path, ...@@ -50,13 +50,13 @@ bool PaddleMobile<Dtype, P>::Load(const std::string &model_path,
bool quantification, int batch_size, bool quantification, int batch_size,
bool loddable) { bool loddable) {
if (loader_.get() == nullptr) { if (loader_.get() == nullptr) {
loader_ = std::make_shared<Loader<Dtype, P>>(); loader_ = std::make_shared<framework::Loader<Dtype, P>>();
} else { } else {
LOG(kLOG_INFO) << "loader inited"; LOG(kLOG_INFO) << "loader inited";
} }
if (executor_.get() == nullptr) { if (executor_.get() == nullptr) {
executor_ = std::make_shared<Executor<Dtype, P>>( executor_ = std::make_shared<framework::Executor<Dtype, P>>(
loader_->Load(model_path, para_path, optimize, quantification), loader_->Load(model_path, para_path, optimize, quantification),
batch_size, optimize, loddable); batch_size, optimize, loddable);
} else { } else {
...@@ -67,21 +67,22 @@ bool PaddleMobile<Dtype, P>::Load(const std::string &model_path, ...@@ -67,21 +67,22 @@ bool PaddleMobile<Dtype, P>::Load(const std::string &model_path,
} }
template <typename Dtype, Precision P> template <typename Dtype, Precision P>
bool PaddleMobile<Dtype, P>::LoadCombinedMemory( bool PaddleMobile<Dtype, P>::LoadCombinedMemory(size_t model_len,
size_t model_len, const uint8_t *model_buf, size_t combined_params_len, const uint8_t *model_buf,
const uint8_t *combined_params_buf) { size_t combined_params_len,
uint8_t *combined_params_buf) {
int batch_size = 1; int batch_size = 1;
bool optimise = true; bool optimise = true;
bool quantification = false; bool quantification = false;
if (loader_.get() == nullptr) { if (loader_.get() == nullptr) {
loader_ = std::make_shared<Loader<Dtype, P>>(); loader_ = std::make_shared<framework::Loader<Dtype, P>>();
} else { } else {
LOG(kLOG_INFO) << "loader inited"; LOG(kLOG_INFO) << "loader inited";
} }
if (executor_.get() == nullptr) { if (executor_.get() == nullptr) {
executor_ = std::make_shared<Executor<Dtype, P>>( executor_ = std::make_shared<framework::Executor<Dtype, P>>(
loader_->LoadCombinedMemory(model_len, model_buf, combined_params_len, loader_->LoadCombinedMemory(model_len, model_buf, combined_params_len,
combined_params_buf, optimise, combined_params_buf, optimise,
quantification), quantification),
...@@ -161,4 +162,6 @@ template class PaddleMobile<CPU, Precision::FP32>; ...@@ -161,4 +162,6 @@ template class PaddleMobile<CPU, Precision::FP32>;
template class PaddleMobile<FPGA, Precision::FP32>; template class PaddleMobile<FPGA, Precision::FP32>;
template class PaddleMobile<GPU_MALI, Precision::FP32>; template class PaddleMobile<GPU_MALI, Precision::FP32>;
template class PaddleMobile<GPU_CL, Precision::FP32>;
} // namespace paddle_mobile } // namespace paddle_mobile
...@@ -22,10 +22,10 @@ limitations under the License. */ ...@@ -22,10 +22,10 @@ limitations under the License. */
#endif // _OPENMP #endif // _OPENMP
#include "common/types.h" #include "common/types.h"
#include "framework/executor.h"
#include "framework/load_ops.h" #include "framework/load_ops.h"
#include "framework/loader.h"
#include "framework/tensor.h" #include "framework/tensor.h"
#include "io/executor.h"
#include "io/loader.h"
namespace paddle_mobile { namespace paddle_mobile {
...@@ -52,7 +52,7 @@ class PaddleMobile { ...@@ -52,7 +52,7 @@ class PaddleMobile {
bool LoadCombinedMemory(size_t model_len, const uint8_t *model_buf, bool LoadCombinedMemory(size_t model_len, const uint8_t *model_buf,
size_t combined_params_len, size_t combined_params_len,
const uint8_t *combined_params_buf); uint8_t *combined_params_buf);
void SetThreadNum(int num); void SetThreadNum(int num);
void Clear(); void Clear();
...@@ -69,8 +69,8 @@ class PaddleMobile { ...@@ -69,8 +69,8 @@ class PaddleMobile {
#endif #endif
private: private:
std::shared_ptr<Loader<Dtype, P>> loader_; std::shared_ptr<framework::Loader<Dtype, P>> loader_;
std::shared_ptr<Executor<Dtype, P>> executor_; std::shared_ptr<framework::Executor<Dtype, P>> executor_;
}; };
} // namespace paddle_mobile } // namespace paddle_mobile
...@@ -14,7 +14,7 @@ limitations under the License. */ ...@@ -14,7 +14,7 @@ limitations under the License. */
#ifdef BATCHNORM_OP #ifdef BATCHNORM_OP
#include "batchnorm_op.h" #include "operators/batchnorm_op.h"
#include "framework/op_proto_maker.h" #include "framework/op_proto_maker.h"
#include "framework/op_registry.h" #include "framework/op_registry.h"
...@@ -40,4 +40,8 @@ REGISTER_OPERATOR_MALI_GPU(batch_norm, ops::BatchNormOp); ...@@ -40,4 +40,8 @@ REGISTER_OPERATOR_MALI_GPU(batch_norm, ops::BatchNormOp);
#ifdef PADDLE_MOBILE_FPGA #ifdef PADDLE_MOBILE_FPGA
#endif #endif
#ifdef PADDLE_MOBILE_CL
REGISTER_OPERATOR_CL(batch_norm, ops::BatchNormOp);
#endif
#endif #endif
...@@ -40,10 +40,6 @@ class BilinearOp : public framework::OperatorWithKernel< ...@@ -40,10 +40,6 @@ class BilinearOp : public framework::OperatorWithKernel<
DeviceType, BilinearInterpParam<DeviceType>, DeviceType, BilinearInterpParam<DeviceType>,
operators::BilinearInterpKernel<DeviceType, T>>( operators::BilinearInterpKernel<DeviceType, T>>(
type, inputs, outputs, attrs, scope) {} type, inputs, outputs, attrs, scope) {}
using framework::OperatorWithKernel<
DeviceType, BilinearInterpParam<DeviceType>,
operators::BilinearInterpKernel<DeviceType, T>>::OperatorWithKernel;
void InferShape() const override; void InferShape() const override;
}; };
......
...@@ -39,10 +39,6 @@ class BoxCoderOp : public framework::OperatorWithKernel< ...@@ -39,10 +39,6 @@ class BoxCoderOp : public framework::OperatorWithKernel<
: framework::OperatorWithKernel<DeviceType, BoxCoderParam<DeviceType>, : framework::OperatorWithKernel<DeviceType, BoxCoderParam<DeviceType>,
operators::BoxCoderKernel<DeviceType, T>>( operators::BoxCoderKernel<DeviceType, T>>(
type, inputs, outputs, attrs, scope) {} type, inputs, outputs, attrs, scope) {}
using framework::OperatorWithKernel<
DeviceType, BoxCoderParam<DeviceType>,
operators::BoxCoderKernel<DeviceType, T>>::OperatorWithKernel;
void InferShape() const override; void InferShape() const override;
protected: protected:
......
...@@ -34,10 +34,6 @@ class ConcatOp : public framework::OperatorWithKernel< ...@@ -34,10 +34,6 @@ class ConcatOp : public framework::OperatorWithKernel<
: framework::OperatorWithKernel<DeviceType, ConcatParam<DeviceType>, : framework::OperatorWithKernel<DeviceType, ConcatParam<DeviceType>,
operators::ConcatKernel<DeviceType, T>>( operators::ConcatKernel<DeviceType, T>>(
type, inputs, outputs, attrs, scope) {} type, inputs, outputs, attrs, scope) {}
using framework::OperatorWithKernel<
DeviceType, ConcatParam<DeviceType>,
operators::ConcatKernel<DeviceType, T>>::OperatorWithKernel;
void InferShape() const override; void InferShape() const override;
protected: protected:
......
...@@ -62,4 +62,8 @@ REGISTER_OPERATOR_MALI_GPU(conv2d, ops::ConvOp); ...@@ -62,4 +62,8 @@ REGISTER_OPERATOR_MALI_GPU(conv2d, ops::ConvOp);
REGISTER_OPERATOR_FPGA(conv2d, ops::ConvOp); REGISTER_OPERATOR_FPGA(conv2d, ops::ConvOp);
#endif #endif
#ifdef PADDLE_MOBILE_CL
REGISTER_OPERATOR_CL(conv2d, ops::ConvOp);
#endif
#endif #endif
...@@ -34,10 +34,6 @@ class ConvOp : public framework::OperatorWithKernel< ...@@ -34,10 +34,6 @@ class ConvOp : public framework::OperatorWithKernel<
: framework::OperatorWithKernel<DeviceType, ConvParam<DeviceType>, : framework::OperatorWithKernel<DeviceType, ConvParam<DeviceType>,
operators::ConvKernel<DeviceType, T>>( operators::ConvKernel<DeviceType, T>>(
type, inputs, outputs, attrs, scope) {} type, inputs, outputs, attrs, scope) {}
using framework::OperatorWithKernel<
DeviceType, ConvParam<DeviceType>,
operators::ConvKernel<DeviceType, T>>::OperatorWithKernel;
void InferShape() const override; void InferShape() const override;
private: private:
......
...@@ -37,10 +37,6 @@ class CrfOp : public framework::OperatorWithKernel< ...@@ -37,10 +37,6 @@ class CrfOp : public framework::OperatorWithKernel<
: framework::OperatorWithKernel<DeviceType, CrfParam<DeviceType>, : framework::OperatorWithKernel<DeviceType, CrfParam<DeviceType>,
operators::CrfKernel<DeviceType, T>>( operators::CrfKernel<DeviceType, T>>(
type, inputs, outputs, attrs, scope) {} type, inputs, outputs, attrs, scope) {}
using framework::OperatorWithKernel<
DeviceType, CrfParam<DeviceType>,
operators::CrfKernel<DeviceType, T>>::OperatorWithKernel;
void InferShape() const override; void InferShape() const override;
}; };
......
...@@ -36,10 +36,6 @@ class DepthwiseConvOp : public framework::OperatorWithKernel< ...@@ -36,10 +36,6 @@ class DepthwiseConvOp : public framework::OperatorWithKernel<
DeviceType, ConvParam<DeviceType>, DeviceType, ConvParam<DeviceType>,
operators::DepthwiseConvKernel<DeviceType, T>>( operators::DepthwiseConvKernel<DeviceType, T>>(
type, inputs, outputs, attrs, scope) {} type, inputs, outputs, attrs, scope) {}
using framework::OperatorWithKernel<
DeviceType, ConvParam<DeviceType>,
operators::DepthwiseConvKernel<DeviceType, T>>::OperatorWithKernel;
void InferShape() const override; void InferShape() const override;
private: private:
......
...@@ -38,10 +38,6 @@ class DropoutOp : public framework::OperatorWithKernel< ...@@ -38,10 +38,6 @@ class DropoutOp : public framework::OperatorWithKernel<
: framework::OperatorWithKernel<DeviceType, DropoutParam<DeviceType>, : framework::OperatorWithKernel<DeviceType, DropoutParam<DeviceType>,
operators::DropoutKernel<DeviceType, T>>( operators::DropoutKernel<DeviceType, T>>(
type, inputs, outputs, attrs, scope) {} type, inputs, outputs, attrs, scope) {}
// using framework::OperatorWithKernel<DeviceType, DropoutParam<DeviceType>,
// operators::DropoutKernel<DeviceType,
// T>>;
void InferShape() const override; void InferShape() const override;
protected: protected:
......
...@@ -14,7 +14,7 @@ limitations under the License. */ ...@@ -14,7 +14,7 @@ limitations under the License. */
#ifdef ELEMENTWISEADD_OP #ifdef ELEMENTWISEADD_OP
#include "elementwise_add_op.h" #include "operators/elementwise_add_op.h"
namespace paddle_mobile { namespace paddle_mobile {
namespace operators { namespace operators {
...@@ -36,4 +36,8 @@ REGISTER_OPERATOR_CPU(elementwise_add, ops::ElementwiseAddOp); ...@@ -36,4 +36,8 @@ REGISTER_OPERATOR_CPU(elementwise_add, ops::ElementwiseAddOp);
REGISTER_OPERATOR_MALI_GPU(elementwise_add, ops::ElementwiseAddOp); REGISTER_OPERATOR_MALI_GPU(elementwise_add, ops::ElementwiseAddOp);
#endif #endif
#ifdef PADDLE_MOBILE_CL
REGISTER_OPERATOR_CL(elementwise_add, ops::ElementwiseAddOp);
#endif
#endif #endif
...@@ -37,10 +37,6 @@ class ElementwiseAddOp : public framework::OperatorWithKernel< ...@@ -37,10 +37,6 @@ class ElementwiseAddOp : public framework::OperatorWithKernel<
DeviceType, ElementwiseAddParam<DeviceType>, DeviceType, ElementwiseAddParam<DeviceType>,
operators::ElementwiseAddKernel<DeviceType, T>>( operators::ElementwiseAddKernel<DeviceType, T>>(
type, inputs, outputs, attrs, scope) {} type, inputs, outputs, attrs, scope) {}
using framework::OperatorWithKernel<
DeviceType, ElementwiseAddParam<DeviceType>,
operators::ElementwiseAddKernel<DeviceType, T>>::OperatorWithKernel;
void InferShape() const override; void InferShape() const override;
protected: protected:
......
...@@ -14,6 +14,19 @@ limitations under the License. */ ...@@ -14,6 +14,19 @@ limitations under the License. */
#include "operators/feed_op.h" #include "operators/feed_op.h"
namespace paddle_mobile {
namespace operators {
template <typename DeviceType, typename T>
void FeedOp<DeviceType, T>::InferShape() const {
auto out_dims = this->param_.Out()->dims();
out_dims[0] = this->param_.BatchSize();
this->param_.Out()->Resize(out_dims);
}
} // namespace operators
} // namespace paddle_mobile
namespace ops = paddle_mobile::operators; namespace ops = paddle_mobile::operators;
#ifdef PADDLE_MOBILE_CPU #ifdef PADDLE_MOBILE_CPU
...@@ -25,3 +38,6 @@ REGISTER_OPERATOR_MALI_GPU(feed, ops::FeedOp); ...@@ -25,3 +38,6 @@ REGISTER_OPERATOR_MALI_GPU(feed, ops::FeedOp);
#ifdef PADDLE_MOBILE_FPGA #ifdef PADDLE_MOBILE_FPGA
REGISTER_OPERATOR_FPGA(feed, ops::FeedOp); REGISTER_OPERATOR_FPGA(feed, ops::FeedOp);
#endif #endif
#ifdef PADDLE_MOBILE_CL
REGISTER_OPERATOR_CL(feed, ops::FeedOp);
#endif
...@@ -16,68 +16,29 @@ limitations under the License. */ ...@@ -16,68 +16,29 @@ limitations under the License. */
#include <string> #include <string>
#include "framework/operator.h" #include "framework/operator.h"
#include "operators/kernel/feed_kernel.h"
#include "operators/op_param.h" #include "operators/op_param.h"
namespace paddle_mobile { namespace paddle_mobile {
namespace operators { namespace operators {
using std::string;
template <typename DeviceType, typename T> template <typename DeviceType, typename T>
class FeedOp : public framework::OperatorBase<DeviceType> { class FeedOp
: public framework::OperatorWithKernel<DeviceType, FeedParam<DeviceType>,
FeedKernel<DeviceType, T>> {
public: public:
FeedOp(const std::string &type, const VariableNameMap &inputs, FeedOp(const std::string &type, const VariableNameMap &inputs,
const VariableNameMap &outputs, const framework::AttributeMap attrs, const VariableNameMap &outputs, const framework::AttributeMap attrs,
std::shared_ptr<framework::Scope> scope) std::shared_ptr<framework::Scope> scope)
: framework::OperatorBase<DeviceType>(type, inputs, outputs, attrs,
scope),
param_(inputs, outputs, attrs, scope.get()) {}
void InferShape() const {
auto out_dims = param_.Out()->dims();
out_dims[0] = param_.BatchSize();
param_.Out()->Resize(out_dims);
}
#ifdef PADDLE_MOBILE_FPGA
void Init() {
Tensor *output = param_.Out();
fpga::format_fp16_ofm(output);
}
void RunImpl() const {
auto input = (Tensor *)const_cast<LoDTensor *>(param_.InputX()); // NOLINT
fpga::format_image(input);
auto input_ptr = input->data<float>();
Tensor *output = param_.Out();
auto output_ptr = output->data<float>();
fpga::BypassArgs args = {fpga::DATA_TYPE_FP32};
args.input_data_type = fpga::DATA_TYPE_FP32;
args.output_data_type = fpga::DATA_TYPE_FP16;
args.input_layout_type = fpga::LAYOUT_CHW;
args.output_layout_type = fpga::LAYOUT_HWC;
args.image.address = (void *)input_ptr; // NOLINT
args.image.channels = (uint32_t)input->dims()[1];
args.image.height = (uint32_t)input->dims()[2];
args.image.width = (uint32_t)input->dims()[3];
args.image.pad_height = 0;
args.image.pad_width = 0;
args.output.address = output_ptr;
args.output.scale_address = output->scale;
fpga::PerformBypass(args);
}
#else : framework::OperatorWithKernel<DeviceType, FeedParam<DeviceType>,
void Init() {} FeedKernel<DeviceType, T>>(
void RunImpl() const { type, inputs, outputs, attrs, scope) {}
param_.Out()->ShareDataWith(*param_.InputX()); void InferShape() const override;
param_.Out()->set_lod(param_.InputX()->lod());
}
#endif
protected: protected:
FeedParam<DeviceType> param_;
}; };
} // namespace operators } // namespace operators
......
...@@ -13,6 +13,17 @@ See the License for the specific language governing permissions and ...@@ -13,6 +13,17 @@ See the License for the specific language governing permissions and
limitations under the License. */ limitations under the License. */
#include "operators/fetch_op.h" #include "operators/fetch_op.h"
namespace paddle_mobile {
namespace operators {
template <typename DeviceType, typename T>
void FetchOp<DeviceType, T>::InferShape() const {
auto x_dims = this->param_.InputX()->dims();
this->param_.Out()->Resize(x_dims);
}
} // namespace operators
} // namespace paddle_mobile
namespace ops = paddle_mobile::operators; namespace ops = paddle_mobile::operators;
#ifdef PADDLE_MOBILE_CPU #ifdef PADDLE_MOBILE_CPU
...@@ -24,3 +35,6 @@ REGISTER_OPERATOR_MALI_GPU(fetch, ops::FetchOp); ...@@ -24,3 +35,6 @@ REGISTER_OPERATOR_MALI_GPU(fetch, ops::FetchOp);
#ifdef PADDLE_MOBILE_FPGA #ifdef PADDLE_MOBILE_FPGA
REGISTER_OPERATOR_FPGA(fetch, ops::FetchOp); REGISTER_OPERATOR_FPGA(fetch, ops::FetchOp);
#endif #endif
#ifdef PADDLE_MOBILE_CL
REGISTER_OPERATOR_CL(fetch, ops::FetchOp);
#endif
...@@ -16,6 +16,7 @@ limitations under the License. */ ...@@ -16,6 +16,7 @@ limitations under the License. */
#include <string> #include <string>
#include "framework/operator.h" #include "framework/operator.h"
#include "operators/kernel/fetch_kernel.h"
#include "operators/op_param.h" #include "operators/op_param.h"
namespace paddle_mobile { namespace paddle_mobile {
...@@ -23,25 +24,20 @@ namespace operators { ...@@ -23,25 +24,20 @@ namespace operators {
using std::string; using std::string;
template <typename DeviceType, typename T> template <typename DeviceType, typename T>
class FetchOp : public framework::OperatorBase<DeviceType> { class FetchOp
: public framework::OperatorWithKernel<DeviceType, FetchParam<DeviceType>,
FetchKernel<DeviceType, T>> {
public: public:
FetchOp(const string &type, const VariableNameMap &inputs, FetchOp(const string &type, const VariableNameMap &inputs,
const VariableNameMap &outputs, const framework::AttributeMap attrs, const VariableNameMap &outputs, const framework::AttributeMap attrs,
std::shared_ptr<framework::Scope> scope) std::shared_ptr<framework::Scope> scope)
: framework::OperatorBase<DeviceType>(type, inputs, outputs, attrs, : framework::OperatorWithKernel<DeviceType, FetchParam<DeviceType>,
scope), FetchKernel<DeviceType, T>>(
param_(inputs, outputs, attrs, *scope) {} type, inputs, outputs, attrs, scope) {}
void RunImpl() const { param_.Out()->ShareDataWith(*param_.InputX()); }
void Init() {} void InferShape() const override;
void InferShape() const {
auto x_dims = param_.InputX()->dims();
param_.Out()->Resize(x_dims);
}
protected: protected:
FetchParam<DeviceType> param_;
}; };
} // namespace operators } // namespace operators
......
...@@ -20,9 +20,6 @@ namespace ops = paddle_mobile::operators; ...@@ -20,9 +20,6 @@ namespace ops = paddle_mobile::operators;
#ifdef PADDLE_MOBILE_CPU #ifdef PADDLE_MOBILE_CPU
REGISTER_OPERATOR_CPU(fill_constant, ops::FillConstantOp); REGISTER_OPERATOR_CPU(fill_constant, ops::FillConstantOp);
#endif #endif
#ifdef PADDLE_MOBILE_MALI_GPU
REGISTER_OPERATOR_MALI_GPU(fill_constant, ops::FillConstantOp);
#endif
#ifdef PADDLE_MOBILE_FPGA #ifdef PADDLE_MOBILE_FPGA
REGISTER_OPERATOR_FPGA(fill_constant, ops::FillConstantOp); REGISTER_OPERATOR_FPGA(fill_constant, ops::FillConstantOp);
#endif #endif
......
...@@ -37,7 +37,7 @@ class FillConstantOp : public framework::OperatorBase<DeviceType> { ...@@ -37,7 +37,7 @@ class FillConstantOp : public framework::OperatorBase<DeviceType> {
: framework::OperatorBase<DeviceType>(type, inputs, outputs, attrs, : framework::OperatorBase<DeviceType>(type, inputs, outputs, attrs,
scope), scope),
param_(inputs, outputs, attrs, *scope) {} param_(inputs, outputs, attrs, *scope) {}
void RunImpl() const { void RunImpl() {
auto data_type = auto data_type =
static_cast<_PaddleMobile__Framework__Proto__VarType__Type>( static_cast<_PaddleMobile__Framework__Proto__VarType__Type>(
param_.DataDtype()); param_.DataDtype());
......
...@@ -17,6 +17,7 @@ limitations under the License. */ ...@@ -17,6 +17,7 @@ limitations under the License. */
#pragma once #pragma once
#include <string> #include <string>
#include <vector>
#include "framework/operator.h" #include "framework/operator.h"
#include "operators/kernel/flatten_kernel.h" #include "operators/kernel/flatten_kernel.h"
...@@ -53,10 +54,6 @@ class FlattenOp : public framework::OperatorWithKernel< ...@@ -53,10 +54,6 @@ class FlattenOp : public framework::OperatorWithKernel<
: framework::OperatorWithKernel<DeviceType, FlattenParam<DeviceType>, : framework::OperatorWithKernel<DeviceType, FlattenParam<DeviceType>,
operators::FlattenKernel<DeviceType, T>>( operators::FlattenKernel<DeviceType, T>>(
type, inputs, outputs, attrs, scope) {} type, inputs, outputs, attrs, scope) {}
using framework::OperatorWithKernel<
DeviceType, FlattenParam<DeviceType>,
operators::FlattenKernel<DeviceType, T>>::OperatorWithKernel;
void InferShape() const override; void InferShape() const override;
}; };
......
...@@ -18,6 +18,7 @@ limitations under the License. */ ...@@ -18,6 +18,7 @@ limitations under the License. */
#include <string> #include <string>
#include <vector> #include <vector>
#include <utility>
#include "framework/operator.h" #include "framework/operator.h"
#include "framework/program/program-optimize/fusion_op_register.h" #include "framework/program/program-optimize/fusion_op_register.h"
#include "operators/kernel/conv_add_add_prelu_kernel.h" #include "operators/kernel/conv_add_add_prelu_kernel.h"
...@@ -67,10 +68,6 @@ class FusionConvAddAddPReluOp ...@@ -67,10 +68,6 @@ class FusionConvAddAddPReluOp
DeviceType, FusionConvAddAddPReluParam<DeviceType>, DeviceType, FusionConvAddAddPReluParam<DeviceType>,
operators::ConvAddAddPReluKernel<DeviceType, T>>( operators::ConvAddAddPReluKernel<DeviceType, T>>(
type, inputs, outputs, attrs, scope) {} type, inputs, outputs, attrs, scope) {}
using framework::OperatorWithKernel<
DeviceType, FusionConvAddAddPReluParam<DeviceType>,
operators::ConvAddAddPReluKernel<DeviceType, T>>::OperatorWithKernel;
void InferShape() const override; void InferShape() const override;
protected: protected:
......
...@@ -58,5 +58,7 @@ REGISTER_OPERATOR_CPU(fusion_conv_add_bn_relu, ops::FusionConvAddBNReluOp); ...@@ -58,5 +58,7 @@ REGISTER_OPERATOR_CPU(fusion_conv_add_bn_relu, ops::FusionConvAddBNReluOp);
#ifdef PADDLE_MOBILE_FPGA #ifdef PADDLE_MOBILE_FPGA
REGISTER_OPERATOR_FPGA(fusion_conv_add_bn_relu, ops::FusionConvAddBNReluOp); REGISTER_OPERATOR_FPGA(fusion_conv_add_bn_relu, ops::FusionConvAddBNReluOp);
#endif #endif
#ifdef PADDLE_MOBILE_CL
REGISTER_OPERATOR_CL(fusion_conv_add_bn_relu, ops::FusionConvAddBNReluOp);
#endif
#endif #endif
...@@ -20,8 +20,8 @@ limitations under the License. */ ...@@ -20,8 +20,8 @@ limitations under the License. */
#include <vector> #include <vector>
#include "framework/operator.h" #include "framework/operator.h"
#include "framework/program/program-optimize/fusion_op_register.h" #include "framework/program/program-optimize/fusion_op_register.h"
#include "op_param.h"
#include "operators/kernel/conv_add_bn_relu_kernel.h" #include "operators/kernel/conv_add_bn_relu_kernel.h"
#include "operators/op_param.h"
namespace paddle_mobile { namespace paddle_mobile {
namespace operators { namespace operators {
...@@ -66,10 +66,6 @@ class FusionConvAddBNReluOp ...@@ -66,10 +66,6 @@ class FusionConvAddBNReluOp
DeviceType, FusionConvAddBNReluParam<DeviceType>, DeviceType, FusionConvAddBNReluParam<DeviceType>,
operators::ConvAddBNReluKernel<DeviceType, T>>( operators::ConvAddBNReluKernel<DeviceType, T>>(
type, inputs, outputs, attrs, scope) {} type, inputs, outputs, attrs, scope) {}
using framework::OperatorWithKernel<
DeviceType, FusionConvAddBNReluParam<DeviceType>,
operators::ConvAddBNReluKernel<DeviceType, T>>::OperatorWithKernel;
void InferShape() const override; void InferShape() const override;
protected: protected:
......
...@@ -58,4 +58,8 @@ REGISTER_OPERATOR_CPU(fusion_conv_add, ops::FusionConvAddOp); ...@@ -58,4 +58,8 @@ REGISTER_OPERATOR_CPU(fusion_conv_add, ops::FusionConvAddOp);
REGISTER_OPERATOR_MALI_GPU(fusion_conv_add, ops::FusionConvAddOp); REGISTER_OPERATOR_MALI_GPU(fusion_conv_add, ops::FusionConvAddOp);
#endif #endif
#ifdef PADDLE_MOBILE_CL
REGISTER_OPERATOR_CL(fusion_conv_add, ops::FusionConvAddOp);
#endif
#endif #endif
...@@ -19,7 +19,7 @@ limitations under the License. */ ...@@ -19,7 +19,7 @@ limitations under the License. */
#include <vector> #include <vector>
#include "framework/operator.h" #include "framework/operator.h"
#include "framework/program/program-optimize/fusion_op_register.h" #include "framework/program/program-optimize/fusion_op_register.h"
#include "op_param.h" #include "operators/op_param.h"
#include "operators/kernel/conv_add_kernel.h" #include "operators/kernel/conv_add_kernel.h"
namespace paddle_mobile { namespace paddle_mobile {
...@@ -56,10 +56,6 @@ class FusionConvAddOp : public framework::OperatorWithKernel< ...@@ -56,10 +56,6 @@ class FusionConvAddOp : public framework::OperatorWithKernel<
FusionConvAddParam<DeviceType>, FusionConvAddParam<DeviceType>,
operators::ConvAddKernel<DeviceType, T>>( operators::ConvAddKernel<DeviceType, T>>(
type, inputs, outputs, attrs, scope) {} type, inputs, outputs, attrs, scope) {}
using framework::OperatorWithKernel<
DeviceType, FusionConvAddParam<DeviceType>,
operators::ConvAddKernel<DeviceType, T>>::OperatorWithKernel;
void InferShape() const override; void InferShape() const override;
protected: protected:
......
...@@ -40,9 +40,7 @@ class FusionConvAddPReluOpMatcher : public framework::FusionOpMatcher { ...@@ -40,9 +40,7 @@ class FusionConvAddPReluOpMatcher : public framework::FusionOpMatcher {
node->Folder(node_.Depth(), Type(), node->Folder(node_.Depth(), Type(),
{{G_OP_TYPE_ELEMENTWISE_ADD, {{"Y", "Y"}}}, {{G_OP_TYPE_ELEMENTWISE_ADD, {{"Y", "Y"}}},
{G_OP_TYPE_PRELU, {{"Alpha", "Alpha"}}} {G_OP_TYPE_PRELU, {{"Alpha", "Alpha"}}}
}, },
removed_nodes); removed_nodes);
} }
std::string Type() { return G_OP_TYPE_FUSION_CONV_ADD_PRELU; } std::string Type() { return G_OP_TYPE_FUSION_CONV_ADD_PRELU; }
...@@ -63,9 +61,6 @@ class FusionConvAddPReluOp ...@@ -63,9 +61,6 @@ class FusionConvAddPReluOp
operators::ConvAddPReluKernel<DeviceType, T>>(type, inputs, outputs, operators::ConvAddPReluKernel<DeviceType, T>>(type, inputs, outputs,
attrs, scope) {} attrs, scope) {}
using framework::OperatorWithKernel<
DeviceType, FusionConvAddPReluParam<DeviceType>,
operators::ConvAddPReluKernel<DeviceType, T>>::OperatorWithKernel;
void InferShape() const override; void InferShape() const override;
protected: protected:
......
...@@ -56,5 +56,7 @@ REGISTER_OPERATOR_CPU(fusion_conv_add_relu, ops::FusionConvAddReluOp); ...@@ -56,5 +56,7 @@ REGISTER_OPERATOR_CPU(fusion_conv_add_relu, ops::FusionConvAddReluOp);
#ifdef PADDLE_MOBILE_FPGA #ifdef PADDLE_MOBILE_FPGA
REGISTER_OPERATOR_FPGA(fusion_conv_add_relu, ops::FusionConvAddReluOp); REGISTER_OPERATOR_FPGA(fusion_conv_add_relu, ops::FusionConvAddReluOp);
#endif #endif
#ifdef PADDLE_MOBILE_CL
REGISTER_OPERATOR_CL(fusion_conv_add_relu, ops::FusionConvAddReluOp);
#endif
#endif #endif
...@@ -29,9 +29,8 @@ namespace operators { ...@@ -29,9 +29,8 @@ namespace operators {
class FusionConvAddReluOpMatcher : public framework::FusionOpMatcher { class FusionConvAddReluOpMatcher : public framework::FusionOpMatcher {
public: public:
FusionConvAddReluOpMatcher() { FusionConvAddReluOpMatcher() {
node_ = framework::Node(G_OP_TYPE_CONV); node_ = framework::Node(G_OP_TYPE_FUSION_CONV_ADD);
node_ > std::make_shared<framework::Node>(G_OP_TYPE_ELEMENTWISE_ADD) > node_ > std::make_shared<framework::Node>(G_OP_TYPE_RELU);
std::make_shared<framework::Node>(G_OP_TYPE_RELU);
} }
void FolderNodes( void FolderNodes(
...@@ -57,9 +56,6 @@ class FusionConvAddReluOp : public framework::OperatorWithKernel< ...@@ -57,9 +56,6 @@ class FusionConvAddReluOp : public framework::OperatorWithKernel<
operators::ConvAddReluKernel<DeviceType, T>>(type, inputs, outputs, operators::ConvAddReluKernel<DeviceType, T>>(type, inputs, outputs,
attrs, scope) {} attrs, scope) {}
using framework::OperatorWithKernel<
DeviceType, FusionConvAddReluParam<DeviceType>,
operators::ConvAddReluKernel<DeviceType, T>>::OperatorWithKernel;
void InferShape() const override; void InferShape() const override;
protected: protected:
......
...@@ -18,9 +18,10 @@ limitations under the License. */ ...@@ -18,9 +18,10 @@ limitations under the License. */
#include <string> #include <string>
#include <vector> #include <vector>
#include <utility>
#include "framework/operator.h" #include "framework/operator.h"
#include "framework/program/program-optimize/fusion_op_register.h" #include "framework/program/program-optimize/fusion_op_register.h"
#include "op_param.h" #include "operators/op_param.h"
#include "operators/kernel/conv_bn_add_relu_kernel.h" #include "operators/kernel/conv_bn_add_relu_kernel.h"
namespace paddle_mobile { namespace paddle_mobile {
...@@ -71,10 +72,6 @@ class FusionConvBNAddReluOp ...@@ -71,10 +72,6 @@ class FusionConvBNAddReluOp
DeviceType, FusionConvBNAddReluParam<DeviceType>, DeviceType, FusionConvBNAddReluParam<DeviceType>,
operators::ConvBNAddReluKernel<DeviceType, T>>( operators::ConvBNAddReluKernel<DeviceType, T>>(
type, inputs, outputs, attrs, scope) {} type, inputs, outputs, attrs, scope) {}
using framework::OperatorWithKernel<
DeviceType, FusionConvBNAddReluParam<DeviceType>,
operators::ConvBNAddReluKernel<DeviceType, T>>::OperatorWithKernel;
void InferShape() const override; void InferShape() const override;
protected: protected:
......
...@@ -63,10 +63,6 @@ class FusionConvBNReluOp : public framework::OperatorWithKernel< ...@@ -63,10 +63,6 @@ class FusionConvBNReluOp : public framework::OperatorWithKernel<
DeviceType, FusionConvBNReluParam<DeviceType>, DeviceType, FusionConvBNReluParam<DeviceType>,
operators::ConvBNReluKernel<DeviceType, T>>(type, inputs, outputs, operators::ConvBNReluKernel<DeviceType, T>>(type, inputs, outputs,
attrs, scope) {} attrs, scope) {}
using framework::OperatorWithKernel<
DeviceType, FusionConvBNReluParam<DeviceType>,
operators::ConvBNReluKernel<DeviceType, T>>::OperatorWithKernel;
void InferShape() const override; void InferShape() const override;
protected: protected:
......
...@@ -20,7 +20,7 @@ limitations under the License. */ ...@@ -20,7 +20,7 @@ limitations under the License. */
#include <vector> #include <vector>
#include "framework/operator.h" #include "framework/operator.h"
#include "framework/program/program-optimize/fusion_op_register.h" #include "framework/program/program-optimize/fusion_op_register.h"
#include "op_param.h" #include "operators/op_param.h"
#include "operators/kernel/dwconv_bn_relu_kernel.h" #include "operators/kernel/dwconv_bn_relu_kernel.h"
namespace paddle_mobile { namespace paddle_mobile {
...@@ -65,9 +65,6 @@ class FusionDWConvBNReluOp ...@@ -65,9 +65,6 @@ class FusionDWConvBNReluOp
operators::DWConvBNReluKernel<DeviceType, T>>(type, inputs, outputs, operators::DWConvBNReluKernel<DeviceType, T>>(type, inputs, outputs,
attrs, scope) {} attrs, scope) {}
using framework::OperatorWithKernel<
DeviceType, FusionDWConvBNReluParam<DeviceType>,
operators::DWConvBNReluKernel<DeviceType, T>>::OperatorWithKernel;
void InferShape() const override; void InferShape() const override;
protected: protected:
......
...@@ -56,10 +56,6 @@ class FusionFcOp : public framework::OperatorWithKernel< ...@@ -56,10 +56,6 @@ class FusionFcOp : public framework::OperatorWithKernel<
operators::FusionFcKernel<DeviceType, T>>( operators::FusionFcKernel<DeviceType, T>>(
type, inputs, outputs, attrs, scope) {} type, inputs, outputs, attrs, scope) {}
using framework::OperatorWithKernel<
DeviceType, FusionFcParam<DeviceType>,
operators::FusionFcKernel<DeviceType, T>>::OperatorWithKernel;
void InferShape() const override; void InferShape() const override;
}; };
......
...@@ -56,9 +56,6 @@ class FusionFcReluOp : public framework::OperatorWithKernel< ...@@ -56,9 +56,6 @@ class FusionFcReluOp : public framework::OperatorWithKernel<
operators::FusionFcReluKernel<DeviceType, T>>(type, inputs, outputs, operators::FusionFcReluKernel<DeviceType, T>>(type, inputs, outputs,
attrs, scope) {} attrs, scope) {}
using framework::OperatorWithKernel<
DeviceType, FusionFcReluParam<DeviceType>,
operators::FusionFcReluKernel<DeviceType, T>>::OperatorWithKernel;
void InferShape() const override; void InferShape() const override;
protected: protected:
......
...@@ -37,10 +37,6 @@ class GruOp : public framework::OperatorWithKernel< ...@@ -37,10 +37,6 @@ class GruOp : public framework::OperatorWithKernel<
: framework::OperatorWithKernel<DeviceType, GruParam<DeviceType>, : framework::OperatorWithKernel<DeviceType, GruParam<DeviceType>,
operators::GruKernel<DeviceType, T>>( operators::GruKernel<DeviceType, T>>(
type, inputs, outputs, attrs, scope) {} type, inputs, outputs, attrs, scope) {}
using framework::OperatorWithKernel<
DeviceType, GruParam<DeviceType>,
operators::GruKernel<DeviceType, T>>::OperatorWithKernel;
void InferShape() const override; void InferShape() const override;
}; };
......
...@@ -16,15 +16,14 @@ limitations under the License. */ ...@@ -16,15 +16,14 @@ limitations under the License. */
#pragma once #pragma once
#include <operators/op_param.h> #include <string>
#include "operators/op_param.h"
#include "framework/operator.h" #include "framework/operator.h"
#include "operators/kernel/im2sequence_kernel.h" #include "operators/kernel/im2sequence_kernel.h"
namespace paddle_mobile { namespace paddle_mobile {
namespace operators { namespace operators {
using namespace framework;
template <typename DeviceType, typename T> template <typename DeviceType, typename T>
class Im2SequenceOp : public framework::OperatorWithKernel< class Im2SequenceOp : public framework::OperatorWithKernel<
DeviceType, Im2SequenceParam<DeviceType>, DeviceType, Im2SequenceParam<DeviceType>,
...@@ -39,9 +38,6 @@ class Im2SequenceOp : public framework::OperatorWithKernel< ...@@ -39,9 +38,6 @@ class Im2SequenceOp : public framework::OperatorWithKernel<
operators::Im2SequenceKernel<DeviceType, T>>(type, inputs, outputs, operators::Im2SequenceKernel<DeviceType, T>>(type, inputs, outputs,
attrs, scope) {} attrs, scope) {}
// using framework::OperatorWithKernel<
// DeviceType, Im2SequenceParam<DeviceType>,
// operators::Im2SequenceKernel<DeviceType, T>>::OperatorWithKernel;
void InferShape() const override; void InferShape() const override;
private: private:
......
...@@ -26,8 +26,7 @@ bool BatchNormKernel<CPU, float>::Init(BatchNormParam<CPU> *param) { ...@@ -26,8 +26,7 @@ bool BatchNormKernel<CPU, float>::Init(BatchNormParam<CPU> *param) {
} }
template <> template <>
void BatchNormKernel<CPU, float>::Compute( void BatchNormKernel<CPU, float>::Compute(const BatchNormParam<CPU> &param) {
const BatchNormParam<CPU> &param) const {
BatchnormCompute<float>(param); BatchnormCompute<float>(param);
} }
......
...@@ -27,7 +27,7 @@ bool BilinearInterpKernel<CPU, float>::Init(BilinearInterpParam<CPU> *param) { ...@@ -27,7 +27,7 @@ bool BilinearInterpKernel<CPU, float>::Init(BilinearInterpParam<CPU> *param) {
template <> template <>
void BilinearInterpKernel<CPU, float>::Compute( void BilinearInterpKernel<CPU, float>::Compute(
const BilinearInterpParam<CPU> &param) const { const BilinearInterpParam<CPU> &param) {
BilinearInterpCompute<float>(param); BilinearInterpCompute<float>(param);
} }
......
...@@ -26,8 +26,7 @@ bool BoxCoderKernel<CPU, float>::Init(BoxCoderParam<CPU> *param) { ...@@ -26,8 +26,7 @@ bool BoxCoderKernel<CPU, float>::Init(BoxCoderParam<CPU> *param) {
} }
template <> template <>
void BoxCoderKernel<CPU, float>::Compute( void BoxCoderKernel<CPU, float>::Compute(const BoxCoderParam<CPU> &param) {
const BoxCoderParam<CPU> &param) const {
BoxCoderCompute<float>(param); BoxCoderCompute<float>(param);
} }
......
...@@ -26,7 +26,7 @@ bool ConcatKernel<CPU, float>::Init(ConcatParam<CPU> *param) { ...@@ -26,7 +26,7 @@ bool ConcatKernel<CPU, float>::Init(ConcatParam<CPU> *param) {
} }
template <> template <>
void ConcatKernel<CPU, float>::Compute(const ConcatParam<CPU> &param) const { void ConcatKernel<CPU, float>::Compute(const ConcatParam<CPU> &param) {
ConcatCompute<float>(param); ConcatCompute<float>(param);
param.Out()->set_lod(param.Inputs()[0]->lod()); param.Out()->set_lod(param.Inputs()[0]->lod());
} }
......
...@@ -28,7 +28,7 @@ bool ConvAddAddPReluKernel<CPU, float>::Init( ...@@ -28,7 +28,7 @@ bool ConvAddAddPReluKernel<CPU, float>::Init(
template <> template <>
void ConvAddAddPReluKernel<CPU, float>::Compute( void ConvAddAddPReluKernel<CPU, float>::Compute(
const FusionConvAddAddPReluParam<CPU> &param) const { const FusionConvAddAddPReluParam<CPU> &param) {
ConvAddAddPReluCompute<float>(param); ConvAddAddPReluCompute<float>(param);
} }
template class ConvAddAddPReluKernel<CPU, float>; template class ConvAddAddPReluKernel<CPU, float>;
......
...@@ -55,7 +55,7 @@ bool ConvAddBNReluKernel<CPU, float>::Init( ...@@ -55,7 +55,7 @@ bool ConvAddBNReluKernel<CPU, float>::Init(
template <> template <>
void ConvAddBNReluKernel<CPU, float>::Compute( void ConvAddBNReluKernel<CPU, float>::Compute(
const FusionConvAddBNReluParam<CPU> &param) const { const FusionConvAddBNReluParam<CPU> &param) {
ConvAddBNReluCompute<float>(param); ConvAddBNReluCompute<float>(param);
} }
template class ConvAddBNReluKernel<CPU, float>; template class ConvAddBNReluKernel<CPU, float>;
......
...@@ -25,8 +25,7 @@ bool ConvAddKernel<CPU, float>::Init(FusionConvAddParam<CPU> *param) { ...@@ -25,8 +25,7 @@ bool ConvAddKernel<CPU, float>::Init(FusionConvAddParam<CPU> *param) {
} }
template <> template <>
void ConvAddKernel<CPU, float>::Compute( void ConvAddKernel<CPU, float>::Compute(const FusionConvAddParam<CPU> &param) {
const FusionConvAddParam<CPU> &param) const {
ConvAddCompute<float>(param); ConvAddCompute<float>(param);
} }
......
...@@ -27,7 +27,7 @@ bool ConvAddPReluKernel<CPU, float>::Init(FusionConvAddPReluParam<CPU> *param) { ...@@ -27,7 +27,7 @@ bool ConvAddPReluKernel<CPU, float>::Init(FusionConvAddPReluParam<CPU> *param) {
template <> template <>
void ConvAddPReluKernel<CPU, float>::Compute( void ConvAddPReluKernel<CPU, float>::Compute(
const FusionConvAddPReluParam<CPU> &param) const { const FusionConvAddPReluParam<CPU> &param) {
ConvAddPReluCompute<float>(param); ConvAddPReluCompute<float>(param);
} }
template class ConvAddPReluKernel<CPU, float>; template class ConvAddPReluKernel<CPU, float>;
......
...@@ -27,7 +27,7 @@ bool ConvAddReluKernel<CPU, float>::Init(FusionConvAddReluParam<CPU> *param) { ...@@ -27,7 +27,7 @@ bool ConvAddReluKernel<CPU, float>::Init(FusionConvAddReluParam<CPU> *param) {
template <> template <>
void ConvAddReluKernel<CPU, float>::Compute( void ConvAddReluKernel<CPU, float>::Compute(
const FusionConvAddReluParam<CPU> &param) const { const FusionConvAddReluParam<CPU> &param) {
ConvAddReluCompute<float>(param); ConvAddReluCompute<float>(param);
} }
template class ConvAddReluKernel<CPU, float>; template class ConvAddReluKernel<CPU, float>;
......
...@@ -55,7 +55,7 @@ bool ConvBNAddReluKernel<CPU, float>::Init( ...@@ -55,7 +55,7 @@ bool ConvBNAddReluKernel<CPU, float>::Init(
template <> template <>
void ConvBNAddReluKernel<CPU, float>::Compute( void ConvBNAddReluKernel<CPU, float>::Compute(
const FusionConvBNAddReluParam<CPU> &param) const { const FusionConvBNAddReluParam<CPU> &param) {
ConvBNAddReluCompute<float>(param); ConvBNAddReluCompute<float>(param);
} }
template class ConvBNAddReluKernel<CPU, float>; template class ConvBNAddReluKernel<CPU, float>;
......
...@@ -57,7 +57,7 @@ bool ConvBNReluKernel<CPU, float>::Init(FusionConvBNReluParam<CPU> *param) { ...@@ -57,7 +57,7 @@ bool ConvBNReluKernel<CPU, float>::Init(FusionConvBNReluParam<CPU> *param) {
template <> template <>
void ConvBNReluKernel<CPU, float>::Compute( void ConvBNReluKernel<CPU, float>::Compute(
const FusionConvBNReluParam<CPU> &param) const { const FusionConvBNReluParam<CPU> &param) {
ConvBNReluCompute<float>(param); ConvBNReluCompute<float>(param);
} }
template class ConvBNReluKernel<CPU, float>; template class ConvBNReluKernel<CPU, float>;
......
...@@ -26,7 +26,7 @@ bool ConvKernel<CPU, float>::Init(ConvParam<CPU> *param) { ...@@ -26,7 +26,7 @@ bool ConvKernel<CPU, float>::Init(ConvParam<CPU> *param) {
} }
template <> template <>
void ConvKernel<CPU, float>::Compute(const ConvParam<CPU> &param) const { void ConvKernel<CPU, float>::Compute(const ConvParam<CPU> &param) {
ConvCompute<float>(param); ConvCompute<float>(param);
} }
......
...@@ -27,7 +27,7 @@ bool ConvTransposeKernel<CPU, float>::Init(ConvTransposeParam<CPU> *param) { ...@@ -27,7 +27,7 @@ bool ConvTransposeKernel<CPU, float>::Init(ConvTransposeParam<CPU> *param) {
template <> template <>
void ConvTransposeKernel<CPU, float>::Compute( void ConvTransposeKernel<CPU, float>::Compute(
const ConvTransposeParam<CPU> &param) const { const ConvTransposeParam<CPU> &param) {
ConvTransposeCompute<float>(param); ConvTransposeCompute<float>(param);
} }
......
...@@ -27,7 +27,7 @@ bool CrfKernel<CPU, float>::Init(CrfParam<CPU> *param) { ...@@ -27,7 +27,7 @@ bool CrfKernel<CPU, float>::Init(CrfParam<CPU> *param) {
} }
template <> template <>
void CrfKernel<CPU, float>::Compute(const CrfParam<CPU> &param) const { void CrfKernel<CPU, float>::Compute(const CrfParam<CPU> &param) {
CrfCompute<float>(param); CrfCompute<float>(param);
} }
......
...@@ -26,8 +26,7 @@ bool DepthwiseConvKernel<CPU, float>::Init(ConvParam<CPU> *param) { ...@@ -26,8 +26,7 @@ bool DepthwiseConvKernel<CPU, float>::Init(ConvParam<CPU> *param) {
} }
template <> template <>
void DepthwiseConvKernel<CPU, float>::Compute( void DepthwiseConvKernel<CPU, float>::Compute(const ConvParam<CPU> &param) {
const ConvParam<CPU> &param) const {
DepthwiseConvCompute<float>(param); DepthwiseConvCompute<float>(param);
} }
......
...@@ -29,8 +29,7 @@ bool DequantizeKernel<CPU, float>::Init(DequantizeParam<CPU> *param) { ...@@ -29,8 +29,7 @@ bool DequantizeKernel<CPU, float>::Init(DequantizeParam<CPU> *param) {
} }
template <> template <>
void DequantizeKernel<CPU, float>::Compute( void DequantizeKernel<CPU, float>::Compute(const DequantizeParam<CPU> &param) {
const DequantizeParam<CPU> &param) const {
const Tensor *input = param.input_; const Tensor *input = param.input_;
Tensor *output = param.out_; Tensor *output = param.out_;
float activation_scale = param.activation_scale_->data<float>()[0]; float activation_scale = param.activation_scale_->data<float>()[0];
......
...@@ -27,7 +27,7 @@ bool DropoutKernel<CPU, float>::Init(DropoutParam<CPU> *para) { ...@@ -27,7 +27,7 @@ bool DropoutKernel<CPU, float>::Init(DropoutParam<CPU> *para) {
template <typename T> template <typename T>
struct DropoutFunctor { struct DropoutFunctor {
DropoutFunctor(T drop_pro) : dropout_pro_(drop_pro) {} explicit DropoutFunctor(T drop_pro) : dropout_pro_(drop_pro) {}
inline T operator()(T in) const { return (1 - dropout_pro_) * in; } inline T operator()(T in) const { return (1 - dropout_pro_) * in; }
private: private:
...@@ -35,7 +35,7 @@ struct DropoutFunctor { ...@@ -35,7 +35,7 @@ struct DropoutFunctor {
}; };
template <> template <>
void DropoutKernel<CPU, float>::Compute(const DropoutParam<CPU> &param) const { void DropoutKernel<CPU, float>::Compute(const DropoutParam<CPU> &param) {
const auto *input_x = param.InputX(); const auto *input_x = param.InputX();
auto *input_x_ptr = input_x->data<float>(); auto *input_x_ptr = input_x->data<float>();
auto *out = param.Out(); auto *out = param.Out();
......
...@@ -54,7 +54,7 @@ bool DWConvBNReluKernel<CPU, float>::Init(FusionDWConvBNReluParam<CPU> *param) { ...@@ -54,7 +54,7 @@ bool DWConvBNReluKernel<CPU, float>::Init(FusionDWConvBNReluParam<CPU> *param) {
template <> template <>
void DWConvBNReluKernel<CPU, float>::Compute( void DWConvBNReluKernel<CPU, float>::Compute(
const FusionDWConvBNReluParam<CPU> &param) const { const FusionDWConvBNReluParam<CPU> &param) {
DWConvBNReluCompute<float>(param); DWConvBNReluCompute<float>(param);
} }
template class DWConvBNReluKernel<CPU, float>; template class DWConvBNReluKernel<CPU, float>;
......
...@@ -27,7 +27,7 @@ bool ElementwiseAddKernel<CPU, float>::Init(ElementwiseAddParam<CPU> *param) { ...@@ -27,7 +27,7 @@ bool ElementwiseAddKernel<CPU, float>::Init(ElementwiseAddParam<CPU> *param) {
template <> template <>
void ElementwiseAddKernel<CPU, float>::Compute( void ElementwiseAddKernel<CPU, float>::Compute(
const ElementwiseAddParam<CPU> &param) const { const ElementwiseAddParam<CPU> &param) {
ElementwiseAddCompute<float>(param); ElementwiseAddCompute<float>(param);
param.Out()->set_lod(param.InputX()->lod()); param.Out()->set_lod(param.InputX()->lod());
} }
......
...@@ -27,7 +27,7 @@ bool ElementwiseMulKernel<CPU, float>::Init(ElementwiseMulParam<CPU> *param) { ...@@ -27,7 +27,7 @@ bool ElementwiseMulKernel<CPU, float>::Init(ElementwiseMulParam<CPU> *param) {
template <> template <>
void ElementwiseMulKernel<CPU, float>::Compute( void ElementwiseMulKernel<CPU, float>::Compute(
const ElementwiseMulParam<CPU> &param) const { const ElementwiseMulParam<CPU> &param) {
ElementwiseMulCompute<float>(param); ElementwiseMulCompute<float>(param);
param.Out()->set_lod(param.InputX()->lod()); param.Out()->set_lod(param.InputX()->lod());
} }
......
...@@ -27,7 +27,7 @@ bool ElementwiseSubKernel<CPU, float>::Init(ElementwiseSubParam<CPU> *param) { ...@@ -27,7 +27,7 @@ bool ElementwiseSubKernel<CPU, float>::Init(ElementwiseSubParam<CPU> *param) {
template <> template <>
void ElementwiseSubKernel<CPU, float>::Compute( void ElementwiseSubKernel<CPU, float>::Compute(
const ElementwiseSubParam<CPU> &param) const { const ElementwiseSubParam<CPU> &param) {
ElementwiseSubCompute<float>(param); ElementwiseSubCompute<float>(param);
param.Out()->set_lod(param.InputX()->lod()); param.Out()->set_lod(param.InputX()->lod());
} }
......
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "operators/kernel/feed_kernel.h"
namespace paddle_mobile {
namespace operators {
template <>
bool FeedKernel<CPU, float>::Init(FeedParam<CPU> *param) {
return true;
}
template <>
void FeedKernel<CPU, float>::Compute(const FeedParam<CPU> &param) {
param.Out()->ShareDataWith(*(param.InputX()));
param.Out()->set_lod(param.InputX()->lod());
}
template class FeedKernel<CPU, float>;
} // namespace operators
} // namespace paddle_mobile
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#ifdef FUSION_CONVADD_OP
#include "operators/kernel/fetch_kernel.h"
namespace paddle_mobile {
namespace operators {
template <>
bool FetchKernel<CPU, float>::Init(FetchParam<CPU> *param) {
return true;
}
template <>
void FetchKernel<CPU, float>::Compute(const FetchParam<CPU> &param) {
param.Out()->ShareDataWith(*(param.InputX()));
}
template class FetchKernel<CPU, float>;
} // namespace operators
} // namespace paddle_mobile
#endif
...@@ -26,7 +26,7 @@ bool FlattenKernel<CPU, float>::Init(FlattenParam<CPU> *param) { ...@@ -26,7 +26,7 @@ bool FlattenKernel<CPU, float>::Init(FlattenParam<CPU> *param) {
} }
template <> template <>
void FlattenKernel<CPU, float>::Compute(const FlattenParam<CPU> &param) const { void FlattenKernel<CPU, float>::Compute(const FlattenParam<CPU> &param) {
FlattenCompute<float>(param); FlattenCompute<float>(param);
} }
......
...@@ -26,8 +26,7 @@ bool FusionFcKernel<CPU, float>::Init(FusionFcParam<CPU> *param) { ...@@ -26,8 +26,7 @@ bool FusionFcKernel<CPU, float>::Init(FusionFcParam<CPU> *param) {
} }
template <> template <>
void FusionFcKernel<CPU, float>::Compute( void FusionFcKernel<CPU, float>::Compute(const FusionFcParam<CPU> &param) {
const FusionFcParam<CPU> &param) const {
FusionFcCompute<float>(param); FusionFcCompute<float>(param);
param.Out()->set_lod(param.InputX()->lod()); param.Out()->set_lod(param.InputX()->lod());
} }
......
...@@ -26,7 +26,7 @@ bool GruKernel<CPU, float>::Init(GruParam<CPU> *param) { ...@@ -26,7 +26,7 @@ bool GruKernel<CPU, float>::Init(GruParam<CPU> *param) {
} }
template <> template <>
void GruKernel<CPU, float>::Compute(const GruParam<CPU> &param) const { void GruKernel<CPU, float>::Compute(const GruParam<CPU> &param) {
GruCompute<float>(param); GruCompute<float>(param);
param.OutHidden()->set_lod(param.InputInput()->lod()); param.OutHidden()->set_lod(param.InputInput()->lod());
// DLOG << "________________" << param.OutHidden()->dims(); // DLOG << "________________" << param.OutHidden()->dims();
......
...@@ -33,7 +33,7 @@ inline int Im2SeqOutputSize(int input_size, int filter_size, int padding_0, ...@@ -33,7 +33,7 @@ inline int Im2SeqOutputSize(int input_size, int filter_size, int padding_0,
template <> template <>
void Im2SequenceKernel<CPU, float>::Compute( void Im2SequenceKernel<CPU, float>::Compute(
const Im2SequenceParam<CPU> &param) const { const Im2SequenceParam<CPU> &param) {
const Tensor *in_x = param.Input(); const Tensor *in_x = param.Input();
framework::LoDTensor *out = param.Output(); framework::LoDTensor *out = param.Output();
out->mutable_data<float>(); out->mutable_data<float>();
...@@ -56,7 +56,7 @@ void Im2SequenceKernel<CPU, float>::Compute( ...@@ -56,7 +56,7 @@ void Im2SequenceKernel<CPU, float>::Compute(
out->mutable_data<float>({batch_size * output_height * output_width, out->mutable_data<float>({batch_size * output_height * output_width,
img_channels * kernels[0] * kernels[1]}); img_channels * kernels[0] * kernels[1]});
const std::vector<int> dilations({1, 1}); const std::vector<int> dilations({1, 1});
// TODO: verify // TODO(): verify
auto out_dims = out->dims(); auto out_dims = out->dims();
out->Resize({batch_size, out->numel() / batch_size}); out->Resize({batch_size, out->numel() / batch_size});
for (int i = 0; i < batch_size; i++) { for (int i = 0; i < batch_size; i++) {
......
...@@ -25,7 +25,7 @@ bool LookupKernel<CPU, float>::Init(LookupParam<CPU> *param) { ...@@ -25,7 +25,7 @@ bool LookupKernel<CPU, float>::Init(LookupParam<CPU> *param) {
} }
template <> template <>
void LookupKernel<CPU, float>::Compute(const LookupParam<CPU> &param) const { void LookupKernel<CPU, float>::Compute(const LookupParam<CPU> &param) {
LookupCompute<float>(param); LookupCompute<float>(param);
param.Out()->set_lod(param.InputIds()->lod()); param.Out()->set_lod(param.InputIds()->lod());
} }
......
...@@ -26,7 +26,7 @@ bool LrnKernel<CPU, float>::Init(LrnParam<CPU> *param) { ...@@ -26,7 +26,7 @@ bool LrnKernel<CPU, float>::Init(LrnParam<CPU> *param) {
} }
template <> template <>
void LrnKernel<CPU, float>::Compute(const LrnParam<CPU> &param) const { void LrnKernel<CPU, float>::Compute(const LrnParam<CPU> &param) {
LrnCompute<float>(param); LrnCompute<float>(param);
} }
......
...@@ -26,7 +26,7 @@ bool MulKernel<CPU, float>::Init(MulParam<CPU> *param) { ...@@ -26,7 +26,7 @@ bool MulKernel<CPU, float>::Init(MulParam<CPU> *param) {
} }
template <> template <>
void MulKernel<CPU, float>::Compute(const MulParam<CPU> &param) const { void MulKernel<CPU, float>::Compute(const MulParam<CPU> &param) {
MulCompute<float>(param); MulCompute<float>(param);
param.Out()->set_lod(param.InputX()->lod()); param.Out()->set_lod(param.InputX()->lod());
} }
......
...@@ -27,7 +27,7 @@ bool MultiClassNMSKernel<CPU, float>::Init(MultiClassNMSParam<CPU> *param) { ...@@ -27,7 +27,7 @@ bool MultiClassNMSKernel<CPU, float>::Init(MultiClassNMSParam<CPU> *param) {
template <> template <>
void MultiClassNMSKernel<CPU, float>::Compute( void MultiClassNMSKernel<CPU, float>::Compute(
const MultiClassNMSParam<CPU> &param) const { const MultiClassNMSParam<CPU> &param) {
MultiClassNMSCompute<float>(param); MultiClassNMSCompute<float>(param);
} }
......
...@@ -28,7 +28,7 @@ bool PolygonBoxTransformKernel<CPU, float>::Init( ...@@ -28,7 +28,7 @@ bool PolygonBoxTransformKernel<CPU, float>::Init(
template <> template <>
void PolygonBoxTransformKernel<CPU, float>::Compute( void PolygonBoxTransformKernel<CPU, float>::Compute(
const PolygonBoxTransformParam<CPU> &param) const { const PolygonBoxTransformParam<CPU> &param) {
PolygonBoxTransformCompute<float>(param); PolygonBoxTransformCompute<float>(param);
} }
......
...@@ -25,7 +25,7 @@ bool PoolKernel<CPU, float>::Init(PoolParam<CPU> *param) { ...@@ -25,7 +25,7 @@ bool PoolKernel<CPU, float>::Init(PoolParam<CPU> *param) {
} }
template <> template <>
void PoolKernel<CPU, float>::Compute(const PoolParam<CPU> &param) const { void PoolKernel<CPU, float>::Compute(const PoolParam<CPU> &param) {
PoolCompute<float>(param); PoolCompute<float>(param);
} }
} // namespace operators } // namespace operators
......
...@@ -35,7 +35,7 @@ struct PReluFunctor { ...@@ -35,7 +35,7 @@ struct PReluFunctor {
* @b 特化到具体平台的实现, param 从 op 层传入 * @b 特化到具体平台的实现, param 从 op 层传入
* */ * */
template <> template <>
void PReluKernel<CPU, float>::Compute(const PReluParam<CPU> &param) const { void PReluKernel<CPU, float>::Compute(const PReluParam<CPU> &param) {
auto *x = param.InputX(); auto *x = param.InputX();
auto *alpha = param.InputAlpha(); auto *alpha = param.InputAlpha();
auto *out = param.Out(); auto *out = param.Out();
......
...@@ -26,8 +26,7 @@ bool PriorBoxKernel<CPU, float>::Init(PriorBoxParam<CPU> *param) { ...@@ -26,8 +26,7 @@ bool PriorBoxKernel<CPU, float>::Init(PriorBoxParam<CPU> *param) {
} }
template <> template <>
void PriorBoxKernel<CPU, float>::Compute( void PriorBoxKernel<CPU, float>::Compute(const PriorBoxParam<CPU> &param) {
const PriorBoxParam<CPU> &param) const {
PriorBoxCompute<float>(param); PriorBoxCompute<float>(param);
} }
......
...@@ -279,8 +279,7 @@ bool QuantizeKernel<CPU, float>::Init(QuantizeParam<CPU> *param) { ...@@ -279,8 +279,7 @@ bool QuantizeKernel<CPU, float>::Init(QuantizeParam<CPU> *param) {
} }
template <> template <>
void QuantizeKernel<CPU, float>::Compute( void QuantizeKernel<CPU, float>::Compute(const QuantizeParam<CPU> &param) {
const QuantizeParam<CPU> &param) const {
float max_abs = 0.f; float max_abs = 0.f;
const Tensor *input = param.input_; const Tensor *input = param.input_;
Tensor *output = param.out_; Tensor *output = param.out_;
......
...@@ -26,7 +26,7 @@ bool ReluKernel<CPU, float>::Init(ReluParam<CPU> *param) { ...@@ -26,7 +26,7 @@ bool ReluKernel<CPU, float>::Init(ReluParam<CPU> *param) {
} }
template <> template <>
void ReluKernel<CPU, float>::Compute(const ReluParam<CPU> &param) const { void ReluKernel<CPU, float>::Compute(const ReluParam<CPU> &param) {
ReluCompute<float>(param); ReluCompute<float>(param);
} }
......
...@@ -26,8 +26,7 @@ bool Reshape2Kernel<CPU, float>::Init(Reshape2Param<CPU> *param) { ...@@ -26,8 +26,7 @@ bool Reshape2Kernel<CPU, float>::Init(Reshape2Param<CPU> *param) {
} }
template <> template <>
void Reshape2Kernel<CPU, float>::Compute( void Reshape2Kernel<CPU, float>::Compute(const Reshape2Param<CPU> &param) {
const Reshape2Param<CPU> &param) const {
Reshape2Compute<float>(param); Reshape2Compute<float>(param);
} }
......
...@@ -26,7 +26,7 @@ bool ReshapeKernel<CPU, float>::Init(ReshapeParam<CPU> *param) { ...@@ -26,7 +26,7 @@ bool ReshapeKernel<CPU, float>::Init(ReshapeParam<CPU> *param) {
} }
template <> template <>
void ReshapeKernel<CPU, float>::Compute(const ReshapeParam<CPU> &param) const { void ReshapeKernel<CPU, float>::Compute(const ReshapeParam<CPU> &param) {
ReshapeCompute<float>(param); ReshapeCompute<float>(param);
} }
......
...@@ -108,7 +108,7 @@ void ResizeTensor(const Tensor* src, Tensor* dst) { ...@@ -108,7 +108,7 @@ void ResizeTensor(const Tensor* src, Tensor* dst) {
} }
template <> template <>
void ResizeKernel<CPU, float>::Compute(const ResizeParam<CPU>& param) const { void ResizeKernel<CPU, float>::Compute(const ResizeParam<CPU>& param) {
const auto* input_x = param.InputX(); const auto* input_x = param.InputX();
const auto& input_x_dims = input_x->dims(); const auto& input_x_dims = input_x->dims();
auto* out = param.Out(); auto* out = param.Out();
......
...@@ -23,7 +23,7 @@ namespace operators { ...@@ -23,7 +23,7 @@ namespace operators {
* @b 特化到具体平台的实现, param 从 op 层传入 * @b 特化到具体平台的实现, param 从 op 层传入
* */ * */
template <> template <>
void ScaleKernel<CPU, float>::Compute(const ScaleParam<CPU> &param) const { void ScaleKernel<CPU, float>::Compute(const ScaleParam<CPU> &param) {
const auto *input_x = param.InputX(); const auto *input_x = param.InputX();
auto *input_x_ptr = input_x->data<float>(); auto *input_x_ptr = input_x->data<float>();
auto *out = param.Out(); auto *out = param.Out();
......
...@@ -26,7 +26,7 @@ bool ShapeKernel<CPU, float>::Init(ShapeParam<CPU> *param) { ...@@ -26,7 +26,7 @@ bool ShapeKernel<CPU, float>::Init(ShapeParam<CPU> *param) {
} }
template <> template <>
void ShapeKernel<CPU, float>::Compute(const ShapeParam<CPU> &param) const { void ShapeKernel<CPU, float>::Compute(const ShapeParam<CPU> &param) {
ShapeCompute<float>(param); ShapeCompute<float>(param);
} }
......
...@@ -32,7 +32,7 @@ bool SigmoidKernel<CPU, float>::Init(SigmoidParam<CPU> *param) { ...@@ -32,7 +32,7 @@ bool SigmoidKernel<CPU, float>::Init(SigmoidParam<CPU> *param) {
} }
template <> template <>
void SigmoidKernel<CPU, float>::Compute(const SigmoidParam<CPU> &param) const { void SigmoidKernel<CPU, float>::Compute(const SigmoidParam<CPU> &param) {
SigmoidCompute<float>(param); SigmoidCompute<float>(param);
} }
......
...@@ -26,7 +26,7 @@ bool SoftmaxKernel<CPU, float>::Init(SoftmaxParam<CPU> *param) { ...@@ -26,7 +26,7 @@ bool SoftmaxKernel<CPU, float>::Init(SoftmaxParam<CPU> *param) {
} }
template <> template <>
void SoftmaxKernel<CPU, float>::Compute(const SoftmaxParam<CPU> &param) const { void SoftmaxKernel<CPU, float>::Compute(const SoftmaxParam<CPU> &param) {
SoftmaxCompute<float>(param); SoftmaxCompute<float>(param);
} }
......
...@@ -26,7 +26,7 @@ bool SplitKernel<CPU, float>::Init(SplitParam<CPU> *param) { ...@@ -26,7 +26,7 @@ bool SplitKernel<CPU, float>::Init(SplitParam<CPU> *param) {
} }
template <> template <>
void SplitKernel<CPU, float>::Compute(const SplitParam<CPU> &param) const { void SplitKernel<CPU, float>::Compute(const SplitParam<CPU> &param) {
SplitCompute<float>(param); SplitCompute<float>(param);
} }
......
...@@ -26,7 +26,7 @@ bool SumKernel<CPU, float>::Init(SumParam<CPU> *param) { ...@@ -26,7 +26,7 @@ bool SumKernel<CPU, float>::Init(SumParam<CPU> *param) {
} }
template <> template <>
void SumKernel<CPU, float>::Compute(const SumParam<CPU> &param) const { void SumKernel<CPU, float>::Compute(const SumParam<CPU> &param) {
SumCompute<float>(param); SumCompute<float>(param);
param.Out()->set_lod(param.Inputs()[0]->lod()); param.Out()->set_lod(param.Inputs()[0]->lod());
} }
......
...@@ -25,8 +25,7 @@ bool Transpose2Kernel<CPU, float>::Init(Transpose2Param<CPU> *param) { ...@@ -25,8 +25,7 @@ bool Transpose2Kernel<CPU, float>::Init(Transpose2Param<CPU> *param) {
} }
template <> template <>
void Transpose2Kernel<CPU, float>::Compute( void Transpose2Kernel<CPU, float>::Compute(const Transpose2Param<CPU> &param) {
const Transpose2Param<CPU> &param) const {
Transpose2Compute<float>(param); Transpose2Compute<float>(param);
} }
......
...@@ -25,8 +25,7 @@ bool TransposeKernel<CPU, float>::Init(TransposeParam<CPU> *param) { ...@@ -25,8 +25,7 @@ bool TransposeKernel<CPU, float>::Init(TransposeParam<CPU> *param) {
} }
template <> template <>
void TransposeKernel<CPU, float>::Compute( void TransposeKernel<CPU, float>::Compute(const TransposeParam<CPU> &param) {
const TransposeParam<CPU> &param) const {
TransposeCompute<float>(param); TransposeCompute<float>(param);
} }
......
...@@ -22,13 +22,11 @@ limitations under the License. */ ...@@ -22,13 +22,11 @@ limitations under the License. */
namespace paddle_mobile { namespace paddle_mobile {
namespace operators { namespace operators {
using namespace framework;
template <typename DeviceType, typename T> template <typename DeviceType, typename T>
class BatchNormKernel class BatchNormKernel
: public framework::OpKernelBase<DeviceType, BatchNormParam<DeviceType>> { : public framework::OpKernelBase<DeviceType, BatchNormParam<DeviceType>> {
public: public:
void Compute(const BatchNormParam<DeviceType> &param) const; void Compute(const BatchNormParam<DeviceType> &param);
bool Init(BatchNormParam<DeviceType> *param); bool Init(BatchNormParam<DeviceType> *param);
}; };
......
...@@ -29,7 +29,7 @@ class BilinearInterpKernel ...@@ -29,7 +29,7 @@ class BilinearInterpKernel
: public framework::OpKernelBase<DeviceType, : public framework::OpKernelBase<DeviceType,
BilinearInterpParam<DeviceType>> { BilinearInterpParam<DeviceType>> {
public: public:
void Compute(const BilinearInterpParam<DeviceType>& param) const; void Compute(const BilinearInterpParam<DeviceType>& param);
bool Init(BilinearInterpParam<DeviceType>* param); bool Init(BilinearInterpParam<DeviceType>* param);
}; };
} // namespace operators } // namespace operators
......
...@@ -29,7 +29,7 @@ template <typename DeviceType, typename T> ...@@ -29,7 +29,7 @@ template <typename DeviceType, typename T>
class BoxCoderKernel class BoxCoderKernel
: public framework::OpKernelBase<DeviceType, BoxCoderParam<DeviceType>> { : public framework::OpKernelBase<DeviceType, BoxCoderParam<DeviceType>> {
public: public:
void Compute(const BoxCoderParam<DeviceType>& param) const; void Compute(const BoxCoderParam<DeviceType>& param);
bool Init(BoxCoderParam<DeviceType>* param); bool Init(BoxCoderParam<DeviceType>* param);
}; };
} // namespace operators } // namespace operators
......
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#ifdef BATCHNORM_OP
#include "operators/kernel/batchnorm_kernel.h"
namespace paddle_mobile {
namespace operators {
template <>
bool BatchNormKernel<GPU_CL, float>::Init(BatchNormParam<GPU_CL> *param) {
this->cl_helper_.AddKernel("batchnorm", "batchnorm_kernel.cl");
const framework::CLImage *mean = param->InputMean();
const framework::CLImage *variance = param->InputVariance();
const framework::CLImage *scale = param->InputScale();
const framework::CLImage *bias = param->InputBias();
const float epsilon = param->Epsilon();
auto mean_ptr = mean->data<float>();
auto variance_ptr = variance->data<float>();
auto scale_ptr = scale->data<float>();
auto bias_ptr = bias->data<float>();
const int C = mean->numel();
float inv_std_ptr[C];
for (int i = 0; i < C; i++) {
inv_std_ptr[i] =
1 / static_cast<float>(pow((variance_ptr[i] + epsilon), 0.5));
}
float *new_scale_ptr = new float[C];
float *new_bias_ptr = new float[C];
for (int i = 0; i < C; i++) {
new_scale_ptr[i] = inv_std_ptr[i] * scale_ptr[i];
new_bias_ptr[i] = bias_ptr[i] - mean_ptr[i] * inv_std_ptr[i] * scale_ptr[i];
}
framework::CLImage *new_scale = new framework::CLImage();
new_scale->SetTensorData(new_scale_ptr, variance->dims());
new_scale->InitCLImage(this->cl_helper_.CLContext(),
this->cl_helper_.CLCommandQueue());
framework::CLImage *new_bias = new framework::CLImage();
new_bias->SetTensorData(new_bias_ptr, variance->dims());
new_bias->InitCLImage(this->cl_helper_.CLContext(),
this->cl_helper_.CLCommandQueue());
param->SetNewScale(new_scale);
param->SetNewBias(new_bias);
delete[](new_scale_ptr);
delete[](new_bias_ptr);
return true;
}
template <>
void BatchNormKernel<GPU_CL, float>::Compute(
const BatchNormParam<GPU_CL> &param) {
auto kernel = this->cl_helper_.KernelAt(0);
auto default_work_size = this->cl_helper_.DefaultWorkSize(*param.OutputY());
auto input = param.InputX()->GetCLImage();
auto out = param.OutputY()->GetCLImage();
auto new_scale = param.NewScale()->GetCLImage();
auto new_bias = param.NewBias()->GetCLImage();
const int out_width = default_work_size[1];
clSetKernelArg(kernel, 1, sizeof(int), &out_width);
clSetKernelArg(kernel, 2, sizeof(cl_mem), &input);
clSetKernelArg(kernel, 3, sizeof(cl_mem), &new_scale);
clSetKernelArg(kernel, 4, sizeof(cl_mem), &new_bias);
clSetKernelArg(kernel, 5, sizeof(cl_mem), &out);
// cl_event out_event = param.OutputY()->GetClEvent();
// cl_event wait_event = param.InputX()->GetClEvent();
clEnqueueNDRangeKernel(this->cl_helper_.CLCommandQueue(), kernel, 3, NULL,
default_work_size.data(), NULL, 0, NULL, NULL);
}
template class BatchNormKernel<GPU_CL, float>;
} // namespace operators
} // namespace paddle_mobile
#endif
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#pragma OPENCL EXTENSION cl_khr_fp16 : enable
__kernel void batchnorm(__private const int out_width,
__read_only image2d_t input,
__read_only image2d_t new_scale_image,
__read_only image2d_t new_bias_image,
__write_only image2d_t output) {
const int out_c = get_global_id(0);
const int out_w = get_global_id(1);
const int out_nh = get_global_id(2);
const sampler_t sampler =
CLK_NORMALIZED_COORDS_TRUE | CLK_ADDRESS_CLAMP | CLK_FILTER_NEAREST;
half4 new_scale = read_imageh(new_scale_image, sampler, (int2)(out_c, 0));
half4 new_bias = read_imageh(new_bias_image, sampler, (int2)(out_c, 0));
int pos_x = mad24(out_c, out_width, out_w);
half4 in = read_imageh(input, sampler, (int2)(pos_x, out_nh));
half4 out = mad(in, new_scale, new_bias);
write_imageh(output, (int2)(pos_x, out_nh), out);
}
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#pragma OPENCL EXTENSION cl_khr_fp16 : enable
__kernel void channel_add(__global image2d_t input, __global image2d_t bias,__write_only image2d_t outputImage,int w) {
int x = get_global_id(0);
int y = get_global_id(1);
const sampler_t sampler = CLK_NORMALIZED_COORDS_TRUE | CLK_ADDRESS_CLAMP | CLK_FILTER_NEAREST;
int2 coords;
coords.x = x;
coords.y = y;
int2 coords_bias;
coords_bias.x = x/w;
coords_bias.y = 0;
half4 in = read_imageh(input, sampler, coords);
half4 biase = read_imageh(bias, sampler, coords_bias);
half4 output = in + biase;
write_imageh(outputImage,coords,output);
}
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#pragma once
#pragma OPENCL EXTENSION cl_khr_fp16 : enable
inline half4 activation(half4 in
#ifdef PRELU
,
half4 prelu_alpha
#endif
) {
half4 output;
#ifdef PRELU
output = select(prelu_alpha * in, in, in >= (half4)0.0);
#endif
#ifdef RELU
output = fmax(in, (half4)(0.0f));
#endif
return output;
}
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#define BIASE
#define BATCH_NORM
#define RELU
#include "conv_kernel.inc.cl"
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#define BIASE
#include "conv_kernel.inc.cl"
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#define BIASE
#define RELU
#include "conv_kernel.inc.cl"
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "conv_kernel.inc.cl"
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
/*
conv
conv_bn
conv_add
conv_relu
conv_bn_relu
conv_add_relu
conv_add_bn_relu
*/
#include "cl_common.h"
__kernel void conv_3x3(__private const int global_size_dim0,
__private const int global_size_dim1,
__private const int global_size_dim2,
__read_only image2d_t input_image,
__read_only image2d_t filter,
#ifdef BIASE
__read_only image2d_t bias,
#endif
#ifdef BATCH_NORM
__read_only image2d_t new_scale,
__read_only image2d_t new_biase,
#endif
__write_only image2d_t output_image,
__private const int stride,
__private const int offset,
__private const int input_c,
__private const int dilation,
__private const int input_width,/* of one block */
__private const int input_height,/* of one block */
__private const int output_width,
__private const int output_height) {
const int out_c = get_global_id(0);
const int out_w = get_global_id(1);
const int out_nh = get_global_id(2);
if (out_c >= global_size_dim0 ||
out_w >= global_size_dim1 ||
out_nh >= global_size_dim2) {
return;
}
int2 stride_xy;
stride_xy.x = stride;
stride_xy.y = stride;
int2 ouput_pos_in_one_block;
ouput_pos_in_one_block.x = out_w;
ouput_pos_in_one_block.y = out_nh;
const sampler_t sampler = CLK_NORMALIZED_COORDS_TRUE |
CLK_ADDRESS_CLAMP |
CLK_FILTER_NEAREST;
int2 in_pos_in_one_block;
in_pos_in_one_block.x = ouput_pos_in_one_block.x * stride + offset;
in_pos_in_one_block.y = ouput_pos_in_one_block.y * stride + offset;
#ifdef BIASE
half4 output = read_imageh(bias, sampler, (int2)(out_c, 0));
#else
half4 output = 0.0f;
#endif
half4 input[9];
for (int i = 0; i < input_c; ++i) {
int2 pos_in = (int2)(i * input_width + in_pos_in_one_block.x, in_pos_in_one_block.y);
input[0] = select(read_imageh(input_image, sampler,
(int2)(pos_in.x - dilation, pos_in.y - dilation)),
(half4)(0.0f),
(ushort4)((in_pos_in_one_block.x - dilation < 0 || in_pos_in_one_block.y - dilation < 0 || in_pos_in_one_block.x - dilation >= input_width || in_pos_in_one_block.y - dilation >= input_height) << 15));
input[1] = select(read_imageh(input_image, sampler,
(int2)(pos_in.x, pos_in.y - dilation)),
(half4)(0.0f),
(ushort4)((in_pos_in_one_block.x < 0 || in_pos_in_one_block.y - dilation < 0 || in_pos_in_one_block.x >= input_width || in_pos_in_one_block.y - dilation >= input_height) << 15));
input[2] = select(read_imageh(input_image, sampler,
(int2)(pos_in.x + dilation, pos_in.y - dilation)),
(half4)(0.0f),
(ushort4)((in_pos_in_one_block.x + dilation < 0 || in_pos_in_one_block.y - dilation < 0 || in_pos_in_one_block.x + dilation >= input_width || in_pos_in_one_block.y - dilation >= input_height) << 15));
input[3] = select(read_imageh(input_image, sampler,
(int2)(pos_in.x - dilation, pos_in.y)),
(half4)(0.0f),
(ushort4)((in_pos_in_one_block.x - dilation < 0 || in_pos_in_one_block.y < 0 || in_pos_in_one_block.x - dilation >= input_width || in_pos_in_one_block.y >= input_height) << 15));
input[4] = select(read_imageh(input_image, sampler,
(int2)(pos_in.x, pos_in.y)),
(half4)(0.0f),
(ushort4)((in_pos_in_one_block.x < 0 || in_pos_in_one_block.y < 0 || in_pos_in_one_block.x >= input_width || in_pos_in_one_block.y >= input_height) << 15));
input[5] = select(read_imageh(input_image, sampler,
(int2)(pos_in.x + dilation, pos_in.y)),
(half4)(0.0f),
(ushort4)((in_pos_in_one_block.x + dilation < 0 || in_pos_in_one_block.y < 0 || in_pos_in_one_block.x + dilation >= input_width || in_pos_in_one_block.y >= input_height) << 15));
input[6] = select(read_imageh(input_image, sampler,
(int2)(pos_in.x - dilation, pos_in.y + dilation)),
(half4)(0.0f),
(ushort4)((in_pos_in_one_block.x - dilation < 0 || in_pos_in_one_block.y + dilation < 0 || in_pos_in_one_block.x - dilation >= input_width || in_pos_in_one_block.y + dilation >= input_height) << 15));
input[7] = select(read_imageh(input_image, sampler,
(int2)(pos_in.x, pos_in.y + dilation)),
(half4)(0.0f),
(ushort4)((in_pos_in_one_block.x < 0 || in_pos_in_one_block.y + dilation < 0 || in_pos_in_one_block.x >= input_width || in_pos_in_one_block.y + dilation >= input_height) << 15));
input[8] = select(read_imageh(input_image, sampler,
(int2)(pos_in.x + dilation, pos_in.y + dilation)),
(half4)(0.0f),
(ushort4)((in_pos_in_one_block.x + dilation < 0 || in_pos_in_one_block.y + dilation < 0 || in_pos_in_one_block.x + dilation >= input_width || in_pos_in_one_block.y + dilation >= input_height) << 15));
for (int j = 0; j < 9; ++j) {
int2 pos_of_weight;
pos_of_weight.x = i * 3 + j % 3;
pos_of_weight.y = out_c * 4 * 3 + 0 * 3 + j / 3;
half4 weight_x = read_imageh(filter, sampler, pos_of_weight);
output.x += dot(input[j], weight_x);
pos_of_weight.y = out_c * 4 * 3 + 1 * 3 + j / 3;
half4 weight_y = read_imageh(filter, sampler, pos_of_weight);
output.y += dot(input[j], weight_y);
pos_of_weight.y = out_c * 4 * 3 + 2 * 3 + j / 3;
half4 weight_z = read_imageh(filter, sampler, pos_of_weight);
output.z += dot(input[j], weight_z);
pos_of_weight.y = out_c * 4 * 3 + 3 * 3 + j / 3;
half4 weight_w = read_imageh(filter, sampler, pos_of_weight);
output.w += dot(input[j], weight_w);
}
}
#ifdef BATCH_NORM
output = output * read_imageh(new_scale, sampler, (int2)(out_c, 0)) + read_imageh(new_biase, sampler, (int2)(out_c, 0));
#endif
#ifdef RELU
output = activation(output);
#endif
write_imageh(output_image, (int2)(out_c * global_size_dim1 + out_w, out_nh), output);
}
__kernel void depth_conv_3x3(__private const int global_size_dim0,
__private const int global_size_dim1,
__private const int global_size_dim2,
__read_only image2d_t input,
__read_only image2d_t filter,
#ifdef BIASE
__read_only image2d_t bias,
#endif
#ifdef BATCH_NORM
__read_only image2d_t new_scale,
__read_only image2d_t new_biase,
#endif
__write_only image2d_t output_image,
__private const int stride,
__private const int offset,
__private const int input_c,
__private const int dilation,
__private const int input_width,/* of one block */
__private const int input_height, /* of one block */
__private const int output_width,
__private const int output_height) {
const int out_c = get_global_id(0);
const int out_w = get_global_id(1);
const int out_nh = get_global_id(2);
int2 output_pos = (int2)(out_c * global_size_dim1 + out_w, out_nh);
const sampler_t sampler = CLK_NORMALIZED_COORDS_TRUE |
CLK_ADDRESS_CLAMP |
CLK_FILTER_NEAREST;
const int batch_index = out_nh / output_height;
const int out_nh_in_one_batch = out_nh % output_height;
int2 stride_xy = (int2)(stride, stride);
int2 ouput_pos_in_one_block = (int2)(out_w, out_nh_in_one_batch);
int2 in_pos_in_one_block = ouput_pos_in_one_block * stride_xy + (int2)(offset, offset);
#ifdef BIASE
half4 output = read_imageh(bias, sampler, (int2)(out_c, 0));
#else
half4 output = 0.0f;
#endif
const int filter_width = 3;
const int filter_height = 3;
int2 pos_in_input_block = (int2)(out_c * input_width, batch_index * input_height);
int2 pos_in_filter_block = (int2)(out_c * filter_width, batch_index * filter_height);
int filter_x = pos_in_filter_block.x ;
int filter_y = pos_in_filter_block.y ;
half4 inputs[9];
inputs[0] = select(read_imageh(input, sampler, (int2)(pos_in_input_block.x + in_pos_in_one_block.x - 1, pos_in_input_block.y + in_pos_in_one_block.y - 1)),
(half4)(0.0f),
(ushort4)((in_pos_in_one_block.x - 1 < 0 || in_pos_in_one_block.y - 1 < 0 || in_pos_in_one_block.x - 1 >= input_width || in_pos_in_one_block.y - 1 >= input_height) << 15));
inputs[1] = select(read_imageh(input, sampler, (int2)(pos_in_input_block.x + in_pos_in_one_block.x, pos_in_input_block.y + in_pos_in_one_block.y - 1)),
(half4)(0.0f),
(ushort4)((in_pos_in_one_block.x < 0 || in_pos_in_one_block.y - 1 < 0 || in_pos_in_one_block.x >= input_width || in_pos_in_one_block.y - 1 >= input_height) << 15));
inputs[2] = select(read_imageh(input, sampler, (int2)(pos_in_input_block.x + in_pos_in_one_block.x + 1, pos_in_input_block.y + in_pos_in_one_block.y - 1)),
(half4)(0.0f),
(ushort4)((in_pos_in_one_block.x + 1 < 0 || in_pos_in_one_block.y - 1 < 0 || in_pos_in_one_block.x + 1 >= input_width || in_pos_in_one_block.y - 1 >= input_height) << 15));
inputs[3] = select(read_imageh(input, sampler, (int2)(pos_in_input_block.x + in_pos_in_one_block.x - 1, pos_in_input_block.y + in_pos_in_one_block.y)),
(half4)(0.0f),
(ushort4)((in_pos_in_one_block.x - 1 < 0 || in_pos_in_one_block.y < 0 || in_pos_in_one_block.x - 1 >= input_width || in_pos_in_one_block.y >= input_height) << 15));
/*
if (output_pos.x == 112 && output_pos.y == 0) {
half4 input1 = inputs[3];
float4 in = (float4)(input1.x, input1.y, input1.z, input1.w);
printf(" input4 3 - %v4hlf \n", in);
printf(" --- %d ---\n", in_pos_in_one_block.x - 1);
}
*/
inputs[4] = select(read_imageh(input, sampler, (int2)(pos_in_input_block.x + in_pos_in_one_block.x, pos_in_input_block.y + in_pos_in_one_block.y)),
(half4)(0.0f),
(ushort4)((in_pos_in_one_block.x < 0 || in_pos_in_one_block.y < 0 || in_pos_in_one_block.x >= input_width || in_pos_in_one_block.y >= input_height) << 15));
inputs[5] = select(read_imageh(input, sampler, (int2)(pos_in_input_block.x + in_pos_in_one_block.x + 1, pos_in_input_block.y + in_pos_in_one_block.y)),
(half4)(0.0f),
(ushort4)((in_pos_in_one_block.x + 1 < 0 || in_pos_in_one_block.y < 0 || in_pos_in_one_block.x + 1 >= input_width || in_pos_in_one_block.y >= input_height) << 15));
inputs[6] = select(read_imageh(input, sampler, (int2)(pos_in_input_block.x + in_pos_in_one_block.x - 1, pos_in_input_block.y + in_pos_in_one_block.y + 1)),
(half4)(0.0f),
(ushort4)((in_pos_in_one_block.x - 1 < 0 || in_pos_in_one_block.y + 1 < 0 || in_pos_in_one_block.x - 1 >= input_width || in_pos_in_one_block.y + 1 >= input_height) << 15));
inputs[7] = select(read_imageh(input, sampler, (int2)(pos_in_input_block.x + in_pos_in_one_block.x, pos_in_input_block.y + in_pos_in_one_block.y + 1)),
(half4)(0.0f),
(ushort4)((in_pos_in_one_block.x < 0 || in_pos_in_one_block.y + 1 < 0 || in_pos_in_one_block.x >= input_width || in_pos_in_one_block.y + 1 >= input_height) << 15));
inputs[8] = select(read_imageh(input, sampler, (int2)(pos_in_input_block.x + in_pos_in_one_block.x + 1, pos_in_input_block.y + in_pos_in_one_block.y + 1)),
(half4)(0.0f),
(ushort4)((in_pos_in_one_block.x + 1 < 0 || in_pos_in_one_block.y + 1 < 0 || in_pos_in_one_block.x + 1 >= input_width || in_pos_in_one_block.y + 1 >= input_height) << 15));
half4 filters[9];
filters[0] = read_imageh(filter, sampler,(int2)(filter_x,filter_y));
filters[1] = read_imageh(filter, sampler,(int2)(filter_x + 1,filter_y));
filters[2] = read_imageh(filter, sampler,(int2)(filter_x + 2,filter_y));
filters[3] = read_imageh(filter, sampler,(int2)(filter_x,filter_y + 1));
filters[4] = read_imageh(filter, sampler,(int2)(filter_x + 1,filter_y + 1));
filters[5] = read_imageh(filter, sampler,(int2)(filter_x + 2,filter_y + 1));
filters[6] = read_imageh(filter, sampler,(int2)(filter_x,filter_y + 2));
filters[7] = read_imageh(filter, sampler,(int2)(filter_x + 1,filter_y + 2));
filters[8] = read_imageh(filter, sampler,(int2)(filter_x + 2,filter_y + 2));
for(int i = 0 ;i < 9 ; i++){
output += inputs[i] * filters[i];
}
#ifdef BATCH_NORM
output = output * read_imageh(new_scale, sampler, (int2)(out_c, 0)) + read_imageh(new_biase, sampler, (int2)(out_c, 0));
#endif
#ifdef RELU
output = activation(output);
#endif
/*
if (output_pos.x == 112 && output_pos.y == 0) {
for (int i = 0; i < 9; ++i) {
half4 input1 = inputs[i];
float4 in = (float4)(input1.x, input1.y, input1.z, input1.w);
printf(" input4 %d - %v4hlf \n", i, in);
}
float4 out = (float4)(output.x, output.y, output.z, output.w);
printf(" depth wise output output4 = %v4hlf \n", out);
printf(" pos_in_input_block -x %d \n ", pos_in_input_block.x);
printf(" pos_in_input_block -y %d \n ", pos_in_input_block.y);
printf(" in_pos_in_one_block - x %d \n", in_pos_in_one_block.x);
printf(" in_pos_in_one_block - y %d \n", in_pos_in_one_block.y);
}
*/
write_imageh(output_image, output_pos, output);
}
__kernel void conv_1x1(__private const int global_size_dim0,
__private const int global_size_dim1,
__private const int global_size_dim2,
__read_only image2d_t input_image,
__read_only image2d_t filter,
#ifdef BIASE
__read_only image2d_t bias,
#endif
#ifdef BATCH_NORM
__read_only image2d_t new_scale,
__read_only image2d_t new_biase,
#endif
__write_only image2d_t output_image,
__private const int stride,
__private const int offset,
__private const int input_c,
__private const int dilation,
__private const int input_width,/* of one block */
__private const int input_height,/* of one block */
__private const int output_width,
__private const int output_height) {
const int out_c = get_global_id(0);
const int out_w = get_global_id(1);
const int out_nh = get_global_id(2);
const sampler_t sampler = CLK_NORMALIZED_COORDS_TRUE |
CLK_ADDRESS_CLAMP |
CLK_FILTER_NEAREST;
const uint kernelHXW = 1;
int2 stride_xy = (int2)(stride, stride);
int2 ouput_pos_in_one_block = (int2)(out_w, out_nh);
int2 in_pos_in_one_block = ouput_pos_in_one_block * stride_xy + (int2)(offset, offset);
#ifdef BIASE
half4 output = read_imageh(bias, sampler, (int2)(out_c, 0));
#else
half4 output = 0.0f;
#endif
for (int i = 0; i < input_c; ++i) {
int2 pos_in = (int2)(i * input_width + in_pos_in_one_block.x, in_pos_in_one_block.y);
half4 input = read_imageh(input_image, sampler, pos_in);
half4 weight0 = read_imageh(filter, sampler, (int2)(out_c, i * 4 + 0));
half4 weight1 = read_imageh(filter, sampler, (int2)(out_c, i * 4 + 1));
half4 weight2 = read_imageh(filter, sampler, (int2)(out_c, i * 4 + 2));
half4 weight3 = read_imageh(filter, sampler, (int2)(out_c, i * 4 + 3));
/*
output.x = dot(input, weight0);
output.y = dot(input, weight1);
output.z = dot(input, weight2);
output.w = dot(input, weight3);
*/
output = mad(input.x, weight0, output);
output = mad(input.y, weight1, output);
output = mad(input.z, weight2, output);
output = mad(input.w, weight3, output);
}
#ifdef BATCH_NORM
output = output * read_imageh(new_scale, sampler, (int2)(out_c, 0)) + read_imageh(new_biase, sampler, (int2)(out_c, 0));
#endif
#ifdef RELU
output = activation(output);
#endif
int2 output_pos = (int2)(out_c * global_size_dim1 + out_w, out_nh);
write_imageh(output_image, output_pos, output);
}
/*
__kernel void conv_1x1_4(__private const int global_size_dim0,
__private const int global_size_dim1,
__private const int global_size_dim2,
__read_only image2d_t input_image,
__read_only image2d_t filter,
#ifdef BIASE
__read_only image2d_t bias,
#endif
#ifdef BATCH_NORM
__read_only image2d_t new_scale,
__read_only image2d_t new_biase,
#endif
__write_only image2d_t output_image,
__private const int stride,
__private const int offset,
__private const int input_c,
__private const int dilation,
__private const int input_width,
__private const int input_height,
__private const int output_width,
__private const int output_height) {
const int out_c = get_global_id(0) * 4;
const int out_w = get_global_id(1);
const int out_nh = get_global_id(2);
const sampler_t sampler = CLK_NORMALIZED_COORDS_TRUE |
CLK_ADDRESS_CLAMP |
CLK_FILTER_NEAREST;
int2 stride_xy = (int2)(stride, stride);
int2 ouput_pos_in_one_block = (int2)(out_w, out_nh);
int2 in_pos_in_one_block = ouput_pos_in_one_block * stride_xy + (int2)(offset, offset);
#ifdef BIASE
half4 output0 = read_imageh(bias, sampler, (int2)(out_c, 0));
half4 output1 = read_imageh(bias, sampler, (int2)(out_c + 1, 0));
half4 output2 = read_imageh(bias, sampler, (int2)(out_c + 2, 0));
half4 output3 = read_imageh(bias, sampler, (int2)(out_c + 3, 0));
#else
half4 output0 = 0.0f;
half4 output1 = 0.0f;
half4 output2 = 0.0f;
half4 output3 = 0.0f;
#endif
for (int i = 0; i < input_c; ++i) {
int2 pos_in = (int2)(i * input_width + in_pos_in_one_block.x, in_pos_in_one_block.y);
half4 input = read_imageh(input_image, sampler, pos_in);
half4 weight0_0 = read_imageh(filter, sampler, (int2)(out_c, i * 4 + 0));
half4 weight0_1 = read_imageh(filter, sampler, (int2)(out_c, i * 4 + 1));
half4 weight0_2 = read_imageh(filter, sampler, (int2)(out_c, i * 4 + 2));
half4 weight0_3 = read_imageh(filter, sampler, (int2)(out_c, i * 4 + 3));
output0 = mad(input.x, weight0_0, output0);
output0 = mad(input.y, weight0_1, output0);
output0 = mad(input.z, weight0_2, output0);
output0 = mad(input.w, weight0_3, output0);
half4 weight1_0 = read_imageh(filter, sampler, (int2)(out_c + 1, i * 4 + 0));
half4 weight1_1 = read_imageh(filter, sampler, (int2)(out_c + 1, i * 4 + 1));
half4 weight1_2 = read_imageh(filter, sampler, (int2)(out_c + 1, i * 4 + 2));
half4 weight1_3 = read_imageh(filter, sampler, (int2)(out_c + 1, i * 4 + 3));
output1 = mad(input.x, weight1_0, output1);
output1 = mad(input.y, weight1_1, output1);
output1 = mad(input.z, weight1_2, output1);
output1 = mad(input.w, weight1_3, output1);
half4 weight2_0 = read_imageh(filter, sampler, (int2)(out_c + 2, i * 4 + 0));
half4 weight2_1 = read_imageh(filter, sampler, (int2)(out_c + 2, i * 4 + 1));
half4 weight2_2 = read_imageh(filter, sampler, (int2)(out_c + 2, i * 4 + 2));
half4 weight2_3 = read_imageh(filter, sampler, (int2)(out_c + 2, i * 4 + 3));
output2 = mad(input.x, weight2_0, output2);
output2 = mad(input.y, weight2_1, output2);
output2 = mad(input.z, weight2_2, output2);
output2 = mad(input.w, weight2_3, output2);
half4 weight3_0 = read_imageh(filter, sampler, (int2)(out_c + 3, i * 4 + 0));
half4 weight3_1 = read_imageh(filter, sampler, (int2)(out_c + 3, i * 4 + 1));
half4 weight3_2 = read_imageh(filter, sampler, (int2)(out_c + 3, i * 4 + 2));
half4 weight3_3 = read_imageh(filter, sampler, (int2)(out_c + 3, i * 4 + 3));
output3 = mad(input.x, weight3_0, output3);
output3 = mad(input.y, weight3_1, output3);
output3 = mad(input.z, weight3_2, output3);
output3 = mad(input.w, weight3_3, output3);
}
#ifdef BATCH_NORM
output0 = output0 * read_imageh(new_scale, sampler, (int2)(out_c + 0, 0)) + read_imageh(new_biase, sampler, (int2)(out_c + 0, 0));
output1 = output1 * read_imageh(new_scale, sampler, (int2)(out_c + 1, 0)) + read_imageh(new_biase, sampler, (int2)(out_c + 1, 0));
output2 = output2 * read_imageh(new_scale, sampler, (int2)(out_c + 2, 0)) + read_imageh(new_biase, sampler, (int2)(out_c + 2, 0));
output3 = output3 * read_imageh(new_scale, sampler, (int2)(out_c + 3, 0)) + read_imageh(new_biase, sampler, (int2)(out_c + 3, 0));
#endif
#ifdef RELU
output0 = activation(output0);
output1 = activation(output1);
output2 = activation(output2);
output3 = activation(output3);
#endif
int2 output_pos0 = (int2)(out_c * global_size_dim1 + out_w, out_nh);
write_imageh(output_image, output_pos0, output0);
int2 output_pos1 = (int2)((out_c + 1) * global_size_dim1 + out_w, out_nh);
write_imageh(output_image, output_pos1, output1);
int2 output_pos2 = (int2)((out_c + 2) * global_size_dim1 + out_w, out_nh);
write_imageh(output_image, output_pos2, output2);
int2 output_pos3 = (int2)((out_c + 3) * global_size_dim1 + out_w, out_nh);
write_imageh(output_image, output_pos3, output3);
}
*/
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#define BIASE
#define BATCH_NORM
#define RELU
#include "conv_kernel.inc.cl"
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "conv_kernel.inc.cl"
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#pragma OPENCL EXTENSION cl_khr_fp16 : enable
__kernel void elementwise_add(__global image2d_t input, __global image2d_t bias,__write_only image2d_t outputImage) {
int x = get_global_id(0);
int y = get_global_id(1);
const sampler_t sampler = CLK_NORMALIZED_COORDS_TRUE | CLK_ADDRESS_CLAMP | CLK_FILTER_NEAREST;
int2 coords;
coords.x = x;
coords.y = y;
half4 in = read_imageh(input, sampler, coords);
half4 biase = read_imageh(bias, sampler, coords);
half4 output = in + biase;
write_imageh(outputImage,coords,output);
}
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#pragma OPENCL EXTENSION cl_khr_fp16 : enable
__kernel void feed(__global float *in, __write_only image2d_t outputImage,int h,int w)
{
int i = get_global_id(0);
int j = get_global_id(1);
half4 pixel;
pixel.x = convert_half(in[(i * w + j)]);
pixel.y = convert_half(in[h * w + (i * w + j)]);
pixel.z = convert_half(in[2 * h * w + (i * w + j)]);
pixel.w = 0.0;
int2 coords;
coords.x = j;
coords.y = i;
write_imageh(outputImage,coords,pixel);
}
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#pragma OPENCL EXTENSION cl_khr_fp16 : enable
__kernel void fetch(__private const int in_height,
__private const int in_width,
__read_only image2d_t input,
__global float* out,
__private const int size_ch,
__private const int size_block,
__private const int size_batch) {
const int in_c = get_global_id(0);
const int in_w = get_global_id(1);
const int in_nh = get_global_id(2);
const int in_n = in_nh / in_height;
const int in_h = in_nh % in_height;
const sampler_t sampler =
CLK_NORMALIZED_COORDS_TRUE | CLK_ADDRESS_CLAMP | CLK_FILTER_NEAREST;
const int pos_x = mad24(in_c, in_width, in_w);
half4 in = read_imageh(input, sampler, (int2)(pos_x, in_nh));
const int index = in_n * size_batch + in_c * size_block + in_h * in_width + in_w;
out[index] = convert_float(in.x);
out[index + size_ch] = convert_float(in.y);
out[index + size_ch * 2] = convert_float(in.z);
out[index + size_ch * 3] = convert_float(in.w);
}
__kernel void fetch_2d(__private const int in_height,
__private const int in_width,
__read_only image2d_t input,
__global float* out) {
const int in_w = get_global_id(1);
const int in_h = get_global_id(2);
const sampler_t sampler =
CLK_NORMALIZED_COORDS_TRUE | CLK_ADDRESS_CLAMP | CLK_FILTER_NEAREST;
half4 in = read_imageh(input, sampler, (int2)(in_w, in_h));
const int index = (in_h * in_width + in_w) * 4;
out[index] = convert_float(in.x);
out[index + 1] = convert_float(in.y);
out[index + 2] = convert_float(in.z);
out[index + 3] = convert_float(in.w);
}
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#pragma OPENCL EXTENSION cl_khr_fp16 : enable
#define MIN_VALUE -FLT_MAX
__kernel void pool_max(
__private const int in_height, __private const int in_width,
__private const int out_height, __private const int out_width,
__private const int pad_top, __private const int pad_left,
__private const int stride_h, __private const int stride_w,
__private const int ksize_h, __private const int ksize_w,
__read_only image2d_t input, __write_only image2d_t output) {
const int out_c = get_global_id(0);
const int out_w = get_global_id(1);
const int out_nh = get_global_id(2);
const int out_n = out_nh / out_height;
const int out_h = out_nh % out_height;
const sampler_t sampler =
CLK_NORMALIZED_COORDS_TRUE | CLK_ADDRESS_CLAMP | CLK_FILTER_NEAREST;
int start_h = max(out_h * stride_h - pad_top, 0);
int end_h = min(start_h + ksize_h, in_height);
int start_w = max(out_w * stride_w - pad_left, 0);
int end_w = min(start_w + ksize_w, in_width);
const int pos_in_x = out_c * in_width;
const int pos_in_y = out_n * in_height;
half4 max_value = (half4)(MIN_VALUE);
for (int y = start_h; y < end_h; ++y) {
for (int x = start_w; x < end_w; ++x) {
half4 tmp = read_imageh(input, sampler, (int2)(pos_in_x + x, pos_in_y + y));
max_value = max(max_value, tmp);
}
}
const int pos_out_x = mad24(out_c, out_width, out_w);
write_imageh(output, (int2)(pos_out_x, out_nh), max_value);
}
__kernel void pool_avg(
__private const int in_height, __private const int in_width,
__private const int out_height, __private const int out_width,
__private const int pad_top, __private const int pad_left,
__private const int stride_h, __private const int stride_w,
__private const int ksize_h, __private const int ksize_w,
__read_only image2d_t input, __write_only image2d_t output) {
const int out_c = get_global_id(0);
const int out_w = get_global_id(1);
const int out_nh = get_global_id(2);
const int out_n = out_nh / out_height;
const int out_h = out_nh % out_height;
const sampler_t sampler =
CLK_NORMALIZED_COORDS_TRUE | CLK_ADDRESS_CLAMP | CLK_FILTER_NEAREST;
int start_h = max(out_h * stride_h - pad_top, 0);
int end_h = min(start_h + ksize_h, in_height);
int start_w = max(out_w * stride_w - pad_left, 0);
int end_w = min(start_w + ksize_w, in_width);
const int pos_in_x = out_c * in_width;
const int pos_in_y = out_n * in_height;
half4 sum = (half4)(0.0f);
int num = 0;
for (int y = start_h; y < end_h; ++y) {
for (int x = start_w; x < end_w; ++x) {
sum += read_imageh(input, sampler, (int2)(pos_in_x + x, pos_in_y + y));
num++;
}
}
half4 avg = sum / num;
const int pos_out_x = mad24(out_c, out_width, out_w);
write_imageh(output, (int2)(pos_out_x, out_nh), avg);
}
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#pragma OPENCL EXTENSION cl_khr_fp16 : enable
__kernel void relu(__read_only image2d_t input,
__write_only image2d_t output){
const int x = get_global_id(0);
const int y = get_global_id(1);
const sampler_t sampler = CLK_NORMALIZED_COORDS_TRUE |
CLK_ADDRESS_CLAMP |
CLK_FILTER_NEAREST;
half4 in = read_imageh(input, sampler, (int2)(x, y));
in = max((half4)(0.0f, 0.0f, 0.0f, 0.0f), in);
write_imageh(output, (int2)(x, y), in);
}
__kernel void relu_p0(__read_only image2d_t input,
__write_only image2d_t output){
const int x = get_global_id(0);
const int y = get_global_id(1);
const sampler_t sampler = CLK_NORMALIZED_COORDS_TRUE |
CLK_ADDRESS_CLAMP |
CLK_FILTER_NEAREST;
half4 in = read_imageh(input, sampler, (int2)(x, y));
in = max((half4)(0.0f, 0.0f, 0.0f, 0.0f), in);
write_imageh(output, (int2)(x, y), in);
}
__kernel void relu_p1(__read_only image2d_t input,
__write_only image2d_t output){
const int x = get_global_id(0);
const int y = get_global_id(1);
const sampler_t sampler = CLK_NORMALIZED_COORDS_TRUE |
CLK_ADDRESS_CLAMP |
CLK_FILTER_NEAREST;
half4 in = read_imageh(input, sampler, (int2)(x, y));
write_imageh(output, (int2)(x, y), in);
}
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#pragma OPENCL EXTENSION cl_khr_fp16 : enable
__kernel void reshape(__read_only image2d_t input,
__write_only image2d_t output,
__private const int d0,
__private const int d1,
__private const int d2,
__private const int d3,
__private const int x0,
__private const int x1,
__private const int x2,
__private const int x3) {
const int x = get_global_id(0);
const int y = get_global_id(1);
const sampler_t sampler = CLK_NORMALIZED_COORDS_TRUE |
CLK_ADDRESS_CLAMP |
CLK_FILTER_NEAREST;
half4 in = read_imageh(input, sampler, (int2)(x, y));
write_imageh(output, (int2)(x, y), in);
}
/*
__kernel void reshape(__read_only image2d_t input,
__write_only image2d_t output,
__private const int d0,
__private const int d1,
__private const int d2,
__private const int d3,
__private const int x0,
__private const int x1,
__private const int x2,
__private const int x3) {
const int x = get_global_id(0);
const int y = get_global_id(1);
int obx = x / x3;
int oby = y / x2;
int ox = x % x3;
int oy = y % x2;
const sampler_t sampler = CLK_NORMALIZED_COORDS_TRUE |
CLK_ADDRESS_CLAMP |
CLK_FILTER_NEAREST;
half4 r;
for (int i = 0; i < 4; i++) {
int t = obx * 4 + i;
if (t > x1) break;
int oindex = oby * x1 * x2 * x3 + t * x2 * x3 + ox * x3 + oy;
int i3 = oindex % d3; oindex /= d3;
int i2 = oindex % d2; oindex /= d2;
int i1 = oindex % d1; oindex /= d1;
int i0 = oindex;
int ix = (i1 / 4) * d3 + i3;
int iy = i0 * d2 + i2;
half4 p = read_imageh(input, sampler, (int2)(ix, iy));
((half*)&r)[i] = ((half*)&p)[i1%4];
}
write_imageh(output, (int2)(x, y), r);
}
*/
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#pragma OPENCL EXTENSION cl_khr_fp16 : enable
__kernel void softmax(__read_only image2d_t input_image,
__write_only image2d_t output_image,
__private const int group
) {
const int out_c = get_global_id(0); // block index
const int out_w = get_global_id(1); // index in one block
const int out_nh = get_global_id(2);
const sampler_t sampler = CLK_NORMALIZED_COORDS_TRUE |
CLK_ADDRESS_CLAMP |
CLK_FILTER_NEAREST;
half maxv = 0.0f;
for (int i = 0; i < group; ++i) {
half4 temp = read_imageh(input_image, sampler, (int2)(i, 0));
maxv = max(maxv, max(temp.x, max(temp.y, max(temp.z, temp.w))));
}
half4 rsum = (half4)(0.0f);
for (int i = 0; i < group; ++i) {
half4 r = read_imageh(input_image, sampler, (int2)(i, 0));
rsum += convert_half4(exp(convert_float4(r - maxv)));
}
float sum = rsum.x + rsum.y + rsum.z + rsum.w;
half4 rr = read_imageh(input_image, sampler, (int2)(out_w, out_nh));
half4 result = convert_half4(exp(convert_float4(rr - maxv)) / sum);
write_imageh(output_image, (int2)(out_w, out_nh), result);
}
/*
__kernel void softmax(__read_only image2d_t input,
__write_only image2d_t output,
__private const int d0,
__private const int d1,
__private const int d2,
__private const int d3) {
const int z = get_global_id(0);
const int x = get_global_id(1);
const int y = get_global_id(2);
const sampler_t sampler = CLK_NORMALIZED_COORDS_TRUE |
CLK_ADDRESS_CLAMP |
CLK_FILTER_NEAREST;
half4 cv = read_imageh(input, sampler, (int2)(x, y));
half4 maxv = cv;
for (int i = 0; i < d3; i++) {
half4 temp = read_imageh(input, sampler, (int2)(z * d3 + i, y));
maxv = max(maxv, temp);
}
half4 sum = (half4)0.0f;
// half4 x = = (half4)0.0f;
for (int i = 0; i < d3; i++) {
half4 temp = read_imageh(input, sampler, (int2)(z * d3 + i, y));
sum += exp(temp - maxv);
}
half4 r = exp(cv - maxv) / sum;
write_imageh(output, (int2)(z * d3 + x, y), r);
}
*/
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#ifdef FUSION_CONVADDBNRELU_OP
#include "operators/kernel/conv_add_bn_relu_kernel.h"
#include "framework/cl/cl_image.h"
#include "framework/cl/cl_tool.h"
namespace paddle_mobile {
namespace operators {
template <>
bool ConvAddBNReluKernel<GPU_CL, float>::Init(
FusionConvAddBNReluParam<GPU_CL> *param) {
PADDLE_MOBILE_ENFORCE(
param->Filter()->dims()[2] == param->Filter()->dims()[3] &&
param->Paddings()[0] == param->Paddings()[1],
"need equal");
param->Bias()->InitCLImage(cl_helper_.CLContext(),
cl_helper_.CLCommandQueue());
// const CL *mean = param->InputMean();
const framework::CLImage *mean = param->InputMean();
const framework::CLImage *variance = param->InputVariance();
const framework::CLImage *scale = param->InputScale();
const framework::CLImage *bias = param->InputBias();
const float epsilon = param->Epsilon();
const int C = mean->numel();
// for (int j = 0; j < C; ++j) {
// DLOG << " mean - " << j << mean->data<float>()[j];
// }
//
// for (int j = 0; j < C; ++j) {
// DLOG << " variance - " << j << variance->data<float>()[j];
// }
//
// for (int j = 0; j < C; ++j) {
// DLOG << " scale - " << j << scale->data<float>()[j];
// }
//
// for (int j = 0; j < C; ++j) {
// DLOG << " bias - " << j << bias->data<float>()[j];
// }
//
// DLOG << " climage mean: " << *mean;
// DLOG << " climage variance: " << *variance;
// DLOG << " climage scale: " << *scale;
// DLOG << " climage bias: " << *bias;
auto mean_ptr = mean->data<float>();
auto variance_ptr = variance->data<float>();
auto scale_ptr = scale->data<float>();
auto bias_ptr = bias->data<float>();
float inv_std_ptr[C];
for (int i = 0; i < C; i++) {
inv_std_ptr[i] =
1 / static_cast<float>(pow((variance_ptr[i] + epsilon), 0.5));
}
float *new_scale_ptr = new float[C];
float *new_bias_ptr = new float[C];
for (int i = 0; i < C; i++) {
new_scale_ptr[i] = inv_std_ptr[i] * scale_ptr[i];
new_bias_ptr[i] = bias_ptr[i] - mean_ptr[i] * inv_std_ptr[i] * scale_ptr[i];
}
framework::CLImage *new_scale = new framework::CLImage();
// for (int j = 0; j < C; ++j) {
// DLOG << " new scale - " << j << new_scale_ptr[j];
// }
//
// for (int j = 0; j < C; ++j) {
// DLOG << " new bias - " << j << new_bias_ptr[j];
// }
new_scale->SetTensorData(new_scale_ptr, variance->dims());
new_scale->InitCLImage(this->cl_helper_.CLContext(),
cl_helper_.CLCommandQueue());
// DLOG << " climage - y bias: " << *(param->Bias());
//
// DLOG << " climage - new scale: " << *new_scale;
framework::CLImage *new_bias = new framework::CLImage();
new_bias->SetTensorData(new_bias_ptr, variance->dims());
new_bias->InitCLImage(this->cl_helper_.CLContext(),
cl_helper_.CLCommandQueue());
// DLOG << " climage - new bias: " << *new_bias;
//
// DLOG << " climage - filter: " << *(param->Filter());
param->SetNewScale(new_scale);
param->SetNewBias(new_bias);
delete[](new_scale_ptr);
delete[](new_bias_ptr);
PADDLE_MOBILE_ENFORCE(
param->Filter()->dims()[2] == param->Filter()->dims()[3] &&
param->Paddings()[0] == param->Paddings()[1],
"need equal");
int offset = static_cast<int>(param->Filter()->dims()[2]) / 2 -
static_cast<int>(param->Paddings()[1]);
param->SetOffset(offset);
/*
if (param->Filter()->dims()[2] == 1 &&
param->Filter()->dims()[3] == 1 &&
(param->Filter()->dims()[0] % 16) == 0) {
param->Filter()->InitNImage(cl_helper_.CLContext(),
cl_helper_.CLCommandQueue());
this->cl_helper_.AddKernel("conv_1x1_4", "conv_add_bn_relu_kernel.cl");
DLOG << " conv add bn relu conv 1x1 4";
}
*/
if (param->Filter()->dims()[2] == 1 && param->Filter()->dims()[3] == 1) {
param->Filter()->InitNImage(cl_helper_.CLContext(),
cl_helper_.CLCommandQueue());
this->cl_helper_.AddKernel("conv_1x1", "conv_add_bn_relu_kernel.cl");
DLOG << " conv add bn relu conv 1x1";
} else if (param->Filter()->dims()[1] == 1 &&
param->Input()->dims()[1] == param->Output()->dims()[1] &&
param->Filter()->dims()[2] == 3) {
param->Filter()->InitDWImage(cl_helper_.CLContext(),
cl_helper_.CLCommandQueue());
this->cl_helper_.AddKernel("depth_conv_3x3", "conv_add_bn_relu_kernel.cl");
DLOG << " conv add bn relu depth_conv_3x3";
} else if (param->Filter()->dims()[2] == 3 &&
param->Filter()->dims()[3] == 3) {
param->Filter()->InitCLImage(cl_helper_.CLContext(),
cl_helper_.CLCommandQueue());
this->cl_helper_.AddKernel("conv_3x3", "conv_add_bn_relu_kernel.cl");
DLOG << " conv add bn relu conv_3x3";
} else {
PADDLE_MOBILE_THROW_EXCEPTION(" not support ");
}
return true;
}
template <>
void ConvAddBNReluKernel<GPU_CL, float>::Compute(
const FusionConvAddBNReluParam<GPU_CL> &param) {
auto kernel = this->cl_helper_.KernelAt(0);
auto default_work_size = this->cl_helper_.DefaultWorkSize(*param.Output());
int c_block = default_work_size[0];
int w = default_work_size[1];
int nh = default_work_size[2];
auto input = param.Input()->GetCLImage();
auto filter = param.Filter()->GetCLImage();
auto biase = param.Bias()->GetCLImage();
auto new_scale = param.NewScale()->GetCLImage();
auto new_bias = param.NewBias()->GetCLImage();
auto output = param.Output()->GetCLImage();
int stride = param.Strides()[0];
int offset = param.Offset();
int input_c = reinterpret_cast<framework::CLImageConverterFolder *>(
param.Input()->Converter())
->GetCBlock();
int dilation = param.Dilations()[0];
int input_width = param.Input()->dims()[3];
int input_height = param.Input()->dims()[2];
int output_width = param.Output()->dims()[3];
int output_height = param.Output()->dims()[2];
// DLOG << " c block " << c_block;
// DLOG << " w " << w;
// DLOG << " nh " << nh;
// DLOG << " stride " << stride;
// DLOG << " offset " << offset;
// DLOG << " input_c " << input_c;
// DLOG << " dilation " << dilation;
// DLOG << " input width " << input_width;
// DLOG << " input height " << input_height;
// DLOG << " output width " << output_width;
// DLOG << " output height " << output_height;
// DLOG << " input dim " << param.Input()->dims();
// DLOG << " output dim " << param.Output()->dims();
// DLOG << " filter dim " << param.Filter()->dims();
cl_int status;
status = clSetKernelArg(kernel, 0, sizeof(int), &c_block);
CL_CHECK_ERRORS(status);
status = clSetKernelArg(kernel, 1, sizeof(int), &w);
CL_CHECK_ERRORS(status);
status = clSetKernelArg(kernel, 2, sizeof(int), &nh);
CL_CHECK_ERRORS(status);
status = clSetKernelArg(kernel, 3, sizeof(cl_mem), &input);
CL_CHECK_ERRORS(status);
status = clSetKernelArg(kernel, 4, sizeof(cl_mem), &filter);
CL_CHECK_ERRORS(status);
status = clSetKernelArg(kernel, 5, sizeof(cl_mem), &biase);
CL_CHECK_ERRORS(status);
status = clSetKernelArg(kernel, 6, sizeof(cl_mem), &new_scale);
CL_CHECK_ERRORS(status);
status = clSetKernelArg(kernel, 7, sizeof(cl_mem), &new_bias);
CL_CHECK_ERRORS(status);
status = clSetKernelArg(kernel, 8, sizeof(cl_mem), &output);
CL_CHECK_ERRORS(status);
status = clSetKernelArg(kernel, 9, sizeof(int), &stride);
CL_CHECK_ERRORS(status);
status = clSetKernelArg(kernel, 10, sizeof(int), &offset);
CL_CHECK_ERRORS(status);
status = clSetKernelArg(kernel, 11, sizeof(int), &input_c);
CL_CHECK_ERRORS(status);
status = clSetKernelArg(kernel, 12, sizeof(int), &dilation);
CL_CHECK_ERRORS(status);
status = clSetKernelArg(kernel, 13, sizeof(int), &input_width);
CL_CHECK_ERRORS(status);
status = clSetKernelArg(kernel, 14, sizeof(int), &input_height);
CL_CHECK_ERRORS(status);
status = clSetKernelArg(kernel, 15, sizeof(int), &output_width);
CL_CHECK_ERRORS(status);
status = clSetKernelArg(kernel, 16, sizeof(int), &output_height);
CL_CHECK_ERRORS(status);
// cl_event out_event = param.Output()->GetClEvent();
// cl_event wait_event = param.Input()->GetClEvent();
/*
if (param.Filter()->dims()[2] == 1 &&
param.Filter()->dims()[3] == 1 &&
param.Filter()->dims()[0] % 16 == 0) {
DLOG << " before modifi work size: " << default_work_size;
default_work_size[0] = default_work_size[0] / 4;
DLOG << " modification work size: " << default_work_size;
DLOG << " input dims " << param.Input()->dims();
DLOG << " output dims " << param.Output()->dims();
DLOG << " filter dims: " << param.Filter()->dims();
DLOG << " biase dims : " << param.Bias()->dims();
}
*/
status = clEnqueueNDRangeKernel(
this->cl_helper_.CLCommandQueue(), kernel, default_work_size.size(), NULL,
default_work_size.data(), NULL, 0, NULL, NULL);
CL_CHECK_ERRORS(status);
}
template class ConvAddBNReluKernel<GPU_CL, float>;
} // namespace operators
} // namespace paddle_mobile
#endif
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#ifdef FUSION_CONVADD_OP
#include "operators/kernel/conv_add_kernel.h"
namespace paddle_mobile {
namespace operators {
template <>
bool ConvAddKernel<GPU_CL, float>::Init(FusionConvAddParam<GPU_CL> *param) {
PADDLE_MOBILE_ENFORCE(
param->Filter()->dims()[2] == param->Filter()->dims()[3] &&
param->Paddings()[0] == param->Paddings()[1],
"need equal");
param->Bias()->InitCLImage(cl_helper_.CLContext(),
this->cl_helper_.CLCommandQueue());
int offset = static_cast<int>(param->Filter()->dims()[2]) / 2 -
static_cast<int>(param->Paddings()[1]);
param->SetOffset(offset);
if (param->Filter()->dims()[2] == 1 && param->Filter()->dims()[3] == 1) {
param->Filter()->InitNImage(cl_helper_.CLContext(),
cl_helper_.CLCommandQueue());
this->cl_helper_.AddKernel("conv_1x1", "conv_add_kernel.cl");
} else if (param->Filter()->dims()[1] == 1 &&
param->Input()->dims()[1] == param->Output()->dims()[1] &&
param->Filter()->dims()[2] == 3) {
param->Filter()->InitDWImage(cl_helper_.CLContext(),
cl_helper_.CLCommandQueue());
this->cl_helper_.AddKernel("depth_conv_3x3", "conv_add_kernel.cl");
} else if (param->Filter()->dims()[2] == 3 &&
param->Filter()->dims()[3] == 3) {
param->Filter()->InitCLImage(cl_helper_.CLContext(),
cl_helper_.CLCommandQueue());
this->cl_helper_.AddKernel("conv_3x3", "conv_add_kernel.cl");
} else {
PADDLE_MOBILE_THROW_EXCEPTION(" not support ");
}
return true;
}
template <>
void ConvAddKernel<GPU_CL, float>::Compute(
const FusionConvAddParam<GPU_CL> &param) {
auto kernel = this->cl_helper_.KernelAt(0);
auto default_work_size = this->cl_helper_.DefaultWorkSize(*param.Output());
int c_block = default_work_size[0];
int w = default_work_size[1];
int nh = default_work_size[2];
auto input = param.Input()->GetCLImage();
auto filter = param.Filter()->GetCLImage();
DLOG << "---yangfei30---";
DLOG << *param.Filter();
DLOG << param.Paddings();
auto biase = param.Bias()->GetCLImage();
auto output = param.Output()->GetCLImage();
int stride = param.Strides()[0];
int offset = param.Offset();
int input_c = reinterpret_cast<framework::CLImageConverterFolder *>(
param.Input()->Converter())
->GetCBlock();
int dilation = param.Dilations()[0];
int input_width = param.Input()->dims()[3];
int input_height = param.Input()->dims()[2];
int output_width = param.Output()->dims()[3];
int output_height = param.Output()->dims()[2];
cl_int status;
status = clSetKernelArg(kernel, 0, sizeof(int), &c_block);
CL_CHECK_ERRORS(status);
status = clSetKernelArg(kernel, 1, sizeof(int), &w);
CL_CHECK_ERRORS(status);
status = clSetKernelArg(kernel, 2, sizeof(int), &nh);
CL_CHECK_ERRORS(status);
status = clSetKernelArg(kernel, 3, sizeof(cl_mem), &input);
CL_CHECK_ERRORS(status);
status = clSetKernelArg(kernel, 4, sizeof(cl_mem), &filter);
CL_CHECK_ERRORS(status);
status = clSetKernelArg(kernel, 5, sizeof(cl_mem), &biase);
CL_CHECK_ERRORS(status);
status = clSetKernelArg(kernel, 6, sizeof(cl_mem), &output);
CL_CHECK_ERRORS(status);
status = clSetKernelArg(kernel, 7, sizeof(int), &stride);
CL_CHECK_ERRORS(status);
status = clSetKernelArg(kernel, 8, sizeof(int), &offset);
CL_CHECK_ERRORS(status);
status = clSetKernelArg(kernel, 9, sizeof(int), &input_c);
CL_CHECK_ERRORS(status);
status = clSetKernelArg(kernel, 10, sizeof(int), &dilation);
CL_CHECK_ERRORS(status);
status = clSetKernelArg(kernel, 11, sizeof(int), &input_width);
CL_CHECK_ERRORS(status);
status = clSetKernelArg(kernel, 12, sizeof(int), &input_height);
CL_CHECK_ERRORS(status);
status = clSetKernelArg(kernel, 13, sizeof(int), &output_width);
CL_CHECK_ERRORS(status);
status = clSetKernelArg(kernel, 14, sizeof(int), &output_height);
CL_CHECK_ERRORS(status);
// cl_event out_event = param.Output()->GetClEvent();
// cl_event wait_event = param.Input()->GetClEvent();
status = clEnqueueNDRangeKernel(
this->cl_helper_.CLCommandQueue(), kernel, default_work_size.size(), NULL,
default_work_size.data(), NULL, 0, NULL, NULL);
CL_CHECK_ERRORS(status);
}
template class ConvAddKernel<GPU_CL, float>;
} // namespace operators
} // namespace paddle_mobile
#endif
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#ifdef FUSION_CONVADDRELU_OP
#include "operators/kernel/conv_add_relu_kernel.h"
namespace paddle_mobile {
namespace operators {
template <>
bool ConvAddReluKernel<GPU_CL, float>::Init(
FusionConvAddReluParam<GPU_CL> *param) {
PADDLE_MOBILE_ENFORCE(
param->Filter()->dims()[2] == param->Filter()->dims()[3] &&
param->Paddings()[0] == param->Paddings()[1],
"need equal");
param->Bias()->InitCLImage(cl_helper_.CLContext(),
this->cl_helper_.CLCommandQueue());
int offset = static_cast<int>(param->Filter()->dims()[2]) / 2 -
static_cast<int>(param->Paddings()[1]);
param->SetOffset(offset);
if (param->Filter()->dims()[2] == 1 && param->Filter()->dims()[3] == 1) {
param->Filter()->InitNImage(cl_helper_.CLContext(),
cl_helper_.CLCommandQueue());
this->cl_helper_.AddKernel("conv_1x1", "conv_add_relu_kernel.cl");
} else if (param->Filter()->dims()[1] == 1 &&
param->Input()->dims()[1] == param->Output()->dims()[1] &&
param->Filter()->dims()[2] == 3) {
param->Filter()->InitDWImage(cl_helper_.CLContext(),
cl_helper_.CLCommandQueue());
this->cl_helper_.AddKernel("depth_conv_3x3", "conv_add_relu_kernel.cl");
} else if (param->Filter()->dims()[2] == 3 &&
param->Filter()->dims()[3] == 3) {
param->Filter()->InitCLImage(cl_helper_.CLContext(),
cl_helper_.CLCommandQueue());
this->cl_helper_.AddKernel("conv_3x3", "conv_add_relu_kernel.cl");
} else {
PADDLE_MOBILE_THROW_EXCEPTION(" not support ");
}
return true;
}
template <>
void ConvAddReluKernel<GPU_CL, float>::Compute(
const FusionConvAddReluParam<GPU_CL> &param) {
auto kernel = this->cl_helper_.KernelAt(0);
auto default_work_size = this->cl_helper_.DefaultWorkSize(*param.Output());
int c_block = default_work_size[0];
int w = default_work_size[1];
int nh = default_work_size[2];
auto input = param.Input()->GetCLImage();
auto filter = param.Filter()->GetCLImage();
DLOG << "---yangfei30---";
DLOG << *param.Filter();
DLOG << param.Paddings();
auto biase = param.Bias()->GetCLImage();
auto output = param.Output()->GetCLImage();
int stride = param.Strides()[0];
int offset = param.Offset();
int input_c = reinterpret_cast<framework::CLImageConverterFolder *>(
param.Input()->Converter())
->GetCBlock();
int dilation = param.Dilations()[0];
int input_width = param.Input()->dims()[3];
int input_height = param.Input()->dims()[2];
int output_width = param.Output()->dims()[3];
int output_height = param.Output()->dims()[2];
cl_int status;
status = clSetKernelArg(kernel, 0, sizeof(int), &c_block);
CL_CHECK_ERRORS(status);
status = clSetKernelArg(kernel, 1, sizeof(int), &w);
CL_CHECK_ERRORS(status);
status = clSetKernelArg(kernel, 2, sizeof(int), &nh);
CL_CHECK_ERRORS(status);
status = clSetKernelArg(kernel, 3, sizeof(cl_mem), &input);
CL_CHECK_ERRORS(status);
status = clSetKernelArg(kernel, 4, sizeof(cl_mem), &filter);
CL_CHECK_ERRORS(status);
status = clSetKernelArg(kernel, 5, sizeof(cl_mem), &biase);
CL_CHECK_ERRORS(status);
status = clSetKernelArg(kernel, 6, sizeof(cl_mem), &output);
CL_CHECK_ERRORS(status);
status = clSetKernelArg(kernel, 7, sizeof(int), &stride);
CL_CHECK_ERRORS(status);
status = clSetKernelArg(kernel, 8, sizeof(int), &offset);
CL_CHECK_ERRORS(status);
status = clSetKernelArg(kernel, 9, sizeof(int), &input_c);
CL_CHECK_ERRORS(status);
status = clSetKernelArg(kernel, 10, sizeof(int), &dilation);
CL_CHECK_ERRORS(status);
status = clSetKernelArg(kernel, 11, sizeof(int), &input_width);
CL_CHECK_ERRORS(status);
status = clSetKernelArg(kernel, 12, sizeof(int), &input_height);
CL_CHECK_ERRORS(status);
status = clSetKernelArg(kernel, 13, sizeof(int), &output_width);
CL_CHECK_ERRORS(status);
status = clSetKernelArg(kernel, 14, sizeof(int), &output_height);
CL_CHECK_ERRORS(status);
// cl_event out_event = param.Output()->GetClEvent();
// cl_event wait_event = param.Input()->GetClEvent();
status = clEnqueueNDRangeKernel(
this->cl_helper_.CLCommandQueue(), kernel, default_work_size.size(), NULL,
default_work_size.data(), NULL, 0, NULL, NULL);
CL_CHECK_ERRORS(status);
}
template class ConvAddReluKernel<GPU_CL, float>;
} // namespace operators
} // namespace paddle_mobile
#endif
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#ifdef CONV_OP
#include "operators/kernel/conv_kernel.h"
namespace paddle_mobile {
namespace operators {
template <>
bool ConvKernel<GPU_CL, float>::Init(ConvParam<GPU_CL> *param) {
PADDLE_MOBILE_ENFORCE(
param->Filter()->dims()[2] == param->Filter()->dims()[3] &&
param->Paddings()[0] == param->Paddings()[1],
"need equal");
auto filter_ddim = param->Filter()->dims();
std::vector<int64_t> filter_shape(
{filter_ddim[1], filter_ddim[0], filter_ddim[2], filter_ddim[3]});
framework::DDim ddim = framework::make_ddim(filter_shape);
if (filter_ddim[1] == 1) {
param->Filter()->Resize(ddim);
}
param->Filter()->InitCLImage(cl_helper_.CLContext(),
this->cl_helper_.CLCommandQueue());
int offset = static_cast<int>(param->Filter()->dims()[2]) / 2 -
static_cast<int>(param->Paddings()[1]);
param->SetOffset(offset);
DLOG << " init helper: " << &cl_helper_;
DLOG << " conv kernel add kernel ~ ";
DLOG << " width of one block: " << param->Filter()->dims()[3];
DLOG << " height of one block: " << param->Filter()->dims()[2];
DLOG << " filter dims: " << param->Filter()->dims();
if (param->Filter()->dims()[2] == 1 && param->Filter()->dims()[3] == 1) {
DLOG << " here1 ";
this->cl_helper_.AddKernel("conv_1x1", "conv_kernel.cl");
} else if (param->Filter()->dims()[0] == 1 &&
param->Input()->dims()[1] == param->Output()->dims()[1] &&
param->Filter()->dims()[2] == 3) {
DLOG << " here2 ";
this->cl_helper_.AddKernel("depth_conv_3x3", "depthwise_conv_kernel.cl");
} else if (param->Filter()->dims()[2] == 3 &&
param->Filter()->dims()[3] == 3) {
DLOG << " here3 ";
this->cl_helper_.AddKernel("conv_3x3", "conv_kernel.cl");
} else {
PADDLE_MOBILE_THROW_EXCEPTION(" not support ");
}
return true;
}
template <>
void ConvKernel<GPU_CL, float>::Compute(const ConvParam<GPU_CL> &param) {
auto kernel = this->cl_helper_.KernelAt(0);
auto default_work_size = this->cl_helper_.DefaultWorkSize(*param.Output());
int c_block = default_work_size[0];
int w = default_work_size[1];
int nh = default_work_size[2];
auto input = param.Input()->GetCLImage();
auto filter = param.Filter()->GetCLImage();
auto output = param.Output()->GetCLImage();
int stride = param.Strides()[0];
int offset = param.Offset();
int input_c = reinterpret_cast<framework::CLImageConverterFolder *>(
param.Input()->Converter())
->GetCBlock();
int dilation = param.Dilations()[0];
int input_width = param.Input()->dims()[3];
int input_height = param.Input()->dims()[2];
int output_width = param.Output()->dims()[3];
int output_height = param.Output()->dims()[2];
cl_int status;
DLOG << " begin set kernel arg ";
DLOG << " c block " << c_block;
DLOG << " w " << w;
DLOG << " nh " << nh;
DLOG << " stride " << stride;
DLOG << " offset " << offset;
DLOG << " input_c " << input_c;
DLOG << " dilation " << dilation;
DLOG << " input width " << input_width;
DLOG << " input height " << input_height;
DLOG << " output width " << output_width;
DLOG << " output height " << output_height;
status = clSetKernelArg(kernel, 0, sizeof(int), &c_block);
status = clSetKernelArg(kernel, 1, sizeof(int), &w);
status = clSetKernelArg(kernel, 2, sizeof(int), &nh);
status = clSetKernelArg(kernel, 3, sizeof(cl_mem), &input);
status = clSetKernelArg(kernel, 4, sizeof(cl_mem), &filter);
status = clSetKernelArg(kernel, 5, sizeof(cl_mem), &output);
status = clSetKernelArg(kernel, 6, sizeof(int), &stride);
status = clSetKernelArg(kernel, 7, sizeof(int), &offset);
status = clSetKernelArg(kernel, 8, sizeof(int), &input_c);
status = clSetKernelArg(kernel, 9, sizeof(int), &dilation);
status = clSetKernelArg(kernel, 10, sizeof(int), &input_width);
status = clSetKernelArg(kernel, 11, sizeof(int), &input_height);
status = clSetKernelArg(kernel, 12, sizeof(int), &output_width);
status = clSetKernelArg(kernel, 13, sizeof(int), &output_height);
// cl_event out_event = param.Output()->GetClEvent();
// cl_event wait_event = param.Input()->GetClEvent();
status = clEnqueueNDRangeKernel(
this->cl_helper_.CLCommandQueue(), kernel, default_work_size.size(), NULL,
default_work_size.data(), NULL, 0, NULL, NULL);
CL_CHECK_ERRORS(status);
}
template class ConvKernel<GPU_CL, float>;
} // namespace operators
} // namespace paddle_mobile
#endif
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#ifdef DEPTHWISECONV_OP
#include "operators/kernel/depthwise_conv_kernel.h"
#include "operators/kernel/central-arm-func/depthwise_conv_arm_func.h"
namespace paddle_mobile {
namespace operators {
template <>
bool DepthwiseConvKernel<GPU_CL, float>::Init(ConvParam<GPU_CL> *param) {
DLOG << " depthwise conv kernel init begin ";
PADDLE_MOBILE_ENFORCE(
param->Filter()->dims()[2] == param->Filter()->dims()[3] &&
param->Paddings()[0] == param->Paddings()[1],
"need equal");
param->Filter()->InitCLImage(cl_helper_.CLContext(),
this->cl_helper_.CLCommandQueue());
int offset = static_cast<int>(param->Filter()->dims()[2]) / 2 -
static_cast<int>(param->Paddings()[1]);
param->SetOffset(offset);
this->cl_helper_.AddKernel("depth_conv_3x3", "conv_add_bn_relu_kernel.cl");
DLOG << " depthwise conv kernel init end ";
return true;
}
template <>
void DepthwiseConvKernel<GPU_CL, float>::Compute(
const ConvParam<GPU_CL> &param) {
auto kernel = this->cl_helper_.KernelAt(0);
auto default_work_size = this->cl_helper_.DefaultWorkSize(*param.Output());
int c_block = default_work_size[0];
int w = default_work_size[1];
int nh = default_work_size[2];
auto input = param.Input()->GetCLImage();
auto filter = param.Filter()->GetCLImage();
auto output = param.Output()->GetCLImage();
int stride = param.Strides()[0];
int offset = param.Offset();
int input_c = reinterpret_cast<framework::CLImageConverterFolder *>(
param.Input()->Converter())
->GetCBlock();
int dilation = param.Dilations()[0];
int input_width = param.Input()->dims()[3];
int input_height = param.Input()->dims()[2];
int output_width = param.Output()->dims()[3];
int output_height = param.Output()->dims()[2];
cl_int status;
status = clSetKernelArg(kernel, 0, sizeof(int), &c_block);
status = clSetKernelArg(kernel, 1, sizeof(int), &w);
status = clSetKernelArg(kernel, 2, sizeof(int), &nh);
status = clSetKernelArg(kernel, 3, sizeof(cl_mem), &input);
status = clSetKernelArg(kernel, 4, sizeof(cl_mem), &filter);
status = clSetKernelArg(kernel, 5, sizeof(cl_mem), &output);
status = clSetKernelArg(kernel, 6, sizeof(int), &stride);
status = clSetKernelArg(kernel, 7, sizeof(int), &offset);
status = clSetKernelArg(kernel, 8, sizeof(int), &input_c);
status = clSetKernelArg(kernel, 9, sizeof(int), &dilation);
status = clSetKernelArg(kernel, 10, sizeof(int), &input_width);
status = clSetKernelArg(kernel, 11, sizeof(int), &input_height);
status = clSetKernelArg(kernel, 12, sizeof(int), &output_width);
status = clSetKernelArg(kernel, 13, sizeof(int), &output_height);
CL_CHECK_ERRORS(status);
// cl_event out_event = param.Output()->GetClEvent();
// cl_event wait_event = param.Input()->GetClEvent();
status = clEnqueueNDRangeKernel(
this->cl_helper_.CLCommandQueue(), kernel, default_work_size.size(), NULL,
default_work_size.data(), NULL, 0, NULL, NULL);
CL_CHECK_ERRORS(status);
}
template class DepthwiseConvKernel<GPU_CL, float>;
} // namespace operators
} // namespace paddle_mobile
#endif
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#ifdef ELEMENTWISEADD_OP
#include "operators/kernel/elementwise_add_kernel.h"
namespace paddle_mobile {
namespace operators {
template <>
bool ElementwiseAddKernel<GPU_CL, float>::Init(
ElementwiseAddParam<GPU_CL> *param) {
DLOG << "-----init add-----";
CLImage *bias = (CLImage *)(param->InputY());
bias->InitCLImage(cl_helper_.CLContext(), this->cl_helper_.CLCommandQueue());
DLOG << " bias: " << *bias;
if (bias->dims().size() == 4) {
this->cl_helper_.AddKernel("elementwise_add", "elementwise_add_kernel.cl");
} else if (param->InputY()->dims().size() == 1) {
this->cl_helper_.AddKernel("channel_add", "channel_add_kernel.cl");
} else {
DLOG << "error:bias dims is error";
}
return true;
}
template <>
void ElementwiseAddKernel<GPU_CL, float>::Compute(
const ElementwiseAddParam<GPU_CL> &param) {
auto input = param.InputX();
auto bias = param.InputY();
auto output = param.Out();
cl_int status;
auto kernel = this->cl_helper_.KernelAt(0);
if (bias->dims().size() == 4) {
cl_mem input_image = input->GetCLImage();
cl_mem bias_image = bias->GetCLImage();
cl_mem output_image = output->GetCLImage();
status = clSetKernelArg(kernel, 0, sizeof(cl_mem), reinterpret_cast<void *>(&input_image));
CL_CHECK_ERRORS(status);
status = clSetKernelArg(kernel, 1, sizeof(cl_mem), reinterpret_cast<void *>(&bias_image));
CL_CHECK_ERRORS(status);
status = clSetKernelArg(kernel, 2, sizeof(cl_mem), reinterpret_cast<void *>(&output_image));
CL_CHECK_ERRORS(status);
int width = input->ImageWidth();
int height = input->ImageHeight();
size_t global_work_size[2] = {width, height};
status =
clEnqueueNDRangeKernel(this->cl_helper_.CLCommandQueue(), kernel, 2,
NULL, global_work_size, NULL, 0, NULL, NULL);
CL_CHECK_ERRORS(status);
} else if (bias->dims().size() == 1) {
cl_mem input_image = input->GetCLImage();
cl_mem bias_image = bias->GetCLImage();
cl_mem output_image = output->GetCLImage();
int tensor_w = input->dims()[3];
status = clSetKernelArg(kernel, 0, sizeof(cl_mem), reinterpret_cast<void *>(&input_image));
CL_CHECK_ERRORS(status);
status = clSetKernelArg(kernel, 1, sizeof(cl_mem), reinterpret_cast<void *>(&bias_image));
CL_CHECK_ERRORS(status);
status = clSetKernelArg(kernel, 2, sizeof(cl_mem), reinterpret_cast<void *>(&output_image));
CL_CHECK_ERRORS(status);
status = clSetKernelArg(kernel, 3, sizeof(cl_int), reinterpret_cast<void *>(&tensor_w));
CL_CHECK_ERRORS(status);
int width = input->ImageWidth();
int height = input->ImageHeight();
size_t global_work_size[2] = {width, height};
cl_event out_event = param.Out()->GetClEvent();
cl_event wait_event = param.InputX()->GetClEvent();
status =
clEnqueueNDRangeKernel(this->cl_helper_.CLCommandQueue(), kernel, 2,
NULL, global_work_size, NULL, 0, NULL, NULL);
CL_CHECK_ERRORS(status);
} else {
DLOG << "error:bias dims is error";
}
}
template class ElementwiseAddKernel<GPU_CL, float>;
} // namespace operators
} // namespace paddle_mobile
#endif
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "operators/kernel/feed_kernel.h"
#include "framework/cl/cl_tensor.h"
namespace paddle_mobile {
namespace operators {
template <>
bool FeedKernel<GPU_CL, float>::Init(FeedParam<GPU_CL> *param) {
DLOG << "Init feed";
this->cl_helper_.AddKernel("feed", "feed_kernel.cl");
return true;
}
template <>
void FeedKernel<GPU_CL, float>::Compute(const FeedParam<GPU_CL> &param) {
auto kernel = this->cl_helper_.KernelAt(0);
cl_int status;
auto output = param.Out();
const Tensor *input = param.InputX();
// DLOG << *input;
const float *input_data = input->data<float>();
int numel = input->numel();
cl_mem cl_image = output->GetCLImage();
int height = output->dims()[2];
int width = output->dims()[3];
CLTensor input_cl_tensor(this->cl_helper_.CLContext(),
this->cl_helper_.CLCommandQueue());
input_cl_tensor.Resize(input->dims());
cl_mem inputBuffer = input_cl_tensor.mutable_with_data<float>(input_data);
status = clSetKernelArg(kernel, 0, sizeof(cl_mem), &inputBuffer);
CL_CHECK_ERRORS(status);
status = clSetKernelArg(kernel, 1, sizeof(cl_mem), &cl_image);
CL_CHECK_ERRORS(status);
status = clSetKernelArg(kernel, 2, sizeof(cl_int), &width);
CL_CHECK_ERRORS(status);
status = clSetKernelArg(kernel, 3, sizeof(cl_int), &height);
CL_CHECK_ERRORS(status);
size_t global_work_size[2] = {width, height};
// cl_event out_event = param.Out()->GetClEvent();
status = clEnqueueNDRangeKernel(this->cl_helper_.CLCommandQueue(), kernel, 2,
NULL, global_work_size, NULL, 0, NULL, NULL);
CL_CHECK_ERRORS(status);
}
template class FeedKernel<GPU_CL, float>;
} // namespace operators
} // namespace paddle_mobile
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "operators/kernel/fetch_kernel.h"
#include "framework/cl/cl_tensor.h"
// #include "common/common.h"
// #include <iostream>
namespace paddle_mobile {
namespace operators {
template <>
bool FetchKernel<GPU_CL, float>::Init(FetchParam<GPU_CL> *param) {
if (param->InputX()->dims().size() <= 2) {
this->cl_helper_.AddKernel("fetch_2d", "fetch_kernel.cl");
} else {
this->cl_helper_.AddKernel("fetch", "fetch_kernel.cl");
}
auto *out = param->Out();
out->mutable_data<float>();
return true;
}
template <>
void FetchKernel<GPU_CL, float>::Compute(const FetchParam<GPU_CL> &param) {
auto kernel = this->cl_helper_.KernelAt(0);
auto default_work_size = this->cl_helper_.DefaultWorkSize(*param.InputX());
auto input = param.InputX()->GetCLImage();
auto *out = param.Out();
const auto &dim = param.InputX()->dims();
size_t new_dims[] = {1, 1, 1, 1};
for (int j = 0; j < dim.size(); ++j) {
new_dims[4 - dim.size() + j] = dim[j];
}
size_t C, in_height, in_width;
C = new_dims[1];
in_height = new_dims[2];
if (dim.size() <= 2) {
in_width = param.InputX()->ImageWidth();
} else {
in_width = new_dims[3];
}
CLTensor out_cl_tensor(this->cl_helper_.CLContext(),
this->cl_helper_.CLCommandQueue());
out_cl_tensor.Resize(out->dims());
cl_mem outBuffer = out_cl_tensor.mutable_data<float>();
clSetKernelArg(kernel, 0, sizeof(int), &in_height);
clSetKernelArg(kernel, 1, sizeof(int), &in_width);
clSetKernelArg(kernel, 2, sizeof(cl_mem), &input);
clSetKernelArg(kernel, 3, sizeof(cl_mem), &outBuffer);
if (dim.size() > 2) {
int size_ch = in_height * in_width;
int size_block = size_ch * 4;
int size_batch = size_ch * C;
clSetKernelArg(kernel, 4, sizeof(int), &size_ch);
clSetKernelArg(kernel, 5, sizeof(int), &size_block);
clSetKernelArg(kernel, 6, sizeof(int), &size_batch);
}
// cl_event wait_event = param.InpdutX()->GetClEvent();
clEnqueueNDRangeKernel(this->cl_helper_.CLCommandQueue(), kernel, 3, NULL,
default_work_size.data(), NULL, 0, NULL, NULL);
// auto time1 = paddle_mobile::time();
// printf(" before finish \n");
// clFlsh(this->cl_helper_.CLCommandQueue());
clFinish(this->cl_helper_.CLCommandQueue());
// printf(" after finish \n");
// auto time2 = paddle_mobile::time();
//
//
// std::cout << " finish cost :" << paddle_mobile::time_diff(time1, time2)
// << "ms" << std::endl;
memcpy(out->data<float>(), out_cl_tensor.Data<float>(), out->memory_size());
}
template class FetchKernel<GPU_CL, float>;
} // namespace operators
} // namespace paddle_mobile
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#ifdef POOL_OP
#include "operators/kernel/pool_kernel.h"
namespace paddle_mobile {
namespace operators {
template <>
bool PoolKernel<GPU_CL, float>::Init(PoolParam<GPU_CL> *param) {
std::string pooling_type = param->PoolingType();
this->cl_helper_.AddKernel("pool_" + pooling_type, "pool_kernel.cl");
return true;
}
template <>
void PoolKernel<GPU_CL, float>::Compute(const PoolParam<GPU_CL> &param) {
auto kernel = this->cl_helper_.KernelAt(0);
auto default_work_size = this->cl_helper_.DefaultWorkSize(*param.Output());
auto input = param.Input()->GetCLImage();
auto out = param.Output()->GetCLImage();
framework::CLImageConverterFolder *input_folder_converter =
reinterpret_cast<framework::CLImageConverterFolder *>(
param.Input()->Converter());
framework::CLImageConverterFolder *output_folder_converter =
reinterpret_cast<framework::CLImageConverterFolder *>(
param.Output()->Converter());
const int in_height = input_folder_converter->HeightOfOneBlock();
const int in_width = input_folder_converter->WidthOfOneBlock();
const int out_height = output_folder_converter->HeightOfOneBlock();
const int out_width = output_folder_converter->WidthOfOneBlock();
std::string pooling_type = param.PoolingType();
std::vector<int> ksize = param.Ksize();
std::vector<int> strides = param.Strides();
std::vector<int> paddings = param.Paddings();
const int pad_top = paddings[0];
const int pad_left = paddings[1];
const int stride_h = strides[0];
const int stride_w = strides[1];
const int ksize_h = ksize[0];
const int ksize_w = ksize[1];
clSetKernelArg(kernel, 0, sizeof(cl_int), &in_height);
clSetKernelArg(kernel, 1, sizeof(cl_int), &in_width);
clSetKernelArg(kernel, 2, sizeof(cl_int), &out_height);
clSetKernelArg(kernel, 3, sizeof(cl_int), &out_width);
clSetKernelArg(kernel, 4, sizeof(cl_int), &pad_top);
clSetKernelArg(kernel, 5, sizeof(cl_int), &pad_left);
clSetKernelArg(kernel, 6, sizeof(cl_int), &stride_h);
clSetKernelArg(kernel, 7, sizeof(cl_int), &stride_w);
clSetKernelArg(kernel, 8, sizeof(cl_int), &ksize_h);
clSetKernelArg(kernel, 9, sizeof(cl_int), &ksize_w);
clSetKernelArg(kernel, 10, sizeof(cl_mem), &input);
clSetKernelArg(kernel, 11, sizeof(cl_mem), &out);
// cl_event out_event = param.Output()->GetClEvent();
// cl_event wait_event = param.Input()->GetClEvent();
clEnqueueNDRangeKernel(this->cl_helper_.CLCommandQueue(), kernel, 3, NULL,
default_work_size.data(), NULL, 0, NULL, NULL);
}
template class PoolKernel<GPU_CL, float>;
} // namespace operators
} // namespace paddle_mobile
#endif
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#ifdef RELU_OP
#include "operators/kernel/relu_kernel.h"
namespace paddle_mobile {
namespace operators {
template <>
bool ReluKernel<GPU_CL, float>::Init(ReluParam<GPU_CL>* param) {
this->cl_helper_.AddKernel("relu", "relu.cl");
// this->cl_helper_.AddKernel("relu_p0", "relu.cl");
// this->cl_helper_.AddKernel("relu_p1", "relu.cl");
// const auto dim =
// const_cast<framework::CLImage*>(param->InputX())->ImageDims();
// param->getMidImage().InitEmptyImage(this->cl_helper_.CLContext(),
// this->cl_helper_.CLCommandQueue(),
// dim);
return true;
}
template <>
void ReluKernel<GPU_CL, float>::Compute(const ReluParam<GPU_CL>& param) {
auto kernel = this->cl_helper_.KernelAt(0);
// auto kernel_p0 = this->cl_helper_.KernelAt(1);
// auto kernel_p1 = this->cl_helper_.KernelAt(2);
const auto* input = param.InputX();
auto* output = param.Out();
auto default_work_size = this->cl_helper_.DefaultWorkSize(*output);
auto inputImage = input->GetCLImage();
auto outputImage = output->GetCLImage();
// auto tImage =
// const_cast<ReluParam<GPU_CL>&>(param).getMidImage().GetCLImage();
clSetKernelArg(kernel, 0, sizeof(cl_mem), &inputImage);
clSetKernelArg(kernel, 1, sizeof(cl_mem), &outputImage);
// clSetKernelArg(kernel_p0, 0, sizeof(cl_mem), &inputImage);
// clSetKernelArg(kernel_p0, 0, sizeof(cl_mem), &tImage);
// clSetKernelArg(kernel_p1, 0, sizeof(cl_mem), &tImage);
// clSetKernelArg(kernel_p1, 1, sizeof(cl_mem), &outputImage);
const size_t work_size[2] = {input->ImageWidth(), input->ImageHeight()};
// cl_event out_event = param.Out()->GetClEvent();
// cl_event wait_event = param.InputX()->GetClEvent();
clEnqueueNDRangeKernel(this->cl_helper_.CLCommandQueue(), kernel, 2, NULL,
work_size, NULL, 0, NULL, NULL);
// clEnqueueNDRangeKernel(this->cl_helper_.CLCommandQueue(), kernel_p1, 3,
// NULL,
// work_size, NULL, 0, NULL, NULL);
}
template class ReluKernel<GPU_CL, float>;
} // namespace operators
} // namespace paddle_mobile
#endif
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "operators/kernel/reshape_kernel.h"
namespace paddle_mobile {
namespace operators {
template <>
bool ReshapeKernel<GPU_CL, float>::Init(ReshapeParam<GPU_CL> *param) {
this->cl_helper_.AddKernel("reshape", "reshape.cl");
return true;
}
template <>
void ReshapeKernel<GPU_CL, float>::Compute(const ReshapeParam<GPU_CL> &param) {
auto kernel = this->cl_helper_.KernelAt(0);
const auto *input = param.InputX();
auto *output = param.Out();
auto inputImage = input->GetCLImage();
auto outputImage = output->GetCLImage();
clSetKernelArg(kernel, 0, sizeof(cl_mem), &inputImage);
clSetKernelArg(kernel, 1, sizeof(cl_mem), &outputImage);
const auto &inputDim = input->dims();
const auto &outputDim = output->dims();
int dims[4] = {1, 1, 1, 1};
int odims[4] = {1, 1, 1, 1};
// 1 1000 1 1
for (int i = 0; i < inputDim.size(); i++) {
dims[4 - inputDim.size() + i] = inputDim[i];
}
// 1 1 1 1000
for (int i = 0; i < outputDim.size(); i++) {
odims[4 - outputDim.size() + i] = outputDim[i];
}
clSetKernelArg(kernel, 2, sizeof(cl_int), &dims);
clSetKernelArg(kernel, 3, sizeof(cl_int), &dims[1]);
clSetKernelArg(kernel, 4, sizeof(cl_int), &dims[2]);
clSetKernelArg(kernel, 5, sizeof(cl_int), &dims[3]);
clSetKernelArg(kernel, 6, sizeof(cl_int), &odims);
clSetKernelArg(kernel, 7, sizeof(cl_int), &odims[1]);
clSetKernelArg(kernel, 8, sizeof(cl_int), &odims[1]);
clSetKernelArg(kernel, 9, sizeof(cl_int), &odims[1]);
const size_t work_size[2] = {output->ImageWidth(), output->ImageHeight()};
// cl_event out_event = param.Out()->GetClEvent();
// cl_event wait_event = param.InputX()->GetClEvent();
clEnqueueNDRangeKernel(this->cl_helper_.CLCommandQueue(), kernel, 2, NULL,
work_size, NULL, 0, NULL, NULL);
}
template class ReshapeKernel<GPU_CL, float>;
} // namespace operators
} // namespace paddle_mobile
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#ifdef SOFTMAX_OP
#include "operators/kernel/softmax_kernel.h"
namespace paddle_mobile {
namespace operators {
template <>
bool SoftmaxKernel<GPU_CL, float>::Init(SoftmaxParam<GPU_CL> *param) {
this->cl_helper_.AddKernel("softmax", "softmax.cl");
return true;
}
template <>
void SoftmaxKernel<GPU_CL, float>::Compute(const SoftmaxParam<GPU_CL> &param) {
auto kernel = this->cl_helper_.KernelAt(0);
auto default_work_size = this->cl_helper_.DefaultWorkSize(*(param.Out()));
const auto *input = param.InputX();
auto *output = param.Out();
auto inputImage = input->GetCLImage();
auto outputImage = output->GetCLImage();
int group = output->ImageWidth();
cl_int status;
status = clSetKernelArg(kernel, 0, sizeof(cl_mem), &inputImage);
status = clSetKernelArg(kernel, 1, sizeof(cl_mem), &outputImage);
status = clSetKernelArg(kernel, 2, sizeof(int), &group);
// const auto &inputDim = input->dims();
//
// int dims[4] = {1, 1, 1, 1};
//
// for (int i = 0; i < inputDim.size(); i++) {
// dims[4 - inputDim.size() + i] = inputDim[i];
// }
//
// clSetKernelArg(kernel, 2, sizeof(int), &dims);
// clSetKernelArg(kernel, 3, sizeof(int), &dims[1]);
// clSetKernelArg(kernel, 4, sizeof(int), &dims[2]);
// clSetKernelArg(kernel, 5, sizeof(int), &dims[3]);
// cl_event out_event = param.Out()->GetClEvent();
// cl_event wait_event = param.InputX()->GetClEvent();
status = clEnqueueNDRangeKernel(
this->cl_helper_.CLCommandQueue(), kernel, default_work_size.size(), NULL,
default_work_size.data(), NULL, 0, NULL, NULL);
CL_CHECK_ERRORS(status);
}
template class SoftmaxKernel<GPU_CL, float>;
} // namespace operators
} // namespace paddle_mobile
#endif
...@@ -27,7 +27,7 @@ template <typename DeviceType, typename T> ...@@ -27,7 +27,7 @@ template <typename DeviceType, typename T>
class ConcatKernel class ConcatKernel
: public framework::OpKernelBase<DeviceType, ConcatParam<DeviceType>> { : public framework::OpKernelBase<DeviceType, ConcatParam<DeviceType>> {
public: public:
void Compute(const ConcatParam<DeviceType> &param) const; void Compute(const ConcatParam<DeviceType> &param);
bool Init(ConcatParam<DeviceType> *param); bool Init(ConcatParam<DeviceType> *param);
}; };
......
...@@ -35,7 +35,7 @@ template <typename DeviceType, typename T> ...@@ -35,7 +35,7 @@ template <typename DeviceType, typename T>
class ConvAddAddPReluKernel class ConvAddAddPReluKernel
: public OpKernelBase<DeviceType, FusionConvAddAddPReluParam<DeviceType>> { : public OpKernelBase<DeviceType, FusionConvAddAddPReluParam<DeviceType>> {
public: public:
void Compute(const FusionConvAddAddPReluParam<DeviceType> &param) const; void Compute(const FusionConvAddAddPReluParam<DeviceType> &param);
bool Init(FusionConvAddAddPReluParam<DeviceType> *param); bool Init(FusionConvAddAddPReluParam<DeviceType> *param);
}; };
......
...@@ -35,7 +35,7 @@ template <typename DeviceType, typename T> ...@@ -35,7 +35,7 @@ template <typename DeviceType, typename T>
class ConvAddBNKernel class ConvAddBNKernel
: public OpKernelBase<DeviceType, FusionConvAddBNParam<DeviceType>> { : public OpKernelBase<DeviceType, FusionConvAddBNParam<DeviceType>> {
public: public:
void Compute(const FusionConvAddBNParam<DeviceType> &param) const; void Compute(const FusionConvAddBNParam<DeviceType> &param);
bool Init(FusionConvAddBNParam<DeviceType> *param); bool Init(FusionConvAddBNParam<DeviceType> *param);
}; };
......
...@@ -35,7 +35,7 @@ template <typename DeviceType, typename T> ...@@ -35,7 +35,7 @@ template <typename DeviceType, typename T>
class ConvAddBNReluKernel class ConvAddBNReluKernel
: public OpKernelBase<DeviceType, FusionConvAddBNReluParam<DeviceType>> { : public OpKernelBase<DeviceType, FusionConvAddBNReluParam<DeviceType>> {
public: public:
void Compute(const FusionConvAddBNReluParam<DeviceType> &param) const; void Compute(const FusionConvAddBNReluParam<DeviceType> &param);
bool Init(FusionConvAddBNReluParam<DeviceType> *param); bool Init(FusionConvAddBNReluParam<DeviceType> *param);
}; };
......
...@@ -40,7 +40,7 @@ template <typename DeviceType, typename T> ...@@ -40,7 +40,7 @@ template <typename DeviceType, typename T>
class ConvAddKernel class ConvAddKernel
: public OpKernelBase<DeviceType, FusionConvAddParam<DeviceType>> { : public OpKernelBase<DeviceType, FusionConvAddParam<DeviceType>> {
public: public:
void Compute(const FusionConvAddParam<DeviceType> &param) const; void Compute(const FusionConvAddParam<DeviceType> &param);
bool Init(FusionConvAddParam<DeviceType> *param); bool Init(FusionConvAddParam<DeviceType> *param);
}; };
......
...@@ -35,7 +35,7 @@ template <typename DeviceType, typename T> ...@@ -35,7 +35,7 @@ template <typename DeviceType, typename T>
class ConvAddPReluKernel class ConvAddPReluKernel
: public OpKernelBase<DeviceType, FusionConvAddPReluParam<DeviceType>> { : public OpKernelBase<DeviceType, FusionConvAddPReluParam<DeviceType>> {
public: public:
void Compute(const FusionConvAddPReluParam<DeviceType> &param) const; void Compute(const FusionConvAddPReluParam<DeviceType> &param);
bool Init(FusionConvAddPReluParam<DeviceType> *param); bool Init(FusionConvAddPReluParam<DeviceType> *param);
}; };
......
...@@ -35,7 +35,7 @@ template <typename DeviceType, typename T> ...@@ -35,7 +35,7 @@ template <typename DeviceType, typename T>
class ConvAddReluKernel class ConvAddReluKernel
: public OpKernelBase<DeviceType, FusionConvAddReluParam<DeviceType>> { : public OpKernelBase<DeviceType, FusionConvAddReluParam<DeviceType>> {
public: public:
void Compute(const FusionConvAddReluParam<DeviceType> &param) const; void Compute(const FusionConvAddReluParam<DeviceType> &param);
bool Init(FusionConvAddReluParam<DeviceType> *param); bool Init(FusionConvAddReluParam<DeviceType> *param);
}; };
......
...@@ -35,7 +35,7 @@ template <typename DeviceType, typename T> ...@@ -35,7 +35,7 @@ template <typename DeviceType, typename T>
class ConvBNAddReluKernel class ConvBNAddReluKernel
: public OpKernelBase<DeviceType, FusionConvBNAddReluParam<DeviceType>> { : public OpKernelBase<DeviceType, FusionConvBNAddReluParam<DeviceType>> {
public: public:
void Compute(const FusionConvBNAddReluParam<DeviceType> &param) const; void Compute(const FusionConvBNAddReluParam<DeviceType> &param);
bool Init(FusionConvBNAddReluParam<DeviceType> *param); bool Init(FusionConvBNAddReluParam<DeviceType> *param);
}; };
......
...@@ -35,7 +35,7 @@ template <typename DeviceType, typename T> ...@@ -35,7 +35,7 @@ template <typename DeviceType, typename T>
class ConvBNKernel class ConvBNKernel
: public OpKernelBase<DeviceType, FusionConvBNParam<DeviceType>> { : public OpKernelBase<DeviceType, FusionConvBNParam<DeviceType>> {
public: public:
void Compute(const FusionConvBNParam<DeviceType> &param) const; void Compute(const FusionConvBNParam<DeviceType> &param);
bool Init(FusionConvBNParam<DeviceType> *param); bool Init(FusionConvBNParam<DeviceType> *param);
}; };
......
...@@ -35,7 +35,7 @@ template <typename DeviceType, typename T> ...@@ -35,7 +35,7 @@ template <typename DeviceType, typename T>
class ConvBNReluKernel class ConvBNReluKernel
: public OpKernelBase<DeviceType, FusionConvBNReluParam<DeviceType>> { : public OpKernelBase<DeviceType, FusionConvBNReluParam<DeviceType>> {
public: public:
void Compute(const FusionConvBNReluParam<DeviceType> &param) const; void Compute(const FusionConvBNReluParam<DeviceType> &param);
bool Init(FusionConvBNReluParam<DeviceType> *param); bool Init(FusionConvBNReluParam<DeviceType> *param);
}; };
......
...@@ -31,7 +31,7 @@ using framework::OpKernelBase; ...@@ -31,7 +31,7 @@ using framework::OpKernelBase;
template <typename DeviceType, typename T> template <typename DeviceType, typename T>
class ConvKernel : public OpKernelBase<DeviceType, ConvParam<DeviceType>> { class ConvKernel : public OpKernelBase<DeviceType, ConvParam<DeviceType>> {
public: public:
void Compute(const ConvParam<DeviceType> &param) const; void Compute(const ConvParam<DeviceType> &param);
bool Init(ConvParam<DeviceType> *param); bool Init(ConvParam<DeviceType> *param);
}; };
......
...@@ -28,7 +28,7 @@ template <typename DeviceType, typename T> ...@@ -28,7 +28,7 @@ template <typename DeviceType, typename T>
class ConvTransposeKernel class ConvTransposeKernel
: public OpKernelBase<DeviceType, ConvTransposeParam<DeviceType>> { : public OpKernelBase<DeviceType, ConvTransposeParam<DeviceType>> {
public: public:
void Compute(const ConvTransposeParam<DeviceType> &param) const; void Compute(const ConvTransposeParam<DeviceType> &param);
bool Init(ConvTransposeParam<DeviceType> *param); bool Init(ConvTransposeParam<DeviceType> *param);
}; };
......
...@@ -28,7 +28,7 @@ template <typename DeviceType, typename T> ...@@ -28,7 +28,7 @@ template <typename DeviceType, typename T>
class CrfKernel class CrfKernel
: public framework::OpKernelBase<DeviceType, CrfParam<DeviceType>> { : public framework::OpKernelBase<DeviceType, CrfParam<DeviceType>> {
public: public:
void Compute(const CrfParam<DeviceType>& param) const; void Compute(const CrfParam<DeviceType>& param);
bool Init(CrfParam<DeviceType>* param); bool Init(CrfParam<DeviceType>* param);
}; };
} // namespace operators } // namespace operators
......
...@@ -31,7 +31,7 @@ template <typename DeviceType, typename T> ...@@ -31,7 +31,7 @@ template <typename DeviceType, typename T>
class DepthwiseConvKernel class DepthwiseConvKernel
: public OpKernelBase<DeviceType, ConvParam<DeviceType>> { : public OpKernelBase<DeviceType, ConvParam<DeviceType>> {
public: public:
void Compute(const ConvParam<DeviceType> &param) const; void Compute(const ConvParam<DeviceType> &param);
bool Init(ConvParam<DeviceType> *param); bool Init(ConvParam<DeviceType> *param);
}; };
} // namespace operators } // namespace operators
......
...@@ -26,7 +26,7 @@ template <typename DeviceType, typename T> ...@@ -26,7 +26,7 @@ template <typename DeviceType, typename T>
class DequantizeKernel class DequantizeKernel
: public framework::OpKernelBase<DeviceType, DequantizeParam<DeviceType>> { : public framework::OpKernelBase<DeviceType, DequantizeParam<DeviceType>> {
public: public:
void Compute(const DequantizeParam<DeviceType> &param) const; void Compute(const DequantizeParam<DeviceType> &param);
bool Init(DequantizeParam<DeviceType> *param); bool Init(DequantizeParam<DeviceType> *param);
}; };
......
...@@ -26,7 +26,7 @@ template <typename DeviceType, typename T> ...@@ -26,7 +26,7 @@ template <typename DeviceType, typename T>
class DropoutKernel class DropoutKernel
: public framework::OpKernelBase<DeviceType, DropoutParam<DeviceType>> { : public framework::OpKernelBase<DeviceType, DropoutParam<DeviceType>> {
public: public:
void Compute(const DropoutParam<DeviceType>& param) const; void Compute(const DropoutParam<DeviceType>& param);
bool Init(DropoutParam<DeviceType>* para); bool Init(DropoutParam<DeviceType>* para);
}; };
} // namespace operators } // namespace operators
......
...@@ -35,7 +35,7 @@ template <typename DeviceType, typename T> ...@@ -35,7 +35,7 @@ template <typename DeviceType, typename T>
class DWConvBNReluKernel class DWConvBNReluKernel
: public OpKernelBase<DeviceType, FusionDWConvBNReluParam<DeviceType>> { : public OpKernelBase<DeviceType, FusionDWConvBNReluParam<DeviceType>> {
public: public:
void Compute(const FusionDWConvBNReluParam<DeviceType> &param) const; void Compute(const FusionDWConvBNReluParam<DeviceType> &param);
bool Init(FusionDWConvBNReluParam<DeviceType> *param); bool Init(FusionDWConvBNReluParam<DeviceType> *param);
}; };
......
...@@ -30,7 +30,7 @@ class ElementwiseAddKernel ...@@ -30,7 +30,7 @@ class ElementwiseAddKernel
: public framework::OpKernelBase<DeviceType, : public framework::OpKernelBase<DeviceType,
ElementwiseAddParam<DeviceType>> { ElementwiseAddParam<DeviceType>> {
public: public:
void Compute(const ElementwiseAddParam<DeviceType> &param) const; void Compute(const ElementwiseAddParam<DeviceType> &param);
bool Init(ElementwiseAddParam<DeviceType> *param); bool Init(ElementwiseAddParam<DeviceType> *param);
}; };
} // namespace operators } // namespace operators
......
...@@ -29,7 +29,7 @@ class ElementwiseAddReluKernel ...@@ -29,7 +29,7 @@ class ElementwiseAddReluKernel
: public framework::OpKernelBase<DeviceType, : public framework::OpKernelBase<DeviceType,
ElementwiseAddReluParam<DeviceType>> { ElementwiseAddReluParam<DeviceType>> {
public: public:
void Compute(const ElementwiseAddReluParam<DeviceType> &param) const; void Compute(const ElementwiseAddReluParam<DeviceType> &param);
bool Init(ElementwiseAddReluParam<DeviceType> *param); bool Init(ElementwiseAddReluParam<DeviceType> *param);
}; };
} // namespace operators } // namespace operators
......
...@@ -28,7 +28,7 @@ class ElementwiseMulKernel ...@@ -28,7 +28,7 @@ class ElementwiseMulKernel
: public framework::OpKernelBase<DeviceType, : public framework::OpKernelBase<DeviceType,
ElementwiseMulParam<DeviceType>> { ElementwiseMulParam<DeviceType>> {
public: public:
void Compute(const ElementwiseMulParam<DeviceType> &param) const; void Compute(const ElementwiseMulParam<DeviceType> &param);
bool Init(ElementwiseMulParam<DeviceType> *param); bool Init(ElementwiseMulParam<DeviceType> *param);
}; };
} // namespace operators } // namespace operators
......
...@@ -28,7 +28,7 @@ class ElementwiseSubKernel ...@@ -28,7 +28,7 @@ class ElementwiseSubKernel
: public framework::OpKernelBase<DeviceType, : public framework::OpKernelBase<DeviceType,
ElementwiseSubParam<DeviceType>> { ElementwiseSubParam<DeviceType>> {
public: public:
void Compute(const ElementwiseSubParam<DeviceType> &param) const; void Compute(const ElementwiseSubParam<DeviceType> &param);
bool Init(ElementwiseSubParam<DeviceType> *param); bool Init(ElementwiseSubParam<DeviceType> *param);
}; };
......
...@@ -28,7 +28,7 @@ class FusionFcReluKernel ...@@ -28,7 +28,7 @@ class FusionFcReluKernel
: public framework::OpKernelBase<DeviceType, : public framework::OpKernelBase<DeviceType,
FusionFcReluParam<DeviceType>> { FusionFcReluParam<DeviceType>> {
public: public:
void Compute(const FusionFcReluParam<DeviceType>& param) const; void Compute(const FusionFcReluParam<DeviceType>& param);
bool Init(FusionFcReluParam<DeviceType>* param); bool Init(FusionFcReluParam<DeviceType>* param);
}; };
} // namespace operators } // namespace operators
......
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#pragma once
#include "framework/operator.h"
#include "operators/op_param.h"
namespace paddle_mobile {
namespace operators {
using namespace framework;
template <typename DeviceType, typename T>
class FeedKernel
: public framework::OpKernelBase<DeviceType, FeedParam<DeviceType>> {
public:
void Compute(const FeedParam<DeviceType> &param);
bool Init(FeedParam<DeviceType> *param);
};
} // namespace operators
} // namespace paddle_mobile
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#pragma once
#include "framework/operator.h"
#include "operators/op_param.h"
namespace paddle_mobile {
namespace operators {
using namespace framework;
template <typename DeviceType, typename T>
class FetchKernel
: public framework::OpKernelBase<DeviceType, FetchParam<DeviceType>> {
public:
void Compute(const FetchParam<DeviceType> &param);
bool Init(FetchParam<DeviceType> *param);
};
} // namespace operators
} // namespace paddle_mobile
...@@ -28,7 +28,7 @@ template <typename DeviceType, typename T> ...@@ -28,7 +28,7 @@ template <typename DeviceType, typename T>
class FlattenKernel class FlattenKernel
: public framework::OpKernelBase<DeviceType, FlattenParam<DeviceType>> { : public framework::OpKernelBase<DeviceType, FlattenParam<DeviceType>> {
public: public:
void Compute(const FlattenParam<DeviceType>& param) const; void Compute(const FlattenParam<DeviceType>& param);
bool Init(FlattenParam<DeviceType>* param); bool Init(FlattenParam<DeviceType>* param);
}; };
} // namespace operators } // namespace operators
......
...@@ -58,7 +58,7 @@ bool ConcatKernel<FPGA, float>::Init(ConcatParam<FPGA> *param) { ...@@ -58,7 +58,7 @@ bool ConcatKernel<FPGA, float>::Init(ConcatParam<FPGA> *param) {
} }
template <> template <>
void ConcatKernel<FPGA, float>::Compute(const ConcatParam<FPGA> &param) const { void ConcatKernel<FPGA, float>::Compute(const ConcatParam<FPGA> &param) {
ComputeFPGAConcat(param.FpgaArgs()); ComputeFPGAConcat(param.FpgaArgs());
} }
template class ConcatKernel<FPGA, float>; template class ConcatKernel<FPGA, float>;
......
...@@ -78,7 +78,7 @@ bool ConvAddBNKernel<FPGA, float>::Init(FusionConvAddBNParam<FPGA> *param) { ...@@ -78,7 +78,7 @@ bool ConvAddBNKernel<FPGA, float>::Init(FusionConvAddBNParam<FPGA> *param) {
template <> template <>
void ConvAddBNKernel<FPGA, float>::Compute( void ConvAddBNKernel<FPGA, float>::Compute(
const FusionConvAddBNParam<FPGA> &param) const { const FusionConvAddBNParam<FPGA> &param) {
fpga::ComputeFpgaConv(param.FpgaArgs()); fpga::ComputeFpgaConv(param.FpgaArgs());
} }
......
...@@ -76,7 +76,7 @@ bool ConvAddBNReluKernel<FPGA, float>::Init( ...@@ -76,7 +76,7 @@ bool ConvAddBNReluKernel<FPGA, float>::Init(
template <> template <>
void ConvAddBNReluKernel<FPGA, float>::Compute( void ConvAddBNReluKernel<FPGA, float>::Compute(
const FusionConvAddBNReluParam<FPGA> &param) const { const FusionConvAddBNReluParam<FPGA> &param) {
fpga::ComputeFpgaConv(param.FpgaArgs()); fpga::ComputeFpgaConv(param.FpgaArgs());
} }
......
...@@ -58,7 +58,7 @@ bool ConvAddReluKernel<FPGA, float>::Init(FusionConvAddReluParam<FPGA> *param) { ...@@ -58,7 +58,7 @@ bool ConvAddReluKernel<FPGA, float>::Init(FusionConvAddReluParam<FPGA> *param) {
template <> template <>
void ConvAddReluKernel<FPGA, float>::Compute( void ConvAddReluKernel<FPGA, float>::Compute(
const FusionConvAddReluParam<FPGA> &param) const { const FusionConvAddReluParam<FPGA> &param) {
fpga::ComputeFpgaConv(param.FpgaArgs()); fpga::ComputeFpgaConv(param.FpgaArgs());
} }
......
...@@ -69,8 +69,7 @@ bool ConvBNKernel<FPGA, float>::Init(FusionConvBNParam<FPGA> *param) { ...@@ -69,8 +69,7 @@ bool ConvBNKernel<FPGA, float>::Init(FusionConvBNParam<FPGA> *param) {
} }
template <> template <>
void ConvBNKernel<FPGA, float>::Compute( void ConvBNKernel<FPGA, float>::Compute(const FusionConvBNParam<FPGA> &param) {
const FusionConvBNParam<FPGA> &param) const {
fpga::ComputeFpgaConv(param.FpgaArgs()); fpga::ComputeFpgaConv(param.FpgaArgs());
} }
......
...@@ -70,7 +70,7 @@ bool ConvBNReluKernel<FPGA, float>::Init(FusionConvBNReluParam<FPGA> *param) { ...@@ -70,7 +70,7 @@ bool ConvBNReluKernel<FPGA, float>::Init(FusionConvBNReluParam<FPGA> *param) {
template <> template <>
void ConvBNReluKernel<FPGA, float>::Compute( void ConvBNReluKernel<FPGA, float>::Compute(
const FusionConvBNReluParam<FPGA> &param) const { const FusionConvBNReluParam<FPGA> &param) {
fpga::ComputeFpgaConv(param.FpgaArgs()); fpga::ComputeFpgaConv(param.FpgaArgs());
} }
......
...@@ -26,8 +26,7 @@ bool DropoutKernel<FPGA, float>::Init(DropoutParam<FPGA> *param) { ...@@ -26,8 +26,7 @@ bool DropoutKernel<FPGA, float>::Init(DropoutParam<FPGA> *param) {
} }
template <> template <>
void DropoutKernel<FPGA, float>::Compute( void DropoutKernel<FPGA, float>::Compute(const DropoutParam<FPGA> &param) {}
const DropoutParam<FPGA> &param) const {}
} // namespace operators } // namespace operators
} // namespace paddle_mobile } // namespace paddle_mobile
......
...@@ -56,7 +56,7 @@ bool ElementwiseAddReluKernel<FPGA, float>::Init( ...@@ -56,7 +56,7 @@ bool ElementwiseAddReluKernel<FPGA, float>::Init(
template <> template <>
void ElementwiseAddReluKernel<FPGA, float>::Compute( void ElementwiseAddReluKernel<FPGA, float>::Compute(
const ElementwiseAddReluParam<FPGA> &param) const { const ElementwiseAddReluParam<FPGA> &param) {
fpga::ComputeFpgaEWAdd(param.FpgaArgs()); fpga::ComputeFpgaEWAdd(param.FpgaArgs());
} }
} // namespace operators } // namespace operators
......
...@@ -61,7 +61,7 @@ bool FusionFcReluKernel<FPGA, float>::Init(FusionFcReluParam<FPGA> *param) { ...@@ -61,7 +61,7 @@ bool FusionFcReluKernel<FPGA, float>::Init(FusionFcReluParam<FPGA> *param) {
} }
template <> template <>
void FusionFcReluKernel<FPGA, float>::Compute( void FusionFcReluKernel<FPGA, float>::Compute(
const FusionFcReluParam<FPGA> &param) const { const FusionFcReluParam<FPGA> &param) {
fpga::ComputeFpgaConv(param.FpgaArgs()); fpga::ComputeFpgaConv(param.FpgaArgs());
} }
......
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "operators/kernel/feed_kernel.h"
namespace paddle_mobile {
namespace operators {
template <>
bool FeedKernel<FPGA, float>::Init(FeedParam<FPGA> *param) {
Tensor *output = param->Out();
fpga::format_fp16_ofm(output);
return true;
}
template <>
void FeedKernel<FPGA, float>::Compute(const FeedParam<FPGA> &param) {
auto input =
reinterpret_cast<Tensor *>(const_cast<LoDTensor *>(param.InputX()));
auto input_ptr = input->data<float>();
fpga::format_image(input);
Tensor *output = param.Out();
auto output_ptr = output->data<float>();
fpga::BypassArgs args = {fpga::DATA_TYPE_FP32};
args.input_data_type = fpga::DATA_TYPE_FP32;
args.output_data_type = fpga::DATA_TYPE_FP16;
args.input_layout_type = fpga::LAYOUT_CHW;
args.output_layout_type = fpga::LAYOUT_HWC;
args.image.address = reinterpret_cast<void *>(input_ptr);
args.image.channels = (uint32_t)input->dims()[1];
args.image.height = (uint32_t)input->dims()[2];
args.image.width = (uint32_t)input->dims()[3];
args.image.pad_height = 0;
args.image.pad_width = 0;
args.output.address = output_ptr;
args.output.scale_address = output->scale;
fpga::PerformBypass(args);
}
template class FeedKernel<FPGA, float>;
} // namespace operators
} // namespace paddle_mobile
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#ifdef FUSION_CONVADD_OP
#include "operators/kernel/fetch_kernel.h"
namespace paddle_mobile {
namespace operators {
template <>
bool FetchKernel<FPGA, float>::Init(FetchParam<FPGA> *param) {
return true;
}
template <>
void FetchKernel<FPGA, float>::Compute(const FetchParam<FPGA> &param) {
param.Out()->ShareDataWith(*(param.InputX()));
}
template class FetchKernel<FPGA, float>;
} // namespace operators
} // namespace paddle_mobile
#endif
...@@ -62,8 +62,7 @@ bool FusionFcKernel<FPGA, float>::Init(FusionFcParam<FPGA> *param) { ...@@ -62,8 +62,7 @@ bool FusionFcKernel<FPGA, float>::Init(FusionFcParam<FPGA> *param) {
} }
template <> template <>
void FusionFcKernel<FPGA, float>::Compute( void FusionFcKernel<FPGA, float>::Compute(const FusionFcParam<FPGA> &param) {
const FusionFcParam<FPGA> &param) const {
fpga::ComputeFpgaConv(param.FpgaArgs()); fpga::ComputeFpgaConv(param.FpgaArgs());
} }
} // namespace operators } // namespace operators
......
...@@ -53,7 +53,7 @@ bool PoolKernel<FPGA, float>::Init(PoolParam<FPGA> *param) { ...@@ -53,7 +53,7 @@ bool PoolKernel<FPGA, float>::Init(PoolParam<FPGA> *param) {
} }
template <> template <>
void PoolKernel<FPGA, float>::Compute(const PoolParam<FPGA> &param) const { void PoolKernel<FPGA, float>::Compute(const PoolParam<FPGA> &param) {
fpga::ComputeFpgaPool(param.FpgaArgs()); fpga::ComputeFpgaPool(param.FpgaArgs());
} }
} // namespace operators } // namespace operators
......
...@@ -47,8 +47,7 @@ bool SoftmaxKernel<FPGA, float>::Init(SoftmaxParam<FPGA> *param) { ...@@ -47,8 +47,7 @@ bool SoftmaxKernel<FPGA, float>::Init(SoftmaxParam<FPGA> *param) {
} }
template <> template <>
void SoftmaxKernel<FPGA, float>::Compute( void SoftmaxKernel<FPGA, float>::Compute(const SoftmaxParam<FPGA> &param) {
const SoftmaxParam<FPGA> &param) const {
Tensor *in_x = param.FloatInput(); Tensor *in_x = param.FloatInput();
Tensor *out = param.Out(); Tensor *out = param.Out();
......
...@@ -27,7 +27,7 @@ template <typename DeviceType, typename T> ...@@ -27,7 +27,7 @@ template <typename DeviceType, typename T>
class FusionFcKernel class FusionFcKernel
: public framework::OpKernelBase<DeviceType, FusionFcParam<DeviceType>> { : public framework::OpKernelBase<DeviceType, FusionFcParam<DeviceType>> {
public: public:
void Compute(const FusionFcParam<DeviceType>& param) const; void Compute(const FusionFcParam<DeviceType>& param);
bool Init(FusionFcParam<DeviceType>* param); bool Init(FusionFcParam<DeviceType>* param);
}; };
......
...@@ -28,7 +28,7 @@ template <typename DeviceType, typename T> ...@@ -28,7 +28,7 @@ template <typename DeviceType, typename T>
class GruKernel class GruKernel
: public framework::OpKernelBase<DeviceType, GruParam<DeviceType>> { : public framework::OpKernelBase<DeviceType, GruParam<DeviceType>> {
public: public:
void Compute(const GruParam<DeviceType>& param) const; void Compute(const GruParam<DeviceType>& param);
bool Init(GruParam<DeviceType>* param); bool Init(GruParam<DeviceType>* param);
}; };
} // namespace operators } // namespace operators
......
...@@ -29,7 +29,7 @@ template <typename DeviceType, typename T> ...@@ -29,7 +29,7 @@ template <typename DeviceType, typename T>
class Im2SequenceKernel class Im2SequenceKernel
: public framework::OpKernelBase<DeviceType, Im2SequenceParam<DeviceType>> { : public framework::OpKernelBase<DeviceType, Im2SequenceParam<DeviceType>> {
public: public:
void Compute(const Im2SequenceParam<DeviceType>& param) const; void Compute(const Im2SequenceParam<DeviceType>& param);
bool Init(Im2SequenceParam<DeviceType>* para); bool Init(Im2SequenceParam<DeviceType>* para);
}; };
} // namespace operators } // namespace operators
......
...@@ -28,7 +28,7 @@ template <typename DeviceType, typename T> ...@@ -28,7 +28,7 @@ template <typename DeviceType, typename T>
class LookupKernel class LookupKernel
: public framework::OpKernelBase<DeviceType, LookupParam<DeviceType>> { : public framework::OpKernelBase<DeviceType, LookupParam<DeviceType>> {
public: public:
void Compute(const LookupParam<DeviceType>& param) const; void Compute(const LookupParam<DeviceType>& param);
bool Init(LookupParam<DeviceType>* param); bool Init(LookupParam<DeviceType>* param);
}; };
} // namespace operators } // namespace operators
......
...@@ -12,6 +12,8 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. ...@@ -12,6 +12,8 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and See the License for the specific language governing permissions and
limitations under the License. */ limitations under the License. */
#pragma once
#ifdef LRN_OP #ifdef LRN_OP
#ifdef _OPENMP #ifdef _OPENMP
#include <omp.h> #include <omp.h>
...@@ -173,7 +175,7 @@ template <typename DeviceType, typename T> ...@@ -173,7 +175,7 @@ template <typename DeviceType, typename T>
class LrnKernel class LrnKernel
: public framework::OpKernelBase<DeviceType, LrnParam<DeviceType>> { : public framework::OpKernelBase<DeviceType, LrnParam<DeviceType>> {
public: public:
void Compute(const LrnParam<DeviceType> &param) const; void Compute(const LrnParam<DeviceType> &param);
bool Init(LrnParam<DeviceType> *param); bool Init(LrnParam<DeviceType> *param);
}; };
} // namespace operators } // namespace operators
......
...@@ -145,7 +145,7 @@ bool BatchNormKernel<GPU_MALI, float>::Init(BatchNormParam<GPU_MALI>* param) { ...@@ -145,7 +145,7 @@ bool BatchNormKernel<GPU_MALI, float>::Init(BatchNormParam<GPU_MALI>* param) {
template <> template <>
void BatchNormKernel<GPU_MALI, float>::Compute( void BatchNormKernel<GPU_MALI, float>::Compute(
const BatchNormParam<GPU_MALI>& param) const { const BatchNormParam<GPU_MALI>& param) {
std::cout << "init acl" << std::endl; std::cout << "init acl" << std::endl;
AclBatchNormOp<GPU_MALI, float>* acl_op = AclBatchNormOp<GPU_MALI, float>* acl_op =
reinterpret_cast<AclBatchNormOp<GPU_MALI, float>*>(this->GetAclOp()); reinterpret_cast<AclBatchNormOp<GPU_MALI, float>*>(this->GetAclOp());
......
...@@ -118,7 +118,7 @@ bool ConcatKernel<GPU_MALI, float>::Init(ConcatParam<GPU_MALI>* param) { ...@@ -118,7 +118,7 @@ bool ConcatKernel<GPU_MALI, float>::Init(ConcatParam<GPU_MALI>* param) {
template <> template <>
void ConcatKernel<GPU_MALI, float>::Compute( void ConcatKernel<GPU_MALI, float>::Compute(
const ConcatParam<GPU_MALI>& param) const { const ConcatParam<GPU_MALI>& param) {
std::cout << "init acl" << std::endl; std::cout << "init acl" << std::endl;
AclConcatOp<GPU_MALI, float>* acl_op = AclConcatOp<GPU_MALI, float>* acl_op =
reinterpret_cast<AclConcatOp<GPU_MALI, float>*>(this->GetAclOp()); reinterpret_cast<AclConcatOp<GPU_MALI, float>*>(this->GetAclOp());
......
...@@ -212,7 +212,7 @@ bool ConvAddKernel<GPU_MALI, float>::Init(FusionConvAddParam<GPU_MALI>* param) { ...@@ -212,7 +212,7 @@ bool ConvAddKernel<GPU_MALI, float>::Init(FusionConvAddParam<GPU_MALI>* param) {
template <> template <>
void ConvAddKernel<GPU_MALI, float>::Compute( void ConvAddKernel<GPU_MALI, float>::Compute(
const FusionConvAddParam<GPU_MALI>& param) const { const FusionConvAddParam<GPU_MALI>& param) {
std::cout << "init acl" << std::endl; std::cout << "init acl" << std::endl;
AclConvAddOp<GPU_MALI, float>* acl_op = AclConvAddOp<GPU_MALI, float>* acl_op =
reinterpret_cast<AclConvAddOp<GPU_MALI, float>*>(this->GetAclOp()); reinterpret_cast<AclConvAddOp<GPU_MALI, float>*>(this->GetAclOp());
......
...@@ -211,8 +211,7 @@ bool ConvKernel<GPU_MALI, float>::Init(ConvParam<GPU_MALI>* param) { ...@@ -211,8 +211,7 @@ bool ConvKernel<GPU_MALI, float>::Init(ConvParam<GPU_MALI>* param) {
} }
template <> template <>
void ConvKernel<GPU_MALI, float>::Compute( void ConvKernel<GPU_MALI, float>::Compute(const ConvParam<GPU_MALI>& param) {
const ConvParam<GPU_MALI>& param) const {
std::cout << "init acl" << std::endl; std::cout << "init acl" << std::endl;
AclConvOp<GPU_MALI, float>* acl_op = AclConvOp<GPU_MALI, float>* acl_op =
reinterpret_cast<AclConvOp<GPU_MALI, float>*>(this->GetAclOp()); reinterpret_cast<AclConvOp<GPU_MALI, float>*>(this->GetAclOp());
......
...@@ -34,7 +34,7 @@ bool ElementwiseAddKernel<GPU_MALI, float>::Init( ...@@ -34,7 +34,7 @@ bool ElementwiseAddKernel<GPU_MALI, float>::Init(
template <> template <>
void ElementwiseAddKernel<GPU_MALI, float>::Compute( void ElementwiseAddKernel<GPU_MALI, float>::Compute(
const ElementwiseAddParam<GPU_MALI> &param) const { const ElementwiseAddParam<GPU_MALI> &param) {
const Tensor *input_x = param.InputX(); const Tensor *input_x = param.InputX();
const Tensor *input_y = param.InputY(); const Tensor *input_y = param.InputY();
Tensor *Out = param.Out(); Tensor *Out = param.Out();
......
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "operators/kernel/feed_kernel.h"
namespace paddle_mobile {
namespace operators {
template <>
bool FeedKernel<GPU_MALI, float>::Init(FeedParam<GPU_MALI> *param) {
return true;
}
template <>
void FeedKernel<GPU_MALI, float>::Compute(const FeedParam<GPU_MALI> &param) {
param.Out()->ShareDataWith(*(param.InputX()));
param.Out()->set_lod(param.InputX()->lod());
}
template class FeedKernel<GPU_MALI, float>;
} // namespace operators
} // namespace paddle_mobile
#endif
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#ifdef FUSION_CONVADD_OP
#include "operators/kernel/fetch_kernel.h"
namespace paddle_mobile {
namespace operators {
template <>
bool FetchKernel<GPU_MALI, float>::Init(FetchParam<GPU_MALI> *param) {
return true;
}
template <>
void FetchKernel<GPU_MALI, float>::Compute(const FetchParam<GPU_MALI> &param) {
param.Out()->ShareDataWith(*(param.InputX()));
}
template class FetchKernel<GPU_MALI, float>;
} // namespace operators
} // namespace paddle_mobile
#endif
...@@ -26,7 +26,7 @@ bool FusionFcKernel<GPU_MALI, float>::Init(FusionFcParam<GPU_MALI> *param) { ...@@ -26,7 +26,7 @@ bool FusionFcKernel<GPU_MALI, float>::Init(FusionFcParam<GPU_MALI> *param) {
template <> template <>
void FusionFcKernel<GPU_MALI, float>::Compute( void FusionFcKernel<GPU_MALI, float>::Compute(
const FusionFcParam<GPU_MALI> &param) const { const FusionFcParam<GPU_MALI> &param) {
const Tensor *input_x = param.InputX(); const Tensor *input_x = param.InputX();
const Tensor *input_y = param.InputY(); const Tensor *input_y = param.InputY();
const Tensor *input_z = param.InputZ(); const Tensor *input_z = param.InputZ();
......
...@@ -127,8 +127,7 @@ bool LrnKernel<GPU_MALI, float>::Init(LrnParam<GPU_MALI>* param) { ...@@ -127,8 +127,7 @@ bool LrnKernel<GPU_MALI, float>::Init(LrnParam<GPU_MALI>* param) {
} }
template <> template <>
void LrnKernel<GPU_MALI, float>::Compute( void LrnKernel<GPU_MALI, float>::Compute(const LrnParam<GPU_MALI>& param) {
const LrnParam<GPU_MALI>& param) const {
std::cout << "init acl" << std::endl; std::cout << "init acl" << std::endl;
AclLrnOp<GPU_MALI, float>* acl_op = AclLrnOp<GPU_MALI, float>* acl_op =
reinterpret_cast<AclLrnOp<GPU_MALI, float>*>(this->GetAclOp()); reinterpret_cast<AclLrnOp<GPU_MALI, float>*>(this->GetAclOp());
......
...@@ -27,8 +27,7 @@ bool MulKernel<GPU_MALI, float>::Init(MulParam<GPU_MALI> *param) { ...@@ -27,8 +27,7 @@ bool MulKernel<GPU_MALI, float>::Init(MulParam<GPU_MALI> *param) {
} }
template <> template <>
void MulKernel<GPU_MALI, float>::Compute( void MulKernel<GPU_MALI, float>::Compute(const MulParam<GPU_MALI> &param) {
const MulParam<GPU_MALI> &param) const {
const Tensor *input_x = param.InputX(); const Tensor *input_x = param.InputX();
const Tensor *input_y = param.InputY(); const Tensor *input_y = param.InputY();
Tensor *out = param.Out(); Tensor *out = param.Out();
......
...@@ -195,8 +195,7 @@ bool PoolKernel<GPU_MALI, float>::Init(PoolParam<GPU_MALI>* param) { ...@@ -195,8 +195,7 @@ bool PoolKernel<GPU_MALI, float>::Init(PoolParam<GPU_MALI>* param) {
} }
template <> template <>
void PoolKernel<GPU_MALI, float>::Compute( void PoolKernel<GPU_MALI, float>::Compute(const PoolParam<GPU_MALI>& param) {
const PoolParam<GPU_MALI>& param) const {
std::cout << "init acl" << std::endl; std::cout << "init acl" << std::endl;
AclPoolOp<GPU_MALI, float>* acl_op = AclPoolOp<GPU_MALI, float>* acl_op =
reinterpret_cast<AclPoolOp<GPU_MALI, float>*>(this->GetAclOp()); reinterpret_cast<AclPoolOp<GPU_MALI, float>*>(this->GetAclOp());
......
...@@ -115,8 +115,7 @@ bool ReluKernel<GPU_MALI, float>::Init(ReluParam<GPU_MALI>* param) { ...@@ -115,8 +115,7 @@ bool ReluKernel<GPU_MALI, float>::Init(ReluParam<GPU_MALI>* param) {
} }
template <> template <>
void ReluKernel<GPU_MALI, float>::Compute( void ReluKernel<GPU_MALI, float>::Compute(const ReluParam<GPU_MALI>& param) {
const ReluParam<GPU_MALI>& param) const {
std::cout << "init acl" << std::endl; std::cout << "init acl" << std::endl;
AclReluOp<GPU_MALI, float>* acl_op = AclReluOp<GPU_MALI, float>* acl_op =
reinterpret_cast<AclReluOp<GPU_MALI, float>*>(this->GetAclOp()); reinterpret_cast<AclReluOp<GPU_MALI, float>*>(this->GetAclOp());
......
...@@ -28,7 +28,7 @@ bool ReshapeKernel<GPU_MALI, float>::Init(ReshapeParam<GPU_MALI> *param) { ...@@ -28,7 +28,7 @@ bool ReshapeKernel<GPU_MALI, float>::Init(ReshapeParam<GPU_MALI> *param) {
template <> template <>
void ReshapeKernel<GPU_MALI, float>::Compute( void ReshapeKernel<GPU_MALI, float>::Compute(
const ReshapeParam<GPU_MALI> &param) const { const ReshapeParam<GPU_MALI> &param) {
const auto *input_x = param.InputX(); const auto *input_x = param.InputX();
const auto &input_x_dims = input_x->dims(); const auto &input_x_dims = input_x->dims();
auto *out = param.Out(); auto *out = param.Out();
......
...@@ -113,7 +113,7 @@ bool SoftmaxKernel<GPU_MALI, float>::Init(SoftmaxParam<GPU_MALI>* param) { ...@@ -113,7 +113,7 @@ bool SoftmaxKernel<GPU_MALI, float>::Init(SoftmaxParam<GPU_MALI>* param) {
template <> template <>
void SoftmaxKernel<GPU_MALI, float>::Compute( void SoftmaxKernel<GPU_MALI, float>::Compute(
const SoftmaxParam<GPU_MALI>& param) const { const SoftmaxParam<GPU_MALI>& param) {
std::cout << "init acl" << std::endl; std::cout << "init acl" << std::endl;
AclSoftmaxOp<GPU_MALI, float>* acl_op = AclSoftmaxOp<GPU_MALI, float>* acl_op =
reinterpret_cast<AclSoftmaxOp<GPU_MALI, float>*>(this->GetAclOp()); reinterpret_cast<AclSoftmaxOp<GPU_MALI, float>*>(this->GetAclOp());
......
...@@ -29,7 +29,7 @@ template <typename DeviceType, typename T> ...@@ -29,7 +29,7 @@ template <typename DeviceType, typename T>
class MulKernel class MulKernel
: public framework::OpKernelBase<DeviceType, MulParam<DeviceType>> { : public framework::OpKernelBase<DeviceType, MulParam<DeviceType>> {
public: public:
void Compute(const MulParam<DeviceType> &param) const; void Compute(const MulParam<DeviceType> &param);
bool Init(MulParam<DeviceType> *param); bool Init(MulParam<DeviceType> *param);
}; };
} // namespace operators } // namespace operators
......
...@@ -28,7 +28,7 @@ class MultiClassNMSKernel ...@@ -28,7 +28,7 @@ class MultiClassNMSKernel
: public framework::OpKernelBase<DeviceType, : public framework::OpKernelBase<DeviceType,
MultiClassNMSParam<DeviceType>> { MultiClassNMSParam<DeviceType>> {
public: public:
void Compute(const MultiClassNMSParam<DeviceType>& param) const; void Compute(const MultiClassNMSParam<DeviceType>& param);
bool Init(MultiClassNMSParam<DeviceType>* param); bool Init(MultiClassNMSParam<DeviceType>* param);
}; };
} // namespace operators } // namespace operators
......
...@@ -27,7 +27,7 @@ class PolygonBoxTransformKernel ...@@ -27,7 +27,7 @@ class PolygonBoxTransformKernel
: public framework::OpKernelBase<DeviceType, : public framework::OpKernelBase<DeviceType,
PolygonBoxTransformParam<DeviceType>> { PolygonBoxTransformParam<DeviceType>> {
public: public:
void Compute(const PolygonBoxTransformParam<DeviceType>& param) const; void Compute(const PolygonBoxTransformParam<DeviceType>& param);
bool Init(PolygonBoxTransformParam<DeviceType>* param); bool Init(PolygonBoxTransformParam<DeviceType>* param);
}; };
} // namespace operators } // namespace operators
......
...@@ -26,7 +26,7 @@ using framework::OpKernelBase; ...@@ -26,7 +26,7 @@ using framework::OpKernelBase;
template <typename DeviceType, typename T> template <typename DeviceType, typename T>
class PoolKernel : public OpKernelBase<DeviceType, PoolParam<DeviceType>> { class PoolKernel : public OpKernelBase<DeviceType, PoolParam<DeviceType>> {
public: public:
void Compute(const PoolParam<DeviceType> &param) const override; void Compute(const PoolParam<DeviceType> &param);
bool Init(PoolParam<DeviceType> *param); bool Init(PoolParam<DeviceType> *param);
}; };
} // namespace operators } // namespace operators
......
...@@ -24,7 +24,7 @@ template <typename DeviceType, typename T> ...@@ -24,7 +24,7 @@ template <typename DeviceType, typename T>
class PReluKernel class PReluKernel
: public framework::OpKernelBase<DeviceType, PReluParam<DeviceType>> { : public framework::OpKernelBase<DeviceType, PReluParam<DeviceType>> {
public: public:
void Compute(const PReluParam<DeviceType>& param) const; void Compute(const PReluParam<DeviceType>& param);
}; };
} // namespace operators } // namespace operators
} // namespace paddle_mobile } // namespace paddle_mobile
...@@ -54,7 +54,7 @@ template <typename DeviceType, typename T> ...@@ -54,7 +54,7 @@ template <typename DeviceType, typename T>
class PriorBoxKernel class PriorBoxKernel
: public framework::OpKernelBase<DeviceType, PriorBoxParam<DeviceType>> { : public framework::OpKernelBase<DeviceType, PriorBoxParam<DeviceType>> {
public: public:
void Compute(const PriorBoxParam<DeviceType>& param) const; void Compute(const PriorBoxParam<DeviceType>& param);
bool Init(PriorBoxParam<DeviceType>* param); bool Init(PriorBoxParam<DeviceType>* param);
}; };
} // namespace operators } // namespace operators
......
...@@ -26,7 +26,7 @@ template <typename DeviceType, typename T> ...@@ -26,7 +26,7 @@ template <typename DeviceType, typename T>
class QuantizeKernel class QuantizeKernel
: public framework::OpKernelBase<DeviceType, QuantizeParam<DeviceType>> { : public framework::OpKernelBase<DeviceType, QuantizeParam<DeviceType>> {
public: public:
void Compute(const QuantizeParam<DeviceType> &param) const; void Compute(const QuantizeParam<DeviceType> &param);
bool Init(QuantizeParam<DeviceType> *param); bool Init(QuantizeParam<DeviceType> *param);
}; };
......
...@@ -27,7 +27,7 @@ template <typename DeviceType, typename T> ...@@ -27,7 +27,7 @@ template <typename DeviceType, typename T>
class ReluKernel class ReluKernel
: public framework::OpKernelBase<DeviceType, ReluParam<DeviceType>> { : public framework::OpKernelBase<DeviceType, ReluParam<DeviceType>> {
public: public:
void Compute(const ReluParam<DeviceType>& param) const; void Compute(const ReluParam<DeviceType>& param);
bool Init(ReluParam<DeviceType>* param); bool Init(ReluParam<DeviceType>* param);
}; };
} // namespace operators } // namespace operators
......
...@@ -27,7 +27,7 @@ template <typename DeviceType, typename T> ...@@ -27,7 +27,7 @@ template <typename DeviceType, typename T>
class Reshape2Kernel class Reshape2Kernel
: public framework::OpKernelBase<DeviceType, Reshape2Param<DeviceType>> { : public framework::OpKernelBase<DeviceType, Reshape2Param<DeviceType>> {
public: public:
void Compute(const Reshape2Param<DeviceType>& param) const; void Compute(const Reshape2Param<DeviceType>& param);
bool Init(Reshape2Param<DeviceType>* param); bool Init(Reshape2Param<DeviceType>* param);
}; };
} // namespace operators } // namespace operators
......
...@@ -71,7 +71,7 @@ template <typename DeviceType, typename T> ...@@ -71,7 +71,7 @@ template <typename DeviceType, typename T>
class ReshapeKernel class ReshapeKernel
: public framework::OpKernelBase<DeviceType, ReshapeParam<DeviceType>> { : public framework::OpKernelBase<DeviceType, ReshapeParam<DeviceType>> {
public: public:
void Compute(const ReshapeParam<DeviceType>& param) const; void Compute(const ReshapeParam<DeviceType>& param);
bool Init(ReshapeParam<DeviceType>* param); bool Init(ReshapeParam<DeviceType>* param);
}; };
} // namespace operators } // namespace operators
......
...@@ -74,7 +74,7 @@ template <typename DeviceType, typename T> ...@@ -74,7 +74,7 @@ template <typename DeviceType, typename T>
class ResizeKernel class ResizeKernel
: public framework::OpKernelBase<DeviceType, ResizeParam<DeviceType>> { : public framework::OpKernelBase<DeviceType, ResizeParam<DeviceType>> {
public: public:
void Compute(const ResizeParam<DeviceType> &param) const; void Compute(const ResizeParam<DeviceType> &param);
}; };
} // namespace operators } // namespace operators
} // namespace paddle_mobile } // namespace paddle_mobile
......
...@@ -24,7 +24,7 @@ template <typename DeviceType, typename T> ...@@ -24,7 +24,7 @@ template <typename DeviceType, typename T>
class ScaleKernel class ScaleKernel
: public framework::OpKernelBase<DeviceType, ScaleParam<DeviceType>> { : public framework::OpKernelBase<DeviceType, ScaleParam<DeviceType>> {
public: public:
void Compute(const ScaleParam<DeviceType>& param) const; void Compute(const ScaleParam<DeviceType>& param);
}; };
} // namespace operators } // namespace operators
} // namespace paddle_mobile } // namespace paddle_mobile
...@@ -28,7 +28,7 @@ template <typename DeviceType, typename T> ...@@ -28,7 +28,7 @@ template <typename DeviceType, typename T>
class ShapeKernel class ShapeKernel
: public framework::OpKernelBase<DeviceType, ShapeParam<DeviceType>> { : public framework::OpKernelBase<DeviceType, ShapeParam<DeviceType>> {
public: public:
void Compute(const ShapeParam<DeviceType>& param) const; void Compute(const ShapeParam<DeviceType>& param);
bool Init(ShapeParam<DeviceType>* param); bool Init(ShapeParam<DeviceType>* param);
}; };
} // namespace operators } // namespace operators
......
...@@ -28,7 +28,7 @@ template <typename DeviceType, typename T> ...@@ -28,7 +28,7 @@ template <typename DeviceType, typename T>
class SigmoidKernel class SigmoidKernel
: public OpKernelBase<DeviceType, SigmoidParam<DeviceType>> { : public OpKernelBase<DeviceType, SigmoidParam<DeviceType>> {
public: public:
void Compute(const SigmoidParam<DeviceType>& param) const override; void Compute(const SigmoidParam<DeviceType>& param);
bool Init(SigmoidParam<DeviceType>* param); bool Init(SigmoidParam<DeviceType>* param);
}; };
......
...@@ -24,7 +24,7 @@ template <typename DeviceType, typename T> ...@@ -24,7 +24,7 @@ template <typename DeviceType, typename T>
class SliceKernel class SliceKernel
: public framework::OpKernelBase<DeviceType, SliceParam<DeviceType>> { : public framework::OpKernelBase<DeviceType, SliceParam<DeviceType>> {
public: public:
void Compute(const SliceParam<DeviceType>& param) const {} void Compute(const SliceParam<DeviceType>& param) {}
}; };
} // namespace operators } // namespace operators
} // namespace paddle_mobile } // namespace paddle_mobile
...@@ -27,7 +27,7 @@ template <typename DeviceType, typename T> ...@@ -27,7 +27,7 @@ template <typename DeviceType, typename T>
class SoftmaxKernel class SoftmaxKernel
: public OpKernelBase<DeviceType, SoftmaxParam<DeviceType>> { : public OpKernelBase<DeviceType, SoftmaxParam<DeviceType>> {
public: public:
void Compute(const SoftmaxParam<DeviceType> &param) const override; void Compute(const SoftmaxParam<DeviceType> &param);
bool Init(SoftmaxParam<DeviceType> *param); bool Init(SoftmaxParam<DeviceType> *param);
}; };
} // namespace operators } // namespace operators
......
...@@ -28,7 +28,7 @@ template <typename DeviceType, typename T> ...@@ -28,7 +28,7 @@ template <typename DeviceType, typename T>
class SplitKernel class SplitKernel
: public framework::OpKernelBase<DeviceType, SplitParam<DeviceType>> { : public framework::OpKernelBase<DeviceType, SplitParam<DeviceType>> {
public: public:
void Compute(const SplitParam<DeviceType>& param) const; void Compute(const SplitParam<DeviceType>& param);
bool Init(SplitParam<DeviceType>* param); bool Init(SplitParam<DeviceType>* param);
}; };
} // namespace operators } // namespace operators
......
...@@ -25,7 +25,7 @@ template <typename DeviceType, typename T> ...@@ -25,7 +25,7 @@ template <typename DeviceType, typename T>
class SumKernel class SumKernel
: public framework::OpKernelBase<DeviceType, SumParam<DeviceType>> { : public framework::OpKernelBase<DeviceType, SumParam<DeviceType>> {
public: public:
void Compute(const SumParam<DeviceType> &param) const; void Compute(const SumParam<DeviceType> &param);
bool Init(SumParam<DeviceType> *param); bool Init(SumParam<DeviceType> *param);
}; };
......
...@@ -28,7 +28,7 @@ template <typename DeviceType, typename T> ...@@ -28,7 +28,7 @@ template <typename DeviceType, typename T>
class Transpose2Kernel class Transpose2Kernel
: public framework::OpKernelBase<DeviceType, Transpose2Param<DeviceType>> { : public framework::OpKernelBase<DeviceType, Transpose2Param<DeviceType>> {
public: public:
void Compute(const Transpose2Param<DeviceType>& param) const; void Compute(const Transpose2Param<DeviceType>& param);
bool Init(Transpose2Param<DeviceType>* param); bool Init(Transpose2Param<DeviceType>* param);
}; };
} // namespace operators } // namespace operators
......
...@@ -28,7 +28,7 @@ template <typename DeviceType, typename T> ...@@ -28,7 +28,7 @@ template <typename DeviceType, typename T>
class TransposeKernel class TransposeKernel
: public framework::OpKernelBase<DeviceType, TransposeParam<DeviceType>> { : public framework::OpKernelBase<DeviceType, TransposeParam<DeviceType>> {
public: public:
void Compute(const TransposeParam<DeviceType>& param) const; void Compute(const TransposeParam<DeviceType>& param);
bool Init(TransposeParam<DeviceType>* param); bool Init(TransposeParam<DeviceType>* param);
}; };
} // namespace operators } // namespace operators
......
...@@ -37,10 +37,6 @@ class LookupOp : public framework::OperatorWithKernel< ...@@ -37,10 +37,6 @@ class LookupOp : public framework::OperatorWithKernel<
: framework::OperatorWithKernel<DeviceType, LookupParam<DeviceType>, : framework::OperatorWithKernel<DeviceType, LookupParam<DeviceType>,
operators::LookupKernel<DeviceType, T>>( operators::LookupKernel<DeviceType, T>>(
type, inputs, outputs, attrs, scope) {} type, inputs, outputs, attrs, scope) {}
using framework::OperatorWithKernel<
DeviceType, LookupParam<DeviceType>,
operators::LookupKernel<DeviceType, T>>::OperatorWithKernel;
void InferShape() const override; void InferShape() const override;
}; };
......
...@@ -35,10 +35,6 @@ class LrnOp : public framework::OperatorWithKernel< ...@@ -35,10 +35,6 @@ class LrnOp : public framework::OperatorWithKernel<
: framework::OperatorWithKernel<DeviceType, LrnParam<DeviceType>, : framework::OperatorWithKernel<DeviceType, LrnParam<DeviceType>,
operators::LrnKernel<DeviceType, T>>( operators::LrnKernel<DeviceType, T>>(
type, inputs, outputs, attrs, scope) {} type, inputs, outputs, attrs, scope) {}
using framework::OperatorWithKernel<
DeviceType, LrnParam<DeviceType>,
operators::LrnKernel<DeviceType, T>>::OperatorWithKernel;
void InferShape() const override; void InferShape() const override;
protected: protected:
......
...@@ -35,10 +35,6 @@ class MulOp : public framework::OperatorWithKernel< ...@@ -35,10 +35,6 @@ class MulOp : public framework::OperatorWithKernel<
: framework::OperatorWithKernel<DeviceType, MulParam<DeviceType>, : framework::OperatorWithKernel<DeviceType, MulParam<DeviceType>,
operators::MulKernel<DeviceType, T>>( operators::MulKernel<DeviceType, T>>(
type, inputs, outputs, attrs, scope) {} type, inputs, outputs, attrs, scope) {}
using framework::OperatorWithKernel<
DeviceType, MulParam<DeviceType>,
operators::MulKernel<DeviceType, T>>::OperatorWithKernel;
void InferShape() const override; void InferShape() const override;
protected: protected:
......
...@@ -40,10 +40,6 @@ class MultiClassNMSOp : public framework::OperatorWithKernel< ...@@ -40,10 +40,6 @@ class MultiClassNMSOp : public framework::OperatorWithKernel<
DeviceType, MultiClassNMSParam<DeviceType>, DeviceType, MultiClassNMSParam<DeviceType>,
operators::MultiClassNMSKernel<DeviceType, T>>( operators::MultiClassNMSKernel<DeviceType, T>>(
type, inputs, outputs, attrs, scope) {} type, inputs, outputs, attrs, scope) {}
using framework::OperatorWithKernel<
DeviceType, MultiClassNMSParam<DeviceType>,
operators::MultiClassNMSKernel<DeviceType, T>>::OperatorWithKernel;
void InferShape() const override; void InferShape() const override;
protected: protected:
......
...@@ -27,6 +27,10 @@ limitations under the License. */ ...@@ -27,6 +27,10 @@ limitations under the License. */
#include "fpga/api.h" #include "fpga/api.h"
#endif #endif
#ifdef PADDLE_MOBILE_CL
#include "framework/cl/cl_image.h"
#endif
namespace paddle_mobile { namespace paddle_mobile {
namespace operators { namespace operators {
...@@ -48,6 +52,17 @@ struct DtypeTensorTrait { ...@@ -48,6 +52,17 @@ struct DtypeTensorTrait {
typedef framework::Tensor rtype; typedef framework::Tensor rtype;
}; };
#ifdef PADDLE_MOBILE_CL
template <>
struct DtypeTensorTrait<GPU_CL> {
// This is the type we obtained in variable.
typedef framework::CLImage gtype;
// This type will be the parent class type
// or the same type.
typedef framework::CLImage rtype;
};
#endif
class OpParam { class OpParam {
protected: protected:
template <typename T> template <typename T>
...@@ -397,6 +412,13 @@ class ConvParam : public OpParam { ...@@ -397,6 +412,13 @@ class ConvParam : public OpParam {
const int &Groups() const { return groups; } const int &Groups() const { return groups; }
#ifdef PADDLE_MOBILE_CL
int Offset() const { return offset_; }
int SetOffset(int in_offset) { offset_ = in_offset; }
#endif
private: private:
RType *input_; RType *input_;
RType *output_; RType *output_;
...@@ -405,6 +427,10 @@ class ConvParam : public OpParam { ...@@ -405,6 +427,10 @@ class ConvParam : public OpParam {
vector<int> paddings_; vector<int> paddings_;
vector<int> dilations_; vector<int> dilations_;
int groups; int groups;
#ifdef PADDLE_MOBILE_CL
int offset_;
#endif
}; };
template <typename Dtype> template <typename Dtype>
Print &operator<<(Print &printer, const ConvParam<Dtype> &conv_param); Print &operator<<(Print &printer, const ConvParam<Dtype> &conv_param);
...@@ -715,6 +741,14 @@ class BatchNormParam : OpParam { ...@@ -715,6 +741,14 @@ class BatchNormParam : OpParam {
const string &DataFormat() const { return data_format_; } const string &DataFormat() const { return data_format_; }
void SetNewScale(RType *new_scale) { new_scale_ = new_scale; }
void SetNewBias(RType *new_bias) { new_bias_ = new_bias; }
const RType *NewScale() const { return new_scale_; }
const RType *NewBias() const { return new_bias_; }
private: private:
RType *input_x_; RType *input_x_;
RType *output_y_; RType *output_y_;
...@@ -726,6 +760,8 @@ class BatchNormParam : OpParam { ...@@ -726,6 +760,8 @@ class BatchNormParam : OpParam {
float momentum_; float momentum_;
bool is_test_; bool is_test_;
string data_format_; string data_format_;
RType *new_bias_;
RType *new_scale_;
}; };
#endif #endif
...@@ -1034,18 +1070,18 @@ class FeedParam : public OpParam { ...@@ -1034,18 +1070,18 @@ class FeedParam : public OpParam {
public: public:
FeedParam(const VariableNameMap &inputs, const VariableNameMap &outputs, FeedParam(const VariableNameMap &inputs, const VariableNameMap &outputs,
const AttributeMap &attrs, Scope *scope) { const AttributeMap &attrs, const Scope &scope) {
input_x_ = InputXFrom<GType>(inputs, *scope); input_x_ = InputXFrom<LoDTensor>(inputs, scope);
out_ = OutFrom<GType>(outputs, *scope); out_ = OutFrom<GType>(outputs, scope);
auto var = scope->Var("batch_size"); auto var = scope.FindVar("batch_size");
batch_size = var->GetValue<int>(); batch_size = var->GetValue<int>();
} }
const GType *InputX() const { return input_x_; } const LoDTensor *InputX() const { return input_x_; }
GType *Out() const { return out_; } GType *Out() const { return out_; }
const int BatchSize() const { return batch_size; } const int BatchSize() const { return batch_size; }
private: private:
GType *input_x_; LoDTensor *input_x_;
GType *out_; GType *out_;
int batch_size; int batch_size;
}; };
...@@ -1059,14 +1095,19 @@ class FetchParam : public OpParam { ...@@ -1059,14 +1095,19 @@ class FetchParam : public OpParam {
FetchParam(const VariableNameMap &inputs, const VariableNameMap &outputs, FetchParam(const VariableNameMap &inputs, const VariableNameMap &outputs,
const AttributeMap &attrs, const Scope &scope) { const AttributeMap &attrs, const Scope &scope) {
input_x_ = InputXFrom<GType>(inputs, scope); input_x_ = InputXFrom<GType>(inputs, scope);
out_ = OutFrom<GType>(outputs, scope); out_ = OutFrom(outputs, scope);
} }
const RType *InputX() const { return input_x_; } const RType *InputX() const { return input_x_; }
RType *Out() const { return out_; } Tensor *Out() const { return out_; }
static Tensor *OutFrom(const VariableNameMap &outputs, const Scope &scope) {
return GetVarValue<LoDTensor>("Out", outputs, scope);
}
private: private:
RType *input_x_; RType *input_x_;
RType *out_; Tensor *out_;
}; };
#ifdef FILL_CONSTANT_OP #ifdef FILL_CONSTANT_OP
...@@ -1447,13 +1488,13 @@ class ResizeParam : public OpParam { ...@@ -1447,13 +1488,13 @@ class ResizeParam : public OpParam {
* @b op 层实例化好这个 param 传递给 kernel 层使用 * @b op 层实例化好这个 param 传递给 kernel 层使用
* */ * */
template <typename Dtype> template <typename Dtype>
class ReluParam : public OpParam { class ReluParamBase : public OpParam {
typedef typename DtypeTensorTrait<Dtype>::gtype GType; typedef typename DtypeTensorTrait<Dtype>::gtype GType;
typedef typename DtypeTensorTrait<Dtype>::rtype RType; typedef typename DtypeTensorTrait<Dtype>::rtype RType;
public: public:
ReluParam(const VariableNameMap &inputs, const VariableNameMap &outputs, ReluParamBase(const VariableNameMap &inputs, const VariableNameMap &outputs,
const AttributeMap &attrs, const Scope &scope) { const AttributeMap &attrs, const Scope &scope) {
input_x_ = InputXFrom<GType>(inputs, scope); input_x_ = InputXFrom<GType>(inputs, scope);
out_ = OutFrom<GType>(outputs, scope); out_ = OutFrom<GType>(outputs, scope);
} }
...@@ -1466,6 +1507,25 @@ class ReluParam : public OpParam { ...@@ -1466,6 +1507,25 @@ class ReluParam : public OpParam {
RType *input_x_; RType *input_x_;
RType *out_; RType *out_;
}; };
template <typename Dtype>
class ReluParam : public ReluParamBase<Dtype> {
public:
using ReluParamBase<Dtype>::ReluParamBase;
};
#ifdef PADDLE_MOBILE_CL
template <>
class ReluParam<GPU_CL> : public ReluParamBase<GPU_CL> {
public:
using ReluParamBase<GPU_CL>::ReluParamBase;
framework::CLImage &getMidImage() { return midImage; }
private:
framework::CLImage midImage;
};
#endif
#endif #endif
#ifdef PRELU_OP #ifdef PRELU_OP
...@@ -1764,6 +1824,7 @@ class FusionConvAddBNReluParam : public ConvParam<Dtype> { ...@@ -1764,6 +1824,7 @@ class FusionConvAddBNReluParam : public ConvParam<Dtype> {
bool is_test_; bool is_test_;
RType *new_bias_; RType *new_bias_;
RType *new_scale_; RType *new_scale_;
#ifdef PADDLE_MOBILE_FPGA #ifdef PADDLE_MOBILE_FPGA
private: private:
......
...@@ -14,7 +14,8 @@ limitations under the License. */ ...@@ -14,7 +14,8 @@ limitations under the License. */
#ifdef POOL_OP #ifdef POOL_OP
#include "pool_op.h" #include <vector>
#include "operators/pool_op.h"
#include "framework/op_proto_maker.h" #include "framework/op_proto_maker.h"
#include "framework/op_registry.h" #include "framework/op_registry.h"
...@@ -68,5 +69,8 @@ REGISTER_OPERATOR_MALI_GPU(pool2d, ops::PoolOp); ...@@ -68,5 +69,8 @@ REGISTER_OPERATOR_MALI_GPU(pool2d, ops::PoolOp);
#ifdef PADDLE_MOBILE_FPGA #ifdef PADDLE_MOBILE_FPGA
REGISTER_OPERATOR_FPGA(pool2d, ops::PoolOp); REGISTER_OPERATOR_FPGA(pool2d, ops::PoolOp);
#endif #endif
#ifdef PADDLE_MOBILE_CL
REGISTER_OPERATOR_CL(pool2d, ops::PoolOp);
#endif
#endif #endif
...@@ -38,9 +38,6 @@ class PoolOp : public OperatorWithKernel<DeviceType, PoolParam<DeviceType>, ...@@ -38,9 +38,6 @@ class PoolOp : public OperatorWithKernel<DeviceType, PoolParam<DeviceType>,
: OperatorWithKernel<DeviceType, PoolParam<DeviceType>, : OperatorWithKernel<DeviceType, PoolParam<DeviceType>,
operators::PoolKernel<DeviceType, T>>( operators::PoolKernel<DeviceType, T>>(
type, inputs, outputs, attrs, scope) {} type, inputs, outputs, attrs, scope) {}
using OperatorWithKernel<
DeviceType, PoolParam<DeviceType>,
operators::PoolKernel<DeviceType, T>>::OperatorWithKernel;
void InferShape() const override; void InferShape() const override;
private: private:
......
...@@ -38,10 +38,6 @@ class PReluOp : public framework::OperatorWithKernel< ...@@ -38,10 +38,6 @@ class PReluOp : public framework::OperatorWithKernel<
: framework::OperatorWithKernel<DeviceType, PReluParam<DeviceType>, : framework::OperatorWithKernel<DeviceType, PReluParam<DeviceType>,
operators::PReluKernel<DeviceType, T>>( operators::PReluKernel<DeviceType, T>>(
type, inputs, outputs, attrs, scope) {} type, inputs, outputs, attrs, scope) {}
using framework::OperatorWithKernel<
DeviceType, PReluParam<DeviceType>,
operators::PReluKernel<DeviceType, T>>::OperatorWithKernel;
void InferShape() const override; void InferShape() const override;
protected: protected:
......
...@@ -40,9 +40,6 @@ class PriorBoxOp : public framework::OperatorWithKernel< ...@@ -40,9 +40,6 @@ class PriorBoxOp : public framework::OperatorWithKernel<
operators::PriorBoxKernel<DeviceType, T>>( operators::PriorBoxKernel<DeviceType, T>>(
type, inputs, outputs, attrs, scope) {} type, inputs, outputs, attrs, scope) {}
using framework::OperatorWithKernel<
DeviceType, PriorBoxParam<DeviceType>,
operators::PriorBoxKernel<DeviceType, T>>::OperatorWithKernel;
void InferShape() const override; void InferShape() const override;
protected: protected:
......
...@@ -39,5 +39,10 @@ REGISTER_OPERATOR_CPU(relu, ops::ReluOp); ...@@ -39,5 +39,10 @@ REGISTER_OPERATOR_CPU(relu, ops::ReluOp);
#ifdef PADDLE_MOBILE_MALI_GPU #ifdef PADDLE_MOBILE_MALI_GPU
REGISTER_OPERATOR_MALI_GPU(relu, ops::ReluOp); REGISTER_OPERATOR_MALI_GPU(relu, ops::ReluOp);
#endif #endif
#ifdef PADDLE_MOBILE_FPGA
#endif
#ifdef PADDLE_MOBILE_CL
REGISTER_OPERATOR_CL(relu, ops::ReluOp);
#endif
#endif #endif
...@@ -41,10 +41,6 @@ class ReluOp : public framework::OperatorWithKernel< ...@@ -41,10 +41,6 @@ class ReluOp : public framework::OperatorWithKernel<
: framework::OperatorWithKernel<DeviceType, ReluParam<DeviceType>, : framework::OperatorWithKernel<DeviceType, ReluParam<DeviceType>,
operators::ReluKernel<DeviceType, T>>( operators::ReluKernel<DeviceType, T>>(
type, inputs, outputs, attrs, scope) {} type, inputs, outputs, attrs, scope) {}
using framework::OperatorWithKernel<
DeviceType, ReluParam<DeviceType>,
operators::ReluKernel<DeviceType, T>>::OperatorWithKernel;
void InferShape() const override; void InferShape() const override;
protected: protected:
......
...@@ -38,5 +38,8 @@ REGISTER_OPERATOR_CPU(reshape, ops::ReshapeOp); ...@@ -38,5 +38,8 @@ REGISTER_OPERATOR_CPU(reshape, ops::ReshapeOp);
#ifdef PADDLE_MOBILE_MALI_GPU #ifdef PADDLE_MOBILE_MALI_GPU
REGISTER_OPERATOR_MALI_GPU(reshape, ops::ReshapeOp); REGISTER_OPERATOR_MALI_GPU(reshape, ops::ReshapeOp);
#endif #endif
#ifdef PADDLE_MOBILE_CL
REGISTER_OPERATOR_CL(reshape, ops::ReshapeOp);
#endif
#endif #endif
...@@ -39,10 +39,6 @@ class ReshapeOp : public framework::OperatorWithKernel< ...@@ -39,10 +39,6 @@ class ReshapeOp : public framework::OperatorWithKernel<
: framework::OperatorWithKernel<DeviceType, ReshapeParam<DeviceType>, : framework::OperatorWithKernel<DeviceType, ReshapeParam<DeviceType>,
operators::ReshapeKernel<DeviceType, T>>( operators::ReshapeKernel<DeviceType, T>>(
type, inputs, outputs, attrs, scope) {} type, inputs, outputs, attrs, scope) {}
using framework::OperatorWithKernel<
DeviceType, ReshapeParam<DeviceType>,
operators::ReshapeKernel<DeviceType, T>>::OperatorWithKernel;
void InferShape() const override; void InferShape() const override;
protected: protected:
......
...@@ -38,10 +38,6 @@ class ResizeOp : public framework::OperatorWithKernel< ...@@ -38,10 +38,6 @@ class ResizeOp : public framework::OperatorWithKernel<
: framework::OperatorWithKernel<DeviceType, ResizeParam<DeviceType>, : framework::OperatorWithKernel<DeviceType, ResizeParam<DeviceType>,
operators::ResizeKernel<DeviceType, T>>( operators::ResizeKernel<DeviceType, T>>(
type, inputs, outputs, attrs, scope) {} type, inputs, outputs, attrs, scope) {}
using framework::OperatorWithKernel<
DeviceType, ResizeParam<DeviceType>,
operators::ResizeKernel<DeviceType, T>>::OperatorWithKernel;
void InferShape() const override; void InferShape() const override;
protected: protected:
......
...@@ -38,10 +38,6 @@ class ScaleOp : public framework::OperatorWithKernel< ...@@ -38,10 +38,6 @@ class ScaleOp : public framework::OperatorWithKernel<
: framework::OperatorWithKernel<DeviceType, ScaleParam<DeviceType>, : framework::OperatorWithKernel<DeviceType, ScaleParam<DeviceType>,
operators::ScaleKernel<DeviceType, T>>( operators::ScaleKernel<DeviceType, T>>(
type, inputs, outputs, attrs, scope) {} type, inputs, outputs, attrs, scope) {}
using framework::OperatorWithKernel<
DeviceType, ScaleParam<DeviceType>,
operators::ScaleKernel<DeviceType, T>>::OperatorWithKernel;
void InferShape() const override; void InferShape() const override;
protected: protected:
......
...@@ -38,10 +38,6 @@ class ShapeOp : public framework::OperatorWithKernel< ...@@ -38,10 +38,6 @@ class ShapeOp : public framework::OperatorWithKernel<
: framework::OperatorWithKernel<DeviceType, ShapeParam<DeviceType>, : framework::OperatorWithKernel<DeviceType, ShapeParam<DeviceType>,
operators::ShapeKernel<DeviceType, T>>( operators::ShapeKernel<DeviceType, T>>(
type, inputs, outputs, attrs, scope) {} type, inputs, outputs, attrs, scope) {}
using framework::OperatorWithKernel<
DeviceType, ShapeParam<DeviceType>,
operators::ShapeKernel<DeviceType, T>>::OperatorWithKernel;
void InferShape() const override; void InferShape() const override;
}; };
......
...@@ -36,11 +36,6 @@ class SigmoidOp : public framework::OperatorWithKernel< ...@@ -36,11 +36,6 @@ class SigmoidOp : public framework::OperatorWithKernel<
: framework::OperatorWithKernel<DeviceType, SigmoidParam<DeviceType>, : framework::OperatorWithKernel<DeviceType, SigmoidParam<DeviceType>,
operators::SigmoidKernel<DeviceType, T>>( operators::SigmoidKernel<DeviceType, T>>(
type, inputs, outputs, attrs, scope) {} type, inputs, outputs, attrs, scope) {}
using framework::OperatorWithKernel<
DeviceType, SigmoidParam<DeviceType>,
operators::SigmoidKernel<DeviceType, T>>::OperatorWithKernel;
void InferShape() const override; void InferShape() const override;
}; };
......
...@@ -38,10 +38,6 @@ class SliceOp : public framework::OperatorWithKernel< ...@@ -38,10 +38,6 @@ class SliceOp : public framework::OperatorWithKernel<
: framework::OperatorWithKernel<DeviceType, SliceParam<DeviceType>, : framework::OperatorWithKernel<DeviceType, SliceParam<DeviceType>,
operators::SliceKernel<DeviceType, T>>( operators::SliceKernel<DeviceType, T>>(
type, inputs, outputs, attrs, scope) {} type, inputs, outputs, attrs, scope) {}
using framework::OperatorWithKernel<
DeviceType, SliceParam<DeviceType>,
operators::SliceKernel<DeviceType, T>>::OperatorWithKernel;
void InferShape() const override; void InferShape() const override;
protected: protected:
......
...@@ -36,5 +36,8 @@ REGISTER_OPERATOR_MALI_GPU(softmax, ops::SoftmaxOp); ...@@ -36,5 +36,8 @@ REGISTER_OPERATOR_MALI_GPU(softmax, ops::SoftmaxOp);
#ifdef PADDLE_MOBILE_FPGA #ifdef PADDLE_MOBILE_FPGA
REGISTER_OPERATOR_FPGA(softmax, ops::SoftmaxOp); REGISTER_OPERATOR_FPGA(softmax, ops::SoftmaxOp);
#endif #endif
#ifdef PADDLE_MOBILE_CL
REGISTER_OPERATOR_CL(softmax, ops::SoftmaxOp);
#endif
#endif #endif
...@@ -36,11 +36,6 @@ class SoftmaxOp : public framework::OperatorWithKernel< ...@@ -36,11 +36,6 @@ class SoftmaxOp : public framework::OperatorWithKernel<
: framework::OperatorWithKernel<DeviceType, SoftmaxParam<DeviceType>, : framework::OperatorWithKernel<DeviceType, SoftmaxParam<DeviceType>,
operators::SoftmaxKernel<DeviceType, T>>( operators::SoftmaxKernel<DeviceType, T>>(
type, inputs, outputs, attrs, scope) {} type, inputs, outputs, attrs, scope) {}
using framework::OperatorWithKernel<
DeviceType, SoftmaxParam<DeviceType>,
operators::SoftmaxKernel<DeviceType, T>>::OperatorWithKernel;
void InferShape() const override; void InferShape() const override;
private: private:
......
...@@ -38,10 +38,6 @@ class SplitOp : public framework::OperatorWithKernel< ...@@ -38,10 +38,6 @@ class SplitOp : public framework::OperatorWithKernel<
: framework::OperatorWithKernel<DeviceType, SplitParam<DeviceType>, : framework::OperatorWithKernel<DeviceType, SplitParam<DeviceType>,
operators::SplitKernel<DeviceType, T>>( operators::SplitKernel<DeviceType, T>>(
type, inputs, outputs, attrs, scope) {} type, inputs, outputs, attrs, scope) {}
using framework::OperatorWithKernel<
DeviceType, SplitParam<DeviceType>,
operators::SplitKernel<DeviceType, T>>::OperatorWithKernel;
void InferShape() const override; void InferShape() const override;
}; };
} // namespace operators } // namespace operators
......
...@@ -40,10 +40,6 @@ class TransposeOp : public framework::OperatorWithKernel< ...@@ -40,10 +40,6 @@ class TransposeOp : public framework::OperatorWithKernel<
DeviceType, TransposeParam<DeviceType>, DeviceType, TransposeParam<DeviceType>,
operators::TransposeKernel<DeviceType, T>>(type, inputs, outputs, operators::TransposeKernel<DeviceType, T>>(type, inputs, outputs,
attrs, scope) {} attrs, scope) {}
using framework::OperatorWithKernel<
DeviceType, TransposeParam<DeviceType>,
operators::TransposeKernel<DeviceType, T>>::OperatorWithKernel;
void InferShape() const override; void InferShape() const override;
}; };
......
...@@ -342,6 +342,13 @@ if (NOT FOUND_MATCH) ...@@ -342,6 +342,13 @@ if (NOT FOUND_MATCH)
ADD_EXECUTABLE(test-fssd net/test_mobilenet_025_fssd.cpp test_helper.h test_include.h) ADD_EXECUTABLE(test-fssd net/test_mobilenet_025_fssd.cpp test_helper.h test_include.h)
target_link_libraries(test-fssd paddle-mobile) target_link_libraries(test-fssd paddle-mobile)
# gen test
ADD_EXECUTABLE(test-mobilenetgpu net/test_mobilenet_GPU.cpp test_helper.h test_include.h)
target_link_libraries(test-mobilenetgpu paddle-mobile)
# gen test
ADD_EXECUTABLE(test-yologpu net/test_yologpu.cpp test_helper.h test_include.h executor_for_test.h)
target_link_libraries(test-yologpu paddle-mobile)
# gen test # gen test
ADD_EXECUTABLE(test-multi-process net/test_multi_inference_predict.cpp test_helper.h test_include.h) ADD_EXECUTABLE(test-multi-process net/test_multi_inference_predict.cpp test_helper.h test_include.h)
...@@ -351,6 +358,5 @@ if (NOT FOUND_MATCH) ...@@ -351,6 +358,5 @@ if (NOT FOUND_MATCH)
ADD_EXECUTABLE(test-eng net/test_eng.cpp test_helper.h test_include.h) ADD_EXECUTABLE(test-eng net/test_eng.cpp test_helper.h test_include.h)
target_link_libraries(test-eng paddle-mobile) target_link_libraries(test-eng paddle-mobile)
#add_library(test-lib-size SHARED common/test_lib_size.h common/test_lib_size.cpp) #add_library(test-lib-size SHARED common/test_lib_size.h common/test_lib_size.cpp)
endif () endif ()
...@@ -18,8 +18,8 @@ limitations under the License. */ ...@@ -18,8 +18,8 @@ limitations under the License. */
#include <vector> #include <vector>
#include "common/log.h" #include "common/log.h"
#include "framework/executor.h"
#include "framework/op_registry.h" #include "framework/op_registry.h"
#include "io/executor.h"
#include "operators/conv_op.h" #include "operators/conv_op.h"
#include "operators/elementwise_add_op.h" #include "operators/elementwise_add_op.h"
#include "operators/pool_op.h" #include "operators/pool_op.h"
...@@ -29,9 +29,9 @@ limitations under the License. */ ...@@ -29,9 +29,9 @@ limitations under the License. */
#include "operators/softmax_op.h" #include "operators/softmax_op.h"
#include "operators/transpose_op.h" #include "operators/transpose_op.h"
using paddle_mobile::Executor;
using paddle_mobile::framework::BlockDesc; using paddle_mobile::framework::BlockDesc;
using paddle_mobile::framework::DDim; using paddle_mobile::framework::DDim;
using paddle_mobile::framework::Executor;
using paddle_mobile::framework::LoDTensor; using paddle_mobile::framework::LoDTensor;
using paddle_mobile::framework::OpDesc; using paddle_mobile::framework::OpDesc;
using paddle_mobile::framework::Program; using paddle_mobile::framework::Program;
......
...@@ -16,7 +16,7 @@ limitations under the License. */ ...@@ -16,7 +16,7 @@ limitations under the License. */
#include "operators/concat_op.h" #include "operators/concat_op.h"
int main() { int main() {
paddle_mobile::Loader<paddle_mobile::FPGA> loader; paddle_mobile::framework::Loader<paddle_mobile::FPGA> loader;
auto program = loader.Load(g_googlenet); auto program = loader.Load(g_googlenet);
PADDLE_MOBILE_ENFORCE(program.originProgram != nullptr, PADDLE_MOBILE_ENFORCE(program.originProgram != nullptr,
"program file read fail"); "program file read fail");
......
...@@ -12,21 +12,22 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. ...@@ -12,21 +12,22 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and See the License for the specific language governing permissions and
limitations under the License. */ limitations under the License. */
#include <iostream>
#include <string> #include <string>
#include "../test_helper.h" #include "../test_helper.h"
#include "io/loader.h" #include "framework/loader.h"
int main() { int main() {
paddle_mobile::Loader<paddle_mobile::CPU> loader; paddle_mobile::framework::Loader<paddle_mobile::CPU> loader;
// ../../../test/models/googlenet // ../../../test/models/googlenet
// ../../../test/models/mobilenet // ../../../test/models/mobilenet
// auto program = loader.Load(g_googlenet, true);
// auto program = loader.Load(g_mobilenet_ssd, true); // auto program = loader.Load(g_mobilenet_ssd, true);
auto program = loader.Load(std::string(g_ocr) + "/model", // auto program = loader.Load(std::string(g_ocr) + "/model",
std::string(g_ocr) + "/params", false); // std::string(g_ocr) + "/params", false);
// program.originProgram->Description("program desc: "); // program.originProgram->Description("program desc: ");
return 0; return 0;
} }
...@@ -13,12 +13,12 @@ See the License for the specific language governing permissions and ...@@ -13,12 +13,12 @@ See the License for the specific language governing permissions and
limitations under the License. */ limitations under the License. */
#include "../test_helper.h" #include "../test_helper.h"
#include "framework/loader.h"
#include "framework/program/program-optimize/node.h" #include "framework/program/program-optimize/node.h"
#include "framework/program/program-optimize/program_optimize.h" #include "framework/program/program-optimize/program_optimize.h"
#include "io/loader.h"
int main() { int main() {
paddle_mobile::Loader<paddle_mobile::CPU> loader; paddle_mobile::framework::Loader<paddle_mobile::CPU> loader;
// "../../../test/models/googlenet" // "../../../test/models/googlenet"
auto program = loader.Load(g_mobilenet_ssd, true); auto program = loader.Load(g_mobilenet_ssd, true);
paddle_mobile::framework::ProgramOptimize optimize; paddle_mobile::framework::ProgramOptimize optimize;
......
...@@ -29,8 +29,9 @@ int main() { ...@@ -29,8 +29,9 @@ int main() {
bool optimize = true; bool optimize = true;
auto time1 = time(); auto time1 = time();
if (paddle_mobile.Load(g_googlenet, optimize)) { if (paddle_mobile.Load(g_googlenet, optimize)) {
auto time2 = time(); auto time2 = paddle_mobile::time();
std::cout << "load cost :" << time_diff(time1, time2) << "ms" << std::endl; std::cout << "load cost :" << paddle_mobile::time_diff(time1, time2) << "ms"
<< std::endl;
std::vector<float> input; std::vector<float> input;
std::vector<float> output; std::vector<float> output;
std::vector<int64_t> dims{1, 3, 224, 224}; std::vector<int64_t> dims{1, 3, 224, 224};
......
...@@ -19,14 +19,15 @@ limitations under the License. */ ...@@ -19,14 +19,15 @@ limitations under the License. */
int main() { int main() {
paddle_mobile::PaddleMobile<paddle_mobile::CPU> paddle_mobile; paddle_mobile::PaddleMobile<paddle_mobile::CPU> paddle_mobile;
paddle_mobile.SetThreadNum(4); paddle_mobile.SetThreadNum(4);
auto time1 = time(); auto time1 = paddle_mobile::time();
// auto isok = paddle_mobile.Load(std::string(g_mobilenet_detect) + "/model", // auto isok = paddle_mobile.Load(std::string(g_mobilenet_detect) + "/model",
// std::string(g_mobilenet_detect) + "/params", true); // std::string(g_mobilenet_detect) + "/params", true);
auto isok = paddle_mobile.Load(g_mobilenet, true); auto isok = paddle_mobile.Load(g_mobilenet, true);
if (isok) { if (isok) {
auto time2 = time(); auto time2 = paddle_mobile::time();
std::cout << "load cost :" << time_diff(time1, time1) << "ms" << std::endl; std::cout << "load cost :" << paddle_mobile::time_diff(time1, time1) << "ms"
<< std::endl;
std::vector<float> input; std::vector<float> input;
std::vector<int64_t> dims{1, 3, 224, 224}; std::vector<int64_t> dims{1, 3, 224, 224};
...@@ -42,14 +43,14 @@ int main() { ...@@ -42,14 +43,14 @@ int main() {
for (int i = 0; i < 10; ++i) { for (int i = 0; i < 10; ++i) {
auto vec_result = paddle_mobile.Predict(input, dims); auto vec_result = paddle_mobile.Predict(input, dims);
} }
auto time3 = time(); auto time3 = paddle_mobile::time();
for (int i = 0; i < 10; ++i) { for (int i = 0; i < 10; ++i) {
auto vec_result = paddle_mobile.Predict(input, dims); auto vec_result = paddle_mobile.Predict(input, dims);
} }
DLOG << vec_result; DLOG << vec_result;
auto time4 = time(); auto time4 = paddle_mobile::time();
std::cout << "predict cost :" << time_diff(time3, time4) / 10 << "ms" std::cout << "predict cost :" << paddle_mobile::time_diff(time3, time4) / 10
<< std::endl; << "ms" << std::endl;
} }
std::cout << "如果结果Nan请查看: test/images/g_test_image_1x3x224x224_banana " std::cout << "如果结果Nan请查看: test/images/g_test_image_1x3x224x224_banana "
......
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include <iostream>
#include "../test_helper.h"
#include "../test_include.h"
int main() {
paddle_mobile::PaddleMobile<paddle_mobile::GPU_CL> paddle_mobile;
// paddle_mobile.SetThreadNum(4);
auto time1 = paddle_mobile::time();
// auto isok = paddle_mobile.Load(std::string(g_mobilenet_detect) + "/model",
// std::string(g_mobilenet_detect) + "/params", true);
auto isok = paddle_mobile.Load(std::string(g_mobilenet), true);
if (isok) {
auto time2 = paddle_mobile::time();
std::cout << "load cost :" << paddle_mobile::time_diff(time1, time2) << "ms"
<< std::endl;
std::vector<float> input;
std::vector<int64_t> dims{1, 3, 224, 224};
GetInput<float>(g_test_image_1x3x224x224_banana, &input, dims);
std::vector<float> vec_result = paddle_mobile.Predict(input, dims);
auto time3 = paddle_mobile::time();
int max = 10;
for (int i = 0; i < max; ++i) {
vec_result = paddle_mobile.Predict(input, dims);
}
auto time4 = paddle_mobile::time();
std::cout << "predict cost :"
<< paddle_mobile::time_diff(time3, time4) / max << "ms"
<< std::endl;
std::vector<float>::iterator biggest =
std::max_element(std::begin(vec_result), std::end(vec_result));
std::cout << " Max element is " << *biggest << " at position "
<< std::distance(std::begin(vec_result), biggest) << std::endl;
}
std::cout << "如果结果Nan请查看: test/images/g_test_image_1x3x224x224_banana "
"是否存在?"
<< std::endl;
return 0;
}
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include <iostream>
#include "../test_helper.h"
#include "../test_include.h"
int main() {
paddle_mobile::PaddleMobile<paddle_mobile::GPU_CL> paddle_mobile;
// paddle_mobile.SetThreadNum(4);
auto time1 = paddle_mobile::time();
// auto isok = paddle_mobile.Load(std::string(g_mobilenet_detect) + "/model",
// std::string(g_mobilenet_detect) + "/params", true);
auto isok = paddle_mobile.Load(std::string(g_yolo_mul), true);
if (isok) {
auto time2 = paddle_mobile::time();
std::cout << "load cost :" << paddle_mobile::time_diff(time1, time2) << "ms"
<< std::endl;
std::vector<float> input;
std::vector<int64_t> dims{1, 3, 416, 416};
GetInput<float>(g_yolo_img, &input, dims);
std::vector<float> vec_result;
// = paddle_mobile.Predict(input, dims);
auto time3 = paddle_mobile::time();
int max = 10;
for (int i = 0; i < max; ++i) {
vec_result = paddle_mobile.Predict(input, dims);
}
auto time4 = paddle_mobile::time();
// auto time3 = paddle_mobile::time();
// for (int i = 0; i < 10; ++i) {
// auto vec_result = paddle_mobile.Predict(input, dims);
// }
// auto time4 = paddle_mobile::time();
std::cout << "predict cost :"
<< paddle_mobile::time_diff(time3, time4) / max << "ms"
<< std::endl;
std::vector<float>::iterator biggest =
std::max_element(std::begin(vec_result), std::end(vec_result));
std::cout << " Max element is " << *biggest << " at position "
<< std::distance(std::begin(vec_result), biggest) << std::endl;
// for (float i : vec_result) {
// std::cout << i << std::endl;
// }
}
return 0;
}
...@@ -125,7 +125,7 @@ template class TestBatchNormOp<CPU>; ...@@ -125,7 +125,7 @@ template class TestBatchNormOp<CPU>;
int main() { int main() {
DLOG << "----------**********----------"; DLOG << "----------**********----------";
DLOG << "begin to run BatchNormOp Test"; DLOG << "begin to run BatchNormOp Test";
paddle_mobile::Loader<paddle_mobile::CPU> loader; paddle_mobile::framework::Loader<paddle_mobile::CPU> loader;
auto program = loader.Load(std::string(g_mobilenet_ssd)); auto program = loader.Load(std::string(g_mobilenet_ssd));
/// input x (4,10,2,2) /// input x (4,10,2,2)
......
...@@ -114,7 +114,7 @@ template class TestBoxCoderOp<CPU>; ...@@ -114,7 +114,7 @@ template class TestBoxCoderOp<CPU>;
int main() { int main() {
DLOG << "----------**********----------"; DLOG << "----------**********----------";
DLOG << "begin to run BoxCoderOp Test"; DLOG << "begin to run BoxCoderOp Test";
paddle_mobile::Loader<paddle_mobile::CPU> loader; paddle_mobile::framework::Loader<paddle_mobile::CPU> loader;
auto program = loader.Load(std::string(g_mobilenet_ssd)); auto program = loader.Load(std::string(g_mobilenet_ssd));
paddle_mobile::framework::Tensor priorbox; paddle_mobile::framework::Tensor priorbox;
......
...@@ -16,7 +16,7 @@ limitations under the License. */ ...@@ -16,7 +16,7 @@ limitations under the License. */
#include "operators/concat_op.h" #include "operators/concat_op.h"
int main() { int main() {
paddle_mobile::Loader<paddle_mobile::CPU> loader; paddle_mobile::framework::Loader<paddle_mobile::CPU> loader;
auto program = loader.Load(g_googlenet); auto program = loader.Load(g_googlenet);
PADDLE_MOBILE_ENFORCE(program.originProgram != nullptr, PADDLE_MOBILE_ENFORCE(program.originProgram != nullptr,
"program file read fail"); "program file read fail");
......
...@@ -16,7 +16,7 @@ limitations under the License. */ ...@@ -16,7 +16,7 @@ limitations under the License. */
#include "operators/fusion_conv_add_relu_op.h" #include "operators/fusion_conv_add_relu_op.h"
int main() { int main() {
paddle_mobile::Loader<paddle_mobile::CPU> loader; paddle_mobile::framework::Loader<paddle_mobile::CPU> loader;
// ../models/image_classification_resnet.inference.model // ../models/image_classification_resnet.inference.model
auto program = loader.Load(g_googlenet, true); auto program = loader.Load(g_googlenet, true);
......
...@@ -16,7 +16,7 @@ limitations under the License. */ ...@@ -16,7 +16,7 @@ limitations under the License. */
#include "operators/conv_op.h" #include "operators/conv_op.h"
int main() { int main() {
paddle_mobile::Loader<paddle_mobile::GPU_MALI> loader; paddle_mobile::framework::Loader<paddle_mobile::GPU_MALI> loader;
// ../models/image_classification_resnet.inference.model // ../models/image_classification_resnet.inference.model
auto program = loader.Load(g_googlenet); auto program = loader.Load(g_googlenet);
......
...@@ -16,7 +16,7 @@ limitations under the License. */ ...@@ -16,7 +16,7 @@ limitations under the License. */
#include "operators/depthwise_conv_op.h" #include "operators/depthwise_conv_op.h"
int main() { int main() {
paddle_mobile::Loader<paddle_mobile::CPU> loader; paddle_mobile::framework::Loader<paddle_mobile::CPU> loader;
// ../models/image_classification_resnet.inference.model // ../models/image_classification_resnet.inference.model
auto program = loader.Load(g_mobilenet_ssd); auto program = loader.Load(g_mobilenet_ssd);
......
...@@ -15,7 +15,7 @@ limitations under the License. */ ...@@ -15,7 +15,7 @@ limitations under the License. */
#include "../test_include.h" #include "../test_include.h"
int main() { int main() {
paddle_mobile::Loader<paddle_mobile::CPU> loader; paddle_mobile::framework::Loader<paddle_mobile::CPU> loader;
auto program = loader.Load(g_resnet); auto program = loader.Load(g_resnet);
PADDLE_MOBILE_ENFORCE(program.originProgram != nullptr, PADDLE_MOBILE_ENFORCE(program.originProgram != nullptr,
"program file read fail"); "program file read fail");
......
...@@ -104,7 +104,7 @@ template class TestElementwiseSubOp<CPU>; ...@@ -104,7 +104,7 @@ template class TestElementwiseSubOp<CPU>;
int main() { int main() {
DLOG << "----------**********----------"; DLOG << "----------**********----------";
DLOG << "begin to run ElementwiseSub Test"; DLOG << "begin to run ElementwiseSub Test";
paddle_mobile::Loader<paddle_mobile::CPU> loader; paddle_mobile::framework::Loader<paddle_mobile::CPU> loader;
auto program = loader.Load(std::string(g_ocr) + "/model", auto program = loader.Load(std::string(g_ocr) + "/model",
std::string(g_ocr) + "/params"); std::string(g_ocr) + "/params");
......
...@@ -94,7 +94,7 @@ template class TestFillConstantOp<CPU>; ...@@ -94,7 +94,7 @@ template class TestFillConstantOp<CPU>;
int main() { int main() {
DLOG << "----------**********----------"; DLOG << "----------**********----------";
DLOG << "begin to run FillConstant Test"; DLOG << "begin to run FillConstant Test";
paddle_mobile::Loader<paddle_mobile::CPU> loader; paddle_mobile::framework::Loader<paddle_mobile::CPU> loader;
auto program = loader.Load(std::string(g_ocr) + "/model", auto program = loader.Load(std::string(g_ocr) + "/model",
std::string(g_ocr) + "/params"); std::string(g_ocr) + "/params");
......
...@@ -17,7 +17,7 @@ limitations under the License. */ ...@@ -17,7 +17,7 @@ limitations under the License. */
#include "operators/fusion_conv_add_bn_relu_op.h" #include "operators/fusion_conv_add_bn_relu_op.h"
int main() { int main() {
paddle_mobile::Loader<paddle_mobile::CPU> loader; paddle_mobile::framework::Loader<paddle_mobile::CPU> loader;
// ../models/image_classification_resnet.inference.model // ../models/image_classification_resnet.inference.model
auto program = loader.Load(g_mobilenet, true); auto program = loader.Load(g_mobilenet, true);
......
...@@ -112,7 +112,7 @@ template class TestFcOp<CPU>; ...@@ -112,7 +112,7 @@ template class TestFcOp<CPU>;
int main() { int main() {
DLOG << "----------**********----------"; DLOG << "----------**********----------";
DLOG << "begin to run Fc Test"; DLOG << "begin to run Fc Test";
paddle_mobile::Loader<paddle_mobile::CPU> loader; paddle_mobile::framework::Loader<paddle_mobile::CPU> loader;
// "../../../test/models/googlenet" // "../../../test/models/googlenet"
auto program = loader.Load(g_googlenet); auto program = loader.Load(g_googlenet);
paddle_mobile::framework::ProgramOptimize optimize; paddle_mobile::framework::ProgramOptimize optimize;
......
...@@ -16,7 +16,7 @@ limitations under the License. */ ...@@ -16,7 +16,7 @@ limitations under the License. */
#include "operators/gru_op.h" #include "operators/gru_op.h"
int main() { int main() {
paddle_mobile::Loader<paddle_mobile::CPU> loader; paddle_mobile::framework::Loader<paddle_mobile::CPU> loader;
auto program = loader.Load(g_nlp); auto program = loader.Load(g_nlp);
PADDLE_MOBILE_ENFORCE(program.originProgram != nullptr, PADDLE_MOBILE_ENFORCE(program.originProgram != nullptr,
"program file read fail"); "program file read fail");
......
...@@ -60,7 +60,6 @@ class TestIm2SequenceOp { ...@@ -60,7 +60,6 @@ class TestIm2SequenceOp {
Variable *x1_feed_value = scope->Var("conv2d_19.tmp_1"); Variable *x1_feed_value = scope->Var("conv2d_19.tmp_1");
auto tensor_x1 = x1_feed_value->GetMutable<LoDTensor>(); auto tensor_x1 = x1_feed_value->GetMutable<LoDTensor>();
tensor_x1->ShareDataWith(t1); tensor_x1->ShareDataWith(t1);
Variable *output = scope->Var("im2sequence_0.tmp_0"); Variable *output = scope->Var("im2sequence_0.tmp_0");
auto *output_tensor = output->GetMutable<LoDTensor>(); auto *output_tensor = output->GetMutable<LoDTensor>();
output_tensor->mutable_data<float>({2, 12}); output_tensor->mutable_data<float>({2, 12});
...@@ -100,7 +99,7 @@ template class TestIm2SequenceOp<CPU>; ...@@ -100,7 +99,7 @@ template class TestIm2SequenceOp<CPU>;
int main() { int main() {
DLOG << "----------**********----------"; DLOG << "----------**********----------";
DLOG << "begin to run Im2Sequence Test"; DLOG << "begin to run Im2Sequence Test";
paddle_mobile::Loader<paddle_mobile::CPU> loader; paddle_mobile::framework::Loader<paddle_mobile::CPU> loader;
auto program = loader.Load(std::string(g_eng) + "/model", auto program = loader.Load(std::string(g_eng) + "/model",
std::string(g_eng) + "/params"); std::string(g_eng) + "/params");
......
...@@ -16,7 +16,7 @@ limitations under the License. */ ...@@ -16,7 +16,7 @@ limitations under the License. */
#include "operators/lrn_op.h" #include "operators/lrn_op.h"
int main() { int main() {
paddle_mobile::Loader<paddle_mobile::CPU> loader; paddle_mobile::framework::Loader<paddle_mobile::CPU> loader;
auto program = loader.Load(g_googlenet); auto program = loader.Load(g_googlenet);
PADDLE_MOBILE_ENFORCE(program.originProgram != nullptr, PADDLE_MOBILE_ENFORCE(program.originProgram != nullptr,
"program file read fail"); "program file read fail");
......
...@@ -111,9 +111,8 @@ template class TestMultiClassNMSOp<CPU>; ...@@ -111,9 +111,8 @@ template class TestMultiClassNMSOp<CPU>;
int main() { int main() {
DLOG << "----------**********----------"; DLOG << "----------**********----------";
DLOG << "begin to run MulticlassNMS Test"; DLOG << "begin to run MulticlassNMS Test";
paddle_mobile::Loader<paddle_mobile::CPU> loader; paddle_mobile::framework::Loader<paddle_mobile::CPU> loader;
auto program = loader.Load(std::string(g_mobilenet_ssd)); auto program = loader.Load(std::string(g_mobilenet_ssd));
paddle_mobile::framework::Tensor inputx1; paddle_mobile::framework::Tensor inputx1;
SetupTensor<float>(&inputx1, {1, 2, 4}, static_cast<float>(0), SetupTensor<float>(&inputx1, {1, 2, 4}, static_cast<float>(0),
static_cast<float>(1)); static_cast<float>(1));
......
...@@ -96,7 +96,7 @@ template class TestPolygonBoxTransformOp<CPU>; ...@@ -96,7 +96,7 @@ template class TestPolygonBoxTransformOp<CPU>;
int main() { int main() {
DLOG << "----------**********----------"; DLOG << "----------**********----------";
DLOG << "begin to run PolygonBoxTransform Test"; DLOG << "begin to run PolygonBoxTransform Test";
paddle_mobile::Loader<paddle_mobile::CPU> loader; paddle_mobile::framework::Loader<paddle_mobile::CPU> loader;
auto program = loader.Load(std::string(g_ocr)); auto program = loader.Load(std::string(g_ocr));
paddle_mobile::framework::Tensor input; paddle_mobile::framework::Tensor input;
......
...@@ -16,7 +16,7 @@ limitations under the License. */ ...@@ -16,7 +16,7 @@ limitations under the License. */
#include "operators/pool_op.h" #include "operators/pool_op.h"
int main() { int main() {
paddle_mobile::Loader<paddle_mobile::CPU> loader; paddle_mobile::framework::Loader<paddle_mobile::CPU> loader;
auto program = loader.Load(std::string(g_googlenet)); auto program = loader.Load(std::string(g_googlenet));
if (program.originProgram == nullptr) { if (program.originProgram == nullptr) {
DLOG << "program read file"; DLOG << "program read file";
......
...@@ -17,7 +17,7 @@ limitations under the License. */ ...@@ -17,7 +17,7 @@ limitations under the License. */
#include "operators/prelu_op.h" #include "operators/prelu_op.h"
int main() { int main() {
paddle_mobile::Loader<paddle_mobile::CPU> loader; paddle_mobile::framework::Loader<paddle_mobile::CPU> loader;
auto program = loader.Load(g_resnet); auto program = loader.Load(g_resnet);
PADDLE_MOBILE_ENFORCE(program.originProgram != nullptr, PADDLE_MOBILE_ENFORCE(program.originProgram != nullptr,
"program file read fail"); "program file read fail");
......
...@@ -125,7 +125,7 @@ template class TestPriorBoxOp<CPU>; ...@@ -125,7 +125,7 @@ template class TestPriorBoxOp<CPU>;
int main() { int main() {
DLOG << "----------**********----------"; DLOG << "----------**********----------";
DLOG << "begin to run PriorBoxOp Test"; DLOG << "begin to run PriorBoxOp Test";
paddle_mobile::Loader<paddle_mobile::CPU> loader; paddle_mobile::framework::Loader<paddle_mobile::CPU> loader;
auto program = loader.Load(std::string(g_mobilenet_ssd)); auto program = loader.Load(std::string(g_mobilenet_ssd));
/// input x (1,3,300,300) /// input x (1,3,300,300)
......
...@@ -16,7 +16,7 @@ limitations under the License. */ ...@@ -16,7 +16,7 @@ limitations under the License. */
#include "operators/relu_op.h" #include "operators/relu_op.h"
int main() { int main() {
paddle_mobile::Loader<paddle_mobile::CPU> loader; paddle_mobile::framework::Loader<paddle_mobile::CPU> loader;
auto program = loader.Load(g_resnet); auto program = loader.Load(g_resnet);
PADDLE_MOBILE_ENFORCE(program.originProgram != nullptr, PADDLE_MOBILE_ENFORCE(program.originProgram != nullptr,
"program file read fail"); "program file read fail");
......
...@@ -112,7 +112,7 @@ template class TestReshape2Op<CPU>; ...@@ -112,7 +112,7 @@ template class TestReshape2Op<CPU>;
int main() { int main() {
DLOG << "----------**********----------"; DLOG << "----------**********----------";
DLOG << "begin to run Reshape2 Test"; DLOG << "begin to run Reshape2 Test";
paddle_mobile::Loader<paddle_mobile::CPU> loader; paddle_mobile::framework::Loader<paddle_mobile::CPU> loader;
auto program = loader.Load(std::string(g_ocr) + "/model", auto program = loader.Load(std::string(g_ocr) + "/model",
std::string(g_ocr) + "/params"); std::string(g_ocr) + "/params");
......
...@@ -16,7 +16,7 @@ limitations under the License. */ ...@@ -16,7 +16,7 @@ limitations under the License. */
#include "operators/reshape_op.h" #include "operators/reshape_op.h"
int main() { int main() {
paddle_mobile::Loader<paddle_mobile::CPU> loader; paddle_mobile::framework::Loader<paddle_mobile::CPU> loader;
auto program = loader.Load(std::string(g_mobilenet_ssd)); auto program = loader.Load(std::string(g_mobilenet_ssd));
if (program.originProgram == nullptr) { if (program.originProgram == nullptr) {
DLOG << "program read file"; DLOG << "program read file";
......
...@@ -16,7 +16,7 @@ limitations under the License. */ ...@@ -16,7 +16,7 @@ limitations under the License. */
#include "operators/resize_op.h" #include "operators/resize_op.h"
int main() { int main() {
paddle_mobile::Loader<paddle_mobile::CPU> loader; paddle_mobile::framework::Loader<paddle_mobile::CPU> loader;
auto program = loader.Load(std::string(g_mobilenet_ssd)); auto program = loader.Load(std::string(g_mobilenet_ssd));
if (program.originProgram == nullptr) { if (program.originProgram == nullptr) {
DLOG << "program read file"; DLOG << "program read file";
......
...@@ -15,7 +15,7 @@ limitations under the License. */ ...@@ -15,7 +15,7 @@ limitations under the License. */
#include "../../src/operators/kernel/central-arm-func/sigmoid_arm_func.h" #include "../../src/operators/kernel/central-arm-func/sigmoid_arm_func.h"
#include "../../src/operators/kernel/sigmoid_kernel.h" #include "../../src/operators/kernel/sigmoid_kernel.h"
#include "../test_helper.h" #include "../test_helper.h"
#include "io/executor.h" #include "framework/executor.h"
int main() { int main() {
paddle_mobile::framework::Tensor input; paddle_mobile::framework::Tensor input;
......
...@@ -17,7 +17,7 @@ limitations under the License. */ ...@@ -17,7 +17,7 @@ limitations under the License. */
#include "operators/softmax_op.h" #include "operators/softmax_op.h"
int main() { int main() {
paddle_mobile::Loader<paddle_mobile::CPU> loader; paddle_mobile::framework::Loader<paddle_mobile::CPU> loader;
auto program = loader.Load(std::string(g_mobilenet)); auto program = loader.Load(std::string(g_mobilenet));
if (program.originProgram == nullptr) { if (program.originProgram == nullptr) {
DLOG << "program read file"; DLOG << "program read file";
......
...@@ -103,7 +103,7 @@ template class TestSumOp<CPU>; ...@@ -103,7 +103,7 @@ template class TestSumOp<CPU>;
int main() { int main() {
DLOG << "----------**********----------"; DLOG << "----------**********----------";
DLOG << "begin to run Sum Test"; DLOG << "begin to run Sum Test";
paddle_mobile::Loader<paddle_mobile::CPU> loader; paddle_mobile::framework::Loader<paddle_mobile::CPU> loader;
auto program = loader.Load(std::string(g_eng) + "/model", auto program = loader.Load(std::string(g_eng) + "/model",
std::string(g_eng) + "/params"); std::string(g_eng) + "/params");
......
...@@ -113,7 +113,7 @@ template class TestTranspose2Op<CPU>; ...@@ -113,7 +113,7 @@ template class TestTranspose2Op<CPU>;
int main() { int main() {
DLOG << "----------**********----------"; DLOG << "----------**********----------";
DLOG << "begin to run Transpose2 Test"; DLOG << "begin to run Transpose2 Test";
paddle_mobile::Loader<paddle_mobile::CPU> loader; paddle_mobile::framework::Loader<paddle_mobile::CPU> loader;
auto program = loader.Load(std::string(g_ocr) + "/model", auto program = loader.Load(std::string(g_ocr) + "/model",
std::string(g_ocr) + "/params"); std::string(g_ocr) + "/params");
......
...@@ -16,7 +16,7 @@ limitations under the License. */ ...@@ -16,7 +16,7 @@ limitations under the License. */
#include "../test_include.h" #include "../test_include.h"
#include "operators/transpose_op.h" #include "operators/transpose_op.h"
int main() { int main() {
paddle_mobile::Loader<paddle_mobile::CPU> loader; paddle_mobile::framework::Loader<paddle_mobile::CPU> loader;
auto program = loader.Load(std::string(g_mobilenet_ssd)); auto program = loader.Load(std::string(g_mobilenet_ssd));
if (program.originProgram == nullptr) { if (program.originProgram == nullptr) {
DLOG << "program read file"; DLOG << "program read file";
......
...@@ -36,6 +36,7 @@ static const char *g_squeezenet = "../models/squeezenet"; ...@@ -36,6 +36,7 @@ static const char *g_squeezenet = "../models/squeezenet";
static const char *g_googlenet = "../models/googlenet"; static const char *g_googlenet = "../models/googlenet";
static const char *g_googlenet_quali = "../models/googlenet_combine_quali"; static const char *g_googlenet_quali = "../models/googlenet_combine_quali";
static const char *g_mobilenet = "../models/mobilenet"; static const char *g_mobilenet = "../models/mobilenet";
static const char *g_mobilenet_mul = "../models/mobilenet_mul";
static const char *g_alexnet = "../models/alexnet"; static const char *g_alexnet = "../models/alexnet";
static const char *g_inceptionv4 = "../models/inceptionv4"; static const char *g_inceptionv4 = "../models/inceptionv4";
static const char *g_nlp = "../models/nlp"; static const char *g_nlp = "../models/nlp";
...@@ -44,8 +45,8 @@ static const char *g_resnet = "../models/resnet"; ...@@ -44,8 +45,8 @@ static const char *g_resnet = "../models/resnet";
static const char *g_googlenet_combine = "../models/googlenet_combine"; static const char *g_googlenet_combine = "../models/googlenet_combine";
static const char *g_yolo = "../models/yolo"; static const char *g_yolo = "../models/yolo";
static const char *g_yolo_combined = "../models/yolo_combined"; static const char *g_yolo_combined = "../models/yolo_combined";
static const char *g_yolo_mul = "../models/yolo_mul";
static const char *g_fluid_fssd_new = "../models/fluid_fssd_new"; static const char *g_fluid_fssd_new = "../models/fluid_fssd_new";
static const char *g_test_image_1x3x224x224 = static const char *g_test_image_1x3x224x224 =
"../images/test_image_1x3x224x224_float"; "../images/test_image_1x3x224x224_float";
static const char *g_test_image_1x3x224x224_banana = static const char *g_test_image_1x3x224x224_banana =
...@@ -57,9 +58,12 @@ static const char *g_moto = "../images/moto_300x300_float"; ...@@ -57,9 +58,12 @@ static const char *g_moto = "../images/moto_300x300_float";
static const char *g_imgfssd_ar = "../images/test_image_ssd_ar"; static const char *g_imgfssd_ar = "../images/test_image_ssd_ar";
static const char *g_imgfssd_ar1 = "../images/003_0001.txt"; static const char *g_imgfssd_ar1 = "../images/003_0001.txt";
static const char *g_img = "../images/img.bin"; static const char *g_img = "../images/img.bin";
static const char *g_yolo_img = "../images/in_put_1_3_416_416_2";
static const char *g_mobilenet_img = "../images/image";
using paddle_mobile::framework::DDim; using paddle_mobile::framework::DDim;
using paddle_mobile::framework::Tensor; using paddle_mobile::framework::Tensor;
using namespace paddle_mobile;
template <typename T> template <typename T>
void SetupTensor(paddle_mobile::framework::Tensor *input, void SetupTensor(paddle_mobile::framework::Tensor *input,
......
/*******************************************************************************
* Copyright (c) 2008-2018 The Khronos Group Inc.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and/or associated documentation files (the
* "Materials"), to deal in the Materials without restriction, including
* without limitation the rights to use, copy, modify, merge, publish,
* distribute, sublicense, and/or sell copies of the Materials, and to
* permit persons to whom the Materials are furnished to do so, subject to
* the following conditions:
*
* The above copyright notice and this permission notice shall be included
* in all copies or substantial portions of the Materials.
*
* MODIFICATIONS TO THIS FILE MAY MEAN IT NO LONGER ACCURATELY REFLECTS
* KHRONOS STANDARDS. THE UNMODIFIED, NORMATIVE VERSIONS OF KHRONOS
* SPECIFICATIONS AND HEADER INFORMATION ARE LOCATED AT
* https://www.khronos.org/registry/
*
* THE MATERIALS ARE PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
* MATERIALS OR THE USE OR OTHER DEALINGS IN THE MATERIALS.
******************************************************************************/
#ifndef __OPENCL_CL_H
#define __OPENCL_CL_H
#ifdef __APPLE__
#include <OpenCL/cl_version.h>
#include <OpenCL/cl_platform.h>
#else
#include <CL/cl_version.h>
#include <CL/cl_platform.h>
#endif
#ifdef __cplusplus
extern "C" {
#endif
/******************************************************************************/
typedef struct _cl_platform_id * cl_platform_id;
typedef struct _cl_device_id * cl_device_id;
typedef struct _cl_context * cl_context;
typedef struct _cl_command_queue * cl_command_queue;
typedef struct _cl_mem * cl_mem;
typedef struct _cl_program * cl_program;
typedef struct _cl_kernel * cl_kernel;
typedef struct _cl_event * cl_event;
typedef struct _cl_sampler * cl_sampler;
typedef cl_uint cl_bool; /* WARNING! Unlike cl_ types in cl_platform.h, cl_bool is not guaranteed to be the same size as the bool in kernels. */
typedef cl_ulong cl_bitfield;
typedef cl_bitfield cl_device_type;
typedef cl_uint cl_platform_info;
typedef cl_uint cl_device_info;
typedef cl_bitfield cl_device_fp_config;
typedef cl_uint cl_device_mem_cache_type;
typedef cl_uint cl_device_local_mem_type;
typedef cl_bitfield cl_device_exec_capabilities;
#ifdef CL_VERSION_2_0
typedef cl_bitfield cl_device_svm_capabilities;
#endif
typedef cl_bitfield cl_command_queue_properties;
#ifdef CL_VERSION_1_2
typedef intptr_t cl_device_partition_property;
typedef cl_bitfield cl_device_affinity_domain;
#endif
typedef intptr_t cl_context_properties;
typedef cl_uint cl_context_info;
#ifdef CL_VERSION_2_0
typedef cl_bitfield cl_queue_properties;
#endif
typedef cl_uint cl_command_queue_info;
typedef cl_uint cl_channel_order;
typedef cl_uint cl_channel_type;
typedef cl_bitfield cl_mem_flags;
#ifdef CL_VERSION_2_0
typedef cl_bitfield cl_svm_mem_flags;
#endif
typedef cl_uint cl_mem_object_type;
typedef cl_uint cl_mem_info;
#ifdef CL_VERSION_1_2
typedef cl_bitfield cl_mem_migration_flags;
#endif
typedef cl_uint cl_image_info;
#ifdef CL_VERSION_1_1
typedef cl_uint cl_buffer_create_type;
#endif
typedef cl_uint cl_addressing_mode;
typedef cl_uint cl_filter_mode;
typedef cl_uint cl_sampler_info;
typedef cl_bitfield cl_map_flags;
#ifdef CL_VERSION_2_0
typedef intptr_t cl_pipe_properties;
typedef cl_uint cl_pipe_info;
#endif
typedef cl_uint cl_program_info;
typedef cl_uint cl_program_build_info;
#ifdef CL_VERSION_1_2
typedef cl_uint cl_program_binary_type;
#endif
typedef cl_int cl_build_status;
typedef cl_uint cl_kernel_info;
#ifdef CL_VERSION_1_2
typedef cl_uint cl_kernel_arg_info;
typedef cl_uint cl_kernel_arg_address_qualifier;
typedef cl_uint cl_kernel_arg_access_qualifier;
typedef cl_bitfield cl_kernel_arg_type_qualifier;
#endif
typedef cl_uint cl_kernel_work_group_info;
#ifdef CL_VERSION_2_1
typedef cl_uint cl_kernel_sub_group_info;
#endif
typedef cl_uint cl_event_info;
typedef cl_uint cl_command_type;
typedef cl_uint cl_profiling_info;
#ifdef CL_VERSION_2_0
typedef cl_bitfield cl_sampler_properties;
typedef cl_uint cl_kernel_exec_info;
#endif
typedef struct _cl_image_format {
cl_channel_order image_channel_order;
cl_channel_type image_channel_data_type;
} cl_image_format;
#ifdef CL_VERSION_1_2
typedef struct _cl_image_desc {
cl_mem_object_type image_type;
size_t image_width;
size_t image_height;
size_t image_depth;
size_t image_array_size;
size_t image_row_pitch;
size_t image_slice_pitch;
cl_uint num_mip_levels;
cl_uint num_samples;
#ifdef __GNUC__
__extension__ /* Prevents warnings about anonymous union in -pedantic builds */
#endif
union {
cl_mem buffer;
cl_mem mem_object;
};
} cl_image_desc;
#endif
#ifdef CL_VERSION_1_1
typedef struct _cl_buffer_region {
size_t origin;
size_t size;
} cl_buffer_region;
#endif
/******************************************************************************/
/* Error Codes */
#define CL_SUCCESS 0
#define CL_DEVICE_NOT_FOUND -1
#define CL_DEVICE_NOT_AVAILABLE -2
#define CL_COMPILER_NOT_AVAILABLE -3
#define CL_MEM_OBJECT_ALLOCATION_FAILURE -4
#define CL_OUT_OF_RESOURCES -5
#define CL_OUT_OF_HOST_MEMORY -6
#define CL_PROFILING_INFO_NOT_AVAILABLE -7
#define CL_MEM_COPY_OVERLAP -8
#define CL_IMAGE_FORMAT_MISMATCH -9
#define CL_IMAGE_FORMAT_NOT_SUPPORTED -10
#define CL_BUILD_PROGRAM_FAILURE -11
#define CL_MAP_FAILURE -12
#ifdef CL_VERSION_1_1
#define CL_MISALIGNED_SUB_BUFFER_OFFSET -13
#define CL_EXEC_STATUS_ERROR_FOR_EVENTS_IN_WAIT_LIST -14
#endif
#ifdef CL_VERSION_1_2
#define CL_COMPILE_PROGRAM_FAILURE -15
#define CL_LINKER_NOT_AVAILABLE -16
#define CL_LINK_PROGRAM_FAILURE -17
#define CL_DEVICE_PARTITION_FAILED -18
#define CL_KERNEL_ARG_INFO_NOT_AVAILABLE -19
#endif
#define CL_INVALID_VALUE -30
#define CL_INVALID_DEVICE_TYPE -31
#define CL_INVALID_PLATFORM -32
#define CL_INVALID_DEVICE -33
#define CL_INVALID_CONTEXT -34
#define CL_INVALID_QUEUE_PROPERTIES -35
#define CL_INVALID_COMMAND_QUEUE -36
#define CL_INVALID_HOST_PTR -37
#define CL_INVALID_MEM_OBJECT -38
#define CL_INVALID_IMAGE_FORMAT_DESCRIPTOR -39
#define CL_INVALID_IMAGE_SIZE -40
#define CL_INVALID_SAMPLER -41
#define CL_INVALID_BINARY -42
#define CL_INVALID_BUILD_OPTIONS -43
#define CL_INVALID_PROGRAM -44
#define CL_INVALID_PROGRAM_EXECUTABLE -45
#define CL_INVALID_KERNEL_NAME -46
#define CL_INVALID_KERNEL_DEFINITION -47
#define CL_INVALID_KERNEL -48
#define CL_INVALID_ARG_INDEX -49
#define CL_INVALID_ARG_VALUE -50
#define CL_INVALID_ARG_SIZE -51
#define CL_INVALID_KERNEL_ARGS -52
#define CL_INVALID_WORK_DIMENSION -53
#define CL_INVALID_WORK_GROUP_SIZE -54
#define CL_INVALID_WORK_ITEM_SIZE -55
#define CL_INVALID_GLOBAL_OFFSET -56
#define CL_INVALID_EVENT_WAIT_LIST -57
#define CL_INVALID_EVENT -58
#define CL_INVALID_OPERATION -59
#define CL_INVALID_GL_OBJECT -60
#define CL_INVALID_BUFFER_SIZE -61
#define CL_INVALID_MIP_LEVEL -62
#define CL_INVALID_GLOBAL_WORK_SIZE -63
#ifdef CL_VERSION_1_1
#define CL_INVALID_PROPERTY -64
#endif
#ifdef CL_VERSION_1_2
#define CL_INVALID_IMAGE_DESCRIPTOR -65
#define CL_INVALID_COMPILER_OPTIONS -66
#define CL_INVALID_LINKER_OPTIONS -67
#define CL_INVALID_DEVICE_PARTITION_COUNT -68
#endif
#ifdef CL_VERSION_2_0
#define CL_INVALID_PIPE_SIZE -69
#define CL_INVALID_DEVICE_QUEUE -70
#endif
#ifdef CL_VERSION_2_2
#define CL_INVALID_SPEC_ID -71
#define CL_MAX_SIZE_RESTRICTION_EXCEEDED -72
#endif
/* cl_bool */
#define CL_FALSE 0
#define CL_TRUE 1
#ifdef CL_VERSION_1_2
#define CL_BLOCKING CL_TRUE
#define CL_NON_BLOCKING CL_FALSE
#endif
/* cl_platform_info */
#define CL_PLATFORM_PROFILE 0x0900
#define CL_PLATFORM_VERSION 0x0901
#define CL_PLATFORM_NAME 0x0902
#define CL_PLATFORM_VENDOR 0x0903
#define CL_PLATFORM_EXTENSIONS 0x0904
#ifdef CL_VERSION_2_1
#define CL_PLATFORM_HOST_TIMER_RESOLUTION 0x0905
#endif
/* cl_device_type - bitfield */
#define CL_DEVICE_TYPE_DEFAULT (1 << 0)
#define CL_DEVICE_TYPE_CPU (1 << 1)
#define CL_DEVICE_TYPE_GPU (1 << 2)
#define CL_DEVICE_TYPE_ACCELERATOR (1 << 3)
#ifdef CL_VERSION_1_2
#define CL_DEVICE_TYPE_CUSTOM (1 << 4)
#endif
#define CL_DEVICE_TYPE_ALL 0xFFFFFFFF
/* cl_device_info */
#define CL_DEVICE_TYPE 0x1000
#define CL_DEVICE_VENDOR_ID 0x1001
#define CL_DEVICE_MAX_COMPUTE_UNITS 0x1002
#define CL_DEVICE_MAX_WORK_ITEM_DIMENSIONS 0x1003
#define CL_DEVICE_MAX_WORK_GROUP_SIZE 0x1004
#define CL_DEVICE_MAX_WORK_ITEM_SIZES 0x1005
#define CL_DEVICE_PREFERRED_VECTOR_WIDTH_CHAR 0x1006
#define CL_DEVICE_PREFERRED_VECTOR_WIDTH_SHORT 0x1007
#define CL_DEVICE_PREFERRED_VECTOR_WIDTH_INT 0x1008
#define CL_DEVICE_PREFERRED_VECTOR_WIDTH_LONG 0x1009
#define CL_DEVICE_PREFERRED_VECTOR_WIDTH_FLOAT 0x100A
#define CL_DEVICE_PREFERRED_VECTOR_WIDTH_DOUBLE 0x100B
#define CL_DEVICE_MAX_CLOCK_FREQUENCY 0x100C
#define CL_DEVICE_ADDRESS_BITS 0x100D
#define CL_DEVICE_MAX_READ_IMAGE_ARGS 0x100E
#define CL_DEVICE_MAX_WRITE_IMAGE_ARGS 0x100F
#define CL_DEVICE_MAX_MEM_ALLOC_SIZE 0x1010
#define CL_DEVICE_IMAGE2D_MAX_WIDTH 0x1011
#define CL_DEVICE_IMAGE2D_MAX_HEIGHT 0x1012
#define CL_DEVICE_IMAGE3D_MAX_WIDTH 0x1013
#define CL_DEVICE_IMAGE3D_MAX_HEIGHT 0x1014
#define CL_DEVICE_IMAGE3D_MAX_DEPTH 0x1015
#define CL_DEVICE_IMAGE_SUPPORT 0x1016
#define CL_DEVICE_MAX_PARAMETER_SIZE 0x1017
#define CL_DEVICE_MAX_SAMPLERS 0x1018
#define CL_DEVICE_MEM_BASE_ADDR_ALIGN 0x1019
#define CL_DEVICE_MIN_DATA_TYPE_ALIGN_SIZE 0x101A
#define CL_DEVICE_SINGLE_FP_CONFIG 0x101B
#define CL_DEVICE_GLOBAL_MEM_CACHE_TYPE 0x101C
#define CL_DEVICE_GLOBAL_MEM_CACHELINE_SIZE 0x101D
#define CL_DEVICE_GLOBAL_MEM_CACHE_SIZE 0x101E
#define CL_DEVICE_GLOBAL_MEM_SIZE 0x101F
#define CL_DEVICE_MAX_CONSTANT_BUFFER_SIZE 0x1020
#define CL_DEVICE_MAX_CONSTANT_ARGS 0x1021
#define CL_DEVICE_LOCAL_MEM_TYPE 0x1022
#define CL_DEVICE_LOCAL_MEM_SIZE 0x1023
#define CL_DEVICE_ERROR_CORRECTION_SUPPORT 0x1024
#define CL_DEVICE_PROFILING_TIMER_RESOLUTION 0x1025
#define CL_DEVICE_ENDIAN_LITTLE 0x1026
#define CL_DEVICE_AVAILABLE 0x1027
#define CL_DEVICE_COMPILER_AVAILABLE 0x1028
#define CL_DEVICE_EXECUTION_CAPABILITIES 0x1029
#define CL_DEVICE_QUEUE_PROPERTIES 0x102A /* deprecated */
#ifdef CL_VERSION_2_0
#define CL_DEVICE_QUEUE_ON_HOST_PROPERTIES 0x102A
#endif
#define CL_DEVICE_NAME 0x102B
#define CL_DEVICE_VENDOR 0x102C
#define CL_DRIVER_VERSION 0x102D
#define CL_DEVICE_PROFILE 0x102E
#define CL_DEVICE_VERSION 0x102F
#define CL_DEVICE_EXTENSIONS 0x1030
#define CL_DEVICE_PLATFORM 0x1031
#ifdef CL_VERSION_1_2
#define CL_DEVICE_DOUBLE_FP_CONFIG 0x1032
#endif
/* 0x1033 reserved for CL_DEVICE_HALF_FP_CONFIG which is already defined in "cl_ext.h" */
#ifdef CL_VERSION_1_1
#define CL_DEVICE_PREFERRED_VECTOR_WIDTH_HALF 0x1034
#define CL_DEVICE_HOST_UNIFIED_MEMORY 0x1035 /* deprecated */
#define CL_DEVICE_NATIVE_VECTOR_WIDTH_CHAR 0x1036
#define CL_DEVICE_NATIVE_VECTOR_WIDTH_SHORT 0x1037
#define CL_DEVICE_NATIVE_VECTOR_WIDTH_INT 0x1038
#define CL_DEVICE_NATIVE_VECTOR_WIDTH_LONG 0x1039
#define CL_DEVICE_NATIVE_VECTOR_WIDTH_FLOAT 0x103A
#define CL_DEVICE_NATIVE_VECTOR_WIDTH_DOUBLE 0x103B
#define CL_DEVICE_NATIVE_VECTOR_WIDTH_HALF 0x103C
#define CL_DEVICE_OPENCL_C_VERSION 0x103D
#endif
#ifdef CL_VERSION_1_2
#define CL_DEVICE_LINKER_AVAILABLE 0x103E
#define CL_DEVICE_BUILT_IN_KERNELS 0x103F
#define CL_DEVICE_IMAGE_MAX_BUFFER_SIZE 0x1040
#define CL_DEVICE_IMAGE_MAX_ARRAY_SIZE 0x1041
#define CL_DEVICE_PARENT_DEVICE 0x1042
#define CL_DEVICE_PARTITION_MAX_SUB_DEVICES 0x1043
#define CL_DEVICE_PARTITION_PROPERTIES 0x1044
#define CL_DEVICE_PARTITION_AFFINITY_DOMAIN 0x1045
#define CL_DEVICE_PARTITION_TYPE 0x1046
#define CL_DEVICE_REFERENCE_COUNT 0x1047
#define CL_DEVICE_PREFERRED_INTEROP_USER_SYNC 0x1048
#define CL_DEVICE_PRINTF_BUFFER_SIZE 0x1049
#define CL_DEVICE_IMAGE_PITCH_ALIGNMENT 0x104A
#define CL_DEVICE_IMAGE_BASE_ADDRESS_ALIGNMENT 0x104B
#endif
#ifdef CL_VERSION_2_0
#define CL_DEVICE_MAX_READ_WRITE_IMAGE_ARGS 0x104C
#define CL_DEVICE_MAX_GLOBAL_VARIABLE_SIZE 0x104D
#define CL_DEVICE_QUEUE_ON_DEVICE_PROPERTIES 0x104E
#define CL_DEVICE_QUEUE_ON_DEVICE_PREFERRED_SIZE 0x104F
#define CL_DEVICE_QUEUE_ON_DEVICE_MAX_SIZE 0x1050
#define CL_DEVICE_MAX_ON_DEVICE_QUEUES 0x1051
#define CL_DEVICE_MAX_ON_DEVICE_EVENTS 0x1052
#define CL_DEVICE_SVM_CAPABILITIES 0x1053
#define CL_DEVICE_GLOBAL_VARIABLE_PREFERRED_TOTAL_SIZE 0x1054
#define CL_DEVICE_MAX_PIPE_ARGS 0x1055
#define CL_DEVICE_PIPE_MAX_ACTIVE_RESERVATIONS 0x1056
#define CL_DEVICE_PIPE_MAX_PACKET_SIZE 0x1057
#define CL_DEVICE_PREFERRED_PLATFORM_ATOMIC_ALIGNMENT 0x1058
#define CL_DEVICE_PREFERRED_GLOBAL_ATOMIC_ALIGNMENT 0x1059
#define CL_DEVICE_PREFERRED_LOCAL_ATOMIC_ALIGNMENT 0x105A
#endif
#ifdef CL_VERSION_2_1
#define CL_DEVICE_IL_VERSION 0x105B
#define CL_DEVICE_MAX_NUM_SUB_GROUPS 0x105C
#define CL_DEVICE_SUB_GROUP_INDEPENDENT_FORWARD_PROGRESS 0x105D
#endif
/* cl_device_fp_config - bitfield */
#define CL_FP_DENORM (1 << 0)
#define CL_FP_INF_NAN (1 << 1)
#define CL_FP_ROUND_TO_NEAREST (1 << 2)
#define CL_FP_ROUND_TO_ZERO (1 << 3)
#define CL_FP_ROUND_TO_INF (1 << 4)
#define CL_FP_FMA (1 << 5)
#ifdef CL_VERSION_1_1
#define CL_FP_SOFT_FLOAT (1 << 6)
#endif
#ifdef CL_VERSION_1_2
#define CL_FP_CORRECTLY_ROUNDED_DIVIDE_SQRT (1 << 7)
#endif
/* cl_device_mem_cache_type */
#define CL_NONE 0x0
#define CL_READ_ONLY_CACHE 0x1
#define CL_READ_WRITE_CACHE 0x2
/* cl_device_local_mem_type */
#define CL_LOCAL 0x1
#define CL_GLOBAL 0x2
/* cl_device_exec_capabilities - bitfield */
#define CL_EXEC_KERNEL (1 << 0)
#define CL_EXEC_NATIVE_KERNEL (1 << 1)
/* cl_command_queue_properties - bitfield */
#define CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE (1 << 0)
#define CL_QUEUE_PROFILING_ENABLE (1 << 1)
#ifdef CL_VERSION_2_0
#define CL_QUEUE_ON_DEVICE (1 << 2)
#define CL_QUEUE_ON_DEVICE_DEFAULT (1 << 3)
#endif
/* cl_context_info */
#define CL_CONTEXT_REFERENCE_COUNT 0x1080
#define CL_CONTEXT_DEVICES 0x1081
#define CL_CONTEXT_PROPERTIES 0x1082
#ifdef CL_VERSION_1_1
#define CL_CONTEXT_NUM_DEVICES 0x1083
#endif
/* cl_context_properties */
#define CL_CONTEXT_PLATFORM 0x1084
#ifdef CL_VERSION_1_2
#define CL_CONTEXT_INTEROP_USER_SYNC 0x1085
#endif
#ifdef CL_VERSION_1_2
/* cl_device_partition_property */
#define CL_DEVICE_PARTITION_EQUALLY 0x1086
#define CL_DEVICE_PARTITION_BY_COUNTS 0x1087
#define CL_DEVICE_PARTITION_BY_COUNTS_LIST_END 0x0
#define CL_DEVICE_PARTITION_BY_AFFINITY_DOMAIN 0x1088
#endif
#ifdef CL_VERSION_1_2
/* cl_device_affinity_domain */
#define CL_DEVICE_AFFINITY_DOMAIN_NUMA (1 << 0)
#define CL_DEVICE_AFFINITY_DOMAIN_L4_CACHE (1 << 1)
#define CL_DEVICE_AFFINITY_DOMAIN_L3_CACHE (1 << 2)
#define CL_DEVICE_AFFINITY_DOMAIN_L2_CACHE (1 << 3)
#define CL_DEVICE_AFFINITY_DOMAIN_L1_CACHE (1 << 4)
#define CL_DEVICE_AFFINITY_DOMAIN_NEXT_PARTITIONABLE (1 << 5)
#endif
#ifdef CL_VERSION_2_0
/* cl_device_svm_capabilities */
#define CL_DEVICE_SVM_COARSE_GRAIN_BUFFER (1 << 0)
#define CL_DEVICE_SVM_FINE_GRAIN_BUFFER (1 << 1)
#define CL_DEVICE_SVM_FINE_GRAIN_SYSTEM (1 << 2)
#define CL_DEVICE_SVM_ATOMICS (1 << 3)
#endif
/* cl_command_queue_info */
#define CL_QUEUE_CONTEXT 0x1090
#define CL_QUEUE_DEVICE 0x1091
#define CL_QUEUE_REFERENCE_COUNT 0x1092
#define CL_QUEUE_PROPERTIES 0x1093
#ifdef CL_VERSION_2_0
#define CL_QUEUE_SIZE 0x1094
#endif
#ifdef CL_VERSION_2_1
#define CL_QUEUE_DEVICE_DEFAULT 0x1095
#endif
/* cl_mem_flags and cl_svm_mem_flags - bitfield */
#define CL_MEM_READ_WRITE (1 << 0)
#define CL_MEM_WRITE_ONLY (1 << 1)
#define CL_MEM_READ_ONLY (1 << 2)
#define CL_MEM_USE_HOST_PTR (1 << 3)
#define CL_MEM_ALLOC_HOST_PTR (1 << 4)
#define CL_MEM_COPY_HOST_PTR (1 << 5)
/* reserved (1 << 6) */
#ifdef CL_VERSION_1_2
#define CL_MEM_HOST_WRITE_ONLY (1 << 7)
#define CL_MEM_HOST_READ_ONLY (1 << 8)
#define CL_MEM_HOST_NO_ACCESS (1 << 9)
#endif
#ifdef CL_VERSION_2_0
#define CL_MEM_SVM_FINE_GRAIN_BUFFER (1 << 10) /* used by cl_svm_mem_flags only */
#define CL_MEM_SVM_ATOMICS (1 << 11) /* used by cl_svm_mem_flags only */
#define CL_MEM_KERNEL_READ_AND_WRITE (1 << 12)
#endif
#ifdef CL_VERSION_1_2
/* cl_mem_migration_flags - bitfield */
#define CL_MIGRATE_MEM_OBJECT_HOST (1 << 0)
#define CL_MIGRATE_MEM_OBJECT_CONTENT_UNDEFINED (1 << 1)
#endif
/* cl_channel_order */
#define CL_R 0x10B0
#define CL_A 0x10B1
#define CL_RG 0x10B2
#define CL_RA 0x10B3
#define CL_RGB 0x10B4
#define CL_RGBA 0x10B5
#define CL_BGRA 0x10B6
#define CL_ARGB 0x10B7
#define CL_INTENSITY 0x10B8
#define CL_LUMINANCE 0x10B9
#ifdef CL_VERSION_1_1
#define CL_Rx 0x10BA
#define CL_RGx 0x10BB
#define CL_RGBx 0x10BC
#endif
#ifdef CL_VERSION_1_2
#define CL_DEPTH 0x10BD
#define CL_DEPTH_STENCIL 0x10BE
#endif
#ifdef CL_VERSION_2_0
#define CL_sRGB 0x10BF
#define CL_sRGBx 0x10C0
#define CL_sRGBA 0x10C1
#define CL_sBGRA 0x10C2
#define CL_ABGR 0x10C3
#endif
/* cl_channel_type */
#define CL_SNORM_INT8 0x10D0
#define CL_SNORM_INT16 0x10D1
#define CL_UNORM_INT8 0x10D2
#define CL_UNORM_INT16 0x10D3
#define CL_UNORM_SHORT_565 0x10D4
#define CL_UNORM_SHORT_555 0x10D5
#define CL_UNORM_INT_101010 0x10D6
#define CL_SIGNED_INT8 0x10D7
#define CL_SIGNED_INT16 0x10D8
#define CL_SIGNED_INT32 0x10D9
#define CL_UNSIGNED_INT8 0x10DA
#define CL_UNSIGNED_INT16 0x10DB
#define CL_UNSIGNED_INT32 0x10DC
#define CL_HALF_FLOAT 0x10DD
#define CL_FLOAT 0x10DE
#ifdef CL_VERSION_1_2
#define CL_UNORM_INT24 0x10DF
#endif
#ifdef CL_VERSION_2_1
#define CL_UNORM_INT_101010_2 0x10E0
#endif
/* cl_mem_object_type */
#define CL_MEM_OBJECT_BUFFER 0x10F0
#define CL_MEM_OBJECT_IMAGE2D 0x10F1
#define CL_MEM_OBJECT_IMAGE3D 0x10F2
#ifdef CL_VERSION_1_2
#define CL_MEM_OBJECT_IMAGE2D_ARRAY 0x10F3
#define CL_MEM_OBJECT_IMAGE1D 0x10F4
#define CL_MEM_OBJECT_IMAGE1D_ARRAY 0x10F5
#define CL_MEM_OBJECT_IMAGE1D_BUFFER 0x10F6
#endif
#ifdef CL_VERSION_2_0
#define CL_MEM_OBJECT_PIPE 0x10F7
#endif
/* cl_mem_info */
#define CL_MEM_TYPE 0x1100
#define CL_MEM_FLAGS 0x1101
#define CL_MEM_SIZE 0x1102
#define CL_MEM_HOST_PTR 0x1103
#define CL_MEM_MAP_COUNT 0x1104
#define CL_MEM_REFERENCE_COUNT 0x1105
#define CL_MEM_CONTEXT 0x1106
#ifdef CL_VERSION_1_1
#define CL_MEM_ASSOCIATED_MEMOBJECT 0x1107
#define CL_MEM_OFFSET 0x1108
#endif
#ifdef CL_VERSION_2_0
#define CL_MEM_USES_SVM_POINTER 0x1109
#endif
/* cl_image_info */
#define CL_IMAGE_FORMAT 0x1110
#define CL_IMAGE_ELEMENT_SIZE 0x1111
#define CL_IMAGE_ROW_PITCH 0x1112
#define CL_IMAGE_SLICE_PITCH 0x1113
#define CL_IMAGE_WIDTH 0x1114
#define CL_IMAGE_HEIGHT 0x1115
#define CL_IMAGE_DEPTH 0x1116
#ifdef CL_VERSION_1_2
#define CL_IMAGE_ARRAY_SIZE 0x1117
#define CL_IMAGE_BUFFER 0x1118
#define CL_IMAGE_NUM_MIP_LEVELS 0x1119
#define CL_IMAGE_NUM_SAMPLES 0x111A
#endif
#ifdef CL_VERSION_2_0
/* cl_pipe_info */
#define CL_PIPE_PACKET_SIZE 0x1120
#define CL_PIPE_MAX_PACKETS 0x1121
#endif
/* cl_addressing_mode */
#define CL_ADDRESS_NONE 0x1130
#define CL_ADDRESS_CLAMP_TO_EDGE 0x1131
#define CL_ADDRESS_CLAMP 0x1132
#define CL_ADDRESS_REPEAT 0x1133
#ifdef CL_VERSION_1_1
#define CL_ADDRESS_MIRRORED_REPEAT 0x1134
#endif
/* cl_filter_mode */
#define CL_FILTER_NEAREST 0x1140
#define CL_FILTER_LINEAR 0x1141
/* cl_sampler_info */
#define CL_SAMPLER_REFERENCE_COUNT 0x1150
#define CL_SAMPLER_CONTEXT 0x1151
#define CL_SAMPLER_NORMALIZED_COORDS 0x1152
#define CL_SAMPLER_ADDRESSING_MODE 0x1153
#define CL_SAMPLER_FILTER_MODE 0x1154
#ifdef CL_VERSION_2_0
#define CL_SAMPLER_MIP_FILTER_MODE 0x1155
#define CL_SAMPLER_LOD_MIN 0x1156
#define CL_SAMPLER_LOD_MAX 0x1157
#endif
/* cl_map_flags - bitfield */
#define CL_MAP_READ (1 << 0)
#define CL_MAP_WRITE (1 << 1)
#ifdef CL_VERSION_1_2
#define CL_MAP_WRITE_INVALIDATE_REGION (1 << 2)
#endif
/* cl_program_info */
#define CL_PROGRAM_REFERENCE_COUNT 0x1160
#define CL_PROGRAM_CONTEXT 0x1161
#define CL_PROGRAM_NUM_DEVICES 0x1162
#define CL_PROGRAM_DEVICES 0x1163
#define CL_PROGRAM_SOURCE 0x1164
#define CL_PROGRAM_BINARY_SIZES 0x1165
#define CL_PROGRAM_BINARIES 0x1166
#ifdef CL_VERSION_1_2
#define CL_PROGRAM_NUM_KERNELS 0x1167
#define CL_PROGRAM_KERNEL_NAMES 0x1168
#endif
#ifdef CL_VERSION_2_1
#define CL_PROGRAM_IL 0x1169
#endif
#ifdef CL_VERSION_2_2
#define CL_PROGRAM_SCOPE_GLOBAL_CTORS_PRESENT 0x116A
#define CL_PROGRAM_SCOPE_GLOBAL_DTORS_PRESENT 0x116B
#endif
/* cl_program_build_info */
#define CL_PROGRAM_BUILD_STATUS 0x1181
#define CL_PROGRAM_BUILD_OPTIONS 0x1182
#define CL_PROGRAM_BUILD_LOG 0x1183
#ifdef CL_VERSION_1_2
#define CL_PROGRAM_BINARY_TYPE 0x1184
#endif
#ifdef CL_VERSION_2_0
#define CL_PROGRAM_BUILD_GLOBAL_VARIABLE_TOTAL_SIZE 0x1185
#endif
#ifdef CL_VERSION_1_2
/* cl_program_binary_type */
#define CL_PROGRAM_BINARY_TYPE_NONE 0x0
#define CL_PROGRAM_BINARY_TYPE_COMPILED_OBJECT 0x1
#define CL_PROGRAM_BINARY_TYPE_LIBRARY 0x2
#define CL_PROGRAM_BINARY_TYPE_EXECUTABLE 0x4
#endif
/* cl_build_status */
#define CL_BUILD_SUCCESS 0
#define CL_BUILD_NONE -1
#define CL_BUILD_ERROR -2
#define CL_BUILD_IN_PROGRESS -3
/* cl_kernel_info */
#define CL_KERNEL_FUNCTION_NAME 0x1190
#define CL_KERNEL_NUM_ARGS 0x1191
#define CL_KERNEL_REFERENCE_COUNT 0x1192
#define CL_KERNEL_CONTEXT 0x1193
#define CL_KERNEL_PROGRAM 0x1194
#ifdef CL_VERSION_1_2
#define CL_KERNEL_ATTRIBUTES 0x1195
#endif
#ifdef CL_VERSION_2_1
#define CL_KERNEL_MAX_NUM_SUB_GROUPS 0x11B9
#define CL_KERNEL_COMPILE_NUM_SUB_GROUPS 0x11BA
#endif
#ifdef CL_VERSION_1_2
/* cl_kernel_arg_info */
#define CL_KERNEL_ARG_ADDRESS_QUALIFIER 0x1196
#define CL_KERNEL_ARG_ACCESS_QUALIFIER 0x1197
#define CL_KERNEL_ARG_TYPE_NAME 0x1198
#define CL_KERNEL_ARG_TYPE_QUALIFIER 0x1199
#define CL_KERNEL_ARG_NAME 0x119A
#endif
#ifdef CL_VERSION_1_2
/* cl_kernel_arg_address_qualifier */
#define CL_KERNEL_ARG_ADDRESS_GLOBAL 0x119B
#define CL_KERNEL_ARG_ADDRESS_LOCAL 0x119C
#define CL_KERNEL_ARG_ADDRESS_CONSTANT 0x119D
#define CL_KERNEL_ARG_ADDRESS_PRIVATE 0x119E
#endif
#ifdef CL_VERSION_1_2
/* cl_kernel_arg_access_qualifier */
#define CL_KERNEL_ARG_ACCESS_READ_ONLY 0x11A0
#define CL_KERNEL_ARG_ACCESS_WRITE_ONLY 0x11A1
#define CL_KERNEL_ARG_ACCESS_READ_WRITE 0x11A2
#define CL_KERNEL_ARG_ACCESS_NONE 0x11A3
#endif
#ifdef CL_VERSION_1_2
/* cl_kernel_arg_type_qualifier */
#define CL_KERNEL_ARG_TYPE_NONE 0
#define CL_KERNEL_ARG_TYPE_CONST (1 << 0)
#define CL_KERNEL_ARG_TYPE_RESTRICT (1 << 1)
#define CL_KERNEL_ARG_TYPE_VOLATILE (1 << 2)
#ifdef CL_VERSION_2_0
#define CL_KERNEL_ARG_TYPE_PIPE (1 << 3)
#endif
#endif
/* cl_kernel_work_group_info */
#define CL_KERNEL_WORK_GROUP_SIZE 0x11B0
#define CL_KERNEL_COMPILE_WORK_GROUP_SIZE 0x11B1
#define CL_KERNEL_LOCAL_MEM_SIZE 0x11B2
#define CL_KERNEL_PREFERRED_WORK_GROUP_SIZE_MULTIPLE 0x11B3
#define CL_KERNEL_PRIVATE_MEM_SIZE 0x11B4
#ifdef CL_VERSION_1_2
#define CL_KERNEL_GLOBAL_WORK_SIZE 0x11B5
#endif
#ifdef CL_VERSION_2_1
/* cl_kernel_sub_group_info */
#define CL_KERNEL_MAX_SUB_GROUP_SIZE_FOR_NDRANGE 0x2033
#define CL_KERNEL_SUB_GROUP_COUNT_FOR_NDRANGE 0x2034
#define CL_KERNEL_LOCAL_SIZE_FOR_SUB_GROUP_COUNT 0x11B8
#endif
#ifdef CL_VERSION_2_0
/* cl_kernel_exec_info */
#define CL_KERNEL_EXEC_INFO_SVM_PTRS 0x11B6
#define CL_KERNEL_EXEC_INFO_SVM_FINE_GRAIN_SYSTEM 0x11B7
#endif
/* cl_event_info */
#define CL_EVENT_COMMAND_QUEUE 0x11D0
#define CL_EVENT_COMMAND_TYPE 0x11D1
#define CL_EVENT_REFERENCE_COUNT 0x11D2
#define CL_EVENT_COMMAND_EXECUTION_STATUS 0x11D3
#ifdef CL_VERSION_1_1
#define CL_EVENT_CONTEXT 0x11D4
#endif
/* cl_command_type */
#define CL_COMMAND_NDRANGE_KERNEL 0x11F0
#define CL_COMMAND_TASK 0x11F1
#define CL_COMMAND_NATIVE_KERNEL 0x11F2
#define CL_COMMAND_READ_BUFFER 0x11F3
#define CL_COMMAND_WRITE_BUFFER 0x11F4
#define CL_COMMAND_COPY_BUFFER 0x11F5
#define CL_COMMAND_READ_IMAGE 0x11F6
#define CL_COMMAND_WRITE_IMAGE 0x11F7
#define CL_COMMAND_COPY_IMAGE 0x11F8
#define CL_COMMAND_COPY_IMAGE_TO_BUFFER 0x11F9
#define CL_COMMAND_COPY_BUFFER_TO_IMAGE 0x11FA
#define CL_COMMAND_MAP_BUFFER 0x11FB
#define CL_COMMAND_MAP_IMAGE 0x11FC
#define CL_COMMAND_UNMAP_MEM_OBJECT 0x11FD
#define CL_COMMAND_MARKER 0x11FE
#define CL_COMMAND_ACQUIRE_GL_OBJECTS 0x11FF
#define CL_COMMAND_RELEASE_GL_OBJECTS 0x1200
#ifdef CL_VERSION_1_1
#define CL_COMMAND_READ_BUFFER_RECT 0x1201
#define CL_COMMAND_WRITE_BUFFER_RECT 0x1202
#define CL_COMMAND_COPY_BUFFER_RECT 0x1203
#define CL_COMMAND_USER 0x1204
#endif
#ifdef CL_VERSION_1_2
#define CL_COMMAND_BARRIER 0x1205
#define CL_COMMAND_MIGRATE_MEM_OBJECTS 0x1206
#define CL_COMMAND_FILL_BUFFER 0x1207
#define CL_COMMAND_FILL_IMAGE 0x1208
#endif
#ifdef CL_VERSION_2_0
#define CL_COMMAND_SVM_FREE 0x1209
#define CL_COMMAND_SVM_MEMCPY 0x120A
#define CL_COMMAND_SVM_MEMFILL 0x120B
#define CL_COMMAND_SVM_MAP 0x120C
#define CL_COMMAND_SVM_UNMAP 0x120D
#endif
/* command execution status */
#define CL_COMPLETE 0x0
#define CL_RUNNING 0x1
#define CL_SUBMITTED 0x2
#define CL_QUEUED 0x3
#ifdef CL_VERSION_1_1
/* cl_buffer_create_type */
#define CL_BUFFER_CREATE_TYPE_REGION 0x1220
#endif
/* cl_profiling_info */
#define CL_PROFILING_COMMAND_QUEUED 0x1280
#define CL_PROFILING_COMMAND_SUBMIT 0x1281
#define CL_PROFILING_COMMAND_START 0x1282
#define CL_PROFILING_COMMAND_END 0x1283
#ifdef CL_VERSION_2_0
#define CL_PROFILING_COMMAND_COMPLETE 0x1284
#endif
/********************************************************************************************************/
/* Platform API */
extern CL_API_ENTRY cl_int CL_API_CALL
clGetPlatformIDs(cl_uint /* num_entries */,
cl_platform_id * /* platforms */,
cl_uint * /* num_platforms */) CL_API_SUFFIX__VERSION_1_0;
extern CL_API_ENTRY cl_int CL_API_CALL
clGetPlatformInfo(cl_platform_id /* platform */,
cl_platform_info /* param_name */,
size_t /* param_value_size */,
void * /* param_value */,
size_t * /* param_value_size_ret */) CL_API_SUFFIX__VERSION_1_0;
/* Device APIs */
extern CL_API_ENTRY cl_int CL_API_CALL
clGetDeviceIDs(cl_platform_id /* platform */,
cl_device_type /* device_type */,
cl_uint /* num_entries */,
cl_device_id * /* devices */,
cl_uint * /* num_devices */) CL_API_SUFFIX__VERSION_1_0;
extern CL_API_ENTRY cl_int CL_API_CALL
clGetDeviceInfo(cl_device_id /* device */,
cl_device_info /* param_name */,
size_t /* param_value_size */,
void * /* param_value */,
size_t * /* param_value_size_ret */) CL_API_SUFFIX__VERSION_1_0;
#ifdef CL_VERSION_1_2
extern CL_API_ENTRY cl_int CL_API_CALL
clCreateSubDevices(cl_device_id /* in_device */,
const cl_device_partition_property * /* properties */,
cl_uint /* num_devices */,
cl_device_id * /* out_devices */,
cl_uint * /* num_devices_ret */) CL_API_SUFFIX__VERSION_1_2;
extern CL_API_ENTRY cl_int CL_API_CALL
clRetainDevice(cl_device_id /* device */) CL_API_SUFFIX__VERSION_1_2;
extern CL_API_ENTRY cl_int CL_API_CALL
clReleaseDevice(cl_device_id /* device */) CL_API_SUFFIX__VERSION_1_2;
#endif
#ifdef CL_VERSION_2_1
extern CL_API_ENTRY cl_int CL_API_CALL
clSetDefaultDeviceCommandQueue(cl_context /* context */,
cl_device_id /* device */,
cl_command_queue /* command_queue */) CL_API_SUFFIX__VERSION_2_1;
extern CL_API_ENTRY cl_int CL_API_CALL
clGetDeviceAndHostTimer(cl_device_id /* device */,
cl_ulong* /* device_timestamp */,
cl_ulong* /* host_timestamp */) CL_API_SUFFIX__VERSION_2_1;
extern CL_API_ENTRY cl_int CL_API_CALL
clGetHostTimer(cl_device_id /* device */,
cl_ulong * /* host_timestamp */) CL_API_SUFFIX__VERSION_2_1;
#endif
/* Context APIs */
extern CL_API_ENTRY cl_context CL_API_CALL
clCreateContext(const cl_context_properties * /* properties */,
cl_uint /* num_devices */,
const cl_device_id * /* devices */,
void (CL_CALLBACK * /* pfn_notify */)(const char *, const void *, size_t, void *),
void * /* user_data */,
cl_int * /* errcode_ret */) CL_API_SUFFIX__VERSION_1_0;
extern CL_API_ENTRY cl_context CL_API_CALL
clCreateContextFromType(const cl_context_properties * /* properties */,
cl_device_type /* device_type */,
void (CL_CALLBACK * /* pfn_notify*/ )(const char *, const void *, size_t, void *),
void * /* user_data */,
cl_int * /* errcode_ret */) CL_API_SUFFIX__VERSION_1_0;
extern CL_API_ENTRY cl_int CL_API_CALL
clRetainContext(cl_context /* context */) CL_API_SUFFIX__VERSION_1_0;
extern CL_API_ENTRY cl_int CL_API_CALL
clReleaseContext(cl_context /* context */) CL_API_SUFFIX__VERSION_1_0;
extern CL_API_ENTRY cl_int CL_API_CALL
clGetContextInfo(cl_context /* context */,
cl_context_info /* param_name */,
size_t /* param_value_size */,
void * /* param_value */,
size_t * /* param_value_size_ret */) CL_API_SUFFIX__VERSION_1_0;
/* Command Queue APIs */
#ifdef CL_VERSION_2_0
extern CL_API_ENTRY cl_command_queue CL_API_CALL
clCreateCommandQueueWithProperties(cl_context /* context */,
cl_device_id /* device */,
const cl_queue_properties * /* properties */,
cl_int * /* errcode_ret */) CL_API_SUFFIX__VERSION_2_0;
#endif
extern CL_API_ENTRY cl_int CL_API_CALL
clRetainCommandQueue(cl_command_queue /* command_queue */) CL_API_SUFFIX__VERSION_1_0;
extern CL_API_ENTRY cl_int CL_API_CALL
clReleaseCommandQueue(cl_command_queue /* command_queue */) CL_API_SUFFIX__VERSION_1_0;
extern CL_API_ENTRY cl_int CL_API_CALL
clGetCommandQueueInfo(cl_command_queue /* command_queue */,
cl_command_queue_info /* param_name */,
size_t /* param_value_size */,
void * /* param_value */,
size_t * /* param_value_size_ret */) CL_API_SUFFIX__VERSION_1_0;
/* Memory Object APIs */
extern CL_API_ENTRY cl_mem CL_API_CALL
clCreateBuffer(cl_context /* context */,
cl_mem_flags /* flags */,
size_t /* size */,
void * /* host_ptr */,
cl_int * /* errcode_ret */) CL_API_SUFFIX__VERSION_1_0;
#ifdef CL_VERSION_1_1
extern CL_API_ENTRY cl_mem CL_API_CALL
clCreateSubBuffer(cl_mem /* buffer */,
cl_mem_flags /* flags */,
cl_buffer_create_type /* buffer_create_type */,
const void * /* buffer_create_info */,
cl_int * /* errcode_ret */) CL_API_SUFFIX__VERSION_1_1;
#endif
#ifdef CL_VERSION_1_2
extern CL_API_ENTRY cl_mem CL_API_CALL
clCreateImage(cl_context /* context */,
cl_mem_flags /* flags */,
const cl_image_format * /* image_format */,
const cl_image_desc * /* image_desc */,
void * /* host_ptr */,
cl_int * /* errcode_ret */) CL_API_SUFFIX__VERSION_1_2;
#endif
#ifdef CL_VERSION_2_0
extern CL_API_ENTRY cl_mem CL_API_CALL
clCreatePipe(cl_context /* context */,
cl_mem_flags /* flags */,
cl_uint /* pipe_packet_size */,
cl_uint /* pipe_max_packets */,
const cl_pipe_properties * /* properties */,
cl_int * /* errcode_ret */) CL_API_SUFFIX__VERSION_2_0;
#endif
extern CL_API_ENTRY cl_int CL_API_CALL
clRetainMemObject(cl_mem /* memobj */) CL_API_SUFFIX__VERSION_1_0;
extern CL_API_ENTRY cl_int CL_API_CALL
clReleaseMemObject(cl_mem /* memobj */) CL_API_SUFFIX__VERSION_1_0;
extern CL_API_ENTRY cl_int CL_API_CALL
clGetSupportedImageFormats(cl_context /* context */,
cl_mem_flags /* flags */,
cl_mem_object_type /* image_type */,
cl_uint /* num_entries */,
cl_image_format * /* image_formats */,
cl_uint * /* num_image_formats */) CL_API_SUFFIX__VERSION_1_0;
extern CL_API_ENTRY cl_int CL_API_CALL
clGetMemObjectInfo(cl_mem /* memobj */,
cl_mem_info /* param_name */,
size_t /* param_value_size */,
void * /* param_value */,
size_t * /* param_value_size_ret */) CL_API_SUFFIX__VERSION_1_0;
extern CL_API_ENTRY cl_int CL_API_CALL
clGetImageInfo(cl_mem /* image */,
cl_image_info /* param_name */,
size_t /* param_value_size */,
void * /* param_value */,
size_t * /* param_value_size_ret */) CL_API_SUFFIX__VERSION_1_0;
#ifdef CL_VERSION_2_0
extern CL_API_ENTRY cl_int CL_API_CALL
clGetPipeInfo(cl_mem /* pipe */,
cl_pipe_info /* param_name */,
size_t /* param_value_size */,
void * /* param_value */,
size_t * /* param_value_size_ret */) CL_API_SUFFIX__VERSION_2_0;
#endif
#ifdef CL_VERSION_1_1
extern CL_API_ENTRY cl_int CL_API_CALL
clSetMemObjectDestructorCallback(cl_mem /* memobj */,
void (CL_CALLBACK * /*pfn_notify*/)( cl_mem /* memobj */, void* /*user_data*/),
void * /*user_data */ ) CL_API_SUFFIX__VERSION_1_1;
#endif
/* SVM Allocation APIs */
#ifdef CL_VERSION_2_0
extern CL_API_ENTRY void * CL_API_CALL
clSVMAlloc(cl_context /* context */,
cl_svm_mem_flags /* flags */,
size_t /* size */,
cl_uint /* alignment */) CL_API_SUFFIX__VERSION_2_0;
extern CL_API_ENTRY void CL_API_CALL
clSVMFree(cl_context /* context */,
void * /* svm_pointer */) CL_API_SUFFIX__VERSION_2_0;
#endif
/* Sampler APIs */
#ifdef CL_VERSION_2_0
extern CL_API_ENTRY cl_sampler CL_API_CALL
clCreateSamplerWithProperties(cl_context /* context */,
const cl_sampler_properties * /* normalized_coords */,
cl_int * /* errcode_ret */) CL_API_SUFFIX__VERSION_2_0;
#endif
extern CL_API_ENTRY cl_int CL_API_CALL
clRetainSampler(cl_sampler /* sampler */) CL_API_SUFFIX__VERSION_1_0;
extern CL_API_ENTRY cl_int CL_API_CALL
clReleaseSampler(cl_sampler /* sampler */) CL_API_SUFFIX__VERSION_1_0;
extern CL_API_ENTRY cl_int CL_API_CALL
clGetSamplerInfo(cl_sampler /* sampler */,
cl_sampler_info /* param_name */,
size_t /* param_value_size */,
void * /* param_value */,
size_t * /* param_value_size_ret */) CL_API_SUFFIX__VERSION_1_0;
/* Program Object APIs */
extern CL_API_ENTRY cl_program CL_API_CALL
clCreateProgramWithSource(cl_context /* context */,
cl_uint /* count */,
const char ** /* strings */,
const size_t * /* lengths */,
cl_int * /* errcode_ret */) CL_API_SUFFIX__VERSION_1_0;
extern CL_API_ENTRY cl_program CL_API_CALL
clCreateProgramWithBinary(cl_context /* context */,
cl_uint /* num_devices */,
const cl_device_id * /* device_list */,
const size_t * /* lengths */,
const unsigned char ** /* binaries */,
cl_int * /* binary_status */,
cl_int * /* errcode_ret */) CL_API_SUFFIX__VERSION_1_0;
#ifdef CL_VERSION_1_2
extern CL_API_ENTRY cl_program CL_API_CALL
clCreateProgramWithBuiltInKernels(cl_context /* context */,
cl_uint /* num_devices */,
const cl_device_id * /* device_list */,
const char * /* kernel_names */,
cl_int * /* errcode_ret */) CL_API_SUFFIX__VERSION_1_2;
#endif
#ifdef CL_VERSION_2_1
extern CL_API_ENTRY cl_program CL_API_CALL
clCreateProgramWithIL(cl_context /* context */,
const void* /* il */,
size_t /* length */,
cl_int* /* errcode_ret */) CL_API_SUFFIX__VERSION_2_1;
#endif
extern CL_API_ENTRY cl_int CL_API_CALL
clRetainProgram(cl_program /* program */) CL_API_SUFFIX__VERSION_1_0;
extern CL_API_ENTRY cl_int CL_API_CALL
clReleaseProgram(cl_program /* program */) CL_API_SUFFIX__VERSION_1_0;
extern CL_API_ENTRY cl_int CL_API_CALL
clBuildProgram(cl_program /* program */,
cl_uint /* num_devices */,
const cl_device_id * /* device_list */,
const char * /* options */,
void (CL_CALLBACK * /* pfn_notify */)(cl_program /* program */, void * /* user_data */),
void * /* user_data */) CL_API_SUFFIX__VERSION_1_0;
#ifdef CL_VERSION_1_2
extern CL_API_ENTRY cl_int CL_API_CALL
clCompileProgram(cl_program /* program */,
cl_uint /* num_devices */,
const cl_device_id * /* device_list */,
const char * /* options */,
cl_uint /* num_input_headers */,
const cl_program * /* input_headers */,
const char ** /* header_include_names */,
void (CL_CALLBACK * /* pfn_notify */)(cl_program /* program */, void * /* user_data */),
void * /* user_data */) CL_API_SUFFIX__VERSION_1_2;
extern CL_API_ENTRY cl_program CL_API_CALL
clLinkProgram(cl_context /* context */,
cl_uint /* num_devices */,
const cl_device_id * /* device_list */,
const char * /* options */,
cl_uint /* num_input_programs */,
const cl_program * /* input_programs */,
void (CL_CALLBACK * /* pfn_notify */)(cl_program /* program */, void * /* user_data */),
void * /* user_data */,
cl_int * /* errcode_ret */ ) CL_API_SUFFIX__VERSION_1_2;
#endif
#ifdef CL_VERSION_2_2
extern CL_API_ENTRY cl_int CL_API_CALL
clSetProgramReleaseCallback(cl_program /* program */,
void (CL_CALLBACK * /* pfn_notify */)(cl_program /* program */, void * /* user_data */),
void * /* user_data */) CL_API_SUFFIX__VERSION_2_2;
extern CL_API_ENTRY cl_int CL_API_CALL
clSetProgramSpecializationConstant(cl_program /* program */,
cl_uint /* spec_id */,
size_t /* spec_size */,
const void* /* spec_value */) CL_API_SUFFIX__VERSION_2_2;
#endif
#ifdef CL_VERSION_1_2
extern CL_API_ENTRY cl_int CL_API_CALL
clUnloadPlatformCompiler(cl_platform_id /* platform */) CL_API_SUFFIX__VERSION_1_2;
#endif
extern CL_API_ENTRY cl_int CL_API_CALL
clGetProgramInfo(cl_program /* program */,
cl_program_info /* param_name */,
size_t /* param_value_size */,
void * /* param_value */,
size_t * /* param_value_size_ret */) CL_API_SUFFIX__VERSION_1_0;
extern CL_API_ENTRY cl_int CL_API_CALL
clGetProgramBuildInfo(cl_program /* program */,
cl_device_id /* device */,
cl_program_build_info /* param_name */,
size_t /* param_value_size */,
void * /* param_value */,
size_t * /* param_value_size_ret */) CL_API_SUFFIX__VERSION_1_0;
/* Kernel Object APIs */
extern CL_API_ENTRY cl_kernel CL_API_CALL
clCreateKernel(cl_program /* program */,
const char * /* kernel_name */,
cl_int * /* errcode_ret */) CL_API_SUFFIX__VERSION_1_0;
extern CL_API_ENTRY cl_int CL_API_CALL
clCreateKernelsInProgram(cl_program /* program */,
cl_uint /* num_kernels */,
cl_kernel * /* kernels */,
cl_uint * /* num_kernels_ret */) CL_API_SUFFIX__VERSION_1_0;
#ifdef CL_VERSION_2_1
extern CL_API_ENTRY cl_kernel CL_API_CALL
clCloneKernel(cl_kernel /* source_kernel */,
cl_int* /* errcode_ret */) CL_API_SUFFIX__VERSION_2_1;
#endif
extern CL_API_ENTRY cl_int CL_API_CALL
clRetainKernel(cl_kernel /* kernel */) CL_API_SUFFIX__VERSION_1_0;
extern CL_API_ENTRY cl_int CL_API_CALL
clReleaseKernel(cl_kernel /* kernel */) CL_API_SUFFIX__VERSION_1_0;
extern CL_API_ENTRY cl_int CL_API_CALL
clSetKernelArg(cl_kernel /* kernel */,
cl_uint /* arg_index */,
size_t /* arg_size */,
const void * /* arg_value */) CL_API_SUFFIX__VERSION_1_0;
#ifdef CL_VERSION_2_0
extern CL_API_ENTRY cl_int CL_API_CALL
clSetKernelArgSVMPointer(cl_kernel /* kernel */,
cl_uint /* arg_index */,
const void * /* arg_value */) CL_API_SUFFIX__VERSION_2_0;
extern CL_API_ENTRY cl_int CL_API_CALL
clSetKernelExecInfo(cl_kernel /* kernel */,
cl_kernel_exec_info /* param_name */,
size_t /* param_value_size */,
const void * /* param_value */) CL_API_SUFFIX__VERSION_2_0;
#endif
extern CL_API_ENTRY cl_int CL_API_CALL
clGetKernelInfo(cl_kernel /* kernel */,
cl_kernel_info /* param_name */,
size_t /* param_value_size */,
void * /* param_value */,
size_t * /* param_value_size_ret */) CL_API_SUFFIX__VERSION_1_0;
#ifdef CL_VERSION_1_2
extern CL_API_ENTRY cl_int CL_API_CALL
clGetKernelArgInfo(cl_kernel /* kernel */,
cl_uint /* arg_indx */,
cl_kernel_arg_info /* param_name */,
size_t /* param_value_size */,
void * /* param_value */,
size_t * /* param_value_size_ret */) CL_API_SUFFIX__VERSION_1_2;
#endif
extern CL_API_ENTRY cl_int CL_API_CALL
clGetKernelWorkGroupInfo(cl_kernel /* kernel */,
cl_device_id /* device */,
cl_kernel_work_group_info /* param_name */,
size_t /* param_value_size */,
void * /* param_value */,
size_t * /* param_value_size_ret */) CL_API_SUFFIX__VERSION_1_0;
#ifdef CL_VERSION_2_1
extern CL_API_ENTRY cl_int CL_API_CALL
clGetKernelSubGroupInfo(cl_kernel /* kernel */,
cl_device_id /* device */,
cl_kernel_sub_group_info /* param_name */,
size_t /* input_value_size */,
const void* /*input_value */,
size_t /* param_value_size */,
void* /* param_value */,
size_t* /* param_value_size_ret */ ) CL_API_SUFFIX__VERSION_2_1;
#endif
/* Event Object APIs */
extern CL_API_ENTRY cl_int CL_API_CALL
clWaitForEvents(cl_uint /* num_events */,
const cl_event * /* event_list */) CL_API_SUFFIX__VERSION_1_0;
extern CL_API_ENTRY cl_int CL_API_CALL
clGetEventInfo(cl_event /* event */,
cl_event_info /* param_name */,
size_t /* param_value_size */,
void * /* param_value */,
size_t * /* param_value_size_ret */) CL_API_SUFFIX__VERSION_1_0;
#ifdef CL_VERSION_1_1
extern CL_API_ENTRY cl_event CL_API_CALL
clCreateUserEvent(cl_context /* context */,
cl_int * /* errcode_ret */) CL_API_SUFFIX__VERSION_1_1;
#endif
extern CL_API_ENTRY cl_int CL_API_CALL
clRetainEvent(cl_event /* event */) CL_API_SUFFIX__VERSION_1_0;
extern CL_API_ENTRY cl_int CL_API_CALL
clReleaseEvent(cl_event /* event */) CL_API_SUFFIX__VERSION_1_0;
#ifdef CL_VERSION_1_1
extern CL_API_ENTRY cl_int CL_API_CALL
clSetUserEventStatus(cl_event /* event */,
cl_int /* execution_status */) CL_API_SUFFIX__VERSION_1_1;
extern CL_API_ENTRY cl_int CL_API_CALL
clSetEventCallback( cl_event /* event */,
cl_int /* command_exec_callback_type */,
void (CL_CALLBACK * /* pfn_notify */)(cl_event, cl_int, void *),
void * /* user_data */) CL_API_SUFFIX__VERSION_1_1;
#endif
/* Profiling APIs */
extern CL_API_ENTRY cl_int CL_API_CALL
clGetEventProfilingInfo(cl_event /* event */,
cl_profiling_info /* param_name */,
size_t /* param_value_size */,
void * /* param_value */,
size_t * /* param_value_size_ret */) CL_API_SUFFIX__VERSION_1_0;
/* Flush and Finish APIs */
extern CL_API_ENTRY cl_int CL_API_CALL
clFlush(cl_command_queue /* command_queue */) CL_API_SUFFIX__VERSION_1_0;
extern CL_API_ENTRY cl_int CL_API_CALL
clFinish(cl_command_queue /* command_queue */) CL_API_SUFFIX__VERSION_1_0;
/* Enqueued Commands APIs */
extern CL_API_ENTRY cl_int CL_API_CALL
clEnqueueReadBuffer(cl_command_queue /* command_queue */,
cl_mem /* buffer */,
cl_bool /* blocking_read */,
size_t /* offset */,
size_t /* size */,
void * /* ptr */,
cl_uint /* num_events_in_wait_list */,
const cl_event * /* event_wait_list */,
cl_event * /* event */) CL_API_SUFFIX__VERSION_1_0;
#ifdef CL_VERSION_1_1
extern CL_API_ENTRY cl_int CL_API_CALL
clEnqueueReadBufferRect(cl_command_queue /* command_queue */,
cl_mem /* buffer */,
cl_bool /* blocking_read */,
const size_t * /* buffer_offset */,
const size_t * /* host_offset */,
const size_t * /* region */,
size_t /* buffer_row_pitch */,
size_t /* buffer_slice_pitch */,
size_t /* host_row_pitch */,
size_t /* host_slice_pitch */,
void * /* ptr */,
cl_uint /* num_events_in_wait_list */,
const cl_event * /* event_wait_list */,
cl_event * /* event */) CL_API_SUFFIX__VERSION_1_1;
#endif
extern CL_API_ENTRY cl_int CL_API_CALL
clEnqueueWriteBuffer(cl_command_queue /* command_queue */,
cl_mem /* buffer */,
cl_bool /* blocking_write */,
size_t /* offset */,
size_t /* size */,
const void * /* ptr */,
cl_uint /* num_events_in_wait_list */,
const cl_event * /* event_wait_list */,
cl_event * /* event */) CL_API_SUFFIX__VERSION_1_0;
#ifdef CL_VERSION_1_1
extern CL_API_ENTRY cl_int CL_API_CALL
clEnqueueWriteBufferRect(cl_command_queue /* command_queue */,
cl_mem /* buffer */,
cl_bool /* blocking_write */,
const size_t * /* buffer_offset */,
const size_t * /* host_offset */,
const size_t * /* region */,
size_t /* buffer_row_pitch */,
size_t /* buffer_slice_pitch */,
size_t /* host_row_pitch */,
size_t /* host_slice_pitch */,
const void * /* ptr */,
cl_uint /* num_events_in_wait_list */,
const cl_event * /* event_wait_list */,
cl_event * /* event */) CL_API_SUFFIX__VERSION_1_1;
#endif
#ifdef CL_VERSION_1_2
extern CL_API_ENTRY cl_int CL_API_CALL
clEnqueueFillBuffer(cl_command_queue /* command_queue */,
cl_mem /* buffer */,
const void * /* pattern */,
size_t /* pattern_size */,
size_t /* offset */,
size_t /* size */,
cl_uint /* num_events_in_wait_list */,
const cl_event * /* event_wait_list */,
cl_event * /* event */) CL_API_SUFFIX__VERSION_1_2;
#endif
extern CL_API_ENTRY cl_int CL_API_CALL
clEnqueueCopyBuffer(cl_command_queue /* command_queue */,
cl_mem /* src_buffer */,
cl_mem /* dst_buffer */,
size_t /* src_offset */,
size_t /* dst_offset */,
size_t /* size */,
cl_uint /* num_events_in_wait_list */,
const cl_event * /* event_wait_list */,
cl_event * /* event */) CL_API_SUFFIX__VERSION_1_0;
#ifdef CL_VERSION_1_1
extern CL_API_ENTRY cl_int CL_API_CALL
clEnqueueCopyBufferRect(cl_command_queue /* command_queue */,
cl_mem /* src_buffer */,
cl_mem /* dst_buffer */,
const size_t * /* src_origin */,
const size_t * /* dst_origin */,
const size_t * /* region */,
size_t /* src_row_pitch */,
size_t /* src_slice_pitch */,
size_t /* dst_row_pitch */,
size_t /* dst_slice_pitch */,
cl_uint /* num_events_in_wait_list */,
const cl_event * /* event_wait_list */,
cl_event * /* event */) CL_API_SUFFIX__VERSION_1_1;
#endif
extern CL_API_ENTRY cl_int CL_API_CALL
clEnqueueReadImage(cl_command_queue /* command_queue */,
cl_mem /* image */,
cl_bool /* blocking_read */,
const size_t * /* origin[3] */,
const size_t * /* region[3] */,
size_t /* row_pitch */,
size_t /* slice_pitch */,
void * /* ptr */,
cl_uint /* num_events_in_wait_list */,
const cl_event * /* event_wait_list */,
cl_event * /* event */) CL_API_SUFFIX__VERSION_1_0;
extern CL_API_ENTRY cl_int CL_API_CALL
clEnqueueWriteImage(cl_command_queue /* command_queue */,
cl_mem /* image */,
cl_bool /* blocking_write */,
const size_t * /* origin[3] */,
const size_t * /* region[3] */,
size_t /* input_row_pitch */,
size_t /* input_slice_pitch */,
const void * /* ptr */,
cl_uint /* num_events_in_wait_list */,
const cl_event * /* event_wait_list */,
cl_event * /* event */) CL_API_SUFFIX__VERSION_1_0;
#ifdef CL_VERSION_1_2
extern CL_API_ENTRY cl_int CL_API_CALL
clEnqueueFillImage(cl_command_queue /* command_queue */,
cl_mem /* image */,
const void * /* fill_color */,
const size_t * /* origin[3] */,
const size_t * /* region[3] */,
cl_uint /* num_events_in_wait_list */,
const cl_event * /* event_wait_list */,
cl_event * /* event */) CL_API_SUFFIX__VERSION_1_2;
#endif
extern CL_API_ENTRY cl_int CL_API_CALL
clEnqueueCopyImage(cl_command_queue /* command_queue */,
cl_mem /* src_image */,
cl_mem /* dst_image */,
const size_t * /* src_origin[3] */,
const size_t * /* dst_origin[3] */,
const size_t * /* region[3] */,
cl_uint /* num_events_in_wait_list */,
const cl_event * /* event_wait_list */,
cl_event * /* event */) CL_API_SUFFIX__VERSION_1_0;
extern CL_API_ENTRY cl_int CL_API_CALL
clEnqueueCopyImageToBuffer(cl_command_queue /* command_queue */,
cl_mem /* src_image */,
cl_mem /* dst_buffer */,
const size_t * /* src_origin[3] */,
const size_t * /* region[3] */,
size_t /* dst_offset */,
cl_uint /* num_events_in_wait_list */,
const cl_event * /* event_wait_list */,
cl_event * /* event */) CL_API_SUFFIX__VERSION_1_0;
extern CL_API_ENTRY cl_int CL_API_CALL
clEnqueueCopyBufferToImage(cl_command_queue /* command_queue */,
cl_mem /* src_buffer */,
cl_mem /* dst_image */,
size_t /* src_offset */,
const size_t * /* dst_origin[3] */,
const size_t * /* region[3] */,
cl_uint /* num_events_in_wait_list */,
const cl_event * /* event_wait_list */,
cl_event * /* event */) CL_API_SUFFIX__VERSION_1_0;
extern CL_API_ENTRY void * CL_API_CALL
clEnqueueMapBuffer(cl_command_queue /* command_queue */,
cl_mem /* buffer */,
cl_bool /* blocking_map */,
cl_map_flags /* map_flags */,
size_t /* offset */,
size_t /* size */,
cl_uint /* num_events_in_wait_list */,
const cl_event * /* event_wait_list */,
cl_event * /* event */,
cl_int * /* errcode_ret */) CL_API_SUFFIX__VERSION_1_0;
extern CL_API_ENTRY void * CL_API_CALL
clEnqueueMapImage(cl_command_queue /* command_queue */,
cl_mem /* image */,
cl_bool /* blocking_map */,
cl_map_flags /* map_flags */,
const size_t * /* origin[3] */,
const size_t * /* region[3] */,
size_t * /* image_row_pitch */,
size_t * /* image_slice_pitch */,
cl_uint /* num_events_in_wait_list */,
const cl_event * /* event_wait_list */,
cl_event * /* event */,
cl_int * /* errcode_ret */) CL_API_SUFFIX__VERSION_1_0;
extern CL_API_ENTRY cl_int CL_API_CALL
clEnqueueUnmapMemObject(cl_command_queue /* command_queue */,
cl_mem /* memobj */,
void * /* mapped_ptr */,
cl_uint /* num_events_in_wait_list */,
const cl_event * /* event_wait_list */,
cl_event * /* event */) CL_API_SUFFIX__VERSION_1_0;
#ifdef CL_VERSION_1_2
extern CL_API_ENTRY cl_int CL_API_CALL
clEnqueueMigrateMemObjects(cl_command_queue /* command_queue */,
cl_uint /* num_mem_objects */,
const cl_mem * /* mem_objects */,
cl_mem_migration_flags /* flags */,
cl_uint /* num_events_in_wait_list */,
const cl_event * /* event_wait_list */,
cl_event * /* event */) CL_API_SUFFIX__VERSION_1_2;
#endif
extern CL_API_ENTRY cl_int CL_API_CALL
clEnqueueNDRangeKernel(cl_command_queue /* command_queue */,
cl_kernel /* kernel */,
cl_uint /* work_dim */,
const size_t * /* global_work_offset */,
const size_t * /* global_work_size */,
const size_t * /* local_work_size */,
cl_uint /* num_events_in_wait_list */,
const cl_event * /* event_wait_list */,
cl_event * /* event */) CL_API_SUFFIX__VERSION_1_0;
extern CL_API_ENTRY cl_int CL_API_CALL
clEnqueueNativeKernel(cl_command_queue /* command_queue */,
void (CL_CALLBACK * /*user_func*/)(void *),
void * /* args */,
size_t /* cb_args */,
cl_uint /* num_mem_objects */,
const cl_mem * /* mem_list */,
const void ** /* args_mem_loc */,
cl_uint /* num_events_in_wait_list */,
const cl_event * /* event_wait_list */,
cl_event * /* event */) CL_API_SUFFIX__VERSION_1_0;
#ifdef CL_VERSION_1_2
extern CL_API_ENTRY cl_int CL_API_CALL
clEnqueueMarkerWithWaitList(cl_command_queue /* command_queue */,
cl_uint /* num_events_in_wait_list */,
const cl_event * /* event_wait_list */,
cl_event * /* event */) CL_API_SUFFIX__VERSION_1_2;
extern CL_API_ENTRY cl_int CL_API_CALL
clEnqueueBarrierWithWaitList(cl_command_queue /* command_queue */,
cl_uint /* num_events_in_wait_list */,
const cl_event * /* event_wait_list */,
cl_event * /* event */) CL_API_SUFFIX__VERSION_1_2;
#endif
#ifdef CL_VERSION_2_0
extern CL_API_ENTRY cl_int CL_API_CALL
clEnqueueSVMFree(cl_command_queue /* command_queue */,
cl_uint /* num_svm_pointers */,
void *[] /* svm_pointers[] */,
void (CL_CALLBACK * /*pfn_free_func*/)(cl_command_queue /* queue */,
cl_uint /* num_svm_pointers */,
void *[] /* svm_pointers[] */,
void * /* user_data */),
void * /* user_data */,
cl_uint /* num_events_in_wait_list */,
const cl_event * /* event_wait_list */,
cl_event * /* event */) CL_API_SUFFIX__VERSION_2_0;
extern CL_API_ENTRY cl_int CL_API_CALL
clEnqueueSVMMemcpy(cl_command_queue /* command_queue */,
cl_bool /* blocking_copy */,
void * /* dst_ptr */,
const void * /* src_ptr */,
size_t /* size */,
cl_uint /* num_events_in_wait_list */,
const cl_event * /* event_wait_list */,
cl_event * /* event */) CL_API_SUFFIX__VERSION_2_0;
extern CL_API_ENTRY cl_int CL_API_CALL
clEnqueueSVMMemFill(cl_command_queue /* command_queue */,
void * /* svm_ptr */,
const void * /* pattern */,
size_t /* pattern_size */,
size_t /* size */,
cl_uint /* num_events_in_wait_list */,
const cl_event * /* event_wait_list */,
cl_event * /* event */) CL_API_SUFFIX__VERSION_2_0;
extern CL_API_ENTRY cl_int CL_API_CALL
clEnqueueSVMMap(cl_command_queue /* command_queue */,
cl_bool /* blocking_map */,
cl_map_flags /* flags */,
void * /* svm_ptr */,
size_t /* size */,
cl_uint /* num_events_in_wait_list */,
const cl_event * /* event_wait_list */,
cl_event * /* event */) CL_API_SUFFIX__VERSION_2_0;
extern CL_API_ENTRY cl_int CL_API_CALL
clEnqueueSVMUnmap(cl_command_queue /* command_queue */,
void * /* svm_ptr */,
cl_uint /* num_events_in_wait_list */,
const cl_event * /* event_wait_list */,
cl_event * /* event */) CL_API_SUFFIX__VERSION_2_0;
#endif
#ifdef CL_VERSION_2_1
extern CL_API_ENTRY cl_int CL_API_CALL
clEnqueueSVMMigrateMem(cl_command_queue /* command_queue */,
cl_uint /* num_svm_pointers */,
const void ** /* svm_pointers */,
const size_t * /* sizes */,
cl_mem_migration_flags /* flags */,
cl_uint /* num_events_in_wait_list */,
const cl_event * /* event_wait_list */,
cl_event * /* event */) CL_API_SUFFIX__VERSION_2_1;
#endif
#ifdef CL_VERSION_1_2
/* Extension function access
*
* Returns the extension function address for the given function name,
* or NULL if a valid function can not be found. The client must
* check to make sure the address is not NULL, before using or
* calling the returned function address.
*/
extern CL_API_ENTRY void * CL_API_CALL
clGetExtensionFunctionAddressForPlatform(cl_platform_id /* platform */,
const char * /* func_name */) CL_API_SUFFIX__VERSION_1_2;
#endif
#ifdef CL_USE_DEPRECATED_OPENCL_1_0_APIS
/*
* WARNING:
* This API introduces mutable state into the OpenCL implementation. It has been REMOVED
* to better facilitate thread safety. The 1.0 API is not thread safe. It is not tested by the
* OpenCL 1.1 conformance test, and consequently may not work or may not work dependably.
* It is likely to be non-performant. Use of this API is not advised. Use at your own risk.
*
* Software developers previously relying on this API are instructed to set the command queue
* properties when creating the queue, instead.
*/
extern CL_API_ENTRY cl_int CL_API_CALL
clSetCommandQueueProperty(cl_command_queue /* command_queue */,
cl_command_queue_properties /* properties */,
cl_bool /* enable */,
cl_command_queue_properties * /* old_properties */) CL_EXT_SUFFIX__VERSION_1_0_DEPRECATED;
#endif /* CL_USE_DEPRECATED_OPENCL_1_0_APIS */
/* Deprecated OpenCL 1.1 APIs */
extern CL_API_ENTRY CL_EXT_PREFIX__VERSION_1_1_DEPRECATED cl_mem CL_API_CALL
clCreateImage2D(cl_context /* context */,
cl_mem_flags /* flags */,
const cl_image_format * /* image_format */,
size_t /* image_width */,
size_t /* image_height */,
size_t /* image_row_pitch */,
void * /* host_ptr */,
cl_int * /* errcode_ret */) CL_EXT_SUFFIX__VERSION_1_1_DEPRECATED;
extern CL_API_ENTRY CL_EXT_PREFIX__VERSION_1_1_DEPRECATED cl_mem CL_API_CALL
clCreateImage3D(cl_context /* context */,
cl_mem_flags /* flags */,
const cl_image_format * /* image_format */,
size_t /* image_width */,
size_t /* image_height */,
size_t /* image_depth */,
size_t /* image_row_pitch */,
size_t /* image_slice_pitch */,
void * /* host_ptr */,
cl_int * /* errcode_ret */) CL_EXT_SUFFIX__VERSION_1_1_DEPRECATED;
extern CL_API_ENTRY CL_EXT_PREFIX__VERSION_1_1_DEPRECATED cl_int CL_API_CALL
clEnqueueMarker(cl_command_queue /* command_queue */,
cl_event * /* event */) CL_EXT_SUFFIX__VERSION_1_1_DEPRECATED;
extern CL_API_ENTRY CL_EXT_PREFIX__VERSION_1_1_DEPRECATED cl_int CL_API_CALL
clEnqueueWaitForEvents(cl_command_queue /* command_queue */,
cl_uint /* num_events */,
const cl_event * /* event_list */) CL_EXT_SUFFIX__VERSION_1_1_DEPRECATED;
extern CL_API_ENTRY CL_EXT_PREFIX__VERSION_1_1_DEPRECATED cl_int CL_API_CALL
clEnqueueBarrier(cl_command_queue /* command_queue */) CL_EXT_SUFFIX__VERSION_1_1_DEPRECATED;
extern CL_API_ENTRY CL_EXT_PREFIX__VERSION_1_1_DEPRECATED cl_int CL_API_CALL
clUnloadCompiler(void) CL_EXT_SUFFIX__VERSION_1_1_DEPRECATED;
extern CL_API_ENTRY CL_EXT_PREFIX__VERSION_1_1_DEPRECATED void * CL_API_CALL
clGetExtensionFunctionAddress(const char * /* func_name */) CL_EXT_SUFFIX__VERSION_1_1_DEPRECATED;
/* Deprecated OpenCL 2.0 APIs */
extern CL_API_ENTRY CL_EXT_PREFIX__VERSION_1_2_DEPRECATED cl_command_queue CL_API_CALL
clCreateCommandQueue(cl_context /* context */,
cl_device_id /* device */,
cl_command_queue_properties /* properties */,
cl_int * /* errcode_ret */) CL_EXT_SUFFIX__VERSION_1_2_DEPRECATED;
extern CL_API_ENTRY CL_EXT_PREFIX__VERSION_1_2_DEPRECATED cl_sampler CL_API_CALL
clCreateSampler(cl_context /* context */,
cl_bool /* normalized_coords */,
cl_addressing_mode /* addressing_mode */,
cl_filter_mode /* filter_mode */,
cl_int * /* errcode_ret */) CL_EXT_SUFFIX__VERSION_1_2_DEPRECATED;
extern CL_API_ENTRY CL_EXT_PREFIX__VERSION_1_2_DEPRECATED cl_int CL_API_CALL
clEnqueueTask(cl_command_queue /* command_queue */,
cl_kernel /* kernel */,
cl_uint /* num_events_in_wait_list */,
const cl_event * /* event_wait_list */,
cl_event * /* event */) CL_EXT_SUFFIX__VERSION_1_2_DEPRECATED;
#ifdef __cplusplus
}
#endif
#endif /* __OPENCL_CL_H */
/**********************************************************************************
* Copyright (c) 2008-2015 The Khronos Group Inc.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and/or associated documentation files (the
* "Materials"), to deal in the Materials without restriction, including
* without limitation the rights to use, copy, modify, merge, publish,
* distribute, sublicense, and/or sell copies of the Materials, and to
* permit persons to whom the Materials are furnished to do so, subject to
* the following conditions:
*
* The above copyright notice and this permission notice shall be included
* in all copies or substantial portions of the Materials.
*
* MODIFICATIONS TO THIS FILE MAY MEAN IT NO LONGER ACCURATELY REFLECTS
* KHRONOS STANDARDS. THE UNMODIFIED, NORMATIVE VERSIONS OF KHRONOS
* SPECIFICATIONS AND HEADER INFORMATION ARE LOCATED AT
* https://www.khronos.org/registry/
*
* THE MATERIALS ARE PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
* MATERIALS OR THE USE OR OTHER DEALINGS IN THE MATERIALS.
**********************************************************************************/
/* $Revision: 11708 $ on $Date: 2010-06-13 23:36:24 -0700 (Sun, 13 Jun 2010) $ */
#ifndef __OPENCL_CL_D3D10_H
#define __OPENCL_CL_D3D10_H
#include <d3d10.h>
#include <CL/cl.h>
#include <CL/cl_platform.h>
#ifdef __cplusplus
extern "C" {
#endif
/******************************************************************************
* cl_khr_d3d10_sharing */
#define cl_khr_d3d10_sharing 1
typedef cl_uint cl_d3d10_device_source_khr;
typedef cl_uint cl_d3d10_device_set_khr;
/******************************************************************************/
/* Error Codes */
#define CL_INVALID_D3D10_DEVICE_KHR -1002
#define CL_INVALID_D3D10_RESOURCE_KHR -1003
#define CL_D3D10_RESOURCE_ALREADY_ACQUIRED_KHR -1004
#define CL_D3D10_RESOURCE_NOT_ACQUIRED_KHR -1005
/* cl_d3d10_device_source_nv */
#define CL_D3D10_DEVICE_KHR 0x4010
#define CL_D3D10_DXGI_ADAPTER_KHR 0x4011
/* cl_d3d10_device_set_nv */
#define CL_PREFERRED_DEVICES_FOR_D3D10_KHR 0x4012
#define CL_ALL_DEVICES_FOR_D3D10_KHR 0x4013
/* cl_context_info */
#define CL_CONTEXT_D3D10_DEVICE_KHR 0x4014
#define CL_CONTEXT_D3D10_PREFER_SHARED_RESOURCES_KHR 0x402C
/* cl_mem_info */
#define CL_MEM_D3D10_RESOURCE_KHR 0x4015
/* cl_image_info */
#define CL_IMAGE_D3D10_SUBRESOURCE_KHR 0x4016
/* cl_command_type */
#define CL_COMMAND_ACQUIRE_D3D10_OBJECTS_KHR 0x4017
#define CL_COMMAND_RELEASE_D3D10_OBJECTS_KHR 0x4018
/******************************************************************************/
typedef CL_API_ENTRY cl_int (CL_API_CALL *clGetDeviceIDsFromD3D10KHR_fn)(
cl_platform_id platform,
cl_d3d10_device_source_khr d3d_device_source,
void * d3d_object,
cl_d3d10_device_set_khr d3d_device_set,
cl_uint num_entries,
cl_device_id * devices,
cl_uint * num_devices) CL_API_SUFFIX__VERSION_1_0;
typedef CL_API_ENTRY cl_mem (CL_API_CALL *clCreateFromD3D10BufferKHR_fn)(
cl_context context,
cl_mem_flags flags,
ID3D10Buffer * resource,
cl_int * errcode_ret) CL_API_SUFFIX__VERSION_1_0;
typedef CL_API_ENTRY cl_mem (CL_API_CALL *clCreateFromD3D10Texture2DKHR_fn)(
cl_context context,
cl_mem_flags flags,
ID3D10Texture2D * resource,
UINT subresource,
cl_int * errcode_ret) CL_API_SUFFIX__VERSION_1_0;
typedef CL_API_ENTRY cl_mem (CL_API_CALL *clCreateFromD3D10Texture3DKHR_fn)(
cl_context context,
cl_mem_flags flags,
ID3D10Texture3D * resource,
UINT subresource,
cl_int * errcode_ret) CL_API_SUFFIX__VERSION_1_0;
typedef CL_API_ENTRY cl_int (CL_API_CALL *clEnqueueAcquireD3D10ObjectsKHR_fn)(
cl_command_queue command_queue,
cl_uint num_objects,
const cl_mem * mem_objects,
cl_uint num_events_in_wait_list,
const cl_event * event_wait_list,
cl_event * event) CL_API_SUFFIX__VERSION_1_0;
typedef CL_API_ENTRY cl_int (CL_API_CALL *clEnqueueReleaseD3D10ObjectsKHR_fn)(
cl_command_queue command_queue,
cl_uint num_objects,
const cl_mem * mem_objects,
cl_uint num_events_in_wait_list,
const cl_event * event_wait_list,
cl_event * event) CL_API_SUFFIX__VERSION_1_0;
#ifdef __cplusplus
}
#endif
#endif /* __OPENCL_CL_D3D10_H */
/**********************************************************************************
* Copyright (c) 2008-2015 The Khronos Group Inc.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and/or associated documentation files (the
* "Materials"), to deal in the Materials without restriction, including
* without limitation the rights to use, copy, modify, merge, publish,
* distribute, sublicense, and/or sell copies of the Materials, and to
* permit persons to whom the Materials are furnished to do so, subject to
* the following conditions:
*
* The above copyright notice and this permission notice shall be included
* in all copies or substantial portions of the Materials.
*
* MODIFICATIONS TO THIS FILE MAY MEAN IT NO LONGER ACCURATELY REFLECTS
* KHRONOS STANDARDS. THE UNMODIFIED, NORMATIVE VERSIONS OF KHRONOS
* SPECIFICATIONS AND HEADER INFORMATION ARE LOCATED AT
* https://www.khronos.org/registry/
*
* THE MATERIALS ARE PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
* MATERIALS OR THE USE OR OTHER DEALINGS IN THE MATERIALS.
**********************************************************************************/
/* $Revision: 11708 $ on $Date: 2010-06-13 23:36:24 -0700 (Sun, 13 Jun 2010) $ */
#ifndef __OPENCL_CL_D3D11_H
#define __OPENCL_CL_D3D11_H
#include <d3d11.h>
#include <CL/cl.h>
#include <CL/cl_platform.h>
#ifdef __cplusplus
extern "C" {
#endif
/******************************************************************************
* cl_khr_d3d11_sharing */
#define cl_khr_d3d11_sharing 1
typedef cl_uint cl_d3d11_device_source_khr;
typedef cl_uint cl_d3d11_device_set_khr;
/******************************************************************************/
/* Error Codes */
#define CL_INVALID_D3D11_DEVICE_KHR -1006
#define CL_INVALID_D3D11_RESOURCE_KHR -1007
#define CL_D3D11_RESOURCE_ALREADY_ACQUIRED_KHR -1008
#define CL_D3D11_RESOURCE_NOT_ACQUIRED_KHR -1009
/* cl_d3d11_device_source */
#define CL_D3D11_DEVICE_KHR 0x4019
#define CL_D3D11_DXGI_ADAPTER_KHR 0x401A
/* cl_d3d11_device_set */
#define CL_PREFERRED_DEVICES_FOR_D3D11_KHR 0x401B
#define CL_ALL_DEVICES_FOR_D3D11_KHR 0x401C
/* cl_context_info */
#define CL_CONTEXT_D3D11_DEVICE_KHR 0x401D
#define CL_CONTEXT_D3D11_PREFER_SHARED_RESOURCES_KHR 0x402D
/* cl_mem_info */
#define CL_MEM_D3D11_RESOURCE_KHR 0x401E
/* cl_image_info */
#define CL_IMAGE_D3D11_SUBRESOURCE_KHR 0x401F
/* cl_command_type */
#define CL_COMMAND_ACQUIRE_D3D11_OBJECTS_KHR 0x4020
#define CL_COMMAND_RELEASE_D3D11_OBJECTS_KHR 0x4021
/******************************************************************************/
typedef CL_API_ENTRY cl_int (CL_API_CALL *clGetDeviceIDsFromD3D11KHR_fn)(
cl_platform_id platform,
cl_d3d11_device_source_khr d3d_device_source,
void * d3d_object,
cl_d3d11_device_set_khr d3d_device_set,
cl_uint num_entries,
cl_device_id * devices,
cl_uint * num_devices) CL_API_SUFFIX__VERSION_1_2;
typedef CL_API_ENTRY cl_mem (CL_API_CALL *clCreateFromD3D11BufferKHR_fn)(
cl_context context,
cl_mem_flags flags,
ID3D11Buffer * resource,
cl_int * errcode_ret) CL_API_SUFFIX__VERSION_1_2;
typedef CL_API_ENTRY cl_mem (CL_API_CALL *clCreateFromD3D11Texture2DKHR_fn)(
cl_context context,
cl_mem_flags flags,
ID3D11Texture2D * resource,
UINT subresource,
cl_int * errcode_ret) CL_API_SUFFIX__VERSION_1_2;
typedef CL_API_ENTRY cl_mem (CL_API_CALL *clCreateFromD3D11Texture3DKHR_fn)(
cl_context context,
cl_mem_flags flags,
ID3D11Texture3D * resource,
UINT subresource,
cl_int * errcode_ret) CL_API_SUFFIX__VERSION_1_2;
typedef CL_API_ENTRY cl_int (CL_API_CALL *clEnqueueAcquireD3D11ObjectsKHR_fn)(
cl_command_queue command_queue,
cl_uint num_objects,
const cl_mem * mem_objects,
cl_uint num_events_in_wait_list,
const cl_event * event_wait_list,
cl_event * event) CL_API_SUFFIX__VERSION_1_2;
typedef CL_API_ENTRY cl_int (CL_API_CALL *clEnqueueReleaseD3D11ObjectsKHR_fn)(
cl_command_queue command_queue,
cl_uint num_objects,
const cl_mem * mem_objects,
cl_uint num_events_in_wait_list,
const cl_event * event_wait_list,
cl_event * event) CL_API_SUFFIX__VERSION_1_2;
#ifdef __cplusplus
}
#endif
#endif /* __OPENCL_CL_D3D11_H */
/**********************************************************************************
* Copyright (c) 2008-2015 The Khronos Group Inc.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and/or associated documentation files (the
* "Materials"), to deal in the Materials without restriction, including
* without limitation the rights to use, copy, modify, merge, publish,
* distribute, sublicense, and/or sell copies of the Materials, and to
* permit persons to whom the Materials are furnished to do so, subject to
* the following conditions:
*
* The above copyright notice and this permission notice shall be included
* in all copies or substantial portions of the Materials.
*
* MODIFICATIONS TO THIS FILE MAY MEAN IT NO LONGER ACCURATELY REFLECTS
* KHRONOS STANDARDS. THE UNMODIFIED, NORMATIVE VERSIONS OF KHRONOS
* SPECIFICATIONS AND HEADER INFORMATION ARE LOCATED AT
* https://www.khronos.org/registry/
*
* THE MATERIALS ARE PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
* MATERIALS OR THE USE OR OTHER DEALINGS IN THE MATERIALS.
**********************************************************************************/
/* $Revision: 11708 $ on $Date: 2010-06-13 23:36:24 -0700 (Sun, 13 Jun 2010) $ */
#ifndef __OPENCL_CL_DX9_MEDIA_SHARING_H
#define __OPENCL_CL_DX9_MEDIA_SHARING_H
#include <CL/cl.h>
#include <CL/cl_platform.h>
#ifdef __cplusplus
extern "C" {
#endif
/******************************************************************************/
/* cl_khr_dx9_media_sharing */
#define cl_khr_dx9_media_sharing 1
typedef cl_uint cl_dx9_media_adapter_type_khr;
typedef cl_uint cl_dx9_media_adapter_set_khr;
#if defined(_WIN32)
#include <d3d9.h>
typedef struct _cl_dx9_surface_info_khr
{
IDirect3DSurface9 *resource;
HANDLE shared_handle;
} cl_dx9_surface_info_khr;
#endif
/******************************************************************************/
/* Error Codes */
#define CL_INVALID_DX9_MEDIA_ADAPTER_KHR -1010
#define CL_INVALID_DX9_MEDIA_SURFACE_KHR -1011
#define CL_DX9_MEDIA_SURFACE_ALREADY_ACQUIRED_KHR -1012
#define CL_DX9_MEDIA_SURFACE_NOT_ACQUIRED_KHR -1013
/* cl_media_adapter_type_khr */
#define CL_ADAPTER_D3D9_KHR 0x2020
#define CL_ADAPTER_D3D9EX_KHR 0x2021
#define CL_ADAPTER_DXVA_KHR 0x2022
/* cl_media_adapter_set_khr */
#define CL_PREFERRED_DEVICES_FOR_DX9_MEDIA_ADAPTER_KHR 0x2023
#define CL_ALL_DEVICES_FOR_DX9_MEDIA_ADAPTER_KHR 0x2024
/* cl_context_info */
#define CL_CONTEXT_ADAPTER_D3D9_KHR 0x2025
#define CL_CONTEXT_ADAPTER_D3D9EX_KHR 0x2026
#define CL_CONTEXT_ADAPTER_DXVA_KHR 0x2027
/* cl_mem_info */
#define CL_MEM_DX9_MEDIA_ADAPTER_TYPE_KHR 0x2028
#define CL_MEM_DX9_MEDIA_SURFACE_INFO_KHR 0x2029
/* cl_image_info */
#define CL_IMAGE_DX9_MEDIA_PLANE_KHR 0x202A
/* cl_command_type */
#define CL_COMMAND_ACQUIRE_DX9_MEDIA_SURFACES_KHR 0x202B
#define CL_COMMAND_RELEASE_DX9_MEDIA_SURFACES_KHR 0x202C
/******************************************************************************/
typedef CL_API_ENTRY cl_int (CL_API_CALL *clGetDeviceIDsFromDX9MediaAdapterKHR_fn)(
cl_platform_id platform,
cl_uint num_media_adapters,
cl_dx9_media_adapter_type_khr * media_adapter_type,
void * media_adapters,
cl_dx9_media_adapter_set_khr media_adapter_set,
cl_uint num_entries,
cl_device_id * devices,
cl_uint * num_devices) CL_API_SUFFIX__VERSION_1_2;
typedef CL_API_ENTRY cl_mem (CL_API_CALL *clCreateFromDX9MediaSurfaceKHR_fn)(
cl_context context,
cl_mem_flags flags,
cl_dx9_media_adapter_type_khr adapter_type,
void * surface_info,
cl_uint plane,
cl_int * errcode_ret) CL_API_SUFFIX__VERSION_1_2;
typedef CL_API_ENTRY cl_int (CL_API_CALL *clEnqueueAcquireDX9MediaSurfacesKHR_fn)(
cl_command_queue command_queue,
cl_uint num_objects,
const cl_mem * mem_objects,
cl_uint num_events_in_wait_list,
const cl_event * event_wait_list,
cl_event * event) CL_API_SUFFIX__VERSION_1_2;
typedef CL_API_ENTRY cl_int (CL_API_CALL *clEnqueueReleaseDX9MediaSurfacesKHR_fn)(
cl_command_queue command_queue,
cl_uint num_objects,
const cl_mem * mem_objects,
cl_uint num_events_in_wait_list,
const cl_event * event_wait_list,
cl_event * event) CL_API_SUFFIX__VERSION_1_2;
#ifdef __cplusplus
}
#endif
#endif /* __OPENCL_CL_DX9_MEDIA_SHARING_H */
/**********************************************************************************
* Copyright (c) 2008-2016 The Khronos Group Inc.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and/or associated documentation files (the
* "Materials"), to deal in the Materials without restriction, including
* without limitation the rights to use, copy, modify, merge, publish,
* distribute, sublicense, and/or sell copies of the Materials, and to
* permit persons to whom the Materials are furnished to do so, subject to
* the following conditions:
*
* The above copyright notice and this permission notice shall be included
* in all copies or substantial portions of the Materials.
*
* MODIFICATIONS TO THIS FILE MAY MEAN IT NO LONGER ACCURATELY REFLECTS
* KHRONOS STANDARDS. THE UNMODIFIED, NORMATIVE VERSIONS OF KHRONOS
* SPECIFICATIONS AND HEADER INFORMATION ARE LOCATED AT
* https://www.khronos.org/registry/
*
* THE MATERIALS ARE PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
* MATERIALS OR THE USE OR OTHER DEALINGS IN THE MATERIALS.
**********************************************************************************/
/*****************************************************************************\
Copyright (c) 2013-2016 Intel Corporation All Rights Reserved.
THESE MATERIALS ARE PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL OR ITS
CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY OR TORT (INCLUDING
NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THESE
MATERIALS, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
File Name: cl_dx9_media_sharing_intel.h
Abstract:
Notes:
\*****************************************************************************/
#ifndef __OPENCL_CL_DX9_MEDIA_SHARING_INTEL_H
#define __OPENCL_CL_DX9_MEDIA_SHARING_INTEL_H
#include <CL/cl.h>
#include <CL/cl_platform.h>
#include <d3d9.h>
#include <dxvahd.h>
#include <wtypes.h>
#include <d3d9types.h>
#ifdef __cplusplus
extern "C" {
#endif
/***************************************
* cl_intel_dx9_media_sharing extension *
****************************************/
#define cl_intel_dx9_media_sharing 1
typedef cl_uint cl_dx9_device_source_intel;
typedef cl_uint cl_dx9_device_set_intel;
/* error codes */
#define CL_INVALID_DX9_DEVICE_INTEL -1010
#define CL_INVALID_DX9_RESOURCE_INTEL -1011
#define CL_DX9_RESOURCE_ALREADY_ACQUIRED_INTEL -1012
#define CL_DX9_RESOURCE_NOT_ACQUIRED_INTEL -1013
/* cl_dx9_device_source_intel */
#define CL_D3D9_DEVICE_INTEL 0x4022
#define CL_D3D9EX_DEVICE_INTEL 0x4070
#define CL_DXVA_DEVICE_INTEL 0x4071
/* cl_dx9_device_set_intel */
#define CL_PREFERRED_DEVICES_FOR_DX9_INTEL 0x4024
#define CL_ALL_DEVICES_FOR_DX9_INTEL 0x4025
/* cl_context_info */
#define CL_CONTEXT_D3D9_DEVICE_INTEL 0x4026
#define CL_CONTEXT_D3D9EX_DEVICE_INTEL 0x4072
#define CL_CONTEXT_DXVA_DEVICE_INTEL 0x4073
/* cl_mem_info */
#define CL_MEM_DX9_RESOURCE_INTEL 0x4027
#define CL_MEM_DX9_SHARED_HANDLE_INTEL 0x4074
/* cl_image_info */
#define CL_IMAGE_DX9_PLANE_INTEL 0x4075
/* cl_command_type */
#define CL_COMMAND_ACQUIRE_DX9_OBJECTS_INTEL 0x402A
#define CL_COMMAND_RELEASE_DX9_OBJECTS_INTEL 0x402B
/******************************************************************************/
extern CL_API_ENTRY cl_int CL_API_CALL
clGetDeviceIDsFromDX9INTEL(
cl_platform_id /* platform */,
cl_dx9_device_source_intel /* dx9_device_source */,
void* /* dx9_object */,
cl_dx9_device_set_intel /* dx9_device_set */,
cl_uint /* num_entries */,
cl_device_id* /* devices */,
cl_uint* /* num_devices */) CL_EXT_SUFFIX__VERSION_1_1;
typedef CL_API_ENTRY cl_int (CL_API_CALL* clGetDeviceIDsFromDX9INTEL_fn)(
cl_platform_id /* platform */,
cl_dx9_device_source_intel /* dx9_device_source */,
void* /* dx9_object */,
cl_dx9_device_set_intel /* dx9_device_set */,
cl_uint /* num_entries */,
cl_device_id* /* devices */,
cl_uint* /* num_devices */) CL_EXT_SUFFIX__VERSION_1_1;
extern CL_API_ENTRY cl_mem CL_API_CALL
clCreateFromDX9MediaSurfaceINTEL(
cl_context /* context */,
cl_mem_flags /* flags */,
IDirect3DSurface9* /* resource */,
HANDLE /* sharedHandle */,
UINT /* plane */,
cl_int* /* errcode_ret */) CL_EXT_SUFFIX__VERSION_1_1;
typedef CL_API_ENTRY cl_mem (CL_API_CALL *clCreateFromDX9MediaSurfaceINTEL_fn)(
cl_context /* context */,
cl_mem_flags /* flags */,
IDirect3DSurface9* /* resource */,
HANDLE /* sharedHandle */,
UINT /* plane */,
cl_int* /* errcode_ret */) CL_EXT_SUFFIX__VERSION_1_1;
extern CL_API_ENTRY cl_int CL_API_CALL
clEnqueueAcquireDX9ObjectsINTEL(
cl_command_queue /* command_queue */,
cl_uint /* num_objects */,
const cl_mem* /* mem_objects */,
cl_uint /* num_events_in_wait_list */,
const cl_event* /* event_wait_list */,
cl_event* /* event */) CL_EXT_SUFFIX__VERSION_1_1;
typedef CL_API_ENTRY cl_int (CL_API_CALL *clEnqueueAcquireDX9ObjectsINTEL_fn)(
cl_command_queue /* command_queue */,
cl_uint /* num_objects */,
const cl_mem* /* mem_objects */,
cl_uint /* num_events_in_wait_list */,
const cl_event* /* event_wait_list */,
cl_event* /* event */) CL_EXT_SUFFIX__VERSION_1_1;
extern CL_API_ENTRY cl_int CL_API_CALL
clEnqueueReleaseDX9ObjectsINTEL(
cl_command_queue /* command_queue */,
cl_uint /* num_objects */,
cl_mem* /* mem_objects */,
cl_uint /* num_events_in_wait_list */,
const cl_event* /* event_wait_list */,
cl_event* /* event */) CL_EXT_SUFFIX__VERSION_1_1;
typedef CL_API_ENTRY cl_int (CL_API_CALL *clEnqueueReleaseDX9ObjectsINTEL_fn)(
cl_command_queue /* command_queue */,
cl_uint /* num_objects */,
cl_mem* /* mem_objects */,
cl_uint /* num_events_in_wait_list */,
const cl_event* /* event_wait_list */,
cl_event* /* event */) CL_EXT_SUFFIX__VERSION_1_1;
#ifdef __cplusplus
}
#endif
#endif /* __OPENCL_CL_DX9_MEDIA_SHARING_INTEL_H */
/*******************************************************************************
* Copyright (c) 2008-2015 The Khronos Group Inc.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and/or associated documentation files (the
* "Materials"), to deal in the Materials without restriction, including
* without limitation the rights to use, copy, modify, merge, publish,
* distribute, sublicense, and/or sell copies of the Materials, and to
* permit persons to whom the Materials are furnished to do so, subject to
* the following conditions:
*
* The above copyright notice and this permission notice shall be included
* in all copies or substantial portions of the Materials.
*
* MODIFICATIONS TO THIS FILE MAY MEAN IT NO LONGER ACCURATELY REFLECTS
* KHRONOS STANDARDS. THE UNMODIFIED, NORMATIVE VERSIONS OF KHRONOS
* SPECIFICATIONS AND HEADER INFORMATION ARE LOCATED AT
* https://www.khronos.org/registry/
*
* THE MATERIALS ARE PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
* MATERIALS OR THE USE OR OTHER DEALINGS IN THE MATERIALS.
******************************************************************************/
#ifndef __OPENCL_CL_EGL_H
#define __OPENCL_CL_EGL_H
#ifdef __APPLE__
#else
#include <CL/cl.h>
#endif
#ifdef __cplusplus
extern "C" {
#endif
/* Command type for events created with clEnqueueAcquireEGLObjectsKHR */
#define CL_COMMAND_EGL_FENCE_SYNC_OBJECT_KHR 0x202F
#define CL_COMMAND_ACQUIRE_EGL_OBJECTS_KHR 0x202D
#define CL_COMMAND_RELEASE_EGL_OBJECTS_KHR 0x202E
/* Error type for clCreateFromEGLImageKHR */
#define CL_INVALID_EGL_OBJECT_KHR -1093
#define CL_EGL_RESOURCE_NOT_ACQUIRED_KHR -1092
/* CLeglImageKHR is an opaque handle to an EGLImage */
typedef void* CLeglImageKHR;
/* CLeglDisplayKHR is an opaque handle to an EGLDisplay */
typedef void* CLeglDisplayKHR;
/* CLeglSyncKHR is an opaque handle to an EGLSync object */
typedef void* CLeglSyncKHR;
/* properties passed to clCreateFromEGLImageKHR */
typedef intptr_t cl_egl_image_properties_khr;
#define cl_khr_egl_image 1
extern CL_API_ENTRY cl_mem CL_API_CALL
clCreateFromEGLImageKHR(cl_context /* context */,
CLeglDisplayKHR /* egldisplay */,
CLeglImageKHR /* eglimage */,
cl_mem_flags /* flags */,
const cl_egl_image_properties_khr * /* properties */,
cl_int * /* errcode_ret */) CL_API_SUFFIX__VERSION_1_0;
typedef CL_API_ENTRY cl_mem (CL_API_CALL *clCreateFromEGLImageKHR_fn)(
cl_context context,
CLeglDisplayKHR egldisplay,
CLeglImageKHR eglimage,
cl_mem_flags flags,
const cl_egl_image_properties_khr * properties,
cl_int * errcode_ret);
extern CL_API_ENTRY cl_int CL_API_CALL
clEnqueueAcquireEGLObjectsKHR(cl_command_queue /* command_queue */,
cl_uint /* num_objects */,
const cl_mem * /* mem_objects */,
cl_uint /* num_events_in_wait_list */,
const cl_event * /* event_wait_list */,
cl_event * /* event */) CL_API_SUFFIX__VERSION_1_0;
typedef CL_API_ENTRY cl_int (CL_API_CALL *clEnqueueAcquireEGLObjectsKHR_fn)(
cl_command_queue command_queue,
cl_uint num_objects,
const cl_mem * mem_objects,
cl_uint num_events_in_wait_list,
const cl_event * event_wait_list,
cl_event * event);
extern CL_API_ENTRY cl_int CL_API_CALL
clEnqueueReleaseEGLObjectsKHR(cl_command_queue /* command_queue */,
cl_uint /* num_objects */,
const cl_mem * /* mem_objects */,
cl_uint /* num_events_in_wait_list */,
const cl_event * /* event_wait_list */,
cl_event * /* event */) CL_API_SUFFIX__VERSION_1_0;
typedef CL_API_ENTRY cl_int (CL_API_CALL *clEnqueueReleaseEGLObjectsKHR_fn)(
cl_command_queue command_queue,
cl_uint num_objects,
const cl_mem * mem_objects,
cl_uint num_events_in_wait_list,
const cl_event * event_wait_list,
cl_event * event);
#define cl_khr_egl_event 1
extern CL_API_ENTRY cl_event CL_API_CALL
clCreateEventFromEGLSyncKHR(cl_context /* context */,
CLeglSyncKHR /* sync */,
CLeglDisplayKHR /* display */,
cl_int * /* errcode_ret */) CL_API_SUFFIX__VERSION_1_0;
typedef CL_API_ENTRY cl_event (CL_API_CALL *clCreateEventFromEGLSyncKHR_fn)(
cl_context context,
CLeglSyncKHR sync,
CLeglDisplayKHR display,
cl_int * errcode_ret);
#ifdef __cplusplus
}
#endif
#endif /* __OPENCL_CL_EGL_H */
/*******************************************************************************
* Copyright (c) 2008-2018 The Khronos Group Inc.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and/or associated documentation files (the
* "Materials"), to deal in the Materials without restriction, including
* without limitation the rights to use, copy, modify, merge, publish,
* distribute, sublicense, and/or sell copies of the Materials, and to
* permit persons to whom the Materials are furnished to do so, subject to
* the following conditions:
*
* The above copyright notice and this permission notice shall be included
* in all copies or substantial portions of the Materials.
*
* MODIFICATIONS TO THIS FILE MAY MEAN IT NO LONGER ACCURATELY REFLECTS
* KHRONOS STANDARDS. THE UNMODIFIED, NORMATIVE VERSIONS OF KHRONOS
* SPECIFICATIONS AND HEADER INFORMATION ARE LOCATED AT
* https://www.khronos.org/registry/
*
* THE MATERIALS ARE PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
* MATERIALS OR THE USE OR OTHER DEALINGS IN THE MATERIALS.
******************************************************************************/
/* cl_ext.h contains OpenCL extensions which don't have external */
/* (OpenGL, D3D) dependencies. */
#ifndef __CL_EXT_H
#define __CL_EXT_H
#ifdef __cplusplus
extern "C" {
#endif
#ifdef __APPLE__
#include <OpenCL/cl.h>
#include <AvailabilityMacros.h>
#else
#include <CL/cl.h>
#endif
/* cl_khr_fp64 extension - no extension #define since it has no functions */
/* CL_DEVICE_DOUBLE_FP_CONFIG is defined in CL.h for OpenCL >= 120 */
#if CL_TARGET_OPENCL_VERSION <= 110
#define CL_DEVICE_DOUBLE_FP_CONFIG 0x1032
#endif
/* cl_khr_fp16 extension - no extension #define since it has no functions */
#define CL_DEVICE_HALF_FP_CONFIG 0x1033
/* Memory object destruction
*
* Apple extension for use to manage externally allocated buffers used with cl_mem objects with CL_MEM_USE_HOST_PTR
*
* Registers a user callback function that will be called when the memory object is deleted and its resources
* freed. Each call to clSetMemObjectCallbackFn registers the specified user callback function on a callback
* stack associated with memobj. The registered user callback functions are called in the reverse order in
* which they were registered. The user callback functions are called and then the memory object is deleted
* and its resources freed. This provides a mechanism for the application (and libraries) using memobj to be
* notified when the memory referenced by host_ptr, specified when the memory object is created and used as
* the storage bits for the memory object, can be reused or freed.
*
* The application may not call CL api's with the cl_mem object passed to the pfn_notify.
*
* Please check for the "cl_APPLE_SetMemObjectDestructor" extension using clGetDeviceInfo(CL_DEVICE_EXTENSIONS)
* before using.
*/
#define cl_APPLE_SetMemObjectDestructor 1
cl_int CL_API_ENTRY clSetMemObjectDestructorAPPLE( cl_mem /* memobj */,
void (* /*pfn_notify*/)( cl_mem /* memobj */, void* /*user_data*/),
void * /*user_data */ ) CL_EXT_SUFFIX__VERSION_1_0;
/* Context Logging Functions
*
* The next three convenience functions are intended to be used as the pfn_notify parameter to clCreateContext().
* Please check for the "cl_APPLE_ContextLoggingFunctions" extension using clGetDeviceInfo(CL_DEVICE_EXTENSIONS)
* before using.
*
* clLogMessagesToSystemLog fowards on all log messages to the Apple System Logger
*/
#define cl_APPLE_ContextLoggingFunctions 1
extern void CL_API_ENTRY clLogMessagesToSystemLogAPPLE( const char * /* errstr */,
const void * /* private_info */,
size_t /* cb */,
void * /* user_data */ ) CL_EXT_SUFFIX__VERSION_1_0;
/* clLogMessagesToStdout sends all log messages to the file descriptor stdout */
extern void CL_API_ENTRY clLogMessagesToStdoutAPPLE( const char * /* errstr */,
const void * /* private_info */,
size_t /* cb */,
void * /* user_data */ ) CL_EXT_SUFFIX__VERSION_1_0;
/* clLogMessagesToStderr sends all log messages to the file descriptor stderr */
extern void CL_API_ENTRY clLogMessagesToStderrAPPLE( const char * /* errstr */,
const void * /* private_info */,
size_t /* cb */,
void * /* user_data */ ) CL_EXT_SUFFIX__VERSION_1_0;
/************************
* cl_khr_icd extension *
************************/
#define cl_khr_icd 1
/* cl_platform_info */
#define CL_PLATFORM_ICD_SUFFIX_KHR 0x0920
/* Additional Error Codes */
#define CL_PLATFORM_NOT_FOUND_KHR -1001
extern CL_API_ENTRY cl_int CL_API_CALL
clIcdGetPlatformIDsKHR(cl_uint /* num_entries */,
cl_platform_id * /* platforms */,
cl_uint * /* num_platforms */);
typedef CL_API_ENTRY cl_int (CL_API_CALL *clIcdGetPlatformIDsKHR_fn)(
cl_uint /* num_entries */,
cl_platform_id * /* platforms */,
cl_uint * /* num_platforms */);
/*******************************
* cl_khr_il_program extension *
*******************************/
#define cl_khr_il_program 1
/* New property to clGetDeviceInfo for retrieving supported intermediate
* languages
*/
#define CL_DEVICE_IL_VERSION_KHR 0x105B
/* New property to clGetProgramInfo for retrieving for retrieving the IL of a
* program
*/
#define CL_PROGRAM_IL_KHR 0x1169
extern CL_API_ENTRY cl_program
CL_API_CALL clCreateProgramWithILKHR(
cl_context /* context */,
const void * /* il */,
size_t /* length */,
cl_int * /* errcode_ret */);
typedef CL_API_ENTRY cl_program
(CL_API_CALL *clCreateProgramWithILKHR_fn)(
cl_context /* context */,
const void * /* il */,
size_t /* length */,
cl_int * /* errcode_ret */) CL_EXT_SUFFIX__VERSION_1_2;
/* Extension: cl_khr_image2D_buffer
*
* This extension allows a 2D image to be created from a cl_mem buffer without a copy.
* The type associated with a 2D image created from a buffer in an OpenCL program is image2d_t.
* Both the sampler and sampler-less read_image built-in functions are supported for 2D images
* and 2D images created from a buffer. Similarly, the write_image built-ins are also supported
* for 2D images created from a buffer.
*
* When the 2D image from buffer is created, the client must specify the width,
* height, image format (i.e. channel order and channel data type) and optionally the row pitch
*
* The pitch specified must be a multiple of CL_DEVICE_IMAGE_PITCH_ALIGNMENT pixels.
* The base address of the buffer must be aligned to CL_DEVICE_IMAGE_BASE_ADDRESS_ALIGNMENT pixels.
*/
/**************************************
* cl_khr_initialize_memory extension *
**************************************/
#define CL_CONTEXT_MEMORY_INITIALIZE_KHR 0x2030
/**************************************
* cl_khr_terminate_context extension *
**************************************/
#define CL_DEVICE_TERMINATE_CAPABILITY_KHR 0x2031
#define CL_CONTEXT_TERMINATE_KHR 0x2032
#define cl_khr_terminate_context 1
extern CL_API_ENTRY cl_int CL_API_CALL clTerminateContextKHR(cl_context /* context */) CL_EXT_SUFFIX__VERSION_1_2;
typedef CL_API_ENTRY cl_int (CL_API_CALL *clTerminateContextKHR_fn)(cl_context /* context */) CL_EXT_SUFFIX__VERSION_1_2;
/*
* Extension: cl_khr_spir
*
* This extension adds support to create an OpenCL program object from a
* Standard Portable Intermediate Representation (SPIR) instance
*/
#define CL_DEVICE_SPIR_VERSIONS 0x40E0
#define CL_PROGRAM_BINARY_TYPE_INTERMEDIATE 0x40E1
/*****************************************
* cl_khr_create_command_queue extension *
*****************************************/
#define cl_khr_create_command_queue 1
typedef cl_bitfield cl_queue_properties_khr;
extern CL_API_ENTRY cl_command_queue CL_API_CALL
clCreateCommandQueueWithPropertiesKHR( cl_context /* context */,
cl_device_id /* device */,
const cl_queue_properties_khr* /* properties */,
cl_int* /* errcode_ret */ ) CL_EXT_SUFFIX__VERSION_1_2;
typedef CL_API_ENTRY cl_command_queue
(CL_API_CALL *clCreateCommandQueueWithPropertiesKHR_fn)( cl_context /* context */,
cl_device_id /* device */,
const cl_queue_properties_khr* /* properties */,
cl_int* /* errcode_ret */ ) CL_EXT_SUFFIX__VERSION_1_2;
/******************************************
* cl_nv_device_attribute_query extension *
******************************************/
/* cl_nv_device_attribute_query extension - no extension #define since it has no functions */
#define CL_DEVICE_COMPUTE_CAPABILITY_MAJOR_NV 0x4000
#define CL_DEVICE_COMPUTE_CAPABILITY_MINOR_NV 0x4001
#define CL_DEVICE_REGISTERS_PER_BLOCK_NV 0x4002
#define CL_DEVICE_WARP_SIZE_NV 0x4003
#define CL_DEVICE_GPU_OVERLAP_NV 0x4004
#define CL_DEVICE_KERNEL_EXEC_TIMEOUT_NV 0x4005
#define CL_DEVICE_INTEGRATED_MEMORY_NV 0x4006
/*********************************
* cl_amd_device_attribute_query *
*********************************/
#define CL_DEVICE_PROFILING_TIMER_OFFSET_AMD 0x4036
/*********************************
* cl_arm_printf extension
*********************************/
#define CL_PRINTF_CALLBACK_ARM 0x40B0
#define CL_PRINTF_BUFFERSIZE_ARM 0x40B1
/***********************************
* cl_ext_device_fission extension
***********************************/
#define cl_ext_device_fission 1
extern CL_API_ENTRY cl_int CL_API_CALL
clReleaseDeviceEXT( cl_device_id /*device*/ ) CL_EXT_SUFFIX__VERSION_1_1;
typedef CL_API_ENTRY cl_int
(CL_API_CALL *clReleaseDeviceEXT_fn)( cl_device_id /*device*/ ) CL_EXT_SUFFIX__VERSION_1_1;
extern CL_API_ENTRY cl_int CL_API_CALL
clRetainDeviceEXT( cl_device_id /*device*/ ) CL_EXT_SUFFIX__VERSION_1_1;
typedef CL_API_ENTRY cl_int
(CL_API_CALL *clRetainDeviceEXT_fn)( cl_device_id /*device*/ ) CL_EXT_SUFFIX__VERSION_1_1;
typedef cl_ulong cl_device_partition_property_ext;
extern CL_API_ENTRY cl_int CL_API_CALL
clCreateSubDevicesEXT( cl_device_id /*in_device*/,
const cl_device_partition_property_ext * /* properties */,
cl_uint /*num_entries*/,
cl_device_id * /*out_devices*/,
cl_uint * /*num_devices*/ ) CL_EXT_SUFFIX__VERSION_1_1;
typedef CL_API_ENTRY cl_int
( CL_API_CALL * clCreateSubDevicesEXT_fn)( cl_device_id /*in_device*/,
const cl_device_partition_property_ext * /* properties */,
cl_uint /*num_entries*/,
cl_device_id * /*out_devices*/,
cl_uint * /*num_devices*/ ) CL_EXT_SUFFIX__VERSION_1_1;
/* cl_device_partition_property_ext */
#define CL_DEVICE_PARTITION_EQUALLY_EXT 0x4050
#define CL_DEVICE_PARTITION_BY_COUNTS_EXT 0x4051
#define CL_DEVICE_PARTITION_BY_NAMES_EXT 0x4052
#define CL_DEVICE_PARTITION_BY_AFFINITY_DOMAIN_EXT 0x4053
/* clDeviceGetInfo selectors */
#define CL_DEVICE_PARENT_DEVICE_EXT 0x4054
#define CL_DEVICE_PARTITION_TYPES_EXT 0x4055
#define CL_DEVICE_AFFINITY_DOMAINS_EXT 0x4056
#define CL_DEVICE_REFERENCE_COUNT_EXT 0x4057
#define CL_DEVICE_PARTITION_STYLE_EXT 0x4058
/* error codes */
#define CL_DEVICE_PARTITION_FAILED_EXT -1057
#define CL_INVALID_PARTITION_COUNT_EXT -1058
#define CL_INVALID_PARTITION_NAME_EXT -1059
/* CL_AFFINITY_DOMAINs */
#define CL_AFFINITY_DOMAIN_L1_CACHE_EXT 0x1
#define CL_AFFINITY_DOMAIN_L2_CACHE_EXT 0x2
#define CL_AFFINITY_DOMAIN_L3_CACHE_EXT 0x3
#define CL_AFFINITY_DOMAIN_L4_CACHE_EXT 0x4
#define CL_AFFINITY_DOMAIN_NUMA_EXT 0x10
#define CL_AFFINITY_DOMAIN_NEXT_FISSIONABLE_EXT 0x100
/* cl_device_partition_property_ext list terminators */
#define CL_PROPERTIES_LIST_END_EXT ((cl_device_partition_property_ext) 0)
#define CL_PARTITION_BY_COUNTS_LIST_END_EXT ((cl_device_partition_property_ext) 0)
#define CL_PARTITION_BY_NAMES_LIST_END_EXT ((cl_device_partition_property_ext) 0 - 1)
/***********************************
* cl_ext_migrate_memobject extension definitions
***********************************/
#define cl_ext_migrate_memobject 1
typedef cl_bitfield cl_mem_migration_flags_ext;
#define CL_MIGRATE_MEM_OBJECT_HOST_EXT 0x1
#define CL_COMMAND_MIGRATE_MEM_OBJECT_EXT 0x4040
extern CL_API_ENTRY cl_int CL_API_CALL
clEnqueueMigrateMemObjectEXT( cl_command_queue /* command_queue */,
cl_uint /* num_mem_objects */,
const cl_mem * /* mem_objects */,
cl_mem_migration_flags_ext /* flags */,
cl_uint /* num_events_in_wait_list */,
const cl_event * /* event_wait_list */,
cl_event * /* event */ );
typedef CL_API_ENTRY cl_int
(CL_API_CALL *clEnqueueMigrateMemObjectEXT_fn)( cl_command_queue /* command_queue */,
cl_uint /* num_mem_objects */,
const cl_mem * /* mem_objects */,
cl_mem_migration_flags_ext /* flags */,
cl_uint /* num_events_in_wait_list */,
const cl_event * /* event_wait_list */,
cl_event * /* event */ );
/*********************************
* cl_qcom_ext_host_ptr extension
*********************************/
#define cl_qcom_ext_host_ptr 1
#define CL_MEM_EXT_HOST_PTR_QCOM (1 << 29)
#define CL_DEVICE_EXT_MEM_PADDING_IN_BYTES_QCOM 0x40A0
#define CL_DEVICE_PAGE_SIZE_QCOM 0x40A1
#define CL_IMAGE_ROW_ALIGNMENT_QCOM 0x40A2
#define CL_IMAGE_SLICE_ALIGNMENT_QCOM 0x40A3
#define CL_MEM_HOST_UNCACHED_QCOM 0x40A4
#define CL_MEM_HOST_WRITEBACK_QCOM 0x40A5
#define CL_MEM_HOST_WRITETHROUGH_QCOM 0x40A6
#define CL_MEM_HOST_WRITE_COMBINING_QCOM 0x40A7
typedef cl_uint cl_image_pitch_info_qcom;
extern CL_API_ENTRY cl_int CL_API_CALL
clGetDeviceImageInfoQCOM(cl_device_id device,
size_t image_width,
size_t image_height,
const cl_image_format *image_format,
cl_image_pitch_info_qcom param_name,
size_t param_value_size,
void *param_value,
size_t *param_value_size_ret);
typedef struct _cl_mem_ext_host_ptr
{
/* Type of external memory allocation. */
/* Legal values will be defined in layered extensions. */
cl_uint allocation_type;
/* Host cache policy for this external memory allocation. */
cl_uint host_cache_policy;
} cl_mem_ext_host_ptr;
/*******************************************
* cl_qcom_ext_host_ptr_iocoherent extension
********************************************/
/* Cache policy specifying io-coherence */
#define CL_MEM_HOST_IOCOHERENT_QCOM 0x40A9
/*********************************
* cl_qcom_ion_host_ptr extension
*********************************/
#define CL_MEM_ION_HOST_PTR_QCOM 0x40A8
typedef struct _cl_mem_ion_host_ptr
{
/* Type of external memory allocation. */
/* Must be CL_MEM_ION_HOST_PTR_QCOM for ION allocations. */
cl_mem_ext_host_ptr ext_host_ptr;
/* ION file descriptor */
int ion_filedesc;
/* Host pointer to the ION allocated memory */
void* ion_hostptr;
} cl_mem_ion_host_ptr;
/*********************************
* cl_qcom_android_native_buffer_host_ptr extension
*********************************/
#define CL_MEM_ANDROID_NATIVE_BUFFER_HOST_PTR_QCOM 0x40C6
typedef struct _cl_mem_android_native_buffer_host_ptr
{
/* Type of external memory allocation. */
/* Must be CL_MEM_ANDROID_NATIVE_BUFFER_HOST_PTR_QCOM for Android native buffers. */
cl_mem_ext_host_ptr ext_host_ptr;
/* Virtual pointer to the android native buffer */
void* anb_ptr;
} cl_mem_android_native_buffer_host_ptr;
/******************************************
* cl_img_yuv_image extension *
******************************************/
/* Image formats used in clCreateImage */
#define CL_NV21_IMG 0x40D0
#define CL_YV12_IMG 0x40D1
/******************************************
* cl_img_cached_allocations extension *
******************************************/
/* Flag values used by clCreteBuffer */
#define CL_MEM_USE_UNCACHED_CPU_MEMORY_IMG (1 << 26)
#define CL_MEM_USE_CACHED_CPU_MEMORY_IMG (1 << 27)
/******************************************
* cl_img_use_gralloc_ptr extension *
******************************************/
#define cl_img_use_gralloc_ptr 1
/* Flag values used by clCreteBuffer */
#define CL_MEM_USE_GRALLOC_PTR_IMG (1 << 28)
/* To be used by clGetEventInfo: */
#define CL_COMMAND_ACQUIRE_GRALLOC_OBJECTS_IMG 0x40D2
#define CL_COMMAND_RELEASE_GRALLOC_OBJECTS_IMG 0x40D3
/* Error code from clEnqueueReleaseGrallocObjectsIMG */
#define CL_GRALLOC_RESOURCE_NOT_ACQUIRED_IMG 0x40D4
extern CL_API_ENTRY cl_int CL_API_CALL
clEnqueueAcquireGrallocObjectsIMG(cl_command_queue /* command_queue */,
cl_uint /* num_objects */,
const cl_mem * /* mem_objects */,
cl_uint /* num_events_in_wait_list */,
const cl_event * /* event_wait_list */,
cl_event * /* event */) CL_EXT_SUFFIX__VERSION_1_2;
extern CL_API_ENTRY cl_int CL_API_CALL
clEnqueueReleaseGrallocObjectsIMG(cl_command_queue /* command_queue */,
cl_uint /* num_objects */,
const cl_mem * /* mem_objects */,
cl_uint /* num_events_in_wait_list */,
const cl_event * /* event_wait_list */,
cl_event * /* event */) CL_EXT_SUFFIX__VERSION_1_2;
/*********************************
* cl_khr_subgroups extension
*********************************/
#define cl_khr_subgroups 1
#if !defined(CL_VERSION_2_1)
/* For OpenCL 2.1 and newer, cl_kernel_sub_group_info is declared in CL.h.
In hindsight, there should have been a khr suffix on this type for
the extension, but keeping it un-suffixed to maintain backwards
compatibility. */
typedef cl_uint cl_kernel_sub_group_info;
#endif
/* cl_kernel_sub_group_info */
#define CL_KERNEL_MAX_SUB_GROUP_SIZE_FOR_NDRANGE_KHR 0x2033
#define CL_KERNEL_SUB_GROUP_COUNT_FOR_NDRANGE_KHR 0x2034
extern CL_API_ENTRY cl_int CL_API_CALL
clGetKernelSubGroupInfoKHR(cl_kernel /* in_kernel */,
cl_device_id /*in_device*/,
cl_kernel_sub_group_info /* param_name */,
size_t /*input_value_size*/,
const void * /*input_value*/,
size_t /*param_value_size*/,
void* /*param_value*/,
size_t* /*param_value_size_ret*/ ) CL_EXT_SUFFIX__VERSION_2_0_DEPRECATED;
typedef CL_API_ENTRY cl_int
(CL_API_CALL * clGetKernelSubGroupInfoKHR_fn)(cl_kernel /* in_kernel */,
cl_device_id /*in_device*/,
cl_kernel_sub_group_info /* param_name */,
size_t /*input_value_size*/,
const void * /*input_value*/,
size_t /*param_value_size*/,
void* /*param_value*/,
size_t* /*param_value_size_ret*/ ) CL_EXT_SUFFIX__VERSION_2_0_DEPRECATED;
/*********************************
* cl_khr_priority_hints extension
*********************************/
/* This extension define is for backwards compatibility.
It shouldn't be required since this extension has no new functions. */
#define cl_khr_priority_hints 1
typedef cl_uint cl_queue_priority_khr;
/* cl_command_queue_properties */
#define CL_QUEUE_PRIORITY_KHR 0x1096
/* cl_queue_priority_khr */
#define CL_QUEUE_PRIORITY_HIGH_KHR (1<<0)
#define CL_QUEUE_PRIORITY_MED_KHR (1<<1)
#define CL_QUEUE_PRIORITY_LOW_KHR (1<<2)
/*********************************
* cl_khr_throttle_hints extension
*********************************/
/* This extension define is for backwards compatibility.
It shouldn't be required since this extension has no new functions. */
#define cl_khr_throttle_hints 1
typedef cl_uint cl_queue_throttle_khr;
/* cl_command_queue_properties */
#define CL_QUEUE_THROTTLE_KHR 0x1097
/* cl_queue_throttle_khr */
#define CL_QUEUE_THROTTLE_HIGH_KHR (1<<0)
#define CL_QUEUE_THROTTLE_MED_KHR (1<<1)
#define CL_QUEUE_THROTTLE_LOW_KHR (1<<2)
/*********************************
* cl_khr_subgroup_named_barrier
*********************************/
/* This extension define is for backwards compatibility.
It shouldn't be required since this extension has no new functions. */
#define cl_khr_subgroup_named_barrier 1
/* cl_device_info */
#define CL_DEVICE_MAX_NAMED_BARRIER_COUNT_KHR 0x2035
/**********************************
* cl_arm_import_memory extension *
**********************************/
#define cl_arm_import_memory 1
typedef intptr_t cl_import_properties_arm;
/* Default and valid proporties name for cl_arm_import_memory */
#define CL_IMPORT_TYPE_ARM 0x40B2
/* Host process memory type default value for CL_IMPORT_TYPE_ARM property */
#define CL_IMPORT_TYPE_HOST_ARM 0x40B3
/* DMA BUF memory type value for CL_IMPORT_TYPE_ARM property */
#define CL_IMPORT_TYPE_DMA_BUF_ARM 0x40B4
/* Secure DMA BUF memory type value for CL_IMPORT_TYPE_ARM property */
#define CL_IMPORT_TYPE_SECURE_ARM 0x40B5
/* This extension adds a new function that allows for direct memory import into
* OpenCL via the clImportMemoryARM function.
*
* Memory imported through this interface will be mapped into the device's page
* tables directly, providing zero copy access. It will never fall back to copy
* operations and aliased buffers.
*
* Types of memory supported for import are specified as additional extension
* strings.
*
* This extension produces cl_mem allocations which are compatible with all other
* users of cl_mem in the standard API.
*
* This extension maps pages with the same properties as the normal buffer creation
* function clCreateBuffer.
*/
extern CL_API_ENTRY cl_mem CL_API_CALL
clImportMemoryARM( cl_context context,
cl_mem_flags flags,
const cl_import_properties_arm *properties,
void *memory,
size_t size,
cl_int *errcode_ret) CL_EXT_SUFFIX__VERSION_1_0;
/******************************************
* cl_arm_shared_virtual_memory extension *
******************************************/
#define cl_arm_shared_virtual_memory 1
/* Used by clGetDeviceInfo */
#define CL_DEVICE_SVM_CAPABILITIES_ARM 0x40B6
/* Used by clGetMemObjectInfo */
#define CL_MEM_USES_SVM_POINTER_ARM 0x40B7
/* Used by clSetKernelExecInfoARM: */
#define CL_KERNEL_EXEC_INFO_SVM_PTRS_ARM 0x40B8
#define CL_KERNEL_EXEC_INFO_SVM_FINE_GRAIN_SYSTEM_ARM 0x40B9
/* To be used by clGetEventInfo: */
#define CL_COMMAND_SVM_FREE_ARM 0x40BA
#define CL_COMMAND_SVM_MEMCPY_ARM 0x40BB
#define CL_COMMAND_SVM_MEMFILL_ARM 0x40BC
#define CL_COMMAND_SVM_MAP_ARM 0x40BD
#define CL_COMMAND_SVM_UNMAP_ARM 0x40BE
/* Flag values returned by clGetDeviceInfo with CL_DEVICE_SVM_CAPABILITIES_ARM as the param_name. */
#define CL_DEVICE_SVM_COARSE_GRAIN_BUFFER_ARM (1 << 0)
#define CL_DEVICE_SVM_FINE_GRAIN_BUFFER_ARM (1 << 1)
#define CL_DEVICE_SVM_FINE_GRAIN_SYSTEM_ARM (1 << 2)
#define CL_DEVICE_SVM_ATOMICS_ARM (1 << 3)
/* Flag values used by clSVMAllocARM: */
#define CL_MEM_SVM_FINE_GRAIN_BUFFER_ARM (1 << 10)
#define CL_MEM_SVM_ATOMICS_ARM (1 << 11)
typedef cl_bitfield cl_svm_mem_flags_arm;
typedef cl_uint cl_kernel_exec_info_arm;
typedef cl_bitfield cl_device_svm_capabilities_arm;
extern CL_API_ENTRY void * CL_API_CALL
clSVMAllocARM(cl_context /* context */,
cl_svm_mem_flags_arm /* flags */,
size_t /* size */,
cl_uint /* alignment */) CL_EXT_SUFFIX__VERSION_1_2;
extern CL_API_ENTRY void CL_API_CALL
clSVMFreeARM(cl_context /* context */,
void * /* svm_pointer */) CL_EXT_SUFFIX__VERSION_1_2;
extern CL_API_ENTRY cl_int CL_API_CALL
clEnqueueSVMFreeARM(cl_command_queue /* command_queue */,
cl_uint /* num_svm_pointers */,
void *[] /* svm_pointers[] */,
void (CL_CALLBACK * /*pfn_free_func*/)(cl_command_queue /* queue */,
cl_uint /* num_svm_pointers */,
void *[] /* svm_pointers[] */,
void * /* user_data */),
void * /* user_data */,
cl_uint /* num_events_in_wait_list */,
const cl_event * /* event_wait_list */,
cl_event * /* event */) CL_EXT_SUFFIX__VERSION_1_2;
extern CL_API_ENTRY cl_int CL_API_CALL
clEnqueueSVMMemcpyARM(cl_command_queue /* command_queue */,
cl_bool /* blocking_copy */,
void * /* dst_ptr */,
const void * /* src_ptr */,
size_t /* size */,
cl_uint /* num_events_in_wait_list */,
const cl_event * /* event_wait_list */,
cl_event * /* event */) CL_EXT_SUFFIX__VERSION_1_2;
extern CL_API_ENTRY cl_int CL_API_CALL
clEnqueueSVMMemFillARM(cl_command_queue /* command_queue */,
void * /* svm_ptr */,
const void * /* pattern */,
size_t /* pattern_size */,
size_t /* size */,
cl_uint /* num_events_in_wait_list */,
const cl_event * /* event_wait_list */,
cl_event * /* event */) CL_EXT_SUFFIX__VERSION_1_2;
extern CL_API_ENTRY cl_int CL_API_CALL
clEnqueueSVMMapARM(cl_command_queue /* command_queue */,
cl_bool /* blocking_map */,
cl_map_flags /* flags */,
void * /* svm_ptr */,
size_t /* size */,
cl_uint /* num_events_in_wait_list */,
const cl_event * /* event_wait_list */,
cl_event * /* event */) CL_EXT_SUFFIX__VERSION_1_2;
extern CL_API_ENTRY cl_int CL_API_CALL
clEnqueueSVMUnmapARM(cl_command_queue /* command_queue */,
void * /* svm_ptr */,
cl_uint /* num_events_in_wait_list */,
const cl_event * /* event_wait_list */,
cl_event * /* event */) CL_EXT_SUFFIX__VERSION_1_2;
extern CL_API_ENTRY cl_int CL_API_CALL
clSetKernelArgSVMPointerARM(cl_kernel /* kernel */,
cl_uint /* arg_index */,
const void * /* arg_value */) CL_EXT_SUFFIX__VERSION_1_2;
extern CL_API_ENTRY cl_int CL_API_CALL
clSetKernelExecInfoARM(cl_kernel /* kernel */,
cl_kernel_exec_info_arm /* param_name */,
size_t /* param_value_size */,
const void * /* param_value */) CL_EXT_SUFFIX__VERSION_1_2;
#ifdef __cplusplus
}
#endif
#endif /* __CL_EXT_H */
/*******************************************************************************
* Copyright (c) 2008-2017 The Khronos Group Inc.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and/or associated documentation files (the
* "Materials"), to deal in the Materials without restriction, including
* without limitation the rights to use, copy, modify, merge, publish,
* distribute, sublicense, and/or sell copies of the Materials, and to
* permit persons to whom the Materials are furnished to do so, subject to
* the following conditions:
*
* The above copyright notice and this permission notice shall be included
* in all copies or substantial portions of the Materials.
*
* MODIFICATIONS TO THIS FILE MAY MEAN IT NO LONGER ACCURATELY REFLECTS
* KHRONOS STANDARDS. THE UNMODIFIED, NORMATIVE VERSIONS OF KHRONOS
* SPECIFICATIONS AND HEADER INFORMATION ARE LOCATED AT
* https://www.khronos.org/registry/
*
* THE MATERIALS ARE PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
* MATERIALS OR THE USE OR OTHER DEALINGS IN THE MATERIALS.
******************************************************************************/
/*****************************************************************************\
Copyright (c) 2013-2017 Intel Corporation All Rights Reserved.
THESE MATERIALS ARE PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL OR ITS
CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY OR TORT (INCLUDING
NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THESE
MATERIALS, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
File Name: cl_ext_intel.h
Abstract:
Notes:
\*****************************************************************************/
#ifndef __CL_EXT_INTEL_H
#define __CL_EXT_INTEL_H
#ifdef __APPLE__
#include <OpenCL/cl.h>
#include <OpenCL/cl_platform.h>
#else
#include <CL/cl.h>
#include <CL/cl_platform.h>
#endif
#ifdef __cplusplus
extern "C" {
#endif
/***************************************
* cl_intel_thread_local_exec extension *
****************************************/
#define cl_intel_thread_local_exec 1
#define CL_QUEUE_THREAD_LOCAL_EXEC_ENABLE_INTEL (((cl_bitfield)1) << 31)
/***********************************************
* cl_intel_device_partition_by_names extension *
************************************************/
#define cl_intel_device_partition_by_names 1
#define CL_DEVICE_PARTITION_BY_NAMES_INTEL 0x4052
#define CL_PARTITION_BY_NAMES_LIST_END_INTEL -1
/************************************************
* cl_intel_accelerator extension *
* cl_intel_motion_estimation extension *
* cl_intel_advanced_motion_estimation extension *
*************************************************/
#define cl_intel_accelerator 1
#define cl_intel_motion_estimation 1
#define cl_intel_advanced_motion_estimation 1
typedef struct _cl_accelerator_intel* cl_accelerator_intel;
typedef cl_uint cl_accelerator_type_intel;
typedef cl_uint cl_accelerator_info_intel;
typedef struct _cl_motion_estimation_desc_intel {
cl_uint mb_block_type;
cl_uint subpixel_mode;
cl_uint sad_adjust_mode;
cl_uint search_path_type;
} cl_motion_estimation_desc_intel;
/* error codes */
#define CL_INVALID_ACCELERATOR_INTEL -1094
#define CL_INVALID_ACCELERATOR_TYPE_INTEL -1095
#define CL_INVALID_ACCELERATOR_DESCRIPTOR_INTEL -1096
#define CL_ACCELERATOR_TYPE_NOT_SUPPORTED_INTEL -1097
/* cl_accelerator_type_intel */
#define CL_ACCELERATOR_TYPE_MOTION_ESTIMATION_INTEL 0x0
/* cl_accelerator_info_intel */
#define CL_ACCELERATOR_DESCRIPTOR_INTEL 0x4090
#define CL_ACCELERATOR_REFERENCE_COUNT_INTEL 0x4091
#define CL_ACCELERATOR_CONTEXT_INTEL 0x4092
#define CL_ACCELERATOR_TYPE_INTEL 0x4093
/* cl_motion_detect_desc_intel flags */
#define CL_ME_MB_TYPE_16x16_INTEL 0x0
#define CL_ME_MB_TYPE_8x8_INTEL 0x1
#define CL_ME_MB_TYPE_4x4_INTEL 0x2
#define CL_ME_SUBPIXEL_MODE_INTEGER_INTEL 0x0
#define CL_ME_SUBPIXEL_MODE_HPEL_INTEL 0x1
#define CL_ME_SUBPIXEL_MODE_QPEL_INTEL 0x2
#define CL_ME_SAD_ADJUST_MODE_NONE_INTEL 0x0
#define CL_ME_SAD_ADJUST_MODE_HAAR_INTEL 0x1
#define CL_ME_SEARCH_PATH_RADIUS_2_2_INTEL 0x0
#define CL_ME_SEARCH_PATH_RADIUS_4_4_INTEL 0x1
#define CL_ME_SEARCH_PATH_RADIUS_16_12_INTEL 0x5
#define CL_ME_SKIP_BLOCK_TYPE_16x16_INTEL 0x0
#define CL_ME_CHROMA_INTRA_PREDICT_ENABLED_INTEL 0x1
#define CL_ME_LUMA_INTRA_PREDICT_ENABLED_INTEL 0x2
#define CL_ME_SKIP_BLOCK_TYPE_8x8_INTEL 0x4
#define CL_ME_FORWARD_INPUT_MODE_INTEL 0x1
#define CL_ME_BACKWARD_INPUT_MODE_INTEL 0x2
#define CL_ME_BIDIRECTION_INPUT_MODE_INTEL 0x3
#define CL_ME_BIDIR_WEIGHT_QUARTER_INTEL 16
#define CL_ME_BIDIR_WEIGHT_THIRD_INTEL 21
#define CL_ME_BIDIR_WEIGHT_HALF_INTEL 32
#define CL_ME_BIDIR_WEIGHT_TWO_THIRD_INTEL 43
#define CL_ME_BIDIR_WEIGHT_THREE_QUARTER_INTEL 48
#define CL_ME_COST_PENALTY_NONE_INTEL 0x0
#define CL_ME_COST_PENALTY_LOW_INTEL 0x1
#define CL_ME_COST_PENALTY_NORMAL_INTEL 0x2
#define CL_ME_COST_PENALTY_HIGH_INTEL 0x3
#define CL_ME_COST_PRECISION_QPEL_INTEL 0x0
#define CL_ME_COST_PRECISION_HPEL_INTEL 0x1
#define CL_ME_COST_PRECISION_PEL_INTEL 0x2
#define CL_ME_COST_PRECISION_DPEL_INTEL 0x3
#define CL_ME_LUMA_PREDICTOR_MODE_VERTICAL_INTEL 0x0
#define CL_ME_LUMA_PREDICTOR_MODE_HORIZONTAL_INTEL 0x1
#define CL_ME_LUMA_PREDICTOR_MODE_DC_INTEL 0x2
#define CL_ME_LUMA_PREDICTOR_MODE_DIAGONAL_DOWN_LEFT_INTEL 0x3
#define CL_ME_LUMA_PREDICTOR_MODE_DIAGONAL_DOWN_RIGHT_INTEL 0x4
#define CL_ME_LUMA_PREDICTOR_MODE_PLANE_INTEL 0x4
#define CL_ME_LUMA_PREDICTOR_MODE_VERTICAL_RIGHT_INTEL 0x5
#define CL_ME_LUMA_PREDICTOR_MODE_HORIZONTAL_DOWN_INTEL 0x6
#define CL_ME_LUMA_PREDICTOR_MODE_VERTICAL_LEFT_INTEL 0x7
#define CL_ME_LUMA_PREDICTOR_MODE_HORIZONTAL_UP_INTEL 0x8
#define CL_ME_CHROMA_PREDICTOR_MODE_DC_INTEL 0x0
#define CL_ME_CHROMA_PREDICTOR_MODE_HORIZONTAL_INTEL 0x1
#define CL_ME_CHROMA_PREDICTOR_MODE_VERTICAL_INTEL 0x2
#define CL_ME_CHROMA_PREDICTOR_MODE_PLANE_INTEL 0x3
/* cl_device_info */
#define CL_DEVICE_ME_VERSION_INTEL 0x407E
#define CL_ME_VERSION_LEGACY_INTEL 0x0
#define CL_ME_VERSION_ADVANCED_VER_1_INTEL 0x1
#define CL_ME_VERSION_ADVANCED_VER_2_INTEL 0x2
extern CL_API_ENTRY cl_accelerator_intel CL_API_CALL
clCreateAcceleratorINTEL(
cl_context /* context */,
cl_accelerator_type_intel /* accelerator_type */,
size_t /* descriptor_size */,
const void* /* descriptor */,
cl_int* /* errcode_ret */) CL_EXT_SUFFIX__VERSION_1_2;
typedef CL_API_ENTRY cl_accelerator_intel (CL_API_CALL *clCreateAcceleratorINTEL_fn)(
cl_context /* context */,
cl_accelerator_type_intel /* accelerator_type */,
size_t /* descriptor_size */,
const void* /* descriptor */,
cl_int* /* errcode_ret */) CL_EXT_SUFFIX__VERSION_1_2;
extern CL_API_ENTRY cl_int CL_API_CALL
clGetAcceleratorInfoINTEL(
cl_accelerator_intel /* accelerator */,
cl_accelerator_info_intel /* param_name */,
size_t /* param_value_size */,
void* /* param_value */,
size_t* /* param_value_size_ret */) CL_EXT_SUFFIX__VERSION_1_2;
typedef CL_API_ENTRY cl_int (CL_API_CALL *clGetAcceleratorInfoINTEL_fn)(
cl_accelerator_intel /* accelerator */,
cl_accelerator_info_intel /* param_name */,
size_t /* param_value_size */,
void* /* param_value */,
size_t* /* param_value_size_ret */) CL_EXT_SUFFIX__VERSION_1_2;
extern CL_API_ENTRY cl_int CL_API_CALL
clRetainAcceleratorINTEL(
cl_accelerator_intel /* accelerator */) CL_EXT_SUFFIX__VERSION_1_2;
typedef CL_API_ENTRY cl_int (CL_API_CALL *clRetainAcceleratorINTEL_fn)(
cl_accelerator_intel /* accelerator */) CL_EXT_SUFFIX__VERSION_1_2;
extern CL_API_ENTRY cl_int CL_API_CALL
clReleaseAcceleratorINTEL(
cl_accelerator_intel /* accelerator */) CL_EXT_SUFFIX__VERSION_1_2;
typedef CL_API_ENTRY cl_int (CL_API_CALL *clReleaseAcceleratorINTEL_fn)(
cl_accelerator_intel /* accelerator */) CL_EXT_SUFFIX__VERSION_1_2;
/******************************************
* cl_intel_simultaneous_sharing extension *
*******************************************/
#define cl_intel_simultaneous_sharing 1
#define CL_DEVICE_SIMULTANEOUS_INTEROPS_INTEL 0x4104
#define CL_DEVICE_NUM_SIMULTANEOUS_INTEROPS_INTEL 0x4105
/***********************************
* cl_intel_egl_image_yuv extension *
************************************/
#define cl_intel_egl_image_yuv 1
#define CL_EGL_YUV_PLANE_INTEL 0x4107
/********************************
* cl_intel_packed_yuv extension *
*********************************/
#define cl_intel_packed_yuv 1
#define CL_YUYV_INTEL 0x4076
#define CL_UYVY_INTEL 0x4077
#define CL_YVYU_INTEL 0x4078
#define CL_VYUY_INTEL 0x4079
/********************************************
* cl_intel_required_subgroup_size extension *
*********************************************/
#define cl_intel_required_subgroup_size 1
#define CL_DEVICE_SUB_GROUP_SIZES_INTEL 0x4108
#define CL_KERNEL_SPILL_MEM_SIZE_INTEL 0x4109
#define CL_KERNEL_COMPILE_SUB_GROUP_SIZE_INTEL 0x410A
/****************************************
* cl_intel_driver_diagnostics extension *
*****************************************/
#define cl_intel_driver_diagnostics 1
typedef cl_uint cl_diagnostics_verbose_level;
#define CL_CONTEXT_SHOW_DIAGNOSTICS_INTEL 0x4106
#define CL_CONTEXT_DIAGNOSTICS_LEVEL_ALL_INTEL ( 0xff )
#define CL_CONTEXT_DIAGNOSTICS_LEVEL_GOOD_INTEL ( 1 )
#define CL_CONTEXT_DIAGNOSTICS_LEVEL_BAD_INTEL ( 1 << 1 )
#define CL_CONTEXT_DIAGNOSTICS_LEVEL_NEUTRAL_INTEL ( 1 << 2 )
/********************************
* cl_intel_planar_yuv extension *
*********************************/
#define CL_NV12_INTEL 0x410E
#define CL_MEM_NO_ACCESS_INTEL ( 1 << 24 )
#define CL_MEM_ACCESS_FLAGS_UNRESTRICTED_INTEL ( 1 << 25 )
#define CL_DEVICE_PLANAR_YUV_MAX_WIDTH_INTEL 0x417E
#define CL_DEVICE_PLANAR_YUV_MAX_HEIGHT_INTEL 0x417F
/*******************************************************
* cl_intel_device_side_avc_motion_estimation extension *
********************************************************/
#define CL_DEVICE_AVC_ME_VERSION_INTEL 0x410B
#define CL_DEVICE_AVC_ME_SUPPORTS_TEXTURE_SAMPLER_USE_INTEL 0x410C
#define CL_DEVICE_AVC_ME_SUPPORTS_PREEMPTION_INTEL 0x410D
#define CL_AVC_ME_VERSION_0_INTEL 0x0; // No support.
#define CL_AVC_ME_VERSION_1_INTEL 0x1; // First supported version.
#define CL_AVC_ME_MAJOR_16x16_INTEL 0x0
#define CL_AVC_ME_MAJOR_16x8_INTEL 0x1
#define CL_AVC_ME_MAJOR_8x16_INTEL 0x2
#define CL_AVC_ME_MAJOR_8x8_INTEL 0x3
#define CL_AVC_ME_MINOR_8x8_INTEL 0x0
#define CL_AVC_ME_MINOR_8x4_INTEL 0x1
#define CL_AVC_ME_MINOR_4x8_INTEL 0x2
#define CL_AVC_ME_MINOR_4x4_INTEL 0x3
#define CL_AVC_ME_MAJOR_FORWARD_INTEL 0x0
#define CL_AVC_ME_MAJOR_BACKWARD_INTEL 0x1
#define CL_AVC_ME_MAJOR_BIDIRECTIONAL_INTEL 0x2
#define CL_AVC_ME_PARTITION_MASK_ALL_INTEL 0x0
#define CL_AVC_ME_PARTITION_MASK_16x16_INTEL 0x7E
#define CL_AVC_ME_PARTITION_MASK_16x8_INTEL 0x7D
#define CL_AVC_ME_PARTITION_MASK_8x16_INTEL 0x7B
#define CL_AVC_ME_PARTITION_MASK_8x8_INTEL 0x77
#define CL_AVC_ME_PARTITION_MASK_8x4_INTEL 0x6F
#define CL_AVC_ME_PARTITION_MASK_4x8_INTEL 0x5F
#define CL_AVC_ME_PARTITION_MASK_4x4_INTEL 0x3F
#define CL_AVC_ME_SEARCH_WINDOW_EXHAUSTIVE_INTEL 0x0
#define CL_AVC_ME_SEARCH_WINDOW_SMALL_INTEL 0x1
#define CL_AVC_ME_SEARCH_WINDOW_TINY_INTEL 0x2
#define CL_AVC_ME_SEARCH_WINDOW_EXTRA_TINY_INTEL 0x3
#define CL_AVC_ME_SEARCH_WINDOW_DIAMOND_INTEL 0x4
#define CL_AVC_ME_SEARCH_WINDOW_LARGE_DIAMOND_INTEL 0x5
#define CL_AVC_ME_SEARCH_WINDOW_RESERVED0_INTEL 0x6
#define CL_AVC_ME_SEARCH_WINDOW_RESERVED1_INTEL 0x7
#define CL_AVC_ME_SEARCH_WINDOW_CUSTOM_INTEL 0x8
#define CL_AVC_ME_SEARCH_WINDOW_16x12_RADIUS_INTEL 0x9
#define CL_AVC_ME_SEARCH_WINDOW_4x4_RADIUS_INTEL 0x2
#define CL_AVC_ME_SEARCH_WINDOW_2x2_RADIUS_INTEL 0xa
#define CL_AVC_ME_SAD_ADJUST_MODE_NONE_INTEL 0x0
#define CL_AVC_ME_SAD_ADJUST_MODE_HAAR_INTEL 0x2
#define CL_AVC_ME_SUBPIXEL_MODE_INTEGER_INTEL 0x0
#define CL_AVC_ME_SUBPIXEL_MODE_HPEL_INTEL 0x1
#define CL_AVC_ME_SUBPIXEL_MODE_QPEL_INTEL 0x3
#define CL_AVC_ME_COST_PRECISION_QPEL_INTEL 0x0
#define CL_AVC_ME_COST_PRECISION_HPEL_INTEL 0x1
#define CL_AVC_ME_COST_PRECISION_PEL_INTEL 0x2
#define CL_AVC_ME_COST_PRECISION_DPEL_INTEL 0x3
#define CL_AVC_ME_BIDIR_WEIGHT_QUARTER_INTEL 0x10
#define CL_AVC_ME_BIDIR_WEIGHT_THIRD_INTEL 0x15
#define CL_AVC_ME_BIDIR_WEIGHT_HALF_INTEL 0x20
#define CL_AVC_ME_BIDIR_WEIGHT_TWO_THIRD_INTEL 0x2B
#define CL_AVC_ME_BIDIR_WEIGHT_THREE_QUARTER_INTEL 0x30
#define CL_AVC_ME_BORDER_REACHED_LEFT_INTEL 0x0
#define CL_AVC_ME_BORDER_REACHED_RIGHT_INTEL 0x2
#define CL_AVC_ME_BORDER_REACHED_TOP_INTEL 0x4
#define CL_AVC_ME_BORDER_REACHED_BOTTOM_INTEL 0x8
#define CL_AVC_ME_SKIP_BLOCK_PARTITION_16x16_INTEL 0x0
#define CL_AVC_ME_SKIP_BLOCK_PARTITION_8x8_INTEL 0x4000
#define CL_AVC_ME_SKIP_BLOCK_16x16_FORWARD_ENABLE_INTEL ( 0x1 << 24 )
#define CL_AVC_ME_SKIP_BLOCK_16x16_BACKWARD_ENABLE_INTEL ( 0x2 << 24 )
#define CL_AVC_ME_SKIP_BLOCK_16x16_DUAL_ENABLE_INTEL ( 0x3 << 24 )
#define CL_AVC_ME_SKIP_BLOCK_8x8_FORWARD_ENABLE_INTEL ( 0x55 << 24 )
#define CL_AVC_ME_SKIP_BLOCK_8x8_BACKWARD_ENABLE_INTEL ( 0xAA << 24 )
#define CL_AVC_ME_SKIP_BLOCK_8x8_DUAL_ENABLE_INTEL ( 0xFF << 24 )
#define CL_AVC_ME_SKIP_BLOCK_8x8_0_FORWARD_ENABLE_INTEL ( 0x1 << 24 )
#define CL_AVC_ME_SKIP_BLOCK_8x8_0_BACKWARD_ENABLE_INTEL ( 0x2 << 24 )
#define CL_AVC_ME_SKIP_BLOCK_8x8_1_FORWARD_ENABLE_INTEL ( 0x1 << 26 )
#define CL_AVC_ME_SKIP_BLOCK_8x8_1_BACKWARD_ENABLE_INTEL ( 0x2 << 26 )
#define CL_AVC_ME_SKIP_BLOCK_8x8_2_FORWARD_ENABLE_INTEL ( 0x1 << 28 )
#define CL_AVC_ME_SKIP_BLOCK_8x8_2_BACKWARD_ENABLE_INTEL ( 0x2 << 28 )
#define CL_AVC_ME_SKIP_BLOCK_8x8_3_FORWARD_ENABLE_INTEL ( 0x1 << 30 )
#define CL_AVC_ME_SKIP_BLOCK_8x8_3_BACKWARD_ENABLE_INTEL ( 0x2 << 30 )
#define CL_AVC_ME_BLOCK_BASED_SKIP_4x4_INTEL 0x00
#define CL_AVC_ME_BLOCK_BASED_SKIP_8x8_INTEL 0x80
#define CL_AVC_ME_INTRA_16x16_INTEL 0x0
#define CL_AVC_ME_INTRA_8x8_INTEL 0x1
#define CL_AVC_ME_INTRA_4x4_INTEL 0x2
#define CL_AVC_ME_INTRA_LUMA_PARTITION_MASK_16x16_INTEL 0x6
#define CL_AVC_ME_INTRA_LUMA_PARTITION_MASK_8x8_INTEL 0x5
#define CL_AVC_ME_INTRA_LUMA_PARTITION_MASK_4x4_INTEL 0x3
#define CL_AVC_ME_INTRA_NEIGHBOR_LEFT_MASK_ENABLE_INTEL 0x60
#define CL_AVC_ME_INTRA_NEIGHBOR_UPPER_MASK_ENABLE_INTEL 0x10
#define CL_AVC_ME_INTRA_NEIGHBOR_UPPER_RIGHT_MASK_ENABLE_INTEL 0x8
#define CL_AVC_ME_INTRA_NEIGHBOR_UPPER_LEFT_MASK_ENABLE_INTEL 0x4
#define CL_AVC_ME_LUMA_PREDICTOR_MODE_VERTICAL_INTEL 0x0
#define CL_AVC_ME_LUMA_PREDICTOR_MODE_HORIZONTAL_INTEL 0x1
#define CL_AVC_ME_LUMA_PREDICTOR_MODE_DC_INTEL 0x2
#define CL_AVC_ME_LUMA_PREDICTOR_MODE_DIAGONAL_DOWN_LEFT_INTEL 0x3
#define CL_AVC_ME_LUMA_PREDICTOR_MODE_DIAGONAL_DOWN_RIGHT_INTEL 0x4
#define CL_AVC_ME_LUMA_PREDICTOR_MODE_PLANE_INTEL 0x4
#define CL_AVC_ME_LUMA_PREDICTOR_MODE_VERTICAL_RIGHT_INTEL 0x5
#define CL_AVC_ME_LUMA_PREDICTOR_MODE_HORIZONTAL_DOWN_INTEL 0x6
#define CL_AVC_ME_LUMA_PREDICTOR_MODE_VERTICAL_LEFT_INTEL 0x7
#define CL_AVC_ME_LUMA_PREDICTOR_MODE_HORIZONTAL_UP_INTEL 0x8
#define CL_AVC_ME_CHROMA_PREDICTOR_MODE_DC_INTEL 0x0
#define CL_AVC_ME_CHROMA_PREDICTOR_MODE_HORIZONTAL_INTEL 0x1
#define CL_AVC_ME_CHROMA_PREDICTOR_MODE_VERTICAL_INTEL 0x2
#define CL_AVC_ME_CHROMA_PREDICTOR_MODE_PLANE_INTEL 0x3
#define CL_AVC_ME_FRAME_FORWARD_INTEL 0x1
#define CL_AVC_ME_FRAME_BACKWARD_INTEL 0x2
#define CL_AVC_ME_FRAME_DUAL_INTEL 0x3
#define CL_AVC_ME_SLICE_TYPE_PRED_INTEL 0x0
#define CL_AVC_ME_SLICE_TYPE_BPRED_INTEL 0x1
#define CL_AVC_ME_SLICE_TYPE_INTRA_INTEL 0x2
#define CL_AVC_ME_INTERLACED_SCAN_TOP_FIELD_INTEL 0x0
#define CL_AVC_ME_INTERLACED_SCAN_BOTTOM_FIELD_INTEL 0x1
#ifdef __cplusplus
}
#endif
#endif /* __CL_EXT_INTEL_H */
/**********************************************************************************
* Copyright (c) 2008-2018 The Khronos Group Inc.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and/or associated documentation files (the
* "Materials"), to deal in the Materials without restriction, including
* without limitation the rights to use, copy, modify, merge, publish,
* distribute, sublicense, and/or sell copies of the Materials, and to
* permit persons to whom the Materials are furnished to do so, subject to
* the following conditions:
*
* The above copyright notice and this permission notice shall be included
* in all copies or substantial portions of the Materials.
*
* MODIFICATIONS TO THIS FILE MAY MEAN IT NO LONGER ACCURATELY REFLECTS
* KHRONOS STANDARDS. THE UNMODIFIED, NORMATIVE VERSIONS OF KHRONOS
* SPECIFICATIONS AND HEADER INFORMATION ARE LOCATED AT
* https://www.khronos.org/registry/
*
* THE MATERIALS ARE PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
* MATERIALS OR THE USE OR OTHER DEALINGS IN THE MATERIALS.
**********************************************************************************/
#ifndef __OPENCL_CL_GL_H
#define __OPENCL_CL_GL_H
#ifdef __APPLE__
#include <OpenCL/cl.h>
#else
#include <CL/cl.h>
#endif
#ifdef __cplusplus
extern "C" {
#endif
typedef cl_uint cl_gl_object_type;
typedef cl_uint cl_gl_texture_info;
typedef cl_uint cl_gl_platform_info;
typedef struct __GLsync *cl_GLsync;
/* cl_gl_object_type = 0x2000 - 0x200F enum values are currently taken */
#define CL_GL_OBJECT_BUFFER 0x2000
#define CL_GL_OBJECT_TEXTURE2D 0x2001
#define CL_GL_OBJECT_TEXTURE3D 0x2002
#define CL_GL_OBJECT_RENDERBUFFER 0x2003
#ifdef CL_VERSION_1_2
#define CL_GL_OBJECT_TEXTURE2D_ARRAY 0x200E
#define CL_GL_OBJECT_TEXTURE1D 0x200F
#define CL_GL_OBJECT_TEXTURE1D_ARRAY 0x2010
#define CL_GL_OBJECT_TEXTURE_BUFFER 0x2011
#endif
/* cl_gl_texture_info */
#define CL_GL_TEXTURE_TARGET 0x2004
#define CL_GL_MIPMAP_LEVEL 0x2005
#ifdef CL_VERSION_1_2
#define CL_GL_NUM_SAMPLES 0x2012
#endif
extern CL_API_ENTRY cl_mem CL_API_CALL
clCreateFromGLBuffer(cl_context /* context */,
cl_mem_flags /* flags */,
cl_GLuint /* bufobj */,
int * /* errcode_ret */) CL_API_SUFFIX__VERSION_1_0;
#ifdef CL_VERSION_1_2
extern CL_API_ENTRY cl_mem CL_API_CALL
clCreateFromGLTexture(cl_context /* context */,
cl_mem_flags /* flags */,
cl_GLenum /* target */,
cl_GLint /* miplevel */,
cl_GLuint /* texture */,
cl_int * /* errcode_ret */) CL_API_SUFFIX__VERSION_1_2;
#endif
extern CL_API_ENTRY cl_mem CL_API_CALL
clCreateFromGLRenderbuffer(cl_context /* context */,
cl_mem_flags /* flags */,
cl_GLuint /* renderbuffer */,
cl_int * /* errcode_ret */) CL_API_SUFFIX__VERSION_1_0;
extern CL_API_ENTRY cl_int CL_API_CALL
clGetGLObjectInfo(cl_mem /* memobj */,
cl_gl_object_type * /* gl_object_type */,
cl_GLuint * /* gl_object_name */) CL_API_SUFFIX__VERSION_1_0;
extern CL_API_ENTRY cl_int CL_API_CALL
clGetGLTextureInfo(cl_mem /* memobj */,
cl_gl_texture_info /* param_name */,
size_t /* param_value_size */,
void * /* param_value */,
size_t * /* param_value_size_ret */) CL_API_SUFFIX__VERSION_1_0;
extern CL_API_ENTRY cl_int CL_API_CALL
clEnqueueAcquireGLObjects(cl_command_queue /* command_queue */,
cl_uint /* num_objects */,
const cl_mem * /* mem_objects */,
cl_uint /* num_events_in_wait_list */,
const cl_event * /* event_wait_list */,
cl_event * /* event */) CL_API_SUFFIX__VERSION_1_0;
extern CL_API_ENTRY cl_int CL_API_CALL
clEnqueueReleaseGLObjects(cl_command_queue /* command_queue */,
cl_uint /* num_objects */,
const cl_mem * /* mem_objects */,
cl_uint /* num_events_in_wait_list */,
const cl_event * /* event_wait_list */,
cl_event * /* event */) CL_API_SUFFIX__VERSION_1_0;
/* Deprecated OpenCL 1.1 APIs */
extern CL_API_ENTRY CL_EXT_PREFIX__VERSION_1_1_DEPRECATED cl_mem CL_API_CALL
clCreateFromGLTexture2D(cl_context /* context */,
cl_mem_flags /* flags */,
cl_GLenum /* target */,
cl_GLint /* miplevel */,
cl_GLuint /* texture */,
cl_int * /* errcode_ret */) CL_EXT_SUFFIX__VERSION_1_1_DEPRECATED;
extern CL_API_ENTRY CL_EXT_PREFIX__VERSION_1_1_DEPRECATED cl_mem CL_API_CALL
clCreateFromGLTexture3D(cl_context /* context */,
cl_mem_flags /* flags */,
cl_GLenum /* target */,
cl_GLint /* miplevel */,
cl_GLuint /* texture */,
cl_int * /* errcode_ret */) CL_EXT_SUFFIX__VERSION_1_1_DEPRECATED;
/* cl_khr_gl_sharing extension */
#define cl_khr_gl_sharing 1
typedef cl_uint cl_gl_context_info;
/* Additional Error Codes */
#define CL_INVALID_GL_SHAREGROUP_REFERENCE_KHR -1000
/* cl_gl_context_info */
#define CL_CURRENT_DEVICE_FOR_GL_CONTEXT_KHR 0x2006
#define CL_DEVICES_FOR_GL_CONTEXT_KHR 0x2007
/* Additional cl_context_properties */
#define CL_GL_CONTEXT_KHR 0x2008
#define CL_EGL_DISPLAY_KHR 0x2009
#define CL_GLX_DISPLAY_KHR 0x200A
#define CL_WGL_HDC_KHR 0x200B
#define CL_CGL_SHAREGROUP_KHR 0x200C
extern CL_API_ENTRY cl_int CL_API_CALL
clGetGLContextInfoKHR(const cl_context_properties * /* properties */,
cl_gl_context_info /* param_name */,
size_t /* param_value_size */,
void * /* param_value */,
size_t * /* param_value_size_ret */) CL_API_SUFFIX__VERSION_1_0;
typedef CL_API_ENTRY cl_int (CL_API_CALL *clGetGLContextInfoKHR_fn)(
const cl_context_properties * properties,
cl_gl_context_info param_name,
size_t param_value_size,
void * param_value,
size_t * param_value_size_ret);
#ifdef __cplusplus
}
#endif
#endif /* __OPENCL_CL_GL_H */
/**********************************************************************************
* Copyright (c) 2008-2015 The Khronos Group Inc.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and/or associated documentation files (the
* "Materials"), to deal in the Materials without restriction, including
* without limitation the rights to use, copy, modify, merge, publish,
* distribute, sublicense, and/or sell copies of the Materials, and to
* permit persons to whom the Materials are furnished to do so, subject to
* the following conditions:
*
* The above copyright notice and this permission notice shall be included
* in all copies or substantial portions of the Materials.
*
* MODIFICATIONS TO THIS FILE MAY MEAN IT NO LONGER ACCURATELY REFLECTS
* KHRONOS STANDARDS. THE UNMODIFIED, NORMATIVE VERSIONS OF KHRONOS
* SPECIFICATIONS AND HEADER INFORMATION ARE LOCATED AT
* https://www.khronos.org/registry/
*
* THE MATERIALS ARE PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
* MATERIALS OR THE USE OR OTHER DEALINGS IN THE MATERIALS.
**********************************************************************************/
/* $Revision: 11708 $ on $Date: 2010-06-13 23:36:24 -0700 (Sun, 13 Jun 2010) $ */
/* cl_gl_ext.h contains vendor (non-KHR) OpenCL extensions which have */
/* OpenGL dependencies. */
#ifndef __OPENCL_CL_GL_EXT_H
#define __OPENCL_CL_GL_EXT_H
#ifdef __cplusplus
extern "C" {
#endif
#ifdef __APPLE__
#include <OpenCL/cl_gl.h>
#else
#include <CL/cl_gl.h>
#endif
/*
* For each extension, follow this template
* cl_VEN_extname extension */
/* #define cl_VEN_extname 1
* ... define new types, if any
* ... define new tokens, if any
* ... define new APIs, if any
*
* If you need GLtypes here, mirror them with a cl_GLtype, rather than including a GL header
* This allows us to avoid having to decide whether to include GL headers or GLES here.
*/
/*
* cl_khr_gl_event extension
* See section 9.9 in the OpenCL 1.1 spec for more information
*/
#define CL_COMMAND_GL_FENCE_SYNC_OBJECT_KHR 0x200D
extern CL_API_ENTRY cl_event CL_API_CALL
clCreateEventFromGLsyncKHR(cl_context /* context */,
cl_GLsync /* cl_GLsync */,
cl_int * /* errcode_ret */) CL_EXT_SUFFIX__VERSION_1_1;
#ifdef __cplusplus
}
#endif
#endif /* __OPENCL_CL_GL_EXT_H */
/**********************************************************************************
* Copyright (c) 2008-2018 The Khronos Group Inc.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and/or associated documentation files (the
* "Materials"), to deal in the Materials without restriction, including
* without limitation the rights to use, copy, modify, merge, publish,
* distribute, sublicense, and/or sell copies of the Materials, and to
* permit persons to whom the Materials are furnished to do so, subject to
* the following conditions:
*
* The above copyright notice and this permission notice shall be included
* in all copies or substantial portions of the Materials.
*
* MODIFICATIONS TO THIS FILE MAY MEAN IT NO LONGER ACCURATELY REFLECTS
* KHRONOS STANDARDS. THE UNMODIFIED, NORMATIVE VERSIONS OF KHRONOS
* SPECIFICATIONS AND HEADER INFORMATION ARE LOCATED AT
* https://www.khronos.org/registry/
*
* THE MATERIALS ARE PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
* MATERIALS OR THE USE OR OTHER DEALINGS IN THE MATERIALS.
**********************************************************************************/
#ifndef __CL_PLATFORM_H
#define __CL_PLATFORM_H
#ifdef __APPLE__
#include <OpenCL/cl_version.h>
/* Contains #defines for AVAILABLE_MAC_OS_X_VERSION_10_6_AND_LATER below */
#include <AvailabilityMacros.h>
#else
#include <CL/cl_version.h>
#endif
#ifdef __cplusplus
extern "C" {
#endif
#if defined(_WIN32)
#define CL_API_ENTRY
#define CL_API_CALL __stdcall
#define CL_CALLBACK __stdcall
#else
#define CL_API_ENTRY
#define CL_API_CALL
#define CL_CALLBACK
#endif
/*
* Deprecation flags refer to the last version of the header in which the
* feature was not deprecated.
*
* E.g. VERSION_1_1_DEPRECATED means the feature is present in 1.1 without
* deprecation but is deprecated in versions later than 1.1.
*/
#ifdef __APPLE__
#define CL_EXTENSION_WEAK_LINK __attribute__((weak_import))
#define CL_API_SUFFIX__VERSION_1_0 AVAILABLE_MAC_OS_X_VERSION_10_6_AND_LATER
#define CL_EXT_SUFFIX__VERSION_1_0 CL_EXTENSION_WEAK_LINK AVAILABLE_MAC_OS_X_VERSION_10_6_AND_LATER
#define CL_API_SUFFIX__VERSION_1_1 AVAILABLE_MAC_OS_X_VERSION_10_7_AND_LATER
#define GCL_API_SUFFIX__VERSION_1_1 AVAILABLE_MAC_OS_X_VERSION_10_7_AND_LATER
#define CL_EXT_SUFFIX__VERSION_1_1 CL_EXTENSION_WEAK_LINK AVAILABLE_MAC_OS_X_VERSION_10_7_AND_LATER
#define CL_EXT_SUFFIX__VERSION_1_0_DEPRECATED CL_EXTENSION_WEAK_LINK AVAILABLE_MAC_OS_X_VERSION_10_6_AND_LATER_BUT_DEPRECATED_IN_MAC_OS_X_VERSION_10_7
#ifdef AVAILABLE_MAC_OS_X_VERSION_10_8_AND_LATER
#define CL_API_SUFFIX__VERSION_1_2 AVAILABLE_MAC_OS_X_VERSION_10_8_AND_LATER
#define GCL_API_SUFFIX__VERSION_1_2 AVAILABLE_MAC_OS_X_VERSION_10_8_AND_LATER
#define CL_EXT_SUFFIX__VERSION_1_2 CL_EXTENSION_WEAK_LINK AVAILABLE_MAC_OS_X_VERSION_10_8_AND_LATER
#define CL_EXT_PREFIX__VERSION_1_1_DEPRECATED
#define CL_EXT_SUFFIX__VERSION_1_1_DEPRECATED CL_EXTENSION_WEAK_LINK AVAILABLE_MAC_OS_X_VERSION_10_7_AND_LATER_BUT_DEPRECATED_IN_MAC_OS_X_VERSION_10_8
#else
#warning This path should never happen outside of internal operating system development. AvailabilityMacros do not function correctly here!
#define CL_API_SUFFIX__VERSION_1_2 AVAILABLE_MAC_OS_X_VERSION_10_7_AND_LATER
#define GCL_API_SUFFIX__VERSION_1_2 AVAILABLE_MAC_OS_X_VERSION_10_7_AND_LATER
#define CL_EXT_SUFFIX__VERSION_1_2 CL_EXTENSION_WEAK_LINK AVAILABLE_MAC_OS_X_VERSION_10_7_AND_LATER
#define CL_EXT_SUFFIX__VERSION_1_1_DEPRECATED CL_EXTENSION_WEAK_LINK AVAILABLE_MAC_OS_X_VERSION_10_7_AND_LATER
#endif
#else
#define CL_EXTENSION_WEAK_LINK
#define CL_API_SUFFIX__VERSION_1_0
#define CL_EXT_SUFFIX__VERSION_1_0
#define CL_API_SUFFIX__VERSION_1_1
#define CL_EXT_SUFFIX__VERSION_1_1
#define CL_API_SUFFIX__VERSION_1_2
#define CL_EXT_SUFFIX__VERSION_1_2
#define CL_API_SUFFIX__VERSION_2_0
#define CL_EXT_SUFFIX__VERSION_2_0
#define CL_API_SUFFIX__VERSION_2_1
#define CL_EXT_SUFFIX__VERSION_2_1
#define CL_API_SUFFIX__VERSION_2_2
#define CL_EXT_SUFFIX__VERSION_2_2
#ifdef __GNUC__
#ifdef CL_USE_DEPRECATED_OPENCL_1_0_APIS
#define CL_EXT_SUFFIX__VERSION_1_0_DEPRECATED
#define CL_EXT_PREFIX__VERSION_1_0_DEPRECATED
#else
#define CL_EXT_SUFFIX__VERSION_1_0_DEPRECATED __attribute__((deprecated))
#define CL_EXT_PREFIX__VERSION_1_0_DEPRECATED
#endif
#ifdef CL_USE_DEPRECATED_OPENCL_1_1_APIS
#define CL_EXT_SUFFIX__VERSION_1_1_DEPRECATED
#define CL_EXT_PREFIX__VERSION_1_1_DEPRECATED
#else
#define CL_EXT_SUFFIX__VERSION_1_1_DEPRECATED __attribute__((deprecated))
#define CL_EXT_PREFIX__VERSION_1_1_DEPRECATED
#endif
#ifdef CL_USE_DEPRECATED_OPENCL_1_2_APIS
#define CL_EXT_SUFFIX__VERSION_1_2_DEPRECATED
#define CL_EXT_PREFIX__VERSION_1_2_DEPRECATED
#else
#define CL_EXT_SUFFIX__VERSION_1_2_DEPRECATED __attribute__((deprecated))
#define CL_EXT_PREFIX__VERSION_1_2_DEPRECATED
#endif
#ifdef CL_USE_DEPRECATED_OPENCL_2_0_APIS
#define CL_EXT_SUFFIX__VERSION_2_0_DEPRECATED
#define CL_EXT_PREFIX__VERSION_2_0_DEPRECATED
#else
#define CL_EXT_SUFFIX__VERSION_2_0_DEPRECATED __attribute__((deprecated))
#define CL_EXT_PREFIX__VERSION_2_0_DEPRECATED
#endif
#ifdef CL_USE_DEPRECATED_OPENCL_2_1_APIS
#define CL_EXT_SUFFIX__VERSION_2_1_DEPRECATED
#define CL_EXT_PREFIX__VERSION_2_1_DEPRECATED
#else
#define CL_EXT_SUFFIX__VERSION_2_1_DEPRECATED __attribute__((deprecated))
#define CL_EXT_PREFIX__VERSION_2_1_DEPRECATED
#endif
#elif defined(_WIN32)
#ifdef CL_USE_DEPRECATED_OPENCL_1_0_APIS
#define CL_EXT_SUFFIX__VERSION_1_0_DEPRECATED
#define CL_EXT_PREFIX__VERSION_1_0_DEPRECATED
#else
#define CL_EXT_SUFFIX__VERSION_1_0_DEPRECATED
#define CL_EXT_PREFIX__VERSION_1_0_DEPRECATED __declspec(deprecated)
#endif
#ifdef CL_USE_DEPRECATED_OPENCL_1_1_APIS
#define CL_EXT_SUFFIX__VERSION_1_1_DEPRECATED
#define CL_EXT_PREFIX__VERSION_1_1_DEPRECATED
#else
#define CL_EXT_SUFFIX__VERSION_1_1_DEPRECATED
#define CL_EXT_PREFIX__VERSION_1_1_DEPRECATED __declspec(deprecated)
#endif
#ifdef CL_USE_DEPRECATED_OPENCL_1_2_APIS
#define CL_EXT_SUFFIX__VERSION_1_2_DEPRECATED
#define CL_EXT_PREFIX__VERSION_1_2_DEPRECATED
#else
#define CL_EXT_SUFFIX__VERSION_1_2_DEPRECATED
#define CL_EXT_PREFIX__VERSION_1_2_DEPRECATED __declspec(deprecated)
#endif
#ifdef CL_USE_DEPRECATED_OPENCL_2_0_APIS
#define CL_EXT_SUFFIX__VERSION_2_0_DEPRECATED
#define CL_EXT_PREFIX__VERSION_2_0_DEPRECATED
#else
#define CL_EXT_SUFFIX__VERSION_2_0_DEPRECATED
#define CL_EXT_PREFIX__VERSION_2_0_DEPRECATED __declspec(deprecated)
#endif
#ifdef CL_USE_DEPRECATED_OPENCL_2_1_APIS
#define CL_EXT_SUFFIX__VERSION_2_1_DEPRECATED
#define CL_EXT_PREFIX__VERSION_2_1_DEPRECATED
#else
#define CL_EXT_SUFFIX__VERSION_2_1_DEPRECATED
#define CL_EXT_PREFIX__VERSION_2_1_DEPRECATED __declspec(deprecated)
#endif
#else
#define CL_EXT_SUFFIX__VERSION_1_0_DEPRECATED
#define CL_EXT_PREFIX__VERSION_1_0_DEPRECATED
#define CL_EXT_SUFFIX__VERSION_1_1_DEPRECATED
#define CL_EXT_PREFIX__VERSION_1_1_DEPRECATED
#define CL_EXT_SUFFIX__VERSION_1_2_DEPRECATED
#define CL_EXT_PREFIX__VERSION_1_2_DEPRECATED
#define CL_EXT_SUFFIX__VERSION_2_0_DEPRECATED
#define CL_EXT_PREFIX__VERSION_2_0_DEPRECATED
#define CL_EXT_SUFFIX__VERSION_2_1_DEPRECATED
#define CL_EXT_PREFIX__VERSION_2_1_DEPRECATED
#endif
#endif
#if (defined (_WIN32) && defined(_MSC_VER))
/* scalar types */
typedef signed __int8 cl_char;
typedef unsigned __int8 cl_uchar;
typedef signed __int16 cl_short;
typedef unsigned __int16 cl_ushort;
typedef signed __int32 cl_int;
typedef unsigned __int32 cl_uint;
typedef signed __int64 cl_long;
typedef unsigned __int64 cl_ulong;
typedef unsigned __int16 cl_half;
typedef float cl_float;
typedef double cl_double;
/* Macro names and corresponding values defined by OpenCL */
#define CL_CHAR_BIT 8
#define CL_SCHAR_MAX 127
#define CL_SCHAR_MIN (-127-1)
#define CL_CHAR_MAX CL_SCHAR_MAX
#define CL_CHAR_MIN CL_SCHAR_MIN
#define CL_UCHAR_MAX 255
#define CL_SHRT_MAX 32767
#define CL_SHRT_MIN (-32767-1)
#define CL_USHRT_MAX 65535
#define CL_INT_MAX 2147483647
#define CL_INT_MIN (-2147483647-1)
#define CL_UINT_MAX 0xffffffffU
#define CL_LONG_MAX ((cl_long) 0x7FFFFFFFFFFFFFFFLL)
#define CL_LONG_MIN ((cl_long) -0x7FFFFFFFFFFFFFFFLL - 1LL)
#define CL_ULONG_MAX ((cl_ulong) 0xFFFFFFFFFFFFFFFFULL)
#define CL_FLT_DIG 6
#define CL_FLT_MANT_DIG 24
#define CL_FLT_MAX_10_EXP +38
#define CL_FLT_MAX_EXP +128
#define CL_FLT_MIN_10_EXP -37
#define CL_FLT_MIN_EXP -125
#define CL_FLT_RADIX 2
#define CL_FLT_MAX 340282346638528859811704183484516925440.0f
#define CL_FLT_MIN 1.175494350822287507969e-38f
#define CL_FLT_EPSILON 1.1920928955078125e-7f
#define CL_HALF_DIG 3
#define CL_HALF_MANT_DIG 11
#define CL_HALF_MAX_10_EXP +4
#define CL_HALF_MAX_EXP +16
#define CL_HALF_MIN_10_EXP -4
#define CL_HALF_MIN_EXP -13
#define CL_HALF_RADIX 2
#define CL_HALF_MAX 65504.0f
#define CL_HALF_MIN 6.103515625e-05f
#define CL_HALF_EPSILON 9.765625e-04f
#define CL_DBL_DIG 15
#define CL_DBL_MANT_DIG 53
#define CL_DBL_MAX_10_EXP +308
#define CL_DBL_MAX_EXP +1024
#define CL_DBL_MIN_10_EXP -307
#define CL_DBL_MIN_EXP -1021
#define CL_DBL_RADIX 2
#define CL_DBL_MAX 1.7976931348623158e+308
#define CL_DBL_MIN 2.225073858507201383090e-308
#define CL_DBL_EPSILON 2.220446049250313080847e-16
#define CL_M_E 2.7182818284590452354
#define CL_M_LOG2E 1.4426950408889634074
#define CL_M_LOG10E 0.43429448190325182765
#define CL_M_LN2 0.69314718055994530942
#define CL_M_LN10 2.30258509299404568402
#define CL_M_PI 3.14159265358979323846
#define CL_M_PI_2 1.57079632679489661923
#define CL_M_PI_4 0.78539816339744830962
#define CL_M_1_PI 0.31830988618379067154
#define CL_M_2_PI 0.63661977236758134308
#define CL_M_2_SQRTPI 1.12837916709551257390
#define CL_M_SQRT2 1.41421356237309504880
#define CL_M_SQRT1_2 0.70710678118654752440
#define CL_M_E_F 2.718281828f
#define CL_M_LOG2E_F 1.442695041f
#define CL_M_LOG10E_F 0.434294482f
#define CL_M_LN2_F 0.693147181f
#define CL_M_LN10_F 2.302585093f
#define CL_M_PI_F 3.141592654f
#define CL_M_PI_2_F 1.570796327f
#define CL_M_PI_4_F 0.785398163f
#define CL_M_1_PI_F 0.318309886f
#define CL_M_2_PI_F 0.636619772f
#define CL_M_2_SQRTPI_F 1.128379167f
#define CL_M_SQRT2_F 1.414213562f
#define CL_M_SQRT1_2_F 0.707106781f
#define CL_NAN (CL_INFINITY - CL_INFINITY)
#define CL_HUGE_VALF ((cl_float) 1e50)
#define CL_HUGE_VAL ((cl_double) 1e500)
#define CL_MAXFLOAT CL_FLT_MAX
#define CL_INFINITY CL_HUGE_VALF
#else
#include <stdint.h>
/* scalar types */
typedef int8_t cl_char;
typedef uint8_t cl_uchar;
typedef int16_t cl_short __attribute__((aligned(2)));
typedef uint16_t cl_ushort __attribute__((aligned(2)));
typedef int32_t cl_int __attribute__((aligned(4)));
typedef uint32_t cl_uint __attribute__((aligned(4)));
typedef int64_t cl_long __attribute__((aligned(8)));
typedef uint64_t cl_ulong __attribute__((aligned(8)));
typedef uint16_t cl_half __attribute__((aligned(2)));
typedef float cl_float __attribute__((aligned(4)));
typedef double cl_double __attribute__((aligned(8)));
/* Macro names and corresponding values defined by OpenCL */
#define CL_CHAR_BIT 8
#define CL_SCHAR_MAX 127
#define CL_SCHAR_MIN (-127-1)
#define CL_CHAR_MAX CL_SCHAR_MAX
#define CL_CHAR_MIN CL_SCHAR_MIN
#define CL_UCHAR_MAX 255
#define CL_SHRT_MAX 32767
#define CL_SHRT_MIN (-32767-1)
#define CL_USHRT_MAX 65535
#define CL_INT_MAX 2147483647
#define CL_INT_MIN (-2147483647-1)
#define CL_UINT_MAX 0xffffffffU
#define CL_LONG_MAX ((cl_long) 0x7FFFFFFFFFFFFFFFLL)
#define CL_LONG_MIN ((cl_long) -0x7FFFFFFFFFFFFFFFLL - 1LL)
#define CL_ULONG_MAX ((cl_ulong) 0xFFFFFFFFFFFFFFFFULL)
#define CL_FLT_DIG 6
#define CL_FLT_MANT_DIG 24
#define CL_FLT_MAX_10_EXP +38
#define CL_FLT_MAX_EXP +128
#define CL_FLT_MIN_10_EXP -37
#define CL_FLT_MIN_EXP -125
#define CL_FLT_RADIX 2
#define CL_FLT_MAX 340282346638528859811704183484516925440.0f
#define CL_FLT_MIN 1.175494350822287507969e-38f
#define CL_FLT_EPSILON 1.1920928955078125e-7f
#define CL_HALF_DIG 3
#define CL_HALF_MANT_DIG 11
#define CL_HALF_MAX_10_EXP +4
#define CL_HALF_MAX_EXP +16
#define CL_HALF_MIN_10_EXP -4
#define CL_HALF_MIN_EXP -13
#define CL_HALF_RADIX 2
#define CL_HALF_MAX 65504.0f
#define CL_HALF_MIN 6.103515625e-05f
#define CL_HALF_EPSILON 9.765625e-04f
#define CL_DBL_DIG 15
#define CL_DBL_MANT_DIG 53
#define CL_DBL_MAX_10_EXP +308
#define CL_DBL_MAX_EXP +1024
#define CL_DBL_MIN_10_EXP -307
#define CL_DBL_MIN_EXP -1021
#define CL_DBL_RADIX 2
#define CL_DBL_MAX 179769313486231570814527423731704356798070567525844996598917476803157260780028538760589558632766878171540458953514382464234321326889464182768467546703537516986049910576551282076245490090389328944075868508455133942304583236903222948165808559332123348274797826204144723168738177180919299881250404026184124858368.0
#define CL_DBL_MIN 2.225073858507201383090e-308
#define CL_DBL_EPSILON 2.220446049250313080847e-16
#define CL_M_E 2.7182818284590452354
#define CL_M_LOG2E 1.4426950408889634074
#define CL_M_LOG10E 0.43429448190325182765
#define CL_M_LN2 0.69314718055994530942
#define CL_M_LN10 2.30258509299404568402
#define CL_M_PI 3.14159265358979323846
#define CL_M_PI_2 1.57079632679489661923
#define CL_M_PI_4 0.78539816339744830962
#define CL_M_1_PI 0.31830988618379067154
#define CL_M_2_PI 0.63661977236758134308
#define CL_M_2_SQRTPI 1.12837916709551257390
#define CL_M_SQRT2 1.41421356237309504880
#define CL_M_SQRT1_2 0.70710678118654752440
#define CL_M_E_F 2.718281828f
#define CL_M_LOG2E_F 1.442695041f
#define CL_M_LOG10E_F 0.434294482f
#define CL_M_LN2_F 0.693147181f
#define CL_M_LN10_F 2.302585093f
#define CL_M_PI_F 3.141592654f
#define CL_M_PI_2_F 1.570796327f
#define CL_M_PI_4_F 0.785398163f
#define CL_M_1_PI_F 0.318309886f
#define CL_M_2_PI_F 0.636619772f
#define CL_M_2_SQRTPI_F 1.128379167f
#define CL_M_SQRT2_F 1.414213562f
#define CL_M_SQRT1_2_F 0.707106781f
#if defined( __GNUC__ )
#define CL_HUGE_VALF __builtin_huge_valf()
#define CL_HUGE_VAL __builtin_huge_val()
#define CL_NAN __builtin_nanf( "" )
#else
#define CL_HUGE_VALF ((cl_float) 1e50)
#define CL_HUGE_VAL ((cl_double) 1e500)
float nanf( const char * );
#define CL_NAN nanf( "" )
#endif
#define CL_MAXFLOAT CL_FLT_MAX
#define CL_INFINITY CL_HUGE_VALF
#endif
#include <stddef.h>
/* Mirror types to GL types. Mirror types allow us to avoid deciding which 87s to load based on whether we are using GL or GLES here. */
typedef unsigned int cl_GLuint;
typedef int cl_GLint;
typedef unsigned int cl_GLenum;
/*
* Vector types
*
* Note: OpenCL requires that all types be naturally aligned.
* This means that vector types must be naturally aligned.
* For example, a vector of four floats must be aligned to
* a 16 byte boundary (calculated as 4 * the natural 4-byte
* alignment of the float). The alignment qualifiers here
* will only function properly if your compiler supports them
* and if you don't actively work to defeat them. For example,
* in order for a cl_float4 to be 16 byte aligned in a struct,
* the start of the struct must itself be 16-byte aligned.
*
* Maintaining proper alignment is the user's responsibility.
*/
/* Define basic vector types */
#if defined( __VEC__ )
#include <altivec.h> /* may be omitted depending on compiler. AltiVec spec provides no way to detect whether the header is required. */
typedef vector unsigned char __cl_uchar16;
typedef vector signed char __cl_char16;
typedef vector unsigned short __cl_ushort8;
typedef vector signed short __cl_short8;
typedef vector unsigned int __cl_uint4;
typedef vector signed int __cl_int4;
typedef vector float __cl_float4;
#define __CL_UCHAR16__ 1
#define __CL_CHAR16__ 1
#define __CL_USHORT8__ 1
#define __CL_SHORT8__ 1
#define __CL_UINT4__ 1
#define __CL_INT4__ 1
#define __CL_FLOAT4__ 1
#endif
#if defined( __SSE__ )
#if defined( __MINGW64__ )
#include <intrin.h>
#else
#include <xmmintrin.h>
#endif
#if defined( __GNUC__ )
typedef float __cl_float4 __attribute__((vector_size(16)));
#else
typedef __m128 __cl_float4;
#endif
#define __CL_FLOAT4__ 1
#endif
#if defined( __SSE2__ )
#if defined( __MINGW64__ )
#include <intrin.h>
#else
#include <emmintrin.h>
#endif
#if defined( __GNUC__ )
typedef cl_uchar __cl_uchar16 __attribute__((vector_size(16)));
typedef cl_char __cl_char16 __attribute__((vector_size(16)));
typedef cl_ushort __cl_ushort8 __attribute__((vector_size(16)));
typedef cl_short __cl_short8 __attribute__((vector_size(16)));
typedef cl_uint __cl_uint4 __attribute__((vector_size(16)));
typedef cl_int __cl_int4 __attribute__((vector_size(16)));
typedef cl_ulong __cl_ulong2 __attribute__((vector_size(16)));
typedef cl_long __cl_long2 __attribute__((vector_size(16)));
typedef cl_double __cl_double2 __attribute__((vector_size(16)));
#else
typedef __m128i __cl_uchar16;
typedef __m128i __cl_char16;
typedef __m128i __cl_ushort8;
typedef __m128i __cl_short8;
typedef __m128i __cl_uint4;
typedef __m128i __cl_int4;
typedef __m128i __cl_ulong2;
typedef __m128i __cl_long2;
typedef __m128d __cl_double2;
#endif
#define __CL_UCHAR16__ 1
#define __CL_CHAR16__ 1
#define __CL_USHORT8__ 1
#define __CL_SHORT8__ 1
#define __CL_INT4__ 1
#define __CL_UINT4__ 1
#define __CL_ULONG2__ 1
#define __CL_LONG2__ 1
#define __CL_DOUBLE2__ 1
#endif
#if defined( __MMX__ )
#include <mmintrin.h>
#if defined( __GNUC__ )
typedef cl_uchar __cl_uchar8 __attribute__((vector_size(8)));
typedef cl_char __cl_char8 __attribute__((vector_size(8)));
typedef cl_ushort __cl_ushort4 __attribute__((vector_size(8)));
typedef cl_short __cl_short4 __attribute__((vector_size(8)));
typedef cl_uint __cl_uint2 __attribute__((vector_size(8)));
typedef cl_int __cl_int2 __attribute__((vector_size(8)));
typedef cl_ulong __cl_ulong1 __attribute__((vector_size(8)));
typedef cl_long __cl_long1 __attribute__((vector_size(8)));
typedef cl_float __cl_float2 __attribute__((vector_size(8)));
#else
typedef __m64 __cl_uchar8;
typedef __m64 __cl_char8;
typedef __m64 __cl_ushort4;
typedef __m64 __cl_short4;
typedef __m64 __cl_uint2;
typedef __m64 __cl_int2;
typedef __m64 __cl_ulong1;
typedef __m64 __cl_long1;
typedef __m64 __cl_float2;
#endif
#define __CL_UCHAR8__ 1
#define __CL_CHAR8__ 1
#define __CL_USHORT4__ 1
#define __CL_SHORT4__ 1
#define __CL_INT2__ 1
#define __CL_UINT2__ 1
#define __CL_ULONG1__ 1
#define __CL_LONG1__ 1
#define __CL_FLOAT2__ 1
#endif
#if defined( __AVX__ )
#if defined( __MINGW64__ )
#include <intrin.h>
#else
#include <immintrin.h>
#endif
#if defined( __GNUC__ )
typedef cl_float __cl_float8 __attribute__((vector_size(32)));
typedef cl_double __cl_double4 __attribute__((vector_size(32)));
#else
typedef __m256 __cl_float8;
typedef __m256d __cl_double4;
#endif
#define __CL_FLOAT8__ 1
#define __CL_DOUBLE4__ 1
#endif
/* Define capabilities for anonymous struct members. */
#if !defined(__cplusplus) && defined(__STDC_VERSION__) && __STDC_VERSION__ >= 201112L
#define __CL_HAS_ANON_STRUCT__ 1
#define __CL_ANON_STRUCT__
#elif defined( __GNUC__) && ! defined( __STRICT_ANSI__ )
#define __CL_HAS_ANON_STRUCT__ 1
#define __CL_ANON_STRUCT__ __extension__
#elif defined( _WIN32) && defined(_MSC_VER)
#if _MSC_VER >= 1500
/* Microsoft Developer Studio 2008 supports anonymous structs, but
* complains by default. */
#define __CL_HAS_ANON_STRUCT__ 1
#define __CL_ANON_STRUCT__
/* Disable warning C4201: nonstandard extension used : nameless
* struct/union */
#pragma warning( push )
#pragma warning( disable : 4201 )
#endif
#else
#define __CL_HAS_ANON_STRUCT__ 0
#define __CL_ANON_STRUCT__
#endif
/* Define alignment keys */
#if defined( __GNUC__ )
#define CL_ALIGNED(_x) __attribute__ ((aligned(_x)))
#elif defined( _WIN32) && (_MSC_VER)
/* Alignment keys neutered on windows because MSVC can't swallow function arguments with alignment requirements */
/* http://msdn.microsoft.com/en-us/library/373ak2y1%28VS.71%29.aspx */
/* #include <crtdefs.h> */
/* #define CL_ALIGNED(_x) _CRT_ALIGN(_x) */
#define CL_ALIGNED(_x)
#else
#warning Need to implement some method to align data here
#define CL_ALIGNED(_x)
#endif
/* Indicate whether .xyzw, .s0123 and .hi.lo are supported */
#if __CL_HAS_ANON_STRUCT__
/* .xyzw and .s0123...{f|F} are supported */
#define CL_HAS_NAMED_VECTOR_FIELDS 1
/* .hi and .lo are supported */
#define CL_HAS_HI_LO_VECTOR_FIELDS 1
#endif
/* Define cl_vector types */
/* ---- cl_charn ---- */
typedef union
{
cl_char CL_ALIGNED(2) s[2];
#if __CL_HAS_ANON_STRUCT__
__CL_ANON_STRUCT__ struct{ cl_char x, y; };
__CL_ANON_STRUCT__ struct{ cl_char s0, s1; };
__CL_ANON_STRUCT__ struct{ cl_char lo, hi; };
#endif
#if defined( __CL_CHAR2__)
__cl_char2 v2;
#endif
}cl_char2;
typedef union
{
cl_char CL_ALIGNED(4) s[4];
#if __CL_HAS_ANON_STRUCT__
__CL_ANON_STRUCT__ struct{ cl_char x, y, z, w; };
__CL_ANON_STRUCT__ struct{ cl_char s0, s1, s2, s3; };
__CL_ANON_STRUCT__ struct{ cl_char2 lo, hi; };
#endif
#if defined( __CL_CHAR2__)
__cl_char2 v2[2];
#endif
#if defined( __CL_CHAR4__)
__cl_char4 v4;
#endif
}cl_char4;
/* cl_char3 is identical in size, alignment and behavior to cl_char4. See section 6.1.5. */
typedef cl_char4 cl_char3;
typedef union
{
cl_char CL_ALIGNED(8) s[8];
#if __CL_HAS_ANON_STRUCT__
__CL_ANON_STRUCT__ struct{ cl_char x, y, z, w; };
__CL_ANON_STRUCT__ struct{ cl_char s0, s1, s2, s3, s4, s5, s6, s7; };
__CL_ANON_STRUCT__ struct{ cl_char4 lo, hi; };
#endif
#if defined( __CL_CHAR2__)
__cl_char2 v2[4];
#endif
#if defined( __CL_CHAR4__)
__cl_char4 v4[2];
#endif
#if defined( __CL_CHAR8__ )
__cl_char8 v8;
#endif
}cl_char8;
typedef union
{
cl_char CL_ALIGNED(16) s[16];
#if __CL_HAS_ANON_STRUCT__
__CL_ANON_STRUCT__ struct{ cl_char x, y, z, w, __spacer4, __spacer5, __spacer6, __spacer7, __spacer8, __spacer9, sa, sb, sc, sd, se, sf; };
__CL_ANON_STRUCT__ struct{ cl_char s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, sA, sB, sC, sD, sE, sF; };
__CL_ANON_STRUCT__ struct{ cl_char8 lo, hi; };
#endif
#if defined( __CL_CHAR2__)
__cl_char2 v2[8];
#endif
#if defined( __CL_CHAR4__)
__cl_char4 v4[4];
#endif
#if defined( __CL_CHAR8__ )
__cl_char8 v8[2];
#endif
#if defined( __CL_CHAR16__ )
__cl_char16 v16;
#endif
}cl_char16;
/* ---- cl_ucharn ---- */
typedef union
{
cl_uchar CL_ALIGNED(2) s[2];
#if __CL_HAS_ANON_STRUCT__
__CL_ANON_STRUCT__ struct{ cl_uchar x, y; };
__CL_ANON_STRUCT__ struct{ cl_uchar s0, s1; };
__CL_ANON_STRUCT__ struct{ cl_uchar lo, hi; };
#endif
#if defined( __cl_uchar2__)
__cl_uchar2 v2;
#endif
}cl_uchar2;
typedef union
{
cl_uchar CL_ALIGNED(4) s[4];
#if __CL_HAS_ANON_STRUCT__
__CL_ANON_STRUCT__ struct{ cl_uchar x, y, z, w; };
__CL_ANON_STRUCT__ struct{ cl_uchar s0, s1, s2, s3; };
__CL_ANON_STRUCT__ struct{ cl_uchar2 lo, hi; };
#endif
#if defined( __CL_UCHAR2__)
__cl_uchar2 v2[2];
#endif
#if defined( __CL_UCHAR4__)
__cl_uchar4 v4;
#endif
}cl_uchar4;
/* cl_uchar3 is identical in size, alignment and behavior to cl_uchar4. See section 6.1.5. */
typedef cl_uchar4 cl_uchar3;
typedef union
{
cl_uchar CL_ALIGNED(8) s[8];
#if __CL_HAS_ANON_STRUCT__
__CL_ANON_STRUCT__ struct{ cl_uchar x, y, z, w; };
__CL_ANON_STRUCT__ struct{ cl_uchar s0, s1, s2, s3, s4, s5, s6, s7; };
__CL_ANON_STRUCT__ struct{ cl_uchar4 lo, hi; };
#endif
#if defined( __CL_UCHAR2__)
__cl_uchar2 v2[4];
#endif
#if defined( __CL_UCHAR4__)
__cl_uchar4 v4[2];
#endif
#if defined( __CL_UCHAR8__ )
__cl_uchar8 v8;
#endif
}cl_uchar8;
typedef union
{
cl_uchar CL_ALIGNED(16) s[16];
#if __CL_HAS_ANON_STRUCT__
__CL_ANON_STRUCT__ struct{ cl_uchar x, y, z, w, __spacer4, __spacer5, __spacer6, __spacer7, __spacer8, __spacer9, sa, sb, sc, sd, se, sf; };
__CL_ANON_STRUCT__ struct{ cl_uchar s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, sA, sB, sC, sD, sE, sF; };
__CL_ANON_STRUCT__ struct{ cl_uchar8 lo, hi; };
#endif
#if defined( __CL_UCHAR2__)
__cl_uchar2 v2[8];
#endif
#if defined( __CL_UCHAR4__)
__cl_uchar4 v4[4];
#endif
#if defined( __CL_UCHAR8__ )
__cl_uchar8 v8[2];
#endif
#if defined( __CL_UCHAR16__ )
__cl_uchar16 v16;
#endif
}cl_uchar16;
/* ---- cl_shortn ---- */
typedef union
{
cl_short CL_ALIGNED(4) s[2];
#if __CL_HAS_ANON_STRUCT__
__CL_ANON_STRUCT__ struct{ cl_short x, y; };
__CL_ANON_STRUCT__ struct{ cl_short s0, s1; };
__CL_ANON_STRUCT__ struct{ cl_short lo, hi; };
#endif
#if defined( __CL_SHORT2__)
__cl_short2 v2;
#endif
}cl_short2;
typedef union
{
cl_short CL_ALIGNED(8) s[4];
#if __CL_HAS_ANON_STRUCT__
__CL_ANON_STRUCT__ struct{ cl_short x, y, z, w; };
__CL_ANON_STRUCT__ struct{ cl_short s0, s1, s2, s3; };
__CL_ANON_STRUCT__ struct{ cl_short2 lo, hi; };
#endif
#if defined( __CL_SHORT2__)
__cl_short2 v2[2];
#endif
#if defined( __CL_SHORT4__)
__cl_short4 v4;
#endif
}cl_short4;
/* cl_short3 is identical in size, alignment and behavior to cl_short4. See section 6.1.5. */
typedef cl_short4 cl_short3;
typedef union
{
cl_short CL_ALIGNED(16) s[8];
#if __CL_HAS_ANON_STRUCT__
__CL_ANON_STRUCT__ struct{ cl_short x, y, z, w; };
__CL_ANON_STRUCT__ struct{ cl_short s0, s1, s2, s3, s4, s5, s6, s7; };
__CL_ANON_STRUCT__ struct{ cl_short4 lo, hi; };
#endif
#if defined( __CL_SHORT2__)
__cl_short2 v2[4];
#endif
#if defined( __CL_SHORT4__)
__cl_short4 v4[2];
#endif
#if defined( __CL_SHORT8__ )
__cl_short8 v8;
#endif
}cl_short8;
typedef union
{
cl_short CL_ALIGNED(32) s[16];
#if __CL_HAS_ANON_STRUCT__
__CL_ANON_STRUCT__ struct{ cl_short x, y, z, w, __spacer4, __spacer5, __spacer6, __spacer7, __spacer8, __spacer9, sa, sb, sc, sd, se, sf; };
__CL_ANON_STRUCT__ struct{ cl_short s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, sA, sB, sC, sD, sE, sF; };
__CL_ANON_STRUCT__ struct{ cl_short8 lo, hi; };
#endif
#if defined( __CL_SHORT2__)
__cl_short2 v2[8];
#endif
#if defined( __CL_SHORT4__)
__cl_short4 v4[4];
#endif
#if defined( __CL_SHORT8__ )
__cl_short8 v8[2];
#endif
#if defined( __CL_SHORT16__ )
__cl_short16 v16;
#endif
}cl_short16;
/* ---- cl_ushortn ---- */
typedef union
{
cl_ushort CL_ALIGNED(4) s[2];
#if __CL_HAS_ANON_STRUCT__
__CL_ANON_STRUCT__ struct{ cl_ushort x, y; };
__CL_ANON_STRUCT__ struct{ cl_ushort s0, s1; };
__CL_ANON_STRUCT__ struct{ cl_ushort lo, hi; };
#endif
#if defined( __CL_USHORT2__)
__cl_ushort2 v2;
#endif
}cl_ushort2;
typedef union
{
cl_ushort CL_ALIGNED(8) s[4];
#if __CL_HAS_ANON_STRUCT__
__CL_ANON_STRUCT__ struct{ cl_ushort x, y, z, w; };
__CL_ANON_STRUCT__ struct{ cl_ushort s0, s1, s2, s3; };
__CL_ANON_STRUCT__ struct{ cl_ushort2 lo, hi; };
#endif
#if defined( __CL_USHORT2__)
__cl_ushort2 v2[2];
#endif
#if defined( __CL_USHORT4__)
__cl_ushort4 v4;
#endif
}cl_ushort4;
/* cl_ushort3 is identical in size, alignment and behavior to cl_ushort4. See section 6.1.5. */
typedef cl_ushort4 cl_ushort3;
typedef union
{
cl_ushort CL_ALIGNED(16) s[8];
#if __CL_HAS_ANON_STRUCT__
__CL_ANON_STRUCT__ struct{ cl_ushort x, y, z, w; };
__CL_ANON_STRUCT__ struct{ cl_ushort s0, s1, s2, s3, s4, s5, s6, s7; };
__CL_ANON_STRUCT__ struct{ cl_ushort4 lo, hi; };
#endif
#if defined( __CL_USHORT2__)
__cl_ushort2 v2[4];
#endif
#if defined( __CL_USHORT4__)
__cl_ushort4 v4[2];
#endif
#if defined( __CL_USHORT8__ )
__cl_ushort8 v8;
#endif
}cl_ushort8;
typedef union
{
cl_ushort CL_ALIGNED(32) s[16];
#if __CL_HAS_ANON_STRUCT__
__CL_ANON_STRUCT__ struct{ cl_ushort x, y, z, w, __spacer4, __spacer5, __spacer6, __spacer7, __spacer8, __spacer9, sa, sb, sc, sd, se, sf; };
__CL_ANON_STRUCT__ struct{ cl_ushort s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, sA, sB, sC, sD, sE, sF; };
__CL_ANON_STRUCT__ struct{ cl_ushort8 lo, hi; };
#endif
#if defined( __CL_USHORT2__)
__cl_ushort2 v2[8];
#endif
#if defined( __CL_USHORT4__)
__cl_ushort4 v4[4];
#endif
#if defined( __CL_USHORT8__ )
__cl_ushort8 v8[2];
#endif
#if defined( __CL_USHORT16__ )
__cl_ushort16 v16;
#endif
}cl_ushort16;
/* ---- cl_halfn ---- */
typedef union
{
cl_half CL_ALIGNED(4) s[2];
#if __CL_HAS_ANON_STRUCT__
__CL_ANON_STRUCT__ struct{ cl_half x, y; };
__CL_ANON_STRUCT__ struct{ cl_half s0, s1; };
__CL_ANON_STRUCT__ struct{ cl_half lo, hi; };
#endif
#if defined( __CL_HALF2__)
__cl_half2 v2;
#endif
}cl_half2;
typedef union
{
cl_half CL_ALIGNED(8) s[4];
#if __CL_HAS_ANON_STRUCT__
__CL_ANON_STRUCT__ struct{ cl_half x, y, z, w; };
__CL_ANON_STRUCT__ struct{ cl_half s0, s1, s2, s3; };
__CL_ANON_STRUCT__ struct{ cl_half2 lo, hi; };
#endif
#if defined( __CL_HALF2__)
__cl_half2 v2[2];
#endif
#if defined( __CL_HALF4__)
__cl_half4 v4;
#endif
}cl_half4;
/* cl_half3 is identical in size, alignment and behavior to cl_half4. See section 6.1.5. */
typedef cl_half4 cl_half3;
typedef union
{
cl_half CL_ALIGNED(16) s[8];
#if __CL_HAS_ANON_STRUCT__
__CL_ANON_STRUCT__ struct{ cl_half x, y, z, w; };
__CL_ANON_STRUCT__ struct{ cl_half s0, s1, s2, s3, s4, s5, s6, s7; };
__CL_ANON_STRUCT__ struct{ cl_half4 lo, hi; };
#endif
#if defined( __CL_HALF2__)
__cl_half2 v2[4];
#endif
#if defined( __CL_HALF4__)
__cl_half4 v4[2];
#endif
#if defined( __CL_HALF8__ )
__cl_half8 v8;
#endif
}cl_half8;
typedef union
{
cl_half CL_ALIGNED(32) s[16];
#if __CL_HAS_ANON_STRUCT__
__CL_ANON_STRUCT__ struct{ cl_half x, y, z, w, __spacer4, __spacer5, __spacer6, __spacer7, __spacer8, __spacer9, sa, sb, sc, sd, se, sf; };
__CL_ANON_STRUCT__ struct{ cl_half s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, sA, sB, sC, sD, sE, sF; };
__CL_ANON_STRUCT__ struct{ cl_half8 lo, hi; };
#endif
#if defined( __CL_HALF2__)
__cl_half2 v2[8];
#endif
#if defined( __CL_HALF4__)
__cl_half4 v4[4];
#endif
#if defined( __CL_HALF8__ )
__cl_half8 v8[2];
#endif
#if defined( __CL_HALF16__ )
__cl_half16 v16;
#endif
}cl_half16;
/* ---- cl_intn ---- */
typedef union
{
cl_int CL_ALIGNED(8) s[2];
#if __CL_HAS_ANON_STRUCT__
__CL_ANON_STRUCT__ struct{ cl_int x, y; };
__CL_ANON_STRUCT__ struct{ cl_int s0, s1; };
__CL_ANON_STRUCT__ struct{ cl_int lo, hi; };
#endif
#if defined( __CL_INT2__)
__cl_int2 v2;
#endif
}cl_int2;
typedef union
{
cl_int CL_ALIGNED(16) s[4];
#if __CL_HAS_ANON_STRUCT__
__CL_ANON_STRUCT__ struct{ cl_int x, y, z, w; };
__CL_ANON_STRUCT__ struct{ cl_int s0, s1, s2, s3; };
__CL_ANON_STRUCT__ struct{ cl_int2 lo, hi; };
#endif
#if defined( __CL_INT2__)
__cl_int2 v2[2];
#endif
#if defined( __CL_INT4__)
__cl_int4 v4;
#endif
}cl_int4;
/* cl_int3 is identical in size, alignment and behavior to cl_int4. See section 6.1.5. */
typedef cl_int4 cl_int3;
typedef union
{
cl_int CL_ALIGNED(32) s[8];
#if __CL_HAS_ANON_STRUCT__
__CL_ANON_STRUCT__ struct{ cl_int x, y, z, w; };
__CL_ANON_STRUCT__ struct{ cl_int s0, s1, s2, s3, s4, s5, s6, s7; };
__CL_ANON_STRUCT__ struct{ cl_int4 lo, hi; };
#endif
#if defined( __CL_INT2__)
__cl_int2 v2[4];
#endif
#if defined( __CL_INT4__)
__cl_int4 v4[2];
#endif
#if defined( __CL_INT8__ )
__cl_int8 v8;
#endif
}cl_int8;
typedef union
{
cl_int CL_ALIGNED(64) s[16];
#if __CL_HAS_ANON_STRUCT__
__CL_ANON_STRUCT__ struct{ cl_int x, y, z, w, __spacer4, __spacer5, __spacer6, __spacer7, __spacer8, __spacer9, sa, sb, sc, sd, se, sf; };
__CL_ANON_STRUCT__ struct{ cl_int s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, sA, sB, sC, sD, sE, sF; };
__CL_ANON_STRUCT__ struct{ cl_int8 lo, hi; };
#endif
#if defined( __CL_INT2__)
__cl_int2 v2[8];
#endif
#if defined( __CL_INT4__)
__cl_int4 v4[4];
#endif
#if defined( __CL_INT8__ )
__cl_int8 v8[2];
#endif
#if defined( __CL_INT16__ )
__cl_int16 v16;
#endif
}cl_int16;
/* ---- cl_uintn ---- */
typedef union
{
cl_uint CL_ALIGNED(8) s[2];
#if __CL_HAS_ANON_STRUCT__
__CL_ANON_STRUCT__ struct{ cl_uint x, y; };
__CL_ANON_STRUCT__ struct{ cl_uint s0, s1; };
__CL_ANON_STRUCT__ struct{ cl_uint lo, hi; };
#endif
#if defined( __CL_UINT2__)
__cl_uint2 v2;
#endif
}cl_uint2;
typedef union
{
cl_uint CL_ALIGNED(16) s[4];
#if __CL_HAS_ANON_STRUCT__
__CL_ANON_STRUCT__ struct{ cl_uint x, y, z, w; };
__CL_ANON_STRUCT__ struct{ cl_uint s0, s1, s2, s3; };
__CL_ANON_STRUCT__ struct{ cl_uint2 lo, hi; };
#endif
#if defined( __CL_UINT2__)
__cl_uint2 v2[2];
#endif
#if defined( __CL_UINT4__)
__cl_uint4 v4;
#endif
}cl_uint4;
/* cl_uint3 is identical in size, alignment and behavior to cl_uint4. See section 6.1.5. */
typedef cl_uint4 cl_uint3;
typedef union
{
cl_uint CL_ALIGNED(32) s[8];
#if __CL_HAS_ANON_STRUCT__
__CL_ANON_STRUCT__ struct{ cl_uint x, y, z, w; };
__CL_ANON_STRUCT__ struct{ cl_uint s0, s1, s2, s3, s4, s5, s6, s7; };
__CL_ANON_STRUCT__ struct{ cl_uint4 lo, hi; };
#endif
#if defined( __CL_UINT2__)
__cl_uint2 v2[4];
#endif
#if defined( __CL_UINT4__)
__cl_uint4 v4[2];
#endif
#if defined( __CL_UINT8__ )
__cl_uint8 v8;
#endif
}cl_uint8;
typedef union
{
cl_uint CL_ALIGNED(64) s[16];
#if __CL_HAS_ANON_STRUCT__
__CL_ANON_STRUCT__ struct{ cl_uint x, y, z, w, __spacer4, __spacer5, __spacer6, __spacer7, __spacer8, __spacer9, sa, sb, sc, sd, se, sf; };
__CL_ANON_STRUCT__ struct{ cl_uint s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, sA, sB, sC, sD, sE, sF; };
__CL_ANON_STRUCT__ struct{ cl_uint8 lo, hi; };
#endif
#if defined( __CL_UINT2__)
__cl_uint2 v2[8];
#endif
#if defined( __CL_UINT4__)
__cl_uint4 v4[4];
#endif
#if defined( __CL_UINT8__ )
__cl_uint8 v8[2];
#endif
#if defined( __CL_UINT16__ )
__cl_uint16 v16;
#endif
}cl_uint16;
/* ---- cl_longn ---- */
typedef union
{
cl_long CL_ALIGNED(16) s[2];
#if __CL_HAS_ANON_STRUCT__
__CL_ANON_STRUCT__ struct{ cl_long x, y; };
__CL_ANON_STRUCT__ struct{ cl_long s0, s1; };
__CL_ANON_STRUCT__ struct{ cl_long lo, hi; };
#endif
#if defined( __CL_LONG2__)
__cl_long2 v2;
#endif
}cl_long2;
typedef union
{
cl_long CL_ALIGNED(32) s[4];
#if __CL_HAS_ANON_STRUCT__
__CL_ANON_STRUCT__ struct{ cl_long x, y, z, w; };
__CL_ANON_STRUCT__ struct{ cl_long s0, s1, s2, s3; };
__CL_ANON_STRUCT__ struct{ cl_long2 lo, hi; };
#endif
#if defined( __CL_LONG2__)
__cl_long2 v2[2];
#endif
#if defined( __CL_LONG4__)
__cl_long4 v4;
#endif
}cl_long4;
/* cl_long3 is identical in size, alignment and behavior to cl_long4. See section 6.1.5. */
typedef cl_long4 cl_long3;
typedef union
{
cl_long CL_ALIGNED(64) s[8];
#if __CL_HAS_ANON_STRUCT__
__CL_ANON_STRUCT__ struct{ cl_long x, y, z, w; };
__CL_ANON_STRUCT__ struct{ cl_long s0, s1, s2, s3, s4, s5, s6, s7; };
__CL_ANON_STRUCT__ struct{ cl_long4 lo, hi; };
#endif
#if defined( __CL_LONG2__)
__cl_long2 v2[4];
#endif
#if defined( __CL_LONG4__)
__cl_long4 v4[2];
#endif
#if defined( __CL_LONG8__ )
__cl_long8 v8;
#endif
}cl_long8;
typedef union
{
cl_long CL_ALIGNED(128) s[16];
#if __CL_HAS_ANON_STRUCT__
__CL_ANON_STRUCT__ struct{ cl_long x, y, z, w, __spacer4, __spacer5, __spacer6, __spacer7, __spacer8, __spacer9, sa, sb, sc, sd, se, sf; };
__CL_ANON_STRUCT__ struct{ cl_long s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, sA, sB, sC, sD, sE, sF; };
__CL_ANON_STRUCT__ struct{ cl_long8 lo, hi; };
#endif
#if defined( __CL_LONG2__)
__cl_long2 v2[8];
#endif
#if defined( __CL_LONG4__)
__cl_long4 v4[4];
#endif
#if defined( __CL_LONG8__ )
__cl_long8 v8[2];
#endif
#if defined( __CL_LONG16__ )
__cl_long16 v16;
#endif
}cl_long16;
/* ---- cl_ulongn ---- */
typedef union
{
cl_ulong CL_ALIGNED(16) s[2];
#if __CL_HAS_ANON_STRUCT__
__CL_ANON_STRUCT__ struct{ cl_ulong x, y; };
__CL_ANON_STRUCT__ struct{ cl_ulong s0, s1; };
__CL_ANON_STRUCT__ struct{ cl_ulong lo, hi; };
#endif
#if defined( __CL_ULONG2__)
__cl_ulong2 v2;
#endif
}cl_ulong2;
typedef union
{
cl_ulong CL_ALIGNED(32) s[4];
#if __CL_HAS_ANON_STRUCT__
__CL_ANON_STRUCT__ struct{ cl_ulong x, y, z, w; };
__CL_ANON_STRUCT__ struct{ cl_ulong s0, s1, s2, s3; };
__CL_ANON_STRUCT__ struct{ cl_ulong2 lo, hi; };
#endif
#if defined( __CL_ULONG2__)
__cl_ulong2 v2[2];
#endif
#if defined( __CL_ULONG4__)
__cl_ulong4 v4;
#endif
}cl_ulong4;
/* cl_ulong3 is identical in size, alignment and behavior to cl_ulong4. See section 6.1.5. */
typedef cl_ulong4 cl_ulong3;
typedef union
{
cl_ulong CL_ALIGNED(64) s[8];
#if __CL_HAS_ANON_STRUCT__
__CL_ANON_STRUCT__ struct{ cl_ulong x, y, z, w; };
__CL_ANON_STRUCT__ struct{ cl_ulong s0, s1, s2, s3, s4, s5, s6, s7; };
__CL_ANON_STRUCT__ struct{ cl_ulong4 lo, hi; };
#endif
#if defined( __CL_ULONG2__)
__cl_ulong2 v2[4];
#endif
#if defined( __CL_ULONG4__)
__cl_ulong4 v4[2];
#endif
#if defined( __CL_ULONG8__ )
__cl_ulong8 v8;
#endif
}cl_ulong8;
typedef union
{
cl_ulong CL_ALIGNED(128) s[16];
#if __CL_HAS_ANON_STRUCT__
__CL_ANON_STRUCT__ struct{ cl_ulong x, y, z, w, __spacer4, __spacer5, __spacer6, __spacer7, __spacer8, __spacer9, sa, sb, sc, sd, se, sf; };
__CL_ANON_STRUCT__ struct{ cl_ulong s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, sA, sB, sC, sD, sE, sF; };
__CL_ANON_STRUCT__ struct{ cl_ulong8 lo, hi; };
#endif
#if defined( __CL_ULONG2__)
__cl_ulong2 v2[8];
#endif
#if defined( __CL_ULONG4__)
__cl_ulong4 v4[4];
#endif
#if defined( __CL_ULONG8__ )
__cl_ulong8 v8[2];
#endif
#if defined( __CL_ULONG16__ )
__cl_ulong16 v16;
#endif
}cl_ulong16;
/* --- cl_floatn ---- */
typedef union
{
cl_float CL_ALIGNED(8) s[2];
#if __CL_HAS_ANON_STRUCT__
__CL_ANON_STRUCT__ struct{ cl_float x, y; };
__CL_ANON_STRUCT__ struct{ cl_float s0, s1; };
__CL_ANON_STRUCT__ struct{ cl_float lo, hi; };
#endif
#if defined( __CL_FLOAT2__)
__cl_float2 v2;
#endif
}cl_float2;
typedef union
{
cl_float CL_ALIGNED(16) s[4];
#if __CL_HAS_ANON_STRUCT__
__CL_ANON_STRUCT__ struct{ cl_float x, y, z, w; };
__CL_ANON_STRUCT__ struct{ cl_float s0, s1, s2, s3; };
__CL_ANON_STRUCT__ struct{ cl_float2 lo, hi; };
#endif
#if defined( __CL_FLOAT2__)
__cl_float2 v2[2];
#endif
#if defined( __CL_FLOAT4__)
__cl_float4 v4;
#endif
}cl_float4;
/* cl_float3 is identical in size, alignment and behavior to cl_float4. See section 6.1.5. */
typedef cl_float4 cl_float3;
typedef union
{
cl_float CL_ALIGNED(32) s[8];
#if __CL_HAS_ANON_STRUCT__
__CL_ANON_STRUCT__ struct{ cl_float x, y, z, w; };
__CL_ANON_STRUCT__ struct{ cl_float s0, s1, s2, s3, s4, s5, s6, s7; };
__CL_ANON_STRUCT__ struct{ cl_float4 lo, hi; };
#endif
#if defined( __CL_FLOAT2__)
__cl_float2 v2[4];
#endif
#if defined( __CL_FLOAT4__)
__cl_float4 v4[2];
#endif
#if defined( __CL_FLOAT8__ )
__cl_float8 v8;
#endif
}cl_float8;
typedef union
{
cl_float CL_ALIGNED(64) s[16];
#if __CL_HAS_ANON_STRUCT__
__CL_ANON_STRUCT__ struct{ cl_float x, y, z, w, __spacer4, __spacer5, __spacer6, __spacer7, __spacer8, __spacer9, sa, sb, sc, sd, se, sf; };
__CL_ANON_STRUCT__ struct{ cl_float s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, sA, sB, sC, sD, sE, sF; };
__CL_ANON_STRUCT__ struct{ cl_float8 lo, hi; };
#endif
#if defined( __CL_FLOAT2__)
__cl_float2 v2[8];
#endif
#if defined( __CL_FLOAT4__)
__cl_float4 v4[4];
#endif
#if defined( __CL_FLOAT8__ )
__cl_float8 v8[2];
#endif
#if defined( __CL_FLOAT16__ )
__cl_float16 v16;
#endif
}cl_float16;
/* --- cl_doublen ---- */
typedef union
{
cl_double CL_ALIGNED(16) s[2];
#if __CL_HAS_ANON_STRUCT__
__CL_ANON_STRUCT__ struct{ cl_double x, y; };
__CL_ANON_STRUCT__ struct{ cl_double s0, s1; };
__CL_ANON_STRUCT__ struct{ cl_double lo, hi; };
#endif
#if defined( __CL_DOUBLE2__)
__cl_double2 v2;
#endif
}cl_double2;
typedef union
{
cl_double CL_ALIGNED(32) s[4];
#if __CL_HAS_ANON_STRUCT__
__CL_ANON_STRUCT__ struct{ cl_double x, y, z, w; };
__CL_ANON_STRUCT__ struct{ cl_double s0, s1, s2, s3; };
__CL_ANON_STRUCT__ struct{ cl_double2 lo, hi; };
#endif
#if defined( __CL_DOUBLE2__)
__cl_double2 v2[2];
#endif
#if defined( __CL_DOUBLE4__)
__cl_double4 v4;
#endif
}cl_double4;
/* cl_double3 is identical in size, alignment and behavior to cl_double4. See section 6.1.5. */
typedef cl_double4 cl_double3;
typedef union
{
cl_double CL_ALIGNED(64) s[8];
#if __CL_HAS_ANON_STRUCT__
__CL_ANON_STRUCT__ struct{ cl_double x, y, z, w; };
__CL_ANON_STRUCT__ struct{ cl_double s0, s1, s2, s3, s4, s5, s6, s7; };
__CL_ANON_STRUCT__ struct{ cl_double4 lo, hi; };
#endif
#if defined( __CL_DOUBLE2__)
__cl_double2 v2[4];
#endif
#if defined( __CL_DOUBLE4__)
__cl_double4 v4[2];
#endif
#if defined( __CL_DOUBLE8__ )
__cl_double8 v8;
#endif
}cl_double8;
typedef union
{
cl_double CL_ALIGNED(128) s[16];
#if __CL_HAS_ANON_STRUCT__
__CL_ANON_STRUCT__ struct{ cl_double x, y, z, w, __spacer4, __spacer5, __spacer6, __spacer7, __spacer8, __spacer9, sa, sb, sc, sd, se, sf; };
__CL_ANON_STRUCT__ struct{ cl_double s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, sA, sB, sC, sD, sE, sF; };
__CL_ANON_STRUCT__ struct{ cl_double8 lo, hi; };
#endif
#if defined( __CL_DOUBLE2__)
__cl_double2 v2[8];
#endif
#if defined( __CL_DOUBLE4__)
__cl_double4 v4[4];
#endif
#if defined( __CL_DOUBLE8__ )
__cl_double8 v8[2];
#endif
#if defined( __CL_DOUBLE16__ )
__cl_double16 v16;
#endif
}cl_double16;
/* Macro to facilitate debugging
* Usage:
* Place CL_PROGRAM_STRING_DEBUG_INFO on the line before the first line of your source.
* The first line ends with: CL_PROGRAM_STRING_DEBUG_INFO \"
* Each line thereafter of OpenCL C source must end with: \n\
* The last line ends in ";
*
* Example:
*
* const char *my_program = CL_PROGRAM_STRING_DEBUG_INFO "\
* kernel void foo( int a, float * b ) \n\
* { \n\
* // my comment \n\
* *b[ get_global_id(0)] = a; \n\
* } \n\
* ";
*
* This should correctly set up the line, (column) and file information for your source
* string so you can do source level debugging.
*/
#define __CL_STRINGIFY( _x ) # _x
#define _CL_STRINGIFY( _x ) __CL_STRINGIFY( _x )
#define CL_PROGRAM_STRING_DEBUG_INFO "#line " _CL_STRINGIFY(__LINE__) " \"" __FILE__ "\" \n\n"
#ifdef __cplusplus
}
#endif
#undef __CL_HAS_ANON_STRUCT__
#undef __CL_ANON_STRUCT__
#if defined( _WIN32) && defined(_MSC_VER)
#if _MSC_VER >=1500
#pragma warning( pop )
#endif
#endif
#endif /* __CL_PLATFORM_H */
/**********************************************************************************
* Copyright (c) 2008-2016 The Khronos Group Inc.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and/or associated documentation files (the
* "Materials"), to deal in the Materials without restriction, including
* without limitation the rights to use, copy, modify, merge, publish,
* distribute, sublicense, and/or sell copies of the Materials, and to
* permit persons to whom the Materials are furnished to do so, subject to
* the following conditions:
*
* The above copyright notice and this permission notice shall be included
* in all copies or substantial portions of the Materials.
*
* MODIFICATIONS TO THIS FILE MAY MEAN IT NO LONGER ACCURATELY REFLECTS
* KHRONOS STANDARDS. THE UNMODIFIED, NORMATIVE VERSIONS OF KHRONOS
* SPECIFICATIONS AND HEADER INFORMATION ARE LOCATED AT
* https://www.khronos.org/registry/
*
* THE MATERIALS ARE PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
* MATERIALS OR THE USE OR OTHER DEALINGS IN THE MATERIALS.
**********************************************************************************/
/*****************************************************************************\
Copyright (c) 2013-2016 Intel Corporation All Rights Reserved.
THESE MATERIALS ARE PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL OR ITS
CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY OR TORT (INCLUDING
NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THESE
MATERIALS, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
File Name: cl_va_api_media_sharing_intel.h
Abstract:
Notes:
\*****************************************************************************/
#ifndef __OPENCL_CL_VA_API_MEDIA_SHARING_INTEL_H
#define __OPENCL_CL_VA_API_MEDIA_SHARING_INTEL_H
#include <CL/cl.h>
#include <CL/cl_platform.h>
#include <va/va.h>
#ifdef __cplusplus
extern "C" {
#endif
/******************************************
* cl_intel_va_api_media_sharing extension *
*******************************************/
#define cl_intel_va_api_media_sharing 1
/* error codes */
#define CL_INVALID_VA_API_MEDIA_ADAPTER_INTEL -1098
#define CL_INVALID_VA_API_MEDIA_SURFACE_INTEL -1099
#define CL_VA_API_MEDIA_SURFACE_ALREADY_ACQUIRED_INTEL -1100
#define CL_VA_API_MEDIA_SURFACE_NOT_ACQUIRED_INTEL -1101
/* cl_va_api_device_source_intel */
#define CL_VA_API_DISPLAY_INTEL 0x4094
/* cl_va_api_device_set_intel */
#define CL_PREFERRED_DEVICES_FOR_VA_API_INTEL 0x4095
#define CL_ALL_DEVICES_FOR_VA_API_INTEL 0x4096
/* cl_context_info */
#define CL_CONTEXT_VA_API_DISPLAY_INTEL 0x4097
/* cl_mem_info */
#define CL_MEM_VA_API_MEDIA_SURFACE_INTEL 0x4098
/* cl_image_info */
#define CL_IMAGE_VA_API_PLANE_INTEL 0x4099
/* cl_command_type */
#define CL_COMMAND_ACQUIRE_VA_API_MEDIA_SURFACES_INTEL 0x409A
#define CL_COMMAND_RELEASE_VA_API_MEDIA_SURFACES_INTEL 0x409B
typedef cl_uint cl_va_api_device_source_intel;
typedef cl_uint cl_va_api_device_set_intel;
extern CL_API_ENTRY cl_int CL_API_CALL
clGetDeviceIDsFromVA_APIMediaAdapterINTEL(
cl_platform_id /* platform */,
cl_va_api_device_source_intel /* media_adapter_type */,
void* /* media_adapter */,
cl_va_api_device_set_intel /* media_adapter_set */,
cl_uint /* num_entries */,
cl_device_id* /* devices */,
cl_uint* /* num_devices */) CL_EXT_SUFFIX__VERSION_1_2;
typedef CL_API_ENTRY cl_int (CL_API_CALL * clGetDeviceIDsFromVA_APIMediaAdapterINTEL_fn)(
cl_platform_id /* platform */,
cl_va_api_device_source_intel /* media_adapter_type */,
void* /* media_adapter */,
cl_va_api_device_set_intel /* media_adapter_set */,
cl_uint /* num_entries */,
cl_device_id* /* devices */,
cl_uint* /* num_devices */) CL_EXT_SUFFIX__VERSION_1_2;
extern CL_API_ENTRY cl_mem CL_API_CALL
clCreateFromVA_APIMediaSurfaceINTEL(
cl_context /* context */,
cl_mem_flags /* flags */,
VASurfaceID* /* surface */,
cl_uint /* plane */,
cl_int* /* errcode_ret */) CL_EXT_SUFFIX__VERSION_1_2;
typedef CL_API_ENTRY cl_mem (CL_API_CALL * clCreateFromVA_APIMediaSurfaceINTEL_fn)(
cl_context /* context */,
cl_mem_flags /* flags */,
VASurfaceID* /* surface */,
cl_uint /* plane */,
cl_int* /* errcode_ret */) CL_EXT_SUFFIX__VERSION_1_2;
extern CL_API_ENTRY cl_int CL_API_CALL
clEnqueueAcquireVA_APIMediaSurfacesINTEL(
cl_command_queue /* command_queue */,
cl_uint /* num_objects */,
const cl_mem* /* mem_objects */,
cl_uint /* num_events_in_wait_list */,
const cl_event* /* event_wait_list */,
cl_event* /* event */) CL_EXT_SUFFIX__VERSION_1_2;
typedef CL_API_ENTRY cl_int (CL_API_CALL *clEnqueueAcquireVA_APIMediaSurfacesINTEL_fn)(
cl_command_queue /* command_queue */,
cl_uint /* num_objects */,
const cl_mem* /* mem_objects */,
cl_uint /* num_events_in_wait_list */,
const cl_event* /* event_wait_list */,
cl_event* /* event */) CL_EXT_SUFFIX__VERSION_1_2;
extern CL_API_ENTRY cl_int CL_API_CALL
clEnqueueReleaseVA_APIMediaSurfacesINTEL(
cl_command_queue /* command_queue */,
cl_uint /* num_objects */,
const cl_mem* /* mem_objects */,
cl_uint /* num_events_in_wait_list */,
const cl_event* /* event_wait_list */,
cl_event* /* event */) CL_EXT_SUFFIX__VERSION_1_2;
typedef CL_API_ENTRY cl_int (CL_API_CALL *clEnqueueReleaseVA_APIMediaSurfacesINTEL_fn)(
cl_command_queue /* command_queue */,
cl_uint /* num_objects */,
const cl_mem* /* mem_objects */,
cl_uint /* num_events_in_wait_list */,
const cl_event* /* event_wait_list */,
cl_event* /* event */) CL_EXT_SUFFIX__VERSION_1_2;
#ifdef __cplusplus
}
#endif
#endif /* __OPENCL_CL_VA_API_MEDIA_SHARING_INTEL_H */
/*******************************************************************************
* Copyright (c) 2018 The Khronos Group Inc.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and/or associated documentation files (the
* "Materials"), to deal in the Materials without restriction, including
* without limitation the rights to use, copy, modify, merge, publish,
* distribute, sublicense, and/or sell copies of the Materials, and to
* permit persons to whom the Materials are furnished to do so, subject to
* the following conditions:
*
* The above copyright notice and this permission notice shall be included
* in all copies or substantial portions of the Materials.
*
* MODIFICATIONS TO THIS FILE MAY MEAN IT NO LONGER ACCURATELY REFLECTS
* KHRONOS STANDARDS. THE UNMODIFIED, NORMATIVE VERSIONS OF KHRONOS
* SPECIFICATIONS AND HEADER INFORMATION ARE LOCATED AT
* https://www.khronos.org/registry/
*
* THE MATERIALS ARE PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
* MATERIALS OR THE USE OR OTHER DEALINGS IN THE MATERIALS.
******************************************************************************/
#ifndef __CL_VERSION_H
#define __CL_VERSION_H
/* Detect which version to target */
#if !defined(CL_TARGET_OPENCL_VERSION)
#pragma message("cl_version.h: CL_TARGET_OPENCL_VERSION is not defined. Defaulting to 220 (OpenCL 2.2)")
#define CL_TARGET_OPENCL_VERSION 220
#endif
#if CL_TARGET_OPENCL_VERSION != 100 && \
CL_TARGET_OPENCL_VERSION != 110 && \
CL_TARGET_OPENCL_VERSION != 120 && \
CL_TARGET_OPENCL_VERSION != 200 && \
CL_TARGET_OPENCL_VERSION != 210 && \
CL_TARGET_OPENCL_VERSION != 220
#pragma message("cl_version: CL_TARGET_OPENCL_VERSION is not a valid value (100, 110, 120, 200, 210, 220). Defaulting to 220 (OpenCL 2.2)")
#undef CL_TARGET_OPENCL_VERSION
#define CL_TARGET_OPENCL_VERSION 220
#endif
/* OpenCL Version */
#if CL_TARGET_OPENCL_VERSION >= 220 && !defined(CL_VERSION_2_2)
#define CL_VERSION_2_2 1
#endif
#if CL_TARGET_OPENCL_VERSION >= 210 && !defined(CL_VERSION_2_1)
#define CL_VERSION_2_1 1
#endif
#if CL_TARGET_OPENCL_VERSION >= 200 && !defined(CL_VERSION_2_0)
#define CL_VERSION_2_0 1
#endif
#if CL_TARGET_OPENCL_VERSION >= 120 && !defined(CL_VERSION_1_2)
#define CL_VERSION_1_2 1
#endif
#if CL_TARGET_OPENCL_VERSION >= 110 && !defined(CL_VERSION_1_1)
#define CL_VERSION_1_1 1
#endif
#if CL_TARGET_OPENCL_VERSION >= 100 && !defined(CL_VERSION_1_0)
#define CL_VERSION_1_0 1
#endif
/* Allow deprecated APIs for older OpenCL versions. */
#if CL_TARGET_OPENCL_VERSION <= 210 && !defined(CL_USE_DEPRECATED_OPENCL_2_1_APIS)
#define CL_USE_DEPRECATED_OPENCL_2_1_APIS
#endif
#if CL_TARGET_OPENCL_VERSION <= 200 && !defined(CL_USE_DEPRECATED_OPENCL_2_0_APIS)
#define CL_USE_DEPRECATED_OPENCL_2_0_APIS
#endif
#if CL_TARGET_OPENCL_VERSION <= 120 && !defined(CL_USE_DEPRECATED_OPENCL_1_2_APIS)
#define CL_USE_DEPRECATED_OPENCL_1_2_APIS
#endif
#if CL_TARGET_OPENCL_VERSION <= 110 && !defined(CL_USE_DEPRECATED_OPENCL_1_1_APIS)
#define CL_USE_DEPRECATED_OPENCL_1_1_APIS
#endif
#if CL_TARGET_OPENCL_VERSION <= 100 && !defined(CL_USE_DEPRECATED_OPENCL_1_0_APIS)
#define CL_USE_DEPRECATED_OPENCL_1_0_APIS
#endif
#endif /* __CL_VERSION_H */
/*******************************************************************************
* Copyright (c) 2008-2015 The Khronos Group Inc.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and/or associated documentation files (the
* "Materials"), to deal in the Materials without restriction, including
* without limitation the rights to use, copy, modify, merge, publish,
* distribute, sublicense, and/or sell copies of the Materials, and to
* permit persons to whom the Materials are furnished to do so, subject to
* the following conditions:
*
* The above copyright notice and this permission notice shall be included
* in all copies or substantial portions of the Materials.
*
* MODIFICATIONS TO THIS FILE MAY MEAN IT NO LONGER ACCURATELY REFLECTS
* KHRONOS STANDARDS. THE UNMODIFIED, NORMATIVE VERSIONS OF KHRONOS
* SPECIFICATIONS AND HEADER INFORMATION ARE LOCATED AT
* https://www.khronos.org/registry/
*
* THE MATERIALS ARE PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
* MATERIALS OR THE USE OR OTHER DEALINGS IN THE MATERIALS.
******************************************************************************/
/* $Revision: 11708 $ on $Date: 2010-06-13 23:36:24 -0700 (Sun, 13 Jun 2010) $ */
#ifndef __OPENCL_H
#define __OPENCL_H
#ifdef __cplusplus
extern "C" {
#endif
#ifdef __APPLE__
#include <OpenCL/cl.h>
#include <OpenCL/cl_gl.h>
#include <OpenCL/cl_gl_ext.h>
#include <OpenCL/cl_ext.h>
#else
#include <CL/cl.h>
#include <CL/cl_gl.h>
#include <CL/cl_gl_ext.h>
#include <CL/cl_ext.h>
#endif
#ifdef __cplusplus
}
#endif
#endif /* __OPENCL_H */
Copyright (c) 2008-2015 The Khronos Group Inc.
Permission is hereby granted, free of charge, to any person obtaining a
copy of this software and/or associated documentation files (the
"Materials"), to deal in the Materials without restriction, including
without limitation the rights to use, copy, modify, merge, publish,
distribute, sublicense, and/or sell copies of the Materials, and to
permit persons to whom the Materials are furnished to do so, subject to
the following conditions:
The above copyright notice and this permission notice shall be included
in all copies or substantial portions of the Materials.
MODIFICATIONS TO THIS FILE MAY MEAN IT NO LONGER ACCURATELY REFLECTS
KHRONOS STANDARDS. THE UNMODIFIED, NORMATIVE VERSIONS OF KHRONOS
SPECIFICATIONS AND HEADER INFORMATION ARE LOCATED AT
https://www.khronos.org/registry/
THE MATERIALS ARE PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
MATERIALS OR THE USE OR OTHER DEALINGS IN THE MATERIALS.
# OpenCL<sup>TM</sup> API Headers
This repository contains C language headers for the OpenCL API.
The authoritative public repository for these headers is located at:
https://github.com/KhronosGroup/OpenCL-Headers
Issues, proposed fixes for issues, and other suggested changes should be
created using Github.
## Branch Structure
The OpenCL API headers in this repository are Unified headers and are designed
to work with all released OpenCL versions. This differs from previous OpenCL
API headers, where version-specific API headers either existed in separate
branches, or in separate folders in a branch.
## Compiling for a Specific OpenCL Version
By default, the OpenCL API headers in this repository are for the latest
OpenCL version (currently OpenCL 2.2). To use these API headers to target
a different OpenCL version, an application may `#define` the preprocessor
value `CL_TARGET_OPENCL_VERSION` before including the OpenCL API headers.
The `CL_TARGET_OPENCL_VERSION` is a three digit decimal value representing
the OpenCL API version.
For example, to enforce usage of no more than the OpenCL 1.2 APIs, you may
include the OpenCL API headers as follows:
```
#define CL_TARGET_OPENCL_VERSION 120
#include <CL/opencl.h>
```
## Directory Structure
```
README.md This file
LICENSE Source license for the OpenCL API headers
CL/ Unified OpenCL API headers tree
```
## License
See [LICENSE](LICENSE).
---
OpenCL and the OpenCL logo are trademarks of Apple Inc. used by permission by Khronos.
...@@ -5,12 +5,12 @@ MODELS_PATH="../../test/models/*" ...@@ -5,12 +5,12 @@ MODELS_PATH="../../test/models/*"
MODELS_SRC="../../test/models" MODELS_SRC="../../test/models"
IMAGE_PATH="../../test/images/*" IMAGE_PATH="../../test/images/*"
EXE_FILE="../../test/build/*" EXE_FILE="../../test/build/*"
EXE_DIR="data/local/tmp/bin" EXE_DIR="/data/local/tmp/bin"
adb shell mkdir ${EXE_DIR} adb shell mkdir ${EXE_DIR}
MODELS_DIR="data/local/tmp/models" MODELS_DIR="/data/local/tmp/models"
adb shell mkdir ${MODELS_DIR} adb shell mkdir ${MODELS_DIR}
for file in `ls ${MODELS_SRC}` for file in `ls ${MODELS_SRC}`
do do
adb shell mkdir ${MODELS_DIR}"/"${file} adb shell mkdir ${MODELS_DIR}"/"${file}
done done
...@@ -19,11 +19,15 @@ ACL_BUILD_PATH="../../src/operators/kernel/mali/ACL_Android/build/*" ...@@ -19,11 +19,15 @@ ACL_BUILD_PATH="../../src/operators/kernel/mali/ACL_Android/build/*"
adb push ${ACL_BUILD_PATH} ${EXE_DIR} adb push ${ACL_BUILD_PATH} ${EXE_DIR}
fi fi
IMAGES_DIR="data/local/tmp/images" IMAGES_DIR="/data/local/tmp/images"
adb shell mkdir ${IMAGES_DIR} adb shell mkdir ${IMAGES_DIR}
LIB_PATH="../../build/release/arm-v7a/build/*" LIB_PATH="../../build/release/arm-v7a/build/*"
adb push ${EXE_FILE} ${EXE_DIR} adb push ${EXE_FILE} ${EXE_DIR}
adb push ${LIB_PATH} ${EXE_DIR} for file in ${LIB_PATH}
do
adb push ${file} ${EXE_DIR}
done
if [[ $1 != "npm" ]]; then if [[ $1 != "npm" ]]; then
adb push ${IMAGE_PATH} ${IMAGES_DIR} adb push ${IMAGE_PATH} ${IMAGES_DIR}
adb push ${MODELS_PATH} ${MODELS_DIR} adb push ${MODELS_PATH} ${MODELS_DIR}
......
...@@ -90,6 +90,8 @@ build_for_android() { ...@@ -90,6 +90,8 @@ build_for_android() {
fi fi
cd "../build/release/${PLATFORM}" cd "../build/release/${PLATFORM}"
make -j 8 make -j 8
mkdir ./build/cl_kernel
cp ../../../src/operators/kernel/cl/cl_kernel/* ./build/cl_kernel/
} }
......
...@@ -17,7 +17,7 @@ shift ...@@ -17,7 +17,7 @@ shift
perl -i -pe 's|^\s+#pragma\s+omp|// <TRICKY-CLANG-FORMAT-PRAGMA-FIX> #pragma omp|' "$@" perl -i -pe 's|^\s+#pragma\s+omp|// <TRICKY-CLANG-FORMAT-PRAGMA-FIX> #pragma omp|' "$@"
( (
# remove clang format ios_io folder # remove clang format ios_io folder
flist=$(echo "$@" | perl -pe 's|src/ios_io/[^ ]*||') flist=$(echo "$@" | perl -pe 's|src/io/ios_io/[^ ]*||')
clang-format -i $flist clang-format -i $flist
) )
perl -i -pe 's|// <TRICKY-CLANG-FORMAT-PRAGMA-FIX> ||' "$@" perl -i -pe 's|// <TRICKY-CLANG-FORMAT-PRAGMA-FIX> ||' "$@"
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册