提交 3f052e01 编写于 作者: L liuruilong

add default cl work size func , format files

上级 b2e33f33
......@@ -46,7 +46,8 @@ struct PaddleMobileException : public std::exception {
std::string detail(buffer); \
throw paddle_mobile::PaddleMobileException("Custom Exception", buffer, \
__FILE__, __LINE__); \
}
} \
exit(0);
#define PADDLE_MOBILE_ENFORCE(stat, ...) \
{ \
......
......@@ -39,7 +39,13 @@ struct PrecisionTrait<Precision::FP16> {
};
//! device type
enum DeviceTypeEnum { kINVALID = -1, kCPU = 0, kFPGA = 1, kGPU_MALI = 2, kGPU_CL = 3};
enum DeviceTypeEnum {
kINVALID = -1,
kCPU = 0,
kFPGA = 1,
kGPU_MALI = 2,
kGPU_CL = 3
};
template <DeviceTypeEnum T>
struct DeviceType {};
......@@ -49,7 +55,6 @@ typedef DeviceType<kFPGA> FPGA;
typedef DeviceType<kGPU_MALI> GPU_MALI;
typedef DeviceType<kGPU_CL> GPU_CL;
//! data type
enum DataType {
PM_INVALID = -1,
......
......@@ -12,9 +12,9 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "framework/cl/cl_engine.h"
#include "CL/cl.h"
#include "framework/cl/cl_tool.h"
#include "framework/cl/cl_engine.h"
#include <cstdlib>
#include <cstring>
......@@ -28,11 +28,11 @@ bool CLEngine::Init() {
SetClDeviceId();
initialized_ = true;
// setClContext();
// setClCommandQueue();
// std::string filename = "./HelloWorld_Kernel.cl";
// loadKernelFromFile(filename.c_str());
// buildProgram();
// setClContext();
// setClCommandQueue();
// std::string filename = "./HelloWorld_Kernel.cl";
// loadKernelFromFile(filename.c_str());
// buildProgram();
}
CLEngine *CLEngine::Instance() {
......@@ -74,26 +74,26 @@ bool CLEngine::SetClDeviceId() {
return false;
}
//std::unique_ptr<_cl_kernel, clKernel_deleter> CLEngine::GSetKernel(
// std::unique_ptr<_cl_kernel, clKernel_deleter> CLEngine::GSetKernel(
// const std::string &kernel_name) {
// std::unique_ptr<_cl_kernel, clKernel_deleter> kernel(
// clCreateKernel(program_.get(), kernel_name.c_str(), NULL));
// return std::move(kernel);
//}
//
//bool CLEngine::SetClCommandQueue() {
// bool CLEngine::SetClCommandQueue() {
// cl_int status;
// command_queue_.reset(
// clCreateCommandQueue(context_.get(), devices_[0], 0, &status));
// return true;
//}
//bool CLEngine::SetClContext() {
// bool CLEngine::SetClContext() {
// context_.reset(clCreateContext(NULL, 1, devices_, NULL, NULL, NULL));
// return true;
//}
//bool CLEngine::LoadKernelFromFile(const char *kernel_file) {
// bool CLEngine::LoadKernelFromFile(const char *kernel_file) {
// size_t size;
// char *str;
// std::fstream f(kernel_file, (std::fstream::in | std::fstream::binary));
......@@ -118,10 +118,10 @@ bool CLEngine::SetClDeviceId() {
// const char *source = str;
// size_t sourceSize[] = {strlen(source)};
// program_.reset(
// clCreateProgramWithSource(context_.get(), 1, &source, sourceSize, NULL));
// clCreateProgramWithSource(context_.get(), 1, &source, sourceSize,
// NULL));
// return true;
//}
} // namespace framework
} // namespace paddle_mobile
......@@ -17,9 +17,9 @@ limitations under the License. */
#include <memory>
#include <string>
#include "CL/cl.h"
#include "common/enforce.h"
#include "framework/cl/cl_deleter.h"
#include "CL/cl.h"
namespace paddle_mobile {
namespace framework {
......@@ -36,16 +36,18 @@ class CLEngine {
return std::move(context_ptr);
}
std::unique_ptr<_cl_command_queue, CLCommQueueDeleter> CreateClCommandQueue() {
std::unique_ptr<_cl_command_queue, CLCommQueueDeleter>
CreateClCommandQueue() {
cl_int status;
cl_command_queue queue = clCreateCommandQueue(context_.get(), devices_[0], 0, &status);
std::unique_ptr<_cl_command_queue, CLCommQueueDeleter> command_queue_ptr(queue);
cl_command_queue queue =
clCreateCommandQueue(context_.get(), devices_[0], 0, &status);
std::unique_ptr<_cl_command_queue, CLCommQueueDeleter> command_queue_ptr(
queue);
return std::move(command_queue_ptr);
}
std::unique_ptr<_cl_program, CLProgramDeleter> CreateProgramWith(cl_context context, std::string file_name) {
std::unique_ptr<_cl_program, CLProgramDeleter> CreateProgramWith(
cl_context context, std::string file_name) {
FILE *file = fopen(file_name.c_str(), "rb");
PADDLE_MOBILE_ENFORCE(file != nullptr, "can't open file: %s ",
filename.c_str());
......@@ -62,7 +64,8 @@ class CLEngine {
const char *source = data;
size_t sourceSize[] = {strlen(source)};
cl_program p = clCreateProgramWithSource(context, 1, &source, sourceSize, NULL);
cl_program p =
clCreateProgramWithSource(context, 1, &source, sourceSize, NULL);
std::unique_ptr<_cl_program, CLProgramDeleter> program_ptr(p);
return std::move(program_ptr);
}
......@@ -81,7 +84,6 @@ class CLEngine {
bool SetClDeviceId();
bool initialized_;
cl_platform_id platform_;
......@@ -94,14 +96,13 @@ class CLEngine {
std::unique_ptr<_cl_program, CLProgramDeleter> program_;
// bool SetClContext();
// bool SetClCommandQueue();
// bool SetClContext();
// bool LoadKernelFromFile(const char *kernel_file);
// bool SetClCommandQueue();
// bool BuildProgram();
// bool LoadKernelFromFile(const char *kernel_file);
// bool BuildProgram();
};
} // namespace framework
......
......@@ -14,429 +14,487 @@ limitations under the License. */
// ftp://ftp.fox-toolkit.org/pub/fasthalffloatconversion.pdf
#include "cl_half.h"
#include "framework/cl/cl_half.h"
const static uint32_t mantissatable[2048] = {
0x00000000, 0x33800000, 0x34000000, 0x34400000, 0x34800000, 0x34a00000, 0x34c00000, 0x34e00000,
0x35000000, 0x35100000, 0x35200000, 0x35300000, 0x35400000, 0x35500000, 0x35600000, 0x35700000,
0x35800000, 0x35880000, 0x35900000, 0x35980000, 0x35a00000, 0x35a80000, 0x35b00000, 0x35b80000,
0x35c00000, 0x35c80000, 0x35d00000, 0x35d80000, 0x35e00000, 0x35e80000, 0x35f00000, 0x35f80000,
0x36000000, 0x36040000, 0x36080000, 0x360c0000, 0x36100000, 0x36140000, 0x36180000, 0x361c0000,
0x36200000, 0x36240000, 0x36280000, 0x362c0000, 0x36300000, 0x36340000, 0x36380000, 0x363c0000,
0x36400000, 0x36440000, 0x36480000, 0x364c0000, 0x36500000, 0x36540000, 0x36580000, 0x365c0000,
0x36600000, 0x36640000, 0x36680000, 0x366c0000, 0x36700000, 0x36740000, 0x36780000, 0x367c0000,
0x36800000, 0x36820000, 0x36840000, 0x36860000, 0x36880000, 0x368a0000, 0x368c0000, 0x368e0000,
0x36900000, 0x36920000, 0x36940000, 0x36960000, 0x36980000, 0x369a0000, 0x369c0000, 0x369e0000,
0x36a00000, 0x36a20000, 0x36a40000, 0x36a60000, 0x36a80000, 0x36aa0000, 0x36ac0000, 0x36ae0000,
0x36b00000, 0x36b20000, 0x36b40000, 0x36b60000, 0x36b80000, 0x36ba0000, 0x36bc0000, 0x36be0000,
0x36c00000, 0x36c20000, 0x36c40000, 0x36c60000, 0x36c80000, 0x36ca0000, 0x36cc0000, 0x36ce0000,
0x36d00000, 0x36d20000, 0x36d40000, 0x36d60000, 0x36d80000, 0x36da0000, 0x36dc0000, 0x36de0000,
0x36e00000, 0x36e20000, 0x36e40000, 0x36e60000, 0x36e80000, 0x36ea0000, 0x36ec0000, 0x36ee0000,
0x36f00000, 0x36f20000, 0x36f40000, 0x36f60000, 0x36f80000, 0x36fa0000, 0x36fc0000, 0x36fe0000,
0x37000000, 0x37010000, 0x37020000, 0x37030000, 0x37040000, 0x37050000, 0x37060000, 0x37070000,
0x37080000, 0x37090000, 0x370a0000, 0x370b0000, 0x370c0000, 0x370d0000, 0x370e0000, 0x370f0000,
0x37100000, 0x37110000, 0x37120000, 0x37130000, 0x37140000, 0x37150000, 0x37160000, 0x37170000,
0x37180000, 0x37190000, 0x371a0000, 0x371b0000, 0x371c0000, 0x371d0000, 0x371e0000, 0x371f0000,
0x37200000, 0x37210000, 0x37220000, 0x37230000, 0x37240000, 0x37250000, 0x37260000, 0x37270000,
0x37280000, 0x37290000, 0x372a0000, 0x372b0000, 0x372c0000, 0x372d0000, 0x372e0000, 0x372f0000,
0x37300000, 0x37310000, 0x37320000, 0x37330000, 0x37340000, 0x37350000, 0x37360000, 0x37370000,
0x37380000, 0x37390000, 0x373a0000, 0x373b0000, 0x373c0000, 0x373d0000, 0x373e0000, 0x373f0000,
0x37400000, 0x37410000, 0x37420000, 0x37430000, 0x37440000, 0x37450000, 0x37460000, 0x37470000,
0x37480000, 0x37490000, 0x374a0000, 0x374b0000, 0x374c0000, 0x374d0000, 0x374e0000, 0x374f0000,
0x37500000, 0x37510000, 0x37520000, 0x37530000, 0x37540000, 0x37550000, 0x37560000, 0x37570000,
0x37580000, 0x37590000, 0x375a0000, 0x375b0000, 0x375c0000, 0x375d0000, 0x375e0000, 0x375f0000,
0x37600000, 0x37610000, 0x37620000, 0x37630000, 0x37640000, 0x37650000, 0x37660000, 0x37670000,
0x37680000, 0x37690000, 0x376a0000, 0x376b0000, 0x376c0000, 0x376d0000, 0x376e0000, 0x376f0000,
0x37700000, 0x37710000, 0x37720000, 0x37730000, 0x37740000, 0x37750000, 0x37760000, 0x37770000,
0x37780000, 0x37790000, 0x377a0000, 0x377b0000, 0x377c0000, 0x377d0000, 0x377e0000, 0x377f0000,
0x37800000, 0x37808000, 0x37810000, 0x37818000, 0x37820000, 0x37828000, 0x37830000, 0x37838000,
0x37840000, 0x37848000, 0x37850000, 0x37858000, 0x37860000, 0x37868000, 0x37870000, 0x37878000,
0x37880000, 0x37888000, 0x37890000, 0x37898000, 0x378a0000, 0x378a8000, 0x378b0000, 0x378b8000,
0x378c0000, 0x378c8000, 0x378d0000, 0x378d8000, 0x378e0000, 0x378e8000, 0x378f0000, 0x378f8000,
0x37900000, 0x37908000, 0x37910000, 0x37918000, 0x37920000, 0x37928000, 0x37930000, 0x37938000,
0x37940000, 0x37948000, 0x37950000, 0x37958000, 0x37960000, 0x37968000, 0x37970000, 0x37978000,
0x37980000, 0x37988000, 0x37990000, 0x37998000, 0x379a0000, 0x379a8000, 0x379b0000, 0x379b8000,
0x379c0000, 0x379c8000, 0x379d0000, 0x379d8000, 0x379e0000, 0x379e8000, 0x379f0000, 0x379f8000,
0x37a00000, 0x37a08000, 0x37a10000, 0x37a18000, 0x37a20000, 0x37a28000, 0x37a30000, 0x37a38000,
0x37a40000, 0x37a48000, 0x37a50000, 0x37a58000, 0x37a60000, 0x37a68000, 0x37a70000, 0x37a78000,
0x37a80000, 0x37a88000, 0x37a90000, 0x37a98000, 0x37aa0000, 0x37aa8000, 0x37ab0000, 0x37ab8000,
0x37ac0000, 0x37ac8000, 0x37ad0000, 0x37ad8000, 0x37ae0000, 0x37ae8000, 0x37af0000, 0x37af8000,
0x37b00000, 0x37b08000, 0x37b10000, 0x37b18000, 0x37b20000, 0x37b28000, 0x37b30000, 0x37b38000,
0x37b40000, 0x37b48000, 0x37b50000, 0x37b58000, 0x37b60000, 0x37b68000, 0x37b70000, 0x37b78000,
0x37b80000, 0x37b88000, 0x37b90000, 0x37b98000, 0x37ba0000, 0x37ba8000, 0x37bb0000, 0x37bb8000,
0x37bc0000, 0x37bc8000, 0x37bd0000, 0x37bd8000, 0x37be0000, 0x37be8000, 0x37bf0000, 0x37bf8000,
0x37c00000, 0x37c08000, 0x37c10000, 0x37c18000, 0x37c20000, 0x37c28000, 0x37c30000, 0x37c38000,
0x37c40000, 0x37c48000, 0x37c50000, 0x37c58000, 0x37c60000, 0x37c68000, 0x37c70000, 0x37c78000,
0x37c80000, 0x37c88000, 0x37c90000, 0x37c98000, 0x37ca0000, 0x37ca8000, 0x37cb0000, 0x37cb8000,
0x37cc0000, 0x37cc8000, 0x37cd0000, 0x37cd8000, 0x37ce0000, 0x37ce8000, 0x37cf0000, 0x37cf8000,
0x37d00000, 0x37d08000, 0x37d10000, 0x37d18000, 0x37d20000, 0x37d28000, 0x37d30000, 0x37d38000,
0x37d40000, 0x37d48000, 0x37d50000, 0x37d58000, 0x37d60000, 0x37d68000, 0x37d70000, 0x37d78000,
0x37d80000, 0x37d88000, 0x37d90000, 0x37d98000, 0x37da0000, 0x37da8000, 0x37db0000, 0x37db8000,
0x37dc0000, 0x37dc8000, 0x37dd0000, 0x37dd8000, 0x37de0000, 0x37de8000, 0x37df0000, 0x37df8000,
0x37e00000, 0x37e08000, 0x37e10000, 0x37e18000, 0x37e20000, 0x37e28000, 0x37e30000, 0x37e38000,
0x37e40000, 0x37e48000, 0x37e50000, 0x37e58000, 0x37e60000, 0x37e68000, 0x37e70000, 0x37e78000,
0x37e80000, 0x37e88000, 0x37e90000, 0x37e98000, 0x37ea0000, 0x37ea8000, 0x37eb0000, 0x37eb8000,
0x37ec0000, 0x37ec8000, 0x37ed0000, 0x37ed8000, 0x37ee0000, 0x37ee8000, 0x37ef0000, 0x37ef8000,
0x37f00000, 0x37f08000, 0x37f10000, 0x37f18000, 0x37f20000, 0x37f28000, 0x37f30000, 0x37f38000,
0x37f40000, 0x37f48000, 0x37f50000, 0x37f58000, 0x37f60000, 0x37f68000, 0x37f70000, 0x37f78000,
0x37f80000, 0x37f88000, 0x37f90000, 0x37f98000, 0x37fa0000, 0x37fa8000, 0x37fb0000, 0x37fb8000,
0x37fc0000, 0x37fc8000, 0x37fd0000, 0x37fd8000, 0x37fe0000, 0x37fe8000, 0x37ff0000, 0x37ff8000,
0x38000000, 0x38004000, 0x38008000, 0x3800c000, 0x38010000, 0x38014000, 0x38018000, 0x3801c000,
0x38020000, 0x38024000, 0x38028000, 0x3802c000, 0x38030000, 0x38034000, 0x38038000, 0x3803c000,
0x38040000, 0x38044000, 0x38048000, 0x3804c000, 0x38050000, 0x38054000, 0x38058000, 0x3805c000,
0x38060000, 0x38064000, 0x38068000, 0x3806c000, 0x38070000, 0x38074000, 0x38078000, 0x3807c000,
0x38080000, 0x38084000, 0x38088000, 0x3808c000, 0x38090000, 0x38094000, 0x38098000, 0x3809c000,
0x380a0000, 0x380a4000, 0x380a8000, 0x380ac000, 0x380b0000, 0x380b4000, 0x380b8000, 0x380bc000,
0x380c0000, 0x380c4000, 0x380c8000, 0x380cc000, 0x380d0000, 0x380d4000, 0x380d8000, 0x380dc000,
0x380e0000, 0x380e4000, 0x380e8000, 0x380ec000, 0x380f0000, 0x380f4000, 0x380f8000, 0x380fc000,
0x38100000, 0x38104000, 0x38108000, 0x3810c000, 0x38110000, 0x38114000, 0x38118000, 0x3811c000,
0x38120000, 0x38124000, 0x38128000, 0x3812c000, 0x38130000, 0x38134000, 0x38138000, 0x3813c000,
0x38140000, 0x38144000, 0x38148000, 0x3814c000, 0x38150000, 0x38154000, 0x38158000, 0x3815c000,
0x38160000, 0x38164000, 0x38168000, 0x3816c000, 0x38170000, 0x38174000, 0x38178000, 0x3817c000,
0x38180000, 0x38184000, 0x38188000, 0x3818c000, 0x38190000, 0x38194000, 0x38198000, 0x3819c000,
0x381a0000, 0x381a4000, 0x381a8000, 0x381ac000, 0x381b0000, 0x381b4000, 0x381b8000, 0x381bc000,
0x381c0000, 0x381c4000, 0x381c8000, 0x381cc000, 0x381d0000, 0x381d4000, 0x381d8000, 0x381dc000,
0x381e0000, 0x381e4000, 0x381e8000, 0x381ec000, 0x381f0000, 0x381f4000, 0x381f8000, 0x381fc000,
0x38200000, 0x38204000, 0x38208000, 0x3820c000, 0x38210000, 0x38214000, 0x38218000, 0x3821c000,
0x38220000, 0x38224000, 0x38228000, 0x3822c000, 0x38230000, 0x38234000, 0x38238000, 0x3823c000,
0x38240000, 0x38244000, 0x38248000, 0x3824c000, 0x38250000, 0x38254000, 0x38258000, 0x3825c000,
0x38260000, 0x38264000, 0x38268000, 0x3826c000, 0x38270000, 0x38274000, 0x38278000, 0x3827c000,
0x38280000, 0x38284000, 0x38288000, 0x3828c000, 0x38290000, 0x38294000, 0x38298000, 0x3829c000,
0x382a0000, 0x382a4000, 0x382a8000, 0x382ac000, 0x382b0000, 0x382b4000, 0x382b8000, 0x382bc000,
0x382c0000, 0x382c4000, 0x382c8000, 0x382cc000, 0x382d0000, 0x382d4000, 0x382d8000, 0x382dc000,
0x382e0000, 0x382e4000, 0x382e8000, 0x382ec000, 0x382f0000, 0x382f4000, 0x382f8000, 0x382fc000,
0x38300000, 0x38304000, 0x38308000, 0x3830c000, 0x38310000, 0x38314000, 0x38318000, 0x3831c000,
0x38320000, 0x38324000, 0x38328000, 0x3832c000, 0x38330000, 0x38334000, 0x38338000, 0x3833c000,
0x38340000, 0x38344000, 0x38348000, 0x3834c000, 0x38350000, 0x38354000, 0x38358000, 0x3835c000,
0x38360000, 0x38364000, 0x38368000, 0x3836c000, 0x38370000, 0x38374000, 0x38378000, 0x3837c000,
0x38380000, 0x38384000, 0x38388000, 0x3838c000, 0x38390000, 0x38394000, 0x38398000, 0x3839c000,
0x383a0000, 0x383a4000, 0x383a8000, 0x383ac000, 0x383b0000, 0x383b4000, 0x383b8000, 0x383bc000,
0x383c0000, 0x383c4000, 0x383c8000, 0x383cc000, 0x383d0000, 0x383d4000, 0x383d8000, 0x383dc000,
0x383e0000, 0x383e4000, 0x383e8000, 0x383ec000, 0x383f0000, 0x383f4000, 0x383f8000, 0x383fc000,
0x38400000, 0x38404000, 0x38408000, 0x3840c000, 0x38410000, 0x38414000, 0x38418000, 0x3841c000,
0x38420000, 0x38424000, 0x38428000, 0x3842c000, 0x38430000, 0x38434000, 0x38438000, 0x3843c000,
0x38440000, 0x38444000, 0x38448000, 0x3844c000, 0x38450000, 0x38454000, 0x38458000, 0x3845c000,
0x38460000, 0x38464000, 0x38468000, 0x3846c000, 0x38470000, 0x38474000, 0x38478000, 0x3847c000,
0x38480000, 0x38484000, 0x38488000, 0x3848c000, 0x38490000, 0x38494000, 0x38498000, 0x3849c000,
0x384a0000, 0x384a4000, 0x384a8000, 0x384ac000, 0x384b0000, 0x384b4000, 0x384b8000, 0x384bc000,
0x384c0000, 0x384c4000, 0x384c8000, 0x384cc000, 0x384d0000, 0x384d4000, 0x384d8000, 0x384dc000,
0x384e0000, 0x384e4000, 0x384e8000, 0x384ec000, 0x384f0000, 0x384f4000, 0x384f8000, 0x384fc000,
0x38500000, 0x38504000, 0x38508000, 0x3850c000, 0x38510000, 0x38514000, 0x38518000, 0x3851c000,
0x38520000, 0x38524000, 0x38528000, 0x3852c000, 0x38530000, 0x38534000, 0x38538000, 0x3853c000,
0x38540000, 0x38544000, 0x38548000, 0x3854c000, 0x38550000, 0x38554000, 0x38558000, 0x3855c000,
0x38560000, 0x38564000, 0x38568000, 0x3856c000, 0x38570000, 0x38574000, 0x38578000, 0x3857c000,
0x38580000, 0x38584000, 0x38588000, 0x3858c000, 0x38590000, 0x38594000, 0x38598000, 0x3859c000,
0x385a0000, 0x385a4000, 0x385a8000, 0x385ac000, 0x385b0000, 0x385b4000, 0x385b8000, 0x385bc000,
0x385c0000, 0x385c4000, 0x385c8000, 0x385cc000, 0x385d0000, 0x385d4000, 0x385d8000, 0x385dc000,
0x385e0000, 0x385e4000, 0x385e8000, 0x385ec000, 0x385f0000, 0x385f4000, 0x385f8000, 0x385fc000,
0x38600000, 0x38604000, 0x38608000, 0x3860c000, 0x38610000, 0x38614000, 0x38618000, 0x3861c000,
0x38620000, 0x38624000, 0x38628000, 0x3862c000, 0x38630000, 0x38634000, 0x38638000, 0x3863c000,
0x38640000, 0x38644000, 0x38648000, 0x3864c000, 0x38650000, 0x38654000, 0x38658000, 0x3865c000,
0x38660000, 0x38664000, 0x38668000, 0x3866c000, 0x38670000, 0x38674000, 0x38678000, 0x3867c000,
0x38680000, 0x38684000, 0x38688000, 0x3868c000, 0x38690000, 0x38694000, 0x38698000, 0x3869c000,
0x386a0000, 0x386a4000, 0x386a8000, 0x386ac000, 0x386b0000, 0x386b4000, 0x386b8000, 0x386bc000,
0x386c0000, 0x386c4000, 0x386c8000, 0x386cc000, 0x386d0000, 0x386d4000, 0x386d8000, 0x386dc000,
0x386e0000, 0x386e4000, 0x386e8000, 0x386ec000, 0x386f0000, 0x386f4000, 0x386f8000, 0x386fc000,
0x38700000, 0x38704000, 0x38708000, 0x3870c000, 0x38710000, 0x38714000, 0x38718000, 0x3871c000,
0x38720000, 0x38724000, 0x38728000, 0x3872c000, 0x38730000, 0x38734000, 0x38738000, 0x3873c000,
0x38740000, 0x38744000, 0x38748000, 0x3874c000, 0x38750000, 0x38754000, 0x38758000, 0x3875c000,
0x38760000, 0x38764000, 0x38768000, 0x3876c000, 0x38770000, 0x38774000, 0x38778000, 0x3877c000,
0x38780000, 0x38784000, 0x38788000, 0x3878c000, 0x38790000, 0x38794000, 0x38798000, 0x3879c000,
0x387a0000, 0x387a4000, 0x387a8000, 0x387ac000, 0x387b0000, 0x387b4000, 0x387b8000, 0x387bc000,
0x387c0000, 0x387c4000, 0x387c8000, 0x387cc000, 0x387d0000, 0x387d4000, 0x387d8000, 0x387dc000,
0x387e0000, 0x387e4000, 0x387e8000, 0x387ec000, 0x387f0000, 0x387f4000, 0x387f8000, 0x387fc000,
0x38000000, 0x38002000, 0x38004000, 0x38006000, 0x38008000, 0x3800a000, 0x3800c000, 0x3800e000,
0x38010000, 0x38012000, 0x38014000, 0x38016000, 0x38018000, 0x3801a000, 0x3801c000, 0x3801e000,
0x38020000, 0x38022000, 0x38024000, 0x38026000, 0x38028000, 0x3802a000, 0x3802c000, 0x3802e000,
0x38030000, 0x38032000, 0x38034000, 0x38036000, 0x38038000, 0x3803a000, 0x3803c000, 0x3803e000,
0x38040000, 0x38042000, 0x38044000, 0x38046000, 0x38048000, 0x3804a000, 0x3804c000, 0x3804e000,
0x38050000, 0x38052000, 0x38054000, 0x38056000, 0x38058000, 0x3805a000, 0x3805c000, 0x3805e000,
0x38060000, 0x38062000, 0x38064000, 0x38066000, 0x38068000, 0x3806a000, 0x3806c000, 0x3806e000,
0x38070000, 0x38072000, 0x38074000, 0x38076000, 0x38078000, 0x3807a000, 0x3807c000, 0x3807e000,
0x38080000, 0x38082000, 0x38084000, 0x38086000, 0x38088000, 0x3808a000, 0x3808c000, 0x3808e000,
0x38090000, 0x38092000, 0x38094000, 0x38096000, 0x38098000, 0x3809a000, 0x3809c000, 0x3809e000,
0x380a0000, 0x380a2000, 0x380a4000, 0x380a6000, 0x380a8000, 0x380aa000, 0x380ac000, 0x380ae000,
0x380b0000, 0x380b2000, 0x380b4000, 0x380b6000, 0x380b8000, 0x380ba000, 0x380bc000, 0x380be000,
0x380c0000, 0x380c2000, 0x380c4000, 0x380c6000, 0x380c8000, 0x380ca000, 0x380cc000, 0x380ce000,
0x380d0000, 0x380d2000, 0x380d4000, 0x380d6000, 0x380d8000, 0x380da000, 0x380dc000, 0x380de000,
0x380e0000, 0x380e2000, 0x380e4000, 0x380e6000, 0x380e8000, 0x380ea000, 0x380ec000, 0x380ee000,
0x380f0000, 0x380f2000, 0x380f4000, 0x380f6000, 0x380f8000, 0x380fa000, 0x380fc000, 0x380fe000,
0x38100000, 0x38102000, 0x38104000, 0x38106000, 0x38108000, 0x3810a000, 0x3810c000, 0x3810e000,
0x38110000, 0x38112000, 0x38114000, 0x38116000, 0x38118000, 0x3811a000, 0x3811c000, 0x3811e000,
0x38120000, 0x38122000, 0x38124000, 0x38126000, 0x38128000, 0x3812a000, 0x3812c000, 0x3812e000,
0x38130000, 0x38132000, 0x38134000, 0x38136000, 0x38138000, 0x3813a000, 0x3813c000, 0x3813e000,
0x38140000, 0x38142000, 0x38144000, 0x38146000, 0x38148000, 0x3814a000, 0x3814c000, 0x3814e000,
0x38150000, 0x38152000, 0x38154000, 0x38156000, 0x38158000, 0x3815a000, 0x3815c000, 0x3815e000,
0x38160000, 0x38162000, 0x38164000, 0x38166000, 0x38168000, 0x3816a000, 0x3816c000, 0x3816e000,
0x38170000, 0x38172000, 0x38174000, 0x38176000, 0x38178000, 0x3817a000, 0x3817c000, 0x3817e000,
0x38180000, 0x38182000, 0x38184000, 0x38186000, 0x38188000, 0x3818a000, 0x3818c000, 0x3818e000,
0x38190000, 0x38192000, 0x38194000, 0x38196000, 0x38198000, 0x3819a000, 0x3819c000, 0x3819e000,
0x381a0000, 0x381a2000, 0x381a4000, 0x381a6000, 0x381a8000, 0x381aa000, 0x381ac000, 0x381ae000,
0x381b0000, 0x381b2000, 0x381b4000, 0x381b6000, 0x381b8000, 0x381ba000, 0x381bc000, 0x381be000,
0x381c0000, 0x381c2000, 0x381c4000, 0x381c6000, 0x381c8000, 0x381ca000, 0x381cc000, 0x381ce000,
0x381d0000, 0x381d2000, 0x381d4000, 0x381d6000, 0x381d8000, 0x381da000, 0x381dc000, 0x381de000,
0x381e0000, 0x381e2000, 0x381e4000, 0x381e6000, 0x381e8000, 0x381ea000, 0x381ec000, 0x381ee000,
0x381f0000, 0x381f2000, 0x381f4000, 0x381f6000, 0x381f8000, 0x381fa000, 0x381fc000, 0x381fe000,
0x38200000, 0x38202000, 0x38204000, 0x38206000, 0x38208000, 0x3820a000, 0x3820c000, 0x3820e000,
0x38210000, 0x38212000, 0x38214000, 0x38216000, 0x38218000, 0x3821a000, 0x3821c000, 0x3821e000,
0x38220000, 0x38222000, 0x38224000, 0x38226000, 0x38228000, 0x3822a000, 0x3822c000, 0x3822e000,
0x38230000, 0x38232000, 0x38234000, 0x38236000, 0x38238000, 0x3823a000, 0x3823c000, 0x3823e000,
0x38240000, 0x38242000, 0x38244000, 0x38246000, 0x38248000, 0x3824a000, 0x3824c000, 0x3824e000,
0x38250000, 0x38252000, 0x38254000, 0x38256000, 0x38258000, 0x3825a000, 0x3825c000, 0x3825e000,
0x38260000, 0x38262000, 0x38264000, 0x38266000, 0x38268000, 0x3826a000, 0x3826c000, 0x3826e000,
0x38270000, 0x38272000, 0x38274000, 0x38276000, 0x38278000, 0x3827a000, 0x3827c000, 0x3827e000,
0x38280000, 0x38282000, 0x38284000, 0x38286000, 0x38288000, 0x3828a000, 0x3828c000, 0x3828e000,
0x38290000, 0x38292000, 0x38294000, 0x38296000, 0x38298000, 0x3829a000, 0x3829c000, 0x3829e000,
0x382a0000, 0x382a2000, 0x382a4000, 0x382a6000, 0x382a8000, 0x382aa000, 0x382ac000, 0x382ae000,
0x382b0000, 0x382b2000, 0x382b4000, 0x382b6000, 0x382b8000, 0x382ba000, 0x382bc000, 0x382be000,
0x382c0000, 0x382c2000, 0x382c4000, 0x382c6000, 0x382c8000, 0x382ca000, 0x382cc000, 0x382ce000,
0x382d0000, 0x382d2000, 0x382d4000, 0x382d6000, 0x382d8000, 0x382da000, 0x382dc000, 0x382de000,
0x382e0000, 0x382e2000, 0x382e4000, 0x382e6000, 0x382e8000, 0x382ea000, 0x382ec000, 0x382ee000,
0x382f0000, 0x382f2000, 0x382f4000, 0x382f6000, 0x382f8000, 0x382fa000, 0x382fc000, 0x382fe000,
0x38300000, 0x38302000, 0x38304000, 0x38306000, 0x38308000, 0x3830a000, 0x3830c000, 0x3830e000,
0x38310000, 0x38312000, 0x38314000, 0x38316000, 0x38318000, 0x3831a000, 0x3831c000, 0x3831e000,
0x38320000, 0x38322000, 0x38324000, 0x38326000, 0x38328000, 0x3832a000, 0x3832c000, 0x3832e000,
0x38330000, 0x38332000, 0x38334000, 0x38336000, 0x38338000, 0x3833a000, 0x3833c000, 0x3833e000,
0x38340000, 0x38342000, 0x38344000, 0x38346000, 0x38348000, 0x3834a000, 0x3834c000, 0x3834e000,
0x38350000, 0x38352000, 0x38354000, 0x38356000, 0x38358000, 0x3835a000, 0x3835c000, 0x3835e000,
0x38360000, 0x38362000, 0x38364000, 0x38366000, 0x38368000, 0x3836a000, 0x3836c000, 0x3836e000,
0x38370000, 0x38372000, 0x38374000, 0x38376000, 0x38378000, 0x3837a000, 0x3837c000, 0x3837e000,
0x38380000, 0x38382000, 0x38384000, 0x38386000, 0x38388000, 0x3838a000, 0x3838c000, 0x3838e000,
0x38390000, 0x38392000, 0x38394000, 0x38396000, 0x38398000, 0x3839a000, 0x3839c000, 0x3839e000,
0x383a0000, 0x383a2000, 0x383a4000, 0x383a6000, 0x383a8000, 0x383aa000, 0x383ac000, 0x383ae000,
0x383b0000, 0x383b2000, 0x383b4000, 0x383b6000, 0x383b8000, 0x383ba000, 0x383bc000, 0x383be000,
0x383c0000, 0x383c2000, 0x383c4000, 0x383c6000, 0x383c8000, 0x383ca000, 0x383cc000, 0x383ce000,
0x383d0000, 0x383d2000, 0x383d4000, 0x383d6000, 0x383d8000, 0x383da000, 0x383dc000, 0x383de000,
0x383e0000, 0x383e2000, 0x383e4000, 0x383e6000, 0x383e8000, 0x383ea000, 0x383ec000, 0x383ee000,
0x383f0000, 0x383f2000, 0x383f4000, 0x383f6000, 0x383f8000, 0x383fa000, 0x383fc000, 0x383fe000,
0x38400000, 0x38402000, 0x38404000, 0x38406000, 0x38408000, 0x3840a000, 0x3840c000, 0x3840e000,
0x38410000, 0x38412000, 0x38414000, 0x38416000, 0x38418000, 0x3841a000, 0x3841c000, 0x3841e000,
0x38420000, 0x38422000, 0x38424000, 0x38426000, 0x38428000, 0x3842a000, 0x3842c000, 0x3842e000,
0x38430000, 0x38432000, 0x38434000, 0x38436000, 0x38438000, 0x3843a000, 0x3843c000, 0x3843e000,
0x38440000, 0x38442000, 0x38444000, 0x38446000, 0x38448000, 0x3844a000, 0x3844c000, 0x3844e000,
0x38450000, 0x38452000, 0x38454000, 0x38456000, 0x38458000, 0x3845a000, 0x3845c000, 0x3845e000,
0x38460000, 0x38462000, 0x38464000, 0x38466000, 0x38468000, 0x3846a000, 0x3846c000, 0x3846e000,
0x38470000, 0x38472000, 0x38474000, 0x38476000, 0x38478000, 0x3847a000, 0x3847c000, 0x3847e000,
0x38480000, 0x38482000, 0x38484000, 0x38486000, 0x38488000, 0x3848a000, 0x3848c000, 0x3848e000,
0x38490000, 0x38492000, 0x38494000, 0x38496000, 0x38498000, 0x3849a000, 0x3849c000, 0x3849e000,
0x384a0000, 0x384a2000, 0x384a4000, 0x384a6000, 0x384a8000, 0x384aa000, 0x384ac000, 0x384ae000,
0x384b0000, 0x384b2000, 0x384b4000, 0x384b6000, 0x384b8000, 0x384ba000, 0x384bc000, 0x384be000,
0x384c0000, 0x384c2000, 0x384c4000, 0x384c6000, 0x384c8000, 0x384ca000, 0x384cc000, 0x384ce000,
0x384d0000, 0x384d2000, 0x384d4000, 0x384d6000, 0x384d8000, 0x384da000, 0x384dc000, 0x384de000,
0x384e0000, 0x384e2000, 0x384e4000, 0x384e6000, 0x384e8000, 0x384ea000, 0x384ec000, 0x384ee000,
0x384f0000, 0x384f2000, 0x384f4000, 0x384f6000, 0x384f8000, 0x384fa000, 0x384fc000, 0x384fe000,
0x38500000, 0x38502000, 0x38504000, 0x38506000, 0x38508000, 0x3850a000, 0x3850c000, 0x3850e000,
0x38510000, 0x38512000, 0x38514000, 0x38516000, 0x38518000, 0x3851a000, 0x3851c000, 0x3851e000,
0x38520000, 0x38522000, 0x38524000, 0x38526000, 0x38528000, 0x3852a000, 0x3852c000, 0x3852e000,
0x38530000, 0x38532000, 0x38534000, 0x38536000, 0x38538000, 0x3853a000, 0x3853c000, 0x3853e000,
0x38540000, 0x38542000, 0x38544000, 0x38546000, 0x38548000, 0x3854a000, 0x3854c000, 0x3854e000,
0x38550000, 0x38552000, 0x38554000, 0x38556000, 0x38558000, 0x3855a000, 0x3855c000, 0x3855e000,
0x38560000, 0x38562000, 0x38564000, 0x38566000, 0x38568000, 0x3856a000, 0x3856c000, 0x3856e000,
0x38570000, 0x38572000, 0x38574000, 0x38576000, 0x38578000, 0x3857a000, 0x3857c000, 0x3857e000,
0x38580000, 0x38582000, 0x38584000, 0x38586000, 0x38588000, 0x3858a000, 0x3858c000, 0x3858e000,
0x38590000, 0x38592000, 0x38594000, 0x38596000, 0x38598000, 0x3859a000, 0x3859c000, 0x3859e000,
0x385a0000, 0x385a2000, 0x385a4000, 0x385a6000, 0x385a8000, 0x385aa000, 0x385ac000, 0x385ae000,
0x385b0000, 0x385b2000, 0x385b4000, 0x385b6000, 0x385b8000, 0x385ba000, 0x385bc000, 0x385be000,
0x385c0000, 0x385c2000, 0x385c4000, 0x385c6000, 0x385c8000, 0x385ca000, 0x385cc000, 0x385ce000,
0x385d0000, 0x385d2000, 0x385d4000, 0x385d6000, 0x385d8000, 0x385da000, 0x385dc000, 0x385de000,
0x385e0000, 0x385e2000, 0x385e4000, 0x385e6000, 0x385e8000, 0x385ea000, 0x385ec000, 0x385ee000,
0x385f0000, 0x385f2000, 0x385f4000, 0x385f6000, 0x385f8000, 0x385fa000, 0x385fc000, 0x385fe000,
0x38600000, 0x38602000, 0x38604000, 0x38606000, 0x38608000, 0x3860a000, 0x3860c000, 0x3860e000,
0x38610000, 0x38612000, 0x38614000, 0x38616000, 0x38618000, 0x3861a000, 0x3861c000, 0x3861e000,
0x38620000, 0x38622000, 0x38624000, 0x38626000, 0x38628000, 0x3862a000, 0x3862c000, 0x3862e000,
0x38630000, 0x38632000, 0x38634000, 0x38636000, 0x38638000, 0x3863a000, 0x3863c000, 0x3863e000,
0x38640000, 0x38642000, 0x38644000, 0x38646000, 0x38648000, 0x3864a000, 0x3864c000, 0x3864e000,
0x38650000, 0x38652000, 0x38654000, 0x38656000, 0x38658000, 0x3865a000, 0x3865c000, 0x3865e000,
0x38660000, 0x38662000, 0x38664000, 0x38666000, 0x38668000, 0x3866a000, 0x3866c000, 0x3866e000,
0x38670000, 0x38672000, 0x38674000, 0x38676000, 0x38678000, 0x3867a000, 0x3867c000, 0x3867e000,
0x38680000, 0x38682000, 0x38684000, 0x38686000, 0x38688000, 0x3868a000, 0x3868c000, 0x3868e000,
0x38690000, 0x38692000, 0x38694000, 0x38696000, 0x38698000, 0x3869a000, 0x3869c000, 0x3869e000,
0x386a0000, 0x386a2000, 0x386a4000, 0x386a6000, 0x386a8000, 0x386aa000, 0x386ac000, 0x386ae000,
0x386b0000, 0x386b2000, 0x386b4000, 0x386b6000, 0x386b8000, 0x386ba000, 0x386bc000, 0x386be000,
0x386c0000, 0x386c2000, 0x386c4000, 0x386c6000, 0x386c8000, 0x386ca000, 0x386cc000, 0x386ce000,
0x386d0000, 0x386d2000, 0x386d4000, 0x386d6000, 0x386d8000, 0x386da000, 0x386dc000, 0x386de000,
0x386e0000, 0x386e2000, 0x386e4000, 0x386e6000, 0x386e8000, 0x386ea000, 0x386ec000, 0x386ee000,
0x386f0000, 0x386f2000, 0x386f4000, 0x386f6000, 0x386f8000, 0x386fa000, 0x386fc000, 0x386fe000,
0x38700000, 0x38702000, 0x38704000, 0x38706000, 0x38708000, 0x3870a000, 0x3870c000, 0x3870e000,
0x38710000, 0x38712000, 0x38714000, 0x38716000, 0x38718000, 0x3871a000, 0x3871c000, 0x3871e000,
0x38720000, 0x38722000, 0x38724000, 0x38726000, 0x38728000, 0x3872a000, 0x3872c000, 0x3872e000,
0x38730000, 0x38732000, 0x38734000, 0x38736000, 0x38738000, 0x3873a000, 0x3873c000, 0x3873e000,
0x38740000, 0x38742000, 0x38744000, 0x38746000, 0x38748000, 0x3874a000, 0x3874c000, 0x3874e000,
0x38750000, 0x38752000, 0x38754000, 0x38756000, 0x38758000, 0x3875a000, 0x3875c000, 0x3875e000,
0x38760000, 0x38762000, 0x38764000, 0x38766000, 0x38768000, 0x3876a000, 0x3876c000, 0x3876e000,
0x38770000, 0x38772000, 0x38774000, 0x38776000, 0x38778000, 0x3877a000, 0x3877c000, 0x3877e000,
0x38780000, 0x38782000, 0x38784000, 0x38786000, 0x38788000, 0x3878a000, 0x3878c000, 0x3878e000,
0x38790000, 0x38792000, 0x38794000, 0x38796000, 0x38798000, 0x3879a000, 0x3879c000, 0x3879e000,
0x387a0000, 0x387a2000, 0x387a4000, 0x387a6000, 0x387a8000, 0x387aa000, 0x387ac000, 0x387ae000,
0x387b0000, 0x387b2000, 0x387b4000, 0x387b6000, 0x387b8000, 0x387ba000, 0x387bc000, 0x387be000,
0x387c0000, 0x387c2000, 0x387c4000, 0x387c6000, 0x387c8000, 0x387ca000, 0x387cc000, 0x387ce000,
0x387d0000, 0x387d2000, 0x387d4000, 0x387d6000, 0x387d8000, 0x387da000, 0x387dc000, 0x387de000,
0x387e0000, 0x387e2000, 0x387e4000, 0x387e6000, 0x387e8000, 0x387ea000, 0x387ec000, 0x387ee000,
0x387f0000, 0x387f2000, 0x387f4000, 0x387f6000, 0x387f8000, 0x387fa000, 0x387fc000, 0x387fe000
};
static const uint32_t mantissatable[2048] = {
0x00000000, 0x33800000, 0x34000000, 0x34400000, 0x34800000, 0x34a00000,
0x34c00000, 0x34e00000, 0x35000000, 0x35100000, 0x35200000, 0x35300000,
0x35400000, 0x35500000, 0x35600000, 0x35700000, 0x35800000, 0x35880000,
0x35900000, 0x35980000, 0x35a00000, 0x35a80000, 0x35b00000, 0x35b80000,
0x35c00000, 0x35c80000, 0x35d00000, 0x35d80000, 0x35e00000, 0x35e80000,
0x35f00000, 0x35f80000, 0x36000000, 0x36040000, 0x36080000, 0x360c0000,
0x36100000, 0x36140000, 0x36180000, 0x361c0000, 0x36200000, 0x36240000,
0x36280000, 0x362c0000, 0x36300000, 0x36340000, 0x36380000, 0x363c0000,
0x36400000, 0x36440000, 0x36480000, 0x364c0000, 0x36500000, 0x36540000,
0x36580000, 0x365c0000, 0x36600000, 0x36640000, 0x36680000, 0x366c0000,
0x36700000, 0x36740000, 0x36780000, 0x367c0000, 0x36800000, 0x36820000,
0x36840000, 0x36860000, 0x36880000, 0x368a0000, 0x368c0000, 0x368e0000,
0x36900000, 0x36920000, 0x36940000, 0x36960000, 0x36980000, 0x369a0000,
0x369c0000, 0x369e0000, 0x36a00000, 0x36a20000, 0x36a40000, 0x36a60000,
0x36a80000, 0x36aa0000, 0x36ac0000, 0x36ae0000, 0x36b00000, 0x36b20000,
0x36b40000, 0x36b60000, 0x36b80000, 0x36ba0000, 0x36bc0000, 0x36be0000,
0x36c00000, 0x36c20000, 0x36c40000, 0x36c60000, 0x36c80000, 0x36ca0000,
0x36cc0000, 0x36ce0000, 0x36d00000, 0x36d20000, 0x36d40000, 0x36d60000,
0x36d80000, 0x36da0000, 0x36dc0000, 0x36de0000, 0x36e00000, 0x36e20000,
0x36e40000, 0x36e60000, 0x36e80000, 0x36ea0000, 0x36ec0000, 0x36ee0000,
0x36f00000, 0x36f20000, 0x36f40000, 0x36f60000, 0x36f80000, 0x36fa0000,
0x36fc0000, 0x36fe0000, 0x37000000, 0x37010000, 0x37020000, 0x37030000,
0x37040000, 0x37050000, 0x37060000, 0x37070000, 0x37080000, 0x37090000,
0x370a0000, 0x370b0000, 0x370c0000, 0x370d0000, 0x370e0000, 0x370f0000,
0x37100000, 0x37110000, 0x37120000, 0x37130000, 0x37140000, 0x37150000,
0x37160000, 0x37170000, 0x37180000, 0x37190000, 0x371a0000, 0x371b0000,
0x371c0000, 0x371d0000, 0x371e0000, 0x371f0000, 0x37200000, 0x37210000,
0x37220000, 0x37230000, 0x37240000, 0x37250000, 0x37260000, 0x37270000,
0x37280000, 0x37290000, 0x372a0000, 0x372b0000, 0x372c0000, 0x372d0000,
0x372e0000, 0x372f0000, 0x37300000, 0x37310000, 0x37320000, 0x37330000,
0x37340000, 0x37350000, 0x37360000, 0x37370000, 0x37380000, 0x37390000,
0x373a0000, 0x373b0000, 0x373c0000, 0x373d0000, 0x373e0000, 0x373f0000,
0x37400000, 0x37410000, 0x37420000, 0x37430000, 0x37440000, 0x37450000,
0x37460000, 0x37470000, 0x37480000, 0x37490000, 0x374a0000, 0x374b0000,
0x374c0000, 0x374d0000, 0x374e0000, 0x374f0000, 0x37500000, 0x37510000,
0x37520000, 0x37530000, 0x37540000, 0x37550000, 0x37560000, 0x37570000,
0x37580000, 0x37590000, 0x375a0000, 0x375b0000, 0x375c0000, 0x375d0000,
0x375e0000, 0x375f0000, 0x37600000, 0x37610000, 0x37620000, 0x37630000,
0x37640000, 0x37650000, 0x37660000, 0x37670000, 0x37680000, 0x37690000,
0x376a0000, 0x376b0000, 0x376c0000, 0x376d0000, 0x376e0000, 0x376f0000,
0x37700000, 0x37710000, 0x37720000, 0x37730000, 0x37740000, 0x37750000,
0x37760000, 0x37770000, 0x37780000, 0x37790000, 0x377a0000, 0x377b0000,
0x377c0000, 0x377d0000, 0x377e0000, 0x377f0000, 0x37800000, 0x37808000,
0x37810000, 0x37818000, 0x37820000, 0x37828000, 0x37830000, 0x37838000,
0x37840000, 0x37848000, 0x37850000, 0x37858000, 0x37860000, 0x37868000,
0x37870000, 0x37878000, 0x37880000, 0x37888000, 0x37890000, 0x37898000,
0x378a0000, 0x378a8000, 0x378b0000, 0x378b8000, 0x378c0000, 0x378c8000,
0x378d0000, 0x378d8000, 0x378e0000, 0x378e8000, 0x378f0000, 0x378f8000,
0x37900000, 0x37908000, 0x37910000, 0x37918000, 0x37920000, 0x37928000,
0x37930000, 0x37938000, 0x37940000, 0x37948000, 0x37950000, 0x37958000,
0x37960000, 0x37968000, 0x37970000, 0x37978000, 0x37980000, 0x37988000,
0x37990000, 0x37998000, 0x379a0000, 0x379a8000, 0x379b0000, 0x379b8000,
0x379c0000, 0x379c8000, 0x379d0000, 0x379d8000, 0x379e0000, 0x379e8000,
0x379f0000, 0x379f8000, 0x37a00000, 0x37a08000, 0x37a10000, 0x37a18000,
0x37a20000, 0x37a28000, 0x37a30000, 0x37a38000, 0x37a40000, 0x37a48000,
0x37a50000, 0x37a58000, 0x37a60000, 0x37a68000, 0x37a70000, 0x37a78000,
0x37a80000, 0x37a88000, 0x37a90000, 0x37a98000, 0x37aa0000, 0x37aa8000,
0x37ab0000, 0x37ab8000, 0x37ac0000, 0x37ac8000, 0x37ad0000, 0x37ad8000,
0x37ae0000, 0x37ae8000, 0x37af0000, 0x37af8000, 0x37b00000, 0x37b08000,
0x37b10000, 0x37b18000, 0x37b20000, 0x37b28000, 0x37b30000, 0x37b38000,
0x37b40000, 0x37b48000, 0x37b50000, 0x37b58000, 0x37b60000, 0x37b68000,
0x37b70000, 0x37b78000, 0x37b80000, 0x37b88000, 0x37b90000, 0x37b98000,
0x37ba0000, 0x37ba8000, 0x37bb0000, 0x37bb8000, 0x37bc0000, 0x37bc8000,
0x37bd0000, 0x37bd8000, 0x37be0000, 0x37be8000, 0x37bf0000, 0x37bf8000,
0x37c00000, 0x37c08000, 0x37c10000, 0x37c18000, 0x37c20000, 0x37c28000,
0x37c30000, 0x37c38000, 0x37c40000, 0x37c48000, 0x37c50000, 0x37c58000,
0x37c60000, 0x37c68000, 0x37c70000, 0x37c78000, 0x37c80000, 0x37c88000,
0x37c90000, 0x37c98000, 0x37ca0000, 0x37ca8000, 0x37cb0000, 0x37cb8000,
0x37cc0000, 0x37cc8000, 0x37cd0000, 0x37cd8000, 0x37ce0000, 0x37ce8000,
0x37cf0000, 0x37cf8000, 0x37d00000, 0x37d08000, 0x37d10000, 0x37d18000,
0x37d20000, 0x37d28000, 0x37d30000, 0x37d38000, 0x37d40000, 0x37d48000,
0x37d50000, 0x37d58000, 0x37d60000, 0x37d68000, 0x37d70000, 0x37d78000,
0x37d80000, 0x37d88000, 0x37d90000, 0x37d98000, 0x37da0000, 0x37da8000,
0x37db0000, 0x37db8000, 0x37dc0000, 0x37dc8000, 0x37dd0000, 0x37dd8000,
0x37de0000, 0x37de8000, 0x37df0000, 0x37df8000, 0x37e00000, 0x37e08000,
0x37e10000, 0x37e18000, 0x37e20000, 0x37e28000, 0x37e30000, 0x37e38000,
0x37e40000, 0x37e48000, 0x37e50000, 0x37e58000, 0x37e60000, 0x37e68000,
0x37e70000, 0x37e78000, 0x37e80000, 0x37e88000, 0x37e90000, 0x37e98000,
0x37ea0000, 0x37ea8000, 0x37eb0000, 0x37eb8000, 0x37ec0000, 0x37ec8000,
0x37ed0000, 0x37ed8000, 0x37ee0000, 0x37ee8000, 0x37ef0000, 0x37ef8000,
0x37f00000, 0x37f08000, 0x37f10000, 0x37f18000, 0x37f20000, 0x37f28000,
0x37f30000, 0x37f38000, 0x37f40000, 0x37f48000, 0x37f50000, 0x37f58000,
0x37f60000, 0x37f68000, 0x37f70000, 0x37f78000, 0x37f80000, 0x37f88000,
0x37f90000, 0x37f98000, 0x37fa0000, 0x37fa8000, 0x37fb0000, 0x37fb8000,
0x37fc0000, 0x37fc8000, 0x37fd0000, 0x37fd8000, 0x37fe0000, 0x37fe8000,
0x37ff0000, 0x37ff8000, 0x38000000, 0x38004000, 0x38008000, 0x3800c000,
0x38010000, 0x38014000, 0x38018000, 0x3801c000, 0x38020000, 0x38024000,
0x38028000, 0x3802c000, 0x38030000, 0x38034000, 0x38038000, 0x3803c000,
0x38040000, 0x38044000, 0x38048000, 0x3804c000, 0x38050000, 0x38054000,
0x38058000, 0x3805c000, 0x38060000, 0x38064000, 0x38068000, 0x3806c000,
0x38070000, 0x38074000, 0x38078000, 0x3807c000, 0x38080000, 0x38084000,
0x38088000, 0x3808c000, 0x38090000, 0x38094000, 0x38098000, 0x3809c000,
0x380a0000, 0x380a4000, 0x380a8000, 0x380ac000, 0x380b0000, 0x380b4000,
0x380b8000, 0x380bc000, 0x380c0000, 0x380c4000, 0x380c8000, 0x380cc000,
0x380d0000, 0x380d4000, 0x380d8000, 0x380dc000, 0x380e0000, 0x380e4000,
0x380e8000, 0x380ec000, 0x380f0000, 0x380f4000, 0x380f8000, 0x380fc000,
0x38100000, 0x38104000, 0x38108000, 0x3810c000, 0x38110000, 0x38114000,
0x38118000, 0x3811c000, 0x38120000, 0x38124000, 0x38128000, 0x3812c000,
0x38130000, 0x38134000, 0x38138000, 0x3813c000, 0x38140000, 0x38144000,
0x38148000, 0x3814c000, 0x38150000, 0x38154000, 0x38158000, 0x3815c000,
0x38160000, 0x38164000, 0x38168000, 0x3816c000, 0x38170000, 0x38174000,
0x38178000, 0x3817c000, 0x38180000, 0x38184000, 0x38188000, 0x3818c000,
0x38190000, 0x38194000, 0x38198000, 0x3819c000, 0x381a0000, 0x381a4000,
0x381a8000, 0x381ac000, 0x381b0000, 0x381b4000, 0x381b8000, 0x381bc000,
0x381c0000, 0x381c4000, 0x381c8000, 0x381cc000, 0x381d0000, 0x381d4000,
0x381d8000, 0x381dc000, 0x381e0000, 0x381e4000, 0x381e8000, 0x381ec000,
0x381f0000, 0x381f4000, 0x381f8000, 0x381fc000, 0x38200000, 0x38204000,
0x38208000, 0x3820c000, 0x38210000, 0x38214000, 0x38218000, 0x3821c000,
0x38220000, 0x38224000, 0x38228000, 0x3822c000, 0x38230000, 0x38234000,
0x38238000, 0x3823c000, 0x38240000, 0x38244000, 0x38248000, 0x3824c000,
0x38250000, 0x38254000, 0x38258000, 0x3825c000, 0x38260000, 0x38264000,
0x38268000, 0x3826c000, 0x38270000, 0x38274000, 0x38278000, 0x3827c000,
0x38280000, 0x38284000, 0x38288000, 0x3828c000, 0x38290000, 0x38294000,
0x38298000, 0x3829c000, 0x382a0000, 0x382a4000, 0x382a8000, 0x382ac000,
0x382b0000, 0x382b4000, 0x382b8000, 0x382bc000, 0x382c0000, 0x382c4000,
0x382c8000, 0x382cc000, 0x382d0000, 0x382d4000, 0x382d8000, 0x382dc000,
0x382e0000, 0x382e4000, 0x382e8000, 0x382ec000, 0x382f0000, 0x382f4000,
0x382f8000, 0x382fc000, 0x38300000, 0x38304000, 0x38308000, 0x3830c000,
0x38310000, 0x38314000, 0x38318000, 0x3831c000, 0x38320000, 0x38324000,
0x38328000, 0x3832c000, 0x38330000, 0x38334000, 0x38338000, 0x3833c000,
0x38340000, 0x38344000, 0x38348000, 0x3834c000, 0x38350000, 0x38354000,
0x38358000, 0x3835c000, 0x38360000, 0x38364000, 0x38368000, 0x3836c000,
0x38370000, 0x38374000, 0x38378000, 0x3837c000, 0x38380000, 0x38384000,
0x38388000, 0x3838c000, 0x38390000, 0x38394000, 0x38398000, 0x3839c000,
0x383a0000, 0x383a4000, 0x383a8000, 0x383ac000, 0x383b0000, 0x383b4000,
0x383b8000, 0x383bc000, 0x383c0000, 0x383c4000, 0x383c8000, 0x383cc000,
0x383d0000, 0x383d4000, 0x383d8000, 0x383dc000, 0x383e0000, 0x383e4000,
0x383e8000, 0x383ec000, 0x383f0000, 0x383f4000, 0x383f8000, 0x383fc000,
0x38400000, 0x38404000, 0x38408000, 0x3840c000, 0x38410000, 0x38414000,
0x38418000, 0x3841c000, 0x38420000, 0x38424000, 0x38428000, 0x3842c000,
0x38430000, 0x38434000, 0x38438000, 0x3843c000, 0x38440000, 0x38444000,
0x38448000, 0x3844c000, 0x38450000, 0x38454000, 0x38458000, 0x3845c000,
0x38460000, 0x38464000, 0x38468000, 0x3846c000, 0x38470000, 0x38474000,
0x38478000, 0x3847c000, 0x38480000, 0x38484000, 0x38488000, 0x3848c000,
0x38490000, 0x38494000, 0x38498000, 0x3849c000, 0x384a0000, 0x384a4000,
0x384a8000, 0x384ac000, 0x384b0000, 0x384b4000, 0x384b8000, 0x384bc000,
0x384c0000, 0x384c4000, 0x384c8000, 0x384cc000, 0x384d0000, 0x384d4000,
0x384d8000, 0x384dc000, 0x384e0000, 0x384e4000, 0x384e8000, 0x384ec000,
0x384f0000, 0x384f4000, 0x384f8000, 0x384fc000, 0x38500000, 0x38504000,
0x38508000, 0x3850c000, 0x38510000, 0x38514000, 0x38518000, 0x3851c000,
0x38520000, 0x38524000, 0x38528000, 0x3852c000, 0x38530000, 0x38534000,
0x38538000, 0x3853c000, 0x38540000, 0x38544000, 0x38548000, 0x3854c000,
0x38550000, 0x38554000, 0x38558000, 0x3855c000, 0x38560000, 0x38564000,
0x38568000, 0x3856c000, 0x38570000, 0x38574000, 0x38578000, 0x3857c000,
0x38580000, 0x38584000, 0x38588000, 0x3858c000, 0x38590000, 0x38594000,
0x38598000, 0x3859c000, 0x385a0000, 0x385a4000, 0x385a8000, 0x385ac000,
0x385b0000, 0x385b4000, 0x385b8000, 0x385bc000, 0x385c0000, 0x385c4000,
0x385c8000, 0x385cc000, 0x385d0000, 0x385d4000, 0x385d8000, 0x385dc000,
0x385e0000, 0x385e4000, 0x385e8000, 0x385ec000, 0x385f0000, 0x385f4000,
0x385f8000, 0x385fc000, 0x38600000, 0x38604000, 0x38608000, 0x3860c000,
0x38610000, 0x38614000, 0x38618000, 0x3861c000, 0x38620000, 0x38624000,
0x38628000, 0x3862c000, 0x38630000, 0x38634000, 0x38638000, 0x3863c000,
0x38640000, 0x38644000, 0x38648000, 0x3864c000, 0x38650000, 0x38654000,
0x38658000, 0x3865c000, 0x38660000, 0x38664000, 0x38668000, 0x3866c000,
0x38670000, 0x38674000, 0x38678000, 0x3867c000, 0x38680000, 0x38684000,
0x38688000, 0x3868c000, 0x38690000, 0x38694000, 0x38698000, 0x3869c000,
0x386a0000, 0x386a4000, 0x386a8000, 0x386ac000, 0x386b0000, 0x386b4000,
0x386b8000, 0x386bc000, 0x386c0000, 0x386c4000, 0x386c8000, 0x386cc000,
0x386d0000, 0x386d4000, 0x386d8000, 0x386dc000, 0x386e0000, 0x386e4000,
0x386e8000, 0x386ec000, 0x386f0000, 0x386f4000, 0x386f8000, 0x386fc000,
0x38700000, 0x38704000, 0x38708000, 0x3870c000, 0x38710000, 0x38714000,
0x38718000, 0x3871c000, 0x38720000, 0x38724000, 0x38728000, 0x3872c000,
0x38730000, 0x38734000, 0x38738000, 0x3873c000, 0x38740000, 0x38744000,
0x38748000, 0x3874c000, 0x38750000, 0x38754000, 0x38758000, 0x3875c000,
0x38760000, 0x38764000, 0x38768000, 0x3876c000, 0x38770000, 0x38774000,
0x38778000, 0x3877c000, 0x38780000, 0x38784000, 0x38788000, 0x3878c000,
0x38790000, 0x38794000, 0x38798000, 0x3879c000, 0x387a0000, 0x387a4000,
0x387a8000, 0x387ac000, 0x387b0000, 0x387b4000, 0x387b8000, 0x387bc000,
0x387c0000, 0x387c4000, 0x387c8000, 0x387cc000, 0x387d0000, 0x387d4000,
0x387d8000, 0x387dc000, 0x387e0000, 0x387e4000, 0x387e8000, 0x387ec000,
0x387f0000, 0x387f4000, 0x387f8000, 0x387fc000, 0x38000000, 0x38002000,
0x38004000, 0x38006000, 0x38008000, 0x3800a000, 0x3800c000, 0x3800e000,
0x38010000, 0x38012000, 0x38014000, 0x38016000, 0x38018000, 0x3801a000,
0x3801c000, 0x3801e000, 0x38020000, 0x38022000, 0x38024000, 0x38026000,
0x38028000, 0x3802a000, 0x3802c000, 0x3802e000, 0x38030000, 0x38032000,
0x38034000, 0x38036000, 0x38038000, 0x3803a000, 0x3803c000, 0x3803e000,
0x38040000, 0x38042000, 0x38044000, 0x38046000, 0x38048000, 0x3804a000,
0x3804c000, 0x3804e000, 0x38050000, 0x38052000, 0x38054000, 0x38056000,
0x38058000, 0x3805a000, 0x3805c000, 0x3805e000, 0x38060000, 0x38062000,
0x38064000, 0x38066000, 0x38068000, 0x3806a000, 0x3806c000, 0x3806e000,
0x38070000, 0x38072000, 0x38074000, 0x38076000, 0x38078000, 0x3807a000,
0x3807c000, 0x3807e000, 0x38080000, 0x38082000, 0x38084000, 0x38086000,
0x38088000, 0x3808a000, 0x3808c000, 0x3808e000, 0x38090000, 0x38092000,
0x38094000, 0x38096000, 0x38098000, 0x3809a000, 0x3809c000, 0x3809e000,
0x380a0000, 0x380a2000, 0x380a4000, 0x380a6000, 0x380a8000, 0x380aa000,
0x380ac000, 0x380ae000, 0x380b0000, 0x380b2000, 0x380b4000, 0x380b6000,
0x380b8000, 0x380ba000, 0x380bc000, 0x380be000, 0x380c0000, 0x380c2000,
0x380c4000, 0x380c6000, 0x380c8000, 0x380ca000, 0x380cc000, 0x380ce000,
0x380d0000, 0x380d2000, 0x380d4000, 0x380d6000, 0x380d8000, 0x380da000,
0x380dc000, 0x380de000, 0x380e0000, 0x380e2000, 0x380e4000, 0x380e6000,
0x380e8000, 0x380ea000, 0x380ec000, 0x380ee000, 0x380f0000, 0x380f2000,
0x380f4000, 0x380f6000, 0x380f8000, 0x380fa000, 0x380fc000, 0x380fe000,
0x38100000, 0x38102000, 0x38104000, 0x38106000, 0x38108000, 0x3810a000,
0x3810c000, 0x3810e000, 0x38110000, 0x38112000, 0x38114000, 0x38116000,
0x38118000, 0x3811a000, 0x3811c000, 0x3811e000, 0x38120000, 0x38122000,
0x38124000, 0x38126000, 0x38128000, 0x3812a000, 0x3812c000, 0x3812e000,
0x38130000, 0x38132000, 0x38134000, 0x38136000, 0x38138000, 0x3813a000,
0x3813c000, 0x3813e000, 0x38140000, 0x38142000, 0x38144000, 0x38146000,
0x38148000, 0x3814a000, 0x3814c000, 0x3814e000, 0x38150000, 0x38152000,
0x38154000, 0x38156000, 0x38158000, 0x3815a000, 0x3815c000, 0x3815e000,
0x38160000, 0x38162000, 0x38164000, 0x38166000, 0x38168000, 0x3816a000,
0x3816c000, 0x3816e000, 0x38170000, 0x38172000, 0x38174000, 0x38176000,
0x38178000, 0x3817a000, 0x3817c000, 0x3817e000, 0x38180000, 0x38182000,
0x38184000, 0x38186000, 0x38188000, 0x3818a000, 0x3818c000, 0x3818e000,
0x38190000, 0x38192000, 0x38194000, 0x38196000, 0x38198000, 0x3819a000,
0x3819c000, 0x3819e000, 0x381a0000, 0x381a2000, 0x381a4000, 0x381a6000,
0x381a8000, 0x381aa000, 0x381ac000, 0x381ae000, 0x381b0000, 0x381b2000,
0x381b4000, 0x381b6000, 0x381b8000, 0x381ba000, 0x381bc000, 0x381be000,
0x381c0000, 0x381c2000, 0x381c4000, 0x381c6000, 0x381c8000, 0x381ca000,
0x381cc000, 0x381ce000, 0x381d0000, 0x381d2000, 0x381d4000, 0x381d6000,
0x381d8000, 0x381da000, 0x381dc000, 0x381de000, 0x381e0000, 0x381e2000,
0x381e4000, 0x381e6000, 0x381e8000, 0x381ea000, 0x381ec000, 0x381ee000,
0x381f0000, 0x381f2000, 0x381f4000, 0x381f6000, 0x381f8000, 0x381fa000,
0x381fc000, 0x381fe000, 0x38200000, 0x38202000, 0x38204000, 0x38206000,
0x38208000, 0x3820a000, 0x3820c000, 0x3820e000, 0x38210000, 0x38212000,
0x38214000, 0x38216000, 0x38218000, 0x3821a000, 0x3821c000, 0x3821e000,
0x38220000, 0x38222000, 0x38224000, 0x38226000, 0x38228000, 0x3822a000,
0x3822c000, 0x3822e000, 0x38230000, 0x38232000, 0x38234000, 0x38236000,
0x38238000, 0x3823a000, 0x3823c000, 0x3823e000, 0x38240000, 0x38242000,
0x38244000, 0x38246000, 0x38248000, 0x3824a000, 0x3824c000, 0x3824e000,
0x38250000, 0x38252000, 0x38254000, 0x38256000, 0x38258000, 0x3825a000,
0x3825c000, 0x3825e000, 0x38260000, 0x38262000, 0x38264000, 0x38266000,
0x38268000, 0x3826a000, 0x3826c000, 0x3826e000, 0x38270000, 0x38272000,
0x38274000, 0x38276000, 0x38278000, 0x3827a000, 0x3827c000, 0x3827e000,
0x38280000, 0x38282000, 0x38284000, 0x38286000, 0x38288000, 0x3828a000,
0x3828c000, 0x3828e000, 0x38290000, 0x38292000, 0x38294000, 0x38296000,
0x38298000, 0x3829a000, 0x3829c000, 0x3829e000, 0x382a0000, 0x382a2000,
0x382a4000, 0x382a6000, 0x382a8000, 0x382aa000, 0x382ac000, 0x382ae000,
0x382b0000, 0x382b2000, 0x382b4000, 0x382b6000, 0x382b8000, 0x382ba000,
0x382bc000, 0x382be000, 0x382c0000, 0x382c2000, 0x382c4000, 0x382c6000,
0x382c8000, 0x382ca000, 0x382cc000, 0x382ce000, 0x382d0000, 0x382d2000,
0x382d4000, 0x382d6000, 0x382d8000, 0x382da000, 0x382dc000, 0x382de000,
0x382e0000, 0x382e2000, 0x382e4000, 0x382e6000, 0x382e8000, 0x382ea000,
0x382ec000, 0x382ee000, 0x382f0000, 0x382f2000, 0x382f4000, 0x382f6000,
0x382f8000, 0x382fa000, 0x382fc000, 0x382fe000, 0x38300000, 0x38302000,
0x38304000, 0x38306000, 0x38308000, 0x3830a000, 0x3830c000, 0x3830e000,
0x38310000, 0x38312000, 0x38314000, 0x38316000, 0x38318000, 0x3831a000,
0x3831c000, 0x3831e000, 0x38320000, 0x38322000, 0x38324000, 0x38326000,
0x38328000, 0x3832a000, 0x3832c000, 0x3832e000, 0x38330000, 0x38332000,
0x38334000, 0x38336000, 0x38338000, 0x3833a000, 0x3833c000, 0x3833e000,
0x38340000, 0x38342000, 0x38344000, 0x38346000, 0x38348000, 0x3834a000,
0x3834c000, 0x3834e000, 0x38350000, 0x38352000, 0x38354000, 0x38356000,
0x38358000, 0x3835a000, 0x3835c000, 0x3835e000, 0x38360000, 0x38362000,
0x38364000, 0x38366000, 0x38368000, 0x3836a000, 0x3836c000, 0x3836e000,
0x38370000, 0x38372000, 0x38374000, 0x38376000, 0x38378000, 0x3837a000,
0x3837c000, 0x3837e000, 0x38380000, 0x38382000, 0x38384000, 0x38386000,
0x38388000, 0x3838a000, 0x3838c000, 0x3838e000, 0x38390000, 0x38392000,
0x38394000, 0x38396000, 0x38398000, 0x3839a000, 0x3839c000, 0x3839e000,
0x383a0000, 0x383a2000, 0x383a4000, 0x383a6000, 0x383a8000, 0x383aa000,
0x383ac000, 0x383ae000, 0x383b0000, 0x383b2000, 0x383b4000, 0x383b6000,
0x383b8000, 0x383ba000, 0x383bc000, 0x383be000, 0x383c0000, 0x383c2000,
0x383c4000, 0x383c6000, 0x383c8000, 0x383ca000, 0x383cc000, 0x383ce000,
0x383d0000, 0x383d2000, 0x383d4000, 0x383d6000, 0x383d8000, 0x383da000,
0x383dc000, 0x383de000, 0x383e0000, 0x383e2000, 0x383e4000, 0x383e6000,
0x383e8000, 0x383ea000, 0x383ec000, 0x383ee000, 0x383f0000, 0x383f2000,
0x383f4000, 0x383f6000, 0x383f8000, 0x383fa000, 0x383fc000, 0x383fe000,
0x38400000, 0x38402000, 0x38404000, 0x38406000, 0x38408000, 0x3840a000,
0x3840c000, 0x3840e000, 0x38410000, 0x38412000, 0x38414000, 0x38416000,
0x38418000, 0x3841a000, 0x3841c000, 0x3841e000, 0x38420000, 0x38422000,
0x38424000, 0x38426000, 0x38428000, 0x3842a000, 0x3842c000, 0x3842e000,
0x38430000, 0x38432000, 0x38434000, 0x38436000, 0x38438000, 0x3843a000,
0x3843c000, 0x3843e000, 0x38440000, 0x38442000, 0x38444000, 0x38446000,
0x38448000, 0x3844a000, 0x3844c000, 0x3844e000, 0x38450000, 0x38452000,
0x38454000, 0x38456000, 0x38458000, 0x3845a000, 0x3845c000, 0x3845e000,
0x38460000, 0x38462000, 0x38464000, 0x38466000, 0x38468000, 0x3846a000,
0x3846c000, 0x3846e000, 0x38470000, 0x38472000, 0x38474000, 0x38476000,
0x38478000, 0x3847a000, 0x3847c000, 0x3847e000, 0x38480000, 0x38482000,
0x38484000, 0x38486000, 0x38488000, 0x3848a000, 0x3848c000, 0x3848e000,
0x38490000, 0x38492000, 0x38494000, 0x38496000, 0x38498000, 0x3849a000,
0x3849c000, 0x3849e000, 0x384a0000, 0x384a2000, 0x384a4000, 0x384a6000,
0x384a8000, 0x384aa000, 0x384ac000, 0x384ae000, 0x384b0000, 0x384b2000,
0x384b4000, 0x384b6000, 0x384b8000, 0x384ba000, 0x384bc000, 0x384be000,
0x384c0000, 0x384c2000, 0x384c4000, 0x384c6000, 0x384c8000, 0x384ca000,
0x384cc000, 0x384ce000, 0x384d0000, 0x384d2000, 0x384d4000, 0x384d6000,
0x384d8000, 0x384da000, 0x384dc000, 0x384de000, 0x384e0000, 0x384e2000,
0x384e4000, 0x384e6000, 0x384e8000, 0x384ea000, 0x384ec000, 0x384ee000,
0x384f0000, 0x384f2000, 0x384f4000, 0x384f6000, 0x384f8000, 0x384fa000,
0x384fc000, 0x384fe000, 0x38500000, 0x38502000, 0x38504000, 0x38506000,
0x38508000, 0x3850a000, 0x3850c000, 0x3850e000, 0x38510000, 0x38512000,
0x38514000, 0x38516000, 0x38518000, 0x3851a000, 0x3851c000, 0x3851e000,
0x38520000, 0x38522000, 0x38524000, 0x38526000, 0x38528000, 0x3852a000,
0x3852c000, 0x3852e000, 0x38530000, 0x38532000, 0x38534000, 0x38536000,
0x38538000, 0x3853a000, 0x3853c000, 0x3853e000, 0x38540000, 0x38542000,
0x38544000, 0x38546000, 0x38548000, 0x3854a000, 0x3854c000, 0x3854e000,
0x38550000, 0x38552000, 0x38554000, 0x38556000, 0x38558000, 0x3855a000,
0x3855c000, 0x3855e000, 0x38560000, 0x38562000, 0x38564000, 0x38566000,
0x38568000, 0x3856a000, 0x3856c000, 0x3856e000, 0x38570000, 0x38572000,
0x38574000, 0x38576000, 0x38578000, 0x3857a000, 0x3857c000, 0x3857e000,
0x38580000, 0x38582000, 0x38584000, 0x38586000, 0x38588000, 0x3858a000,
0x3858c000, 0x3858e000, 0x38590000, 0x38592000, 0x38594000, 0x38596000,
0x38598000, 0x3859a000, 0x3859c000, 0x3859e000, 0x385a0000, 0x385a2000,
0x385a4000, 0x385a6000, 0x385a8000, 0x385aa000, 0x385ac000, 0x385ae000,
0x385b0000, 0x385b2000, 0x385b4000, 0x385b6000, 0x385b8000, 0x385ba000,
0x385bc000, 0x385be000, 0x385c0000, 0x385c2000, 0x385c4000, 0x385c6000,
0x385c8000, 0x385ca000, 0x385cc000, 0x385ce000, 0x385d0000, 0x385d2000,
0x385d4000, 0x385d6000, 0x385d8000, 0x385da000, 0x385dc000, 0x385de000,
0x385e0000, 0x385e2000, 0x385e4000, 0x385e6000, 0x385e8000, 0x385ea000,
0x385ec000, 0x385ee000, 0x385f0000, 0x385f2000, 0x385f4000, 0x385f6000,
0x385f8000, 0x385fa000, 0x385fc000, 0x385fe000, 0x38600000, 0x38602000,
0x38604000, 0x38606000, 0x38608000, 0x3860a000, 0x3860c000, 0x3860e000,
0x38610000, 0x38612000, 0x38614000, 0x38616000, 0x38618000, 0x3861a000,
0x3861c000, 0x3861e000, 0x38620000, 0x38622000, 0x38624000, 0x38626000,
0x38628000, 0x3862a000, 0x3862c000, 0x3862e000, 0x38630000, 0x38632000,
0x38634000, 0x38636000, 0x38638000, 0x3863a000, 0x3863c000, 0x3863e000,
0x38640000, 0x38642000, 0x38644000, 0x38646000, 0x38648000, 0x3864a000,
0x3864c000, 0x3864e000, 0x38650000, 0x38652000, 0x38654000, 0x38656000,
0x38658000, 0x3865a000, 0x3865c000, 0x3865e000, 0x38660000, 0x38662000,
0x38664000, 0x38666000, 0x38668000, 0x3866a000, 0x3866c000, 0x3866e000,
0x38670000, 0x38672000, 0x38674000, 0x38676000, 0x38678000, 0x3867a000,
0x3867c000, 0x3867e000, 0x38680000, 0x38682000, 0x38684000, 0x38686000,
0x38688000, 0x3868a000, 0x3868c000, 0x3868e000, 0x38690000, 0x38692000,
0x38694000, 0x38696000, 0x38698000, 0x3869a000, 0x3869c000, 0x3869e000,
0x386a0000, 0x386a2000, 0x386a4000, 0x386a6000, 0x386a8000, 0x386aa000,
0x386ac000, 0x386ae000, 0x386b0000, 0x386b2000, 0x386b4000, 0x386b6000,
0x386b8000, 0x386ba000, 0x386bc000, 0x386be000, 0x386c0000, 0x386c2000,
0x386c4000, 0x386c6000, 0x386c8000, 0x386ca000, 0x386cc000, 0x386ce000,
0x386d0000, 0x386d2000, 0x386d4000, 0x386d6000, 0x386d8000, 0x386da000,
0x386dc000, 0x386de000, 0x386e0000, 0x386e2000, 0x386e4000, 0x386e6000,
0x386e8000, 0x386ea000, 0x386ec000, 0x386ee000, 0x386f0000, 0x386f2000,
0x386f4000, 0x386f6000, 0x386f8000, 0x386fa000, 0x386fc000, 0x386fe000,
0x38700000, 0x38702000, 0x38704000, 0x38706000, 0x38708000, 0x3870a000,
0x3870c000, 0x3870e000, 0x38710000, 0x38712000, 0x38714000, 0x38716000,
0x38718000, 0x3871a000, 0x3871c000, 0x3871e000, 0x38720000, 0x38722000,
0x38724000, 0x38726000, 0x38728000, 0x3872a000, 0x3872c000, 0x3872e000,
0x38730000, 0x38732000, 0x38734000, 0x38736000, 0x38738000, 0x3873a000,
0x3873c000, 0x3873e000, 0x38740000, 0x38742000, 0x38744000, 0x38746000,
0x38748000, 0x3874a000, 0x3874c000, 0x3874e000, 0x38750000, 0x38752000,
0x38754000, 0x38756000, 0x38758000, 0x3875a000, 0x3875c000, 0x3875e000,
0x38760000, 0x38762000, 0x38764000, 0x38766000, 0x38768000, 0x3876a000,
0x3876c000, 0x3876e000, 0x38770000, 0x38772000, 0x38774000, 0x38776000,
0x38778000, 0x3877a000, 0x3877c000, 0x3877e000, 0x38780000, 0x38782000,
0x38784000, 0x38786000, 0x38788000, 0x3878a000, 0x3878c000, 0x3878e000,
0x38790000, 0x38792000, 0x38794000, 0x38796000, 0x38798000, 0x3879a000,
0x3879c000, 0x3879e000, 0x387a0000, 0x387a2000, 0x387a4000, 0x387a6000,
0x387a8000, 0x387aa000, 0x387ac000, 0x387ae000, 0x387b0000, 0x387b2000,
0x387b4000, 0x387b6000, 0x387b8000, 0x387ba000, 0x387bc000, 0x387be000,
0x387c0000, 0x387c2000, 0x387c4000, 0x387c6000, 0x387c8000, 0x387ca000,
0x387cc000, 0x387ce000, 0x387d0000, 0x387d2000, 0x387d4000, 0x387d6000,
0x387d8000, 0x387da000, 0x387dc000, 0x387de000, 0x387e0000, 0x387e2000,
0x387e4000, 0x387e6000, 0x387e8000, 0x387ea000, 0x387ec000, 0x387ee000,
0x387f0000, 0x387f2000, 0x387f4000, 0x387f6000, 0x387f8000, 0x387fa000,
0x387fc000, 0x387fe000};
const static uint16_t offsettable[64] = {
0x0000, 0x0400, 0x0400, 0x0400, 0x0400, 0x0400, 0x0400, 0x0400,
0x0400, 0x0400, 0x0400, 0x0400, 0x0400, 0x0400, 0x0400, 0x0400,
0x0400, 0x0400, 0x0400, 0x0400, 0x0400, 0x0400, 0x0400, 0x0400,
0x0400, 0x0400, 0x0400, 0x0400, 0x0400, 0x0400, 0x0400, 0x0400,
0x0000, 0x0400, 0x0400, 0x0400, 0x0400, 0x0400, 0x0400, 0x0400,
0x0400, 0x0400, 0x0400, 0x0400, 0x0400, 0x0400, 0x0400, 0x0400,
0x0400, 0x0400, 0x0400, 0x0400, 0x0400, 0x0400, 0x0400, 0x0400,
0x0400, 0x0400, 0x0400, 0x0400, 0x0400, 0x0400, 0x0400, 0x0400
};
static const uint16_t offsettable[64] = {
0x0000, 0x0400, 0x0400, 0x0400, 0x0400, 0x0400, 0x0400, 0x0400,
0x0400, 0x0400, 0x0400, 0x0400, 0x0400, 0x0400, 0x0400, 0x0400,
0x0400, 0x0400, 0x0400, 0x0400, 0x0400, 0x0400, 0x0400, 0x0400,
0x0400, 0x0400, 0x0400, 0x0400, 0x0400, 0x0400, 0x0400, 0x0400,
0x0000, 0x0400, 0x0400, 0x0400, 0x0400, 0x0400, 0x0400, 0x0400,
0x0400, 0x0400, 0x0400, 0x0400, 0x0400, 0x0400, 0x0400, 0x0400,
0x0400, 0x0400, 0x0400, 0x0400, 0x0400, 0x0400, 0x0400, 0x0400,
0x0400, 0x0400, 0x0400, 0x0400, 0x0400, 0x0400, 0x0400, 0x0400};
const static uint32_t exponenttable[64] = {
0x00000000, 0x00800000, 0x01000000, 0x01800000, 0x02000000, 0x02800000, 0x03000000, 0x03800000,
0x04000000, 0x04800000, 0x05000000, 0x05800000, 0x06000000, 0x06800000, 0x07000000, 0x07800000,
0x08000000, 0x08800000, 0x09000000, 0x09800000, 0x0a000000, 0x0a800000, 0x0b000000, 0x0b800000,
0x0c000000, 0x0c800000, 0x0d000000, 0x0d800000, 0x0e000000, 0x0e800000, 0x0f000000, 0x47800000,
0x80000000, 0x80800000, 0x81000000, 0x81800000, 0x82000000, 0x82800000, 0x83000000, 0x83800000,
0x84000000, 0x84800000, 0x85000000, 0x85800000, 0x86000000, 0x86800000, 0x87000000, 0x87800000,
0x88000000, 0x88800000, 0x89000000, 0x89800000, 0x8a000000, 0x8a800000, 0x8b000000, 0x8b800000,
0x8c000000, 0x8c800000, 0x8d000000, 0x8d800000, 0x8e000000, 0x8e800000, 0x8f000000, 0xc7800000
};
static const uint32_t exponenttable[64] = {
0x00000000, 0x00800000, 0x01000000, 0x01800000, 0x02000000, 0x02800000,
0x03000000, 0x03800000, 0x04000000, 0x04800000, 0x05000000, 0x05800000,
0x06000000, 0x06800000, 0x07000000, 0x07800000, 0x08000000, 0x08800000,
0x09000000, 0x09800000, 0x0a000000, 0x0a800000, 0x0b000000, 0x0b800000,
0x0c000000, 0x0c800000, 0x0d000000, 0x0d800000, 0x0e000000, 0x0e800000,
0x0f000000, 0x47800000, 0x80000000, 0x80800000, 0x81000000, 0x81800000,
0x82000000, 0x82800000, 0x83000000, 0x83800000, 0x84000000, 0x84800000,
0x85000000, 0x85800000, 0x86000000, 0x86800000, 0x87000000, 0x87800000,
0x88000000, 0x88800000, 0x89000000, 0x89800000, 0x8a000000, 0x8a800000,
0x8b000000, 0x8b800000, 0x8c000000, 0x8c800000, 0x8d000000, 0x8d800000,
0x8e000000, 0x8e800000, 0x8f000000, 0xc7800000};
const static uint16_t basetable[512] = {
0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0001,
0x0002, 0x0004, 0x0008, 0x0010, 0x0020, 0x0040, 0x0080, 0x0100,
0x0200, 0x0400, 0x0800, 0x0c00, 0x1000, 0x1400, 0x1800, 0x1c00,
0x2000, 0x2400, 0x2800, 0x2c00, 0x3000, 0x3400, 0x3800, 0x3c00,
0x4000, 0x4400, 0x4800, 0x4c00, 0x5000, 0x5400, 0x5800, 0x5c00,
0x6000, 0x6400, 0x6800, 0x6c00, 0x7000, 0x7400, 0x7800, 0x7c00,
0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x7c00,
0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x7c00,
0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x7c00,
0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x7c00,
0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x7c00,
0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x7c00,
0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x7c00,
0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x7c00,
0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x7c00,
0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x7c00,
0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x7c00,
0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x7c00,
0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x7c00,
0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x7c00,
0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000,
0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000,
0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000,
0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000,
0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000,
0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000,
0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000,
0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000,
0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000,
0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000,
0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000,
0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000,
0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8001,
0x8002, 0x8004, 0x8008, 0x8010, 0x8020, 0x8040, 0x8080, 0x8100,
0x8200, 0x8400, 0x8800, 0x8c00, 0x9000, 0x9400, 0x9800, 0x9c00,
0xa000, 0xa400, 0xa800, 0xac00, 0xb000, 0xb400, 0xb800, 0xbc00,
0xc000, 0xc400, 0xc800, 0xcc00, 0xd000, 0xd400, 0xd800, 0xdc00,
0xe000, 0xe400, 0xe800, 0xec00, 0xf000, 0xf400, 0xf800, 0xfc00,
0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00,
0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00,
0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00,
0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00,
0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00,
0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00,
0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00,
0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00,
0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00,
0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00,
0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00,
0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00,
0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00,
0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00
};
static const uint16_t basetable[512] = {
0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
0x0000, 0x0000, 0x0000, 0x0000, 0x0001, 0x0002, 0x0004, 0x0008, 0x0010,
0x0020, 0x0040, 0x0080, 0x0100, 0x0200, 0x0400, 0x0800, 0x0c00, 0x1000,
0x1400, 0x1800, 0x1c00, 0x2000, 0x2400, 0x2800, 0x2c00, 0x3000, 0x3400,
0x3800, 0x3c00, 0x4000, 0x4400, 0x4800, 0x4c00, 0x5000, 0x5400, 0x5800,
0x5c00, 0x6000, 0x6400, 0x6800, 0x6c00, 0x7000, 0x7400, 0x7800, 0x7c00,
0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x7c00,
0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x7c00,
0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x7c00,
0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x7c00,
0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x7c00,
0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x7c00,
0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x7c00,
0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x7c00,
0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x7c00,
0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x7c00,
0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x7c00,
0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x7c00,
0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000,
0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000,
0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000,
0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000,
0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000,
0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000,
0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000,
0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000,
0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000,
0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000,
0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000,
0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8001,
0x8002, 0x8004, 0x8008, 0x8010, 0x8020, 0x8040, 0x8080, 0x8100, 0x8200,
0x8400, 0x8800, 0x8c00, 0x9000, 0x9400, 0x9800, 0x9c00, 0xa000, 0xa400,
0xa800, 0xac00, 0xb000, 0xb400, 0xb800, 0xbc00, 0xc000, 0xc400, 0xc800,
0xcc00, 0xd000, 0xd400, 0xd800, 0xdc00, 0xe000, 0xe400, 0xe800, 0xec00,
0xf000, 0xf400, 0xf800, 0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00,
0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00,
0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00,
0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00,
0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00,
0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00,
0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00,
0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00,
0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00,
0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00,
0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00,
0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00,
0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00};
const static uint8_t shifttable[512] = {
0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18,
0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18,
0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18,
0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18,
0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18,
0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18,
0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18,
0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18,
0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18,
0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18,
0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18,
0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18,
0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x17,
0x16, 0x15, 0x14, 0x13, 0x12, 0x11, 0x10, 0x0f,
0x0e, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d,
0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d,
0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d,
0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x18,
0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18,
0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18,
0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18,
0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18,
0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18,
0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18,
0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18,
0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18,
0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18,
0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18,
0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18,
0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18,
0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18,
0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x0d,
0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18,
0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18,
0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18,
0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18,
0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18,
0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18,
0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18,
0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18,
0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18,
0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18,
0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18,
0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18,
0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x17,
0x16, 0x15, 0x14, 0x13, 0x12, 0x11, 0x10, 0x0f,
0x0e, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d,
0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d,
0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d,
0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x18,
0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18,
0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18,
0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18,
0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18,
0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18,
0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18,
0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18,
0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18,
0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18,
0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18,
0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18,
0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18,
0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18,
0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x0d
};
static const uint8_t shifttable[512] = {
0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18,
0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18,
0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18,
0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18,
0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18,
0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18,
0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18,
0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18,
0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x17, 0x16, 0x15, 0x14, 0x13,
0x12, 0x11, 0x10, 0x0f, 0x0e, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d,
0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d,
0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x18,
0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18,
0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18,
0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18,
0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18,
0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18,
0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18,
0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18,
0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18,
0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18,
0x18, 0x18, 0x18, 0x0d, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18,
0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18,
0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18,
0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18,
0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18,
0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18,
0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18,
0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18,
0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x17,
0x16, 0x15, 0x14, 0x13, 0x12, 0x11, 0x10, 0x0f, 0x0e, 0x0d, 0x0d, 0x0d,
0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d,
0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d,
0x0d, 0x0d, 0x0d, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18,
0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18,
0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18,
0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18,
0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18,
0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18,
0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18,
0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18,
0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18,
0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x0d};
half_t float2half(float f) {
uint32_t v = *reinterpret_cast<uint32_t*>(&f);
return basetable[(v>>23)&0x1ff]+((v&0x007fffff)>>shifttable[(v>>23)&0x1ff]);
return basetable[(v >> 23) & 0x1ff] +
((v & 0x007fffff) >> shifttable[(v >> 23) & 0x1ff]);
}
float half2float(half_t h) {
uint32_t v = mantissatable[offsettable[h>>10]+(h&0x3ff)]+exponenttable[h>>10];
uint32_t v = mantissatable[offsettable[h >> 10] + (h & 0x3ff)] +
exponenttable[h >> 10];
return *reinterpret_cast<float*>(&v);
}
\ No newline at end of file
}
......@@ -18,4 +18,4 @@ limitations under the License. */
typedef uint16_t half_t;
half_t float2half(float f);
float half2float(half_t h);
\ No newline at end of file
float half2float(half_t h);
......@@ -14,11 +14,13 @@ limitations under the License. */
#pragma once
#include <vector>
#include <string>
#include <type_traits>
#include <vector>
#include "framework/cl/cl_scope.h"
#include "framework/cl/cl_deleter.h"
#include "framework/cl/cl_image.h"
#include "framework/cl/cl_scope.h"
namespace paddle_mobile {
namespace framework {
......@@ -27,24 +29,38 @@ class CLHelper {
public:
CLHelper() = default;
CLHelper(CLScope *scope): scope_(scope) {
}
explicit CLHelper(CLScope *scope) : scope_(scope) {}
void AddKernel(const std::string &kernel_name, const std::string &file_name) {
auto kernel = scope_->GetKernel(kernel_name, file_name);
kernels.emplace_back(std::move(kernel));
}
cl_kernel KernelAt(const int index) {
return kernels[index].get();
}
cl_kernel KernelAt(const int index) { return kernels[index].get(); }
cl_command_queue CLCommandQueue() {
return scope_->CommandQueue();
}
cl_command_queue CLCommandQueue() { return scope_->CommandQueue(); }
cl_context CLContext() { return scope_->Context(); }
std::vector<size_t> DefaultWorkSize(const CLImage &image) {
// n c h w
auto image_dim = image.dims();
if (image_dim.size() == 4) {
auto n = image_dim[0];
auto h = image_dim[2];
auto w = image_dim[3];
auto image_width = image.ImageWidth();
auto work_size_0 = image_width / w;
auto work_size_1 = w;
auto work_size_2 = n * h;
cl_context CLContext() {
return scope_->Context();
return {work_size_0, work_size_1, work_size_2};
}
PADDLE_MOBILE_THROW_EXCEPTION("not support this dim, need imp");
}
private:
......@@ -52,5 +68,5 @@ class CLHelper {
std::vector<std::unique_ptr<_cl_kernel, CLKernelDeleter>> kernels;
};
}
}
} // namespace framework
} // namespace paddle_mobile
......@@ -14,10 +14,12 @@ limitations under the License. */
#pragma once
#include <vector>
#include "CL/cl.h"
#include "framework/cl/cl_half.h"
#include "framework/ddim.h"
#include "framework/tensor.h"
#include "CL/cl.h"
#include "cl_half.h"
namespace paddle_mobile {
namespace framework {
......@@ -27,113 +29,127 @@ class CLImage {
CLImage() = default;
void Init(cl_context context, float *tensorInput, DDim ddim) {
tensorDims_ = ddim;
cl_image_format cf = {
.image_channel_order = CL_RGBA,
.image_channel_data_type = CL_HALF_FLOAT
};
tensor_dims_ = ddim;
cl_image_format cf = {.image_channel_order = CL_RGBA,
.image_channel_data_type = CL_HALF_FLOAT};
// NCHW -> [W * (C+3)/4, H * N]
DLOG<<tensorDims_;
size_t N,C,H,W;
if(tensorDims_.size()==4){
N = tensorDims_[0];
if(N<0){
N = 1;
}
C = tensorDims_[1];
H = tensorDims_[2];
W = tensorDims_[3];
}else if(tensorDims_.size()==1){
DLOG << tensor_dims_;
size_t N, C, H, W;
if (tensor_dims_.size() == 4) {
N = tensor_dims_[0];
if (N < 0) {
N = 1;
C = tensorDims_[0];
H = 1;
W = 1;
}
C = tensor_dims_[1];
H = tensor_dims_[2];
W = tensor_dims_[3];
} else if (tensor_dims_.size() == 1) {
N = 1;
C = tensor_dims_[0];
H = 1;
W = 1;
}
DLOG<<"-------InitMemory-------";
DLOG << "-------InitMemory-------";
size_t width = W * ((C + 3) / 4);
size_t height = H * N;
std::unique_ptr<half_t[]> imageData{};
int count = 0;
int count = 0;
if (tensorInput != nullptr) {
imageData.reset(new half_t[width * height * 4]);
float *p = tensorInput;
size_t i0 = 0;
for (int n = 0; n < N; n++) {
for (int c = 0; c < C; c++) {
size_t i1 = i0;
for (int h = 0; h < H; h++) {
size_t i2 = (i1<<2) + c % 4;
for (int w = 0; w < W; w++) {
if (i2 >= width * height * 4) {
printf("%d > %d ----> %d, %d, %d, %d --- %d, %d, %d\n", i2, width*height*4, n, c, h, w, i0, i1, i2);
}
assert(i2 < width * height * 4);
imageData[i2] = float2half(*p);
i2 += 4;
p++;
// count++;
// DLOG<<count;
}
i1 += width;
}
}
i0 += width * H;
}
float *p = tensorInput;
size_t i0 = 0;
for (int n = 0; n < N; n++) {
for (int c = 0; c < C; c++) {
size_t i1 = i0;
for (int h = 0; h < H; h++) {
size_t i2 = (i1 << 2) + c % 4;
for (int w = 0; w < W; w++) {
if (i2 >= width * height * 4) {
printf("%d > %d ----> %d, %d, %d, %d --- %d, %d, %d\n", i2,
width * height * 4, n, c, h, w, i0, i1, i2);
}
assert(i2 < width * height * 4);
imageData[i2] = float2half(*p);
i2 += 4;
p++;
// count++;
// DLOG<<count;
}
i1 += width;
}
}
i0 += width * H;
}
}
DLOG<<"-------InitMemory-------";
DLOG << "-------InitMemory-------";
cl_int err;
cl_image_ = clCreateImage2D(
context, // cl_context context
CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR, // cl_mem_flags flags
&cf, // const cl_image_format *image_format
width, // size_t image_width
height, // size_t image_height
0, // size_t image_row_pitch
reinterpret_cast<void*>(imageData.get()), // void *host_ptr
&err // cl_int *errcode_ret
);
context, // cl_context context
CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR, // cl_mem_flags flags
&cf, // const cl_image_format *image_format
width, // size_t image_width
height, // size_t image_height
0, // size_t image_row_pitch
reinterpret_cast<void *>(imageData.get()), // void *host_ptr
&err);
if (err != CL_SUCCESS) {
// TODO: error handling
// TODO(HaiPeng): error handling
}
}
void Init(cl_context context, DDim ddim) {
Init(context, nullptr, ddim);
}
void Init(cl_context context, DDim ddim) { Init(context, nullptr, ddim); }
inline CLImage &Resize(const DDim &dims) {
tensorDims_ = dims;
tensor_dims_ = dims;
return *this;
}
const DDim &dims() const {
return tensorDims_;
}
const DDim &dims() const { return tensor_dims_; }
std::vector<size_t> DefaultWorkSize() {
return {};
}
std::vector<size_t> DefaultWorkSize() { return {}; }
cl_mem GetCLImage() const { return cl_image_; }
cl_mem GetCLImage() {
return cl_image_;
template <typename T>
T *data() const {
return reinterpret_cast<T *>(tensor_input_);
}
inline int64_t numel() const { return product(tensor_dims_); }
int ImageWidth() const { return image_width_; }
int ImageHeight() const { return image_height_; }
int CBlock() const { return c_block_; }
int WidthOfOneBlock() const { return width_of_one_block_; }
int HeightOfOneBlock() const { return height_of_one_block_; }
private:
bool initialized_ = false;
cl_mem cl_image_;
DDim tensorDims_;
int image_width_;
int width_of_one_block_;
int height_of_one_block_;
int image_height_;
int c_block_;
DDim tensor_dims_;
float *tensor_input_;
cl_context context_;
};
//void TensorToCLImage(Tensor *tensor, CLImage *image) {
// void TensorToCLImage(Tensor *tensor, CLImage *image) {
//
//}
//
//void CLImageToTensor(CLImage *image, Tensor *tensor) {
// void CLImageToTensor(CLImage *image, Tensor *tensor) {
//
//}
}
}
\ No newline at end of file
} // namespace framework
} // namespace paddle_mobile
......@@ -18,10 +18,10 @@ limitations under the License. */
#include <string>
#include <unordered_map>
#include "framework/cl/cl_tool.h"
#include "framework/cl/cl_engine.h"
#include "framework/cl/cl_deleter.h"
#include "CL/cl.h"
#include "framework/cl/cl_deleter.h"
#include "framework/cl/cl_engine.h"
#include "framework/cl/cl_tool.h"
namespace paddle_mobile {
namespace framework {
......@@ -35,19 +35,17 @@ class CLScope {
command_queue_ = engin->CreateClCommandQueue();
}
cl_command_queue CommandQueue() {
return command_queue_.get();
}
cl_command_queue CommandQueue() { return command_queue_.get(); }
std::unique_ptr<_cl_kernel, CLKernelDeleter> GetKernel(const std::string &kernel_name, const std::string &file_name) {
std::unique_ptr<_cl_kernel, CLKernelDeleter> GetKernel(
const std::string &kernel_name, const std::string &file_name) {
auto program = Program(file_name);
std::unique_ptr<_cl_kernel, CLKernelDeleter> kernel(clCreateKernel(program, kernel_name.c_str(), NULL));
std::unique_ptr<_cl_kernel, CLKernelDeleter> kernel(
clCreateKernel(program, kernel_name.c_str(), NULL));
return std::move(kernel);
}
cl_context Context() {
return context_.get();
}
cl_context Context() { return context_.get(); }
cl_program Program(const std::string &file_name) {
auto it = programs_.find(file_name);
......@@ -55,20 +53,23 @@ class CLScope {
return it->second.get();
}
auto program = CLEngine::Instance()->CreateProgramWith(context_.get(), file_name);
auto program =
CLEngine::Instance()->CreateProgramWith(context_.get(), file_name);
programs_[file_name] = std::move(program);
status_ = clBuildProgram(program.get(), 0, 0, 0, 0, 0);
status_ = clBuildProgram(program.get(), 0, 0, 0, 0, 0);
CL_CHECK_ERRORS(status_);
return program.get();
}
private:
cl_int status_;
cl_int status_;
std::unique_ptr<_cl_context, CLContextDeleter> context_;
std::unique_ptr<_cl_command_queue, CLCommQueueDeleter> command_queue_;
std::unordered_map<std::string, std::unique_ptr<_cl_program, CLProgramDeleter>> programs_;
std::unordered_map<std::string,
std::unique_ptr<_cl_program, CLProgramDeleter>>
programs_;
};
}
}
} // namespace framework
} // namespace paddle_mobile
......@@ -18,17 +18,17 @@ limitations under the License. */
#include <string>
#include <vector>
#include "framework/tensor_base.h"
#include "framework/cl/cl_engine.h"
#include "framework/cl/cl_deleter.h"
#include "CL/cl.h"
#include "framework/cl/cl_deleter.h"
#include "framework/cl/cl_engine.h"
#include "framework/tensor_base.h"
namespace paddle_mobile {
namespace framework {
class CLTensor : TensorBase {
public:
CLTensor(cl_context context) : context_(context) {}
explicit CLTensor(cl_context context) : context_(context) {}
/*! Resize the dimensions of the memory block. */
inline CLTensor &Resize(const DDim &dims) {
......@@ -84,7 +84,6 @@ class CLTensor : TensorBase {
}
private:
cl_context context_;
/*
......@@ -99,18 +98,15 @@ class CLTensor : TensorBase {
virtual void set_type(std::type_index type) = 0;
* */
struct PlaceholderImpl : public Placeholder {
PlaceholderImpl(size_t size, void *input, std::type_index type, cl_context context)
: ptr_(clCreateBuffer(context,
CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR, size,
reinterpret_cast<void *>(input), NULL)),
PlaceholderImpl(size_t size, void *input, std::type_index type,
cl_context context)
: ptr_(clCreateBuffer(context, CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR,
size, reinterpret_cast<void *>(input), NULL)),
size_(size),
type_(type) {
}
type_(type) {}
PlaceholderImpl(size_t size, std::type_index type, cl_context context)
: ptr_(clCreateBuffer(context,
CL_MEM_READ_WRITE, size, NULL, NULL)),
: ptr_(clCreateBuffer(context, CL_MEM_READ_WRITE, size, NULL, NULL)),
size_(size),
type_(type) {}
......@@ -128,9 +124,7 @@ class CLTensor : TensorBase {
/* the current type of memory */
std::type_index type_;
};
};
} // namespace framework
......
......@@ -12,13 +12,15 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "cl_tool.h"
#include "framework/cl/cl_tool.h"
namespace paddle_mobile {
namespace framework {
const char *opencl_error_to_str(cl_int error) {
#define CASE_CL_CONSTANT(NAME) case NAME: return #NAME;
#define CASE_CL_CONSTANT(NAME) \
case NAME: \
return #NAME;
// Suppose that no combinations are possible.
switch (error) {
CASE_CL_CONSTANT(CL_SUCCESS)
......@@ -78,5 +80,5 @@ const char *opencl_error_to_str(cl_int error) {
#undef CASE_CL_CONSTANT
}
}
}
} // namespace framework
} // namespace paddle_mobile
......@@ -19,16 +19,15 @@ limitations under the License. */
namespace paddle_mobile {
namespace framework {
const char* opencl_error_to_str (cl_int error);
#define CL_CHECK_ERRORS(ERR) \
if(ERR != CL_SUCCESS) \
{ \
printf( \
"OpenCL error with code %s happened in file %s at line %d. Exiting.\n", \
opencl_error_to_str(ERR), __FILE__, __LINE__ \
); \
}
}
}
const char* opencl_error_to_str(cl_int error);
#define CL_CHECK_ERRORS(ERR) \
if (ERR != CL_SUCCESS) { \
printf( \
"OpenCL error with code %s happened in file %s at line %d. " \
"Exiting.\n", \
opencl_error_to_str(ERR), __FILE__, __LINE__); \
}
} // namespace framework
} // namespace paddle_mobile
......@@ -12,7 +12,7 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "executor.h"
#include "framework/executor.h"
#include <operators/math/gemm.h>
#include <algorithm>
#include <vector>
......@@ -265,7 +265,7 @@ void Executor<Dtype, P>::InitCombineMemory() {
char *origin_data;
if (program_.combined_params_buf && program_.combined_params_len) {
LOG(kLOG_INFO) << "use outter memory";
origin_data = (char *)program_.combined_params_buf;
origin_data = reinterpret_cast<char *>(program_.combined_params_buf);
} else {
LOG(kLOG_INFO) << " begin init combine memory";
origin_data = Get_binary_data(program_.para_path);
......@@ -666,12 +666,12 @@ void Executor<Dtype, P>::InjectVariable(const framework::Tensor &t,
g_feed_value->GetMutable<framework::LoDTensor>();
feed_tensor->Resize(t.dims());
feed_tensor->ShareDataWith(t);
};
}
template <typename Dtype, Precision P>
void Executor<Dtype, P>::FeedData(const framework::Tensor &t) {
InjectVariable(t, "feed");
};
}
template <typename Dtype, Precision P>
std::shared_ptr<framework::Tensor> Executor<Dtype, P>::FetchResult(int id) {
......@@ -687,14 +687,14 @@ std::shared_ptr<framework::Tensor> Executor<Dtype, P>::FetchResult(int id) {
auto *output_tensor = framework::GetVarValue<framework::LoDTensor>(
out_keys[0], output_map, *(program_.scope));
return std::make_shared<framework::Tensor>(framework::Tensor(*output_tensor));
};
}
template <typename Dtype, Precision P>
void Executor<Dtype, P>::Predict_From_To(int start, int end) {
std::shared_ptr<framework::BlockDesc> to_predict_block =
to_predict_program_->Block(0);
auto &ops = ops_of_block_[*to_predict_block.get()];
end = end < 0 ? (int)ops.size() : end;
end = end < 0 ? static_cast<int>(ops.size()) : end;
PADDLE_MOBILE_ENFORCE(start >= 0 && start < end && end <= ops.size(),
"start or end parameter is wrong");
......@@ -715,17 +715,17 @@ void Executor<Dtype, P>::Predict_From_To(int start, int end) {
profile[i].runEnd = (uint64_t)ts.tv_sec * 1e9 + ts.tv_nsec;
#endif
}
};
}
template <typename Dtype, Precision P>
void Executor<Dtype, P>::Predict_From(int start) {
Predict_From_To(start);
};
}
template <typename Dtype, Precision P>
void Executor<Dtype, P>::Predict_To(int end) {
Predict_From_To(0, end);
};
}
#endif
#ifdef PADDLE_MOBILE_FPGA
......@@ -738,12 +738,12 @@ void Executor<Dtype, P>::InjectVariable(const framework::Tensor &t,
g_feed_value->GetMutable<framework::LoDTensor>();
feed_tensor->Resize(t.dims());
feed_tensor->ShareDataWith(t);
};
}
template <typename Dtype, Precision P>
void Executor<Dtype, P>::FeedData(const framework::Tensor &t) {
InjectVariable(t, "feed");
};
}
template <typename Dtype, Precision P>
std::shared_ptr<framework::Tensor> Executor<Dtype, P>::FetchResult(int id) {
......@@ -759,14 +759,14 @@ std::shared_ptr<framework::Tensor> Executor<Dtype, P>::FetchResult(int id) {
auto *output_tensor = framework::GetVarValue<framework::LoDTensor>(
out_keys[0], output_map, *(program_.scope));
return std::make_shared<framework::Tensor>(framework::Tensor(*output_tensor));
};
}
template <typename Dtype, Precision P>
void Executor<Dtype, P>::Predict_From_To(int start, int end) {
std::shared_ptr<framework::BlockDesc> to_predict_block =
to_predict_program_->Block(0);
auto &ops = ops_of_block_[*to_predict_block.get()];
end = end < 0 ? (int)ops.size() : end;
end = end < 0 ? static_cast<int>(ops.size()) : end;
PADDLE_MOBILE_ENFORCE(start >= 0 && start < end && end <= ops.size(),
"start or end parameter is wrong");
......@@ -787,120 +787,120 @@ void Executor<Dtype, P>::Predict_From_To(int start, int end) {
profile[i].runEnd = (uint64_t)ts.tv_sec * 1e9 + ts.tv_nsec;
#endif
}
};
}
template <typename Dtype, Precision P>
void Executor<Dtype, P>::Predict_From(int start) {
Predict_From_To(start);
};
}
template <typename Dtype, Precision P>
void Executor<Dtype, P>::Predict_To(int end) {
Predict_From_To(0, end);
};
}
#endif
#ifdef PADDLE_MOBILE_CL
template <>
void Executor<GPU_CL, Precision::FP32>::LoadMemory(const framework::VarDesc var_desc,
float *tensorInput, char **data) {
// 1. version
uint32_t version = *reinterpret_cast<uint32_t *>(*data);
(*data) += sizeof(uint32_t);
// 2 Lod information
uint64_t *lod_level_ptr = new uint64_t();
memcpy(lod_level_ptr, (*data), sizeof(uint64_t));
uint64_t lod_level = *lod_level_ptr;
delete lod_level_ptr;
(*data) += sizeof(uint64_t);
for (uint64_t i = 0; i < lod_level; ++i) {
uint64_t size = *reinterpret_cast<uint64_t *>(*data);
(*data) += sizeof(uint64_t);
std::vector<size_t> tmp(size / sizeof(size_t));
for (int k = 0; k < tmp.size(); ++k) {
tmp[k] = *reinterpret_cast<size_t *>(*data);
(*data) += sizeof(size_t);
}
}
// 3. tensor version
uint32_t tensor_version = *reinterpret_cast<uint32_t *>(*data);
(*data) += sizeof(uint32_t);
// 4. tensor desc
int32_t size = *reinterpret_cast<int32_t *>(*data);
(*data) += sizeof(int32_t);
std::unique_ptr<char[]> buf(new char[size]);
for (int m = 0; m < size; ++m) {
buf.get()[m] = (*data)[m];
}
(*data) += (sizeof(char) * size);
const framework::TensorDesc &desc = var_desc.Tensor_desc();
int memory_size = 1;
for (auto l : desc.Dims()) {
memory_size *= l;
}
void *memory = nullptr;
// int type_size = 0;
// switch (desc.DataType()) {
// case framework::VARTYPE_TYPE_FP16:
// type_size = 2;
// break;
// case framework::VARTYPE_TYPE_FP32:
// type_size = 4;
// memory = tensor->mutable_data<float>();
// break;
// case framework::VARTYPE_TYPE_FP64:
// type_size = 8;
// break;
// case framework::VARTYPE_TYPE_INT32:
// memory = tensor->mutable_data<int32_t>();
// type_size = 4;
// break;
// case framework::VARTYPE_TYPE_INT64:
// type_size = 8;
// break;
// case framework::VARTYPE_TYPE_BOOL:
// type_size = 1;
// break;
// default:
// break;
// }
int type_size = 4;
memory = tensorInput;
if (program_.quantification) {
float min_value;
float max_value;
memcpy(&min_value, *data, sizeof(float));
memcpy(&max_value, *data + sizeof(float), sizeof(float));
*data += 2 * sizeof(float);
const float factor = (max_value - min_value) / 255.0;
uint8_t *uint8_data = reinterpret_cast<uint8_t *>(*data);
for (int k = 0; k < memory_size; ++k) {
static_cast<float *>(memory)[k] = uint8_data[k] * factor + min_value;
}
*data += (memory_size * sizeof(uint8_t));
} else {
for (int n = 0; n < memory_size; n++) {
float value;
memcpy(&value, *data + n * type_size, type_size);
if (value < 1e-30 && value > -1e-30) {
static_cast<float *>(memory)[n] = 0.0;
} else {
static_cast<float *>(memory)[n] = value;
}
}
(*data) += (sizeof(char) * memory_size * type_size);
}
}
void Executor<GPU_CL, Precision::FP32>::LoadMemory(
const framework::VarDesc var_desc, float *tensorInput, char **data) {
// 1. version
uint32_t version = *reinterpret_cast<uint32_t *>(*data);
(*data) += sizeof(uint32_t);
// 2 Lod information
uint64_t *lod_level_ptr = new uint64_t();
memcpy(lod_level_ptr, (*data), sizeof(uint64_t));
uint64_t lod_level = *lod_level_ptr;
delete lod_level_ptr;
(*data) += sizeof(uint64_t);
for (uint64_t i = 0; i < lod_level; ++i) {
uint64_t size = *reinterpret_cast<uint64_t *>(*data);
(*data) += sizeof(uint64_t);
std::vector<size_t> tmp(size / sizeof(size_t));
for (int k = 0; k < tmp.size(); ++k) {
tmp[k] = *reinterpret_cast<size_t *>(*data);
(*data) += sizeof(size_t);
}
}
// 3. tensor version
uint32_t tensor_version = *reinterpret_cast<uint32_t *>(*data);
(*data) += sizeof(uint32_t);
// 4. tensor desc
int32_t size = *reinterpret_cast<int32_t *>(*data);
(*data) += sizeof(int32_t);
std::unique_ptr<char[]> buf(new char[size]);
for (int m = 0; m < size; ++m) {
buf.get()[m] = (*data)[m];
}
(*data) += (sizeof(char) * size);
const framework::TensorDesc &desc = var_desc.Tensor_desc();
int memory_size = 1;
for (auto l : desc.Dims()) {
memory_size *= l;
}
void *memory = nullptr;
// int type_size = 0;
// switch (desc.DataType()) {
// case framework::VARTYPE_TYPE_FP16:
// type_size = 2;
// break;
// case framework::VARTYPE_TYPE_FP32:
// type_size = 4;
// memory = tensor->mutable_data<float>();
// break;
// case framework::VARTYPE_TYPE_FP64:
// type_size = 8;
// break;
// case framework::VARTYPE_TYPE_INT32:
// memory = tensor->mutable_data<int32_t>();
// type_size = 4;
// break;
// case framework::VARTYPE_TYPE_INT64:
// type_size = 8;
// break;
// case framework::VARTYPE_TYPE_BOOL:
// type_size = 1;
// break;
// default:
// break;
// }
int type_size = 4;
memory = tensorInput;
if (program_.quantification) {
float min_value;
float max_value;
memcpy(&min_value, *data, sizeof(float));
memcpy(&max_value, *data + sizeof(float), sizeof(float));
*data += 2 * sizeof(float);
const float factor = (max_value - min_value) / 255.0;
uint8_t *uint8_data = reinterpret_cast<uint8_t *>(*data);
for (int k = 0; k < memory_size; ++k) {
static_cast<float *>(memory)[k] = uint8_data[k] * factor + min_value;
}
*data += (memory_size * sizeof(uint8_t));
} else {
for (int n = 0; n < memory_size; n++) {
float value;
memcpy(&value, *data + n * type_size, type_size);
if (value < 1e-30 && value > -1e-30) {
static_cast<float *>(memory)[n] = 0.0;
} else {
static_cast<float *>(memory)[n] = value;
}
}
(*data) += (sizeof(char) * memory_size * type_size);
}
}
template <>
void Executor<GPU_CL, Precision::FP32>::InitMemory() {
......@@ -914,37 +914,35 @@ void Executor<GPU_CL, Precision::FP32>::InitMemory() {
}
char *origin_data =
Get_binary_data(program_.model_path + "/" + var_desc->Name());
char *data = origin_data;
char *data = origin_data;
cl_context context = program_.scope->GetCLScpoe()->Context();
const framework::TensorDesc &desc = var_desc->Tensor_desc();
int numel = 1;
for (auto l : desc.Dims()) {
numel *= l;
}
DLOG<<var_desc->Name();
const framework::TensorDesc &desc = var_desc->Tensor_desc();
int numel = 1;
for (auto l : desc.Dims()) {
numel *= l;
}
DLOG << var_desc->Name();
float *tensorInput = static_cast<float *>(
paddle_mobile::memory::Alloc(sizeof(float) * numel));
LoadMemory(*var_desc,tensorInput,&data);
paddle_mobile::memory::Alloc(sizeof(float) * numel));
LoadMemory(*var_desc, tensorInput, &data);
framework::DDim ddim = framework::make_ddim(desc.Dims());
cl_image->Init(context, tensorInput, ddim);
delete origin_data;
paddle_mobile::memory::Free(tensorInput);
}else{
if (var_desc->Type() == framework::VARTYPE_TYPE_LOD_TENSOR) {
auto cl_image = var->template GetMutable<framework::CLImage>();
cl_context context = program_.scope->GetCLScpoe()->Context();
const framework::TensorDesc &desc = var_desc->Tensor_desc();
framework::DDim ddim = framework::make_ddim(desc.Dims());
DLOG<<var_desc->Name();
cl_image->Init(context, ddim);
delete origin_data;
paddle_mobile::memory::Free(tensorInput);
} else {
if (var_desc->Type() == framework::VARTYPE_TYPE_LOD_TENSOR) {
auto cl_image = var->template GetMutable<framework::CLImage>();
cl_context context = program_.scope->GetCLScpoe()->Context();
}
const framework::TensorDesc &desc = var_desc->Tensor_desc();
framework::DDim ddim = framework::make_ddim(desc.Dims());
DLOG << var_desc->Name();
cl_image->Init(context, ddim);
}
}
}
}
......@@ -955,13 +953,13 @@ void Executor<GPU_CL, Precision::FP32>::InitCombineMemory() {
char *origin_data;
if (program_.combined_params_buf && program_.combined_params_len) {
LOG(kLOG_INFO) << "use outter memory";
origin_data = (char *)program_.combined_params_buf;
origin_data = reinterpret_cast<char *>(program_.combined_params_buf);
} else {
LOG(kLOG_INFO) << " begin init combine memory";
origin_data = Get_binary_data(program_.para_path);
}
PADDLE_MOBILE_ENFORCE(origin_data != nullptr, "origin_data==nullptr!!!");
float *data = (float *)origin_data;
float *data = reinterpret_cast<float *>(origin_data);
for (const auto &block : to_predict_program_->Blocks()) {
for (const auto &var_desc : block->Vars()) {
......@@ -981,12 +979,12 @@ void Executor<GPU_CL, Precision::FP32>::InitCombineMemory() {
for (int i = 0; i < ddim.size(); i++) {
numel = numel * ddim[i];
}
float *tensorInput = static_cast<float *>(
paddle_mobile::memory::Alloc(sizeof(float) * numel));
LoadMemory(*var_desc,tensorInput,&origin_data);
float *tensorInput = static_cast<float *>(
paddle_mobile::memory::Alloc(sizeof(float) * numel));
LoadMemory(*var_desc, tensorInput, &origin_data);
cl_image->Init(context, tensorInput, ddim);
paddle_mobile::memory::Free(tensorInput);
}else{
paddle_mobile::memory::Free(tensorInput);
} else {
auto cl_image = var->template GetMutable<framework::CLImage>();
cl_context context = program_.scope->GetCLScpoe()->Context();
......
......@@ -35,7 +35,7 @@ using std::string;
namespace paddle_mobile {
namespace framework {
template<typename Dtype = CPU, Precision P = Precision::FP32>
template <typename Dtype = CPU, Precision P = Precision::FP32>
class Executor {
public:
typedef typename PrecisionTrait<P>::ptype Ptype;
......@@ -56,7 +56,7 @@ class Executor {
* @b to predict
* */
std::shared_ptr<framework::LoDTensor> PredictLod(
const framework::LoDTensor &t);
const framework::LoDTensor &t);
/*
* @b to predict with vector and dim
......@@ -73,8 +73,8 @@ class Executor {
void LoadMemory(const framework::VarDesc var_desc,
framework::LoDTensor *tensor, char **data);
void LoadMemory(const framework::VarDesc var_desc,
float * tensorInput, char **data);
void LoadMemory(const framework::VarDesc var_desc, float *tensorInput,
char **data);
void InitCombineMemory();
......@@ -86,8 +86,8 @@ class Executor {
int block_id);
std::map<framework::BlockDesc,
std::vector<std::shared_ptr<framework::OperatorBase<Dtype>>>>
ops_of_block_;
std::vector<std::shared_ptr<framework::OperatorBase<Dtype>>>>
ops_of_block_;
bool use_optimize_ = false;
bool loddable_ = false;
#ifdef PADDLE_EXECUTOR_MULTITHREAD
......@@ -107,15 +107,15 @@ class Executor {
#ifdef PADDLE_MOBILE_FPGA
public:
void InjectVariable(const framework::Tensor &t, string var_name);
void FeedData(const framework::Tensor &t);
std::shared_ptr<framework::Tensor> FetchResult(int id = -1);
void Predict_From_To(int start = 0, int end = -1);
void Predict_From(int start);
void Predict_To(int end);
public:
void InjectVariable(const framework::Tensor &t, string var_name);
void FeedData(const framework::Tensor &t);
std::shared_ptr<framework::Tensor> FetchResult(int id = -1);
void Predict_From_To(int start = 0, int end = -1);
void Predict_From(int start);
void Predict_To(int end);
#endif
};
}
} // namespace framework
} // namespace paddle_mobile
......@@ -12,7 +12,7 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "loader.h"
#include "framework/loader.h"
#include "framework/lod_tensor.h"
#include "framework/program/program-optimize/program_optimize.h"
......@@ -29,10 +29,10 @@ namespace framework {
* @param originProgramDesc
* @param scope
*/
template<typename Dtype, Precision P>
template <typename Dtype, Precision P>
void Loader<Dtype, P>::InitMemoryFromProgram(
std::shared_ptr<ProgramDesc> &originProgramDesc,
std::shared_ptr<Scope> &scope) {
const std::shared_ptr<ProgramDesc> &originProgramDesc,
const std::shared_ptr<Scope> &scope) {
for (const auto &block : originProgramDesc.get()->Blocks()) {
for (const auto &var_desc : block->Vars()) {
auto var = scope.get()->Var(var_desc->Name());
......@@ -56,32 +56,32 @@ void Loader<Dtype, P>::InitMemoryFromProgram(
}
#ifdef PADDLE_MOBILE_CL
template<>
void Loader<GPU_CL, Precision::FP32>::InitMemoryFromProgram(
std::shared_ptr<ProgramDesc> &originProgramDesc,
std::shared_ptr<Scope> &scope) {
for (const auto &block : originProgramDesc.get()->Blocks()) {
for (const auto &var_desc : block->Vars()) {
auto var = scope.get()->Var(var_desc->Name());
if (var_desc->Type() == VARTYPE_TYPE_LOD_TENSOR) {
if (var_desc->Persistable()) {
auto dim = var_desc->Tensor_desc().Dims();
// auto tensor = var->GetMutable<LoDTensor>();
auto cl_image = var->GetMutable<framework::CLImage>();
cl_image->Resize(make_ddim(dim));
} else {
auto dim = var_desc->Tensor_desc().Dims();
PADDLE_MOBILE_ENFORCE(dim.size() > 0, "dim size is 0");
dim[0] = 1;
auto cl_image = var->GetMutable<framework::CLImage>();
cl_image->Resize(make_ddim(dim));
}
} else {
// TODO(codeWorm): some.
}
}
}
template <>
void Loader<GPU_CL, Precision::FP32>::InitMemoryFromProgram(
const std::shared_ptr<ProgramDesc> &originProgramDesc,
const std::shared_ptr<Scope> &scope) {
for (const auto &block : originProgramDesc.get()->Blocks()) {
for (const auto &var_desc : block->Vars()) {
auto var = scope.get()->Var(var_desc->Name());
if (var_desc->Type() == VARTYPE_TYPE_LOD_TENSOR) {
if (var_desc->Persistable()) {
auto dim = var_desc->Tensor_desc().Dims();
// auto tensor = var->GetMutable<LoDTensor>();
auto cl_image = var->GetMutable<framework::CLImage>();
cl_image->Resize(make_ddim(dim));
} else {
auto dim = var_desc->Tensor_desc().Dims();
PADDLE_MOBILE_ENFORCE(dim.size() > 0, "dim size is 0");
dim[0] = 1;
auto cl_image = var->GetMutable<framework::CLImage>();
cl_image->Resize(make_ddim(dim));
}
} else {
// TODO(codeWorm): some.
}
}
}
}
#endif
/**
......@@ -93,14 +93,14 @@ void Loader<Dtype, P>::InitMemoryFromProgram(
* @param program
* @param originProgramDesc
*/
template<typename Dtype, Precision P>
template <typename Dtype, Precision P>
void FusionAndPrintInfos(
bool &optimize, bool &can_add_split, Program<Dtype, P> &program,
const std::shared_ptr<ProgramDesc> &originProgramDesc) {
bool optimize, bool can_add_split, const Program<Dtype, P> &program,
const std::shared_ptr<ProgramDesc> &originProgramDesc) {
if (optimize) {
ProgramOptimize program_optimize;
program.optimizeProgram =
program_optimize.FusionOptimize(originProgramDesc, can_add_split);
program_optimize.FusionOptimize(originProgramDesc, can_add_split);
}
if (optimize) {
program.optimizeProgram->Description("optimize: ");
......@@ -131,20 +131,22 @@ static size_t ReadBuffer(const char *file_name, uint8_t **out) {
return cur_len;
}
template<typename Dtype, Precision P>
const Program<Dtype, P> Loader<Dtype, P>::Load(
const std::string &dirname, bool optimize, bool quantification,
bool can_add_split) {
template <typename Dtype, Precision P>
const Program<Dtype, P> Loader<Dtype, P>::Load(const std::string &dirname,
bool optimize,
bool quantification,
bool can_add_split) {
auto program = this->LoadProgram(dirname + "/__model__", optimize,
quantification, can_add_split);
program.model_path = dirname;
return program;
}
template<typename Dtype, Precision P>
const Program<Dtype, P> Loader<Dtype, P>::Load(
const std::string &model_path, const std::string &para_path, bool optimize,
bool quantification) {
template <typename Dtype, Precision P>
const Program<Dtype, P> Loader<Dtype, P>::Load(const std::string &model_path,
const std::string &para_path,
bool optimize,
bool quantification) {
auto program = this->LoadProgram(model_path, optimize, quantification);
program.para_path = para_path;
......@@ -153,10 +155,10 @@ const Program<Dtype, P> Loader<Dtype, P>::Load(
return program;
}
template<typename Dtype, Precision P>
template <typename Dtype, Precision P>
const Program<Dtype, P> Loader<Dtype, P>::LoadProgram(
const std::string &model_path, bool optimize, bool quantification,
bool can_add_split) {
const std::string &model_path, bool optimize, bool quantification,
bool can_add_split) {
std::string model_filename = model_path;
PaddleMobile__Framework__Proto__ProgramDesc *c_program;
uint8_t *buf = NULL;
......@@ -165,7 +167,7 @@ const Program<Dtype, P> Loader<Dtype, P>::LoadProgram(
PADDLE_MOBILE_ENFORCE(buf != NULL, "read from __model__ is null");
c_program = paddle_mobile__framework__proto__program_desc__unpack(
NULL, read_size, buf);
NULL, read_size, buf);
//
PADDLE_MOBILE_ENFORCE(c_program != NULL, "program is null");
//
......@@ -190,17 +192,17 @@ const Program<Dtype, P> Loader<Dtype, P>::LoadProgram(
return program;
}
template<typename Dtype, Precision P>
template <typename Dtype, Precision P>
const Program<Dtype, P> Loader<Dtype, P>::LoadCombinedMemory(
size_t read_size, const uint8_t *buf, size_t combined_params_len,
const uint8_t *combined_params_buf, bool optimize, bool quantification) {
size_t read_size, const uint8_t *buf, size_t combined_params_len,
const uint8_t *combined_params_buf, bool optimize, bool quantification) {
bool can_add_split = false;
PaddleMobile__Framework__Proto__ProgramDesc *c_program;
PADDLE_MOBILE_ENFORCE(buf != nullptr, "read from __model__ is null");
c_program = paddle_mobile__framework__proto__program_desc__unpack(
nullptr, read_size, buf);
nullptr, read_size, buf);
//
PADDLE_MOBILE_ENFORCE(c_program != nullptr, "program is null");
//
......@@ -225,17 +227,13 @@ const Program<Dtype, P> Loader<Dtype, P>::LoadCombinedMemory(
return program;
}
template
class Loader<CPU, Precision::FP32>;
template class Loader<CPU, Precision::FP32>;
template
class Loader<FPGA, Precision::FP32>;
template class Loader<FPGA, Precision::FP32>;
template
class Loader<GPU_MALI, Precision::FP32>;
template class Loader<GPU_MALI, Precision::FP32>;
template
class Loader<GPU_CL, Precision::FP32>;
template class Loader<GPU_CL, Precision::FP32>;
}
} // namespace framework
} // namespace paddle_mobile
......@@ -20,7 +20,7 @@ limitations under the License. */
#include "framework/program/program.h"
namespace paddle_mobile {
namespace framework{
namespace framework {
template <typename Dtype = CPU, Precision P = Precision::FP32>
class Loader {
......@@ -30,33 +30,36 @@ class Loader {
* @b 加载分开形式的 fluid 模型
* */
const Program<Dtype, P> Load(const std::string &dirname,
bool optimize = false,
bool quantification = false,
bool can_add_split = false);
bool optimize = false,
bool quantification = false,
bool can_add_split = false);
/*
* @b load combine format fluid mode
* @b 加载结合在一起格式的模型
* */
const Program<Dtype, P> Load(const std::string &model_path,
const std::string &para_path,
bool optimize = false,
bool quantification = false);
const std::string &para_path,
bool optimize = false,
bool quantification = false);
const Program<Dtype, P> LoadCombinedMemory(
size_t model_len, const uint8_t *model_buf, size_t combined_params_len,
const uint8_t *combined_params_buf, bool optimize = false,
bool quantification = false);
const Program<Dtype, P> LoadCombinedMemory(size_t model_len,
const uint8_t *model_buf,
size_t combined_params_len,
const uint8_t *combined_params_buf,
bool optimize = false,
bool quantification = false);
private:
const Program<Dtype, P> LoadProgram(const std::string &model_path,
bool optimize = false,
bool quantification = false,
bool can_add_split = false);
bool optimize = false,
bool quantification = false,
bool can_add_split = false);
void InitMemoryFromProgram(std::shared_ptr<ProgramDesc> &originProgramDesc,
std::shared_ptr<Scope> &scope);
void InitMemoryFromProgram(
const std::shared_ptr<ProgramDesc> &originProgramDesc,
const std::shared_ptr<Scope> &scope);
};
}
} // namespace framework
} // namespace paddle_mobile
......@@ -14,8 +14,8 @@ limitations under the License. */
#pragma once
#include <string>
#include <memory>
#include <string>
#include <tuple>
#include "common/log.h"
......@@ -92,7 +92,6 @@ class OpRegistry {
const std::string& type, const VariableNameMap& inputs,
const VariableNameMap& outputs, const AttributeMap attrs,
std::shared_ptr<paddle_mobile::framework::Scope> scope) {
auto& info = OpInfoMap<Dtype>::Instance()->Get(type);
auto op = info.Creator()(type, inputs, outputs, attrs, scope);
return std::shared_ptr<OperatorBase<Dtype>>(op);
......
......@@ -16,6 +16,7 @@ limitations under the License. */
#include <map>
#include <string>
#include <utility>
#include <vector>
#include "common/enforce.h"
......@@ -32,8 +33,8 @@ limitations under the License. */
#include "framework/tensor.h"
#include "framework/variable.h"
#ifdef PADDLE_MOBILE_CL
#include "framework/cl/cl_scope.h"
#include "framework/cl/cl_helper.h"
#include "framework/cl/cl_scope.h"
#endif
namespace paddle_mobile {
namespace framework {
......@@ -131,7 +132,6 @@ class OperatorWithKernel : public OperatorBase<Dtype> {
// DLOG << i.second;
// }
PADDLE_MOBILE_ENFORCE(kernel_.Init(&param_), " %s kernel init failed",
this->type_.c_str());
}
......@@ -147,7 +147,6 @@ class OperatorWithKernel : public OperatorBase<Dtype> {
template <typename Dtype, typename P>
class OpKernelBase {
public:
OpKernelBase() = default;
#ifdef PADDLE_MOBILE_CL
......@@ -156,11 +155,11 @@ class OpKernelBase {
}
#endif
/*
* @b 所有kernel 需实现 Compute 方法
* @p para 这个参数为 kernel 运算时所需要用到参数组成的一个结构体,
* 所有结构体存在与: paddle-mobile/src/operators/op_param.h
* */
/*
* @b 所有kernel 需实现 Compute 方法
* @p para 这个参数为 kernel 运算时所需要用到参数组成的一个结构体,
* 所有结构体存在与: paddle-mobile/src/operators/op_param.h
* */
#ifdef PADDLE_McOBILE_MALI_GPU
OpKernelBase() { acl_op_ = nullptr; }
void *GetAclOp() const { return acl_op_; }
......@@ -181,8 +180,6 @@ class OpKernelBase {
#ifdef PADDLE_MOBILE_MALI_GPU
void *acl_op_;
#endif
};
#define DEFINE_OP_CONSTRUCTOR(cls, parent_cls) \
......
......@@ -15,13 +15,14 @@ limitations under the License. */
#pragma once
#include <list>
#include <string>
#include <unordered_map>
#include <vector>
#ifdef PADDLE_MOBILE_CL
#ifdef PADDLE_MOBILE_CL
#include "framework/cl/cl_scope.h"
#endif
#include <unordered_map>
#include "variable.h"
#include "framework/variable.h"
namespace paddle_mobile {
namespace framework {
......@@ -42,7 +43,6 @@ class Scope {
#ifdef PADDLE_MOBILE_CL
delete cl_scope_;
#endif
}
Scope &NewScope() const;
......@@ -83,9 +83,7 @@ class Scope {
Variable *FindVarLocally(const std::string &name) const;
#ifdef PADDLE_MOBILE_CL
CLScope *GetCLScpoe() {
return cl_scope_;
}
CLScope *GetCLScpoe() { return cl_scope_; }
#endif
private:
......@@ -99,7 +97,6 @@ class Scope {
#ifdef PADDLE_MOBILE_CL
CLScope *cl_scope_ = new CLScope();
#endif
};
} // namespace framework
} // namespace paddle_mobile
......@@ -22,9 +22,9 @@ limitations under the License. */
#endif // _OPENMP
#include "common/types.h"
#include "framework/tensor.h"
#include "framework/executor.h"
#include "framework/loader.h"
#include "framework/tensor.h"
namespace paddle_mobile {
......@@ -94,6 +94,7 @@ class PaddleMobile {
std::shared_ptr<framework::Executor<Dtype, P>> executor_;
#ifdef PADDLE_MOBILE_FPGA
public:
void InjectVariable(const framework::Tensor &t, string var_name);
void FeedData(const framework::Tensor &t);
......
......@@ -43,13 +43,14 @@ class FeedOp : public framework::OperatorBase<DeviceType> {
#ifdef PADDLE_MOBILE_FPGA
void Init() {
void Init() {
Tensor *output = param_.Out();
fpga::format_fp16_ofm(output);
}
void RunImpl() const {
auto input = (Tensor *)const_cast<LoDTensor *>(param_.InputX());
auto input =
reinterpret_cast<Tensor *>(const_cast<LoDTensor *>(param_.InputX()));
auto input_ptr = input->data<float>();
fpga::format_image(input);
Tensor *output = param_.Out();
......@@ -61,7 +62,7 @@ class FeedOp : public framework::OperatorBase<DeviceType> {
args.output_data_type = fpga::DATA_TYPE_FP16;
args.input_layout_type = fpga::LAYOUT_CHW;
args.output_layout_type = fpga::LAYOUT_HWC;
args.image.address = (void *)input_ptr;
args.image.address = reinterpret_cast<void *>(input_ptr);
args.image.channels = (uint32_t)input->dims()[1];
args.image.height = (uint32_t)input->dims()[2];
args.image.width = (uint32_t)input->dims()[3];
......@@ -74,12 +75,10 @@ class FeedOp : public framework::OperatorBase<DeviceType> {
#else
#ifdef PADDLE_MOBILE_CL
void Init() {}
void RunImpl() {
}
void Init() {}
void RunImpl() {}
#else
void Init() {}
void Init() {}
void RunImpl() {
param_.Out()->ShareDataWith(*param_.InputX());
param_.Out()->set_lod(param_.InputX()->lod());
......
......@@ -20,8 +20,8 @@ limitations under the License. */
#include <vector>
#include "framework/operator.h"
#include "framework/program/program-optimize/fusion_op_register.h"
#include "op_param.h"
#include "operators/kernel/conv_add_bn_relu_kernel.h"
#include "operators/op_param.h"
namespace paddle_mobile {
namespace operators {
......@@ -103,7 +103,7 @@ static framework::FusionOpRegistrar fusion_conv_add_bn_relu_registrar(
#ifdef PADDLE_MOBILE_CL
#ifndef FUSION_CONV_ADD_BN_RELU_REGISTER
static framework::FusionOpRegistrar fusion_conv_add_bn_relu_registrar(
static framework::FusionOpRegistrar fusion_conv_add_bn_relu_registrar(
new FusionConvAddBNReluMatcher());
#define FUSION_CONV_ADD_BN_RELU_REGISTER
#endif
......
......@@ -26,8 +26,7 @@ bool BatchNormKernel<CPU, float>::Init(BatchNormParam<CPU> *param) {
}
template <>
void BatchNormKernel<CPU, float>::Compute(
const BatchNormParam<CPU> &param) {
void BatchNormKernel<CPU, float>::Compute(const BatchNormParam<CPU> &param) {
BatchnormCompute<float>(param);
}
......
......@@ -26,8 +26,7 @@ bool BoxCoderKernel<CPU, float>::Init(BoxCoderParam<CPU> *param) {
}
template <>
void BoxCoderKernel<CPU, float>::Compute(
const BoxCoderParam<CPU> &param) {
void BoxCoderKernel<CPU, float>::Compute(const BoxCoderParam<CPU> &param) {
BoxCoderCompute<float>(param);
}
......
......@@ -25,8 +25,7 @@ bool ConvAddKernel<CPU, float>::Init(FusionConvAddParam<CPU> *param) {
}
template <>
void ConvAddKernel<CPU, float>::Compute(
const FusionConvAddParam<CPU> &param) {
void ConvAddKernel<CPU, float>::Compute(const FusionConvAddParam<CPU> &param) {
ConvAddCompute<float>(param);
}
......
......@@ -26,8 +26,7 @@ bool DepthwiseConvKernel<CPU, float>::Init(ConvParam<CPU> *param) {
}
template <>
void DepthwiseConvKernel<CPU, float>::Compute(
const ConvParam<CPU> &param) {
void DepthwiseConvKernel<CPU, float>::Compute(const ConvParam<CPU> &param) {
DepthwiseConvCompute<float>(param);
}
......
......@@ -26,8 +26,7 @@ bool FusionFcKernel<CPU, float>::Init(FusionFcParam<CPU> *param) {
}
template <>
void FusionFcKernel<CPU, float>::Compute(
const FusionFcParam<CPU> &param) {
void FusionFcKernel<CPU, float>::Compute(const FusionFcParam<CPU> &param) {
FusionFcCompute<float>(param);
param.Out()->set_lod(param.InputX()->lod());
}
......
......@@ -26,8 +26,7 @@ bool PriorBoxKernel<CPU, float>::Init(PriorBoxParam<CPU> *param) {
}
template <>
void PriorBoxKernel<CPU, float>::Compute(
const PriorBoxParam<CPU> &param) {
void PriorBoxKernel<CPU, float>::Compute(const PriorBoxParam<CPU> &param) {
PriorBoxCompute<float>(param);
}
......
......@@ -25,8 +25,7 @@ bool TransposeKernel<CPU, float>::Init(TransposeParam<CPU> *param) {
}
template <>
void TransposeKernel<CPU, float>::Compute(
const TransposeParam<CPU> &param) {
void TransposeKernel<CPU, float>::Compute(const TransposeParam<CPU> &param) {
TransposeCompute<float>(param);
}
......
......@@ -33,4 +33,3 @@ inline hafl4 activation(half4 in
}
*/
......@@ -12,9 +12,6 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
/*
conv
conv_bn
......@@ -30,7 +27,6 @@ conv_add_bn_relu
#include "common.h"
__kernel void conv_1x1(__private const int global_size_dim0,
__private const int global_size_dim1,
__private const int global_size_dim2,
......
......@@ -21,14 +21,14 @@ namespace operators {
template <>
bool ConvAddBNReluKernel<GPU_CL, float>::Init(
FusionConvAddBNReluParam<GPU_CL> *param) {
FusionConvAddBNReluParam<GPU_CL> *param) {
return true;
}
template <>
void ConvAddBNReluKernel<GPU_CL, float>::Compute(
const FusionConvAddBNReluParam<GPU_CL> &param) {
}
const FusionConvAddBNReluParam<GPU_CL> &param) {}
template class ConvAddBNReluKernel<GPU_CL, float>;
} // namespace operators
......
......@@ -15,20 +15,103 @@ limitations under the License. */
#ifdef FUSION_CONVADDBNRELU_OP
#include "operators/kernel/conv_add_bn_relu_kernel.h"
#include "framework/cl/cl_image.h"
namespace paddle_mobile {
namespace operators {
template <>
bool ConvAddBNReluKernel<GPU_CL, float>::Init(
FusionConvAddBNReluParam<GPU_CL> *param) {
FusionConvAddBNReluParam<GPU_CL> *param) {
// const CL *mean = param->InputMean();
const framework::CLImage *mean = param->InputMean();
const framework::CLImage *variance = param->InputVariance();
const framework::CLImage *scale = param->InputScale();
const framework::CLImage *bias = param->InputBias();
const float epsilon = param->Epsilon();
auto mean_ptr = mean->data<float>();
auto variance_ptr = variance->data<float>();
auto scale_ptr = scale->data<float>();
auto bias_ptr = bias->data<float>();
const int C = mean->numel();
float inv_std_ptr[C];
for (int i = 0; i < C; i++) {
inv_std_ptr[i] =
1 / static_cast<float>(pow((variance_ptr[i] + epsilon), 0.5));
}
float *new_scale_ptr = new float[C];
float *new_bias_ptr = new float[C];
for (int i = 0; i < C; i++) {
new_scale_ptr[i] = inv_std_ptr[i] * scale_ptr[i];
new_bias_ptr[i] = bias_ptr[i] - mean_ptr[i] * inv_std_ptr[i] * scale_ptr[i];
}
delete[](new_scale_ptr);
delete[](new_bias_ptr);
framework::CLImage *new_scale = new framework::CLImage();
framework::CLImage *new_bias = new framework::CLImage();
param->SetNewScale(new_scale);
param->SetNewBias(new_bias);
PADDLE_MOBILE_ENFORCE(
param->Filter()->dims()[2] == param->Filter()->dims()[3] &&
param->Paddings()[0] == param->Paddings()[1],
"need equal");
param->SetOffset(param->Filter()->dims()[2] / 2 -
static_cast<int>(param->Paddings()[1]));
this->cl_helper_.AddKernel("conv_3x3", "conv_add_bn_relu_kernel.cl");
return true;
}
template <>
void ConvAddBNReluKernel<GPU_CL, float>::Compute(
const FusionConvAddBNReluParam<GPU_CL> &param) {
const FusionConvAddBNReluParam<GPU_CL> &param) {
auto kernel = this->cl_helper_.KernelAt(0);
auto default_work_size = this->cl_helper_.DefaultWorkSize(*param.Output());
int c_block = default_work_size[0];
int w = default_work_size[1];
int nh = default_work_size[2];
auto input = param.Input()->GetCLImage();
auto filter = param.Filter()->GetCLImage();
auto biase = param.Bias()->GetCLImage();
auto new_scale = param.NewScale()->GetCLImage();
auto new_bias = param.NewBias()->GetCLImage();
auto output = param.Output();
int stride = param.Strides()[0];
int offset = param.Offset();
int input_c = param.Input()->CBlock();
int input_width = param.Input()->WidthOfOneBlock();
int input_height = param.Input()->HeightOfOneBlock();
clSetKernelArg(kernel, 0, sizeof(int), &c_block);
clSetKernelArg(kernel, 1, sizeof(int), &w);
clSetKernelArg(kernel, 2, sizeof(int), &nh);
clSetKernelArg(kernel, 3, sizeof(cl_mem), &input);
clSetKernelArg(kernel, 4, sizeof(cl_mem), &filter);
clSetKernelArg(kernel, 5, sizeof(cl_mem), &biase);
clSetKernelArg(kernel, 6, sizeof(cl_mem), &new_scale);
clSetKernelArg(kernel, 7, sizeof(cl_mem), &new_bias);
clSetKernelArg(kernel, 8, sizeof(cl_mem), &output);
clSetKernelArg(kernel, 9, sizeof(int), &stride);
clSetKernelArg(kernel, 10, sizeof(int), &offset);
clSetKernelArg(kernel, 11, sizeof(int), &input_c);
clSetKernelArg(kernel, 12, sizeof(int), &input_width);
clSetKernelArg(kernel, 13, sizeof(int), &input_height);
clEnqueueNDRangeKernel(this->cl_helper_.CLCommandQueue(), kernel, 3, NULL,
default_work_size.data(), NULL, 0, NULL, NULL);
}
template class ConvAddBNReluKernel<GPU_CL, float>;
} // namespace operators
......
......@@ -26,8 +26,7 @@ bool ConvAddKernel<GPU_CL, float>::Init(FusionConvAddParam<GPU_CL> *param) {
template <>
void ConvAddKernel<GPU_CL, float>::Compute(
const FusionConvAddParam<GPU_CL> &param) {
}
const FusionConvAddParam<GPU_CL> &param) {}
template class ConvAddKernel<GPU_CL, float>;
......
......@@ -21,15 +21,16 @@ namespace operators {
template <>
bool ConvKernel<GPU_CL, float>::Init(ConvParam<GPU_CL> *param) {
// this->cl_helper_.AddKernel("conv_3x3", "conv_kernel.cl");
// this->cl_helper_.AddKernel("conv_3x3", "conv_kernel.cl");
return true;
}
template <>
void ConvKernel<GPU_CL, float>::Compute(const ConvParam<GPU_CL> &param) {
// auto kernel = this->cl_helper_.KernelAt(0);
// size_t global_work_size[3] = {1, 2, 3};
// clEnqueueNDRangeKernel(this->cl_helper_.CLCommandQueue(), kernel, 3, NULL, global_work_size, NULL, 0, NULL, NULL);
// auto kernel = this->cl_helper_.KernelAt(0);
// size_t global_work_size[3] = {1, 2, 3};
// clEnqueueNDRangeKernel(this->cl_helper_.CLCommandQueue(), kernel, 3, NULL,
// global_work_size, NULL, 0, NULL, NULL);
}
template class ConvKernel<GPU_CL, float>;
......
......@@ -17,22 +17,23 @@ limitations under the License. */
#include "operators/kernel/elementwise_add_kernel.h"
namespace paddle_mobile {
namespace operators {
namespace operators {
template <>
bool ElementwiseAddKernel<GPU_CL, float>::Init(ElementwiseAddParam<GPU_CL> *param) {
// this->cl_helper_.AddKernel("elementwise_add", "elementwise_add_kernel.cl");
return true;
}
template <>
bool ElementwiseAddKernel<GPU_CL, float>::Init(
ElementwiseAddParam<GPU_CL> *param) {
// this->cl_helper_.AddKernel("elementwise_add",
// "elementwise_add_kernel.cl");
return true;
}
template <>
void ElementwiseAddKernel<GPU_CL, float>::Compute(const ElementwiseAddParam<GPU_CL> &param) {
template <>
void ElementwiseAddKernel<GPU_CL, float>::Compute(
const ElementwiseAddParam<GPU_CL> &param) {}
}
template class ElementwiseAddKernel<GPU_CL, float>;
template class ElementwiseAddKernel<GPU_CL, float>;
} // namespace operators
} // namespace operators
} // namespace paddle_mobile
#endif
......@@ -12,7 +12,6 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "operators/kernel/relu_kernel.h"
namespace paddle_mobile {
......@@ -30,4 +29,3 @@ template class ReluKernel<GPU_CL, float>;
} // namespace operators
} // namespace paddle_mobile
......@@ -12,7 +12,6 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "operators/kernel/reshape_kernel.h"
namespace paddle_mobile {
......@@ -30,4 +29,3 @@ template class ReshapeKernel<GPU_CL, float>;
} // namespace operators
} // namespace paddle_mobile
......@@ -17,20 +17,18 @@ limitations under the License. */
#include "operators/kernel/softmax_kernel.h"
namespace paddle_mobile {
namespace operators {
namespace operators {
template <>
bool SoftmaxKernel<GPU_CL, float>::Init(SoftmaxParam<GPU_CL> *param) {
return true;
}
template <>
bool SoftmaxKernel<GPU_CL, float>::Init(SoftmaxParam<GPU_CL> *param) {
return true;
}
template <>
void SoftmaxKernel<GPU_CL, float>::Compute(const SoftmaxParam<GPU_CL> &param) {}
template <>
void SoftmaxKernel<GPU_CL, float>::Compute(const SoftmaxParam<GPU_CL> &param) {}
template class SoftmaxKernel<GPU_CL, float>;
template class SoftmaxKernel<GPU_CL, float>;
} // namespace operators
} // namespace operators
} // namespace paddle_mobile
#endif
......@@ -67,8 +67,7 @@ bool ConvBNKernel<FPGA, float>::Init(FusionConvBNParam<FPGA> *param) {
}
template <>
void ConvBNKernel<FPGA, float>::Compute(
const FusionConvBNParam<FPGA> &param) {
void ConvBNKernel<FPGA, float>::Compute(const FusionConvBNParam<FPGA> &param) {
fpga::ComputeFpgaConv(param.FpgaArgs());
}
......
......@@ -26,8 +26,7 @@ bool DropoutKernel<FPGA, float>::Init(DropoutParam<FPGA> *param) {
}
template <>
void DropoutKernel<FPGA, float>::Compute(
const DropoutParam<FPGA> &param) {}
void DropoutKernel<FPGA, float>::Compute(const DropoutParam<FPGA> &param) {}
} // namespace operators
} // namespace paddle_mobile
......
......@@ -60,8 +60,7 @@ bool FusionFcKernel<FPGA, float>::Init(FusionFcParam<FPGA> *param) {
}
template <>
void FusionFcKernel<FPGA, float>::Compute(
const FusionFcParam<FPGA> &param) {
void FusionFcKernel<FPGA, float>::Compute(const FusionFcParam<FPGA> &param) {
fpga::ComputeFpgaConv(param.FpgaArgs());
}
} // namespace operators
......
......@@ -47,8 +47,7 @@ bool SoftmaxKernel<FPGA, float>::Init(SoftmaxParam<FPGA> *param) {
}
template <>
void SoftmaxKernel<FPGA, float>::Compute(
const SoftmaxParam<FPGA> &param) {
void SoftmaxKernel<FPGA, float>::Compute(const SoftmaxParam<FPGA> &param) {
Tensor *in_x = param.FloatInput();
Tensor *out = param.Out();
......
......@@ -211,8 +211,7 @@ bool ConvKernel<GPU_MALI, float>::Init(ConvParam<GPU_MALI>* param) {
}
template <>
void ConvKernel<GPU_MALI, float>::Compute(
const ConvParam<GPU_MALI>& param) {
void ConvKernel<GPU_MALI, float>::Compute(const ConvParam<GPU_MALI>& param) {
std::cout << "init acl" << std::endl;
AclConvOp<GPU_MALI, float>* acl_op =
reinterpret_cast<AclConvOp<GPU_MALI, float>*>(this->GetAclOp());
......
......@@ -127,8 +127,7 @@ bool LrnKernel<GPU_MALI, float>::Init(LrnParam<GPU_MALI>* param) {
}
template <>
void LrnKernel<GPU_MALI, float>::Compute(
const LrnParam<GPU_MALI>& param) {
void LrnKernel<GPU_MALI, float>::Compute(const LrnParam<GPU_MALI>& param) {
std::cout << "init acl" << std::endl;
AclLrnOp<GPU_MALI, float>* acl_op =
reinterpret_cast<AclLrnOp<GPU_MALI, float>*>(this->GetAclOp());
......
......@@ -27,8 +27,7 @@ bool MulKernel<GPU_MALI, float>::Init(MulParam<GPU_MALI> *param) {
}
template <>
void MulKernel<GPU_MALI, float>::Compute(
const MulParam<GPU_MALI> &param) {
void MulKernel<GPU_MALI, float>::Compute(const MulParam<GPU_MALI> &param) {
const Tensor *input_x = param.InputX();
const Tensor *input_y = param.InputY();
Tensor *out = param.Out();
......
......@@ -195,8 +195,7 @@ bool PoolKernel<GPU_MALI, float>::Init(PoolParam<GPU_MALI>* param) {
}
template <>
void PoolKernel<GPU_MALI, float>::Compute(
const PoolParam<GPU_MALI>& param) {
void PoolKernel<GPU_MALI, float>::Compute(const PoolParam<GPU_MALI>& param) {
std::cout << "init acl" << std::endl;
AclPoolOp<GPU_MALI, float>* acl_op =
reinterpret_cast<AclPoolOp<GPU_MALI, float>*>(this->GetAclOp());
......
......@@ -115,8 +115,7 @@ bool ReluKernel<GPU_MALI, float>::Init(ReluParam<GPU_MALI>* param) {
}
template <>
void ReluKernel<GPU_MALI, float>::Compute(
const ReluParam<GPU_MALI>& param) {
void ReluKernel<GPU_MALI, float>::Compute(const ReluParam<GPU_MALI>& param) {
std::cout << "init acl" << std::endl;
AclReluOp<GPU_MALI, float>* acl_op =
reinterpret_cast<AclReluOp<GPU_MALI, float>*>(this->GetAclOp());
......
......@@ -389,6 +389,13 @@ class ConvParam : public OpParam {
const int &Groups() const { return groups; }
#ifdef PADDLE_MOBILE_CL
int Offset() const { return offset_; }
int SetOffset(int in_offset) { offset_ = in_offset; }
#endif
private:
RType *input_;
RType *output_;
......@@ -397,6 +404,10 @@ class ConvParam : public OpParam {
vector<int> paddings_;
vector<int> dilations_;
int groups;
#ifdef PADDLE_MOBILE_CL
int offset_;
#endif
};
template <typename Dtype>
Print &operator<<(Print &printer, const ConvParam<Dtype> &conv_param);
......@@ -1520,6 +1531,7 @@ class FusionConvAddBNReluParam : public ConvParam<Dtype> {
bool is_test_;
RType *new_bias_;
RType *new_scale_;
#ifdef PADDLE_MOBILE_FPGA
private:
......
......@@ -18,8 +18,8 @@ limitations under the License. */
#include <vector>
#include "common/log.h"
#include "framework/op_registry.h"
#include "framework/executor.h"
#include "framework/op_registry.h"
#include "operators/conv_op.h"
#include "operators/elementwise_add_op.h"
#include "operators/pool_op.h"
......@@ -29,9 +29,9 @@ limitations under the License. */
#include "operators/softmax_op.h"
#include "operators/transpose_op.h"
using paddle_mobile::framework::Executor;
using paddle_mobile::framework::BlockDesc;
using paddle_mobile::framework::DDim;
using paddle_mobile::framework::Executor;
using paddle_mobile::framework::LoDTensor;
using paddle_mobile::framework::OpDesc;
using paddle_mobile::framework::Program;
......
......@@ -13,9 +13,9 @@ See the License for the specific language governing permissions and
limitations under the License. */
#include "../test_helper.h"
#include "framework/loader.h"
#include "framework/program/program-optimize/node.h"
#include "framework/program/program-optimize/program_optimize.h"
#include "framework/loader.h"
int main() {
paddle_mobile::framework::Loader<paddle_mobile::CPU> loader;
......
......@@ -17,43 +17,43 @@ limitations under the License. */
#include "../test_include.h"
int main() {
paddle_mobile::PaddleMobile<paddle_mobile::GPU_CL> paddle_mobile;
// paddle_mobile.SetThreadNum(4);
auto time1 = time();
// auto isok = paddle_mobile.Load(std::string(g_mobilenet_detect) + "/model",
// std::string(g_mobilenet_detect) + "/params", true);
auto isok = paddle_mobile.Load(g_mobilenet, false);
if (isok) {
auto time2 = time();
std::cout << "load cost :" << time_diff(time1, time1) << "ms" << std::endl;
std::vector<float> input;
std::vector<int64_t> dims{1, 3, 224, 224};
GetInput<float>(g_test_image_1x3x224x224_banana, &input, dims);
auto vec_result = paddle_mobile.Predict(input, dims);
std::vector<float>::iterator biggest =
std::max_element(std::begin(vec_result), std::end(vec_result));
std::cout << " Max element is " << *biggest << " at position "
<< std::distance(std::begin(vec_result), biggest) << std::endl;
// 预热十次
for (int i = 0; i < 10; ++i) {
auto vec_result = paddle_mobile.Predict(input, dims);
}
auto time3 = time();
for (int i = 0; i < 10; ++i) {
auto vec_result = paddle_mobile.Predict(input, dims);
}
DLOG << vec_result;
auto time4 = time();
std::cout << "predict cost :" << time_diff(time3, time4) / 10 << "ms"
<< std::endl;
paddle_mobile::PaddleMobile<paddle_mobile::GPU_CL> paddle_mobile;
// paddle_mobile.SetThreadNum(4);
auto time1 = time();
// auto isok = paddle_mobile.Load(std::string(g_mobilenet_detect) + "/model",
// std::string(g_mobilenet_detect) + "/params", true);
auto isok = paddle_mobile.Load(g_mobilenet, false);
if (isok) {
auto time2 = time();
std::cout << "load cost :" << time_diff(time1, time1) << "ms" << std::endl;
std::vector<float> input;
std::vector<int64_t> dims{1, 3, 224, 224};
GetInput<float>(g_test_image_1x3x224x224_banana, &input, dims);
auto vec_result = paddle_mobile.Predict(input, dims);
std::vector<float>::iterator biggest =
std::max_element(std::begin(vec_result), std::end(vec_result));
std::cout << " Max element is " << *biggest << " at position "
<< std::distance(std::begin(vec_result), biggest) << std::endl;
// 预热十次
for (int i = 0; i < 10; ++i) {
auto vec_result = paddle_mobile.Predict(input, dims);
}
std::cout << "如果结果Nan请查看: test/images/g_test_image_1x3x224x224_banana "
"是否存在?"
auto time3 = time();
for (int i = 0; i < 10; ++i) {
auto vec_result = paddle_mobile.Predict(input, dims);
}
DLOG << vec_result;
auto time4 = time();
std::cout << "predict cost :" << time_diff(time3, time4) / 10 << "ms"
<< std::endl;
return 0;
}
std::cout << "如果结果Nan请查看: test/images/g_test_image_1x3x224x224_banana "
"是否存在?"
<< std::endl;
return 0;
}
......@@ -12,8 +12,8 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "../../src/operators/kernel/sigmoid_kernel.h"
#include "../../src/operators/kernel/central-arm-func/sigmoid_arm_func.h"
#include "../../src/operators/kernel/sigmoid_kernel.h"
#include "../test_helper.h"
#include "framework/executor.h"
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册