diff --git a/CMakeLists.txt b/CMakeLists.txt index 8c835e5994284ebd1d22ba4de4c595bbaed5d9a1..d76ac16aacd9a3c4aa666960a17cf1cc1988c752 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -7,7 +7,7 @@ option(DEBUGING "enable debug mode" ON) option(USE_EXCEPTION "use std exception" OFF) option(LOG_PROFILE "log profile" OFF) # select the platform to build -option(CPU "armv7 with neon" ON) +option(CPU "armv7 with neon" OFF) option(GPU_MALI "mali gpu" OFF) option(GPU_CL "opencl gpu" ON) option(FPGA "fpga" OFF) diff --git a/src/common/enforce.h b/src/common/enforce.h index aebe2a58031cb1341596f07dbf653be4a5e01900..bf21b5b9a2fe5f70b3bd23a581f0c1dfbf373f42 100644 --- a/src/common/enforce.h +++ b/src/common/enforce.h @@ -46,7 +46,8 @@ struct PaddleMobileException : public std::exception { std::string detail(buffer); \ throw paddle_mobile::PaddleMobileException("Custom Exception", buffer, \ __FILE__, __LINE__); \ - } + } \ + exit(0); #define PADDLE_MOBILE_ENFORCE(stat, ...) \ { \ diff --git a/src/common/types.h b/src/common/types.h index a5782e7394e78a6ccfe8d51da19b5da1caebdaed..39b430afa832450d444b009e034ddb77d7c85b6a 100644 --- a/src/common/types.h +++ b/src/common/types.h @@ -39,7 +39,13 @@ struct PrecisionTrait { }; //! device type -enum DeviceTypeEnum { kINVALID = -1, kCPU = 0, kFPGA = 1, kGPU_MALI = 2, kGPU_CL = 3}; +enum DeviceTypeEnum { + kINVALID = -1, + kCPU = 0, + kFPGA = 1, + kGPU_MALI = 2, + kGPU_CL = 3 +}; template struct DeviceType {}; @@ -49,7 +55,6 @@ typedef DeviceType FPGA; typedef DeviceType GPU_MALI; typedef DeviceType GPU_CL; - //! data type enum DataType { PM_INVALID = -1, diff --git a/src/framework/cl/cl_engine.cpp b/src/framework/cl/cl_engine.cpp index 045f50c059d8735893062d7e9c0b5c0af0289121..8a6611b7d80ed23b2da072e04371c3c6e52a873c 100644 --- a/src/framework/cl/cl_engine.cpp +++ b/src/framework/cl/cl_engine.cpp @@ -12,9 +12,9 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. */ +#include "framework/cl/cl_engine.h" #include "CL/cl.h" #include "framework/cl/cl_tool.h" -#include "framework/cl/cl_engine.h" #include #include @@ -28,11 +28,11 @@ bool CLEngine::Init() { SetClDeviceId(); initialized_ = true; -// setClContext(); -// setClCommandQueue(); -// std::string filename = "./HelloWorld_Kernel.cl"; -// loadKernelFromFile(filename.c_str()); -// buildProgram(); + // setClContext(); + // setClCommandQueue(); + // std::string filename = "./HelloWorld_Kernel.cl"; + // loadKernelFromFile(filename.c_str()); + // buildProgram(); } CLEngine *CLEngine::Instance() { @@ -74,26 +74,26 @@ bool CLEngine::SetClDeviceId() { return false; } -//std::unique_ptr<_cl_kernel, clKernel_deleter> CLEngine::GSetKernel( +// std::unique_ptr<_cl_kernel, clKernel_deleter> CLEngine::GSetKernel( // const std::string &kernel_name) { // std::unique_ptr<_cl_kernel, clKernel_deleter> kernel( // clCreateKernel(program_.get(), kernel_name.c_str(), NULL)); // return std::move(kernel); //} // -//bool CLEngine::SetClCommandQueue() { +// bool CLEngine::SetClCommandQueue() { // cl_int status; // command_queue_.reset( // clCreateCommandQueue(context_.get(), devices_[0], 0, &status)); // return true; //} -//bool CLEngine::SetClContext() { +// bool CLEngine::SetClContext() { // context_.reset(clCreateContext(NULL, 1, devices_, NULL, NULL, NULL)); // return true; //} -//bool CLEngine::LoadKernelFromFile(const char *kernel_file) { +// bool CLEngine::LoadKernelFromFile(const char *kernel_file) { // size_t size; // char *str; // std::fstream f(kernel_file, (std::fstream::in | std::fstream::binary)); @@ -118,10 +118,10 @@ bool CLEngine::SetClDeviceId() { // const char *source = str; // size_t sourceSize[] = {strlen(source)}; // program_.reset( -// clCreateProgramWithSource(context_.get(), 1, &source, sourceSize, NULL)); +// clCreateProgramWithSource(context_.get(), 1, &source, sourceSize, +// NULL)); // return true; //} - } // namespace framework } // namespace paddle_mobile diff --git a/src/framework/cl/cl_engine.h b/src/framework/cl/cl_engine.h index e77f6b47d6bed307349e73a0d0f852e6f93afa17..0f8046f8f151d53480cf8054763c4a4ec9209ec1 100644 --- a/src/framework/cl/cl_engine.h +++ b/src/framework/cl/cl_engine.h @@ -17,9 +17,10 @@ limitations under the License. */ #include #include +#include "CL/cl.h" #include "common/enforce.h" #include "framework/cl/cl_deleter.h" -#include "CL/cl.h" +#include "framework/cl/cl_tool.h" namespace paddle_mobile { namespace framework { @@ -36,16 +37,18 @@ class CLEngine { return std::move(context_ptr); } - std::unique_ptr<_cl_command_queue, CLCommQueueDeleter> CreateClCommandQueue() { + std::unique_ptr<_cl_command_queue, CLCommQueueDeleter> + CreateClCommandQueue() { cl_int status; - cl_command_queue queue = clCreateCommandQueue(context_.get(), devices_[0], 0, &status); - std::unique_ptr<_cl_command_queue, CLCommQueueDeleter> command_queue_ptr(queue); + cl_command_queue queue = + clCreateCommandQueue(context_.get(), devices_[0], 0, &status); + std::unique_ptr<_cl_command_queue, CLCommQueueDeleter> command_queue_ptr( + queue); return std::move(command_queue_ptr); } - std::unique_ptr<_cl_program, CLProgramDeleter> CreateProgramWith(cl_context context, std::string file_name) { - - + std::unique_ptr<_cl_program, CLProgramDeleter> CreateProgramWith( + cl_context context, std::string file_name) { FILE *file = fopen(file_name.c_str(), "rb"); PADDLE_MOBILE_ENFORCE(file != nullptr, "can't open file: %s ", filename.c_str()); @@ -62,7 +65,8 @@ class CLEngine { const char *source = data; size_t sourceSize[] = {strlen(source)}; - cl_program p = clCreateProgramWithSource(context, 1, &source, sourceSize, NULL); + cl_program p = + clCreateProgramWithSource(context, 1, &source, sourceSize, NULL); std::unique_ptr<_cl_program, CLProgramDeleter> program_ptr(p); return std::move(program_ptr); } @@ -81,7 +85,6 @@ class CLEngine { bool SetClDeviceId(); - bool initialized_; cl_platform_id platform_; @@ -94,14 +97,13 @@ class CLEngine { std::unique_ptr<_cl_program, CLProgramDeleter> program_; -// bool SetClContext(); - -// bool SetClCommandQueue(); + // bool SetClContext(); -// bool LoadKernelFromFile(const char *kernel_file); + // bool SetClCommandQueue(); -// bool BuildProgram(); + // bool LoadKernelFromFile(const char *kernel_file); + // bool BuildProgram(); }; } // namespace framework diff --git a/src/framework/cl/cl_half.cpp b/src/framework/cl/cl_half.cpp index c98e06fd58eb75d149886bc7f0ef145d8f25eea1..6554815c68dce8649adc9b78b696c226c00d1c3d 100644 --- a/src/framework/cl/cl_half.cpp +++ b/src/framework/cl/cl_half.cpp @@ -14,429 +14,487 @@ limitations under the License. */ // ftp://ftp.fox-toolkit.org/pub/fasthalffloatconversion.pdf -#include "cl_half.h" +#include "framework/cl/cl_half.h" -const static uint32_t mantissatable[2048] = { - 0x00000000, 0x33800000, 0x34000000, 0x34400000, 0x34800000, 0x34a00000, 0x34c00000, 0x34e00000, - 0x35000000, 0x35100000, 0x35200000, 0x35300000, 0x35400000, 0x35500000, 0x35600000, 0x35700000, - 0x35800000, 0x35880000, 0x35900000, 0x35980000, 0x35a00000, 0x35a80000, 0x35b00000, 0x35b80000, - 0x35c00000, 0x35c80000, 0x35d00000, 0x35d80000, 0x35e00000, 0x35e80000, 0x35f00000, 0x35f80000, - 0x36000000, 0x36040000, 0x36080000, 0x360c0000, 0x36100000, 0x36140000, 0x36180000, 0x361c0000, - 0x36200000, 0x36240000, 0x36280000, 0x362c0000, 0x36300000, 0x36340000, 0x36380000, 0x363c0000, - 0x36400000, 0x36440000, 0x36480000, 0x364c0000, 0x36500000, 0x36540000, 0x36580000, 0x365c0000, - 0x36600000, 0x36640000, 0x36680000, 0x366c0000, 0x36700000, 0x36740000, 0x36780000, 0x367c0000, - 0x36800000, 0x36820000, 0x36840000, 0x36860000, 0x36880000, 0x368a0000, 0x368c0000, 0x368e0000, - 0x36900000, 0x36920000, 0x36940000, 0x36960000, 0x36980000, 0x369a0000, 0x369c0000, 0x369e0000, - 0x36a00000, 0x36a20000, 0x36a40000, 0x36a60000, 0x36a80000, 0x36aa0000, 0x36ac0000, 0x36ae0000, - 0x36b00000, 0x36b20000, 0x36b40000, 0x36b60000, 0x36b80000, 0x36ba0000, 0x36bc0000, 0x36be0000, - 0x36c00000, 0x36c20000, 0x36c40000, 0x36c60000, 0x36c80000, 0x36ca0000, 0x36cc0000, 0x36ce0000, - 0x36d00000, 0x36d20000, 0x36d40000, 0x36d60000, 0x36d80000, 0x36da0000, 0x36dc0000, 0x36de0000, - 0x36e00000, 0x36e20000, 0x36e40000, 0x36e60000, 0x36e80000, 0x36ea0000, 0x36ec0000, 0x36ee0000, - 0x36f00000, 0x36f20000, 0x36f40000, 0x36f60000, 0x36f80000, 0x36fa0000, 0x36fc0000, 0x36fe0000, - 0x37000000, 0x37010000, 0x37020000, 0x37030000, 0x37040000, 0x37050000, 0x37060000, 0x37070000, - 0x37080000, 0x37090000, 0x370a0000, 0x370b0000, 0x370c0000, 0x370d0000, 0x370e0000, 0x370f0000, - 0x37100000, 0x37110000, 0x37120000, 0x37130000, 0x37140000, 0x37150000, 0x37160000, 0x37170000, - 0x37180000, 0x37190000, 0x371a0000, 0x371b0000, 0x371c0000, 0x371d0000, 0x371e0000, 0x371f0000, - 0x37200000, 0x37210000, 0x37220000, 0x37230000, 0x37240000, 0x37250000, 0x37260000, 0x37270000, - 0x37280000, 0x37290000, 0x372a0000, 0x372b0000, 0x372c0000, 0x372d0000, 0x372e0000, 0x372f0000, - 0x37300000, 0x37310000, 0x37320000, 0x37330000, 0x37340000, 0x37350000, 0x37360000, 0x37370000, - 0x37380000, 0x37390000, 0x373a0000, 0x373b0000, 0x373c0000, 0x373d0000, 0x373e0000, 0x373f0000, - 0x37400000, 0x37410000, 0x37420000, 0x37430000, 0x37440000, 0x37450000, 0x37460000, 0x37470000, - 0x37480000, 0x37490000, 0x374a0000, 0x374b0000, 0x374c0000, 0x374d0000, 0x374e0000, 0x374f0000, - 0x37500000, 0x37510000, 0x37520000, 0x37530000, 0x37540000, 0x37550000, 0x37560000, 0x37570000, - 0x37580000, 0x37590000, 0x375a0000, 0x375b0000, 0x375c0000, 0x375d0000, 0x375e0000, 0x375f0000, - 0x37600000, 0x37610000, 0x37620000, 0x37630000, 0x37640000, 0x37650000, 0x37660000, 0x37670000, - 0x37680000, 0x37690000, 0x376a0000, 0x376b0000, 0x376c0000, 0x376d0000, 0x376e0000, 0x376f0000, - 0x37700000, 0x37710000, 0x37720000, 0x37730000, 0x37740000, 0x37750000, 0x37760000, 0x37770000, - 0x37780000, 0x37790000, 0x377a0000, 0x377b0000, 0x377c0000, 0x377d0000, 0x377e0000, 0x377f0000, - 0x37800000, 0x37808000, 0x37810000, 0x37818000, 0x37820000, 0x37828000, 0x37830000, 0x37838000, - 0x37840000, 0x37848000, 0x37850000, 0x37858000, 0x37860000, 0x37868000, 0x37870000, 0x37878000, - 0x37880000, 0x37888000, 0x37890000, 0x37898000, 0x378a0000, 0x378a8000, 0x378b0000, 0x378b8000, - 0x378c0000, 0x378c8000, 0x378d0000, 0x378d8000, 0x378e0000, 0x378e8000, 0x378f0000, 0x378f8000, - 0x37900000, 0x37908000, 0x37910000, 0x37918000, 0x37920000, 0x37928000, 0x37930000, 0x37938000, - 0x37940000, 0x37948000, 0x37950000, 0x37958000, 0x37960000, 0x37968000, 0x37970000, 0x37978000, - 0x37980000, 0x37988000, 0x37990000, 0x37998000, 0x379a0000, 0x379a8000, 0x379b0000, 0x379b8000, - 0x379c0000, 0x379c8000, 0x379d0000, 0x379d8000, 0x379e0000, 0x379e8000, 0x379f0000, 0x379f8000, - 0x37a00000, 0x37a08000, 0x37a10000, 0x37a18000, 0x37a20000, 0x37a28000, 0x37a30000, 0x37a38000, - 0x37a40000, 0x37a48000, 0x37a50000, 0x37a58000, 0x37a60000, 0x37a68000, 0x37a70000, 0x37a78000, - 0x37a80000, 0x37a88000, 0x37a90000, 0x37a98000, 0x37aa0000, 0x37aa8000, 0x37ab0000, 0x37ab8000, - 0x37ac0000, 0x37ac8000, 0x37ad0000, 0x37ad8000, 0x37ae0000, 0x37ae8000, 0x37af0000, 0x37af8000, - 0x37b00000, 0x37b08000, 0x37b10000, 0x37b18000, 0x37b20000, 0x37b28000, 0x37b30000, 0x37b38000, - 0x37b40000, 0x37b48000, 0x37b50000, 0x37b58000, 0x37b60000, 0x37b68000, 0x37b70000, 0x37b78000, - 0x37b80000, 0x37b88000, 0x37b90000, 0x37b98000, 0x37ba0000, 0x37ba8000, 0x37bb0000, 0x37bb8000, - 0x37bc0000, 0x37bc8000, 0x37bd0000, 0x37bd8000, 0x37be0000, 0x37be8000, 0x37bf0000, 0x37bf8000, - 0x37c00000, 0x37c08000, 0x37c10000, 0x37c18000, 0x37c20000, 0x37c28000, 0x37c30000, 0x37c38000, - 0x37c40000, 0x37c48000, 0x37c50000, 0x37c58000, 0x37c60000, 0x37c68000, 0x37c70000, 0x37c78000, - 0x37c80000, 0x37c88000, 0x37c90000, 0x37c98000, 0x37ca0000, 0x37ca8000, 0x37cb0000, 0x37cb8000, - 0x37cc0000, 0x37cc8000, 0x37cd0000, 0x37cd8000, 0x37ce0000, 0x37ce8000, 0x37cf0000, 0x37cf8000, - 0x37d00000, 0x37d08000, 0x37d10000, 0x37d18000, 0x37d20000, 0x37d28000, 0x37d30000, 0x37d38000, - 0x37d40000, 0x37d48000, 0x37d50000, 0x37d58000, 0x37d60000, 0x37d68000, 0x37d70000, 0x37d78000, - 0x37d80000, 0x37d88000, 0x37d90000, 0x37d98000, 0x37da0000, 0x37da8000, 0x37db0000, 0x37db8000, - 0x37dc0000, 0x37dc8000, 0x37dd0000, 0x37dd8000, 0x37de0000, 0x37de8000, 0x37df0000, 0x37df8000, - 0x37e00000, 0x37e08000, 0x37e10000, 0x37e18000, 0x37e20000, 0x37e28000, 0x37e30000, 0x37e38000, - 0x37e40000, 0x37e48000, 0x37e50000, 0x37e58000, 0x37e60000, 0x37e68000, 0x37e70000, 0x37e78000, - 0x37e80000, 0x37e88000, 0x37e90000, 0x37e98000, 0x37ea0000, 0x37ea8000, 0x37eb0000, 0x37eb8000, - 0x37ec0000, 0x37ec8000, 0x37ed0000, 0x37ed8000, 0x37ee0000, 0x37ee8000, 0x37ef0000, 0x37ef8000, - 0x37f00000, 0x37f08000, 0x37f10000, 0x37f18000, 0x37f20000, 0x37f28000, 0x37f30000, 0x37f38000, - 0x37f40000, 0x37f48000, 0x37f50000, 0x37f58000, 0x37f60000, 0x37f68000, 0x37f70000, 0x37f78000, - 0x37f80000, 0x37f88000, 0x37f90000, 0x37f98000, 0x37fa0000, 0x37fa8000, 0x37fb0000, 0x37fb8000, - 0x37fc0000, 0x37fc8000, 0x37fd0000, 0x37fd8000, 0x37fe0000, 0x37fe8000, 0x37ff0000, 0x37ff8000, - 0x38000000, 0x38004000, 0x38008000, 0x3800c000, 0x38010000, 0x38014000, 0x38018000, 0x3801c000, - 0x38020000, 0x38024000, 0x38028000, 0x3802c000, 0x38030000, 0x38034000, 0x38038000, 0x3803c000, - 0x38040000, 0x38044000, 0x38048000, 0x3804c000, 0x38050000, 0x38054000, 0x38058000, 0x3805c000, - 0x38060000, 0x38064000, 0x38068000, 0x3806c000, 0x38070000, 0x38074000, 0x38078000, 0x3807c000, - 0x38080000, 0x38084000, 0x38088000, 0x3808c000, 0x38090000, 0x38094000, 0x38098000, 0x3809c000, - 0x380a0000, 0x380a4000, 0x380a8000, 0x380ac000, 0x380b0000, 0x380b4000, 0x380b8000, 0x380bc000, - 0x380c0000, 0x380c4000, 0x380c8000, 0x380cc000, 0x380d0000, 0x380d4000, 0x380d8000, 0x380dc000, - 0x380e0000, 0x380e4000, 0x380e8000, 0x380ec000, 0x380f0000, 0x380f4000, 0x380f8000, 0x380fc000, - 0x38100000, 0x38104000, 0x38108000, 0x3810c000, 0x38110000, 0x38114000, 0x38118000, 0x3811c000, - 0x38120000, 0x38124000, 0x38128000, 0x3812c000, 0x38130000, 0x38134000, 0x38138000, 0x3813c000, - 0x38140000, 0x38144000, 0x38148000, 0x3814c000, 0x38150000, 0x38154000, 0x38158000, 0x3815c000, - 0x38160000, 0x38164000, 0x38168000, 0x3816c000, 0x38170000, 0x38174000, 0x38178000, 0x3817c000, - 0x38180000, 0x38184000, 0x38188000, 0x3818c000, 0x38190000, 0x38194000, 0x38198000, 0x3819c000, - 0x381a0000, 0x381a4000, 0x381a8000, 0x381ac000, 0x381b0000, 0x381b4000, 0x381b8000, 0x381bc000, - 0x381c0000, 0x381c4000, 0x381c8000, 0x381cc000, 0x381d0000, 0x381d4000, 0x381d8000, 0x381dc000, - 0x381e0000, 0x381e4000, 0x381e8000, 0x381ec000, 0x381f0000, 0x381f4000, 0x381f8000, 0x381fc000, - 0x38200000, 0x38204000, 0x38208000, 0x3820c000, 0x38210000, 0x38214000, 0x38218000, 0x3821c000, - 0x38220000, 0x38224000, 0x38228000, 0x3822c000, 0x38230000, 0x38234000, 0x38238000, 0x3823c000, - 0x38240000, 0x38244000, 0x38248000, 0x3824c000, 0x38250000, 0x38254000, 0x38258000, 0x3825c000, - 0x38260000, 0x38264000, 0x38268000, 0x3826c000, 0x38270000, 0x38274000, 0x38278000, 0x3827c000, - 0x38280000, 0x38284000, 0x38288000, 0x3828c000, 0x38290000, 0x38294000, 0x38298000, 0x3829c000, - 0x382a0000, 0x382a4000, 0x382a8000, 0x382ac000, 0x382b0000, 0x382b4000, 0x382b8000, 0x382bc000, - 0x382c0000, 0x382c4000, 0x382c8000, 0x382cc000, 0x382d0000, 0x382d4000, 0x382d8000, 0x382dc000, - 0x382e0000, 0x382e4000, 0x382e8000, 0x382ec000, 0x382f0000, 0x382f4000, 0x382f8000, 0x382fc000, - 0x38300000, 0x38304000, 0x38308000, 0x3830c000, 0x38310000, 0x38314000, 0x38318000, 0x3831c000, - 0x38320000, 0x38324000, 0x38328000, 0x3832c000, 0x38330000, 0x38334000, 0x38338000, 0x3833c000, - 0x38340000, 0x38344000, 0x38348000, 0x3834c000, 0x38350000, 0x38354000, 0x38358000, 0x3835c000, - 0x38360000, 0x38364000, 0x38368000, 0x3836c000, 0x38370000, 0x38374000, 0x38378000, 0x3837c000, - 0x38380000, 0x38384000, 0x38388000, 0x3838c000, 0x38390000, 0x38394000, 0x38398000, 0x3839c000, - 0x383a0000, 0x383a4000, 0x383a8000, 0x383ac000, 0x383b0000, 0x383b4000, 0x383b8000, 0x383bc000, - 0x383c0000, 0x383c4000, 0x383c8000, 0x383cc000, 0x383d0000, 0x383d4000, 0x383d8000, 0x383dc000, - 0x383e0000, 0x383e4000, 0x383e8000, 0x383ec000, 0x383f0000, 0x383f4000, 0x383f8000, 0x383fc000, - 0x38400000, 0x38404000, 0x38408000, 0x3840c000, 0x38410000, 0x38414000, 0x38418000, 0x3841c000, - 0x38420000, 0x38424000, 0x38428000, 0x3842c000, 0x38430000, 0x38434000, 0x38438000, 0x3843c000, - 0x38440000, 0x38444000, 0x38448000, 0x3844c000, 0x38450000, 0x38454000, 0x38458000, 0x3845c000, - 0x38460000, 0x38464000, 0x38468000, 0x3846c000, 0x38470000, 0x38474000, 0x38478000, 0x3847c000, - 0x38480000, 0x38484000, 0x38488000, 0x3848c000, 0x38490000, 0x38494000, 0x38498000, 0x3849c000, - 0x384a0000, 0x384a4000, 0x384a8000, 0x384ac000, 0x384b0000, 0x384b4000, 0x384b8000, 0x384bc000, - 0x384c0000, 0x384c4000, 0x384c8000, 0x384cc000, 0x384d0000, 0x384d4000, 0x384d8000, 0x384dc000, - 0x384e0000, 0x384e4000, 0x384e8000, 0x384ec000, 0x384f0000, 0x384f4000, 0x384f8000, 0x384fc000, - 0x38500000, 0x38504000, 0x38508000, 0x3850c000, 0x38510000, 0x38514000, 0x38518000, 0x3851c000, - 0x38520000, 0x38524000, 0x38528000, 0x3852c000, 0x38530000, 0x38534000, 0x38538000, 0x3853c000, - 0x38540000, 0x38544000, 0x38548000, 0x3854c000, 0x38550000, 0x38554000, 0x38558000, 0x3855c000, - 0x38560000, 0x38564000, 0x38568000, 0x3856c000, 0x38570000, 0x38574000, 0x38578000, 0x3857c000, - 0x38580000, 0x38584000, 0x38588000, 0x3858c000, 0x38590000, 0x38594000, 0x38598000, 0x3859c000, - 0x385a0000, 0x385a4000, 0x385a8000, 0x385ac000, 0x385b0000, 0x385b4000, 0x385b8000, 0x385bc000, - 0x385c0000, 0x385c4000, 0x385c8000, 0x385cc000, 0x385d0000, 0x385d4000, 0x385d8000, 0x385dc000, - 0x385e0000, 0x385e4000, 0x385e8000, 0x385ec000, 0x385f0000, 0x385f4000, 0x385f8000, 0x385fc000, - 0x38600000, 0x38604000, 0x38608000, 0x3860c000, 0x38610000, 0x38614000, 0x38618000, 0x3861c000, - 0x38620000, 0x38624000, 0x38628000, 0x3862c000, 0x38630000, 0x38634000, 0x38638000, 0x3863c000, - 0x38640000, 0x38644000, 0x38648000, 0x3864c000, 0x38650000, 0x38654000, 0x38658000, 0x3865c000, - 0x38660000, 0x38664000, 0x38668000, 0x3866c000, 0x38670000, 0x38674000, 0x38678000, 0x3867c000, - 0x38680000, 0x38684000, 0x38688000, 0x3868c000, 0x38690000, 0x38694000, 0x38698000, 0x3869c000, - 0x386a0000, 0x386a4000, 0x386a8000, 0x386ac000, 0x386b0000, 0x386b4000, 0x386b8000, 0x386bc000, - 0x386c0000, 0x386c4000, 0x386c8000, 0x386cc000, 0x386d0000, 0x386d4000, 0x386d8000, 0x386dc000, - 0x386e0000, 0x386e4000, 0x386e8000, 0x386ec000, 0x386f0000, 0x386f4000, 0x386f8000, 0x386fc000, - 0x38700000, 0x38704000, 0x38708000, 0x3870c000, 0x38710000, 0x38714000, 0x38718000, 0x3871c000, - 0x38720000, 0x38724000, 0x38728000, 0x3872c000, 0x38730000, 0x38734000, 0x38738000, 0x3873c000, - 0x38740000, 0x38744000, 0x38748000, 0x3874c000, 0x38750000, 0x38754000, 0x38758000, 0x3875c000, - 0x38760000, 0x38764000, 0x38768000, 0x3876c000, 0x38770000, 0x38774000, 0x38778000, 0x3877c000, - 0x38780000, 0x38784000, 0x38788000, 0x3878c000, 0x38790000, 0x38794000, 0x38798000, 0x3879c000, - 0x387a0000, 0x387a4000, 0x387a8000, 0x387ac000, 0x387b0000, 0x387b4000, 0x387b8000, 0x387bc000, - 0x387c0000, 0x387c4000, 0x387c8000, 0x387cc000, 0x387d0000, 0x387d4000, 0x387d8000, 0x387dc000, - 0x387e0000, 0x387e4000, 0x387e8000, 0x387ec000, 0x387f0000, 0x387f4000, 0x387f8000, 0x387fc000, - 0x38000000, 0x38002000, 0x38004000, 0x38006000, 0x38008000, 0x3800a000, 0x3800c000, 0x3800e000, - 0x38010000, 0x38012000, 0x38014000, 0x38016000, 0x38018000, 0x3801a000, 0x3801c000, 0x3801e000, - 0x38020000, 0x38022000, 0x38024000, 0x38026000, 0x38028000, 0x3802a000, 0x3802c000, 0x3802e000, - 0x38030000, 0x38032000, 0x38034000, 0x38036000, 0x38038000, 0x3803a000, 0x3803c000, 0x3803e000, - 0x38040000, 0x38042000, 0x38044000, 0x38046000, 0x38048000, 0x3804a000, 0x3804c000, 0x3804e000, - 0x38050000, 0x38052000, 0x38054000, 0x38056000, 0x38058000, 0x3805a000, 0x3805c000, 0x3805e000, - 0x38060000, 0x38062000, 0x38064000, 0x38066000, 0x38068000, 0x3806a000, 0x3806c000, 0x3806e000, - 0x38070000, 0x38072000, 0x38074000, 0x38076000, 0x38078000, 0x3807a000, 0x3807c000, 0x3807e000, - 0x38080000, 0x38082000, 0x38084000, 0x38086000, 0x38088000, 0x3808a000, 0x3808c000, 0x3808e000, - 0x38090000, 0x38092000, 0x38094000, 0x38096000, 0x38098000, 0x3809a000, 0x3809c000, 0x3809e000, - 0x380a0000, 0x380a2000, 0x380a4000, 0x380a6000, 0x380a8000, 0x380aa000, 0x380ac000, 0x380ae000, - 0x380b0000, 0x380b2000, 0x380b4000, 0x380b6000, 0x380b8000, 0x380ba000, 0x380bc000, 0x380be000, - 0x380c0000, 0x380c2000, 0x380c4000, 0x380c6000, 0x380c8000, 0x380ca000, 0x380cc000, 0x380ce000, - 0x380d0000, 0x380d2000, 0x380d4000, 0x380d6000, 0x380d8000, 0x380da000, 0x380dc000, 0x380de000, - 0x380e0000, 0x380e2000, 0x380e4000, 0x380e6000, 0x380e8000, 0x380ea000, 0x380ec000, 0x380ee000, - 0x380f0000, 0x380f2000, 0x380f4000, 0x380f6000, 0x380f8000, 0x380fa000, 0x380fc000, 0x380fe000, - 0x38100000, 0x38102000, 0x38104000, 0x38106000, 0x38108000, 0x3810a000, 0x3810c000, 0x3810e000, - 0x38110000, 0x38112000, 0x38114000, 0x38116000, 0x38118000, 0x3811a000, 0x3811c000, 0x3811e000, - 0x38120000, 0x38122000, 0x38124000, 0x38126000, 0x38128000, 0x3812a000, 0x3812c000, 0x3812e000, - 0x38130000, 0x38132000, 0x38134000, 0x38136000, 0x38138000, 0x3813a000, 0x3813c000, 0x3813e000, - 0x38140000, 0x38142000, 0x38144000, 0x38146000, 0x38148000, 0x3814a000, 0x3814c000, 0x3814e000, - 0x38150000, 0x38152000, 0x38154000, 0x38156000, 0x38158000, 0x3815a000, 0x3815c000, 0x3815e000, - 0x38160000, 0x38162000, 0x38164000, 0x38166000, 0x38168000, 0x3816a000, 0x3816c000, 0x3816e000, - 0x38170000, 0x38172000, 0x38174000, 0x38176000, 0x38178000, 0x3817a000, 0x3817c000, 0x3817e000, - 0x38180000, 0x38182000, 0x38184000, 0x38186000, 0x38188000, 0x3818a000, 0x3818c000, 0x3818e000, - 0x38190000, 0x38192000, 0x38194000, 0x38196000, 0x38198000, 0x3819a000, 0x3819c000, 0x3819e000, - 0x381a0000, 0x381a2000, 0x381a4000, 0x381a6000, 0x381a8000, 0x381aa000, 0x381ac000, 0x381ae000, - 0x381b0000, 0x381b2000, 0x381b4000, 0x381b6000, 0x381b8000, 0x381ba000, 0x381bc000, 0x381be000, - 0x381c0000, 0x381c2000, 0x381c4000, 0x381c6000, 0x381c8000, 0x381ca000, 0x381cc000, 0x381ce000, - 0x381d0000, 0x381d2000, 0x381d4000, 0x381d6000, 0x381d8000, 0x381da000, 0x381dc000, 0x381de000, - 0x381e0000, 0x381e2000, 0x381e4000, 0x381e6000, 0x381e8000, 0x381ea000, 0x381ec000, 0x381ee000, - 0x381f0000, 0x381f2000, 0x381f4000, 0x381f6000, 0x381f8000, 0x381fa000, 0x381fc000, 0x381fe000, - 0x38200000, 0x38202000, 0x38204000, 0x38206000, 0x38208000, 0x3820a000, 0x3820c000, 0x3820e000, - 0x38210000, 0x38212000, 0x38214000, 0x38216000, 0x38218000, 0x3821a000, 0x3821c000, 0x3821e000, - 0x38220000, 0x38222000, 0x38224000, 0x38226000, 0x38228000, 0x3822a000, 0x3822c000, 0x3822e000, - 0x38230000, 0x38232000, 0x38234000, 0x38236000, 0x38238000, 0x3823a000, 0x3823c000, 0x3823e000, - 0x38240000, 0x38242000, 0x38244000, 0x38246000, 0x38248000, 0x3824a000, 0x3824c000, 0x3824e000, - 0x38250000, 0x38252000, 0x38254000, 0x38256000, 0x38258000, 0x3825a000, 0x3825c000, 0x3825e000, - 0x38260000, 0x38262000, 0x38264000, 0x38266000, 0x38268000, 0x3826a000, 0x3826c000, 0x3826e000, - 0x38270000, 0x38272000, 0x38274000, 0x38276000, 0x38278000, 0x3827a000, 0x3827c000, 0x3827e000, - 0x38280000, 0x38282000, 0x38284000, 0x38286000, 0x38288000, 0x3828a000, 0x3828c000, 0x3828e000, - 0x38290000, 0x38292000, 0x38294000, 0x38296000, 0x38298000, 0x3829a000, 0x3829c000, 0x3829e000, - 0x382a0000, 0x382a2000, 0x382a4000, 0x382a6000, 0x382a8000, 0x382aa000, 0x382ac000, 0x382ae000, - 0x382b0000, 0x382b2000, 0x382b4000, 0x382b6000, 0x382b8000, 0x382ba000, 0x382bc000, 0x382be000, - 0x382c0000, 0x382c2000, 0x382c4000, 0x382c6000, 0x382c8000, 0x382ca000, 0x382cc000, 0x382ce000, - 0x382d0000, 0x382d2000, 0x382d4000, 0x382d6000, 0x382d8000, 0x382da000, 0x382dc000, 0x382de000, - 0x382e0000, 0x382e2000, 0x382e4000, 0x382e6000, 0x382e8000, 0x382ea000, 0x382ec000, 0x382ee000, - 0x382f0000, 0x382f2000, 0x382f4000, 0x382f6000, 0x382f8000, 0x382fa000, 0x382fc000, 0x382fe000, - 0x38300000, 0x38302000, 0x38304000, 0x38306000, 0x38308000, 0x3830a000, 0x3830c000, 0x3830e000, - 0x38310000, 0x38312000, 0x38314000, 0x38316000, 0x38318000, 0x3831a000, 0x3831c000, 0x3831e000, - 0x38320000, 0x38322000, 0x38324000, 0x38326000, 0x38328000, 0x3832a000, 0x3832c000, 0x3832e000, - 0x38330000, 0x38332000, 0x38334000, 0x38336000, 0x38338000, 0x3833a000, 0x3833c000, 0x3833e000, - 0x38340000, 0x38342000, 0x38344000, 0x38346000, 0x38348000, 0x3834a000, 0x3834c000, 0x3834e000, - 0x38350000, 0x38352000, 0x38354000, 0x38356000, 0x38358000, 0x3835a000, 0x3835c000, 0x3835e000, - 0x38360000, 0x38362000, 0x38364000, 0x38366000, 0x38368000, 0x3836a000, 0x3836c000, 0x3836e000, - 0x38370000, 0x38372000, 0x38374000, 0x38376000, 0x38378000, 0x3837a000, 0x3837c000, 0x3837e000, - 0x38380000, 0x38382000, 0x38384000, 0x38386000, 0x38388000, 0x3838a000, 0x3838c000, 0x3838e000, - 0x38390000, 0x38392000, 0x38394000, 0x38396000, 0x38398000, 0x3839a000, 0x3839c000, 0x3839e000, - 0x383a0000, 0x383a2000, 0x383a4000, 0x383a6000, 0x383a8000, 0x383aa000, 0x383ac000, 0x383ae000, - 0x383b0000, 0x383b2000, 0x383b4000, 0x383b6000, 0x383b8000, 0x383ba000, 0x383bc000, 0x383be000, - 0x383c0000, 0x383c2000, 0x383c4000, 0x383c6000, 0x383c8000, 0x383ca000, 0x383cc000, 0x383ce000, - 0x383d0000, 0x383d2000, 0x383d4000, 0x383d6000, 0x383d8000, 0x383da000, 0x383dc000, 0x383de000, - 0x383e0000, 0x383e2000, 0x383e4000, 0x383e6000, 0x383e8000, 0x383ea000, 0x383ec000, 0x383ee000, - 0x383f0000, 0x383f2000, 0x383f4000, 0x383f6000, 0x383f8000, 0x383fa000, 0x383fc000, 0x383fe000, - 0x38400000, 0x38402000, 0x38404000, 0x38406000, 0x38408000, 0x3840a000, 0x3840c000, 0x3840e000, - 0x38410000, 0x38412000, 0x38414000, 0x38416000, 0x38418000, 0x3841a000, 0x3841c000, 0x3841e000, - 0x38420000, 0x38422000, 0x38424000, 0x38426000, 0x38428000, 0x3842a000, 0x3842c000, 0x3842e000, - 0x38430000, 0x38432000, 0x38434000, 0x38436000, 0x38438000, 0x3843a000, 0x3843c000, 0x3843e000, - 0x38440000, 0x38442000, 0x38444000, 0x38446000, 0x38448000, 0x3844a000, 0x3844c000, 0x3844e000, - 0x38450000, 0x38452000, 0x38454000, 0x38456000, 0x38458000, 0x3845a000, 0x3845c000, 0x3845e000, - 0x38460000, 0x38462000, 0x38464000, 0x38466000, 0x38468000, 0x3846a000, 0x3846c000, 0x3846e000, - 0x38470000, 0x38472000, 0x38474000, 0x38476000, 0x38478000, 0x3847a000, 0x3847c000, 0x3847e000, - 0x38480000, 0x38482000, 0x38484000, 0x38486000, 0x38488000, 0x3848a000, 0x3848c000, 0x3848e000, - 0x38490000, 0x38492000, 0x38494000, 0x38496000, 0x38498000, 0x3849a000, 0x3849c000, 0x3849e000, - 0x384a0000, 0x384a2000, 0x384a4000, 0x384a6000, 0x384a8000, 0x384aa000, 0x384ac000, 0x384ae000, - 0x384b0000, 0x384b2000, 0x384b4000, 0x384b6000, 0x384b8000, 0x384ba000, 0x384bc000, 0x384be000, - 0x384c0000, 0x384c2000, 0x384c4000, 0x384c6000, 0x384c8000, 0x384ca000, 0x384cc000, 0x384ce000, - 0x384d0000, 0x384d2000, 0x384d4000, 0x384d6000, 0x384d8000, 0x384da000, 0x384dc000, 0x384de000, - 0x384e0000, 0x384e2000, 0x384e4000, 0x384e6000, 0x384e8000, 0x384ea000, 0x384ec000, 0x384ee000, - 0x384f0000, 0x384f2000, 0x384f4000, 0x384f6000, 0x384f8000, 0x384fa000, 0x384fc000, 0x384fe000, - 0x38500000, 0x38502000, 0x38504000, 0x38506000, 0x38508000, 0x3850a000, 0x3850c000, 0x3850e000, - 0x38510000, 0x38512000, 0x38514000, 0x38516000, 0x38518000, 0x3851a000, 0x3851c000, 0x3851e000, - 0x38520000, 0x38522000, 0x38524000, 0x38526000, 0x38528000, 0x3852a000, 0x3852c000, 0x3852e000, - 0x38530000, 0x38532000, 0x38534000, 0x38536000, 0x38538000, 0x3853a000, 0x3853c000, 0x3853e000, - 0x38540000, 0x38542000, 0x38544000, 0x38546000, 0x38548000, 0x3854a000, 0x3854c000, 0x3854e000, - 0x38550000, 0x38552000, 0x38554000, 0x38556000, 0x38558000, 0x3855a000, 0x3855c000, 0x3855e000, - 0x38560000, 0x38562000, 0x38564000, 0x38566000, 0x38568000, 0x3856a000, 0x3856c000, 0x3856e000, - 0x38570000, 0x38572000, 0x38574000, 0x38576000, 0x38578000, 0x3857a000, 0x3857c000, 0x3857e000, - 0x38580000, 0x38582000, 0x38584000, 0x38586000, 0x38588000, 0x3858a000, 0x3858c000, 0x3858e000, - 0x38590000, 0x38592000, 0x38594000, 0x38596000, 0x38598000, 0x3859a000, 0x3859c000, 0x3859e000, - 0x385a0000, 0x385a2000, 0x385a4000, 0x385a6000, 0x385a8000, 0x385aa000, 0x385ac000, 0x385ae000, - 0x385b0000, 0x385b2000, 0x385b4000, 0x385b6000, 0x385b8000, 0x385ba000, 0x385bc000, 0x385be000, - 0x385c0000, 0x385c2000, 0x385c4000, 0x385c6000, 0x385c8000, 0x385ca000, 0x385cc000, 0x385ce000, - 0x385d0000, 0x385d2000, 0x385d4000, 0x385d6000, 0x385d8000, 0x385da000, 0x385dc000, 0x385de000, - 0x385e0000, 0x385e2000, 0x385e4000, 0x385e6000, 0x385e8000, 0x385ea000, 0x385ec000, 0x385ee000, - 0x385f0000, 0x385f2000, 0x385f4000, 0x385f6000, 0x385f8000, 0x385fa000, 0x385fc000, 0x385fe000, - 0x38600000, 0x38602000, 0x38604000, 0x38606000, 0x38608000, 0x3860a000, 0x3860c000, 0x3860e000, - 0x38610000, 0x38612000, 0x38614000, 0x38616000, 0x38618000, 0x3861a000, 0x3861c000, 0x3861e000, - 0x38620000, 0x38622000, 0x38624000, 0x38626000, 0x38628000, 0x3862a000, 0x3862c000, 0x3862e000, - 0x38630000, 0x38632000, 0x38634000, 0x38636000, 0x38638000, 0x3863a000, 0x3863c000, 0x3863e000, - 0x38640000, 0x38642000, 0x38644000, 0x38646000, 0x38648000, 0x3864a000, 0x3864c000, 0x3864e000, - 0x38650000, 0x38652000, 0x38654000, 0x38656000, 0x38658000, 0x3865a000, 0x3865c000, 0x3865e000, - 0x38660000, 0x38662000, 0x38664000, 0x38666000, 0x38668000, 0x3866a000, 0x3866c000, 0x3866e000, - 0x38670000, 0x38672000, 0x38674000, 0x38676000, 0x38678000, 0x3867a000, 0x3867c000, 0x3867e000, - 0x38680000, 0x38682000, 0x38684000, 0x38686000, 0x38688000, 0x3868a000, 0x3868c000, 0x3868e000, - 0x38690000, 0x38692000, 0x38694000, 0x38696000, 0x38698000, 0x3869a000, 0x3869c000, 0x3869e000, - 0x386a0000, 0x386a2000, 0x386a4000, 0x386a6000, 0x386a8000, 0x386aa000, 0x386ac000, 0x386ae000, - 0x386b0000, 0x386b2000, 0x386b4000, 0x386b6000, 0x386b8000, 0x386ba000, 0x386bc000, 0x386be000, - 0x386c0000, 0x386c2000, 0x386c4000, 0x386c6000, 0x386c8000, 0x386ca000, 0x386cc000, 0x386ce000, - 0x386d0000, 0x386d2000, 0x386d4000, 0x386d6000, 0x386d8000, 0x386da000, 0x386dc000, 0x386de000, - 0x386e0000, 0x386e2000, 0x386e4000, 0x386e6000, 0x386e8000, 0x386ea000, 0x386ec000, 0x386ee000, - 0x386f0000, 0x386f2000, 0x386f4000, 0x386f6000, 0x386f8000, 0x386fa000, 0x386fc000, 0x386fe000, - 0x38700000, 0x38702000, 0x38704000, 0x38706000, 0x38708000, 0x3870a000, 0x3870c000, 0x3870e000, - 0x38710000, 0x38712000, 0x38714000, 0x38716000, 0x38718000, 0x3871a000, 0x3871c000, 0x3871e000, - 0x38720000, 0x38722000, 0x38724000, 0x38726000, 0x38728000, 0x3872a000, 0x3872c000, 0x3872e000, - 0x38730000, 0x38732000, 0x38734000, 0x38736000, 0x38738000, 0x3873a000, 0x3873c000, 0x3873e000, - 0x38740000, 0x38742000, 0x38744000, 0x38746000, 0x38748000, 0x3874a000, 0x3874c000, 0x3874e000, - 0x38750000, 0x38752000, 0x38754000, 0x38756000, 0x38758000, 0x3875a000, 0x3875c000, 0x3875e000, - 0x38760000, 0x38762000, 0x38764000, 0x38766000, 0x38768000, 0x3876a000, 0x3876c000, 0x3876e000, - 0x38770000, 0x38772000, 0x38774000, 0x38776000, 0x38778000, 0x3877a000, 0x3877c000, 0x3877e000, - 0x38780000, 0x38782000, 0x38784000, 0x38786000, 0x38788000, 0x3878a000, 0x3878c000, 0x3878e000, - 0x38790000, 0x38792000, 0x38794000, 0x38796000, 0x38798000, 0x3879a000, 0x3879c000, 0x3879e000, - 0x387a0000, 0x387a2000, 0x387a4000, 0x387a6000, 0x387a8000, 0x387aa000, 0x387ac000, 0x387ae000, - 0x387b0000, 0x387b2000, 0x387b4000, 0x387b6000, 0x387b8000, 0x387ba000, 0x387bc000, 0x387be000, - 0x387c0000, 0x387c2000, 0x387c4000, 0x387c6000, 0x387c8000, 0x387ca000, 0x387cc000, 0x387ce000, - 0x387d0000, 0x387d2000, 0x387d4000, 0x387d6000, 0x387d8000, 0x387da000, 0x387dc000, 0x387de000, - 0x387e0000, 0x387e2000, 0x387e4000, 0x387e6000, 0x387e8000, 0x387ea000, 0x387ec000, 0x387ee000, - 0x387f0000, 0x387f2000, 0x387f4000, 0x387f6000, 0x387f8000, 0x387fa000, 0x387fc000, 0x387fe000 -}; +static const uint32_t mantissatable[2048] = { + 0x00000000, 0x33800000, 0x34000000, 0x34400000, 0x34800000, 0x34a00000, + 0x34c00000, 0x34e00000, 0x35000000, 0x35100000, 0x35200000, 0x35300000, + 0x35400000, 0x35500000, 0x35600000, 0x35700000, 0x35800000, 0x35880000, + 0x35900000, 0x35980000, 0x35a00000, 0x35a80000, 0x35b00000, 0x35b80000, + 0x35c00000, 0x35c80000, 0x35d00000, 0x35d80000, 0x35e00000, 0x35e80000, + 0x35f00000, 0x35f80000, 0x36000000, 0x36040000, 0x36080000, 0x360c0000, + 0x36100000, 0x36140000, 0x36180000, 0x361c0000, 0x36200000, 0x36240000, + 0x36280000, 0x362c0000, 0x36300000, 0x36340000, 0x36380000, 0x363c0000, + 0x36400000, 0x36440000, 0x36480000, 0x364c0000, 0x36500000, 0x36540000, + 0x36580000, 0x365c0000, 0x36600000, 0x36640000, 0x36680000, 0x366c0000, + 0x36700000, 0x36740000, 0x36780000, 0x367c0000, 0x36800000, 0x36820000, + 0x36840000, 0x36860000, 0x36880000, 0x368a0000, 0x368c0000, 0x368e0000, + 0x36900000, 0x36920000, 0x36940000, 0x36960000, 0x36980000, 0x369a0000, + 0x369c0000, 0x369e0000, 0x36a00000, 0x36a20000, 0x36a40000, 0x36a60000, + 0x36a80000, 0x36aa0000, 0x36ac0000, 0x36ae0000, 0x36b00000, 0x36b20000, + 0x36b40000, 0x36b60000, 0x36b80000, 0x36ba0000, 0x36bc0000, 0x36be0000, + 0x36c00000, 0x36c20000, 0x36c40000, 0x36c60000, 0x36c80000, 0x36ca0000, + 0x36cc0000, 0x36ce0000, 0x36d00000, 0x36d20000, 0x36d40000, 0x36d60000, + 0x36d80000, 0x36da0000, 0x36dc0000, 0x36de0000, 0x36e00000, 0x36e20000, + 0x36e40000, 0x36e60000, 0x36e80000, 0x36ea0000, 0x36ec0000, 0x36ee0000, + 0x36f00000, 0x36f20000, 0x36f40000, 0x36f60000, 0x36f80000, 0x36fa0000, + 0x36fc0000, 0x36fe0000, 0x37000000, 0x37010000, 0x37020000, 0x37030000, + 0x37040000, 0x37050000, 0x37060000, 0x37070000, 0x37080000, 0x37090000, + 0x370a0000, 0x370b0000, 0x370c0000, 0x370d0000, 0x370e0000, 0x370f0000, + 0x37100000, 0x37110000, 0x37120000, 0x37130000, 0x37140000, 0x37150000, + 0x37160000, 0x37170000, 0x37180000, 0x37190000, 0x371a0000, 0x371b0000, + 0x371c0000, 0x371d0000, 0x371e0000, 0x371f0000, 0x37200000, 0x37210000, + 0x37220000, 0x37230000, 0x37240000, 0x37250000, 0x37260000, 0x37270000, + 0x37280000, 0x37290000, 0x372a0000, 0x372b0000, 0x372c0000, 0x372d0000, + 0x372e0000, 0x372f0000, 0x37300000, 0x37310000, 0x37320000, 0x37330000, + 0x37340000, 0x37350000, 0x37360000, 0x37370000, 0x37380000, 0x37390000, + 0x373a0000, 0x373b0000, 0x373c0000, 0x373d0000, 0x373e0000, 0x373f0000, + 0x37400000, 0x37410000, 0x37420000, 0x37430000, 0x37440000, 0x37450000, + 0x37460000, 0x37470000, 0x37480000, 0x37490000, 0x374a0000, 0x374b0000, + 0x374c0000, 0x374d0000, 0x374e0000, 0x374f0000, 0x37500000, 0x37510000, + 0x37520000, 0x37530000, 0x37540000, 0x37550000, 0x37560000, 0x37570000, + 0x37580000, 0x37590000, 0x375a0000, 0x375b0000, 0x375c0000, 0x375d0000, + 0x375e0000, 0x375f0000, 0x37600000, 0x37610000, 0x37620000, 0x37630000, + 0x37640000, 0x37650000, 0x37660000, 0x37670000, 0x37680000, 0x37690000, + 0x376a0000, 0x376b0000, 0x376c0000, 0x376d0000, 0x376e0000, 0x376f0000, + 0x37700000, 0x37710000, 0x37720000, 0x37730000, 0x37740000, 0x37750000, + 0x37760000, 0x37770000, 0x37780000, 0x37790000, 0x377a0000, 0x377b0000, + 0x377c0000, 0x377d0000, 0x377e0000, 0x377f0000, 0x37800000, 0x37808000, + 0x37810000, 0x37818000, 0x37820000, 0x37828000, 0x37830000, 0x37838000, + 0x37840000, 0x37848000, 0x37850000, 0x37858000, 0x37860000, 0x37868000, + 0x37870000, 0x37878000, 0x37880000, 0x37888000, 0x37890000, 0x37898000, + 0x378a0000, 0x378a8000, 0x378b0000, 0x378b8000, 0x378c0000, 0x378c8000, + 0x378d0000, 0x378d8000, 0x378e0000, 0x378e8000, 0x378f0000, 0x378f8000, + 0x37900000, 0x37908000, 0x37910000, 0x37918000, 0x37920000, 0x37928000, + 0x37930000, 0x37938000, 0x37940000, 0x37948000, 0x37950000, 0x37958000, + 0x37960000, 0x37968000, 0x37970000, 0x37978000, 0x37980000, 0x37988000, + 0x37990000, 0x37998000, 0x379a0000, 0x379a8000, 0x379b0000, 0x379b8000, + 0x379c0000, 0x379c8000, 0x379d0000, 0x379d8000, 0x379e0000, 0x379e8000, + 0x379f0000, 0x379f8000, 0x37a00000, 0x37a08000, 0x37a10000, 0x37a18000, + 0x37a20000, 0x37a28000, 0x37a30000, 0x37a38000, 0x37a40000, 0x37a48000, + 0x37a50000, 0x37a58000, 0x37a60000, 0x37a68000, 0x37a70000, 0x37a78000, + 0x37a80000, 0x37a88000, 0x37a90000, 0x37a98000, 0x37aa0000, 0x37aa8000, + 0x37ab0000, 0x37ab8000, 0x37ac0000, 0x37ac8000, 0x37ad0000, 0x37ad8000, + 0x37ae0000, 0x37ae8000, 0x37af0000, 0x37af8000, 0x37b00000, 0x37b08000, + 0x37b10000, 0x37b18000, 0x37b20000, 0x37b28000, 0x37b30000, 0x37b38000, + 0x37b40000, 0x37b48000, 0x37b50000, 0x37b58000, 0x37b60000, 0x37b68000, + 0x37b70000, 0x37b78000, 0x37b80000, 0x37b88000, 0x37b90000, 0x37b98000, + 0x37ba0000, 0x37ba8000, 0x37bb0000, 0x37bb8000, 0x37bc0000, 0x37bc8000, + 0x37bd0000, 0x37bd8000, 0x37be0000, 0x37be8000, 0x37bf0000, 0x37bf8000, + 0x37c00000, 0x37c08000, 0x37c10000, 0x37c18000, 0x37c20000, 0x37c28000, + 0x37c30000, 0x37c38000, 0x37c40000, 0x37c48000, 0x37c50000, 0x37c58000, + 0x37c60000, 0x37c68000, 0x37c70000, 0x37c78000, 0x37c80000, 0x37c88000, + 0x37c90000, 0x37c98000, 0x37ca0000, 0x37ca8000, 0x37cb0000, 0x37cb8000, + 0x37cc0000, 0x37cc8000, 0x37cd0000, 0x37cd8000, 0x37ce0000, 0x37ce8000, + 0x37cf0000, 0x37cf8000, 0x37d00000, 0x37d08000, 0x37d10000, 0x37d18000, + 0x37d20000, 0x37d28000, 0x37d30000, 0x37d38000, 0x37d40000, 0x37d48000, + 0x37d50000, 0x37d58000, 0x37d60000, 0x37d68000, 0x37d70000, 0x37d78000, + 0x37d80000, 0x37d88000, 0x37d90000, 0x37d98000, 0x37da0000, 0x37da8000, + 0x37db0000, 0x37db8000, 0x37dc0000, 0x37dc8000, 0x37dd0000, 0x37dd8000, + 0x37de0000, 0x37de8000, 0x37df0000, 0x37df8000, 0x37e00000, 0x37e08000, + 0x37e10000, 0x37e18000, 0x37e20000, 0x37e28000, 0x37e30000, 0x37e38000, + 0x37e40000, 0x37e48000, 0x37e50000, 0x37e58000, 0x37e60000, 0x37e68000, + 0x37e70000, 0x37e78000, 0x37e80000, 0x37e88000, 0x37e90000, 0x37e98000, + 0x37ea0000, 0x37ea8000, 0x37eb0000, 0x37eb8000, 0x37ec0000, 0x37ec8000, + 0x37ed0000, 0x37ed8000, 0x37ee0000, 0x37ee8000, 0x37ef0000, 0x37ef8000, + 0x37f00000, 0x37f08000, 0x37f10000, 0x37f18000, 0x37f20000, 0x37f28000, + 0x37f30000, 0x37f38000, 0x37f40000, 0x37f48000, 0x37f50000, 0x37f58000, + 0x37f60000, 0x37f68000, 0x37f70000, 0x37f78000, 0x37f80000, 0x37f88000, + 0x37f90000, 0x37f98000, 0x37fa0000, 0x37fa8000, 0x37fb0000, 0x37fb8000, + 0x37fc0000, 0x37fc8000, 0x37fd0000, 0x37fd8000, 0x37fe0000, 0x37fe8000, + 0x37ff0000, 0x37ff8000, 0x38000000, 0x38004000, 0x38008000, 0x3800c000, + 0x38010000, 0x38014000, 0x38018000, 0x3801c000, 0x38020000, 0x38024000, + 0x38028000, 0x3802c000, 0x38030000, 0x38034000, 0x38038000, 0x3803c000, + 0x38040000, 0x38044000, 0x38048000, 0x3804c000, 0x38050000, 0x38054000, + 0x38058000, 0x3805c000, 0x38060000, 0x38064000, 0x38068000, 0x3806c000, + 0x38070000, 0x38074000, 0x38078000, 0x3807c000, 0x38080000, 0x38084000, + 0x38088000, 0x3808c000, 0x38090000, 0x38094000, 0x38098000, 0x3809c000, + 0x380a0000, 0x380a4000, 0x380a8000, 0x380ac000, 0x380b0000, 0x380b4000, + 0x380b8000, 0x380bc000, 0x380c0000, 0x380c4000, 0x380c8000, 0x380cc000, + 0x380d0000, 0x380d4000, 0x380d8000, 0x380dc000, 0x380e0000, 0x380e4000, + 0x380e8000, 0x380ec000, 0x380f0000, 0x380f4000, 0x380f8000, 0x380fc000, + 0x38100000, 0x38104000, 0x38108000, 0x3810c000, 0x38110000, 0x38114000, + 0x38118000, 0x3811c000, 0x38120000, 0x38124000, 0x38128000, 0x3812c000, + 0x38130000, 0x38134000, 0x38138000, 0x3813c000, 0x38140000, 0x38144000, + 0x38148000, 0x3814c000, 0x38150000, 0x38154000, 0x38158000, 0x3815c000, + 0x38160000, 0x38164000, 0x38168000, 0x3816c000, 0x38170000, 0x38174000, + 0x38178000, 0x3817c000, 0x38180000, 0x38184000, 0x38188000, 0x3818c000, + 0x38190000, 0x38194000, 0x38198000, 0x3819c000, 0x381a0000, 0x381a4000, + 0x381a8000, 0x381ac000, 0x381b0000, 0x381b4000, 0x381b8000, 0x381bc000, + 0x381c0000, 0x381c4000, 0x381c8000, 0x381cc000, 0x381d0000, 0x381d4000, + 0x381d8000, 0x381dc000, 0x381e0000, 0x381e4000, 0x381e8000, 0x381ec000, + 0x381f0000, 0x381f4000, 0x381f8000, 0x381fc000, 0x38200000, 0x38204000, + 0x38208000, 0x3820c000, 0x38210000, 0x38214000, 0x38218000, 0x3821c000, + 0x38220000, 0x38224000, 0x38228000, 0x3822c000, 0x38230000, 0x38234000, + 0x38238000, 0x3823c000, 0x38240000, 0x38244000, 0x38248000, 0x3824c000, + 0x38250000, 0x38254000, 0x38258000, 0x3825c000, 0x38260000, 0x38264000, + 0x38268000, 0x3826c000, 0x38270000, 0x38274000, 0x38278000, 0x3827c000, + 0x38280000, 0x38284000, 0x38288000, 0x3828c000, 0x38290000, 0x38294000, + 0x38298000, 0x3829c000, 0x382a0000, 0x382a4000, 0x382a8000, 0x382ac000, + 0x382b0000, 0x382b4000, 0x382b8000, 0x382bc000, 0x382c0000, 0x382c4000, + 0x382c8000, 0x382cc000, 0x382d0000, 0x382d4000, 0x382d8000, 0x382dc000, + 0x382e0000, 0x382e4000, 0x382e8000, 0x382ec000, 0x382f0000, 0x382f4000, + 0x382f8000, 0x382fc000, 0x38300000, 0x38304000, 0x38308000, 0x3830c000, + 0x38310000, 0x38314000, 0x38318000, 0x3831c000, 0x38320000, 0x38324000, + 0x38328000, 0x3832c000, 0x38330000, 0x38334000, 0x38338000, 0x3833c000, + 0x38340000, 0x38344000, 0x38348000, 0x3834c000, 0x38350000, 0x38354000, + 0x38358000, 0x3835c000, 0x38360000, 0x38364000, 0x38368000, 0x3836c000, + 0x38370000, 0x38374000, 0x38378000, 0x3837c000, 0x38380000, 0x38384000, + 0x38388000, 0x3838c000, 0x38390000, 0x38394000, 0x38398000, 0x3839c000, + 0x383a0000, 0x383a4000, 0x383a8000, 0x383ac000, 0x383b0000, 0x383b4000, + 0x383b8000, 0x383bc000, 0x383c0000, 0x383c4000, 0x383c8000, 0x383cc000, + 0x383d0000, 0x383d4000, 0x383d8000, 0x383dc000, 0x383e0000, 0x383e4000, + 0x383e8000, 0x383ec000, 0x383f0000, 0x383f4000, 0x383f8000, 0x383fc000, + 0x38400000, 0x38404000, 0x38408000, 0x3840c000, 0x38410000, 0x38414000, + 0x38418000, 0x3841c000, 0x38420000, 0x38424000, 0x38428000, 0x3842c000, + 0x38430000, 0x38434000, 0x38438000, 0x3843c000, 0x38440000, 0x38444000, + 0x38448000, 0x3844c000, 0x38450000, 0x38454000, 0x38458000, 0x3845c000, + 0x38460000, 0x38464000, 0x38468000, 0x3846c000, 0x38470000, 0x38474000, + 0x38478000, 0x3847c000, 0x38480000, 0x38484000, 0x38488000, 0x3848c000, + 0x38490000, 0x38494000, 0x38498000, 0x3849c000, 0x384a0000, 0x384a4000, + 0x384a8000, 0x384ac000, 0x384b0000, 0x384b4000, 0x384b8000, 0x384bc000, + 0x384c0000, 0x384c4000, 0x384c8000, 0x384cc000, 0x384d0000, 0x384d4000, + 0x384d8000, 0x384dc000, 0x384e0000, 0x384e4000, 0x384e8000, 0x384ec000, + 0x384f0000, 0x384f4000, 0x384f8000, 0x384fc000, 0x38500000, 0x38504000, + 0x38508000, 0x3850c000, 0x38510000, 0x38514000, 0x38518000, 0x3851c000, + 0x38520000, 0x38524000, 0x38528000, 0x3852c000, 0x38530000, 0x38534000, + 0x38538000, 0x3853c000, 0x38540000, 0x38544000, 0x38548000, 0x3854c000, + 0x38550000, 0x38554000, 0x38558000, 0x3855c000, 0x38560000, 0x38564000, + 0x38568000, 0x3856c000, 0x38570000, 0x38574000, 0x38578000, 0x3857c000, + 0x38580000, 0x38584000, 0x38588000, 0x3858c000, 0x38590000, 0x38594000, + 0x38598000, 0x3859c000, 0x385a0000, 0x385a4000, 0x385a8000, 0x385ac000, + 0x385b0000, 0x385b4000, 0x385b8000, 0x385bc000, 0x385c0000, 0x385c4000, + 0x385c8000, 0x385cc000, 0x385d0000, 0x385d4000, 0x385d8000, 0x385dc000, + 0x385e0000, 0x385e4000, 0x385e8000, 0x385ec000, 0x385f0000, 0x385f4000, + 0x385f8000, 0x385fc000, 0x38600000, 0x38604000, 0x38608000, 0x3860c000, + 0x38610000, 0x38614000, 0x38618000, 0x3861c000, 0x38620000, 0x38624000, + 0x38628000, 0x3862c000, 0x38630000, 0x38634000, 0x38638000, 0x3863c000, + 0x38640000, 0x38644000, 0x38648000, 0x3864c000, 0x38650000, 0x38654000, + 0x38658000, 0x3865c000, 0x38660000, 0x38664000, 0x38668000, 0x3866c000, + 0x38670000, 0x38674000, 0x38678000, 0x3867c000, 0x38680000, 0x38684000, + 0x38688000, 0x3868c000, 0x38690000, 0x38694000, 0x38698000, 0x3869c000, + 0x386a0000, 0x386a4000, 0x386a8000, 0x386ac000, 0x386b0000, 0x386b4000, + 0x386b8000, 0x386bc000, 0x386c0000, 0x386c4000, 0x386c8000, 0x386cc000, + 0x386d0000, 0x386d4000, 0x386d8000, 0x386dc000, 0x386e0000, 0x386e4000, + 0x386e8000, 0x386ec000, 0x386f0000, 0x386f4000, 0x386f8000, 0x386fc000, + 0x38700000, 0x38704000, 0x38708000, 0x3870c000, 0x38710000, 0x38714000, + 0x38718000, 0x3871c000, 0x38720000, 0x38724000, 0x38728000, 0x3872c000, + 0x38730000, 0x38734000, 0x38738000, 0x3873c000, 0x38740000, 0x38744000, + 0x38748000, 0x3874c000, 0x38750000, 0x38754000, 0x38758000, 0x3875c000, + 0x38760000, 0x38764000, 0x38768000, 0x3876c000, 0x38770000, 0x38774000, + 0x38778000, 0x3877c000, 0x38780000, 0x38784000, 0x38788000, 0x3878c000, + 0x38790000, 0x38794000, 0x38798000, 0x3879c000, 0x387a0000, 0x387a4000, + 0x387a8000, 0x387ac000, 0x387b0000, 0x387b4000, 0x387b8000, 0x387bc000, + 0x387c0000, 0x387c4000, 0x387c8000, 0x387cc000, 0x387d0000, 0x387d4000, + 0x387d8000, 0x387dc000, 0x387e0000, 0x387e4000, 0x387e8000, 0x387ec000, + 0x387f0000, 0x387f4000, 0x387f8000, 0x387fc000, 0x38000000, 0x38002000, + 0x38004000, 0x38006000, 0x38008000, 0x3800a000, 0x3800c000, 0x3800e000, + 0x38010000, 0x38012000, 0x38014000, 0x38016000, 0x38018000, 0x3801a000, + 0x3801c000, 0x3801e000, 0x38020000, 0x38022000, 0x38024000, 0x38026000, + 0x38028000, 0x3802a000, 0x3802c000, 0x3802e000, 0x38030000, 0x38032000, + 0x38034000, 0x38036000, 0x38038000, 0x3803a000, 0x3803c000, 0x3803e000, + 0x38040000, 0x38042000, 0x38044000, 0x38046000, 0x38048000, 0x3804a000, + 0x3804c000, 0x3804e000, 0x38050000, 0x38052000, 0x38054000, 0x38056000, + 0x38058000, 0x3805a000, 0x3805c000, 0x3805e000, 0x38060000, 0x38062000, + 0x38064000, 0x38066000, 0x38068000, 0x3806a000, 0x3806c000, 0x3806e000, + 0x38070000, 0x38072000, 0x38074000, 0x38076000, 0x38078000, 0x3807a000, + 0x3807c000, 0x3807e000, 0x38080000, 0x38082000, 0x38084000, 0x38086000, + 0x38088000, 0x3808a000, 0x3808c000, 0x3808e000, 0x38090000, 0x38092000, + 0x38094000, 0x38096000, 0x38098000, 0x3809a000, 0x3809c000, 0x3809e000, + 0x380a0000, 0x380a2000, 0x380a4000, 0x380a6000, 0x380a8000, 0x380aa000, + 0x380ac000, 0x380ae000, 0x380b0000, 0x380b2000, 0x380b4000, 0x380b6000, + 0x380b8000, 0x380ba000, 0x380bc000, 0x380be000, 0x380c0000, 0x380c2000, + 0x380c4000, 0x380c6000, 0x380c8000, 0x380ca000, 0x380cc000, 0x380ce000, + 0x380d0000, 0x380d2000, 0x380d4000, 0x380d6000, 0x380d8000, 0x380da000, + 0x380dc000, 0x380de000, 0x380e0000, 0x380e2000, 0x380e4000, 0x380e6000, + 0x380e8000, 0x380ea000, 0x380ec000, 0x380ee000, 0x380f0000, 0x380f2000, + 0x380f4000, 0x380f6000, 0x380f8000, 0x380fa000, 0x380fc000, 0x380fe000, + 0x38100000, 0x38102000, 0x38104000, 0x38106000, 0x38108000, 0x3810a000, + 0x3810c000, 0x3810e000, 0x38110000, 0x38112000, 0x38114000, 0x38116000, + 0x38118000, 0x3811a000, 0x3811c000, 0x3811e000, 0x38120000, 0x38122000, + 0x38124000, 0x38126000, 0x38128000, 0x3812a000, 0x3812c000, 0x3812e000, + 0x38130000, 0x38132000, 0x38134000, 0x38136000, 0x38138000, 0x3813a000, + 0x3813c000, 0x3813e000, 0x38140000, 0x38142000, 0x38144000, 0x38146000, + 0x38148000, 0x3814a000, 0x3814c000, 0x3814e000, 0x38150000, 0x38152000, + 0x38154000, 0x38156000, 0x38158000, 0x3815a000, 0x3815c000, 0x3815e000, + 0x38160000, 0x38162000, 0x38164000, 0x38166000, 0x38168000, 0x3816a000, + 0x3816c000, 0x3816e000, 0x38170000, 0x38172000, 0x38174000, 0x38176000, + 0x38178000, 0x3817a000, 0x3817c000, 0x3817e000, 0x38180000, 0x38182000, + 0x38184000, 0x38186000, 0x38188000, 0x3818a000, 0x3818c000, 0x3818e000, + 0x38190000, 0x38192000, 0x38194000, 0x38196000, 0x38198000, 0x3819a000, + 0x3819c000, 0x3819e000, 0x381a0000, 0x381a2000, 0x381a4000, 0x381a6000, + 0x381a8000, 0x381aa000, 0x381ac000, 0x381ae000, 0x381b0000, 0x381b2000, + 0x381b4000, 0x381b6000, 0x381b8000, 0x381ba000, 0x381bc000, 0x381be000, + 0x381c0000, 0x381c2000, 0x381c4000, 0x381c6000, 0x381c8000, 0x381ca000, + 0x381cc000, 0x381ce000, 0x381d0000, 0x381d2000, 0x381d4000, 0x381d6000, + 0x381d8000, 0x381da000, 0x381dc000, 0x381de000, 0x381e0000, 0x381e2000, + 0x381e4000, 0x381e6000, 0x381e8000, 0x381ea000, 0x381ec000, 0x381ee000, + 0x381f0000, 0x381f2000, 0x381f4000, 0x381f6000, 0x381f8000, 0x381fa000, + 0x381fc000, 0x381fe000, 0x38200000, 0x38202000, 0x38204000, 0x38206000, + 0x38208000, 0x3820a000, 0x3820c000, 0x3820e000, 0x38210000, 0x38212000, + 0x38214000, 0x38216000, 0x38218000, 0x3821a000, 0x3821c000, 0x3821e000, + 0x38220000, 0x38222000, 0x38224000, 0x38226000, 0x38228000, 0x3822a000, + 0x3822c000, 0x3822e000, 0x38230000, 0x38232000, 0x38234000, 0x38236000, + 0x38238000, 0x3823a000, 0x3823c000, 0x3823e000, 0x38240000, 0x38242000, + 0x38244000, 0x38246000, 0x38248000, 0x3824a000, 0x3824c000, 0x3824e000, + 0x38250000, 0x38252000, 0x38254000, 0x38256000, 0x38258000, 0x3825a000, + 0x3825c000, 0x3825e000, 0x38260000, 0x38262000, 0x38264000, 0x38266000, + 0x38268000, 0x3826a000, 0x3826c000, 0x3826e000, 0x38270000, 0x38272000, + 0x38274000, 0x38276000, 0x38278000, 0x3827a000, 0x3827c000, 0x3827e000, + 0x38280000, 0x38282000, 0x38284000, 0x38286000, 0x38288000, 0x3828a000, + 0x3828c000, 0x3828e000, 0x38290000, 0x38292000, 0x38294000, 0x38296000, + 0x38298000, 0x3829a000, 0x3829c000, 0x3829e000, 0x382a0000, 0x382a2000, + 0x382a4000, 0x382a6000, 0x382a8000, 0x382aa000, 0x382ac000, 0x382ae000, + 0x382b0000, 0x382b2000, 0x382b4000, 0x382b6000, 0x382b8000, 0x382ba000, + 0x382bc000, 0x382be000, 0x382c0000, 0x382c2000, 0x382c4000, 0x382c6000, + 0x382c8000, 0x382ca000, 0x382cc000, 0x382ce000, 0x382d0000, 0x382d2000, + 0x382d4000, 0x382d6000, 0x382d8000, 0x382da000, 0x382dc000, 0x382de000, + 0x382e0000, 0x382e2000, 0x382e4000, 0x382e6000, 0x382e8000, 0x382ea000, + 0x382ec000, 0x382ee000, 0x382f0000, 0x382f2000, 0x382f4000, 0x382f6000, + 0x382f8000, 0x382fa000, 0x382fc000, 0x382fe000, 0x38300000, 0x38302000, + 0x38304000, 0x38306000, 0x38308000, 0x3830a000, 0x3830c000, 0x3830e000, + 0x38310000, 0x38312000, 0x38314000, 0x38316000, 0x38318000, 0x3831a000, + 0x3831c000, 0x3831e000, 0x38320000, 0x38322000, 0x38324000, 0x38326000, + 0x38328000, 0x3832a000, 0x3832c000, 0x3832e000, 0x38330000, 0x38332000, + 0x38334000, 0x38336000, 0x38338000, 0x3833a000, 0x3833c000, 0x3833e000, + 0x38340000, 0x38342000, 0x38344000, 0x38346000, 0x38348000, 0x3834a000, + 0x3834c000, 0x3834e000, 0x38350000, 0x38352000, 0x38354000, 0x38356000, + 0x38358000, 0x3835a000, 0x3835c000, 0x3835e000, 0x38360000, 0x38362000, + 0x38364000, 0x38366000, 0x38368000, 0x3836a000, 0x3836c000, 0x3836e000, + 0x38370000, 0x38372000, 0x38374000, 0x38376000, 0x38378000, 0x3837a000, + 0x3837c000, 0x3837e000, 0x38380000, 0x38382000, 0x38384000, 0x38386000, + 0x38388000, 0x3838a000, 0x3838c000, 0x3838e000, 0x38390000, 0x38392000, + 0x38394000, 0x38396000, 0x38398000, 0x3839a000, 0x3839c000, 0x3839e000, + 0x383a0000, 0x383a2000, 0x383a4000, 0x383a6000, 0x383a8000, 0x383aa000, + 0x383ac000, 0x383ae000, 0x383b0000, 0x383b2000, 0x383b4000, 0x383b6000, + 0x383b8000, 0x383ba000, 0x383bc000, 0x383be000, 0x383c0000, 0x383c2000, + 0x383c4000, 0x383c6000, 0x383c8000, 0x383ca000, 0x383cc000, 0x383ce000, + 0x383d0000, 0x383d2000, 0x383d4000, 0x383d6000, 0x383d8000, 0x383da000, + 0x383dc000, 0x383de000, 0x383e0000, 0x383e2000, 0x383e4000, 0x383e6000, + 0x383e8000, 0x383ea000, 0x383ec000, 0x383ee000, 0x383f0000, 0x383f2000, + 0x383f4000, 0x383f6000, 0x383f8000, 0x383fa000, 0x383fc000, 0x383fe000, + 0x38400000, 0x38402000, 0x38404000, 0x38406000, 0x38408000, 0x3840a000, + 0x3840c000, 0x3840e000, 0x38410000, 0x38412000, 0x38414000, 0x38416000, + 0x38418000, 0x3841a000, 0x3841c000, 0x3841e000, 0x38420000, 0x38422000, + 0x38424000, 0x38426000, 0x38428000, 0x3842a000, 0x3842c000, 0x3842e000, + 0x38430000, 0x38432000, 0x38434000, 0x38436000, 0x38438000, 0x3843a000, + 0x3843c000, 0x3843e000, 0x38440000, 0x38442000, 0x38444000, 0x38446000, + 0x38448000, 0x3844a000, 0x3844c000, 0x3844e000, 0x38450000, 0x38452000, + 0x38454000, 0x38456000, 0x38458000, 0x3845a000, 0x3845c000, 0x3845e000, + 0x38460000, 0x38462000, 0x38464000, 0x38466000, 0x38468000, 0x3846a000, + 0x3846c000, 0x3846e000, 0x38470000, 0x38472000, 0x38474000, 0x38476000, + 0x38478000, 0x3847a000, 0x3847c000, 0x3847e000, 0x38480000, 0x38482000, + 0x38484000, 0x38486000, 0x38488000, 0x3848a000, 0x3848c000, 0x3848e000, + 0x38490000, 0x38492000, 0x38494000, 0x38496000, 0x38498000, 0x3849a000, + 0x3849c000, 0x3849e000, 0x384a0000, 0x384a2000, 0x384a4000, 0x384a6000, + 0x384a8000, 0x384aa000, 0x384ac000, 0x384ae000, 0x384b0000, 0x384b2000, + 0x384b4000, 0x384b6000, 0x384b8000, 0x384ba000, 0x384bc000, 0x384be000, + 0x384c0000, 0x384c2000, 0x384c4000, 0x384c6000, 0x384c8000, 0x384ca000, + 0x384cc000, 0x384ce000, 0x384d0000, 0x384d2000, 0x384d4000, 0x384d6000, + 0x384d8000, 0x384da000, 0x384dc000, 0x384de000, 0x384e0000, 0x384e2000, + 0x384e4000, 0x384e6000, 0x384e8000, 0x384ea000, 0x384ec000, 0x384ee000, + 0x384f0000, 0x384f2000, 0x384f4000, 0x384f6000, 0x384f8000, 0x384fa000, + 0x384fc000, 0x384fe000, 0x38500000, 0x38502000, 0x38504000, 0x38506000, + 0x38508000, 0x3850a000, 0x3850c000, 0x3850e000, 0x38510000, 0x38512000, + 0x38514000, 0x38516000, 0x38518000, 0x3851a000, 0x3851c000, 0x3851e000, + 0x38520000, 0x38522000, 0x38524000, 0x38526000, 0x38528000, 0x3852a000, + 0x3852c000, 0x3852e000, 0x38530000, 0x38532000, 0x38534000, 0x38536000, + 0x38538000, 0x3853a000, 0x3853c000, 0x3853e000, 0x38540000, 0x38542000, + 0x38544000, 0x38546000, 0x38548000, 0x3854a000, 0x3854c000, 0x3854e000, + 0x38550000, 0x38552000, 0x38554000, 0x38556000, 0x38558000, 0x3855a000, + 0x3855c000, 0x3855e000, 0x38560000, 0x38562000, 0x38564000, 0x38566000, + 0x38568000, 0x3856a000, 0x3856c000, 0x3856e000, 0x38570000, 0x38572000, + 0x38574000, 0x38576000, 0x38578000, 0x3857a000, 0x3857c000, 0x3857e000, + 0x38580000, 0x38582000, 0x38584000, 0x38586000, 0x38588000, 0x3858a000, + 0x3858c000, 0x3858e000, 0x38590000, 0x38592000, 0x38594000, 0x38596000, + 0x38598000, 0x3859a000, 0x3859c000, 0x3859e000, 0x385a0000, 0x385a2000, + 0x385a4000, 0x385a6000, 0x385a8000, 0x385aa000, 0x385ac000, 0x385ae000, + 0x385b0000, 0x385b2000, 0x385b4000, 0x385b6000, 0x385b8000, 0x385ba000, + 0x385bc000, 0x385be000, 0x385c0000, 0x385c2000, 0x385c4000, 0x385c6000, + 0x385c8000, 0x385ca000, 0x385cc000, 0x385ce000, 0x385d0000, 0x385d2000, + 0x385d4000, 0x385d6000, 0x385d8000, 0x385da000, 0x385dc000, 0x385de000, + 0x385e0000, 0x385e2000, 0x385e4000, 0x385e6000, 0x385e8000, 0x385ea000, + 0x385ec000, 0x385ee000, 0x385f0000, 0x385f2000, 0x385f4000, 0x385f6000, + 0x385f8000, 0x385fa000, 0x385fc000, 0x385fe000, 0x38600000, 0x38602000, + 0x38604000, 0x38606000, 0x38608000, 0x3860a000, 0x3860c000, 0x3860e000, + 0x38610000, 0x38612000, 0x38614000, 0x38616000, 0x38618000, 0x3861a000, + 0x3861c000, 0x3861e000, 0x38620000, 0x38622000, 0x38624000, 0x38626000, + 0x38628000, 0x3862a000, 0x3862c000, 0x3862e000, 0x38630000, 0x38632000, + 0x38634000, 0x38636000, 0x38638000, 0x3863a000, 0x3863c000, 0x3863e000, + 0x38640000, 0x38642000, 0x38644000, 0x38646000, 0x38648000, 0x3864a000, + 0x3864c000, 0x3864e000, 0x38650000, 0x38652000, 0x38654000, 0x38656000, + 0x38658000, 0x3865a000, 0x3865c000, 0x3865e000, 0x38660000, 0x38662000, + 0x38664000, 0x38666000, 0x38668000, 0x3866a000, 0x3866c000, 0x3866e000, + 0x38670000, 0x38672000, 0x38674000, 0x38676000, 0x38678000, 0x3867a000, + 0x3867c000, 0x3867e000, 0x38680000, 0x38682000, 0x38684000, 0x38686000, + 0x38688000, 0x3868a000, 0x3868c000, 0x3868e000, 0x38690000, 0x38692000, + 0x38694000, 0x38696000, 0x38698000, 0x3869a000, 0x3869c000, 0x3869e000, + 0x386a0000, 0x386a2000, 0x386a4000, 0x386a6000, 0x386a8000, 0x386aa000, + 0x386ac000, 0x386ae000, 0x386b0000, 0x386b2000, 0x386b4000, 0x386b6000, + 0x386b8000, 0x386ba000, 0x386bc000, 0x386be000, 0x386c0000, 0x386c2000, + 0x386c4000, 0x386c6000, 0x386c8000, 0x386ca000, 0x386cc000, 0x386ce000, + 0x386d0000, 0x386d2000, 0x386d4000, 0x386d6000, 0x386d8000, 0x386da000, + 0x386dc000, 0x386de000, 0x386e0000, 0x386e2000, 0x386e4000, 0x386e6000, + 0x386e8000, 0x386ea000, 0x386ec000, 0x386ee000, 0x386f0000, 0x386f2000, + 0x386f4000, 0x386f6000, 0x386f8000, 0x386fa000, 0x386fc000, 0x386fe000, + 0x38700000, 0x38702000, 0x38704000, 0x38706000, 0x38708000, 0x3870a000, + 0x3870c000, 0x3870e000, 0x38710000, 0x38712000, 0x38714000, 0x38716000, + 0x38718000, 0x3871a000, 0x3871c000, 0x3871e000, 0x38720000, 0x38722000, + 0x38724000, 0x38726000, 0x38728000, 0x3872a000, 0x3872c000, 0x3872e000, + 0x38730000, 0x38732000, 0x38734000, 0x38736000, 0x38738000, 0x3873a000, + 0x3873c000, 0x3873e000, 0x38740000, 0x38742000, 0x38744000, 0x38746000, + 0x38748000, 0x3874a000, 0x3874c000, 0x3874e000, 0x38750000, 0x38752000, + 0x38754000, 0x38756000, 0x38758000, 0x3875a000, 0x3875c000, 0x3875e000, + 0x38760000, 0x38762000, 0x38764000, 0x38766000, 0x38768000, 0x3876a000, + 0x3876c000, 0x3876e000, 0x38770000, 0x38772000, 0x38774000, 0x38776000, + 0x38778000, 0x3877a000, 0x3877c000, 0x3877e000, 0x38780000, 0x38782000, + 0x38784000, 0x38786000, 0x38788000, 0x3878a000, 0x3878c000, 0x3878e000, + 0x38790000, 0x38792000, 0x38794000, 0x38796000, 0x38798000, 0x3879a000, + 0x3879c000, 0x3879e000, 0x387a0000, 0x387a2000, 0x387a4000, 0x387a6000, + 0x387a8000, 0x387aa000, 0x387ac000, 0x387ae000, 0x387b0000, 0x387b2000, + 0x387b4000, 0x387b6000, 0x387b8000, 0x387ba000, 0x387bc000, 0x387be000, + 0x387c0000, 0x387c2000, 0x387c4000, 0x387c6000, 0x387c8000, 0x387ca000, + 0x387cc000, 0x387ce000, 0x387d0000, 0x387d2000, 0x387d4000, 0x387d6000, + 0x387d8000, 0x387da000, 0x387dc000, 0x387de000, 0x387e0000, 0x387e2000, + 0x387e4000, 0x387e6000, 0x387e8000, 0x387ea000, 0x387ec000, 0x387ee000, + 0x387f0000, 0x387f2000, 0x387f4000, 0x387f6000, 0x387f8000, 0x387fa000, + 0x387fc000, 0x387fe000}; -const static uint16_t offsettable[64] = { - 0x0000, 0x0400, 0x0400, 0x0400, 0x0400, 0x0400, 0x0400, 0x0400, - 0x0400, 0x0400, 0x0400, 0x0400, 0x0400, 0x0400, 0x0400, 0x0400, - 0x0400, 0x0400, 0x0400, 0x0400, 0x0400, 0x0400, 0x0400, 0x0400, - 0x0400, 0x0400, 0x0400, 0x0400, 0x0400, 0x0400, 0x0400, 0x0400, - 0x0000, 0x0400, 0x0400, 0x0400, 0x0400, 0x0400, 0x0400, 0x0400, - 0x0400, 0x0400, 0x0400, 0x0400, 0x0400, 0x0400, 0x0400, 0x0400, - 0x0400, 0x0400, 0x0400, 0x0400, 0x0400, 0x0400, 0x0400, 0x0400, - 0x0400, 0x0400, 0x0400, 0x0400, 0x0400, 0x0400, 0x0400, 0x0400 -}; +static const uint16_t offsettable[64] = { + 0x0000, 0x0400, 0x0400, 0x0400, 0x0400, 0x0400, 0x0400, 0x0400, + 0x0400, 0x0400, 0x0400, 0x0400, 0x0400, 0x0400, 0x0400, 0x0400, + 0x0400, 0x0400, 0x0400, 0x0400, 0x0400, 0x0400, 0x0400, 0x0400, + 0x0400, 0x0400, 0x0400, 0x0400, 0x0400, 0x0400, 0x0400, 0x0400, + 0x0000, 0x0400, 0x0400, 0x0400, 0x0400, 0x0400, 0x0400, 0x0400, + 0x0400, 0x0400, 0x0400, 0x0400, 0x0400, 0x0400, 0x0400, 0x0400, + 0x0400, 0x0400, 0x0400, 0x0400, 0x0400, 0x0400, 0x0400, 0x0400, + 0x0400, 0x0400, 0x0400, 0x0400, 0x0400, 0x0400, 0x0400, 0x0400}; -const static uint32_t exponenttable[64] = { - 0x00000000, 0x00800000, 0x01000000, 0x01800000, 0x02000000, 0x02800000, 0x03000000, 0x03800000, - 0x04000000, 0x04800000, 0x05000000, 0x05800000, 0x06000000, 0x06800000, 0x07000000, 0x07800000, - 0x08000000, 0x08800000, 0x09000000, 0x09800000, 0x0a000000, 0x0a800000, 0x0b000000, 0x0b800000, - 0x0c000000, 0x0c800000, 0x0d000000, 0x0d800000, 0x0e000000, 0x0e800000, 0x0f000000, 0x47800000, - 0x80000000, 0x80800000, 0x81000000, 0x81800000, 0x82000000, 0x82800000, 0x83000000, 0x83800000, - 0x84000000, 0x84800000, 0x85000000, 0x85800000, 0x86000000, 0x86800000, 0x87000000, 0x87800000, - 0x88000000, 0x88800000, 0x89000000, 0x89800000, 0x8a000000, 0x8a800000, 0x8b000000, 0x8b800000, - 0x8c000000, 0x8c800000, 0x8d000000, 0x8d800000, 0x8e000000, 0x8e800000, 0x8f000000, 0xc7800000 -}; +static const uint32_t exponenttable[64] = { + 0x00000000, 0x00800000, 0x01000000, 0x01800000, 0x02000000, 0x02800000, + 0x03000000, 0x03800000, 0x04000000, 0x04800000, 0x05000000, 0x05800000, + 0x06000000, 0x06800000, 0x07000000, 0x07800000, 0x08000000, 0x08800000, + 0x09000000, 0x09800000, 0x0a000000, 0x0a800000, 0x0b000000, 0x0b800000, + 0x0c000000, 0x0c800000, 0x0d000000, 0x0d800000, 0x0e000000, 0x0e800000, + 0x0f000000, 0x47800000, 0x80000000, 0x80800000, 0x81000000, 0x81800000, + 0x82000000, 0x82800000, 0x83000000, 0x83800000, 0x84000000, 0x84800000, + 0x85000000, 0x85800000, 0x86000000, 0x86800000, 0x87000000, 0x87800000, + 0x88000000, 0x88800000, 0x89000000, 0x89800000, 0x8a000000, 0x8a800000, + 0x8b000000, 0x8b800000, 0x8c000000, 0x8c800000, 0x8d000000, 0x8d800000, + 0x8e000000, 0x8e800000, 0x8f000000, 0xc7800000}; -const static uint16_t basetable[512] = { - 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0001, - 0x0002, 0x0004, 0x0008, 0x0010, 0x0020, 0x0040, 0x0080, 0x0100, - 0x0200, 0x0400, 0x0800, 0x0c00, 0x1000, 0x1400, 0x1800, 0x1c00, - 0x2000, 0x2400, 0x2800, 0x2c00, 0x3000, 0x3400, 0x3800, 0x3c00, - 0x4000, 0x4400, 0x4800, 0x4c00, 0x5000, 0x5400, 0x5800, 0x5c00, - 0x6000, 0x6400, 0x6800, 0x6c00, 0x7000, 0x7400, 0x7800, 0x7c00, - 0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x7c00, - 0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x7c00, - 0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x7c00, - 0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x7c00, - 0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x7c00, - 0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x7c00, - 0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x7c00, - 0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x7c00, - 0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x7c00, - 0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x7c00, - 0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x7c00, - 0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x7c00, - 0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x7c00, - 0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x7c00, - 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, - 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, - 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, - 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, - 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, - 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, - 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, - 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, - 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, - 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, - 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, - 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, - 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8001, - 0x8002, 0x8004, 0x8008, 0x8010, 0x8020, 0x8040, 0x8080, 0x8100, - 0x8200, 0x8400, 0x8800, 0x8c00, 0x9000, 0x9400, 0x9800, 0x9c00, - 0xa000, 0xa400, 0xa800, 0xac00, 0xb000, 0xb400, 0xb800, 0xbc00, - 0xc000, 0xc400, 0xc800, 0xcc00, 0xd000, 0xd400, 0xd800, 0xdc00, - 0xe000, 0xe400, 0xe800, 0xec00, 0xf000, 0xf400, 0xf800, 0xfc00, - 0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00, - 0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00, - 0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00, - 0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00, - 0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00, - 0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00, - 0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00, - 0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00, - 0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00, - 0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00, - 0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00, - 0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00, - 0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00, - 0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00 -}; +static const uint16_t basetable[512] = { + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0001, 0x0002, 0x0004, 0x0008, 0x0010, + 0x0020, 0x0040, 0x0080, 0x0100, 0x0200, 0x0400, 0x0800, 0x0c00, 0x1000, + 0x1400, 0x1800, 0x1c00, 0x2000, 0x2400, 0x2800, 0x2c00, 0x3000, 0x3400, + 0x3800, 0x3c00, 0x4000, 0x4400, 0x4800, 0x4c00, 0x5000, 0x5400, 0x5800, + 0x5c00, 0x6000, 0x6400, 0x6800, 0x6c00, 0x7000, 0x7400, 0x7800, 0x7c00, + 0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x7c00, + 0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x7c00, + 0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x7c00, + 0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x7c00, + 0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x7c00, + 0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x7c00, + 0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x7c00, + 0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x7c00, + 0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x7c00, + 0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x7c00, + 0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x7c00, + 0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x7c00, + 0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, + 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, + 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, + 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, + 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, + 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, + 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, + 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, + 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, + 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, + 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, + 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8001, + 0x8002, 0x8004, 0x8008, 0x8010, 0x8020, 0x8040, 0x8080, 0x8100, 0x8200, + 0x8400, 0x8800, 0x8c00, 0x9000, 0x9400, 0x9800, 0x9c00, 0xa000, 0xa400, + 0xa800, 0xac00, 0xb000, 0xb400, 0xb800, 0xbc00, 0xc000, 0xc400, 0xc800, + 0xcc00, 0xd000, 0xd400, 0xd800, 0xdc00, 0xe000, 0xe400, 0xe800, 0xec00, + 0xf000, 0xf400, 0xf800, 0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00, + 0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00, + 0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00, + 0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00, + 0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00, + 0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00, + 0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00, + 0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00, + 0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00, + 0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00, + 0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00, + 0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00, + 0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00}; -const static uint8_t shifttable[512] = { - 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, - 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, - 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, - 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, - 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, - 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, - 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, - 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, - 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, - 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, - 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, - 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, - 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x17, - 0x16, 0x15, 0x14, 0x13, 0x12, 0x11, 0x10, 0x0f, - 0x0e, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, - 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, - 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, - 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x18, - 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, - 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, - 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, - 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, - 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, - 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, - 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, - 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, - 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, - 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, - 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, - 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, - 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, - 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x0d, - 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, - 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, - 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, - 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, - 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, - 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, - 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, - 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, - 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, - 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, - 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, - 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, - 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x17, - 0x16, 0x15, 0x14, 0x13, 0x12, 0x11, 0x10, 0x0f, - 0x0e, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, - 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, - 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, - 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x18, - 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, - 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, - 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, - 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, - 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, - 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, - 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, - 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, - 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, - 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, - 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, - 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, - 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, - 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x0d -}; +static const uint8_t shifttable[512] = { + 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, + 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, + 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, + 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, + 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, + 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, + 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, + 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, + 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x17, 0x16, 0x15, 0x14, 0x13, + 0x12, 0x11, 0x10, 0x0f, 0x0e, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, + 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, + 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x18, + 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, + 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, + 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, + 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, + 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, + 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, + 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, + 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, + 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, + 0x18, 0x18, 0x18, 0x0d, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, + 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, + 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, + 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, + 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, + 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, + 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, + 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, + 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x17, + 0x16, 0x15, 0x14, 0x13, 0x12, 0x11, 0x10, 0x0f, 0x0e, 0x0d, 0x0d, 0x0d, + 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, + 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, + 0x0d, 0x0d, 0x0d, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, + 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, + 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, + 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, + 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, + 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, + 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, + 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, + 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, + 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x0d}; half_t float2half(float f) { uint32_t v = *reinterpret_cast(&f); - return basetable[(v>>23)&0x1ff]+((v&0x007fffff)>>shifttable[(v>>23)&0x1ff]); + return basetable[(v >> 23) & 0x1ff] + + ((v & 0x007fffff) >> shifttable[(v >> 23) & 0x1ff]); } float half2float(half_t h) { - uint32_t v = mantissatable[offsettable[h>>10]+(h&0x3ff)]+exponenttable[h>>10]; + uint32_t v = mantissatable[offsettable[h >> 10] + (h & 0x3ff)] + + exponenttable[h >> 10]; return *reinterpret_cast(&v); -} \ No newline at end of file +} diff --git a/src/framework/cl/cl_half.h b/src/framework/cl/cl_half.h index b9730c7fafe6259d4aabbd67cbe1daf4e99feb84..23ef236d72b1842620302bdef9eb0ab4a0a67e38 100644 --- a/src/framework/cl/cl_half.h +++ b/src/framework/cl/cl_half.h @@ -18,4 +18,4 @@ limitations under the License. */ typedef uint16_t half_t; half_t float2half(float f); -float half2float(half_t h); \ No newline at end of file +float half2float(half_t h); diff --git a/src/framework/cl/cl_helper.h b/src/framework/cl/cl_helper.h index 7562ab78695376d9766a72eefc2d5482283c49b7..e611a209238070420d15caa456c426b7b3650b79 100644 --- a/src/framework/cl/cl_helper.h +++ b/src/framework/cl/cl_helper.h @@ -14,11 +14,13 @@ limitations under the License. */ #pragma once -#include +#include #include +#include -#include "framework/cl/cl_scope.h" #include "framework/cl/cl_deleter.h" +#include "framework/cl/cl_image.h" +#include "framework/cl/cl_scope.h" namespace paddle_mobile { namespace framework { @@ -27,24 +29,38 @@ class CLHelper { public: CLHelper() = default; - CLHelper(CLScope *scope): scope_(scope) { - } + explicit CLHelper(CLScope *scope) : scope_(scope) {} void AddKernel(const std::string &kernel_name, const std::string &file_name) { auto kernel = scope_->GetKernel(kernel_name, file_name); kernels.emplace_back(std::move(kernel)); } - cl_kernel KernelAt(const int index) { - return kernels[index].get(); - } + cl_kernel KernelAt(const int index) { return kernels[index].get(); } - cl_command_queue CLCommandQueue() { - return scope_->CommandQueue(); - } + cl_command_queue CLCommandQueue() { return scope_->CommandQueue(); } + + cl_context CLContext() { return scope_->Context(); } + + std::vector DefaultWorkSize(const CLImage &image) { + // n c h w + auto image_dim = image.dims(); + if (image_dim.size() == 4) { + auto n = image_dim[0]; + auto h = image_dim[2]; + auto w = image_dim[3]; + + auto image_width = image.ImageWidth(); + + auto work_size_0 = image_width / w; + + auto work_size_1 = w; + + auto work_size_2 = n * h; - cl_context CLContext() { - return scope_->Context(); + return {work_size_0, work_size_1, work_size_2}; + } + PADDLE_MOBILE_THROW_EXCEPTION("not support this dim, need imp"); } private: @@ -52,5 +68,5 @@ class CLHelper { std::vector> kernels; }; -} -} +} // namespace framework +} // namespace paddle_mobile diff --git a/src/framework/cl/cl_image.h b/src/framework/cl/cl_image.h index 0c5e996540efa817334a1fa1ba66578e299d00a6..58c8ea6bc4e76dddf15a263878e5270563d2ed57 100644 --- a/src/framework/cl/cl_image.h +++ b/src/framework/cl/cl_image.h @@ -14,10 +14,12 @@ limitations under the License. */ #pragma once +#include + +#include "CL/cl.h" +#include "framework/cl/cl_half.h" #include "framework/ddim.h" #include "framework/tensor.h" -#include "CL/cl.h" -#include "cl_half.h" namespace paddle_mobile { namespace framework { @@ -27,18 +29,44 @@ class CLImage { CLImage() = default; void Init(cl_context context, float *tensorInput, DDim ddim) { - cl_image_format cf = { - .image_channel_order = CL_RGBA, - .image_channel_data_type = CL_HALF_FLOAT - }; + tensor_dims_ = ddim; + cl_image_format cf = {.image_channel_order = CL_RGBA, + .image_channel_data_type = CL_HALF_FLOAT}; // NCHW -> [W * (C+3)/4, H * N] - size_t N = tensorDims_[0]; - size_t C = tensorDims_[1]; - size_t H = tensorDims_[2]; - size_t W = tensorDims_[3]; + DLOG << tensor_dims_; + size_t N, C, H, W; + if (tensor_dims_.size() == 4) { + N = tensor_dims_[0]; + if (N < 0) { + N = 1; + } + C = tensor_dims_[1]; + H = tensor_dims_[2]; + W = tensor_dims_[3]; + + width_of_one_block_ = W; + height_of_one_block_ = H; + + } else if (tensor_dims_.size() == 1) { + N = 1; + C = tensor_dims_[0]; + H = 1; + W = 1; + + width_of_one_block_ = W; + height_of_one_block_ = H; + } + + DLOG << "-------InitMemory-------"; + size_t width = W * ((C + 3) / 4); size_t height = H * N; + + image_width_ = width; + image_height_ = height; + std::unique_ptr imageData{}; + int count = 0; if (tensorInput != nullptr) { imageData.reset(new half_t[width * height * 4]); float *p = tensorInput; @@ -47,11 +75,19 @@ class CLImage { for (int c = 0; c < C; c++) { size_t i1 = i0; for (int h = 0; h < H; h++) { - size_t i2 = i1 << 2 + c % 4; + size_t i2 = (i1 << 2) + c % 4; for (int w = 0; w < W; w++) { + if (i2 >= width * height * 4) { + printf("%d > %d ----> %d, %d, %d, %d --- %d, %d, %d\n", i2, + width * height * 4, n, c, h, w, i0, i1, i2); + } + assert(i2 < width * height * 4); + imageData[i2] = float2half(*p); i2 += 4; p++; + // count++; + // DLOG<(imageData.get()), // void *host_ptr - &err // cl_int *errcode_ret - ); + context, // cl_context context + CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR, // cl_mem_flags flags + &cf, // const cl_image_format *image_format + width, // size_t image_width + height, // size_t image_height + 0, // size_t image_row_pitch + reinterpret_cast(imageData.get()), // void *host_ptr + &err); + if (err != CL_SUCCESS) { - // TODO: error handling + // TODO(HaiPeng): error handling + PADDLE_MOBILE_THROW_EXCEPTION(" create image 2d error "); } - } - void Init(cl_context context, DDim ddim) { - Init(context, nullptr, ddim); + initialized_ = true; } + void Init(cl_context context, DDim ddim) { Init(context, nullptr, ddim); } + inline CLImage &Resize(const DDim &dims) { - tensorDims_ = dims; + tensor_dims_ = dims; return *this; } - const DDim &dims() const { - return tensorDims_; - } + const DDim &dims() const { return tensor_dims_; } - std::vector DefaultWorkSize() { - return {}; - } + cl_mem GetCLImage() const { return cl_image_; } - cl_mem GetCLImage() { - return cl_image_; + template + T *data() const { + return reinterpret_cast(tensor_input_); } + inline int64_t numel() const { return product(tensor_dims_); } + + inline size_t ImageWidth() const { return image_width_; } + + inline size_t ImageHeight() const { return image_height_; } + + inline size_t CBlock() const { return c_block_; } + + inline size_t WidthOfOneBlock() const { return width_of_one_block_; } + + inline size_t HeightOfOneBlock() const { return height_of_one_block_; } + private: bool initialized_ = false; cl_mem cl_image_; - DDim tensorDims_; + size_t image_width_; + size_t width_of_one_block_; + size_t height_of_one_block_; + size_t image_height_; + size_t c_block_; + DDim tensor_dims_; + float *tensor_input_; cl_context context_; }; -//void TensorToCLImage(Tensor *tensor, CLImage *image) { +// void TensorToCLImage(Tensor *tensor, CLImage *image) { // //} // -//void CLImageToTensor(CLImage *image, Tensor *tensor) { +// void CLImageToTensor(CLImage *image, Tensor *tensor) { // //} -} -} \ No newline at end of file +} // namespace framework +} // namespace paddle_mobile diff --git a/src/framework/cl/cl_scope.h b/src/framework/cl/cl_scope.h index 58de4e9750097115f457d35a72601f35f9bea3b7..15ca27fd7c7929aa216e7bbef73e2555028285b8 100644 --- a/src/framework/cl/cl_scope.h +++ b/src/framework/cl/cl_scope.h @@ -18,10 +18,10 @@ limitations under the License. */ #include #include -#include "framework/cl/cl_tool.h" -#include "framework/cl/cl_engine.h" -#include "framework/cl/cl_deleter.h" #include "CL/cl.h" +#include "framework/cl/cl_deleter.h" +#include "framework/cl/cl_engine.h" +#include "framework/cl/cl_tool.h" namespace paddle_mobile { namespace framework { @@ -35,19 +35,17 @@ class CLScope { command_queue_ = engin->CreateClCommandQueue(); } - cl_command_queue CommandQueue() { - return command_queue_.get(); - } + cl_command_queue CommandQueue() { return command_queue_.get(); } - std::unique_ptr<_cl_kernel, CLKernelDeleter> GetKernel(const std::string &kernel_name, const std::string &file_name) { + std::unique_ptr<_cl_kernel, CLKernelDeleter> GetKernel( + const std::string &kernel_name, const std::string &file_name) { auto program = Program(file_name); - std::unique_ptr<_cl_kernel, CLKernelDeleter> kernel(clCreateKernel(program, kernel_name.c_str(), NULL)); + std::unique_ptr<_cl_kernel, CLKernelDeleter> kernel( + clCreateKernel(program, kernel_name.c_str(), NULL)); return std::move(kernel); } - cl_context Context() { - return context_.get(); - } + cl_context Context() { return context_.get(); } cl_program Program(const std::string &file_name) { auto it = programs_.find(file_name); @@ -55,20 +53,23 @@ class CLScope { return it->second.get(); } - auto program = CLEngine::Instance()->CreateProgramWith(context_.get(), file_name); + auto program = + CLEngine::Instance()->CreateProgramWith(context_.get(), file_name); programs_[file_name] = std::move(program); - status_ = clBuildProgram(program.get(), 0, 0, 0, 0, 0); + status_ = clBuildProgram(program.get(), 0, 0, 0, 0, 0); CL_CHECK_ERRORS(status_); return program.get(); } private: - cl_int status_; + cl_int status_; std::unique_ptr<_cl_context, CLContextDeleter> context_; std::unique_ptr<_cl_command_queue, CLCommQueueDeleter> command_queue_; - std::unordered_map> programs_; + std::unordered_map> + programs_; }; -} -} +} // namespace framework +} // namespace paddle_mobile diff --git a/src/framework/cl/cl_tensor.h b/src/framework/cl/cl_tensor.h index e4d12494b25ea55ab7c8cea5fc7a4ce27e6d8c40..6601c0b2f5121daa263478d1cdb3a5094800ba62 100644 --- a/src/framework/cl/cl_tensor.h +++ b/src/framework/cl/cl_tensor.h @@ -18,17 +18,17 @@ limitations under the License. */ #include #include -#include "framework/tensor_base.h" -#include "framework/cl/cl_engine.h" -#include "framework/cl/cl_deleter.h" #include "CL/cl.h" +#include "framework/cl/cl_deleter.h" +#include "framework/cl/cl_engine.h" +#include "framework/tensor_base.h" namespace paddle_mobile { namespace framework { class CLTensor : TensorBase { public: - CLTensor(cl_context context) : context_(context) {} + explicit CLTensor(cl_context context) : context_(context) {} /*! Resize the dimensions of the memory block. */ inline CLTensor &Resize(const DDim &dims) { @@ -84,7 +84,6 @@ class CLTensor : TensorBase { } private: - cl_context context_; /* @@ -99,18 +98,15 @@ class CLTensor : TensorBase { virtual void set_type(std::type_index type) = 0; * */ struct PlaceholderImpl : public Placeholder { - PlaceholderImpl(size_t size, void *input, std::type_index type, cl_context context) - : ptr_(clCreateBuffer(context, - CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR, size, - reinterpret_cast(input), NULL)), + PlaceholderImpl(size_t size, void *input, std::type_index type, + cl_context context) + : ptr_(clCreateBuffer(context, CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR, + size, reinterpret_cast(input), NULL)), size_(size), - type_(type) { - - } + type_(type) {} PlaceholderImpl(size_t size, std::type_index type, cl_context context) - : ptr_(clCreateBuffer(context, - CL_MEM_READ_WRITE, size, NULL, NULL)), + : ptr_(clCreateBuffer(context, CL_MEM_READ_WRITE, size, NULL, NULL)), size_(size), type_(type) {} @@ -128,9 +124,7 @@ class CLTensor : TensorBase { /* the current type of memory */ std::type_index type_; - }; - }; } // namespace framework diff --git a/src/framework/cl/cl_tool.cpp b/src/framework/cl/cl_tool.cpp index 993b63743b2697922d40dbfb594594c47c67498a..827642b6b73cfaee02f4053dce798bf6b3c52f4b 100644 --- a/src/framework/cl/cl_tool.cpp +++ b/src/framework/cl/cl_tool.cpp @@ -12,13 +12,15 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. */ -#include "cl_tool.h" +#include "framework/cl/cl_tool.h" namespace paddle_mobile { namespace framework { const char *opencl_error_to_str(cl_int error) { -#define CASE_CL_CONSTANT(NAME) case NAME: return #NAME; +#define CASE_CL_CONSTANT(NAME) \ + case NAME: \ + return #NAME; // Suppose that no combinations are possible. switch (error) { CASE_CL_CONSTANT(CL_SUCCESS) @@ -78,5 +80,5 @@ const char *opencl_error_to_str(cl_int error) { #undef CASE_CL_CONSTANT } -} -} +} // namespace framework +} // namespace paddle_mobile diff --git a/src/framework/cl/cl_tool.h b/src/framework/cl/cl_tool.h index a2c1eb628b81588bf8bdf85da8b341410ce5a0c0..74a20f48185af34c2d509c6e8de23ecab42601cc 100644 --- a/src/framework/cl/cl_tool.h +++ b/src/framework/cl/cl_tool.h @@ -19,16 +19,15 @@ limitations under the License. */ namespace paddle_mobile { namespace framework { -const char* opencl_error_to_str (cl_int error); - -#define CL_CHECK_ERRORS(ERR) \ - if(ERR != CL_SUCCESS) \ - { \ - printf( \ - "OpenCL error with code %s happened in file %s at line %d. Exiting.\n", \ - opencl_error_to_str(ERR), __FILE__, __LINE__ \ - ); \ - } - -} -} +const char* opencl_error_to_str(cl_int error); + +#define CL_CHECK_ERRORS(ERR) \ + if (ERR != CL_SUCCESS) { \ + printf( \ + "OpenCL error with code %s happened in file %s at line %d. " \ + "Exiting.\n", \ + opencl_error_to_str(ERR), __FILE__, __LINE__); \ + } + +} // namespace framework +} // namespace paddle_mobile diff --git a/src/framework/executor.cpp b/src/framework/executor.cpp index 9c898ba554725aa36f9bab034c8d9dcf936f1bae..35532103be403a7950452afdc514cafdeb41c735 100644 --- a/src/framework/executor.cpp +++ b/src/framework/executor.cpp @@ -12,7 +12,7 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. */ -#include "executor.h" +#include "framework/executor.h" #include #include #include @@ -265,7 +265,7 @@ void Executor::InitCombineMemory() { char *origin_data; if (program_.combined_params_buf && program_.combined_params_len) { LOG(kLOG_INFO) << "use outter memory"; - origin_data = (char *)program_.combined_params_buf; + origin_data = reinterpret_cast(program_.combined_params_buf); } else { LOG(kLOG_INFO) << " begin init combine memory"; origin_data = Get_binary_data(program_.para_path); @@ -666,12 +666,12 @@ void Executor::InjectVariable(const framework::Tensor &t, g_feed_value->GetMutable(); feed_tensor->Resize(t.dims()); feed_tensor->ShareDataWith(t); -}; +} template void Executor::FeedData(const framework::Tensor &t) { InjectVariable(t, "feed"); -}; +} template std::shared_ptr Executor::FetchResult(int id) { @@ -687,14 +687,14 @@ std::shared_ptr Executor::FetchResult(int id) { auto *output_tensor = framework::GetVarValue( out_keys[0], output_map, *(program_.scope)); return std::make_shared(framework::Tensor(*output_tensor)); -}; +} template void Executor::Predict_From_To(int start, int end) { std::shared_ptr to_predict_block = to_predict_program_->Block(0); auto &ops = ops_of_block_[*to_predict_block.get()]; - end = end < 0 ? (int)ops.size() : end; + end = end < 0 ? static_cast(ops.size()) : end; PADDLE_MOBILE_ENFORCE(start >= 0 && start < end && end <= ops.size(), "start or end parameter is wrong"); @@ -715,17 +715,17 @@ void Executor::Predict_From_To(int start, int end) { profile[i].runEnd = (uint64_t)ts.tv_sec * 1e9 + ts.tv_nsec; #endif } -}; +} template void Executor::Predict_From(int start) { Predict_From_To(start); -}; +} template void Executor::Predict_To(int end) { Predict_From_To(0, end); -}; +} #endif #ifdef PADDLE_MOBILE_FPGA @@ -738,12 +738,12 @@ void Executor::InjectVariable(const framework::Tensor &t, g_feed_value->GetMutable(); feed_tensor->Resize(t.dims()); feed_tensor->ShareDataWith(t); -}; +} template void Executor::FeedData(const framework::Tensor &t) { InjectVariable(t, "feed"); -}; +} template std::shared_ptr Executor::FetchResult(int id) { @@ -759,14 +759,14 @@ std::shared_ptr Executor::FetchResult(int id) { auto *output_tensor = framework::GetVarValue( out_keys[0], output_map, *(program_.scope)); return std::make_shared(framework::Tensor(*output_tensor)); -}; +} template void Executor::Predict_From_To(int start, int end) { std::shared_ptr to_predict_block = to_predict_program_->Block(0); auto &ops = ops_of_block_[*to_predict_block.get()]; - end = end < 0 ? (int)ops.size() : end; + end = end < 0 ? static_cast(ops.size()) : end; PADDLE_MOBILE_ENFORCE(start >= 0 && start < end && end <= ops.size(), "start or end parameter is wrong"); @@ -787,20 +787,120 @@ void Executor::Predict_From_To(int start, int end) { profile[i].runEnd = (uint64_t)ts.tv_sec * 1e9 + ts.tv_nsec; #endif } -}; +} template void Executor::Predict_From(int start) { Predict_From_To(start); -}; +} template void Executor::Predict_To(int end) { Predict_From_To(0, end); -}; +} #endif #ifdef PADDLE_MOBILE_CL +template <> +void Executor::LoadMemory( + const framework::VarDesc var_desc, float *tensorInput, char **data) { + // 1. version + uint32_t version = *reinterpret_cast(*data); + + (*data) += sizeof(uint32_t); + + // 2 Lod information + uint64_t *lod_level_ptr = new uint64_t(); + memcpy(lod_level_ptr, (*data), sizeof(uint64_t)); + uint64_t lod_level = *lod_level_ptr; + delete lod_level_ptr; + (*data) += sizeof(uint64_t); + + for (uint64_t i = 0; i < lod_level; ++i) { + uint64_t size = *reinterpret_cast(*data); + (*data) += sizeof(uint64_t); + std::vector tmp(size / sizeof(size_t)); + + for (int k = 0; k < tmp.size(); ++k) { + tmp[k] = *reinterpret_cast(*data); + (*data) += sizeof(size_t); + } + } + + // 3. tensor version + uint32_t tensor_version = *reinterpret_cast(*data); + (*data) += sizeof(uint32_t); + + // 4. tensor desc + int32_t size = *reinterpret_cast(*data); + (*data) += sizeof(int32_t); + + std::unique_ptr buf(new char[size]); + for (int m = 0; m < size; ++m) { + buf.get()[m] = (*data)[m]; + } + (*data) += (sizeof(char) * size); + + const framework::TensorDesc &desc = var_desc.Tensor_desc(); + int memory_size = 1; + for (auto l : desc.Dims()) { + memory_size *= l; + } + + void *memory = nullptr; + // int type_size = 0; + // switch (desc.DataType()) { + // case framework::VARTYPE_TYPE_FP16: + // type_size = 2; + // break; + // case framework::VARTYPE_TYPE_FP32: + // type_size = 4; + // memory = tensor->mutable_data(); + // break; + // case framework::VARTYPE_TYPE_FP64: + // type_size = 8; + // break; + // case framework::VARTYPE_TYPE_INT32: + // memory = tensor->mutable_data(); + // type_size = 4; + // break; + // case framework::VARTYPE_TYPE_INT64: + // type_size = 8; + // break; + // case framework::VARTYPE_TYPE_BOOL: + // type_size = 1; + // break; + // default: + // break; + // } + int type_size = 4; + memory = tensorInput; + if (program_.quantification) { + float min_value; + float max_value; + + memcpy(&min_value, *data, sizeof(float)); + memcpy(&max_value, *data + sizeof(float), sizeof(float)); + *data += 2 * sizeof(float); + const float factor = (max_value - min_value) / 255.0; + uint8_t *uint8_data = reinterpret_cast(*data); + for (int k = 0; k < memory_size; ++k) { + static_cast(memory)[k] = uint8_data[k] * factor + min_value; + } + *data += (memory_size * sizeof(uint8_t)); + } else { + for (int n = 0; n < memory_size; n++) { + float value; + memcpy(&value, *data + n * type_size, type_size); + if (value < 1e-30 && value > -1e-30) { + static_cast(memory)[n] = 0.0; + } else { + static_cast(memory)[n] = value; + } + } + (*data) += (sizeof(char) * memory_size * type_size); + } +} template <> void Executor::InitMemory() { @@ -812,27 +912,37 @@ void Executor::InitMemory() { if (var_desc->Name() == "feed" || var_desc->Name() == "fetch") { continue; } - char *origin_data = Get_binary_data(program_.model_path + "/" + var_desc->Name()); + char *data = origin_data; cl_context context = program_.scope->GetCLScpoe()->Context(); - - float *tensorInput = (float *)origin_data; - const framework::TensorDesc &desc = var_desc->Tensor_desc(); - framework::DDim ddim = cl_image->dims(); + int numel = 1; + for (auto l : desc.Dims()) { + numel *= l; + } + DLOG << var_desc->Name(); + float *tensorInput = static_cast( + paddle_mobile::memory::Alloc(sizeof(float) * numel)); + LoadMemory(*var_desc, tensorInput, &data); + + framework::DDim ddim = framework::make_ddim(desc.Dims()); cl_image->Init(context, tensorInput, ddim); - delete origin_data; - }else{ - auto cl_image = var->template GetMutable(); - cl_context context = program_.scope->GetCLScpoe()->Context(); - const framework::TensorDesc &desc = var_desc->Tensor_desc(); - framework::DDim ddim = cl_image->dims(); + delete origin_data; + paddle_mobile::memory::Free(tensorInput); + } else { + if (var_desc->Type() == framework::VARTYPE_TYPE_LOD_TENSOR) { + auto cl_image = var->template GetMutable(); + cl_context context = program_.scope->GetCLScpoe()->Context(); - cl_image->Init(context, ddim); + const framework::TensorDesc &desc = var_desc->Tensor_desc(); + framework::DDim ddim = framework::make_ddim(desc.Dims()); + DLOG << var_desc->Name(); + cl_image->Init(context, ddim); + } } } } @@ -843,13 +953,13 @@ void Executor::InitCombineMemory() { char *origin_data; if (program_.combined_params_buf && program_.combined_params_len) { LOG(kLOG_INFO) << "use outter memory"; - origin_data = (char *)program_.combined_params_buf; + origin_data = reinterpret_cast(program_.combined_params_buf); } else { LOG(kLOG_INFO) << " begin init combine memory"; origin_data = Get_binary_data(program_.para_path); } PADDLE_MOBILE_ENFORCE(origin_data != nullptr, "origin_data==nullptr!!!"); - float *data = (float *)origin_data; + float *data = reinterpret_cast(origin_data); for (const auto &block : to_predict_program_->Blocks()) { for (const auto &var_desc : block->Vars()) { @@ -863,21 +973,23 @@ void Executor::InitCombineMemory() { cl_context context = program_.scope->GetCLScpoe()->Context(); const framework::TensorDesc &desc = var_desc->Tensor_desc(); - framework::DDim ddim = cl_image->dims(); + framework::DDim ddim = framework::make_ddim(desc.Dims()); int numel = 1; for (int i = 0; i < ddim.size(); i++) { numel = numel * ddim[i]; } - float *tensorInput = data; + float *tensorInput = static_cast( + paddle_mobile::memory::Alloc(sizeof(float) * numel)); + LoadMemory(*var_desc, tensorInput, &origin_data); cl_image->Init(context, tensorInput, ddim); - data += numel; - }else{ + paddle_mobile::memory::Free(tensorInput); + } else { auto cl_image = var->template GetMutable(); cl_context context = program_.scope->GetCLScpoe()->Context(); const framework::TensorDesc &desc = var_desc->Tensor_desc(); - framework::DDim ddim = cl_image->dims(); + framework::DDim ddim = framework::make_ddim(desc.Dims()); cl_image->Init(context, ddim); } diff --git a/src/framework/executor.h b/src/framework/executor.h index f43cd14c29b909e9f666b098824d1bb444998add..4a99c41406b9c9946784b096d11689fa6c36bec3 100644 --- a/src/framework/executor.h +++ b/src/framework/executor.h @@ -35,7 +35,7 @@ using std::string; namespace paddle_mobile { namespace framework { -template +template class Executor { public: typedef typename PrecisionTrait

::ptype Ptype; @@ -56,7 +56,7 @@ class Executor { * @b to predict * */ std::shared_ptr PredictLod( - const framework::LoDTensor &t); + const framework::LoDTensor &t); /* * @b to predict with vector and dim @@ -73,6 +73,8 @@ class Executor { void LoadMemory(const framework::VarDesc var_desc, framework::LoDTensor *tensor, char **data); + void LoadMemory(const framework::VarDesc var_desc, float *tensorInput, + char **data); void InitCombineMemory(); @@ -84,8 +86,8 @@ class Executor { int block_id); std::map>>> - ops_of_block_; + std::vector>>> + ops_of_block_; bool use_optimize_ = false; bool loddable_ = false; #ifdef PADDLE_EXECUTOR_MULTITHREAD @@ -105,15 +107,15 @@ class Executor { #ifdef PADDLE_MOBILE_FPGA - public: - void InjectVariable(const framework::Tensor &t, string var_name); - void FeedData(const framework::Tensor &t); - std::shared_ptr FetchResult(int id = -1); - void Predict_From_To(int start = 0, int end = -1); - void Predict_From(int start); - void Predict_To(int end); + public: + void InjectVariable(const framework::Tensor &t, string var_name); + void FeedData(const framework::Tensor &t); + std::shared_ptr FetchResult(int id = -1); + void Predict_From_To(int start = 0, int end = -1); + void Predict_From(int start); + void Predict_To(int end); #endif }; -} +} // namespace framework } // namespace paddle_mobile diff --git a/src/framework/loader.cpp b/src/framework/loader.cpp index 7b95f1d1bf3186583cb586c6c4f3cb5862fa7978..0122f8916f2aa454df7ffe2ebaa8bbc9fe686b7b 100644 --- a/src/framework/loader.cpp +++ b/src/framework/loader.cpp @@ -12,10 +12,13 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. */ -#include "loader.h" +#include "framework/loader.h" #include "framework/lod_tensor.h" #include "framework/program/program-optimize/program_optimize.h" +#ifdef PADDLE_MOBILE_CL +#include "framework/cl/cl_image.h" +#endif namespace paddle_mobile { namespace framework { @@ -26,9 +29,10 @@ namespace framework { * @param originProgramDesc * @param scope */ -void InitMemoryFromProgram( - std::shared_ptr &originProgramDesc, - std::shared_ptr &scope) { +template +void Loader::InitMemoryFromProgram( + const std::shared_ptr &originProgramDesc, + const std::shared_ptr &scope) { for (const auto &block : originProgramDesc.get()->Blocks()) { for (const auto &var_desc : block->Vars()) { auto var = scope.get()->Var(var_desc->Name()); @@ -51,6 +55,35 @@ void InitMemoryFromProgram( } } +#ifdef PADDLE_MOBILE_CL +template <> +void Loader::InitMemoryFromProgram( + const std::shared_ptr &originProgramDesc, + const std::shared_ptr &scope) { + for (const auto &block : originProgramDesc.get()->Blocks()) { + for (const auto &var_desc : block->Vars()) { + auto var = scope.get()->Var(var_desc->Name()); + if (var_desc->Type() == VARTYPE_TYPE_LOD_TENSOR) { + if (var_desc->Persistable()) { + auto dim = var_desc->Tensor_desc().Dims(); + // auto tensor = var->GetMutable(); + auto cl_image = var->GetMutable(); + cl_image->Resize(make_ddim(dim)); + } else { + auto dim = var_desc->Tensor_desc().Dims(); + PADDLE_MOBILE_ENFORCE(dim.size() > 0, "dim size is 0"); + dim[0] = 1; + auto cl_image = var->GetMutable(); + cl_image->Resize(make_ddim(dim)); + } + } else { + // TODO(codeWorm): some. + } + } + } +} +#endif + /** * fusion and print someinfos * @tparam Dtype @@ -60,17 +93,17 @@ void InitMemoryFromProgram( * @param program * @param originProgramDesc */ -template +template void FusionAndPrintInfos( - bool &optimize, bool &can_add_split, Program &program, - const std::shared_ptr &originProgramDesc) { + bool optimize, bool can_add_split, Program *program, + const std::shared_ptr &originProgramDesc) { if (optimize) { ProgramOptimize program_optimize; - program.optimizeProgram = - program_optimize.FusionOptimize(originProgramDesc, can_add_split); + program->optimizeProgram = + program_optimize.FusionOptimize(originProgramDesc, can_add_split); } if (optimize) { - program.optimizeProgram->Description("optimize: "); + program->optimizeProgram->Description("optimize: "); } else { originProgramDesc->Description("program: "); } @@ -98,20 +131,22 @@ static size_t ReadBuffer(const char *file_name, uint8_t **out) { return cur_len; } -template -const Program Loader::Load( - const std::string &dirname, bool optimize, bool quantification, - bool can_add_split) { +template +const Program Loader::Load(const std::string &dirname, + bool optimize, + bool quantification, + bool can_add_split) { auto program = this->LoadProgram(dirname + "/__model__", optimize, quantification, can_add_split); program.model_path = dirname; return program; } -template -const Program Loader::Load( - const std::string &model_path, const std::string ¶_path, bool optimize, - bool quantification) { +template +const Program Loader::Load(const std::string &model_path, + const std::string ¶_path, + bool optimize, + bool quantification) { auto program = this->LoadProgram(model_path, optimize, quantification); program.para_path = para_path; @@ -120,10 +155,10 @@ const Program Loader::Load( return program; } -template +template const Program Loader::LoadProgram( - const std::string &model_path, bool optimize, bool quantification, - bool can_add_split) { + const std::string &model_path, bool optimize, bool quantification, + bool can_add_split) { std::string model_filename = model_path; PaddleMobile__Framework__Proto__ProgramDesc *c_program; uint8_t *buf = NULL; @@ -132,7 +167,7 @@ const Program Loader::LoadProgram( PADDLE_MOBILE_ENFORCE(buf != NULL, "read from __model__ is null"); c_program = paddle_mobile__framework__proto__program_desc__unpack( - NULL, read_size, buf); + NULL, read_size, buf); // PADDLE_MOBILE_ENFORCE(c_program != NULL, "program is null"); // @@ -151,23 +186,23 @@ const Program Loader::LoadProgram( // use originProgramDesc and scope to init tensors InitMemoryFromProgram(originProgramDesc, scope); // perform fusion and print infos - FusionAndPrintInfos(optimize, can_add_split, program, originProgramDesc); + FusionAndPrintInfos(optimize, can_add_split, &program, originProgramDesc); paddle_mobile__framework__proto__program_desc__free_unpacked(c_program, NULL); return program; } -template +template const Program Loader::LoadCombinedMemory( - size_t read_size, const uint8_t *buf, size_t combined_params_len, - const uint8_t *combined_params_buf, bool optimize, bool quantification) { + size_t read_size, const uint8_t *buf, size_t combined_params_len, + uint8_t *combined_params_buf, bool optimize, bool quantification) { bool can_add_split = false; PaddleMobile__Framework__Proto__ProgramDesc *c_program; PADDLE_MOBILE_ENFORCE(buf != nullptr, "read from __model__ is null"); c_program = paddle_mobile__framework__proto__program_desc__unpack( - nullptr, read_size, buf); + nullptr, read_size, buf); // PADDLE_MOBILE_ENFORCE(c_program != nullptr, "program is null"); // @@ -186,23 +221,19 @@ const Program Loader::LoadCombinedMemory( auto scope = std::make_shared(); program.scope = scope; InitMemoryFromProgram(originProgramDesc, scope); - FusionAndPrintInfos(optimize, can_add_split, program, originProgramDesc); + FusionAndPrintInfos(optimize, can_add_split, &program, originProgramDesc); paddle_mobile__framework__proto__program_desc__free_unpacked(c_program, nullptr); return program; } -template -class Loader; +template class Loader; -template -class Loader; +template class Loader; -template -class Loader; +template class Loader; -template -class Loader; +template class Loader; -} +} // namespace framework } // namespace paddle_mobile diff --git a/src/framework/loader.h b/src/framework/loader.h index fe4460f6c0045116b7ed7947f080024165917bfe..3200f0b25368fa123b80c51000cfd6c6a6d084b6 100644 --- a/src/framework/loader.h +++ b/src/framework/loader.h @@ -20,7 +20,7 @@ limitations under the License. */ #include "framework/program/program.h" namespace paddle_mobile { -namespace framework{ +namespace framework { template class Loader { @@ -30,30 +30,36 @@ class Loader { * @b 加载分开形式的 fluid 模型 * */ const Program Load(const std::string &dirname, - bool optimize = false, - bool quantification = false, - bool can_add_split = false); + bool optimize = false, + bool quantification = false, + bool can_add_split = false); /* * @b load combine format fluid mode * @b 加载结合在一起格式的模型 * */ const Program Load(const std::string &model_path, - const std::string ¶_path, - bool optimize = false, - bool quantification = false); + const std::string ¶_path, + bool optimize = false, + bool quantification = false); - const Program LoadCombinedMemory( - size_t model_len, const uint8_t *model_buf, size_t combined_params_len, - const uint8_t *combined_params_buf, bool optimize = false, - bool quantification = false); + const Program LoadCombinedMemory(size_t model_len, + const uint8_t *model_buf, + size_t combined_params_len, + uint8_t *combined_params_buf, + bool optimize = false, + bool quantification = false); private: const Program LoadProgram(const std::string &model_path, - bool optimize = false, - bool quantification = false, - bool can_add_split = false); + bool optimize = false, + bool quantification = false, + bool can_add_split = false); + + void InitMemoryFromProgram( + const std::shared_ptr &originProgramDesc, + const std::shared_ptr &scope); }; -} +} // namespace framework } // namespace paddle_mobile diff --git a/src/framework/op_registry.h b/src/framework/op_registry.h index 657a1f88effcb3db6357994d531609a94f95bfe3..c4adf7d630896ec190b19493b0e01e95bcab0f6c 100644 --- a/src/framework/op_registry.h +++ b/src/framework/op_registry.h @@ -14,8 +14,8 @@ limitations under the License. */ #pragma once -#include #include +#include #include #include "common/log.h" @@ -92,7 +92,6 @@ class OpRegistry { const std::string& type, const VariableNameMap& inputs, const VariableNameMap& outputs, const AttributeMap attrs, std::shared_ptr scope) { - auto& info = OpInfoMap::Instance()->Get(type); auto op = info.Creator()(type, inputs, outputs, attrs, scope); return std::shared_ptr>(op); diff --git a/src/framework/operator.h b/src/framework/operator.h index 56a2bc734f3d2766bdc6ad22e87ce7da32c6c39c..fa7417a2975e224d9cac9bfdd4e28d73a34e019e 100644 --- a/src/framework/operator.h +++ b/src/framework/operator.h @@ -16,6 +16,7 @@ limitations under the License. */ #include #include +#include #include #include "common/enforce.h" @@ -32,8 +33,8 @@ limitations under the License. */ #include "framework/tensor.h" #include "framework/variable.h" #ifdef PADDLE_MOBILE_CL -#include "framework/cl/cl_scope.h" #include "framework/cl/cl_helper.h" +#include "framework/cl/cl_scope.h" #endif namespace paddle_mobile { namespace framework { @@ -131,7 +132,6 @@ class OperatorWithKernel : public OperatorBase { // DLOG << i.second; // } - PADDLE_MOBILE_ENFORCE(kernel_.Init(¶m_), " %s kernel init failed", this->type_.c_str()); } @@ -147,7 +147,6 @@ class OperatorWithKernel : public OperatorBase { template class OpKernelBase { public: - OpKernelBase() = default; #ifdef PADDLE_MOBILE_CL @@ -156,11 +155,11 @@ class OpKernelBase { } #endif - /* - * @b 所有kernel 需实现 Compute 方法 - * @p para 这个参数为 kernel 运算时所需要用到参数组成的一个结构体, - * 所有结构体存在与: paddle-mobile/src/operators/op_param.h - * */ + /* + * @b 所有kernel 需实现 Compute 方法 + * @p para 这个参数为 kernel 运算时所需要用到参数组成的一个结构体, + * 所有结构体存在与: paddle-mobile/src/operators/op_param.h + * */ #ifdef PADDLE_McOBILE_MALI_GPU OpKernelBase() { acl_op_ = nullptr; } void *GetAclOp() const { return acl_op_; } @@ -181,8 +180,6 @@ class OpKernelBase { #ifdef PADDLE_MOBILE_MALI_GPU void *acl_op_; #endif - - }; #define DEFINE_OP_CONSTRUCTOR(cls, parent_cls) \ diff --git a/src/framework/program/program.h b/src/framework/program/program.h index 192328a567e6d3bfad7a8a3b35e3bc64131a2cd2..ae3e7b0abea2b2e1fc41962dc1d926fed252e096 100644 --- a/src/framework/program/program.h +++ b/src/framework/program/program.h @@ -32,7 +32,7 @@ class Program { bool combined = false; bool quantification = false; size_t combined_params_len; - const uint8_t *combined_params_buf; + uint8_t *combined_params_buf; private: }; diff --git a/src/framework/scope.h b/src/framework/scope.h index a984b9d5096da267938d09e4fc796b4217eabfec..abc727231a0d119ab53d765ab020085aaab9102d 100644 --- a/src/framework/scope.h +++ b/src/framework/scope.h @@ -15,13 +15,14 @@ limitations under the License. */ #pragma once #include +#include +#include +#include -#ifdef PADDLE_MOBILE_CL +#ifdef PADDLE_MOBILE_CL #include "framework/cl/cl_scope.h" #endif - -#include -#include "variable.h" +#include "framework/variable.h" namespace paddle_mobile { namespace framework { @@ -42,7 +43,6 @@ class Scope { #ifdef PADDLE_MOBILE_CL delete cl_scope_; #endif - } Scope &NewScope() const; @@ -83,9 +83,7 @@ class Scope { Variable *FindVarLocally(const std::string &name) const; #ifdef PADDLE_MOBILE_CL - CLScope *GetCLScpoe() { - return cl_scope_; - } + CLScope *GetCLScpoe() { return cl_scope_; } #endif private: @@ -99,7 +97,6 @@ class Scope { #ifdef PADDLE_MOBILE_CL CLScope *cl_scope_ = new CLScope(); #endif - }; } // namespace framework } // namespace paddle_mobile diff --git a/src/io/paddle_mobile.cpp b/src/io/paddle_mobile.cpp index 54a2a4e912266f12c2dd4c232cb3061a7a487bb1..8bea4412ac0b371e029f5aa8914bcb2d6eeb547b 100644 --- a/src/io/paddle_mobile.cpp +++ b/src/io/paddle_mobile.cpp @@ -68,9 +68,10 @@ bool PaddleMobile::Load(const std::string &model_path, } template -bool PaddleMobile::LoadCombinedMemory( - size_t model_len, const uint8_t *model_buf, size_t combined_params_len, - const uint8_t *combined_params_buf) { +bool PaddleMobile::LoadCombinedMemory(size_t model_len, + const uint8_t *model_buf, + size_t combined_params_len, + uint8_t *combined_params_buf) { int batch_size = 1; bool optimise = true; bool quantification = false; diff --git a/src/io/paddle_mobile.h b/src/io/paddle_mobile.h index cf753fa7a64c08ded2ec756731780769f35531d3..b53dc5ac9eb1255a51992c3dd4dbbba3f306c467 100644 --- a/src/io/paddle_mobile.h +++ b/src/io/paddle_mobile.h @@ -22,9 +22,9 @@ limitations under the License. */ #endif // _OPENMP #include "common/types.h" -#include "framework/tensor.h" #include "framework/executor.h" #include "framework/loader.h" +#include "framework/tensor.h" namespace paddle_mobile { @@ -83,7 +83,7 @@ class PaddleMobile { */ bool LoadCombinedMemory(size_t model_len, const uint8_t *model_buf, size_t combined_params_len, - const uint8_t *combined_params_buf); + uint8_t *combined_params_buf); void Clear(); @@ -94,6 +94,7 @@ class PaddleMobile { std::shared_ptr> executor_; #ifdef PADDLE_MOBILE_FPGA + public: void InjectVariable(const framework::Tensor &t, string var_name); void FeedData(const framework::Tensor &t); diff --git a/src/operators/batchnorm_op.cpp b/src/operators/batchnorm_op.cpp index f820908404ea637d9680c32d5c4b5568e191dd7e..566b8438d8775f67246cd535f8cb85cd804ce1bb 100644 --- a/src/operators/batchnorm_op.cpp +++ b/src/operators/batchnorm_op.cpp @@ -40,4 +40,8 @@ REGISTER_OPERATOR_MALI_GPU(batch_norm, ops::BatchNormOp); #ifdef PADDLE_MOBILE_FPGA #endif +#ifdef PADDLE_MOBILE_CL +REGISTER_OPERATOR_CL(batch_norm, ops::BatchNormOp); +#endif + #endif diff --git a/src/operators/batchnorm_op.h b/src/operators/batchnorm_op.h index 52c423f1bb90428e867ea6fb992036ab83c683d7..55bc5a4a295fb90bebe7ab8e359c5a6efccdc250 100644 --- a/src/operators/batchnorm_op.h +++ b/src/operators/batchnorm_op.h @@ -54,5 +54,8 @@ USE_OP_MALI_GPU(batch_norm); #endif #ifdef PADDLE_MOBILE_FPGA #endif +#ifdef PADDLE_MOBILE_CL +USE_OP_CL(batch_norm); +#endif #endif diff --git a/src/operators/feed_op.h b/src/operators/feed_op.h index 1e0192f3df4eef495015181f1832e9413e755af3..fe444b206b86aaaccbd52307743f3e0cd23f5d7f 100644 --- a/src/operators/feed_op.h +++ b/src/operators/feed_op.h @@ -43,13 +43,14 @@ class FeedOp : public framework::OperatorBase { #ifdef PADDLE_MOBILE_FPGA - void Init() { + void Init() { Tensor *output = param_.Out(); fpga::format_fp16_ofm(output); } void RunImpl() const { - auto input = (Tensor *)const_cast(param_.InputX()); + auto input = + reinterpret_cast(const_cast(param_.InputX())); auto input_ptr = input->data(); fpga::format_image(input); Tensor *output = param_.Out(); @@ -61,7 +62,7 @@ class FeedOp : public framework::OperatorBase { args.output_data_type = fpga::DATA_TYPE_FP16; args.input_layout_type = fpga::LAYOUT_CHW; args.output_layout_type = fpga::LAYOUT_HWC; - args.image.address = (void *)input_ptr; + args.image.address = reinterpret_cast(input_ptr); args.image.channels = (uint32_t)input->dims()[1]; args.image.height = (uint32_t)input->dims()[2]; args.image.width = (uint32_t)input->dims()[3]; @@ -74,13 +75,10 @@ class FeedOp : public framework::OperatorBase { #else #ifdef PADDLE_MOBILE_CL - void Init() {} - void RunImpl() { - - - } + void Init() {} + void RunImpl() {} #else - void Init() {} + void Init() {} void RunImpl() { param_.Out()->ShareDataWith(*param_.InputX()); param_.Out()->set_lod(param_.InputX()->lod()); diff --git a/src/operators/fetch_op.cpp b/src/operators/fetch_op.cpp index cc96934cadd63278d62383be33fe9bb7da9bf640..852d5c48fcef57d7f7e06baecc2c1cbb09b5d530 100644 --- a/src/operators/fetch_op.cpp +++ b/src/operators/fetch_op.cpp @@ -43,3 +43,6 @@ REGISTER_OPERATOR_MALI_GPU(fetch, ops::FetchOp); #ifdef PADDLE_MOBILE_FPGA REGISTER_OPERATOR_FPGA(fetch, ops::FetchOp); #endif +#ifdef PADDLE_MOBILE_CL +REGISTER_OPERATOR_CL(fetch, ops::FetchOp); +#endif diff --git a/src/operators/fetch_op.h b/src/operators/fetch_op.h index 708686c8878f7754c084c5ac287deaed41b9d246..59b9c46ed35c25feefe19f8e4a0d8a23b0dddcaf 100644 --- a/src/operators/fetch_op.h +++ b/src/operators/fetch_op.h @@ -54,3 +54,6 @@ USE_OP_MALI_GPU(fetch); #ifdef PADDLE_MOBILE_FPGA USE_OP_FPGA(fetch); #endif +#ifdef PADDLE_MOBILE_CL +USE_OP_CL(fetch); +#endif diff --git a/src/operators/fusion_conv_add_bn_relu_op.h b/src/operators/fusion_conv_add_bn_relu_op.h index 68f9107e6a95f0872e0e2628f6672597d4de0277..f6b0b6b3d1f48084efd32b5112e02c02c9d3782b 100644 --- a/src/operators/fusion_conv_add_bn_relu_op.h +++ b/src/operators/fusion_conv_add_bn_relu_op.h @@ -20,8 +20,8 @@ limitations under the License. */ #include #include "framework/operator.h" #include "framework/program/program-optimize/fusion_op_register.h" -#include "op_param.h" #include "operators/kernel/conv_add_bn_relu_kernel.h" +#include "operators/op_param.h" namespace paddle_mobile { namespace operators { @@ -103,7 +103,7 @@ static framework::FusionOpRegistrar fusion_conv_add_bn_relu_registrar( #ifdef PADDLE_MOBILE_CL #ifndef FUSION_CONV_ADD_BN_RELU_REGISTER - static framework::FusionOpRegistrar fusion_conv_add_bn_relu_registrar( +static framework::FusionOpRegistrar fusion_conv_add_bn_relu_registrar( new FusionConvAddBNReluMatcher()); #define FUSION_CONV_ADD_BN_RELU_REGISTER #endif diff --git a/src/operators/kernel/arm/batchnorm_kernel.cpp b/src/operators/kernel/arm/batchnorm_kernel.cpp index fe7233088eaaf303af7c85f1702cf0381af33887..f31c4426db7d28234692742fcd670cb26ec50ab0 100644 --- a/src/operators/kernel/arm/batchnorm_kernel.cpp +++ b/src/operators/kernel/arm/batchnorm_kernel.cpp @@ -26,8 +26,7 @@ bool BatchNormKernel::Init(BatchNormParam *param) { } template <> -void BatchNormKernel::Compute( - const BatchNormParam ¶m) { +void BatchNormKernel::Compute(const BatchNormParam ¶m) { BatchnormCompute(param); } diff --git a/src/operators/kernel/arm/box_coder_kernel.cpp b/src/operators/kernel/arm/box_coder_kernel.cpp index 2071b0ee996b6a96509c8d0f8556ddc5ce12daac..30ede12dffe0eed7673c9ae1f7c836fd1b5b7096 100644 --- a/src/operators/kernel/arm/box_coder_kernel.cpp +++ b/src/operators/kernel/arm/box_coder_kernel.cpp @@ -26,8 +26,7 @@ bool BoxCoderKernel::Init(BoxCoderParam *param) { } template <> -void BoxCoderKernel::Compute( - const BoxCoderParam ¶m) { +void BoxCoderKernel::Compute(const BoxCoderParam ¶m) { BoxCoderCompute(param); } diff --git a/src/operators/kernel/arm/conv_add_kernel.cpp b/src/operators/kernel/arm/conv_add_kernel.cpp index 8672678882ef15be8538312e0526c57d5956f10f..e016b8efbd15472ae0d77423d84dc19671bfa316 100644 --- a/src/operators/kernel/arm/conv_add_kernel.cpp +++ b/src/operators/kernel/arm/conv_add_kernel.cpp @@ -25,8 +25,7 @@ bool ConvAddKernel::Init(FusionConvAddParam *param) { } template <> -void ConvAddKernel::Compute( - const FusionConvAddParam ¶m) { +void ConvAddKernel::Compute(const FusionConvAddParam ¶m) { ConvAddCompute(param); } diff --git a/src/operators/kernel/arm/depthwise_conv_kernel.cpp b/src/operators/kernel/arm/depthwise_conv_kernel.cpp index d3ada81b774d23d4b91270d2ae11da86dbe0a209..000d59baa8c804201cbd2e2a731c2077196b698f 100644 --- a/src/operators/kernel/arm/depthwise_conv_kernel.cpp +++ b/src/operators/kernel/arm/depthwise_conv_kernel.cpp @@ -26,8 +26,7 @@ bool DepthwiseConvKernel::Init(ConvParam *param) { } template <> -void DepthwiseConvKernel::Compute( - const ConvParam ¶m) { +void DepthwiseConvKernel::Compute(const ConvParam ¶m) { DepthwiseConvCompute(param); } diff --git a/src/operators/kernel/arm/fusion_fc_kernel.cpp b/src/operators/kernel/arm/fusion_fc_kernel.cpp index 0f13a1e84b8e80bc09e9bbf2c6554108567f00f0..c503edab643def7af0585a18d774b14ca0a3c39d 100644 --- a/src/operators/kernel/arm/fusion_fc_kernel.cpp +++ b/src/operators/kernel/arm/fusion_fc_kernel.cpp @@ -26,8 +26,7 @@ bool FusionFcKernel::Init(FusionFcParam *param) { } template <> -void FusionFcKernel::Compute( - const FusionFcParam ¶m) { +void FusionFcKernel::Compute(const FusionFcParam ¶m) { FusionFcCompute(param); param.Out()->set_lod(param.InputX()->lod()); } diff --git a/src/operators/kernel/arm/prior_box_kernel.cpp b/src/operators/kernel/arm/prior_box_kernel.cpp index 43b0d4f16ac64f6a4fb8d08e8d9715d2f66ef683..c067d3388dd928b032178add99c6567a8add20d3 100644 --- a/src/operators/kernel/arm/prior_box_kernel.cpp +++ b/src/operators/kernel/arm/prior_box_kernel.cpp @@ -26,8 +26,7 @@ bool PriorBoxKernel::Init(PriorBoxParam *param) { } template <> -void PriorBoxKernel::Compute( - const PriorBoxParam ¶m) { +void PriorBoxKernel::Compute(const PriorBoxParam ¶m) { PriorBoxCompute(param); } diff --git a/src/operators/kernel/arm/transpose_kernel.cpp b/src/operators/kernel/arm/transpose_kernel.cpp index 43355abf0a6e10f3aca857cc94ac72d1827b47f2..f90376eb507253badb209838a3db4bafbcfbb5b9 100644 --- a/src/operators/kernel/arm/transpose_kernel.cpp +++ b/src/operators/kernel/arm/transpose_kernel.cpp @@ -25,8 +25,7 @@ bool TransposeKernel::Init(TransposeParam *param) { } template <> -void TransposeKernel::Compute( - const TransposeParam ¶m) { +void TransposeKernel::Compute(const TransposeParam ¶m) { TransposeCompute(param); } diff --git a/src/operators/kernel/cl/cl_kernel/common.h b/src/operators/kernel/cl/cl_kernel/common.h index 80d90e25ba91443768e488be6db24820edd1a083..027255d9dc01ad8f6da6e23842a80c029f2698ee 100644 --- a/src/operators/kernel/cl/cl_kernel/common.h +++ b/src/operators/kernel/cl/cl_kernel/common.h @@ -33,4 +33,3 @@ inline hafl4 activation(half4 in } */ - diff --git a/src/operators/kernel/cl/cl_kernel/conv_kernel.inc.cl b/src/operators/kernel/cl/cl_kernel/conv_kernel.inc.cl index 49c18e1e4772f17d5a98a806544a8391dfc5b946..04ceed8a6e16378f87966e53dff2bc880d3141d6 100644 --- a/src/operators/kernel/cl/cl_kernel/conv_kernel.inc.cl +++ b/src/operators/kernel/cl/cl_kernel/conv_kernel.inc.cl @@ -12,9 +12,6 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. */ - - - /* conv conv_bn @@ -30,7 +27,6 @@ conv_add_bn_relu #include "common.h" - __kernel void conv_1x1(__private const int global_size_dim0, __private const int global_size_dim1, __private const int global_size_dim2, diff --git a/src/operators/kernel/cl/cl_kernel/elementwise_add_kernel.cl b/src/operators/kernel/cl/cl_kernel/elementwise_add_kernel.cl index d441932dbf0dc5f079c0bdf13a30c8e5b3215cf7..f731a61a82f9d1e7d44e760037512157c4ffef19 100644 --- a/src/operators/kernel/cl/cl_kernel/elementwise_add_kernel.cl +++ b/src/operators/kernel/cl/cl_kernel/elementwise_add_kernel.cl @@ -1,3 +1,17 @@ +/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ + __kernel void elementwise_add(__global float* in, __global float* out) { int num = get_global_id(0); out[num] = in[num] * 0.1 + 102; diff --git a/src/operators/kernel/cl/conv_add_bn_kernel.cpp b/src/operators/kernel/cl/conv_add_bn_kernel.cpp deleted file mode 100644 index 7fc231019134b62d7e9fb289720cb2ed3b7fa3ba..0000000000000000000000000000000000000000 --- a/src/operators/kernel/cl/conv_add_bn_kernel.cpp +++ /dev/null @@ -1,37 +0,0 @@ -/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. */ - -#ifdef FUSION_CONVADDBN_OP - -#include "operators/kernel/conv_add_bn_kernel.h" - -namespace paddle_mobile { -namespace operators { - -template <> -bool ConvAddBNReluKernel::Init( - FusionConvAddBNReluParam *param) { - return true; -} - -template <> -void ConvAddBNReluKernel::Compute( - const FusionConvAddBNReluParam ¶m) { -} -template class ConvAddBNReluKernel; - -} // namespace operators -} // namespace paddle_mobile - -#endif diff --git a/src/operators/kernel/cl/conv_add_bn_relu_kernel.cpp b/src/operators/kernel/cl/conv_add_bn_relu_kernel.cpp index 5be9eece8a85f0dce406d65541afeebb4baa83a5..e62041d3f47aae8dbc9078d49beb84d45c2d9423 100644 --- a/src/operators/kernel/cl/conv_add_bn_relu_kernel.cpp +++ b/src/operators/kernel/cl/conv_add_bn_relu_kernel.cpp @@ -15,20 +15,122 @@ limitations under the License. */ #ifdef FUSION_CONVADDBNRELU_OP #include "operators/kernel/conv_add_bn_relu_kernel.h" +#include "framework/cl/cl_image.h" namespace paddle_mobile { namespace operators { template <> bool ConvAddBNReluKernel::Init( - FusionConvAddBNReluParam *param) { + FusionConvAddBNReluParam *param) { + // const CL *mean = param->InputMean(); + const framework::CLImage *mean = param->InputMean(); + + const framework::CLImage *variance = param->InputVariance(); + const framework::CLImage *scale = param->InputScale(); + const framework::CLImage *bias = param->InputBias(); + const float epsilon = param->Epsilon(); + + auto mean_ptr = mean->data(); + auto variance_ptr = variance->data(); + auto scale_ptr = scale->data(); + auto bias_ptr = bias->data(); + + const int C = mean->numel(); + + float inv_std_ptr[C]; + for (int i = 0; i < C; i++) { + inv_std_ptr[i] = + 1 / static_cast(pow((variance_ptr[i] + epsilon), 0.5)); + } + float *new_scale_ptr = new float[C]; + float *new_bias_ptr = new float[C]; + + for (int i = 0; i < C; i++) { + new_scale_ptr[i] = inv_std_ptr[i] * scale_ptr[i]; + new_bias_ptr[i] = bias_ptr[i] - mean_ptr[i] * inv_std_ptr[i] * scale_ptr[i]; + } + + delete[](new_scale_ptr); + delete[](new_bias_ptr); + + framework::CLImage *new_scale = new framework::CLImage(); + + new_scale->Init(this->cl_helper_.CLContext(), new_scale_ptr, + variance->dims()); + + framework::CLImage *new_bias = new framework::CLImage(); + + new_bias->Init(this->cl_helper_.CLContext(), new_bias_ptr, variance->dims()); + + param->SetNewScale(new_scale); + + param->SetNewBias(new_bias); + + PADDLE_MOBILE_ENFORCE( + param->Filter()->dims()[2] == param->Filter()->dims()[3] && + param->Paddings()[0] == param->Paddings()[1], + "need equal"); + + int offset = static_cast(param->Filter()->dims()[2]) / 2 - + static_cast(param->Paddings()[1]); + + param->SetOffset(offset); + + if (param->Filter()->WidthOfOneBlock() == 1 && + param->Filter()->HeightOfOneBlock() == 1) { + this->cl_helper_.AddKernel("conv_1x1", "conv_add_bn_relu_kernel.cl"); + } else if (param->Filter()->dims()[1] == 1) { + this->cl_helper_.AddKernel("depth_conv_3x3", "conv_add_bn_relu_kernel.cl"); + } else if (param->Filter()->WidthOfOneBlock() == 3 && + param->Filter()->HeightOfOneBlock() == 3) { + this->cl_helper_.AddKernel("conv_3x3", "conv_add_bn_relu_kernel.cl"); + } else { + PADDLE_MOBILE_THROW_EXCEPTION(" not support "); + } + return true; } template <> void ConvAddBNReluKernel::Compute( - const FusionConvAddBNReluParam ¶m) { + const FusionConvAddBNReluParam ¶m) { + auto kernel = this->cl_helper_.KernelAt(0); + auto default_work_size = this->cl_helper_.DefaultWorkSize(*param.Output()); + int c_block = default_work_size[0]; + int w = default_work_size[1]; + int nh = default_work_size[2]; + auto input = param.Input()->GetCLImage(); + auto filter = param.Filter()->GetCLImage(); + auto biase = param.Bias()->GetCLImage(); + auto new_scale = param.NewScale()->GetCLImage(); + auto new_bias = param.NewBias()->GetCLImage(); + auto output = param.Output(); + int stride = param.Strides()[0]; + int offset = param.Offset(); + int input_c = param.Input()->CBlock(); + int input_width = param.Input()->WidthOfOneBlock(); + int input_height = param.Input()->HeightOfOneBlock(); + + clSetKernelArg(kernel, 0, sizeof(int), &c_block); + clSetKernelArg(kernel, 1, sizeof(int), &w); + clSetKernelArg(kernel, 2, sizeof(int), &nh); + clSetKernelArg(kernel, 3, sizeof(cl_mem), &input); + clSetKernelArg(kernel, 4, sizeof(cl_mem), &filter); + clSetKernelArg(kernel, 5, sizeof(cl_mem), &biase); + clSetKernelArg(kernel, 6, sizeof(cl_mem), &new_scale); + clSetKernelArg(kernel, 7, sizeof(cl_mem), &new_bias); + clSetKernelArg(kernel, 8, sizeof(cl_mem), &output); + clSetKernelArg(kernel, 9, sizeof(int), &stride); + clSetKernelArg(kernel, 10, sizeof(int), &offset); + clSetKernelArg(kernel, 11, sizeof(int), &input_c); + clSetKernelArg(kernel, 12, sizeof(int), &input_width); + clSetKernelArg(kernel, 13, sizeof(int), &input_height); + + clEnqueueNDRangeKernel(this->cl_helper_.CLCommandQueue(), kernel, 3, NULL, + default_work_size.data(), NULL, 0, NULL, NULL); } + template class ConvAddBNReluKernel; } // namespace operators diff --git a/src/operators/kernel/cl/conv_add_kernel.cpp b/src/operators/kernel/cl/conv_add_kernel.cpp index 868013cf9bd0b954508e33207e5f9f45f694c38b..74de92e4c28709a5fdffa99402b1214982475511 100644 --- a/src/operators/kernel/cl/conv_add_kernel.cpp +++ b/src/operators/kernel/cl/conv_add_kernel.cpp @@ -21,12 +21,62 @@ namespace operators { template <> bool ConvAddKernel::Init(FusionConvAddParam *param) { + PADDLE_MOBILE_ENFORCE( + param->Filter()->dims()[2] == param->Filter()->dims()[3] && + param->Paddings()[0] == param->Paddings()[1], + "need equal"); + int offset = static_cast(param->Filter()->dims()[2]) / 2 - + static_cast(param->Paddings()[1]); + param->SetOffset(offset); + + if (param->Filter()->WidthOfOneBlock() == 1 && + param->Filter()->HeightOfOneBlock() == 1) { + this->cl_helper_.AddKernel("conv_1x1", "conv_add_bn_relu_kernel.cl"); + } else if (param->Filter()->dims()[1] == 1) { + this->cl_helper_.AddKernel("depth_conv_3x3", "conv_add_bn_relu_kernel.cl"); + } else if (param->Filter()->WidthOfOneBlock() == 3 && + param->Filter()->HeightOfOneBlock() == 3) { + this->cl_helper_.AddKernel("conv_3x3", "conv_add_bn_relu_kernel.cl"); + } else { + PADDLE_MOBILE_THROW_EXCEPTION(" not support "); + } + return true; } template <> void ConvAddKernel::Compute( - const FusionConvAddParam ¶m) { + const FusionConvAddParam ¶m) { + auto kernel = this->cl_helper_.KernelAt(0); + auto default_work_size = this->cl_helper_.DefaultWorkSize(*param.Output()); + int c_block = default_work_size[0]; + int w = default_work_size[1]; + int nh = default_work_size[2]; + auto input = param.Input()->GetCLImage(); + auto filter = param.Filter()->GetCLImage(); + auto biase = param.Bias()->GetCLImage(); + auto output = param.Output(); + int stride = param.Strides()[0]; + int offset = param.Offset(); + int input_c = param.Input()->CBlock(); + int input_width = param.Input()->WidthOfOneBlock(); + int input_height = param.Input()->HeightOfOneBlock(); + + clSetKernelArg(kernel, 0, sizeof(int), &c_block); + clSetKernelArg(kernel, 1, sizeof(int), &w); + clSetKernelArg(kernel, 2, sizeof(int), &nh); + clSetKernelArg(kernel, 3, sizeof(cl_mem), &input); + clSetKernelArg(kernel, 4, sizeof(cl_mem), &filter); + clSetKernelArg(kernel, 5, sizeof(cl_mem), &biase); + clSetKernelArg(kernel, 8, sizeof(cl_mem), &output); + clSetKernelArg(kernel, 9, sizeof(int), &stride); + clSetKernelArg(kernel, 10, sizeof(int), &offset); + clSetKernelArg(kernel, 11, sizeof(int), &input_c); + clSetKernelArg(kernel, 12, sizeof(int), &input_width); + clSetKernelArg(kernel, 13, sizeof(int), &input_height); + + clEnqueueNDRangeKernel(this->cl_helper_.CLCommandQueue(), kernel, 3, NULL, + default_work_size.data(), NULL, 0, NULL, NULL); } template class ConvAddKernel; diff --git a/src/operators/kernel/cl/conv_kernel.cpp b/src/operators/kernel/cl/conv_kernel.cpp index 451f96447e458524e698f5068bba73f763b236b2..ec265b7992cd62fd4f77399698c377570c2b7a61 100644 --- a/src/operators/kernel/cl/conv_kernel.cpp +++ b/src/operators/kernel/cl/conv_kernel.cpp @@ -15,22 +15,72 @@ limitations under the License. */ #ifdef CONV_OP #include "operators/kernel/conv_kernel.h" -#include "operators/kernel/central-arm-func/conv_arm_func.h" namespace paddle_mobile { namespace operators { template <> bool ConvKernel::Init(ConvParam *param) { - this->cl_helper_.AddKernel("conv_3x3", "conv_kernel.cl"); + PADDLE_MOBILE_ENFORCE( + param->Filter()->dims()[2] == param->Filter()->dims()[3] && + param->Paddings()[0] == param->Paddings()[1], + "need equal"); + int offset = static_cast(param->Filter()->dims()[2]) / 2 - + static_cast(param->Paddings()[1]); + param->SetOffset(offset); + + if (param->Filter()->WidthOfOneBlock() == 1 && + param->Filter()->HeightOfOneBlock() == 1) { + this->cl_helper_.AddKernel("conv_1x1", "conv_add_bn_relu_kernel.cl"); + } else if (param->Filter()->dims()[1] == 1) { + this->cl_helper_.AddKernel("depth_conv_3x3", "conv_add_bn_relu_kernel.cl"); + } else if (param->Filter()->WidthOfOneBlock() == 3 && + param->Filter()->HeightOfOneBlock() == 3) { + this->cl_helper_.AddKernel("conv_3x3", "conv_add_bn_relu_kernel.cl"); + } else { + PADDLE_MOBILE_THROW_EXCEPTION(" not support "); + } + return true; } template <> void ConvKernel::Compute(const ConvParam ¶m) { auto kernel = this->cl_helper_.KernelAt(0); - size_t global_work_size[3] = {1, 2, 3}; - clEnqueueNDRangeKernel(this->cl_helper_.CLCommandQueue(), kernel, 3, NULL, global_work_size, NULL, 0, NULL, NULL); + auto default_work_size = this->cl_helper_.DefaultWorkSize(*param.Output()); + int c_block = default_work_size[0]; + int w = default_work_size[1]; + int nh = default_work_size[2]; + auto input = param.Input()->GetCLImage(); + auto filter = param.Filter()->GetCLImage(); + auto output = param.Output(); + int stride = param.Strides()[0]; + int offset = param.Offset(); + int input_c = param.Input()->CBlock(); + int dilation = param.Dilations()[0]; + int input_width = param.Input()->WidthOfOneBlock(); + int input_height = param.Input()->HeightOfOneBlock(); + + clSetKernelArg(kernel, 0, sizeof(int), &c_block); + clSetKernelArg(kernel, 1, sizeof(int), &w); + clSetKernelArg(kernel, 2, sizeof(int), &nh); + clSetKernelArg(kernel, 3, sizeof(cl_mem), &input); + clSetKernelArg(kernel, 4, sizeof(cl_mem), &filter); + clSetKernelArg(kernel, 5, sizeof(cl_mem), &output); + clSetKernelArg(kernel, 6, sizeof(int), &stride); + clSetKernelArg(kernel, 7, sizeof(int), &offset); + clSetKernelArg(kernel, 8, sizeof(int), &input_c); + clSetKernelArg(kernel, 9, sizeof(int), &dilation); + clSetKernelArg(kernel, 10, sizeof(int), &input_width); + clSetKernelArg(kernel, 11, sizeof(int), &input_height); + + clEnqueueNDRangeKernel(this->cl_helper_.CLCommandQueue(), kernel, 3, NULL, + default_work_size.data(), NULL, 0, NULL, NULL); + + // auto kernel = this->cl_helper_.KernelAt(0); + // size_t global_work_size[3] = {1, 2, 3}; + // clEnqueueNDRangeKernel(this->cl_helper_.CLCommandQueue(), kernel, 3, NULL, + // global_work_size, NULL, 0, NULL, NULL); } template class ConvKernel; diff --git a/src/operators/kernel/cl/elementwise_add_kernel.cpp b/src/operators/kernel/cl/elementwise_add_kernel.cpp index d399e25d5e1216666d8df87c770ae82240b644bf..f68373677bbd383d42bf6b3920e567a0118be254 100644 --- a/src/operators/kernel/cl/elementwise_add_kernel.cpp +++ b/src/operators/kernel/cl/elementwise_add_kernel.cpp @@ -17,22 +17,23 @@ limitations under the License. */ #include "operators/kernel/elementwise_add_kernel.h" namespace paddle_mobile { - namespace operators { +namespace operators { - template <> - bool ElementwiseAddKernel::Init(ElementwiseAddParam *param) { - this->cl_helper_.AddKernel("elementwise_add", "elementwise_add_kernel.cl"); - return true; - } +template <> +bool ElementwiseAddKernel::Init( + ElementwiseAddParam *param) { + // this->cl_helper_.AddKernel("elementwise_add", + // "elementwise_add_kernel.cl"); + return true; +} - template <> - void ElementwiseAddKernel::Compute(const ElementwiseAddParam ¶m) { +template <> +void ElementwiseAddKernel::Compute( + const ElementwiseAddParam ¶m) {} - } +template class ElementwiseAddKernel; - template class ElementwiseAddKernel; - - } // namespace operators +} // namespace operators } // namespace paddle_mobile #endif diff --git a/src/operators/kernel/cl/relu_kernel.cpp b/src/operators/kernel/cl/relu_kernel.cpp index 25a89c802bd5fc3ac321fa832392dc7a0bb03a7e..f38c29f1827cd61b18a0dd59773e63169a4445a7 100644 --- a/src/operators/kernel/cl/relu_kernel.cpp +++ b/src/operators/kernel/cl/relu_kernel.cpp @@ -12,7 +12,6 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. */ - #include "operators/kernel/relu_kernel.h" namespace paddle_mobile { @@ -30,4 +29,3 @@ template class ReluKernel; } // namespace operators } // namespace paddle_mobile - diff --git a/src/operators/kernel/cl/reshape_kernel.cpp b/src/operators/kernel/cl/reshape_kernel.cpp index be9ad946285acb16322ed9cf927f301fdf656846..bc6eb2834b3cff1720ddb7ffb8b4272cf8abbbeb 100644 --- a/src/operators/kernel/cl/reshape_kernel.cpp +++ b/src/operators/kernel/cl/reshape_kernel.cpp @@ -12,7 +12,6 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. */ - #include "operators/kernel/reshape_kernel.h" namespace paddle_mobile { @@ -30,4 +29,3 @@ template class ReshapeKernel; } // namespace operators } // namespace paddle_mobile - diff --git a/src/operators/kernel/cl/softmax_kernel.cpp b/src/operators/kernel/cl/softmax_kernel.cpp index de61592466dd01a60d1819c97e856fc50095db2c..d0a97cf076c5fe22c7b2612629616053c63dec6c 100644 --- a/src/operators/kernel/cl/softmax_kernel.cpp +++ b/src/operators/kernel/cl/softmax_kernel.cpp @@ -12,8 +12,9 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. */ +#ifdef SOFTMAX_OP -#include "operators/kernel/pool_kernel.h" +#include "operators/kernel/softmax_kernel.h" namespace paddle_mobile { namespace operators { @@ -30,4 +31,4 @@ template class SoftmaxKernel; } // namespace operators } // namespace paddle_mobile - +#endif diff --git a/src/operators/kernel/fpga/conv_bn_kernel.cpp b/src/operators/kernel/fpga/conv_bn_kernel.cpp index 4d5486fd6c45c0bcbdf047743f849ac3edfeb6b5..e483e90ab1e147c74ab68a29acf118bf3451af89 100644 --- a/src/operators/kernel/fpga/conv_bn_kernel.cpp +++ b/src/operators/kernel/fpga/conv_bn_kernel.cpp @@ -67,8 +67,7 @@ bool ConvBNKernel::Init(FusionConvBNParam *param) { } template <> -void ConvBNKernel::Compute( - const FusionConvBNParam ¶m) { +void ConvBNKernel::Compute(const FusionConvBNParam ¶m) { fpga::ComputeFpgaConv(param.FpgaArgs()); } diff --git a/src/operators/kernel/fpga/dropout_kernel.cpp b/src/operators/kernel/fpga/dropout_kernel.cpp index 24dbbaa2f412529b3ebb88749f03c03f391fece4..8b990d46e0b90bf67eaf36bbf38238fd4432ace6 100644 --- a/src/operators/kernel/fpga/dropout_kernel.cpp +++ b/src/operators/kernel/fpga/dropout_kernel.cpp @@ -26,8 +26,7 @@ bool DropoutKernel::Init(DropoutParam *param) { } template <> -void DropoutKernel::Compute( - const DropoutParam ¶m) {} +void DropoutKernel::Compute(const DropoutParam ¶m) {} } // namespace operators } // namespace paddle_mobile diff --git a/src/operators/kernel/fpga/fusion_fc_kernel.cpp b/src/operators/kernel/fpga/fusion_fc_kernel.cpp index b105c51c892d4ae32cc56a4bf0ed03d3c4f5ab51..a3625786a56e2cfe9b45b5e9603faaabc43ddaa0 100644 --- a/src/operators/kernel/fpga/fusion_fc_kernel.cpp +++ b/src/operators/kernel/fpga/fusion_fc_kernel.cpp @@ -60,8 +60,7 @@ bool FusionFcKernel::Init(FusionFcParam *param) { } template <> -void FusionFcKernel::Compute( - const FusionFcParam ¶m) { +void FusionFcKernel::Compute(const FusionFcParam ¶m) { fpga::ComputeFpgaConv(param.FpgaArgs()); } } // namespace operators diff --git a/src/operators/kernel/fpga/softmax_kernel.cpp b/src/operators/kernel/fpga/softmax_kernel.cpp index f4b5f535cf222a912bb8adeae1aed722b3a0b639..b83db5f1549e896681e0be4697e5763dc805ee5f 100644 --- a/src/operators/kernel/fpga/softmax_kernel.cpp +++ b/src/operators/kernel/fpga/softmax_kernel.cpp @@ -47,8 +47,7 @@ bool SoftmaxKernel::Init(SoftmaxParam *param) { } template <> -void SoftmaxKernel::Compute( - const SoftmaxParam ¶m) { +void SoftmaxKernel::Compute(const SoftmaxParam ¶m) { Tensor *in_x = param.FloatInput(); Tensor *out = param.Out(); diff --git a/src/operators/kernel/mali/conv_kernel.cpp b/src/operators/kernel/mali/conv_kernel.cpp index 7de2a755cbacc3c511de0fcadba9a8797ec76fbf..7cca16274ecc7ae1707f8d5ed8faf2fde810ab30 100644 --- a/src/operators/kernel/mali/conv_kernel.cpp +++ b/src/operators/kernel/mali/conv_kernel.cpp @@ -211,8 +211,7 @@ bool ConvKernel::Init(ConvParam* param) { } template <> -void ConvKernel::Compute( - const ConvParam& param) { +void ConvKernel::Compute(const ConvParam& param) { std::cout << "init acl" << std::endl; AclConvOp* acl_op = reinterpret_cast*>(this->GetAclOp()); diff --git a/src/operators/kernel/mali/lrn_kernel.cpp b/src/operators/kernel/mali/lrn_kernel.cpp index 5a84ce1aad15468ec58020b3d5151e2e8791b2ac..b46c9680d576ead3e7ab309c08894654a9fad04a 100644 --- a/src/operators/kernel/mali/lrn_kernel.cpp +++ b/src/operators/kernel/mali/lrn_kernel.cpp @@ -127,8 +127,7 @@ bool LrnKernel::Init(LrnParam* param) { } template <> -void LrnKernel::Compute( - const LrnParam& param) { +void LrnKernel::Compute(const LrnParam& param) { std::cout << "init acl" << std::endl; AclLrnOp* acl_op = reinterpret_cast*>(this->GetAclOp()); diff --git a/src/operators/kernel/mali/mul_kernel.cpp b/src/operators/kernel/mali/mul_kernel.cpp index b3658d34545009b687e5d7738af0a04f59f926fd..da69f5e6fe5a4ec95373011d360cd4d9e20a8a61 100644 --- a/src/operators/kernel/mali/mul_kernel.cpp +++ b/src/operators/kernel/mali/mul_kernel.cpp @@ -27,8 +27,7 @@ bool MulKernel::Init(MulParam *param) { } template <> -void MulKernel::Compute( - const MulParam ¶m) { +void MulKernel::Compute(const MulParam ¶m) { const Tensor *input_x = param.InputX(); const Tensor *input_y = param.InputY(); Tensor *out = param.Out(); diff --git a/src/operators/kernel/mali/pool_kernel.cpp b/src/operators/kernel/mali/pool_kernel.cpp index 0c94b595a85f8dcaa1ad9e3da274dfb07238c53c..ec5d35a8f600d63a623b468c9c97c3540bf9c3f7 100644 --- a/src/operators/kernel/mali/pool_kernel.cpp +++ b/src/operators/kernel/mali/pool_kernel.cpp @@ -195,8 +195,7 @@ bool PoolKernel::Init(PoolParam* param) { } template <> -void PoolKernel::Compute( - const PoolParam& param) { +void PoolKernel::Compute(const PoolParam& param) { std::cout << "init acl" << std::endl; AclPoolOp* acl_op = reinterpret_cast*>(this->GetAclOp()); diff --git a/src/operators/kernel/mali/relu_kernel.cpp b/src/operators/kernel/mali/relu_kernel.cpp index 23334c4ffda0001817badf174b76d603e16aaacf..68bb52af3ab9b262218223d971b044edd759b347 100644 --- a/src/operators/kernel/mali/relu_kernel.cpp +++ b/src/operators/kernel/mali/relu_kernel.cpp @@ -115,8 +115,7 @@ bool ReluKernel::Init(ReluParam* param) { } template <> -void ReluKernel::Compute( - const ReluParam& param) { +void ReluKernel::Compute(const ReluParam& param) { std::cout << "init acl" << std::endl; AclReluOp* acl_op = reinterpret_cast*>(this->GetAclOp()); diff --git a/src/operators/op_param.h b/src/operators/op_param.h index 3f1b820ebf1939188545d627e786b717dcace33a..0fafc1915248972ea4096852e96eea7733e5372c 100644 --- a/src/operators/op_param.h +++ b/src/operators/op_param.h @@ -389,6 +389,13 @@ class ConvParam : public OpParam { const int &Groups() const { return groups; } +#ifdef PADDLE_MOBILE_CL + int Offset() const { return offset_; } + + int SetOffset(int in_offset) { offset_ = in_offset; } + +#endif + private: RType *input_; RType *output_; @@ -397,6 +404,10 @@ class ConvParam : public OpParam { vector paddings_; vector dilations_; int groups; + +#ifdef PADDLE_MOBILE_CL + int offset_; +#endif }; template Print &operator<<(Print &printer, const ConvParam &conv_param); @@ -1520,6 +1531,7 @@ class FusionConvAddBNReluParam : public ConvParam { bool is_test_; RType *new_bias_; RType *new_scale_; + #ifdef PADDLE_MOBILE_FPGA private: diff --git a/src/operators/pool_op.cpp b/src/operators/pool_op.cpp index dd23059ea01a332aff45137b7f7ed4c9f6c2e1bb..74a01b5274d93cc4517d05bdfbc26a9e8ffd5d86 100644 --- a/src/operators/pool_op.cpp +++ b/src/operators/pool_op.cpp @@ -68,5 +68,8 @@ REGISTER_OPERATOR_MALI_GPU(pool2d, ops::PoolOp); #ifdef PADDLE_MOBILE_FPGA REGISTER_OPERATOR_FPGA(pool2d, ops::PoolOp); #endif +#ifdef PADDLE_MOBILE_CL +REGISTER_OPERATOR_CL(pool2d, ops::PoolOp); +#endif #endif diff --git a/src/operators/pool_op.h b/src/operators/pool_op.h index 0b43b607a2788955e82a3e98084defc81d2ec58d..0792582203863361f21b142be6c80a1f80e79302 100644 --- a/src/operators/pool_op.h +++ b/src/operators/pool_op.h @@ -54,5 +54,8 @@ USE_OP_MALI_GPU(pool2d); #ifdef PADDLE_MOBILE_FPGA USE_OP_FPGA(pool2d); #endif +#ifdef PADDLE_MOBILE_CL +USE_OP_CL(pool2d); +#endif #endif diff --git a/src/operators/relu_op.cpp b/src/operators/relu_op.cpp index 2a771e81e7a5a0e869984990b52b98d15036543a..d6d83475ee7879f8bc967439dac2094df12c8617 100644 --- a/src/operators/relu_op.cpp +++ b/src/operators/relu_op.cpp @@ -41,5 +41,8 @@ REGISTER_OPERATOR_MALI_GPU(relu, ops::ReluOp); #endif #ifdef PADDLE_MOBILE_FPGA #endif +#ifdef PADDLE_MOBILE_CL +REGISTER_OPERATOR_CL(relu, ops::ReluOp); +#endif #endif diff --git a/src/operators/relu_op.h b/src/operators/relu_op.h index 403de6f3613628878e2c7f1c7e2aaa82ecdd7d9e..01fe415b0e1cdaafbadf3434a66dc41f8850aa18 100644 --- a/src/operators/relu_op.h +++ b/src/operators/relu_op.h @@ -57,5 +57,8 @@ USE_OP_MALI_GPU(relu); #endif #ifdef PADDLE_MOBILE_FPGA #endif +#ifdef PADDLE_MOBILE_CL +USE_OP_CL(relu); +#endif #endif diff --git a/src/operators/reshape_op.cpp b/src/operators/reshape_op.cpp index dcc15009af2b23129552d58b3fa22c3c67684dce..9c2ed3035bf7378a1743cf8f42c4303a5c12f458 100644 --- a/src/operators/reshape_op.cpp +++ b/src/operators/reshape_op.cpp @@ -40,5 +40,8 @@ REGISTER_OPERATOR_MALI_GPU(reshape, ops::ReshapeOp); #endif #ifdef PADDLE_MOBILE_FPGA #endif +#ifdef PADDLE_MOBILE_CL +REGISTER_OPERATOR_CL(reshape, ops::ReshapeOp); +#endif #endif diff --git a/src/operators/reshape_op.h b/src/operators/reshape_op.h index 7df6890018ca34d95999124fb473cf7a5d214c2c..1b36a8690956c24808cf4ce39b0743aea9552534 100644 --- a/src/operators/reshape_op.h +++ b/src/operators/reshape_op.h @@ -56,5 +56,8 @@ USE_OP_MALI_GPU(reshape); #endif #ifdef PADDLE_MOBILE_FPGA #endif +#ifdef PADDLE_MOBILE_CL +USE_OP_CL(reshape); +#endif #endif diff --git a/src/operators/softmax_op.cpp b/src/operators/softmax_op.cpp index e85edc69c3291c794f2eeb8119b91b2926c4d870..e605864706a6c59a35205b3072dd432b009c5d1f 100644 --- a/src/operators/softmax_op.cpp +++ b/src/operators/softmax_op.cpp @@ -36,5 +36,8 @@ REGISTER_OPERATOR_MALI_GPU(softmax, ops::SoftmaxOp); #ifdef PADDLE_MOBILE_FPGA REGISTER_OPERATOR_FPGA(softmax, ops::SoftmaxOp); #endif +#ifdef PADDLE_MOBILE_CL +REGISTER_OPERATOR_CL(softmax, ops::SoftmaxOp); +#endif #endif diff --git a/src/operators/softmax_op.h b/src/operators/softmax_op.h index d532332992a7df332e267355d148aefa2175998b..6ccbe713b581b1076f4d841b2c9c08013314a3ae 100644 --- a/src/operators/softmax_op.h +++ b/src/operators/softmax_op.h @@ -52,5 +52,8 @@ USE_OP_MALI_GPU(softmax); #ifdef PADDLE_MOBILE_FPGA USE_OP_FPGA(softmax); #endif +#ifdef PADDLE_MOBILE_CL +USE_OP_CL(softmax); +#endif #endif diff --git a/test/CMakeLists.txt b/test/CMakeLists.txt index 4ae006fa54acc1349ab2f1f4115f0f9cda38bb60..aab82a62359ef308688cd326c0d5a25396465a8b 100644 --- a/test/CMakeLists.txt +++ b/test/CMakeLists.txt @@ -83,175 +83,175 @@ elseif("genet" IN_LIST NET) target_link_libraries(test-genet paddle-mobile) else () - # gen test - ADD_EXECUTABLE(test-resnet net/test_resnet.cpp test_helper.h test_include.h executor_for_test.h) - target_link_libraries(test-resnet paddle-mobile) - - # gen test - ADD_EXECUTABLE(test-squeezenet net/test_squeezenet.cpp test_helper.h test_include.h executor_for_test.h) - target_link_libraries(test-squeezenet paddle-mobile) - - # gen test - ADD_EXECUTABLE(test-yolo net/test_yolo.cpp test_helper.h test_include.h executor_for_test.h) - target_link_libraries(test-yolo paddle-mobile) - - # gen test - ADD_EXECUTABLE(test-googlenet net/test_googlenet.cpp test_helper.h test_include.h executor_for_test.h) - target_link_libraries(test-googlenet paddle-mobile) - - # gen test - ADD_EXECUTABLE(test-conv-op operators/test_cov_op.cpp test_helper.h test_include.h executor_for_test.h) - target_link_libraries(test-conv-op paddle-mobile) - - # gen test - ADD_EXECUTABLE(test-mul-op operators/test_mul_op.cpp test_helper.h test_include.h) - target_link_libraries(test-mul-op paddle-mobile) - - # gen test - ADD_EXECUTABLE(test-elementwiseadd-op operators/test_elementwise_add_op.cpp test_helper.h test_include.h) - target_link_libraries(test-elementwiseadd-op paddle-mobile) - - # gen test - ADD_EXECUTABLE(test-concat-op operators/test_concat_op.cpp test_helper.h test_include.h) - target_link_libraries(test-concat-op paddle-mobile) - - # gen test - ADD_EXECUTABLE(test-lrn-op operators/test_lrn_op.cpp test_helper.h test_include.h) - target_link_libraries(test-lrn-op paddle-mobile) - - # gen test - ADD_EXECUTABLE(test-batchnorm-op operators/test_batchnorm_op.cpp test_helper.h test_include.h) - target_link_libraries(test-batchnorm-op paddle-mobile) - - # gen test - ADD_EXECUTABLE(test-priorbox-op operators/test_prior_box_op.cpp test_helper.h test_include.h) - target_link_libraries(test-priorbox-op paddle-mobile) - - # gen test - ADD_EXECUTABLE(test-boxcoder-op operators/test_box_coder_op.cpp test_helper.h test_include.h) - target_link_libraries(test-boxcoder-op paddle-mobile) - - # gen test - ADD_EXECUTABLE(test-transpose-op operators/test_transpose_op.cpp test_helper.h test_include.h) - target_link_libraries(test-transpose-op paddle-mobile) - - # gen test - ADD_EXECUTABLE(test-multiclassnms-op operators/test_multiclass_nms_op.cpp test_helper.h test_include.h) - target_link_libraries(test-multiclassnms-op paddle-mobile) - - # gen test - ADD_EXECUTABLE(test-reshape-op operators/test_reshape_op.cpp test_helper.h test_include.h) - target_link_libraries(test-reshape-op paddle-mobile) - - # gen test - ADD_EXECUTABLE(test-relu-op operators/test_relu_op.cpp test_helper.h test_include.h) - target_link_libraries(test-relu-op paddle-mobile) - - # gen test - ADD_EXECUTABLE(test-fc-op operators/test_fusion_fc_op.cpp test_helper.h test_include.h) - target_link_libraries(test-fc-op paddle-mobile) - - # gen test log - ADD_EXECUTABLE(test-log common/test_log.cpp) - target_link_libraries(test-log paddle-mobile) - - # gen test log - ADD_EXECUTABLE(test-load framework/test_load.cpp) - target_link_libraries(test-load paddle-mobile) - - # gen test log - ADD_EXECUTABLE(test-loadmemory framework/test_load_memory.cpp) - target_link_libraries(test-loadmemory paddle-mobile) - - ADD_EXECUTABLE(test-inference-api framework/test_inference_api.cpp) - target_link_libraries(test-inference-api paddle-mobile) - - - # gen test log - # gen test - ADD_EXECUTABLE(test-optimize framework/test_optimize.cpp) - target_link_libraries(test-optimize paddle-mobile) - - - #gen test - ADD_EXECUTABLE(test-pool operators/test_pool_op.cpp test_helper.h test_include.h executor_for_test.h) - target_link_libraries(test-pool paddle-mobile) - - #gen test - ADD_EXECUTABLE(test-softmax operators/test_softmax_op.cpp test_helper.h test_include.h executor_for_test.h) - target_link_libraries(test-softmax paddle-mobile) - - # gen test - ADD_EXECUTABLE(test-gemm-accuracy common/test_gemm_accuracy.cpp) - target_link_libraries(test-gemm-accuracy paddle-mobile) - - # gen test - ADD_EXECUTABLE(test-gemm-perf common/test_gemm_perf.cpp) - target_link_libraries(test-gemm-perf paddle-mobile) - - # gen test - ADD_EXECUTABLE(test-enforce common/test_enforce.cpp) - target_link_libraries(test-enforce paddle-mobile) - - # gen test - test if openmp works - ADD_EXECUTABLE(test-openmp common/test_openmp.cpp test_helper.h test_include.h executor_for_test.h) - target_link_libraries(test-openmp paddle-mobile) - - # gen test - ADD_EXECUTABLE(test-mobilenetssd net/test_mobilenet+ssd.cpp test_helper.h test_include.h executor_for_test.h) - target_link_libraries(test-mobilenetssd paddle-mobile) - - # gen test - ADD_EXECUTABLE(test-mobilenet-combine net/test_mobilenet_combine.cpp test_helper.h test_include.h executor_for_test.h) - target_link_libraries(test-mobilenet-combine paddle-mobile) - - # gen test - ADD_EXECUTABLE(test-genet net/test_genet_combine.cpp test_helper.h test_include.h executor_for_test.h) - target_link_libraries(test-genet paddle-mobile) - - # gen test - ADD_EXECUTABLE(test-sigmoid operators/test_sigmoid_op.cpp test_include.h) - target_link_libraries(test-sigmoid paddle-mobile) - - # gen test - ADD_EXECUTABLE(test-depthwise-conv-op operators/test_depthwise_conv_op.cpp test_helper.h test_include.h executor_for_test.h) - target_link_libraries(test-depthwise-conv-op paddle-mobile) - - # gen test - ADD_EXECUTABLE(test-mobilenet net/test_mobilenet.cpp test_helper.h test_include.h executor_for_test.h) - target_link_libraries(test-mobilenet paddle-mobile) - - # gen test - ADD_EXECUTABLE(test-conv-add-relu-op operators/test_conv_add_relu_op.cpp test_helper.h test_include.h executor_for_test.h) - target_link_libraries(test-conv-add-relu-op paddle-mobile) - - # gen test - ADD_EXECUTABLE(test-conv-add-bn-relu-op operators/test_fusion_conv_add_bn_relu_op.cpp test_helper.h test_include.h executor_for_test.h) - target_link_libraries(test-conv-add-bn-relu-op paddle-mobile) - - # gen test - ADD_EXECUTABLE(test-nlp net/test_nlp.cpp test_helper.h test_include.h executor_for_test.h) - target_link_libraries(test-nlp paddle-mobile) - - # gen test - ADD_EXECUTABLE(test-gru-op operators/test_gru_op.cpp test_helper.h test_include.h) - target_link_libraries(test-gru-op paddle-mobile) - - # gen test - - ADD_EXECUTABLE(test-inceptionv4 net/test_inceptionv4.cpp test_helper.h test_include.h executor_for_test.h) - target_link_libraries(test-inceptionv4 paddle-mobile) - - # gen test - ADD_EXECUTABLE(test-alexnet net/test_alexnet.cpp test_helper.h test_include.h executor_for_test.h) - target_link_libraries(test-alexnet paddle-mobile) - - ADD_EXECUTABLE(test-googlenetv1 net/test_googlenetv1_combine.cpp test_helper.h test_include.h) - target_link_libraries(test-googlenetv1 paddle-mobile) - - # gen test - ADD_EXECUTABLE(test-fssd net/test_mobilenet_025_fssd.cpp test_helper.h test_include.h) - target_link_libraries(test-fssd paddle-mobile) +# # gen test +# ADD_EXECUTABLE(test-resnet net/test_resnet.cpp test_helper.h test_include.h executor_for_test.h) +# target_link_libraries(test-resnet paddle-mobile) +# +# # gen test +# ADD_EXECUTABLE(test-squeezenet net/test_squeezenet.cpp test_helper.h test_include.h executor_for_test.h) +# target_link_libraries(test-squeezenet paddle-mobile) +# +# # gen test +# ADD_EXECUTABLE(test-yolo net/test_yolo.cpp test_helper.h test_include.h executor_for_test.h) +# target_link_libraries(test-yolo paddle-mobile) +# +# # gen test +# ADD_EXECUTABLE(test-googlenet net/test_googlenet.cpp test_helper.h test_include.h executor_for_test.h) +# target_link_libraries(test-googlenet paddle-mobile) +# +# # gen test +# ADD_EXECUTABLE(test-conv-op operators/test_cov_op.cpp test_helper.h test_include.h executor_for_test.h) +# target_link_libraries(test-conv-op paddle-mobile) +# +# # gen test +# ADD_EXECUTABLE(test-mul-op operators/test_mul_op.cpp test_helper.h test_include.h) +# target_link_libraries(test-mul-op paddle-mobile) +# +# # gen test +# ADD_EXECUTABLE(test-elementwiseadd-op operators/test_elementwise_add_op.cpp test_helper.h test_include.h) +# target_link_libraries(test-elementwiseadd-op paddle-mobile) +# +# # gen test +# ADD_EXECUTABLE(test-concat-op operators/test_concat_op.cpp test_helper.h test_include.h) +# target_link_libraries(test-concat-op paddle-mobile) +# +# # gen test +# ADD_EXECUTABLE(test-lrn-op operators/test_lrn_op.cpp test_helper.h test_include.h) +# target_link_libraries(test-lrn-op paddle-mobile) +# +# # gen test +# ADD_EXECUTABLE(test-batchnorm-op operators/test_batchnorm_op.cpp test_helper.h test_include.h) +# target_link_libraries(test-batchnorm-op paddle-mobile) +# +# # gen test +# ADD_EXECUTABLE(test-priorbox-op operators/test_prior_box_op.cpp test_helper.h test_include.h) +# target_link_libraries(test-priorbox-op paddle-mobile) +# +# # gen test +# ADD_EXECUTABLE(test-boxcoder-op operators/test_box_coder_op.cpp test_helper.h test_include.h) +# target_link_libraries(test-boxcoder-op paddle-mobile) +# +# # gen test +# ADD_EXECUTABLE(test-transpose-op operators/test_transpose_op.cpp test_helper.h test_include.h) +# target_link_libraries(test-transpose-op paddle-mobile) +# +# # gen test +# ADD_EXECUTABLE(test-multiclassnms-op operators/test_multiclass_nms_op.cpp test_helper.h test_include.h) +# target_link_libraries(test-multiclassnms-op paddle-mobile) +# +# # gen test +# ADD_EXECUTABLE(test-reshape-op operators/test_reshape_op.cpp test_helper.h test_include.h) +# target_link_libraries(test-reshape-op paddle-mobile) +# +# # gen test +# ADD_EXECUTABLE(test-relu-op operators/test_relu_op.cpp test_helper.h test_include.h) +# target_link_libraries(test-relu-op paddle-mobile) +# +# # gen test +# ADD_EXECUTABLE(test-fc-op operators/test_fusion_fc_op.cpp test_helper.h test_include.h) +# target_link_libraries(test-fc-op paddle-mobile) +# +# # gen test log +# ADD_EXECUTABLE(test-log common/test_log.cpp) +# target_link_libraries(test-log paddle-mobile) +# +# # gen test log +# ADD_EXECUTABLE(test-load framework/test_load.cpp) +# target_link_libraries(test-load paddle-mobile) +# +# # gen test log +# ADD_EXECUTABLE(test-loadmemory framework/test_load_memory.cpp) +# target_link_libraries(test-loadmemory paddle-mobile) +# +# ADD_EXECUTABLE(test-inference-api framework/test_inference_api.cpp) +# target_link_libraries(test-inference-api paddle-mobile) +# +# +# # gen test log +# # gen test +# ADD_EXECUTABLE(test-optimize framework/test_optimize.cpp) +# target_link_libraries(test-optimize paddle-mobile) +# +# +# #gen test +# ADD_EXECUTABLE(test-pool operators/test_pool_op.cpp test_helper.h test_include.h executor_for_test.h) +# target_link_libraries(test-pool paddle-mobile) +# +# #gen test +# ADD_EXECUTABLE(test-softmax operators/test_softmax_op.cpp test_helper.h test_include.h executor_for_test.h) +# target_link_libraries(test-softmax paddle-mobile) +# +# # gen test +# ADD_EXECUTABLE(test-gemm-accuracy common/test_gemm_accuracy.cpp) +# target_link_libraries(test-gemm-accuracy paddle-mobile) +# +# # gen test +# ADD_EXECUTABLE(test-gemm-perf common/test_gemm_perf.cpp) +# target_link_libraries(test-gemm-perf paddle-mobile) +# +# # gen test +# ADD_EXECUTABLE(test-enforce common/test_enforce.cpp) +# target_link_libraries(test-enforce paddle-mobile) +# +# # gen test - test if openmp works +# ADD_EXECUTABLE(test-openmp common/test_openmp.cpp test_helper.h test_include.h executor_for_test.h) +# target_link_libraries(test-openmp paddle-mobile) +# +# # gen test +# ADD_EXECUTABLE(test-mobilenetssd net/test_mobilenet+ssd.cpp test_helper.h test_include.h executor_for_test.h) +# target_link_libraries(test-mobilenetssd paddle-mobile) +# +# # gen test +# ADD_EXECUTABLE(test-mobilenet-combine net/test_mobilenet_combine.cpp test_helper.h test_include.h executor_for_test.h) +# target_link_libraries(test-mobilenet-combine paddle-mobile) +# +# # gen test +# ADD_EXECUTABLE(test-genet net/test_genet_combine.cpp test_helper.h test_include.h executor_for_test.h) +# target_link_libraries(test-genet paddle-mobile) +# +# # gen test +# ADD_EXECUTABLE(test-sigmoid operators/test_sigmoid_op.cpp test_include.h) +# target_link_libraries(test-sigmoid paddle-mobile) +# +# # gen test +# ADD_EXECUTABLE(test-depthwise-conv-op operators/test_depthwise_conv_op.cpp test_helper.h test_include.h executor_for_test.h) +# target_link_libraries(test-depthwise-conv-op paddle-mobile) +# +# # gen test +# ADD_EXECUTABLE(test-mobilenet net/test_mobilenet.cpp test_helper.h test_include.h executor_for_test.h) +# target_link_libraries(test-mobilenet paddle-mobile) +# +# # gen test +# ADD_EXECUTABLE(test-conv-add-relu-op operators/test_conv_add_relu_op.cpp test_helper.h test_include.h executor_for_test.h) +# target_link_libraries(test-conv-add-relu-op paddle-mobile) +# +# # gen test +# ADD_EXECUTABLE(test-conv-add-bn-relu-op operators/test_fusion_conv_add_bn_relu_op.cpp test_helper.h test_include.h executor_for_test.h) +# target_link_libraries(test-conv-add-bn-relu-op paddle-mobile) +# +# # gen test +# ADD_EXECUTABLE(test-nlp net/test_nlp.cpp test_helper.h test_include.h executor_for_test.h) +# target_link_libraries(test-nlp paddle-mobile) +# +# # gen test +# ADD_EXECUTABLE(test-gru-op operators/test_gru_op.cpp test_helper.h test_include.h) +# target_link_libraries(test-gru-op paddle-mobile) +# +# # gen test +# +# ADD_EXECUTABLE(test-inceptionv4 net/test_inceptionv4.cpp test_helper.h test_include.h executor_for_test.h) +# target_link_libraries(test-inceptionv4 paddle-mobile) +# +# # gen test +# ADD_EXECUTABLE(test-alexnet net/test_alexnet.cpp test_helper.h test_include.h executor_for_test.h) +# target_link_libraries(test-alexnet paddle-mobile) +# +# ADD_EXECUTABLE(test-googlenetv1 net/test_googlenetv1_combine.cpp test_helper.h test_include.h) +# target_link_libraries(test-googlenetv1 paddle-mobile) +# +# # gen test +# ADD_EXECUTABLE(test-fssd net/test_mobilenet_025_fssd.cpp test_helper.h test_include.h) +# target_link_libraries(test-fssd paddle-mobile) # gen test ADD_EXECUTABLE(test-mobilenetgpu net/test_mobilenet_GPU.cpp test_helper.h test_include.h) diff --git a/test/executor_for_test.h b/test/executor_for_test.h index fbef578dfca461681c4dd07688eb650a0b91cb8f..460c4a7f76af9e4ca1fc7d09d431cc084004e352 100644 --- a/test/executor_for_test.h +++ b/test/executor_for_test.h @@ -18,8 +18,8 @@ limitations under the License. */ #include #include "common/log.h" -#include "framework/op_registry.h" #include "framework/executor.h" +#include "framework/op_registry.h" #include "operators/conv_op.h" #include "operators/elementwise_add_op.h" #include "operators/pool_op.h" @@ -29,9 +29,9 @@ limitations under the License. */ #include "operators/softmax_op.h" #include "operators/transpose_op.h" -using paddle_mobile::framework::Executor; using paddle_mobile::framework::BlockDesc; using paddle_mobile::framework::DDim; +using paddle_mobile::framework::Executor; using paddle_mobile::framework::LoDTensor; using paddle_mobile::framework::OpDesc; using paddle_mobile::framework::Program; diff --git a/test/framework/test_optimize.cpp b/test/framework/test_optimize.cpp index 2f187eb583bed5bd2bcf1787beb03e521dfcf1b4..0392020789096e921865afed0b0fc51fa5999c6b 100644 --- a/test/framework/test_optimize.cpp +++ b/test/framework/test_optimize.cpp @@ -13,9 +13,9 @@ See the License for the specific language governing permissions and limitations under the License. */ #include "../test_helper.h" +#include "framework/loader.h" #include "framework/program/program-optimize/node.h" #include "framework/program/program-optimize/program_optimize.h" -#include "framework/loader.h" int main() { paddle_mobile::framework::Loader loader; diff --git a/test/net/test_mobilenet_GPU.cpp b/test/net/test_mobilenet_GPU.cpp index 9cf8a5aa42a2439a131694259433f3e3a055607a..f0994855faed337bf2e2e557c10108e053ea7e71 100644 --- a/test/net/test_mobilenet_GPU.cpp +++ b/test/net/test_mobilenet_GPU.cpp @@ -17,43 +17,43 @@ limitations under the License. */ #include "../test_include.h" int main() { - paddle_mobile::PaddleMobile paddle_mobile; -// paddle_mobile.SetThreadNum(4); - auto time1 = time(); - // auto isok = paddle_mobile.Load(std::string(g_mobilenet_detect) + "/model", - // std::string(g_mobilenet_detect) + "/params", true); - - auto isok = paddle_mobile.Load(g_mobilenet, false); - if (isok) { - auto time2 = time(); - std::cout << "load cost :" << time_diff(time1, time1) << "ms" << std::endl; - - std::vector input; - std::vector dims{1, 3, 224, 224}; - GetInput(g_test_image_1x3x224x224_banana, &input, dims); - - auto vec_result = paddle_mobile.Predict(input, dims); - std::vector::iterator biggest = - std::max_element(std::begin(vec_result), std::end(vec_result)); - std::cout << " Max element is " << *biggest << " at position " - << std::distance(std::begin(vec_result), biggest) << std::endl; - - // 预热十次 - for (int i = 0; i < 10; ++i) { - auto vec_result = paddle_mobile.Predict(input, dims); - } - auto time3 = time(); - for (int i = 0; i < 10; ++i) { - auto vec_result = paddle_mobile.Predict(input, dims); - } - DLOG << vec_result; - auto time4 = time(); - std::cout << "predict cost :" << time_diff(time3, time4) / 10 << "ms" - << std::endl; + paddle_mobile::PaddleMobile paddle_mobile; + // paddle_mobile.SetThreadNum(4); + auto time1 = time(); + // auto isok = paddle_mobile.Load(std::string(g_mobilenet_detect) + "/model", + // std::string(g_mobilenet_detect) + "/params", true); + + auto isok = paddle_mobile.Load(g_mobilenet, false); + if (isok) { + auto time2 = time(); + std::cout << "load cost :" << time_diff(time1, time1) << "ms" << std::endl; + + std::vector input; + std::vector dims{1, 3, 224, 224}; + GetInput(g_test_image_1x3x224x224_banana, &input, dims); + + auto vec_result = paddle_mobile.Predict(input, dims); + std::vector::iterator biggest = + std::max_element(std::begin(vec_result), std::end(vec_result)); + std::cout << " Max element is " << *biggest << " at position " + << std::distance(std::begin(vec_result), biggest) << std::endl; + + // 预热十次 + for (int i = 0; i < 10; ++i) { + auto vec_result = paddle_mobile.Predict(input, dims); } - - std::cout << "如果结果Nan请查看: test/images/g_test_image_1x3x224x224_banana " - "是否存在?" + auto time3 = time(); + for (int i = 0; i < 10; ++i) { + auto vec_result = paddle_mobile.Predict(input, dims); + } + DLOG << vec_result; + auto time4 = time(); + std::cout << "predict cost :" << time_diff(time3, time4) / 10 << "ms" << std::endl; - return 0; + } + + std::cout << "如果结果Nan请查看: test/images/g_test_image_1x3x224x224_banana " + "是否存在?" + << std::endl; + return 0; } diff --git a/test/operators/test_sigmoid_op.cpp b/test/operators/test_sigmoid_op.cpp index 4f466845b97b124b971b76e776f177b3c06ef937..df93da1529ae1e03561643ebeef4cb821f10d211 100644 --- a/test/operators/test_sigmoid_op.cpp +++ b/test/operators/test_sigmoid_op.cpp @@ -12,8 +12,8 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. */ -#include "../../src/operators/kernel/sigmoid_kernel.h" #include "../../src/operators/kernel/central-arm-func/sigmoid_arm_func.h" +#include "../../src/operators/kernel/sigmoid_kernel.h" #include "../test_helper.h" #include "framework/executor.h"