diff --git a/src/fpga/api/fpga_api.cpp b/src/fpga/api/fpga_api.cpp index a913d6e39cddda97b347c0675717c265dfa89d18..8167d4cf7d1fd343e15754bafc7c77ee6f52a822 100644 --- a/src/fpga/api/fpga_api.cpp +++ b/src/fpga/api/fpga_api.cpp @@ -29,15 +29,15 @@ limitations under the License. */ #include "fpga/api/fpga_api.h" -namespace paddle { -namespace mobile { +namespace paddle_mobile { namespace fpga { -namespace api { static int fd = -1; static const char *device_path = "/dev/fpgadrv0"; -static inline int do_ioctl(int req, void *arg) { return ioctl(req, arg); } +static inline int do_ioctl(int req, void *arg) { + return ioctl(req, (long unsigned int)arg); +} int open_device() { if (fd == -1) { @@ -48,8 +48,8 @@ int open_device() { // memory management; void *fpga_malloc(size_t size) { - return reinterpret_cast<(void *)> mmap64(NULL, size, PROT_READ | PROT_WRITE, - MAP_SHARED, fd, 0); + return reinterpret_cast( + mmap64(NULL, size, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0)); } void fpga_free(void *ptr) { munmap(ptr, 0); } @@ -58,11 +58,9 @@ void fpga_copy(void *dest, const void *src, size_t num) { memcpy(dest, src, num); } -int ComputeFpgaConv(struct FpgaConvArgs) {} -int ComputeFpgaPool(struct FpgaPoolArgs) {} -int ComputeFpgaEWAdd(struct FpgaEWAddArgs) {} +int ComputeFpgaConv(struct ConvArgs args) {} +int ComputeFpgaPool(struct PoolingArgs args) {} +int ComputeFpgaEWAdd(struct EWAddArgs args) {} -} // namespace api } // namespace fpga -} // namespace mobile -} // namespace paddle +} // namespace paddle_mobile diff --git a/src/fpga/api/fpga_api.h b/src/fpga/api/fpga_api.h index 42e99f4e4238d6974d23c1fb33bf238ca8a8626d..4225e887c3dadeddf48b173db72412a5209d989d 100644 --- a/src/fpga/api/fpga_api.h +++ b/src/fpga/api/fpga_api.h @@ -31,90 +31,132 @@ void* fpga_malloc(size_t size); void fpga_free(void* ptr); void fpga_copy(void* dst, const void* src, size_t num); -struct FpgaVersionArgs { - void* buf; -}; - -struct MemoryToPhysicalArgs { - const void* src; - uint64_t physical; +struct VersionArgs { + void* buffer; }; struct MemoryCopyArgs { void* src; - void* dst; + void* dest; size_t size; }; -struct FpgaQuantArgs { - float scale; -}; - -struct FpgaBNArgs { - bool enabled = false; - void* bias_addr; - void* scale_addr; +struct BNArgs { + bool enabled; + void* bias_address; + void* scale_address; }; -struct FpgaKernelArgs { +/** +Conv and Pooling kernel +*/ +struct KernelArgs { uint32_t width; uint32_t height; - uint32_t stride_h; uint32_t stride_w; + uint32_t stride_h; }; -struct FpgaImageArgs { - uint32_t width; - uint32_t height; +struct ImageInputArgs { + void* address; // input featuremap virtual address + float* scale_address; // input scale address; uint32_t channels; - uint32_t pad_h; - uint32_t pad_w; + uint32_t width; // featuremap width + uint32_t height; + uint32_t pad_width; // padding width; + uint32_t pad_height; +}; + +struct ImageOutputArgs { + void* address; // output result address; + float* scale_address; // output scale address; }; -struct FpgaConvArgs { +struct ConvArgs { bool relu_enabled; - struct FpgaBNArgs BNargs; - void* image_addr; - void* filter_addr; - void* bias_addr; - void* output_addr; - float quant_scale; - struct FpgaImageArgs image; + void* bias_address; + void* filter_address; uint32_t filter_num; uint32_t group_num; - struct FpgaKernelArgs kernel; + struct BNArgs bn; + struct KernelArgs kernel; + struct ImageInputArgs image; // input image; + struct ImageOutputArgs output; }; -struct FpgaPoolArgs { - void* image_addr; - void* output_addr; - struct FpgaImageArgs image; - struct FpgaKernelArgs kernel; +struct PoolingArgs { + struct KernelArgs kernel; + struct ImageInputArgs image; // input image; + struct ImageOutputArgs output; }; -struct FpgaEWAddArgs { +// elementwise add arguments +struct EWAddArgs { bool relu_enabled; - void* image0_addr; - void* image1_addr; - void* result_addr; - uint32_t const0; - uint32_t const1; - uint32_t data_len; // aligned element count + + float const0; // output0 = const0 x input0 + const1 x input1; + float const1; + struct ImageInputArgs image0; + struct ImageInputArgs image1; + struct ImageOutputArgs output; +}; + +struct FpgaRegWriteArgs { + uint64_t address; // + uint64_t value; }; -int ComputeFpgaConv(struct FpgaConvArgs args); -int ComputeFpgaPool(struct FpgaPoolArgs args); -int ComputeFpgaEWAdd(struct FpgaEWAddArgs args); +struct FpgaRegReadArgs { + uint64_t address; + uint64_t value; +}; + +#define IOCTL_FPGA_MAGIC 'FPGA' + +#define IOCTL_VERSION _IOW(IOCTL_FPGA_MAGIC, 01, struct VersionArgs) +#define IOCTL_FPGA_REG_READ _IOW(IOCTL_FPGA_MAGIC, 02, struct FpgaRegReadArgs) +#define IOCTL_FPGA_REG_WRITE _IOW(IOCTL_FPGA_MAGIC, 03, struct FpgaRegWriteArgs) + +#define IOCTL_SEPARATOR_0 10 -#define IOCTL_FPGA_MAGIC 'CNN' -#define IOCTL_VERSION _IOW(IOCTL_FPGA_MAGIC, 1, struct FpgaVersionArgs) -#define IOCTL_GET_QUANT _IOW(IOCTL_FPGA_MAGIC, 2, struct FpgaQuantArgs) -#define IOCTL_SET_QUANT _IOW(IOCTL_FPGA_MAGIC, 3, struct FpgaQuantArgs) #define IOCTL_MEM_COPY _IOW(IOCTL_FPGA_MAGIC, 11, struct MemoryCopyArgs) -#define IOCTL_CONFIG_CONV _IOW(IOCTL_FPGA_MAGIC, 21, struct FpgaConvArgs) -#define IOCTL_CONFIG_POOLING _IOW(IOCTL_FPGA_MAGIC, 22, struct FpgaPoolArgs) -#define IOCTL_CONFIG_EW _IOW(IOCTL_FPGA_MAGIC, 23, struct FpgaEWAddArgs) + +#define IOCTL_SEPARATOR_1 20 + +#define IOCTL_CONFIG_CONV _IOW(IOCTL_FPGA_MAGIC, 21, struct ConvArgs) +#define IOCTL_CONFIG_POOLING _IOW(IOCTL_FPGA_MAGIC, 22, struct PoolingArgs) +#define IOCTL_CONFIG_EW _IOW(IOCTL_FPGA_MAGIC, 23, struct EWAddArgs) + +enum FPGA_ERR_TYPE { + ERR_IOCTL_CMD = -1, + ERR_TIMEOUT = -2, + ERR_COMPLETION_TIMEOUT = -3, + ERR_INVALID_FPGA_ADDR = -4, + ERR_NOMEM = -5, + ERR_NO_RESERVE_MEM = -6, + ERR_COPY_FROM_USER = -7, + ERR_COPY_TO_USER = -8, + ERR_DEL_TIMER = -9, + ERR_ENABLE_MSI = -10, + ERR_REGISTER_IRQ = -11, + ERR_PCIE_REGISTER = -12, + ERR_PCIE_PROBE = -13, + ERR_REGISTER_BLOCK = -14, + ERR_ALLOC_GENDISK = -15, + ERR_INIT_QUEUE = -16, + ERR_WAIT = -17, + ERR_ECC_ERROR = -31, + ERR_FPGA_FAIL_STOP = -64, + ERR_FPGA_DEBUG_STOP = -113, + DEV_TMP_UNAVAILABLE = -128 +}; + +//============================== API ============================= + +int ComputeFpgaConv(struct ConvArgs args); +int ComputeFpgaPool(struct PoolingArgs args); +int ComputeFpgaEWAdd(struct EWAddArgs args); } // namespace fpga } // namespace paddle_mobile diff --git a/src/framework/tensor.h b/src/framework/tensor.h index 954a65a3605c4d0204890d9414aeb074371b0d69..388788216fe45b66441a0390e2ef09c2d51c16dc 100644 --- a/src/framework/tensor.h +++ b/src/framework/tensor.h @@ -253,6 +253,18 @@ class Tensor { "Tensor's dims_ is out of bound. "); } +#ifdef PADDLE_MOBILE_FPGA + struct FPGAArgs { + float scale; + + inline float *scale_pointer() { return &scale; } + }; + + struct &fpga_args() const { + return fpgaArgs_; + } +#endif + private: /** * @note Placeholder hides type T, so it doesn't appear as a @@ -319,6 +331,10 @@ class Tensor { * begins. */ size_t offset_; + +#ifdef PADDLE_MOBILE_FPGA + FPGAArgs fpgaArgs_; +#endif }; #ifdef PADDLE_MOBILE_DEBUG diff --git a/src/io/executor.cpp b/src/io/executor.cpp index 6b0af3454e0cb9c41633bd793b76250028644abe..d6434b64aa752fd62bc637a882298228d59880b8 100644 --- a/src/io/executor.cpp +++ b/src/io/executor.cpp @@ -420,6 +420,6 @@ std::vector::Ptype> Executor::Predict( template class Executor; template class Executor; -template class Executor; +template class Executor; } // namespace paddle_mobile diff --git a/src/memory/t_malloc.cpp b/src/memory/t_malloc.cpp index 178541953323b6ffd1a3339f8209c2839b37a784..42b8c4551871c58955251d94845ca13576d7735b 100644 --- a/src/memory/t_malloc.cpp +++ b/src/memory/t_malloc.cpp @@ -27,17 +27,17 @@ namespace memory { const int MALLOC_ALIGN = 64; #ifdef PADDLE_MOBILE_FPGA -namespace api = paddle::mobile::fpga::api; +namespace fpga = paddle_mobile::fpga; void Copy(void *dst, const void *src, size_t num) { std::memcpy(dst, src, num); } -void *Alloc(size_t size) { return api::malloc(size); } +void *Alloc(size_t size) { return fpga::fpga_malloc(size); } void Free(void *ptr) { if (ptr) { - api::fpga_free(ptr); + fpga::fpga_free(ptr); } } diff --git a/src/operators/feed_op.h b/src/operators/feed_op.h index 723747874da8fc8ee2c02eb1be4c89189c2af746..e45ad38fd68cb9b4616b7e363be117e2039c93a9 100644 --- a/src/operators/feed_op.h +++ b/src/operators/feed_op.h @@ -29,7 +29,7 @@ class FeedOp : public framework::OperatorBase { std::shared_ptr scope) : framework::OperatorBase(type, inputs, outputs, attrs, scope), - param_(inputs, outputs, attrs, *scope) {} + param_(inputs, outputs, attrs, scope.get()) {} void RunImpl() const { param_.Out()->ShareDataWith(*param_.InputX()); } void Init() {} diff --git a/src/operators/kernel/arm/dropout_kernel.cpp b/src/operators/kernel/arm/dropout_kernel.cpp index af16048a1b4eba2ff36f842b6cf968031989576e..db942b018d7085ca3986533937328101afb08ff9 100644 --- a/src/operators/kernel/arm/dropout_kernel.cpp +++ b/src/operators/kernel/arm/dropout_kernel.cpp @@ -14,8 +14,6 @@ limitations under the License. */ #ifdef DROPOUT_OP -#pragma once - #include "operators/kernel/dropout_kernel.h" #include diff --git a/src/operators/kernel/dropout_kernel.h b/src/operators/kernel/dropout_kernel.h index 5a3783971959db8fba9ca6b701fb6eb6340fcb3f..3ef6b9dd62d88f012eba3456c676ac0d33bf9e52 100644 --- a/src/operators/kernel/dropout_kernel.h +++ b/src/operators/kernel/dropout_kernel.h @@ -17,7 +17,7 @@ limitations under the License. */ #include "framework/operator.h" #include "operators/op_param.h" -#pragma once; +#pragma once namespace paddle_mobile { namespace operators { diff --git a/src/operators/kernel/fpga/concat_kernel.cpp b/src/operators/kernel/fpga/concat_kernel.cpp index c691988f4a388c7835a7016602d7a1ac9cb5f9b6..627a94242ca4638640a7961120b36c9f763a0e85 100644 --- a/src/operators/kernel/fpga/concat_kernel.cpp +++ b/src/operators/kernel/fpga/concat_kernel.cpp @@ -39,7 +39,7 @@ void ConcatKernel::Compute(const ConcatParam ¶m) const { for (int i = 0; i < inputs.size(); ++i) { auto input = inputs[i]; - auto channels = input[3]; + auto channels = input->dims()[3]; out_offset += channels; auto src = input->data(); for (int j = 0; j < pixels; ++j) { diff --git a/src/operators/kernel/im2sequence_kernel.h b/src/operators/kernel/im2sequence_kernel.h index cb592613f73d90dae5a7d6e515f8bc091981776e..aa798fd6af5592a062de207714dc9fee2afb93df 100644 --- a/src/operators/kernel/im2sequence_kernel.h +++ b/src/operators/kernel/im2sequence_kernel.h @@ -20,13 +20,11 @@ limitations under the License. */ #include "operators/math/vol2col.h" #include "operators/op_param.h" -#pragma once; +#pragma once namespace paddle_mobile { namespace operators { -using namespace framework; - template class Im2SequenceKernel : public framework::OpKernelBase { diff --git a/src/operators/kernel/mali/fushion_fc_kernel.cpp b/src/operators/kernel/mali/fushion_fc_kernel.cpp index a76c3c46012a758a05cf8f846a15376ad1b9f33c..44a7ce2af62a1d27aff8181f6742bebda1d6d066 100755 --- a/src/operators/kernel/mali/fushion_fc_kernel.cpp +++ b/src/operators/kernel/mali/fushion_fc_kernel.cpp @@ -14,8 +14,6 @@ limitations under the License. */ #ifdef FUSION_FC_OP -#pragma once - #include "operators/kernel/fusion_fc_kernel.h" namespace paddle_mobile { diff --git a/src/operators/kernel/prelu_kernel.h b/src/operators/kernel/prelu_kernel.h index 9f5dcb23ee9bf44ffa8bbdd98879d533d07c39f9..15696174377f04ad9a62366e03ded1f2cdcdee9e 100644 --- a/src/operators/kernel/prelu_kernel.h +++ b/src/operators/kernel/prelu_kernel.h @@ -15,7 +15,7 @@ limitations under the License. */ #include "framework/operator.h" #include "operators/op_param.h" -#pragma once; +#pragma once namespace paddle_mobile { namespace operators { diff --git a/src/operators/kernel/scale_kernel.h b/src/operators/kernel/scale_kernel.h index cc76a4b59b0be24dd3a3fb82c0e3d9fb1a4dbf24..98ac71d0bbad86f595171ad7ac5b2a1cdf5908fa 100644 --- a/src/operators/kernel/scale_kernel.h +++ b/src/operators/kernel/scale_kernel.h @@ -15,7 +15,7 @@ limitations under the License. */ #include "framework/operator.h" #include "operators/op_param.h" -#pragma once; +#pragma once namespace paddle_mobile { namespace operators { diff --git a/src/operators/kernel/slice_kernel.h b/src/operators/kernel/slice_kernel.h index e308364602f401b1c6c6f8e2e35385aefa055360..fd3b8dc767076c5244509f6015c42bee87df100b 100644 --- a/src/operators/kernel/slice_kernel.h +++ b/src/operators/kernel/slice_kernel.h @@ -15,7 +15,7 @@ limitations under the License. */ #include "framework/operator.h" #include "operators/op_param.h" -#pragma once; +#pragma once namespace paddle_mobile { namespace operators { diff --git a/src/operators/op_param.h b/src/operators/op_param.h index a1c9baad79df159b1784ef0dd5d12ccf7ed7fe11..88c1886ad7ade5960d1d8175a1b46e12363ca849 100644 --- a/src/operators/op_param.h +++ b/src/operators/op_param.h @@ -262,11 +262,11 @@ class ElementwiseAddParam : OpParam { #ifdef PADDLE_MOBILE_FPGA private: - fpga::FpgaEWAddArgs fpga_EW_add_args; + fpga::EWAddArgs fpga_EW_add_args; public: - const fpga::FpgaEWAddArgs &FpgaArgs() const { return fpga_EW_add_args; } - void SetFpgaArgs(const fpga::FpgaEWAddArgs &args) { fpga_EW_add_args = args; } + const fpga::EWAddArgs &FpgaArgs() const { return fpga_EW_add_args; } + void SetFpgaArgs(const fpga::EWAddArgs &args) { fpga_EW_add_args = args; } #endif }; @@ -465,11 +465,11 @@ class PoolParam : public OpParam { #ifdef PADDLE_MOBILE_FPGA private: - fpga::FpgaPoolArgs fpga_pool_args; + fpga::PoolingArgs fpga_pool_args; public: - const fpga::FpgaPoolArgs &FpgaArgs() const { return fpga_pool_args; } - void SetFpgaArgs(const fpga::FpgaPoolArgs &args) { fpga_pool_args = args; } + const fpga::PoolingArgs &FpgaArgs() const { return fpga_pool_args; } + void SetFpgaArgs(const fpga::PoolingArgs &args) { fpga_pool_args = args; } #endif }; #endif @@ -651,10 +651,10 @@ class MultiClassNMSParam : public OpParam { class FeedParam : public OpParam { public: FeedParam(const VariableNameMap &inputs, const VariableNameMap &outputs, - const AttributeMap &attrs, Scope const &scope) { - input_x_ = InputXFrom(inputs, scope); - out_ = OutFrom(outputs, scope); - auto var = scope.Var("batch_size"); + const AttributeMap &attrs, Scope *scope) { + input_x_ = InputXFrom(inputs, *scope); + out_ = OutFrom(outputs, *scope); + auto var = scope->Var("batch_size"); batch_size = var->GetValue(); } const Tensor *InputX() const { return input_x_; } @@ -933,11 +933,11 @@ class FusionFcParam : public OpParam { #ifdef PADDLE_MOBILE_FPGA private: - fpga::FpgaConvArgs fpga_conv_args; + fpga::ConvArgs fpga_conv_args; public: - const fpga::FpgaConvArgs &FpgaArgs() const { return fpga_conv_args; } - void SetFpgaArgs(const fpga::FpgaConvArgs &args) { fpga_conv_args = args; } + const fpga::ConvArgs &FpgaArgs() const { return fpga_conv_args; } + void SetFpgaArgs(const fpga::ConvArgs &args) { fpga_conv_args = args; } #endif }; @@ -991,11 +991,11 @@ class FusionConvAddParam : public OpParam { #ifdef PADDLE_MOBILE_FPGA private: - fpga::FpgaConvArgs fpga_conv_args; + fpga::ConvArgs fpga_conv_args; public: - const fpga::FpgaConvArgs &FpgaArgs() const { return fpga_conv_args; } - void SetFpgaArgs(const fpga::FpgaConvArgs &args) { fpga_conv_args = args; } + const fpga::ConvArgs &FpgaArgs() const { return fpga_conv_args; } + void SetFpgaArgs(const fpga::ConvArgs &args) { fpga_conv_args = args; } #endif }; @@ -1096,11 +1096,11 @@ class FusionConvAddBNReluParam : public OpParam { #ifdef PADDLE_MOBILE_FPGA private: - fpga::FpgaConvArgs fpga_conv_args; + fpga::ConvArgs fpga_conv_args; public: - const fpga::FpgaConvArgs &FpgaArgs() const { return fpga_conv_args; } - void SetFpgaArgs(const fpga::FpgaConvArgs &args) { fpga_conv_args = args; } + const fpga::ConvArgs &FpgaArgs() const { return fpga_conv_args; } + void SetFpgaArgs(const fpga::ConvArgs &args) { fpga_conv_args = args; } #endif }; #endif @@ -1190,11 +1190,11 @@ class FusionConvAddBNParam : public OpParam { #ifdef PADDLE_MOBILE_FPGA private: - fpga::FpgaConvArgs fpga_conv_args; + fpga::ConvArgs fpga_conv_args; public: - const fpga::FpgaConvArgs &FpgaArgs() const { return fpga_conv_args; } - void SetFpgaArgs(const fpga::FpgaConvArgs &args) { fpga_conv_args = args; } + const fpga::ConvArgs &FpgaArgs() const { return fpga_conv_args; } + void SetFpgaArgs(const fpga::ConvArgs &args) { fpga_conv_args = args; } #endif }; #endif diff --git a/test/framework/test_load.cpp b/test/framework/test_load.cpp index bea7d4ba7d2df1344f0819222fbdb389106fa77e..25cad4feaa706899122902dee2a8f0c915e78975 100644 --- a/test/framework/test_load.cpp +++ b/test/framework/test_load.cpp @@ -12,6 +12,8 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. */ +#include + #include "../test_helper.h" #include "io/loader.h" @@ -20,12 +22,10 @@ int main() { // ../../../test/models/googlenet // ../../../test/models/mobilenet // auto program = loader.Load(g_googlenet, true); + // auto program = loader.Load(g_mobilenet_ssd, true); - auto program = loader.Load(g_mobilenet_ssd, true); - // auto program = loader.Load(g_googlenet_combine + "/model", - // g_googlenet_combine + - // "/params", true); - + auto program = loader.Load(std::string(g_ocr) + "/model", + std::string(g_ocr) + "/params", false); // program.originProgram->Description("program desc: "); return 0; } diff --git a/test/test_helper.h b/test/test_helper.h index 9a5c62c79c44fdf52657ea5facb5f0768810c440..658af447d6cfcd85c68ff350b104c2468d442e40 100644 --- a/test/test_helper.h +++ b/test/test_helper.h @@ -24,6 +24,7 @@ limitations under the License. */ #include "framework/ddim.h" #include "framework/tensor.h" +static const char *g_ocr = "../models/ocr"; static const char *g_mobilenet_ssd = "../models/mobilenet+ssd"; static const char *g_mobilenet_ssd_gesture = "../models/mobilenet+ssd_gesture"; static const char *g_squeezenet = "../models/squeezenet"; diff --git a/test_gemm.cpp b/test_gemm.cpp deleted file mode 100644 index 6a49193256d8293dc2cef559b1d1e73bc6dfc7bb..0000000000000000000000000000000000000000 --- a/test_gemm.cpp +++ /dev/null @@ -1,136 +0,0 @@ -/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. */ - -#include -#include -#include -#include "../test_helper.h" -#include "common/log.h" -#include "memory/t_malloc.h" -#include "operators/math/gemm.h" - -#define a(i, j) a[(i)*lda + (j)] -#define b(i, j) b[(i)*ldb + (j)] -#define c(i, j) c[(i)*ldc + (j)] -#define c1(i, j) c1[(i)*ldc + (j)] - - -void print_matirx(int m, int n, int ldc, float *c) { - for (int i = 0; i < m; ++i) { - std::cout << c(i, 0); - for (int j = 1; j < n; ++j) { - std::cout << " | " << c(i, j); - } - std::cout << std::endl; - } - std::cout << std::endl; -} - -int do_sgemm(int m, int n, int k, bool relu, int t1, int t2, int pr) { - int lda = k; - int ldb = n; - int ldc = n; - - float *a = static_cast(paddle_mobile::memory::Alloc(sizeof(float) * m * k)); - float *b = static_cast(paddle_mobile::memory::Alloc(sizeof(float) * k * n)); - float *c = static_cast(paddle_mobile::memory::Alloc(sizeof(float) * m * n)); - float *c1 = static_cast(paddle_mobile::memory::Alloc(sizeof(float) * m * n)); - float* scale = static_cast(paddle_mobile::memory::Alloc(sizeof(float) * m)); - float* bias = static_cast(paddle_mobile::memory::Alloc(sizeof(float) * m)); - - srand(unsigned(time(0))); - for (int i = 0; i < m * k; ++i) { - a[i] = t1 + rand() % t2; - } - for (int i = 0; i < k * n; ++i) { - b[i] = t1 + rand() % t2; - } - for (int i = 0; i < m; ++i) { - scale[i] = t1 + rand() % t2; - } - for (int i = 0; i < m; ++i) { - bias[i] = t1 + rand() % t2; - } - - for (int i = 0; i < m; ++i) { - for (int j = 0; j < n; ++j) { - float r = 0; - for (int p = 0; p < k; p++) { - r += a(i, p) * b(p, j); - } - r *= scale[i]; - r += bias[i]; - if (relu && (r < 0)) { - r = 0; - } - c1(i, j) = r; - } - } - - paddle_mobile::operators::math::SgemmWithBn(m, n, k, 0.9, a, lda, - b, ldb, 0.3, c, ldc, relu, scale, bias); - int eq = 0; - int neq = 0; - for (int i = 0; i < m * n; ++i) { - if (static_cast(c[i]) == static_cast(c1[i])) { - ++eq; - } else { - ++neq; - } - } - - if (pr > 0) { - std::cout << "A:" << std::endl; - print_matirx(m, k, lda, a); - - std::cout << "B:" << std::endl; - print_matirx(k, n, ldb, b); - - std::cout << "C:" << std::endl; - print_matirx(m, n, ldc, c); - - std::cout << "C1:" << std::endl; - print_matirx(m, n, ldc, c1); - } - - std::cout << "mnk=" << m << " " << n << " " << k << - " relu=" << relu << - " eq=" << eq << " neq=" << neq << std::endl; - - paddle_mobile::memory::Free(a); - paddle_mobile::memory::Free(b); - paddle_mobile::memory::Free(c); - paddle_mobile::memory::Free(c1); - paddle_mobile::memory::Free(scale); - paddle_mobile::memory::Free(bias); - - return 0; -} - -int main() { - - do_sgemm(9, 9, 9, true, 10, 10, 10); - do_sgemm(10, 6, 12, false, 10, 10, 0); - do_sgemm(512, 256, 384, false, 10, 10, 0); - do_sgemm(1366, 768, 256, false, 10, 10, 0); - do_sgemm(1255, 755, 333, false, 10, 10, 0); - do_sgemm(555, 777, 999, false, 10, 10, 0); - - do_sgemm(10, 6, 12, true, -4, 10, 0); - do_sgemm(512, 256, 384, true, -4, 10, 0); - do_sgemm(1366, 768, 256, true, -4, 10, 0); - do_sgemm(1255, 755, 333, true, -4, 10, 0); - do_sgemm(555, 777, 999, true, -4, 10, 0); - return 0; -} diff --git a/tools/build.sh b/tools/build.sh index ced18a180762826ffa2c45949e5aab9bfe5c8f88..bf3545ef162c86c16c0877f5f25f3a1e09de1fd4 100755 --- a/tools/build.sh +++ b/tools/build.sh @@ -40,8 +40,8 @@ build_for_android() { fi if [ -z "$PLATFORM" ]; then -# PLATFORM="arm-v7a" # Users could choose "arm-v8a" platform. - PLATFORM="arm-v8a" + PLATFORM="arm-v7a" # Users could choose "arm-v8a" platform. +# PLATFORM="arm-v8a" fi if [ "${PLATFORM}" = "arm-v7a" ]; then diff --git a/tools/quantification/convert.cpp b/tools/quantification/convert.cpp index 88eef48b39ab8d2aeb1d4e3858ba97ef6360c9a9..0223a3b353c4715ed0c52a1d6b3f0751e372691d 100644 --- a/tools/quantification/convert.cpp +++ b/tools/quantification/convert.cpp @@ -3,8 +3,8 @@ #include "src/enforce.h" #include "src/var_desc.h" #include "src/program_desc.h" +#include #include -#include #include #include #include @@ -13,7 +13,7 @@ #include "src/protobuf-c.h" #include #include - +#include const size_t kSize64 = sizeof(uint64_t); const size_t kSize32 = sizeof(uint32_t); diff --git a/tools/quantification/src/block_desc_local.h b/tools/quantification/src/block_desc_local.h index 41c2dc0abbdf8bb006f4152674e92dd1f7d01500..2ee8132af7f21ed0e62678c8da510bfd7fba9dbd 100644 --- a/tools/quantification/src/block_desc_local.h +++ b/tools/quantification/src/block_desc_local.h @@ -19,6 +19,7 @@ limitations under the License. */ #ifndef TOOLS_QUANTIFICATION_SRC_BLOCK_DESC_LOCAL_H_ #define TOOLS_QUANTIFICATION_SRC_BLOCK_DESC_LOCAL_H_ +#include #include #include "src/var_desc.h"