提交 e12edf15 编写于 作者: H hangq

Remove unused third_party dependencies and strip the shared library (.so) in Release builds

上级 8dec7490
...@@ -24,7 +24,6 @@ ...@@ -24,7 +24,6 @@
#include <memory> #include <memory>
#include <functional> #include <functional>
#include "utils/overload.h" #include "utils/overload.h"
#include "./securec.h"
#ifndef USE_ANDROID_LOG #ifndef USE_ANDROID_LOG
#ifdef USE_GLOG #ifdef USE_GLOG
#include "glog/logging.h" #include "glog/logging.h"
......
...@@ -16,15 +16,7 @@ include_directories(${CMAKE_CURRENT_SOURCE_DIR}) ...@@ -16,15 +16,7 @@ include_directories(${CMAKE_CURRENT_SOURCE_DIR})
include_directories(${TOP_DIR}/third_party) include_directories(${TOP_DIR}/third_party)
include_directories(${TOP_DIR}/third_party/flatbuffers/include) include_directories(${TOP_DIR}/third_party/flatbuffers/include)
include(${TOP_DIR}/cmake/utils.cmake) option(CMAKE_BUILD_TYPE "build type" Release)
include(${TOP_DIR}/cmake/dependency_utils.cmake)
include(${TOP_DIR}/cmake/external_libs/json.cmake)
include(${TOP_DIR}/cmake/dependency_securec.cmake)
set(CMAKE_VERBOSE_MAKEFILE on)
add_compile_definitions(USE_ANDROID_LOG)
add_compile_definitions(NO_DLIB)
add_compile_options(-fPIC)
option(BUILD_DEVICE "if build device" on) option(BUILD_DEVICE "if build device" on)
option(SUPPORT_TRAIN "if build for on-device train" off) option(SUPPORT_TRAIN "if build for on-device train" off)
option(PLATFORM_ARM64 "if build device for arm64" off) option(PLATFORM_ARM64 "if build device for arm64" off)
...@@ -35,6 +27,14 @@ option(SUPPORT_GPU "if support gpu" off) ...@@ -35,6 +27,14 @@ option(SUPPORT_GPU "if support gpu" off)
option(OFFLINE_COMPILE "if offline compile OpenCL kernel" off) option(OFFLINE_COMPILE "if offline compile OpenCL kernel" off)
option(BUILD_MINDDATA "" off) option(BUILD_MINDDATA "" off)
set(CMAKE_VERBOSE_MAKEFILE on)
add_compile_definitions(USE_ANDROID_LOG)
add_compile_definitions(NO_DLIB)
add_compile_options(-fPIC)
if("${CMAKE_BUILD_TYPE}" STREQUAL "Release")
#set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fvisibility=hidden")
endif()
if (BUILD_DEVICE) if (BUILD_DEVICE)
add_compile_definitions(BUILD_DEVICE) add_compile_definitions(BUILD_DEVICE)
endif() endif()
...@@ -91,6 +91,10 @@ if (BUILD_CONVERTER) ...@@ -91,6 +91,10 @@ if (BUILD_CONVERTER)
set(PYTHON_LIBRARIES "${py_lib}") set(PYTHON_LIBRARIES "${py_lib}")
endif() endif()
include_directories(${PYTHON_INCLUDE_DIRS}) include_directories(${PYTHON_INCLUDE_DIRS})
include(${TOP_DIR}/cmake/utils.cmake)
include(${TOP_DIR}/cmake/dependency_utils.cmake)
include(${TOP_DIR}/cmake/external_libs/json.cmake)
include(${TOP_DIR}/cmake/dependency_securec.cmake)
include(${TOP_DIR}/cmake/external_libs/pybind11.cmake) include(${TOP_DIR}/cmake/external_libs/pybind11.cmake)
include(${TOP_DIR}/cmake/external_libs/eigen.cmake) include(${TOP_DIR}/cmake/external_libs/eigen.cmake)
include_directories(${TOP_DIR}/third_party/protobuf/build/include) include_directories(${TOP_DIR}/third_party/protobuf/build/include)
......
...@@ -23,6 +23,8 @@ ...@@ -23,6 +23,8 @@
#include "schema/model_generated.h" #include "schema/model_generated.h"
namespace mindspore { namespace mindspore {
#define MS_API __attribute__((visibility("default")))
/// \brief ModelImpl defined by MindSpore Lite. /// \brief ModelImpl defined by MindSpore Lite.
/// ///
/// \note List public class and interface for reference. /// \note List public class and interface for reference.
...@@ -35,7 +37,7 @@ namespace lite { ...@@ -35,7 +37,7 @@ namespace lite {
class Primitive; class Primitive;
/// \brief Model defined by MindSpore Lite. /// \brief Model defined by MindSpore Lite.
class Model { class MS_API Model {
public: public:
/// \brief Static method to create a Model pointer. /// \brief Static method to create a Model pointer.
/// ///
...@@ -78,7 +80,7 @@ class Model { ...@@ -78,7 +80,7 @@ class Model {
}; };
/// \brief ModelBuilder defined by MindSpore Lite. /// \brief ModelBuilder defined by MindSpore Lite.
class ModelBuilder { class MS_API ModelBuilder {
public: public:
/// \brief OutEdge defined by MindSpore Lite. /// \brief OutEdge defined by MindSpore Lite.
struct OutEdge { struct OutEdge {
......
...@@ -75,8 +75,6 @@ add_library(mindspore-lite SHARED ${LITE_SRC} ${ANF_SRC}) ...@@ -75,8 +75,6 @@ add_library(mindspore-lite SHARED ${LITE_SRC} ${ANF_SRC})
target_link_libraries(mindspore-lite target_link_libraries(mindspore-lite
cpu_kernel_mid_ cpu_kernel_mid_
ops_mid_ ops_mid_
${SECUREC_LIBRARY}
mindspore::json
) )
add_subdirectory(runtime/kernel/arm) add_subdirectory(runtime/kernel/arm)
...@@ -89,3 +87,9 @@ endif () ...@@ -89,3 +87,9 @@ endif ()
add_subdirectory(ops) add_subdirectory(ops)
if("${CMAKE_BUILD_TYPE}" STREQUAL "Release" AND (PLATFORM_ARM64 OR PLATFORM_ARM32))
add_custom_command(TARGET mindspore-lite POST_BUILD
COMMAND ${ANDROID_NDK}/toolchains/aarch64-linux-android-4.9/prebuilt/linux-x86_64/aarch64-linux-android/bin/strip
${TOP_DIR}/mindspore/lite/build/src/libmindspore-lite.so)
endif()
...@@ -20,6 +20,7 @@ ...@@ -20,6 +20,7 @@
#include "mindspore/lite/src/ir/primitive_t_value.h" #include "mindspore/lite/src/ir/primitive_t_value.h"
#include "mindspore/ccsrc/utils/utils.h" #include "mindspore/ccsrc/utils/utils.h"
#include "mindspore/lite/src/gllo/common/utils.h" #include "mindspore/lite/src/gllo/common/utils.h"
#include "securec/include/securec.h"
namespace mindspore::opt { namespace mindspore::opt {
namespace { namespace {
...@@ -90,11 +91,11 @@ void GenConvNewBias(const FuncGraphPtr &func_graph, const CNodePtr &conv_node, c ...@@ -90,11 +91,11 @@ void GenConvNewBias(const FuncGraphPtr &func_graph, const CNodePtr &conv_node, c
auto add_weight_data = reinterpret_cast<float *>(add_weight_tensor->tensor_addr()); auto add_weight_data = reinterpret_cast<float *>(add_weight_tensor->tensor_addr());
if (add_weight_tensor->tensor_shape().empty()) { if (add_weight_tensor->tensor_shape().empty()) {
if (0 != memset_s(add_bias_data, kernel_nums * sizeof(float), *add_weight_data, kernel_nums * sizeof(float))) { if (EOK != memset_s(add_bias_data, kernel_nums * sizeof(float), *add_weight_data, kernel_nums * sizeof(float))) {
MS_LOG(EXCEPTION) << "memset_s conv_bias_data failed"; MS_LOG(EXCEPTION) << "memset_s conv_bias_data failed";
} }
} else { } else {
if (0 != memcpy_s(add_bias_data, kernel_nums * sizeof(float), add_weight_data, kernel_nums * sizeof(float))) { if (EOK != memcpy_s(add_bias_data, kernel_nums * sizeof(float), add_weight_data, kernel_nums * sizeof(float))) {
MS_LOG(EXCEPTION) << "memset_s conv_bias_data failed"; MS_LOG(EXCEPTION) << "memset_s conv_bias_data failed";
} }
} }
......
...@@ -21,6 +21,7 @@ ...@@ -21,6 +21,7 @@
#include "mindspore/lite/src/ir/primitive_t_value.h" #include "mindspore/lite/src/ir/primitive_t_value.h"
#include "mindspore/ccsrc/utils/utils.h" #include "mindspore/ccsrc/utils/utils.h"
#include "mindspore/lite/src/gllo/common/utils.h" #include "mindspore/lite/src/gllo/common/utils.h"
#include "securec/include/securec.h"
namespace mindspore::opt { namespace mindspore::opt {
namespace { namespace {
...@@ -46,7 +47,7 @@ void CalTransale(const AnfNodePtr &bn_scale_node, const AnfNodePtr &bn_var_node, ...@@ -46,7 +47,7 @@ void CalTransale(const AnfNodePtr &bn_scale_node, const AnfNodePtr &bn_var_node,
auto bn_var_tensor = std::dynamic_pointer_cast<ParamValueLite>(bn_var_param); auto bn_var_tensor = std::dynamic_pointer_cast<ParamValueLite>(bn_var_param);
auto bn_var_data = reinterpret_cast<float *>(bn_var_tensor->tensor_addr()); auto bn_var_data = reinterpret_cast<float *>(bn_var_tensor->tensor_addr());
// cal transScale, tf : scale/sqrt(variance + eps); caffe : 1/sqrt(variance + eps) // cal transScale, tf : scale/sqrt(variance + eps); caffe : 1/sqrt(variance + eps)
if (memcpy_s(trans_scale, kernel_num * sizeof(float), bn_var_data, kernel_num * sizeof(float)) != 0) { if (memcpy_s(trans_scale, kernel_num * sizeof(float), bn_var_data, kernel_num * sizeof(float)) != EOK) {
MS_LOG(EXCEPTION) << "memcpy_s transScale error"; MS_LOG(EXCEPTION) << "memcpy_s transScale error";
return; return;
} }
......
...@@ -22,6 +22,7 @@ ...@@ -22,6 +22,7 @@
#include "mindspore/ccsrc/utils/utils.h" #include "mindspore/ccsrc/utils/utils.h"
#include "mindspore/lite/src/gllo/common/utils.h" #include "mindspore/lite/src/gllo/common/utils.h"
#include "include/errorcode.h" #include "include/errorcode.h"
#include "securec/include/securec.h"
namespace mindspore::opt { namespace mindspore::opt {
namespace { namespace {
...@@ -69,7 +70,7 @@ const void ConvScaleFusion::InitTransParam(const CNodePtr &scale_node, int kerne ...@@ -69,7 +70,7 @@ const void ConvScaleFusion::InitTransParam(const CNodePtr &scale_node, int kerne
auto weight_value = std::dynamic_pointer_cast<ParamValueLite>(scale_weight_param); auto weight_value = std::dynamic_pointer_cast<ParamValueLite>(scale_weight_param);
auto weight_data = reinterpret_cast<const float *>(weight_value->tensor_addr()); auto weight_data = reinterpret_cast<const float *>(weight_value->tensor_addr());
if (0 != memcpy_s(trans_scale, kernel_num * sizeof(float), weight_data, kernel_num * sizeof(float))) { if (EOK != memcpy_s(trans_scale, kernel_num * sizeof(float), weight_data, kernel_num * sizeof(float))) {
MS_LOG(EXCEPTION) << "memcpy_s transScale failed"; MS_LOG(EXCEPTION) << "memcpy_s transScale failed";
} }
...@@ -77,7 +78,7 @@ const void ConvScaleFusion::InitTransParam(const CNodePtr &scale_node, int kerne ...@@ -77,7 +78,7 @@ const void ConvScaleFusion::InitTransParam(const CNodePtr &scale_node, int kerne
auto scale_bias_param = scale_bias_node->cast<ParameterPtr>()->default_param(); auto scale_bias_param = scale_bias_node->cast<ParameterPtr>()->default_param();
auto bias_value = std::dynamic_pointer_cast<ParamValueLite>(scale_bias_param); auto bias_value = std::dynamic_pointer_cast<ParamValueLite>(scale_bias_param);
auto bias_data = reinterpret_cast<const float *>(bias_value->tensor_addr()); auto bias_data = reinterpret_cast<const float *>(bias_value->tensor_addr());
if (0 != memcpy_s(trans_bias, kernel_num * sizeof(float), bias_data, kernel_num * sizeof(float))) { if (EOK != memcpy_s(trans_bias, kernel_num * sizeof(float), bias_data, kernel_num * sizeof(float))) {
MS_LOG(EXCEPTION) << "memcpy_s transScale failed"; MS_LOG(EXCEPTION) << "memcpy_s transScale failed";
} }
} }
......
...@@ -22,6 +22,7 @@ ...@@ -22,6 +22,7 @@
#include "mindspore/ccsrc/utils/utils.h" #include "mindspore/ccsrc/utils/utils.h"
#include "mindspore/lite/src/gllo/common/utils.h" #include "mindspore/lite/src/gllo/common/utils.h"
#include "include/errorcode.h" #include "include/errorcode.h"
#include "securec/include/securec.h"
namespace mindspore::opt { namespace mindspore::opt {
namespace { namespace {
...@@ -176,7 +177,7 @@ const void ConvTransformFusion::CalNewBiasTensor(float *bias_data, int kernel_nu ...@@ -176,7 +177,7 @@ const void ConvTransformFusion::CalNewBiasTensor(float *bias_data, int kernel_nu
MS_ASSERT(bias_data != nullptr); MS_ASSERT(bias_data != nullptr);
if (bias_flag) { if (bias_flag) {
auto tmp_bias_data = new(std::nothrow) float[kernel_num]; auto tmp_bias_data = new(std::nothrow) float[kernel_num];
if (0 != memset_s(bias_data, kernel_num * sizeof(float), 0, kernel_num * sizeof(float))) { if (EOK != memset_s(bias_data, kernel_num * sizeof(float), 0, kernel_num * sizeof(float))) {
MS_LOG(EXCEPTION) << "memset bias data failed"; MS_LOG(EXCEPTION) << "memset bias data failed";
} }
for (size_t i = 0; i < kernel_num; i++) { for (size_t i = 0; i < kernel_num; i++) {
...@@ -189,7 +190,7 @@ const void ConvTransformFusion::CalNewBiasTensor(float *bias_data, int kernel_nu ...@@ -189,7 +190,7 @@ const void ConvTransformFusion::CalNewBiasTensor(float *bias_data, int kernel_nu
} }
delete[] tmp_bias_data; delete[] tmp_bias_data;
} else { } else {
if (0 != memset_s(bias_data, kernel_num * sizeof(float), 0, kernel_num * sizeof(float))) { if (EOK != memset_s(bias_data, kernel_num * sizeof(float), 0, kernel_num * sizeof(float))) {
MS_LOG(EXCEPTION) << "memset bias data failed"; MS_LOG(EXCEPTION) << "memset bias data failed";
} }
auto ret = memcpy_s(bias_data, kernel_num * sizeof(float), trans_bias, kernel_num * sizeof(float)); auto ret = memcpy_s(bias_data, kernel_num * sizeof(float), trans_bias, kernel_num * sizeof(float));
......
...@@ -50,11 +50,7 @@ int Tensor::CopyTensorData(const Tensor &srcTensor) { ...@@ -50,11 +50,7 @@ int Tensor::CopyTensorData(const Tensor &srcTensor) {
} }
this->data_ = malloc(data_size); this->data_ = malloc(data_size);
} }
auto ret = memcpy_s(this->data_, data_size, srcTensor.data_, srcTensor.Size()); memcpy(this->data_, srcTensor.data_, data_size);
if (EOK != ret) {
MS_LOG(ERROR) << "memcpy_s failed : " << ret;
return mindspore::lite::RET_ERROR;
}
return 0; return 0;
} }
......
...@@ -80,9 +80,8 @@ int ConcatOpenCLKernel::Run_axis0() { ...@@ -80,9 +80,8 @@ int ConcatOpenCLKernel::Run_axis0() {
ocl_runtime->MapBuffer(*buffer, CL_MAP_WRITE, tensor->Size(), command_queue, true); ocl_runtime->MapBuffer(*buffer, CL_MAP_WRITE, tensor->Size(), command_queue, true);
} }
memcpy_s(outputs_[0]->Data(), inputs_[0]->Size(), inputs_[0]->Data(), inputs_[0]->Size()); memcpy(outputs_[0]->Data(), inputs_[0]->Data(), inputs_[0]->Size());
memcpy_s(reinterpret_cast<char *>(outputs_[0]->Data()) + inputs_[0]->Size(), inputs_[1]->Size(), inputs_[1]->Data(), memcpy(reinterpret_cast<char *>(outputs_[0]->Data()) + inputs_[0]->Size(), inputs_[1]->Data(), inputs_[1]->Size());
inputs_[1]->Size());
for (auto tensors : {&inputs_, &outputs_}) { for (auto tensors : {&inputs_, &outputs_}) {
for (auto &tensor : *tensors) { for (auto &tensor : *tensors) {
...@@ -202,8 +201,8 @@ int ConcatOpenCLKernel::Run() { ...@@ -202,8 +201,8 @@ int ConcatOpenCLKernel::Run() {
kernel::LiteKernel *OpenCLConcatKernelCreator(const std::vector<lite::tensor::Tensor *> &inputs, kernel::LiteKernel *OpenCLConcatKernelCreator(const std::vector<lite::tensor::Tensor *> &inputs,
const std::vector<lite::tensor::Tensor *> &outputs, const std::vector<lite::tensor::Tensor *> &outputs,
OpParameter *opParameter, OpParameter *opParameter, const lite::Context *ctx,
const lite::Context *ctx, const kernel::KernelKey &desc) { const kernel::KernelKey &desc) {
auto *kernel = new ConcatOpenCLKernel(opParameter, inputs, outputs); auto *kernel = new ConcatOpenCLKernel(opParameter, inputs, outputs);
auto ret = kernel->Init(); auto ret = kernel->Init();
if (0 != ret) { if (0 != ret) {
......
...@@ -66,12 +66,12 @@ int ConvolutionOpenCLKernel::InitBuffer() { ...@@ -66,12 +66,12 @@ int ConvolutionOpenCLKernel::InitBuffer() {
if (io_dataformat_ == schema::Format_NHWC) { if (io_dataformat_ == schema::Format_NHWC) {
packed_weight_ = reinterpret_cast<float *>(allocator->Malloc(weight_tensor->Size())); packed_weight_ = reinterpret_cast<float *>(allocator->Malloc(weight_tensor->Size()));
packed_weight_ = reinterpret_cast<float *>(allocator->MapBuffer(packed_weight_, CL_MAP_WRITE, nullptr, true)); packed_weight_ = reinterpret_cast<float *>(allocator->MapBuffer(packed_weight_, CL_MAP_WRITE, nullptr, true));
memcpy_s(packed_weight_, weight_tensor->Size(), weight_tensor->Data(), weight_tensor->Size()); memcpy(packed_weight_, weight_tensor->Data(), weight_tensor->Size());
allocator->UnmapBuffer(packed_weight_); allocator->UnmapBuffer(packed_weight_);
packed_bias_ = reinterpret_cast<float *>(allocator->Malloc(bias_tensor->Size())); packed_bias_ = reinterpret_cast<float *>(allocator->Malloc(bias_tensor->Size()));
packed_bias_ = reinterpret_cast<float *>(allocator->MapBuffer(packed_bias_, CL_MAP_WRITE, nullptr, true)); packed_bias_ = reinterpret_cast<float *>(allocator->MapBuffer(packed_bias_, CL_MAP_WRITE, nullptr, true));
memcpy_s(packed_bias_, bias_tensor->Size(), bias_tensor->Data(), bias_tensor->Size()); memcpy(packed_bias_, bias_tensor->Data(), bias_tensor->Size());
allocator->UnmapBuffer(packed_bias_); allocator->UnmapBuffer(packed_bias_);
} else if (io_dataformat_ == schema::Format_NHWC4) { } else if (io_dataformat_ == schema::Format_NHWC4) {
// OHWI -> OHWIIO // OHWI -> OHWIIO
...@@ -88,7 +88,7 @@ int ConvolutionOpenCLKernel::InitBuffer() { ...@@ -88,7 +88,7 @@ int ConvolutionOpenCLKernel::InitBuffer() {
packed_weight_ = reinterpret_cast<float *>(allocator->Malloc(packed_weight_size)); packed_weight_ = reinterpret_cast<float *>(allocator->Malloc(packed_weight_size));
packed_weight_ = reinterpret_cast<float *>(allocator->MapBuffer(packed_weight_, CL_MAP_WRITE, nullptr, true)); packed_weight_ = reinterpret_cast<float *>(allocator->MapBuffer(packed_weight_, CL_MAP_WRITE, nullptr, true));
memset_s(packed_weight_, packed_weight_size, 0x00, packed_weight_size); memset(packed_weight_, 0x00, packed_weight_size);
auto weight_data = reinterpret_cast<float *>(weight_tensor->Data()); auto weight_data = reinterpret_cast<float *>(weight_tensor->Data());
for (int co = 0; co < CO; ++co) { for (int co = 0; co < CO; ++co) {
for (int kh = 0; kh < KH; ++kh) { for (int kh = 0; kh < KH; ++kh) {
...@@ -108,7 +108,7 @@ int ConvolutionOpenCLKernel::InitBuffer() { ...@@ -108,7 +108,7 @@ int ConvolutionOpenCLKernel::InitBuffer() {
size_t packed_bias_size = CO_SLICES * CO_TILE * sizeof(float); size_t packed_bias_size = CO_SLICES * CO_TILE * sizeof(float);
packed_bias_ = reinterpret_cast<float *>(allocator->Malloc(packed_bias_size)); packed_bias_ = reinterpret_cast<float *>(allocator->Malloc(packed_bias_size));
packed_bias_ = reinterpret_cast<float *>(allocator->MapBuffer(packed_bias_, CL_MAP_WRITE, nullptr, true)); packed_bias_ = reinterpret_cast<float *>(allocator->MapBuffer(packed_bias_, CL_MAP_WRITE, nullptr, true));
memset_s(packed_bias_, packed_bias_size, 0x00, packed_bias_size); memset(packed_bias_, 0x00, packed_bias_size);
auto bias_data = reinterpret_cast<float *>(bias_tensor->Data()); auto bias_data = reinterpret_cast<float *>(bias_tensor->Data());
for (int co = 0; co < CO; ++co) { for (int co = 0; co < CO; ++co) {
packed_bias_[co] = bias_data[co]; packed_bias_[co] = bias_data[co];
......
...@@ -33,7 +33,6 @@ using mindspore::kernel::KERNEL_ARCH::kGPU; ...@@ -33,7 +33,6 @@ using mindspore::kernel::KERNEL_ARCH::kGPU;
using mindspore::lite::KernelRegistrar; using mindspore::lite::KernelRegistrar;
using mindspore::schema::PrimitiveType_DepthwiseConv2D; using mindspore::schema::PrimitiveType_DepthwiseConv2D;
namespace mindspore::kernel { namespace mindspore::kernel {
int DepthwiseConv2dOpenCLKernel::Init() { int DepthwiseConv2dOpenCLKernel::Init() {
...@@ -42,7 +41,8 @@ int DepthwiseConv2dOpenCLKernel::Init() { ...@@ -42,7 +41,8 @@ int DepthwiseConv2dOpenCLKernel::Init() {
auto in_format = inputs_[0]->GetFormat(); auto in_format = inputs_[0]->GetFormat();
outputs_[0]->SetFormat(in_format); outputs_[0]->SetFormat(in_format);
if (in_format != schema::Format_NHWC4 && in_format != schema::Format_NC4HW4) { if (in_format != schema::Format_NHWC4 && in_format != schema::Format_NC4HW4) {
MS_LOG(ERROR) << "input format(" << in_format << ") " << "format not support!"; MS_LOG(ERROR) << "input format(" << in_format << ") "
<< "format not support!";
} }
if (mem_type_ == MEM_TYPE::BUF) { if (mem_type_ == MEM_TYPE::BUF) {
kernel_name += "_BUF"; kernel_name += "_BUF";
...@@ -62,7 +62,7 @@ int DepthwiseConv2dOpenCLKernel::Init() { ...@@ -62,7 +62,7 @@ int DepthwiseConv2dOpenCLKernel::Init() {
ocl_runtime->CreateKernelFromIL(kernel_(), kernel_name); ocl_runtime->CreateKernelFromIL(kernel_(), kernel_name);
#else #else
std::string program_name = "DepthwiseConv2d"; std::string program_name = "DepthwiseConv2d";
std::set <std::string> build_options; std::set<std::string> build_options;
#ifdef ENABLE_FP16 #ifdef ENABLE_FP16
std::string source = depthwise_conv2d_source_fp16; std::string source = depthwise_conv2d_source_fp16;
#else #else
...@@ -102,10 +102,9 @@ int DepthwiseConv2dOpenCLKernel::InitBuffer() { ...@@ -102,10 +102,9 @@ int DepthwiseConv2dOpenCLKernel::InitBuffer() {
bias_data_ = reinterpret_cast<FLOAT_t *>(allocator->Malloc(C4NUM * CO4 * sizeof(FLOAT_t))); bias_data_ = reinterpret_cast<FLOAT_t *>(allocator->Malloc(C4NUM * CO4 * sizeof(FLOAT_t)));
bias_data_ = reinterpret_cast<FLOAT_t *>(allocator->MapBuffer(bias_data_, CL_MAP_WRITE, nullptr, true)); bias_data_ = reinterpret_cast<FLOAT_t *>(allocator->MapBuffer(bias_data_, CL_MAP_WRITE, nullptr, true));
size_t up_co_size = C4NUM * CO4 * sizeof(FLOAT_t); size_t up_co_size = C4NUM * CO4 * sizeof(FLOAT_t);
memset_s(bias_data_, up_co_size, 0, up_co_size); memset(bias_data_, 0, up_co_size);
auto ori_bias = reinterpret_cast<FLOAT_t *>(inputs_.at(kBiasIndex)->Data()); auto ori_bias = reinterpret_cast<FLOAT_t *>(inputs_.at(kBiasIndex)->Data());
memcpy_s(bias_data_, outputs_[0]->Channel() * sizeof(FLOAT_t), ori_bias, memcpy(bias_data_, ori_bias, outputs_[0]->Channel() * sizeof(FLOAT_t));
outputs_[0]->Channel() * sizeof(FLOAT_t));
allocator->UnmapBuffer(bias_data_); allocator->UnmapBuffer(bias_data_);
} else { } else {
MS_ASSERT(inputs_.size() == kInputSize1); MS_ASSERT(inputs_.size() == kInputSize1);
...@@ -113,9 +112,7 @@ int DepthwiseConv2dOpenCLKernel::InitBuffer() { ...@@ -113,9 +112,7 @@ int DepthwiseConv2dOpenCLKernel::InitBuffer() {
return 0; return 0;
} }
int DepthwiseConv2dOpenCLKernel::ReSize() { int DepthwiseConv2dOpenCLKernel::ReSize() { return 0; }
return 0;
}
int DepthwiseConv2dOpenCLKernel::Run() { int DepthwiseConv2dOpenCLKernel::Run() {
MS_LOG(DEBUG) << this->Name() << " Running!"; MS_LOG(DEBUG) << this->Name() << " Running!";
...@@ -123,17 +120,17 @@ int DepthwiseConv2dOpenCLKernel::Run() { ...@@ -123,17 +120,17 @@ int DepthwiseConv2dOpenCLKernel::Run() {
auto ocl_runtime = lite::opencl::OpenCLRuntime::GetInstance(); auto ocl_runtime = lite::opencl::OpenCLRuntime::GetInstance();
size_t CO4 = UP_DIV(outputs_[0]->Channel(), C4NUM); size_t CO4 = UP_DIV(outputs_[0]->Channel(), C4NUM);
size_t CI4 = UP_DIV(inputs_[0]->Channel(), C4NUM); size_t CI4 = UP_DIV(inputs_[0]->Channel(), C4NUM);
std::vector <size_t> global = {(size_t) outputs_[0]->Width(), (size_t) outputs_[0]->Height(), CO4}; std::vector<size_t> global = {(size_t)outputs_[0]->Width(), (size_t)outputs_[0]->Height(), CO4};
std::vector <size_t> local = {1, 1, CO4}; std::vector<size_t> local = {1, 1, CO4};
float relu_clip1 = 6.0; float relu_clip1 = 6.0;
cl_int2 kernel_size = {parameter->kernel_h_, parameter->kernel_w_}; cl_int2 kernel_size = {parameter->kernel_h_, parameter->kernel_w_};
cl_int2 stride = {parameter->stride_h_, parameter->stride_w_}; cl_int2 stride = {parameter->stride_h_, parameter->stride_w_};
cl_int2 padding = {-parameter->pad_h_, -parameter->pad_w_}; cl_int2 padding = {-parameter->pad_h_, -parameter->pad_w_};
cl_int2 dilation = {parameter->dilation_h_, parameter->dilation_w_}; cl_int2 dilation = {parameter->dilation_h_, parameter->dilation_w_};
cl_int4 src_size = {inputs_[0]->Width(), inputs_[0]->Height(), (cl_int) CI4, inputs_[0]->Batch()}; cl_int4 src_size = {inputs_[0]->Width(), inputs_[0]->Height(), (cl_int)CI4, inputs_[0]->Batch()};
cl_int4 dst_size = {(cl_int) outputs_[0]->Width(), (cl_int) outputs_[0]->Height(), (cl_int) CO4, cl_int4 dst_size = {(cl_int)outputs_[0]->Width(), (cl_int)outputs_[0]->Height(), (cl_int)CO4,
(cl_int) outputs_[0]->Batch()}; (cl_int)outputs_[0]->Batch()};
ocl_runtime->SetKernelArg(kernel_, 1, packed_weight_); ocl_runtime->SetKernelArg(kernel_, 1, packed_weight_);
ocl_runtime->SetKernelArg(kernel_, 2, bias_data_); ocl_runtime->SetKernelArg(kernel_, 2, bias_data_);
...@@ -168,11 +165,11 @@ int DepthwiseConv2dOpenCLKernel::Run() { ...@@ -168,11 +165,11 @@ int DepthwiseConv2dOpenCLKernel::Run() {
im_dst_y = outputs_[0]->Height() * CO4; im_dst_y = outputs_[0]->Height() * CO4;
im_dst_x = outputs_[0]->Width(); im_dst_x = outputs_[0]->Width();
} }
cl::Image2D in_mem(*ocl_runtime->Context(), CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR, image_format, cl::Image2D in_mem(*ocl_runtime->Context(), CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR, image_format, im_src_x,
im_src_x, im_src_y, 0, inputs_[0]->Data(), &in_error_code); im_src_y, 0, inputs_[0]->Data(), &in_error_code);
cl_int out_error_code; cl_int out_error_code;
cl::Image2D out_mem(*ocl_runtime->Context(), CL_MEM_WRITE_ONLY, image_format, cl::Image2D out_mem(*ocl_runtime->Context(), CL_MEM_WRITE_ONLY, image_format, im_dst_x, im_dst_y, 0, nullptr,
im_dst_x, im_dst_y, 0, nullptr, &out_error_code); &out_error_code);
if (in_error_code != CL_SUCCESS) { if (in_error_code != CL_SUCCESS) {
MS_LOG(DEBUG) << "in Image2D Failed, error=" << in_error_code; MS_LOG(DEBUG) << "in Image2D Failed, error=" << in_error_code;
return 1; return 1;
...@@ -181,10 +178,8 @@ int DepthwiseConv2dOpenCLKernel::Run() { ...@@ -181,10 +178,8 @@ int DepthwiseConv2dOpenCLKernel::Run() {
MS_LOG(DEBUG) << "out Image2D Failed, error= " << out_error_code; MS_LOG(DEBUG) << "out Image2D Failed, error= " << out_error_code;
return 1; return 1;
} }
auto origin = cl::array < cl::size_type, auto origin = cl::array<cl::size_type, 3U>{0, 0, 0};
3U > {0, 0, 0}; auto region = cl::array<cl::size_type, 3U>{im_dst_x, im_dst_y, 1};
auto region = cl::array < cl::size_type,
3U > {im_dst_x, im_dst_y, 1};
ocl_runtime->SetKernelArg(kernel_, 0, in_mem); ocl_runtime->SetKernelArg(kernel_, 0, in_mem);
ocl_runtime->SetKernelArg(kernel_, 4, out_mem); ocl_runtime->SetKernelArg(kernel_, 4, out_mem);
...@@ -209,7 +204,5 @@ kernel::LiteKernel *OpenCLDepthwiseConv2dKernelCreator(const std::vector<lite::t ...@@ -209,7 +204,5 @@ kernel::LiteKernel *OpenCLDepthwiseConv2dKernelCreator(const std::vector<lite::t
return kernel; return kernel;
} }
REG_KERNEL(kGPU, kNumberTypeFloat32, PrimitiveType_DepthwiseConv2D, OpenCLDepthwiseConv2dKernelCreator REG_KERNEL(kGPU, kNumberTypeFloat32, PrimitiveType_DepthwiseConv2D, OpenCLDepthwiseConv2dKernelCreator)
)
} // namespace mindspore::kernel } // namespace mindspore::kernel
...@@ -3,7 +3,6 @@ set(LITE_DIR ${TOP_DIR}/mindspore/lite) ...@@ -3,7 +3,6 @@ set(LITE_DIR ${TOP_DIR}/mindspore/lite)
include_directories(${TOP_DIR}) include_directories(${TOP_DIR})
include_directories(${TEST_DIR}) include_directories(${TEST_DIR})
include(${CMAKE_CURRENT_SOURCE_DIR}/../../../cmake/dependency_gtest.cmake) include(${CMAKE_CURRENT_SOURCE_DIR}/../../../cmake/dependency_gtest.cmake)
#include(${CMAKE_CURRENT_SOURCE_DIR}/../../../cmake/external_libs/gtest.cmake)
### anf src ### anf src
set(ANF_SRC set(ANF_SRC
...@@ -294,7 +293,7 @@ endif () ...@@ -294,7 +293,7 @@ endif ()
add_executable(lite-test ${TEST_SRC}) add_executable(lite-test ${TEST_SRC})
target_link_libraries(lite-test dl ${SECUREC_LIBRARY} ${GTEST_LIBRARY} mindspore::json) target_link_libraries(lite-test dl ${GTEST_LIBRARY})
if (BUILD_MINDDATA) if (BUILD_MINDDATA)
target_link_libraries(lite-test target_link_libraries(lite-test
minddata-lite minddata-lite
...@@ -316,5 +315,7 @@ if (BUILD_CONVERTER) ...@@ -316,5 +315,7 @@ if (BUILD_CONVERTER)
pthread pthread
protobuf protobuf
mindspore::eigen mindspore::eigen
mindspore::json
${SECUREC_LIBRARY}
) )
endif() endif()
...@@ -11,12 +11,7 @@ add_executable(benchmark ...@@ -11,12 +11,7 @@ add_executable(benchmark
${COMMON_SRC}) ${COMMON_SRC})
if (PLATFORM_ARM32 OR PLATFORM_ARM64) if (PLATFORM_ARM32 OR PLATFORM_ARM64)
target_link_libraries(benchmark mindspore-lite ${SECUREC_LIBRARY}) target_link_libraries(benchmark mindspore-lite)
else() else()
target_link_libraries(benchmark mindspore-lite ${SECUREC_LIBRARY} pthread) target_link_libraries(benchmark mindspore-lite pthread)
endif() endif()
target_link_libraries(benchmark
mindspore::json
# mindspore::eigen
)
...@@ -23,6 +23,7 @@ ...@@ -23,6 +23,7 @@
#include "mindspore/lite/tools/converter/quantizer/general_bitpacking.h" #include "mindspore/lite/tools/converter/quantizer/general_bitpacking.h"
#include "src/common/utils.h" #include "src/common/utils.h"
#include "abstract/abstract_value.h" #include "abstract/abstract_value.h"
#include "securec/include/securec.h"
using std::string; using std::string;
using std::vector; using std::vector;
...@@ -310,12 +311,12 @@ STATUS PostBitPack(float *weight, size_t shapeSize, size_t bitNum) { ...@@ -310,12 +311,12 @@ STATUS PostBitPack(float *weight, size_t shapeSize, size_t bitNum) {
if (bitNum < 8 && bitNum > 1) { if (bitNum < 8 && bitNum > 1) {
BitPack weight_bitpack(bitNum); BitPack weight_bitpack(bitNum);
weight_bitpack.BitPacking(qDatas, qDatas_packed); weight_bitpack.BitPacking(qDatas, qDatas_packed);
if (0 != memcpy_s(rawDatas, shapeSize, &qDatas_packed[0], shapeSize)) { if (EOK != memcpy_s(rawDatas, shapeSize, &qDatas_packed[0], shapeSize)) {
MS_LOG(ERROR) << "PostBitPack memcpy_s qDatas_packed failed"; MS_LOG(ERROR) << "PostBitPack memcpy_s qDatas_packed failed";
return RET_ERROR; return RET_ERROR;
} }
} else if (bitNum == 8) { } else if (bitNum == 8) {
if (0 != memcpy_s(rawDatas, shapeSize, &qDatas[0], shapeSize)) { if (EOK != memcpy_s(rawDatas, shapeSize, &qDatas[0], shapeSize)) {
MS_LOG(ERROR) << "PostBitPack memcpy_s qDatas failed"; MS_LOG(ERROR) << "PostBitPack memcpy_s qDatas failed";
return RET_ERROR; return RET_ERROR;
} }
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册