提交 ac55f025 编写于 作者: D dolphin8

Merge remote-tracking branch 'upstream/develop' into develop

...@@ -17,8 +17,10 @@ elseif(FPGA) ...@@ -17,8 +17,10 @@ elseif(FPGA)
add_definitions(-DPADDLE_MOBILE_FPGA) add_definitions(-DPADDLE_MOBILE_FPGA)
endif() endif()
set(CMAKE_CXX_FLAGS "-std=c++14 -O3 -s ${CMAKE_CXX_FLAGS}")
if (DEBUGING) if (DEBUGING)
set(CMAKE_BUILD_TYPE Debug) set(CMAKE_BUILD_TYPE Debug)
set(CMAKE_CXX_FLAGS_DEBUG "${CMAKE_CXX_FLAGS}")
else() else()
set(CMAKE_BUILD_TYPE Release) set(CMAKE_BUILD_TYPE Release)
endif () endif ()
...@@ -55,7 +57,6 @@ else () ...@@ -55,7 +57,6 @@ else ()
add_definitions(-DX86) add_definitions(-DX86)
endif() endif()
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -std=c++14")
set(CMAKE_VERBOSE_MAKEFILE ON) set(CMAKE_VERBOSE_MAKEFILE ON)
set(CMAKE_EXPORT_COMPILE_COMMANDS ON) set(CMAKE_EXPORT_COMPILE_COMMANDS ON)
set(CMAKE_ARCHIVE_OUTPUT_DIRECTORY build) set(CMAKE_ARCHIVE_OUTPUT_DIRECTORY build)
...@@ -126,7 +127,7 @@ else () ...@@ -126,7 +127,7 @@ else ()
add_definitions(-DCONV_OP) add_definitions(-DCONV_OP)
add_definitions(-DDEPTHWISECONV_OP) add_definitions(-DDEPTHWISECONV_OP)
add_definitions(-DELEMENTWISEADD_OP) add_definitions(-DELEMENTWISEADD_OP)
add_definitions(-DFUSIONCONVADD_OP) add_definitions(-DFUSION_CONVADD_OP)
add_definitions(-DCONVADDRELU_OP) add_definitions(-DCONVADDRELU_OP)
add_definitions(-DFUSION_FC_OP) add_definitions(-DFUSION_FC_OP)
add_definitions(-DLRN_OP) add_definitions(-DLRN_OP)
......
...@@ -96,74 +96,39 @@ class OpRegistry { ...@@ -96,74 +96,39 @@ class OpRegistry {
} }
}; };
#ifdef PADDLE_MOBILE_CPU #define REGISTER_OPERATOR(op_type, op_class, device_name, device_type) \
template <typename Dtype, typename T> \
#define REGISTER_OPERATOR_CPU(op_type, op_class) \ class _OpClass_##op_type##_##device_name : public op_class<Dtype, T> { \
template <typename Dtype, typename T> \ public: \
class _OpClass_##op_type##_cpu : public op_class<Dtype, T> { \ DEFINE_OP_CONSTRUCTOR(_OpClass_##op_type##_##device_name, op_class); \
public: \ }; \
DEFINE_OP_CONSTRUCTOR(_OpClass_##op_type##_cpu, op_class); \ static paddle_mobile::framework::OperatorRegistrar< \
}; \ device_type, _OpClass_##op_type##_##device_name<device_type, float>> \
static paddle_mobile::framework::OperatorRegistrar< \ __op_registrar_##op_type##_##device_name(#op_type); \
paddle_mobile::CPU, _OpClass_##op_type##_cpu<paddle_mobile::CPU, float>> \ int TouchOpRegistrar_##op_type##_##device_name() { \
__op_registrar_##op_type##__cpu(#op_type); \ __op_registrar_##op_type##_##device_name.Touch(); \
int TouchOpRegistrar_##op_type##_cpu() { \ return 0; \
__op_registrar_##op_type##__cpu.Touch(); \
return 0; \
} }
#define USE_OP_CPU(op_type) \ #define REGISTER_OPERATOR_CPU(op_type, op_class) \
extern int TouchOpRegistrar_##op_type##_cpu(); \ REGISTER_OPERATOR(op_type, op_class, cpu, paddle_mobile::CPU);
static int use_op_itself_##op_type##_ __attribute__((unused)) = \
TouchOpRegistrar_##op_type##_cpu()
#endif
#ifdef PADDLE_MOBILE_MALI_GPU
#define REGISTER_OPERATOR_MALI_GPU(op_type, op_class) \
template <typename Dtype, typename T> \
class _OpClass_##op_type##_mali_gpu : public op_class<Dtype, T> { \
public: \
DEFINE_OP_CONSTRUCTOR(_OpClass_##op_type##_mali_gpu, op_class); \
}; \
static paddle_mobile::framework::OperatorRegistrar< \
paddle_mobile::CPU, \
_OpClass_##op_type##_mali_gpu<paddle_mobile::CPU, float>> \
__op_registrar_##op_type##__mali_gpu(#op_type); \
int TouchOpRegistrar_##op_type##_mali_gpu() { \
__op_registrar_##op_type##__mali_gpu.Touch(); \
return 0; \
}
#define USE_OP_MALI_GPU(op_type) \ #define REGISTER_OPERATOR_MALI_GPU(op_type, op_class) \
extern int TouchOpRegistrar_##op_type##_mali_gpu(); \ REGISTER_OPERATOR(op_type, op_class, mali_gpu, paddle_mobile::GPU_MALI);
static int use_op_itself_##op_type##_ __attribute__((unused)) = \
TouchOpRegistrar_##op_type##_mali_gpu() #define REGISTER_OPERATOR_FPGA(op_type, op_class) \
REGISTER_OPERATOR(op_type, op_class, fpga, paddle_mobile::FPGA);
#endif
#define USE_OP(op_type, device_name) \
#ifdef PADDLE_MOBILE_FPGA extern int TouchOpRegistrar_##op_type##_##device_name(); \
#define REGISTER_OPERATOR_FPGA(op_type, op_class) \ static int use_op_itself_##op_type##_##device_name __attribute__((unused)) = \
template <typename Dtype, typename T> \ TouchOpRegistrar_##op_type##_##device_name()
class _OpClass_##op_type##_fpga : public op_class<Dtype, T> { \
public: \ #define USE_OP_CPU(op_type) USE_OP(op_type, cpu);
DEFINE_OP_CONSTRUCTOR(_OpClass_##op_type##_fpga, op_class); \
}; \
static paddle_mobile::framework::OperatorRegistrar< \
paddle_mobile::CPU, \
_OpClass_##op_type##_fpga<paddle_mobile::CPU, float>> \
__op_registrar_##op_type##__fpga(#op_type); \
int TouchOpRegistrar_##op_type##_fpga() { \
__op_registrar_##op_type##__fpga.Touch(); \
return 0; \
}
#define USE_OP_FPGA(op_type) \ #define USE_OP_MALI_GPU(op_type) USE_OP(op_type, mali_gpu);
extern int TouchOpRegistrar_##op_type##_fpga(); \
static int use_op_itself_##op_type##_ __attribute__((unused)) = \
TouchOpRegistrar_##op_type##_fpga()
#endif #define USE_OP_FPGA(op_type) USE_OP(op_type, fpga);
} // namespace framework } // namespace framework
} // namespace paddle_mobile } // namespace paddle_mobile
...@@ -131,7 +131,6 @@ class Tensor { ...@@ -131,7 +131,6 @@ class Tensor {
} }
PADDLE_MOBILE_ENFORCE(numel() >= 0, "the Tensor'snumel must >=0.") PADDLE_MOBILE_ENFORCE(numel() >= 0, "the Tensor'snumel must >=0.")
int64_t size = numel() * SizeOfType(type); int64_t size = numel() * SizeOfType(type);
/* some versions of boost::variant don't have operator!= */
if (holder_ == nullptr || holder_->size() < size + offset_) { if (holder_ == nullptr || holder_->size() < size + offset_) {
holder_.reset(new PlaceholderImpl(size, type)); holder_.reset(new PlaceholderImpl(size, type));
offset_ = 0; offset_ = 0;
......
...@@ -11,7 +11,7 @@ distributed under the License is distributed on an "AS IS" BASIS, ...@@ -11,7 +11,7 @@ distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and See the License for the specific language governing permissions and
limitations under the License. */ limitations under the License. */
#define FUSION_CONVADD_OP
#ifdef FUSION_CONVADD_OP #ifdef FUSION_CONVADD_OP
#pragma once #pragma once
......
...@@ -18,6 +18,27 @@ limitations under the License. */ ...@@ -18,6 +18,27 @@ limitations under the License. */
namespace paddle_mobile { namespace paddle_mobile {
namespace operators { namespace operators {
/**
 * Broadcasts a 1-D bias tensor, in place, to the full shape `dDim`.
 *
 * After the call `bias` has dims `dDim`, and every element whose index along
 * `axis` is `c` holds the original `bias[c]`.
 *
 * @param bias  1-D tensor of floats; its length must equal `dDim[axis]`.
 *              It is resized to `dDim` and overwritten.
 * @param axis  Dimension of `dDim` that the bias values are aligned with.
 * @param dDim  Target shape to expand the bias to.
 */
void expand_bias(Tensor &bias, int axis, const DDim &dDim) {
  PADDLE_MOBILE_ENFORCE(bias.dims().size() == 1,
                        "the bias tensor's dims size != 1")
  const int axis_size = static_cast<int>(dDim[axis]);
  // Guard the reads below: one bias value is needed per position along `axis`.
  PADDLE_MOBILE_ENFORCE(bias.numel() == axis_size,
                        "the bias tensor's length != dDim[axis]")

  // Snapshot the original values before resizing: mutable_data() may replace
  // the underlying buffer when the tensor grows, which would leave a raw
  // pointer obtained beforehand pointing at storage this tensor no longer
  // owns.
  const float *bias_ptr = bias.data<float>();
  const std::vector<float> bias_values(bias_ptr, bias_ptr + axis_size);

  // outer = product of dims [0, axis], inner = product of dims (axis, end).
  DDim outer_ddim = paddle_mobile::framework::slice_ddim(dDim, 0, axis + 1);
  DDim inner_ddim =
      paddle_mobile::framework::slice_ddim(dDim, axis + 1, dDim.size());
  const int outer_size = paddle_mobile::framework::product(outer_ddim);
  const int inner_size = paddle_mobile::framework::product(inner_ddim);

  bias.Resize(dDim);
  float *new_ptr = bias.mutable_data<float>();

  for (int i = 0; i < outer_size; ++i) {
    // `i` enumerates dims [0, axis] in row-major order, so the position along
    // `axis` is i % axis_size. (The previous i * axis_size / outer_size was
    // only correct when the dims before `axis` multiplied to 1.)
    const float v_bias = bias_values[i % axis_size];
    float *row = new_ptr + i * inner_size;
    for (int j = 0; j < inner_size; ++j) {
      row[j] = v_bias;
    }
  }
}
template <> template <>
void ConvAddKernel<CPU, float>::Compute( void ConvAddKernel<CPU, float>::Compute(
const FushionConvAddParam &param) const { const FushionConvAddParam &param) const {
...@@ -25,15 +46,16 @@ void ConvAddKernel<CPU, float>::Compute( ...@@ -25,15 +46,16 @@ void ConvAddKernel<CPU, float>::Compute(
const Tensor *input = param.Input(); const Tensor *input = param.Input();
Tensor filter = *param.Filter(); Tensor filter = *param.Filter();
Tensor bias = *param.Bias();
int axis = param.Axis();
Tensor *output = param.Output(); Tensor *output = param.Output();
output->mutable_data<float>(); expand_bias(bias, axis, output->dims());
output->ShareDataWith(bias);
int groups = param.Groups(); int groups = param.Groups();
std::vector<int> strides = param.Strides(); std::vector<int> strides = param.Strides();
std::vector<int> paddings = param.Paddings(); std::vector<int> paddings = param.Paddings();
std::vector<int> dilations = param.Dilations(); std::vector<int> dilations = param.Dilations();
// DLOG << " compute end get Attrs " << strides[0];
const int batch_size = static_cast<int>(input->dims()[0]); const int batch_size = static_cast<int>(input->dims()[0]);
std::vector<int64_t> filter_shape_vec(framework::vectorize(filter.dims())); std::vector<int64_t> filter_shape_vec(framework::vectorize(filter.dims()));
...@@ -66,7 +88,6 @@ void ConvAddKernel<CPU, float>::Compute( ...@@ -66,7 +88,6 @@ void ConvAddKernel<CPU, float>::Compute(
framework::DDim filter_matrix_shape = {filter.dims()[0], framework::DDim filter_matrix_shape = {filter.dims()[0],
filter.numel() / filter.dims()[0]}; filter.numel() / filter.dims()[0]};
filter.Resize(filter_matrix_shape); filter.Resize(filter_matrix_shape);
DLOG << " filter.dims() = " << filter.dims();
framework::DDim output_matrix_shape = { framework::DDim output_matrix_shape = {
output->dims()[1], output->dims()[1],
output->numel() / (output->dims()[0] * output->dims()[1])}; output->numel() / (output->dims()[0] * output->dims()[1])};
...@@ -105,7 +126,7 @@ void ConvAddKernel<CPU, float>::Compute( ...@@ -105,7 +126,7 @@ void ConvAddKernel<CPU, float>::Compute(
Tensor filter_slice = filter.Slice(g * out_step, (g + 1) * out_step); Tensor filter_slice = filter.Slice(g * out_step, (g + 1) * out_step);
math::matmul<float>(filter_slice, false, col_matrix, false, math::matmul<float>(filter_slice, false, col_matrix, false,
static_cast<float>(1), &out_slice, static_cast<float>(1), &out_slice,
static_cast<float>(0)); static_cast<float>(1));
} }
} }
} }
......
...@@ -17,6 +17,7 @@ limitations under the License. */ ...@@ -17,6 +17,7 @@ limitations under the License. */
#pragma once #pragma once
#include <vector> #include <vector>
#include "framework/ddim.h"
#include "framework/operator.h" #include "framework/operator.h"
#include "operators/math/im2col.h" #include "operators/math/im2col.h"
#include "operators/math/math_function.h" #include "operators/math/math_function.h"
...@@ -26,6 +27,7 @@ limitations under the License. */ ...@@ -26,6 +27,7 @@ limitations under the License. */
namespace paddle_mobile { namespace paddle_mobile {
namespace operators { namespace operators {
using framework::DDim;
using framework::OpKernelBase; using framework::OpKernelBase;
template <typename DeviceType, typename T> template <typename DeviceType, typename T>
......
...@@ -15,7 +15,6 @@ build_for_mac() { ...@@ -15,7 +15,6 @@ build_for_mac() {
fi fi
PLATFORM="x86" PLATFORM="x86"
MODE="Release" MODE="Release"
CXX_FLAGS="-std=c++11 -O3 -s"
BUILD_DIR=../build/release/"${PLATFORM}" BUILD_DIR=../build/release/"${PLATFORM}"
mkdir -p ${BUILD_DIR}/build mkdir -p ${BUILD_DIR}/build
...@@ -25,7 +24,6 @@ build_for_mac() { ...@@ -25,7 +24,6 @@ build_for_mac() {
cmake .. \ cmake .. \
-B"${BUILD_DIR}" \ -B"${BUILD_DIR}" \
-DCMAKE_BUILD_TYPE="${MODE}" \ -DCMAKE_BUILD_TYPE="${MODE}" \
-DCMAKE_CXX_FLAGS="${CXX_FLAGS}" \
-DIS_MAC=true -DIS_MAC=true
cd ${BUILD_DIR} cd ${BUILD_DIR}
...@@ -46,11 +44,11 @@ build_for_android() { ...@@ -46,11 +44,11 @@ build_for_android() {
if [ "${PLATFORM}" = "arm-v7a" ]; then if [ "${PLATFORM}" = "arm-v7a" ]; then
ABI="armeabi-v7a with NEON" ABI="armeabi-v7a with NEON"
ARM_PLATFORM="V7" ARM_PLATFORM="V7"
CXX_FLAGS="-O3 -std=c++11 -s -march=armv7-a -mfpu=neon -mfloat-abi=softfp -pie -fPIE -w -Wno-error=format-security" CXX_FLAGS="-march=armv7-a -mfpu=neon -mfloat-abi=softfp -pie -fPIE -w -Wno-error=format-security"
elif [ "${PLATFORM}" = "arm-v8a" ]; then elif [ "${PLATFORM}" = "arm-v8a" ]; then
ABI="arm64-v8a" ABI="arm64-v8a"
ARM_PLATFORM="V8" ARM_PLATFORM="V8"
CXX_FLAGS="-O3 -std=c++11 -s -march=armv8-a -pie -fPIE -w -Wno-error=format-security -llog" CXX_FLAGS="-march=armv8-a -pie -fPIE -w -Wno-error=format-security -llog"
else else
echo "unknown platform!" echo "unknown platform!"
exit -1 exit -1
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册