提交 afa26250 编写于 作者: Y yangfei

Add some functions for OpenCL

上级 3264ebc0
......@@ -85,7 +85,7 @@ class CLImage {
}
// Accessor for the tensor dimensions backing this CLImage.
// BUG FIX: the previous `return DDim();` returned a reference to a
// temporary (dangling) and made the real return unreachable.
const DDim &dims() const {
return tensorDims_;
}
std::vector<size_t> DefaultWorkSize() {
......
......@@ -30,6 +30,7 @@ class CLScope {
public:
// Constructs an OpenCL scope: grabs the process-wide CLEngine singleton,
// initializes it, and creates the context / command queue this scope owns.
CLScope() {
CLEngine *engin = CLEngine::Instance();
// Ensure platform/device setup has run before creating CL resources.
engin->Init();
context_ = engin->CreateContext();
command_queue_ = engin->CreateClCommandQueue();
}
......
......@@ -164,4 +164,6 @@ template class PaddleMobile<FPGA, Precision::FP32>;
template class PaddleMobile<GPU_MALI, Precision::FP32>;
template class PaddleMobile<GPU_CL, Precision::FP32>;
} // namespace paddle_mobile
......@@ -38,4 +38,8 @@ REGISTER_OPERATOR_MALI_GPU(elementwise_add, ops::ElementwiseAddOp);
#ifdef PADDLE_MOBILE_FPGA
#endif
#ifdef PADDLE_MOBILE_CL
REGISTER_OPERATOR_CL(elementwise_add, ops::ElementwiseAddOp);
#endif
#endif
......@@ -53,4 +53,8 @@ USE_OP_MALI_GPU(elementwise_add);
#ifdef PADDLE_MOBILE_FPGA
#endif
#ifdef PADDLE_MOBILE_CL
USE_OP_CL(elementwise_add);
#endif
#endif
......@@ -27,3 +27,6 @@ REGISTER_OPERATOR_MALI_GPU(feed, ops::FeedOp);
#ifdef PADDLE_MOBILE_FPGA
REGISTER_OPERATOR_FPGA(feed, ops::FeedOp);
#endif
#ifdef PADDLE_MOBILE_CL
REGISTER_OPERATOR_CL(feed, ops::FeedOp);
#endif
......@@ -43,7 +43,7 @@ class FeedOp : public framework::OperatorBase<DeviceType> {
#ifdef PADDLE_MOBILE_FPGA
void Init() {
void Init() {
Tensor *output = param_.Out();
fpga::format_fp16_ofm(output);
}
......@@ -73,11 +73,19 @@ class FeedOp : public framework::OperatorBase<DeviceType> {
}
#else
void Init() {}
#ifdef PADDLE_MOBILE_CL
void Init() {}
void RunImpl() {
}
#else
void Init() {}
void RunImpl() {
param_.Out()->ShareDataWith(*param_.InputX());
param_.Out()->set_lod(param_.InputX()->lod());
}
#endif
#endif
protected:
......@@ -96,3 +104,6 @@ USE_OP_MALI_GPU(feed);
#ifdef PADDLE_MOBILE_FPGA
USE_OP_FPGA(feed);
#endif
#ifdef PADDLE_MOBILE_CL
USE_OP_CL(feed);
#endif
......@@ -58,4 +58,8 @@ REGISTER_OPERATOR_MALI_GPU(fusion_conv_add, ops::FusionConvAddOp);
#ifdef PADDLE_MOBILE_FPGA
#endif
#ifdef PADDLE_MOBILE_CL
REGISTER_OPERATOR_CL(fusion_conv_add, ops::FusionConvAddOp);
#endif
#endif
......@@ -96,5 +96,7 @@ USE_OP_MALI_GPU(fusion_conv_add);
#endif
#ifdef PADDLE_MOBILE_FPGA
#endif
#ifdef PADDLE_MOBILE_CL
USE_OP_CL(fusion_conv_add);
#endif
#endif
......@@ -57,5 +57,7 @@ REGISTER_OPERATOR_CPU(fusion_conv_add_bn_relu, ops::FusionConvAddBNReluOp);
#ifdef PADDLE_MOBILE_FPGA
REGISTER_OPERATOR_FPGA(fusion_conv_add_bn_relu, ops::FusionConvAddBNReluOp);
#endif
#ifdef PADDLE_MOBILE_CL
REGISTER_OPERATOR_CL(fusion_conv_add_bn_relu, ops::FusionConvAddBNReluOp);
#endif
#endif
......@@ -99,6 +99,14 @@ static framework::FusionOpRegistrar fusion_conv_add_bn_relu_registrar(
#define FUSION_CONV_ADD_BN_RELU_REGISTER
#endif
#endif
#ifdef PADDLE_MOBILE_CL
#ifndef FUSION_CONV_ADD_BN_RELU_REGISTER
static framework::FusionOpRegistrar fusion_conv_add_bn_relu_registrar(
new FusionConvAddBNReluMatcher());
#define FUSION_CONV_ADD_BN_RELU_REGISTER
#endif
#endif
} // namespace operators
......@@ -113,4 +121,8 @@ USE_OP_CPU(fusion_conv_add_bn_relu);
USE_OP_FPGA(fusion_conv_add_bn_relu);
#endif
#ifdef PADDLE_MOBILE_CL
USE_OP_CL(fusion_conv_add_bn_relu);
#endif
#endif
// NOTE(review): despite its name, this kernel does NOT add two tensors —
// it maps each element as out[i] = in[i] * 0.1 + 102. This looks like a
// debug/placeholder transform (there is no second input buffer); confirm
// the intended semantics before relying on it.
__kernel void elementwise_add(__global float* in, __global float* out) {
// One work-item per element; global id indexes the flat buffers.
int num = get_global_id(0);
out[num] = in[num] * 0.1 + 102;
}
......@@ -12,10 +12,9 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#ifdef FUSION_CONVADDBNRELU_OP
#ifdef FUSION_CONVADDBN_OP
#include "operators/kernel/conv_add_bn_relu_kernel.h"
#include "operators/kernel/central-arm-func/conv_add_bn_relu_arm_func.h"
#include "operators/kernel/conv_add_bn_kernel.h"
namespace paddle_mobile {
namespace operators {
......
......@@ -15,7 +15,6 @@ limitations under the License. */
#ifdef FUSION_CONVADDBNRELU_OP
#include "operators/kernel/conv_add_bn_relu_kernel.h"
#include "operators/kernel/central-arm-func/conv_add_bn_relu_arm_func.h"
namespace paddle_mobile {
namespace operators {
......
......@@ -15,7 +15,6 @@ limitations under the License. */
#ifdef FUSION_CONVADD_OP
#include "operators/kernel/conv_add_kernel.h"
#include "../central-arm-func/conv_add_arm_func.h"
namespace paddle_mobile {
namespace operators {
......
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#ifdef ELEMENTWISEADD_OP
#include "operators/kernel/elementwise_add_kernel.h"
namespace paddle_mobile {
namespace operators {
// Registers the "elementwise_add" OpenCL kernel (built from
// elementwise_add_kernel.cl) with the CL helper so Compute() can launch it.
// Always reports success; compilation errors surface later at launch time.
template <>
bool ElementwiseAddKernel<GPU_CL, float>::Init(ElementwiseAddParam<GPU_CL> *param) {
this->cl_helper_.AddKernel("elementwise_add", "elementwise_add_kernel.cl");
return true;
}
// TODO(review): stub — the kernel is registered in Init() but never set up
// with arguments or enqueued here, so GPU_CL element-wise addition produces
// no output yet.
template <>
void ElementwiseAddKernel<GPU_CL, float>::Compute(const ElementwiseAddParam<GPU_CL> &param) {
}
// Explicit instantiation for the GPU_CL / float specialization.
template class ElementwiseAddKernel<GPU_CL, float>;
} // namespace operators
} // namespace paddle_mobile
#endif
......@@ -901,7 +901,7 @@ class FeedParam : public OpParam {
public:
FeedParam(const VariableNameMap &inputs, const VariableNameMap &outputs,
const AttributeMap &attrs, Scope *scope) {
input_x_ = InputXFrom<GType>(inputs, *scope);
input_x_ = InputXFrom<LoDTensor>(inputs, *scope);
out_ = OutFrom<GType>(outputs, *scope);
auto var = scope->Var("batch_size");
batch_size = var->GetValue<int>();
......@@ -911,7 +911,7 @@ class FeedParam : public OpParam {
const int BatchSize() const { return batch_size; }
private:
GType *input_x_;
LoDTensor *input_x_;
GType *out_;
int batch_size;
};
......
......@@ -253,6 +253,10 @@ else ()
ADD_EXECUTABLE(test-fssd net/test_mobilenet_025_fssd.cpp test_helper.h test_include.h)
target_link_libraries(test-fssd paddle-mobile)
# gen test
ADD_EXECUTABLE(test-mobilenetgpu net/test_mobilenet_GPU.cpp test_helper.h test_include.h)
target_link_libraries(test-mobilenetgpu paddle-mobile)
#add_library(test-lib-size SHARED common/test_lib_size.h common/test_lib_size.cpp)
......
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include <iostream>
#include "../test_helper.h"
#include "../test_include.h"
// Benchmark driver: loads MobileNet on the OpenCL (GPU_CL) backend, runs one
// prediction to report the argmax, then times 10 predictions after a 10-run
// warm-up. Returns 0 unconditionally; failures are reported on stdout.
int main() {
  paddle_mobile::PaddleMobile<paddle_mobile::GPU_CL> paddle_mobile;
  // paddle_mobile.SetThreadNum(4);
  auto time1 = time();
  // auto isok = paddle_mobile.Load(std::string(g_mobilenet_detect) + "/model",
  // std::string(g_mobilenet_detect) + "/params", true);
  auto isok = paddle_mobile.Load(g_mobilenet, false);
  if (isok) {
    auto time2 = time();
    // BUG FIX: was time_diff(time1, time1), which always printed 0 ms.
    std::cout << "load cost :" << time_diff(time1, time2) << "ms" << std::endl;
    std::vector<float> input;
    std::vector<int64_t> dims{1, 3, 224, 224};
    GetInput<float>(g_test_image_1x3x224x224_banana, &input, dims);
    auto vec_result = paddle_mobile.Predict(input, dims);
    // Report the top-1 class of the first (untimed) prediction.
    std::vector<float>::iterator biggest =
        std::max_element(std::begin(vec_result), std::end(vec_result));
    std::cout << " Max element is " << *biggest << " at position "
              << std::distance(std::begin(vec_result), biggest) << std::endl;
    // Warm up: run ten untimed predictions first.
    for (int i = 0; i < 10; ++i) {
      auto vec_result = paddle_mobile.Predict(input, dims);
    }
    auto time3 = time();
    for (int i = 0; i < 10; ++i) {
      auto vec_result = paddle_mobile.Predict(input, dims);
    }
    // Logs the first prediction's result (the loop-local results are discarded).
    DLOG << vec_result;
    auto time4 = time();
    std::cout << "predict cost :" << time_diff(time3, time4) / 10 << "ms"
              << std::endl;
  }
  std::cout << "如果结果Nan请查看: test/images/g_test_image_1x3x224x224_banana "
               "是否存在?"
            << std::endl;
  return 0;
}
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册