Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
PaddlePaddle
Paddle
提交
b5c410a2
P
Paddle
项目概览
PaddlePaddle
/
Paddle
1 年多 前同步成功
通知
2302
Star
20931
Fork
5422
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
1423
列表
看板
标记
里程碑
合并请求
543
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
P
Paddle
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
1,423
Issue
1,423
列表
看板
标记
里程碑
合并请求
543
合并请求
543
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
提交
b5c410a2
编写于
5月 31, 2019
作者:
L
lijianshe02
提交者:
GitHub
5月 31, 2019
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
add ops and kernels that mul, scale, fc, relu, softmax, dropout, elem… (#17711)
* fix conflicts * fix kernel registry related bugs test=develop
上级
89b0466c
变更
25
隐藏空白更改
内联
并排
Showing
25 changed file
with
668 addition
and
35 deletion
+668
-35
paddle/fluid/framework/framework.proto
paddle/fluid/framework/framework.proto
+1
-1
paddle/fluid/lite/api/CMakeLists.txt
paddle/fluid/lite/api/CMakeLists.txt
+8
-4
paddle/fluid/lite/api/cxx_api_test.cc
paddle/fluid/lite/api/cxx_api_test.cc
+26
-5
paddle/fluid/lite/core/context.h
paddle/fluid/lite/core/context.h
+5
-1
paddle/fluid/lite/core/mir/runtime_context_assign_pass.cc
paddle/fluid/lite/core/mir/runtime_context_assign_pass.cc
+12
-1
paddle/fluid/lite/core/op_registry.cc
paddle/fluid/lite/core/op_registry.cc
+4
-0
paddle/fluid/lite/kernels/x86/CMakeLists.txt
paddle/fluid/lite/kernels/x86/CMakeLists.txt
+13
-2
paddle/fluid/lite/kernels/x86/dropout_compute.cc
paddle/fluid/lite/kernels/x86/dropout_compute.cc
+87
-0
paddle/fluid/lite/kernels/x86/elementwise_compute.cc
paddle/fluid/lite/kernels/x86/elementwise_compute.cc
+34
-2
paddle/fluid/lite/kernels/x86/fc_compute.cc
paddle/fluid/lite/kernels/x86/fc_compute.cc
+112
-0
paddle/fluid/lite/kernels/x86/fill_constant_compute.cc
paddle/fluid/lite/kernels/x86/fill_constant_compute.cc
+1
-1
paddle/fluid/lite/kernels/x86/mean_compute.cc
paddle/fluid/lite/kernels/x86/mean_compute.cc
+2
-2
paddle/fluid/lite/kernels/x86/mul_compute.cc
paddle/fluid/lite/kernels/x86/mul_compute.cc
+2
-2
paddle/fluid/lite/kernels/x86/relu_compute.cc
paddle/fluid/lite/kernels/x86/relu_compute.cc
+56
-0
paddle/fluid/lite/kernels/x86/scale_compute.cc
paddle/fluid/lite/kernels/x86/scale_compute.cc
+62
-0
paddle/fluid/lite/kernels/x86/softmax_compute.cc
paddle/fluid/lite/kernels/x86/softmax_compute.cc
+90
-0
paddle/fluid/lite/operators/CMakeLists.txt
paddle/fluid/lite/operators/CMakeLists.txt
+5
-2
paddle/fluid/lite/operators/dropout_op.cc
paddle/fluid/lite/operators/dropout_op.cc
+75
-0
paddle/fluid/lite/operators/elementwise_ops.cc
paddle/fluid/lite/operators/elementwise_ops.cc
+3
-3
paddle/fluid/lite/operators/fc_op_test.cc
paddle/fluid/lite/operators/fc_op_test.cc
+7
-1
paddle/fluid/lite/operators/op_params.h
paddle/fluid/lite/operators/op_params.h
+56
-2
paddle/fluid/lite/operators/relu_op.cc
paddle/fluid/lite/operators/relu_op.cc
+2
-3
paddle/fluid/lite/operators/relu_op.h
paddle/fluid/lite/operators/relu_op.h
+1
-1
paddle/fluid/lite/utils/varient.h
paddle/fluid/lite/utils/varient.h
+2
-1
paddle/fluid/memory/CMakeLists.txt
paddle/fluid/memory/CMakeLists.txt
+2
-1
未找到文件。
paddle/fluid/framework/framework.proto
浏览文件 @
b5c410a2
...
...
@@ -13,7 +13,7 @@ See the License for the specific language governing permissions and
limitations under the License. */
syntax
=
"proto2"
;
option
optimize_for
=
LITE_RUNTIME
;
//
option optimize_for = LITE_RUNTIME;
package
paddle
.
framework.proto
;
// Any incompatible changes to ProgramDesc and its dependencies should
...
...
paddle/fluid/lite/api/CMakeLists.txt
浏览文件 @
b5c410a2
...
...
@@ -25,10 +25,14 @@ set(LITE_URL "http://paddle-inference-dist.bj.bcebos.com" CACHE STRING "inferenc
set
(
LITE_DEMO_INSTALL_DIR
"
${
THIRD_PARTY_PATH
}
/inference_demo"
CACHE STRING
"A path setting inference demo download directories."
)
# lite_cc_test(test_cxx_api_lite SRCS cxx_api_test.cc
# DEPS cxx_api_lite model_parser_lite target_wrapper_host
# ${ops_lite} ${host_kernels} ARGS --model_dir=${LITE_MODEL_DIR}/lite_naive_model
# --optimized_model=${LITE_MODEL_DIR}/lite_naive_model_opt SERIAL)
if
(
NOT LITE_WITH_LIGHT_WEIGHT_FRAMEWORK
)
lite_cc_test
(
test_cxx_api_lite SRCS cxx_api_test.cc
DEPS cxx_api_lite model_parser_lite target_wrapper_host
${
ops_lite
}
${
host_kernels
}
${
x86_kernels
}
ARGS --model_dir=
${
LITE_MODEL_DIR
}
/lite_naive_model
--optimized_model=
${
LITE_MODEL_DIR
}
/lite_naive_model_opt SERIAL
)
add_dependencies
(
test_cxx_api_lite extern_lite_download_lite_naive_model_tar_gz
)
endif
(
NOT LITE_WITH_LIGHT_WEIGHT_FRAMEWORK
)
if
(
WITH_TESTING
)
lite_download_and_uncompress
(
${
LITE_MODEL_DIR
}
${
LITE_URL
}
"lite_naive_model.tar.gz"
)
...
...
paddle/fluid/lite/api/cxx_api_test.cc
浏览文件 @
b5c410a2
...
...
@@ -32,7 +32,8 @@ namespace lite {
TEST
(
CXXApi
,
test
)
{
lite
::
ExecutorLite
predictor
;
#ifndef LITE_WITH_CUDA
std
::
vector
<
Place
>
valid_places
({
Place
{
TARGET
(
kHost
),
PRECISION
(
kFloat
)}});
std
::
vector
<
Place
>
valid_places
({
Place
{
TARGET
(
kHost
),
PRECISION
(
kFloat
)},
Place
{
TARGET
(
kX86
),
PRECISION
(
kFloat
)}});
#else
std
::
vector
<
Place
>
valid_places
({
Place
{
TARGET
(
kHost
),
PRECISION
(
kFloat
),
DATALAYOUT
(
kNCHW
)},
...
...
@@ -44,7 +45,8 @@ TEST(CXXApi, test) {
});
#endif
predictor
.
Build
(
FLAGS_model_dir
,
Place
{
TARGET
(
kCUDA
),
PRECISION
(
kFloat
)},
predictor
.
Build
(
FLAGS_model_dir
,
Place
{
TARGET
(
kX86
),
PRECISION
(
kFloat
)},
// origin cuda
valid_places
);
auto
*
input_tensor
=
predictor
.
GetInput
(
0
);
...
...
@@ -69,7 +71,8 @@ TEST(CXXApi, test) {
#ifndef LITE_WITH_LIGHT_WEIGHT_FRAMEWORK
TEST
(
CXXApi
,
save_model
)
{
lite
::
ExecutorLite
predictor
;
std
::
vector
<
Place
>
valid_places
({
Place
{
TARGET
(
kHost
),
PRECISION
(
kFloat
)}});
std
::
vector
<
Place
>
valid_places
({
Place
{
TARGET
(
kHost
),
PRECISION
(
kFloat
)},
Place
{
TARGET
(
kX86
),
PRECISION
(
kFloat
)}});
predictor
.
Build
(
FLAGS_model_dir
,
Place
{
TARGET
(
kCUDA
),
PRECISION
(
kFloat
)},
valid_places
);
...
...
@@ -78,7 +81,7 @@ TEST(CXXApi, save_model) {
#endif // LITE_WITH_LIGHT_WEIGHT_FRAMEWORK
#ifndef LITE_WITH_LIGHT_WEIGHT_FRAMEWORK
TEST
(
CXXTrainer
,
train
)
{
/*
TEST(CXXTrainer, train) {
Place prefer_place({TARGET(kHost), PRECISION(kFloat), DATALAYOUT(kNCHW)});
std::vector<Place> valid_places({prefer_place});
auto scope = std::make_shared<lite::Scope>();
...
...
@@ -108,7 +111,7 @@ TEST(CXXTrainer, train) {
data0[0] = 0;
exe.Run();
}
}
*/
#endif // LITE_WITH_LIGHT_WEIGHT_FRAMEWORK
}
// namespace lite
...
...
@@ -116,13 +119,31 @@ TEST(CXXTrainer, train) {
USE_LITE_OP
(
mul
);
USE_LITE_OP
(
fc
);
USE_LITE_OP
(
relu
);
USE_LITE_OP
(
scale
);
USE_LITE_OP
(
feed
);
USE_LITE_OP
(
fetch
);
USE_LITE_OP
(
io_copy
);
USE_LITE_OP
(
elementwise_add
)
USE_LITE_OP
(
elementwise_sub
)
USE_LITE_OP
(
square
)
USE_LITE_OP
(
softmax
)
USE_LITE_OP
(
dropout
)
USE_LITE_KERNEL
(
feed
,
kHost
,
kAny
,
kAny
,
def
);
USE_LITE_KERNEL
(
fetch
,
kHost
,
kAny
,
kAny
,
def
);
#ifdef LITE_WITH_X86
USE_LITE_KERNEL
(
relu
,
kX86
,
kFloat
,
kNCHW
,
def
);
USE_LITE_KERNEL
(
mul
,
kX86
,
kFloat
,
kNCHW
,
def
);
USE_LITE_KERNEL
(
fc
,
kX86
,
kFloat
,
kNCHW
,
def
);
USE_LITE_KERNEL
(
scale
,
kX86
,
kFloat
,
kNCHW
,
def
);
USE_LITE_KERNEL
(
square
,
kX86
,
kFloat
,
kNCHW
,
def
);
USE_LITE_KERNEL
(
elementwise_sub
,
kX86
,
kFloat
,
kNCHW
,
def
);
USE_LITE_KERNEL
(
elementwise_add
,
kX86
,
kFloat
,
kNCHW
,
def
);
USE_LITE_KERNEL
(
softmax
,
kX86
,
kFloat
,
kNCHW
,
def
);
USE_LITE_KERNEL
(
dropout
,
kX86
,
kFloat
,
kNCHW
,
def
);
#endif
#ifdef LITE_WITH_CUDA
USE_LITE_KERNEL
(
mul
,
kCUDA
,
kFloat
,
kNCHW
,
def
);
USE_LITE_KERNEL
(
io_copy
,
kCUDA
,
kAny
,
kAny
,
host_to_device
);
...
...
paddle/fluid/lite/core/context.h
浏览文件 @
b5c410a2
...
...
@@ -95,7 +95,11 @@ struct CUDAContext {
#ifdef LITE_WITH_X86
struct
X86Context
{
// overall information
X86Context
()
{
x86_device_context
.
reset
(
new
::
paddle
::
platform
::
CPUDeviceContext
);
x86_execution_context
.
reset
(
new
::
paddle
::
framework
::
ExecutionContext
(
*
x86_device_context
));
}
// kernel information
// legacy info.
...
...
paddle/fluid/lite/core/mir/runtime_context_assign_pass.cc
浏览文件 @
b5c410a2
...
...
@@ -34,9 +34,13 @@ class RuntimeContextAssignPass : public StmtPass {
auto
&
inst
=
node
.
AsStmt
();
switch
(
inst
.
picked_kernel
().
target
())
{
case
TARGET
(
kHost
):
case
TARGET
(
kX86
):
inst
.
picked_kernel
().
SetContext
(
NewHostContext
());
break
;
#ifdef LITE_WITH_X86
case
TARGET
(
kX86
):
inst
.
picked_kernel
().
SetContext
(
NewX86Context
());
break
;
#endif
#ifdef LITE_WITH_CUDA
case
TARGET
(
kCUDA
):
inst
.
picked_kernel
().
SetContext
(
NewCudaContext
());
...
...
@@ -61,6 +65,13 @@ class RuntimeContextAssignPass : public StmtPass {
return
ctx
;
}
#ifdef LITE_WITH_X86
// Allocates a new KernelContext, calls As<X86Context>() on it and hands
// ownership of the context back to the caller.
std::unique_ptr<KernelContext> NewX86Context() {
  auto kernel_ctx = std::unique_ptr<KernelContext>(new KernelContext);
  kernel_ctx->As<X86Context>();
  return kernel_ctx;
}
#endif
#ifdef LITE_WITH_ARM
std
::
unique_ptr
<
KernelContext
>
NewARMContext
()
{
...
...
paddle/fluid/lite/core/op_registry.cc
浏览文件 @
b5c410a2
...
...
@@ -91,6 +91,10 @@ KernelRegistry::KernelRegistry()
INIT_FOR
(
kHost
,
kAny
,
kNCHW
);
INIT_FOR
(
kHost
,
kAny
,
kAny
);
INIT_FOR
(
kX86
,
kFloat
,
kNCHW
);
INIT_FOR
(
kX86
,
kAny
,
kNCHW
);
INIT_FOR
(
kX86
,
kAny
,
kAny
);
INIT_FOR
(
kARM
,
kFloat
,
kNCHW
);
INIT_FOR
(
kARM
,
kAny
,
kNCHW
);
INIT_FOR
(
kARM
,
kAny
,
kAny
);
...
...
paddle/fluid/lite/kernels/x86/CMakeLists.txt
浏览文件 @
b5c410a2
...
...
@@ -3,18 +3,29 @@ if(NOT LITE_WITH_X86)
endif
()
cc_library
(
activation_compute_x86 SRCS activation_compute.cc DEPS
${
lite_kernel_deps
}
activation_op
)
cc_library
(
elementwise_compute_x86 SRCS elementwise_compute.cc DEPS
${
lite_kernel_deps
}
)
cc_library
(
mean_compute_x86 SRCS mean_compute.cc DEPS
${
lite_kernel_deps
}
)
cc_library
(
fill_constant_compute_x86 SRCS fill_constant_compute.cc DEPS
${
lite_kernel_deps
}
)
cc_library
(
mul_compute_x86 SRCS mul_compute.cc DEPS
${
lite_kernel_deps
}
)
cc_library
(
sgd_compute_x86 SRCS sgd_compute.cc DEPS
${
lite_kernel_deps
}
)
cc_library
(
fc_compute_x86 SRCS fc_compute.cc DEPS
${
lite_kernel_deps
}
)
cc_library
(
mul_compute_x86 SRCS mul_compute.cc DEPS
${
lite_kernel_deps
}
)
cc_library
(
relu_compute_x86 SRCS relu_compute.cc DEPS
${
lite_kernel_deps
}
)
cc_library
(
scale_compute_x86 SRCS scale_compute.cc DEPS
${
lite_kernel_deps
}
)
cc_library
(
elementwise_compute_x86 SRCS elementwise_compute.cc DEPS
${
lite_kernel_deps
}
elementwise_sub_op elementwise_add_op
)
cc_library
(
softmax_compute_x86 SRCS softmax_compute.cc DEPS
${
lite_kernel_deps
}
softmax
)
cc_library
(
dropout_compute_x86 SRCS dropout_compute.cc DEPS
${
lite_kernel_deps
}
)
set
(
x86_kernels
activation_compute_x86
elementwise_compute_x86
mean_compute_x86
fill_constant_compute_x86
mul_compute_x86
relu_compute_x86
fc_compute_x86
scale_compute_x86
softmax_compute_x86
dropout_compute_x86
)
set
(
x86_kernels
"
${
x86_kernels
}
"
CACHE INTERNAL
"x86 kernels"
)
paddle/fluid/lite/kernels/x86/dropout_compute.cc
0 → 100644
浏览文件 @
b5c410a2
// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include <random>
#include <string>
#include "paddle/fluid/framework/eigen.h"
#include "paddle/fluid/framework/operator.h"
#include "paddle/fluid/lite/core/kernel.h"
#include "paddle/fluid/lite/core/op_registry.h"
namespace
paddle
{
namespace
lite
{
namespace
kernels
{
namespace
x86
{
// Local shorthand for framework::EigenMatrix. The layout defaults to
// Eigen::RowMajor and the index type to Eigen::DenseIndex.
template <typename Scalar, int Layout = Eigen::RowMajor,
          typename Index = Eigen::DenseIndex>
using EigenMatrix = framework::EigenMatrix<Scalar, Layout, Index>;
// X86 float kernel for the `dropout` operator.
//
// When param.is_test is false, every element is dropped (output and mask set
// to 0) if a uniform sample in [0, 1) falls below param.dropout_prob.
// Surviving elements are either divided by (1 - dropout_prob) when
// param.dropout_implementation == "upscale_in_train", or copied unchanged.
//
// When param.is_test is true, the input is copied unchanged for
// "upscale_in_train" and multiplied by (1 - dropout_prob) otherwise.
template <typename T>
class DropoutCompute : public KernelLite<TARGET(kX86), PRECISION(kFloat)> {
 public:
  using param_t = operators::DropoutParam;

  void Run() override {
    auto& param = *param_.get_mutable<operators::DropoutParam>();
    const auto* x_data = param.x->data<T>();
    auto* out_data = param.output->template mutable_data<T>();

    // Loop-invariant values: resolve the mode and the keep factor once
    // instead of comparing strings for every element.
    const bool upscale_in_train =
        param.dropout_implementation == "upscale_in_train";
    const T keep_factor = static_cast<T>(1.0f - param.dropout_prob);

    if (!param.is_test) {
      auto* mask_data = param.mask->template mutable_data<T>();
      std::random_device rnd;
      std::minstd_rand engine;
      // A fixed seed makes the mask reproducible; otherwise draw one from the
      // random device.
      int seed = param.fix_seed ? param.seed : rnd();
      engine.seed(seed);
      std::uniform_real_distribution<float> dist(0, 1);

      size_t size = framework::product(param.mask->dims().data());
      for (size_t i = 0; i < size; ++i) {
        if (dist(engine) < param.dropout_prob) {
          mask_data[i] = 0;
          out_data[i] = 0;
        } else if (upscale_in_train) {
          mask_data[i] = 1.0f / keep_factor;
          out_data[i] = x_data[i] / keep_factor;
        } else {
          mask_data[i] = 1;
          out_data[i] = x_data[i];
        }
      }
    } else {
      auto X = EigenMatrix<T>::Reshape(param.x->raw_tensor(), 1);
      auto Y = EigenMatrix<T>::Reshape(param.output->raw_tensor(), 1);
      // Keep the device context alive for as long as `place` is used. The
      // previous code took the device from a temporary context that was
      // destroyed at the end of the statement, leaving `place` dangling.
      platform::CPUDeviceContext dev_ctx;
      auto& place = *dev_ctx.eigen_device();
      if (upscale_in_train) {
        Y.device(place) = X;
      } else {
        Y.device(place) = X * keep_factor;
      }
    }
  }

  virtual ~DropoutCompute() = default;
};
}
// namespace x86
}
// namespace kernels
}
// namespace lite
}
// namespace paddle
// Registers DropoutCompute<float> as the `dropout` kernel for kX86 / kFloat /
// kNCHW and binds its "X" input and "Mask" / "Out" outputs to X86 tensors.
REGISTER_LITE_KERNEL(dropout, kX86, kFloat, kNCHW,
                     paddle::lite::kernels::x86::DropoutCompute<float>, def)
    .BindInput("X", {LiteType::GetTensorTy(TARGET(kX86))})
    .BindOutput("Mask", {LiteType::GetTensorTy(TARGET(kX86))})
    .BindOutput("Out", {LiteType::GetTensorTy(TARGET(kX86))})
    .Finalize();
paddle/fluid/lite/kernels/x86/elementwise_compute.cc
浏览文件 @
b5c410a2
...
...
@@ -30,6 +30,11 @@ struct SubFunctor {
inline
HOSTDEVICE
T
operator
()(
T
a
,
T
b
)
const
{
return
a
-
b
;
}
};
// Binary functor that returns the sum of its two operands.
template <typename T>
struct AddFunctor {
  inline HOSTDEVICE T operator()(T lhs, T rhs) const { return lhs + rhs; }
};
template
<
typename
T
>
class
ElementwiseSubCompute
:
public
KernelLite
<
TARGET
(
kX86
),
PRECISION
(
kFloat
)
>
{
...
...
@@ -67,10 +72,9 @@ class ElementwiseSubGradCompute
:
public
KernelLite
<
TARGET
(
kX86
),
PRECISION
(
kFloat
)
>
{
public:
using
param_t
=
operators
::
ElementwiseGradParam
;
void
Run
()
override
{
auto
&
param
=
*
param_
.
get_mutable
<
param_t
>
();
auto
&
context
=
c
ontext
_
->
As
<
X86Context
>
();
auto
&
context
=
c
tx
_
->
As
<
X86Context
>
();
CHECK
(
context
.
x86_device_context
);
param
.
X_grad
->
template
mutable_data
<
T
>();
...
...
@@ -89,6 +93,26 @@ class ElementwiseSubGradCompute
virtual
~
ElementwiseSubGradCompute
()
=
default
;
};
// X86 float kernel for `elementwise_add`: delegates to
// paddle::operators::ElementwiseComputeEx with AddFunctor<T>, writing
// X (+) Y along param.axis into Out.
template <typename T>
class ElementwiseAddCompute
    : public KernelLite<TARGET(kX86), PRECISION(kFloat)> {
 public:
  using param_t = operators::ElementwiseParam;

  void Run() override {
    auto& args = *param_.get_mutable<param_t>();
    auto& x86_ctx = ctx_->As<X86Context>();
    CHECK(x86_ctx.x86_device_context);

    // Make sure the output buffer exists before the functor writes to it.
    args.Out->template mutable_data<T>();

    using DeviceCtx = platform::CPUDeviceContext;
    paddle::operators::ElementwiseComputeEx<AddFunctor<T>, DeviceCtx, T>(
        *x86_ctx.x86_execution_context, &args.X->raw_tensor(),
        &args.Y->raw_tensor(), args.axis, AddFunctor<T>(),
        &args.Out->raw_tensor());
  }

  virtual ~ElementwiseAddCompute() = default;
};
}
// namespace x86
}
// namespace kernels
}
// namespace lite
...
...
@@ -113,3 +137,11 @@ REGISTER_LITE_KERNEL(elementwise_sub_grad, kX86, kFloat, kNCHW,
.
BindOutput
(
paddle
::
framework
::
GradVarName
(
"Y"
),
{
LiteType
::
GetTensorTy
(
TARGET
(
kX86
))})
.
Finalize
();
// Registers ElementwiseAddCompute<float> as the `elementwise_add` kernel for
// kX86 / kFloat / kNCHW with X86 tensors for "X", "Y" and "Out".
REGISTER_LITE_KERNEL(elementwise_add, kX86, kFloat, kNCHW,
                     paddle::lite::kernels::x86::ElementwiseAddCompute<float>,
                     def)
    .BindInput("X", {LiteType::GetTensorTy(TARGET(kX86))})
    .BindInput("Y", {LiteType::GetTensorTy(TARGET(kX86))})
    .BindOutput("Out", {LiteType::GetTensorTy(TARGET(kX86))})
    .Finalize();
paddle/fluid/lite/kernels/x86/fc_compute.cc
0 → 100644
浏览文件 @
b5c410a2
// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include <Eigen/Core>
#include "paddle/fluid/framework/eigen.h"
#include "paddle/fluid/framework/operator.h"
#include "paddle/fluid/lite/core/kernel.h"
#include "paddle/fluid/lite/core/op_lite.h"
#include "paddle/fluid/lite/core/op_registry.h"
#include "paddle/fluid/lite/core/type_system.h"
#include "paddle/fluid/lite/operators/fc_op.h"
namespace
paddle
{
namespace
lite
{
namespace
kernels
{
namespace
x86
{
template
<
typename
T
>
void
fc_compute_eigen
(
const
T
*
x
,
int
x_w
,
int
x_h
,
//
const
T
*
w
,
int
w_w
,
int
w_h
,
//
const
T
*
b
,
//
T
*
out
)
{
using
matrix_t
=
Eigen
::
Matrix
<
T
,
Eigen
::
Dynamic
,
Eigen
::
Dynamic
,
Eigen
::
RowMajor
>
;
Eigen
::
Map
<
const
matrix_t
>
X
(
x
,
x_h
,
x_w
);
Eigen
::
Map
<
const
matrix_t
>
W
(
w
,
w_h
,
w_w
);
Eigen
::
Map
<
matrix_t
>
Out
(
out
,
x_h
,
w_h
);
Out
=
X
*
W
.
transpose
();
if
(
b
)
{
Eigen
::
Map
<
const
Eigen
::
Matrix
<
T
,
Eigen
::
Dynamic
,
1
>>
B
(
b
,
w_h
);
Out
=
Out
.
array
().
rowwise
()
+
B
.
transpose
().
array
();
}
}
// Returns the inner product of the first `dim` elements of `x` and `y`.
// The accumulator is value-initialised, so the result is T{} when dim <= 0.
template <typename T>
__attribute__((optimize("unroll-loops")))  //
T dot(const T* x, const T* y, int dim) {
  T acc{};
  int idx = 0;
  while (idx < dim) {
    acc += x[idx] * y[idx];
    ++idx;
  }
  return acc;
}
// Reference fully-connected forward pass without Eigen.
//
// `x` is read as x_h rows of x_w elements and `w` as w_h rows of w_w
// elements; x_w must equal w_w. `out` receives x_h rows of w_h elements,
// where out[r][c] = dot(x row r, w row c) (+ b[c] when `b` is non-null).
//
// `b` is optional, matching fc_compute_eigen: a null pointer means "no bias".
template <typename T>
void fc_compute_naive(const T* x, int x_w, int x_h,  //
                      const T* w, int w_w, int w_h,  //
                      const T* b,                    //
                      T* out) {
  CHECK_EQ(x_w, w_w);
  // out shape: (x_h, w_h)
  // Every output element is assigned below, so no zero-fill is needed first.
  for (int r = 0; r < x_h; r++) {
    for (int c = 0; c < w_h; c++) {
      const T acc = dot(&x[r * x_w], &w[c * w_w], w_w);
      out[r * w_h + c] = b ? acc + b[c] : acc;
    }
  }
}
// X86 float kernel for the `fc` operator; forwards to fc_compute_eigen.
//
// The input tensor is flattened into two extents around
// param.in_num_col_dims: the product of the leading dims and the product of
// the remaining dims. The weight extents are taken from param.w->dims().
template <typename T>
class FcCompute : public KernelLite<TARGET(kX86), PRECISION(kFloat)> {
 public:
  using param_t = operators::FcParam;

  void Run() override {
    auto& param = *param_.get_mutable<param_t>();
    CHECK_GE(param.input->dims().size(), 2UL);
    CHECK_EQ(param.output->dims().size(), 2UL);

    // fc_compute_eigen accepts a null bias, so do not dereference a missing
    // bias tensor here.
    const T* bias_data = param.bias ? param.bias->data<T>() : nullptr;

    // NOTE(review): the product of the leading dims is passed in the `x_w`
    // slot and the product of the trailing dims in the `x_h` slot of
    // fc_compute_eigen(x, x_w, x_h, ...). Confirm this ordering (and the
    // w_w / w_h choice below) matches the layout fc_compute_eigen expects.
    fc_compute_eigen(
        param.input->data<T>(),  // x
        param.input->dims().Slice(0, param.in_num_col_dims).production(),
        param.input->dims()
            .Slice(param.in_num_col_dims, param.input->dims().size())
            .production(),
        param.w->data<T>(),     // w
        param.w->dims()[1],     // w_w
        param.w->dims()[0],     // w_h
        bias_data,              // b (may be null)
        param.output->mutable_data<T>());
  }

  virtual ~FcCompute() = default;
};
}
// namespace x86
}
// namespace kernels
}
// namespace lite
}
// namespace paddle
// Registers FcCompute<float> as the `fc` kernel for kX86 / kFloat / kNCHW with
// X86 tensors for the "Input", "Bias" and "W" inputs and the "Out" output.
REGISTER_LITE_KERNEL(fc, kX86, kFloat, kNCHW,
                     paddle::lite::kernels::x86::FcCompute<float>, def)
    .BindInput("Input", {LiteType::GetTensorTy(TARGET(kX86))})
    .BindInput("Bias", {LiteType::GetTensorTy(TARGET(kX86))})
    .BindInput("W", {LiteType::GetTensorTy(TARGET(kX86))})
    .BindOutput("Out", {LiteType::GetTensorTy(TARGET(kX86))})
    .Finalize();
paddle/fluid/lite/kernels/x86/fill_constant_compute.cc
浏览文件 @
b5c410a2
...
...
@@ -31,7 +31,7 @@ class FillConstantCompute : public KernelLite<TARGET(kX86), PRECISION(kFloat)> {
void
Run
()
override
{
auto
&
param
=
*
param_
.
get_mutable
<
param_t
>
();
auto
&
context
=
c
ontext
_
->
As
<
X86Context
>
();
auto
&
context
=
c
tx
_
->
As
<
X86Context
>
();
CHECK
(
context
.
x86_device_context
);
param
.
Out
->
template
mutable_data
<
T
>();
...
...
paddle/fluid/lite/kernels/x86/mean_compute.cc
浏览文件 @
b5c410a2
...
...
@@ -37,7 +37,7 @@ class MeanCompute : public KernelLite<TARGET(kX86), PRECISION(kFloat)> {
void
Run
()
override
{
auto
&
param
=
*
param_
.
get_mutable
<
param_t
>
();
auto
&
context
=
c
ontext
_
->
As
<
X86Context
>
();
auto
&
context
=
c
tx
_
->
As
<
X86Context
>
();
CHECK
(
context
.
x86_device_context
);
param
.
Out
->
template
mutable_data
<
T
>();
...
...
@@ -59,7 +59,7 @@ class MeanGradCompute : public KernelLite<TARGET(kX86), PRECISION(kFloat)> {
void
Run
()
override
{
auto
&
param
=
*
param_
.
get_mutable
<
param_t
>
();
auto
&
context
=
c
ontext
_
->
As
<
X86Context
>
();
auto
&
context
=
c
tx
_
->
As
<
X86Context
>
();
CHECK_EQ
(
param
.
Out_grad
->
raw_tensor
().
numel
(),
1
);
CHECK
(
context
.
x86_device_context
);
...
...
paddle/fluid/lite/kernels/x86/mul_compute.cc
浏览文件 @
b5c410a2
...
...
@@ -30,7 +30,7 @@ class MulCompute : public KernelLite<TARGET(kX86), PRECISION(kFloat)> {
using
param_t
=
operators
::
MulParam
;
void
Run
()
override
{
auto
&
context
=
c
ontext
_
->
As
<
X86Context
>
();
auto
&
context
=
c
tx
_
->
As
<
X86Context
>
();
auto
&
param
=
*
param_
.
get_mutable
<
operators
::
MulParam
>
();
CHECK
(
context
.
x86_device_context
);
...
...
@@ -68,7 +68,7 @@ template <typename T>
class
MulGradCompute
:
public
KernelLite
<
TARGET
(
kX86
),
PRECISION
(
kFloat
)
>
{
public:
void
Run
()
override
{
auto
&
context
=
c
ontext
_
->
As
<
X86Context
>
();
auto
&
context
=
c
tx
_
->
As
<
X86Context
>
();
auto
&
param
=
*
param_
.
get_mutable
<
operators
::
MulGradParam
>
();
CHECK
(
context
.
x86_device_context
);
...
...
paddle/fluid/lite/kernels/x86/relu_compute.cc
0 → 100644
浏览文件 @
b5c410a2
// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include <Eigen/Core>
#include "paddle/fluid/framework/eigen.h"
#include "paddle/fluid/framework/operator.h"
#include "paddle/fluid/lite/core/kernel.h"
#include "paddle/fluid/lite/core/op_lite.h"
#include "paddle/fluid/lite/core/op_registry.h"
#include "paddle/fluid/lite/core/type_system.h"
#include "paddle/fluid/lite/operators/relu_op.h"
namespace
paddle
{
namespace
lite
{
namespace
kernels
{
namespace
x86
{
// X86 kernel for the `relu` operator: output[i] = max(0, input[i]) for every
// element of param.input.
//
// The element type follows the template parameter T (previously hard-coded
// to float, which left T unused).
template <typename T>
class ReluCompute : public KernelLite<TARGET(kX86), PRECISION(kFloat)> {
 public:
  using param_t = operators::ReluParam;

  void Run() override {
    auto& param = *param_.get_mutable<param_t>();
    auto n = param.input->dims().production();
    const T* input = param.input->data<T>();
    T* output = param.output->mutable_data<T>();
    // Index with the same type as the element count so the comparison does
    // not mix widths.
    for (decltype(n) i = 0; i < n; i++) {
      output[i] = std::max(static_cast<T>(0), input[i]);
    }
  }

  virtual ~ReluCompute() = default;
};
}
// namespace x86
}
// namespace kernels
}
// namespace lite
}
// namespace paddle
// Registers ReluCompute<float> as the `relu` kernel for kX86 / kFloat / kNCHW
// with X86 tensors for the "Input" input and the "Out" output.
REGISTER_LITE_KERNEL(relu, kX86, kFloat, kNCHW,
                     paddle::lite::kernels::x86::ReluCompute<float>, def)
    .BindInput("Input", {LiteType::GetTensorTy(TARGET(kX86))})
    .BindOutput("Out", {LiteType::GetTensorTy(TARGET(kX86))})
    .Finalize();
paddle/fluid/lite/kernels/x86/scale_compute.cc
0 → 100644
浏览文件 @
b5c410a2
// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include <Eigen/Core>
#include "paddle/fluid/framework/eigen.h"
#include "paddle/fluid/framework/operator.h"
#include "paddle/fluid/lite/core/kernel.h"
#include "paddle/fluid/lite/core/op_lite.h"
#include "paddle/fluid/lite/core/op_registry.h"
#include "paddle/fluid/lite/core/type_system.h"
#include "paddle/fluid/lite/operators/relu_op.h"
namespace
paddle
{
namespace
lite
{
namespace
kernels
{
namespace
x86
{
// Writes out[i] = x[i] * scale + offset for i in [0, size).
//
// `offset` is `bias` when bias_before is false, and `bias * scale` when
// bias_before is true (i.e. the bias is applied before the scaling).
template <typename T>
void scale_compute(const T* x, T* out, int size, float scale, float bias,
                   bool bias_before) {
  if (bias_before) {
    bias = bias * scale;
  }
  int idx = 0;
  while (idx < size) {
    out[idx] = x[idx] * scale + bias;
    ++idx;
  }
}
// X86 float kernel for the `scale` operator; forwards to scale_compute over
// all elements of param.x.
template <typename T>
class ScaleCompute : public KernelLite<TARGET(kX86), PRECISION(kFloat)> {
 public:
  using param_t = operators::ScaleParam;

  void Run() override {
    auto& param = *param_.get_mutable<param_t>();
    // scale_compute's last parameter means "bias is added BEFORE scaling",
    // which is the negation of bias_after_scale. Passing the flag through
    // unchanged inverted the two modes.
    scale_compute(param.x->data<T>(), param.output->mutable_data<T>(),
                  param.x->dims().production(), param.scale, param.bias,
                  !param.bias_after_scale);
  }

  virtual ~ScaleCompute() = default;
};
}
// namespace x86
}
// namespace kernels
}
// namespace lite
}
// namespace paddle
// Registers ScaleCompute<float> as the `scale` kernel for kX86 / kFloat /
// kNCHW with X86 tensors for the "X" input and the "Out" output.
REGISTER_LITE_KERNEL(scale, kX86, kFloat, kNCHW,
                     paddle::lite::kernels::x86::ScaleCompute<float>, def)
    .BindInput("X", {LiteType::GetTensorTy(TARGET(kX86))})
    .BindOutput("Out", {LiteType::GetTensorTy(TARGET(kX86))})
    .Finalize();
paddle/fluid/lite/kernels/x86/softmax_compute.cc
0 → 100644
浏览文件 @
b5c410a2
// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "paddle/fluid/operators/math/softmax.h"
#include "paddle/fluid/framework/eigen.h"
#include "paddle/fluid/framework/lod_tensor.h"
#include "paddle/fluid/framework/operator.h"
#include "paddle/fluid/lite/core/kernel.h"
#include "paddle/fluid/lite/core/op_registry.h"
namespace
paddle
{
namespace
lite
{
namespace
kernels
{
namespace
x86
{
// Maps a possibly negative axis onto a non-negative one by adding `rank` to
// negative values; non-negative axes are returned unchanged.
static inline int CanonicalAxis(const int axis, const int rank) {
  return axis < 0 ? axis + rank : axis;
}
// Returns the product of dims[0] .. dims[axis - 1] (1 when axis <= 0).
static inline int SizeToAxis(const int axis, lite::DDim dims) {
  int product = 1;
  int idx = 0;
  while (idx < axis) {
    product *= dims[idx];
    ++idx;
  }
  return product;
}
// Returns the product of dims[axis] .. dims[dims.size() - 1]; the result is 1
// when the loop condition is false from the start.
static inline int SizeFromAxis(const int axis, lite::DDim dims) {
  int product = 1;
  int idx = axis;
  while (idx < dims.size()) {
    product *= dims[idx];
    ++idx;
  }
  return product;
}
// X86 float kernel for the `softmax` operator.
//
// The input and output are viewed as 2-D tensors of shape
// (SizeToAxis(axis), SizeFromAxis(axis)) sharing the original buffers, and
// paddle::operators::math::SoftmaxFunctor is run on those views.
template <typename T>
class SoftmaxCompute : public KernelLite<TARGET(kX86), PRECISION(kFloat)> {
 public:
  using param_t = operators::SoftmaxParam;

  void Run() override {
    auto& args = *param_.get_mutable<operators::SoftmaxParam>();
    CHECK(args.output);
    CHECK(args.x);

    // Normalise a negative axis, then split the shape around it.
    const int rank = args.x->dims().size();
    const int axis = CanonicalAxis(args.axis, rank);
    int axis_dim = args.x->dims()[axis];
    const int outer = SizeToAxis(axis, args.x->dims());
    const int inner = SizeFromAxis(axis, args.x->dims());
    std::vector<int64_t> shape{outer, inner};

    // 2-D views over the original input / output storage.
    lite::Tensor input_2d, out_2d;
    input_2d.ShareDataWith(*args.x);
    input_2d.Resize(lite::DDim(shape));
    out_2d.ShareDataWith(*args.output);
    out_2d.Resize(lite::DDim(shape));

    using Functor =
        paddle::operators::math::SoftmaxFunctor<platform::CPUDeviceContext, T,
                                                true>;
    Functor()(platform::CPUDeviceContext(), axis_dim, &input_2d.raw_tensor(),
              &out_2d.raw_tensor());
  }

  virtual ~SoftmaxCompute() = default;
};
}
// namespace x86
}
// namespace kernels
}
// namespace lite
}
// namespace paddle
// Registers SoftmaxCompute<float> as the `softmax` kernel for kX86 / kFloat /
// kNCHW with X86 tensors for the "X" input and the "Out" output.
REGISTER_LITE_KERNEL(softmax, kX86, kFloat, kNCHW,
                     paddle::lite::kernels::x86::SoftmaxCompute<float>, def)
    .BindInput("X", {LiteType::GetTensorTy(TARGET(kX86))})
    .BindOutput("Out", {LiteType::GetTensorTy(TARGET(kX86))})
    .Finalize();
paddle/fluid/lite/operators/CMakeLists.txt
浏览文件 @
b5c410a2
...
...
@@ -13,8 +13,9 @@ cc_library(elementwise_ops_lite SRCS elementwise_ops.cc DEPS ${op_DEPS})
cc_library
(
mean_op_lite SRCS mean_op.cc DEPS
${
op_DEPS
}
)
cc_library
(
fill_constant_op_lite SRCS fill_constant_op.cc DEPS
${
op_DEPS
}
)
#cc_library(sgd_op_lite SRCS sgd_op.cc DEPS ${op_DEPS})
cc_library
(
op_params_lite SRCS op_params.cc DEPS
${
tensor_lite
}
any_lite framework_proto_lite
)
cc_library
(
dropout_op_lite SRCS dropout_op.cc DEPS
${
op_DEPS
}
)
set
(
ops_lite
fc_op_lite
relu_op_lite
...
...
@@ -27,7 +28,9 @@ set(ops_lite
elementwise_ops_lite
mean_op_lite
fill_constant_op_lite
activation_ops_lite
dropout_op_lite
PARENT_SCOPE
)
lite_cc_test
(
test_fc_op_lite SRCS fc_op_test.cc DEPS fc_op_lite memory_lite
)
lite_cc_test
(
test_fc_op_lite SRCS fc_op_test.cc DEPS fc_op_lite memory_lite
fc_compute_x86
)
lite_cc_test
(
test_softmax_op_lite SRCS softmax_op_test.cc DEPS softmax_op_lite memory_lite
)
paddle/fluid/lite/operators/dropout_op.cc
0 → 100644
浏览文件 @
b5c410a2
// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include <string>
#include <vector>
#include "paddle/fluid/lite/core/op_lite.h"
#include "paddle/fluid/lite/core/op_registry.h"
namespace
paddle
{
namespace
lite
{
namespace
operators
{
class
DropoutOpLite
:
public
OpLite
{
public:
explicit
DropoutOpLite
(
const
std
::
string
&
type
)
:
OpLite
(
type
)
{}
bool
CheckShape
()
const
override
{
CHECK_OR_FALSE
(
param_
.
x
);
return
true
;
}
bool
InferShape
()
const
override
{
const
auto
x_dims
=
param_
.
x
->
dims
();
param_
.
output
->
Resize
(
x_dims
);
if
(
param_
.
is_test
==
false
)
{
param_
.
mask
->
Resize
(
x_dims
);
}
// share LoD
// param_.output->set_lod(param_.input->lod());
return
true
;
}
void
AttachKernel
(
KernelBase
*
kernel
)
override
{
kernel
->
SetParam
(
param_
);
}
// TODO(Superjomn) replace framework::OpDesc with a lite one.
bool
AttachImpl
(
const
OpDesc
&
op_desc
,
lite
::
Scope
*
scope
)
override
{
auto
input
=
op_desc
.
Input
(
"X"
).
front
();
auto
out
=
op_desc
.
Output
(
"Out"
).
front
();
auto
Mask
=
op_desc
.
Output
(
"Mask"
).
front
();
param_
.
x
=
GetVar
<
lite
::
Tensor
>
(
scope
,
input
);
param_
.
output
=
GetMutableVar
<
lite
::
Tensor
>
(
scope
,
out
);
param_
.
mask
=
GetMutableVar
<
lite
::
Tensor
>
(
scope
,
Mask
);
param_
.
dropout_prob
=
boost
::
get
<
float
>
(
op_desc
.
GetAttr
(
"dropout_prob"
));
if
(
op_desc
.
HasAttr
(
"axis"
))
{
param_
.
is_test
=
boost
::
get
<
bool
>
(
op_desc
.
GetAttr
(
"is_test"
));
}
param_
.
fix_seed
=
boost
::
get
<
bool
>
(
op_desc
.
GetAttr
(
"fix_seed"
));
param_
.
seed
=
boost
::
get
<
int
>
(
op_desc
.
GetAttr
(
"seed"
));
param_
.
dropout_implementation
=
boost
::
get
<
int
>
(
op_desc
.
GetAttr
(
"dropout_implementation"
));
return
true
;
}
std
::
string
DebugString
()
const
override
{
return
"dropout"
;
}
private:
mutable
DropoutParam
param_
;
};
}
// namespace operators
}
// namespace lite
}
// namespace paddle
// Register DropoutOpLite under the op type name `dropout`.
REGISTER_LITE_OP
(
dropout
,
paddle
::
lite
::
operators
::
DropoutOpLite
);
paddle/fluid/lite/operators/elementwise_ops.cc
浏览文件 @
b5c410a2
...
...
@@ -43,9 +43,8 @@ class ElementwiseOp : public OpLite {
param_
.
X
=
GetVar
<
lite
::
Tensor
>
(
scope
,
X_name
);
param_
.
Y
=
GetVar
<
lite
::
Tensor
>
(
scope
,
Y_name
);
param_
.
Out
=
GetMutableVar
<
Tensor
>
(
scope
,
Out_name
);
param_
.
axis
=
GetAttr
<
int
>
(
opdesc
.
GetAttr
(
"axis"
));
param_
.
Out
=
GetMutableVar
<
lite
::
Tensor
>
(
scope
,
Out_name
);
param_
.
axis
=
boost
::
get
<
int
>
(
opdesc
.
GetAttr
(
"axis"
));
return
true
;
}
...
...
@@ -110,3 +109,4 @@ REGISTER_LITE_OP(elementwise_sub, paddle::lite::operators::ElementwiseOp);
REGISTER_LITE_OP
(
elementwise_sub_grad
,
paddle
::
lite
::
operators
::
ElementwiseGradExplicitOp
);
#endif
REGISTER_LITE_OP
(
elementwise_add
,
paddle
::
lite
::
operators
::
ElementwiseOp
);
paddle/fluid/lite/operators/fc_op_test.cc
浏览文件 @
b5c410a2
...
...
@@ -57,10 +57,16 @@ TEST(fc_op_lite, test) {
FcOpLite
fc
(
"fc"
);
fc
.
SetValidPlaces
({
Place
{
TARGET
(
k
Host
),
PRECISION
(
kFloat
)}});
fc
.
SetValidPlaces
({
Place
{
TARGET
(
k
X86
),
PRECISION
(
kFloat
)}});
fc
.
Attach
(
desc
,
&
scope
);
auto
kernels
=
fc
.
CreateKernels
({
Place
{
TARGET
(
kX86
),
PRECISION
(
kFloat
)}});
ASSERT_FALSE
(
kernels
.
empty
());
}
}
// namespace operators
}
// namespace lite
}
// namespace paddle
// Only when the build defines LITE_WITH_X86: reference the fc kernel
// (target kX86, precision kFloat, layout kNCHW, alias `def`).
// NOTE(review): presumably this forces the kernel's registration to be linked
// into the test binary — confirm against the USE_LITE_KERNEL definition.
#ifdef LITE_WITH_X86
USE_LITE_KERNEL
(
fc
,
kX86
,
kFloat
,
kNCHW
,
def
);
#endif
paddle/fluid/lite/operators/op_params.h
浏览文件 @
b5c410a2
...
...
@@ -13,6 +13,7 @@
// limitations under the License.
#pragma once
#include <string>
#include <vector>
#include "paddle/fluid/lite/core/compatible_tensor.h"
#include "paddle/fluid/lite/core/framework.pb.h"
...
...
@@ -94,14 +95,67 @@ struct ScaleParam {
bool
bias_after_scale
{
true
};
};
// For Softmax
O
p
// For Softmax
o
p
// Parameter bundle for the softmax op.
struct SoftmaxParam {
  lite::Tensor* x = nullptr;       // input tensor, unset until attached
  lite::Tensor* output = nullptr;  // output tensor, unset until attached
  int axis = -1;                   // "axis" attribute; defaults to -1
};
// Parameter bundle for the convolution op.
//
// Tensor pointers are null until the op attaches them; the remaining fields
// carry the op attributes with their defaults.
struct ConvParam {
  lite::Tensor* x{};
  lite::Tensor* filter{};
  lite::Tensor* bias{};
  lite::Tensor* residualData{};
  lite::Tensor* output{};

  std::vector<int> strides{1, 1};
  std::vector<int> paddings{0, 0};
  int groups{1};
  std::vector<int> dilations{1, 1};

  bool fuse_relu_before_depthwise_conv{false};
  bool use_mkldnn{false};
  bool fuse_relu{false};  // only used in mkldnn kernel
  // set true for op that should be quantized, only used for cpu
  bool use_quantizer{false};
  bool fuse_residual_connection{false};

  float scale_in{1.0f};           // only used with mkl-dnn int8
  float scale_out{1.0f};          // only used with mkl-dnn int8
  float scale_in_eltwise{1.0f};   // only used with mkl-dnn int8
  float scale_weights{1.0f};      // only used with mkl-dnn int8
  bool force_fp32_output{false};  // only used in mkl-dnn int8

  // Spelled "AnyLayout" to match PoolParam::data_format in this header (the
  // previous default was the inconsistently-cased "Anylayout").
  std::string data_format{"AnyLayout"};
};
// Parameter bundle for the pooling op.
struct PoolParam {
  lite::Tensor* x = nullptr;
  lite::Tensor* output = nullptr;

  std::string pooling_type;  // empty by default
  std::vector<int> ksize;    // empty by default
  // if true, kernel size and paddings will be ignored
  bool global_pooling = false;
  std::vector<int> strides = {1, 1};
  std::vector<int> paddings = {0, 0};

  bool exclusive = true;
  bool adaptive = false;
  bool ceil_mode = false;
  bool use_quantizer = false;
  std::string data_format = "AnyLayout";
};
// Parameter bundle for the dropout op.
struct DropoutParam {
  const lite::Tensor* x = nullptr;  // read-only input
  lite::Tensor* output = nullptr;
  lite::Tensor* mask = nullptr;

  float dropout_prob = 0.5f;
  bool is_test = false;
  bool fix_seed = false;
  int seed = 0;
  std::string dropout_implementation = "downgrade_in_infer";
};
/// ----------------------- element wise operators ----------------------
struct
ElementwiseParam
{
const
lite
::
Tensor
*
X
{};
...
...
paddle/fluid/lite/operators/relu_op.cc
浏览文件 @
b5c410a2
...
...
@@ -25,7 +25,6 @@ bool ReluOp::InferShape() const {
CHECK_OR_FALSE
(
param_
.
output
);
// TODO(Superjomn) Enable data sharing.
param_
.
output
->
Resize
(
param_
.
input
->
dims
());
// param_.output->ShareDataWith(*param_.input);
// share lod
// param_.output->set_lod(param_.input->lod());
return
true
;
...
...
@@ -42,8 +41,8 @@ bool ReluOp::AttachImpl(const OpDesc &opdesc, lite::Scope *scope) {
return
true
;
}
REGISTER_LITE_OP
(
relu
,
ReluOp
);
}
// namespace operators
}
// namespace lite
}
// namespace paddle
REGISTER_LITE_OP
(
relu
,
paddle
::
lite
::
operators
::
ReluOp
);
paddle/fluid/lite/operators/relu_op.h
浏览文件 @
b5c410a2
...
...
@@ -35,7 +35,7 @@ class ReluOp : public OpLite {
bool
AttachImpl
(
const
OpDesc
&
opdesc
,
lite
::
Scope
*
scope
)
override
;
// Hands this op's parameter struct (param_) to the given kernel via SetParam.
void
AttachKernel
(
KernelBase
*
kernel
)
override
{
kernel
->
SetParam
(
param_
);
}
std
::
string
DebugString
()
const
override
{
return
"
tanh
"
;
}
std
::
string
DebugString
()
const
override
{
return
"
relu
"
;
}
private:
mutable
ReluParam
param_
;
...
...
paddle/fluid/lite/utils/varient.h
浏览文件 @
b5c410a2
...
...
@@ -128,8 +128,9 @@ struct variant {
if
(
type_id
==
typeid
(
T
).
hash_code
())
return
reinterpret_cast
<
T
*>
(
&
data
);
else
LOG
(
FATAL
)
<<
"unmatched type get, should be "
<<
type_id
<<
" but get "
LOG
(
ERROR
)
<<
"unmatched type get, should be "
<<
type_id
<<
" but get "
<<
typeid
(
T
).
name
();
throw
std
::
invalid_argument
(
"unmatched type"
);
}
// Destructor: delegates to helper_t::destroy with the stored type id and the
// address of the storage, so the held value is cleaned up by the helper.
~
variant
()
{
helper_t
::
destroy
(
type_id
,
&
data
);
}
};
...
...
paddle/fluid/memory/CMakeLists.txt
浏览文件 @
b5c410a2
...
...
@@ -6,7 +6,8 @@ cc_library(memcpy SRCS memcpy.cc DEPS place)
cc_library
(
memory
DEPS
malloc
memcpy
)
memcpy
)
#if (WITH_GPU)
# nv_test(pinned_memory_test SRCS pinned_memory_test.cu DEPS place memory)
#endif()
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录