Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
Crayon鑫
Paddle
提交
b5c410a2
P
Paddle
项目概览
Crayon鑫
/
Paddle
与 Fork 源项目一致
Fork自
PaddlePaddle / Paddle
通知
1
Star
1
Fork
0
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
1
列表
看板
标记
里程碑
合并请求
0
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
P
Paddle
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
1
Issue
1
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
提交
b5c410a2
编写于
5月 31, 2019
作者:
L
lijianshe02
提交者:
GitHub
5月 31, 2019
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
add ops and kernels that mul, scale, fc, relu, softmax, dropout, elem… (#17711)
* fix conflicts * fix kernel registry related bugs test=develop
上级
89b0466c
变更
25
隐藏空白更改
内联
并排
Showing
25 changed file
with
668 addition
and
35 deletion
+668
-35
paddle/fluid/framework/framework.proto
paddle/fluid/framework/framework.proto
+1
-1
paddle/fluid/lite/api/CMakeLists.txt
paddle/fluid/lite/api/CMakeLists.txt
+8
-4
paddle/fluid/lite/api/cxx_api_test.cc
paddle/fluid/lite/api/cxx_api_test.cc
+26
-5
paddle/fluid/lite/core/context.h
paddle/fluid/lite/core/context.h
+5
-1
paddle/fluid/lite/core/mir/runtime_context_assign_pass.cc
paddle/fluid/lite/core/mir/runtime_context_assign_pass.cc
+12
-1
paddle/fluid/lite/core/op_registry.cc
paddle/fluid/lite/core/op_registry.cc
+4
-0
paddle/fluid/lite/kernels/x86/CMakeLists.txt
paddle/fluid/lite/kernels/x86/CMakeLists.txt
+13
-2
paddle/fluid/lite/kernels/x86/dropout_compute.cc
paddle/fluid/lite/kernels/x86/dropout_compute.cc
+87
-0
paddle/fluid/lite/kernels/x86/elementwise_compute.cc
paddle/fluid/lite/kernels/x86/elementwise_compute.cc
+34
-2
paddle/fluid/lite/kernels/x86/fc_compute.cc
paddle/fluid/lite/kernels/x86/fc_compute.cc
+112
-0
paddle/fluid/lite/kernels/x86/fill_constant_compute.cc
paddle/fluid/lite/kernels/x86/fill_constant_compute.cc
+1
-1
paddle/fluid/lite/kernels/x86/mean_compute.cc
paddle/fluid/lite/kernels/x86/mean_compute.cc
+2
-2
paddle/fluid/lite/kernels/x86/mul_compute.cc
paddle/fluid/lite/kernels/x86/mul_compute.cc
+2
-2
paddle/fluid/lite/kernels/x86/relu_compute.cc
paddle/fluid/lite/kernels/x86/relu_compute.cc
+56
-0
paddle/fluid/lite/kernels/x86/scale_compute.cc
paddle/fluid/lite/kernels/x86/scale_compute.cc
+62
-0
paddle/fluid/lite/kernels/x86/softmax_compute.cc
paddle/fluid/lite/kernels/x86/softmax_compute.cc
+90
-0
paddle/fluid/lite/operators/CMakeLists.txt
paddle/fluid/lite/operators/CMakeLists.txt
+5
-2
paddle/fluid/lite/operators/dropout_op.cc
paddle/fluid/lite/operators/dropout_op.cc
+75
-0
paddle/fluid/lite/operators/elementwise_ops.cc
paddle/fluid/lite/operators/elementwise_ops.cc
+3
-3
paddle/fluid/lite/operators/fc_op_test.cc
paddle/fluid/lite/operators/fc_op_test.cc
+7
-1
paddle/fluid/lite/operators/op_params.h
paddle/fluid/lite/operators/op_params.h
+56
-2
paddle/fluid/lite/operators/relu_op.cc
paddle/fluid/lite/operators/relu_op.cc
+2
-3
paddle/fluid/lite/operators/relu_op.h
paddle/fluid/lite/operators/relu_op.h
+1
-1
paddle/fluid/lite/utils/varient.h
paddle/fluid/lite/utils/varient.h
+2
-1
paddle/fluid/memory/CMakeLists.txt
paddle/fluid/memory/CMakeLists.txt
+2
-1
未找到文件。
paddle/fluid/framework/framework.proto
浏览文件 @
b5c410a2
...
@@ -13,7 +13,7 @@ See the License for the specific language governing permissions and
...
@@ -13,7 +13,7 @@ See the License for the specific language governing permissions and
limitations under the License. */
limitations under the License. */
syntax
=
"proto2"
;
syntax
=
"proto2"
;
option
optimize_for
=
LITE_RUNTIME
;
//
option optimize_for = LITE_RUNTIME;
package
paddle
.
framework.proto
;
package
paddle
.
framework.proto
;
// Any incompatible changes to ProgramDesc and its dependencies should
// Any incompatible changes to ProgramDesc and its dependencies should
...
...
paddle/fluid/lite/api/CMakeLists.txt
浏览文件 @
b5c410a2
...
@@ -25,10 +25,14 @@ set(LITE_URL "http://paddle-inference-dist.bj.bcebos.com" CACHE STRING "inferenc
...
@@ -25,10 +25,14 @@ set(LITE_URL "http://paddle-inference-dist.bj.bcebos.com" CACHE STRING "inferenc
set
(
LITE_DEMO_INSTALL_DIR
"
${
THIRD_PARTY_PATH
}
/inference_demo"
CACHE STRING
set
(
LITE_DEMO_INSTALL_DIR
"
${
THIRD_PARTY_PATH
}
/inference_demo"
CACHE STRING
"A path setting inference demo download directories."
)
"A path setting inference demo download directories."
)
# lite_cc_test(test_cxx_api_lite SRCS cxx_api_test.cc
if
(
NOT LITE_WITH_LIGHT_WEIGHT_FRAMEWORK
)
# DEPS cxx_api_lite model_parser_lite target_wrapper_host
lite_cc_test
(
test_cxx_api_lite SRCS cxx_api_test.cc
# ${ops_lite} ${host_kernels} ARGS --model_dir=${LITE_MODEL_DIR}/lite_naive_model
DEPS cxx_api_lite model_parser_lite target_wrapper_host
# --optimized_model=${LITE_MODEL_DIR}/lite_naive_model_opt SERIAL)
${
ops_lite
}
${
host_kernels
}
${
x86_kernels
}
ARGS --model_dir=
${
LITE_MODEL_DIR
}
/lite_naive_model
--optimized_model=
${
LITE_MODEL_DIR
}
/lite_naive_model_opt SERIAL
)
add_dependencies
(
test_cxx_api_lite extern_lite_download_lite_naive_model_tar_gz
)
endif
(
NOT LITE_WITH_LIGHT_WEIGHT_FRAMEWORK
)
if
(
WITH_TESTING
)
if
(
WITH_TESTING
)
lite_download_and_uncompress
(
${
LITE_MODEL_DIR
}
${
LITE_URL
}
"lite_naive_model.tar.gz"
)
lite_download_and_uncompress
(
${
LITE_MODEL_DIR
}
${
LITE_URL
}
"lite_naive_model.tar.gz"
)
...
...
paddle/fluid/lite/api/cxx_api_test.cc
浏览文件 @
b5c410a2
...
@@ -32,7 +32,8 @@ namespace lite {
...
@@ -32,7 +32,8 @@ namespace lite {
TEST
(
CXXApi
,
test
)
{
TEST
(
CXXApi
,
test
)
{
lite
::
ExecutorLite
predictor
;
lite
::
ExecutorLite
predictor
;
#ifndef LITE_WITH_CUDA
#ifndef LITE_WITH_CUDA
std
::
vector
<
Place
>
valid_places
({
Place
{
TARGET
(
kHost
),
PRECISION
(
kFloat
)}});
std
::
vector
<
Place
>
valid_places
({
Place
{
TARGET
(
kHost
),
PRECISION
(
kFloat
)},
Place
{
TARGET
(
kX86
),
PRECISION
(
kFloat
)}});
#else
#else
std
::
vector
<
Place
>
valid_places
({
std
::
vector
<
Place
>
valid_places
({
Place
{
TARGET
(
kHost
),
PRECISION
(
kFloat
),
DATALAYOUT
(
kNCHW
)},
Place
{
TARGET
(
kHost
),
PRECISION
(
kFloat
),
DATALAYOUT
(
kNCHW
)},
...
@@ -44,7 +45,8 @@ TEST(CXXApi, test) {
...
@@ -44,7 +45,8 @@ TEST(CXXApi, test) {
});
});
#endif
#endif
predictor
.
Build
(
FLAGS_model_dir
,
Place
{
TARGET
(
kCUDA
),
PRECISION
(
kFloat
)},
predictor
.
Build
(
FLAGS_model_dir
,
Place
{
TARGET
(
kX86
),
PRECISION
(
kFloat
)},
// origin cuda
valid_places
);
valid_places
);
auto
*
input_tensor
=
predictor
.
GetInput
(
0
);
auto
*
input_tensor
=
predictor
.
GetInput
(
0
);
...
@@ -69,7 +71,8 @@ TEST(CXXApi, test) {
...
@@ -69,7 +71,8 @@ TEST(CXXApi, test) {
#ifndef LITE_WITH_LIGHT_WEIGHT_FRAMEWORK
#ifndef LITE_WITH_LIGHT_WEIGHT_FRAMEWORK
TEST
(
CXXApi
,
save_model
)
{
TEST
(
CXXApi
,
save_model
)
{
lite
::
ExecutorLite
predictor
;
lite
::
ExecutorLite
predictor
;
std
::
vector
<
Place
>
valid_places
({
Place
{
TARGET
(
kHost
),
PRECISION
(
kFloat
)}});
std
::
vector
<
Place
>
valid_places
({
Place
{
TARGET
(
kHost
),
PRECISION
(
kFloat
)},
Place
{
TARGET
(
kX86
),
PRECISION
(
kFloat
)}});
predictor
.
Build
(
FLAGS_model_dir
,
Place
{
TARGET
(
kCUDA
),
PRECISION
(
kFloat
)},
predictor
.
Build
(
FLAGS_model_dir
,
Place
{
TARGET
(
kCUDA
),
PRECISION
(
kFloat
)},
valid_places
);
valid_places
);
...
@@ -78,7 +81,7 @@ TEST(CXXApi, save_model) {
...
@@ -78,7 +81,7 @@ TEST(CXXApi, save_model) {
#endif // LITE_WITH_LIGHT_WEIGHT_FRAMEWORK
#endif // LITE_WITH_LIGHT_WEIGHT_FRAMEWORK
#ifndef LITE_WITH_LIGHT_WEIGHT_FRAMEWORK
#ifndef LITE_WITH_LIGHT_WEIGHT_FRAMEWORK
TEST
(
CXXTrainer
,
train
)
{
/*
TEST(CXXTrainer, train) {
Place prefer_place({TARGET(kHost), PRECISION(kFloat), DATALAYOUT(kNCHW)});
Place prefer_place({TARGET(kHost), PRECISION(kFloat), DATALAYOUT(kNCHW)});
std::vector<Place> valid_places({prefer_place});
std::vector<Place> valid_places({prefer_place});
auto scope = std::make_shared<lite::Scope>();
auto scope = std::make_shared<lite::Scope>();
...
@@ -108,7 +111,7 @@ TEST(CXXTrainer, train) {
...
@@ -108,7 +111,7 @@ TEST(CXXTrainer, train) {
data0[0] = 0;
data0[0] = 0;
exe.Run();
exe.Run();
}
}
*/
#endif // LITE_WITH_LIGHT_WEIGHT_FRAMEWORK
#endif // LITE_WITH_LIGHT_WEIGHT_FRAMEWORK
}
// namespace lite
}
// namespace lite
...
@@ -116,13 +119,31 @@ TEST(CXXTrainer, train) {
...
@@ -116,13 +119,31 @@ TEST(CXXTrainer, train) {
USE_LITE_OP
(
mul
);
USE_LITE_OP
(
mul
);
USE_LITE_OP
(
fc
);
USE_LITE_OP
(
fc
);
USE_LITE_OP
(
relu
);
USE_LITE_OP
(
scale
);
USE_LITE_OP
(
scale
);
USE_LITE_OP
(
feed
);
USE_LITE_OP
(
feed
);
USE_LITE_OP
(
fetch
);
USE_LITE_OP
(
fetch
);
USE_LITE_OP
(
io_copy
);
USE_LITE_OP
(
io_copy
);
USE_LITE_OP
(
elementwise_add
)
USE_LITE_OP
(
elementwise_sub
)
USE_LITE_OP
(
square
)
USE_LITE_OP
(
softmax
)
USE_LITE_OP
(
dropout
)
USE_LITE_KERNEL
(
feed
,
kHost
,
kAny
,
kAny
,
def
);
USE_LITE_KERNEL
(
feed
,
kHost
,
kAny
,
kAny
,
def
);
USE_LITE_KERNEL
(
fetch
,
kHost
,
kAny
,
kAny
,
def
);
USE_LITE_KERNEL
(
fetch
,
kHost
,
kAny
,
kAny
,
def
);
#ifdef LITE_WITH_X86
USE_LITE_KERNEL
(
relu
,
kX86
,
kFloat
,
kNCHW
,
def
);
USE_LITE_KERNEL
(
mul
,
kX86
,
kFloat
,
kNCHW
,
def
);
USE_LITE_KERNEL
(
fc
,
kX86
,
kFloat
,
kNCHW
,
def
);
USE_LITE_KERNEL
(
scale
,
kX86
,
kFloat
,
kNCHW
,
def
);
USE_LITE_KERNEL
(
square
,
kX86
,
kFloat
,
kNCHW
,
def
);
USE_LITE_KERNEL
(
elementwise_sub
,
kX86
,
kFloat
,
kNCHW
,
def
);
USE_LITE_KERNEL
(
elementwise_add
,
kX86
,
kFloat
,
kNCHW
,
def
);
USE_LITE_KERNEL
(
softmax
,
kX86
,
kFloat
,
kNCHW
,
def
);
USE_LITE_KERNEL
(
dropout
,
kX86
,
kFloat
,
kNCHW
,
def
);
#endif
#ifdef LITE_WITH_CUDA
#ifdef LITE_WITH_CUDA
USE_LITE_KERNEL
(
mul
,
kCUDA
,
kFloat
,
kNCHW
,
def
);
USE_LITE_KERNEL
(
mul
,
kCUDA
,
kFloat
,
kNCHW
,
def
);
USE_LITE_KERNEL
(
io_copy
,
kCUDA
,
kAny
,
kAny
,
host_to_device
);
USE_LITE_KERNEL
(
io_copy
,
kCUDA
,
kAny
,
kAny
,
host_to_device
);
...
...
paddle/fluid/lite/core/context.h
浏览文件 @
b5c410a2
...
@@ -95,7 +95,11 @@ struct CUDAContext {
...
@@ -95,7 +95,11 @@ struct CUDAContext {
#ifdef LITE_WITH_X86
#ifdef LITE_WITH_X86
struct
X86Context
{
struct
X86Context
{
// overall information
// overall information
// Default constructor: allocates a CPUDeviceContext, then an ExecutionContext
// constructed from that device context, and stores each in the matching
// member through reset().
X86Context
()
{
// The device context is created first because the execution context below
// is constructed from it.
x86_device_context
.
reset
(
new
::
paddle
::
platform
::
CPUDeviceContext
);
// The execution context receives *x86_device_context as its constructor
// argument.
x86_execution_context
.
reset
(
new
::
paddle
::
framework
::
ExecutionContext
(
*
x86_device_context
));
}
// kernel information
// kernel information
// legacy info.
// legacy info.
...
...
paddle/fluid/lite/core/mir/runtime_context_assign_pass.cc
浏览文件 @
b5c410a2
...
@@ -34,9 +34,13 @@ class RuntimeContextAssignPass : public StmtPass {
...
@@ -34,9 +34,13 @@ class RuntimeContextAssignPass : public StmtPass {
auto
&
inst
=
node
.
AsStmt
();
auto
&
inst
=
node
.
AsStmt
();
switch
(
inst
.
picked_kernel
().
target
())
{
switch
(
inst
.
picked_kernel
().
target
())
{
case
TARGET
(
kHost
):
case
TARGET
(
kHost
):
case
TARGET
(
kX86
):
inst
.
picked_kernel
().
SetContext
(
NewHostContext
());
inst
.
picked_kernel
().
SetContext
(
NewHostContext
());
break
;
break
;
#ifdef LITE_WITH_X86
case
TARGET
(
kX86
):
inst
.
picked_kernel
().
SetContext
(
NewX86Context
());
break
;
#endif
#ifdef LITE_WITH_CUDA
#ifdef LITE_WITH_CUDA
case
TARGET
(
kCUDA
):
case
TARGET
(
kCUDA
):
inst
.
picked_kernel
().
SetContext
(
NewCudaContext
());
inst
.
picked_kernel
().
SetContext
(
NewCudaContext
());
...
@@ -61,6 +65,13 @@ class RuntimeContextAssignPass : public StmtPass {
...
@@ -61,6 +65,13 @@ class RuntimeContextAssignPass : public StmtPass {
return
ctx
;
return
ctx
;
}
}
#ifdef LITE_WITH_X86
// Builds a fresh KernelContext and switches it to its X86Context
// representation before handing ownership to the caller.
std::unique_ptr<KernelContext> NewX86Context() {
  auto x86_ctx = std::unique_ptr<KernelContext>(new KernelContext);
  // The call is made for its effect on the context; the result is unused.
  x86_ctx->As<X86Context>();
  return x86_ctx;
}
#endif
#ifdef LITE_WITH_ARM
#ifdef LITE_WITH_ARM
std
::
unique_ptr
<
KernelContext
>
NewARMContext
()
{
std
::
unique_ptr
<
KernelContext
>
NewARMContext
()
{
...
...
paddle/fluid/lite/core/op_registry.cc
浏览文件 @
b5c410a2
...
@@ -91,6 +91,10 @@ KernelRegistry::KernelRegistry()
...
@@ -91,6 +91,10 @@ KernelRegistry::KernelRegistry()
INIT_FOR
(
kHost
,
kAny
,
kNCHW
);
INIT_FOR
(
kHost
,
kAny
,
kNCHW
);
INIT_FOR
(
kHost
,
kAny
,
kAny
);
INIT_FOR
(
kHost
,
kAny
,
kAny
);
INIT_FOR
(
kX86
,
kFloat
,
kNCHW
);
INIT_FOR
(
kX86
,
kAny
,
kNCHW
);
INIT_FOR
(
kX86
,
kAny
,
kAny
);
INIT_FOR
(
kARM
,
kFloat
,
kNCHW
);
INIT_FOR
(
kARM
,
kFloat
,
kNCHW
);
INIT_FOR
(
kARM
,
kAny
,
kNCHW
);
INIT_FOR
(
kARM
,
kAny
,
kNCHW
);
INIT_FOR
(
kARM
,
kAny
,
kAny
);
INIT_FOR
(
kARM
,
kAny
,
kAny
);
...
...
paddle/fluid/lite/kernels/x86/CMakeLists.txt
浏览文件 @
b5c410a2
...
@@ -3,18 +3,29 @@ if(NOT LITE_WITH_X86)
...
@@ -3,18 +3,29 @@ if(NOT LITE_WITH_X86)
endif
()
endif
()
cc_library
(
activation_compute_x86 SRCS activation_compute.cc DEPS
${
lite_kernel_deps
}
activation_op
)
cc_library
(
activation_compute_x86 SRCS activation_compute.cc DEPS
${
lite_kernel_deps
}
activation_op
)
cc_library
(
elementwise_compute_x86 SRCS elementwise_compute.cc DEPS
${
lite_kernel_deps
}
)
cc_library
(
mean_compute_x86 SRCS mean_compute.cc DEPS
${
lite_kernel_deps
}
)
cc_library
(
mean_compute_x86 SRCS mean_compute.cc DEPS
${
lite_kernel_deps
}
)
cc_library
(
fill_constant_compute_x86 SRCS fill_constant_compute.cc DEPS
${
lite_kernel_deps
}
)
cc_library
(
fill_constant_compute_x86 SRCS fill_constant_compute.cc DEPS
${
lite_kernel_deps
}
)
cc_library
(
mul_compute_x86 SRCS mul_compute.cc DEPS
${
lite_kernel_deps
}
)
cc_library
(
sgd_compute_x86 SRCS sgd_compute.cc DEPS
${
lite_kernel_deps
}
)
cc_library
(
sgd_compute_x86 SRCS sgd_compute.cc DEPS
${
lite_kernel_deps
}
)
cc_library
(
fc_compute_x86 SRCS fc_compute.cc DEPS
${
lite_kernel_deps
}
)
cc_library
(
mul_compute_x86 SRCS mul_compute.cc DEPS
${
lite_kernel_deps
}
)
cc_library
(
relu_compute_x86 SRCS relu_compute.cc DEPS
${
lite_kernel_deps
}
)
cc_library
(
scale_compute_x86 SRCS scale_compute.cc DEPS
${
lite_kernel_deps
}
)
cc_library
(
elementwise_compute_x86 SRCS elementwise_compute.cc DEPS
${
lite_kernel_deps
}
elementwise_sub_op elementwise_add_op
)
cc_library
(
softmax_compute_x86 SRCS softmax_compute.cc DEPS
${
lite_kernel_deps
}
softmax
)
cc_library
(
dropout_compute_x86 SRCS dropout_compute.cc DEPS
${
lite_kernel_deps
}
)
set
(
x86_kernels
set
(
x86_kernels
activation_compute_x86
activation_compute_x86
elementwise_compute_x86
elementwise_compute_x86
mean_compute_x86
mean_compute_x86
fill_constant_compute_x86
fill_constant_compute_x86
mul_compute_x86
mul_compute_x86
relu_compute_x86
fc_compute_x86
scale_compute_x86
softmax_compute_x86
dropout_compute_x86
)
)
set
(
x86_kernels
"
${
x86_kernels
}
"
CACHE INTERNAL
"x86 kernels"
)
set
(
x86_kernels
"
${
x86_kernels
}
"
CACHE INTERNAL
"x86 kernels"
)
paddle/fluid/lite/kernels/x86/dropout_compute.cc
0 → 100644
浏览文件 @
b5c410a2
// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include <random>
#include <string>
#include "paddle/fluid/framework/eigen.h"
#include "paddle/fluid/framework/operator.h"
#include "paddle/fluid/lite/core/kernel.h"
#include "paddle/fluid/lite/core/op_registry.h"
namespace
paddle
{
namespace
lite
{
namespace
kernels
{
namespace
x86
{
// Shorthand for the framework's Eigen matrix adaptor, defaulting to row-major
// storage and Eigen's dense index type.
template <typename T, int MajorType = Eigen::RowMajor,
          typename IndexType = Eigen::DenseIndex>
using EigenMatrix = framework::EigenMatrix<T, MajorType, IndexType>;

// X86 dropout kernel.
//
// Training (param.is_test == false): for every element a uniform sample in
// [0, 1) is drawn; samples below param.dropout_prob zero both the mask and
// the output, otherwise the input is kept. With the "upscale_in_train"
// implementation kept values (and the mask) are divided by
// (1 - dropout_prob); with any other implementation they are passed through
// with a mask of 1.
//
// Inference (param.is_test == true): with "upscale_in_train" the input is
// copied unchanged, otherwise it is multiplied by (1 - dropout_prob).
template <typename T>
class DropoutCompute : public KernelLite<TARGET(kX86), PRECISION(kFloat)> {
 public:
  using param_t = operators::DropoutParam;

  void Run() override {
    auto& param = *param_.get_mutable<operators::DropoutParam>();
    const auto* x_data = param.x->data<T>();
    auto* out_data = param.output->template mutable_data<T>();
    // Both branches need these, so evaluate them once instead of per element.
    const bool upscale_in_train =
        param.dropout_implementation == "upscale_in_train";
    const T keep_scale = static_cast<T>(1.0f - param.dropout_prob);

    if (!param.is_test) {
      auto* mask_data = param.mask->template mutable_data<T>();
      std::random_device rnd;
      std::minstd_rand engine;
      // A fixed seed gives reproducible masks; otherwise seed from the
      // random device.
      int seed = param.fix_seed ? param.seed : rnd();
      engine.seed(seed);
      std::uniform_real_distribution<float> dist(0, 1);

      // Values written for elements that survive the draw.
      const T kept_mask = upscale_in_train ? T(1.0f / keep_scale) : T(1);

      size_t size = framework::product(param.mask->dims().data());
      for (size_t i = 0; i < size; ++i) {
        if (dist(engine) < param.dropout_prob) {
          mask_data[i] = 0;
          out_data[i] = 0;
        } else {
          mask_data[i] = kept_mask;
          out_data[i] = upscale_in_train ? x_data[i] / keep_scale : x_data[i];
        }
      }
    } else {
      auto X = EigenMatrix<T>::Reshape(param.x->raw_tensor(), 1);
      auto Y = EigenMatrix<T>::Reshape(param.output->raw_tensor(), 1);
      // Keep the device context alive for as long as `place` is used. The
      // previous code took the device from a temporary CPUDeviceContext that
      // was destroyed at the end of the statement, leaving `place` referring
      // to an object obtained from a dead context.
      platform::CPUDeviceContext device_context;
      auto& place = *device_context.eigen_device();
      if (upscale_in_train) {
        Y.device(place) = X;
      } else {
        Y.device(place) = X * keep_scale;
      }
    }
  }

  virtual ~DropoutCompute() = default;
};
}
// namespace x86
}
// namespace kernels
}
// namespace lite
}
// namespace paddle
REGISTER_LITE_KERNEL
(
dropout
,
kX86
,
kFloat
,
kNCHW
,
paddle
::
lite
::
kernels
::
x86
::
DropoutCompute
<
float
>
,
def
)
.
BindInput
(
"X"
,
{
LiteType
::
GetTensorTy
(
TARGET
(
kX86
))})
.
BindOutput
(
"Mask"
,
{
LiteType
::
GetTensorTy
(
TARGET
(
kX86
))})
.
BindOutput
(
"Out"
,
{
LiteType
::
GetTensorTy
(
TARGET
(
kX86
))})
.
Finalize
();
paddle/fluid/lite/kernels/x86/elementwise_compute.cc
浏览文件 @
b5c410a2
...
@@ -30,6 +30,11 @@ struct SubFunctor {
...
@@ -30,6 +30,11 @@ struct SubFunctor {
inline
HOSTDEVICE
T
operator
()(
T
a
,
T
b
)
const
{
return
a
-
b
;
}
inline
HOSTDEVICE
T
operator
()(
T
a
,
T
b
)
const
{
return
a
-
b
;
}
};
};
// Binary functor returning the sum of its two operands.
template <typename T>
struct AddFunctor {
  inline HOSTDEVICE T operator()(T lhs, T rhs) const {
    return lhs + rhs;
  }
};
template
<
typename
T
>
template
<
typename
T
>
class
ElementwiseSubCompute
class
ElementwiseSubCompute
:
public
KernelLite
<
TARGET
(
kX86
),
PRECISION
(
kFloat
)
>
{
:
public
KernelLite
<
TARGET
(
kX86
),
PRECISION
(
kFloat
)
>
{
...
@@ -67,10 +72,9 @@ class ElementwiseSubGradCompute
...
@@ -67,10 +72,9 @@ class ElementwiseSubGradCompute
:
public
KernelLite
<
TARGET
(
kX86
),
PRECISION
(
kFloat
)
>
{
:
public
KernelLite
<
TARGET
(
kX86
),
PRECISION
(
kFloat
)
>
{
public:
public:
using
param_t
=
operators
::
ElementwiseGradParam
;
using
param_t
=
operators
::
ElementwiseGradParam
;
void
Run
()
override
{
void
Run
()
override
{
auto
&
param
=
*
param_
.
get_mutable
<
param_t
>
();
auto
&
param
=
*
param_
.
get_mutable
<
param_t
>
();
auto
&
context
=
c
ontext
_
->
As
<
X86Context
>
();
auto
&
context
=
c
tx
_
->
As
<
X86Context
>
();
CHECK
(
context
.
x86_device_context
);
CHECK
(
context
.
x86_device_context
);
param
.
X_grad
->
template
mutable_data
<
T
>();
param
.
X_grad
->
template
mutable_data
<
T
>();
...
@@ -89,6 +93,26 @@ class ElementwiseSubGradCompute
...
@@ -89,6 +93,26 @@ class ElementwiseSubGradCompute
virtual
~
ElementwiseSubGradCompute
()
=
default
;
virtual
~
ElementwiseSubGradCompute
()
=
default
;
};
};
// X86 kernel for elementwise_add: forwards X, Y and the axis to
// paddle::operators::ElementwiseComputeEx with AddFunctor<T>, writing into
// Out.
template <typename T>
class ElementwiseAddCompute
    : public KernelLite<TARGET(kX86), PRECISION(kFloat)> {
 public:
  using param_t = operators::ElementwiseParam;

  void Run() override {
    auto& args = *param_.get_mutable<param_t>();
    auto& x86_ctx = ctx_->As<X86Context>();
    CHECK(x86_ctx.x86_device_context);

    // Make sure the output buffer exists before the computation writes to it.
    args.Out->template mutable_data<T>();

    auto* lhs = &args.X->raw_tensor();
    auto* rhs = &args.Y->raw_tensor();
    auto* result = &args.Out->raw_tensor();
    paddle::operators::ElementwiseComputeEx<AddFunctor<T>,
                                            platform::CPUDeviceContext, T>(
        *x86_ctx.x86_execution_context, lhs, rhs, args.axis, AddFunctor<T>(),
        result);
  }

  virtual ~ElementwiseAddCompute() = default;
};
}
// namespace x86
}
// namespace x86
}
// namespace kernels
}
// namespace kernels
}
// namespace lite
}
// namespace lite
...
@@ -113,3 +137,11 @@ REGISTER_LITE_KERNEL(elementwise_sub_grad, kX86, kFloat, kNCHW,
...
@@ -113,3 +137,11 @@ REGISTER_LITE_KERNEL(elementwise_sub_grad, kX86, kFloat, kNCHW,
.
BindOutput
(
paddle
::
framework
::
GradVarName
(
"Y"
),
.
BindOutput
(
paddle
::
framework
::
GradVarName
(
"Y"
),
{
LiteType
::
GetTensorTy
(
TARGET
(
kX86
))})
{
LiteType
::
GetTensorTy
(
TARGET
(
kX86
))})
.
Finalize
();
.
Finalize
();
REGISTER_LITE_KERNEL
(
elementwise_add
,
kX86
,
kFloat
,
kNCHW
,
paddle
::
lite
::
kernels
::
x86
::
ElementwiseAddCompute
<
float
>
,
def
)
.
BindInput
(
"X"
,
{
LiteType
::
GetTensorTy
(
TARGET
(
kX86
))})
.
BindInput
(
"Y"
,
{
LiteType
::
GetTensorTy
(
TARGET
(
kX86
))})
.
BindOutput
(
"Out"
,
{
LiteType
::
GetTensorTy
(
TARGET
(
kX86
))})
.
Finalize
();
paddle/fluid/lite/kernels/x86/fc_compute.cc
0 → 100644
浏览文件 @
b5c410a2
// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include <Eigen/Core>
#include "paddle/fluid/framework/eigen.h"
#include "paddle/fluid/framework/operator.h"
#include "paddle/fluid/lite/core/kernel.h"
#include "paddle/fluid/lite/core/op_lite.h"
#include "paddle/fluid/lite/core/op_registry.h"
#include "paddle/fluid/lite/core/type_system.h"
#include "paddle/fluid/lite/operators/fc_op.h"
namespace
paddle
{
namespace
lite
{
namespace
kernels
{
namespace
x86
{
template
<
typename
T
>
void
fc_compute_eigen
(
const
T
*
x
,
int
x_w
,
int
x_h
,
//
const
T
*
w
,
int
w_w
,
int
w_h
,
//
const
T
*
b
,
//
T
*
out
)
{
using
matrix_t
=
Eigen
::
Matrix
<
T
,
Eigen
::
Dynamic
,
Eigen
::
Dynamic
,
Eigen
::
RowMajor
>
;
Eigen
::
Map
<
const
matrix_t
>
X
(
x
,
x_h
,
x_w
);
Eigen
::
Map
<
const
matrix_t
>
W
(
w
,
w_h
,
w_w
);
Eigen
::
Map
<
matrix_t
>
Out
(
out
,
x_h
,
w_h
);
Out
=
X
*
W
.
transpose
();
if
(
b
)
{
Eigen
::
Map
<
const
Eigen
::
Matrix
<
T
,
Eigen
::
Dynamic
,
1
>>
B
(
b
,
w_h
);
Out
=
Out
.
array
().
rowwise
()
+
B
.
transpose
().
array
();
}
}
// Returns the sum of x[i] * y[i] for i in [0, dim); a value-initialized T
// when dim is not positive.
template <typename T>
__attribute__((optimize("unroll-loops")))  //
T dot(const T* x, const T* y, int dim) {
  T acc{};
  int idx = 0;
  while (idx < dim) {
    acc += x[idx] * y[idx];
    ++idx;
  }
  return acc;
}
// Reference fully-connected forward pass without Eigen.
//
// x is read as x_h rows of x_w values and w as w_h rows of w_w values
// (x_w must equal w_w). out receives x_h rows of w_h values where
// out[r][c] = dot(x row r, w row c) (+ b[c] when a bias is supplied).
//
// b may be null, matching fc_compute_eigen, in which case no bias is added.
template <typename T>
void fc_compute_naive(const T* x, int x_w, int x_h,  //
                      const T* w, int w_w, int w_h,  //
                      const T* b,                    //
                      T* out) {
  CHECK_EQ(x_w, w_w);
  // out shape: (x_h, w_h). Every element is assigned below, so no separate
  // zero-fill is required.
  for (int r = 0; r < x_h; ++r) {
    const T* x_row = x + r * x_w;
    T* out_row = out + r * w_h;
    for (int c = 0; c < w_h; ++c) {
      const T acc = dot(x_row, w + c * w_w, w_w);
      out_row[c] = b ? acc + b[c] : acc;
    }
  }
}
// X86 fully-connected kernel: flattens the input around in_num_col_dims and
// delegates the matrix product (plus optional bias) to fc_compute_eigen.
template <typename T>
class FcCompute : public KernelLite<TARGET(kX86), PRECISION(kFloat)> {
 public:
  using param_t = operators::FcParam;

  void Run() override {
    auto& param = *param_.get_mutable<param_t>();
    CHECK_GE(param.input->dims().size(), 2UL);
    CHECK_EQ(param.output->dims().size(), 2UL);

    const auto& in_dims = param.input->dims();
    // fc_compute_eigen accepts a null bias pointer and skips the addition, so
    // forward nullptr instead of dereferencing an absent bias tensor.
    const T* bias_data = param.bias ? param.bias->data<T>() : nullptr;

    // NOTE(review): the product of the leading dims is passed in the callee's
    // x_w slot and the product of the trailing dims in its x_h slot. This
    // looks swapped relative to the parameter names -- confirm against the
    // weight layout before changing; the order is kept as-is here.
    fc_compute_eigen(
        param.input->data<T>(),                               // x
        in_dims.Slice(0, param.in_num_col_dims).production(),
        in_dims.Slice(param.in_num_col_dims, in_dims.size()).production(),
        param.w->data<T>(),                                   // w
        param.w->dims()[1],                                   // w_w
        param.w->dims()[0],                                   // w_h
        bias_data,                                            // b
        param.output->mutable_data<T>());
  }

  virtual ~FcCompute() = default;
};
}
// namespace x86
}
// namespace kernels
}
// namespace lite
}
// namespace paddle
REGISTER_LITE_KERNEL
(
fc
,
kX86
,
kFloat
,
kNCHW
,
paddle
::
lite
::
kernels
::
x86
::
FcCompute
<
float
>
,
def
)
.
BindInput
(
"Input"
,
{
LiteType
::
GetTensorTy
(
TARGET
(
kX86
))})
.
BindInput
(
"Bias"
,
{
LiteType
::
GetTensorTy
(
TARGET
(
kX86
))})
.
BindInput
(
"W"
,
{
LiteType
::
GetTensorTy
(
TARGET
(
kX86
))})
.
BindOutput
(
"Out"
,
{
LiteType
::
GetTensorTy
(
TARGET
(
kX86
))})
.
Finalize
();
paddle/fluid/lite/kernels/x86/fill_constant_compute.cc
浏览文件 @
b5c410a2
...
@@ -31,7 +31,7 @@ class FillConstantCompute : public KernelLite<TARGET(kX86), PRECISION(kFloat)> {
...
@@ -31,7 +31,7 @@ class FillConstantCompute : public KernelLite<TARGET(kX86), PRECISION(kFloat)> {
void
Run
()
override
{
void
Run
()
override
{
auto
&
param
=
*
param_
.
get_mutable
<
param_t
>
();
auto
&
param
=
*
param_
.
get_mutable
<
param_t
>
();
auto
&
context
=
c
ontext
_
->
As
<
X86Context
>
();
auto
&
context
=
c
tx
_
->
As
<
X86Context
>
();
CHECK
(
context
.
x86_device_context
);
CHECK
(
context
.
x86_device_context
);
param
.
Out
->
template
mutable_data
<
T
>();
param
.
Out
->
template
mutable_data
<
T
>();
...
...
paddle/fluid/lite/kernels/x86/mean_compute.cc
浏览文件 @
b5c410a2
...
@@ -37,7 +37,7 @@ class MeanCompute : public KernelLite<TARGET(kX86), PRECISION(kFloat)> {
...
@@ -37,7 +37,7 @@ class MeanCompute : public KernelLite<TARGET(kX86), PRECISION(kFloat)> {
void
Run
()
override
{
void
Run
()
override
{
auto
&
param
=
*
param_
.
get_mutable
<
param_t
>
();
auto
&
param
=
*
param_
.
get_mutable
<
param_t
>
();
auto
&
context
=
c
ontext
_
->
As
<
X86Context
>
();
auto
&
context
=
c
tx
_
->
As
<
X86Context
>
();
CHECK
(
context
.
x86_device_context
);
CHECK
(
context
.
x86_device_context
);
param
.
Out
->
template
mutable_data
<
T
>();
param
.
Out
->
template
mutable_data
<
T
>();
...
@@ -59,7 +59,7 @@ class MeanGradCompute : public KernelLite<TARGET(kX86), PRECISION(kFloat)> {
...
@@ -59,7 +59,7 @@ class MeanGradCompute : public KernelLite<TARGET(kX86), PRECISION(kFloat)> {
void
Run
()
override
{
void
Run
()
override
{
auto
&
param
=
*
param_
.
get_mutable
<
param_t
>
();
auto
&
param
=
*
param_
.
get_mutable
<
param_t
>
();
auto
&
context
=
c
ontext
_
->
As
<
X86Context
>
();
auto
&
context
=
c
tx
_
->
As
<
X86Context
>
();
CHECK_EQ
(
param
.
Out_grad
->
raw_tensor
().
numel
(),
1
);
CHECK_EQ
(
param
.
Out_grad
->
raw_tensor
().
numel
(),
1
);
CHECK
(
context
.
x86_device_context
);
CHECK
(
context
.
x86_device_context
);
...
...
paddle/fluid/lite/kernels/x86/mul_compute.cc
浏览文件 @
b5c410a2
...
@@ -30,7 +30,7 @@ class MulCompute : public KernelLite<TARGET(kX86), PRECISION(kFloat)> {
...
@@ -30,7 +30,7 @@ class MulCompute : public KernelLite<TARGET(kX86), PRECISION(kFloat)> {
using
param_t
=
operators
::
MulParam
;
using
param_t
=
operators
::
MulParam
;
void
Run
()
override
{
void
Run
()
override
{
auto
&
context
=
c
ontext
_
->
As
<
X86Context
>
();
auto
&
context
=
c
tx
_
->
As
<
X86Context
>
();
auto
&
param
=
*
param_
.
get_mutable
<
operators
::
MulParam
>
();
auto
&
param
=
*
param_
.
get_mutable
<
operators
::
MulParam
>
();
CHECK
(
context
.
x86_device_context
);
CHECK
(
context
.
x86_device_context
);
...
@@ -68,7 +68,7 @@ template <typename T>
...
@@ -68,7 +68,7 @@ template <typename T>
class
MulGradCompute
:
public
KernelLite
<
TARGET
(
kX86
),
PRECISION
(
kFloat
)
>
{
class
MulGradCompute
:
public
KernelLite
<
TARGET
(
kX86
),
PRECISION
(
kFloat
)
>
{
public:
public:
void
Run
()
override
{
void
Run
()
override
{
auto
&
context
=
c
ontext
_
->
As
<
X86Context
>
();
auto
&
context
=
c
tx
_
->
As
<
X86Context
>
();
auto
&
param
=
*
param_
.
get_mutable
<
operators
::
MulGradParam
>
();
auto
&
param
=
*
param_
.
get_mutable
<
operators
::
MulGradParam
>
();
CHECK
(
context
.
x86_device_context
);
CHECK
(
context
.
x86_device_context
);
...
...
paddle/fluid/lite/kernels/x86/relu_compute.cc
0 → 100644
浏览文件 @
b5c410a2
// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include <Eigen/Core>
#include "paddle/fluid/framework/eigen.h"
#include "paddle/fluid/framework/operator.h"
#include "paddle/fluid/lite/core/kernel.h"
#include "paddle/fluid/lite/core/op_lite.h"
#include "paddle/fluid/lite/core/op_registry.h"
#include "paddle/fluid/lite/core/type_system.h"
#include "paddle/fluid/lite/operators/relu_op.h"
namespace
paddle
{
namespace
lite
{
namespace
kernels
{
namespace
x86
{
// X86 ReLU kernel: writes max(0, input[i]) to output[i] for every element of
// the input tensor.
template <typename T>
class ReluCompute : public KernelLite<TARGET(kX86), PRECISION(kFloat)> {
 public:
  using param_t = operators::ReluParam;

  void Run() override {
    auto& param = *param_.get_mutable<param_t>();
    const auto n = param.input->dims().production();
    // Use the kernel's element type instead of a hard-coded float so the
    // template parameter is actually honoured.
    const T* input = param.input->data<T>();
    T* output = param.output->mutable_data<T>();
    const T zero = static_cast<T>(0);
    // Index with the same type as the element count to avoid a narrowing
    // comparison against a wider count.
    for (decltype(param.input->dims().production()) i = 0; i < n; ++i) {
      output[i] = std::max(zero, input[i]);
    }
  }

  virtual ~ReluCompute() = default;
};
}
// namespace x86
}
// namespace kernels
}
// namespace lite
}
// namespace paddle
REGISTER_LITE_KERNEL
(
relu
,
kX86
,
kFloat
,
kNCHW
,
paddle
::
lite
::
kernels
::
x86
::
ReluCompute
<
float
>
,
def
)
.
BindInput
(
"Input"
,
{
LiteType
::
GetTensorTy
(
TARGET
(
kX86
))})
.
BindOutput
(
"Out"
,
{
LiteType
::
GetTensorTy
(
TARGET
(
kX86
))})
.
Finalize
();
paddle/fluid/lite/kernels/x86/scale_compute.cc
0 → 100644
浏览文件 @
b5c410a2
// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include <Eigen/Core>
#include "paddle/fluid/framework/eigen.h"
#include "paddle/fluid/framework/operator.h"
#include "paddle/fluid/lite/core/kernel.h"
#include "paddle/fluid/lite/core/op_lite.h"
#include "paddle/fluid/lite/core/op_registry.h"
#include "paddle/fluid/lite/core/type_system.h"
#include "paddle/fluid/lite/operators/relu_op.h"
namespace
paddle
{
namespace
lite
{
namespace
kernels
{
namespace
x86
{
// Writes x[i] * scale + offset to out[i] for i in [0, size), where offset is
// bias * scale when bias_before is true and bias otherwise. In other words,
// bias_before == true yields (x + bias) * scale expanded as
// x * scale + bias * scale.
template <typename T>
void scale_compute(const T* x, T* out, int size, float scale, float bias,
                   bool bias_before) {
  const float offset = bias_before ? bias * scale : bias;
  const T* src = x;
  T* dst = out;
  for (int remaining = size; remaining > 0; --remaining) {
    *dst = *src * scale + offset;
    ++src;
    ++dst;
  }
}
// X86 scale kernel: applies scale and bias to every element of x and stores
// the result in output.
template <typename T>
class ScaleCompute : public KernelLite<TARGET(kX86), PRECISION(kFloat)> {
 public:
  using param_t = operators::ScaleParam;

  void Run() override {
    auto& param = *param_.get_mutable<param_t>();
    // scale_compute's last argument means "bias is added BEFORE scaling",
    // which is the opposite of the op attribute bias_after_scale. Passing the
    // attribute straight through inverted the behaviour, so negate it here.
    const bool bias_before_scale = !param.bias_after_scale;
    scale_compute(param.x->data<T>(), param.output->mutable_data<T>(),
                  param.x->dims().production(), param.scale, param.bias,
                  bias_before_scale);
  }

  virtual ~ScaleCompute() = default;
};
}
// namespace x86
}
// namespace kernels
}
// namespace lite
}
// namespace paddle
// Registers ScaleCompute<float> as the `scale` kernel for (kX86, kFloat,
// kNCHW) under the alias `def`; both "X" and "Out" are bound to x86 tensors.
REGISTER_LITE_KERNEL(scale, kX86, kFloat, kNCHW,
                     paddle::lite::kernels::x86::ScaleCompute<float>, def)
    .BindInput("X", {LiteType::GetTensorTy(TARGET(kX86))})
    .BindOutput("Out", {LiteType::GetTensorTy(TARGET(kX86))})
    .Finalize();
paddle/fluid/lite/kernels/x86/softmax_compute.cc
0 → 100644
浏览文件 @
b5c410a2
// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "paddle/fluid/operators/math/softmax.h"
#include "paddle/fluid/framework/eigen.h"
#include "paddle/fluid/framework/lod_tensor.h"
#include "paddle/fluid/framework/operator.h"
#include "paddle/fluid/lite/core/kernel.h"
#include "paddle/fluid/lite/core/op_registry.h"
namespace
paddle
{
namespace
lite
{
namespace
kernels
{
namespace
x86
{
// Maps a possibly negative axis index onto [0, rank) by adding `rank` to
// negative values; non-negative values are returned unchanged. No range
// validation is performed.
static inline int CanonicalAxis(const int axis, const int rank) {
  return axis < 0 ? axis + rank : axis;
}
// Returns the product of dims[0] .. dims[axis - 1] (1 when axis <= 0).
static inline int SizeToAxis(const int axis, lite::DDim dims) {
  int product = 1;
  int idx = 0;
  while (idx < axis) {
    product *= dims[idx];
    ++idx;
  }
  return product;
}
// Returns the product of dims[axis] .. dims[dims.size() - 1]
// (1 when axis is already past the last dimension).
static inline int SizeFromAxis(const int axis, lite::DDim dims) {
  int product = 1;
  int idx = axis;
  while (idx < dims.size()) {
    product *= dims[idx];
    ++idx;
  }
  return product;
}
// x86 float kernel for the `softmax` operator.
//
// Run() views both the input and the output as 2-D tensors of shape [n, d],
// where n is the product of the dimensions before the canonical axis and d is
// the product of the dimensions from that axis onwards, and then delegates to
// paddle::operators::math::SoftmaxFunctor on a CPU device context.
template <typename T>
class SoftmaxCompute : public KernelLite<TARGET(kX86), PRECISION(kFloat)> {
 public:
  using param_t = operators::SoftmaxParam;

  void Run() override {
    auto& param = *param_.get_mutable<param_t>();
    // auto& context = context_->As<X86Context>();
    CHECK(param.output);
    CHECK(param.x);

    // Resolve a negative axis against the input rank.
    auto x_dims = param.x->dims();
    const int rank = x_dims.size();
    const int axis = CanonicalAxis(param.axis, rank);
    int axis_dim = x_dims[axis];

    // Collapse the input shape around the axis into [n, d].
    const int n = SizeToAxis(axis, x_dims);
    const int d = SizeFromAxis(axis, x_dims);
    std::vector<int64_t> shape{n, d};

    // The 2-D tensors share storage with the op's tensors; only the shape
    // seen by the functor differs.
    lite::Tensor input_2d;
    input_2d.ShareDataWith(*param.x);
    input_2d.Resize(lite::DDim(shape));

    lite::Tensor out_2d;
    out_2d.ShareDataWith(*param.output);
    out_2d.Resize(lite::DDim(shape));

    paddle::operators::math::SoftmaxFunctor<platform::CPUDeviceContext, T,
                                            true>
        softmax;
    softmax(platform::CPUDeviceContext(), axis_dim, &input_2d.raw_tensor(),
            &out_2d.raw_tensor());
  }

  virtual ~SoftmaxCompute() = default;
};
}
// namespace x86
}
// namespace kernels
}
// namespace lite
}
// namespace paddle
// Registers SoftmaxCompute<float> as the `softmax` kernel for (kX86, kFloat,
// kNCHW) under the alias `def`; both "X" and "Out" are bound to x86 tensors.
REGISTER_LITE_KERNEL(softmax, kX86, kFloat, kNCHW,
                     paddle::lite::kernels::x86::SoftmaxCompute<float>, def)
    .BindInput("X", {LiteType::GetTensorTy(TARGET(kX86))})
    .BindOutput("Out", {LiteType::GetTensorTy(TARGET(kX86))})
    .Finalize();
paddle/fluid/lite/operators/CMakeLists.txt
浏览文件 @
b5c410a2
...
@@ -13,8 +13,9 @@ cc_library(elementwise_ops_lite SRCS elementwise_ops.cc DEPS ${op_DEPS})
...
@@ -13,8 +13,9 @@ cc_library(elementwise_ops_lite SRCS elementwise_ops.cc DEPS ${op_DEPS})
cc_library
(
mean_op_lite SRCS mean_op.cc DEPS
${
op_DEPS
}
)
cc_library
(
mean_op_lite SRCS mean_op.cc DEPS
${
op_DEPS
}
)
cc_library
(
fill_constant_op_lite SRCS fill_constant_op.cc DEPS
${
op_DEPS
}
)
cc_library
(
fill_constant_op_lite SRCS fill_constant_op.cc DEPS
${
op_DEPS
}
)
#cc_library(sgd_op_lite SRCS sgd_op.cc DEPS ${op_DEPS})
#cc_library(sgd_op_lite SRCS sgd_op.cc DEPS ${op_DEPS})
cc_library
(
op_params_lite SRCS op_params.cc DEPS
${
tensor_lite
}
any_lite framework_proto_lite
)
cc_library
(
op_params_lite SRCS op_params.cc DEPS
${
tensor_lite
}
any_lite framework_proto_lite
)
# Library target for the lite dropout operator, built from dropout_op.cc.
cc_library(dropout_op_lite SRCS dropout_op.cc DEPS ${op_DEPS})
set
(
ops_lite
set
(
ops_lite
fc_op_lite
fc_op_lite
relu_op_lite
relu_op_lite
...
@@ -27,7 +28,9 @@ set(ops_lite
...
@@ -27,7 +28,9 @@ set(ops_lite
elementwise_ops_lite
elementwise_ops_lite
mean_op_lite
mean_op_lite
fill_constant_op_lite
fill_constant_op_lite
activation_ops_lite
dropout_op_lite
PARENT_SCOPE
)
PARENT_SCOPE
)
lite_cc_test
(
test_fc_op_lite SRCS fc_op_test.cc DEPS fc_op_lite memory_lite
)
lite_cc_test
(
test_fc_op_lite SRCS fc_op_test.cc DEPS fc_op_lite memory_lite
fc_compute_x86
)
lite_cc_test
(
test_softmax_op_lite SRCS softmax_op_test.cc DEPS softmax_op_lite memory_lite
)
lite_cc_test
(
test_softmax_op_lite SRCS softmax_op_test.cc DEPS softmax_op_lite memory_lite
)
paddle/fluid/lite/operators/dropout_op.cc
0 → 100644
浏览文件 @
b5c410a2
// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include <string>
#include <vector>
#include "paddle/fluid/lite/core/op_lite.h"
#include "paddle/fluid/lite/core/op_registry.h"
namespace
paddle
{
namespace
lite
{
namespace
operators
{
class
DropoutOpLite
:
public
OpLite
{
public:
explicit
DropoutOpLite
(
const
std
::
string
&
type
)
:
OpLite
(
type
)
{}
bool
CheckShape
()
const
override
{
CHECK_OR_FALSE
(
param_
.
x
);
return
true
;
}
bool
InferShape
()
const
override
{
const
auto
x_dims
=
param_
.
x
->
dims
();
param_
.
output
->
Resize
(
x_dims
);
if
(
param_
.
is_test
==
false
)
{
param_
.
mask
->
Resize
(
x_dims
);
}
// share LoD
// param_.output->set_lod(param_.input->lod());
return
true
;
}
void
AttachKernel
(
KernelBase
*
kernel
)
override
{
kernel
->
SetParam
(
param_
);
}
// TODO(Superjomn) replace framework::OpDesc with a lite one.
bool
AttachImpl
(
const
OpDesc
&
op_desc
,
lite
::
Scope
*
scope
)
override
{
auto
input
=
op_desc
.
Input
(
"X"
).
front
();
auto
out
=
op_desc
.
Output
(
"Out"
).
front
();
auto
Mask
=
op_desc
.
Output
(
"Mask"
).
front
();
param_
.
x
=
GetVar
<
lite
::
Tensor
>
(
scope
,
input
);
param_
.
output
=
GetMutableVar
<
lite
::
Tensor
>
(
scope
,
out
);
param_
.
mask
=
GetMutableVar
<
lite
::
Tensor
>
(
scope
,
Mask
);
param_
.
dropout_prob
=
boost
::
get
<
float
>
(
op_desc
.
GetAttr
(
"dropout_prob"
));
if
(
op_desc
.
HasAttr
(
"axis"
))
{
param_
.
is_test
=
boost
::
get
<
bool
>
(
op_desc
.
GetAttr
(
"is_test"
));
}
param_
.
fix_seed
=
boost
::
get
<
bool
>
(
op_desc
.
GetAttr
(
"fix_seed"
));
param_
.
seed
=
boost
::
get
<
int
>
(
op_desc
.
GetAttr
(
"seed"
));
param_
.
dropout_implementation
=
boost
::
get
<
int
>
(
op_desc
.
GetAttr
(
"dropout_implementation"
));
return
true
;
}
std
::
string
DebugString
()
const
override
{
return
"dropout"
;
}
private:
mutable
DropoutParam
param_
;
};
}
// namespace operators
}
// namespace lite
}
// namespace paddle
// Registers DropoutOpLite under the op type name `dropout`.
REGISTER_LITE_OP(dropout, paddle::lite::operators::DropoutOpLite);
paddle/fluid/lite/operators/elementwise_ops.cc
浏览文件 @
b5c410a2
...
@@ -43,9 +43,8 @@ class ElementwiseOp : public OpLite {
...
@@ -43,9 +43,8 @@ class ElementwiseOp : public OpLite {
param_
.
X
=
GetVar
<
lite
::
Tensor
>
(
scope
,
X_name
);
param_
.
X
=
GetVar
<
lite
::
Tensor
>
(
scope
,
X_name
);
param_
.
Y
=
GetVar
<
lite
::
Tensor
>
(
scope
,
Y_name
);
param_
.
Y
=
GetVar
<
lite
::
Tensor
>
(
scope
,
Y_name
);
param_
.
Out
=
GetMutableVar
<
Tensor
>
(
scope
,
Out_name
);
param_
.
Out
=
GetMutableVar
<
lite
::
Tensor
>
(
scope
,
Out_name
);
param_
.
axis
=
GetAttr
<
int
>
(
opdesc
.
GetAttr
(
"axis"
));
param_
.
axis
=
boost
::
get
<
int
>
(
opdesc
.
GetAttr
(
"axis"
));
return
true
;
return
true
;
}
}
...
@@ -110,3 +109,4 @@ REGISTER_LITE_OP(elementwise_sub, paddle::lite::operators::ElementwiseOp);
...
@@ -110,3 +109,4 @@ REGISTER_LITE_OP(elementwise_sub, paddle::lite::operators::ElementwiseOp);
REGISTER_LITE_OP
(
elementwise_sub_grad
,
REGISTER_LITE_OP
(
elementwise_sub_grad
,
paddle
::
lite
::
operators
::
ElementwiseGradExplicitOp
);
paddle
::
lite
::
operators
::
ElementwiseGradExplicitOp
);
#endif
#endif
// Registers ElementwiseOp under the op type name `elementwise_add`.
REGISTER_LITE_OP(elementwise_add, paddle::lite::operators::ElementwiseOp);
paddle/fluid/lite/operators/fc_op_test.cc
浏览文件 @
b5c410a2
...
@@ -57,10 +57,16 @@ TEST(fc_op_lite, test) {
...
@@ -57,10 +57,16 @@ TEST(fc_op_lite, test) {
FcOpLite
fc
(
"fc"
);
FcOpLite
fc
(
"fc"
);
fc
.
SetValidPlaces
({
Place
{
TARGET
(
k
Host
),
PRECISION
(
kFloat
)}});
fc
.
SetValidPlaces
({
Place
{
TARGET
(
k
X86
),
PRECISION
(
kFloat
)}});
fc
.
Attach
(
desc
,
&
scope
);
fc
.
Attach
(
desc
,
&
scope
);
auto
kernels
=
fc
.
CreateKernels
({
Place
{
TARGET
(
kX86
),
PRECISION
(
kFloat
)}});
ASSERT_FALSE
(
kernels
.
empty
());
}
}
}
// namespace operators
}
// namespace operators
}
// namespace lite
}
// namespace lite
}
// namespace paddle
}
// namespace paddle
// Reference the x86 float NCHW `fc` kernel (alias `def`) only in builds that
// define LITE_WITH_X86.
#ifdef LITE_WITH_X86
USE_LITE_KERNEL(fc, kX86, kFloat, kNCHW, def);
#endif
paddle/fluid/lite/operators/op_params.h
浏览文件 @
b5c410a2
...
@@ -13,6 +13,7 @@
...
@@ -13,6 +13,7 @@
// limitations under the License.
// limitations under the License.
#pragma once
#pragma once
#include <string>
#include <vector>
#include <vector>
#include "paddle/fluid/lite/core/compatible_tensor.h"
#include "paddle/fluid/lite/core/compatible_tensor.h"
#include "paddle/fluid/lite/core/framework.pb.h"
#include "paddle/fluid/lite/core/framework.pb.h"
...
@@ -94,14 +95,67 @@ struct ScaleParam {
...
@@ -94,14 +95,67 @@ struct ScaleParam {
bool
bias_after_scale
{
true
};
bool
bias_after_scale
{
true
};
};
};
// For Softmax
O
p
// For Softmax
o
p
struct
SoftmaxParam
{
struct
SoftmaxParam
{
lite
::
Tensor
*
x
{};
lite
::
Tensor
*
x
{};
lite
::
Tensor
*
output
{};
lite
::
Tensor
*
output
{};
int
axis
{
-
1
};
int
axis
{
-
1
};
};
};
// For Convolution op
// Tensors and attributes of a convolution. All tensor pointers default to
// null; the remaining members carry the defaults shown below.
struct ConvParam {
  lite::Tensor* x{};
  lite::Tensor* filter{};
  lite::Tensor* bias{};
  lite::Tensor* residualData{};
  lite::Tensor* output{};
  std::vector<int> strides{1, 1};
  std::vector<int> paddings{0, 0};
  int groups{1};
  std::vector<int> dilations{1, 1};
  bool fuse_relu_before_depthwise_conv{false};
  bool use_mkldnn{false};
  bool fuse_relu{false};  // only used in mkldnn kernel
  bool use_quantizer{
      false};  // set true for op that should be quantized, only used for cpu
  bool fuse_residual_connection{false};
  float scale_in{1.0f};           // only used with mkl-dnn int8
  float scale_out{1.0f};          // only used with mkl-dnn int8
  float scale_in_eltwise{1.0f};   // only used with mkl-dnn int8
  float scale_weights{1.0f};      // only used with mkl-dnn int8
  bool force_fp32_output{false};  // only used in mkl-dnn int8
  // Spelled "AnyLayout" to match the default used by PoolParam.
  std::string data_format{"AnyLayout"};
};
// For Pooling op
// Tensors and attributes of a pooling operation. Tensor pointers default to
// null; the remaining members carry the defaults shown below.
struct PoolParam {
  lite::Tensor* x = nullptr;
  lite::Tensor* output = nullptr;
  std::string pooling_type = "";
  std::vector<int> ksize;
  // if true, kernel size and paddings will be ignored
  bool global_pooling = false;
  std::vector<int> strides = {1, 1};
  std::vector<int> paddings = {0, 0};
  bool exclusive = true;
  bool adaptive = false;
  bool ceil_mode = false;
  bool use_quantizer = false;
  std::string data_format = "AnyLayout";
};
// For Dropout op
// Tensors and attributes of a dropout operation. Tensor pointers default to
// null; the remaining members carry the defaults shown below.
struct DropoutParam {
  const lite::Tensor* x = nullptr;
  lite::Tensor* output = nullptr;
  lite::Tensor* mask = nullptr;
  float dropout_prob = 0.5f;
  bool is_test = false;
  bool fix_seed = false;
  int seed = 0;
  std::string dropout_implementation = "downgrade_in_infer";
};
/// ----------------------- element wise operators ----------------------
/// ----------------------- element wise operators ----------------------
struct
ElementwiseParam
{
struct
ElementwiseParam
{
const
lite
::
Tensor
*
X
{};
const
lite
::
Tensor
*
X
{};
...
...
paddle/fluid/lite/operators/relu_op.cc
浏览文件 @
b5c410a2
...
@@ -25,7 +25,6 @@ bool ReluOp::InferShape() const {
...
@@ -25,7 +25,6 @@ bool ReluOp::InferShape() const {
CHECK_OR_FALSE
(
param_
.
output
);
CHECK_OR_FALSE
(
param_
.
output
);
// TODO(Superjomn) Enable data sharing.
// TODO(Superjomn) Enable data sharing.
param_
.
output
->
Resize
(
param_
.
input
->
dims
());
param_
.
output
->
Resize
(
param_
.
input
->
dims
());
// param_.output->ShareDataWith(*param_.input);
// share lod
// share lod
// param_.output->set_lod(param_.input->lod());
// param_.output->set_lod(param_.input->lod());
return
true
;
return
true
;
...
@@ -42,8 +41,8 @@ bool ReluOp::AttachImpl(const OpDesc &opdesc, lite::Scope *scope) {
...
@@ -42,8 +41,8 @@ bool ReluOp::AttachImpl(const OpDesc &opdesc, lite::Scope *scope) {
return
true
;
return
true
;
}
}
REGISTER_LITE_OP
(
relu
,
ReluOp
);
}
// namespace operators
}
// namespace operators
}
// namespace lite
}
// namespace lite
}
// namespace paddle
}
// namespace paddle
// Registers ReluOp under the op type name `relu`.
REGISTER_LITE_OP(relu, paddle::lite::operators::ReluOp);
paddle/fluid/lite/operators/relu_op.h
浏览文件 @
b5c410a2
...
@@ -35,7 +35,7 @@ class ReluOp : public OpLite {
...
@@ -35,7 +35,7 @@ class ReluOp : public OpLite {
bool
AttachImpl
(
const
OpDesc
&
opdesc
,
lite
::
Scope
*
scope
)
override
;
bool
AttachImpl
(
const
OpDesc
&
opdesc
,
lite
::
Scope
*
scope
)
override
;
void
AttachKernel
(
KernelBase
*
kernel
)
override
{
kernel
->
SetParam
(
param_
);
}
void
AttachKernel
(
KernelBase
*
kernel
)
override
{
kernel
->
SetParam
(
param_
);
}
std
::
string
DebugString
()
const
override
{
return
"
tanh
"
;
}
std
::
string
DebugString
()
const
override
{
return
"
relu
"
;
}
private:
private:
mutable
ReluParam
param_
;
mutable
ReluParam
param_
;
...
...
paddle/fluid/lite/utils/varient.h
浏览文件 @
b5c410a2
...
@@ -128,8 +128,9 @@ struct variant {
...
@@ -128,8 +128,9 @@ struct variant {
if
(
type_id
==
typeid
(
T
).
hash_code
())
if
(
type_id
==
typeid
(
T
).
hash_code
())
return
reinterpret_cast
<
T
*>
(
&
data
);
return
reinterpret_cast
<
T
*>
(
&
data
);
else
else
LOG
(
FATAL
)
<<
"unmatched type get, should be "
<<
type_id
<<
" but get "
LOG
(
ERROR
)
<<
"unmatched type get, should be "
<<
type_id
<<
" but get "
<<
typeid
(
T
).
name
();
<<
typeid
(
T
).
name
();
throw
std
::
invalid_argument
(
"unmatched type"
);
}
}
~
variant
()
{
helper_t
::
destroy
(
type_id
,
&
data
);
}
~
variant
()
{
helper_t
::
destroy
(
type_id
,
&
data
);
}
};
};
...
...
paddle/fluid/memory/CMakeLists.txt
浏览文件 @
b5c410a2
...
@@ -6,7 +6,8 @@ cc_library(memcpy SRCS memcpy.cc DEPS place)
...
@@ -6,7 +6,8 @@ cc_library(memcpy SRCS memcpy.cc DEPS place)
cc_library
(
memory
cc_library
(
memory
DEPS
DEPS
malloc
malloc
memcpy
)
memcpy
)
#if (WITH_GPU)
#if (WITH_GPU)
# nv_test(pinned_memory_test SRCS pinned_memory_test.cu DEPS place memory)
# nv_test(pinned_memory_test SRCS pinned_memory_test.cu DEPS place memory)
#endif()
#endif()
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录