Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
机器未来
Paddle
提交
7c09d198
P
Paddle
项目概览
机器未来
/
Paddle
与 Fork 源项目一致
Fork自
PaddlePaddle / Paddle
通知
1
Star
1
Fork
0
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
1
列表
看板
标记
里程碑
合并请求
0
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
P
Paddle
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
1
Issue
1
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
提交
7c09d198
编写于
8月 16, 2018
作者:
L
luotao1
浏览文件
操作
浏览文件
下载
差异文件
Merge branch 'develop' into anakin_bug
上级
3373535b
c1083765
变更
58
隐藏空白更改
内联
并排
Showing
58 changed file
with
2251 addition
and
362 deletion
+2251
-362
CMakeLists.txt
CMakeLists.txt
+1
-2
doc/fluid/new_docs/advanced_usage/deploy/native_infer.rst
doc/fluid/new_docs/advanced_usage/deploy/native_infer.rst
+3
-3
paddle/fluid/API.spec
paddle/fluid/API.spec
+9
-4
paddle/fluid/framework/details/exception_holder.h
paddle/fluid/framework/details/exception_holder.h
+33
-21
paddle/fluid/framework/details/threaded_ssa_graph_executor.cc
...le/fluid/framework/details/threaded_ssa_graph_executor.cc
+3
-7
paddle/fluid/framework/ir/CMakeLists.txt
paddle/fluid/framework/ir/CMakeLists.txt
+3
-0
paddle/fluid/framework/ir/graph.h
paddle/fluid/framework/ir/graph.h
+32
-0
paddle/fluid/framework/ir/graph_pattern_detecter.cc
paddle/fluid/framework/ir/graph_pattern_detecter.cc
+186
-0
paddle/fluid/framework/ir/graph_pattern_detecter.h
paddle/fluid/framework/ir/graph_pattern_detecter.h
+181
-0
paddle/fluid/framework/ir/graph_pattern_detecter_tester.cc
paddle/fluid/framework/ir/graph_pattern_detecter_tester.cc
+172
-0
paddle/fluid/framework/ir/graph_test.cc
paddle/fluid/framework/ir/graph_test.cc
+95
-1
paddle/fluid/framework/ir/graph_traits.cc
paddle/fluid/framework/ir/graph_traits.cc
+69
-0
paddle/fluid/framework/ir/graph_traits.h
paddle/fluid/framework/ir/graph_traits.h
+90
-0
paddle/fluid/framework/ir/node.h
paddle/fluid/framework/ir/node.h
+3
-0
paddle/fluid/framework/op_desc.cc
paddle/fluid/framework/op_desc.cc
+14
-1
paddle/fluid/framework/op_desc.h
paddle/fluid/framework/op_desc.h
+3
-1
paddle/fluid/framework/program_desc.cc
paddle/fluid/framework/program_desc.cc
+1
-1
paddle/fluid/framework/tensor.cc
paddle/fluid/framework/tensor.cc
+1
-0
paddle/fluid/framework/tensor_impl.h
paddle/fluid/framework/tensor_impl.h
+8
-0
paddle/fluid/framework/threadpool.cc
paddle/fluid/framework/threadpool.cc
+7
-0
paddle/fluid/inference/api/api_impl.cc
paddle/fluid/inference/api/api_impl.cc
+16
-0
paddle/fluid/operators/.flatten_op.cc.swp
paddle/fluid/operators/.flatten_op.cc.swp
+0
-0
paddle/fluid/operators/CMakeLists.txt
paddle/fluid/operators/CMakeLists.txt
+3
-6
paddle/fluid/operators/cross_entropy_op.cc
paddle/fluid/operators/cross_entropy_op.cc
+52
-37
paddle/fluid/operators/cross_entropy_op.h
paddle/fluid/operators/cross_entropy_op.h
+11
-3
paddle/fluid/operators/distributed/variable_response.cc
paddle/fluid/operators/distributed/variable_response.cc
+6
-1
paddle/fluid/operators/fc_mkldnn_op.cc
paddle/fluid/operators/fc_mkldnn_op.cc
+7
-2
paddle/fluid/operators/fc_op.cc
paddle/fluid/operators/fc_op.cc
+80
-16
paddle/fluid/operators/listen_and_serv_op.cc
paddle/fluid/operators/listen_and_serv_op.cc
+4
-1
paddle/fluid/operators/prelu_op.cc
paddle/fluid/operators/prelu_op.cc
+52
-13
paddle/fluid/operators/prelu_op.cu
paddle/fluid/operators/prelu_op.cu
+0
-22
paddle/fluid/operators/prelu_op.h
paddle/fluid/operators/prelu_op.h
+73
-52
paddle/fluid/operators/shape_op.cc
paddle/fluid/operators/shape_op.cc
+2
-2
paddle/fluid/operators/shape_op.cu
paddle/fluid/operators/shape_op.cu
+1
-1
paddle/fluid/operators/shape_op.h
paddle/fluid/operators/shape_op.h
+1
-1
paddle/fluid/operators/softmax_op.h
paddle/fluid/operators/softmax_op.h
+10
-18
paddle/fluid/platform/profiler.cc
paddle/fluid/platform/profiler.cc
+12
-5
paddle/fluid/pybind/protobuf.cc
paddle/fluid/pybind/protobuf.cc
+2
-1
python/paddle/fluid/__init__.py
python/paddle/fluid/__init__.py
+1
-1
python/paddle/fluid/backward.py
python/paddle/fluid/backward.py
+2
-2
python/paddle/fluid/framework.py
python/paddle/fluid/framework.py
+98
-35
python/paddle/fluid/initializer.py
python/paddle/fluid/initializer.py
+2
-2
python/paddle/fluid/io.py
python/paddle/fluid/io.py
+6
-2
python/paddle/fluid/layers/detection.py
python/paddle/fluid/layers/detection.py
+31
-19
python/paddle/fluid/layers/nn.py
python/paddle/fluid/layers/nn.py
+122
-0
python/paddle/fluid/tests/unittests/CMakeLists.txt
python/paddle/fluid/tests/unittests/CMakeLists.txt
+2
-2
python/paddle/fluid/tests/unittests/test_cross_entropy_op.py
python/paddle/fluid/tests/unittests/test_cross_entropy_op.py
+102
-0
python/paddle/fluid/tests/unittests/test_desc_clone.py
python/paddle/fluid/tests/unittests/test_desc_clone.py
+196
-0
python/paddle/fluid/tests/unittests/test_dist_base.py
python/paddle/fluid/tests/unittests/test_dist_base.py
+56
-13
python/paddle/fluid/tests/unittests/test_dist_train.py
python/paddle/fluid/tests/unittests/test_dist_train.py
+17
-0
python/paddle/fluid/tests/unittests/test_dist_transpiler.py
python/paddle/fluid/tests/unittests/test_dist_transpiler.py
+60
-17
python/paddle/fluid/tests/unittests/test_fc_mkldnn_op.py
python/paddle/fluid/tests/unittests/test_fc_mkldnn_op.py
+2
-7
python/paddle/fluid/tests/unittests/test_fc_op.py
python/paddle/fluid/tests/unittests/test_fc_op.py
+90
-0
python/paddle/fluid/tests/unittests/test_layers.py
python/paddle/fluid/tests/unittests/test_layers.py
+26
-0
python/paddle/fluid/tests/unittests/test_prelu_op.py
python/paddle/fluid/tests/unittests/test_prelu_op.py
+42
-14
python/paddle/fluid/tests/unittests/test_program.py
python/paddle/fluid/tests/unittests/test_program.py
+34
-0
python/paddle/fluid/tests/unittests/test_protobuf_descs.py
python/paddle/fluid/tests/unittests/test_protobuf_descs.py
+1
-1
python/paddle/fluid/transpiler/distribute_transpiler.py
python/paddle/fluid/transpiler/distribute_transpiler.py
+115
-25
未找到文件。
CMakeLists.txt
浏览文件 @
7c09d198
...
...
@@ -204,12 +204,11 @@ include(external/snappy) # download snappy
include
(
external/snappystream
)
include
(
external/threadpool
)
set
(
WITH_ANAKIN OFF CACHE STRING
"Disable Anakin first, will add it later."
FORCE
)
if
(
WITH_GPU
)
include
(
cuda
)
include
(
tensorrt
)
include
(
external/anakin
)
else
()
set
(
WITH_ANAKIN OFF CACHE STRING
"Anakin is valid only when GPU is set."
FORCE
)
endif
()
include
(
cudnn
)
# set cudnn libraries, must before configure
...
...
doc/fluid/new_docs/advanced_usage/deploy/native_infer.rst
浏览文件 @
7c09d198
...
...
@@ -4,7 +4,7 @@ Paddle 预测 API
为了更简单方便的预测部署,Fluid 提供了一套高层 API
用来隐藏底层不同的优化实现。
`预测库相关代码 <https://github.com/PaddlePaddle/Paddle/tree/develop/paddle/
contrib/inference
>`__
`预测库相关代码 <https://github.com/PaddlePaddle/Paddle/tree/develop/paddle/
fluid/inference/api
>`__
包括
- 头文件 ``paddle_inference_api.h`` 定义了所有的接口
...
...
@@ -104,5 +104,5 @@ engine
------------
- `inference
demos <https://github.com/PaddlePaddle/Paddle/tree/develop/paddle/
contrib/inference/demo
>`__
- `复杂单线程/多线程例子 <https://github.com/PaddlePaddle/Paddle/blob/develop/paddle/
contrib/inference/test_paddle_inference_api_impl
.cc>`__
demos <https://github.com/PaddlePaddle/Paddle/tree/develop/paddle/
fluid/inference/api/demo_ci
>`__
- `复杂单线程/多线程例子 <https://github.com/PaddlePaddle/Paddle/blob/develop/paddle/
fluid/inference/api/api_impl_tester
.cc>`__
paddle/fluid/API.spec
浏览文件 @
7c09d198
...
...
@@ -6,7 +6,7 @@ paddle.fluid.Program.create_block ArgSpec(args=['self', 'parent_idx'], varargs=N
paddle.fluid.Program.current_block ArgSpec(args=['self'], varargs=None, keywords=None, defaults=None)
paddle.fluid.Program.get_desc ArgSpec(args=['self'], varargs=None, keywords=None, defaults=None)
paddle.fluid.Program.global_block ArgSpec(args=['self'], varargs=None, keywords=None, defaults=None)
paddle.fluid.Program.inference_optimize ArgSpec(args=['self'
], varargs=None, keywords=None, defaults=None
)
paddle.fluid.Program.inference_optimize ArgSpec(args=['self'
, 'export_for_deployment'], varargs=None, keywords=None, defaults=(True,)
)
paddle.fluid.Program.list_vars ArgSpec(args=['self'], varargs=None, keywords=None, defaults=None)
paddle.fluid.Program.optimized_guard ArgSpec(args=[], varargs='args', keywords='kwds', defaults=None)
paddle.fluid.Program.parse_from_string ArgSpec(args=['binary_str'], varargs=None, keywords=None, defaults=None)
...
...
@@ -18,6 +18,9 @@ paddle.fluid.Operator.all_attrs ArgSpec(args=['self'], varargs=None, keywords=No
paddle.fluid.Operator.attr ArgSpec(args=['self', 'name'], varargs=None, keywords=None, defaults=None)
paddle.fluid.Operator.attr_type ArgSpec(args=['self', 'name'], varargs=None, keywords=None, defaults=None)
paddle.fluid.Operator.block_attr ArgSpec(args=['self', 'name'], varargs=None, keywords=None, defaults=None)
paddle.fluid.Operator.block_attr_id ArgSpec(args=['self', 'name'], varargs=None, keywords=None, defaults=None)
paddle.fluid.Operator.blocks_attr ArgSpec(args=['self', 'name'], varargs=None, keywords=None, defaults=None)
paddle.fluid.Operator.blocks_attr_ids ArgSpec(args=['self', 'name'], varargs=None, keywords=None, defaults=None)
paddle.fluid.Operator.has_attr ArgSpec(args=['self', 'name'], varargs=None, keywords=None, defaults=None)
paddle.fluid.Operator.has_kernel ArgSpec(args=['self', 'op_type'], varargs=None, keywords=None, defaults=None)
paddle.fluid.Operator.input ArgSpec(args=['self', 'name'], varargs=None, keywords=None, defaults=None)
...
...
@@ -52,7 +55,7 @@ paddle.fluid.Inferencer.__init__ ArgSpec(args=['self', 'infer_func', 'param_path
paddle.fluid.Inferencer.infer ArgSpec(args=['self', 'inputs', 'return_numpy'], varargs=None, keywords=None, defaults=(True,))
paddle.fluid.DistributeTranspiler.__init__ ArgSpec(args=['self', 'config'], varargs=None, keywords=None, defaults=(None,))
paddle.fluid.DistributeTranspiler.get_pserver_program ArgSpec(args=['self', 'endpoint'], varargs=None, keywords=None, defaults=None)
paddle.fluid.DistributeTranspiler.get_startup_program ArgSpec(args=['self', 'endpoint', 'pserver_program'
], varargs=None, keywords=None, defaults=None
)
paddle.fluid.DistributeTranspiler.get_startup_program ArgSpec(args=['self', 'endpoint', 'pserver_program'
, 'startup_program'], varargs=None, keywords=None, defaults=(None,)
)
paddle.fluid.DistributeTranspiler.get_trainer_program ArgSpec(args=['self'], varargs=None, keywords=None, defaults=None)
paddle.fluid.DistributeTranspiler.transpile ArgSpec(args=['self', 'trainer_id', 'program', 'pservers', 'trainers', 'sync_mode'], varargs=None, keywords=None, defaults=(None, '127.0.0.1:6174', 1, True))
paddle.fluid.InferenceTranspiler.__init__
...
...
@@ -74,7 +77,7 @@ paddle.fluid.io.save_persistables ArgSpec(args=['executor', 'dirname', 'main_pro
paddle.fluid.io.load_vars ArgSpec(args=['executor', 'dirname', 'main_program', 'vars', 'predicate', 'filename'], varargs=None, keywords=None, defaults=(None, None, None, None))
paddle.fluid.io.load_params ArgSpec(args=['executor', 'dirname', 'main_program', 'filename'], varargs=None, keywords=None, defaults=(None, None))
paddle.fluid.io.load_persistables ArgSpec(args=['executor', 'dirname', 'main_program', 'filename'], varargs=None, keywords=None, defaults=(None, None))
paddle.fluid.io.save_inference_model ArgSpec(args=['dirname', 'feeded_var_names', 'target_vars', 'executor', 'main_program', 'model_filename', 'params_filename'
], varargs=None, keywords=None, defaults=(None, None, Non
e))
paddle.fluid.io.save_inference_model ArgSpec(args=['dirname', 'feeded_var_names', 'target_vars', 'executor', 'main_program', 'model_filename', 'params_filename'
, 'export_for_deployment'], varargs=None, keywords=None, defaults=(None, None, None, Tru
e))
paddle.fluid.io.load_inference_model ArgSpec(args=['dirname', 'executor', 'model_filename', 'params_filename'], varargs=None, keywords=None, defaults=(None, None))
paddle.fluid.io.get_inference_program ArgSpec(args=['target_vars', 'main_program'], varargs=None, keywords=None, defaults=(None,))
paddle.fluid.initializer.ConstantInitializer.__init__ ArgSpec(args=['self', 'value', 'force_cpu'], varargs=None, keywords=None, defaults=(0.0, False))
...
...
@@ -156,6 +159,8 @@ paddle.fluid.layers.relu ArgSpec(args=['x'], varargs=None, keywords=None, defaul
paddle.fluid.layers.log ArgSpec(args=['x'], varargs=None, keywords=None, defaults=None)
paddle.fluid.layers.crop ArgSpec(args=['x', 'shape', 'offsets', 'name'], varargs=None, keywords=None, defaults=(None, None, None))
paddle.fluid.layers.rank_loss ArgSpec(args=['label', 'left', 'right', 'name'], varargs=None, keywords=None, defaults=(None,))
paddle.fluid.layers.prelu ArgSpec(args=['x', 'mode', 'param_attr', 'name'], varargs=None, keywords=None, defaults=(None, None))
paddle.fluid.layers.flatten ArgSpec(args=['x', 'axis', 'name'], varargs=None, keywords=None, defaults=(1, None))
paddle.fluid.layers.data ArgSpec(args=['name', 'shape', 'append_batch_size', 'dtype', 'lod_level', 'type', 'stop_gradient'], varargs=None, keywords=None, defaults=(True, 'float32', 0, VarType.LOD_TENSOR, True))
paddle.fluid.layers.open_recordio_file ArgSpec(args=['filename', 'shapes', 'lod_levels', 'dtypes', 'pass_num', 'for_parallel'], varargs=None, keywords=None, defaults=(1, True))
paddle.fluid.layers.open_files ArgSpec(args=['filenames', 'shapes', 'lod_levels', 'dtypes', 'thread_num', 'buffer_size', 'pass_num', 'is_test'], varargs=None, keywords=None, defaults=(None, None, 1, None))
...
...
@@ -324,7 +329,7 @@ paddle.fluid.contrib.BeamSearchDecoder.update_array ArgSpec(args=['self', 'array
paddle.fluid.contrib.memory_usage ArgSpec(args=['program', 'batch_size'], varargs=None, keywords=None, defaults=None)
paddle.fluid.transpiler.DistributeTranspiler.__init__ ArgSpec(args=['self', 'config'], varargs=None, keywords=None, defaults=(None,))
paddle.fluid.transpiler.DistributeTranspiler.get_pserver_program ArgSpec(args=['self', 'endpoint'], varargs=None, keywords=None, defaults=None)
paddle.fluid.transpiler.DistributeTranspiler.get_startup_program ArgSpec(args=['self', 'endpoint', 'pserver_program'
], varargs=None, keywords=None, defaults=None
)
paddle.fluid.transpiler.DistributeTranspiler.get_startup_program ArgSpec(args=['self', 'endpoint', 'pserver_program'
, 'startup_program'], varargs=None, keywords=None, defaults=(None,)
)
paddle.fluid.transpiler.DistributeTranspiler.get_trainer_program ArgSpec(args=['self'], varargs=None, keywords=None, defaults=None)
paddle.fluid.transpiler.DistributeTranspiler.transpile ArgSpec(args=['self', 'trainer_id', 'program', 'pservers', 'trainers', 'sync_mode'], varargs=None, keywords=None, defaults=(None, '127.0.0.1:6174', 1, True))
paddle.fluid.transpiler.InferenceTranspiler.__init__
...
...
paddle/fluid/framework/details/exception_holder.h
浏览文件 @
7c09d198
...
...
@@ -14,6 +14,7 @@
#pragma once
#include "glog/logging.h"
#include "paddle/fluid/platform/enforce.h"
namespace
paddle
{
...
...
@@ -22,27 +23,24 @@ namespace details {
class
ExceptionHolder
{
public:
void
Catch
(
const
platform
::
EnforceNotMet
&
exp
)
{
std
::
lock_guard
<
std
::
mutex
>
lock
(
mu_
);
exception_
.
reset
(
new
platform
::
EnforceNotMet
(
exp
));
type_
=
kEnforceNotMet
;
}
void
Catch
(
const
platform
::
EOFException
&
exp
)
{
std
::
lock_guard
<
std
::
mutex
>
lock
(
mu_
);
// EOFException will not cover up existing EnforceNotMet.
if
(
exception_
.
get
()
==
nullptr
)
{
exception_
.
reset
(
new
platform
::
EOFException
(
exp
));
type_
=
kEOF
;
void
Catch
(
std
::
exception_ptr
eptr
)
{
try
{
std
::
rethrow_exception
(
eptr
);
}
catch
(
platform
::
EOFException
exp
)
{
Catch
(
exp
);
}
catch
(
platform
::
EnforceNotMet
exp
)
{
Catch
(
exp
);
}
catch
(...)
{
LOG
(
FATAL
)
<<
"Unknown exception caught"
;
}
}
bool
ExceptionCatched
()
const
{
bool
IsCaught
()
const
{
std
::
lock_guard
<
std
::
mutex
>
lock
(
mu_
);
return
exception_
.
get
()
!=
nullptr
;
}
void
Throw
()
{
void
Re
Throw
()
{
std
::
lock_guard
<
std
::
mutex
>
lock
(
mu_
);
switch
(
type_
)
{
case
kNone
:
...
...
@@ -50,27 +48,41 @@ class ExceptionHolder {
case
kEnforceNotMet
:
{
auto
e
=
*
static_cast
<
platform
::
EnforceNotMet
*>
(
exception_
.
get
());
throw
e
;
break
;
}
case
kEOF
:
{
auto
e
=
*
static_cast
<
platform
::
EOFException
*>
(
exception_
.
get
());
throw
e
;
break
;
}
default:
LOG
(
FATAL
)
<<
"Unknown exception."
;
}
exception_
.
reset
();
type_
=
kNone
;
ClearImpl
();
}
void
Clear
()
{
std
::
lock_guard
<
std
::
mutex
>
lock
(
mu_
);
ClearImpl
();
}
private:
void
ClearImpl
()
{
exception_
.
reset
();
type_
=
kNone
;
}
private:
void
Catch
(
const
platform
::
EnforceNotMet
&
exp
)
{
std
::
lock_guard
<
std
::
mutex
>
lock
(
mu_
);
exception_
.
reset
(
new
platform
::
EnforceNotMet
(
exp
));
type_
=
kEnforceNotMet
;
}
void
Catch
(
const
platform
::
EOFException
&
exp
)
{
std
::
lock_guard
<
std
::
mutex
>
lock
(
mu_
);
// EOFException will not cover up existing EnforceNotMet.
if
(
exception_
.
get
()
==
nullptr
)
{
exception_
.
reset
(
new
platform
::
EOFException
(
exp
));
type_
=
kEOF
;
}
}
enum
ExceptionType
{
kNone
,
kEnforceNotMet
,
kEOF
};
ExceptionType
type_
{
kNone
};
...
...
paddle/fluid/framework/details/threaded_ssa_graph_executor.cc
浏览文件 @
7c09d198
...
...
@@ -107,11 +107,11 @@ FeedFetchList ThreadedSSAGraphExecutor::Run(
auto
cur_ready_vars
=
ready_vars
.
PopAll
(
1
,
&
timeout
);
if
(
timeout
)
{
if
(
exception_holder_
.
ExceptionCatched
())
{
if
(
exception_holder_
.
IsCaught
())
{
for
(
auto
&
run_op_future
:
run_op_futures_
)
{
run_op_future
.
wait
();
}
exception_holder_
.
Throw
();
exception_holder_
.
Re
Throw
();
}
else
{
continue
;
}
...
...
@@ -220,12 +220,8 @@ void ThreadedSSAGraphExecutor::RunOp(
running_ops_
--
;
ready_var_q
->
Extend
(
op
->
Outputs
());
VLOG
(
10
)
<<
op
<<
" "
<<
op
->
Name
()
<<
"Signal posted"
;
}
catch
(
platform
::
EOFException
ex
)
{
exception_holder_
.
Catch
(
ex
);
}
catch
(
platform
::
EnforceNotMet
ex
)
{
exception_holder_
.
Catch
(
ex
);
}
catch
(...)
{
LOG
(
FATAL
)
<<
"Unknown exception catched"
;
exception_holder_
.
Catch
(
std
::
current_exception
())
;
}
};
if
(
pool_
)
{
...
...
paddle/fluid/framework/ir/CMakeLists.txt
浏览文件 @
7c09d198
...
...
@@ -3,7 +3,10 @@ cc_library(graph SRCS graph.cc DEPS node)
cc_library
(
graph_helper SRCS graph_helper.cc DEPS graph
)
cc_library
(
pass SRCS pass.cc DEPS graph node graph_helper
)
cc_library
(
graph_viz_pass SRCS graph_viz_pass.cc DEPS graph pass graph_helper
)
cc_library
(
graph_traits SRCS graph_traits.cc DEPS graph
)
cc_library
(
graph_pattern_detecter SRCS graph_pattern_detecter.cc DEPS graph graph_helper graph_traits
)
cc_test
(
pass_test SRCS pass_test.cc DEPS graph pass graph_helper
)
cc_test
(
graph_test SRCS graph_test.cc DEPS graph graph_helper op_registry
)
cc_test
(
graph_helper_test SRCS graph_helper_test.cc DEPS graph graph_helper op_registry
)
cc_test
(
test_graph_pattern_detecter SRCS graph_pattern_detecter_tester.cc DEPS graph_pattern_detecter
)
paddle/fluid/framework/ir/graph.h
浏览文件 @
7c09d198
...
...
@@ -28,6 +28,38 @@ namespace paddle {
namespace
framework
{
namespace
ir
{
/*
* The graph is a Directed Acyclic Single Static Assignment Graph.
*
* In more detail, the following properties must hold:
*
* The graph shouldn't contain cycle. Each node is a black-box to the graph
* so the node itself could be a loop operator.
*
* Each Variable-type node has only one input (thus single static assignment).
*
* The output/input of operator is variable and the output/input of variable
* is operator.
*
* The following data harzards in Program are addressed in the Graph:
*
* Write-After-Read
* a = op1(x)
* x = op2(b)
* A control-dependency connection is created bettwen op1 and op2 such that
* op1->op2, so as to ensure correct order.
*
* Write-After-Write
* x = op1(a)
* x = op2(b)
* A control-dependency connection is created between op1 and op2 such that
* op1->op2, so as to ensure correct order.
*
* Other properties currently hold, but is not enforced yet:
*
* Variable-type node (not control dep) with the same variable name share
* the same underlying VarDesc.
*/
class
Graph
{
public:
explicit
Graph
(
const
ProgramDesc
&
program
);
...
...
paddle/fluid/framework/ir/graph_pattern_detecter.cc
0 → 100644
浏览文件 @
7c09d198
// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include <string>
#include <vector>
#include "paddle/fluid/framework/ir/graph_helper.h"
#include "paddle/fluid/framework/ir/graph_pattern_detecter.h"
#include "paddle/fluid/framework/ir/graph_traits.h"
#include "paddle/fluid/platform/enforce.h"
namespace
paddle
{
namespace
framework
{
namespace
ir
{
PDNode
*
PDPattern
::
NewNode
(
PDNode
::
teller_t
&&
teller
,
const
std
::
string
&
name
)
{
nodes_
.
emplace_back
(
new
PDNode
(
std
::
move
(
teller
),
name
));
auto
*
cur
=
nodes_
.
back
().
get
();
return
cur
;
}
void
PDPattern
::
AddEdge
(
PDNode
*
a
,
PDNode
*
b
)
{
PADDLE_ENFORCE
(
a
);
PADDLE_ENFORCE
(
b
);
PADDLE_ENFORCE
(
a
!=
b
,
"can't connect to the same nodes."
);
edges_
.
emplace_back
(
a
,
b
);
}
void
GraphPatternDetecter
::
operator
()(
Graph
*
graph
,
GraphPatternDetecter
::
handle_t
handler
)
{
if
(
!
MarkPDNodesInGraph
(
*
graph
))
return
;
auto
subgraphs
=
DetectPatterns
();
UniquePatterns
(
&
subgraphs
);
RemoveOverlappedMatch
(
&
subgraphs
);
for
(
auto
&
g
:
subgraphs
)
{
handler
(
g
,
graph
);
}
}
bool
GraphPatternDetecter
::
MarkPDNodesInGraph
(
const
ir
::
Graph
&
graph
)
{
if
(
graph
.
Nodes
().
empty
())
return
false
;
for
(
auto
&
node
:
GraphTraits
::
DFS
(
graph
))
{
for
(
const
auto
&
pdnode
:
pattern_
.
nodes
())
{
if
(
pdnode
->
Tell
(
&
node
))
{
pdnodes2nodes_
[
pdnode
.
get
()].
insert
(
&
node
);
}
}
}
return
!
pdnodes2nodes_
.
empty
();
}
struct
HitGroup
{
std
::
unordered_map
<
PDNode
*
,
Node
*>
roles
;
bool
Match
(
Node
*
node
,
PDNode
*
pat
)
{
return
!
roles
.
count
(
pat
)
||
roles
.
at
(
pat
)
==
node
;
}
void
Register
(
Node
*
node
,
PDNode
*
pat
)
{
roles
[
pat
]
=
node
;
}
};
// Tell whether Node a links to b.
bool
IsNodesLink
(
Node
*
a
,
Node
*
b
)
{
for
(
auto
*
node
:
a
->
outputs
)
{
if
(
b
==
node
)
{
return
true
;
}
}
return
false
;
}
std
::
vector
<
GraphPatternDetecter
::
subgraph_t
>
GraphPatternDetecter
::
DetectPatterns
()
{
// Init empty subgraphs.
std
::
vector
<
GraphPatternDetecter
::
subgraph_t
>
result
;
std
::
vector
<
HitGroup
>
init_groups
;
PADDLE_ENFORCE
(
!
pattern_
.
edges
().
empty
(),
"At least one edge is needed"
);
auto
*
first_pnode
=
pattern_
.
edges
().
front
().
first
;
if
(
!
pdnodes2nodes_
.
count
(
first_pnode
))
return
result
;
for
(
auto
*
node
:
pdnodes2nodes_
[
first_pnode
])
{
HitGroup
group
;
group
.
roles
[
first_pnode
]
=
node
;
init_groups
.
emplace_back
(
group
);
}
int
step
=
0
;
std
::
array
<
std
::
vector
<
HitGroup
>
,
2
>
bi_records
;
bi_records
[
0
]
=
std
::
move
(
init_groups
);
// Extend a PDNode to subgraphs by deducing the connection relations defined
// in edges of PDNodes.
for
(
const
auto
&
edge
:
pattern_
.
edges
())
{
// Each role has two PDNodes, which indicates two roles.
// Detect two Nodes that can match these two roles and they are connected.
auto
&
pre_groups
=
bi_records
[
step
%
2
];
auto
&
cur_groups
=
bi_records
[
1
-
(
step
++
%
2
)];
cur_groups
.
clear
();
// source -> target
for
(
Node
*
source
:
pdnodes2nodes_
[
edge
.
first
])
{
for
(
Node
*
target
:
pdnodes2nodes_
[
edge
.
second
])
{
// TODO(Superjomn) add some prune strategies.
for
(
const
auto
&
group
:
pre_groups
)
{
HitGroup
new_group
=
group
;
if
(
IsNodesLink
(
source
,
target
)
&&
new_group
.
Match
(
source
,
edge
.
first
))
{
new_group
.
Register
(
source
,
edge
.
first
);
if
(
new_group
.
Match
(
target
,
edge
.
second
))
{
new_group
.
Register
(
target
,
edge
.
second
);
cur_groups
.
push_back
(
new_group
);
// TODO(Superjomn) need to unique
}
}
}
}
}
}
for
(
auto
&
group
:
bi_records
[
step
%
2
])
{
GraphPatternDetecter
::
subgraph_t
subgraph
;
for
(
auto
&
role
:
group
.
roles
)
{
subgraph
.
emplace
(
role
.
first
,
role
.
second
);
}
result
.
emplace_back
(
subgraph
);
}
return
result
;
}
void
GraphPatternDetecter
::
UniquePatterns
(
std
::
vector
<
GraphPatternDetecter
::
subgraph_t
>*
subgraphs
)
{
if
(
subgraphs
->
empty
())
return
;
std
::
vector
<
GraphPatternDetecter
::
subgraph_t
>
result
;
std
::
unordered_set
<
size_t
>
set
;
for
(
auto
&
g
:
*
subgraphs
)
{
size_t
key
=
0
;
for
(
auto
&
item
:
g
)
{
key
^=
std
::
hash
<
void
*>
{}(
item
.
first
);
key
^=
std
::
hash
<
void
*>
{}(
item
.
second
);
}
if
(
!
set
.
count
(
key
))
{
result
.
emplace_back
(
g
);
set
.
insert
(
key
);
}
}
*
subgraphs
=
result
;
}
void
GraphPatternDetecter
::
RemoveOverlappedMatch
(
std
::
vector
<
subgraph_t
>*
subgraphs
)
{
std
::
vector
<
subgraph_t
>
result
;
std
::
unordered_set
<
Node
*>
node_set
;
for
(
const
auto
&
subgraph
:
*
subgraphs
)
{
bool
valid
=
true
;
for
(
auto
&
item
:
subgraph
)
{
if
(
node_set
.
count
(
item
.
second
))
{
valid
=
false
;
break
;
}
}
if
(
valid
)
{
for
(
auto
&
item
:
subgraph
)
{
node_set
.
insert
(
item
.
second
);
}
result
.
push_back
(
subgraph
);
}
}
*
subgraphs
=
result
;
}
}
// namespace ir
}
// namespace framework
}
// namespace paddle
paddle/fluid/framework/ir/graph_pattern_detecter.h
0 → 100644
浏览文件 @
7c09d198
// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#ifdef PADDLE_WITH_TESTING
#include <gtest/gtest_prod.h>
#endif
#include <numeric>
#include "paddle/fluid/framework/ir/graph.h"
#include "paddle/fluid/framework/ir/node.h"
namespace
paddle
{
namespace
framework
{
namespace
ir
{
// Some basic torminolygies:
// - PDPattern: a pattern defined as a data flow graph.
// - PDNode: the node in the pattern, each PDNode represents an `ir::Node`
// that meets some conditions defined in `PDNode.teller`.
// - A pattern is defined with PDNodes with edges.
// Pattern detector node. This node helps to build a pattern.
struct
PDNode
{
// tell whether an ir::Node* is a candidation for a PDNode.
using
teller_t
=
std
::
function
<
bool
(
Node
*
)
>
;
PDNode
(
teller_t
&&
teller
,
const
std
::
string
&
name
=
""
)
:
teller_
(
teller
),
name_
(
name
)
{
PADDLE_ENFORCE
(
teller_
!=
nullptr
,
"invalid teller functer is set."
);
}
PDNode
(
PDNode
&&
other
)
=
default
;
std
::
vector
<
PDNode
*>
inlinks
;
std
::
vector
<
PDNode
*>
outlinks
;
bool
Tell
(
Node
*
node
)
const
{
PADDLE_ENFORCE
(
teller_
!=
nullptr
,
"teller should be set for a PDNode"
);
return
teller_
(
node
);
}
const
std
::
string
&
name
()
const
{
return
name_
;
}
PDNode
(
const
PDNode
&
)
=
delete
;
PDNode
&
operator
=
(
const
PDNode
&
)
=
delete
;
private:
teller_t
teller_
;
std
::
string
name_
;
};
/*
* A pattern in a graph, which defined with PDNode and edges. Most graph
* patterns can be divided into PDNodes and link relations between them.
*
* For example, the FC fusion need to filter the MUL and ELEMENTWISE_ADD
* operators from the computation graph, the MUL's output should have only one
* consumer which is the ELEMENTWISE_ADD.
* This pattern can be defined as with the following pseudo codes
*
* // Create two operator PDNodes.
* MUL = PDPattern.NewNode()
* ELE = PDPattern.NewNode()
* // Create the variable PDNodes.
* MUL_out = PDPattern.NewNode()
* // Add teller to define some rules that help to filter the target Nodes.
* MUL.teller = lambda(node): node->IsOp() && node->Op()->Type == "mul";
* ELE.teller = lambda(node): \
* node->IsOp() && node->Op()->Type == "elementwise_add";
* MUL_out.teller = lambda(node): node->IsVar() && (MUL in node->inputs)
* && (ELE in node->outputs)
*
* One can add more specific tellers for PDNodes or edges, both the Operator
* and Variable Nodes can be ruled in PDNode.teller.
*
* PDPattern can record the general patterns, such as the pattern represents
* - Op in CPU -> Op in GPU -> Op in CPU, to findout the IO abnormal place.
* - Ops whose inputs and outputs share the same variables
*/
class
PDPattern
{
public:
using
edge_t
=
std
::
pair
<
PDNode
*
,
PDNode
*>
;
void
AddEdge
(
PDNode
*
a
,
PDNode
*
b
);
PDNode
*
NewNode
(
PDNode
::
teller_t
&&
teller
,
const
std
::
string
&
name
=
""
);
const
std
::
vector
<
std
::
unique_ptr
<
PDNode
>>&
nodes
()
const
{
return
nodes_
;
}
const
std
::
vector
<
edge_t
>&
edges
()
const
{
return
edges_
;
}
private:
#ifdef PADDLE_WITH_TESTING
FRIEND_TEST
(
PDPattern
,
AddEdge
);
FRIEND_TEST
(
PDPattern
,
NewNode
);
#endif
std
::
vector
<
std
::
unique_ptr
<
PDNode
>>
nodes_
;
std
::
vector
<
edge_t
>
edges_
;
};
/*
* GraphPatternDetecter helps to detect the specific patterns in the graph.
* Input a pattern, output a list of the matched subgraphs/nodes.
* This helper can be used to support fuse(conv+batchnorm => batchnorm e.g.).
*
* The algorithm has three phases:
* 1. Mark the nodes that match the defined PDNodes in a PDPattern,
* 2. Extend a PDNode to subgraphs by deducing the connection relation defined
* in PAPattern(the edges),
* 3. Get the filtered subgraphs and treat them with a pre-defined handler.
*
* Usage:
* // Create a detector
* GraphPatternDetecter detector;
* // Define the detector's pattern, by adding PDNode and define the edges.
* auto* node0 = detector.mutable_pattern().AddNode(...)
* auto* node1 = detector.mutable_pattern().AddNode(...)
* node0->teller = some lambda.
* node1->teller = some lambda.
* detector.mutable_pattern().AddEdge(node0, node1);
* // Create an handler, to define the behavior of treating the filtered
* // subgraphs that comply with the patterns.
* GraphPatternDetecter::handle_t handler = some labmda
* // Execute the detector.
* detector(&graph, handler);
*/
class
GraphPatternDetecter
{
public:
using
subgraph_t
=
std
::
unordered_map
<
PDNode
*
,
Node
*>
;
// Operate on the detected pattern.
using
handle_t
=
std
::
function
<
void
(
const
subgraph_t
&
/*hitted pattern*/
,
Graph
*
)
>
;
void
operator
()(
Graph
*
graph
,
handle_t
handler
);
const
PDPattern
&
pattern
()
const
{
return
pattern_
;
}
PDPattern
*
mutable_pattern
()
{
return
&
pattern_
;
}
private:
// Mark the nodes that fits the pattern.
bool
MarkPDNodesInGraph
(
const
ir
::
Graph
&
graph
);
// Detect all the pattern and output the hit records.
std
::
vector
<
subgraph_t
>
DetectPatterns
();
// Remove duplicate patterns.
void
UniquePatterns
(
std
::
vector
<
subgraph_t
>*
subgraphs
);
// Remove overlapped match subgraphs, when overlapped, keep the previous one.
void
RemoveOverlappedMatch
(
std
::
vector
<
subgraph_t
>*
subgraphs
);
#ifdef PADDLE_WITH_TESTING
FRIEND_TEST
(
GraphPatternDetecter
,
MarkPDNodesInGraph
);
FRIEND_TEST
(
GraphPatternDetecter
,
DetectPatterns
);
#endif
private:
using
hit_rcd_t
=
std
::
pair
<
Node
*
/*node in graph*/
,
PDNode
*
/*node in pattern*/
>
;
PDPattern
pattern_
;
std
::
vector
<
hit_rcd_t
>
marked_records_
;
std
::
unordered_map
<
const
PDNode
*
,
std
::
unordered_set
<
Node
*>>
pdnodes2nodes_
;
};
}
// namespace ir
}
// namespace framework
}
// namespace paddle
paddle/fluid/framework/ir/graph_pattern_detecter_tester.cc
0 → 100644
浏览文件 @
7c09d198
// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "paddle/fluid/framework/ir/graph_pattern_detecter.h"
#include <gtest/gtest.h>
namespace
paddle
{
namespace
framework
{
namespace
ir
{
void
BuildGraph
(
Graph
*
g
)
{
ir
::
Node
*
o1
=
g
->
CreateEmptyNode
(
"op1"
,
Node
::
Type
::
kOperation
);
ir
::
Node
*
o2
=
g
->
CreateEmptyNode
(
"op2"
,
Node
::
Type
::
kOperation
);
ir
::
Node
*
o3
=
g
->
CreateEmptyNode
(
"op3"
,
Node
::
Type
::
kOperation
);
ir
::
Node
*
o4
=
g
->
CreateEmptyNode
(
"op4"
,
Node
::
Type
::
kOperation
);
ir
::
Node
*
o5
=
g
->
CreateEmptyNode
(
"op5"
,
Node
::
Type
::
kOperation
);
ir
::
Node
*
v1
=
g
->
CreateEmptyNode
(
"var1"
,
Node
::
Type
::
kVariable
);
ir
::
Node
*
v2
=
g
->
CreateEmptyNode
(
"var2"
,
Node
::
Type
::
kVariable
);
ir
::
Node
*
v3
=
g
->
CreateEmptyNode
(
"var3"
,
Node
::
Type
::
kVariable
);
ir
::
Node
*
v4
=
g
->
CreateEmptyNode
(
"var4"
,
Node
::
Type
::
kVariable
);
// o1->v1->o2
o1
->
outputs
.
push_back
(
v1
);
o2
->
inputs
.
push_back
(
v1
);
v1
->
inputs
.
push_back
(
o1
);
v1
->
outputs
.
push_back
(
o2
);
// o2->v2->o3
// o2->v2->o4
o2
->
outputs
.
push_back
(
v2
);
o3
->
inputs
.
push_back
(
v2
);
o4
->
inputs
.
push_back
(
v2
);
v2
->
inputs
.
push_back
(
o2
);
v2
->
outputs
.
push_back
(
o3
);
v2
->
outputs
.
push_back
(
o4
);
// o2->v3->o5
o2
->
outputs
.
push_back
(
v3
);
o5
->
inputs
.
push_back
(
v3
);
v3
->
inputs
.
push_back
(
o2
);
v3
->
outputs
.
push_back
(
o5
);
// o3-v4->o5
o3
->
outputs
.
push_back
(
v4
);
o5
->
inputs
.
push_back
(
v4
);
v4
->
inputs
.
push_back
(
o3
);
v4
->
outputs
.
push_back
(
o5
);
}
TEST
(
PDPattern
,
NewNode
)
{
PDPattern
x
;
auto
*
n
=
x
.
NewNode
([](
Node
*
x
)
{
return
true
;
});
ASSERT_TRUE
(
n
);
ASSERT_EQ
(
x
.
nodes_
.
size
(),
1UL
);
}
TEST
(
PDPattern
,
AddEdge
)
{
PDPattern
x
;
auto
*
a
=
x
.
NewNode
([](
Node
*
x
)
{
return
true
;
});
auto
*
b
=
x
.
NewNode
([](
Node
*
x
)
{
return
true
;
});
ASSERT_TRUE
(
a
);
ASSERT_TRUE
(
b
);
x
.
AddEdge
(
a
,
b
);
ASSERT_EQ
(
x
.
nodes_
.
size
(),
2UL
);
ASSERT_EQ
(
x
.
edges_
.
size
(),
1UL
);
ASSERT_EQ
(
x
.
edges_
.
front
().
first
,
a
);
ASSERT_EQ
(
x
.
edges_
.
front
().
second
,
b
);
ASSERT_EQ
(
x
.
nodes
().
size
(),
2UL
);
ASSERT_EQ
(
x
.
edges
().
size
(),
1UL
);
ASSERT_EQ
(
x
.
edges
().
front
().
first
,
a
);
ASSERT_EQ
(
x
.
edges
().
front
().
second
,
b
);
}
TEST
(
GraphPatternDetecter
,
MarkPDNodesInGraph
)
{
GraphPatternDetecter
x
;
// mark o2, o3, v2
// The pattern is a graph:
// o2(a node named o2) -> v2(a node named v2)
// v2 -> o3(a node named o3)
auto
*
o2
=
x
.
pattern_
.
NewNode
([](
Node
*
node
)
{
// The teller can be any condition, such as op type, or variable's shape.
return
node
&&
node
->
Name
()
==
"op2"
&&
node
->
IsOp
();
});
auto
*
o3
=
x
.
pattern_
.
NewNode
([](
Node
*
node
)
{
// The teller can be any condition, such as op type, or variable's shape.
return
node
&&
node
->
Name
()
==
"op3"
&&
node
->
IsOp
();
});
auto
*
v2
=
x
.
pattern_
.
NewNode
([](
Node
*
node
)
{
// The teller can be any condition, such as op type, or variable's shape.
return
node
&&
node
->
Name
()
==
"var2"
&&
node
->
IsVar
();
});
ASSERT_FALSE
(
o2
->
Tell
(
nullptr
));
ASSERT_FALSE
(
o3
->
Tell
(
nullptr
));
ASSERT_FALSE
(
v2
->
Tell
(
nullptr
));
x
.
pattern_
.
AddEdge
(
o2
,
v2
);
x
.
pattern_
.
AddEdge
(
v2
,
o3
);
ASSERT_EQ
(
x
.
pattern_
.
edges
().
size
(),
2UL
);
ASSERT_EQ
(
x
.
pattern_
.
edges
()[
0
].
first
,
o2
);
ASSERT_EQ
(
x
.
pattern_
.
edges
()[
0
].
second
,
v2
);
ASSERT_EQ
(
x
.
pattern_
.
edges
()[
1
].
first
,
v2
);
ASSERT_EQ
(
x
.
pattern_
.
edges
()[
1
].
second
,
o3
);
ProgramDesc
program
;
Graph
graph
(
program
);
BuildGraph
(
&
graph
);
x
.
MarkPDNodesInGraph
(
graph
);
ASSERT_EQ
(
x
.
pdnodes2nodes_
.
size
(),
3UL
);
auto
subgraphs
=
x
.
DetectPatterns
();
ASSERT_EQ
(
subgraphs
.
size
(),
1UL
);
}
TEST
(
GraphPatternDetecter
,
MultiSubgraph
)
{
ProgramDesc
program
;
Graph
graph
(
program
);
BuildGraph
(
&
graph
);
GraphPatternDetecter
x
;
// The pattern is a graph:
// op -> var
auto
*
any_op
=
x
.
mutable_pattern
()
->
NewNode
(
[](
Node
*
node
)
{
return
node
->
IsOp
()
&&
(
node
->
Name
()
==
"op2"
||
node
->
Name
()
==
"op3"
);
},
"OP0"
);
auto
*
any_var
=
x
.
mutable_pattern
()
->
NewNode
(
[](
Node
*
node
)
{
return
node
->
IsVar
();
},
"VAR"
);
auto
*
any_op1
=
x
.
mutable_pattern
()
->
NewNode
(
[](
Node
*
node
)
{
return
node
->
IsOp
();
},
"OP1"
);
x
.
mutable_pattern
()
->
AddEdge
(
any_op
,
any_var
);
x
.
mutable_pattern
()
->
AddEdge
(
any_var
,
any_op1
);
int
count
=
0
;
GraphPatternDetecter
::
handle_t
handle
=
[
&
](
const
GraphPatternDetecter
::
subgraph_t
&
s
,
Graph
*
g
)
{
LOG
(
INFO
)
<<
"Detect "
<<
s
.
at
(
any_op
)
->
Name
()
<<
" -> "
<<
s
.
at
(
any_var
)
->
Name
()
<<
" -> "
<<
s
.
at
(
any_op1
)
->
Name
();
count
++
;
};
x
(
&
graph
,
handle
);
// 1. Detect op3 -> var4 -> op5
// 2. Detect op2 -> var2 -> op3
// 3. Detect op2 -> var2 -> op4
// 4. Detect op2 -> var3 -> op5
// But 2 and 3 and 4 overlapped, so keep 2, so the final choices are 1 and 2
ASSERT_GE
(
count
,
1UL
);
ASSERT_LE
(
count
,
2UL
);
}
}
// namespace ir
}
// namespace framework
}
// namespace paddle
paddle/fluid/framework/ir/graph_test.cc
浏览文件 @
7c09d198
...
...
@@ -36,7 +36,7 @@ class SumOpMaker : public OpProtoAndCheckerMaker {
public:
void
Make
()
{
AddInput
(
"X"
,
""
).
AsDuplicable
();
AddOutput
(
"Out"
,
""
);
AddOutput
(
"Out"
,
""
)
.
AsDuplicable
()
;
AddComment
(
""
);
}
};
...
...
@@ -59,11 +59,27 @@ class SumOpVarTypeInference : public VarTypeInference {
block
->
Var
(
out_var_name
)
->
SetType
(
default_var_type
);
}
};
class
DummyOpMaker
:
public
OpProtoAndCheckerMaker
{
public:
void
Make
()
{
AddInput
(
"X"
,
""
).
AsDuplicable
();
AddOutput
(
"Out"
,
""
).
AsDuplicable
();
AddComment
(
""
);
}
};
class
DummyOpVarTypeInference
:
public
VarTypeInference
{
public:
void
operator
()(
const
OpDesc
&
op_desc
,
BlockDesc
*
block
)
const
override
{}
};
}
// namespace framework
}
// namespace paddle
REGISTER_OPERATOR
(
sum
,
paddle
::
framework
::
NOP
,
paddle
::
framework
::
SumOpMaker
,
paddle
::
framework
::
SumOpVarTypeInference
);
REGISTER_OPERATOR
(
dummy
,
paddle
::
framework
::
NOP
,
paddle
::
framework
::
SumOpMaker
,
paddle
::
framework
::
SumOpVarTypeInference
);
REGISTER_OPERATOR
(
sum_without_infer_var_type
,
paddle
::
framework
::
NOP
,
paddle
::
framework
::
SumOpMaker
);
...
...
@@ -110,5 +126,83 @@ TEST(GraphTest, Basic) {
}
ASSERT_EQ
(
nodes
.
size
(),
5
);
}
TEST
(
GraphTest
,
WriteAfterRead
)
{
// void Test() {
ProgramDesc
prog
;
auto
*
op
=
prog
.
MutableBlock
(
0
)
->
AppendOp
();
op
->
SetType
(
"sum"
);
op
->
SetInput
(
"X"
,
{
"a"
});
op
->
SetOutput
(
"Out"
,
{
"b"
});
op
->
SetAttr
(
"op_role"
,
1
);
op
=
prog
.
MutableBlock
(
0
)
->
AppendOp
();
op
->
SetType
(
"dummy"
);
op
->
SetInput
(
"X"
,
{
"c"
});
op
->
SetOutput
(
"Out"
,
{
"a"
});
op
->
SetAttr
(
"op_role"
,
1
);
prog
.
MutableBlock
(
0
)
->
Var
(
"a"
)
->
SetType
(
proto
::
VarType
::
LOD_TENSOR
);
prog
.
MutableBlock
(
0
)
->
Var
(
"b"
)
->
SetType
(
proto
::
VarType
::
LOD_TENSOR
);
prog
.
MutableBlock
(
0
)
->
Var
(
"c"
)
->
SetType
(
proto
::
VarType
::
LOD_TENSOR
);
std
::
unique_ptr
<
ir
::
Graph
>
g
(
new
ir
::
Graph
(
prog
));
ir
::
Node
*
control_dep1
=
nullptr
;
ir
::
Node
*
control_dep2
=
nullptr
;
for
(
ir
::
Node
*
n
:
g
->
Nodes
())
{
if
(
n
->
Name
()
==
"sum"
)
{
ASSERT_EQ
(
n
->
outputs
[
0
]
->
Name
(),
"b"
);
ASSERT_TRUE
(
ir
::
IsControlDepVar
(
*
n
->
outputs
[
1
]));
control_dep1
=
n
->
outputs
[
1
];
ASSERT_EQ
(
n
->
outputs
.
size
(),
2
);
}
if
(
n
->
Name
()
==
"dummy"
)
{
ASSERT_EQ
(
n
->
inputs
[
0
]
->
Name
(),
"c"
);
ASSERT_TRUE
(
ir
::
IsControlDepVar
(
*
n
->
inputs
[
1
]));
control_dep2
=
n
->
inputs
[
1
];
ASSERT_EQ
(
n
->
inputs
.
size
(),
2
);
}
}
ASSERT_EQ
(
control_dep1
,
control_dep2
);
}
TEST
(
GraphTest
,
WriteAfterWrite
)
{
// void Test() {
ProgramDesc
prog
;
auto
*
op
=
prog
.
MutableBlock
(
0
)
->
AppendOp
();
op
->
SetType
(
"sum"
);
op
->
SetInput
(
"X"
,
{
"a"
});
op
->
SetOutput
(
"Out"
,
{
"b"
});
op
->
SetAttr
(
"op_role"
,
1
);
op
=
prog
.
MutableBlock
(
0
)
->
AppendOp
();
op
->
SetType
(
"dummy"
);
op
->
SetInput
(
"X"
,
{
"c"
});
op
->
SetOutput
(
"Out"
,
{
"b"
});
op
->
SetAttr
(
"op_role"
,
1
);
prog
.
MutableBlock
(
0
)
->
Var
(
"a"
)
->
SetType
(
proto
::
VarType
::
LOD_TENSOR
);
prog
.
MutableBlock
(
0
)
->
Var
(
"b"
)
->
SetType
(
proto
::
VarType
::
LOD_TENSOR
);
prog
.
MutableBlock
(
0
)
->
Var
(
"c"
)
->
SetType
(
proto
::
VarType
::
LOD_TENSOR
);
std
::
unique_ptr
<
ir
::
Graph
>
g
(
new
ir
::
Graph
(
prog
));
ir
::
Node
*
control_dep1
=
nullptr
;
ir
::
Node
*
control_dep2
=
nullptr
;
for
(
ir
::
Node
*
n
:
g
->
Nodes
())
{
if
(
n
->
Name
()
==
"sum"
)
{
ASSERT_EQ
(
n
->
outputs
[
0
]
->
Name
(),
"b"
);
ASSERT_TRUE
(
ir
::
IsControlDepVar
(
*
n
->
outputs
[
1
]));
ASSERT_EQ
(
n
->
outputs
.
size
(),
2
);
control_dep1
=
n
->
outputs
[
1
];
}
if
(
n
->
Name
()
==
"dummy"
)
{
ASSERT_EQ
(
n
->
inputs
[
0
]
->
Name
(),
"c"
);
ASSERT_TRUE
(
ir
::
IsControlDepVar
(
*
n
->
inputs
[
1
]));
control_dep2
=
n
->
inputs
[
1
];
ASSERT_EQ
(
n
->
inputs
.
size
(),
2
);
ASSERT_EQ
(
control_dep1
,
control_dep2
);
}
}
}
}
// namespace framework
}
// namespace paddle
paddle/fluid/framework/ir/graph_traits.cc
0 → 100644
浏览文件 @
7c09d198
// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "paddle/fluid/framework/ir/graph_traits.h"
namespace
paddle
{
namespace
framework
{
namespace
ir
{
//
// NodesDFSIterator
//
NodesDFSIterator
::
NodesDFSIterator
(
const
std
::
vector
<
Node
*>
&
source
)
{
for
(
auto
*
x
:
source
)
stack_
.
push
(
x
);
}
NodesDFSIterator
::
NodesDFSIterator
(
NodesDFSIterator
&&
other
)
noexcept
:
stack_
(
std
::
move
(
other
.
stack_
)),
visited_
(
std
::
move
(
other
.
visited_
))
{}
NodesDFSIterator
::
NodesDFSIterator
(
const
NodesDFSIterator
&
other
)
:
stack_
(
other
.
stack_
),
visited_
(
other
.
visited_
)
{}
Node
&
NodesDFSIterator
::
operator
*
()
{
PADDLE_ENFORCE
(
!
stack_
.
empty
());
return
*
stack_
.
top
();
}
NodesDFSIterator
&
NodesDFSIterator
::
operator
++
()
{
PADDLE_ENFORCE
(
!
stack_
.
empty
(),
"the iterator exceeds range"
);
visited_
.
insert
(
stack_
.
top
());
auto
*
cur
=
stack_
.
top
();
stack_
.
pop
();
for
(
auto
*
x
:
cur
->
outputs
)
{
if
(
!
visited_
.
count
(
x
))
{
stack_
.
push
(
x
);
}
}
return
*
this
;
}
bool
NodesDFSIterator
::
operator
==
(
const
NodesDFSIterator
&
other
)
{
if
(
stack_
.
empty
())
return
other
.
stack_
.
empty
();
if
((
!
stack_
.
empty
())
&&
(
!
other
.
stack_
.
empty
()))
{
return
stack_
.
top
()
==
other
.
stack_
.
top
();
}
return
false
;
}
NodesDFSIterator
&
NodesDFSIterator
::
operator
=
(
const
NodesDFSIterator
&
other
)
{
stack_
=
other
.
stack_
;
visited_
=
other
.
visited_
;
return
*
this
;
}
Node
*
NodesDFSIterator
::
operator
->
()
{
return
stack_
.
top
();
}
}
// namespace ir
}
// namespace framework
}
// namespace paddle
paddle/fluid/framework/ir/graph_traits.h
0 → 100644
浏览文件 @
7c09d198
// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include <stack>
#include "paddle/fluid/framework/ir/graph.h"
#include "paddle/fluid/framework/ir/node.h"
namespace
paddle
{
namespace
framework
{
namespace
ir
{
template
<
typename
IteratorT
>
class
iterator_range
{
IteratorT
begin_
,
end_
;
public:
template
<
typename
Container
>
explicit
iterator_range
(
Container
&&
c
)
:
begin_
(
c
.
begin
()),
end_
(
c
.
end
())
{}
iterator_range
(
const
IteratorT
&
begin
,
const
IteratorT
&
end
)
:
begin_
(
begin
),
end_
(
end
)
{}
const
IteratorT
&
begin
()
const
{
return
begin_
;
}
const
IteratorT
&
end
()
const
{
return
end_
;
}
};
// DFS iterator on nodes.
struct
NodesDFSIterator
:
public
std
::
iterator
<
std
::
forward_iterator_tag
,
Node
*>
{
NodesDFSIterator
()
=
default
;
explicit
NodesDFSIterator
(
const
std
::
vector
<
Node
*>
&
source
);
NodesDFSIterator
(
NodesDFSIterator
&&
other
)
noexcept
;
NodesDFSIterator
(
const
NodesDFSIterator
&
other
);
Node
&
operator
*
();
NodesDFSIterator
&
operator
++
();
// TODO(Superjomn) current implementation just compare the first
// element, need to compare the graph and all the elements in the queue and
// set.
NodesDFSIterator
&
operator
=
(
const
NodesDFSIterator
&
other
);
bool
operator
==
(
const
NodesDFSIterator
&
other
);
bool
operator
!=
(
const
NodesDFSIterator
&
other
)
{
return
!
(
*
this
==
other
);
}
Node
*
operator
->
();
private:
std
::
stack
<
Node
*>
stack_
;
std
::
unordered_set
<
Node
*>
visited_
;
};
/*
* GraphTraits contains some graph traversal algorithms.
*
* Usage:
*
*/
struct
GraphTraits
{
static
iterator_range
<
NodesDFSIterator
>
DFS
(
const
Graph
&
g
)
{
auto
start_points
=
ExtractStartPoints
(
g
);
NodesDFSIterator
x
(
start_points
);
return
iterator_range
<
NodesDFSIterator
>
(
NodesDFSIterator
(
start_points
),
NodesDFSIterator
());
}
private:
// The nodes those have no input will be treated as start points.
static
std
::
vector
<
Node
*>
ExtractStartPoints
(
const
Graph
&
g
)
{
std
::
vector
<
Node
*>
result
;
for
(
auto
*
node
:
g
.
Nodes
())
{
if
(
node
->
inputs
.
empty
())
{
result
.
push_back
(
node
);
}
}
return
result
;
}
};
}
// namespace ir
}
// namespace framework
}
// namespace paddle
paddle/fluid/framework/ir/node.h
浏览文件 @
7c09d198
...
...
@@ -58,6 +58,9 @@ class Node {
return
op_desc_
;
}
bool
IsOp
()
const
{
return
type_
==
Type
::
kOperation
;
}
bool
IsVar
()
const
{
return
type_
==
Type
::
kVariable
;
}
std
::
vector
<
Node
*>
inputs
;
std
::
vector
<
Node
*>
outputs
;
...
...
paddle/fluid/framework/op_desc.cc
浏览文件 @
7c09d198
...
...
@@ -238,7 +238,20 @@ Attribute OpDesc::GetNullableAttr(const std::string &name) const {
}
}
int
OpDesc
::
GetBlockAttr
(
const
std
::
string
&
name
)
const
{
std
::
vector
<
int
>
OpDesc
::
GetBlocksAttrIds
(
const
std
::
string
&
name
)
const
{
auto
it
=
attrs_
.
find
(
name
);
PADDLE_ENFORCE
(
it
!=
attrs_
.
end
(),
"Attribute %s is not found"
,
name
);
auto
blocks
=
boost
::
get
<
std
::
vector
<
BlockDesc
*>>
(
it
->
second
);
std
::
vector
<
int
>
ids
;
for
(
auto
n
:
blocks
)
{
ids
.
push_back
(
n
->
ID
());
}
return
ids
;
}
int
OpDesc
::
GetBlockAttrId
(
const
std
::
string
&
name
)
const
{
auto
it
=
attrs_
.
find
(
name
);
PADDLE_ENFORCE
(
it
!=
attrs_
.
end
(),
"Attribute %s is not found"
,
name
);
return
boost
::
get
<
BlockDesc
*>
(
it
->
second
)
->
ID
();
...
...
paddle/fluid/framework/op_desc.h
浏览文件 @
7c09d198
...
...
@@ -83,7 +83,9 @@ class OpDesc {
Attribute
GetNullableAttr
(
const
std
::
string
&
name
)
const
;
int
GetBlockAttr
(
const
std
::
string
&
name
)
const
;
int
GetBlockAttrId
(
const
std
::
string
&
name
)
const
;
std
::
vector
<
int
>
GetBlocksAttrIds
(
const
std
::
string
&
name
)
const
;
void
Rename
(
const
std
::
string
&
old_name
,
const
std
::
string
&
new_name
);
...
...
paddle/fluid/framework/program_desc.cc
浏览文件 @
7c09d198
...
...
@@ -58,7 +58,7 @@ ProgramDesc::ProgramDesc(const ProgramDesc &o) {
for
(
const
std
::
string
&
attr_name
:
op
->
AttrNames
())
{
if
(
op
->
GetAttrType
(
attr_name
)
==
proto
::
AttrType
::
BLOCK
)
{
int
sub_block_id
=
o
.
Block
(
block_id
).
Op
(
op_id
)
->
GetBlockAttr
(
attr_name
);
o
.
Block
(
block_id
).
Op
(
op_id
)
->
GetBlockAttr
Id
(
attr_name
);
op
->
SetBlockAttr
(
attr_name
,
MutableBlock
(
sub_block_id
));
}
}
...
...
paddle/fluid/framework/tensor.cc
浏览文件 @
7c09d198
...
...
@@ -112,5 +112,6 @@ Tensor& Tensor::Resize(const DDim& dims) {
const
DDim
&
Tensor
::
dims
()
const
{
return
dims_
;
}
int64_t
Tensor
::
numel
()
const
{
return
product
(
dims_
);
}
}
// namespace framework
}
// namespace paddle
paddle/fluid/framework/tensor_impl.h
浏览文件 @
7c09d198
...
...
@@ -59,6 +59,14 @@ inline T* Tensor::mutable_data(platform::Place place) {
}
inline
Tensor
ReshapeToMatrix
(
const
Tensor
&
src
,
int
num_col_dims
)
{
int
rank
=
src
.
dims
().
size
();
PADDLE_ENFORCE_GE
(
rank
,
2
,
"'ReshapeToMatrix()' is only used for flatten high rank "
"tensors to matrixs. Can not be used in reshaping vectors."
);
if
(
rank
==
2
)
{
return
src
;
}
Tensor
res
;
res
.
ShareDataWith
(
src
);
res
.
Resize
(
flatten_to_2d
(
src
.
dims
(),
num_col_dims
));
...
...
paddle/fluid/framework/threadpool.cc
浏览文件 @
7c09d198
...
...
@@ -20,6 +20,9 @@
DEFINE_int32
(
io_threadpool_size
,
100
,
"number of threads used for doing IO, default 100"
);
DEFINE_int32
(
dist_threadpool_size
,
0
,
"number of threads used for distributed executed."
);
namespace
paddle
{
namespace
framework
{
...
...
@@ -35,6 +38,10 @@ void ThreadPool::Init() {
if
(
threadpool_
.
get
()
==
nullptr
)
{
// TODO(Yancey1989): specify the max threads number
int
num_threads
=
std
::
thread
::
hardware_concurrency
();
if
(
FLAGS_dist_threadpool_size
>
0
)
{
num_threads
=
FLAGS_dist_threadpool_size
;
VLOG
(
1
)
<<
"set dist_threadpool_size to "
<<
num_threads
;
}
PADDLE_ENFORCE_GT
(
num_threads
,
0
);
threadpool_
.
reset
(
new
ThreadPool
(
num_threads
));
}
...
...
paddle/fluid/inference/api/api_impl.cc
浏览文件 @
7c09d198
...
...
@@ -22,6 +22,9 @@ limitations under the License. */
#include <vector>
#include "paddle/fluid/inference/api/api_impl.h"
#include "paddle/fluid/platform/profiler.h"
DEFINE_bool
(
profile
,
false
,
"Turn on profiler for fluid"
);
namespace
paddle
{
namespace
{
...
...
@@ -58,6 +61,15 @@ bool NativePaddlePredictor::Init(
std
::
shared_ptr
<
framework
::
Scope
>
parent_scope
)
{
VLOG
(
3
)
<<
"Predictor::init()"
;
if
(
FLAGS_profile
)
{
LOG
(
WARNING
)
<<
"Profiler is actived, might affect the performance"
;
LOG
(
INFO
)
<<
"You can turn off by set gflags '-profile false'"
;
auto
tracking_device
=
config_
.
use_gpu
?
platform
::
ProfilerState
::
kAll
:
platform
::
ProfilerState
::
kCPU
;
platform
::
EnableProfiler
(
tracking_device
);
}
if
(
config_
.
use_gpu
)
{
place_
=
paddle
::
platform
::
CUDAPlace
(
config_
.
device
);
}
else
{
...
...
@@ -102,6 +114,10 @@ bool NativePaddlePredictor::Init(
}
NativePaddlePredictor
::~
NativePaddlePredictor
()
{
if
(
FLAGS_profile
)
{
platform
::
DisableProfiler
(
platform
::
EventSortingKey
::
kTotal
,
"./profile.log"
);
}
if
(
sub_scope_
)
{
scope_
->
DeleteScope
(
sub_scope_
);
}
...
...
paddle/fluid/operators/.flatten_op.cc.swp
已删除
100644 → 0
浏览文件 @
3373535b
文件已删除
paddle/fluid/operators/CMakeLists.txt
浏览文件 @
7c09d198
...
...
@@ -170,6 +170,9 @@ function(op_library TARGET)
file
(
APPEND
${
pybind_file
}
"USE_OP(fake_dequantize_max_abs);
\n
"
)
elseif
(
${
TARGET
}
STREQUAL
"tensorrt_engine_op"
)
message
(
STATUS
"Pybind skips [tensorrt_engine_op], for this OP is only used in inference"
)
elseif
(
${
TARGET
}
STREQUAL
"fc"
)
# HACK: fc only have mkldnn and cpu, which would mismatch the cpu only condition
file
(
APPEND
${
pybind_file
}
"USE_CPU_ONLY_OP(
${
TARGET
}
);
\n
"
)
else
()
file
(
APPEND
${
pybind_file
}
"USE_OP(
${
TARGET
}
);
\n
"
)
endif
()
...
...
@@ -300,12 +303,6 @@ op_library(channel_recv_op DEPS concurrency)
list
(
REMOVE_ITEM GENERAL_OPS
${
DEPS_OPS
}
)
# The fully connected layer is deleted when the WITH_MKLDNN flag is OFF
# Because the fully connected layer has only one MKLDNN's operator
if
(
NOT WITH_MKLDNN
)
list
(
REMOVE_ITEM GENERAL_OPS fc_op
)
endif
(
NOT WITH_MKLDNN
)
foreach
(
src
${
GENERAL_OPS
}
)
op_library
(
${
src
}
)
endforeach
()
...
...
paddle/fluid/operators/cross_entropy_op.cc
浏览文件 @
7c09d198
...
...
@@ -28,23 +28,26 @@ class CrossEntropyOp : public framework::OperatorWithKernel {
auto
x_dims
=
ctx
->
GetInputDim
(
"X"
);
auto
label_dims
=
ctx
->
GetInputDim
(
"Label"
);
PADDLE_ENFORCE_EQ
(
x_dims
.
size
(),
2UL
,
"Input(X)'s rank should be 2."
);
PADDLE_ENFORCE_EQ
(
label_dims
.
size
(),
2UL
,
"Input(Label)'s rank should be 2."
);
PADDLE_ENFORCE_EQ
(
x_dims
[
0
],
label_dims
[
0
],
"The 1st dimension of Input(X) and Input(Label) should "
"be equal."
);
int
rank
=
x_dims
.
size
();
PADDLE_ENFORCE_EQ
(
rank
,
label_dims
.
size
(),
"Input(X) and Input(Label) shall have the same rank."
);
PADDLE_ENFORCE_EQ
(
framework
::
slice_ddim
(
x_dims
,
0
,
rank
-
1
),
framework
::
slice_ddim
(
label_dims
,
0
,
rank
-
1
),
"Input(X) and Input(Label) shall have the same shape "
"except the last dimension."
);
if
(
ctx
->
Attrs
().
Get
<
bool
>
(
"soft_label"
))
{
PADDLE_ENFORCE_EQ
(
x_dims
[
1
],
label_dims
[
1
],
"If Attr(soft_label) == true, the
2nd
dimension of "
PADDLE_ENFORCE_EQ
(
x_dims
[
rank
-
1
],
label_dims
[
rank
-
1
],
"If Attr(soft_label) == true, the
last
dimension of "
"Input(X) and Input(Label) should be equal."
);
}
else
{
PADDLE_ENFORCE_EQ
(
label_dims
[
1
],
1UL
,
"If Attr(softLabel) == false, the
2nd
dimension of "
PADDLE_ENFORCE_EQ
(
label_dims
[
rank
-
1
],
1UL
,
"If Attr(softLabel) == false, the
last
dimension of "
"Input(Label) should be 1."
);
}
ctx
->
SetOutputDim
(
"Y"
,
{
x_dims
[
0
],
1
});
auto
y_dims
=
x_dims
;
y_dims
[
rank
-
1
]
=
1
;
ctx
->
SetOutputDim
(
"Y"
,
y_dims
);
ctx
->
ShareLoD
(
"X"
,
/*->*/
"Y"
);
}
...
...
@@ -74,24 +77,28 @@ class CrossEntropyGradientOp : public framework::OperatorWithKernel {
auto
x_dims
=
ctx
->
GetInputDim
(
"X"
);
auto
label_dims
=
ctx
->
GetInputDim
(
"Label"
);
auto
dy_dims
=
ctx
->
GetInputDim
(
framework
::
GradVarName
(
"Y"
));
PADDLE_ENFORCE_EQ
(
x_dims
.
size
(),
2
,
"Input(X)'s rank should be 2."
);
PADDLE_ENFORCE_EQ
(
dy_dims
.
size
(),
2
,
"Input(Y@Grad)'s rank should be 2."
);
PADDLE_ENFORCE_EQ
(
label_dims
.
size
(),
2
,
"Input(Label)'s rank should be 2."
);
PADDLE_ENFORCE_EQ
(
x_dims
[
0
],
label_dims
[
0
],
"The 1st dimension of Input(X) and Input(Label) should "
"be equal."
);
PADDLE_ENFORCE_EQ
(
x_dims
[
0
],
dy_dims
[
0
],
"The 1st dimension of Input(X) and Input(Y@Grad) should "
"be equal."
);
PADDLE_ENFORCE_EQ
(
dy_dims
[
1
],
1
,
"The 2nd dimension of Input(Y@Grad) should be 1."
);
int
rank
=
x_dims
.
size
();
PADDLE_ENFORCE_EQ
(
dy_dims
.
size
(),
rank
,
"Input(Y@Grad) and Input(X) should have the same rank."
);
PADDLE_ENFORCE_EQ
(
label_dims
.
size
(),
rank
,
"Input(Label) and Input(X) should have the same rank."
);
PADDLE_ENFORCE_EQ
(
framework
::
slice_ddim
(
x_dims
,
0
,
rank
-
1
),
framework
::
slice_ddim
(
label_dims
,
0
,
rank
-
1
),
"The Input(X) and Input(Label) should have the same "
"shape except the last dimension."
);
PADDLE_ENFORCE_EQ
(
framework
::
slice_ddim
(
x_dims
,
0
,
rank
-
1
),
framework
::
slice_ddim
(
dy_dims
,
0
,
rank
-
1
),
"The Input(X) and Input(Y@Grad) should have the same "
"shape except the last dimension."
);
PADDLE_ENFORCE_EQ
(
dy_dims
[
rank
-
1
],
1
,
"The last dimension of Input(Y@Grad) should be 1."
);
if
(
ctx
->
Attrs
().
Get
<
bool
>
(
"soft_label"
))
{
PADDLE_ENFORCE_EQ
(
x_dims
[
1
],
label_dims
[
1
],
"When Attr(soft_label) == true, the
2nd
dimension of "
PADDLE_ENFORCE_EQ
(
x_dims
[
rank
-
1
],
label_dims
[
rank
-
1
],
"When Attr(soft_label) == true, the
last
dimension of "
"Input(X) and Input(Label) should be equal."
);
}
else
{
PADDLE_ENFORCE_EQ
(
label_dims
[
1
],
1
,
"When Attr(soft_label) == false, the
2nd
dimension of "
PADDLE_ENFORCE_EQ
(
label_dims
[
rank
-
1
],
1
,
"When Attr(soft_label) == false, the
last
dimension of "
"Input(Label) should be 1."
);
}
ctx
->
SetOutputDim
(
framework
::
GradVarName
(
"X"
),
x_dims
);
...
...
@@ -113,18 +120,20 @@ class CrossEntropyOpMaker : public framework::OpProtoAndCheckerMaker {
public:
void
Make
()
override
{
AddInput
(
"X"
,
"(Tensor, default Tensor<float>), a 2-D tensor with shape [N x D],"
" where N is the batch size and D is the number of classes. "
"This input is a probability computed by the previous operator, "
"which is almost always the result of a softmax operator."
);
AddInput
(
"Label"
,
"(Tensor), the ground truth which is a 2-D tensor. When "
"soft_label is set to false, Label is a Tensor<int64> with shape "
"[N x 1]. When soft_label is set to true, Label is a "
"Tensor<float/double> with shape [N x D]."
);
"(Tensor, default Tensor<float>), a tensor whose last dimension "
"size is equal to the number of classes. This input is a "
"probability computed by the previous operator, which is almost "
"always the result of a softmax operator."
);
AddInput
(
"Label"
,
"(Tensor), the tensor which represents the ground truth. It has the "
"same shape with 'X' except the last dimension. When soft_label is set "
"to false, the last dimension size is 1; when soft_label is set to "
"true, the last dimension size is equal to the number of classes."
);
AddOutput
(
"Y"
,
"(Tensor, default Tensor<float>), a 2-D tensor with shape "
"[N x 1]. The cross entropy loss."
);
"(Tensor, default Tensor<float>), a tensor whose shape is same "
"with 'X' except that the last dimension size is 1. It "
"represents the cross entropy loss."
);
AddAttr
<
bool
>
(
"soft_label"
,
"(bool, default false), a flag indicating whether to "
"interpretate the given labels as soft labels."
)
...
...
@@ -132,6 +141,12 @@ class CrossEntropyOpMaker : public framework::OpProtoAndCheckerMaker {
AddComment
(
R"DOC(
CrossEntropy Operator.
The input 'X' and 'Label' will first be logically flattened to 2-D matrixs.
The matrix's second dimension(row length) is as same as the original last
dimension, and the first dimension(column length) is the product of all other
original dimensions. Then the softmax computation will take palce on each raw
of flattened matrixs.
It supports both standard cross-entropy and soft-label cross-entropy loss
computation.
1) One-hot cross-entropy:
...
...
paddle/fluid/operators/cross_entropy_op.h
浏览文件 @
7c09d198
...
...
@@ -33,8 +33,13 @@ class CrossEntropyOpKernel : public framework::OpKernel<T> {
auto
*
y
=
ctx
.
Output
<
Tensor
>
(
"Y"
);
y
->
mutable_data
<
T
>
(
ctx
.
GetPlace
());
int
rank
=
x
->
dims
().
size
();
Tensor
x_2d
=
framework
::
ReshapeToMatrix
(
*
x
,
rank
-
1
);
Tensor
labels_2d
=
framework
::
ReshapeToMatrix
(
*
labels
,
rank
-
1
);
Tensor
y_2d
=
framework
::
ReshapeToMatrix
(
*
y
,
rank
-
1
);
math
::
CrossEntropyFunctor
<
DeviceContext
,
T
>
()(
ctx
.
template
device_context
<
DeviceContext
>(),
y
,
x
,
labels
,
ctx
.
template
device_context
<
DeviceContext
>(),
&
y_2d
,
&
x_2d
,
&
labels_2d
,
ctx
.
Attr
<
bool
>
(
"soft_label"
));
}
};
...
...
@@ -98,9 +103,12 @@ class CrossEntropyGradientOpKernel : public framework::OpKernel<T> {
auto
*
dy
=
ctx
.
Input
<
Tensor
>
(
framework
::
GradVarName
(
"Y"
));
auto
*
label
=
ctx
.
Input
<
Tensor
>
(
"Label"
);
auto
*
dx
=
ctx
.
Output
<
Tensor
>
(
framework
::
GradVarName
(
"X"
));
auto
*
dx_data
=
dx
->
mutable_data
<
T
>
(
ctx
.
GetPlace
());
T
*
dx_data
=
dx
->
mutable_data
<
T
>
(
ctx
.
GetPlace
());
int64_t
class_num
=
x
->
dims
()[
1
];
// Following computation only depends on the last dimension size. So it's
// unnecessary to convert tensors to 2-D views.
int
rank
=
x
->
dims
().
size
();
int64_t
class_num
=
x
->
dims
()[
rank
-
1
];
if
(
ctx
.
Attr
<
bool
>
(
"soft_label"
))
{
XeSoftlabelGradFunctor
<
T
>
functor
(
dx_data
,
dy
->
data
<
T
>
(),
x
->
data
<
T
>
(),
label
->
data
<
T
>
(),
...
...
paddle/fluid/operators/distributed/variable_response.cc
浏览文件 @
7c09d198
...
...
@@ -190,12 +190,15 @@ bool VariableResponse::ProcSerializedField(
#endif
}
VLOG
(
7
)
<<
"ProcSerializedField:"
<<
meta_
.
varname
()
<<
", type:"
<<
meta_
.
type
()
<<
std
::
endl
;
framework
::
DDim
dims
=
GetDims
(
meta_
.
dims
());
if
(
meta_
.
type
()
==
sendrecv
::
LOD_TENSOR
)
{
PADDLE_ENFORCE
(
meta_
.
lod_size
()
>=
0
,
"lod info should be got first!"
);
if
(
!
CopyLodTensorData
(
input
,
*
dev_ctx_
,
dims
,
num_bytes
))
{
return
false
;
}
return
true
;
}
...
...
@@ -206,7 +209,9 @@ bool VariableResponse::ProcSerializedField(
return
true
;
}
return
true
;
PADDLE_ENFORCE
(
"not supported var types:"
,
meta_
.
varname
(),
meta_
.
type
());
return
false
;
}
};
// namespace distributed
...
...
paddle/fluid/operators/fc_mkldnn_op.cc
浏览文件 @
7c09d198
...
...
@@ -125,13 +125,16 @@ class FCMKLDNNOpKernel : public paddle::framework::OpKernel<T> {
auto
input
=
ctx
.
Input
<
Tensor
>
(
"Input"
);
auto
w
=
ctx
.
Input
<
Tensor
>
(
"W"
);
auto
bias
=
ctx
.
Input
<
Tensor
>
(
"Bias"
);
PADDLE_ENFORCE
(
input
->
dims
().
size
()
==
2
||
input
->
dims
().
size
()
==
4
,
"Input must be with 2 or 4 dimensions, i.e. NCHW"
);
// TODO(intel friends): the native weight format is io,
// but the mkldnn weight format is oihw, which may need be transposed.
PADDLE_ENFORCE
(
w
->
dims
().
size
()
==
2
||
w
->
dims
().
size
()
==
4
,
"Weights must be with 2 or 4 dimensions, i.e. OI or OIHW"
);
bool
with_bias
=
ctx
.
Attr
<
bool
>
(
"bias_attr"
)
;
bool
with_bias
=
bias
!=
nullptr
;
MKLDNNMD
<
Tensor
>
md
(
input
,
w
,
with_bias
);
std
::
shared_ptr
<
mkldnn
::
inner_product_forward
::
primitive_desc
>
pd
=
...
...
@@ -154,6 +157,7 @@ class FCMKLDNNOpKernel : public paddle::framework::OpKernel<T> {
auto
dst_memory
=
mem
.
dst
(
output_data
);
auto
src_memory
=
mem
.
src
(
input_data
);
auto
weights_memory
=
mem
.
weights
(
w_data
);
// TODO(intel friends): bias memory should also be obtain from bias->data()
auto
bias_memory
=
mem
.
bias
();
auto
forward
=
with_bias
?
mkldnn
::
inner_product_forward
(
...
...
@@ -216,7 +220,8 @@ class FCMKLDNNGradOpKernel : public paddle::framework::OpKernel<T> {
const
Tensor
*
out_grad
=
ctx
.
Input
<
Tensor
>
(
framework
::
GradVarName
(
"Out"
));
const
T
*
out_grad_data
=
out_grad
->
data
<
T
>
();
bool
with_bias
=
ctx
.
Attr
<
bool
>
(
"bias_attr"
);
auto
bias
=
ctx
.
Input
<
Tensor
>
(
"Bias"
);
bool
with_bias
=
bias
!=
nullptr
;
MKLDNNMD
<
Tensor
>
md
(
input
,
w
,
with_bias
);
MKLDNNMemory
mem
(
&
md
,
mkldnn_engine
);
...
...
paddle/fluid/operators/fc_op.cc
浏览文件 @
7c09d198
...
...
@@ -14,6 +14,9 @@ limitations under the License. */
#include "paddle/fluid/operators/fc_op.h"
#include <vector>
#include "paddle/fluid/operators/math/blas.h"
DECLARE_int32
(
paddle_num_threads
);
namespace
paddle
{
namespace
operators
{
...
...
@@ -25,16 +28,24 @@ void FCOp::InferShape(framework::InferShapeContext* ctx) const {
"Out(Output) of Fully Connected should not be null."
);
PADDLE_ENFORCE
(
ctx
->
HasInput
(
"W"
),
"W(Input) of Fully Connected should not be null."
);
// NCHW
auto
in_dims
=
ctx
->
GetInputDim
(
"Input"
);
// IO, I=C*H*W
auto
w_dims
=
ctx
->
GetInputDim
(
"W"
);
std
::
vector
<
int64_t
>
output_shape
({
in_dims
[
0
],
w_dims
[
1
]});
if
(
ctx
->
HasInput
(
"Bias"
))
{
auto
bias_dims
=
ctx
->
GetInputDim
(
"Bias"
);
PADDLE_ENFORCE_EQ
(
bias_dims
[
0
],
1
,
"The shape of Bias must be [1, dim]."
);
PADDLE_ENFORCE_EQ
(
bias_dims
[
1
],
w_dims
[
1
],
"The shape of Bias must be [1, dim]."
);
}
PADDLE_ENFORCE
(
in_dims
.
size
()
==
2
||
in_dims
.
size
()
==
4
,
"Fully Connected input should be 2-D or 4-D tensor."
);
PADDLE_ENFORCE
(
w_dims
.
size
()
==
2
||
w_dims
.
size
()
==
4
,
"Fully Connected input should be 2-D or 4-D tensor."
);
PADDLE_ENFORCE_EQ
(
w_dims
.
size
(),
2UL
,
"Fully Connected input should be 2-D tensor."
);
PADDLE_ENFORCE_EQ
(
framework
::
product
(
in_dims
)
/
in_dims
[
0
],
w_dims
[
0
],
"Fully Connected input and weigth size do not match."
);
ctx
->
SetOutputDim
(
"Out"
,
framework
::
make_ddim
(
output_shape
));
ctx
->
ShareLoD
(
"Input"
,
"Out"
);
...
...
@@ -42,9 +53,12 @@ void FCOp::InferShape(framework::InferShapeContext* ctx) const {
framework
::
OpKernelType
FCOp
::
GetExpectedKernelType
(
const
framework
::
ExecutionContext
&
ctx
)
const
{
framework
::
LibraryType
library
{
framework
::
LibraryType
::
kMKLDNN
};
framework
::
DataLayout
layout
{
framework
::
DataLayout
::
kMKLDNN
};
framework
::
LibraryType
library
=
framework
::
LibraryType
::
kPlain
;
framework
::
DataLayout
layout
=
framework
::
DataLayout
::
kAnyLayout
;
if
(
ctx
.
Attr
<
bool
>
(
"use_mkldnn"
))
{
library
=
framework
::
LibraryType
::
kMKLDNN
;
layout
=
framework
::
DataLayout
::
kMKLDNN
;
}
return
framework
::
OpKernelType
(
framework
::
ToDataType
(
ctx
.
Input
<
Tensor
>
(
"Input"
)
->
type
()),
ctx
.
GetPlace
(),
layout
,
library
);
...
...
@@ -60,27 +74,39 @@ void FCOpGrad::InferShape(framework::InferShapeContext* ctx) const {
if
(
ctx
->
HasOutput
(
framework
::
GradVarName
(
"W"
)))
{
ctx
->
SetOutputDim
(
framework
::
GradVarName
(
"W"
),
w_dims
);
}
if
(
ctx
->
HasInput
(
"Bias"
))
{
PADDLE_ENFORCE
(
ctx
->
HasOutput
(
framework
::
GradVarName
(
"Bias"
)),
"Should have bias grad"
);
auto
bias_dims
=
ctx
->
GetInputDim
(
"Bias"
);
ctx
->
SetOutputDim
(
framework
::
GradVarName
(
"Bias"
),
bias_dims
);
}
}
framework
::
OpKernelType
FCOpGrad
::
GetExpectedKernelType
(
const
framework
::
ExecutionContext
&
ctx
)
const
{
framework
::
LibraryType
library
{
framework
::
LibraryType
::
kMKLDNN
};
framework
::
DataLayout
layout
{
framework
::
DataLayout
::
kMKLDNN
};
framework
::
LibraryType
library
=
framework
::
LibraryType
::
kPlain
;
framework
::
DataLayout
layout
=
framework
::
DataLayout
::
kAnyLayout
;
if
(
ctx
.
Attr
<
bool
>
(
"use_mkldnn"
))
{
library
=
framework
::
LibraryType
::
kMKLDNN
;
layout
=
framework
::
DataLayout
::
kMKLDNN
;
}
return
framework
::
OpKernelType
(
framework
::
ToDataType
(
ctx
.
Input
<
Tensor
>
(
"Input"
)
->
type
()),
ctx
.
GetPlace
(),
layout
,
library
);
}
void
FCOpMaker
::
Make
()
{
AddInput
(
"Input"
,
"(Tensor) The input tensor of fully connected operator. "
);
AddInput
(
"W"
,
"(Tensor), The second input tensor of fc op."
);
AddInput
(
"Input"
,
"(Tensor), The input tensor of fully connected operator with format "
"(NCHW). "
);
AddInput
(
"W"
,
"(Tensor), The weight fc op with shape (I, O)."
);
AddInput
(
"Bias"
,
"(Tensor, optional) Bias vector with shape (1 x O"
)
.
AsDispensable
();
AddOutput
(
"Out"
,
"(Tensor) The output tensor of fully connected operator. "
);
AddAttr
<
bool
>
(
"use_mkldnn"
,
"(bool, default false) Only used in mkldnn kernel"
)
.
SetDefault
(
false
);
AddAttr
<
bool
>
(
"bias_attr"
,
"(bool, default false) Only used in mkldnn kernel"
)
.
SetDefault
(
false
);
AddComment
(
R"DOC(
Fully Connected Operator.
...
...
@@ -94,9 +120,47 @@ void FCOpMaker::Make() {
)DOC"
);
}
template
<
typename
T
>
class
FCOpKernel
:
public
framework
::
OpKernel
<
T
>
{
public:
void
Compute
(
const
paddle
::
framework
::
ExecutionContext
&
ctx
)
const
override
{
PADDLE_ENFORCE
(
platform
::
is_cpu_place
(
ctx
.
GetPlace
()),
"It must use CPUPlace."
);
auto
input
=
ctx
.
Input
<
Tensor
>
(
"Input"
);
auto
w
=
ctx
.
Input
<
Tensor
>
(
"W"
);
auto
bias
=
ctx
.
Input
<
Tensor
>
(
"Bias"
);
auto
output
=
ctx
.
Output
<
Tensor
>
(
"Out"
);
auto
in_dims
=
input
->
dims
();
auto
w_dims
=
w
->
dims
();
auto
&
dev_ctx
=
ctx
.
template
device_context
<
platform
::
CPUDeviceContext
>();
auto
blas
=
math
::
GetBlas
<
platform
::
CPUDeviceContext
,
T
>
(
dev_ctx
);
const
T
*
input_data
=
input
->
data
<
T
>
();
const
T
*
w_data
=
w
->
data
<
T
>
();
T
*
output_data
=
output
->
mutable_data
<
T
>
(
ctx
.
GetPlace
());
blas
.
GEMM
(
CblasNoTrans
,
CblasNoTrans
,
in_dims
[
0
],
w_dims
[
1
],
w_dims
[
0
],
static_cast
<
T
>
(
1
),
input_data
,
w_data
,
static_cast
<
T
>
(
0
),
output_data
);
if
(
bias
)
{
const
T
*
bias_data
=
bias
->
data
<
T
>
();
#ifdef PADDLE_WITH_MKLML
#pragma omp parallel for if (FLAGS_paddle_num_threads > 1)
#endif
for
(
int
bs
=
0
;
bs
<
in_dims
[
0
];
bs
++
)
{
blas
.
AXPY
(
w_dims
[
1
],
static_cast
<
T
>
(
1
),
bias_data
,
output_data
+
bs
*
w_dims
[
1
]);
}
}
}
};
}
// namespace operators
}
// namespace paddle
REGISTER_OPERATOR
(
fc
,
paddle
::
operators
::
FCOp
,
paddle
::
operators
::
FCOpMaker
,
namespace
ops
=
paddle
::
operators
;
REGISTER_OPERATOR
(
fc
,
ops
::
FCOp
,
ops
::
FCOpMaker
,
paddle
::
framework
::
DefaultGradOpDescMaker
<
true
>
);
REGISTER_OPERATOR
(
fc_grad
,
paddle
::
operators
::
FCOpGrad
);
REGISTER_OPERATOR
(
fc_grad
,
ops
::
FCOpGrad
);
REGISTER_OP_CPU_KERNEL
(
fc
,
ops
::
FCOpKernel
<
float
>
,
ops
::
FCOpKernel
<
double
>
);
paddle/fluid/operators/listen_and_serv_op.cc
浏览文件 @
7c09d198
...
...
@@ -123,8 +123,11 @@ void ListenAndServOp::RunSyncLoop(
optimize_prepared
.
begin
(),
std
::
shared_ptr
<
framework
::
ExecutorPrepareContext
>
(
nullptr
));
// Trainers will get all parameters from pserver in the
// startup program, so we will wait RequestGet first
rpc_service_
->
SetCond
(
distributed
::
kRequestGet
);
rpc_service_
->
WaitBarrier
(
distributed
::
kRequestGet
);
rpc_service_
->
ResetBarrierCounter
();
while
(
true
)
{
rpc_service_
->
Profiler
().
OneStep
();
// Get from multiple trainers, we don't care about the order in which
...
...
paddle/fluid/operators/prelu_op.cc
浏览文件 @
7c09d198
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
...
...
@@ -26,14 +23,40 @@ class PReluOp : public framework::OperatorWithKernel {
:
OperatorWithKernel
(
type
,
inputs
,
outputs
,
attrs
)
{}
void
InferShape
(
framework
::
InferShapeContext
*
ctx
)
const
override
{
std
::
string
mode
=
ctx
->
Attrs
().
Get
<
std
::
string
>
(
"mode"
);
auto
x_dim
=
ctx
->
GetInputDim
(
"X"
);
PADDLE_ENFORCE
(
ctx
->
HasInput
(
"X"
),
"Input(X) should not be null"
);
PADDLE_ENFORCE
(
ctx
->
HasInput
(
"Alpha"
),
"Input(Alpha) should not be null"
);
PADDLE_ENFORCE
(
product
(
ctx
->
GetInputDim
(
"Alpha"
))
==
1
,
"Size of weight Alpha must be one."
);
PADDLE_ENFORCE
(
ctx
->
HasOutput
(
"Out"
),
"Output(Out) should not be null"
);
ctx
->
SetOutputDim
(
"Out"
,
ctx
->
GetInputDim
(
"X"
));
if
(
mode
==
"all"
)
{
PADDLE_ENFORCE
(
product
(
ctx
->
GetInputDim
(
"Alpha"
))
==
1
,
"For mode 'all', size of weight Alpha must be one."
);
}
else
if
(
mode
==
"channel"
)
{
PADDLE_ENFORCE
(
product
(
ctx
->
GetInputDim
(
"Alpha"
))
==
x_dim
[
1
],
"For channel-wise mode, size of weight Alpha must be "
"equal to the number of channels, should be %d"
,
x_dim
[
1
]);
}
else
if
(
mode
==
"element"
)
{
PADDLE_ENFORCE
(
product
(
ctx
->
GetInputDim
(
"Alpha"
))
==
product
(
x_dim
),
"For element-wise mode, size of weight Alpha must be "
"equal to the number of input, should be %d"
,
product
(
x_dim
));
}
else
{
PADDLE_THROW
(
"Unkown mode %s"
,
mode
);
}
ctx
->
SetOutputDim
(
"Out"
,
x_dim
);
ctx
->
ShareLoD
(
"X"
,
/*->*/
"Out"
);
}
protected:
framework
::
OpKernelType
GetExpectedKernelType
(
const
framework
::
ExecutionContext
&
ctx
)
const
override
{
return
framework
::
OpKernelType
(
framework
::
ToDataType
(
ctx
.
Input
<
Tensor
>
(
"X"
)
->
type
()),
platform
::
CPUPlace
());
}
};
class
PReluOpMaker
:
public
framework
::
OpProtoAndCheckerMaker
{
...
...
@@ -44,9 +67,7 @@ class PReluOpMaker : public framework::OpProtoAndCheckerMaker {
AddOutput
(
"Out"
,
"The output tensor of prelu operator."
);
AddComment
(
R"DOC(
PRelu Operator.
The equation is:
$$
f(x) =
\begin{cases}
...
...
@@ -54,11 +75,15 @@ f(x) =
x, \qquad \text{if} \ x >= 0
\end{cases}
$$
The input `X` can carry the LoD (Level of Details) information,
or not. And the output shares the LoD information with input `X`.
There are modes:
all: all elements share same weight
channel: elements in a channel share same weight
element: each element has a weight
)DOC"
);
AddAttr
<
std
::
string
>
(
"mode"
,
"The mode for inputs to share weights."
)
.
SetDefault
(
"all"
);
}
};
...
...
@@ -71,9 +96,23 @@ class PReluGradOp : public framework::OperatorWithKernel {
PADDLE_ENFORCE
(
ctx
->
HasInput
(
"X"
),
"Input(X) must not be null."
);
PADDLE_ENFORCE
(
ctx
->
HasInput
(
framework
::
GradVarName
(
"Out"
)),
"Input(Out@GRAD) should not be null"
);
ctx
->
SetOutputDim
(
framework
::
GradVarName
(
"X"
),
ctx
->
GetInputDim
(
"X"
));
ctx
->
SetOutputDim
(
framework
::
GradVarName
(
"Alpha"
),
ctx
->
GetInputDim
(
"Alpha"
));
auto
x_grad_name
=
framework
::
GradVarName
(
"X"
);
auto
alpha_grad_name
=
framework
::
GradVarName
(
"Alpha"
);
if
(
ctx
->
HasOutput
(
x_grad_name
))
{
ctx
->
SetOutputDim
(
x_grad_name
,
ctx
->
GetInputDim
(
"X"
));
}
if
(
ctx
->
HasOutput
(
alpha_grad_name
))
{
ctx
->
SetOutputDim
(
alpha_grad_name
,
ctx
->
GetInputDim
(
"Alpha"
));
}
}
protected:
framework
::
OpKernelType
GetExpectedKernelType
(
const
framework
::
ExecutionContext
&
ctx
)
const
override
{
return
framework
::
OpKernelType
(
framework
::
ToDataType
(
ctx
.
Input
<
Tensor
>
(
"X"
)
->
type
()),
platform
::
CPUPlace
());
}
};
...
...
paddle/fluid/operators/prelu_op.cu
已删除
100644 → 0
浏览文件 @
3373535b
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "paddle/fluid/operators/prelu_op.h"
REGISTER_OP_CUDA_KERNEL
(
prelu
,
paddle
::
operators
::
PReluKernel
<
paddle
::
platform
::
CUDADeviceContext
,
float
>
);
REGISTER_OP_CUDA_KERNEL
(
prelu_grad
,
paddle
::
operators
::
PReluGradKernel
<
paddle
::
platform
::
CUDADeviceContext
,
float
>
);
paddle/fluid/operators/prelu_op.h
浏览文件 @
7c09d198
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
...
...
@@ -13,32 +10,16 @@ See the License for the specific language governing permissions and
limitations under the License. */
#pragma once
#include <string>
#include "paddle/fluid/framework/eigen.h"
#include "paddle/fluid/framework/op_registry.h"
#include "paddle/fluid/platform/transform.h"
namespace
paddle
{
namespace
operators
{
using
Tensor
=
framework
::
Tensor
;
using
platform
::
Transform
;
template
<
typename
T
>
class
PReluFunctor
{
public:
explicit
PReluFunctor
(
const
T
*
alpha
)
:
alpha_
(
alpha
)
{}
HOSTDEVICE
T
operator
()(
const
T
&
x
)
const
{
if
(
x
>
0
)
return
x
;
else
return
x
*
(
*
alpha_
);
}
private:
const
T
*
alpha_
;
};
template
<
typename
DeviceContext
,
typename
T
>
class
PReluKernel
:
public
framework
::
OpKernel
<
T
>
{
public:
...
...
@@ -50,53 +31,93 @@ class PReluKernel : public framework::OpKernel<T> {
const
T
*
x_ptr
=
x
->
data
<
T
>
();
T
*
o_ptr
=
out
->
mutable_data
<
T
>
(
context
.
GetPlace
());
auto
*
alpha_ptr
=
alpha
->
data
<
T
>
();
const
T
*
alpha_ptr
=
alpha
->
data
<
T
>
();
std
::
string
mode
=
context
.
Attr
<
std
::
string
>
(
"mode"
);
int
numel
=
x
->
numel
();
Transform
<
DeviceContext
>
trans
;
trans
(
context
.
template
device_context
<
DeviceContext
>(),
x_ptr
,
x_ptr
+
numel
,
o_ptr
,
PReluFunctor
<
T
>
(
alpha_ptr
));
}
};
template
<
typename
T
>
class
PReluGradFunctor
{
public:
explicit
PReluGradFunctor
(
const
T
*
alpha
)
:
alpha_
(
alpha
)
{}
HOSTDEVICE
T
operator
()(
const
T
&
out
,
const
T
&
dout
)
const
{
if
(
out
>
0
)
return
dout
;
else
return
dout
*
(
*
alpha_
);
auto
dim
=
x
->
dims
();
int
index
=
0
;
int
i
=
0
;
int
temp
=
0
;
if
(
mode
==
"channel"
)
{
for
(
i
=
0
;
i
<
numel
;
i
++
)
{
temp
=
numel
/
(
dim
[
0
]
*
dim
[
1
]);
index
=
(
i
/
temp
)
%
dim
[
1
];
o_ptr
[
i
]
=
x_ptr
[
i
]
>
0
?
x_ptr
[
i
]
:
alpha_ptr
[
index
]
*
x_ptr
[
i
];
}
}
else
if
(
mode
==
"element"
)
{
for
(
i
=
0
;
i
<
numel
;
i
++
)
{
o_ptr
[
i
]
=
x_ptr
[
i
]
>
0
?
x_ptr
[
i
]
:
alpha_ptr
[
i
]
*
x_ptr
[
i
];
}
}
else
{
for
(
i
=
0
;
i
<
numel
;
i
++
)
{
o_ptr
[
i
]
=
x_ptr
[
i
]
>
0
?
x_ptr
[
i
]
:
alpha_ptr
[
0
]
*
x_ptr
[
i
];
}
}
}
private:
const
T
*
alpha_
;
};
template
<
typename
DeviceContext
,
typename
T
>
class
PReluGradKernel
:
public
framework
::
OpKernel
<
T
>
{
public:
void
Compute
(
const
framework
::
ExecutionContext
&
context
)
const
override
{
auto
*
x
=
context
.
Input
<
Tensor
>
(
"X"
);
auto
*
dx
=
context
.
Output
<
Tensor
>
(
framework
::
GradVarName
(
"X"
));
auto
*
dout
=
context
.
Input
<
Tensor
>
(
framework
::
GradVarName
(
"Out"
));
auto
*
dalpha
=
context
.
Output
<
Tensor
>
(
framework
::
GradVarName
(
"Alpha"
));
auto
*
out
=
context
.
Input
<
Tensor
>
(
"Out"
);
auto
*
alpha
=
context
.
Input
<
Tensor
>
(
"Alpha"
);
auto
*
alpha_ptr
=
alpha
->
data
<
T
>
();
T
*
dx_ptr
=
dx
->
mutable_data
<
T
>
(
context
.
GetPlace
());
const
T
*
alpha_ptr
=
alpha
->
data
<
T
>
();
const
T
*
x_ptr
=
x
->
data
<
T
>
();
const
T
*
dout_ptr
=
dout
->
data
<
T
>
();
const
T
*
out_ptr
=
out
->
data
<
T
>
();
int
numel
=
dx
->
numel
();
Transform
<
DeviceContext
>
trans
;
trans
(
context
.
template
device_context
<
DeviceContext
>(),
out_ptr
,
out_ptr
+
numel
,
dout_ptr
,
dx_ptr
,
PReluGradFunctor
<
T
>
(
alpha_ptr
));
// TODO(Zhuoyuan): add dalpha upgrade when GPU kernels ready
std
::
string
mode
=
context
.
Attr
<
std
::
string
>
(
"mode"
);
int
numel
=
x
->
numel
();
auto
dim
=
x
->
dims
();
int
index
=
0
;
int
i
=
0
;
int
temp
=
0
;
if
(
dx
)
{
T
*
dx_ptr
=
dx
->
mutable_data
<
T
>
(
context
.
GetPlace
());
if
(
mode
==
"channel"
)
{
for
(
i
=
0
;
i
<
numel
;
i
++
)
{
temp
=
numel
/
(
dim
[
0
]
*
dim
[
1
]);
index
=
(
i
/
temp
)
%
dim
[
1
];
dx_ptr
[
i
]
=
out_ptr
[
i
]
>
0
?
dout_ptr
[
i
]
:
alpha_ptr
[
index
]
*
dout_ptr
[
i
];
}
}
else
if
(
mode
==
"element"
)
{
for
(
i
=
0
;
i
<
numel
;
i
++
)
{
dx_ptr
[
i
]
=
out_ptr
[
i
]
>
0
?
dout_ptr
[
i
]
:
alpha_ptr
[
i
]
*
dout_ptr
[
i
];
}
}
else
{
for
(
i
=
0
;
i
<
numel
;
i
++
)
{
dx_ptr
[
i
]
=
out_ptr
[
i
]
>
0
?
dout_ptr
[
i
]
:
alpha_ptr
[
0
]
*
dout_ptr
[
i
];
}
}
}
index
=
0
;
if
(
dalpha
)
{
T
*
dalpha_ptr
=
dalpha
->
mutable_data
<
T
>
(
context
.
GetPlace
());
if
(
mode
==
"channel"
)
{
for
(
i
=
0
;
i
<
numel
;
i
++
)
{
temp
=
numel
/
(
dim
[
0
]
*
dim
[
1
]);
index
=
(
i
/
temp
)
%
dim
[
1
];
dalpha_ptr
[
index
]
+=
out_ptr
[
i
]
>
0
?
0
:
x_ptr
[
i
]
*
dout_ptr
[
i
];
}
}
else
if
(
mode
==
"element"
)
{
for
(
i
=
0
;
i
<
numel
;
i
++
)
{
dalpha_ptr
[
i
]
+=
out_ptr
[
i
]
>
0
?
0
:
x_ptr
[
i
]
*
dout_ptr
[
i
];
}
}
else
{
for
(
i
=
0
;
i
<
numel
;
i
++
)
{
dalpha_ptr
[
0
]
+=
out_ptr
[
i
]
>
0
?
0
:
x_ptr
[
i
]
*
dout_ptr
[
i
];
}
}
}
// TODO(Guanzhong): add GPU kernels
}
};
...
...
paddle/fluid/operators/shape_op.cc
浏览文件 @
7c09d198
...
...
@@ -38,7 +38,7 @@ class ShapeOpMaker : public framework::OpProtoAndCheckerMaker {
AddInput
(
"Input"
,
"(Tensor), The input tensor."
);
AddOutput
(
"Out"
,
"(Tensor), The shape of input tensor, the data type of the shape"
" is int
64
_t, will be on the same device with the input Tensor."
);
" is int
32
_t, will be on the same device with the input Tensor."
);
AddComment
(
R"DOC(
Shape Operator
...
...
@@ -53,5 +53,5 @@ Get the shape of input tensor. Only support CPU input Tensor now.
namespace
ops
=
paddle
::
operators
;
REGISTER_OPERATOR
(
shape
,
ops
::
ShapeOp
,
ops
::
ShapeOpMaker
,
paddle
::
framework
::
EmptyGradOpMaker
);
REGISTER_OP_CPU_KERNEL
(
shape
,
ops
::
ShapeKernel
<
int
>
,
ops
::
ShapeKernel
<
int
64
_t
>
,
REGISTER_OP_CPU_KERNEL
(
shape
,
ops
::
ShapeKernel
<
int
>
,
ops
::
ShapeKernel
<
int
32
_t
>
,
ops
::
ShapeKernel
<
float
>
,
ops
::
ShapeKernel
<
double
>
);
paddle/fluid/operators/shape_op.cu
浏览文件 @
7c09d198
...
...
@@ -15,6 +15,6 @@ limitations under the License. */
#include "paddle/fluid/operators/shape_op.h"
REGISTER_OP_CUDA_KERNEL
(
shape
,
paddle
::
operators
::
ShapeKernel
<
int
>
,
paddle
::
operators
::
ShapeKernel
<
int
64
_t
>
,
paddle
::
operators
::
ShapeKernel
<
int
32
_t
>
,
paddle
::
operators
::
ShapeKernel
<
float
>
,
paddle
::
operators
::
ShapeKernel
<
double
>
);
paddle/fluid/operators/shape_op.h
浏览文件 @
7c09d198
...
...
@@ -27,7 +27,7 @@ class ShapeKernel : public framework::OpKernel<T> {
void
Compute
(
const
framework
::
ExecutionContext
&
ctx
)
const
override
{
auto
*
in_t
=
ctx
.
Input
<
Tensor
>
(
"Input"
);
auto
*
out_t
=
ctx
.
Output
<
Tensor
>
(
"Out"
);
auto
out_data
=
out_t
->
mutable_data
<
int
64
_t
>
(
platform
::
CPUPlace
());
auto
out_data
=
out_t
->
mutable_data
<
int
32
_t
>
(
platform
::
CPUPlace
());
auto
in_dims
=
in_t
->
dims
();
for
(
int
i
=
0
;
i
<
in_dims
.
size
();
++
i
)
{
out_data
[
i
]
=
in_dims
[
i
];
...
...
paddle/fluid/operators/softmax_op.h
浏览文件 @
7c09d198
...
...
@@ -31,16 +31,12 @@ class SoftmaxKernel : public framework::OpKernel<T> {
// allocate memory on device.
Out
->
mutable_data
<
T
>
(
context
.
GetPlace
());
auto
dims
=
X
->
dims
();
auto
flattened_dims
=
framework
::
flatten_to_2d
(
dims
,
dims
.
size
()
-
1
);
framework
::
LoDTensor
flattened_x
;
framework
::
LoDTensor
flattened_out
;
flattened_x
.
ShareDataWith
(
*
X
).
Resize
(
flattened_dims
);
flattened_out
.
ShareDataWith
(
*
Out
).
Resize
(
flattened_dims
);
int
rank
=
X
->
dims
().
size
();
Tensor
X_2d
=
framework
::
ReshapeToMatrix
(
*
X
,
rank
-
1
);
Tensor
Out_2d
=
framework
::
ReshapeToMatrix
(
*
Out
,
rank
-
1
);
math
::
SoftmaxFunctor
<
DeviceContext
,
T
>
()(
context
.
template
device_context
<
DeviceContext
>(),
&
flattened_x
,
&
flattened_out
);
context
.
template
device_context
<
DeviceContext
>(),
&
X_2d
,
&
Out_2d
);
}
};
...
...
@@ -55,18 +51,14 @@ class SoftmaxGradKernel : public framework::OpKernel<T> {
// allocate memory on device.
dX
->
mutable_data
<
T
>
(
context
.
GetPlace
());
auto
dims
=
Out
->
dims
();
auto
flattened_dims
=
framework
::
flatten_to_2d
(
dims
,
dims
.
size
()
-
1
);
framework
::
LoDTensor
flattened_out
;
framework
::
LoDTensor
flattened_d_out
;
framework
::
LoDTensor
flattened_d_x
;
flattened_out
.
ShareDataWith
(
*
Out
).
Resize
(
flattened_dims
);
flattened_d_out
.
ShareDataWith
(
*
dOut
).
Resize
(
flattened_dims
);
flattened_d_x
.
ShareDataWith
(
*
dX
).
Resize
(
flattened_dims
);
int
rank
=
Out
->
dims
().
size
();
Tensor
Out_2d
=
framework
::
ReshapeToMatrix
(
*
Out
,
rank
-
1
);
Tensor
dOut_2d
=
framework
::
ReshapeToMatrix
(
*
dOut
,
rank
-
1
);
Tensor
dX_2d
=
framework
::
ReshapeToMatrix
(
*
dX
,
rank
-
1
);
math
::
SoftmaxGradFunctor
<
DeviceContext
,
T
>
()(
context
.
template
device_context
<
DeviceContext
>(),
&
flattened_out
,
&
flattened_d_out
,
&
flattened_d_x
);
context
.
template
device_context
<
DeviceContext
>(),
&
Out_2d
,
&
dOut_2d
,
&
dX_2d
);
}
};
...
...
paddle/fluid/platform/profiler.cc
浏览文件 @
7c09d198
...
...
@@ -270,12 +270,13 @@ struct EventItem {
double
min_time
;
double
max_time
;
double
ave_time
;
float
ratio
;
};
// Print results
void
PrintProfiler
(
const
std
::
vector
<
std
::
vector
<
EventItem
>>&
events_table
,
const
std
::
string
&
sorted_domain
,
const
size_t
name_width
,
const
size_t
data_width
)
{
const
size_t
data_width
,
double
total
)
{
// Output header information
std
::
cout
<<
"
\n
------------------------->"
<<
" Profiling Report "
...
...
@@ -300,7 +301,8 @@ void PrintProfiler(const std::vector<std::vector<EventItem>>& events_table,
std
::
cout
<<
std
::
setw
(
name_width
)
<<
"Event"
<<
std
::
setw
(
data_width
)
<<
"Calls"
<<
std
::
setw
(
data_width
)
<<
"Total"
<<
std
::
setw
(
data_width
)
<<
"Min."
<<
std
::
setw
(
data_width
)
<<
"Max."
<<
std
::
setw
(
data_width
)
<<
"Ave."
<<
std
::
endl
;
<<
"Max."
<<
std
::
setw
(
data_width
)
<<
"Ave."
<<
std
::
setw
(
data_width
)
<<
"Ratio."
<<
std
::
endl
;
for
(
size_t
i
=
0
;
i
<
events_table
.
size
();
++
i
)
{
for
(
size_t
j
=
0
;
j
<
events_table
[
i
].
size
();
++
j
)
{
const
EventItem
&
event_item
=
events_table
[
i
][
j
];
...
...
@@ -309,7 +311,9 @@ void PrintProfiler(const std::vector<std::vector<EventItem>>& events_table,
<<
std
::
setw
(
data_width
)
<<
event_item
.
total_time
<<
std
::
setw
(
data_width
)
<<
event_item
.
min_time
<<
std
::
setw
(
data_width
)
<<
event_item
.
max_time
<<
std
::
setw
(
data_width
)
<<
event_item
.
ave_time
<<
std
::
endl
;
<<
std
::
setw
(
data_width
)
<<
event_item
.
ave_time
<<
std
::
setw
(
data_width
)
<<
event_item
.
total_time
/
total
<<
std
::
endl
;
}
}
std
::
cout
<<
std
::
endl
;
...
...
@@ -359,6 +363,7 @@ void ParseEvents(const std::vector<std::vector<Event>>& events,
std
::
vector
<
std
::
vector
<
EventItem
>>
events_table
;
size_t
max_name_width
=
0
;
double
total
=
0.
;
// the total time
for
(
size_t
i
=
0
;
i
<
events
.
size
();
i
++
)
{
std
::
list
<
Event
>
pushed_events
;
std
::
vector
<
EventItem
>
event_items
;
...
...
@@ -379,6 +384,7 @@ void ParseEvents(const std::vector<std::vector<Event>>& events,
g_state
==
ProfilerState
::
kAll
)
?
rit
->
CudaElapsedMs
(
events
[
i
][
j
])
:
rit
->
CpuElapsedMs
(
events
[
i
][
j
]);
total
+=
event_time
;
std
::
string
event_name
=
"thread"
+
std
::
to_string
(
rit
->
thread_id
())
+
"::"
+
rit
->
name
();
...
...
@@ -387,7 +393,8 @@ void ParseEvents(const std::vector<std::vector<Event>>& events,
if
(
event_idx
.
find
(
event_name
)
==
event_idx
.
end
())
{
event_idx
[
event_name
]
=
event_items
.
size
();
EventItem
event_item
=
{
event_name
,
1
,
event_time
,
event_time
,
event_time
,
event_time
};
event_time
,
event_time
,
event_time
,
0.
};
event_items
.
push_back
(
event_item
);
}
else
{
int
index
=
event_idx
[
event_name
];
...
...
@@ -431,7 +438,7 @@ void ParseEvents(const std::vector<std::vector<Event>>& events,
}
// Print report
PrintProfiler
(
events_table
,
sorted_domain
,
max_name_width
+
4
,
12
);
PrintProfiler
(
events_table
,
sorted_domain
,
max_name_width
+
4
,
12
,
total
);
}
void
DisableProfiler
(
EventSortingKey
sorted_key
,
...
...
paddle/fluid/pybind/protobuf.cc
浏览文件 @
7c09d198
...
...
@@ -301,7 +301,8 @@ void BindOpDesc(pybind11::module *m) {
std
::
string
ser
(
seriralized
);
self
.
SetAttr
(
name
,
ser
);
})
.
def
(
"block_attr"
,
&
pd
::
OpDesc
::
GetBlockAttr
)
.
def
(
"block_attr_id"
,
&
pd
::
OpDesc
::
GetBlockAttrId
)
.
def
(
"blocks_attr_ids"
,
&
pd
::
OpDesc
::
GetBlocksAttrIds
)
.
def
(
"check_attrs"
,
&
pd
::
OpDesc
::
CheckAttrs
)
.
def
(
"infer_shape"
,
&
pd
::
OpDesc
::
InferShape
)
.
def
(
"infer_var_type"
,
&
pd
::
OpDesc
::
InferVarType
)
...
...
python/paddle/fluid/__init__.py
浏览文件 @
7c09d198
...
...
@@ -122,7 +122,7 @@ def __bootstrap__():
'use_pinned_memory'
,
'check_nan_inf'
,
'benchmark'
,
'warpctc_dir'
,
'eager_delete_scope'
,
'use_mkldnn'
,
'initial_cpu_memory_in_mb'
,
'init_allocated_mem'
,
'free_idle_memory'
,
'paddle_num_threads'
,
'cpu_deterministic'
"dist_threadpool_size"
,
'cpu_deterministic'
]
if
core
.
is_compiled_with_dist
():
read_env_flags
.
append
(
'rpc_deadline'
)
...
...
python/paddle/fluid/backward.py
浏览文件 @
7c09d198
...
...
@@ -344,7 +344,7 @@ def _append_backward_ops_(block,
grad_sub_block_list
=
[]
# If the op has its own sub-block, deal with the sub-block first
if
op
.
has_attr
(
"sub_block"
):
sub_block
=
program
.
block
(
op
.
block_attr
(
"sub_block"
))
sub_block
=
program
.
block
(
op
.
block_attr
_id
(
"sub_block"
))
grad_sub_block
=
program
.
create_block
()
grad_sub_block
.
_set_forward_block_idx
(
sub_block
.
idx
)
cb
=
_callback_lookup_
(
op
)
...
...
@@ -406,7 +406,7 @@ def _append_backward_vars_(block, start_op_idx, grad_to_var, grad_info_map):
for
op_idx
in
range
(
start_op_idx
,
block
.
desc
.
op_size
()):
op_desc
=
block
.
desc
.
op
(
op_idx
)
if
op_desc
.
has_attr
(
"sub_block"
):
sub_block
=
block
.
program
.
block
(
op_desc
.
block_attr
(
"sub_block"
))
sub_block
=
block
.
program
.
block
(
op_desc
.
block_attr
_id
(
"sub_block"
))
_append_backward_vars_
(
sub_block
,
0
,
grad_to_var
,
grad_info_map
)
new_vars
=
set
()
# create new gradient variables
...
...
python/paddle/fluid/framework.py
浏览文件 @
7c09d198
...
...
@@ -476,23 +476,25 @@ class Operator(object):
attrs
=
None
):
self
.
block
=
block
self
.
desc
=
desc
self
.
attrs
=
attrs
if
self
.
attrs
is
None
:
self
.
attrs
=
dict
()
# note: not add self.attrs here:
# https://github.com/PaddlePaddle/Paddle/pull/12583#pullrequestreview-145093173
op_attrs
=
attrs
if
op_attrs
is
None
:
op_attrs
=
dict
()
del
attrs
op_maker
=
core
.
op_proto_and_checker_maker
if
op_maker
.
kOpRoleAttrName
()
not
in
self
.
attrs
:
self
.
attrs
[
op_maker
.
kOpRoleAttrName
()]
=
self
.
block
.
program
.
op_role
if
op_maker
.
kOpRoleAttrName
()
not
in
op_
attrs
:
op_
attrs
[
op_maker
.
kOpRoleAttrName
()]
=
self
.
block
.
program
.
op_role
role_var_name
=
op_maker
.
kOpRoleVarAttrName
()
if
len
(
self
.
block
.
program
.
op_role_var
)
!=
0
and
role_var_name
not
in
self
.
attrs
:
self
.
attrs
[
role_var_name
]
=
self
.
block
.
program
.
op_role_var
op_role_var
)
!=
0
and
role_var_name
not
in
op_
attrs
:
op_
attrs
[
role_var_name
]
=
self
.
block
.
program
.
op_role_var
if
role_var_name
in
self
.
attrs
and
len
(
self
.
attrs
[
role_var_name
])
==
0
:
del
self
.
attrs
[
role_var_name
]
if
role_var_name
in
op_attrs
and
len
(
op_
attrs
[
role_var_name
])
==
0
:
del
op_
attrs
[
role_var_name
]
if
len
(
self
.
desc
.
type
())
!=
0
:
return
...
...
@@ -576,15 +578,14 @@ class Operator(object):
arg
.
op
=
self
self
.
desc
.
set_output
(
out_proto
.
name
,
out_arg_names
)
if
self
.
attrs
is
not
None
:
if
not
isinstance
(
self
.
attrs
,
dict
):
if
op_
attrs
is
not
None
:
if
not
isinstance
(
op_
attrs
,
dict
):
raise
TypeError
(
"'attrs' should be a dict."
)
for
attr
in
proto
.
attrs
:
attr_name
=
attr
.
name
if
(
attr_name
not
in
self
.
attrs
)
or
(
self
.
attrs
[
attr_name
]
is
None
):
if
(
attr_name
not
in
op_attrs
)
or
(
op_attrs
[
attr_name
]
is
None
):
continue
attr_val
=
self
.
attrs
[
attr_name
]
attr_val
=
op_
attrs
[
attr_name
]
self
.
_update_desc_attr
(
attr_name
,
attr_val
)
self
.
desc
.
check_attrs
()
...
...
@@ -732,7 +733,6 @@ class Operator(object):
Raises:
ValueError: If the type of value doesn't match with desc.attr_type(name).
"""
self
.
attrs
[
name
]
=
val
self
.
_update_desc_attr
(
name
,
val
)
def
_update_desc_attr
(
self
,
name
,
val
):
...
...
@@ -774,9 +774,9 @@ class Operator(object):
"""
return
self
.
desc
.
attr
(
name
)
def
block_attr
(
self
,
name
):
def
block_attr
_id
(
self
,
name
):
"""
Get the block attribute by name.
Get the block attribute
's id
by name.
Args:
name(str): the attribute name.
...
...
@@ -784,22 +784,74 @@ class Operator(object):
Returns:
int: the block index.
"""
return
self
.
desc
.
block_attr
(
name
)
return
self
.
desc
.
block_attr_id
(
name
)
def
block_attr
(
self
,
name
):
"""
Get the block attribute by name.
Args:
name(str): the attribute name.
Returns:
block: the block attribute.
"""
id
=
self
.
block_attr_id
(
name
)
assert
(
id
>=
0
and
id
<
len
(
self
.
block
.
program
.
blocks
))
return
self
.
block
.
program
.
blocks
[
id
]
def
blocks_attr
(
self
,
name
):
"""
Get the blocks attribute by name.
Args:
name(str): the attribute name.
Returns:
list: list of the blocks attribute.
"""
attrs
=
[]
for
i
in
self
.
blocks_attr_ids
(
name
):
assert
(
i
>=
0
and
i
<
len
(
self
.
block
.
program
.
blocks
))
attrs
.
append
(
self
.
block
.
program
.
blocks
[
i
])
return
attrs
def
blocks_attr_ids
(
self
,
name
):
"""
Get the blocks attribute's ids by name.
Args:
name(str): the attribute name.
Returns:
list: list of the blocks ids.
"""
return
self
.
desc
.
blocks_attr_ids
(
name
)
def
all_attrs
(
self
):
"""
Get the attribute dict.
Returns:
dict: The Operator's attribute dict.
dict: The Operator's attribute dict
, name->attr
.
"""
attr_names
=
self
.
attr_names
attr_map
=
{}
for
n
in
attr_names
:
if
n
==
'sub_block'
:
attr_type
=
self
.
desc
.
attr_type
(
n
)
if
attr_type
==
core
.
AttrType
.
BLOCK
:
attr_map
[
n
]
=
self
.
block_attr
(
n
)
else
:
attr_map
[
n
]
=
self
.
attr
(
n
)
continue
if
attr_type
==
core
.
AttrType
.
BLOCKS
:
attr_map
[
n
]
=
self
.
blocks_attr
(
n
)
continue
attr_map
[
n
]
=
self
.
attr
(
n
)
return
attr_map
...
...
@@ -1518,11 +1570,17 @@ class Program(object):
The two code snippets above will generate same programs.
"""
if
for_test
:
p
=
self
.
inference_optimize
()
p
=
self
.
inference_optimize
(
export_for_deployment
=
False
)
else
:
p
=
Program
()
p
.
current_block_idx
=
self
.
current_block_idx
p
.
_seed
=
self
.
_seed
p
.
desc
=
core
.
ProgramDesc
(
self
.
desc
)
p
.
blocks
=
[
Block
(
p
,
i
)
for
i
in
range
(
self
.
desc
.
num_blocks
())]
p
.
blocks
=
[
Block
(
p
,
i
)
for
i
in
xrange
(
self
.
desc
.
num_blocks
())]
p
.
_current_role
=
self
.
_current_role
p
.
_op_role_var
=
self
.
_op_role_var
p
.
_sync_with_cpp
()
p
.
_copy_param_info_from
(
self
)
...
...
@@ -1578,7 +1636,7 @@ class Program(object):
res
.
_sync_with_cpp
()
return
res
def
inference_optimize
(
self
):
def
inference_optimize
(
self
,
export_for_deployment
=
True
):
"""
This method will create a new program and do following adjustments on it:
1. Remove all reader variables and their creator ops if exist.
...
...
@@ -1589,6 +1647,10 @@ class Program(object):
attribute of operators to :code:`True`. All the :code:`Parameter`
information will be lost.
Args:
export_for_deployment(bool): remove the read ops that are added by py_reader
for cpp inference library
Notes: This API is a very low level API. Use
:code:`Program.clone(for_test=True)` instead.
...
...
@@ -1603,16 +1665,17 @@ class Program(object):
# remove all readers and the read_op if exist
read_op_idx
=
0
root_block
=
res
.
desc
.
block
(
0
)
while
True
:
if
read_op_idx
>=
root_block
.
op_size
()
or
root_block
.
op
(
read_op_idx
).
type
()
==
'read'
:
break
read_op_idx
+=
1
if
read_op_idx
<
root_block
.
op_size
():
root_block
.
_remove_op
(
0
,
read_op_idx
+
1
)
for
var
in
root_block
.
all_vars
():
if
var
.
type
()
==
core
.
VarDesc
.
VarType
.
READER
:
root_block
.
_remove_var
(
var
.
name
())
if
export_for_deployment
:
while
True
:
if
read_op_idx
>=
root_block
.
op_size
()
or
root_block
.
op
(
read_op_idx
).
type
()
==
'read'
:
break
read_op_idx
+=
1
if
read_op_idx
<
root_block
.
op_size
():
root_block
.
_remove_op
(
0
,
read_op_idx
+
1
)
for
var
in
root_block
.
all_vars
():
if
var
.
type
()
==
core
.
VarDesc
.
VarType
.
READER
:
root_block
.
_remove_var
(
var
.
name
())
# change all `is_test` attributes to True
for
i
in
range
(
res
.
desc
.
num_blocks
()):
...
...
python/paddle/fluid/initializer.py
浏览文件 @
7c09d198
...
...
@@ -15,7 +15,6 @@
from
.
import
framework
import
numpy
as
np
import
contextlib
from
.framework
import
convert_np_dtype_to_dtype_
from
.core
import
VarDesc
__all__
=
[
...
...
@@ -264,7 +263,8 @@ class NormalInitializer(Initializer):
"dtype"
:
int
(
var
.
dtype
),
"mean"
:
self
.
_mean
,
"std"
:
self
.
_std_dev
,
"seed"
:
self
.
_seed
"seed"
:
self
.
_seed
,
"use_mkldnn"
:
False
})
var
.
op
=
op
return
op
...
...
python/paddle/fluid/io.py
浏览文件 @
7c09d198
...
...
@@ -555,7 +555,8 @@ def save_inference_model(dirname,
executor
,
main_program
=
None
,
model_filename
=
None
,
params_filename
=
None
):
params_filename
=
None
,
export_for_deployment
=
True
):
"""
Prune the given `main_program` to build a new program especially for inference,
and then save it and all related parameters to given `dirname` by the `executor`.
...
...
@@ -577,6 +578,8 @@ def save_inference_model(dirname,
params_filename(str|None): The name of file to save all related parameters.
If it is setted None, parameters will be saved
in separate files .
export_for_deployment(bool): remove the read ops that are added by py_reader
for cpp inference lib. Default True
Returns:
None
...
...
@@ -643,7 +646,8 @@ def save_inference_model(dirname,
copy_program
.
desc
.
flush
()
pruned_program
=
copy_program
.
prune
(
targets
=
target_vars
)
inference_program
=
pruned_program
.
inference_optimize
()
inference_program
=
pruned_program
.
inference_optimize
(
export_for_deployment
=
export_for_deployment
)
fetch_var_names
=
[
v
.
name
for
v
in
target_vars
]
prepend_feed_ops
(
inference_program
,
feeded_var_names
)
...
...
python/paddle/fluid/layers/detection.py
浏览文件 @
7c09d198
...
...
@@ -20,7 +20,9 @@ from .layer_function_generator import autodoc, templatedoc
from
..layer_helper
import
LayerHelper
from
.
import
tensor
from
.
import
nn
from
.
import
ops
import
math
import
numpy
from
functools
import
reduce
__all__
=
[
...
...
@@ -264,10 +266,11 @@ def detection_output(loc,
prior_box_var
=
prior_box_var
,
target_box
=
loc
,
code_type
=
'decode_center_size'
)
old_shape
=
scores
.
shape
scores
=
nn
.
reshape
(
x
=
scores
,
shape
=
(
-
1
,
old_shape
[
-
1
]))
compile_shape
=
scores
.
shape
run_shape
=
ops
.
shape
(
scores
)
scores
=
nn
.
flatten
(
x
=
scores
,
axis
=
2
)
scores
=
nn
.
softmax
(
input
=
scores
)
scores
=
nn
.
reshape
(
x
=
scores
,
shape
=
old
_shape
)
scores
=
nn
.
reshape
(
x
=
scores
,
shape
=
compile_shape
,
actual_shape
=
run
_shape
)
scores
=
nn
.
transpose
(
scores
,
perm
=
[
0
,
2
,
1
])
scores
.
stop_gradient
=
True
nmsed_outs
=
helper
.
create_tmp_variable
(
dtype
=
decoded_box
.
dtype
)
...
...
@@ -677,9 +680,10 @@ def ssd_loss(location,
raise
ValueError
(
"Only support mining_type == max_negative now."
)
num
,
num_prior
,
num_class
=
confidence
.
shape
conf_shape
=
ops
.
shape
(
confidence
)
def
__reshape_to_2d
(
var
):
return
nn
.
reshape
(
x
=
var
,
shape
=
[
-
1
,
var
.
shape
[
-
1
]]
)
return
nn
.
flatten
(
x
=
var
,
axis
=
2
)
# 1. Find matched boundding box by prior box.
# 1.1 Compute IOU similarity between ground-truth boxes and prior boxes.
...
...
@@ -690,7 +694,8 @@ def ssd_loss(location,
# 2. Compute confidence for mining hard examples
# 2.1. Get the target label based on matched indices
gt_label
=
nn
.
reshape
(
x
=
gt_label
,
shape
=
gt_label
.
shape
+
(
1
,
))
gt_label
=
nn
.
reshape
(
x
=
gt_label
,
shape
=
(
len
(
gt_label
.
shape
)
-
1
)
*
(
0
,
)
+
(
-
1
,
1
))
gt_label
.
stop_gradient
=
True
target_label
,
_
=
target_assign
(
gt_label
,
matched_indices
,
mismatch_value
=
background_label
)
...
...
@@ -701,9 +706,12 @@ def ssd_loss(location,
target_label
=
__reshape_to_2d
(
target_label
)
target_label
.
stop_gradient
=
True
conf_loss
=
nn
.
softmax_with_cross_entropy
(
confidence
,
target_label
)
# 3. Mining hard examples
conf_loss
=
nn
.
reshape
(
x
=
conf_loss
,
shape
=
(
num
,
num_prior
))
conf_loss
=
nn
.
reshape
(
x
=
conf_loss
,
shape
=
(
num
,
num_prior
),
actual_shape
=
ops
.
slice
(
conf_shape
,
axes
=
[
0
],
starts
=
[
0
],
ends
=
[
2
]))
conf_loss
.
stop_gradient
=
True
neg_indices
=
helper
.
create_tmp_variable
(
dtype
=
'int32'
)
dtype
=
matched_indices
.
dtype
...
...
@@ -772,7 +780,11 @@ def ssd_loss(location,
# 5.3 Compute overall weighted loss.
loss
=
conf_loss_weight
*
conf_loss
+
loc_loss_weight
*
loc_loss
# reshape to [N, Np], N is the batch size and Np is the prior box number.
loss
=
nn
.
reshape
(
x
=
loss
,
shape
=
[
-
1
,
num_prior
])
loss
=
nn
.
reshape
(
x
=
loss
,
shape
=
(
num
,
num_prior
),
actual_shape
=
ops
.
slice
(
conf_shape
,
axes
=
[
0
],
starts
=
[
0
],
ends
=
[
2
]))
loss
=
nn
.
reduce_sum
(
loss
,
dim
=
1
,
keep_dim
=
True
)
if
normalize
:
normalizer
=
nn
.
reduce_sum
(
target_loc_weight
)
...
...
@@ -1005,13 +1017,7 @@ def multi_box_head(inputs,
"""
def
_reshape_with_axis_
(
input
,
axis
=
1
):
if
not
(
axis
>
0
and
axis
<
len
(
input
.
shape
)):
raise
ValueError
(
"The axis should be smaller than "
"the arity of input and bigger than 0."
)
new_shape
=
[
-
1
,
reduce
(
lambda
x
,
y
:
x
*
y
,
input
.
shape
[
axis
:
len
(
input
.
shape
)])
]
out
=
nn
.
reshape
(
x
=
input
,
shape
=
new_shape
)
out
=
nn
.
flatten
(
x
=
input
,
axis
=
axis
)
return
out
def
_is_list_or_tuple_
(
data
):
...
...
@@ -1101,11 +1107,13 @@ def multi_box_head(inputs,
stride
=
stride
)
mbox_loc
=
nn
.
transpose
(
mbox_loc
,
perm
=
[
0
,
2
,
3
,
1
])
new
_shape
=
[
compile
_shape
=
[
mbox_loc
.
shape
[
0
],
mbox_loc
.
shape
[
1
]
*
mbox_loc
.
shape
[
2
]
*
mbox_loc
.
shape
[
3
]
/
4
,
4
]
mbox_loc_flatten
=
nn
.
reshape
(
mbox_loc
,
shape
=
new_shape
)
run_shape
=
tensor
.
assign
(
numpy
.
array
([
0
,
-
1
,
4
]).
astype
(
"int32"
))
mbox_loc_flatten
=
nn
.
reshape
(
mbox_loc
,
shape
=
compile_shape
,
actual_shape
=
run_shape
)
mbox_locs
.
append
(
mbox_loc_flatten
)
# get conf
...
...
@@ -1117,11 +1125,15 @@ def multi_box_head(inputs,
padding
=
pad
,
stride
=
stride
)
conf_loc
=
nn
.
transpose
(
conf_loc
,
perm
=
[
0
,
2
,
3
,
1
])
new_shape
=
[
new_shape
=
[
0
,
-
1
,
num_classes
]
compile_shape
=
[
conf_loc
.
shape
[
0
],
conf_loc
.
shape
[
1
]
*
conf_loc
.
shape
[
2
]
*
conf_loc
.
shape
[
3
]
/
num_classes
,
num_classes
]
conf_loc_flatten
=
nn
.
reshape
(
conf_loc
,
shape
=
new_shape
)
run_shape
=
tensor
.
assign
(
numpy
.
array
([
0
,
-
1
,
num_classes
]).
astype
(
"int32"
))
conf_loc_flatten
=
nn
.
reshape
(
conf_loc
,
shape
=
compile_shape
,
actual_shape
=
run_shape
)
mbox_confs
.
append
(
conf_loc_flatten
)
if
len
(
box_results
)
==
1
:
...
...
python/paddle/fluid/layers/nn.py
浏览文件 @
7c09d198
...
...
@@ -112,6 +112,8 @@ __all__ = [
'log'
,
'crop'
,
'rank_loss'
,
'prelu'
,
'flatten'
,
]
...
...
@@ -5361,3 +5363,123 @@ def rank_loss(label, left, right, name=None):
"Right"
:
right
},
outputs
=
{
'Out'
:
out
})
return
out
def
prelu
(
x
,
mode
,
param_attr
=
None
,
name
=
None
):
"""
Equation:
y = \max(0, x) + alpha \min(0, x)
Args:
x (Variable): The input tensor.
param_attr(ParamAttr|None): The parameter attribute for the learnable
weight (alpha).
mode (string): The mode for weight sharing
all: all elements share same weight
channel:elements in a channel share same weight
element:each element has a weight
name(str|None): A name for this layer(optional). If set None, the layer
will be named automatically.
Returns:
Variable: The output tensor with the same shape as input.
Examples:
.. code-block:: python
x = fluid.layers.data(name="x", shape=[10,10], dtype="float32")
mode = 'channel'
output = fluid.layers.prelu(x,mode)
"""
helper
=
LayerHelper
(
'prelu'
,
**
locals
())
if
mode
not
in
[
'all'
,
'channel'
,
'element'
]:
raise
ValueError
(
'mode should be one of all, channel, element.'
)
alpha_shape
=
[
1
]
if
mode
==
'channel'
:
alpha_shape
=
[
1
,
x
.
shape
[
1
],
1
,
1
]
elif
mode
==
'element'
:
alpha_shape
=
x
.
shape
dtype
=
helper
.
input_dtype
(
input_param_name
=
'x'
)
alpha
=
helper
.
create_parameter
(
attr
=
param_attr
,
shape
=
alpha_shape
,
dtype
=
'float32'
,
is_bias
=
False
,
default_initializer
=
Constant
(
1.0
))
out
=
helper
.
create_tmp_variable
(
dtype
)
helper
.
append_op
(
type
=
"prelu"
,
inputs
=
{
"X"
:
x
,
'Alpha'
:
alpha
},
attrs
=
{
"mode"
:
mode
},
outputs
=
{
"Out"
:
out
})
return
out
def
flatten
(
x
,
axis
=
1
,
name
=
None
):
"""
**Flatten layer**
Flattens the input tensor into a 2D matrix.
Examples:
Case 1:
Given
X.shape = (3, 100, 100, 4)
and
axis = 2
We get:
Out.shape = (3 * 100, 4 * 100)
Case 2:
Given
X.shape = (3, 100, 100, 4)
and
axis = 0
We get:
Out.shape = (1, 3 * 100 * 100 * 4)
Args:
x (Variable): A tensor of rank >= axis.
axis (int): Indicate up to which input dimensions (exclusive) should
be flattened to the outer dimension of the output.
The value for axis must be in the range [0, R], where R
is the rank of the input tensor. When axis = 0, the shape
of the output tensor is (1, (d_0 X d_1 ... d_n), where the
shape of the input tensor is (d_0, d_1, ... d_n).
name(str|None): A name for this layer(optional). If set None, the layer
will be named automatically.
Returns:
Variable: A 2D tensor with the contents of the input tensor, with input
dimensions up to axis flattened to the outer dimension of
the output and remaining input dimensions flattened into the
inner dimension of the output.
Raises:
ValueError: If x is not a variable.
ValueError: If axis is not in range [0, rank(x)].
Examples:
.. code-block:: python
x = fluid.layers.data(name="x", shape=[4, 4, 3], dtype="float32")
out = fluid.layers.flatten(x=x, axis=2)
"""
helper
=
LayerHelper
(
'flatten'
,
**
locals
())
if
not
(
isinstance
(
x
,
Variable
)):
raise
ValueError
(
"The input x should be a Variable"
)
if
not
(
isinstance
(
axis
,
int
))
or
axis
>
len
(
x
.
shape
)
or
axis
<
0
:
raise
ValueError
(
"The axis should be a int, and in range [0, rank(x)]"
)
out
=
helper
.
create_tmp_variable
(
x
.
dtype
)
helper
.
append_op
(
type
=
'flatten'
,
inputs
=
{
"X"
:
x
},
outputs
=
{
'Out'
:
out
},
attrs
=
{
"axis"
:
axis
})
return
out
python/paddle/fluid/tests/unittests/CMakeLists.txt
浏览文件 @
7c09d198
...
...
@@ -59,8 +59,8 @@ py_test_modules(test_warpctc_op MODULES test_warpctc_op ENVS FLAGS_warpctc_dir=$
if
(
WITH_DISTRIBUTE
)
py_test_modules
(
test_dist_train MODULES test_dist_train SERIAL
)
set_tests_properties
(
test_listen_and_serv_op PROPERTIES TIMEOUT 20
)
set_tests_properties
(
test_dist_mnist PROPERTIES TIMEOUT
18
0
)
set_tests_properties
(
test_dist_word2vec PROPERTIES TIMEOUT
18
0
)
set_tests_properties
(
test_dist_mnist PROPERTIES TIMEOUT
20
0
)
set_tests_properties
(
test_dist_word2vec PROPERTIES TIMEOUT
20
0
)
endif
()
py_test_modules
(
test_parallel_executor_crf MODULES test_parallel_executor_crf SERIAL
)
py_test_modules
(
test_parallel_executor_fetch_feed MODULES test_parallel_executor_fetch_feed SERIAL
)
...
...
python/paddle/fluid/tests/unittests/test_cross_entropy_op.py
浏览文件 @
7c09d198
...
...
@@ -105,5 +105,107 @@ class TestCrossEntropyOp3(OpTest):
[
"X"
],
"Y"
,
max_relative_error
=
0.05
,
numeric_grad_delta
=
0.001
)
class
TestCrossEntropyOp4
(
OpTest
):
"""Test high rank tensor cross-entropy with discrete one-hot labels.
"""
def
setUp
(
self
):
self
.
op_type
=
"cross_entropy"
shape
=
[
10
,
2
,
4
]
ins_num
=
np
.
prod
(
np
.
array
(
shape
))
class_num
=
10
X_2d
=
randomize_probability
(
ins_num
,
class_num
,
dtype
=
'float64'
)
label_2d
=
np
.
random
.
randint
(
0
,
class_num
,
(
ins_num
,
1
),
dtype
=
"int64"
)
cross_entropy_2d
=
np
.
asmatrix
(
[[
-
np
.
log
(
X_2d
[
i
][
label_2d
[
i
][
0
]])]
for
i
in
range
(
X_2d
.
shape
[
0
])],
dtype
=
"float64"
)
X
=
X_2d
.
reshape
(
shape
+
[
class_num
])
label
=
label_2d
.
reshape
(
shape
+
[
1
])
cross_entropy
=
np
.
array
(
cross_entropy_2d
).
reshape
(
shape
+
[
1
])
self
.
inputs
=
{
"X"
:
X
,
"Label"
:
label
}
self
.
outputs
=
{
"Y"
:
cross_entropy
}
self
.
attrs
=
{
"soft_label"
:
False
}
def
test_check_output
(
self
):
self
.
check_output
()
def
test_check_grad
(
self
):
self
.
check_grad
([
"X"
],
"Y"
,
numeric_grad_delta
=
0.001
)
class
TestCrossEntropyOp5
(
OpTest
):
"""Test high rank tensor cross-entropy with vectorized soft labels.
"""
def
setUp
(
self
):
self
.
op_type
=
"cross_entropy"
shape
=
[
4
,
3
]
ins_num
=
np
.
prod
(
np
.
array
(
shape
))
class_num
=
37
X_2d
=
randomize_probability
(
ins_num
,
class_num
)
label_2d
=
np
.
random
.
uniform
(
0.1
,
1.0
,
[
ins_num
,
class_num
]).
astype
(
"float32"
)
label_2d
/=
label_2d
.
sum
(
axis
=
1
,
keepdims
=
True
)
cross_entropy_2d
=
(
-
label_2d
*
np
.
log
(
X_2d
)).
sum
(
axis
=
1
,
keepdims
=
True
).
astype
(
"float32"
)
X
=
X_2d
.
reshape
(
shape
+
[
class_num
])
label
=
label_2d
.
reshape
(
shape
+
[
class_num
])
cross_entropy
=
np
.
array
(
cross_entropy_2d
).
reshape
(
shape
+
[
1
])
self
.
inputs
=
{
"X"
:
X
,
"Label"
:
label
}
self
.
outputs
=
{
"Y"
:
cross_entropy
}
self
.
attrs
=
{
"soft_label"
:
True
}
def
test_check_output
(
self
):
self
.
check_output
()
def
test_check_grad
(
self
):
self
.
check_grad
(
[
"X"
],
"Y"
,
max_relative_error
=
0.05
,
numeric_grad_delta
=
0.001
)
class
TestCrossEntropyOp6
(
OpTest
):
"""Test high rank tensor cross-entropy with vectorized one-hot representation of labels.
"""
def
setUp
(
self
):
self
.
op_type
=
"cross_entropy"
shape
=
[
4
,
3
,
2
]
ins_num
=
np
.
prod
(
np
.
array
(
shape
))
class_num
=
17
X_2d
=
randomize_probability
(
ins_num
,
class_num
)
label_index_2d
=
np
.
random
.
randint
(
0
,
class_num
,
(
ins_num
),
dtype
=
"int32"
)
label_2d
=
np
.
zeros
(
X_2d
.
shape
)
label_2d
[
np
.
arange
(
ins_num
),
label_index_2d
]
=
1
cross_entropy_2d
=
np
.
asmatrix
(
[[
-
np
.
log
(
X_2d
[
i
][
label_index_2d
[
i
]])]
for
i
in
range
(
X_2d
.
shape
[
0
])],
dtype
=
"float32"
)
X
=
X_2d
.
reshape
(
shape
+
[
class_num
])
label
=
label_2d
.
reshape
(
shape
+
[
class_num
])
cross_entropy
=
np
.
array
(
cross_entropy_2d
).
reshape
(
shape
+
[
1
])
self
.
inputs
=
{
"X"
:
X
,
"Label"
:
label
.
astype
(
np
.
float32
)}
self
.
outputs
=
{
"Y"
:
cross_entropy
}
self
.
attrs
=
{
"soft_label"
:
True
}
def
test_check_output
(
self
):
self
.
check_output
()
def
test_check_grad
(
self
):
self
.
check_grad
(
[
"X"
],
"Y"
,
max_relative_error
=
0.05
,
numeric_grad_delta
=
0.001
)
if
__name__
==
"__main__"
:
unittest
.
main
()
python/paddle/fluid/tests/unittests/test_desc_clone.py
0 → 100644
浏览文件 @
7c09d198
# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import
numpy
as
np
import
argparse
import
time
import
math
import
paddle
import
paddle.fluid
as
fluid
import
paddle.fluid.profiler
as
profiler
from
paddle.fluid
import
core
import
unittest
from
multiprocessing
import
Process
import
os
import
signal
import
collections
SEED
=
1
DTYPE
=
"float32"
paddle
.
dataset
.
mnist
.
fetch
()
# random seed must set before configuring the network.
# fluid.default_startup_program().random_seed = SEED
def
cnn_model
(
data
):
conv_pool_1
=
fluid
.
nets
.
simple_img_conv_pool
(
input
=
data
,
filter_size
=
5
,
num_filters
=
20
,
pool_size
=
2
,
pool_stride
=
2
,
act
=
"relu"
)
conv_pool_2
=
fluid
.
nets
.
simple_img_conv_pool
(
input
=
conv_pool_1
,
filter_size
=
5
,
num_filters
=
50
,
pool_size
=
2
,
pool_stride
=
2
,
act
=
"relu"
)
# TODO(dzhwinter) : refine the initializer and random seed settting
SIZE
=
10
input_shape
=
conv_pool_2
.
shape
param_shape
=
[
reduce
(
lambda
a
,
b
:
a
*
b
,
input_shape
[
1
:],
1
)]
+
[
SIZE
]
scale
=
(
2.0
/
(
param_shape
[
0
]
**
2
*
SIZE
))
**
0.5
predict
=
fluid
.
layers
.
fc
(
input
=
conv_pool_2
,
size
=
SIZE
,
act
=
"softmax"
,
param_attr
=
fluid
.
param_attr
.
ParamAttr
(
initializer
=
fluid
.
initializer
.
NormalInitializer
(
loc
=
0.0
,
scale
=
scale
)))
return
predict
def
get_model
(
batch_size
):
# Input data
images
=
fluid
.
layers
.
data
(
name
=
'pixel'
,
shape
=
[
1
,
28
,
28
],
dtype
=
DTYPE
)
label
=
fluid
.
layers
.
data
(
name
=
'label'
,
shape
=
[
1
],
dtype
=
'int64'
)
# Train program
predict
=
cnn_model
(
images
)
cost
=
fluid
.
layers
.
cross_entropy
(
input
=
predict
,
label
=
label
)
avg_cost
=
fluid
.
layers
.
mean
(
x
=
cost
)
# Evaluator
batch_size_tensor
=
fluid
.
layers
.
create_tensor
(
dtype
=
'int64'
)
batch_acc
=
fluid
.
layers
.
accuracy
(
input
=
predict
,
label
=
label
,
total
=
batch_size_tensor
)
inference_program
=
fluid
.
default_main_program
().
clone
()
# Optimization
opt
=
fluid
.
optimizer
.
AdamOptimizer
(
learning_rate
=
0.001
,
beta1
=
0.9
,
beta2
=
0.999
)
# Reader
train_reader
=
paddle
.
batch
(
paddle
.
dataset
.
mnist
.
train
(),
batch_size
=
batch_size
)
test_reader
=
paddle
.
batch
(
paddle
.
dataset
.
mnist
.
test
(),
batch_size
=
batch_size
)
opt
.
minimize
(
avg_cost
)
return
inference_program
,
avg_cost
,
train_reader
,
test_reader
,
batch_acc
,
predict
def
get_transpiler
(
trainer_id
,
main_program
,
pserver_endpoints
,
trainers
):
t
=
fluid
.
DistributeTranspiler
()
t
.
transpile
(
trainer_id
=
trainer_id
,
program
=
main_program
,
pservers
=
pserver_endpoints
,
trainers
=
trainers
)
return
t
def
operator_equal
(
a
,
b
):
for
k
,
v
in
a
.
__dict__
.
iteritems
():
if
isinstance
(
v
,
fluid
.
framework
.
Program
)
or
\
isinstance
(
v
,
fluid
.
framework
.
Block
):
continue
elif
isinstance
(
v
,
core
.
OpDesc
):
if
v
.
serialize_to_string
()
!=
b
.
__dict__
[
k
].
serialize_to_string
():
raise
ValueError
(
"In operator_equal not equal:{0}
\n
"
.
format
(
k
))
elif
isinstance
(
v
,
collections
.
OrderedDict
):
v0
=
sorted
(
v
.
iteritems
(),
key
=
lambda
x
:
x
[
0
])
v1
=
sorted
(
b
.
__dict__
[
k
].
iteritems
(),
key
=
lambda
x
:
x
[
0
])
if
v0
!=
v1
:
raise
ValueError
(
"In operator_equal not equal:{0}
\n
"
.
format
(
k
))
elif
(
v
!=
b
.
__dict__
[
k
]):
raise
ValueError
(
"In operator_equal not equal:{0}
\n
"
.
format
(
k
))
return
True
def
block_equal
(
a
,
b
):
for
k
,
v
in
a
.
__dict__
.
iteritems
():
if
isinstance
(
v
,
core
.
ProgramDesc
)
or
isinstance
(
v
,
fluid
.
framework
.
Program
)
or
isinstance
(
v
,
core
.
BlockDesc
):
continue
elif
k
==
"ops"
:
for
i
in
range
(
0
,
len
(
a
.
ops
)):
if
not
operator_equal
(
a
.
ops
[
i
],
b
.
ops
[
i
]):
raise
ValueError
(
"In block_equal not equal:{0}
\n
"
.
format
(
k
))
assert
(
len
(
a
.
ops
)
==
len
(
b
.
ops
))
elif
isinstance
(
v
,
collections
.
OrderedDict
):
v0
=
sorted
(
v
.
iteritems
(),
key
=
lambda
x
:
x
[
0
])
v1
=
sorted
(
b
.
__dict__
[
k
].
iteritems
(),
key
=
lambda
x
:
x
[
0
])
if
v0
!=
v1
:
raise
ValueError
(
"In block_equal not equal:{0}
\n
"
.
format
(
k
))
elif
(
v
!=
b
.
__dict__
[
k
]):
raise
ValueError
(
"In block_equal not equal:{0}
\n
"
.
format
(
k
))
return
True
def
program_equal
(
a
,
b
):
for
k
,
v
in
a
.
__dict__
.
iteritems
():
if
isinstance
(
v
,
core
.
ProgramDesc
):
continue
elif
k
==
'blocks'
:
for
i
in
range
(
0
,
len
(
a
.
blocks
)):
if
not
block_equal
(
a
.
blocks
[
i
],
b
.
blocks
[
i
]):
raise
ValueError
(
"In operator_equal not equal:{0}
\n
"
.
format
(
k
))
return
False
assert
(
len
(
a
.
blocks
)
==
len
(
b
.
blocks
))
elif
(
v
!=
b
.
__dict__
[
k
]):
raise
ValueError
(
"In program_equal not equal:{0}
\n
"
.
format
(
k
))
return
True
class
TestDistMnist
(
unittest
.
TestCase
):
def
test_desc_clone
(
self
):
get_model
(
batch_size
=
20
)
pserver_endpoints
=
"127.0.0.1:9123"
trainers
=
1
current_endpoint
=
"127.0.0.1:9123"
t
=
get_transpiler
(
0
,
fluid
.
default_main_program
(),
pserver_endpoints
,
trainers
)
pserver_prog
=
t
.
get_pserver_program
(
current_endpoint
)
startup_prog
=
t
.
get_startup_program
(
current_endpoint
,
pserver_prog
)
main
=
pserver_prog
.
clone
()
startup
=
startup_prog
.
clone
()
self
.
assertTrue
(
program_equal
(
main
,
pserver_prog
))
self
.
assertTrue
(
program_equal
(
startup
,
startup_prog
))
if
__name__
==
"__main__"
:
unittest
.
main
()
python/paddle/fluid/tests/unittests/test_dist_base.py
浏览文件 @
7c09d198
...
...
@@ -130,7 +130,7 @@ class TestDistBase(unittest.TestCase):
self
.
_ps_endpoints
=
"127.0.0.1:9123,127.0.0.1:9124"
self
.
_python_interp
=
"python"
def
start_pserver
(
self
,
model_file
):
def
start_pserver
(
self
,
model_file
,
check_error_log
):
ps0_ep
,
ps1_ep
=
self
.
_ps_endpoints
.
split
(
","
)
ps0_cmd
=
"%s %s pserver %s 0 %s %d TRUE"
%
\
(
self
.
_python_interp
,
model_file
,
self
.
_ps_endpoints
,
ps0_ep
,
...
...
@@ -139,11 +139,23 @@ class TestDistBase(unittest.TestCase):
(
self
.
_python_interp
,
model_file
,
self
.
_ps_endpoints
,
ps1_ep
,
self
.
_trainers
)
ps0_pipe
=
subprocess
.
PIPE
ps1_pipe
=
subprocess
.
PIPE
if
check_error_log
:
print
(
"ps0_cmd:"
,
ps0_cmd
)
print
(
"ps1_cmd:"
,
ps1_cmd
)
ps0_pipe
=
open
(
"/tmp/ps0_err.log"
,
"wb"
)
ps1_pipe
=
open
(
"/tmp/ps1_err.log"
,
"wb"
)
ps0_proc
=
subprocess
.
Popen
(
ps0_cmd
.
split
(
" "
),
stdout
=
subprocess
.
PIPE
,
stderr
=
subprocess
.
PIPE
)
ps0_cmd
.
split
(
" "
),
stdout
=
subprocess
.
PIPE
,
stderr
=
ps0_pipe
)
ps1_proc
=
subprocess
.
Popen
(
ps1_cmd
.
split
(
" "
),
stdout
=
subprocess
.
PIPE
,
stderr
=
subprocess
.
PIPE
)
return
ps0_proc
,
ps1_proc
ps1_cmd
.
split
(
" "
),
stdout
=
subprocess
.
PIPE
,
stderr
=
ps1_pipe
)
if
not
check_error_log
:
return
ps0_proc
,
ps1_proc
,
None
,
None
else
:
return
ps0_proc
,
ps1_proc
,
ps0_pipe
,
ps1_pipe
def
_wait_ps_ready
(
self
,
pid
):
retry_times
=
50
...
...
@@ -160,7 +172,7 @@ class TestDistBase(unittest.TestCase):
(
e
,
retry_times
))
retry_times
-=
1
def
check_with_place
(
self
,
model_file
,
delta
=
1e-3
):
def
check_with_place
(
self
,
model_file
,
delta
=
1e-3
,
check_error_log
=
False
):
# *ATTENTION* THIS TEST NEEDS AT LEAST 2GPUS TO RUN
required_envs
=
{
"PATH"
:
os
.
getenv
(
"PATH"
),
...
...
@@ -169,17 +181,32 @@ class TestDistBase(unittest.TestCase):
"FLAGS_fraction_of_gpu_memory_to_use"
:
"0.15"
,
"FLAGS_cudnn_deterministic"
:
"1"
}
if
check_error_log
:
required_envs
[
"GLOG_v"
]
=
"7"
required_envs
[
"GLOG_logtostderr"
]
=
"1"
# Run local to get a base line
env_local
=
{
"CUDA_VISIBLE_DEVICES"
:
"0"
}
env_local
.
update
(
required_envs
)
local_cmd
=
"%s %s trainer %s 0 %s %d FLASE"
%
\
(
self
.
_python_interp
,
model_file
,
"127.0.0.1:1234"
,
"127.0.0.1:1234"
,
1
)
local_proc
=
subprocess
.
Popen
(
local_cmd
.
split
(
" "
),
stdout
=
subprocess
.
PIPE
,
stderr
=
subprocess
.
PIPE
,
env
=
env_local
)
if
not
check_error_log
:
local_proc
=
subprocess
.
Popen
(
local_cmd
.
split
(
" "
),
stdout
=
subprocess
.
PIPE
,
stderr
=
subprocess
.
PIPE
,
env
=
env_local
)
else
:
print
(
"trainer cmd:"
,
local_cmd
)
err_log
=
open
(
"/tmp/trainer.err.log"
,
"wb"
)
local_proc
=
subprocess
.
Popen
(
local_cmd
.
split
(
" "
),
stdout
=
subprocess
.
PIPE
,
stderr
=
err_log
,
env
=
env_local
)
local_proc
.
wait
()
out
,
err
=
local_proc
.
communicate
()
local_ret
=
out
...
...
@@ -187,7 +214,8 @@ class TestDistBase(unittest.TestCase):
sys
.
stderr
.
write
(
'local_stderr: %s
\n
'
%
err
)
# Run dist train to compare with local results
ps0
,
ps1
=
self
.
start_pserver
(
model_file
)
ps0
,
ps1
,
ps0_pipe
,
ps1_pipe
=
self
.
start_pserver
(
model_file
,
check_error_log
)
self
.
_wait_ps_ready
(
ps0
.
pid
)
self
.
_wait_ps_ready
(
ps1
.
pid
)
...
...
@@ -205,15 +233,23 @@ class TestDistBase(unittest.TestCase):
env1
.
update
(
required_envs
)
FNULL
=
open
(
os
.
devnull
,
'w'
)
tr0_pipe
=
subprocess
.
PIPE
tr1_pipe
=
subprocess
.
PIPE
if
check_error_log
:
print
(
"tr0_cmd:"
,
tr0_cmd
)
print
(
"tr1_cmd:"
,
tr1_cmd
)
tr0_pipe
=
open
(
"/tmp/tr0_err.log"
,
"wb"
)
tr1_pipe
=
open
(
"/tmp/tr1_err.log"
,
"wb"
)
tr0_proc
=
subprocess
.
Popen
(
tr0_cmd
.
split
(
" "
),
stdout
=
subprocess
.
PIPE
,
stderr
=
subprocess
.
PIPE
,
stderr
=
tr0_pipe
,
env
=
env0
)
tr1_proc
=
subprocess
.
Popen
(
tr1_cmd
.
split
(
" "
),
stdout
=
subprocess
.
PIPE
,
stderr
=
subprocess
.
PIPE
,
stderr
=
tr1_pipe
,
env
=
env1
)
tr0_proc
.
wait
()
...
...
@@ -230,6 +266,13 @@ class TestDistBase(unittest.TestCase):
local_first_loss
=
eval
(
local_lines
[
0
])[
0
]
local_last_loss
=
eval
(
local_lines
[
1
])[
0
]
# close trainer file
if
check_error_log
:
tr0_pipe
.
close
()
tr1_pipe
.
close
()
ps0_pipe
.
close
()
ps1_pipe
.
close
()
# FIXME: use terminate() instead of sigkill.
os
.
kill
(
ps0
.
pid
,
signal
.
SIGKILL
)
os
.
kill
(
ps1
.
pid
,
signal
.
SIGKILL
)
...
...
python/paddle/fluid/tests/unittests/test_dist_train.py
浏览文件 @
7c09d198
...
...
@@ -26,6 +26,12 @@ from paddle.fluid.layers.io import ListenAndServ
from
paddle.fluid.layers.io
import
Recv
from
paddle.fluid.layers.io
import
Send
from
paddle.fluid
import
core
RPC_OP_ROLE_ATTR_NAME
=
op_role_attr_name
=
core
.
op_proto_and_checker_maker
.
kOpRoleAttrName
(
)
RPC_OP_ROLE_ATTR_VALUE
=
core
.
op_proto_and_checker_maker
.
OpRole
.
RPC
class
TestSendOp
(
unittest
.
TestCase
):
def
test_send
(
self
):
...
...
@@ -89,18 +95,29 @@ class TestSendOp(unittest.TestCase):
def
init_client
(
self
,
place
,
port
):
main
=
fluid
.
Program
()
with
fluid
.
program_guard
(
main
):
main
.
global_block
().
append_op
(
type
=
"fetch_barrier"
,
inputs
=
{},
outputs
=
{},
attrs
=
{
"endpoints"
:
[
"127.0.0.1:{0}"
.
format
(
port
)],
RPC_OP_ROLE_ATTR_NAME
:
RPC_OP_ROLE_ATTR_VALUE
})
x
=
layers
.
data
(
shape
=
[
32
,
32
],
dtype
=
'float32'
,
name
=
'X'
,
append_batch_size
=
False
)
fluid
.
initializer
.
Constant
(
value
=
2.3
)(
x
,
main
.
global_block
())
get_var
=
main
.
global_block
().
create_var
(
name
=
"scale_0.tmp_0"
,
# server side var
dtype
=
"float32"
,
persistable
=
False
,
shape
=
[
32
,
32
])
fluid
.
initializer
.
Constant
(
value
=
2.3
)(
get_var
,
main
.
global_block
())
Send
(
"127.0.0.1:%d"
%
port
,
[
x
])
o
=
Recv
(
"127.0.0.1:%d"
%
port
,
[
get_var
])
...
...
python/paddle/fluid/tests/unittests/test_dist_transpiler.py
浏览文件 @
7c09d198
...
...
@@ -12,10 +12,13 @@
# See the License for the specific language governing permissions and
# limitations under the License.
import
math
import
unittest
import
paddle.fluid
as
fluid
from
paddle.fluid.transpiler.distribute_transpiler
import
delete_ops
import
traceback
import
collections
class
TranspilerTest
(
unittest
.
TestCase
):
...
...
@@ -51,9 +54,18 @@ class TranspilerTest(unittest.TestCase):
self
.
origin_prog
=
main
.
clone
()
return
main
def
get_trainer
(
self
,
config
=
None
,
sync_mode
=
True
):
t
=
self
.
_transpiler_instance
(
config
,
sync_mode
)
return
t
.
get_trainer_program
()
def
get_trainer
(
self
,
config
=
None
):
src
=
fluid
.
default_startup_program
().
clone
()
t
=
self
.
_transpiler_instance
(
config
)
trainer_main
=
t
.
get_trainer_program
()
trainer_startup
=
fluid
.
default_startup_program
()
assert
(
src
.
num_blocks
==
1
)
assert
(
trainer_startup
.
num_blocks
==
src
.
num_blocks
)
return
trainer_main
,
trainer_startup
def
get_pserver
(
self
,
ep
,
config
=
None
,
sync_mode
=
True
):
t
=
self
.
_transpiler_instance
(
config
,
sync_mode
)
...
...
@@ -89,7 +101,21 @@ class TestBasicModel(TranspilerTest):
pserver
,
startup
=
self
.
get_pserver
(
self
.
pserver1_ep
)
pserver2
,
startup2
=
self
.
get_pserver
(
self
.
pserver2_ep
)
trainer
=
self
.
get_trainer
()
trainer
,
trainer_startup
=
self
.
get_trainer
()
# splited var blocks should be in startup program
self
.
assertTrue
(
"fc_w.block0"
in
trainer_startup
.
global_block
().
vars
)
self
.
assertTrue
(
"fc_w.block1"
in
trainer_startup
.
global_block
().
vars
)
self
.
assertTrue
(
"fc_w"
in
trainer_startup
.
global_block
().
vars
)
self
.
assertTrue
(
"fc_b"
in
trainer_startup
.
global_block
().
vars
)
self
.
assertTrue
(
"fc_w@GRAD"
not
in
trainer_startup
.
global_block
().
vars
)
self
.
assertTrue
(
"fc_b@GRAD"
not
in
trainer_startup
.
global_block
().
vars
)
src
=
[
op
.
type
for
op
in
trainer_startup
.
global_block
().
ops
]
dst
=
[
'fill_constant'
,
'fill_constant'
,
'uniform_random'
,
'recv'
,
'recv'
,
\
'fetch_barrier'
,
'concat'
]
self
.
assertEqual
(
src
,
dst
)
self
.
assertEqual
([
op
.
type
for
op
in
trainer
.
global_block
().
ops
],
[
'mul'
,
'elementwise_add'
,
'elementwise_sub'
,
'square'
,
'mean'
,
...
...
@@ -140,7 +166,7 @@ class TestBasicModelWithLargeBlockSize(TranspilerTest):
pserver
,
startup
=
self
.
get_pserver
(
self
.
pserver1_ep
,
config
)
pserver2
,
startup2
=
self
.
get_pserver
(
self
.
pserver2_ep
,
config
)
trainer
=
self
.
get_trainer
(
config
)
trainer
,
_
=
self
.
get_trainer
(
config
)
self
.
assertEqual
([
op
.
type
for
op
in
trainer
.
global_block
().
ops
],
[
'mul'
,
'elementwise_add'
,
'elementwise_sub'
,
'square'
,
'mean'
,
...
...
@@ -224,7 +250,7 @@ class TestLRDecay(TranspilerTest):
def
transpiler_test_impl
(
self
):
pserver
,
startup
=
self
.
get_pserver
(
self
.
pserver1_ep
)
trainer
=
self
.
get_trainer
()
trainer
,
_
=
self
.
get_trainer
()
self
.
assertEqual
(
len
(
pserver
.
blocks
),
4
)
lr_decay_ops
=
[
op
.
type
for
op
in
pserver
.
blocks
[
1
].
ops
]
...
...
@@ -254,12 +280,12 @@ class TestLRDecayConditional(TranspilerTest):
def
transpiler_test_impl
(
self
):
pserver
,
startup
=
self
.
get_pserver
(
self
.
pserver1_ep
)
trainer
=
self
.
get_trainer
()
trainer
,
_
=
self
.
get_trainer
()
serv_op
=
pserver
.
blocks
[
0
].
ops
[
0
]
sub_blocks
=
[]
optimize_blocks
=
[]
for
b
in
serv_op
.
a
ttrs
[
"optimize_blocks"
]:
for
b
in
serv_op
.
a
ll_attrs
()
[
"optimize_blocks"
]:
optimize_blocks
.
append
(
b
.
idx
)
for
b
in
pserver
.
blocks
:
if
b
.
idx
not
in
optimize_blocks
:
...
...
@@ -303,7 +329,7 @@ class TestL2Decay(TranspilerTest):
def
transpiler_test_impl
(
self
):
pserver
,
startup
=
self
.
get_pserver
(
self
.
pserver1_ep
)
trainer
=
self
.
get_trainer
()
trainer
,
_
=
self
.
get_trainer
()
self
.
assertEqual
(
len
(
pserver
.
blocks
),
3
)
self
.
assertEqual
([
op
.
type
for
op
in
pserver
.
blocks
[
1
].
ops
],
...
...
@@ -338,7 +364,7 @@ class TestL2DecayWithPiecewise(TranspilerTest):
def
transpiler_test_impl
(
self
):
pserver
,
startup
=
self
.
get_pserver
(
self
.
pserver1_ep
)
trainer
=
self
.
get_trainer
()
trainer
,
_
=
self
.
get_trainer
()
self
.
assertEqual
(
len
(
pserver
.
blocks
),
9
)
self
.
assertEqual
([
op
.
type
for
op
in
pserver
.
blocks
[
1
].
ops
],
[
...
...
@@ -362,12 +388,13 @@ class TestL2DecayWithPiecewise(TranspilerTest):
class
TestDistLookupTableBase
(
TranspilerTest
):
def
network_with_table
(
self
,
is_sparse
,
is_distributed
):
self
.
table_size
=
1000
self
.
emb_size
=
64
def
emb_pool
(
ids
):
table_size
=
1000
emb_size
=
64
emb
=
fluid
.
layers
.
embedding
(
input
=
ids
,
size
=
[
table_size
,
emb_size
],
size
=
[
self
.
table_size
,
self
.
emb_size
],
dtype
=
'float32'
,
param_attr
=
'shared_w'
,
# share parameter
is_sparse
=
is_sparse
,
...
...
@@ -412,7 +439,7 @@ class TestLocalLookupTable(TestDistLookupTableBase):
self
.
assertEqual
([
op
.
type
for
op
in
pserver1
.
blocks
[
2
].
ops
],
[
"sum"
,
"adam"
,
"scale"
,
"scale"
])
trainer
=
self
.
get_trainer
()
trainer
,
_
=
self
.
get_trainer
()
self
.
assertEqual
(
len
(
trainer
.
blocks
),
1
)
ops
=
[
'lookup_table'
,
'sequence_pool'
,
'lookup_table'
,
'sequence_pool'
,
...
...
@@ -450,7 +477,7 @@ class TestDistLookupTable(TestDistLookupTableBase):
# 5 save table
self
.
assertEqual
([
op
.
type
for
op
in
pserver1
.
blocks
[
5
].
ops
],
[
"save"
])
trainer
=
self
.
get_trainer
()
trainer
,
_
=
self
.
get_trainer
()
self
.
assertEqual
(
len
(
trainer
.
blocks
),
1
)
ops
=
[
'split_ids'
,
'prefetch'
,
'merge_ids'
,
'sequence_pool'
,
'split_ids'
,
...
...
@@ -483,7 +510,7 @@ class TestAsyncLocalLookupTable(TestDistLookupTableBase):
self
.
assertEqual
([
op
.
type
for
op
in
pserver1
.
blocks
[
2
].
ops
],
[
"adam"
,
"scale"
,
"scale"
])
trainer
=
self
.
get_trainer
(
config
)
trainer
,
_
=
self
.
get_trainer
(
config
)
self
.
assertEqual
(
len
(
trainer
.
blocks
),
1
)
ops
=
[
'lookup_table'
,
'sequence_pool'
,
'lookup_table'
,
'sequence_pool'
,
...
...
@@ -522,7 +549,7 @@ class TestAsyncDistLookupTable(TestDistLookupTableBase):
# 5 save table
self
.
assertEqual
([
op
.
type
for
op
in
pserver1
.
blocks
[
5
].
ops
],
[
"save"
])
trainer
=
self
.
get_trainer
(
config
)
trainer
,
_
=
self
.
get_trainer
(
config
)
self
.
assertEqual
(
len
(
trainer
.
blocks
),
1
)
ops
=
[
'split_ids'
,
'prefetch'
,
'merge_ids'
,
'sequence_pool'
,
'split_ids'
,
...
...
@@ -536,6 +563,22 @@ class TestAsyncDistLookupTable(TestDistLookupTableBase):
self
.
assertEqual
([
op
.
type
for
op
in
trainer
.
blocks
[
0
].
ops
],
ops
)
class
TestDistLookupTableSliceSize
(
TestDistLookupTableBase
):
def
net_conf
(
self
):
self
.
network_with_table
(
is_sparse
=
True
,
is_distributed
=
True
)
def
transpiler_test_impl
(
self
):
config
=
fluid
.
DistributeTranspilerConfig
()
pserver1
,
startup1
=
self
.
get_pserver
(
self
.
pserver1_ep
,
config
)
self
.
assertTrue
(
self
.
transpiler
.
has_distributed_lookup_table
)
lookup_table_var
=
pserver1
.
global_block
().
vars
[
self
.
transpiler
.
table_name
]
row_size
=
lookup_table_var
.
shape
[
0
]
calc_row_size
=
int
(
math
.
ceil
(
self
.
table_size
/
self
.
pservers
))
self
.
assertEqual
(
row_size
,
calc_row_size
)
class
TestRMSPropOptimizer
(
TranspilerTest
):
def
net_conf
(
self
):
x
=
fluid
.
layers
.
data
(
name
=
'x'
,
shape
=
[
1000
],
dtype
=
'float32'
)
...
...
python/paddle/fluid/tests/unittests/test_fc_mkldnn_op.py
浏览文件 @
7c09d198
...
...
@@ -22,6 +22,7 @@ def fully_connected_naive(input, weights, bias_data=None):
w_h
,
w_c
=
weights
.
shape
x_data
=
np
.
reshape
(
input
,
[
in_n
,
in_c
*
in_h
*
in_w
])
# this transpose should be implemented at C code
w_data
=
np
.
transpose
(
np
.
reshape
(
weights
,
(
w_c
,
in_c
*
in_h
*
in_w
)))
result
=
None
...
...
@@ -43,15 +44,11 @@ class TestFCMKLDNNOp(OpTest):
def
setUp
(
self
):
self
.
op_type
=
"fc"
self
.
use_mkldnn
=
True
self
.
with_bias
=
True
self
.
matrix
=
MatrixGenerate
(
1
,
10
,
15
,
3
,
3
)
self
.
inputs
=
{
'Input'
:
self
.
matrix
.
input
,
'W'
:
self
.
matrix
.
weights
}
self
.
attrs
=
{
'use_mkldnn'
:
self
.
use_mkldnn
,
'with_bias'
:
self
.
with_bias
}
self
.
attrs
=
{
'use_mkldnn'
:
self
.
use_mkldnn
,
}
self
.
outputs
=
{
'Out'
:
fully_connected_naive
(
self
.
matrix
.
input
,
self
.
matrix
.
weights
)
...
...
@@ -85,13 +82,11 @@ class TestFCMKLDNNOp3(TestFCMKLDNNOp):
class
TestFCMKLDNNOp4
(
TestFCMKLDNNOp
):
def
init_op_type
(
self
):
self
.
with_bias
=
False
self
.
matrix
=
MatrixGenerate
(
2
,
32
,
48
,
2
,
2
)
class
TestFCMKLDNNOp4
(
TestFCMKLDNNOp
):
def
init_op_type
(
self
):
self
.
with_bias
=
False
self
.
matrix
=
MatrixGenerate
(
2
,
32
,
1000
,
6
,
6
)
...
...
python/paddle/fluid/tests/unittests/test_fc_op.py
0 → 100644
浏览文件 @
7c09d198
# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import
unittest
import
numpy
as
np
from
op_test
import
OpTest
def
fc_refer
(
matrix
,
with_bias
):
in_n
,
in_c
,
in_h
,
in_w
=
matrix
.
input
.
shape
w_i
,
w_o
=
matrix
.
weights
.
shape
x_data
=
np
.
reshape
(
matrix
.
input
,
[
in_n
,
in_c
*
in_h
*
in_w
])
w_data
=
np
.
reshape
(
matrix
.
weights
,
[
w_i
,
w_o
])
b_data
=
np
.
reshape
(
matrix
.
bias
,
[
1
,
w_o
])
result
=
None
if
with_bias
:
result
=
np
.
dot
(
x_data
,
w_data
)
+
b_data
else
:
result
=
np
.
dot
(
x_data
,
w_data
)
return
result
class
MatrixGenerate
:
def
__init__
(
self
,
mb
,
ic
,
oc
,
h
,
w
):
self
.
input
=
np
.
random
.
random
((
mb
,
ic
,
h
,
w
)).
astype
(
"float32"
)
self
.
weights
=
np
.
random
.
random
((
ic
*
h
*
w
,
oc
)).
astype
(
"float32"
)
self
.
bias
=
np
.
random
.
random
((
1
,
oc
)).
astype
(
"float32"
)
class
TestFCOp
(
OpTest
):
def
setUp
(
self
):
self
.
op_type
=
"fc"
self
.
matrix
=
MatrixGenerate
(
1
,
10
,
15
,
3
,
3
)
self
.
with_bias
=
True
if
self
.
with_bias
:
self
.
inputs
=
{
'Input'
:
self
.
matrix
.
input
,
'W'
:
self
.
matrix
.
weights
,
'Bias'
:
self
.
matrix
.
bias
}
else
:
self
.
inputs
=
{
'Input'
:
self
.
matrix
.
input
,
'W'
:
self
.
matrix
.
weights
}
self
.
attrs
=
{
'use_mkldnn'
:
False
}
self
.
outputs
=
{
'Out'
:
fc_refer
(
self
.
matrix
,
self
.
with_bias
)}
def
test_check_output
(
self
):
self
.
check_output
()
class
TestFCOpBiasBoth
(
TestFCOp
):
def
init_shapes
(
self
,
mb
,
ic
,
oc
,
h
,
w
):
for
with_bias
in
{
True
,
False
}:
self
.
with_bias
=
with_bias
self
.
matrix
=
MatrixGenerate
(
mb
,
ic
,
oc
,
h
,
w
)
class
TestFCOp1
(
TestFCOpBiasBoth
):
def
init_op_type
(
self
):
self
.
init_shapes
(
2
,
8
,
10
,
1
,
1
)
class
TestFCOp2
(
TestFCOpBiasBoth
):
def
init_op_type
(
self
):
self
.
init_shapes
(
4
,
5
,
6
,
2
,
2
)
class
TestFCOp4
(
TestFCOpBiasBoth
):
def
init_op_type
(
self
):
self
.
init_shapes
(
1
,
32
,
64
,
3
,
3
)
if
__name__
==
"__main__"
:
unittest
.
main
()
python/paddle/fluid/tests/unittests/test_layers.py
浏览文件 @
7c09d198
...
...
@@ -21,6 +21,7 @@ import paddle.fluid.nets as nets
from
paddle.fluid.framework
import
Program
,
program_guard
,
default_main_program
from
paddle.fluid.param_attr
import
ParamAttr
import
decorators
from
paddle.fluid.initializer
import
Constant
class
TestBook
(
unittest
.
TestCase
):
...
...
@@ -465,6 +466,17 @@ class TestBook(unittest.TestCase):
self
.
assertIsNotNone
(
out
)
print
(
str
(
program
))
def
test_flatten
(
self
):
program
=
Program
()
with
program_guard
(
program
):
x
=
layers
.
data
(
name
=
'x'
,
append_batch_size
=
False
,
shape
=
[
4
,
4
,
3
],
dtype
=
"float32"
)
out
=
layers
.
flatten
(
x
,
axis
=
1
,
name
=
"flatten"
)
self
.
assertIsNotNone
(
out
)
def
test_shape
(
self
):
program
=
Program
()
with
program_guard
(
program
):
...
...
@@ -474,6 +486,20 @@ class TestBook(unittest.TestCase):
self
.
assertIsNotNone
(
out
)
print
(
str
(
program
))
def
test_prelu
(
self
):
program
=
Program
()
with
program_guard
(
program
):
input
=
layers
.
data
(
name
=
"input"
,
shape
=
[
5
,
200
,
100
,
100
],
dtype
=
"float32"
)
mode
=
'channel'
out
=
layers
.
prelu
(
input
,
mode
,
param_attr
=
ParamAttr
(
initializer
=
Constant
(
1.0
)),
name
=
'prelu'
)
self
.
assertIsNotNone
(
out
)
print
(
str
(
program
))
if
__name__
==
'__main__'
:
unittest
.
main
()
python/paddle/fluid/tests/unittests/test_prelu_op.py
浏览文件 @
7c09d198
...
...
@@ -20,30 +20,58 @@ from op_test import OpTest
class
PReluTest
(
OpTest
):
def
setUp
(
self
):
self
.
op_type
=
"prelu"
x_np
=
np
.
random
.
normal
(
size
=
(
10
,
10
)).
astype
(
"float32"
)
for
pos
,
val
in
np
.
ndenumerate
(
x_np
):
# Since zero point in prelu is not differentiable, avoid randomize
# zero.
while
abs
(
val
)
<
1e-3
:
x_np
[
pos
]
=
np
.
random
.
normal
()
val
=
x_np
[
pos
]
x_np_sign
=
np
.
sign
(
x_np
)
x_np
=
x_np_sign
*
np
.
maximum
(
x_np
,
.
005
)
alpha_np
=
np
.
array
([.
1
],
dtype
=
"float32"
)
self
.
inputs
=
{
'X'
:
x_np
,
'Alpha'
:
alpha_np
}
self
.
initTestCase
()
x_np
=
np
.
random
.
normal
(
size
=
(
3
,
5
,
5
,
10
)).
astype
(
"float32"
)
# Since zero point in prelu is not differentiable, avoid randomize
# zero.
x_np
[
np
.
abs
(
x_np
)
<
0.005
]
=
0.02
if
self
.
attrs
==
{
'mode'
:
"all"
}:
alpha_np
=
np
.
random
.
rand
(
1
).
astype
(
"float32"
)
self
.
inputs
=
{
'X'
:
x_np
,
'Alpha'
:
alpha_np
}
elif
self
.
attrs
==
{
'mode'
:
"channel"
}:
alpha_np
=
np
.
random
.
rand
(
1
,
x_np
.
shape
[
1
],
1
,
1
).
astype
(
"float32"
)
self
.
inputs
=
{
'X'
:
x_np
,
'Alpha'
:
alpha_np
}
else
:
alpha_np
=
np
.
random
.
rand
(
*
x_np
.
shape
).
astype
(
"float32"
)
self
.
inputs
=
{
'X'
:
x_np
,
'Alpha'
:
alpha_np
}
out_np
=
np
.
maximum
(
self
.
inputs
[
'X'
],
0.
)
out_np
=
out_np
+
np
.
minimum
(
self
.
inputs
[
'X'
],
0.
)
*
self
.
inputs
[
'Alpha'
]
assert
out_np
is
not
self
.
inputs
[
'X'
]
self
.
outputs
=
{
'Out'
:
out_np
}
def
initTestCase
(
self
):
self
.
attrs
=
{
'mode'
:
"channel"
}
def
test_check_output
(
self
):
self
.
check_output
()
def
test_check_grad
(
self
):
self
.
check_grad
([
'X'
],
'Out'
)
self
.
check_grad
([
'X'
,
'Alpha'
],
'Out'
)
def
test_check_grad_ignore_x
(
self
):
self
.
check_grad
([
'Alpha'
],
'Out'
,
no_grad_set
=
set
(
'X'
))
def
test_check_grad_ignore_alpha
(
self
):
self
.
check_grad
([
'X'
],
'Out'
,
no_grad_set
=
set
(
'Alpha'
))
class
TestCase1
(
PReluTest
):
def
initTestCase
(
self
):
self
.
attrs
=
{
'mode'
:
"all"
}
class
TestCase2
(
PReluTest
):
def
initTestCase
(
self
):
self
.
attrs
=
{
'mode'
:
"channel"
}
class
TestCase3
(
PReluTest
):
def
initTestCase
(
self
):
self
.
attrs
=
{
'mode'
:
"element"
}
if
__name__
==
"__main__"
:
...
...
python/paddle/fluid/tests/unittests/test_program.py
浏览文件 @
7c09d198
...
...
@@ -17,6 +17,7 @@ import unittest
from
paddle.fluid.framework
import
Program
,
default_main_program
,
program_guard
,
grad_var_name
import
paddle.fluid.layers
as
layers
import
paddle.fluid
as
fluid
main_program
=
default_main_program
()
...
...
@@ -98,6 +99,39 @@ class TestProgram(unittest.TestCase):
new_program
=
main_program
.
clone
()
self
.
assertNotEqual
(
0
,
len
(
new_program
.
blocks
[
0
].
all_parameters
()))
def
test_program_inference_optimize
(
self
):
def
net
():
reader
=
fluid
.
layers
.
py_reader
(
capacity
=
10
,
shapes
=
[[
-
1
,
10
],
[
-
1
,
1
]],
lod_levels
=
[
0
,
0
],
dtypes
=
[
'float32'
,
'int64'
],
use_double_buffer
=
True
)
in_data
,
label
=
fluid
.
layers
.
read_file
(
reader
)
predict_label
=
fluid
.
layers
.
fc
(
in_data
,
size
=
2
,
act
=
'softmax'
)
loss
=
fluid
.
layers
.
mean
(
fluid
.
layers
.
cross_entropy
(
input
=
predict_label
,
label
=
label
))
optimizer
=
fluid
.
optimizer
.
Adam
()
optimizer
.
minimize
(
loss
)
startup_program
=
fluid
.
Program
()
main_program
=
fluid
.
Program
()
with
fluid
.
program_guard
(
main_program
,
startup_program
):
net
()
no_read_program
=
main_program
.
inference_optimize
()
keep_read_program
=
main_program
.
inference_optimize
(
export_for_deployment
=
False
)
no_read_ops
=
no_read_program
.
global_block
().
ops
keep_read_ops
=
keep_read_program
.
global_block
().
ops
self
.
assertEqual
(
len
(
keep_read_ops
)
-
len
(
no_read_ops
),
2
)
self
.
assertEqual
(
keep_read_ops
[
0
].
type
,
'create_double_buffer_reader'
)
self
.
assertEqual
(
keep_read_ops
[
1
].
type
,
'read'
)
for
i
in
range
(
len
(
no_read_ops
)):
self
.
assertEqual
(
no_read_ops
[
i
].
type
,
keep_read_ops
[
i
+
2
].
type
)
if
__name__
==
'__main__'
:
unittest
.
main
()
python/paddle/fluid/tests/unittests/test_protobuf_descs.py
浏览文件 @
7c09d198
...
...
@@ -68,7 +68,7 @@ class TestOpDesc(unittest.TestCase):
self
.
assertEqual
(
8
,
len
(
op
.
attr_names
()))
op
.
set_block_attr
(
"block_attr"
,
program_desc
.
block
(
0
))
self
.
assertEqual
(
0
,
op
.
block_attr
(
"block_attr"
))
self
.
assertEqual
(
0
,
op
.
block_attr
_id
(
"block_attr"
))
mul_op
=
block
.
append_op
()
mul_op
.
set_type
(
"mul"
)
...
...
python/paddle/fluid/transpiler/distribute_transpiler.py
浏览文件 @
7c09d198
...
...
@@ -195,6 +195,9 @@ class DistributeTranspiler(object):
if
program
is
None
:
program
=
default_main_program
()
self
.
origin_program
=
program
self
.
origin_startup_program
=
default_startup_program
().
clone
()
self
.
startup_program
=
default_startup_program
()
self
.
trainer_num
=
trainers
self
.
sync_mode
=
sync_mode
self
.
trainer_id
=
trainer_id
...
...
@@ -205,10 +208,10 @@ class DistributeTranspiler(object):
ps_dispatcher
=
self
.
config
.
split_method
(
self
.
pserver_endpoints
)
self
.
has_distributed_lookup_table
=
self
.
_has_distributed_lookup_table
()
# split and create vars, then put splited vars in dicts for later use.
# s
tep 1: s
plit and create vars, then put splited vars in dicts for later use.
self
.
_init_splited_vars
()
# step
3.1
: insert send op to send gradient vars to parameter servers
# step
2
: insert send op to send gradient vars to parameter servers
ps_dispatcher
.
reset
()
send_vars
=
[]
...
...
@@ -265,7 +268,7 @@ class DistributeTranspiler(object):
RPC_OP_ROLE_ATTR_NAME
:
RPC_OP_ROLE_ATTR_VALUE
})
# step 3
.2
: insert recv op to receive parameters from parameter server
# step 3: insert recv op to receive parameters from parameter server
recv_vars
=
[]
for
_
,
var
in
enumerate
(
send_vars
):
recv_vars
.
append
(
self
.
grad_param_mapping
[
var
])
...
...
@@ -312,6 +315,8 @@ class DistributeTranspiler(object):
outputs
=
{
"Out"
:
[
orig_param
]},
attrs
=
{
"axis"
:
0
})
self
.
_get_trainer_startup_program
(
recv_vars
=
recv_vars
,
eplist
=
eplist
)
if
self
.
has_distributed_lookup_table
:
self
.
_replace_lookup_table_op_with_prefetch
(
program
,
pserver_endpoints
)
...
...
@@ -328,8 +333,78 @@ class DistributeTranspiler(object):
# FIXME(typhoonzero): Also ops like clip_gradient, lrn_decay?
delete_ops
(
self
.
origin_program
.
global_block
(),
self
.
optimize_ops
)
self
.
origin_program
.
__str__
()
return
self
.
origin_program
def
_get_trainer_startup_program
(
self
,
recv_vars
,
eplist
,
startup_program
=
None
):
"""
Get transpiled trainer side startup program.
Args:
startup_program(Program): Startup program.
Returns:
Program: trainer side startup program.
"""
if
startup_program
is
None
:
startup_program
=
self
.
startup_program
# FIXME(gongwb): delete not need ops.
# note that: some parameter is not trainable and those ops can't be deleted.
for
varname
,
splited_var
in
self
.
param_var_mapping
.
iteritems
():
# Get the eplist of recv vars
eps
=
[]
for
var
in
splited_var
:
index
=
[
v
.
name
for
v
in
recv_vars
].
index
(
var
.
name
)
eps
.
append
(
eplist
[
index
])
for
var
in
splited_var
:
if
startup_program
.
global_block
().
has_var
(
var
.
name
):
continue
startup_program
.
global_block
().
create_var
(
name
=
var
.
name
,
persistable
=
False
,
type
=
var
.
type
,
dtype
=
var
.
dtype
,
shape
=
var
.
shape
,
lod_level
=
var
.
lod_level
)
op
=
startup_program
.
global_block
().
append_op
(
type
=
"recv"
,
inputs
=
{},
outputs
=
{
"Out"
:
splited_var
},
attrs
=
{
"epmap"
:
eps
,
RPC_OP_ROLE_ATTR_NAME
:
RPC_OP_ROLE_ATTR_VALUE
})
startup_program
.
global_block
().
append_op
(
type
=
"fetch_barrier"
,
inputs
=
{},
outputs
=
{},
attrs
=
{
"endpoints"
:
self
.
pserver_endpoints
,
RPC_OP_ROLE_ATTR_NAME
:
RPC_OP_ROLE_ATTR_VALUE
})
for
varname
,
splited_var
in
self
.
param_var_mapping
.
iteritems
():
#add concat ops to merge splited parameters received from parameter servers.
if
len
(
splited_var
)
<=
1
:
continue
orig_param
=
startup_program
.
global_block
().
vars
[
varname
]
startup_program
.
global_block
().
append_op
(
type
=
"concat"
,
inputs
=
{
"X"
:
splited_var
},
outputs
=
{
"Out"
:
[
orig_param
]},
attrs
=
{
"axis"
:
0
})
return
startup_program
def
get_pserver_program
(
self
,
endpoint
):
"""
Get parameter server side program.
...
...
@@ -530,7 +605,10 @@ class DistributeTranspiler(object):
pserver_program
.
_sync_with_cpp
()
return
pserver_program
def
get_startup_program
(
self
,
endpoint
,
pserver_program
):
def
get_startup_program
(
self
,
endpoint
,
pserver_program
,
startup_program
=
None
):
"""
Get startup program for current parameter server.
Modify operator input variables if there are variables that
...
...
@@ -540,12 +618,17 @@ class DistributeTranspiler(object):
endpoint (str): current pserver endpoint.
pserver_program (Program): call get_pserver_program first and
pass the result here.
startup_program (Program): if pass None, will use
default_startup_program
Returns:
Program: parameter server side startup program.
"""
s_prog
=
Program
()
orig_s_prog
=
default_startup_program
()
if
not
startup_program
:
orig_s_prog
=
default_startup_program
()
else
:
orig_s_prog
=
startup_program
s_prog
.
random_seed
=
orig_s_prog
.
random_seed
params
=
self
.
param_grad_ep_mapping
[
endpoint
][
"params"
]
...
...
@@ -568,14 +651,16 @@ class DistributeTranspiler(object):
new_outputs
=
dict
()
# do not append startup op if var is not on this pserver
op_on_pserver
=
False
for
key
in
op
.
output_names
:
newname
,
_
=
_get_splited_name_and_shape
(
op
.
output
(
key
)[
0
])
if
newname
:
op_on_pserver
=
True
new_outputs
[
key
]
=
created_var_map
[
newname
]
elif
op
.
output
(
key
)[
0
]
in
pserver_vars
:
op_on_pserver
=
True
new_outputs
[
key
]
=
pserver_vars
[
op
.
output
(
key
)[
0
]]
# TODO(gongwb): remove this line.
if
op
.
type
not
in
[
"recv"
,
"fetch_barrier"
,
"concat"
]:
for
key
in
op
.
output_names
:
newname
,
_
=
_get_splited_name_and_shape
(
op
.
output
(
key
)[
0
])
if
newname
:
op_on_pserver
=
True
new_outputs
[
key
]
=
created_var_map
[
newname
]
elif
op
.
output
(
key
)[
0
]
in
pserver_vars
:
op_on_pserver
=
True
new_outputs
[
key
]
=
pserver_vars
[
op
.
output
(
key
)[
0
]]
if
op_on_pserver
:
# most startup program ops have no inputs
...
...
@@ -584,12 +669,12 @@ class DistributeTranspiler(object):
if
op
.
type
in
[
"gaussian_random"
,
"fill_constant"
,
"uniform_random"
]:
op
.
attrs
[
"shape"
]
=
new_outputs
[
"Out"
].
shape
op
.
set_attr
(
"shape"
,
list
(
new_outputs
[
"Out"
].
shape
))
s_prog
.
global_block
().
append_op
(
type
=
op
.
type
,
inputs
=
new_inputs
,
outputs
=
new_outputs
,
attrs
=
op
.
a
ttrs
)
attrs
=
op
.
a
ll_attrs
()
)
return
s_prog
# ====================== private transpiler functions =====================
...
...
@@ -603,7 +688,7 @@ class DistributeTranspiler(object):
self
.
table_name
=
None
for
op
in
self
.
origin_program
.
global_block
().
ops
:
if
op
.
type
==
LOOKUP_TABLE_TYPE
:
if
op
.
attr
s
[
'is_distributed'
]
is
True
:
if
op
.
attr
(
'is_distributed'
)
is
True
:
if
self
.
table_name
is
None
:
self
.
table_name
=
op
.
input
(
"W"
)[
0
]
if
self
.
table_name
!=
op
.
input
(
"W"
)[
0
]:
...
...
@@ -877,9 +962,15 @@ class DistributeTranspiler(object):
# create table param and grad var in pserver program
origin_param_var
=
self
.
origin_program
.
global_block
().
vars
[
self
.
table_name
]
zero_dim
=
int
(
math
.
ceil
(
origin_param_var
.
shape
[
0
]
/
len
(
self
.
pserver_endpoints
)))
table_shape
=
list
(
origin_param_var
.
shape
)
table_shape
[
0
]
=
zero_dim
param_var
=
pserver_program
.
global_block
().
create_var
(
name
=
origin_param_var
.
name
,
shape
=
origin_param_var
.
shape
,
shape
=
table_
shape
,
dtype
=
origin_param_var
.
dtype
,
type
=
core
.
VarDesc
.
VarType
.
SELECTED_ROWS
,
persistable
=
True
)
...
...
@@ -1008,7 +1099,6 @@ class DistributeTranspiler(object):
var_mapping
[
varname
]
=
\
[
program
.
global_block
().
var
(
orig_var
.
name
)]
continue
var_mapping
[
varname
]
=
[]
orig_shape
=
orig_var
.
shape
orig_dim1_flatten
=
1
...
...
@@ -1263,7 +1353,7 @@ class DistributeTranspiler(object):
type
=
opt_op
.
type
,
inputs
=
new_inputs
,
outputs
=
outputs
,
attrs
=
opt_op
.
a
ttrs
)
attrs
=
opt_op
.
a
ll_attrs
()
)
def
_is_splited_grad_var
(
self
,
var
,
var_dict
):
grad_block
=
None
...
...
@@ -1296,7 +1386,7 @@ class DistributeTranspiler(object):
block
.
_clone_variable
(
var
)
return
block
.
append_op
(
type
=
op
.
type
,
inputs
=
inputs
,
outputs
=
outputs
,
attrs
=
op
.
a
ttrs
)
type
=
op
.
type
,
inputs
=
inputs
,
outputs
=
outputs
,
attrs
=
op
.
a
ll_attrs
()
)
def
_append_pserver_non_opt_ops
(
self
,
optimize_block
,
opt_op
):
program
=
optimize_block
.
program
...
...
@@ -1337,7 +1427,7 @@ class DistributeTranspiler(object):
type
=
opt_op
.
type
,
inputs
=
inputs
,
outputs
=
outputs
,
attrs
=
opt_op
.
a
ttrs
)
attrs
=
opt_op
.
a
ll_attrs
()
)
def
_is_op_connected
(
self
,
op1
,
op2
):
# If one op's input is another op's output or
...
...
@@ -1442,8 +1532,8 @@ class DistributeTranspiler(object):
# optimize
op_maker
=
core
.
op_proto_and_checker_maker
optimize_role
=
core
.
op_proto_and_checker_maker
.
OpRole
.
Optimize
if
op_maker
.
kOpRoleAttrName
()
in
op
.
attrs
and
\
int
(
op
.
attrs
[
op_maker
.
kOpRoleAttrName
()])
==
int
(
optimize_role
):
if
op_maker
.
kOpRoleAttrName
()
in
op
.
attr
_name
s
and
\
int
(
op
.
all_attrs
()
[
op_maker
.
kOpRoleAttrName
()])
==
int
(
optimize_role
):
return
True
return
False
...
...
@@ -1466,8 +1556,8 @@ class DistributeTranspiler(object):
# and op_role_var to get the pair.
for
input_name
in
op
.
input_arg_names
:
if
input_name
.
find
(
"@GRAD"
)
!=
-
1
and
\
op
.
attr
s
[
RPC_OP_ROLE_ATTR_NAME
]
:
param_name
=
op
.
attr
s
[
OP_ROLE_VAR_ATTR_NAME
]
[
0
]
op
.
attr
(
RPC_OP_ROLE_ATTR_NAME
)
:
param_name
=
op
.
attr
(
OP_ROLE_VAR_ATTR_NAME
)
[
0
]
params_grads
.
append
([
origin_var_dict
[
param_name
],
origin_var_dict
[
input_name
]
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录