Commit 540af318 authored by: Yang Yu

Merge branch 'develop' of github.com:baidu/Paddle into feature/add_reorder_lod_tensor

...@@ -6,8 +6,18 @@ height = 227 ...@@ -6,8 +6,18 @@ height = 227
width = 227 width = 227
num_class = 1000 num_class = 1000
batch_size = get_config_arg('batch_size', int, 128) batch_size = get_config_arg('batch_size', int, 128)
gp = get_config_arg('layer_num', int, 1)
is_infer = get_config_arg("is_infer", bool, False)
num_samples = get_config_arg('num_samples', int, 2560)
args = {'height': height, 'width': width, 'color': True, 'num_class': num_class} args = {
'height': height,
'width': width,
'color': True,
'num_class': num_class,
'is_infer': is_infer,
'num_samples': num_samples
}
define_py_data_sources2( define_py_data_sources2(
"train.list", None, module="provider", obj="process", args=args) "train.list", None, module="provider", obj="process", args=args)
...@@ -31,7 +41,7 @@ net = img_pool_layer(input=net, pool_size=3, stride=2) ...@@ -31,7 +41,7 @@ net = img_pool_layer(input=net, pool_size=3, stride=2)
# conv2 # conv2
net = img_conv_layer( net = img_conv_layer(
input=net, filter_size=5, num_filters=256, stride=1, padding=2, groups=1) input=net, filter_size=5, num_filters=256, stride=1, padding=2, groups=gp)
net = img_cmrnorm_layer(input=net, size=5, scale=0.0001, power=0.75) net = img_cmrnorm_layer(input=net, size=5, scale=0.0001, power=0.75)
net = img_pool_layer(input=net, pool_size=3, stride=2) net = img_pool_layer(input=net, pool_size=3, stride=2)
...@@ -40,11 +50,11 @@ net = img_conv_layer( ...@@ -40,11 +50,11 @@ net = img_conv_layer(
input=net, filter_size=3, num_filters=384, stride=1, padding=1) input=net, filter_size=3, num_filters=384, stride=1, padding=1)
# conv4 # conv4
net = img_conv_layer( net = img_conv_layer(
input=net, filter_size=3, num_filters=384, stride=1, padding=1, groups=1) input=net, filter_size=3, num_filters=384, stride=1, padding=1, groups=gp)
# conv5 # conv5
net = img_conv_layer( net = img_conv_layer(
input=net, filter_size=3, num_filters=256, stride=1, padding=1, groups=1) input=net, filter_size=3, num_filters=256, stride=1, padding=1, groups=gp)
net = img_pool_layer(input=net, pool_size=3, stride=2) net = img_pool_layer(input=net, pool_size=3, stride=2)
net = fc_layer( net = fc_layer(
...@@ -59,6 +69,9 @@ net = fc_layer( ...@@ -59,6 +69,9 @@ net = fc_layer(
layer_attr=ExtraAttr(drop_rate=0.5)) layer_attr=ExtraAttr(drop_rate=0.5))
net = fc_layer(input=net, size=1000, act=SoftmaxActivation()) net = fc_layer(input=net, size=1000, act=SoftmaxActivation())
lab = data_layer('label', num_class) if is_infer:
loss = cross_entropy(input=net, label=lab) outputs(net)
outputs(loss) else:
lab = data_layer('label', num_class)
loss = cross_entropy(input=net, label=lab)
outputs(loss)
...@@ -7,13 +7,15 @@ num_class = 1000 ...@@ -7,13 +7,15 @@ num_class = 1000
batch_size = get_config_arg('batch_size', int, 128) batch_size = get_config_arg('batch_size', int, 128)
use_gpu = get_config_arg('use_gpu', bool, True) use_gpu = get_config_arg('use_gpu', bool, True)
is_infer = get_config_arg("is_infer", bool, False) is_infer = get_config_arg("is_infer", bool, False)
num_samples = get_config_arg('num_samples', int, 2560)
args = { args = {
'height': height, 'height': height,
'width': width, 'width': width,
'color': True, 'color': True,
'num_class': num_class, 'num_class': num_class,
'is_infer': is_infer 'is_infer': is_infer,
'num_samples': num_samples
} }
define_py_data_sources2( define_py_data_sources2(
"train.list" if not is_infer else None, "train.list" if not is_infer else None,
......
...@@ -14,6 +14,7 @@ def initHook(settings, height, width, color, num_class, **kwargs): ...@@ -14,6 +14,7 @@ def initHook(settings, height, width, color, num_class, **kwargs):
else: else:
settings.data_size = settings.height * settings.width settings.data_size = settings.height * settings.width
settings.is_infer = kwargs.get('is_infer', False) settings.is_infer = kwargs.get('is_infer', False)
settings.num_samples = kwargs.get('num_samples', 2560)
if settings.is_infer: if settings.is_infer:
settings.slots = [dense_vector(settings.data_size)] settings.slots = [dense_vector(settings.data_size)]
else: else:
...@@ -23,7 +24,7 @@ def initHook(settings, height, width, color, num_class, **kwargs): ...@@ -23,7 +24,7 @@ def initHook(settings, height, width, color, num_class, **kwargs):
@provider( @provider(
init_hook=initHook, min_pool_size=-1, cache=CacheType.CACHE_PASS_IN_MEM) init_hook=initHook, min_pool_size=-1, cache=CacheType.CACHE_PASS_IN_MEM)
def process(settings, file_list): def process(settings, file_list):
for i in xrange(2560 if settings.is_infer else 1024): for i in xrange(settings.num_samples):
img = np.random.rand(1, settings.data_size).reshape(-1, 1).flatten() img = np.random.rand(1, settings.data_size).reshape(-1, 1).flatten()
if settings.is_infer: if settings.is_infer:
yield img.astype('float32') yield img.astype('float32')
......
...@@ -7,13 +7,15 @@ num_class = 1000 ...@@ -7,13 +7,15 @@ num_class = 1000
batch_size = get_config_arg('batch_size', int, 64) batch_size = get_config_arg('batch_size', int, 64)
layer_num = get_config_arg("layer_num", int, 50) layer_num = get_config_arg("layer_num", int, 50)
is_infer = get_config_arg("is_infer", bool, False) is_infer = get_config_arg("is_infer", bool, False)
num_samples = get_config_arg('num_samples', int, 2560)
args = { args = {
'height': height, 'height': height,
'width': width, 'width': width,
'color': True, 'color': True,
'num_class': num_class, 'num_class': num_class,
'is_infer': is_infer 'is_infer': is_infer,
'num_samples': num_samples
} }
define_py_data_sources2( define_py_data_sources2(
"train.list" if not is_infer else None, "train.list" if not is_infer else None,
......
...@@ -37,7 +37,7 @@ function infer() { ...@@ -37,7 +37,7 @@ function infer() {
--trainer_count=1 \ --trainer_count=1 \
--num_passes=1 \ --num_passes=1 \
--save_dir="models/${topology}-${layer_num}" \ --save_dir="models/${topology}-${layer_num}" \
--config_args="batch_size=128,layer_num=${layer_num}" \ --config_args="batch_size=128,layer_num=${layer_num},num_samples=256" \
> /dev/null 2>&1 > /dev/null 2>&1
echo "Done" echo "Done"
fi fi
...@@ -79,8 +79,9 @@ fi ...@@ -79,8 +79,9 @@ fi
# inference benchmark # inference benchmark
for use_mkldnn in True False; do for use_mkldnn in True False; do
for batchsize in 1 2 4 8 16; do for batchsize in 1 2 4 8 16; do
infer googlenet v1 $batchsize $use_mkldnn
infer resnet 50 $batchsize $use_mkldnn
infer vgg 19 $batchsize $use_mkldnn infer vgg 19 $batchsize $use_mkldnn
infer resnet 50 $batchsize $use_mkldnn
infer googlenet v1 $batchsize $use_mkldnn
infer alexnet 2 $batchsize $use_mkldnn
done done
done done
...@@ -47,5 +47,6 @@ for use_mkldnn in True False; do ...@@ -47,5 +47,6 @@ for use_mkldnn in True False; do
train vgg 19 $batchsize $use_mkldnn train vgg 19 $batchsize $use_mkldnn
train resnet 50 $batchsize $use_mkldnn train resnet 50 $batchsize $use_mkldnn
train googlenet v1 $batchsize $use_mkldnn train googlenet v1 $batchsize $use_mkldnn
train alexnet 2 $batchsize $use_mkldnn
done done
done done
...@@ -23,24 +23,25 @@ function infer() { ...@@ -23,24 +23,25 @@ function infer() {
echo "./run_mkl_infer.sh to save the model first" echo "./run_mkl_infer.sh to save the model first"
exit 0 exit 0
fi fi
log_period=$((256 / bs)) log_period=$((32 / bs))
paddle train --job=test \ paddle train --job=test \
--config="${topology}.py" \ --config="${topology}.py" \
--use_mkldnn=False \
--use_gpu=False \ --use_gpu=False \
--trainer_count=$thread \ --trainer_count=$thread \
--log_period=$log_period \ --log_period=$log_period \
--config_args="batch_size=${bs},layer_num=${layer_num},is_infer=True" \ --config_args="batch_size=${bs},layer_num=${layer_num},is_infer=True,num_samples=256" \
--init_model_path=$models_in \ --init_model_path=$models_in \
2>&1 | tee ${log} 2>&1 | tee ${log}
# calculate the last 5 logs period time of 1280 samples, # calculate the last 5 logs period time of 160(=32*5) samples,
# the time before are burning time. # the time before are burning time.
start=`tail ${log} -n 7 | head -n 1 | awk -F ' ' '{print $2}' | xargs` start=`tail ${log} -n 7 | head -n 1 | awk -F ' ' '{print $2}' | xargs`
end=`tail ${log} -n 2 | head -n 1 | awk -F ' ' '{print $2}' | xargs` end=`tail ${log} -n 2 | head -n 1 | awk -F ' ' '{print $2}' | xargs`
start_sec=`clock_to_seconds $start` start_sec=`clock_to_seconds $start`
end_sec=`clock_to_seconds $end` end_sec=`clock_to_seconds $end`
fps=`awk 'BEGIN{printf "%.2f",(1280 / ('$end_sec' - '$start_sec'))}'` fps=`awk 'BEGIN{printf "%.2f",(160 / ('$end_sec' - '$start_sec'))}'`
echo "Last 1280 samples start: ${start}(${start_sec} sec), end: ${end}(${end_sec} sec;" >> ${log} echo "Last 160 samples start: ${start}(${start_sec} sec), end: ${end}(${end_sec} sec;" >> ${log}
echo "FPS: $fps images/sec" 2>&1 | tee -a ${log} echo "FPS: $fps images/sec" 2>&1 | tee -a ${log}
} }
...@@ -56,7 +57,8 @@ fi ...@@ -56,7 +57,8 @@ fi
# inference benchmark # inference benchmark
for batchsize in 1 2 4 8 16; do for batchsize in 1 2 4 8 16; do
infer googlenet v1 $batchsize
infer resnet 50 $batchsize
infer vgg 19 $batchsize infer vgg 19 $batchsize
infer resnet 50 $batchsize
infer googlenet v1 $batchsize
infer alexnet 2 $batchsize
done done
...@@ -12,10 +12,11 @@ function train() { ...@@ -12,10 +12,11 @@ function train() {
config="${topology}.py" config="${topology}.py"
paddle train --job=time \ paddle train --job=time \
--config=$config \ --config=$config \
--use_mkldnn=False \
--use_gpu=False \ --use_gpu=False \
--trainer_count=$thread \ --trainer_count=$thread \
--log_period=10 \ --log_period=3 \
--test_period=100 \ --test_period=30 \
--config_args=$args \ --config_args=$args \
2>&1 | tee ${log} 2>&1 | tee ${log}
...@@ -36,4 +37,5 @@ for batchsize in 64 128 256; do ...@@ -36,4 +37,5 @@ for batchsize in 64 128 256; do
train vgg 19 $batchsize train vgg 19 $batchsize
train resnet 50 $batchsize train resnet 50 $batchsize
train googlenet v1 $batchsize train googlenet v1 $batchsize
train alexnet 2 $batchsize
done done
...@@ -7,13 +7,15 @@ num_class = 1000 ...@@ -7,13 +7,15 @@ num_class = 1000
batch_size = get_config_arg('batch_size', int, 64) batch_size = get_config_arg('batch_size', int, 64)
layer_num = get_config_arg('layer_num', int, 19) layer_num = get_config_arg('layer_num', int, 19)
is_infer = get_config_arg("is_infer", bool, False) is_infer = get_config_arg("is_infer", bool, False)
num_samples = get_config_arg('num_samples', int, 2560)
args = { args = {
'height': height, 'height': height,
'width': width, 'width': width,
'color': True, 'color': True,
'num_class': num_class, 'num_class': num_class,
'is_infer': is_infer 'is_infer': is_infer,
'num_samples': num_samples
} }
define_py_data_sources2( define_py_data_sources2(
"train.list" if not is_infer else None, "train.list" if not is_infer else None,
......
...@@ -253,9 +253,9 @@ IF(NOT PROTOBUF_FOUND) ...@@ -253,9 +253,9 @@ IF(NOT PROTOBUF_FOUND)
IF(WITH_C_API) IF(WITH_C_API)
INSTALL(DIRECTORY ${PROTOBUF_INCLUDE_DIR} DESTINATION third_party/protobuf) INSTALL(DIRECTORY ${PROTOBUF_INCLUDE_DIR} DESTINATION third_party/protobuf)
IF(ANDROID) IF(ANDROID)
INSTALL(FILES ${PROTOBUF_LIBRARY} DESTINATION third_party/protobuf/lib/${ANDROID_ABI}) INSTALL(FILES ${PROTOBUF_LITE_LIBRARY} DESTINATION third_party/protobuf/lib/${ANDROID_ABI})
ELSE() ELSE()
INSTALL(FILES ${PROTOBUF_LIBRARY} DESTINATION third_party/protobuf/lib) INSTALL(FILES ${PROTOBUF_LITE_LIBRARY} DESTINATION third_party/protobuf/lib)
ENDIF() ENDIF()
ENDIF() ENDIF()
......
...@@ -467,7 +467,7 @@ lambda_cost ...@@ -467,7 +467,7 @@ lambda_cost
:noindex: :noindex:
square_error_cost square_error_cost
-------- -----------------
.. autoclass:: paddle.v2.layer.square_error_cost .. autoclass:: paddle.v2.layer.square_error_cost
:noindex: :noindex:
...@@ -533,7 +533,7 @@ Miscs ...@@ -533,7 +533,7 @@ Miscs
===== =====
dropout dropout
-------------- --------
.. autoclass:: paddle.v2.layer.dropout .. autoclass:: paddle.v2.layer.dropout
:noindex: :noindex:
......
...@@ -19,17 +19,17 @@ dynamic_lstm ...@@ -19,17 +19,17 @@ dynamic_lstm
:noindex: :noindex:
data data
--------- ----
.. autofunction:: paddle.v2.fluid.layers.data .. autofunction:: paddle.v2.fluid.layers.data
:noindex: :noindex:
mean mean
--------- ----
.. autofunction:: paddle.v2.fluid.layers.mean .. autofunction:: paddle.v2.fluid.layers.mean
:noindex: :noindex:
mul mul
--------- ---
.. autofunction:: paddle.v2.fluid.layers.mul .. autofunction:: paddle.v2.fluid.layers.mul
:noindex: :noindex:
...@@ -45,13 +45,13 @@ elementwise_div ...@@ -45,13 +45,13 @@ elementwise_div
dropout dropout
--------- -------
.. autofunction:: paddle.v2.fluid.layers.dropout .. autofunction:: paddle.v2.fluid.layers.dropout
:noindex: :noindex:
reshape reshape
--------- --------
.. autofunction:: paddle.v2.fluid.layers.reshape .. autofunction:: paddle.v2.fluid.layers.reshape
:noindex: :noindex:
...@@ -81,67 +81,67 @@ transpose ...@@ -81,67 +81,67 @@ transpose
sigmoid_cross_entropy_with_logits sigmoid_cross_entropy_with_logits
--------- ---------------------------------
.. autofunction:: paddle.v2.fluid.layers.esigmoid_cross_entropy_with_logits .. autofunction:: paddle.v2.fluid.layers.esigmoid_cross_entropy_with_logits
:noindex: :noindex:
cast cast
--------- ----
.. autofunction:: paddle.v2.fluid.layers.cast .. autofunction:: paddle.v2.fluid.layers.cast
:noindex: :noindex:
concat concat
--------- -------
.. autofunction:: paddle.v2.fluid.layers.concat .. autofunction:: paddle.v2.fluid.layers.concat
:noindex: :noindex:
sums sums
--------- ----
.. autofunction:: paddle.v2.fluid.layers.sums .. autofunction:: paddle.v2.fluid.layers.sums
:noindex: :noindex:
linear_chain_crf linear_chain_crf
--------- ----------------
.. autofunction:: paddle.v2.fluid.layers.linear_chain_crf .. autofunction:: paddle.v2.fluid.layers.linear_chain_crf
:noindex: :noindex:
assign assign
--------- -------
.. autofunction:: paddle.v2.fluid.layers.embedding .. autofunction:: paddle.v2.fluid.layers.embedding
:noindex: :noindex:
split_lod_tensor split_lod_tensor
--------- ----------------
.. autofunction:: paddle.v2.fluid.layers.split_lod_tensor .. autofunction:: paddle.v2.fluid.layers.split_lod_tensor
:noindex: :noindex:
merge_lod_tensor merge_lod_tensor
--------- ----------------
.. autofunction:: paddle.v2.fluid.layers.merge_lod_tensor .. autofunction:: paddle.v2.fluid.layers.merge_lod_tensor
:noindex: :noindex:
cos_sim cos_sim
--------- --------
.. autofunction:: paddle.v2.fluid.layers.cos_sim .. autofunction:: paddle.v2.fluid.layers.cos_sim
:noindex: :noindex:
cross_entropy cross_entropy
--------- -------------
.. autofunction:: paddle.v2.fluid.layers.cross_entropy .. autofunction:: paddle.v2.fluid.layers.cross_entropy
:noindex: :noindex:
square_error_cost square_error_cost
--------- -----------------
.. autofunction:: paddle.v2.fluid.layers.square_error_cost .. autofunction:: paddle.v2.fluid.layers.square_error_cost
:noindex: :noindex:
...@@ -153,68 +153,68 @@ accuracy ...@@ -153,68 +153,68 @@ accuracy
sequence_conv sequence_conv
--------- -------------
.. autofunction:: paddle.v2.fluid.layers.sequence_conv .. autofunction:: paddle.v2.fluid.layers.sequence_conv
:noindex: :noindex:
conv2d conv2d
--------- ------
.. autofunction:: paddle.v2.fluid.layers.conv2d .. autofunction:: paddle.v2.fluid.layers.conv2d
:noindex: :noindex:
sequence_pool sequence_pool
--------- -------------
.. autofunction:: paddle.v2.fluid.layers.sequence_pool .. autofunction:: paddle.v2.fluid.layers.sequence_pool
:noindex: :noindex:
pool2d pool2d
--------- ------
.. autofunction:: paddle.v2.fluid.layers.pool2d .. autofunction:: paddle.v2.fluid.layers.pool2d
:noindex: :noindex:
batch_norm batch_norm
--------- ----------
.. autofunction:: paddle.v2.fluid.layers.batch_norm .. autofunction:: paddle.v2.fluid.layers.batch_norm
:noindex: :noindex:
beam_search_decode beam_search_decode
--------- ------------------
.. autofunction:: paddle.v2.fluid.layers.beam_search_decode .. autofunction:: paddle.v2.fluid.layers.beam_search_decode
:noindex: :noindex:
lod_rank_table lod_rank_table
--------- --------------
.. autofunction:: paddle.v2.fluid.layers.lod_rank_table .. autofunction:: paddle.v2.fluid.layers.lod_rank_table
:noindex: :noindex:
max_sequence_len max_sequence_len
--------- ----------------
.. autofunction:: paddle.v2.fluid.layers.max_sequence_len .. autofunction:: paddle.v2.fluid.layers.max_sequence_len
:noindex: :noindex:
topk topk
--------- -----
.. autofunction:: paddle.v2.fluid.layers.topk .. autofunction:: paddle.v2.fluid.layers.topk
:noindex: :noindex:
lod_tensor_to_array lod_tensor_to_array
--------- -------------------
.. autofunction:: paddle.v2.fluid.layers.lod_tensor_to_array .. autofunction:: paddle.v2.fluid.layers.lod_tensor_to_array
:noindex: :noindex:
array_to_lod_tensor array_to_lod_tensor
--------- -------------------
.. autofunction:: paddle.v2.fluid.layers.array_to_lod_tensor .. autofunction:: paddle.v2.fluid.layers.array_to_lod_tensor
:noindex: :noindex:
...@@ -222,26 +222,26 @@ array_to_lod_tensor ...@@ -222,26 +222,26 @@ array_to_lod_tensor
fill_constant fill_constant
--------- -------------
.. autofunction:: paddle.v2.fluid.layers.fill_constant .. autofunction:: paddle.v2.fluid.layers.fill_constant
:noindex: :noindex:
fill_constant_batch_size_like fill_constant_batch_size_like
--------- -----------------------------
.. autofunction:: paddle.v2.fluid.layers.fill_constant_batch_size_like .. autofunction:: paddle.v2.fluid.layers.fill_constant_batch_size_like
:noindex: :noindex:
ones ones
--------- ----
.. autofunction:: paddle.v2.fluid.layers.ones .. autofunction:: paddle.v2.fluid.layers.ones
:noindex: :noindex:
zeros zeros
--------- -----
.. autofunction:: paddle.v2.fluid.layers.zeros .. autofunction:: paddle.v2.fluid.layers.zeros
:noindex: :noindex:
...@@ -253,14 +253,14 @@ increment ...@@ -253,14 +253,14 @@ increment
array_write array_write
--------- -----------
.. autofunction:: paddle.v2.fluid.layers.array_write .. autofunction:: paddle.v2.fluid.layers.array_write
:noindex: :noindex:
create_array create_array
--------- ------------
.. autofunction:: paddle.v2.fluid.layers.create_array .. autofunction:: paddle.v2.fluid.layers.create_array
:noindex: :noindex:
...@@ -272,31 +272,31 @@ less_than ...@@ -272,31 +272,31 @@ less_than
array_read array_read
--------- ----------
.. autofunction:: paddle.v2.fluid.layers.array_read .. autofunction:: paddle.v2.fluid.layers.array_read
:noindex: :noindex:
shrink_memory shrink_memory
--------- --------------
.. autofunction:: paddle.v2.fluid.layers.shrink_memory .. autofunction:: paddle.v2.fluid.layers.shrink_memory
:noindex: :noindex:
array_length array_length
--------- -------------
.. autofunction:: paddle.v2.fluid.layers.array_length .. autofunction:: paddle.v2.fluid.layers.array_length
:noindex: :noindex:
conv2d_transpose conv2d_transpose
--------- ----------------
.. autofunction:: paddle.v2.fluid.layers.conv2d_transpose .. autofunction:: paddle.v2.fluid.layers.conv2d_transpose
:noindex: :noindex:
sequence_expand sequence_expand
--------- ---------------
.. autofunction:: paddle.v2.fluid.layers.sequence_expand .. autofunction:: paddle.v2.fluid.layers.sequence_expand
:noindex: :noindex:
...@@ -308,13 +308,13 @@ lstm_unit ...@@ -308,13 +308,13 @@ lstm_unit
sequence_softmax sequence_softmax
--------- ----------------
.. autofunction:: paddle.v2.fluid.layers.sequence_softmax .. autofunction:: paddle.v2.fluid.layers.sequence_softmax
:noindex: :noindex:
reduce_sum reduce_sum
--------- ----------
.. autofunction:: paddle.v2.fluid.layers.reduce_sum .. autofunction:: paddle.v2.fluid.layers.reduce_sum
:noindex: :noindex:
...@@ -3,19 +3,19 @@ Nets ...@@ -3,19 +3,19 @@ Nets
=========== ===========
simple_img_conv_pool simple_img_conv_pool
----------- --------------------
.. autofunction:: paddle.v2.fluid.nets.simple_img_conv_pool .. autofunction:: paddle.v2.fluid.nets.simple_img_conv_pool
:noindex: :noindex:
img_conv_group img_conv_group
----------- ---------------
.. autofunction:: paddle.v2.fluid.nets.img_conv_group .. autofunction:: paddle.v2.fluid.nets.img_conv_group
:noindex: :noindex:
sequence_conv_pool sequence_conv_pool
----------- ------------------
.. autofunction:: paddle.v2.fluid.nets.sequence_conv_pool .. autofunction:: paddle.v2.fluid.nets.sequence_conv_pool
:noindex: :noindex:
......
...@@ -18,7 +18,7 @@ SGDOptimizer ...@@ -18,7 +18,7 @@ SGDOptimizer
MomentumOptimizer MomentumOptimizer
----------- -----------------
.. automodule:: paddle.v2.fluid.optimizer .. automodule:: paddle.v2.fluid.optimizer
:members: MomentumOptimizer :members: MomentumOptimizer
:noindex: :noindex:
...@@ -26,14 +26,14 @@ MomentumOptimizer ...@@ -26,14 +26,14 @@ MomentumOptimizer
AdagradOptimizer AdagradOptimizer
----------- ----------------
.. automodule:: paddle.v2.fluid.optimizer .. automodule:: paddle.v2.fluid.optimizer
:members: AdagradOptimizer :members: AdagradOptimizer
:noindex: :noindex:
AdamOptimizer AdamOptimizer
----------- -------------
.. automodule:: paddle.v2.fluid.optimizer .. automodule:: paddle.v2.fluid.optimizer
:members: AdamOptimizer :members: AdamOptimizer
:noindex: :noindex:
...@@ -47,7 +47,7 @@ AdamaxOptimizer ...@@ -47,7 +47,7 @@ AdamaxOptimizer
DecayedAdagradOptimizer DecayedAdagradOptimizer
----------- -----------------------
.. automodule:: paddle.v2.fluid.optimizer .. automodule:: paddle.v2.fluid.optimizer
:members: DecayedAdagradOptimizer :members: DecayedAdagradOptimizer
:noindex: :noindex:
......
...@@ -3,14 +3,14 @@ Regularizer ...@@ -3,14 +3,14 @@ Regularizer
=========== ===========
WeightDecayRegularizer WeightDecayRegularizer
----------- ----------------------
.. automodule:: paddle.v2.fluid.regularizer .. automodule:: paddle.v2.fluid.regularizer
:members: WeightDecayRegularizer :members: WeightDecayRegularizer
:noindex: :noindex:
L2DecayRegularizer L2DecayRegularizer
----------- ------------------
.. automodule:: paddle.v2.fluid.regularizer .. automodule:: paddle.v2.fluid.regularizer
:members: L2DecayRegularizer :members: L2DecayRegularizer
:noindex: :noindex:
...@@ -18,7 +18,7 @@ L2DecayRegularizer ...@@ -18,7 +18,7 @@ L2DecayRegularizer
L1DecayRegularizer L1DecayRegularizer
----------- -------------------
.. automodule:: paddle.v2.fluid.regularizer .. automodule:: paddle.v2.fluid.regularizer
:members: L1DecayRegularizer :members: L1DecayRegularizer
......
# Design Doc: The Keys of Operator Kernel Type
## Problem
An operator can have multiple kernel implementations, and each operator maintains a map of its kernels. Fluid uses `OpKernelType` as the key that identifies a unique kernel. Before an operator runs, a certain kernel must be chosen by an `OpKernelType` key. Currently, `OpKernelType` is defined as follows:
```cpp
struct OpKernelType {
platform::Place place_;
proto::DataType data_type_;
};
```
For more details, please refer to the [code](https://github.com/PaddlePaddle/Paddle/blob/2d5ec16bc8a09fb8e0f62c89b116b0cd1d333907/paddle/framework/operator.h#L348-L374) on GitHub.
It contains two keys, `Place` and `DataType`, which are hashed into a unique key that represents a certain kernel type. However, these two keys are not enough; we need a more complete representation of `OpKernelType`.
We often implement an operator kernel with a computing library on a certain device (place). Note that computing library and device do not correspond one to one: a device can be served by many computing libraries, and a computing library can also support several devices.
For example, the Eigen library supports Nvidia GPU, AMD GPU, and CPU, while the MKLDNN library supports Intel CPU and Intel FPGA. Therefore, both `Place` and `Library` should be keys of `OpKernelType`.
It is obvious that different data types, such as fp64/fp32/int8, require different kernels. But the data layout of a Tensor also leads to different implementations; see the batch norm operator [kernels](https://github.com/PaddlePaddle/Paddle/blob/a948fac4d0ad7e0412d373b8aabeb711c2899563/paddle/operators/batch_norm_op.cc#L180-L209). So data layout should be taken into consideration as well.
## Solution
Four keys determine the kernel type of an operator: `Place`, `Library`, `DataType`, and `Layout`.
```cpp
struct OpKernelType {
platform::Place place_;
platform::Library library_;
proto::DataType data_type_;
framework::Layout layout_;
};
```
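As a rough illustration of how the four keys could be combined into a single map key (a sketch only; `Place`, `Library`, `DataType`, and `Layout` below are simplified stand-ins, not Fluid's real types), consider:

```cpp
#include <cstddef>

// Simplified stand-ins for the Fluid types named in the struct above.
enum class Place { kCPU, kCUDA };
enum class Library { kPlain, kMKLDNN, kCUDNN };
enum class DataType { kFP32, kFP64, kINT32, kINT64 };
enum class Layout { kNCHW, kNHWC };

struct OpKernelType {
  Place place_;
  Library library_;
  DataType data_type_;
  Layout layout_;

  bool operator==(const OpKernelType& o) const {
    return place_ == o.place_ && library_ == o.library_ &&
           data_type_ == o.data_type_ && layout_ == o.layout_;
  }
};

// Combine the four keys into one hash value so OpKernelType can index
// an unordered kernel map.
struct OpKernelTypeHash {
  std::size_t operator()(const OpKernelType& t) const {
    std::size_t seed = 0;
    auto mix = [&seed](std::size_t v) {
      seed ^= v + 0x9e3779b9 + (seed << 6) + (seed >> 2);
    };
    mix(static_cast<std::size_t>(t.place_));
    mix(static_cast<std::size_t>(t.library_));
    mix(static_cast<std::size_t>(t.data_type_));
    mix(static_cast<std::size_t>(t.layout_));
    return seed;
  }
};
```

With an equality operator and a hash like this, the kernel map can be an `std::unordered_map` keyed by `OpKernelType`.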
The details of each key are as follows:
### Place
`Place` is defined as follows:
```cpp
typedef boost::variant<CUDAPlace, ROCmPlace, FPGAPlace, CPUPlace> Place;
```
`Place` represents the device memory where the data resides.
### Library
One operator kernel is usually implemented based on one library. `Library` is defined as an enum variable:
```cpp
enum Library { Plain, MKLDNN, CUDNN };
```
We use the `Plain` enumerator to represent the default library. Since most operators in Fluid are implemented with the `Eigen` library, we treat the `Eigen` library as `Plain`.
A library usually has a corresponding `DeviceContext` that contains the handles needed for computation. Fluid currently has two default DeviceContexts, for CPU and CUDA: `CPUDeviceContext` and `CUDADeviceContext`. `CPUDeviceContext` contains an Eigen library handle, and `CUDADeviceContext` contains an Eigen library handle and a cuBLAS handle.
If we want to support a new library, a new enumerator needs to be added to `Library`, and a corresponding `LibraryDeviceContext` will be created.
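As a minimal sketch of this correspondence (a hypothetical class layout, not Fluid's actual definitions), each library's context simply carries the handles that its kernels need:

```cpp
// Hypothetical, simplified hierarchy; the real Fluid contexts wrap Eigen,
// cuBLAS and (for a new library) MKLDNN handles as described above.
struct DeviceContext {
  virtual ~DeviceContext() = default;
};

struct CPUDeviceContext : public DeviceContext {
  // Eigen device handle for CPU kernels would live here.
};

struct CUDADeviceContext : public DeviceContext {
  // Eigen GPU device handle and a cuBLAS handle would live here.
};

// Supporting a new library means adding a Library enumerator plus its own
// context type, e.g. an MKLDNNDeviceContext holding an MKLDNN engine.
```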
### DataType
`DataType` is defined in [framework.proto](https://github.com/PaddlePaddle/Paddle/blob/develop/paddle/framework/framework.proto). Currently, int32/int64/fp32/fp64 are supported.
### Layout
A Tensor is essentially a view of a block of memory. Besides a pointer to the memory, we also need other descriptions of this block, such as shape (ddim), stride, and layout.
Different layouts lead to different kernel implementations. There are four main principles we follow to support layout in the Fluid framework:
- We take layout as a data member of Tensor. Layout is actually an enum variable. If Fluid is built with MKLDNN, the MKLDNN memory formats are added to this enum as well.
- Users have to set the layout of input data, and some operators, such as fill_constant/random, also have to set the layout of the data they generate. Of course, there can be a default layout, such as NCHW.
- Layout is inferred at run-time, not at compile-time.
- Every operator has to implement different kernels for different layouts. Take MKLDNN as an example: to implement an MKLDNN convolution operator, we have to implement kernels for all the layouts listed [here](http://01org.github.io/mkl-dnn/structmkldnn_1_1memory.html), and a special macro will be provided to register kernels for MKLDNN operators.
`Layout` is also defined as an enum variable:
```cpp
enum Layout {
kNCHW,
kNHWC,
#ifdef PADDLE_WITH_MKLDNN
knChw8c
...
#endif
};
```
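Putting the pieces together, kernel selection before an operator runs could then look roughly like the following (again a sketch that reuses the simplified `OpKernelType` and `OpKernelTypeHash` above, not Fluid's real registration API):

```cpp
#include <stdexcept>
#include <unordered_map>

// A kernel is modeled as a plain function pointer for brevity.
using OpKernelFn = void (*)();

void ConvCPUPlainFP32NCHW() { /* compute the convolution */ }

int main() {
  std::unordered_map<OpKernelType, OpKernelFn, OpKernelTypeHash> kernels;

  // Register one kernel under its full four-part key.
  kernels[OpKernelType{Place::kCPU, Library::kPlain, DataType::kFP32,
                       Layout::kNCHW}] = ConvCPUPlainFP32NCHW;

  // Before the operator runs, build the expected key and pick the kernel.
  OpKernelType expected{Place::kCPU, Library::kPlain, DataType::kFP32,
                        Layout::kNCHW};
  auto it = kernels.find(expected);
  if (it == kernels.end()) {
    throw std::runtime_error("no kernel registered for this OpKernelType");
  }
  it->second();  // run the chosen kernel
  return 0;
}
```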
...@@ -37,11 +37,11 @@ PaddlePaddle可以使用常用的Python包管理工具 ...@@ -37,11 +37,11 @@ PaddlePaddle可以使用常用的Python包管理工具
:header: "版本说明", "cp27-cp27mu", "cp27-cp27m", "C-API" :header: "版本说明", "cp27-cp27mu", "cp27-cp27m", "C-API"
:widths: 1, 3, 3, 3 :widths: 1, 3, 3, 3
"cpu_avx_mkl", "`paddlepaddle-0.10.0-cp27-cp27mu-linux_x86_64.whl <http://guest@paddleci.ngrok.io/repository/download/Manylinux1_CpuAvxCp27cp27mu/.lastSuccessful/paddlepaddle-0.10.0-cp27-cp27mu-linux_x86_64.whl>`_", "`paddlepaddle-0.10.0-cp27-cp27m-linux_x86_64.whl <http://guest@paddleci.ngrok.io/repository/download/Manylinux1_CpuAvxCp27cp27mu/.lastSuccessful/paddlepaddle-0.10.0-cp27-cp27m-linux_x86_64.whl>`_", "`paddle.tgz <http://guest@paddleci.ngrok.io/repository/download/Manylinux1_CpuAvxCp27cp27mu/.lastSuccessful/paddle.tgz>`_" "cpu_avx_mkl", "`paddlepaddle-0.11.0-cp27-cp27mu-linux_x86_64.whl <https://guest:@paddleci.ngrok.io/repository/download/Manylinux1_CpuAvxCp27cp27mu/.lastSuccessful/paddlepaddle-0.11.0-cp27-cp27mu-linux_x86_64.whl>`_", "`paddlepaddle-0.11.0-cp27-cp27m-linux_x86_64.whl <https://guest:@paddleci.ngrok.io/repository/download/Manylinux1_CpuAvxCp27cp27mu/.lastSuccessful/paddlepaddle-0.11.0-cp27-cp27m-linux_x86_64.whl>`_", "`paddle.tgz <https://guest:@paddleci.ngrok.io/repository/download/Manylinux1_CpuAvxCp27cp27mu/.lastSuccessful/paddle.tgz>`_"
"cpu_avx_openblas", "`paddlepaddle-0.10.0-cp27-cp27mu-linux_x86_64.whl <http://guest@paddleci.ngrok.io/repository/download/Manylinux1_CpuAvxOpenblas/.lastSuccessful/paddlepaddle-0.10.0-cp27-cp27mu-linux_x86_64.whl>`_", "`paddlepaddle-0.10.0-cp27-cp27m-linux_x86_64.whl <http://guest@paddleci.ngrok.io/repository/download/Manylinux1_CpuAvxOpenblas/.lastSuccessful/paddlepaddle-0.10.0-cp27-cp27m-linux_x86_64.whl>`_", "暂无" "cpu_avx_openblas", "`paddlepaddle-0.11.0-cp27-cp27mu-linux_x86_64.whl <https://guest:@paddleci.ngrok.io/repository/download/Manylinux1_CpuAvxOpenblas/.lastSuccessful/paddlepaddle-0.11.0-cp27-cp27mu-linux_x86_64.whl>`_", "`paddlepaddle-0.11.0-cp27-cp27m-linux_x86_64.whl <https://guest:@paddleci.ngrok.io/repository/download/Manylinux1_CpuAvxOpenblas/.lastSuccessful/paddlepaddle-0.11.0-cp27-cp27m-linux_x86_64.whl>`_", "暂无"
"cuda7.5_cudnn5_avx_mkl", "`paddlepaddle-0.10.0-cp27-cp27mu-linux_x86_64.whl <http://guest@paddleci.ngrok.io/repository/download/Manylinux1_Cuda75cudnn5cp27cp27mu/.lastSuccessful/paddlepaddle-0.10.0-cp27-cp27mu-linux_x86_64.whl>`_", "`paddlepaddle-0.10.0-cp27-cp27m-linux_x86_64.whl <http://guest@paddleci.ngrok.io/repository/download/Manylinux1_Cuda75cudnn5cp27cp27mu/.lastSuccessful/paddlepaddle-0.10.0-cp27-cp27m-linux_x86_64.whl>`_", "`paddle.tgz <http://guest@paddleci.ngrok.io/repository/download/Manylinux1_Cuda75cudnn5cp27cp27mu/.lastSuccessful/paddle.tgz>`_" "cuda7.5_cudnn5_avx_mkl", "`paddlepaddle_gpu-0.11.0-cp27-cp27mu-linux_x86_64.whl <https://guest:@paddleci.ngrok.io/repository/download/Manylinux1_Cuda75cudnn5cp27cp27mu/.lastSuccessful/paddlepaddle_gpu-0.11.0-cp27-cp27mu-linux_x86_64.whl>`_", "`paddlepaddle_gpu-0.11.0-cp27-cp27m-linux_x86_64.whl <https://guest:@paddleci.ngrok.io/repository/download/Manylinux1_Cuda75cudnn5cp27cp27mu/.lastSuccessful/paddlepaddle_gpu-0.11.0-cp27-cp27m-linux_x86_64.whl>`_", "`paddle.tgz <https://guest:@paddleci.ngrok.io/repository/download/Manylinux1_Cuda75cudnn5cp27cp27mu/.lastSuccessful/paddle.tgz>`_"
"cuda8.0_cudnn5_avx_mkl", "`paddlepaddle-0.10.0-cp27-cp27mu-linux_x86_64.whl <http://guest@paddleci.ngrok.io/repository/download/Manylinux1_Cuda80cudnn5cp27cp27mu/.lastSuccessful/paddlepaddle-0.10.0-cp27-cp27mu-linux_x86_64.whl>`_", "`paddlepaddle-0.10.0-cp27-cp27m-linux_x86_64.whl <http://guest@paddleci.ngrok.io/repository/download/Manylinux1_Cuda80cudnn5cp27cp27mu/.lastSuccessful/paddlepaddle-0.10.0-cp27-cp27m-linux_x86_64.whl>`_", "`paddle.tgz <http://guest@paddleci.ngrok.io/repository/download/Manylinux1_Cuda80cudnn5cp27cp27mu/.lastSuccessful/paddle.tgz>`_" "cuda8.0_cudnn5_avx_mkl", "`paddlepaddle_gpu-0.11.0-cp27-cp27mu-linux_x86_64.whl <https://guest:@paddleci.ngrok.io/repository/download/Manylinux1_Cuda80cudnn5cp27cp27mu/.lastSuccessful/paddlepaddle_gpu-0.11.0-cp27-cp27mu-linux_x86_64.whl>`_", "`paddlepaddle_gpu-0.11.0-cp27-cp27m-linux_x86_64.whl <https://guest:@paddleci.ngrok.io/repository/download/Manylinux1_Cuda80cudnn5cp27cp27mu/.lastSuccessful/paddlepaddle_gpu-0.11.0-cp27-cp27m-linux_x86_64.whl>`_", "`paddle.tgz <https://guest:@paddleci.ngrok.io/repository/download/Manylinux1_Cuda80cudnn5cp27cp27mu/.lastSuccessful/paddle.tgz>`_"
"cuda8.0_cudnn7_avx_mkl", "`paddlepaddle-0.10.0-cp27-cp27mu-linux_x86_64.whl <http://guest@paddleci.ngrok.io/repository/download/Manylinux1_Cuda8cudnn7cp27cp27mu/.lastSuccessful/paddlepaddle-0.10.0-cp27-cp27mu-linux_x86_64.whl>`_", "`paddlepaddle-0.10.0-cp27-cp27m-linux_x86_64.whl <http://guest@paddleci.ngrok.io/repository/download/Manylinux1_Cuda8cudnn7cp27cp27mu/.lastSuccessful/paddlepaddle-0.10.0-cp27-cp27m-linux_x86_64.whl>`_", "`paddle.tgz <http://guest@paddleci.ngrok.io/repository/download/Manylinux1_Cuda8cudnn7cp27cp27mu/.lastSuccessful/paddle.tgz>`_" "cuda8.0_cudnn7_avx_mkl", "`paddlepaddle_gpu-0.11.0-cp27-cp27mu-linux_x86_64.whl <https://guest:@paddleci.ngrok.io/repository/download/Manylinux1_Cuda8cudnn7cp27cp27mu/.lastSuccessful/paddlepaddle_gpu-0.11.0-cp27-cp27mu-linux_x86_64.whl>`_", "`paddlepaddle_gpu-0.11.0-cp27-cp27m-linux_x86_64.whl <https://guest:@paddleci.ngrok.io/repository/download/Manylinux1_Cuda8cudnn7cp27cp27mu/.lastSuccessful/paddlepaddle_gpu-0.11.0-cp27-cp27m-linux_x86_64.whl>`_", "`paddle.tgz <https://guest:@paddleci.ngrok.io/repository/download/Manylinux1_Cuda8cudnn7cp27cp27mu/.lastSuccessful/paddle.tgz>`_"
.. _pip_dependency: .. _pip_dependency:
......
...@@ -40,11 +40,11 @@ If the links below shows up the login form, just click "Log in as guest" to star ...@@ -40,11 +40,11 @@ If the links below shows up the login form, just click "Log in as guest" to star
:header: "version", "cp27-cp27mu", "cp27-cp27m", "C-API" :header: "version", "cp27-cp27mu", "cp27-cp27m", "C-API"
:widths: 1, 3, 3, 3 :widths: 1, 3, 3, 3
"cpu_avx_mkl", "`paddlepaddle-0.10.0-cp27-cp27mu-linux_x86_64.whl <http://guest@paddleci.ngrok.io/repository/download/Manylinux1_CpuAvxCp27cp27mu/.lastSuccessful/paddlepaddle-0.10.0-cp27-cp27mu-linux_x86_64.whl>`_", "`paddlepaddle-0.10.0-cp27-cp27m-linux_x86_64.whl <http://guest@paddleci.ngrok.io/repository/download/Manylinux1_CpuAvxCp27cp27mu/.lastSuccessful/paddlepaddle-0.10.0-cp27-cp27m-linux_x86_64.whl>`_", "`paddle.tgz <http://guest@paddleci.ngrok.io/repository/download/Manylinux1_CpuAvxCp27cp27mu/.lastSuccessful/paddle.tgz>`_" "cpu_avx_mkl", "`paddlepaddle-0.11.0-cp27-cp27mu-linux_x86_64.whl <https://guest:@paddleci.ngrok.io/repository/download/Manylinux1_CpuAvxCp27cp27mu/.lastSuccessful/paddlepaddle-0.11.0-cp27-cp27mu-linux_x86_64.whl>`_", "`paddlepaddle-0.11.0-cp27-cp27m-linux_x86_64.whl <https://guest:@paddleci.ngrok.io/repository/download/Manylinux1_CpuAvxCp27cp27mu/.lastSuccessful/paddlepaddle-0.11.0-cp27-cp27m-linux_x86_64.whl>`_", "`paddle.tgz <https://guest:@paddleci.ngrok.io/repository/download/Manylinux1_CpuAvxCp27cp27mu/.lastSuccessful/paddle.tgz>`_"
"cpu_avx_openblas", "`paddlepaddle-0.10.0-cp27-cp27mu-linux_x86_64.whl <http://guest@paddleci.ngrok.io/repository/download/Manylinux1_CpuAvxOpenblas/.lastSuccessful/paddlepaddle-0.10.0-cp27-cp27mu-linux_x86_64.whl>`_", "`paddlepaddle-0.10.0-cp27-cp27m-linux_x86_64.whl <http://guest@paddleci.ngrok.io/repository/download/Manylinux1_CpuAvxOpenblas/.lastSuccessful/paddlepaddle-0.10.0-cp27-cp27m-linux_x86_64.whl>`_", "Not Available" "cpu_avx_openblas", "`paddlepaddle-0.11.0-cp27-cp27mu-linux_x86_64.whl <https://guest:@paddleci.ngrok.io/repository/download/Manylinux1_CpuAvxOpenblas/.lastSuccessful/paddlepaddle-0.11.0-cp27-cp27mu-linux_x86_64.whl>`_", "`paddlepaddle-0.11.0-cp27-cp27m-linux_x86_64.whl <https://guest:@paddleci.ngrok.io/repository/download/Manylinux1_CpuAvxOpenblas/.lastSuccessful/paddlepaddle-0.11.0-cp27-cp27m-linux_x86_64.whl>`_", "Not Available"
"cuda7.5_cudnn5_avx_mkl", "`paddlepaddle-0.10.0-cp27-cp27mu-linux_x86_64.whl <http://guest@paddleci.ngrok.io/repository/download/Manylinux1_Cuda75cudnn5cp27cp27mu/.lastSuccessful/paddlepaddle-0.10.0-cp27-cp27mu-linux_x86_64.whl>`_", "`paddlepaddle-0.10.0-cp27-cp27m-linux_x86_64.whl <http://guest@paddleci.ngrok.io/repository/download/Manylinux1_Cuda75cudnn5cp27cp27mu/.lastSuccessful/paddlepaddle-0.10.0-cp27-cp27m-linux_x86_64.whl>`_", "`paddle.tgz <http://guest@paddleci.ngrok.io/repository/download/Manylinux1_Cuda75cudnn5cp27cp27mu/.lastSuccessful/paddle.tgz>`_" "cuda7.5_cudnn5_avx_mkl", "`paddlepaddle_gpu-0.11.0-cp27-cp27mu-linux_x86_64.whl <https://guest:@paddleci.ngrok.io/repository/download/Manylinux1_Cuda75cudnn5cp27cp27mu/.lastSuccessful/paddlepaddle_gpu-0.11.0-cp27-cp27mu-linux_x86_64.whl>`_", "`paddlepaddle_gpu-0.11.0-cp27-cp27m-linux_x86_64.whl <https://guest:@paddleci.ngrok.io/repository/download/Manylinux1_Cuda75cudnn5cp27cp27mu/.lastSuccessful/paddlepaddle_gpu-0.11.0-cp27-cp27m-linux_x86_64.whl>`_", "`paddle.tgz <https://guest:@paddleci.ngrok.io/repository/download/Manylinux1_Cuda75cudnn5cp27cp27mu/.lastSuccessful/paddle.tgz>`_"
"cuda8.0_cudnn5_avx_mkl", "`paddlepaddle-0.10.0-cp27-cp27mu-linux_x86_64.whl <http://guest@paddleci.ngrok.io/repository/download/Manylinux1_Cuda80cudnn5cp27cp27mu/.lastSuccessful/paddlepaddle-0.10.0-cp27-cp27mu-linux_x86_64.whl>`_", "`paddlepaddle-0.10.0-cp27-cp27m-linux_x86_64.whl <http://guest@paddleci.ngrok.io/repository/download/Manylinux1_Cuda80cudnn5cp27cp27mu/.lastSuccessful/paddlepaddle-0.10.0-cp27-cp27m-linux_x86_64.whl>`_", "`paddle.tgz <http://guest@paddleci.ngrok.io/repository/download/Manylinux1_Cuda80cudnn5cp27cp27mu/.lastSuccessful/paddle.tgz>`_" "cuda8.0_cudnn5_avx_mkl", "`paddlepaddle_gpu-0.11.0-cp27-cp27mu-linux_x86_64.whl <https://guest:@paddleci.ngrok.io/repository/download/Manylinux1_Cuda80cudnn5cp27cp27mu/.lastSuccessful/paddlepaddle_gpu-0.11.0-cp27-cp27mu-linux_x86_64.whl>`_", "`paddlepaddle_gpu-0.11.0-cp27-cp27m-linux_x86_64.whl <https://guest:@paddleci.ngrok.io/repository/download/Manylinux1_Cuda80cudnn5cp27cp27mu/.lastSuccessful/paddlepaddle_gpu-0.11.0-cp27-cp27m-linux_x86_64.whl>`_", "`paddle.tgz <https://guest:@paddleci.ngrok.io/repository/download/Manylinux1_Cuda80cudnn5cp27cp27mu/.lastSuccessful/paddle.tgz>`_"
"cuda8.0_cudnn7_avx_mkl", "`paddlepaddle-0.10.0-cp27-cp27mu-linux_x86_64.whl <http://guest@paddleci.ngrok.io/repository/download/Manylinux1_Cuda8cudnn7cp27cp27mu/.lastSuccessful/paddlepaddle-0.10.0-cp27-cp27mu-linux_x86_64.whl>`_", "`paddlepaddle-0.10.0-cp27-cp27m-linux_x86_64.whl <http://guest@paddleci.ngrok.io/repository/download/Manylinux1_Cuda8cudnn7cp27cp27mu/.lastSuccessful/paddlepaddle-0.10.0-cp27-cp27m-linux_x86_64.whl>`_", "`paddle.tgz <http://guest@paddleci.ngrok.io/repository/download/Manylinux1_Cuda8cudnn7cp27cp27mu/.lastSuccessful/paddle.tgz>`_" "cuda8.0_cudnn7_avx_mkl", "`paddlepaddle_gpu-0.11.0-cp27-cp27mu-linux_x86_64.whl <https://guest:@paddleci.ngrok.io/repository/download/Manylinux1_Cuda8cudnn7cp27cp27mu/.lastSuccessful/paddlepaddle_gpu-0.11.0-cp27-cp27mu-linux_x86_64.whl>`_", "`paddlepaddle_gpu-0.11.0-cp27-cp27m-linux_x86_64.whl <https://guest:@paddleci.ngrok.io/repository/download/Manylinux1_Cuda8cudnn7cp27cp27mu/.lastSuccessful/paddlepaddle_gpu-0.11.0-cp27-cp27m-linux_x86_64.whl>`_", "`paddle.tgz <https://guest:@paddleci.ngrok.io/repository/download/Manylinux1_Cuda8cudnn7cp27cp27mu/.lastSuccessful/paddle.tgz>`_"
.. _pip_dependency: .. _pip_dependency:
......
...@@ -42,7 +42,7 @@ static std::unordered_set<std::string>& CtrlFlowOps() { ...@@ -42,7 +42,7 @@ static std::unordered_set<std::string>& CtrlFlowOps() {
static inline std::unique_ptr<OperatorBase> CreateGradOp( static inline std::unique_ptr<OperatorBase> CreateGradOp(
const OperatorBase& op, const std::unordered_set<std::string>& no_grad_set, const OperatorBase& op, const std::unordered_set<std::string>& no_grad_set,
std::unordered_map<std::string, std::string>* grad_to_var) { std::unordered_map<std::string, std::string>* grad_to_var) {
OpDescBind op_desc; OpDesc op_desc;
op_desc.SetInputMap(op.Inputs()); op_desc.SetInputMap(op.Inputs());
op_desc.SetOutputMap(op.Outputs()); op_desc.SetOutputMap(op.Outputs());
op_desc.SetType(op.Type()); op_desc.SetType(op.Type());
...@@ -53,7 +53,7 @@ static inline std::unique_ptr<OperatorBase> CreateGradOp( ...@@ -53,7 +53,7 @@ static inline std::unique_ptr<OperatorBase> CreateGradOp(
grad_ops.reserve(grad_descs.size()); grad_ops.reserve(grad_descs.size());
std::transform(grad_descs.begin(), grad_descs.end(), std::transform(grad_descs.begin(), grad_descs.end(),
std::back_inserter(grad_ops), std::back_inserter(grad_ops),
[](const std::unique_ptr<OpDescBind>& grad_desc) { [](const std::unique_ptr<OpDesc>& grad_desc) {
return OpRegistry::CreateOp(*grad_desc); return OpRegistry::CreateOp(*grad_desc);
}); });
PADDLE_ENFORCE(!grad_ops.empty()); PADDLE_ENFORCE(!grad_ops.empty());
...@@ -217,7 +217,7 @@ static std::unique_ptr<OperatorBase> BackwardRecursive( ...@@ -217,7 +217,7 @@ static std::unique_ptr<OperatorBase> BackwardRecursive(
// If part of input gradient of that operator is not calculated, fill // If part of input gradient of that operator is not calculated, fill
// zero variables to that input gradient. // zero variables to that input gradient.
net->AppendOp(OpRegistry::CreateOp("fill_zeros_like", {{"X", {prefix}}}, net->AppendOp(OpRegistry::CreateOp("fill_zeros_like", {{"X", {prefix}}},
{{"Y", {grad_input}}}, {{"Out", {grad_input}}},
AttributeMap{})); AttributeMap{}));
} }
return false; return false;
...@@ -296,7 +296,7 @@ static std::string FwdName(const std::string& grad_name) { ...@@ -296,7 +296,7 @@ static std::string FwdName(const std::string& grad_name) {
static void CreateGradVarInBlock( static void CreateGradVarInBlock(
size_t grad_op_start_index, size_t grad_op_start_index,
const std::unordered_map<std::string, std::string>& param_name_map, const std::unordered_map<std::string, std::string>& param_name_map,
BlockDescBind* block_desc, BlockDesc* block_desc,
std::unordered_map<std::string, GradVarInfo>* grad_var_record) { std::unordered_map<std::string, GradVarInfo>* grad_var_record) {
auto ops = block_desc->AllOps(); auto ops = block_desc->AllOps();
for (size_t op_index = grad_op_start_index; op_index < ops.size(); for (size_t op_index = grad_op_start_index; op_index < ops.size();
...@@ -350,12 +350,11 @@ static void CreateGradVarInBlock( ...@@ -350,12 +350,11 @@ static void CreateGradVarInBlock(
} }
} }
std::vector<std::unique_ptr<OpDescBind>> MakeOpGrad( std::vector<std::unique_ptr<OpDesc>> MakeOpGrad(
const OpDescBind* op_desc, std::unordered_set<std::string>* no_grad_vars, const OpDesc* op_desc, std::unordered_set<std::string>* no_grad_vars,
std::unordered_map<std::string, std::string>* grad_to_var, std::unordered_map<std::string, std::string>* grad_to_var,
const std::vector<BlockDescBind*>& grad_block = const std::vector<BlockDesc*>& grad_block = std::vector<BlockDesc*>()) {
std::vector<BlockDescBind*>()) { std::vector<std::unique_ptr<OpDesc>> grad_op_descs;
std::vector<std::unique_ptr<OpDescBind>> grad_op_descs;
// All input gradients of forwarding operator do not need to calculate. // All input gradients of forwarding operator do not need to calculate.
const std::vector<std::string>& inputs = op_desc->InputArgumentNames(); const std::vector<std::string>& inputs = op_desc->InputArgumentNames();
if (AllGradInSet(inputs, *no_grad_vars)) { if (AllGradInSet(inputs, *no_grad_vars)) {
...@@ -386,7 +385,7 @@ std::vector<std::unique_ptr<OpDescBind>> MakeOpGrad( ...@@ -386,7 +385,7 @@ std::vector<std::unique_ptr<OpDescBind>> MakeOpGrad(
.Get(op_desc->Type()) .Get(op_desc->Type())
.GradOpMaker()(*op_desc, *no_grad_vars, grad_to_var, grad_block); .GradOpMaker()(*op_desc, *no_grad_vars, grad_to_var, grad_block);
std::list<std::unique_ptr<OpDescBind>> pending_fill_zeros_ops; std::list<std::unique_ptr<OpDesc>> pending_fill_zeros_ops;
for (auto& desc : grad_op_descs) { for (auto& desc : grad_op_descs) {
for (const std::string& in_name : desc->InputArgumentNames()) { for (const std::string& in_name : desc->InputArgumentNames()) {
if (no_grad_vars->count(in_name)) { if (no_grad_vars->count(in_name)) {
...@@ -394,9 +393,9 @@ std::vector<std::unique_ptr<OpDescBind>> MakeOpGrad( ...@@ -394,9 +393,9 @@ std::vector<std::unique_ptr<OpDescBind>> MakeOpGrad(
0, in_name.size() - sizeof(kGradVarSuffix) / sizeof(char) + 1); 0, in_name.size() - sizeof(kGradVarSuffix) / sizeof(char) + 1);
std::string new_name = prefix + kZeroVarSuffix; std::string new_name = prefix + kZeroVarSuffix;
desc->Rename(in_name, new_name); desc->Rename(in_name, new_name);
std::unique_ptr<OpDescBind> fill_zeros_op( std::unique_ptr<OpDesc> fill_zeros_op(
new OpDescBind("fill_zeros_like", {{"X", {prefix}}}, new OpDesc("fill_zeros_like", {{"X", {prefix}}},
{{"Y", {new_name}}}, AttributeMap{})); {{"Out", {new_name}}}, AttributeMap{}));
pending_fill_zeros_ops.push_back(std::move(fill_zeros_op)); pending_fill_zeros_ops.push_back(std::move(fill_zeros_op));
} }
} }
...@@ -408,34 +407,33 @@ std::vector<std::unique_ptr<OpDescBind>> MakeOpGrad( ...@@ -408,34 +407,33 @@ std::vector<std::unique_ptr<OpDescBind>> MakeOpGrad(
return grad_op_descs; return grad_op_descs;
} }
static BlockDescBind* CreateStepBlock( static BlockDesc* CreateStepBlock(
ProgramDescBind& program_desc, ProgramDesc& program_desc, std::unordered_set<std::string>* no_grad_vars,
std::unordered_set<std::string>* no_grad_vars,
std::unordered_map<std::string, std::string>* grad_to_var, std::unordered_map<std::string, std::string>* grad_to_var,
int step_block_idx); int step_block_idx);
std::vector<std::unique_ptr<OpDescBind>> MakeBlockBackward( std::vector<std::unique_ptr<OpDesc>> MakeBlockBackward(
ProgramDescBind& program_desc, int block_idx, ProgramDesc& program_desc, int block_idx,
std::unordered_set<std::string>* no_grad_vars, std::unordered_set<std::string>* no_grad_vars,
std::unordered_map<std::string, std::string>* grad_to_var) { std::unordered_map<std::string, std::string>* grad_to_var) {
VLOG(5) << "MakeBlockBackward"; VLOG(5) << "MakeBlockBackward";
BlockDescBind* cur_block = program_desc.MutableBlock(block_idx); BlockDesc* cur_block = program_desc.MutableBlock(block_idx);
std::vector<OpDescBind*> op_descs = cur_block->AllOps(); std::vector<OpDesc*> op_descs = cur_block->AllOps();
std::unordered_map<std::string, std::vector<size_t>> dup_out_ops; std::unordered_map<std::string, std::vector<size_t>> dup_out_ops;
size_t grad_desc_idx = 0; size_t grad_desc_idx = 0;
std::vector<std::unique_ptr<OpDescBind>> backward_descs; std::vector<std::unique_ptr<OpDesc>> backward_descs;
for (auto it = op_descs.rbegin(); it != op_descs.rend(); ++it) { for (auto it = op_descs.rbegin(); it != op_descs.rend(); ++it) {
VLOG(5) << "Making backward " << (*it)->Type() << " op"; VLOG(5) << "Making backward " << (*it)->Type() << " op";
std::vector<std::unique_ptr<OpDescBind>> op_grads; std::vector<std::unique_ptr<OpDesc>> op_grads;
if ((*it)->Type() == "recurrent" || (*it)->Type() == "while") { if ((*it)->Type() == "recurrent" || (*it)->Type() == "while") {
int step_block_idx = (*it)->GetBlockAttr("sub_block"); int step_block_idx = (*it)->GetBlockAttr("sub_block");
BlockDescBind* backward_block = CreateStepBlock( BlockDesc* backward_block = CreateStepBlock(program_desc, no_grad_vars,
program_desc, no_grad_vars, grad_to_var, step_block_idx); grad_to_var, step_block_idx);
op_grads = MakeOpGrad(*it, no_grad_vars, grad_to_var, {backward_block}); op_grads = MakeOpGrad(*it, no_grad_vars, grad_to_var, {backward_block});
} else if ((*it)->Type() == "conditional_block") { } else if ((*it)->Type() == "conditional_block") {
BlockDescBind* backward_block = BlockDesc* backward_block =
CreateStepBlock(program_desc, no_grad_vars, grad_to_var, CreateStepBlock(program_desc, no_grad_vars, grad_to_var,
(*it)->GetBlockAttr("sub_block")); (*it)->GetBlockAttr("sub_block"));
op_grads = MakeOpGrad(*it, no_grad_vars, grad_to_var, {backward_block}); op_grads = MakeOpGrad(*it, no_grad_vars, grad_to_var, {backward_block});
...@@ -463,14 +461,14 @@ std::vector<std::unique_ptr<OpDescBind>> MakeBlockBackward( ...@@ -463,14 +461,14 @@ std::vector<std::unique_ptr<OpDescBind>> MakeBlockBackward(
} }
++grad_desc_idx; ++grad_desc_idx;
} }
std::transform( std::transform(op_grads.begin(), op_grads.end(),
op_grads.begin(), op_grads.end(), std::back_inserter(backward_descs), std::back_inserter(backward_descs),
[](std::unique_ptr<OpDescBind>& ptr) { return std::move(ptr); }); [](std::unique_ptr<OpDesc>& ptr) { return std::move(ptr); });
} }
VLOG(5) << "Appending Sums"; VLOG(5) << "Appending Sums";
// Check whether some variables are written more than once // Check whether some variables are written more than once
std::list<std::pair<size_t, std::unique_ptr<OpDescBind>>> pending_sum_ops; std::list<std::pair<size_t, std::unique_ptr<OpDesc>>> pending_sum_ops;
for (const auto& dup : dup_out_ops) { for (const auto& dup : dup_out_ops) {
const std::string& out_name = dup.first; const std::string& out_name = dup.first;
const std::vector<size_t> dup_op = dup.second; const std::vector<size_t> dup_op = dup.second;
...@@ -486,16 +484,15 @@ std::vector<std::unique_ptr<OpDescBind>> MakeBlockBackward( ...@@ -486,16 +484,15 @@ std::vector<std::unique_ptr<OpDescBind>> MakeBlockBackward(
sum_op_inputs.emplace_back(new_name); sum_op_inputs.emplace_back(new_name);
next_g_name = sum_op_inputs.back(); next_g_name = sum_op_inputs.back();
} }
std::unique_ptr<OpDescBind> sum_op( std::unique_ptr<OpDesc> sum_op(new OpDesc("sum", {{"X", sum_op_inputs}},
new OpDescBind("sum", {{"X", sum_op_inputs}}, {{"Out", {out_name}}}, {{"Out", {out_name}}},
AttributeMap{})); AttributeMap{}));
pending_sum_ops.push_back({dup_op.back(), std::move(sum_op)}); pending_sum_ops.push_back({dup_op.back(), std::move(sum_op)});
} }
} }
pending_sum_ops.sort( pending_sum_ops.sort([](const std::pair<size_t, std::unique_ptr<OpDesc>>& a,
[](const std::pair<size_t, std::unique_ptr<OpDescBind>>& a, const std::pair<size_t, std::unique_ptr<OpDesc>>& b) {
const std::pair<size_t, std::unique_ptr<OpDescBind>>& b) {
return a.first > b.first; return a.first > b.first;
}); });
for (auto& p : pending_sum_ops) { for (auto& p : pending_sum_ops) {
...@@ -508,14 +505,13 @@ std::vector<std::unique_ptr<OpDescBind>> MakeBlockBackward( ...@@ -508,14 +505,13 @@ std::vector<std::unique_ptr<OpDescBind>> MakeBlockBackward(
return backward_descs; return backward_descs;
} }
static BlockDescBind* CreateStepBlock( static BlockDesc* CreateStepBlock(
ProgramDescBind& program_desc, ProgramDesc& program_desc, std::unordered_set<std::string>* no_grad_vars,
std::unordered_set<std::string>* no_grad_vars,
std::unordered_map<std::string, std::string>* grad_to_var, std::unordered_map<std::string, std::string>* grad_to_var,
int step_block_idx) { int step_block_idx) {
auto backward_block_op_descs = MakeBlockBackward(program_desc, step_block_idx, auto backward_block_op_descs = MakeBlockBackward(program_desc, step_block_idx,
no_grad_vars, grad_to_var); no_grad_vars, grad_to_var);
BlockDescBind* backward_block = BlockDesc* backward_block =
program_desc.AppendBlock(*program_desc.MutableBlock(step_block_idx)); program_desc.AppendBlock(*program_desc.MutableBlock(step_block_idx));
for (auto& ptr : backward_block_op_descs) { for (auto& ptr : backward_block_op_descs) {
backward_block->AppendAllocatedOp(move(ptr)); backward_block->AppendAllocatedOp(move(ptr));
...@@ -524,7 +520,7 @@ static BlockDescBind* CreateStepBlock( ...@@ -524,7 +520,7 @@ static BlockDescBind* CreateStepBlock(
} }
ParamGradInfoMap AppendBackward( ParamGradInfoMap AppendBackward(
ProgramDescBind& program_desc, const VarDescBind& target, ProgramDesc& program_desc, const VarDesc& target,
const std::unordered_set<std::string>& no_grad_vars) { const std::unordered_set<std::string>& no_grad_vars) {
std::unordered_set<std::string> no_grad_var_names; std::unordered_set<std::string> no_grad_var_names;
no_grad_var_names.reserve(no_grad_vars.size() + 1); no_grad_var_names.reserve(no_grad_vars.size() + 1);
...@@ -541,8 +537,8 @@ ParamGradInfoMap AppendBackward( ...@@ -541,8 +537,8 @@ ParamGradInfoMap AppendBackward(
PADDLE_ENFORCE(is_scalar, "target should be scalar"); PADDLE_ENFORCE(is_scalar, "target should be scalar");
VLOG(3) << "backward from loss=" << target.Name() VLOG(3) << "backward from loss=" << target.Name()
<< " data_type=" << target.GetDataType(); << " data_type=" << target.GetDataType();
std::unique_ptr<OpDescBind> fill_one_op( std::unique_ptr<OpDesc> fill_one_op(
new OpDescBind("fill_constant", {}, {{"Out", {fill_one_op_out}}}, new OpDesc("fill_constant", {}, {{"Out", {fill_one_op_out}}},
{{"shape", std::vector<int>{1}}, {{"shape", std::vector<int>{1}},
{"value", static_cast<float>(1.0)}, {"value", static_cast<float>(1.0)},
{"dtype", target.GetDataType()}})); {"dtype", target.GetDataType()}}));
......
...@@ -49,7 +49,7 @@ using ParamGradInfoMap = std::unordered_map<std::string /*fwd_var_name*/, ...@@ -49,7 +49,7 @@ using ParamGradInfoMap = std::unordered_map<std::string /*fwd_var_name*/,
GradVarInfo /*grad_var_info*/>; GradVarInfo /*grad_var_info*/>;
ParamGradInfoMap AppendBackward( ParamGradInfoMap AppendBackward(
ProgramDescBind& program_desc, const VarDescBind& target, ProgramDesc& program_desc, const VarDesc& target,
const std::unordered_set<std::string>& no_grad_vars); const std::unordered_set<std::string>& no_grad_vars);
} // namespace framework } // namespace framework
......
...@@ -58,13 +58,13 @@ class RowWiseAddGradMaker : public SingleGradOpDescMaker { ...@@ -58,13 +58,13 @@ class RowWiseAddGradMaker : public SingleGradOpDescMaker {
using SingleGradOpDescMaker::SingleGradOpDescMaker; using SingleGradOpDescMaker::SingleGradOpDescMaker;
protected: protected:
std::unique_ptr<OpDescBind> Apply() const override { std::unique_ptr<OpDesc> Apply() const override {
auto grad_op = new OpDescBind(); auto grad_op = new OpDesc();
grad_op->SetInput(GradVarName("Out"), OutputGrad("Out")); grad_op->SetInput(GradVarName("Out"), OutputGrad("Out"));
grad_op->SetOutput(GradVarName("X"), InputGrad("X")); grad_op->SetOutput(GradVarName("X"), InputGrad("X"));
grad_op->SetOutput(GradVarName("b"), InputGrad("b")); grad_op->SetOutput(GradVarName("b"), InputGrad("b"));
grad_op->SetType("rowwise_add_grad"); grad_op->SetType("rowwise_add_grad");
return std::unique_ptr<OpDescBind>(grad_op); return std::unique_ptr<OpDesc>(grad_op);
} }
}; };
...@@ -159,7 +159,7 @@ class FillZeroOpMaker : public OpProtoAndCheckerMaker { ...@@ -159,7 +159,7 @@ class FillZeroOpMaker : public OpProtoAndCheckerMaker {
FillZeroOpMaker(OpProto *proto, OpAttrChecker *op_checker) FillZeroOpMaker(OpProto *proto, OpAttrChecker *op_checker)
: OpProtoAndCheckerMaker(proto, op_checker) { : OpProtoAndCheckerMaker(proto, op_checker) {
AddInput("X", "x"); AddInput("X", "x");
AddOutput("Y", "out"); AddOutput("Out", "out");
AddComment(""); AddComment("");
} }
}; };
...@@ -190,11 +190,11 @@ class MinusGradOpDescMaker : public GradOpDescMakerBase { ...@@ -190,11 +190,11 @@ class MinusGradOpDescMaker : public GradOpDescMakerBase {
public: public:
using GradOpDescMakerBase::GradOpDescMakerBase; using GradOpDescMakerBase::GradOpDescMakerBase;
std::vector<std::unique_ptr<OpDescBind>> operator()() const override { std::vector<std::unique_ptr<OpDesc>> operator()() const override {
std::vector<std::unique_ptr<OpDescBind>> retv; std::vector<std::unique_ptr<OpDesc>> retv;
auto x_g = InputGrad("X"); auto x_g = InputGrad("X");
if (!x_g.empty()) { if (!x_g.empty()) {
auto *op_desc = new OpDescBind(); auto *op_desc = new OpDesc();
op_desc->SetType("scale"); op_desc->SetType("scale");
op_desc->SetInput("X", OutputGrad("Out")); op_desc->SetInput("X", OutputGrad("Out"));
op_desc->SetOutput("Out", x_g); op_desc->SetOutput("Out", x_g);
...@@ -204,7 +204,7 @@ class MinusGradOpDescMaker : public GradOpDescMakerBase { ...@@ -204,7 +204,7 @@ class MinusGradOpDescMaker : public GradOpDescMakerBase {
auto y_g = InputGrad("Y"); auto y_g = InputGrad("Y");
if (!y_g.empty()) { if (!y_g.empty()) {
auto *op_desc = new OpDescBind(); auto *op_desc = new OpDesc();
op_desc->SetType("scale"); op_desc->SetType("scale");
op_desc->SetInput("X", OutputGrad("Out")); op_desc->SetInput("X", OutputGrad("Out"));
op_desc->SetOutput("Out", y_g); op_desc->SetOutput("Out", y_g);
...@@ -430,8 +430,8 @@ TEST(Backward, op_part_of_output_are_not_need) { ...@@ -430,8 +430,8 @@ TEST(Backward, op_part_of_output_are_not_need) {
ASSERT_EQ("fill_zeros_like", fill_zero.Type()); ASSERT_EQ("fill_zeros_like", fill_zero.Type());
ASSERT_EQ(1UL, fill_zero.Inputs("X").size()); ASSERT_EQ(1UL, fill_zero.Inputs("X").size());
ASSERT_EQ("Z", fill_zero.Input("X")); ASSERT_EQ("Z", fill_zero.Input("X"));
ASSERT_EQ(1UL, fill_zero.Outputs("Y").size()); ASSERT_EQ(1UL, fill_zero.Outputs("Out").size());
ASSERT_EQ(std::string("Z") + f::kZeroVarSuffix, fill_zero.Output("Y")); ASSERT_EQ(std::string("Z") + f::kZeroVarSuffix, fill_zero.Output("Out"));
auto &d_many_out = *net->ops_[1]; auto &d_many_out = *net->ops_[1];
ASSERT_EQ("many_output_op_grad", d_many_out.Type()); ASSERT_EQ("many_output_op_grad", d_many_out.Type());
...@@ -505,25 +505,25 @@ TEST(Backward, linear_net_intermediate_variable_has_no_grad) { ...@@ -505,25 +505,25 @@ TEST(Backward, linear_net_intermediate_variable_has_no_grad) {
} }
TEST(Backward, simple_single_op) { TEST(Backward, simple_single_op) {
f::ProgramDescBind program; f::ProgramDesc program;
f::BlockDescBind *block = program.MutableBlock(0); f::BlockDesc *block = program.MutableBlock(0);
f::OpDescBind *op = block->AppendOp(); f::OpDesc *op = block->AppendOp();
op->SetType("rowwise_add"); op->SetType("rowwise_add");
op->SetInput("X", {"x"}); op->SetInput("X", {"x"});
op->SetInput("b", {"b"}); op->SetInput("b", {"b"});
op->SetOutput("Out", {"out"}); op->SetOutput("Out", {"out"});
auto target = f::VarDescBind("out"); auto target = f::VarDesc("out");
target.SetShape({1}); target.SetShape({1});
auto var_to_grad = auto var_to_grad =
AppendBackward(program, target, std::unordered_set<std::string>{}); AppendBackward(program, target, std::unordered_set<std::string>{});
ASSERT_EQ(block->AllOps().size(), 3UL); ASSERT_EQ(block->AllOps().size(), 3UL);
f::OpDescBind *fill_op = block->AllOps()[1]; f::OpDesc *fill_op = block->AllOps()[1];
EXPECT_EQ(fill_op->Type(), "fill_constant"); EXPECT_EQ(fill_op->Type(), "fill_constant");
f::OpDescBind *grad_op = block->AllOps()[2]; f::OpDesc *grad_op = block->AllOps()[2];
EXPECT_EQ(grad_op->Type(), "rowwise_add_grad"); EXPECT_EQ(grad_op->Type(), "rowwise_add_grad");
ASSERT_EQ(grad_op->InputNames().size(), 1UL); ASSERT_EQ(grad_op->InputNames().size(), 1UL);
ASSERT_EQ(grad_op->OutputNames().size(), 2UL); ASSERT_EQ(grad_op->OutputNames().size(), 2UL);
...@@ -543,16 +543,16 @@ TEST(Backward, simple_single_op) { ...@@ -543,16 +543,16 @@ TEST(Backward, simple_single_op) {
} }
TEST(Backward, default_attribute) { TEST(Backward, default_attribute) {
f::ProgramDescBind program; f::ProgramDesc program;
f::BlockDescBind *block = program.MutableBlock(0); f::BlockDesc *block = program.MutableBlock(0);
f::OpDescBind *op = block->AppendOp(); f::OpDesc *op = block->AppendOp();
op->SetType("mul"); op->SetType("mul");
op->SetInput("X", {"x"}); op->SetInput("X", {"x"});
op->SetInput("Y", {"y"}); op->SetInput("Y", {"y"});
op->SetOutput("Out", {"out"}); op->SetOutput("Out", {"out"});
op->CheckAttrs(); op->CheckAttrs();
auto target = f::VarDescBind("out"); auto target = f::VarDesc("out");
target.SetShape({1}); target.SetShape({1});
AppendBackward(program, target, std::unordered_set<std::string>{}); AppendBackward(program, target, std::unordered_set<std::string>{});
...@@ -560,47 +560,47 @@ TEST(Backward, default_attribute) { ...@@ -560,47 +560,47 @@ TEST(Backward, default_attribute) {
EXPECT_EQ(boost::get<int>(op->GetAttr("x_num_col_dims")), 1); EXPECT_EQ(boost::get<int>(op->GetAttr("x_num_col_dims")), 1);
EXPECT_EQ(boost::get<int>(op->GetAttr("y_num_col_dims")), 1); EXPECT_EQ(boost::get<int>(op->GetAttr("y_num_col_dims")), 1);
f::OpDescBind *fill_op = block->AllOps()[1]; f::OpDesc *fill_op = block->AllOps()[1];
EXPECT_EQ(fill_op->Type(), "fill_constant"); EXPECT_EQ(fill_op->Type(), "fill_constant");
f::OpDescBind *grad_op = block->AllOps()[2]; f::OpDesc *grad_op = block->AllOps()[2];
ASSERT_EQ(grad_op->Type(), "mul_grad"); ASSERT_EQ(grad_op->Type(), "mul_grad");
EXPECT_EQ(boost::get<int>(grad_op->GetAttr("x_num_col_dims")), 1); EXPECT_EQ(boost::get<int>(grad_op->GetAttr("x_num_col_dims")), 1);
EXPECT_EQ(boost::get<int>(grad_op->GetAttr("y_num_col_dims")), 1); EXPECT_EQ(boost::get<int>(grad_op->GetAttr("y_num_col_dims")), 1);
} }
TEST(Backward, simple_mult_op) { TEST(Backward, simple_mult_op) {
f::ProgramDescBind program; f::ProgramDesc program;
f::BlockDescBind *block = program.MutableBlock(0); f::BlockDesc *block = program.MutableBlock(0);
f::OpDescBind *op1 = block->AppendOp(); f::OpDesc *op1 = block->AppendOp();
op1->SetType("rowwise_add"); op1->SetType("rowwise_add");
op1->SetInput("X", {"x1"}); op1->SetInput("X", {"x1"});
op1->SetInput("b", {"b1"}); op1->SetInput("b", {"b1"});
op1->SetOutput("Out", {"out1"}); op1->SetOutput("Out", {"out1"});
f::OpDescBind *op2 = block->AppendOp(); f::OpDesc *op2 = block->AppendOp();
op2->SetType("mul"); op2->SetType("mul");
op2->SetInput("X", {"out1"}); op2->SetInput("X", {"out1"});
op2->SetInput("Y", {"y2"}); op2->SetInput("Y", {"y2"});
op2->SetOutput("Out", {"out2"}); op2->SetOutput("Out", {"out2"});
f::OpDescBind *op3 = block->AppendOp(); f::OpDesc *op3 = block->AppendOp();
op3->SetType("rowwise_add"); op3->SetType("rowwise_add");
op3->SetInput("X", {"out2"}); op3->SetInput("X", {"out2"});
op3->SetInput("b", {"b3"}); op3->SetInput("b", {"b3"});
op3->SetOutput("Out", {"out3"}); op3->SetOutput("Out", {"out3"});
auto target = f::VarDescBind("out3"); auto target = f::VarDesc("out3");
target.SetShape({1}); target.SetShape({1});
size_t forward_len = block->AllOps().size(); size_t forward_len = block->AllOps().size();
auto var_to_grad = auto var_to_grad =
AppendBackward(program, target, std::unordered_set<std::string>{}); AppendBackward(program, target, std::unordered_set<std::string>{});
ASSERT_EQ(block->AllOps().size(), 6UL + 1); ASSERT_EQ(block->AllOps().size(), 6UL + 1);
f::OpDescBind *fill_op = block->AllOps()[forward_len]; f::OpDesc *fill_op = block->AllOps()[forward_len];
EXPECT_EQ(fill_op->Type(), "fill_constant"); EXPECT_EQ(fill_op->Type(), "fill_constant");
f::OpDescBind *grad_op1 = block->AllOps()[6]; f::OpDesc *grad_op1 = block->AllOps()[6];
EXPECT_EQ(grad_op1->Type(), "rowwise_add_grad"); EXPECT_EQ(grad_op1->Type(), "rowwise_add_grad");
ASSERT_EQ(grad_op1->InputNames().size(), 1UL); ASSERT_EQ(grad_op1->InputNames().size(), 1UL);
ASSERT_EQ(grad_op1->OutputNames().size(), 2UL); ASSERT_EQ(grad_op1->OutputNames().size(), 2UL);
...@@ -611,7 +611,7 @@ TEST(Backward, simple_mult_op) { ...@@ -611,7 +611,7 @@ TEST(Backward, simple_mult_op) {
EXPECT_EQ(grad_op1->Output(f::GradVarName("b")), EXPECT_EQ(grad_op1->Output(f::GradVarName("b")),
std::vector<std::string>({f::GradVarName("b1")})); std::vector<std::string>({f::GradVarName("b1")}));
f::OpDescBind *grad_op2 = block->AllOps()[5]; f::OpDesc *grad_op2 = block->AllOps()[5];
EXPECT_EQ(grad_op2->Type(), "mul_grad"); EXPECT_EQ(grad_op2->Type(), "mul_grad");
ASSERT_EQ(grad_op2->InputNames().size(), 4UL); ASSERT_EQ(grad_op2->InputNames().size(), 4UL);
ASSERT_EQ(grad_op2->OutputNames().size(), 2UL); ASSERT_EQ(grad_op2->OutputNames().size(), 2UL);
...@@ -625,7 +625,7 @@ TEST(Backward, simple_mult_op) { ...@@ -625,7 +625,7 @@ TEST(Backward, simple_mult_op) {
EXPECT_EQ(grad_op2->Output(f::GradVarName("Y")), EXPECT_EQ(grad_op2->Output(f::GradVarName("Y")),
std::vector<std::string>({f::GradVarName("y2")})); std::vector<std::string>({f::GradVarName("y2")}));
f::OpDescBind *grad_op3 = block->AllOps()[4]; f::OpDesc *grad_op3 = block->AllOps()[4];
EXPECT_EQ(grad_op3->Type(), "rowwise_add_grad"); EXPECT_EQ(grad_op3->Type(), "rowwise_add_grad");
ASSERT_EQ(grad_op3->InputNames().size(), 1UL); ASSERT_EQ(grad_op3->InputNames().size(), 1UL);
ASSERT_EQ(grad_op3->OutputNames().size(), 2UL); ASSERT_EQ(grad_op3->OutputNames().size(), 2UL);
...@@ -655,42 +655,42 @@ TEST(Backward, simple_mult_op) { ...@@ -655,42 +655,42 @@ TEST(Backward, simple_mult_op) {
} }
TEST(Backward, intermedia_var_no_grad) { TEST(Backward, intermedia_var_no_grad) {
f::ProgramDescBind program; f::ProgramDesc program;
f::BlockDescBind *block = program.MutableBlock(0); f::BlockDesc *block = program.MutableBlock(0);
f::OpDescBind *op1 = block->AppendOp(); f::OpDesc *op1 = block->AppendOp();
op1->SetType("rowwise_add"); op1->SetType("rowwise_add");
op1->SetInput("X", {"x1"}); op1->SetInput("X", {"x1"});
op1->SetInput("b", {"b1"}); op1->SetInput("b", {"b1"});
op1->SetOutput("Out", {"out1"}); op1->SetOutput("Out", {"out1"});
f::OpDescBind *op2 = block->AppendOp(); f::OpDesc *op2 = block->AppendOp();
op2->SetType("mul"); op2->SetType("mul");
op2->SetInput("X", {"x2"}); op2->SetInput("X", {"x2"});
op2->SetInput("Y", {"y2"}); op2->SetInput("Y", {"y2"});
op2->SetOutput("Out", {"out2"}); op2->SetOutput("Out", {"out2"});
f::OpDescBind *op3 = block->AppendOp(); f::OpDesc *op3 = block->AppendOp();
op3->SetType("rowwise_add"); op3->SetType("rowwise_add");
op3->SetInput("X", {"out2"}); op3->SetInput("X", {"out2"});
op3->SetInput("b", {"b3"}); op3->SetInput("b", {"b3"});
op3->SetOutput("Out", {"out3"}); op3->SetOutput("Out", {"out3"});
f::OpDescBind *op4 = block->AppendOp(); f::OpDesc *op4 = block->AppendOp();
op4->SetType("mul"); op4->SetType("mul");
op4->SetInput("X", {"out1"}); op4->SetInput("X", {"out1"});
op4->SetInput("Y", {"out3"}); op4->SetInput("Y", {"out3"});
op4->SetOutput("Out", {"out4"}); op4->SetOutput("Out", {"out4"});
auto target = f::VarDescBind("out4"); auto target = f::VarDesc("out4");
target.SetShape({1}); target.SetShape({1});
size_t forward_len = block->AllOps().size(); size_t forward_len = block->AllOps().size();
auto var_to_grad = AppendBackward(program, target, {"out3"}); auto var_to_grad = AppendBackward(program, target, {"out3"});
ASSERT_EQ(block->AllOps().size(), 7UL); ASSERT_EQ(block->AllOps().size(), 7UL);
f::OpDescBind *fill_op = block->AllOps()[forward_len]; f::OpDesc *fill_op = block->AllOps()[forward_len];
EXPECT_EQ(fill_op->Type(), "fill_constant"); EXPECT_EQ(fill_op->Type(), "fill_constant");
f::OpDescBind *grad_op1 = block->AllOps()[6]; f::OpDesc *grad_op1 = block->AllOps()[6];
EXPECT_EQ(grad_op1->Type(), "rowwise_add_grad"); EXPECT_EQ(grad_op1->Type(), "rowwise_add_grad");
ASSERT_EQ(grad_op1->InputNames().size(), 1UL); ASSERT_EQ(grad_op1->InputNames().size(), 1UL);
ASSERT_EQ(grad_op1->OutputNames().size(), 2UL); ASSERT_EQ(grad_op1->OutputNames().size(), 2UL);
...@@ -701,7 +701,7 @@ TEST(Backward, intermedia_var_no_grad) { ...@@ -701,7 +701,7 @@ TEST(Backward, intermedia_var_no_grad) {
EXPECT_EQ(grad_op1->Output(f::GradVarName("b")), EXPECT_EQ(grad_op1->Output(f::GradVarName("b")),
std::vector<std::string>({f::GradVarName("b1")})); std::vector<std::string>({f::GradVarName("b1")}));
f::OpDescBind *grad_op4 = block->AllOps()[5]; f::OpDesc *grad_op4 = block->AllOps()[5];
EXPECT_EQ(grad_op4->Type(), "mul_grad"); EXPECT_EQ(grad_op4->Type(), "mul_grad");
ASSERT_EQ(grad_op4->InputNames().size(), 4UL); ASSERT_EQ(grad_op4->InputNames().size(), 4UL);
ASSERT_EQ(grad_op4->OutputNames().size(), 2UL); ASSERT_EQ(grad_op4->OutputNames().size(), 2UL);
...@@ -726,32 +726,32 @@ TEST(Backward, intermedia_var_no_grad) { ...@@ -726,32 +726,32 @@ TEST(Backward, intermedia_var_no_grad) {
} }
TEST(Backward, var_no_grad) { TEST(Backward, var_no_grad) {
f::ProgramDescBind program; f::ProgramDesc program;
f::BlockDescBind *block = program.MutableBlock(0); f::BlockDesc *block = program.MutableBlock(0);
f::OpDescBind *op1 = block->AppendOp(); f::OpDesc *op1 = block->AppendOp();
op1->SetType("mult_in_out"); op1->SetType("mult_in_out");
op1->SetInput("X", {"x1"}); op1->SetInput("X", {"x1"});
op1->SetInput("H", {"h1"}); op1->SetInput("H", {"h1"});
op1->SetOutput("Y", {"y1"}); op1->SetOutput("Y", {"y1"});
op1->SetOutput("Z", {"z1"}); op1->SetOutput("Z", {"z1"});
f::OpDescBind *op2 = block->AppendOp(); f::OpDesc *op2 = block->AppendOp();
op2->SetType("mult_in_out"); op2->SetType("mult_in_out");
op2->SetInput("X", {"y1"}); op2->SetInput("X", {"y1"});
op2->SetInput("H", {"z1"}); op2->SetInput("H", {"z1"});
op2->SetOutput("Y", {"y2"}); op2->SetOutput("Y", {"y2"});
op2->SetOutput("Z", {"z2"}); op2->SetOutput("Z", {"z2"});
auto target = f::VarDescBind("z2"); auto target = f::VarDesc("z2");
target.SetShape({1}); target.SetShape({1});
size_t forward_len = block->AllOps().size(); size_t forward_len = block->AllOps().size();
auto var_to_grad = AppendBackward(program, target, {"z1"}); auto var_to_grad = AppendBackward(program, target, {"z1"});
ASSERT_EQ(block->AllOps().size(), 6UL); ASSERT_EQ(block->AllOps().size(), 6UL);
f::OpDescBind *fill_op = block->AllOps()[forward_len]; f::OpDesc *fill_op = block->AllOps()[forward_len];
EXPECT_EQ(fill_op->Type(), "fill_constant"); EXPECT_EQ(fill_op->Type(), "fill_constant");
f::OpDescBind *grad_op2 = block->AllOps()[3]; f::OpDesc *grad_op2 = block->AllOps()[3];
ASSERT_EQ(grad_op2->Type(), "mult_in_out_grad"); ASSERT_EQ(grad_op2->Type(), "mult_in_out_grad");
ASSERT_EQ(grad_op2->InputNames().size(), 6UL); ASSERT_EQ(grad_op2->InputNames().size(), 6UL);
ASSERT_EQ(grad_op2->OutputNames().size(), 2UL); ASSERT_EQ(grad_op2->OutputNames().size(), 2UL);
...@@ -767,15 +767,15 @@ TEST(Backward, var_no_grad) { ...@@ -767,15 +767,15 @@ TEST(Backward, var_no_grad) {
std::vector<std::string>({f::GradVarName("y1")})); std::vector<std::string>({f::GradVarName("y1")}));
EXPECT_EQ(grad_op2->Output(f::GradVarName("H")), std::vector<std::string>()); EXPECT_EQ(grad_op2->Output(f::GradVarName("H")), std::vector<std::string>());
f::OpDescBind *fill_zero_op = block->AllOps()[4]; f::OpDesc *fill_zero_op = block->AllOps()[4];
ASSERT_EQ(fill_zero_op->Type(), "fill_zeros_like"); ASSERT_EQ(fill_zero_op->Type(), "fill_zeros_like");
ASSERT_EQ(fill_zero_op->InputNames().size(), 1UL); ASSERT_EQ(fill_zero_op->InputNames().size(), 1UL);
ASSERT_EQ(fill_zero_op->OutputNames().size(), 1UL); ASSERT_EQ(fill_zero_op->OutputNames().size(), 1UL);
EXPECT_EQ(fill_zero_op->Input("X"), std::vector<std::string>({"z1"})); EXPECT_EQ(fill_zero_op->Input("X"), std::vector<std::string>({"z1"}));
EXPECT_EQ(fill_zero_op->Output("Y"), EXPECT_EQ(fill_zero_op->Output("Out"),
std::vector<std::string>({std::string("z1") + f::kZeroVarSuffix})); std::vector<std::string>({std::string("z1") + f::kZeroVarSuffix}));
f::OpDescBind *grad_op1 = block->AllOps()[5]; f::OpDesc *grad_op1 = block->AllOps()[5];
ASSERT_EQ(grad_op1->Type(), "mult_in_out_grad"); ASSERT_EQ(grad_op1->Type(), "mult_in_out_grad");
ASSERT_EQ(grad_op1->InputNames().size(), 6UL); ASSERT_EQ(grad_op1->InputNames().size(), 6UL);
ASSERT_EQ(grad_op1->OutputNames().size(), 2UL); ASSERT_EQ(grad_op1->OutputNames().size(), 2UL);
...@@ -803,37 +803,37 @@ TEST(Backward, var_no_grad) { ...@@ -803,37 +803,37 @@ TEST(Backward, var_no_grad) {
} }
TEST(Backward, shared_var) { TEST(Backward, shared_var) {
f::ProgramDescBind program; f::ProgramDesc program;
f::BlockDescBind *block = program.MutableBlock(0); f::BlockDesc *block = program.MutableBlock(0);
f::OpDescBind *op1 = block->AppendOp(); f::OpDesc *op1 = block->AppendOp();
op1->SetType("rowwise_add"); op1->SetType("rowwise_add");
op1->SetInput("X", {"x1"}); op1->SetInput("X", {"x1"});
op1->SetInput("b", {"b1"}); op1->SetInput("b", {"b1"});
op1->SetOutput("Out", {"out1"}); op1->SetOutput("Out", {"out1"});
f::OpDescBind *op2 = block->AppendOp(); f::OpDesc *op2 = block->AppendOp();
op2->SetType("mul"); op2->SetType("mul");
op2->SetInput("X", {"out1"}); op2->SetInput("X", {"out1"});
op2->SetInput("Y", {"y2"}); op2->SetInput("Y", {"y2"});
op2->SetOutput("Out", {"out2"}); op2->SetOutput("Out", {"out2"});
f::OpDescBind *op3 = block->AppendOp(); f::OpDesc *op3 = block->AppendOp();
op3->SetType("rowwise_add"); op3->SetType("rowwise_add");
op3->SetInput("X", {"out1"}); op3->SetInput("X", {"out1"});
op3->SetInput("b", {"b3"}); op3->SetInput("b", {"b3"});
op3->SetOutput("Out", {"out3"}); op3->SetOutput("Out", {"out3"});
auto target = f::VarDescBind("out3"); auto target = f::VarDesc("out3");
target.SetShape({1}); target.SetShape({1});
size_t forward_len = block->AllOps().size(); size_t forward_len = block->AllOps().size();
auto var_to_grad = auto var_to_grad =
AppendBackward(program, target, std::unordered_set<std::string>{}); AppendBackward(program, target, std::unordered_set<std::string>{});
ASSERT_EQ(block->AllOps().size(), 8UL); ASSERT_EQ(block->AllOps().size(), 8UL);
f::OpDescBind *fill_op = block->AllOps()[forward_len]; f::OpDesc *fill_op = block->AllOps()[forward_len];
EXPECT_EQ(fill_op->Type(), "fill_constant"); EXPECT_EQ(fill_op->Type(), "fill_constant");
f::OpDescBind *grad_op3 = block->AllOps()[4]; f::OpDesc *grad_op3 = block->AllOps()[4];
ASSERT_EQ(grad_op3->Type(), "rowwise_add_grad"); ASSERT_EQ(grad_op3->Type(), "rowwise_add_grad");
ASSERT_EQ(grad_op3->InputNames().size(), 1UL); ASSERT_EQ(grad_op3->InputNames().size(), 1UL);
ASSERT_EQ(grad_op3->OutputNames().size(), 2UL); ASSERT_EQ(grad_op3->OutputNames().size(), 2UL);
...@@ -844,7 +844,7 @@ TEST(Backward, shared_var) { ...@@ -844,7 +844,7 @@ TEST(Backward, shared_var) {
EXPECT_EQ(grad_op3->Output(f::GradVarName("b")), EXPECT_EQ(grad_op3->Output(f::GradVarName("b")),
std::vector<std::string>({f::GradVarName("b3")})); std::vector<std::string>({f::GradVarName("b3")}));
f::OpDescBind *grad_op4 = block->AllOps()[5]; f::OpDesc *grad_op4 = block->AllOps()[5];
ASSERT_EQ(grad_op4->Type(), "mul_grad"); ASSERT_EQ(grad_op4->Type(), "mul_grad");
ASSERT_EQ(grad_op4->InputNames().size(), 4UL); ASSERT_EQ(grad_op4->InputNames().size(), 4UL);
ASSERT_EQ(grad_op4->OutputNames().size(), 2UL); ASSERT_EQ(grad_op4->OutputNames().size(), 2UL);
...@@ -858,7 +858,7 @@ TEST(Backward, shared_var) { ...@@ -858,7 +858,7 @@ TEST(Backward, shared_var) {
EXPECT_EQ(grad_op4->Output(f::GradVarName("Y")), EXPECT_EQ(grad_op4->Output(f::GradVarName("Y")),
std::vector<std::string>({f::GradVarName("y2")})); std::vector<std::string>({f::GradVarName("y2")}));
f::OpDescBind *sum_op = block->AllOps()[6]; f::OpDesc *sum_op = block->AllOps()[6];
ASSERT_EQ(sum_op->Type(), "sum"); ASSERT_EQ(sum_op->Type(), "sum");
ASSERT_EQ(sum_op->InputNames().size(), 1UL); ASSERT_EQ(sum_op->InputNames().size(), 1UL);
ASSERT_EQ(sum_op->OutputNames().size(), 1UL); ASSERT_EQ(sum_op->OutputNames().size(), 1UL);
...@@ -868,7 +868,7 @@ TEST(Backward, shared_var) { ...@@ -868,7 +868,7 @@ TEST(Backward, shared_var) {
EXPECT_EQ(sum_op->Output("Out"), EXPECT_EQ(sum_op->Output("Out"),
std::vector<std::string>({f::GradVarName("out1")})); std::vector<std::string>({f::GradVarName("out1")}));
f::OpDescBind *grad_op1 = block->AllOps()[7]; f::OpDesc *grad_op1 = block->AllOps()[7];
ASSERT_EQ(grad_op1->Type(), "rowwise_add_grad"); ASSERT_EQ(grad_op1->Type(), "rowwise_add_grad");
ASSERT_EQ(grad_op1->InputNames().size(), 1UL); ASSERT_EQ(grad_op1->InputNames().size(), 1UL);
ASSERT_EQ(grad_op1->OutputNames().size(), 2UL); ASSERT_EQ(grad_op1->OutputNames().size(), 2UL);
...@@ -895,19 +895,19 @@ TEST(Backward, shared_var) { ...@@ -895,19 +895,19 @@ TEST(Backward, shared_var) {
} }
TEST(Backward, half_backward) { TEST(Backward, half_backward) {
f::ProgramDescBind program; f::ProgramDesc program;
f::BlockDescBind *block = program.MutableBlock(0); f::BlockDesc *block = program.MutableBlock(0);
auto *op1 = block->AppendOp(); auto *op1 = block->AppendOp();
op1->SetType("minus"); op1->SetType("minus");
op1->SetInput("X", {"a"}); op1->SetInput("X", {"a"});
op1->SetInput("Y", {"b"}); op1->SetInput("Y", {"b"});
op1->SetOutput("Out", {"out"}); op1->SetOutput("Out", {"out"});
auto target = f::VarDescBind("out"); auto target = f::VarDesc("out");
target.SetShape({1}); target.SetShape({1});
size_t forward_len = block->AllOps().size(); size_t forward_len = block->AllOps().size();
auto var_to_grad = AppendBackward(program, target, {"b"}); auto var_to_grad = AppendBackward(program, target, {"b"});
f::OpDescBind *fill_op = block->AllOps()[forward_len]; f::OpDesc *fill_op = block->AllOps()[forward_len];
EXPECT_EQ(fill_op->Type(), "fill_constant"); EXPECT_EQ(fill_op->Type(), "fill_constant");
auto ops = block->AllOps(); auto ops = block->AllOps();
ASSERT_EQ(3UL, ops.size()); ASSERT_EQ(3UL, ops.size());
......
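Taken together, the updated tests exercise one recurring pattern with the renamed descriptor classes. A condensed sketch, drawn directly from the test bodies above (`f` is the test file's namespace alias for paddle::framework; this is not a complete unit test on its own):

    f::ProgramDesc program;                         // a fresh program owns its root block
    f::BlockDesc *block = program.MutableBlock(0);  // block 0 is the root block
    f::OpDesc *op = block->AppendOp();              // forward op under test
    op->SetType("rowwise_add");
    op->SetInput("X", {"x"});
    op->SetInput("b", {"b"});
    op->SetOutput("Out", {"out"});
    auto target = f::VarDesc("out");                // the variable to differentiate
    target.SetShape({1});
    // Appends a fill_constant op plus the *_grad ops to the block and returns
    // the mapping from forward variables to their gradient variables.
    auto var_to_grad =
        AppendBackward(program, target, std::unordered_set<std::string>{});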
...@@ -19,18 +19,18 @@ limitations under the License. */ ...@@ -19,18 +19,18 @@ limitations under the License. */
namespace paddle { namespace paddle {
namespace framework { namespace framework {
VarDescBind *BlockDescBind::Var(const std::string &name) { VarDesc *BlockDesc::Var(const std::string &name) {
auto it = vars_.find(name); auto it = vars_.find(name);
if (it != vars_.end()) { if (it != vars_.end()) {
return it->second.get(); return it->second.get();
} }
need_update_ = true; need_update_ = true;
auto *var = new VarDescBind(name); auto *var = new VarDesc(name);
vars_[name].reset(var); vars_[name].reset(var);
return var; return var;
} }
VarDescBind *BlockDescBind::FindVar(const std::string &name) const { VarDesc *BlockDesc::FindVar(const std::string &name) const {
auto it = vars_.find(name); auto it = vars_.find(name);
if (it == vars_.end()) { if (it == vars_.end()) {
return nullptr; return nullptr;
...@@ -38,11 +38,11 @@ VarDescBind *BlockDescBind::FindVar(const std::string &name) const { ...@@ -38,11 +38,11 @@ VarDescBind *BlockDescBind::FindVar(const std::string &name) const {
return it->second.get(); return it->second.get();
} }
bool BlockDescBind::HasVar(const std::string &name) const { bool BlockDesc::HasVar(const std::string &name) const {
return vars_.find(name) != vars_.end(); return vars_.find(name) != vars_.end();
} }
VarDescBind *BlockDescBind::FindVarRecursive(const std::string &name) const { VarDesc *BlockDesc::FindVarRecursive(const std::string &name) const {
if (name == kEmptyVarName) return nullptr; if (name == kEmptyVarName) return nullptr;
auto it = vars_.find(name); auto it = vars_.find(name);
...@@ -53,53 +53,52 @@ VarDescBind *BlockDescBind::FindVarRecursive(const std::string &name) const { ...@@ -53,53 +53,52 @@ VarDescBind *BlockDescBind::FindVarRecursive(const std::string &name) const {
return it->second.get(); return it->second.get();
} }
VarDescBind *BlockDescBind::FindRecursiveOrCreateVar( VarDesc *BlockDesc::FindRecursiveOrCreateVar(const std::string &name_bytes) {
const std::string &name_bytes) { VarDesc *res = FindVarRecursive(name_bytes);
VarDescBind *res = FindVarRecursive(name_bytes);
if (res == nullptr) { if (res == nullptr) {
res = Var(name_bytes); res = Var(name_bytes);
} }
return res; return res;
} }
bool BlockDescBind::HasVarRecursive(const std::string &name) const { bool BlockDesc::HasVarRecursive(const std::string &name) const {
return FindVarRecursive(name) != nullptr; return FindVarRecursive(name) != nullptr;
} }
std::vector<VarDescBind *> BlockDescBind::AllVars() const { std::vector<VarDesc *> BlockDesc::AllVars() const {
std::vector<VarDescBind *> res; std::vector<VarDesc *> res;
for (const auto &p : vars_) { for (const auto &p : vars_) {
res.push_back(p.second.get()); res.push_back(p.second.get());
} }
return res; return res;
} }
OpDescBind *BlockDescBind::AppendOp() { OpDesc *BlockDesc::AppendOp() {
need_update_ = true; need_update_ = true;
ops_.emplace_back(new OpDescBind()); ops_.emplace_back(new OpDesc());
return ops_.back().get(); return ops_.back().get();
} }
void BlockDescBind::AppendAllocatedOp(std::unique_ptr<OpDescBind> &&op_desc) { void BlockDesc::AppendAllocatedOp(std::unique_ptr<OpDesc> &&op_desc) {
need_update_ = true; need_update_ = true;
ops_.emplace_back(std::move(op_desc)); ops_.emplace_back(std::move(op_desc));
} }
OpDescBind *BlockDescBind::PrependOp() { OpDesc *BlockDesc::PrependOp() {
need_update_ = true; need_update_ = true;
ops_.emplace_front(new OpDescBind()); ops_.emplace_front(new OpDesc());
return ops_.front().get(); return ops_.front().get();
} }
std::vector<OpDescBind *> BlockDescBind::AllOps() const { std::vector<OpDesc *> BlockDesc::AllOps() const {
std::vector<OpDescBind *> res; std::vector<OpDesc *> res;
for (const auto &op : ops_) { for (const auto &op : ops_) {
res.push_back(op.get()); res.push_back(op.get());
} }
return res; return res;
} }
void BlockDescBind::Flush() { void BlockDesc::Flush() {
for (auto &op_desc : ops_) { for (auto &op_desc : ops_) {
op_desc->Flush(); op_desc->Flush();
} }
...@@ -121,43 +120,43 @@ void BlockDescBind::Flush() { ...@@ -121,43 +120,43 @@ void BlockDescBind::Flush() {
} }
} }
BlockDescBind *BlockDescBind::ParentBlock() const { BlockDesc *BlockDesc::ParentBlock() const {
if (this->desc_->parent_idx() == kNoneBlockIndex) { if (this->desc_->parent_idx() == kNoneBlockIndex) {
return nullptr; return nullptr;
} }
return prog_->MutableBlock(static_cast<size_t>(this->desc_->parent_idx())); return prog_->MutableBlock(static_cast<size_t>(this->desc_->parent_idx()));
} }
proto::BlockDesc *BlockDescBind::Proto() { proto::BlockDesc *BlockDesc::Proto() {
Flush(); Flush();
return desc_; return desc_;
} }
BlockDescBind::BlockDescBind(ProgramDescBind *prog, proto::BlockDesc *desc) BlockDesc::BlockDesc(ProgramDesc *prog, proto::BlockDesc *desc)
: prog_(prog), desc_(desc), need_update_(false) { : prog_(prog), desc_(desc), need_update_(false) {
for (const proto::VarDesc &var_desc : desc_->vars()) { for (const proto::VarDesc &var_desc : desc_->vars()) {
vars_[var_desc.name()].reset(new VarDescBind(var_desc)); vars_[var_desc.name()].reset(new VarDesc(var_desc));
} }
for (const proto::OpDesc &op_desc : desc_->ops()) { for (const proto::OpDesc &op_desc : desc_->ops()) {
ops_.emplace_back(new OpDescBind(op_desc, prog)); ops_.emplace_back(new OpDesc(op_desc, prog));
} }
} }
BlockDescBind::BlockDescBind(const BlockDescBind &other, proto::BlockDesc *desc, BlockDesc::BlockDesc(const BlockDesc &other, proto::BlockDesc *desc,
ProgramDescBind *prog) ProgramDesc *prog)
: prog_(prog), desc_(desc) { : prog_(prog), desc_(desc) {
need_update_ = true; need_update_ = true;
for (auto &op : other.ops_) { for (auto &op : other.ops_) {
ops_.emplace_back(new OpDescBind(*op)); ops_.emplace_back(new OpDesc(*op));
} }
for (auto &it : other.vars_) { for (auto &it : other.vars_) {
auto *var = new VarDescBind(*it.second); auto *var = new VarDesc(*it.second);
vars_[it.first].reset(var); vars_[it.first].reset(var);
} }
} }
void BlockDescBind::ClearPBOps() { void BlockDesc::ClearPBOps() {
auto ops = this->desc_->mutable_ops(); auto ops = this->desc_->mutable_ops();
while (!ops->empty()) { while (!ops->empty()) {
// we do not own the OpDesc, so release the ownership. // we do not own the OpDesc, so release the ownership.
...@@ -165,7 +164,7 @@ void BlockDescBind::ClearPBOps() { ...@@ -165,7 +164,7 @@ void BlockDescBind::ClearPBOps() {
} }
} }
void BlockDescBind::ClearPBVars() { void BlockDesc::ClearPBVars() {
auto vars = this->desc_->mutable_vars(); auto vars = this->desc_->mutable_vars();
while (!vars->empty()) { while (!vars->empty()) {
// we do not own the VarDesc, so release the ownership. // we do not own the VarDesc, so release the ownership.
......
...@@ -28,20 +28,19 @@ limitations under the License. */ ...@@ -28,20 +28,19 @@ limitations under the License. */
namespace paddle { namespace paddle {
namespace framework { namespace framework {
class ProgramDescBind; class ProgramDesc;
// For each protobuf message, we provide a corresponding wrapper class that // For each protobuf message, we provide a corresponding wrapper class that
// optimizes read/write speed. Local changes are synchronized to the // optimizes read/write speed. Local changes are synchronized to the
// protobuf message only on demand (via the `Flush` method). // protobuf message only on demand (via the `Flush` method).
class BlockDescBind { class BlockDesc {
public: public:
BlockDescBind(ProgramDescBind *prog, proto::BlockDesc *desc); BlockDesc(ProgramDesc *prog, proto::BlockDesc *desc);
BlockDescBind(const BlockDescBind &other, proto::BlockDesc *desc, BlockDesc(const BlockDesc &other, proto::BlockDesc *desc, ProgramDesc *prog);
ProgramDescBind *prog);
~BlockDescBind() { ~BlockDesc() {
this->ClearPBVars(); this->ClearPBVars();
this->ClearPBOps(); this->ClearPBOps();
} }
...@@ -50,15 +49,15 @@ class BlockDescBind { ...@@ -50,15 +49,15 @@ class BlockDescBind {
int32_t Parent() const { return desc_->parent_idx(); } int32_t Parent() const { return desc_->parent_idx(); }
VarDescBind *Var(const std::string &name_bytes); VarDesc *Var(const std::string &name_bytes);
VarDescBind *FindVar(const std::string &name_bytes) const; VarDesc *FindVar(const std::string &name_bytes) const;
bool HasVar(const std::string &var_name) const; bool HasVar(const std::string &var_name) const;
VarDescBind *FindVarRecursive(const std::string &name_bytes) const; VarDesc *FindVarRecursive(const std::string &name_bytes) const;
VarDescBind *FindRecursiveOrCreateVar(const std::string &name_bytes); VarDesc *FindRecursiveOrCreateVar(const std::string &name_bytes);
bool HasVarRecursive(const std::string &var_name) const; bool HasVarRecursive(const std::string &var_name) const;
...@@ -70,41 +69,41 @@ class BlockDescBind { ...@@ -70,41 +69,41 @@ class BlockDescBind {
return var_names; return var_names;
} }
std::vector<VarDescBind *> AllVars() const; std::vector<VarDesc *> AllVars() const;
BlockDescBind *ParentBlock() const; BlockDesc *ParentBlock() const;
OpDescBind *AppendOp(); OpDesc *AppendOp();
void AppendAllocatedOp(std::unique_ptr<OpDescBind> &&op_desc); void AppendAllocatedOp(std::unique_ptr<OpDesc> &&op_desc);
OpDescBind *PrependOp(); OpDesc *PrependOp();
std::vector<OpDescBind *> AllOps() const; std::vector<OpDesc *> AllOps() const;
size_t OpSize() const { return ops_.size(); } size_t OpSize() const { return ops_.size(); }
OpDescBind *Op(int idx) { return ops_.at(idx).get(); } OpDesc *Op(int idx) { return ops_.at(idx).get(); }
void Flush(); void Flush();
proto::BlockDesc *Proto(); proto::BlockDesc *Proto();
ProgramDescBind *Program() { return this->prog_; } ProgramDesc *Program() { return this->prog_; }
private: private:
void ClearPBOps(); void ClearPBOps();
void ClearPBVars(); void ClearPBVars();
private: private:
ProgramDescBind *prog_; // not_own ProgramDesc *prog_; // not_own
proto::BlockDesc *desc_; // not_own proto::BlockDesc *desc_; // not_own
bool need_update_; bool need_update_;
std::deque<std::unique_ptr<OpDescBind>> ops_; std::deque<std::unique_ptr<OpDesc>> ops_;
std::unordered_map<std::string, std::unique_ptr<VarDescBind>> vars_; std::unordered_map<std::string, std::unique_ptr<VarDesc>> vars_;
DISABLE_COPY_AND_ASSIGN(BlockDescBind); DISABLE_COPY_AND_ASSIGN(BlockDesc);
}; };
} // namespace framework } // namespace framework
} // namespace paddle } // namespace paddle
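As a quick sketch of how the variable helpers declared above compose (illustrative only; namespace prefixes are omitted and `block` is assumed to be a BlockDesc* obtained from a ProgramDesc):

    VarDesc *x = block->Var("x");              // finds "x" or creates it in this block
    bool has = block->HasVar("x");             // lookup restricted to this block
    VarDesc *missing = block->FindVar("y");    // nullptr when absent, never creates
    VarDesc *y = block->FindRecursiveOrCreateVar("y");  // searches parent blocks first,
                                                        // then creates in this block
    bool has_y = block->HasVarRecursive("y");  // recursive counterpart of HasVar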
...@@ -106,10 +106,10 @@ template <typename T> ...@@ -106,10 +106,10 @@ template <typename T>
struct OpInfoFiller<T, kGradOpDescMaker> { struct OpInfoFiller<T, kGradOpDescMaker> {
void operator()(const char* op_type, OpInfo* info) const { void operator()(const char* op_type, OpInfo* info) const {
info->grad_op_maker_ = []( info->grad_op_maker_ = [](
const OpDescBind& fwd_op, const OpDesc& fwd_op,
const std::unordered_set<std::string>& no_grad_set, const std::unordered_set<std::string>& no_grad_set,
std::unordered_map<std::string, std::string>* grad_to_var, std::unordered_map<std::string, std::string>* grad_to_var,
const std::vector<BlockDescBind*>& grad_block) { const std::vector<BlockDesc*>& grad_block) {
T maker(fwd_op, no_grad_set, grad_to_var, grad_block); T maker(fwd_op, no_grad_set, grad_to_var, grad_block);
return maker(); return maker();
}; };
...@@ -119,7 +119,7 @@ struct OpInfoFiller<T, kGradOpDescMaker> { ...@@ -119,7 +119,7 @@ struct OpInfoFiller<T, kGradOpDescMaker> {
template <typename T> template <typename T>
struct OpInfoFiller<T, kVarTypeInference> { struct OpInfoFiller<T, kVarTypeInference> {
void operator()(const char* op_type, OpInfo* info) const { void operator()(const char* op_type, OpInfo* info) const {
info->infer_var_type_ = [](const OpDescBind& fwd_op, BlockDescBind* block) { info->infer_var_type_ = [](const OpDesc& fwd_op, BlockDesc* block) {
T inference; T inference;
inference(fwd_op, block); inference(fwd_op, block);
}; };
......
...@@ -64,7 +64,7 @@ static void CreateTensor(Variable* var, proto::VarDesc::VarType var_type) { ...@@ -64,7 +64,7 @@ static void CreateTensor(Variable* var, proto::VarDesc::VarType var_type) {
} }
} }
void Executor::Run(const ProgramDescBind& pdesc, Scope* scope, int block_id, void Executor::Run(const ProgramDesc& pdesc, Scope* scope, int block_id,
bool create_local_scope) { bool create_local_scope) {
// TODO(tonyyang-svail): // TODO(tonyyang-svail):
// - only runs on the first device (i.e. no interdevice communication) // - only runs on the first device (i.e. no interdevice communication)
......
...@@ -40,6 +40,16 @@ class DeviceContextPool { ...@@ -40,6 +40,16 @@ class DeviceContextPool {
return *pool; return *pool;
} }
const platform::DeviceContext* Borrow(const platform::Place& place) {
auto range = device_contexts_.equal_range(place);
if (range.first == range.second) {
PADDLE_THROW(
"'Place' is not supported, Please re-compile with WITH_GPU "
"option");
}
return range.first->second;
}
std::vector<const platform::DeviceContext*> Borrow( std::vector<const platform::DeviceContext*> Borrow(
const std::vector<platform::Place>& places) { const std::vector<platform::Place>& places) {
PADDLE_ENFORCE_GT(places.size(), 0); PADDLE_ENFORCE_GT(places.size(), 0);
...@@ -114,7 +124,7 @@ class Executor { ...@@ -114,7 +124,7 @@ class Executor {
* ProgramDesc * ProgramDesc
* Scope * Scope
*/ */
void Run(const ProgramDescBind&, Scope*, int, bool create_local_scope = true); void Run(const ProgramDesc&, Scope*, int, bool create_local_scope = true);
private: private:
std::vector<const platform::DeviceContext*> device_contexts_; std::vector<const platform::DeviceContext*> device_contexts_;
......
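The single-place Borrow added above complements the existing vector overload. A minimal sketch of a caller, assuming a DeviceContextPool reference is already in hand (how the pool singleton is obtained lies outside this hunk):

    const platform::DeviceContext* CpuContext(DeviceContextPool& pool) {
      platform::CPUPlace cpu;
      // PADDLE_THROW fires if no context is registered for the place,
      // e.g. a GPU place in a build without WITH_GPU.
      return pool.Borrow(cpu);
    }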
...@@ -22,21 +22,27 @@ ...@@ -22,21 +22,27 @@
namespace paddle { namespace paddle {
namespace framework { namespace framework {
/*
This functor class is responsible for creating the gradient ops for the given
operator fwd_op. After it is called (through operator()), the pairs of
(gradient variable, corresponding input variable of fwd_op) will be added to
grad_to_var. If an input variable of fwd_op is contained in no_grad_set, its
gradient variable will be dropped or set to kEmptyVarName, depending on the template
argument DropEmptyIG in the derived classes.
*/
class GradOpDescMakerBase { class GradOpDescMakerBase {
public: public:
explicit GradOpDescMakerBase( explicit GradOpDescMakerBase(
const OpDescBind& fwd_op, const OpDesc& fwd_op, const std::unordered_set<std::string>& no_grad_set,
const std::unordered_set<std::string>& no_grad_set,
std::unordered_map<std::string, std::string>* grad_to_var, std::unordered_map<std::string, std::string>* grad_to_var,
const std::vector<BlockDescBind*>& grad_block = const std::vector<BlockDesc*>& grad_block = std::vector<BlockDesc*>())
std::vector<BlockDescBind*>())
: fwd_op_(fwd_op), : fwd_op_(fwd_op),
no_grad_set_(no_grad_set), no_grad_set_(no_grad_set),
grad_to_var_(grad_to_var), grad_to_var_(grad_to_var),
grad_block_(grad_block) {} grad_block_(grad_block) {}
virtual ~GradOpDescMakerBase() = default; virtual ~GradOpDescMakerBase() = default;
virtual std::vector<std::unique_ptr<OpDescBind>> operator()() const = 0; virtual std::vector<std::unique_ptr<OpDesc>> operator()() const = 0;
protected: protected:
std::vector<std::string> InputGrad(const std::string& name, std::vector<std::string> InputGrad(const std::string& name,
...@@ -58,6 +64,16 @@ class GradOpDescMakerBase { ...@@ -58,6 +64,16 @@ class GradOpDescMakerBase {
if (!drop_empty_grad) { if (!drop_empty_grad) {
return ret_val; return ret_val;
} }
PADDLE_ENFORCE_LE(var_names.size(), 1UL,
"BUG from operator developer:"
" for input argument with a list of variables, "
" drop_empty_grad is not allowed because it makes"
" the correspondence bewteen a variable and its gradient"
" ambiguous. Use REGISTER_OP_EX to register the op"
" or call InputGrad(?,false) in GradOpDescMaker."
" Op type %s",
fwd_op_.Type());
std::vector<std::string> dropped_ret_val; std::vector<std::string> dropped_ret_val;
dropped_ret_val.reserve(ret_val.size()); dropped_ret_val.reserve(ret_val.size());
std::copy_if(ret_val.begin(), ret_val.end(), std::copy_if(ret_val.begin(), ret_val.end(),
...@@ -105,26 +121,26 @@ class GradOpDescMakerBase { ...@@ -105,26 +121,26 @@ class GradOpDescMakerBase {
std::string ForwardOpType() const { return this->fwd_op_.Type(); } std::string ForwardOpType() const { return this->fwd_op_.Type(); }
private: private:
const OpDescBind& fwd_op_; const OpDesc& fwd_op_;
const std::unordered_set<std::string>& no_grad_set_; const std::unordered_set<std::string>& no_grad_set_;
std::unordered_map<std::string, std::string>* grad_to_var_; std::unordered_map<std::string, std::string>* grad_to_var_;
protected: protected:
std::vector<BlockDescBind*> grad_block_; std::vector<BlockDesc*> grad_block_;
}; };
class SingleGradOpDescMaker : public GradOpDescMakerBase { class SingleGradOpDescMaker : public GradOpDescMakerBase {
public: public:
using GradOpDescMakerBase::GradOpDescMakerBase; using GradOpDescMakerBase::GradOpDescMakerBase;
std::vector<std::unique_ptr<OpDescBind>> operator()() const { std::vector<std::unique_ptr<OpDesc>> operator()() const {
std::vector<std::unique_ptr<OpDescBind>> retv; std::vector<std::unique_ptr<OpDesc>> retv;
retv.emplace_back(this->Apply()); retv.emplace_back(this->Apply());
return retv; return retv;
} }
protected: protected:
virtual std::unique_ptr<OpDescBind> Apply() const = 0; virtual std::unique_ptr<OpDesc> Apply() const = 0;
}; };
template <bool DropEmptyIG = true> template <bool DropEmptyIG = true>
...@@ -133,8 +149,8 @@ class DefaultGradOpDescMaker : public SingleGradOpDescMaker { ...@@ -133,8 +149,8 @@ class DefaultGradOpDescMaker : public SingleGradOpDescMaker {
using SingleGradOpDescMaker::SingleGradOpDescMaker; using SingleGradOpDescMaker::SingleGradOpDescMaker;
protected: protected:
virtual std::unique_ptr<OpDescBind> Apply() const { virtual std::unique_ptr<OpDesc> Apply() const {
auto* grad = new OpDescBind(); auto* grad = new OpDesc();
grad->SetType(this->GradOpType()); grad->SetType(this->GradOpType());
for (auto& input_param : this->InputNames()) { for (auto& input_param : this->InputNames()) {
...@@ -150,7 +166,7 @@ class DefaultGradOpDescMaker : public SingleGradOpDescMaker { ...@@ -150,7 +166,7 @@ class DefaultGradOpDescMaker : public SingleGradOpDescMaker {
grad->SetAttrMap(this->Attrs()); grad->SetAttrMap(this->Attrs());
return std::unique_ptr<OpDescBind>(grad); return std::unique_ptr<OpDesc>(grad);
} }
virtual std::string GradOpType() const { virtual std::string GradOpType() const {
...@@ -161,7 +177,7 @@ class DefaultGradOpDescMaker : public SingleGradOpDescMaker { ...@@ -161,7 +177,7 @@ class DefaultGradOpDescMaker : public SingleGradOpDescMaker {
class EmptyGradOpMaker : public GradOpDescMakerBase { class EmptyGradOpMaker : public GradOpDescMakerBase {
public: public:
using GradOpDescMakerBase::GradOpDescMakerBase; using GradOpDescMakerBase::GradOpDescMakerBase;
std::vector<std::unique_ptr<OpDescBind>> operator()() const override { std::vector<std::unique_ptr<OpDesc>> operator()() const override {
return {}; return {};
} }
}; };
......
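For an operator that supplies its own maker, the renamed types slot in as follows; this is an illustrative sketch only ("my_op_grad" and the argument names are placeholders, not an operator touched by this change):

    class MyOpGradMaker : public framework::SingleGradOpDescMaker {
     public:
      using framework::SingleGradOpDescMaker::SingleGradOpDescMaker;

     protected:
      std::unique_ptr<framework::OpDesc> Apply() const override {
        auto *grad = new framework::OpDesc();
        grad->SetType("my_op_grad");
        grad->SetInput("X", OutputGrad("Out"));                        // pass dOut in
        grad->SetOutput(framework::GradVarName("X"), InputGrad("X"));  // produce dX
        grad->SetAttrMap(Attrs());
        return std::unique_ptr<framework::OpDesc>(grad);
      }
    };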
...@@ -25,12 +25,11 @@ limitations under the License. */ ...@@ -25,12 +25,11 @@ limitations under the License. */
namespace paddle { namespace paddle {
namespace framework { namespace framework {
class OpDescBind; class OpDesc;
class BlockDescBind; class BlockDesc;
class CompileTimeInferShapeContext : public InferShapeContext { class CompileTimeInferShapeContext : public InferShapeContext {
public: public:
CompileTimeInferShapeContext(const OpDescBind &op, CompileTimeInferShapeContext(const OpDesc &op, const BlockDesc &block);
const BlockDescBind &block);
bool HasInput(const std::string &name) const override; bool HasInput(const std::string &name) const override;
...@@ -76,13 +75,12 @@ class CompileTimeInferShapeContext : public InferShapeContext { ...@@ -76,13 +75,12 @@ class CompileTimeInferShapeContext : public InferShapeContext {
void SetDim(const std::string &name, const DDim &dim) override; void SetDim(const std::string &name, const DDim &dim) override;
const OpDescBind &op_; const OpDesc &op_;
const BlockDescBind &block_; const BlockDesc &block_;
}; };
OpDescBind::OpDescBind(const std::string &type, const VariableNameMap &inputs, OpDesc::OpDesc(const std::string &type, const VariableNameMap &inputs,
const VariableNameMap &outputs, const VariableNameMap &outputs, const AttributeMap &attrs) {
const AttributeMap &attrs) {
desc_.set_type(type); desc_.set_type(type);
inputs_ = inputs; inputs_ = inputs;
outputs_ = outputs; outputs_ = outputs;
...@@ -90,7 +88,7 @@ OpDescBind::OpDescBind(const std::string &type, const VariableNameMap &inputs, ...@@ -90,7 +88,7 @@ OpDescBind::OpDescBind(const std::string &type, const VariableNameMap &inputs,
need_update_ = true; need_update_ = true;
} }
OpDescBind::OpDescBind(const proto::OpDesc &desc, ProgramDescBind *prog) OpDesc::OpDesc(const proto::OpDesc &desc, ProgramDesc *prog)
: desc_(desc), need_update_(false) { : desc_(desc), need_update_(false) {
// restore inputs_ // restore inputs_
int input_size = desc_.inputs_size(); int input_size = desc_.inputs_size();
...@@ -126,20 +124,19 @@ OpDescBind::OpDescBind(const proto::OpDesc &desc, ProgramDescBind *prog) ...@@ -126,20 +124,19 @@ OpDescBind::OpDescBind(const proto::OpDesc &desc, ProgramDescBind *prog)
} }
} }
proto::OpDesc *OpDescBind::Proto() { proto::OpDesc *OpDesc::Proto() {
Flush(); Flush();
return &desc_; return &desc_;
} }
const std::vector<std::string> &OpDescBind::Input( const std::vector<std::string> &OpDesc::Input(const std::string &name) const {
const std::string &name) const {
auto it = inputs_.find(name); auto it = inputs_.find(name);
PADDLE_ENFORCE(it != inputs_.end(), "Input %s cannot be found in Op %s", name, PADDLE_ENFORCE(it != inputs_.end(), "Input %s cannot be found in Op %s", name,
Type()); Type());
return it->second; return it->second;
} }
std::vector<std::string> OpDescBind::InputArgumentNames() const { std::vector<std::string> OpDesc::InputArgumentNames() const {
std::vector<std::string> retv; std::vector<std::string> retv;
for (auto &ipt : this->inputs_) { for (auto &ipt : this->inputs_) {
retv.insert(retv.end(), ipt.second.begin(), ipt.second.end()); retv.insert(retv.end(), ipt.second.begin(), ipt.second.end());
...@@ -147,21 +144,20 @@ std::vector<std::string> OpDescBind::InputArgumentNames() const { ...@@ -147,21 +144,20 @@ std::vector<std::string> OpDescBind::InputArgumentNames() const {
return retv; return retv;
} }
void OpDescBind::SetInput(const std::string &param_name, void OpDesc::SetInput(const std::string &param_name,
const std::vector<std::string> &args) { const std::vector<std::string> &args) {
need_update_ = true; need_update_ = true;
inputs_[param_name] = args; inputs_[param_name] = args;
} }
const std::vector<std::string> &OpDescBind::Output( const std::vector<std::string> &OpDesc::Output(const std::string &name) const {
const std::string &name) const {
auto it = outputs_.find(name); auto it = outputs_.find(name);
PADDLE_ENFORCE(it != outputs_.end(), "Output %s cannot be found in Op %s", PADDLE_ENFORCE(it != outputs_.end(), "Output %s cannot be found in Op %s",
name, Type()); name, Type());
return it->second; return it->second;
} }
std::vector<std::string> OpDescBind::OutputArgumentNames() const { std::vector<std::string> OpDesc::OutputArgumentNames() const {
std::vector<std::string> retv; std::vector<std::string> retv;
for (auto &ipt : this->outputs_) { for (auto &ipt : this->outputs_) {
retv.insert(retv.end(), ipt.second.begin(), ipt.second.end()); retv.insert(retv.end(), ipt.second.begin(), ipt.second.end());
...@@ -169,19 +165,19 @@ std::vector<std::string> OpDescBind::OutputArgumentNames() const { ...@@ -169,19 +165,19 @@ std::vector<std::string> OpDescBind::OutputArgumentNames() const {
return retv; return retv;
} }
void OpDescBind::SetOutput(const std::string &param_name, void OpDesc::SetOutput(const std::string &param_name,
const std::vector<std::string> &args) { const std::vector<std::string> &args) {
need_update_ = true; need_update_ = true;
this->outputs_[param_name] = args; this->outputs_[param_name] = args;
} }
proto::AttrType OpDescBind::GetAttrType(const std::string &name) const { proto::AttrType OpDesc::GetAttrType(const std::string &name) const {
auto it = attrs_.find(name); auto it = attrs_.find(name);
PADDLE_ENFORCE(it != attrs_.end(), "Attribute %s is not found", name); PADDLE_ENFORCE(it != attrs_.end(), "Attribute %s is not found", name);
return static_cast<proto::AttrType>(it->second.which() - 1); return static_cast<proto::AttrType>(it->second.which() - 1);
} }
std::vector<std::string> OpDescBind::AttrNames() const { std::vector<std::string> OpDesc::AttrNames() const {
std::vector<std::string> retv; std::vector<std::string> retv;
retv.reserve(attrs_.size()); retv.reserve(attrs_.size());
for (auto &attr : attrs_) { for (auto &attr : attrs_) {
...@@ -190,41 +186,39 @@ std::vector<std::string> OpDescBind::AttrNames() const { ...@@ -190,41 +186,39 @@ std::vector<std::string> OpDescBind::AttrNames() const {
return retv; return retv;
} }
void OpDescBind::SetAttr(const std::string &name, const Attribute &v) { void OpDesc::SetAttr(const std::string &name, const Attribute &v) {
this->attrs_[name] = v; this->attrs_[name] = v;
need_update_ = true; need_update_ = true;
} }
void OpDescBind::SetBlockAttr(const std::string &name, BlockDescBind &block) { void OpDesc::SetBlockAttr(const std::string &name, BlockDesc &block) {
this->attrs_[name] = &block; this->attrs_[name] = &block;
need_update_ = true; need_update_ = true;
} }
void OpDescBind::SetAttrMap( void OpDesc::SetAttrMap(
const std::unordered_map<std::string, Attribute> &attr_map) { const std::unordered_map<std::string, Attribute> &attr_map) {
attrs_ = attr_map; attrs_ = attr_map;
need_update_ = true; need_update_ = true;
} }
Attribute OpDescBind::GetAttr(const std::string &name) const { Attribute OpDesc::GetAttr(const std::string &name) const {
auto it = attrs_.find(name); auto it = attrs_.find(name);
PADDLE_ENFORCE(it != attrs_.end(), "Attribute %s is not found", name); PADDLE_ENFORCE(it != attrs_.end(), "Attribute %s is not found", name);
return it->second; return it->second;
} }
int OpDescBind::GetBlockAttr(const std::string &name) const { int OpDesc::GetBlockAttr(const std::string &name) const {
auto it = attrs_.find(name); auto it = attrs_.find(name);
PADDLE_ENFORCE(it != attrs_.end(), "Attribute %s is not found", name); PADDLE_ENFORCE(it != attrs_.end(), "Attribute %s is not found", name);
return boost::get<BlockDescBind *>(it->second)->ID(); return boost::get<BlockDesc *>(it->second)->ID();
} }
const std::unordered_map<std::string, Attribute> &OpDescBind::GetAttrMap() const std::unordered_map<std::string, Attribute> &OpDesc::GetAttrMap() const {
const {
return attrs_; return attrs_;
} }
void OpDescBind::Rename(const std::string &old_name, void OpDesc::Rename(const std::string &old_name, const std::string &new_name) {
const std::string &new_name) {
for (auto &input : inputs_) { for (auto &input : inputs_) {
std::replace(input.second.begin(), input.second.end(), old_name, new_name); std::replace(input.second.begin(), input.second.end(), old_name, new_name);
} }
...@@ -235,7 +229,7 @@ void OpDescBind::Rename(const std::string &old_name, ...@@ -235,7 +229,7 @@ void OpDescBind::Rename(const std::string &old_name,
need_update_ = true; need_update_ = true;
} }
void OpDescBind::RenameOutput(const std::string &old_name, void OpDesc::RenameOutput(const std::string &old_name,
const std::string &new_name) { const std::string &new_name) {
for (auto &output : outputs_) { for (auto &output : outputs_) {
std::replace(output.second.begin(), output.second.end(), old_name, std::replace(output.second.begin(), output.second.end(), old_name,
...@@ -244,7 +238,7 @@ void OpDescBind::RenameOutput(const std::string &old_name, ...@@ -244,7 +238,7 @@ void OpDescBind::RenameOutput(const std::string &old_name,
need_update_ = true; need_update_ = true;
} }
void OpDescBind::RenameInput(const std::string &old_name, void OpDesc::RenameInput(const std::string &old_name,
const std::string &new_name) { const std::string &new_name) {
for (auto &input : inputs_) { for (auto &input : inputs_) {
std::replace(input.second.begin(), input.second.end(), old_name, new_name); std::replace(input.second.begin(), input.second.end(), old_name, new_name);
...@@ -278,7 +272,7 @@ struct SetAttrDescVisitor : public boost::static_visitor<void> { ...@@ -278,7 +272,7 @@ struct SetAttrDescVisitor : public boost::static_visitor<void> {
void operator()(boost::blank) const { PADDLE_THROW("Unexpected branch"); } void operator()(boost::blank) const { PADDLE_THROW("Unexpected branch"); }
}; };
void OpDescBind::Flush() { void OpDesc::Flush() {
if (need_update_) { if (need_update_) {
this->desc_.mutable_inputs()->Clear(); this->desc_.mutable_inputs()->Clear();
for (auto &ipt : inputs_) { for (auto &ipt : inputs_) {
...@@ -330,7 +324,7 @@ static void InitInferShapeFuncs() { ...@@ -330,7 +324,7 @@ static void InitInferShapeFuncs() {
}); });
} }
void OpDescBind::CheckAttrs() { void OpDesc::CheckAttrs() {
PADDLE_ENFORCE(!Type().empty(), PADDLE_ENFORCE(!Type().empty(),
"CheckAttr() can not be called before type is setted."); "CheckAttr() can not be called before type is setted.");
auto *checker = OpInfoMap::Instance().Get(Type()).Checker(); auto *checker = OpInfoMap::Instance().Get(Type()).Checker();
...@@ -342,7 +336,7 @@ void OpDescBind::CheckAttrs() { ...@@ -342,7 +336,7 @@ void OpDescBind::CheckAttrs() {
checker->Check(attrs_); checker->Check(attrs_);
} }
void OpDescBind::InferShape(const BlockDescBind &block) const { void OpDesc::InferShape(const BlockDesc &block) const {
VLOG(3) << "CompileTime infer shape on " << Type(); VLOG(3) << "CompileTime infer shape on " << Type();
InitInferShapeFuncs(); InitInferShapeFuncs();
auto &infer_shape = OpInfoMap::Instance().Get(this->Type()).infer_shape_; auto &infer_shape = OpInfoMap::Instance().Get(this->Type()).infer_shape_;
...@@ -365,7 +359,7 @@ void OpDescBind::InferShape(const BlockDescBind &block) const { ...@@ -365,7 +359,7 @@ void OpDescBind::InferShape(const BlockDescBind &block) const {
infer_shape(&ctx); infer_shape(&ctx);
} }
void OpDescBind::InferVarType(BlockDescBind *block) const { void OpDesc::InferVarType(BlockDesc *block) const {
auto &info = OpInfoMap::Instance().Get(this->Type()); auto &info = OpInfoMap::Instance().Get(this->Type());
if (info.infer_var_type_) { if (info.infer_var_type_) {
info.infer_var_type_(*this, block); info.infer_var_type_(*this, block);
...@@ -384,7 +378,7 @@ void OpDescBind::InferVarType(BlockDescBind *block) const { ...@@ -384,7 +378,7 @@ void OpDescBind::InferVarType(BlockDescBind *block) const {
} }
CompileTimeInferShapeContext::CompileTimeInferShapeContext( CompileTimeInferShapeContext::CompileTimeInferShapeContext(
const OpDescBind &op, const BlockDescBind &block) const OpDesc &op, const BlockDesc &block)
: op_(op), block_(block) {} : op_(op), block_(block) {}
bool CompileTimeInferShapeContext::HasInput(const std::string &name) const { bool CompileTimeInferShapeContext::HasInput(const std::string &name) const {
......
...@@ -23,17 +23,17 @@ limitations under the License. */ ...@@ -23,17 +23,17 @@ limitations under the License. */
namespace paddle { namespace paddle {
namespace framework { namespace framework {
class BlockDescBind; class BlockDesc;
class ProgramDescBind; class ProgramDesc;
class OpDescBind { class OpDesc {
public: public:
OpDescBind() {} OpDesc() {}
OpDescBind(const std::string &type, const VariableNameMap &inputs, OpDesc(const std::string &type, const VariableNameMap &inputs,
const VariableNameMap &outputs, const AttributeMap &attrs); const VariableNameMap &outputs, const AttributeMap &attrs);
OpDescBind(const proto::OpDesc &desc, ProgramDescBind *prog); OpDesc(const proto::OpDesc &desc, ProgramDesc *prog);
proto::OpDesc *Proto(); proto::OpDesc *Proto();
...@@ -65,7 +65,7 @@ class OpDescBind { ...@@ -65,7 +65,7 @@ class OpDescBind {
void SetAttr(const std::string &name, const Attribute &v); void SetAttr(const std::string &name, const Attribute &v);
void SetBlockAttr(const std::string &name, BlockDescBind &block); void SetBlockAttr(const std::string &name, BlockDesc &block);
Attribute GetAttr(const std::string &name) const; Attribute GetAttr(const std::string &name) const;
...@@ -107,9 +107,9 @@ class OpDescBind { ...@@ -107,9 +107,9 @@ class OpDescBind {
void CheckAttrs(); void CheckAttrs();
void InferShape(const BlockDescBind &block) const; void InferShape(const BlockDesc &block) const;
void InferVarType(BlockDescBind *block) const; void InferVarType(BlockDesc *block) const;
void MarkAsTarget() { desc_.set_is_target(true); } void MarkAsTarget() { desc_.set_is_target(true); }
...@@ -127,7 +127,9 @@ class OpDescBind { ...@@ -127,7 +127,9 @@ class OpDescBind {
} }
proto::OpDesc desc_; proto::OpDesc desc_;
// input arg name => input variable names
VariableNameMap inputs_; VariableNameMap inputs_;
// output arg name => output variable names
VariableNameMap outputs_; VariableNameMap outputs_;
AttributeMap attrs_; AttributeMap attrs_;
......
...@@ -47,7 +47,7 @@ static VariableNameMap ConvertOpDescVarsToVarNameMap( ...@@ -47,7 +47,7 @@ static VariableNameMap ConvertOpDescVarsToVarNameMap(
std::unique_ptr<OperatorBase> OpRegistry::CreateOp( std::unique_ptr<OperatorBase> OpRegistry::CreateOp(
const proto::OpDesc& op_desc) { const proto::OpDesc& op_desc) {
VLOG(1) << "CreateOp directly from OpDesc is deprecated. It should only be" VLOG(1) << "CreateOp directly from OpDesc is deprecated. It should only be"
"used in unit tests. Use CreateOp(const OpDescBind& op_desc) " "used in unit tests. Use CreateOp(const OpDesc& op_desc) "
"instead."; "instead.";
VariableNameMap inputs = ConvertOpDescVarsToVarNameMap(op_desc.inputs()); VariableNameMap inputs = ConvertOpDescVarsToVarNameMap(op_desc.inputs());
VariableNameMap outputs = ConvertOpDescVarsToVarNameMap(op_desc.outputs()); VariableNameMap outputs = ConvertOpDescVarsToVarNameMap(op_desc.outputs());
...@@ -59,7 +59,7 @@ std::unique_ptr<OperatorBase> OpRegistry::CreateOp( ...@@ -59,7 +59,7 @@ std::unique_ptr<OperatorBase> OpRegistry::CreateOp(
return CreateOp(op_desc.type(), inputs, outputs, attrs); return CreateOp(op_desc.type(), inputs, outputs, attrs);
} }
std::unique_ptr<OperatorBase> OpRegistry::CreateOp(const OpDescBind& op_desc) { std::unique_ptr<OperatorBase> OpRegistry::CreateOp(const OpDesc& op_desc) {
return CreateOp(op_desc.Type(), op_desc.Inputs(), op_desc.Outputs(), return CreateOp(op_desc.Type(), op_desc.Inputs(), op_desc.Outputs(),
op_desc.GetAttrMap()); op_desc.GetAttrMap());
} }
......
...@@ -79,7 +79,7 @@ class OpRegistry { ...@@ -79,7 +79,7 @@ class OpRegistry {
static std::unique_ptr<OperatorBase> CreateOp(const proto::OpDesc& op_desc); static std::unique_ptr<OperatorBase> CreateOp(const proto::OpDesc& op_desc);
static std::unique_ptr<OperatorBase> CreateOp(const OpDescBind& op_desc); static std::unique_ptr<OperatorBase> CreateOp(const OpDesc& op_desc);
}; };
template <typename PlaceType, bool at_end, size_t I, typename... KernelType> template <typename PlaceType, bool at_end, size_t I, typename... KernelType>
...@@ -126,6 +126,14 @@ class OpKernelRegistrar : public Registrar { ...@@ -126,6 +126,14 @@ class OpKernelRegistrar : public Registrar {
__test_global_namespace_##uniq_name##__>::value, \ __test_global_namespace_##uniq_name##__>::value, \
msg) msg)
/*
The variadic arguments should be class types derived from one of the
following classes:
OpProtoAndCheckerMaker
GradOpDescMakerBase
VarTypeInference
InferShapeBase
*/
#define REGISTER_OPERATOR(op_type, op_class, ...) \ #define REGISTER_OPERATOR(op_type, op_class, ...) \
STATIC_ASSERT_GLOBAL_NAMESPACE( \ STATIC_ASSERT_GLOBAL_NAMESPACE( \
__reg_op__##op_type, \ __reg_op__##op_type, \
...@@ -144,15 +152,24 @@ class OpKernelRegistrar : public Registrar { ...@@ -144,15 +152,24 @@ class OpKernelRegistrar : public Registrar {
} }
/** /**
* Macro to register Operator. * Macro to register Operator. When the input is duplicable, you should
* use REGISTER_OP_EX with drop_empty_grad=false instead.
*/ */
#define REGISTER_OP(op_type, op_class, op_maker_class, grad_op_type, \ #define REGISTER_OP(op_type, op_class, op_maker_class, grad_op_type, \
grad_op_class) \ grad_op_class) \
REGISTER_OP_EX(op_type, op_class, op_maker_class, grad_op_type, \
grad_op_class, true)
// When an argument is duplicable, we need to use this version.
// Perhaps we can omit the DropEmptyIG template parameter and
// only have one version of REGISTER_OP.
#define REGISTER_OP_EX(op_type, op_class, op_maker_class, grad_op_type, \
grad_op_class, drop_empty_grad) \
REGISTER_OPERATOR(grad_op_type, grad_op_class); \ REGISTER_OPERATOR(grad_op_type, grad_op_class); \
class _GradOpDescMaker_##grad_op_type##_ \ class _GradOpDescMaker_##grad_op_type##_ \
: public ::paddle::framework::DefaultGradOpDescMaker<true> { \ : public ::paddle::framework::DefaultGradOpDescMaker<drop_empty_grad> { \
using ::paddle::framework::DefaultGradOpDescMaker< \ using ::paddle::framework::DefaultGradOpDescMaker< \
true>::DefaultGradOpDescMaker; \ drop_empty_grad>::DefaultGradOpDescMaker; \
\ \
protected: \ protected: \
virtual std::string GradOpType() const { return #grad_op_type; } \ virtual std::string GradOpType() const { return #grad_op_type; } \
......
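Usage-wise, REGISTER_OP keeps the previous behavior (drop_empty_grad = true), while an operator with a duplicable input registers through REGISTER_OP_EX and passes false so that empty input gradients are kept and the variable-to-gradient correspondence stays unambiguous. A hypothetical registration (op and class names are placeholders, not operators added by this change):

    namespace ops = paddle::operators;
    REGISTER_OP_EX(concat_like, ops::ConcatLikeOp, ops::ConcatLikeOpMaker,
                   concat_like_grad, ops::ConcatLikeGradOp, false);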
...@@ -18,49 +18,49 @@ limitations under the License. */ ...@@ -18,49 +18,49 @@ limitations under the License. */
namespace paddle { namespace paddle {
namespace framework { namespace framework {
BlockDescBind *ProgramDescBind::AppendBlock(const BlockDescBind &parent) { BlockDesc *ProgramDesc::AppendBlock(const BlockDesc &parent) {
auto *b = desc_.add_blocks(); auto *b = desc_.add_blocks();
b->set_parent_idx(parent.ID()); b->set_parent_idx(parent.ID());
b->set_idx(desc_.blocks_size() - 1); b->set_idx(desc_.blocks_size() - 1);
blocks_.emplace_back(new BlockDescBind(this, b)); blocks_.emplace_back(new BlockDesc(this, b));
return blocks_.back().get(); return blocks_.back().get();
} }
proto::ProgramDesc *ProgramDescBind::Proto() { proto::ProgramDesc *ProgramDesc::Proto() {
for (auto &block : blocks_) { for (auto &block : blocks_) {
block->Flush(); block->Flush();
} }
return &desc_; return &desc_;
} }
ProgramDescBind::ProgramDescBind() { ProgramDesc::ProgramDesc() {
auto *block = desc_.mutable_blocks()->Add(); auto *block = desc_.mutable_blocks()->Add();
block->set_idx(kRootBlockIndex); block->set_idx(kRootBlockIndex);
block->set_parent_idx(kNoneBlockIndex); block->set_parent_idx(kNoneBlockIndex);
blocks_.emplace_back(new BlockDescBind(this, block)); blocks_.emplace_back(new BlockDesc(this, block));
} }
ProgramDescBind::ProgramDescBind(const ProgramDescBind &o) { ProgramDesc::ProgramDesc(const ProgramDesc &o) {
desc_ = o.desc_; desc_ = o.desc_;
for (int i = 0; i < desc_.blocks_size(); ++i) { for (int i = 0; i < desc_.blocks_size(); ++i) {
auto *block = desc_.mutable_blocks(i); auto *block = desc_.mutable_blocks(i);
blocks_.emplace_back(new BlockDescBind(*o.blocks_[i], block, this)); blocks_.emplace_back(new BlockDesc(*o.blocks_[i], block, this));
} }
} }
ProgramDescBind::ProgramDescBind(const proto::ProgramDesc &desc) { ProgramDesc::ProgramDesc(const proto::ProgramDesc &desc) {
desc_ = desc; desc_ = desc;
for (auto &block_desc : *desc_.mutable_blocks()) { for (auto &block_desc : *desc_.mutable_blocks()) {
blocks_.emplace_back(new BlockDescBind(this, &block_desc)); blocks_.emplace_back(new BlockDesc(this, &block_desc));
} }
} }
ProgramDescBind::ProgramDescBind(const std::string &binary_str) { ProgramDesc::ProgramDesc(const std::string &binary_str) {
PADDLE_ENFORCE(desc_.ParseFromString(binary_str), PADDLE_ENFORCE(desc_.ParseFromString(binary_str),
"Fail to parse program_desc from binary string."); "Fail to parse program_desc from binary string.");
for (auto &block_desc : *desc_.mutable_blocks()) { for (auto &block_desc : *desc_.mutable_blocks()) {
blocks_.emplace_back(new BlockDescBind(this, &block_desc)); blocks_.emplace_back(new BlockDesc(this, &block_desc));
} }
} }
......
...@@ -23,23 +23,23 @@ limitations under the License. */ ...@@ -23,23 +23,23 @@ limitations under the License. */
namespace paddle { namespace paddle {
namespace framework { namespace framework {
class BlockDescBind; class BlockDesc;
class ProgramDescBind { class ProgramDesc {
public: public:
ProgramDescBind(); ProgramDesc();
explicit ProgramDescBind(const proto::ProgramDesc &desc); explicit ProgramDesc(const proto::ProgramDesc &desc);
ProgramDescBind(const ProgramDescBind &o); ProgramDesc(const ProgramDesc &o);
explicit ProgramDescBind(const std::string &binary_str); explicit ProgramDesc(const std::string &binary_str);
BlockDescBind *AppendBlock(const BlockDescBind &parent); BlockDesc *AppendBlock(const BlockDesc &parent);
BlockDescBind *MutableBlock(size_t idx) { return blocks_[idx].get(); } BlockDesc *MutableBlock(size_t idx) { return blocks_[idx].get(); }
const BlockDescBind &Block(size_t idx) const { return *blocks_[idx]; } const BlockDesc &Block(size_t idx) const { return *blocks_[idx]; }
size_t Size() const { return blocks_.size(); } size_t Size() const { return blocks_.size(); }
...@@ -48,7 +48,7 @@ class ProgramDescBind { ...@@ -48,7 +48,7 @@ class ProgramDescBind {
private: private:
proto::ProgramDesc desc_; proto::ProgramDesc desc_;
std::vector<std::unique_ptr<BlockDescBind>> blocks_; std::vector<std::unique_ptr<BlockDesc>> blocks_;
}; };
} // namespace framework } // namespace framework
} // namespace paddle } // namespace paddle
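As a quick orientation for the renamed class, a minimal usage sketch based only on the members shown above (variable names are illustrative):

using paddle::framework::BlockDesc;
using paddle::framework::ProgramDesc;

ProgramDesc program;                          // constructed with the root block
BlockDesc *root = program.MutableBlock(0);
program.AppendBlock(*root);                   // append a child of the root block

std::string binary;
program.Proto()->SerializeToString(&binary);  // Proto() flushes every block first
ProgramDesc restored(binary);                 // rebuild the program from the bytes
// restored.Size() == program.Size()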
...@@ -19,7 +19,7 @@ ...@@ -19,7 +19,7 @@
namespace paddle { namespace paddle {
namespace framework { namespace framework {
TEST(ProgramDesc, copy_ctor) { TEST(ProgramDesc, copy_ctor) {
ProgramDescBind program; ProgramDesc program;
auto* global_block = program.MutableBlock(0); auto* global_block = program.MutableBlock(0);
auto* x = global_block->Var("X"); auto* x = global_block->Var("X");
x->SetType(proto::VarDesc_VarType_LOD_TENSOR); x->SetType(proto::VarDesc_VarType_LOD_TENSOR);
...@@ -42,12 +42,12 @@ TEST(ProgramDesc, copy_ctor) { ...@@ -42,12 +42,12 @@ TEST(ProgramDesc, copy_ctor) {
out->SetType(proto::VarDesc_VarType_LOD_TENSOR); out->SetType(proto::VarDesc_VarType_LOD_TENSOR);
op->SetOutput("Y", {out->Name()}); op->SetOutput("Y", {out->Name()});
ProgramDescBind program_copy(program); ProgramDesc program_copy(program);
auto* global_block_copy = program_copy.MutableBlock(0); auto* global_block_copy = program_copy.MutableBlock(0);
ASSERT_NE(global_block, global_block_copy); ASSERT_NE(global_block, global_block_copy);
auto assert_same_var = [&](const std::string& name, VarDescBind* var_before) { auto assert_same_var = [&](const std::string& name, VarDesc* var_before) {
ASSERT_TRUE(global_block_copy->HasVar(name)); ASSERT_TRUE(global_block_copy->HasVar(name));
auto* copy = global_block_copy->Var(name); auto* copy = global_block_copy->Var(name);
ASSERT_NE(copy, var_before); ASSERT_NE(copy, var_before);
...@@ -81,7 +81,7 @@ TEST(ProgramDesc, copy_ctor) { ...@@ -81,7 +81,7 @@ TEST(ProgramDesc, copy_ctor) {
} }
TEST(ProgramDescBind, serialize_and_deserialize) { TEST(ProgramDescBind, serialize_and_deserialize) {
ProgramDescBind program_origin; ProgramDesc program_origin;
auto* global_block = program_origin.MutableBlock(0); auto* global_block = program_origin.MutableBlock(0);
auto* x = global_block->Var("X"); auto* x = global_block->Var("X");
x->SetType(proto::VarDesc_VarType_LOD_TENSOR); x->SetType(proto::VarDesc_VarType_LOD_TENSOR);
...@@ -107,11 +107,11 @@ TEST(ProgramDescBind, serialize_and_deserialize) { ...@@ -107,11 +107,11 @@ TEST(ProgramDescBind, serialize_and_deserialize) {
std::string binary_str; std::string binary_str;
program_origin.Proto()->SerializeToString(&binary_str); program_origin.Proto()->SerializeToString(&binary_str);
ProgramDescBind program_restored(binary_str); ProgramDesc program_restored(binary_str);
auto* global_block_restored = program_restored.MutableBlock(0); auto* global_block_restored = program_restored.MutableBlock(0);
ASSERT_NE(global_block, global_block_restored); ASSERT_NE(global_block, global_block_restored);
auto assert_same_var = [&](const std::string& name, VarDescBind* var_before) { auto assert_same_var = [&](const std::string& name, VarDesc* var_before) {
ASSERT_TRUE(global_block_restored->HasVar(name)); ASSERT_TRUE(global_block_restored->HasVar(name));
auto* restored = global_block_restored->Var(name); auto* restored = global_block_restored->Var(name);
ASSERT_NE(restored, var_before); ASSERT_NE(restored, var_before);
......
...@@ -29,7 +29,7 @@ namespace ops = paddle::operators; ...@@ -29,7 +29,7 @@ namespace ops = paddle::operators;
void AddOp(const std::string &type, const f::VariableNameMap &inputs, void AddOp(const std::string &type, const f::VariableNameMap &inputs,
const f::VariableNameMap &outputs, f::AttributeMap attrs, const f::VariableNameMap &outputs, f::AttributeMap attrs,
paddle::framework::BlockDescBind *block) { paddle::framework::BlockDesc *block) {
// insert output // insert output
for (auto kv : outputs) { for (auto kv : outputs) {
for (auto v : kv.second) { for (auto v : kv.second) {
...@@ -51,8 +51,8 @@ void AddOp(const std::string &type, const f::VariableNameMap &inputs, ...@@ -51,8 +51,8 @@ void AddOp(const std::string &type, const f::VariableNameMap &inputs,
} }
TEST(Prune, one_operator) { TEST(Prune, one_operator) {
f::ProgramDescBind program; f::ProgramDesc program;
f::BlockDescBind *block = program.MutableBlock(0); f::BlockDesc *block = program.MutableBlock(0);
AddOp("one_one", {{"input", {"a"}}}, {{"output", {"b"}}}, f::AttributeMap{}, AddOp("one_one", {{"input", {"a"}}}, {{"output", {"b"}}}, f::AttributeMap{},
block); block);
...@@ -69,8 +69,8 @@ TEST(Prune, one_operator) { ...@@ -69,8 +69,8 @@ TEST(Prune, one_operator) {
} }
TEST(Prune, forward) { TEST(Prune, forward) {
f::ProgramDescBind program; f::ProgramDesc program;
f::BlockDescBind *block = program.MutableBlock(0); f::BlockDesc *block = program.MutableBlock(0);
AddOp("one_one", {{"input", {"a"}}}, {{"output", {"b"}}}, f::AttributeMap{}, AddOp("one_one", {{"input", {"a"}}}, {{"output", {"b"}}}, f::AttributeMap{},
block); block);
...@@ -92,8 +92,8 @@ TEST(Prune, forward) { ...@@ -92,8 +92,8 @@ TEST(Prune, forward) {
} }
TEST(Prune, multi_input_op) { TEST(Prune, multi_input_op) {
f::ProgramDescBind program; f::ProgramDesc program;
f::BlockDescBind *block = program.MutableBlock(0); f::BlockDesc *block = program.MutableBlock(0);
AddOp("one_one", {{"input", {"a0"}}}, {{"output", {"b0"}}}, f::AttributeMap{}, AddOp("one_one", {{"input", {"a0"}}}, {{"output", {"b0"}}}, f::AttributeMap{},
block); block);
...@@ -113,8 +113,8 @@ TEST(Prune, multi_input_op) { ...@@ -113,8 +113,8 @@ TEST(Prune, multi_input_op) {
} }
TEST(Prune, multi_output_op) { TEST(Prune, multi_output_op) {
f::ProgramDescBind program; f::ProgramDesc program;
f::BlockDescBind *block = program.MutableBlock(0); f::BlockDesc *block = program.MutableBlock(0);
AddOp("one_two", {{"input", {"a"}}}, {{"output", {"b", "c"}}}, AddOp("one_two", {{"input", {"a"}}}, {{"output", {"b", "c"}}},
f::AttributeMap{}, block); f::AttributeMap{}, block);
...@@ -132,8 +132,8 @@ TEST(Prune, multi_output_op) { ...@@ -132,8 +132,8 @@ TEST(Prune, multi_output_op) {
} }
TEST(Prune, multi_target) { TEST(Prune, multi_target) {
f::ProgramDescBind program; f::ProgramDesc program;
f::BlockDescBind *block = program.MutableBlock(0); f::BlockDesc *block = program.MutableBlock(0);
AddOp("one_two", {{"input", {"a"}}}, {{"output", {"b", "c"}}}, AddOp("one_two", {{"input", {"a"}}}, {{"output", {"b", "c"}}},
f::AttributeMap{}, block); f::AttributeMap{}, block);
......
...@@ -25,11 +25,9 @@ ...@@ -25,11 +25,9 @@
namespace paddle { namespace paddle {
namespace framework { namespace framework {
class OperatorBase; class OperatorBase;
class OpDescBind; class OpDesc;
class BlockDescBind;
class BlockDesc;
class InferShapeContext; class InferShapeContext;
class BlockDescBind; class BlockDesc;
using VariableNameMap = std::map<std::string, std::vector<std::string>>; using VariableNameMap = std::map<std::string, std::vector<std::string>>;
...@@ -37,7 +35,7 @@ using VariableNameMap = std::map<std::string, std::vector<std::string>>; ...@@ -37,7 +35,7 @@ using VariableNameMap = std::map<std::string, std::vector<std::string>>;
using Attribute = using Attribute =
boost::variant<boost::blank, int, float, std::string, std::vector<int>, boost::variant<boost::blank, int, float, std::string, std::vector<int>,
std::vector<float>, std::vector<std::string>, bool, std::vector<float>, std::vector<std::string>, bool,
std::vector<bool>, BlockDescBind*>; std::vector<bool>, BlockDesc*>;
using AttributeMap = std::unordered_map<std::string, Attribute>; using AttributeMap = std::unordered_map<std::string, Attribute>;
...@@ -45,13 +43,13 @@ using OpCreator = std::function<OperatorBase*( ...@@ -45,13 +43,13 @@ using OpCreator = std::function<OperatorBase*(
const std::string& /*type*/, const VariableNameMap& /*inputs*/, const std::string& /*type*/, const VariableNameMap& /*inputs*/,
const VariableNameMap& /*outputs*/, const AttributeMap& /*attrs*/)>; const VariableNameMap& /*outputs*/, const AttributeMap& /*attrs*/)>;
using GradOpMakerFN = std::function<std::vector<std::unique_ptr<OpDescBind>>( using GradOpMakerFN = std::function<std::vector<std::unique_ptr<OpDesc>>(
const OpDescBind&, const std::unordered_set<std::string>& /*no_grad_set*/, const OpDesc&, const std::unordered_set<std::string>& /*no_grad_set*/,
std::unordered_map<std::string, std::string>* /*grad_to_var*/, std::unordered_map<std::string, std::string>* /*grad_to_var*/,
const std::vector<BlockDescBind*>& grad_block)>; const std::vector<BlockDesc*>& grad_block)>;
using InferVarTypeFN = std::function<void(const OpDescBind& /*op_desc*/, using InferVarTypeFN =
BlockDescBind* /*block*/)>; std::function<void(const OpDesc& /*op_desc*/, BlockDesc* /*block*/)>;
using InferShapeFN = std::function<void(InferShapeContext*)>; using InferShapeFN = std::function<void(InferShapeContext*)>;
......
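These aliases are plain std::function signatures, so any callable of the matching shape can be stored in them. An illustrative sketch for InferVarTypeFN follows; the lambda body is an assumption for demonstration only, not code from this change.

using paddle::framework::BlockDesc;
using paddle::framework::InferVarTypeFN;
using paddle::framework::OpDesc;

InferVarTypeFN mark_outputs_as_lod_tensor = [](const OpDesc &op_desc,
                                               BlockDesc *block) {
  // mark every "Out" variable of the op as a plain LoD tensor
  for (auto &name : op_desc.Output("Out")) {
    block->Var(name)->SetType(paddle::framework::proto::VarDesc::LOD_TENSOR);
  }
};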
...@@ -18,29 +18,27 @@ limitations under the License. */ ...@@ -18,29 +18,27 @@ limitations under the License. */
namespace paddle { namespace paddle {
namespace framework { namespace framework {
proto::VarDesc::VarType VarDescBind::GetType() const { return desc_.type(); } proto::VarDesc::VarType VarDesc::GetType() const { return desc_.type(); }
void VarDescBind::SetType(proto::VarDesc::VarType type) { void VarDesc::SetType(proto::VarDesc::VarType type) { desc_.set_type(type); }
desc_.set_type(type);
}
void VarDescBind::SetShape(const std::vector<int64_t> &dims) { void VarDesc::SetShape(const std::vector<int64_t> &dims) {
VectorToRepeated(dims, mutable_tensor_desc()->mutable_dims()); VectorToRepeated(dims, mutable_tensor_desc()->mutable_dims());
} }
void VarDescBind::SetDataType(proto::DataType data_type) { void VarDesc::SetDataType(proto::DataType data_type) {
mutable_tensor_desc()->set_data_type(data_type); mutable_tensor_desc()->set_data_type(data_type);
} }
std::vector<int64_t> VarDescBind::Shape() const { std::vector<int64_t> VarDesc::Shape() const {
return RepeatedToVector(tensor_desc().dims()); return RepeatedToVector(tensor_desc().dims());
} }
proto::DataType VarDescBind::GetDataType() const { proto::DataType VarDesc::GetDataType() const {
return tensor_desc().data_type(); return tensor_desc().data_type();
} }
void VarDescBind::SetLoDLevel(int32_t lod_level) { void VarDesc::SetLoDLevel(int32_t lod_level) {
switch (desc_.type()) { switch (desc_.type()) {
case proto::VarDesc::LOD_TENSOR: case proto::VarDesc::LOD_TENSOR:
desc_.mutable_lod_tensor()->set_lod_level(lod_level); desc_.mutable_lod_tensor()->set_lod_level(lod_level);
...@@ -54,7 +52,7 @@ void VarDescBind::SetLoDLevel(int32_t lod_level) { ...@@ -54,7 +52,7 @@ void VarDescBind::SetLoDLevel(int32_t lod_level) {
} }
} }
int32_t VarDescBind::GetLodLevel() const { int32_t VarDesc::GetLodLevel() const {
switch (desc_.type()) { switch (desc_.type()) {
case proto::VarDesc::LOD_TENSOR: case proto::VarDesc::LOD_TENSOR:
return desc_.lod_tensor().lod_level(); return desc_.lod_tensor().lod_level();
...@@ -66,7 +64,7 @@ int32_t VarDescBind::GetLodLevel() const { ...@@ -66,7 +64,7 @@ int32_t VarDescBind::GetLodLevel() const {
} }
} }
const proto::TensorDesc &VarDescBind::tensor_desc() const { const proto::TensorDesc &VarDesc::tensor_desc() const {
PADDLE_ENFORCE(desc_.has_type(), "tensor_desc() must be invoked after the type is set"); PADDLE_ENFORCE(desc_.has_type(), "tensor_desc() must be invoked after the type is set");
switch (desc_.type()) { switch (desc_.type()) {
case proto::VarDesc::SELECTED_ROWS: case proto::VarDesc::SELECTED_ROWS:
...@@ -80,7 +78,7 @@ const proto::TensorDesc &VarDescBind::tensor_desc() const { ...@@ -80,7 +78,7 @@ const proto::TensorDesc &VarDescBind::tensor_desc() const {
} }
} }
proto::TensorDesc *VarDescBind::mutable_tensor_desc() { proto::TensorDesc *VarDesc::mutable_tensor_desc() {
PADDLE_ENFORCE(desc_.has_type(), PADDLE_ENFORCE(desc_.has_type(),
"invoke MutableTensorDesc must after set type"); "invoke MutableTensorDesc must after set type");
switch (desc_.type()) { switch (desc_.type()) {
......
...@@ -53,14 +53,14 @@ inline void VectorToRepeated(const std::vector<bool> &vec, ...@@ -53,14 +53,14 @@ inline void VectorToRepeated(const std::vector<bool> &vec,
} }
} }
class VarDescBind { class VarDesc {
public: public:
explicit VarDescBind(const std::string &name) { explicit VarDesc(const std::string &name) {
desc_.set_name(name); desc_.set_name(name);
desc_.set_type(proto::VarDesc::LOD_TENSOR); desc_.set_type(proto::VarDesc::LOD_TENSOR);
} }
explicit VarDescBind(const proto::VarDesc &desc) : desc_(desc) {} explicit VarDesc(const proto::VarDesc &desc) : desc_(desc) {}
proto::VarDesc *Proto() { return &desc_; } proto::VarDesc *Proto() { return &desc_; }
......
...@@ -21,8 +21,7 @@ namespace framework { ...@@ -21,8 +21,7 @@ namespace framework {
class VarTypeInference { class VarTypeInference {
public: public:
virtual ~VarTypeInference() {} virtual ~VarTypeInference() {}
virtual void operator()(const OpDescBind& op_desc, virtual void operator()(const OpDesc& op_desc, BlockDesc* block) const = 0;
BlockDescBind* block) const = 0;
}; };
} // namespace framework } // namespace framework
......
...@@ -33,8 +33,7 @@ class SumOpMaker : public OpProtoAndCheckerMaker { ...@@ -33,8 +33,7 @@ class SumOpMaker : public OpProtoAndCheckerMaker {
class SumOpVarTypeInference : public VarTypeInference { class SumOpVarTypeInference : public VarTypeInference {
public: public:
void operator()(const OpDescBind &op_desc, void operator()(const OpDesc &op_desc, BlockDesc *block) const override {
BlockDescBind *block) const override {
auto &inputs = op_desc.Input("X"); auto &inputs = op_desc.Input("X");
auto default_var_type = proto::VarDesc::SELECTED_ROWS; auto default_var_type = proto::VarDesc::SELECTED_ROWS;
...@@ -62,7 +61,7 @@ namespace paddle { ...@@ -62,7 +61,7 @@ namespace paddle {
namespace framework { namespace framework {
TEST(InferVarType, sum_op) { TEST(InferVarType, sum_op) {
ProgramDescBind prog; ProgramDesc prog;
auto *op = prog.MutableBlock(0)->AppendOp(); auto *op = prog.MutableBlock(0)->AppendOp();
op->SetType("sum"); op->SetType("sum");
op->SetInput("X", {"test_a", "test_b", "test_c"}); op->SetInput("X", {"test_a", "test_b", "test_c"});
...@@ -85,7 +84,7 @@ TEST(InferVarType, sum_op) { ...@@ -85,7 +84,7 @@ TEST(InferVarType, sum_op) {
} }
TEST(InferVarType, sum_op_without_infer_var_type) { TEST(InferVarType, sum_op_without_infer_var_type) {
ProgramDescBind prog; ProgramDesc prog;
auto *op = prog.MutableBlock(0)->AppendOp(); auto *op = prog.MutableBlock(0)->AppendOp();
op->SetType("sum_without_infer_var_type"); op->SetType("sum_without_infer_var_type");
op->SetInput("X", {"test2_a", "test2_b", "test2_c"}); op->SetInput("X", {"test2_a", "test2_b", "test2_c"});
......
...@@ -62,33 +62,6 @@ void Copy<platform::GPUPlace, platform::GPUPlace>(platform::GPUPlace dst_place, ...@@ -62,33 +62,6 @@ void Copy<platform::GPUPlace, platform::GPUPlace>(platform::GPUPlace dst_place,
} }
} }
template <>
void Copy<platform::CPUPlace, platform::GPUPlace>(platform::CPUPlace dst_place,
void* dst,
platform::GPUPlace src_place,
const void* src, size_t num) {
platform::SetDeviceId(src_place.device);
platform::GpuMemcpySync(dst, src, num, cudaMemcpyDeviceToHost);
}
template <>
void Copy<platform::GPUPlace, platform::CPUPlace>(platform::GPUPlace dst_place,
void* dst,
platform::CPUPlace src_place,
const void* src, size_t num) {
platform::SetDeviceId(dst_place.device);
platform::GpuMemcpySync(dst, src, num, cudaMemcpyHostToDevice);
}
template <>
void Copy<platform::GPUPlace, platform::GPUPlace>(platform::GPUPlace dst_place,
void* dst,
platform::GPUPlace src_place,
const void* src, size_t num) {
platform::SetDeviceId(dst_place.device);
platform::GpuMemcpySync(dst, src, num, cudaMemcpyDeviceToDevice);
}
#endif #endif
} // namespace memory } // namespace memory
......
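The block removed here is a duplicated copy of the synchronous GPU specializations. Assuming the surviving definitions earlier in this file keep the same five-argument signature shown above, they would be invoked like this (pointers and sizes are placeholders):

paddle::platform::CPUPlace cpu;
paddle::platform::GPUPlace gpu(0);

// device-to-host, resolved to Copy<CPUPlace, GPUPlace>
paddle::memory::Copy(cpu, host_ptr, gpu, device_ptr, num_bytes);
// host-to-device, resolved to Copy<GPUPlace, CPUPlace>
paddle::memory::Copy(gpu, device_ptr, cpu, host_ptr, num_bytes);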
...@@ -149,14 +149,14 @@ class ArrayToLoDTensorGradMaker : public framework::SingleGradOpDescMaker { ...@@ -149,14 +149,14 @@ class ArrayToLoDTensorGradMaker : public framework::SingleGradOpDescMaker {
using framework::SingleGradOpDescMaker::SingleGradOpDescMaker; using framework::SingleGradOpDescMaker::SingleGradOpDescMaker;
protected: protected:
std::unique_ptr<framework::OpDescBind> Apply() const override { std::unique_ptr<framework::OpDesc> Apply() const override {
auto *grad_op = new framework::OpDescBind(); auto *grad_op = new framework::OpDesc();
grad_op->SetType("lod_tensor_to_array"); grad_op->SetType("lod_tensor_to_array");
grad_op->SetInput("X", OutputGrad("Out")); grad_op->SetInput("X", OutputGrad("Out"));
grad_op->SetInput("RankTable", Input("RankTable")); grad_op->SetInput("RankTable", Input("RankTable"));
grad_op->SetOutput("Out", InputGrad("X")); grad_op->SetOutput("Out", InputGrad("X"));
grad_op->SetAttrMap(Attrs()); grad_op->SetAttrMap(Attrs());
return std::unique_ptr<framework::OpDescBind>(grad_op); return std::unique_ptr<framework::OpDesc>(grad_op);
} }
}; };
......
...@@ -121,12 +121,12 @@ class AssignGradMaker : public framework::SingleGradOpDescMaker { ...@@ -121,12 +121,12 @@ class AssignGradMaker : public framework::SingleGradOpDescMaker {
using framework::SingleGradOpDescMaker::SingleGradOpDescMaker; using framework::SingleGradOpDescMaker::SingleGradOpDescMaker;
protected: protected:
std::unique_ptr<framework::OpDescBind> Apply() const override { std::unique_ptr<framework::OpDesc> Apply() const override {
auto *op = new framework::OpDescBind(); auto *op = new framework::OpDesc();
op->SetType("assign"); op->SetType("assign");
op->SetInput("X", OutputGrad("Out")); op->SetInput("X", OutputGrad("Out"));
op->SetOutput("Out", InputGrad("X")); op->SetOutput("Out", InputGrad("X"));
return std::unique_ptr<framework::OpDescBind>(op); return std::unique_ptr<framework::OpDesc>(op);
} }
}; };
......
...@@ -119,8 +119,8 @@ class BeamSearchDecodeInferShape : public framework::InferShapeBase { ...@@ -119,8 +119,8 @@ class BeamSearchDecodeInferShape : public framework::InferShapeBase {
class BeamSearchDecodeInferVarType : public framework::VarTypeInference { class BeamSearchDecodeInferVarType : public framework::VarTypeInference {
public: public:
void operator()(const framework::OpDescBind& op_desc, void operator()(const framework::OpDesc& op_desc,
framework::BlockDescBind* block) const override { framework::BlockDesc* block) const override {
for (auto& o : op_desc.Output("SentenceIds")) { for (auto& o : op_desc.Output("SentenceIds")) {
block->Var(o)->SetType(framework::proto::VarDesc::LOD_TENSOR); block->Var(o)->SetType(framework::proto::VarDesc::LOD_TENSOR);
} }
......
...@@ -52,14 +52,14 @@ class CastOpGradMaker : public framework::SingleGradOpDescMaker { ...@@ -52,14 +52,14 @@ class CastOpGradMaker : public framework::SingleGradOpDescMaker {
using framework::SingleGradOpDescMaker::SingleGradOpDescMaker; using framework::SingleGradOpDescMaker::SingleGradOpDescMaker;
protected: protected:
std::unique_ptr<framework::OpDescBind> Apply() const override { std::unique_ptr<framework::OpDesc> Apply() const override {
auto grad = new framework::OpDescBind(); auto grad = new framework::OpDesc();
grad->SetType("cast"); grad->SetType("cast");
grad->SetInput("X", OutputGrad("Out")); grad->SetInput("X", OutputGrad("Out"));
grad->SetOutput("Out", InputGrad("X")); grad->SetOutput("Out", InputGrad("X"));
grad->SetAttr("out_dtype", GetAttr("in_dtype")); grad->SetAttr("out_dtype", GetAttr("in_dtype"));
grad->SetAttr("in_dtype", GetAttr("out_dtype")); grad->SetAttr("in_dtype", GetAttr("out_dtype"));
return std::unique_ptr<framework::OpDescBind>(grad); return std::unique_ptr<framework::OpDesc>(grad);
} }
}; };
......
...@@ -98,8 +98,8 @@ class ConcatOpGrad : public framework::OperatorWithKernel { ...@@ -98,8 +98,8 @@ class ConcatOpGrad : public framework::OperatorWithKernel {
} // namespace paddle } // namespace paddle
namespace ops = paddle::operators; namespace ops = paddle::operators;
REGISTER_OP(concat, ops::ConcatOp, ops::ConcatOpMaker, concat_grad, REGISTER_OP_EX(concat, ops::ConcatOp, ops::ConcatOpMaker, concat_grad,
ops::ConcatOpGrad) ops::ConcatOpGrad, false)
REGISTER_OP_CPU_KERNEL(concat, REGISTER_OP_CPU_KERNEL(concat,
ops::ConcatKernel<paddle::platform::CPUPlace, float>) ops::ConcatKernel<paddle::platform::CPUPlace, float>)
REGISTER_OP_CPU_KERNEL(concat_grad, REGISTER_OP_CPU_KERNEL(concat_grad,
......
...@@ -65,7 +65,7 @@ class ConditionalBlockOp : public ConditionalOp { ...@@ -65,7 +65,7 @@ class ConditionalBlockOp : public ConditionalOp {
scopes->front() = &scope.NewScope(); scopes->front() = &scope.NewScope();
auto &cur_scope = *scopes->front(); auto &cur_scope = *scopes->front();
auto *block = Attr<framework::BlockDescBind *>("sub_block"); auto *block = Attr<framework::BlockDesc *>("sub_block");
framework::Executor exec(dev_ctx); framework::Executor exec(dev_ctx);
exec.Run(*block->Program(), &cur_scope, block->ID(), false); exec.Run(*block->Program(), &cur_scope, block->ID(), false);
} }
...@@ -86,7 +86,7 @@ class ConditionalBlockOpProtoMaker : public framework::OpProtoAndCheckerMaker { ...@@ -86,7 +86,7 @@ class ConditionalBlockOpProtoMaker : public framework::OpProtoAndCheckerMaker {
"(std::vector<Scope*>) The step scope of conditional block. To " "(std::vector<Scope*>) The step scope of conditional block. To "
"unify the conditional block, rnn and while op, the type of " "unify the conditional block, rnn and while op, the type of "
"scope is std::vector<Scope*>"); "scope is std::vector<Scope*>");
AddAttr<framework::BlockDescBind *>( AddAttr<framework::BlockDesc *>(
"sub_block", "The step block of conditional block operator"); "sub_block", "The step block of conditional block operator");
AddComment(R"DOC(Conditional block operator AddComment(R"DOC(Conditional block operator
...@@ -116,7 +116,7 @@ class ConditionalBlockGradOp : public ConditionalOp { ...@@ -116,7 +116,7 @@ class ConditionalBlockGradOp : public ConditionalOp {
auto &scopes = scope_var->Get<std::vector<framework::Scope *>>(); auto &scopes = scope_var->Get<std::vector<framework::Scope *>>();
framework::Scope &cur_scope = *scopes[0]; framework::Scope &cur_scope = *scopes[0];
auto *block = Attr<framework::BlockDescBind *>("sub_block"); auto *block = Attr<framework::BlockDesc *>("sub_block");
framework::Executor exec(dev_ctx); framework::Executor exec(dev_ctx);
exec.Run(*block->Program(), &cur_scope, block->ID(), false); exec.Run(*block->Program(), &cur_scope, block->ID(), false);
...@@ -170,18 +170,19 @@ class ConditionalBlockGradMaker : public framework::SingleGradOpDescMaker { ...@@ -170,18 +170,19 @@ class ConditionalBlockGradMaker : public framework::SingleGradOpDescMaker {
using framework::SingleGradOpDescMaker::SingleGradOpDescMaker; using framework::SingleGradOpDescMaker::SingleGradOpDescMaker;
protected: protected:
std::unique_ptr<framework::OpDescBind> Apply() const override { std::unique_ptr<framework::OpDesc> Apply() const override {
auto grad_op = new framework::OpDescBind(); auto grad_op = new framework::OpDesc();
grad_op->SetType("conditional_block_grad"); grad_op->SetType("conditional_block_grad");
grad_op->SetInput("X", Input("X")); grad_op->SetInput("X", Input("X"));
grad_op->SetInput("Params", Input("Params")); grad_op->SetInput("Params", Input("Params"));
grad_op->SetInput("Out", Output("Out")); grad_op->SetInput("Out", Output("Out"));
grad_op->SetInput(framework::GradVarName("Out"), OutputGrad("Out")); grad_op->SetInput(framework::GradVarName("Out"), OutputGrad("Out"));
grad_op->SetInput("Scope", Output("Scope")); grad_op->SetInput("Scope", Output("Scope"));
grad_op->SetOutput(framework::GradVarName("X"), InputGrad("X")); grad_op->SetOutput(framework::GradVarName("X"), InputGrad("X", false));
grad_op->SetOutput(framework::GradVarName("Params"), InputGrad("Params")); grad_op->SetOutput(framework::GradVarName("Params"),
InputGrad("Params", false));
grad_op->SetBlockAttr("sub_block", *this->grad_block_[0]); grad_op->SetBlockAttr("sub_block", *this->grad_block_[0]);
return std::unique_ptr<framework::OpDescBind>(grad_op); return std::unique_ptr<framework::OpDesc>(grad_op);
} }
}; };
......
...@@ -21,8 +21,6 @@ class CudnnConv2DTransposeOpMaker : public Conv2DTransposeOpMaker { ...@@ -21,8 +21,6 @@ class CudnnConv2DTransposeOpMaker : public Conv2DTransposeOpMaker {
public: public:
CudnnConv2DTransposeOpMaker(OpProto* proto, OpAttrChecker* op_checker) CudnnConv2DTransposeOpMaker(OpProto* proto, OpAttrChecker* op_checker)
: Conv2DTransposeOpMaker(proto, op_checker) { : Conv2DTransposeOpMaker(proto, op_checker) {
AddAttr<std::vector<int>>("dilations", "dilations of convolution operator.")
.SetDefault({1, 1});
AddAttr<int>("workspace_size_MB", AddAttr<int>("workspace_size_MB",
"workspace size for cudnn, in MB, " "workspace size for cudnn, in MB, "
"workspace is a section of GPU memory which will be " "workspace is a section of GPU memory which will be "
...@@ -37,8 +35,6 @@ class CudnnConv3DTransposeOpMaker : public Conv3DTransposeOpMaker { ...@@ -37,8 +35,6 @@ class CudnnConv3DTransposeOpMaker : public Conv3DTransposeOpMaker {
public: public:
CudnnConv3DTransposeOpMaker(OpProto* proto, OpAttrChecker* op_checker) CudnnConv3DTransposeOpMaker(OpProto* proto, OpAttrChecker* op_checker)
: Conv3DTransposeOpMaker(proto, op_checker) { : Conv3DTransposeOpMaker(proto, op_checker) {
AddAttr<std::vector<int>>("dilations", "dilations of convolution operator.")
.SetDefault({1, 1, 1});
AddAttr<int>("workspace_size_MB", AddAttr<int>("workspace_size_MB",
"workspace size for cudnn, in MB, " "workspace size for cudnn, in MB, "
"workspace is a section of GPU memory which will be " "workspace is a section of GPU memory which will be "
......
...@@ -29,6 +29,7 @@ void ConvTransposeOp::InferShape(framework::InferShapeContext* ctx) const { ...@@ -29,6 +29,7 @@ void ConvTransposeOp::InferShape(framework::InferShapeContext* ctx) const {
auto filter_dims = ctx->GetInputDim("Filter"); auto filter_dims = ctx->GetInputDim("Filter");
std::vector<int> strides = ctx->Attrs().Get<std::vector<int>>("strides"); std::vector<int> strides = ctx->Attrs().Get<std::vector<int>>("strides");
std::vector<int> paddings = ctx->Attrs().Get<std::vector<int>>("paddings"); std::vector<int> paddings = ctx->Attrs().Get<std::vector<int>>("paddings");
std::vector<int> dilations = ctx->Attrs().Get<std::vector<int>>("dilations");
PADDLE_ENFORCE(in_dims.size() == 4 || in_dims.size() == 5, PADDLE_ENFORCE(in_dims.size() == 4 || in_dims.size() == 5,
"ConvTransposeOp intput should be 4-D or 5-D tensor."); "ConvTransposeOp intput should be 4-D or 5-D tensor.");
...@@ -41,14 +42,18 @@ void ConvTransposeOp::InferShape(framework::InferShapeContext* ctx) const { ...@@ -41,14 +42,18 @@ void ConvTransposeOp::InferShape(framework::InferShapeContext* ctx) const {
PADDLE_ENFORCE_EQ(paddings.size(), strides.size(), PADDLE_ENFORCE_EQ(paddings.size(), strides.size(),
"ConvTransposeOp paddings dimension and strides " "ConvTransposeOp paddings dimension and strides "
"dimension should be the same."); "dimension should be the same.");
PADDLE_ENFORCE_EQ(paddings.size(), dilations.size(),
"ConvTransposeOp paddings dimension and dilations "
"dimension should be the same.");
PADDLE_ENFORCE_EQ(in_dims[1], filter_dims[0], PADDLE_ENFORCE_EQ(in_dims[1], filter_dims[0],
"In ConvTransposeOp, The input channel should be the same " "In ConvTransposeOp, The input channel should be the same "
"as the number of filters."); "as the number of filters.");
std::vector<int64_t> output_shape({in_dims[0], filter_dims[1]}); std::vector<int64_t> output_shape({in_dims[0], filter_dims[1]});
for (size_t i = 0; i < strides.size(); ++i) { for (size_t i = 0; i < strides.size(); ++i) {
auto filter_extent = dilations[i] * (filter_dims[i + 2] - 1) + 1;
output_shape.push_back((in_dims[i + 2] - 1) * strides[i] - 2 * paddings[i] + output_shape.push_back((in_dims[i + 2] - 1) * strides[i] - 2 * paddings[i] +
filter_dims[i + 2]); filter_extent);
} }
ctx->SetOutputDim("Output", framework::make_ddim(output_shape)); ctx->SetOutputDim("Output", framework::make_ddim(output_shape));
} }
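A small worked example of the dilation-aware shape rule, with made-up numbers (not taken from this commit):

// in_size = 5, stride = 2, padding = 1, filter_size = 3, dilation = 2
// filter_extent = dilation * (filter_size - 1) + 1 = 2 * (3 - 1) + 1 = 5
// out_size = (in_size - 1) * stride - 2 * padding + filter_extent
//          = (5 - 1) * 2 - 2 * 1 + 5 = 11

With dilation = 1 the extent reduces to filter_size, so the formula falls back to the previous expression (in_size - 1) * stride - 2 * padding + filter_size.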
...@@ -73,6 +78,12 @@ Conv2DTransposeOpMaker::Conv2DTransposeOpMaker(OpProto* proto, ...@@ -73,6 +78,12 @@ Conv2DTransposeOpMaker::Conv2DTransposeOpMaker(OpProto* proto,
AddOutput("Output", AddOutput("Output",
"(Tensor) The output tensor of convolution transpose operator. " "(Tensor) The output tensor of convolution transpose operator. "
"The format of output tensor is also NCHW."); "The format of output tensor is also NCHW.");
AddAttr<std::vector<int>>("dilations",
"(vector<int> default:{1, 1}), the "
"dilations(h_dilation, w_dilation) of convolution "
"transpose operator.")
.SetDefault({1, 1});
AddAttr<std::vector<int>>( AddAttr<std::vector<int>>(
"strides", "strides",
"(vector<int> default:{1, 1}), the strides(h_stride, w_stride) of " "(vector<int> default:{1, 1}), the strides(h_stride, w_stride) of "
...@@ -87,7 +98,7 @@ Conv2DTransposeOpMaker::Conv2DTransposeOpMaker(OpProto* proto, ...@@ -87,7 +98,7 @@ Conv2DTransposeOpMaker::Conv2DTransposeOpMaker(OpProto* proto,
Convolution2D Transpose Operator. Convolution2D Transpose Operator.
The convolution transpose operation calculates the output based on the input, filter The convolution transpose operation calculates the output based on the input, filter
and strides, paddings, groups parameters. The size of each dimension of the and dilations, strides, paddings, groups parameters. The size of each dimension of the
parameters is checked in the infer-shape. parameters is checked in the infer-shape.
Input(Input) and output(Output) are in NCHW format. Where N is batchsize, C is the Input(Input) and output(Output) are in NCHW format. Where N is batchsize, C is the
number of channels, H is the height of the feature, and W is the width of the feature. number of channels, H is the height of the feature, and W is the width of the feature.
...@@ -136,6 +147,13 @@ Conv3DTransposeOpMaker::Conv3DTransposeOpMaker(OpProto* proto, ...@@ -136,6 +147,13 @@ Conv3DTransposeOpMaker::Conv3DTransposeOpMaker(OpProto* proto,
"Where N is batch size, C is " "Where N is batch size, C is "
"the number of channels, D is the depth of the feature, H is the " "the number of channels, D is the depth of the feature, H is the "
"height of the feature, and W is the width of the feature."); "height of the feature, and W is the width of the feature.");
AddAttr<std::vector<int>>(
"dilations",
"(vector<int> default:{1, 1, 1}), the "
"dilations(d_dilation,h_dilation, w_dilation) of convolution "
"transpose operator.")
.SetDefault({1, 1, 1});
AddAttr<std::vector<int>>("strides", AddAttr<std::vector<int>>("strides",
"(vector<int> default:{1, 1, 1}), the " "(vector<int> default:{1, 1, 1}), the "
"strides{d_stride, h_stride, w_stride} of " "strides{d_stride, h_stride, w_stride} of "
...@@ -149,7 +167,7 @@ Conv3DTransposeOpMaker::Conv3DTransposeOpMaker(OpProto* proto, ...@@ -149,7 +167,7 @@ Conv3DTransposeOpMaker::Conv3DTransposeOpMaker(OpProto* proto,
Convolution3D Transpose Operator. Convolution3D Transpose Operator.
The convolution transpose operation calculates the output based on the input, filter The convolution transpose operation calculates the output based on the input, filter
and strides, paddings, groups parameters. The size of each dimension of the and dilations, strides, paddings, groups parameters. The size of each dimension of the
parameters is checked in the infer-shape. parameters is checked in the infer-shape.
Input(Input) and output(Output) are in NCDHW format. Where N is batch size, C is the Input(Input) and output(Output) are in NCDHW format. Where N is batch size, C is the
number of channels, D is the depth of the feature, H is the height of the feature, number of channels, D is the depth of the feature, H is the height of the feature,
......
...@@ -61,6 +61,7 @@ class GemmConvTransposeKernel : public framework::OpKernel<T> { ...@@ -61,6 +61,7 @@ class GemmConvTransposeKernel : public framework::OpKernel<T> {
std::vector<int> strides = context.Attr<std::vector<int>>("strides"); std::vector<int> strides = context.Attr<std::vector<int>>("strides");
std::vector<int> paddings = context.Attr<std::vector<int>>("paddings"); std::vector<int> paddings = context.Attr<std::vector<int>>("paddings");
std::vector<int> dilations = context.Attr<std::vector<int>>("dilations");
// groups will always be disabled in conv2dtranspose. // groups will always be disabled in conv2dtranspose.
const int batch_size = static_cast<int>(input->dims()[0]); const int batch_size = static_cast<int>(input->dims()[0]);
...@@ -113,7 +114,6 @@ class GemmConvTransposeKernel : public framework::OpKernel<T> { ...@@ -113,7 +114,6 @@ class GemmConvTransposeKernel : public framework::OpKernel<T> {
math::Col2ImFunctor<math::ColFormat::kCFO, DeviceContext, T> col2im; math::Col2ImFunctor<math::ColFormat::kCFO, DeviceContext, T> col2im;
math::Col2VolFunctor<DeviceContext, T> col2vol; math::Col2VolFunctor<DeviceContext, T> col2vol;
std::vector<int> dilations({1, 1, 1});
// convolution transpose: gemm + col2im or col2vol (similar to conv-backward // convolution transpose: gemm + col2im or col2vol (similar to conv-backward
// on input) // on input)
...@@ -165,6 +165,7 @@ class GemmConvTransposeGradKernel : public framework::OpKernel<T> { ...@@ -165,6 +165,7 @@ class GemmConvTransposeGradKernel : public framework::OpKernel<T> {
std::vector<int> strides = context.Attr<std::vector<int>>("strides"); std::vector<int> strides = context.Attr<std::vector<int>>("strides");
std::vector<int> paddings = context.Attr<std::vector<int>>("paddings"); std::vector<int> paddings = context.Attr<std::vector<int>>("paddings");
std::vector<int> dilations = context.Attr<std::vector<int>>("dilations");
const int batch_size = static_cast<int>(input->dims()[0]); const int batch_size = static_cast<int>(input->dims()[0]);
...@@ -219,7 +220,6 @@ class GemmConvTransposeGradKernel : public framework::OpKernel<T> { ...@@ -219,7 +220,6 @@ class GemmConvTransposeGradKernel : public framework::OpKernel<T> {
math::Im2ColFunctor<math::ColFormat::kCFO, DeviceContext, T> im2col; math::Im2ColFunctor<math::ColFormat::kCFO, DeviceContext, T> im2col;
math::Vol2ColFunctor<DeviceContext, T> vol2col; math::Vol2ColFunctor<DeviceContext, T> vol2col;
std::vector<int> dilations({1, 1, 1});
if (input_grad) { if (input_grad) {
input_grad->mutable_data<T>(context.GetPlace()); input_grad->mutable_data<T>(context.GetPlace());
......
...@@ -24,10 +24,10 @@ class FillZerosLikeOp : public framework::OperatorWithKernel { ...@@ -24,10 +24,10 @@ class FillZerosLikeOp : public framework::OperatorWithKernel {
void InferShape(framework::InferShapeContext *ctx) const override { void InferShape(framework::InferShapeContext *ctx) const override {
PADDLE_ENFORCE(ctx->HasInput("X"), PADDLE_ENFORCE(ctx->HasInput("X"),
"Input(X) of FillZerosLikeOp should not be null."); "Input(X) of FillZerosLikeOp should not be null.");
PADDLE_ENFORCE(ctx->HasOutput("Y"), PADDLE_ENFORCE(ctx->HasOutput("Out"),
"Output(Y) of FillZerosLikeOp should not be null."); "Output(Out) of FillZerosLikeOp should not be null.");
ctx->SetOutputDim("Y", ctx->GetInputDim("X")); ctx->SetOutputDim("Out", ctx->GetInputDim("X"));
ctx->ShareLoD("X", /*->*/ "Y"); ctx->ShareLoD("X", /*->*/ "Out");
} }
}; };
...@@ -36,7 +36,7 @@ class FillZerosLikeOpMaker : public framework::OpProtoAndCheckerMaker { ...@@ -36,7 +36,7 @@ class FillZerosLikeOpMaker : public framework::OpProtoAndCheckerMaker {
FillZerosLikeOpMaker(OpProto *proto, OpAttrChecker *op_checker) FillZerosLikeOpMaker(OpProto *proto, OpAttrChecker *op_checker)
: framework::OpProtoAndCheckerMaker(proto, op_checker) { : framework::OpProtoAndCheckerMaker(proto, op_checker) {
AddInput("X", "The input of fill-zeros-like op."); AddInput("X", "The input of fill-zeros-like op.");
AddOutput("Y", "The variable will be filled up with zeros."); AddOutput("Out", "The variable will be filled up with zeros.");
AddComment(R"DOC( AddComment(R"DOC(
FillZerosLike Operator. FillZerosLike Operator.
......
...@@ -23,7 +23,7 @@ template <typename DeviceContext, typename T> ...@@ -23,7 +23,7 @@ template <typename DeviceContext, typename T>
class FillZerosLikeKernel : public framework::OpKernel<T> { class FillZerosLikeKernel : public framework::OpKernel<T> {
public: public:
void Compute(const framework::ExecutionContext& context) const override { void Compute(const framework::ExecutionContext& context) const override {
auto* out = context.Output<framework::Tensor>("Y"); auto* out = context.Output<framework::Tensor>("Out");
out->mutable_data<T>(context.GetPlace()); out->mutable_data<T>(context.GetPlace());
math::SetConstant<DeviceContext, T> setter; math::SetConstant<DeviceContext, T> setter;
......
...@@ -93,13 +93,13 @@ class IncrementGradOpMaker : public framework::SingleGradOpDescMaker { ...@@ -93,13 +93,13 @@ class IncrementGradOpMaker : public framework::SingleGradOpDescMaker {
public: public:
using framework::SingleGradOpDescMaker::SingleGradOpDescMaker; using framework::SingleGradOpDescMaker::SingleGradOpDescMaker;
std::unique_ptr<framework::OpDescBind> Apply() const override { std::unique_ptr<framework::OpDesc> Apply() const override {
auto *grad_op = new framework::OpDescBind(); auto *grad_op = new framework::OpDesc();
grad_op->SetType("increment"); grad_op->SetType("increment");
grad_op->SetInput("X", Output("Out")); grad_op->SetInput("X", Output("Out"));
grad_op->SetOutput("Out", Input("X")); grad_op->SetOutput("Out", Input("X"));
grad_op->SetAttr("step", -boost::get<float>(GetAttr("step"))); grad_op->SetAttr("step", -boost::get<float>(GetAttr("step")));
return std::unique_ptr<framework::OpDescBind>(grad_op); return std::unique_ptr<framework::OpDesc>(grad_op);
} }
}; };
......
...@@ -63,8 +63,8 @@ class LoDRankTableInferShape : public framework::InferShapeBase { ...@@ -63,8 +63,8 @@ class LoDRankTableInferShape : public framework::InferShapeBase {
class LoDRankTableInferVarType : public framework::VarTypeInference { class LoDRankTableInferVarType : public framework::VarTypeInference {
public: public:
void operator()(const framework::OpDescBind &op_desc, void operator()(const framework::OpDesc &op_desc,
framework::BlockDescBind *block) const override { framework::BlockDesc *block) const override {
for (auto &o : op_desc.Output("Out")) { for (auto &o : op_desc.Output("Out")) {
block->FindRecursiveOrCreateVar(o)->SetType( block->FindRecursiveOrCreateVar(o)->SetType(
framework::proto::VarDesc::LOD_RANK_TABLE); framework::proto::VarDesc::LOD_RANK_TABLE);
......
...@@ -127,8 +127,8 @@ class LoDTensorToArrayInferShape : public framework::InferShapeBase { ...@@ -127,8 +127,8 @@ class LoDTensorToArrayInferShape : public framework::InferShapeBase {
class LoDTensorToArrayInferVarType : public framework::VarTypeInference { class LoDTensorToArrayInferVarType : public framework::VarTypeInference {
public: public:
void operator()(const framework::OpDescBind &op_desc, void operator()(const framework::OpDesc &op_desc,
framework::BlockDescBind *block) const override { framework::BlockDesc *block) const override {
for (auto &out_var : op_desc.Output("Out")) { for (auto &out_var : op_desc.Output("Out")) {
block->Var(out_var)->SetType(framework::proto::VarDesc::LOD_TENSOR_ARRAY); block->Var(out_var)->SetType(framework::proto::VarDesc::LOD_TENSOR_ARRAY);
} }
...@@ -140,14 +140,14 @@ class LoDTensorToArrayGradMaker : public framework::SingleGradOpDescMaker { ...@@ -140,14 +140,14 @@ class LoDTensorToArrayGradMaker : public framework::SingleGradOpDescMaker {
using framework::SingleGradOpDescMaker::SingleGradOpDescMaker; using framework::SingleGradOpDescMaker::SingleGradOpDescMaker;
protected: protected:
std::unique_ptr<framework::OpDescBind> Apply() const override { std::unique_ptr<framework::OpDesc> Apply() const override {
auto *grad_op = new framework::OpDescBind(); auto *grad_op = new framework::OpDesc();
grad_op->SetType("array_to_lod_tensor"); grad_op->SetType("array_to_lod_tensor");
grad_op->SetInput("X", OutputGrad("Out")); grad_op->SetInput("X", OutputGrad("Out"));
grad_op->SetInput("RankTable", Input("RankTable")); grad_op->SetInput("RankTable", Input("RankTable"));
grad_op->SetOutput("Out", InputGrad("X")); grad_op->SetOutput("Out", InputGrad("X"));
grad_op->SetAttrMap(Attrs()); grad_op->SetAttrMap(Attrs());
return std::unique_ptr<framework::OpDescBind>(grad_op); return std::unique_ptr<framework::OpDesc>(grad_op);
} }
}; };
......
...@@ -108,8 +108,8 @@ class LookupTableOpGrad : public framework::OperatorWithKernel { ...@@ -108,8 +108,8 @@ class LookupTableOpGrad : public framework::OperatorWithKernel {
class LookupTableOpGradVarTypeInference : public framework::VarTypeInference { class LookupTableOpGradVarTypeInference : public framework::VarTypeInference {
public: public:
void operator()(const framework::OpDescBind& op_desc, void operator()(const framework::OpDesc& op_desc,
framework::BlockDescBind* block) const override { framework::BlockDesc* block) const override {
auto out_var_name = op_desc.Output(framework::GradVarName("W")).front(); auto out_var_name = op_desc.Output(framework::GradVarName("W")).front();
auto attr = op_desc.GetAttr("is_sparse"); auto attr = op_desc.GetAttr("is_sparse");
bool is_sparse = boost::get<bool>(attr); bool is_sparse = boost::get<bool>(attr);
......
...@@ -67,18 +67,45 @@ void RowwiseAdd<DeviceContext, T>::operator()(const DeviceContext& context, ...@@ -67,18 +67,45 @@ void RowwiseAdd<DeviceContext, T>::operator()(const DeviceContext& context,
template <typename DeviceContext, typename T> template <typename DeviceContext, typename T>
void ColwiseSum<DeviceContext, T>::operator()(const DeviceContext& context, void ColwiseSum<DeviceContext, T>::operator()(const DeviceContext& context,
const framework::Tensor& input, const framework::Tensor& input,
framework::Tensor* vector) { framework::Tensor* out) {
auto in_dims = input.dims(); auto in_dims = input.dims();
auto size = input.numel() / in_dims[0]; auto size = input.numel() / in_dims[0];
PADDLE_ENFORCE_EQ(vector->numel(), size); PADDLE_ENFORCE_EQ(out->numel(), size);
auto vec = framework::EigenMatrix<T>::From(*vector);
auto in = framework::EigenMatrix<T>::From(input); auto in = framework::EigenMatrix<T>::From(input);
Eigen::array<int, 2> shape({{1, static_cast<int>(size)}}); auto vec = framework::EigenVector<T>::Flatten(*out);
vec.reshape(shape).device(*context.eigen_device()) =
in.sum(Eigen::array<int, 1>({{0}})).reshape(shape); vec.device(*context.eigen_device()) = in.sum(Eigen::array<int, 1>({{0}}));
} }
// Specialize for CPU, since Eigen implements a general reduce. However,
// colwise-sum can be easily implemented, and the general reduce has a huge
// overhead on CPU.
template <typename T>
class ColwiseSum<platform::CPUDeviceContext, T> {
public:
void operator()(const platform::CPUDeviceContext& context,
const framework::Tensor& input, framework::Tensor* out) {
auto& in_dims = input.dims();
auto height = in_dims[0];
auto size = in_dims[1];
PADDLE_ENFORCE_EQ(out->numel(), size);
T* out_buf = out->mutable_data<T>(out->place());
const T* in_buf = input.data<T>();
for (int64_t i = 0; i < height; ++i) {
for (int64_t j = 0; j < size; ++j) {
if (i == 0) {
out_buf[j] = in_buf[i * size + j];
} else {
out_buf[j] += in_buf[i * size + j];
}
}
}
}
};
} // namespace math } // namespace math
} // namespace operators } // namespace operators
} // namespace paddle } // namespace paddle
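A tiny illustration of what both code paths compute (the numbers are made up):

// input (height = 2, size = 3):  [[1, 2, 3],
//                                 [4, 5, 6]]
// out   (numel() == 3):          [5, 7, 9]

The Eigen path reduces along dimension 0 in a single expression, while the CPU specialization accumulates row by row into out_buf; both yield the same column-wise sums.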
...@@ -60,13 +60,13 @@ class MeanGradMaker : public framework::SingleGradOpDescMaker { ...@@ -60,13 +60,13 @@ class MeanGradMaker : public framework::SingleGradOpDescMaker {
using framework::SingleGradOpDescMaker::SingleGradOpDescMaker; using framework::SingleGradOpDescMaker::SingleGradOpDescMaker;
protected: protected:
std::unique_ptr<framework::OpDescBind> Apply() const override { std::unique_ptr<framework::OpDesc> Apply() const override {
auto* grad_op = new framework::OpDescBind(); auto* grad_op = new framework::OpDesc();
grad_op->SetType("mean_grad"); grad_op->SetType("mean_grad");
grad_op->SetInput("X", Input("X")); grad_op->SetInput("X", Input("X"));
grad_op->SetInput(framework::GradVarName("Out"), OutputGrad("Out")); grad_op->SetInput(framework::GradVarName("Out"), OutputGrad("Out"));
grad_op->SetOutput(framework::GradVarName("X"), InputGrad("X")); grad_op->SetOutput(framework::GradVarName("X"), InputGrad("X"));
return std::unique_ptr<framework::OpDescBind>(grad_op); return std::unique_ptr<framework::OpDesc>(grad_op);
} }
}; };
......
...@@ -161,15 +161,15 @@ class MergeLoDTensorGradMaker : public framework::SingleGradOpDescMaker { ...@@ -161,15 +161,15 @@ class MergeLoDTensorGradMaker : public framework::SingleGradOpDescMaker {
using framework::SingleGradOpDescMaker::SingleGradOpDescMaker; using framework::SingleGradOpDescMaker::SingleGradOpDescMaker;
protected: protected:
std::unique_ptr<framework::OpDescBind> Apply() const override { std::unique_ptr<framework::OpDesc> Apply() const override {
auto *grad_op = new framework::OpDescBind(); auto *grad_op = new framework::OpDesc();
grad_op->SetType("split_lod_tensor"); grad_op->SetType("split_lod_tensor");
grad_op->SetInput("X", OutputGrad("Out")); grad_op->SetInput("X", OutputGrad("Out"));
grad_op->SetInput("Mask", Input("Mask")); grad_op->SetInput("Mask", Input("Mask"));
grad_op->SetOutput("OutTrue", InputGrad("InTrue")); grad_op->SetOutput("OutTrue", InputGrad("InTrue"));
grad_op->SetOutput("OutFalse", InputGrad("InFalse")); grad_op->SetOutput("OutFalse", InputGrad("InFalse"));
grad_op->SetAttrMap(Attrs()); grad_op->SetAttrMap(Attrs());
return std::unique_ptr<framework::OpDescBind>(grad_op); return std::unique_ptr<framework::OpDesc>(grad_op);
} }
}; };
......
...@@ -70,12 +70,11 @@ class MinusGradMaker : public framework::GradOpDescMakerBase { ...@@ -70,12 +70,11 @@ class MinusGradMaker : public framework::GradOpDescMakerBase {
public: public:
using framework::GradOpDescMakerBase::GradOpDescMakerBase; using framework::GradOpDescMakerBase::GradOpDescMakerBase;
std::vector<std::unique_ptr<framework::OpDescBind>> operator()() std::vector<std::unique_ptr<framework::OpDesc>> operator()() const override {
const override { std::vector<std::unique_ptr<framework::OpDesc>> ops;
std::vector<std::unique_ptr<framework::OpDescBind>> ops;
auto x_g = InputGrad("X"); auto x_g = InputGrad("X");
if (!x_g.empty()) { if (!x_g.empty()) {
auto *x_g_op = new framework::OpDescBind(); auto *x_g_op = new framework::OpDesc();
x_g_op->SetType("scale"); x_g_op->SetType("scale");
x_g_op->SetInput("X", OutputGrad("Out")); x_g_op->SetInput("X", OutputGrad("Out"));
x_g_op->SetOutput("Out", x_g); x_g_op->SetOutput("Out", x_g);
...@@ -85,7 +84,7 @@ class MinusGradMaker : public framework::GradOpDescMakerBase { ...@@ -85,7 +84,7 @@ class MinusGradMaker : public framework::GradOpDescMakerBase {
auto y_g = InputGrad("Y"); auto y_g = InputGrad("Y");
if (!y_g.empty()) { if (!y_g.empty()) {
auto *y_g_op = new framework::OpDescBind(); auto *y_g_op = new framework::OpDesc();
y_g_op->SetType("scale"); y_g_op->SetType("scale");
y_g_op->SetInput("X", OutputGrad("Out")); y_g_op->SetInput("X", OutputGrad("Out"));
y_g_op->SetOutput("Out", y_g); y_g_op->SetOutput("Out", y_g);
......
...@@ -73,39 +73,50 @@ class MulOpMaker : public framework::OpProtoAndCheckerMaker { ...@@ -73,39 +73,50 @@ class MulOpMaker : public framework::OpProtoAndCheckerMaker {
public: public:
MulOpMaker(OpProto* proto, OpAttrChecker* op_checker) MulOpMaker(OpProto* proto, OpAttrChecker* op_checker)
: OpProtoAndCheckerMaker(proto, op_checker) { : OpProtoAndCheckerMaker(proto, op_checker) {
AddInput("X", "The first input of mul op"); AddInput("X", "(Tensor), The first input tensor of mul op.");
AddInput("Y", "The second input of mul op"); AddInput("Y", "(Tensor), The second input tensor of mul op.");
AddOutput("Out", "The output of mul op"); AddOutput("Out", "(Tensor), The output tensor of mul op.");
AddAttr<int>( AddAttr<int>(
"x_num_col_dims", "x_num_col_dims",
"(int, default 1) " R"DOC((int, default 1), The mul_op can take tensors with more than two
R"DOC(mul_op can take tensors with more than two dimensions as input `X`, dimensions as its inputs. If the input $X$ is a tensor with more
in that case, tensors will be reshaped to a matrix. The matrix's first than two dimensions, $X$ will be flattened into a two-dimensional
dimension(column length) will be the product of tensor's last matrix first. The flattening rule is: the first `num_col_dims`
`num_col_dims` dimensions, and the matrix's second dimension(row length) will be flattened to form the first dimension of the final matrix
will be the product of tensor's first `rank - num_col_dims` dimensions. (the height of the matrix), and the rest `rank(X) - num_col_dims`
dimensions are flattened to form the second dimension of the final
matrix (the width of the matrix). As a result, height of the
flattened matrix is equal to the product of $X$'s first
`x_num_col_dims` dimensions' sizes, and width of the flattened
matrix is equal to the product of $X$'s last `rank(X) - x_num_col_dims`
dimensions' sizes. For example, suppose $X$ is a 5-dimensional
tensor with the shape [2, 3, 4, 5, 6], and `x_num_col_dims` = 3.
Thus, the flattened matrix will have a shape [2 x 3 x 4, 5 x 6] =
[24, 30].
)DOC") )DOC")
.SetDefault(1) .SetDefault(1)
.EqualGreaterThan(1); .EqualGreaterThan(1);
AddAttr<int>( AddAttr<int>(
"y_num_col_dims", "y_num_col_dims",
"(int, default 1) " R"DOC((int, default 1), The mul_op can take tensors with more than two,
R"DOC(mul_op can take tensors with more than two dimensions as input `Y`, dimensions as its inputs. If the input $Y$ is a tensor with more
in that case, tensors will be reshaped to a matrix. Just like input `X`. than two dimensions, $Y$ will be flattened into a two-dimensional
matrix first. The attribute `y_num_col_dims` determines how $Y$ is
flattened. See comments of `x_num_col_dims` for more details.
)DOC") )DOC")
.SetDefault(1) .SetDefault(1)
.EqualGreaterThan(1); .EqualGreaterThan(1);
AddComment(R"DOC( AddComment(R"DOC(
Mul Operator. Mul Operator.
This operator is used to perform matrix multiplication for input X and Y. This operator is used to perform matrix multiplication for input $X$ and $Y$.
The equation is: The equation is:
$$Out = X * Y$$ $$Out = X * Y$$
Both the input `X` and `Y` can carry the LoD (Level of Details) information, Both the input $X$ and $Y$ can carry the LoD (Level of Details) information,
or not. But the output only shares the LoD information with input `X`. or not. But the output only shares the LoD information with input $X$.
)DOC"); )DOC");
} }
......
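As a companion to the `x_num_col_dims` example in the doc string above (illustrative shapes, not from this commit): if $Y$ has the shape [5, 6, 7, 8] and `y_num_col_dims` = 2, then $Y$ is flattened to [5 x 6, 7 x 8] = [30, 56]. Its height 30 matches the width 30 of the flattened $X$ from the example ([24, 30]), so the matrix product $Out = X * Y$ has the flattened shape [24, 56].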
...@@ -65,7 +65,7 @@ class NCCLTester : public ::testing::Test { ...@@ -65,7 +65,7 @@ class NCCLTester : public ::testing::Test {
} }
void NCCLInitOp() { void NCCLInitOp() {
std::unique_ptr<f::OpDescBind> op1(new f::OpDescBind); std::unique_ptr<f::OpDesc> op1(new f::OpDesc);
op1->SetType("ncclInit"); op1->SetType("ncclInit");
op1->SetOutput("Communicator", {"comm"}); op1->SetOutput("Communicator", {"comm"});
...@@ -81,10 +81,9 @@ class NCCLTester : public ::testing::Test { ...@@ -81,10 +81,9 @@ class NCCLTester : public ::testing::Test {
} }
template <class T> template <class T>
void PerThreadProgram(int gpu_id, const f::OpDescBind &op_desc, void PerThreadProgram(int gpu_id, const f::OpDesc &op_desc, f::Scope *scope) {
f::Scope *scope) {
std::unique_lock<std::mutex> lk(mu); std::unique_lock<std::mutex> lk(mu);
const f::OpDescBind *op1 = &op_desc; const f::OpDesc *op1 = &op_desc;
p::GPUPlace place(gpu_id); p::GPUPlace place(gpu_id);
auto &ctx = dev_ctxs.at(gpu_id); auto &ctx = dev_ctxs.at(gpu_id);
...@@ -125,7 +124,7 @@ class NCCLTester : public ::testing::Test { ...@@ -125,7 +124,7 @@ class NCCLTester : public ::testing::Test {
// ncclInitOp with desc // ncclInitOp with desc
TEST(NCCL, ncclInitOp) { TEST(NCCL, ncclInitOp) {
std::unique_ptr<f::OpDescBind> op_desc(new f::OpDescBind); std::unique_ptr<f::OpDesc> op_desc(new f::OpDesc);
op_desc->SetType("ncclInit"); op_desc->SetType("ncclInit");
op_desc->SetOutput("Communicator", {"x1"}); op_desc->SetOutput("Communicator", {"x1"});
...@@ -145,7 +144,7 @@ TEST(NCCL, ncclInitOp) { ...@@ -145,7 +144,7 @@ TEST(NCCL, ncclInitOp) {
// ncclAllReduceOp with desc // ncclAllReduceOp with desc
TEST_F(NCCLTester, ncclAllReduceOp) { TEST_F(NCCLTester, ncclAllReduceOp) {
std::unique_ptr<f::OpDescBind> op2(new f::OpDescBind); std::unique_ptr<f::OpDesc> op2(new f::OpDesc);
op2->SetType("ncclAllReduce"); op2->SetType("ncclAllReduce");
op2->SetInput("X", {"st"}); op2->SetInput("X", {"st"});
op2->SetInput("Communicator", {"comm"}); op2->SetInput("Communicator", {"comm"});
...@@ -192,7 +191,7 @@ TEST_F(NCCLTester, ncclAllReduceOp) { ...@@ -192,7 +191,7 @@ TEST_F(NCCLTester, ncclAllReduceOp) {
// ncclReduceOp with desc // ncclReduceOp with desc
TEST_F(NCCLTester, ncclReduceOp) { TEST_F(NCCLTester, ncclReduceOp) {
std::unique_ptr<f::OpDescBind> op2(new f::OpDescBind); std::unique_ptr<f::OpDesc> op2(new f::OpDesc);
const int kRoot = 0; const int kRoot = 0;
op2->SetType("ncclReduce"); op2->SetType("ncclReduce");
op2->SetInput("X", {"st"}); op2->SetInput("X", {"st"});
...@@ -240,7 +239,7 @@ TEST_F(NCCLTester, ncclReduceOp) { ...@@ -240,7 +239,7 @@ TEST_F(NCCLTester, ncclReduceOp) {
// ncclBcastOp with desc // ncclBcastOp with desc
TEST_F(NCCLTester, ncclBcastOp) { TEST_F(NCCLTester, ncclBcastOp) {
std::unique_ptr<f::OpDescBind> op2(new f::OpDescBind); std::unique_ptr<f::OpDesc> op2(new f::OpDesc);
const int kRoot = 5; const int kRoot = 5;
op2->SetType("ncclBcast"); op2->SetType("ncclBcast");
op2->SetInput("X", {"st"}); op2->SetInput("X", {"st"});
......
...@@ -116,14 +116,14 @@ class PadOpGradMaker : public framework::SingleGradOpDescMaker { ...@@ -116,14 +116,14 @@ class PadOpGradMaker : public framework::SingleGradOpDescMaker {
using framework::SingleGradOpDescMaker::SingleGradOpDescMaker; using framework::SingleGradOpDescMaker::SingleGradOpDescMaker;
protected: protected:
std::unique_ptr<framework::OpDescBind> Apply() const override { std::unique_ptr<framework::OpDesc> Apply() const override {
auto* bind = new framework::OpDescBind(); auto* bind = new framework::OpDesc();
bind->SetInput("X", Input("X")); bind->SetInput("X", Input("X"));
bind->SetInput(framework::GradVarName("Out"), OutputGrad("Out")); bind->SetInput(framework::GradVarName("Out"), OutputGrad("Out"));
bind->SetOutput(framework::GradVarName("X"), InputGrad("X")); bind->SetOutput(framework::GradVarName("X"), InputGrad("X"));
bind->SetAttrMap(Attrs()); bind->SetAttrMap(Attrs());
bind->SetType("pad_grad"); bind->SetType("pad_grad");
return std::unique_ptr<framework::OpDescBind>(bind); return std::unique_ptr<framework::OpDesc>(bind);
} }
}; };
......
...@@ -234,7 +234,7 @@ class RecurrentOp : public RecurrentBase { ...@@ -234,7 +234,7 @@ class RecurrentOp : public RecurrentBase {
auto reverse = Attr<bool>(kReverse); auto reverse = Attr<bool>(kReverse);
framework::Executor executor(dev_ctx); framework::Executor executor(dev_ctx);
auto *block = Attr<framework::BlockDescBind *>(kStepBlock); auto *block = Attr<framework::BlockDesc *>(kStepBlock);
auto *program = block->Program(); auto *program = block->Program();
for (size_t i = 0; i < seq_len; ++i) { for (size_t i = 0; i < seq_len; ++i) {
...@@ -317,7 +317,7 @@ class RecurrentGradOp : public RecurrentBase { ...@@ -317,7 +317,7 @@ class RecurrentGradOp : public RecurrentBase {
auto reverse = Attr<bool>(kReverse); auto reverse = Attr<bool>(kReverse);
framework::Executor executor(dev_ctx); framework::Executor executor(dev_ctx);
auto *block = Attr<framework::BlockDescBind *>(kStepBlock); auto *block = Attr<framework::BlockDesc *>(kStepBlock);
auto *program = block->Program(); auto *program = block->Program();
for (size_t step_id = 0; step_id < seq_len; ++step_id) { for (size_t step_id = 0; step_id < seq_len; ++step_id) {
...@@ -522,8 +522,7 @@ The ex-state means the state value in the ex-timestep or the previous time step ...@@ -522,8 +522,7 @@ The ex-state means the state value in the ex-timestep or the previous time step
string::Sprintf( string::Sprintf(
"The state variable names. [%s, %s, %s] must be the same order", "The state variable names. [%s, %s, %s] must be the same order",
kExStates, kStates, kInitStateGrads)); kExStates, kStates, kInitStateGrads));
AddAttr<framework::BlockDescBind *>(kStepBlock, AddAttr<framework::BlockDesc *>(kStepBlock, "The step block inside RNN");
"The step block inside RNN");
AddAttr<bool>(kReverse, R"DOC(Calculate RNN reversely or not. AddAttr<bool>(kReverse, R"DOC(Calculate RNN reversely or not.
By default reverse=False By default reverse=False
...@@ -565,13 +564,13 @@ class RecurrentGradOpDescMaker : public framework::SingleGradOpDescMaker { ...@@ -565,13 +564,13 @@ class RecurrentGradOpDescMaker : public framework::SingleGradOpDescMaker {
using framework::SingleGradOpDescMaker::SingleGradOpDescMaker; using framework::SingleGradOpDescMaker::SingleGradOpDescMaker;
protected: protected:
virtual std::unique_ptr<framework::OpDescBind> Apply() const { virtual std::unique_ptr<framework::OpDesc> Apply() const {
auto *grad = new framework::OpDescBind(); auto *grad = new framework::OpDesc();
grad->SetType("recurrent_grad"); grad->SetType("recurrent_grad");
for (auto &input_param : this->InputNames()) { for (auto &input_param : this->InputNames()) {
grad->SetInput(input_param, this->Input(input_param)); grad->SetInput(input_param, this->Input(input_param));
grad->SetOutput(framework::GradVarName(input_param), grad->SetOutput(framework::GradVarName(input_param),
this->InputGrad(input_param)); this->InputGrad(input_param, false));
} }
for (auto &output_param : this->OutputNames()) { for (auto &output_param : this->OutputNames()) {
...@@ -588,7 +587,7 @@ class RecurrentGradOpDescMaker : public framework::SingleGradOpDescMaker { ...@@ -588,7 +587,7 @@ class RecurrentGradOpDescMaker : public framework::SingleGradOpDescMaker {
grad->SetAttrMap(this->Attrs()); grad->SetAttrMap(this->Attrs());
grad->SetBlockAttr(kStepBlock, *grad_block_[0]); grad->SetBlockAttr(kStepBlock, *grad_block_[0]);
return std::unique_ptr<framework::OpDescBind>(grad); return std::unique_ptr<framework::OpDesc>(grad);
} }
}; };
......
...@@ -58,13 +58,13 @@ class ScaleGradMaker : public framework::SingleGradOpDescMaker { ...@@ -58,13 +58,13 @@ class ScaleGradMaker : public framework::SingleGradOpDescMaker {
public: public:
using framework::SingleGradOpDescMaker::SingleGradOpDescMaker; using framework::SingleGradOpDescMaker::SingleGradOpDescMaker;
std::unique_ptr<framework::OpDescBind> Apply() const override { std::unique_ptr<framework::OpDesc> Apply() const override {
auto *grad_op = new framework::OpDescBind(); auto *grad_op = new framework::OpDesc();
grad_op->SetType("scale"); grad_op->SetType("scale");
grad_op->SetInput("X", OutputGrad("Out")); grad_op->SetInput("X", OutputGrad("Out"));
grad_op->SetOutput("Out", InputGrad("X")); grad_op->SetOutput("Out", InputGrad("X"));
grad_op->SetAttr("scale", GetAttr("scale")); grad_op->SetAttr("scale", GetAttr("scale"));
return std::unique_ptr<framework::OpDescBind>(grad_op); return std::unique_ptr<framework::OpDesc>(grad_op);
} }
}; };
......
...@@ -124,8 +124,9 @@ class SequenceConcatGradOp : public framework::OperatorWithKernel { ...@@ -124,8 +124,9 @@ class SequenceConcatGradOp : public framework::OperatorWithKernel {
} // namespace paddle } // namespace paddle
namespace ops = paddle::operators; namespace ops = paddle::operators;
REGISTER_OP(sequence_concat, ops::SequenceConcatOp, ops::SequenceConcatOpMaker, REGISTER_OP_EX(sequence_concat, ops::SequenceConcatOp,
sequence_concat_grad, ops::SequenceConcatGradOp); ops::SequenceConcatOpMaker, sequence_concat_grad,
ops::SequenceConcatGradOp, false);
REGISTER_OP_CPU_KERNEL( REGISTER_OP_CPU_KERNEL(
sequence_concat, sequence_concat,
ops::SequenceConcatOpKernel<paddle::platform::CPUDeviceContext, float>); ops::SequenceConcatOpKernel<paddle::platform::CPUDeviceContext, float>);
......
...@@ -136,14 +136,14 @@ class ShrinkRNNGradOpMaker : public framework::SingleGradOpDescMaker { ...@@ -136,14 +136,14 @@ class ShrinkRNNGradOpMaker : public framework::SingleGradOpDescMaker {
using framework::SingleGradOpDescMaker::SingleGradOpDescMaker; using framework::SingleGradOpDescMaker::SingleGradOpDescMaker;
protected: protected:
std::unique_ptr<framework::OpDescBind> Apply() const override { std::unique_ptr<framework::OpDesc> Apply() const override {
auto *op = new framework::OpDescBind(); auto *op = new framework::OpDesc();
op->SetType("shrink_rnn_memory_grad"); op->SetType("shrink_rnn_memory_grad");
op->SetInput("X", Input("X")); op->SetInput("X", Input("X"));
op->SetInput(framework::GradVarName("Out"), OutputGrad("Out")); op->SetInput(framework::GradVarName("Out"), OutputGrad("Out"));
op->SetOutput(framework::GradVarName("X"), InputGrad("X")); op->SetOutput(framework::GradVarName("X"), InputGrad("X"));
op->SetAttrMap(Attrs()); op->SetAttrMap(Attrs());
return std::unique_ptr<framework::OpDescBind>(op); return std::unique_ptr<framework::OpDesc>(op);
} }
}; };
......
...@@ -50,13 +50,13 @@ class SignGradMaker : public framework::SingleGradOpDescMaker { ...@@ -50,13 +50,13 @@ class SignGradMaker : public framework::SingleGradOpDescMaker {
public: public:
using framework::SingleGradOpDescMaker::SingleGradOpDescMaker; using framework::SingleGradOpDescMaker::SingleGradOpDescMaker;
std::unique_ptr<framework::OpDescBind> Apply() const override { std::unique_ptr<framework::OpDesc> Apply() const override {
auto *grad_op = new framework::OpDescBind(); auto *grad_op = new framework::OpDesc();
grad_op->SetType("scale"); grad_op->SetType("scale");
grad_op->SetInput("X", OutputGrad("Out")); grad_op->SetInput("X", OutputGrad("Out"));
grad_op->SetOutput("Out", InputGrad("X")); grad_op->SetOutput("Out", InputGrad("X"));
grad_op->SetAttr("scale", 0.0f); grad_op->SetAttr("scale", 0.0f);
return std::unique_ptr<framework::OpDescBind>(grad_op); return std::unique_ptr<framework::OpDesc>(grad_op);
} }
}; };
......
...@@ -173,8 +173,8 @@ class SoftmaxGradMaker : public framework::SingleGradOpDescMaker { ...@@ -173,8 +173,8 @@ class SoftmaxGradMaker : public framework::SingleGradOpDescMaker {
using framework::SingleGradOpDescMaker::SingleGradOpDescMaker; using framework::SingleGradOpDescMaker::SingleGradOpDescMaker;
protected: protected:
std::unique_ptr<framework::OpDescBind> Apply() const override { std::unique_ptr<framework::OpDesc> Apply() const override {
auto* grad_op = new framework::OpDescBind(); auto* grad_op = new framework::OpDesc();
grad_op->SetType("softmax_with_cross_entropy_grad"); grad_op->SetType("softmax_with_cross_entropy_grad");
grad_op->SetInput("Label", Input("Label")); grad_op->SetInput("Label", Input("Label"));
grad_op->SetInput("Softmax", Output("Softmax")); grad_op->SetInput("Softmax", Output("Softmax"));
...@@ -183,7 +183,7 @@ class SoftmaxGradMaker : public framework::SingleGradOpDescMaker { ...@@ -183,7 +183,7 @@ class SoftmaxGradMaker : public framework::SingleGradOpDescMaker {
grad_op->SetInput(framework::GradVarName("Loss"), OutputGrad("Loss")); grad_op->SetInput(framework::GradVarName("Loss"), OutputGrad("Loss"));
grad_op->SetOutput(framework::GradVarName("Logits"), InputGrad("Logits")); grad_op->SetOutput(framework::GradVarName("Logits"), InputGrad("Logits"));
grad_op->SetAttrMap(Attrs()); grad_op->SetAttrMap(Attrs());
return std::unique_ptr<framework::OpDescBind>(grad_op); return std::unique_ptr<framework::OpDesc>(grad_op);
} }
}; };
......
...@@ -163,8 +163,8 @@ class SplitLoDTensorArrayGradMaker : public framework::SingleGradOpDescMaker { ...@@ -163,8 +163,8 @@ class SplitLoDTensorArrayGradMaker : public framework::SingleGradOpDescMaker {
using framework::SingleGradOpDescMaker::SingleGradOpDescMaker; using framework::SingleGradOpDescMaker::SingleGradOpDescMaker;
protected: protected:
std::unique_ptr<framework::OpDescBind> Apply() const override { std::unique_ptr<framework::OpDesc> Apply() const override {
auto *grad_op = new framework::OpDescBind(); auto *grad_op = new framework::OpDesc();
grad_op->SetType("merge_lod_tensor"); grad_op->SetType("merge_lod_tensor");
grad_op->SetInput("InTrue", OutputGrad("OutTrue")); grad_op->SetInput("InTrue", OutputGrad("OutTrue"));
grad_op->SetInput("InFalse", OutputGrad("OutFalse")); grad_op->SetInput("InFalse", OutputGrad("OutFalse"));
...@@ -172,7 +172,7 @@ class SplitLoDTensorArrayGradMaker : public framework::SingleGradOpDescMaker { ...@@ -172,7 +172,7 @@ class SplitLoDTensorArrayGradMaker : public framework::SingleGradOpDescMaker {
grad_op->SetInput("X", Input("X")); grad_op->SetInput("X", Input("X"));
grad_op->SetOutput("Out", InputGrad("X")); grad_op->SetOutput("Out", InputGrad("X"));
grad_op->SetAttrMap(Attrs()); grad_op->SetAttrMap(Attrs());
return std::unique_ptr<framework::OpDescBind>(grad_op); return std::unique_ptr<framework::OpDesc>(grad_op);
} }
}; };
......
...@@ -108,13 +108,13 @@ class SplitGradMaker : public framework::SingleGradOpDescMaker { ...@@ -108,13 +108,13 @@ class SplitGradMaker : public framework::SingleGradOpDescMaker {
using framework::SingleGradOpDescMaker::SingleGradOpDescMaker; using framework::SingleGradOpDescMaker::SingleGradOpDescMaker;
protected: protected:
std::unique_ptr<framework::OpDescBind> Apply() const override { std::unique_ptr<framework::OpDesc> Apply() const override {
auto op = new framework::OpDescBind(); auto op = new framework::OpDesc();
op->SetType("concat"); op->SetType("concat");
op->SetInput("X", OutputGrad("Out")); op->SetInput("X", OutputGrad("Out"));
op->SetOutput("Out", InputGrad("X")); op->SetOutput("Out", InputGrad("X"));
op->SetAttrMap(Attrs()); op->SetAttrMap(Attrs());
return std::unique_ptr<framework::OpDescBind>(op); return std::unique_ptr<framework::OpDesc>(op);
} }
}; };
......
...@@ -85,8 +85,10 @@ TEST(StridedMemcpy, GPUCrop) { ...@@ -85,8 +85,10 @@ TEST(StridedMemcpy, GPUCrop) {
platform::GPUPlace gpu0(0); platform::GPUPlace gpu0(0);
platform::CPUPlace cpu; platform::CPUPlace cpu;
platform::CUDADeviceContext ctx(gpu0);
int* gpu_src = reinterpret_cast<int*>(memory::Alloc(gpu0, sizeof(src))); int* gpu_src = reinterpret_cast<int*>(memory::Alloc(gpu0, sizeof(src)));
memory::Copy(gpu0, gpu_src, cpu, src, sizeof(src)); memory::Copy(gpu0, gpu_src, cpu, src, sizeof(src), ctx.stream());
framework::DDim src_stride({5, 1}); framework::DDim src_stride({5, 1});
...@@ -96,7 +98,6 @@ TEST(StridedMemcpy, GPUCrop) { ...@@ -96,7 +98,6 @@ TEST(StridedMemcpy, GPUCrop) {
framework::DDim dst_dim({2, 2}); framework::DDim dst_dim({2, 2});
framework::DDim dst_stride({2, 1}); framework::DDim dst_stride({2, 1});
platform::CUDADeviceContext ctx(gpu0);
StridedMemcpy<int>(ctx, gpu_src + 1, src_stride, dst_dim, dst_stride, StridedMemcpy<int>(ctx, gpu_src + 1, src_stride, dst_dim, dst_stride,
gpu_dst); gpu_dst);
...@@ -122,9 +123,10 @@ TEST(StridedMemcpy, GPUConcat) { ...@@ -122,9 +123,10 @@ TEST(StridedMemcpy, GPUConcat) {
platform::GPUPlace gpu0(0); platform::GPUPlace gpu0(0);
platform::CPUPlace cpu; platform::CPUPlace cpu;
platform::CUDADeviceContext ctx(gpu0);
int* gpu_src = reinterpret_cast<int*>(memory::Alloc(gpu0, sizeof(src))); int* gpu_src = reinterpret_cast<int*>(memory::Alloc(gpu0, sizeof(src)));
memory::Copy(gpu0, gpu_src, cpu, src, sizeof(src)); memory::Copy(gpu0, gpu_src, cpu, src, sizeof(src), ctx.stream());
int dst[8]; int dst[8];
int* gpu_dst = reinterpret_cast<int*>(memory::Alloc(gpu0, sizeof(dst))); int* gpu_dst = reinterpret_cast<int*>(memory::Alloc(gpu0, sizeof(dst)));
...@@ -132,7 +134,6 @@ TEST(StridedMemcpy, GPUConcat) { ...@@ -132,7 +134,6 @@ TEST(StridedMemcpy, GPUConcat) {
framework::DDim src_stride({2, 1}); framework::DDim src_stride({2, 1});
framework::DDim dst_dim({2, 2}); framework::DDim dst_dim({2, 2});
framework::DDim dst_stride({4, 1}); framework::DDim dst_stride({4, 1});
platform::CUDADeviceContext ctx(gpu0);
StridedMemcpy<int>(ctx, gpu_src, src_stride, dst_dim, dst_stride, gpu_dst); StridedMemcpy<int>(ctx, gpu_src, src_stride, dst_dim, dst_stride, gpu_dst);
StridedMemcpy<int>(ctx, gpu_src, src_stride, dst_dim, dst_stride, StridedMemcpy<int>(ctx, gpu_src, src_stride, dst_dim, dst_stride,
......
...@@ -115,8 +115,8 @@ the LoD information with the first input. ...@@ -115,8 +115,8 @@ the LoD information with the first input.
class SumOpVarTypeInference : public framework::VarTypeInference { class SumOpVarTypeInference : public framework::VarTypeInference {
public: public:
void operator()(const framework::OpDescBind& op_desc, void operator()(const framework::OpDesc& op_desc,
framework::BlockDescBind* block) const override { framework::BlockDesc* block) const override {
auto& inputs = op_desc.Input("X"); auto& inputs = op_desc.Input("X");
auto var_type = framework::proto::VarDesc::SELECTED_ROWS; auto var_type = framework::proto::VarDesc::SELECTED_ROWS;
...@@ -169,20 +169,19 @@ class SumGradMaker : public framework::GradOpDescMakerBase { ...@@ -169,20 +169,19 @@ class SumGradMaker : public framework::GradOpDescMakerBase {
public: public:
using framework::GradOpDescMakerBase::GradOpDescMakerBase; using framework::GradOpDescMakerBase::GradOpDescMakerBase;
std::vector<std::unique_ptr<framework::OpDescBind>> operator()() std::vector<std::unique_ptr<framework::OpDesc>> operator()() const override {
const override { auto x_grads = InputGrad("X", false);
auto x_grads = InputGrad("X"); std::vector<std::unique_ptr<framework::OpDesc>> grad_ops;
std::vector<std::unique_ptr<framework::OpDescBind>> grad_ops;
grad_ops.reserve(x_grads.size()); grad_ops.reserve(x_grads.size());
auto og = OutputGrad("Out"); auto og = OutputGrad("Out");
std::transform(x_grads.begin(), x_grads.end(), std::back_inserter(grad_ops), std::transform(x_grads.begin(), x_grads.end(), std::back_inserter(grad_ops),
[&og](const std::string& x_grad) { [&og](const std::string& x_grad) {
auto* grad_op = new framework::OpDescBind(); auto* grad_op = new framework::OpDesc();
grad_op->SetType("scale"); grad_op->SetType("scale");
grad_op->SetInput("X", og); grad_op->SetInput("X", og);
grad_op->SetOutput("Out", {x_grad}); grad_op->SetOutput("Out", {x_grad});
grad_op->SetAttr("scale", 1.0f); grad_op->SetAttr("scale", 1.0f);
return std::unique_ptr<framework::OpDescBind>(grad_op); return std::unique_ptr<framework::OpDesc>(grad_op);
}); });
return grad_ops; return grad_ops;
} }
......
...@@ -96,8 +96,8 @@ class WriteToArrayInferShape : public framework::InferShapeBase { ...@@ -96,8 +96,8 @@ class WriteToArrayInferShape : public framework::InferShapeBase {
class WriteToArrayInferVarType : public framework::VarTypeInference { class WriteToArrayInferVarType : public framework::VarTypeInference {
public: public:
void operator()(const framework::OpDescBind &op_desc, void operator()(const framework::OpDesc &op_desc,
framework::BlockDescBind *block) const override { framework::BlockDesc *block) const override {
auto x_name = op_desc.Input("X")[0]; auto x_name = op_desc.Input("X")[0];
auto out_name = op_desc.Output("Out")[0]; auto out_name = op_desc.Output("Out")[0];
VLOG(10) << "Set Variable " << out_name << " as LOD_TENSOR_ARRAY"; VLOG(10) << "Set Variable " << out_name << " as LOD_TENSOR_ARRAY";
...@@ -175,14 +175,14 @@ class WriteToArrayGradMaker : public framework::SingleGradOpDescMaker { ...@@ -175,14 +175,14 @@ class WriteToArrayGradMaker : public framework::SingleGradOpDescMaker {
using framework::SingleGradOpDescMaker::SingleGradOpDescMaker; using framework::SingleGradOpDescMaker::SingleGradOpDescMaker;
protected: protected:
std::unique_ptr<framework::OpDescBind> Apply() const override { std::unique_ptr<framework::OpDesc> Apply() const override {
auto *grad_op = new framework::OpDescBind(); auto *grad_op = new framework::OpDesc();
grad_op->SetType("read_from_array"); grad_op->SetType("read_from_array");
grad_op->SetInput("I", Input("I")); grad_op->SetInput("I", Input("I"));
grad_op->SetInput("X", OutputGrad("Out")); grad_op->SetInput("X", OutputGrad("Out"));
grad_op->SetOutput("Out", InputGrad("X")); grad_op->SetOutput("Out", InputGrad("X"));
grad_op->SetAttrMap(Attrs()); grad_op->SetAttrMap(Attrs());
return std::unique_ptr<framework::OpDescBind>(grad_op); return std::unique_ptr<framework::OpDesc>(grad_op);
} }
}; };
...@@ -191,14 +191,14 @@ class ReadFromArrayGradMaker : public framework::SingleGradOpDescMaker { ...@@ -191,14 +191,14 @@ class ReadFromArrayGradMaker : public framework::SingleGradOpDescMaker {
using framework::SingleGradOpDescMaker::SingleGradOpDescMaker; using framework::SingleGradOpDescMaker::SingleGradOpDescMaker;
protected: protected:
std::unique_ptr<framework::OpDescBind> Apply() const override { std::unique_ptr<framework::OpDesc> Apply() const override {
auto *grad_op = new framework::OpDescBind(); auto *grad_op = new framework::OpDesc();
grad_op->SetType("write_to_array"); grad_op->SetType("write_to_array");
grad_op->SetInput("I", Input("I")); grad_op->SetInput("I", Input("I"));
grad_op->SetInput("X", OutputGrad("Out")); grad_op->SetInput("X", OutputGrad("Out"));
grad_op->SetOutput("Out", InputGrad("X")); grad_op->SetOutput("Out", InputGrad("X"));
grad_op->SetAttrMap(Attrs()); grad_op->SetAttrMap(Attrs());
return std::unique_ptr<framework::OpDescBind>(grad_op); return std::unique_ptr<framework::OpDesc>(grad_op);
} }
}; };
......
...@@ -46,7 +46,7 @@ class WhileOp : public framework::OperatorBase { ...@@ -46,7 +46,7 @@ class WhileOp : public framework::OperatorBase {
PADDLE_ENFORCE_EQ(cond.dims(), paddle::framework::make_ddim({1})); PADDLE_ENFORCE_EQ(cond.dims(), paddle::framework::make_ddim({1}));
framework::Executor executor(dev_ctx); framework::Executor executor(dev_ctx);
auto *block = Attr<framework::BlockDescBind *>(kStepBlock); auto *block = Attr<framework::BlockDesc *>(kStepBlock);
auto *program = block->Program(); auto *program = block->Program();
auto step_scopes = auto step_scopes =
...@@ -82,7 +82,7 @@ class WhileOpMaker : public framework::OpProtoAndCheckerMaker { ...@@ -82,7 +82,7 @@ class WhileOpMaker : public framework::OpProtoAndCheckerMaker {
"(StepScopeVar) A vector of local scope, which size equals the " "(StepScopeVar) A vector of local scope, which size equals the "
"step number of While Op. The i'th scope storages temporary " "step number of While Op. The i'th scope storages temporary "
"variables generated in the i'th step."); "variables generated in the i'th step.");
AddAttr<framework::BlockDescBind *>(kStepBlock, AddAttr<framework::BlockDesc *>(kStepBlock,
"The step block inside WhileOp"); "The step block inside WhileOp");
AddComment(R"DOC( AddComment(R"DOC(
)DOC"); )DOC");
...@@ -99,7 +99,7 @@ class WhileGradOp : public framework::OperatorBase { ...@@ -99,7 +99,7 @@ class WhileGradOp : public framework::OperatorBase {
void Run(const framework::Scope &scope, void Run(const framework::Scope &scope,
const platform::DeviceContext &dev_ctx) const override { const platform::DeviceContext &dev_ctx) const override {
framework::Executor executor(dev_ctx); framework::Executor executor(dev_ctx);
auto *block = Attr<framework::BlockDescBind *>(kStepBlock); auto *block = Attr<framework::BlockDesc *>(kStepBlock);
auto *program = block->Program(); auto *program = block->Program();
auto *step_scopes = auto *step_scopes =
...@@ -209,8 +209,8 @@ class WhileGradOpDescMaker : public framework::SingleGradOpDescMaker { ...@@ -209,8 +209,8 @@ class WhileGradOpDescMaker : public framework::SingleGradOpDescMaker {
using framework::SingleGradOpDescMaker::SingleGradOpDescMaker; using framework::SingleGradOpDescMaker::SingleGradOpDescMaker;
protected: protected:
std::unique_ptr<framework::OpDescBind> Apply() const override { std::unique_ptr<framework::OpDesc> Apply() const override {
auto *grad = new framework::OpDescBind(); auto *grad = new framework::OpDesc();
grad->SetType("while_grad"); grad->SetType("while_grad");
grad->SetInput(kParameters, Input(kParameters)); grad->SetInput(kParameters, Input(kParameters));
...@@ -279,14 +279,14 @@ class WhileGradOpDescMaker : public framework::SingleGradOpDescMaker { ...@@ -279,14 +279,14 @@ class WhileGradOpDescMaker : public framework::SingleGradOpDescMaker {
// while operator could be renamed. // while operator could be renamed.
grad->SetAttr("original_output_grad", extra_inputs_list); grad->SetAttr("original_output_grad", extra_inputs_list);
return std::unique_ptr<framework::OpDescBind>(grad); return std::unique_ptr<framework::OpDesc>(grad);
} }
}; };
class WhileGradOpVarTypeInference : public framework::VarTypeInference { class WhileGradOpVarTypeInference : public framework::VarTypeInference {
public: public:
void operator()(const framework::OpDescBind &op_desc, void operator()(const framework::OpDesc &op_desc,
framework::BlockDescBind *block) const override { framework::BlockDesc *block) const override {
auto p_names = op_desc.Input(kParameters); auto p_names = op_desc.Input(kParameters);
auto pg_names = op_desc.Output(framework::GradVarName(kParameters)); auto pg_names = op_desc.Output(framework::GradVarName(kParameters));
......
...@@ -97,17 +97,6 @@ void GpuMemcpyAsync(void *dst, const void *src, size_t count, ...@@ -97,17 +97,6 @@ void GpuMemcpyAsync(void *dst, const void *src, size_t count,
"cudaMemcpyAsync failed in paddle::platform::GpuMemcpyAsync"); "cudaMemcpyAsync failed in paddle::platform::GpuMemcpyAsync");
} }
void GpuMemcpySync(void *dst, const void *src, size_t count,
enum cudaMemcpyKind kind) {
PADDLE_ENFORCE(cudaMemcpy(dst, src, count, kind),
"cudaMemcpy failed in paddle::platform::GpuMemcpySync");
// note: cudaMemcpy may actually be asynchronous with respect to the caller,
// block on stream 0 to make sure the copy has completed
PADDLE_ENFORCE(
cudaStreamSynchronize(0),
"cudaStreamSynchronize failed in paddle::platform::GpuMemcpySync");
}
void GpuMemcpyPeer(void *dst, int dst_device, const void *src, int src_device, void GpuMemcpyPeer(void *dst, int dst_device, const void *src, int src_device,
size_t count, cudaStream_t stream) { size_t count, cudaStream_t stream) {
PADDLE_ENFORCE( PADDLE_ENFORCE(
......
...@@ -52,10 +52,6 @@ size_t GpuMaxChunkSize(); ...@@ -52,10 +52,6 @@ size_t GpuMaxChunkSize();
void GpuMemcpyAsync(void *dst, const void *src, size_t count, void GpuMemcpyAsync(void *dst, const void *src, size_t count,
enum cudaMemcpyKind kind, cudaStream_t stream); enum cudaMemcpyKind kind, cudaStream_t stream);
//! Copy memory from address src to dst synchronously.
void GpuMemcpySync(void *dst, const void *src, size_t count,
enum cudaMemcpyKind kind);
//! Copy memory from one device to another device. //! Copy memory from one device to another device.
void GpuMemcpyPeer(void *dst, int dst_device, const void *src, int src_device, void GpuMemcpyPeer(void *dst, int dst_device, const void *src, int src_device,
size_t count, cudaStream_t stream); size_t count, cudaStream_t stream);
......
...@@ -53,11 +53,11 @@ TEST(Transform, GPUUnary) { ...@@ -53,11 +53,11 @@ TEST(Transform, GPUUnary) {
CUDADeviceContext ctx(gpu0); CUDADeviceContext ctx(gpu0);
float cpu_buf[4] = {0.1, 0.2, 0.3, 0.4}; float cpu_buf[4] = {0.1, 0.2, 0.3, 0.4};
float* gpu_buf = static_cast<float*>(Alloc(gpu0, sizeof(float) * 4)); float* gpu_buf = static_cast<float*>(Alloc(gpu0, sizeof(float) * 4));
Copy(gpu0, gpu_buf, CPUPlace(), cpu_buf, sizeof(cpu_buf)); Copy(gpu0, gpu_buf, CPUPlace(), cpu_buf, sizeof(cpu_buf), ctx.stream());
Transform<paddle::platform::CUDADeviceContext> trans; Transform<paddle::platform::CUDADeviceContext> trans;
trans(ctx, gpu_buf, gpu_buf + 4, gpu_buf, Scale<float>(10)); trans(ctx, gpu_buf, gpu_buf + 4, gpu_buf, Scale<float>(10));
ctx.Wait(); ctx.Wait();
Copy(CPUPlace(), cpu_buf, gpu0, gpu_buf, sizeof(cpu_buf)); Copy(CPUPlace(), cpu_buf, gpu0, gpu_buf, sizeof(cpu_buf), ctx.stream());
Free(gpu0, gpu_buf); Free(gpu0, gpu_buf);
for (int i = 0; i < 4; ++i) { for (int i = 0; i < 4; ++i) {
ASSERT_NEAR(cpu_buf[i], static_cast<float>(i + 1), 1e-5); ASSERT_NEAR(cpu_buf[i], static_cast<float>(i + 1), 1e-5);
...@@ -83,11 +83,11 @@ TEST(Transform, GPUBinary) { ...@@ -83,11 +83,11 @@ TEST(Transform, GPUBinary) {
GPUPlace gpu0(0); GPUPlace gpu0(0);
CUDADeviceContext ctx(gpu0); CUDADeviceContext ctx(gpu0);
int* gpu_buf = static_cast<int*>(Alloc(gpu0, sizeof(buf))); int* gpu_buf = static_cast<int*>(Alloc(gpu0, sizeof(buf)));
Copy(gpu0, gpu_buf, CPUPlace(), buf, sizeof(buf)); Copy(gpu0, gpu_buf, CPUPlace(), buf, sizeof(buf), ctx.stream());
Transform<paddle::platform::CUDADeviceContext> trans; Transform<paddle::platform::CUDADeviceContext> trans;
trans(ctx, gpu_buf, gpu_buf + 4, gpu_buf, gpu_buf, Multiply<int>()); trans(ctx, gpu_buf, gpu_buf + 4, gpu_buf, gpu_buf, Multiply<int>());
ctx.Wait(); ctx.Wait();
Copy(CPUPlace(), buf, gpu0, gpu_buf, sizeof(buf)); Copy(CPUPlace(), buf, gpu0, gpu_buf, sizeof(buf), ctx.stream());
Free(gpu0, gpu_buf); Free(gpu0, gpu_buf);
for (int i = 0; i < 4; ++i) { for (int i = 0; i < 4; ++i) {
ASSERT_EQ((i + 1) * (i + 1), buf[i]); ASSERT_EQ((i + 1) * (i + 1), buf[i]);
......
...@@ -108,21 +108,21 @@ static py::bytes SerializeMessage(T &self) { ...@@ -108,21 +108,21 @@ static py::bytes SerializeMessage(T &self) {
// Bind Methods // Bind Methods
void BindProgramDesc(py::module &m) { void BindProgramDesc(py::module &m) {
py::class_<ProgramDescBind>(m, "ProgramDesc", "") py::class_<ProgramDesc>(m, "ProgramDesc", "")
.def(py::init<>()) .def(py::init<>())
.def("__init__", .def("__init__",
[](ProgramDescBind &self, const ProgramDescBind &other) { [](ProgramDesc &self, const ProgramDesc &other) {
new (&self) ProgramDescBind(other); new (&self) ProgramDesc(other);
}) })
.def("__init__", .def("__init__",
[](ProgramDescBind &self, const py::bytes &binary_str) { [](ProgramDesc &self, const py::bytes &binary_str) {
std::string str(binary_str); std::string str(binary_str);
new (&self) ProgramDescBind(str); new (&self) ProgramDesc(str);
}) })
.def("append_block", &ProgramDescBind::AppendBlock, .def("append_block", &ProgramDesc::AppendBlock,
py::return_value_policy::reference) py::return_value_policy::reference)
.def("append_backward", .def("append_backward",
[](ProgramDescBind &program_desc, const VarDescBind &target, [](ProgramDesc &program_desc, const VarDesc &target,
const std::unordered_set<std::string> &no_grad_vars) { const std::unordered_set<std::string> &no_grad_vars) {
ParamGradInfoMap param_grad_map = ParamGradInfoMap param_grad_map =
AppendBackward(program_desc, target, no_grad_vars); AppendBackward(program_desc, target, no_grad_vars);
...@@ -138,12 +138,12 @@ void BindProgramDesc(py::module &m) { ...@@ -138,12 +138,12 @@ void BindProgramDesc(py::module &m) {
} }
return retv; return retv;
}) })
.def("block", &ProgramDescBind::MutableBlock, .def("block", &ProgramDesc::MutableBlock,
py::return_value_policy::reference) py::return_value_policy::reference)
.def("num_blocks", &ProgramDescBind::Size) .def("num_blocks", &ProgramDesc::Size)
.def("serialize_to_string", SerializeMessage<ProgramDescBind>) .def("serialize_to_string", SerializeMessage<ProgramDesc>)
.def("parse_from_string", .def("parse_from_string",
[](ProgramDescBind &program_desc, const std::string &data) { [](ProgramDesc &program_desc, const std::string &data) {
proto::ProgramDesc *desc = program_desc.Proto(); proto::ProgramDesc *desc = program_desc.Proto();
PADDLE_ENFORCE(desc->ParseFromString(data), PADDLE_ENFORCE(desc->ParseFromString(data),
"Fail to parse ProgramDesc from string. This could " "Fail to parse ProgramDesc from string. This could "
...@@ -152,35 +152,34 @@ void BindProgramDesc(py::module &m) { ...@@ -152,35 +152,34 @@ void BindProgramDesc(py::module &m) {
} }
void BindBlockDesc(py::module &m) { void BindBlockDesc(py::module &m) {
py::class_<BlockDescBind>(m, "BlockDesc", "") py::class_<BlockDesc>(m, "BlockDesc", "")
.def_property_readonly("id", &BlockDescBind::ID) .def_property_readonly("id", &BlockDesc::ID)
.def_property_readonly("parent", &BlockDescBind::Parent) .def_property_readonly("parent", &BlockDesc::Parent)
.def("append_op", &BlockDescBind::AppendOp, .def("append_op", &BlockDesc::AppendOp,
py::return_value_policy::reference) py::return_value_policy::reference)
.def("prepend_op", &BlockDescBind::PrependOp, .def("prepend_op", &BlockDesc::PrependOp,
py::return_value_policy::reference) py::return_value_policy::reference)
.def("var", .def("var",
[](BlockDescBind &self, py::bytes byte_name) { [](BlockDesc &self, py::bytes byte_name) {
std::string name = byte_name; std::string name = byte_name;
return self.Var(name); return self.Var(name);
}, },
py::return_value_policy::reference) py::return_value_policy::reference)
.def("has_var", .def("has_var",
[](BlockDescBind &self, py::bytes byte_name) { [](BlockDesc &self, py::bytes byte_name) {
std::string name = byte_name; std::string name = byte_name;
return self.HasVar(name); return self.HasVar(name);
}) })
.def("find_var", .def("find_var",
[](BlockDescBind &self, py::bytes byte_name) { [](BlockDesc &self, py::bytes byte_name) {
std::string name = byte_name; std::string name = byte_name;
return self.FindVar(name); return self.FindVar(name);
}, },
py::return_value_policy::reference) py::return_value_policy::reference)
.def("all_vars", &BlockDescBind::AllVars, .def("all_vars", &BlockDesc::AllVars, py::return_value_policy::reference)
py::return_value_policy::reference) .def("op_size", &BlockDesc::OpSize)
.def("op_size", &BlockDescBind::OpSize) .def("op", &BlockDesc::Op, py::return_value_policy::reference)
.def("op", &BlockDescBind::Op, py::return_value_policy::reference) .def("serialize_to_string", SerializeMessage<BlockDesc>);
.def("serialize_to_string", SerializeMessage<BlockDescBind>);
} }
void BindVarDsec(py::module &m) { void BindVarDsec(py::module &m) {
...@@ -193,25 +192,25 @@ void BindVarDsec(py::module &m) { ...@@ -193,25 +192,25 @@ void BindVarDsec(py::module &m) {
.value("FP32", proto::DataType::FP32) .value("FP32", proto::DataType::FP32)
.value("FP64", proto::DataType::FP64); .value("FP64", proto::DataType::FP64);
py::class_<VarDescBind> var_desc(m, "VarDesc", ""); py::class_<VarDesc> var_desc(m, "VarDesc", "");
var_desc var_desc
.def("name", .def("name",
[](const VarDescBind &self) { [](const VarDesc &self) {
py::bytes name = self.Name(); py::bytes name = self.Name();
return name; return name;
}, },
py::return_value_policy::reference) py::return_value_policy::reference)
.def("set_shape", &VarDescBind::SetShape) .def("set_shape", &VarDesc::SetShape)
.def("set_dtype", &VarDescBind::SetDataType) .def("set_dtype", &VarDesc::SetDataType)
.def("shape", &VarDescBind::Shape, py::return_value_policy::reference) .def("shape", &VarDesc::Shape, py::return_value_policy::reference)
.def("dtype", &VarDescBind::GetDataType) .def("dtype", &VarDesc::GetDataType)
.def("lod_level", &VarDescBind::GetLodLevel) .def("lod_level", &VarDesc::GetLodLevel)
.def("set_lod_level", &VarDescBind::SetLoDLevel) .def("set_lod_level", &VarDesc::SetLoDLevel)
.def("type", &VarDescBind::GetType) .def("type", &VarDesc::GetType)
.def("set_type", &VarDescBind::SetType) .def("set_type", &VarDesc::SetType)
.def("serialize_to_string", SerializeMessage<VarDescBind>) .def("serialize_to_string", SerializeMessage<VarDesc>)
.def("persistable", &VarDescBind::Persistable) .def("persistable", &VarDesc::Persistable)
.def("set_persistable", &VarDescBind::SetPersistable); .def("set_persistable", &VarDesc::SetPersistable);
py::enum_<proto::VarDesc::VarType>(var_desc, "VarType", "") py::enum_<proto::VarDesc::VarType>(var_desc, "VarType", "")
.value("LOD_TENSOR", proto::VarDesc::LOD_TENSOR) .value("LOD_TENSOR", proto::VarDesc::LOD_TENSOR)
...@@ -235,26 +234,26 @@ void BindOpDesc(py::module &m) { ...@@ -235,26 +234,26 @@ void BindOpDesc(py::module &m) {
.value("BOOLS", proto::AttrType::BOOLEANS) .value("BOOLS", proto::AttrType::BOOLEANS)
.value("BLOCK", proto::AttrType::BLOCK); .value("BLOCK", proto::AttrType::BLOCK);
py::class_<OpDescBind> op_desc(m, "OpDesc", ""); py::class_<OpDesc> op_desc(m, "OpDesc", "");
op_desc.def("type", &OpDescBind::Type) op_desc.def("type", &OpDesc::Type)
.def("set_type", &OpDescBind::SetType) .def("set_type", &OpDesc::SetType)
.def("input", &OpDescBind::Input) .def("input", &OpDesc::Input)
.def("input_names", &OpDescBind::InputNames) .def("input_names", &OpDesc::InputNames)
.def("set_input", &OpDescBind::SetInput) .def("set_input", &OpDesc::SetInput)
.def("output", &OpDescBind::Output) .def("output", &OpDesc::Output)
.def("output_names", &OpDescBind::OutputNames) .def("output_names", &OpDesc::OutputNames)
.def("set_output", &OpDescBind::SetOutput) .def("set_output", &OpDesc::SetOutput)
.def("has_attr", &OpDescBind::HasAttr) .def("has_attr", &OpDesc::HasAttr)
.def("attr_type", &OpDescBind::GetAttrType) .def("attr_type", &OpDesc::GetAttrType)
.def("attr_names", &OpDescBind::AttrNames) .def("attr_names", &OpDesc::AttrNames)
.def("set_attr", &OpDescBind::SetAttr) .def("set_attr", &OpDesc::SetAttr)
.def("attr", &OpDescBind::GetAttr) .def("attr", &OpDesc::GetAttr)
.def("set_block_attr", &OpDescBind::SetBlockAttr) .def("set_block_attr", &OpDesc::SetBlockAttr)
.def("block_attr", &OpDescBind::GetBlockAttr) .def("block_attr", &OpDesc::GetBlockAttr)
.def("check_attrs", &OpDescBind::CheckAttrs) .def("check_attrs", &OpDesc::CheckAttrs)
.def("infer_shape", &OpDescBind::InferShape) .def("infer_shape", &OpDesc::InferShape)
.def("infer_var_type", &OpDescBind::InferVarType) .def("infer_var_type", &OpDesc::InferVarType)
.def("serialize_to_string", SerializeMessage<OpDescBind>); .def("serialize_to_string", SerializeMessage<OpDesc>);
} }
} // namespace pybind } // namespace pybind
......
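The renamed classes keep their Python-facing names above, so desc objects can still be assembled directly from Python. A minimal sketch of that usage, assuming the paddle.v2.fluid.core module path and a hypothetical "mul" op wiring (neither is introduced by this patch):

    import paddle.v2.fluid.core as core   # assumed module path

    prog = core.ProgramDesc()             # binds framework::ProgramDesc
    block = prog.block(0)                 # root BlockDesc
    x = block.var("x")                    # creates a VarDesc named "x"
    x.set_type(core.VarDesc.VarType.LOD_TENSOR)
    x.set_shape([1, 784])
    block.var("w")
    block.var("out")

    op = block.append_op()                # new OpDesc appended to the block
    op.set_type("mul")
    op.set_input("X", ["x"])
    op.set_input("Y", ["w"])
    op.set_output("Out", ["out"])
    print(prog.num_blocks(), block.op_size())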
...@@ -266,36 +266,36 @@ All parameter, weight, gradient are variables in Paddle. ...@@ -266,36 +266,36 @@ All parameter, weight, gradient are variables in Paddle.
return ret_values; return ret_values;
}); });
m.def("get_grad_op_descs", m.def("get_grad_op_descs",
[](const OpDescBind &op_desc, [](const OpDesc &op_desc,
const std::unordered_set<std::string> &no_grad_set, const std::unordered_set<std::string> &no_grad_set,
std::unordered_map<std::string, std::string> &grad_to_var, std::unordered_map<std::string, std::string> &grad_to_var,
const std::vector<BlockDescBind *> &grad_sub_block) { const std::vector<BlockDesc *> &grad_sub_block) {
std::vector<std::unique_ptr<OpDescBind>> grad_op_descs = std::vector<std::unique_ptr<OpDesc>> grad_op_descs =
framework::OpInfoMap::Instance() framework::OpInfoMap::Instance()
.Get(op_desc.Type()) .Get(op_desc.Type())
.GradOpMaker()(op_desc, no_grad_set, &grad_to_var, .GradOpMaker()(op_desc, no_grad_set, &grad_to_var,
grad_sub_block); grad_sub_block);
std::vector<OpDescBind *> grad_op_desc_ptrs(grad_op_descs.size()); std::vector<OpDesc *> grad_op_desc_ptrs(grad_op_descs.size());
std::transform( std::transform(
grad_op_descs.begin(), grad_op_descs.end(), grad_op_descs.begin(), grad_op_descs.end(),
grad_op_desc_ptrs.begin(), grad_op_desc_ptrs.begin(),
[](std::unique_ptr<OpDescBind> &p) { return p.release(); }); [](std::unique_ptr<OpDesc> &p) { return p.release(); });
return grad_op_desc_ptrs; return grad_op_desc_ptrs;
}); });
m.def("prune", [](const ProgramDescBind &origin, m.def("prune", [](const ProgramDesc &origin,
const std::vector<std::array<size_t, 2>> &targets) { const std::vector<std::array<size_t, 2>> &targets) {
ProgramDescBind prog_with_targets(origin); ProgramDesc prog_with_targets(origin);
for (const auto &t : targets) { for (const auto &t : targets) {
prog_with_targets.MutableBlock(t[0])->Op(t[1])->MarkAsTarget(); prog_with_targets.MutableBlock(t[0])->Op(t[1])->MarkAsTarget();
} }
proto::ProgramDesc pruned_desc; proto::ProgramDesc pruned_desc;
Prune(*prog_with_targets.Proto(), &pruned_desc); Prune(*prog_with_targets.Proto(), &pruned_desc);
return new ProgramDescBind(pruned_desc); return new ProgramDesc(pruned_desc);
}); });
m.def("inference_optimize", [](ProgramDescBind &origin) { m.def("inference_optimize", [](ProgramDesc &origin) {
proto::ProgramDesc pruned_desc; proto::ProgramDesc pruned_desc;
InferenceOptimize(*(origin.Proto()), &pruned_desc); InferenceOptimize(*(origin.Proto()), &pruned_desc);
return new ProgramDescBind(pruned_desc); return new ProgramDesc(pruned_desc);
}); });
m.def_submodule( m.def_submodule(
"var_names", "var_names",
......
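A hedged sketch of driving the prune and inference_optimize bindings above from Python; the module path is assumed, and prune is only described in a comment because its target format ([block_idx, op_idx] pairs) is an assumption here, not something this patch documents:

    import paddle.v2.fluid.core as core   # assumed module path

    prog = core.ProgramDesc()             # fresh program with an empty root block
    # inference_optimize returns a new ProgramDesc with training-only parts stripped
    infer_prog = core.inference_optimize(prog)
    # core.prune(prog, [[block_idx, op_idx], ...]) would likewise return a ProgramDesc
    # keeping only the ops required by the marked target ops (target format assumed)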
...@@ -14,6 +14,7 @@ ...@@ -14,6 +14,7 @@
#pragma once #pragma once
#include <string> #include <string>
#include "paddle/framework/executor.h"
#include "paddle/framework/tensor.h" #include "paddle/framework/tensor.h"
#include "paddle/memory/memcpy.h" #include "paddle/memory/memcpy.h"
#include "pybind11/numpy.h" #include "pybind11/numpy.h"
...@@ -61,11 +62,15 @@ struct CastToPyBufferImpl<true, I, ARGS...> { ...@@ -61,11 +62,15 @@ struct CastToPyBufferImpl<true, I, ARGS...> {
auto *src_ptr = static_cast<const void *>(tensor.data<CUR_TYPE>()); auto *src_ptr = static_cast<const void *>(tensor.data<CUR_TYPE>());
auto *dst_ptr = static_cast<void *>(dst_tensor.mutable_data<CUR_TYPE>( auto *dst_ptr = static_cast<void *>(dst_tensor.mutable_data<CUR_TYPE>(
tensor.dims(), platform::CPUPlace())); tensor.dims(), platform::CPUPlace()));
// TODO(qijun): Here we use default CUDA stream to set GPU Tensor to
// a Python numpy array. It's better to manage CDUA stream unifiedly. framework::DeviceContextPool &pool =
paddle::platform::GpuMemcpySync(dst_ptr, src_ptr, framework::DeviceContextPool::Get();
sizeof(CUR_TYPE) * tensor.numel(), auto dev_ctx = static_cast<const platform::CUDADeviceContext *>(
cudaMemcpyDeviceToHost); pool.Borrow(tensor.place()));
paddle::platform::GpuMemcpyAsync(
dst_ptr, src_ptr, sizeof(CUR_TYPE) * tensor.numel(),
cudaMemcpyDeviceToHost, dev_ctx->stream());
#else #else
PADDLE_THROW("'GPUPlace' is not supported in CPU only device."); PADDLE_THROW("'GPUPlace' is not supported in CPU only device.");
#endif #endif
...@@ -132,10 +137,12 @@ void PyCUDATensorSetFromArray( ...@@ -132,10 +137,12 @@ void PyCUDATensorSetFromArray(
self.Resize(framework::make_ddim(dims)); self.Resize(framework::make_ddim(dims));
auto *dst = self.mutable_data<T>(place); auto *dst = self.mutable_data<T>(place);
// TODO(qijun): Here we use default CUDA stream to set a Python numpy
// array to a GPU Tensor. It's better to manage CDUA stream unifiedly. framework::DeviceContextPool &pool = framework::DeviceContextPool::Get();
paddle::platform::GpuMemcpySync(dst, array.data(), sizeof(T) * array.size(), auto dev_ctx =
cudaMemcpyHostToDevice); static_cast<const platform::CUDADeviceContext *>(pool.Borrow(place));
paddle::platform::GpuMemcpyAsync(dst, array.data(), sizeof(T) * array.size(),
cudaMemcpyHostToDevice, dev_ctx->stream());
} }
#endif #endif
......
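The Python-visible round trip served by this file is unchanged; only the copy now rides on the device context's stream instead of the removed synchronous helper. A sketch of that round trip, with the module path, GPUPlace, and the set/buffer bindings assumed from the surrounding code rather than introduced here:

    import numpy as np
    import paddle.v2.fluid.core as core    # assumed module path

    place = core.GPUPlace(0)
    t = core.LoDTensor()
    t.set(np.random.rand(2, 3).astype('float32'), place)  # PyCUDATensorSetFromArray path
    host_copy = np.array(t)                                # CastToPyBufferImpl path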
...@@ -441,9 +441,25 @@ def topk(input, k): ...@@ -441,9 +441,25 @@ def topk(input, k):
def lod_tensor_to_array(x, table): def lod_tensor_to_array(x, table):
""" """This function performs the operation that converts an LOD_Tensor to
This function creates an operator to convert an LOD_Tensor to
an array. an array.
Args:
x (Variable|list): The tensor that needs to be converted to an array.
table (ParamAttr|list): The variable that stores the level of lod
which is ordered by sequence length in
descending order.
Returns:
Variable: The variable of type array that has been converted from a
tensor.
Examples:
.. code-block:: python
x = fluid.layers.data(name='x', shape=[10])
table = fluid.layers.lod_rank_table(x, level=0)
array = fluid.layers.lod_tensor_to_array(x, table)
""" """
helper = LayerHelper("lod_tensor_to_array", **locals()) helper = LayerHelper("lod_tensor_to_array", **locals())
array = helper.create_variable( array = helper.create_variable(
...@@ -459,9 +475,26 @@ def lod_tensor_to_array(x, table): ...@@ -459,9 +475,26 @@ def lod_tensor_to_array(x, table):
def array_to_lod_tensor(x, table): def array_to_lod_tensor(x, table):
""" """This function performs the operations that converts an array to
This function creates an operator to convert an array to a an LOD_Tensor.
LOD_Tensor.
Args:
x (Variable|list): The array that needs to be converted to a tensor.
table (ParamAttr|list): The variable that stores the level of lod
which is ordered by sequence length in
descending order.
Returns:
Variable: The variable of type tensor that has been converted
from an array.
Examples:
.. code-block:: python
x = fluid.layers.data(name='x', shape=[10])
table = fluid.layers.lod_rank_table(x, level=0)
array = fluid.layers.lod_tensor_to_array(x, table)
lod_tensor = fluid.layers.array_to_lod_tensor(array, table)
""" """
helper = LayerHelper("array_to_lod_tensor", **locals()) helper = LayerHelper("array_to_lod_tensor", **locals())
tmp = helper.create_tmp_variable(dtype=x.dtype) tmp = helper.create_tmp_variable(dtype=x.dtype)
...@@ -474,10 +507,24 @@ def array_to_lod_tensor(x, table): ...@@ -474,10 +507,24 @@ def array_to_lod_tensor(x, table):
def increment(x, value=1.0, in_place=True): def increment(x, value=1.0, in_place=True):
""" """This function performs an operation that increments each value in the
This function creates an operator to increment each value in the input input :math:`x` by an amount: :math:`value` as mentioned in the input
`x` by an amount: `value` as mentioned in the input parameter. This parameter. This operation is performed in-place by default.
operation is performed in-place by default.
Args:
x (Variable|list): The tensor that has the input values.
value (float): The amount by which the values should be incremented.
in_place (bool): If the increment should be performed in-place.
Returns:
Variable: The tensor variable storing the transformation of
element-wise increment of each value in the input.
Examples:
.. code-block:: python
data = fluid.layers.data(name='data', shape=[32, 32], dtype='float32')
data = fluid.layers.increment(x=data, value=3.0, in_place=True)
""" """
helper = LayerHelper("increment", **locals()) helper = LayerHelper("increment", **locals())
if not in_place: if not in_place:
...@@ -493,9 +540,24 @@ def increment(x, value=1.0, in_place=True): ...@@ -493,9 +540,24 @@ def increment(x, value=1.0, in_place=True):
def array_write(x, i, array=None): def array_write(x, i, array=None):
""" """This function performs the operation to write the data out as an
This function creates an operator to write the data out as a
LOD_TENSOR_ARRAY. LOD_TENSOR_ARRAY.
Args:
x (Variable|list): The input tensor from which the data will be read.
i (Variable|list): The subscript index in the tensor array, that points to the
position where the data will be written.
array (Variable|list): The data can be read into this variable if
this is assigned.
Returns:
Variable: The tensor type variable that has the data written to it.
Examples:
.. code-block:: python
tmp = fluid.layers.zeros(shape=[10], dtype='int32')
i = fluid.layers.fill_constant(shape=[1], dtype='int64', value=10)
arr = layers.array_write(tmp, i=i)
""" """
helper = LayerHelper('array_write', **locals()) helper = LayerHelper('array_write', **locals())
if array is None: if array is None:
...@@ -512,6 +574,21 @@ def array_write(x, i, array=None): ...@@ -512,6 +574,21 @@ def array_write(x, i, array=None):
def create_array(dtype): def create_array(dtype):
"""This function creates an array of type :math:`LOD_TENSOR_ARRAY` using the
LayerHelper.
Args:
dtype (int|float): The data type of the elements in the array.
Returns:
Variable: The LOD_TENSOR_ARRAY variable storing the elements of the given data type.
Examples:
.. code-block:: python
data = fluid.layers.create_array(dtype='float32')
"""
helper = LayerHelper("array", **locals()) helper = LayerHelper("array", **locals())
return helper.create_variable( return helper.create_variable(
name="{0}.out".format(helper.name), name="{0}.out".format(helper.name),
...@@ -550,9 +627,19 @@ def less_than(x, y, cond=None, **ignored): ...@@ -550,9 +627,19 @@ def less_than(x, y, cond=None, **ignored):
def array_read(array, i): def array_read(array, i):
""" """This function performs the operation to read the data in as an
This function creates an operator to read the data in as a
LOD_TENSOR_ARRAY. LOD_TENSOR_ARRAY.
Args:
array (Variable|list): The input LOD_TENSOR_ARRAY from which the data will be read.
i (Variable|list): The subscript index in the tensor array, that points to the
position from which the data will be read.
Returns:
Variable: The tensor type variable that holds the data read from the array.
Examples:
.. code-block:: python
tmp = fluid.layers.zeros(shape=[10], dtype='int32')
i = fluid.layers.fill_constant(shape=[1], dtype='int64', value=10)
arr = fluid.layers.array_write(tmp, i=i)
item = fluid.layers.array_read(arr, i=i)
""" """
helper = LayerHelper('array_read', **locals()) helper = LayerHelper('array_read', **locals())
if not isinstance( if not isinstance(
...@@ -586,9 +673,23 @@ def shrink_memory(x, i, table): ...@@ -586,9 +673,23 @@ def shrink_memory(x, i, table):
def array_length(array): def array_length(array):
""" """This function performs the operation to find the length of the input
This function creates an operator to find the length of the
LOD_TENSOR_ARRAY. LOD_TENSOR_ARRAY.
Args:
array (LOD_TENSOR_ARRAY): The input array that will be used
to compute the length.
Returns:
Variable: The length of the input LoDTensorArray.
Examples:
.. code-block:: python
tmp = fluid.layers.zeros(shape=[10], dtype='int32')
i = fluid.layers.fill_constant(shape=[1], dtype='int64', value=10)
arr = fluid.layers.array_write(tmp, i=i)
arr_len = fluid.layers.array_length(arr)
""" """
helper = LayerHelper('array_length', **locals()) helper = LayerHelper('array_length', **locals())
tmp = helper.create_tmp_variable(dtype='int64') tmp = helper.create_tmp_variable(dtype='int64')
......
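Assembled from the docstrings above, a minimal sketch of how the tensor-array helpers compose; the paddle.v2.fluid import path is assumed:

    import paddle.v2.fluid as fluid       # assumed import path

    tmp = fluid.layers.zeros(shape=[10], dtype='int32')
    i = fluid.layers.fill_constant(shape=[1], dtype='int64', value=0)
    arr = fluid.layers.array_write(tmp, i=i)        # arr[i] = tmp
    item = fluid.layers.array_read(array=arr, i=i)  # read arr[i] back
    length = fluid.layers.array_length(arr)         # number of elements written so far
    i = fluid.layers.increment(x=i)                 # advance the index for the next write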
...@@ -12,20 +12,9 @@ def data(name, ...@@ -12,20 +12,9 @@ def data(name,
type=core.VarDesc.VarType.LOD_TENSOR, type=core.VarDesc.VarType.LOD_TENSOR,
stop_gradient=True): stop_gradient=True):
""" """
Data Layer. **Data Layer**
Args: This function takes in the input and based on whether data has
name: The name/alias of the function
shape: Tuple declaring the shape.
append_batch_size: Whether or not to append the data as a batch.
dtype: The type of data : float32, float_16, int etc
type: The output type. By default it is LOD_TENSOR.
lod_level(int): The LoD Level. 0 means the input data is not a sequence.
main_program: Name of the main program that calls this
startup_program: Name of the startup program
stop_gradient: A boolean that mentions whether gradient should flow.
This function takes in input and based on whether data has
to be returned back as a minibatch, it creates the global variable using to be returned back as a minibatch, it creates the global variable using
the helper functions. The global variables can be accessed by all the the helper functions. The global variables can be accessed by all the
following operations and layers in the graph. following operations and layers in the graph.
...@@ -33,6 +22,24 @@ def data(name, ...@@ -33,6 +22,24 @@ def data(name,
All the input variables of this function are passed in as local variables All the input variables of this function are passed in as local variables
to the LayerHelper constructor. to the LayerHelper constructor.
Args:
name(str): The name/alias of the function
shape(list): Tuple declaring the shape.
append_batch_size(bool): Whether or not to append the data as a batch.
dtype(int|float): The type of data : float32, float_16, int etc
type(VarType): The output type. By default it is LOD_TENSOR.
lod_level(int): The LoD Level. 0 means the input data is not a sequence.
main_program(Program): Name of the main program that calls this
startup_program(Program): Name of the startup program
stop_gradient(bool): A boolean that mentions whether gradient should flow.
Returns:
Variable: The global variable that gives access to the data.
Examples:
.. code-block:: python
data = fluid.layers.data(name='x', shape=[784], dtype='float32')
""" """
helper = LayerHelper('data', **locals()) helper = LayerHelper('data', **locals())
shape = list(shape) shape = list(shape)
......
...@@ -27,48 +27,81 @@ def fc(input, ...@@ -27,48 +27,81 @@ def fc(input,
""" """
**Fully Connected Layer** **Fully Connected Layer**
This layer accepts multiple inputs and applies a linear transformation to each input. The fully connected layer can take multiple tensors as its inputs. It
If activation type is provided, the corresponding activation function is applied to the creates a variable (one for each input tensor) called weights for each input
output of the linear transformation. For each input :math:`X`, the equation is: tensor, which represents a fully connected weight matrix from each input
unit to each output unit. The fully connected layer multiplies each input
tensor with its coresponding weight to produce an output Tensor. If
multiple input tensors are given, the results of multiple multiplications
will be sumed up. If bias_attr is not None, a biases variable will be
created and added to the output. Finally, if activation is not None,
it will be applied to the output as well.
This process can be formulated as follows:
.. math:: .. math::
Out = Act(WX + b) Out = Act\left({\sum_{i=0}^{N-1}W_iX_i + b}\right)
In the above equation: In the above equation:
* :math:`X`: Input value, a tensor with rank at least 2. * :math:`N`: Number of the input.
* :math:`W`: Weight, a 2-D tensor with shape [M, N]. * :math:`X_i`: The input tensor.
* :math:`b`: Bias, a 2-D tensor with shape [M, 1]. * :math:`W`: The weights created by this layer.
* :math:`Act`: Activation function. * :math:`b`: The bias parameter created by this layer (if needed).
* :math:`Out`: Output value, same shape with :math:`X`. * :math:`Act`: The activation funtion.
* :math:`Out`: The output tensor.
All the input variables are passed in as local variables to the LayerHelper
constructor.
Args: Args:
input(Variable|list): Input tensors. Each tensor has a rank of atleast 2 input(Variable|list): The input tensor(s) to the fully connected layer.
size(int): Output size size(int): The number of output units in the fully connected layer.
num_flatten_dims(int): Number of columns in input num_flatten_dims(int): The fc layer can accept an input tensor with more
param_attr(ParamAttr|list): The parameters/weights to the FC Layer than two dimensions. If this happens, the
bias_attr(ParamAttr|list): Bias parameter for the FC layer multidimensional tensor will first be flattened
act(str): Activation type into a 2-dimensional matrix. The parameter
name(str): Name/alias of the function `num_flatten_dims` determines how the input tensor
is flattened: the first `num_flatten_dims`
dimensions will be flatten to form the first
dimension of the final matrix (height of the
matrix), and the rest `rank(X) - num_flatten_dims`
dimensions are flattened to form the second
dimension of the final matrix (width of the matrix).
For example, suppose `X` is a 5-dimensional tensor
with a shape [2, 3, 4, 5, 6], and
`num_flatten_dims` = 3. Then, the flattened matrix
will have a shape [2 x 3 x 4, 5 x 6] = [24, 30].
By default, `num_flatten_dims` is set to 1.
param_attr(ParamAttr|list): The parameter attribute for learnable
parameters/weights of the fully connected
layer.
param_initializer(ParamAttr|list): The initializer used for the
weight/parameter. If set None,
XavierInitializer() will be used.
bias_attr(ParamAttr|list): The parameter attribute for the bias parameter
for this layer. If set None, no bias will be
added to the output units.
bias_initializer(ParamAttr|list): The initializer used for the bias.
If set None, then ConstantInitializer()
will be used.
act(str): Activation to be applied to the output of the fully connected
layer.
name(str): Name/alias of the fully connected layer.
Returns: Returns:
Variable: The tensor variable storing the transformation and \ Variable: The output tensor variable.
non-linearity activation result.
Raises: Raises:
ValueError: If rank of input tensor is less than 2. ValueError: If rank of the input tensor is less than 2.
Examples: Examples:
.. code-block:: python .. code-block:: python
data = fluid.layers.data(name='data', shape=[32, 32], dtype='float32') data = fluid.layers.data(name="data", shape=[32, 32], dtype="float32")
fc = fluid.layers.fc(input=data, size=1000, act="tanh") fc = fluid.layers.fc(input=data, size=1000, act="tanh")
""" """
helper = LayerHelper('fc', **locals())
helper = LayerHelper("fc", **locals())
dtype = helper.input_dtype() dtype = helper.input_dtype()
...@@ -88,8 +121,8 @@ def fc(input, ...@@ -88,8 +121,8 @@ def fc(input,
"Y": w, "Y": w,
}, },
outputs={"Out": tmp}, outputs={"Out": tmp},
attrs={'x_num_col_dims': num_flatten_dims, attrs={"x_num_col_dims": num_flatten_dims,
'y_num_col_dims': 1}) "y_num_col_dims": 1})
mul_results.append(tmp) mul_results.append(tmp)
# sum # sum
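The rewritten docstring stresses that `fc` sums the projections of multiple inputs before the bias and activation are applied; a short usage sketch of that multi-input form (assumed, not part of this patch):

    import paddle.v2.fluid as fluid       # assumed import path

    data_1 = fluid.layers.data(name="data_1", shape=[32, 32], dtype="float32")
    data_2 = fluid.layers.data(name="data_2", shape=[32, 64], dtype="float32")
    # one weight matrix per input; the two mul results are summed, then bias and tanh apply
    out = fluid.layers.fc(input=[data_1, data_2], size=1000, act="tanh")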
...@@ -117,7 +150,7 @@ def embedding(input, size, is_sparse=False, param_attr=None, dtype='float32'): ...@@ -117,7 +150,7 @@ def embedding(input, size, is_sparse=False, param_attr=None, dtype='float32'):
Args: Args:
input(Variable): Input to the function input(Variable): Input to the function
size(int): Output size size(tuple|list|None): Shape of the look up table parameter
is_sparse(bool): Boolean flag that specifying whether the input is sparse is_sparse(bool): Boolean flag that specifying whether the input is sparse
param_attr(ParamAttr): Parameters for this layer param_attr(ParamAttr): Parameters for this layer
dtype(np.dtype|core.DataType|str): The type of data : float32, float_16, int etc dtype(np.dtype|core.DataType|str): The type of data : float32, float_16, int etc
...@@ -704,6 +737,7 @@ def conv2d_transpose(input, ...@@ -704,6 +737,7 @@ def conv2d_transpose(input,
filter_size=None, filter_size=None,
padding=None, padding=None,
stride=None, stride=None,
dilation=None,
param_attr=None): param_attr=None):
""" """
The transpose of conv2d layer. The transpose of conv2d layer.
...@@ -727,6 +761,9 @@ def conv2d_transpose(input, ...@@ -727,6 +761,9 @@ def conv2d_transpose(input,
stride(int|tuple): The stride size. If stride is a tuple, it must stride(int|tuple): The stride size. If stride is a tuple, it must
contain two integers, (stride_H, stride_W). Otherwise, the contain two integers, (stride_H, stride_W). Otherwise, the
stride_H = stride_W = stride. stride_H = stride_W = stride.
dilation(int|tuple): The dilation size. If dilation is a tuple, it must
contain two integers, (dilation_H, dilation_W). Otherwise, the
dilation_H = dilation_W = dilation.
param_attr: Parameter Attribute. param_attr: Parameter Attribute.
main_program(Program): the main program main_program(Program): the main program
startup_program(Program): the startup program startup_program(Program): the startup program
...@@ -747,10 +784,15 @@ def conv2d_transpose(input, ...@@ -747,10 +784,15 @@ def conv2d_transpose(input,
op_attr['paddings'] = padding op_attr['paddings'] = padding
if isinstance(stride, int): if isinstance(stride, int):
op_attr['strides'] = stride op_attr['strides'] = [stride, stride]
elif stride is not None: elif stride is not None:
op_attr['strides'] = stride op_attr['strides'] = stride
if isinstance(dilation, int):
op_attr['dilations'] = [dilation, dilation]
elif dilation is not None:
op_attr['dilations'] = dilation
if filter_size is None: if filter_size is None:
if output_size is None: if output_size is None:
raise ValueError("output_size must be set when filter_size is None") raise ValueError("output_size must be set when filter_size is None")
...@@ -759,14 +801,17 @@ def conv2d_transpose(input, ...@@ -759,14 +801,17 @@ def conv2d_transpose(input,
padding = op_attr.get('paddings', [0, 0]) padding = op_attr.get('paddings', [0, 0])
stride = op_attr.get('strides', [1, 1]) stride = op_attr.get('strides', [1, 1])
dilation = op_attr.get('dilations', [1, 1])
h_in = input.shape[2] h_in = input.shape[2]
w_in = input.shape[3] w_in = input.shape[3]
filter_size_h = output_size[0] - (h_in - 1) * stride[0] + 2 * padding[0] filter_size_h = (output_size[0] - (h_in - 1) * stride[0] + 2 * padding[0] - 1) / dilation[0] + 1
filter_size_w = output_size[1] - (w_in - 1) * stride[1] + 2 * padding[1] filter_size_w = (output_size[1] - (w_in - 1) * stride[1] + 2 * padding[1] - 1) / dilation[1] + 1
filter_size = [filter_size_h, filter_size_w] filter_size = [filter_size_h, filter_size_w]
elif isinstance(filter_size, int): elif isinstance(filter_size, int):
filter_size = [filter_size, filter_size] filter_size = [filter_size, filter_size]
......
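The new filter-size expression is just the inverse of the output-size relation sketched above: it recovers the filter size the layer must use when only `output_size` is given. A quick numeric check with illustrative values:

    # filter = (output - (in - 1) * stride + 2 * padding - 1) / dilation + 1
    output_size, h_in, stride, padding, dilation = 7, 5, 1, 1, 2
    filter_size_h = (output_size - (h_in - 1) * stride + 2 * padding - 1) / dilation + 1
    assert filter_size_h == 3   # consistent with the forward relation above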
...@@ -27,10 +27,23 @@ def cast(x, dtype): ...@@ -27,10 +27,23 @@ def cast(x, dtype):
return out return out
def concat(input, axis): def concat(input, axis=0):
""" """
This function concats the input along the axis mentioned **Concat**
This function concatenates the input along the axis mentioned
and returns that as the output. and returns that as the output.
Args:
input(list): List of tensors to be concatenated
axis(int): Integer axis along which the tensors will be concatenated
Returns:
Variable: Output variable of the concatenation
Examples:
.. code-block:: python
out = fluid.layers.concat(input=[Efirst, Esecond, Ethird, Efourth])
""" """
helper = LayerHelper('concat', **locals()) helper = LayerHelper('concat', **locals())
out = helper.create_tmp_variable(dtype=helper.input_dtype()) out = helper.create_tmp_variable(dtype=helper.input_dtype())
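The `axis` semantics of `concat` match NumPy's `concatenate`; a NumPy reference sketch for intuition (this is not the Fluid API, just the equivalent array operation with illustrative shapes):

    import numpy as np
    a = np.ones((2, 3)); b = np.zeros((2, 3))
    np.concatenate([a, b], axis=0).shape   # (4, 3): joined along the first dimension
    np.concatenate([a, b], axis=1).shape   # (2, 6): joined along the second dimension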
...@@ -43,9 +56,28 @@ def concat(input, axis): ...@@ -43,9 +56,28 @@ def concat(input, axis):
def sums(input, out=None): def sums(input, out=None):
""" """This function performs the sum operation on the input and returns the
This function takes in the input and performs the sum operation on it result as the output.
and returns that as the output.
Args:
input (Variable|list): The input tensor that has the elements
that need to be summed up.
Returns:
Variable: The tensor type variable that has the sum of input
written to it.
Examples:
.. code-block:: python
tmp = fluid.layers.zeros(shape=[10], dtype='int32')
i = fluid.layers.fill_constant(shape=[1], dtype='int64', value=10)
a0 = layers.array_read(array=tmp, i=i)
i = layers.increment(x=i)
a1 = layers.array_read(array=tmp, i=i)
mean_a0 = layers.mean(x=a0)
mean_a1 = layers.mean(x=a1)
a_sum = layers.sums(input=[mean_a0, mean_a1])
""" """
helper = LayerHelper('sum', **locals()) helper = LayerHelper('sum', **locals())
if out is None: if out is None:
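`sums` adds its inputs element-wise across the list rather than reducing within a tensor; a NumPy reference for what the docstring example computes (the values are illustrative placeholders for the fetched means):

    import numpy as np
    mean_a0, mean_a1 = np.array([0.5]), np.array([1.5])
    a_sum = mean_a0 + mean_a1   # sums(input=[mean_a0, mean_a1]) yields the same element-wise result, [2.0]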
...@@ -55,6 +87,24 @@ def sums(input, out=None): ...@@ -55,6 +87,24 @@ def sums(input, out=None):
def assign(input, output): def assign(input, output):
"""
**Assign**
This function copies the *input* Variable to the *output* Variable.
Args:
input(Variable): The source variable
output(Variable): The destination variable
Returns:
Variable: The destination variable that was supplied as the *output*.
Examples:
.. code-block:: python
out = fluid.layers.create_tensor(dtype='float32')
hidden = fluid.layers.fc(input=data, size=10)
fluid.layers.assign(hidden, out)
"""
helper = LayerHelper('assign', **locals()) helper = LayerHelper('assign', **locals())
helper.append_op( helper.append_op(
type='scale', type='scale',
......
...@@ -58,7 +58,9 @@ class ParamAttr(object): ...@@ -58,7 +58,9 @@ class ParamAttr(object):
def to_kwargs(self, with_initializer=False): def to_kwargs(self, with_initializer=False):
kwargs = { kwargs = {
'name': self.name, 'name': self.name,
'learning_rate': self.learning_rate, 'optimize_attr': {
'learning_rate': self.learning_rate
},
'regularizer': self.regularizer, 'regularizer': self.regularizer,
'trainable': self.trainable, 'trainable': self.trainable,
'clip_attr': self.clip 'clip_attr': self.clip
......
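After this change the learning rate is nested under `optimize_attr` in the dict that `to_kwargs` returns; roughly the following shape, with placeholder values (the parameter name and defaults below are illustrative, not taken from the patch):

    # Sketch of the returned kwargs after the change:
    kwargs = {
        'name': 'fc_0.w_0',
        'optimize_attr': {'learning_rate': 1.0},
        'regularizer': None,
        'trainable': True,
        'clip_attr': None,
    }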
...@@ -341,6 +341,10 @@ class TestBatchNormOp(OpTest): ...@@ -341,6 +341,10 @@ class TestBatchNormOp(OpTest):
places = [core.CPUPlace()] places = [core.CPUPlace()]
if core.is_compile_gpu() and core.op_support_gpu("batch_norm"): if core.is_compile_gpu() and core.op_support_gpu("batch_norm"):
places.append(core.GPUPlace(0)) places.append(core.GPUPlace(0))
core.init_devices(["CPU", "GPU:0"])
else:
core.init_devices(["CPU"])
for place in places: for place in places:
for data_format in ["NCHW", "NHWC"]: for data_format in ["NCHW", "NHWC"]:
test_with_place(place, data_format, [2, 3, 4, 5]) test_with_place(place, data_format, [2, 3, 4, 5])
......
...@@ -3,14 +3,17 @@ import numpy as np ...@@ -3,14 +3,17 @@ import numpy as np
from op_test import OpTest from op_test import OpTest
def conv2dtranspose_forward_naive(input_, filter_, conv2dtranspose_param): def conv2dtranspose_forward_naive(input_, filter_, attrs):
in_n, in_c, in_h, in_w = input_.shape in_n, in_c, in_h, in_w = input_.shape
f_c, out_c, f_h, f_w = filter_.shape f_c, out_c, f_h, f_w = filter_.shape
assert in_c == f_c assert in_c == f_c
stride, pad = conv2dtranspose_param['stride'], conv2dtranspose_param['pad'] stride, pad, dilations = attrs['strides'], attrs['paddings'], attrs['dilations']
out_h = (in_h - 1) * stride[0] + f_h d_bolck_h = dilations[0] * (f_h - 1) + 1
out_w = (in_w - 1) * stride[1] + f_w d_bolck_w = dilations[1] * (f_w - 1) + 1
out_h = (in_h - 1) * stride[0] + d_bolck_h
out_w = (in_w - 1) * stride[1] + d_bolck_w
out = np.zeros((in_n, out_c, out_h, out_w)) out = np.zeros((in_n, out_c, out_h, out_w))
...@@ -23,9 +26,9 @@ def conv2dtranspose_forward_naive(input_, filter_, conv2dtranspose_param): ...@@ -23,9 +26,9 @@ def conv2dtranspose_forward_naive(input_, filter_, conv2dtranspose_param):
for k in range(out_c): for k in range(out_c):
tmp_out = np.sum(input_masked * filter_[:, k, :, :], axis=0) tmp_out = np.sum(input_masked * filter_[:, k, :, :], axis=0)
i1, i2 = i * stride[0], i * stride[0] + f_h i1, i2 = i * stride[0], i * stride[0] + d_bolck_h
j1, j2 = j * stride[0], j * stride[0] + f_w j1, j2 = j * stride[0], j * stride[0] + d_bolck_h
out[n, k, i1:i2, j1:j2] += tmp_out out[n, k, i1:i2:dilations[0], j1:j2:dilations[1]] += tmp_out
out = out[:, :, pad[0]:out_h - pad[0], pad[1]:out_w - pad[1]] out = out[:, :, pad[0]:out_h - pad[0], pad[1]:out_w - pad[1]]
return out return out
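The dilated scatter in the naive reference writes each kernel tap `dilation` pixels apart, over a footprint of `dilation * (f - 1) + 1`. A tiny standalone NumPy sketch of that indexing pattern (not the test itself; all values are illustrative):

    import numpy as np
    f_h, dilation, stride, i = 3, 2, 1, 0
    d_block_h = dilation * (f_h - 1) + 1            # 5: footprint of a dilated 3-tap kernel
    row = np.zeros(7)
    i1, i2 = i * stride, i * stride + d_block_h
    row[i1:i2:dilation] += np.array([1., 2., 3.])   # taps land at positions 0, 2, 4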
...@@ -37,11 +40,8 @@ class TestConv2dTransposeOp(OpTest): ...@@ -37,11 +40,8 @@ class TestConv2dTransposeOp(OpTest):
self.init_op_type() self.init_op_type()
self.init_test_case() self.init_test_case()
conv2dtranspose_param = {'stride': self.stride, 'pad': self.pad}
input_ = np.random.random(self.input_size).astype("float32") input_ = np.random.random(self.input_size).astype("float32")
filter_ = np.random.random(self.filter_size).astype("float32") filter_ = np.random.random(self.filter_size).astype("float32")
output = conv2dtranspose_forward_naive(
input_, filter_, conv2dtranspose_param).astype('float32')
self.inputs = {'Input': input_, 'Filter': filter_} self.inputs = {'Input': input_, 'Filter': filter_}
self.attrs = { self.attrs = {
...@@ -49,6 +49,10 @@ class TestConv2dTransposeOp(OpTest): ...@@ -49,6 +49,10 @@ class TestConv2dTransposeOp(OpTest):
'paddings': self.pad, 'paddings': self.pad,
'dilations': self.dilations 'dilations': self.dilations
} }
output = conv2dtranspose_forward_naive(input_, filter_,
self.attrs).astype('float32')
self.outputs = {'Output': output} self.outputs = {'Output': output}
def test_check_output(self): def test_check_output(self):
...@@ -104,11 +108,60 @@ class TestWithStride(TestConv2dTransposeOp): ...@@ -104,11 +108,60 @@ class TestWithStride(TestConv2dTransposeOp):
self.filter_size = [f_c, 6, 3, 3] self.filter_size = [f_c, 6, 3, 3]
class TestWithDilation(TestConv2dTransposeOp):
def init_test_case(self):
self.pad = [1, 1]
self.stride = [1, 1]
self.dilations = [2, 2]
self.input_size = [2, 3, 5, 5] # NCHW
f_c = self.input_size[1]
self.filter_size = [f_c, 6, 3, 3]
# ------------ test_cudnn ------------ # ------------ test_cudnn ------------
class TestCudnn(TestConv2dTransposeOp): class TestCudnn(TestConv2dTransposeOp):
def init_op_type(self): def init_op_type(self):
self.op_type = "conv2d_transpose_cudnn" self.op_type = "conv2d_transpose_cudnn"
class TestCudnnWithPad(TestWithPad):
def init_test_case(self):
self.pad = [1, 1]
self.stride = [1, 1]
self.dilations = [1, 1]
self.input_size = [2, 3, 5, 5] # NCHW
f_c = self.input_size[1]
self.filter_size = [f_c, 6, 3, 3]
def init_op_type(self):
self.op_type = "conv2d_transpose_cudnn"
class TestCudnnWithStride(TestWithStride):
def init_test_case(self):
self.pad = [1, 1]
self.stride = [2, 2]
self.dilations = [1, 1]
self.input_size = [2, 3, 5, 5] # NCHW
f_c = self.input_size[1]
self.filter_size = [f_c, 6, 3, 3]
def init_op_type(self):
self.op_type = "conv2d_transpose_cudnn"
# #cudnn v5 does not support dilation conv.
# class TestCudnnWithDilation(TestWithDilation):
# def init_test_case(self):
# self.pad = [1, 1]
# self.stride = [2, 2]
# self.dilations = [2, 2]
# self.input_size = [2, 3, 5, 5] # NCHW
# f_c = self.input_size[1]
# self.filter_size = [f_c, 6, 3, 3]
#
# def init_op_type(self):
# self.op_type = "conv2d_transpose_cudnn"
if __name__ == '__main__': if __name__ == '__main__':
unittest.main() unittest.main()
...@@ -3,15 +3,20 @@ import numpy as np ...@@ -3,15 +3,20 @@ import numpy as np
from op_test import OpTest from op_test import OpTest
def conv3dtranspose_forward_naive(input_, filter_, conv3dtranspose_param): def conv3dtranspose_forward_naive(input_, filter_, attrs):
in_n, in_c, in_d, in_h, in_w = input_.shape in_n, in_c, in_d, in_h, in_w = input_.shape
f_c, out_c, f_d, f_h, f_w = filter_.shape f_c, out_c, f_d, f_h, f_w = filter_.shape
assert in_c == f_c assert in_c == f_c
stride, pad = conv3dtranspose_param['stride'], conv3dtranspose_param['pad'] stride, pad, dilations = attrs['strides'], attrs['paddings'], attrs['dilations']
out_d = (in_d - 1) * stride[0] + f_d d_bolck_d = dilations[0] * (f_d - 1) + 1
out_h = (in_h - 1) * stride[1] + f_h d_bolck_h = dilations[1] * (f_h - 1) + 1
out_w = (in_w - 1) * stride[2] + f_w d_bolck_w = dilations[2] * (f_w - 1) + 1
out_d = (in_d - 1) * stride[0] + d_bolck_d
out_h = (in_h - 1) * stride[1] + d_bolck_h
out_w = (in_w - 1) * stride[2] + d_bolck_w
out = np.zeros((in_n, out_c, out_d, out_h, out_w)) out = np.zeros((in_n, out_c, out_d, out_h, out_w))
for n in range(in_n): for n in range(in_n):
...@@ -25,10 +30,11 @@ def conv3dtranspose_forward_naive(input_, filter_, conv3dtranspose_param): ...@@ -25,10 +30,11 @@ def conv3dtranspose_forward_naive(input_, filter_, conv3dtranspose_param):
for k in range(out_c): for k in range(out_c):
tmp_out = np.sum(input_masked * filter_[:, k, :, :, :], tmp_out = np.sum(input_masked * filter_[:, k, :, :, :],
axis=0) axis=0)
d1, d2 = d * stride[0], d * stride[0] + f_d d1, d2 = d * stride[0], d * stride[0] + d_bolck_d
i1, i2 = i * stride[1], i * stride[1] + f_h i1, i2 = i * stride[1], i * stride[1] + d_bolck_h
j1, j2 = j * stride[2], j * stride[2] + f_w j1, j2 = j * stride[2], j * stride[2] + d_bolck_w
out[n, k, d1:d2, i1:i2, j1:j2] += tmp_out out[n, k, d1:d2:dilations[0], i1:i2:dilations[1], j1:j2:dilations[2]] += tmp_out
out = out[:, :, pad[0]:out_d - pad[0], pad[1]:out_h - pad[1], pad[2]:out_w - out = out[:, :, pad[0]:out_d - pad[0], pad[1]:out_h - pad[1], pad[2]:out_w -
pad[2]] pad[2]]
...@@ -41,18 +47,19 @@ class TestConv3dTransposeOp(OpTest): ...@@ -41,18 +47,19 @@ class TestConv3dTransposeOp(OpTest):
self.init_op_type() self.init_op_type()
self.init_test_case() self.init_test_case()
conv3dtranspose_param = {'stride': self.stride, 'pad': self.pad}
input_ = np.random.random(self.input_size).astype("float32") input_ = np.random.random(self.input_size).astype("float32")
filter_ = np.random.random(self.filter_size).astype("float32") filter_ = np.random.random(self.filter_size).astype("float32")
output = conv3dtranspose_forward_naive(
input_, filter_, conv3dtranspose_param).astype("float32")
self.inputs = {'Input': input_, 'Filter': filter_} self.inputs = {'Input': input_, 'Filter': filter_}
self.attrs = { self.attrs = {
'strides': self.stride, 'strides': self.stride,
'paddings': self.pad, 'paddings': self.pad,
# 'dilations': self.dilations 'dilations': self.dilations
} }
output = conv3dtranspose_forward_naive(input_, filter_,
self.attrs).astype("float32")
self.outputs = {'Output': output} self.outputs = {'Output': output}
def test_check_output(self): def test_check_output(self):
...@@ -108,11 +115,60 @@ class TestWithStride(TestConv3dTransposeOp): ...@@ -108,11 +115,60 @@ class TestWithStride(TestConv3dTransposeOp):
self.filter_size = [f_c, 6, 3, 3, 3] self.filter_size = [f_c, 6, 3, 3, 3]
class TestWithDilation(TestConv3dTransposeOp):
def init_test_case(self):
self.pad = [1, 1, 1]
self.stride = [1, 1, 1]
self.dilations = [2, 2, 2]
self.input_size = [2, 3, 5, 5, 5] # NCDHW
f_c = self.input_size[1]
self.filter_size = [f_c, 6, 3, 3, 3]
# ------------ test_cudnn ------------ # ------------ test_cudnn ------------
class TestCudnn(TestConv3dTransposeOp): class TestCudnn(TestConv3dTransposeOp):
def init_op_type(self): def init_op_type(self):
self.op_type = "conv3d_transpose_cudnn" self.op_type = "conv3d_transpose_cudnn"
class TestCudnnWithPad(TestWithPad):
def init_test_case(self):
self.pad = [1, 1, 1]
self.stride = [1, 1, 1]
self.dilations = [1, 1, 1]
self.input_size = [2, 3, 5, 5, 5] # NCDHW
f_c = self.input_size[1]
self.filter_size = [f_c, 6, 3, 3, 3]
def init_op_type(self):
self.op_type = "conv3d_transpose_cudnn"
class TestCudnnWithStride(TestWithStride):
def init_test_case(self):
self.pad = [1, 1, 1]
self.stride = [2, 2, 2]
self.dilations = [1, 1, 1]
self.input_size = [2, 3, 5, 5, 5] # NCDHW
f_c = self.input_size[1]
self.filter_size = [f_c, 6, 3, 3, 3]
def init_op_type(self):
self.op_type = "conv3d_transpose_cudnn"
# #cudnn v5 does not support dilation conv.
# class TestCudnnWithDilation(TestWithDilation):
# def init_test_case(self):
# self.pad = [1, 1, 1]
# self.stride = [2, 2, 2]
# self.dilations = [2, 2, 2]
# self.input_size = [2, 3, 5, 5, 5] # NCDHW
# f_c = self.input_size[1]
# self.filter_size = [f_c, 6, 3, 3, 3]
#
# def init_op_type(self):
# self.op_type = "conv3d_transpose_cudnn"
if __name__ == '__main__': if __name__ == '__main__':
unittest.main() unittest.main()
...@@ -7,7 +7,7 @@ class TestFillZerosLikeOp(OpTest): ...@@ -7,7 +7,7 @@ class TestFillZerosLikeOp(OpTest):
def setUp(self): def setUp(self):
self.op_type = "fill_zeros_like" self.op_type = "fill_zeros_like"
self.inputs = {'X': np.random.random((219, 232)).astype("float32")} self.inputs = {'X': np.random.random((219, 232)).astype("float32")}
self.outputs = {'Y': np.zeros_like(self.inputs["X"])} self.outputs = {'Out': np.zeros_like(self.inputs["X"])}
def test_check_output(self): def test_check_output(self):
self.check_output() self.check_output()
......
import unittest import unittest
import numpy
import paddle.v2.fluid as fluid
import paddle.v2.fluid.core as core import paddle.v2.fluid.core as core
from paddle.v2.fluid.op import Operator from paddle.v2.fluid.op import Operator
import numpy from paddle.v2.fluid.executor import Executor
class TestGaussianRandomOp(unittest.TestCase): class TestGaussianRandomOp(unittest.TestCase):
def setUp(self):
self.op_type = "gaussian_random"
self.inputs = {}
self.attrs = {"shape": [1000, 784], "mean": .0, "std": 1., "seed": 10}
self.outputs = ["Out"]
def test_cpu(self): def test_cpu(self):
self.gaussian_random_test(place=core.CPUPlace()) self.gaussian_random_test(place=fluid.CPUPlace())
def test_gpu(self): def test_gpu(self):
if core.is_compile_gpu(): if core.is_compile_gpu():
self.gaussian_random_test(place=core.GPUPlace(0)) self.gaussian_random_test(place=fluid.GPUPlace(0))
def gaussian_random_test(self, place): def gaussian_random_test(self, place):
scope = core.Scope()
scope.var('Out').get_tensor()
op = Operator(
"gaussian_random",
Out='Out',
shape=[1000, 784],
mean=.0,
std=1.,
seed=10)
context = core.DeviceContext.create(place) context = core.DeviceContext.create(place)
op.run(scope, context) program = fluid.Program()
tensor = numpy.array(scope.find_var('Out').get_tensor()) block = program.global_block()
vout = block.create_var(name="Out")
op = block.append_op(
type=self.op_type, outputs={"Out": vout}, attrs=self.attrs)
op.desc.infer_var_type(block.desc)
op.desc.infer_shape(block.desc)
fetch_list = []
for var_name in self.outputs:
fetch_list.append(block.var(var_name))
exe = Executor(place)
outs = exe.run(program, fetch_list=fetch_list)
tensor = outs[0]
self.assertAlmostEqual(numpy.mean(tensor), .0, delta=0.1) self.assertAlmostEqual(numpy.mean(tensor), .0, delta=0.1)
self.assertAlmostEqual(numpy.std(tensor), 1., delta=0.1) self.assertAlmostEqual(numpy.std(tensor), 1., delta=0.1)
......
import unittest import unittest
import numpy
from paddle.v2.fluid.op import Operator from paddle.v2.fluid.op import Operator
import paddle.v2.fluid.core as core import paddle.v2.fluid.core as core
import numpy import paddle.v2.fluid as fluid
class TestUniformRandomOp(unittest.TestCase): class TestUniformRandomOp(unittest.TestCase):
def test_uniform_random_cpu(self): def setUp(self):
self.op_type = "uniform_random"
self.inputs = {}
self.attrs = {
"shape": [1000, 784],
"min": -5.0,
"max": 10.0,
"seed": 10
}
self.outputs = ["Out"]
def test_cpu(self):
self.uniform_random_test(place=core.CPUPlace()) self.uniform_random_test(place=core.CPUPlace())
def test_uniform_random_gpu(self): def test_gpu(self):
if core.is_compile_gpu(): if core.is_compile_gpu():
self.uniform_random_test(place=core.GPUPlace(0)) self.uniform_random_test(place=core.GPUPlace(0))
def uniform_random_test(self, place): def uniform_random_test(self, place):
scope = core.Scope() context = core.DeviceContext.create(place)
scope.var('X').get_tensor() program = fluid.Program()
block = program.global_block()
op = Operator( vout = block.create_var(name="Out")
"uniform_random", op = block.append_op(
Out='X', type=self.op_type, outputs={"Out": vout}, attrs=self.attrs)
shape=[1000, 784],
min=-5.0, op.desc.infer_var_type(block.desc)
max=10.0, op.desc.infer_shape(block.desc)
seed=10)
fetch_list = []
ctx = core.DeviceContext.create(place) for var_name in self.outputs:
op.run(scope, ctx) fetch_list.append(block.var(var_name))
tensor = numpy.array(scope.find_var('X').get_tensor())
exe = fluid.Executor(place)
outs = exe.run(program, fetch_list=fetch_list)
tensor = outs[0]
self.assertAlmostEqual(tensor.mean(), 2.5, delta=0.1) self.assertAlmostEqual(tensor.mean(), 2.5, delta=0.1)
......