提交 fb69d38c 编写于 作者: C chengduoZH

Merge branch 'develop' of https://github.com/PaddlePaddle/Paddle into...

Merge branch 'develop' of https://github.com/PaddlePaddle/Paddle into Adapting_to_the_BatchNorm_structure_to_support_3D_data
......@@ -25,7 +25,12 @@ IF(NOT ${CBLAS_FOUND})
"${CBLAS_INSTALL_DIR}/lib/${CMAKE_STATIC_LIBRARY_PREFIX}openblas${CMAKE_STATIC_LIBRARY_SUFFIX}"
CACHE FILEPATH "openblas library." FORCE)
SET(COMMON_ARGS CC=${CMAKE_C_COMPILER} NO_SHARED=1 NO_LAPACK=1 libs)
IF(APPLE)
SET(OPENBLAS_CC "${CMAKE_C_COMPILER} -isysroot ${CMAKE_OSX_SYSROOT}")
SET(COMMON_ARGS CC=${OPENBLAS_CC} NO_SHARED=1 NO_LAPACK=1 libs)
ELSE()
SET(COMMON_ARGS CC=${CMAKE_C_COMPILER} NO_SHARED=1 NO_LAPACK=1 libs)
ENDIF()
IF(CMAKE_CROSSCOMPILING)
IF(ANDROID)
......@@ -40,11 +45,11 @@ IF(NOT ${CBLAS_FOUND})
SET(OPTIONAL_ARGS HOSTCC=${HOST_C_COMPILER} TARGET=${TARGET} ARM_SOFTFP_ABI=1 USE_THREAD=0)
ELSEIF(RPI)
# use hardfp
SET(OPENBLAS_COMMIT "v0.2.19")
SET(OPENBLAS_COMMIT "v0.2.20")
SET(OPTIONAL_ARGS HOSTCC=${HOST_C_COMPILER} TARGET=ARMV7 USE_THREAD=0)
ENDIF()
ELSE()
SET(OPENBLAS_COMMIT "v0.2.19")
SET(OPENBLAS_COMMIT "v0.2.20")
SET(OPTIONAL_ARGS "")
IF(CMAKE_SYSTEM_PROCESSOR MATCHES "^x86(_64)?$")
SET(OPTIONAL_ARGS DYNAMIC_ARCH=1 NUM_THREADS=64)
......
......@@ -117,6 +117,8 @@ inline void Tensor::CopyFrom(const Tensor& src,
memory::Copy(boost::get<platform::GPUPlace>(dst_place), dst_ptr,
boost::get<platform::GPUPlace>(src_place), src_ptr, size, 0);
}
PADDLE_ENFORCE(cudaStreamSynchronize(0),
"cudaStreamSynchronize failed in Tensor CopyFrom");
#endif
}
......
......@@ -223,7 +223,7 @@ void CrossEntropyOverBeam::checkInputs() {
<< inputLayers_[i * 3]->getName()
<< " should be a nested sequence";
CHECK_EQ(getInputValue(i * 3 + 1)->getWidth(), beamSize_);
CHECK_EQ(scores.getNumSequences(), batchSize_);
CHECK_EQ(batchSize_, static_cast<size_t>(scores.getNumSequences()));
CHECK_EQ(scores.getNumSubSequences(), selCandidates.getBatchSize());
} else {
CHECK(scores.hasSeq()) << "input " << i << " "
......@@ -231,10 +231,10 @@ void CrossEntropyOverBeam::checkInputs() {
<< " should be a sequence";
batchSize_ = scores.getNumSequences();
beamSize_ = getInputValue(i * 3 + 1)->getWidth();
CHECK_EQ(batchSize_, selCandidates.getBatchSize());
CHECK_EQ(batchSize_, static_cast<size_t>(selCandidates.getBatchSize()));
}
CHECK_EQ(1U, scores.value->getWidth());
CHECK_EQ(batchSize_, goldSeq.getBatchSize());
CHECK_EQ(batchSize_, static_cast<size_t>(goldSeq.getBatchSize()));
}
}
......@@ -377,8 +377,8 @@ void CrossEntropyOverBeam::forward(PassType passType) {
MatrixPtr outputValue = getOutputValue();
for (size_t i = 0; i < batchSize_; ++i) {
beamCosts_[i].setData(
std::move(std::make_shared<BeamExpansion>(beamPerSeq_[i])), beamSize_);
BeamExpansionPtr ptr = std::make_shared<BeamExpansion>(beamPerSeq_[i]);
beamCosts_[i].setData(std::move(ptr), beamSize_);
outputValue->getData()[i] = beamCosts_[i].forward();
}
}
......
file(GLOB GENERAL_OPS RELATIVE "${CMAKE_CURRENT_SOURCE_DIR}" "*_op.cc")
string(REPLACE ".cc" "" GENERAL_OPS "${GENERAL_OPS}")
function(op_library TARGET)
# op_library is a function to create op library. The interface is same as
# cc_library. But it handle split GPU/CPU code and link some common library
# for ops.
set(OP_LIBRARY ${TARGET} ${OP_LIBRARY} PARENT_SCOPE)
set(cc_srcs)
set(cu_srcs)
set(op_common_deps operator op_registry)
......@@ -43,33 +46,26 @@ endfunction()
add_subdirectory(math)
cc_test(gather_test SRCS gather_test.cc DEPS tensor)
op_library(gather_op SRCS gather_op.cc gather_op.cu)
cc_test(scatter_test SRCS scatter_test.cc DEPS tensor)
op_library(scatter_op SRCS scatter_op.cc scatter_op.cu)
cc_library(net_op SRCS net_op.cc DEPS op_registry)
cc_test(net_op_test SRCS net_op_test.cc DEPS net_op)
op_library(add_op SRCS add_op.cc add_op.cu)
op_library(mean_op SRCS mean_op.cc mean_op.cu)
list(REMOVE_ITEM GENERAL_OPS
net_op
minus_op
mul_op
recurrent_op
scale_op)
op_library(net_op SRCS net_op.cc)
op_library(minus_op SRCS minus_op.cc minus_op.cu DEPS scale_op)
op_library(mul_op SRCS mul_op.cc mul_op.cu DEPS math_function)
op_library(rowwise_add_op SRCS rowwise_add_op.cu rowwise_add_op.cc)
op_library(recurrent_op SRCS recurrent_op.cc rnn/recurrent_op_utils.cc
DEPS framework_proto tensor operator net_op)
op_library(scale_op SRCS scale_op.cc scale_op.cu DEPS net_op)
op_library(sigmoid_op SRCS sigmoid_op.cc sigmoid_op.cu)
op_library(softmax_op SRCS softmax_op.cc softmax_op.cu)
op_library(gaussian_random_op SRCS gaussian_random_op.cc gaussian_random_op.cu)
op_library(cross_entropy_op SRCS cross_entropy_op.cc cross_entropy_op.cu)
op_library(fill_zeros_like_op SRCS fill_zeros_like_op.cc fill_zeros_like_op.cu)
foreach(src ${GENERAL_OPS})
op_library(${src} SRCS ${src}.cc ${src}.cu)
endforeach()
op_library(sgd_op SRCS sgd_op.cc sgd_op.cu)
set(GLOB_OP_LIB ${OP_LIBRARY} CACHE INTERNAL "Global OP library")
op_library(recurrent_op SRCS recurrent_op.cc rnn/recurrent_op_utils.cc
DEPS framework_proto tensor op_registry operator net_op)
op_library(uniform_random_op SRCS uniform_random_op.cc uniform_random_op.cu)
op_library(lookup_table_op SRCS lookup_table_op.cc lookup_table_op.cu)
op_library(scale_op SRCS scale_op.cc scale_op.cu DEPS net_op)
op_library(minus_op SRCS minus_op.cc minus_op.cu DEPS scale_op)
cc_test(gather_test SRCS gather_test.cc DEPS tensor)
cc_test(net_op_test SRCS net_op_test.cc DEPS net_op)
cc_test(scatter_test SRCS scatter_test.cc DEPS tensor)
......@@ -2,21 +2,5 @@ if(WITH_PYTHON)
cc_library(paddle_pybind SHARED
SRCS pybind.cc
DEPS pybind python backward
sgd_op
gather_op
scatter_op
add_op
mul_op
rowwise_add_op
sigmoid_op
softmax_op
mean_op
cross_entropy_op
recurrent_op
uniform_random_op
gaussian_random_op
fill_zeros_like_op
lookup_table_op
scale_op
minus_op)
${GLOB_OP_LIB})
endif(WITH_PYTHON)
......@@ -137,7 +137,7 @@ __all__ = [
'clip_layer',
'slice_projection',
'seq_slice_layer',
'kmax_sequence_score_layer',
'kmax_seq_score_layer',
'img_pool3d_layer',
'scale_shift_layer',
'img_conv3d_layer',
......@@ -5996,7 +5996,7 @@ def cross_entropy_over_beam(input, name=None):
Note that, if gold falls off the beam at search step t, then the cost is
calculated over the beam at step t.
This cost layer always works together with kmax_sequence_score_layer,
This cost layer always works together with kmax_seq_score_layer,
sub_nested_seq_layer, and sequence_slice_layer to trim the input to form a
sub-search space.
......@@ -6599,14 +6599,14 @@ def seq_slice_layer(input, starts, ends, name=None):
@wrap_name_default()
@layer_support()
def kmax_sequence_score_layer(input, name=None, beam_size=1):
def kmax_seq_score_layer(input, name=None, beam_size=1):
"""
This layer accepts one input which are scores over a sequence or a nested
sequence, and returns indices of beam_size sequences with highest scores.
.. code-block:: python
kmax_indices = kmax_sequence_score_layer(input=input_layer, beam_size)
kmax_indices = kmax_seq_score_layer(input=input_layer, beam_size)
:param name: The Layer Name.
......@@ -6619,10 +6619,10 @@ def kmax_sequence_score_layer(input, name=None, beam_size=1):
:return: LayerOutput object.
:rtype: LayerOutput
"""
assert isinstance(input, LayerOutput), ("kmax_sequence_score_layer "
assert isinstance(input, LayerOutput), ("kmax_seq_score_layer "
"accepts only one input.")
assert input.size == 1, (
"input of kmax_sequence_score_layer is a score"
"input of kmax_seq_score_layer is a score "
"over a sequence or a nested sequence, so its width must be 1.")
Layer(
......
......@@ -8,7 +8,7 @@ test_spp_layer test_bilinear_interp test_maxout test_bi_grumemory math_ops
test_seq_concat_reshape test_pad test_smooth_l1 test_multiplex_layer
test_prelu_layer test_row_conv test_detection_output_layer test_multibox_loss_layer
test_recursive_topology test_gated_unit_layer test_clip_layer test_row_l2_norm_layer
test_kmax_seq_socre_layer test_seq_select_layers test_scale_shift_layer
test_kmax_seq_socre_layer test_sub_nested_seq_select_layer test_scale_shift_layer
test_seq_slice_layer test_cross_entropy_over_beam test_pooling3D_layer
test_conv3d_layer test_deconv3d_layer test_BatchNorm3D)
......
......@@ -12,7 +12,7 @@ layers {
active_type: ""
}
layers {
name: "__kmax_sequence_score_layer_0__"
name: "__kmax_seq_score_layer_0__"
type: "kmax_seq_score"
active_type: ""
inputs {
......@@ -29,7 +29,7 @@ layers {
input_layer_name: "sentence_states"
}
inputs {
input_layer_name: "__kmax_sequence_score_layer_0__"
input_layer_name: "__kmax_seq_score_layer_0__"
}
}
layers {
......@@ -44,7 +44,7 @@ layers {
bias_parameter_name: "___fc_layer_0__.wbias"
}
layers {
name: "__kmax_sequence_score_layer_1__"
name: "__kmax_seq_score_layer_1__"
type: "kmax_seq_score"
active_type: ""
inputs {
......@@ -61,7 +61,7 @@ layers {
input_layer_name: "__sub_nested_seq_layer_0__"
}
inputs {
input_layer_name: "__kmax_sequence_score_layer_1__"
input_layer_name: "__kmax_seq_score_layer_1__"
}
select_first: true
}
......@@ -77,7 +77,7 @@ layers {
bias_parameter_name: "___fc_layer_1__.wbias"
}
layers {
name: "__kmax_sequence_score_layer_2__"
name: "__kmax_seq_score_layer_2__"
type: "kmax_seq_score"
active_type: ""
inputs {
......@@ -111,7 +111,7 @@ layers {
input_layer_name: "sentence_scores"
}
inputs {
input_layer_name: "__kmax_sequence_score_layer_0__"
input_layer_name: "__kmax_seq_score_layer_0__"
}
inputs {
input_layer_name: "sentences_ids"
......@@ -120,7 +120,7 @@ layers {
input_layer_name: "__fc_layer_0__"
}
inputs {
input_layer_name: "__kmax_sequence_score_layer_1__"
input_layer_name: "__kmax_seq_score_layer_1__"
}
inputs {
input_layer_name: "start_ids"
......@@ -129,7 +129,7 @@ layers {
input_layer_name: "__fc_layer_1__"
}
inputs {
input_layer_name: "__kmax_sequence_score_layer_2__"
input_layer_name: "__kmax_seq_score_layer_2__"
}
inputs {
input_layer_name: "end_ids"
......@@ -185,13 +185,13 @@ sub_models {
name: "root"
layer_names: "sentence_states"
layer_names: "sentence_scores"
layer_names: "__kmax_sequence_score_layer_0__"
layer_names: "__kmax_seq_score_layer_0__"
layer_names: "__sub_nested_seq_layer_0__"
layer_names: "__fc_layer_0__"
layer_names: "__kmax_sequence_score_layer_1__"
layer_names: "__kmax_seq_score_layer_1__"
layer_names: "__seq_slice_layer_0__"
layer_names: "__fc_layer_1__"
layer_names: "__kmax_sequence_score_layer_2__"
layer_names: "__kmax_seq_score_layer_2__"
layer_names: "sentences_ids"
layer_names: "start_ids"
layer_names: "end_ids"
......
......@@ -17,7 +17,7 @@ layers {
bias_parameter_name: "___fc_layer_0__.wbias"
}
layers {
name: "__kmax_sequence_score_layer_0__"
name: "__kmax_seq_score_layer_0__"
type: "kmax_seq_score"
active_type: ""
inputs {
......@@ -46,14 +46,14 @@ parameters {
initial_smart: false
}
input_layer_names: "input_seq"
output_layer_names: "__kmax_sequence_score_layer_0__"
output_layer_names: "__kmax_seq_score_layer_0__"
sub_models {
name: "root"
layer_names: "input_seq"
layer_names: "__fc_layer_0__"
layer_names: "__kmax_sequence_score_layer_0__"
layer_names: "__kmax_seq_score_layer_0__"
input_layer_names: "input_seq"
output_layer_names: "__kmax_sequence_score_layer_0__"
output_layer_names: "__kmax_seq_score_layer_0__"
is_recurrent_layer_group: false
}
......@@ -7,14 +7,14 @@ beam_size = 5
# the first beam expansion.
sentence_states = data_layer(name="sentence_states", size=32)
sentence_scores = data_layer(name="sentence_scores", size=1)
topk_sentence_ids = kmax_sequence_score_layer(
topk_sentence_ids = kmax_seq_score_layer(
input=sentence_scores, beam_size=beam_size)
# the second beam expansion.
topk_sen = sub_nested_seq_layer(
input=sentence_states, selected_indices=topk_sentence_ids)
start_pos_scores = fc_layer(input=topk_sen, size=1, act=LinearActivation())
topk_start_pos_ids = kmax_sequence_score_layer(
topk_start_pos_ids = kmax_seq_score_layer(
input=sentence_scores, beam_size=beam_size)
# the final beam expansion.
......@@ -22,7 +22,7 @@ topk_start_spans = seq_slice_layer(
input=topk_sen, starts=topk_start_pos_ids, ends=None)
end_pos_scores = fc_layer(
input=topk_start_spans, size=1, act=LinearActivation())
topk_end_pos_ids = kmax_sequence_score_layer(
topk_end_pos_ids = kmax_seq_score_layer(
input=end_pos_scores, beam_size=beam_size)
# define the cost
......
......@@ -4,6 +4,6 @@ from paddle.trainer_config_helpers import *
data = data_layer(name="input_seq", size=128)
scores = fc_layer(input=data, size=1, act=ExpActivation())
kmax_seq_id = kmax_sequence_score_layer(input=scores, beam_size=5)
kmax_seq_id = kmax_seq_score_layer(input=scores, beam_size=5)
outputs(kmax_seq_id)
......@@ -78,6 +78,8 @@ def init(**kwargs):
if 'use_gpu' in kwargs:
cp.g_command_config_args['use_gpu'] = kwargs['use_gpu']
if 'use_mkldnn' in kwargs:
cp.g_command_config_args['use_mkldnn'] = kwargs['use_mkldnn']
assert 'parallel_nn' not in kwargs, ("currently 'parallel_nn' is not "
"supported in v2 APIs.")
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册